const { TokenManager } = require("../tiktoken");

/**
 * @typedef {import("openai/streaming").Stream<import("openai").OpenAI.ChatCompletionChunk>} OpenAICompatibleStream
 * @typedef {(reportedUsage: {[key: string]: number, completion_tokens?: number, prompt_tokens?: number}) => StreamMetrics} EndMeasurementFunction
 * @typedef {Array<{content: string}>} Messages
 */

/**
 * @typedef {Object} StreamMetrics
 * @property {number} prompt_tokens - the number of tokens in the prompt
 * @property {number} completion_tokens - the number of tokens in the completion
 * @property {number} total_tokens - the total number of tokens
 * @property {number} outputTps - the tokens per second of the output
 * @property {number} duration - the duration of the stream
 */

/**
 * @typedef {Object} MonitoredStream
 * @property {number} start - the start time of the stream
 * @property {number} duration - the duration of the stream
 * @property {StreamMetrics} metrics - the metrics of the stream
 * @property {EndMeasurementFunction} endMeasurement - the method to end the stream and calculate the metrics
 */

class LLMPerformanceMonitor {
  static tokenManager = new TokenManager();

  /**
   * Counts the tokens in the messages.
   * @param {Messages} messages - the messages sent to the LLM so we can estimate prompt tokens, since most providers do not return this on stream
   * @returns {number}
   */
  static countTokens(messages = []) {
    try {
      return this.tokenManager.statsFrom(messages);
    } catch (e) {
      return 0;
    }
  }

  /**
   * Wraps an in-flight promise and reports the duration (in seconds) until it resolves.
   * @param {Promise<any>} func - the promise being measured
   * @returns {Promise<{output: any, duration: number}>}
   */
  static measureAsyncFunction(func) {
    return (async () => {
      const start = Date.now();
      const output = await func; // is a promise
      const end = Date.now();
      return { output, duration: (end - start) / 1000 };
    })();
  }

  /**
   * Wraps a completion stream and attaches a start time and duration property to the stream.
   * Also attaches an `endMeasurement` method to the stream that will calculate the duration of the stream and its metrics.
   * @param {Promise<OpenAICompatibleStream>} func
   * @param {Messages} messages - the messages sent to the LLM so we can estimate prompt tokens, since most providers do not return this on stream
   * @param {boolean} runPromptTokenCalculation - whether to run the prompt token calculation to estimate the `prompt_tokens` metric. This is useful for providers that do not return this on stream.
   * @returns {Promise<MonitoredStream>}
   */
  static async measureStream(
    func,
    messages = [],
    runPromptTokenCalculation = true
  ) {
    const stream = await func;
    stream.start = Date.now();
    stream.duration = 0;
    stream.metrics = {
      completion_tokens: 0,
      prompt_tokens: runPromptTokenCalculation ? this.countTokens(messages) : 0,
      total_tokens: 0,
      outputTps: 0,
      duration: 0,
    };

    stream.endMeasurement = (reportedUsage = {}) => {
      const end = Date.now();
      const duration = (end - stream.start) / 1000;

      // Merge the provider-reported usage over the estimated metrics so any
      // reported token counts take precedence in the calculations below.
      stream.metrics = {
        ...stream.metrics,
        ...reportedUsage,
      };

      stream.metrics.total_tokens =
        stream.metrics.prompt_tokens + (stream.metrics.completion_tokens || 0);
      stream.metrics.outputTps = stream.metrics.completion_tokens / duration;
      stream.metrics.duration = duration;
      return stream.metrics;
    };
    return stream;
  }
}

module.exports = {
  LLMPerformanceMonitor,
};
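
/*
 * Usage sketch (illustrative only): how a provider wrapper might consume this
 * monitor. The OpenAI client, model name, and end-of-stream token counting
 * below are assumptions for the example, not part of this module.
 *
 *   const { OpenAI } = require("openai");
 *   const { LLMPerformanceMonitor } = require("./LLMPerformanceMonitor");
 *
 *   async function streamChat(messages) {
 *     const client = new OpenAI();
 *     const stream = await LLMPerformanceMonitor.measureStream(
 *       client.chat.completions.create({ model: "gpt-4o", messages, stream: true }),
 *       messages // used to estimate prompt_tokens up front
 *     );
 *
 *     let text = "";
 *     for await (const chunk of stream) {
 *       text += chunk.choices[0]?.delta?.content || "";
 *     }
 *
 *     // If the provider reported a usage block, pass it in so it overrides
 *     // the estimates; otherwise estimate completion tokens from the output.
 *     const metrics = stream.endMeasurement({
 *       completion_tokens: LLMPerformanceMonitor.countTokens([{ content: text }]),
 *     });
 *     return { text, metrics };
 *   }
 */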