const { TokenManager } = require("../tiktoken");

/**
 * @typedef {import("openai/streaming").Stream<import("openai").OpenAI.ChatCompletionChunk>} OpenAICompatibleStream
 * @typedef {(reportedUsage: {[key: string]: number, completion_tokens?: number, prompt_tokens?: number}) => StreamMetrics} EndMeasurementFunction
 * @typedef {Array<{content: string}>} Messages
 */

/**
 * @typedef {Object} StreamMetrics
 * @property {number} prompt_tokens - the number of tokens in the prompt
 * @property {number} completion_tokens - the number of tokens in the completion
 * @property {number} total_tokens - the total number of tokens (prompt + completion)
 * @property {number} outputTps - the output tokens per second
 * @property {number} duration - the duration of the stream, in seconds
 */
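
// Illustrative example (not from the original source) of the shape a StreamMetrics
// object takes once `endMeasurement` has run; the numbers are made up but self-consistent:
//   { prompt_tokens: 421, completion_tokens: 96, total_tokens: 517, outputTps: 38.4, duration: 2.5 }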

/**
 * @typedef {Object} MonitoredStream
 * @property {number} start - the start time of the stream (ms since epoch)
 * @property {number} duration - the duration of the stream, in seconds
 * @property {StreamMetrics} metrics - the metrics of the stream
 * @property {EndMeasurementFunction} endMeasurement - ends the measurement and calculates the final metrics
 */

class LLMPerformanceMonitor {
  static tokenManager = new TokenManager();

  /**
   * Counts the tokens in the messages.
   * @param {Messages} messages - the messages sent to the LLM, used to estimate prompt tokens since most providers do not report them on stream
   * @returns {number}
   */
  static countTokens(messages = []) {
    try {
      return this.tokenManager.statsFrom(messages);
    } catch (e) {
      // If token counting fails, fall back to 0 so the metrics still resolve.
      return 0;
    }
  }
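
  // Usage sketch (illustrative, not part of the original module): estimating the
  // prompt token count for a pair of chat messages. statsFrom is expected to
  // return a single token count for the supplied messages.
  //   const promptTokens = LLMPerformanceMonitor.countTokens([
  //     { content: "You are a helpful assistant." },
  //     { content: "Summarize this document." },
  //   ]);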

  /**
   * Wraps an in-flight promise (e.g. a non-streaming LLM call) and measures the
   * duration (in seconds) of the call.
   * @param {Promise<any>} func - the in-flight promise to await and time
   * @returns {Promise<{output: any, duration: number}>}
   */
  static measureAsyncFunction(func) {
    return (async () => {
      const start = Date.now();
      const output = await func; // func is an in-flight promise
      const end = Date.now();
      return { output, duration: (end - start) / 1000 };
    })();
  }
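
  // Usage sketch (illustrative; `client.chat.completions.create(...)` stands in
  // for any promise-returning, non-streaming LLM call):
  //   const { output: completion, duration } =
  //     await LLMPerformanceMonitor.measureAsyncFunction(
  //       client.chat.completions.create({ model, messages, stream: false })
  //     );
  //   console.log(`Finished in ${duration}s`);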

  /**
   * Wraps a completion stream and attaches a start time and duration property to the stream.
   * Also attaches an `endMeasurement` method to the stream that will calculate the duration of the stream and its metrics.
   * @param {Promise<OpenAICompatibleStream>} func - the in-flight stream request to monitor
   * @param {Messages} messages - the messages sent to the LLM, used to estimate prompt tokens since most providers do not report them on stream
   * @param {boolean} runPromptTokenCalculation - whether to estimate the `prompt_tokens` metric from `messages`. Useful for providers that do not report usage on stream.
   * @returns {Promise<MonitoredStream>}
   */
  static async measureStream(
    func,
    messages = [],
    runPromptTokenCalculation = true
  ) {
    const stream = await func;
    stream.start = Date.now();
    stream.duration = 0;
    stream.metrics = {
      completion_tokens: 0,
      prompt_tokens: runPromptTokenCalculation ? this.countTokens(messages) : 0,
      total_tokens: 0,
      outputTps: 0,
      duration: 0,
    };

    stream.endMeasurement = (reportedUsage = {}) => {
      const end = Date.now();
      const duration = (end - stream.start) / 1000;

      // Merge the provider-reported usage into the existing metrics so the
      // totals below are computed from the best available numbers.
      stream.metrics = {
        ...stream.metrics,
        ...reportedUsage,
      };

      stream.metrics.total_tokens =
        stream.metrics.prompt_tokens + (stream.metrics.completion_tokens || 0);
      stream.metrics.outputTps = stream.metrics.completion_tokens / duration;
      stream.metrics.duration = duration;
      return stream.metrics;
    };
    return stream;
  }
}
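
// Usage sketch (illustrative, not part of the original module). Here
// `client.chat.completions.create(...)` stands in for any OpenAI-compatible
// streaming call, and the final chunk's `usage` object (when the provider
// sends one) is passed to `endMeasurement`:
//   const stream = await LLMPerformanceMonitor.measureStream(
//     client.chat.completions.create({ model, messages, stream: true }),
//     messages // used to estimate prompt_tokens up front
//   );
//   let usage = {};
//   for await (const chunk of stream) {
//     if (chunk.usage) usage = chunk.usage; // some providers report usage on the last chunk
//   }
//   const metrics = stream.endMeasurement(usage);
//   // => { prompt_tokens, completion_tokens, total_tokens, outputTps, duration }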

module.exports = {
  LLMPerformanceMonitor,
};