const { getEncodingNameForModel, getEncoding } = require("js-tiktoken");
/**
 * @class TokenManager
 *
 * @notice
 * We cannot estimate tokens here the way we do in the collector, because
 * estimation requires knowing the model in use. We also do reverse
 * tokenization of the chat history here during cannonballing, so we are
 * stuck doing the actual tokenization and encoding until we figure out
 * what to do with prompt overflows.
 */
class TokenManager {
  static instance = null;
  static currentModel = null;

  constructor(model = "gpt-3.5-turbo") {
    // Reuse the cached singleton when the requested model has not changed.
    if (TokenManager.instance && TokenManager.currentModel === model) {
      this.log("Returning existing instance for model:", model);
      return TokenManager.instance;
    }

    this.model = model;
    this.encoderName = this.#getEncodingFromModel(model);
    this.encoder = getEncoding(this.encoderName);

    TokenManager.instance = this;
    TokenManager.currentModel = model;
    this.log("Initialized new TokenManager instance for model:", model);
    return this;
  }

  log(text, ...args) {
    console.log(`\x1b[35m[TokenManager]\x1b[0m ${text}`, ...args);
  }

  #getEncodingFromModel(model) {
    try {
      return getEncodingNameForModel(model);
    } catch {
      return "cl100k_base";
    }
  }

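  // Note (illustrative): getEncodingNameForModel throws for model names
  // js-tiktoken does not recognize, e.g. a hypothetical "my-local-model",
  // in which case we fall back to the cl100k_base encoding.
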
  /**
   * Tokenizes a string. An empty array is passed as disallowedSpecial so that
   * all special tokens are handled as plain text and tokenized, rather than
   * throwing an error.
   * @param {string} input
   * @returns {number[]}
   */
  tokensFromString(input = "") {
    try {
      const tokens = this.encoder.encode(String(input), undefined, []);
      return tokens;
    } catch (e) {
      console.error(e);
      return [];
    }
  }

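  // For example (illustrative): encoder.encode("<|endoftext|>") would throw
  // under js-tiktoken's default disallowedSpecial of "all", whereas
  // tokensFromString("<|endoftext|>") tokenizes the marker as ordinary text.
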
  /**
   * Converts an array of tokens back to a string.
   * @param {number[]} tokens
   * @returns {string}
   */
  bytesFromTokens(tokens = []) {
    const bytes = this.encoder.decode(tokens);
    return bytes;
  }

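  // For example (illustrative): decoding inverts encoding, so
  // bytesFromTokens(tokensFromString("hello world")) === "hello world".
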
  /**
   * Counts the number of tokens in a string.
   * @param {string} input
   * @returns {number}
   */
  countFromString(input = "") {
    const tokens = this.tokensFromString(input);
    return tokens.length;
  }

  /**
   * Estimates the number of tokens in a string or an array of chat messages.
   * @param {string | Array<{content: string}>} input
   * @returns {number}
   */
  statsFrom(input) {
    if (typeof input === "string") return this.countFromString(input);

    // Why estimate instead of counting exactly? See Item 6 of
    // https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
    // The only option is to estimate. From repeated testing using the static
    // values in the code we are always 2 off, which means as of Nov 1, 2023
    // the additional factor on ln: 476 changed from 3 to 5.
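    // Worked example (illustrative): two messages whose contents tokenize to
    // 5 and 7 tokens estimate to (2 * 3) + (5 + 7) + 5 = 23 tokens.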
    if (Array.isArray(input)) {
      const perMessageFactorTokens = input.length * 3;
      const tokensFromContent = input.reduce(
        (a, b) => a + this.countFromString(b.content),
        0
      );
      const diffCoefficient = 5;
      return perMessageFactorTokens + tokensFromContent + diffCoefficient;
    }

    throw new Error("Not a supported tokenized format.");
  }
}

module.exports = {
  TokenManager,
};
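
// Illustrative usage sketch (an assumption, not part of the original module):
// run this file directly to exercise the singleton cache and token counting.
// The printed token counts are model-dependent approximations.
if (require.main === module) {
  const manager = new TokenManager("gpt-3.5-turbo");
  manager.log("count:", manager.countFromString("Hello world"));
  manager.log(
    "estimate:",
    manager.statsFrom([{ content: "Hello" }, { content: "Hi there!" }])
  );
  // Constructing again with the same model returns the cached instance.
  manager.log("cached:", new TokenManager("gpt-3.5-turbo") === manager);
}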