Gemini model list sync ()

* Update defaultModels.js

add gemma-3-27b-it to v1BetaModels

* Update defaultModels.js

2025-03-30 model update

* Update defaultModels.js

remove text embedding models

* Update name and inputTokenLimit in modelMap.js

* Update gemini to load models from both endpoints
dedupe models
decide endpoint based on experimental status from fetch (see the sketch below)
add util script for maintainers
reduce cache time on gemini models to 1 day

* remove comment
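In short, the commit merges both endpoints' model lists and derives the API version from the fetched experimental flag rather than a name heuristic alone. A minimal sketch of that flow with toy data (illustrative only, not the literal diff below):

const v1Fetched = [{ id: "gemini-1.5-pro", experimental: false }];
const v1BetaFetched = [
  { id: "gemini-1.5-pro", experimental: true }, // duplicate of a v1 model, dropped
  { id: "gemini-2.5-pro-exp-03-25", experimental: true },
];
// Dedupe: a model already served on v1 wins over its v1beta twin.
const seen = new Set(v1Fetched.map((m) => m.id));
const merged = [...v1Fetched, ...v1BetaFetched.filter((m) => !seen.has(m.id))];
// Endpoint choice now follows the fetched flag instead of a pure name check.
const apiVersionFor = (id) =>
  merged.find((m) => m.id === id)?.experimental ? "v1beta" : "v1";
console.log(apiVersionFor("gemini-2.5-pro-exp-03-25")); // "v1beta"
console.log(apiVersionFor("gemini-1.5-pro")); // "v1"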

---------

Co-authored-by: DreamerC <dreamerwolf.tw@gmail.com>
Timothy Carambat 2025-04-07 13:45:16 -07:00 committed by GitHub
parent a5e7d87bed
commit 4ac900f645
5 changed files with 345 additions and 143 deletions

.gitignore

@@ -10,6 +10,7 @@ frontend/bundleinspector.html
#server
server/swagger/openapi.json
server/**/*.mjs
#embed
**/static/**

server/utils/AiProviders/gemini/defaultModels.js

@@ -1,29 +1,52 @@
const { MODEL_MAP } = require("../modelMap");
const stableModels = [
"gemini-pro",
"gemini-1.0-pro",
"gemini-1.5-pro-latest",
"gemini-1.5-flash-latest",
];
const experimentalModels = [
"gemini-1.5-pro-exp-0801",
"gemini-1.5-pro-exp-0827",
"gemini-1.5-flash-exp-0827",
"gemini-1.5-flash-8b-exp-0827",
"gemini-exp-1114",
"gemini-exp-1121",
"gemini-exp-1206",
"learnlm-1.5-pro-experimental",
"gemini-2.0-flash-exp",
// %STABLE_MODELS% - updated 2025-04-07T20:29:49.276Z
"gemini-1.5-pro-001",
"gemini-1.5-pro-002",
"gemini-1.5-pro",
"gemini-1.5-flash-001",
"gemini-1.5-flash",
"gemini-1.5-flash-002",
"gemini-1.5-flash-8b",
"gemini-1.5-flash-8b-001",
"gemini-2.0-flash",
"gemini-2.0-flash-001",
"gemini-2.0-flash-lite-001",
"gemini-2.0-flash-lite",
// %EOC_STABLE_MODELS%
];
// There are some models that are only available in the v1beta API
// and some models that are only available in the v1 API
// generally, v1beta models have `exp` in the name, but not always
// so we check for both against a static list as well.
const v1BetaModels = ["gemini-1.5-pro-latest", "gemini-1.5-flash-latest"];
// so we check for both against a static list as well via API.
const v1BetaModels = [
// %V1BETA_MODELS% - updated 2025-04-07T20:29:49.276Z
"gemini-1.5-pro-latest",
"gemini-1.5-flash-latest",
"gemini-1.5-flash-8b-latest",
"gemini-1.5-flash-8b-exp-0827",
"gemini-1.5-flash-8b-exp-0924",
"gemini-2.5-pro-exp-03-25",
"gemini-2.5-pro-preview-03-25",
"gemini-2.0-flash-exp",
"gemini-2.0-flash-exp-image-generation",
"gemini-2.0-flash-lite-preview-02-05",
"gemini-2.0-flash-lite-preview",
"gemini-2.0-pro-exp",
"gemini-2.0-pro-exp-02-05",
"gemini-exp-1206",
"gemini-2.0-flash-thinking-exp-01-21",
"gemini-2.0-flash-thinking-exp",
"gemini-2.0-flash-thinking-exp-1219",
"learnlm-1.5-pro-experimental",
"gemma-3-1b-it",
"gemma-3-4b-it",
"gemma-3-12b-it",
"gemma-3-27b-it",
// %EOC_V1BETA_MODELS%
];
const defaultGeminiModels = [
...stableModels.map((model) => ({
@@ -32,7 +55,7 @@ const defaultGeminiModels = [
contextWindow: MODEL_MAP.gemini[model],
experimental: false,
})),
...experimentalModels.map((model) => ({
...v1BetaModels.map((model) => ({
id: model,
name: model,
contextWindow: MODEL_MAP.gemini[model],
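For reference, each fallback entry built by the maps above resolves its context window from MODEL_MAP.gemini. For example, with the values in the updated modelMap.js, the "gemini-2.0-flash" entry comes out as (a sketch, assuming this file's require path):

const { MODEL_MAP } = require("../modelMap");
const entry = {
  id: "gemini-2.0-flash",
  name: "gemini-2.0-flash",
  contextWindow: MODEL_MAP.gemini["gemini-2.0-flash"], // 1048576
  experimental: false,
};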

server/utils/AiProviders/gemini/index.js

@@ -28,21 +28,11 @@ class GeminiLLM {
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY);
this.model =
modelPreference || process.env.GEMINI_LLM_MODEL_PREF || "gemini-pro";
const isExperimental = this.isExperimentalModel(this.model);
this.gemini = genAI.getGenerativeModel(
{ model: this.model },
{
apiVersion:
/**
* There are some models that are only available in the v1beta API
* and some models that are only available in the v1 API
* generally, v1beta models have `exp` in the name, but not always
* so we check for both against a static list as well.
* @see {v1BetaModels}
*/
this.model.includes("exp") || v1BetaModels.includes(this.model)
? "v1beta"
: "v1",
}
{ apiVersion: isExperimental ? "v1beta" : "v1" }
);
this.limits = {
history: this.promptWindowLimit() * 0.15,
@@ -59,7 +49,7 @@ class GeminiLLM {
this.cacheModelPath = path.resolve(cacheFolder, "models.json");
this.cacheAtPath = path.resolve(cacheFolder, ".cached_at");
this.#log(
`Initialized with model: ${this.model} (${this.promptWindowLimit()})`
`Initialized with model: ${this.model} ${isExperimental ? "[Experimental v1beta]" : "[Stable v1]"} - ctx: ${this.promptWindowLimit()}`
);
}
@@ -71,7 +61,7 @@
// from the current date. If it is, then we will refetch the API so that all the models are up
// to date.
static cacheIsStale() {
const MAX_STALE = 6.048e8; // 1 Week in MS
const MAX_STALE = 8.64e7; // 1 day in MS
if (!fs.existsSync(path.resolve(cacheFolder, ".cached_at"))) return true;
const now = Number(new Date());
const timestampMs = Number(
@@ -168,6 +158,28 @@ class GeminiLLM {
}
}
/**
* Checks if a model is experimental by reading from the cache if available, otherwise it will perform
* a blind check against the v1BetaModels list - which is manually maintained and updated.
* @param {string} modelName - The name of the model to check
* @returns {boolean} A boolean indicating if the model is experimental
*/
isExperimentalModel(modelName) {
if (
fs.existsSync(cacheFolder) &&
fs.existsSync(path.resolve(cacheFolder, "models.json"))
) {
const models = safeJsonParse(
fs.readFileSync(path.resolve(cacheFolder, "models.json"))
);
const model = models.find((model) => model.id === modelName);
if (!model) return false;
return model.experimental;
}
return modelName.includes("exp") || v1BetaModels.includes(modelName);
}
/**
* Fetches Gemini models from the Google Generative AI API
* @param {string} apiKey - The API key to use for the request
@@ -186,63 +198,125 @@ class GeminiLLM {
);
}
const url = new URL(
"https://generativelanguage.googleapis.com/v1beta/models"
);
url.searchParams.set("pageSize", limit);
url.searchParams.set("key", apiKey);
if (pageToken) url.searchParams.set("pageToken", pageToken);
let success = false;
const stableModels = [];
const allModels = [];
const models = await fetch(url.toString(), {
method: "GET",
headers: { "Content-Type": "application/json" },
})
.then((res) => res.json())
.then((data) => {
if (data.error) throw new Error(data.error.message);
return data.models ?? [];
// Fetch from v1
try {
const url = new URL(
"https://generativelanguage.googleapis.com/v1/models"
);
url.searchParams.set("pageSize", limit);
url.searchParams.set("key", apiKey);
if (pageToken) url.searchParams.set("pageToken", pageToken);
await fetch(url.toString(), {
method: "GET",
headers: { "Content-Type": "application/json" },
})
.then((models) => {
success = true;
return models
.filter(
(model) => !model.displayName.toLowerCase().includes("tuning")
)
.filter((model) =>
model.supportedGenerationMethods.includes("generateContent")
) // Only generateContent is supported
.map((model) => {
return {
id: model.name.split("/").pop(),
name: model.displayName,
contextWindow: model.inputTokenLimit,
experimental: model.name.includes("exp"),
};
});
})
.catch((e) => {
console.error(`Gemini:getGeminiModels`, e.message);
success = false;
return defaultGeminiModels;
});
if (success) {
console.log(
`\x1b[32m[GeminiLLM]\x1b[0m Writing cached models API response to disk.`
);
if (!fs.existsSync(cacheFolder))
fs.mkdirSync(cacheFolder, { recursive: true });
fs.writeFileSync(
path.resolve(cacheFolder, "models.json"),
JSON.stringify(models)
);
fs.writeFileSync(
path.resolve(cacheFolder, ".cached_at"),
new Date().getTime().toString()
);
.then((res) => res.json())
.then((data) => {
if (data.error) throw new Error(data.error.message);
return data.models ?? [];
})
.then((models) => {
return models
.filter(
(model) => !model.displayName?.toLowerCase()?.includes("tuning")
) // remove tuning models
.filter(
(model) =>
!model.description?.toLowerCase()?.includes("deprecated")
) // remove deprecated models (in comment)
.filter((model) =>
// Only generateContent is supported
model.supportedGenerationMethods.includes("generateContent")
)
.map((model) => {
stableModels.push(model.name);
allModels.push({
id: model.name.split("/").pop(),
name: model.displayName,
contextWindow: model.inputTokenLimit,
experimental: false,
});
});
})
.catch((e) => {
console.error(`Gemini:getGeminiModelsV1`, e.message);
return;
});
} catch (e) {
console.error(`Gemini:getGeminiModelsV1`, e.message);
}
return models;
// Fetch from v1beta
try {
const url = new URL(
"https://generativelanguage.googleapis.com/v1beta/models"
);
url.searchParams.set("pageSize", limit);
url.searchParams.set("key", apiKey);
if (pageToken) url.searchParams.set("pageToken", pageToken);
await fetch(url.toString(), {
method: "GET",
headers: { "Content-Type": "application/json" },
})
.then((res) => res.json())
.then((data) => {
if (data.error) throw new Error(data.error.message);
return data.models ?? [];
})
.then((models) => {
return models
.filter((model) => !stableModels.includes(model.name)) // remove stable models that are already in the v1 list
.filter(
(model) => !model.displayName?.toLowerCase()?.includes("tuning")
) // remove tuning models
.filter(
(model) =>
!model.description?.toLowerCase()?.includes("deprecated")
) // remove deprecated models (in comment)
.filter((model) =>
// Only generateContent is supported
model.supportedGenerationMethods.includes("generateContent")
)
.map((model) => {
allModels.push({
id: model.name.split("/").pop(),
name: model.displayName,
contextWindow: model.inputTokenLimit,
experimental: true,
});
});
})
.catch((e) => {
console.error(`Gemini:getGeminiModelsV1beta`, e.message);
return;
});
} catch (e) {
console.error(`Gemini:getGeminiModelsV1beta`, e.message);
}
if (allModels.length === 0) {
console.error(`Gemini:getGeminiModels - No models found`);
return defaultGeminiModels;
}
console.log(
`\x1b[32m[GeminiLLM]\x1b[0m Writing cached models API response to disk.`
);
if (!fs.existsSync(cacheFolder))
fs.mkdirSync(cacheFolder, { recursive: true });
fs.writeFileSync(
path.resolve(cacheFolder, "models.json"),
JSON.stringify(allModels)
);
fs.writeFileSync(
path.resolve(cacheFolder, ".cached_at"),
new Date().getTime().toString()
);
return allModels;
}
/**
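A short usage sketch of the merged fetch, assuming a valid GEMINI_API_KEY (hypothetical caller, not part of this commit):

const models = await GeminiLLM.fetchModels(process.env.GEMINI_API_KEY);
const beta = models.filter((m) => m.experimental);
console.log(`${models.length} models cached, ${beta.length} served via v1beta`);

If both endpoint fetches fail, fetchModels falls back to defaultGeminiModels, so callers always receive a usable list.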

server/utils/AiProviders/gemini/syncStaticLists.mjs

@@ -0,0 +1,79 @@
/**
* This is a script that syncs the static lists of models from the Gemini API
* so that maintainers can keep the fallback lists up to date.
*
* To run, cd into this directory and run:
* node syncStaticLists.mjs
*/
import fs from "fs";
import path from "path";
import dotenv from "dotenv";
import { MODEL_MAP } from "../modelMap.js";
dotenv.config({ path: `../../../.env.development` });
const existingCachePath = path.resolve("../../../storage/models/gemini");
// This will fetch all of the models from the Gemini API as well as post-process them
// to remove any models that are deprecated or experimental.
import { GeminiLLM } from "./index.js";
if (fs.existsSync(existingCachePath)) {
console.log("Removing existing cache so we can fetch fresh models from Gemini endpoints...");
fs.rmSync(existingCachePath, { recursive: true, force: true });
}
const models = await GeminiLLM.fetchModels(process.env.GEMINI_API_KEY);
function updateDefaultModelsFile(models) {
const stableModelKeys = models.filter((model) => !model.experimental).map((model) => model.id);
const v1BetaModelKeys = models.filter((model) => model.experimental).map((model) => model.id);
let defaultModelFileContents = fs.readFileSync(path.join("./defaultModels.js"), "utf8");
// Update the stable models between %STABLE_MODELS% and %EOC_STABLE_MODELS% comments
defaultModelFileContents = defaultModelFileContents.replace(
/%STABLE_MODELS%[\s\S]*?%EOC_STABLE_MODELS%/,
`%STABLE_MODELS% - updated ${new Date().toISOString()}\n"${stableModelKeys.join('",\n"')}",\n// %EOC_STABLE_MODELS%`
);
// Update the v1beta models between %V1BETA_MODELS% and %EOC_V1BETA_MODELS% comments
defaultModelFileContents = defaultModelFileContents.replace(
/%V1BETA_MODELS%[\s\S]*?%EOC_V1BETA_MODELS%/,
`%V1BETA_MODELS% - updated ${new Date().toISOString()}\n"${v1BetaModelKeys.join('",\n"')}",\n// %EOC_V1BETA_MODELS%`
);
fs.writeFileSync(path.join("./defaultModels.js"), defaultModelFileContents);
console.log("Updated defaultModels.js. Dont forget to `yarn lint` and commit!");
}
function updateModelMap(models) {
const existingModelMap = MODEL_MAP;
console.log("Updating modelMap.js `gemini` object...");
console.log(`Removed existing gemini object (${Object.keys(existingModelMap.gemini).length} models) from modelMap.js`);
existingModelMap.gemini = {};
for (const model of models) existingModelMap.gemini[model.id] = model.contextWindow;
console.log(`Updated modelMap.js 'gemini' object with ${Object.keys(existingModelMap.gemini).length} models from API`);
// Update the modelMap.js file
const contents = `/**
* The model name and context window for all known models
* that are available through providers which have discrete model options.
* This file is automatically generated by syncStaticLists.mjs
* and should not be edited manually.
*
* Last updated: ${new Date().toISOString()}
*/
const MODEL_MAP = {
${Object.entries(existingModelMap).map(([key, value]) => `${key}: ${JSON.stringify(value, null, 2)}`).join(',\n')}
};
module.exports = { MODEL_MAP };
`;
fs.writeFileSync(path.resolve("../modelMap.js"), contents);
console.log("Updated modelMap.js `gemini` object. Don't forget to `yarn lint` and commit!");
}
updateDefaultModelsFile(models);
updateModelMap(models);
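The marker swap above relies on a non-greedy [\s\S]*? match, which crosses newlines but stops at the first closing marker, so each block is rewritten independently. A toy check of the same pattern (illustrative):

const sample = '// %STABLE_MODELS% - old\n"a",\n// %EOC_STABLE_MODELS%';
const updated = sample.replace(
  /%STABLE_MODELS%[\s\S]*?%EOC_STABLE_MODELS%/,
  '%STABLE_MODELS% - updated\n"b",\n// %EOC_STABLE_MODELS%'
);
console.log(updated); // only the list between the markers changes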

server/utils/AiProviders/modelMap.js

@@ -1,46 +1,71 @@
/**
* The model name and context window for all known models
* that are available through providers which have discrete model options.
* This file is automatically generated by syncStaticLists.mjs
* and should not be edited manually.
*
* Last updated: 2025-04-07T20:29:49.277Z
*/
const MODEL_MAP = {
anthropic: {
"claude-instant-1.2": 100_000,
"claude-2.0": 100_000,
"claude-2.1": 200_000,
"claude-3-haiku-20240307": 200_000,
"claude-3-sonnet-20240229": 200_000,
"claude-3-opus-20240229": 200_000,
"claude-3-opus-latest": 200_000,
"claude-3-5-haiku-latest": 200_000,
"claude-3-5-haiku-20241022": 200_000,
"claude-3-5-sonnet-latest": 200_000,
"claude-3-5-sonnet-20241022": 200_000,
"claude-3-5-sonnet-20240620": 200_000,
"claude-3-7-sonnet-20250219": 200_000,
"claude-3-7-sonnet-latest": 200_000,
"claude-instant-1.2": 100000,
"claude-2.0": 100000,
"claude-2.1": 200000,
"claude-3-haiku-20240307": 200000,
"claude-3-sonnet-20240229": 200000,
"claude-3-opus-20240229": 200000,
"claude-3-opus-latest": 200000,
"claude-3-5-haiku-latest": 200000,
"claude-3-5-haiku-20241022": 200000,
"claude-3-5-sonnet-latest": 200000,
"claude-3-5-sonnet-20241022": 200000,
"claude-3-5-sonnet-20240620": 200000,
"claude-3-7-sonnet-20250219": 200000,
"claude-3-7-sonnet-latest": 200000,
},
cohere: {
"command-r": 128_000,
"command-r-plus": 128_000,
command: 4_096,
"command-light": 4_096,
"command-nightly": 8_192,
"command-light-nightly": 8_192,
"command-r": 128000,
"command-r-plus": 128000,
command: 4096,
"command-light": 4096,
"command-nightly": 8192,
"command-light-nightly": 8192,
},
gemini: {
"gemini-pro": 30_720,
"gemini-1.0-pro": 30_720,
"gemini-1.5-flash-latest": 1_048_576,
"gemini-1.5-pro-latest": 2_097_152,
"gemini-1.5-pro-exp-0801": 2_097_152,
"gemini-1.5-pro-exp-0827": 2_097_152,
"gemini-1.5-flash-exp-0827": 1_048_576,
"gemini-1.5-flash-8b-exp-0827": 1_048_576,
"gemini-exp-1114": 32_767,
"gemini-exp-1121": 32_767,
"gemini-exp-1206": 32_767,
"learnlm-1.5-pro-experimental": 32_767,
"gemini-2.0-flash-exp": 1_048_576,
"gemini-1.5-pro-001": 2000000,
"gemini-1.5-pro-002": 2000000,
"gemini-1.5-pro": 2000000,
"gemini-1.5-flash-001": 1000000,
"gemini-1.5-flash": 1000000,
"gemini-1.5-flash-002": 1000000,
"gemini-1.5-flash-8b": 1000000,
"gemini-1.5-flash-8b-001": 1000000,
"gemini-2.0-flash": 1048576,
"gemini-2.0-flash-001": 1048576,
"gemini-2.0-flash-lite-001": 1048576,
"gemini-2.0-flash-lite": 1048576,
"gemini-1.5-pro-latest": 2000000,
"gemini-1.5-flash-latest": 1000000,
"gemini-1.5-flash-8b-latest": 1000000,
"gemini-1.5-flash-8b-exp-0827": 1000000,
"gemini-1.5-flash-8b-exp-0924": 1000000,
"gemini-2.5-pro-exp-03-25": 1048576,
"gemini-2.5-pro-preview-03-25": 1048576,
"gemini-2.0-flash-exp": 1048576,
"gemini-2.0-flash-exp-image-generation": 1048576,
"gemini-2.0-flash-lite-preview-02-05": 1048576,
"gemini-2.0-flash-lite-preview": 1048576,
"gemini-2.0-pro-exp": 1048576,
"gemini-2.0-pro-exp-02-05": 1048576,
"gemini-exp-1206": 1048576,
"gemini-2.0-flash-thinking-exp-01-21": 1048576,
"gemini-2.0-flash-thinking-exp": 1048576,
"gemini-2.0-flash-thinking-exp-1219": 1048576,
"learnlm-1.5-pro-experimental": 32767,
"gemma-3-1b-it": 32768,
"gemma-3-4b-it": 32768,
"gemma-3-12b-it": 32768,
"gemma-3-27b-it": 131072,
},
groq: {
"gemma2-9b-it": 8192,
@@ -52,32 +77,32 @@ const MODEL_MAP = {
"mixtral-8x7b-32768": 32768,
},
openai: {
"gpt-3.5-turbo": 16_385,
"gpt-3.5-turbo-1106": 16_385,
"gpt-4o": 128_000,
"gpt-4o-2024-08-06": 128_000,
"gpt-4o-2024-05-13": 128_000,
"gpt-4o-mini": 128_000,
"gpt-4o-mini-2024-07-18": 128_000,
"gpt-4-turbo": 128_000,
"gpt-4-1106-preview": 128_000,
"gpt-4-turbo-preview": 128_000,
"gpt-4": 8_192,
"gpt-4-32k": 32_000,
"o1-preview": 128_000,
"o1-preview-2024-09-12": 128_000,
"o1-mini": 128_000,
"o1-mini-2024-09-12": 128_000,
"o3-mini": 200_000,
"o3-mini-2025-01-31": 200_000,
"gpt-3.5-turbo": 16385,
"gpt-3.5-turbo-1106": 16385,
"gpt-4o": 128000,
"gpt-4o-2024-08-06": 128000,
"gpt-4o-2024-05-13": 128000,
"gpt-4o-mini": 128000,
"gpt-4o-mini-2024-07-18": 128000,
"gpt-4-turbo": 128000,
"gpt-4-1106-preview": 128000,
"gpt-4-turbo-preview": 128000,
"gpt-4": 8192,
"gpt-4-32k": 32000,
"o1-preview": 128000,
"o1-preview-2024-09-12": 128000,
"o1-mini": 128000,
"o1-mini-2024-09-12": 128000,
"o3-mini": 200000,
"o3-mini-2025-01-31": 200000,
},
deepseek: {
"deepseek-chat": 128_000,
"deepseek-coder": 128_000,
"deepseek-reasoner": 128_000,
"deepseek-chat": 128000,
"deepseek-coder": 128000,
"deepseek-reasoner": 128000,
},
xai: {
"grok-beta": 131_072,
"grok-beta": 131072,
},
};