Dynamic fetching of TogetherAI models (#3017)

* implement dynamic fetching of togetherai models
* implement caching for togetherai models
* update gitignore for togetherai model caching
* Remove models.json from git tracking
* Remove .cached_at from git tracking
* lint
* revert unneeded change

---------

Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
2025-04-17 18:18:11 +00:00 · 2025-01-25 03:06:59 +08:00 · 2025-01-25 03:06:59 +08:00 · 48dcb22b25
commit 48dcb22b25
parent 273d116586
8 changed files with 133 additions and 818 deletions
--- a/frontend/src/components/LLMSelection/TogetherAiOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/TogetherAiOptions/index.jsx
@@ -2,6 +2,9 @@ import System from "@/models/system";
 import { useState, useEffect } from "react";

 export default function TogetherAiOptions({ settings }) {
+  const [inputValue, setInputValue] = useState(settings?.TogetherAiApiKey);
+  const [apiKey, setApiKey] = useState(settings?.TogetherAiApiKey);
+
  return (
    <div className="flex gap-[36px] mt-1.5">
      <div className="flex flex-col w-60">
@@ -17,37 +20,49 @@ export default function TogetherAiOptions({ settings }) {
          required={true}
          autoComplete="off"
          spellCheck={false}
+          onChange={(e) => setInputValue(e.target.value)}
+          onBlur={() => setApiKey(inputValue)}
        />
      </div>
      {!settings?.credentialsOnly && (
-        <TogetherAiModelSelection settings={settings} />
+        <TogetherAiModelSelection settings={settings} apiKey={apiKey} />
      )}
    </div>
  );
 }
-function TogetherAiModelSelection({ settings }) {
+
+function TogetherAiModelSelection({ settings, apiKey }) {
  const [groupedModels, setGroupedModels] = useState({});
  const [loading, setLoading] = useState(true);

  useEffect(() => {
    async function findCustomModels() {
      setLoading(true);
-      const { models } = await System.customModels("togetherai");
-
-      if (models?.length > 0) {
-        const modelsByOrganization = models.reduce((acc, model) => {
-          acc[model.organization] = acc[model.organization] || [];
-          acc[model.organization].push(model);
-          return acc;
-        }, {});
-
-        setGroupedModels(modelsByOrganization);
+      try {
+        const key = apiKey === "*".repeat(20) ? null : apiKey;
+        const { models } = await System.customModels("togetherai", key);
+        if (models?.length > 0) {
+          const modelsByOrganization = models.reduce((acc, model) => {
+            if (model.type !== "chat") return acc; // Only show chat models in dropdown
+            const org = model.organization || "Unknown";
+            acc[org] = acc[org] || [];
+            acc[org].push({
+              id: model.id,
+              name: model.name || model.id,
+              organization: org,
+              maxLength: model.maxLength,
+            });
+            return acc;
+          }, {});
+          setGroupedModels(modelsByOrganization);
+        }
+      } catch (error) {
+        console.error("Error fetching Together AI models:", error);
      }
-
      setLoading(false);
    }
    findCustomModels();
-  }, []);
+  }, [apiKey]);

  if (loading || Object.keys(groupedModels).length === 0) {
    return (
--- a/server/storage/models/.gitignore
+++ b/server/storage/models/.gitignore
@@ -5,4 +5,5 @@ openrouter
 apipie
 novita
 mixedbread-ai*
-gemini
+gemini
+togetherAi
--- a/server/utils/AiProviders/togetherAi/index.js
+++ b/server/utils/AiProviders/togetherAi/index.js
@@ -5,10 +5,76 @@ const {
 const {
  LLMPerformanceMonitor,
 } = require("../../helpers/chat/LLMPerformanceMonitor");
+const fs = require("fs");
+const path = require("path");
+const { safeJsonParse } = require("../../http");

-function togetherAiModels() {
-  const { MODELS } = require("./models.js");
-  return MODELS || {};
+const cacheFolder = path.resolve(
+  process.env.STORAGE_DIR
+    ? path.resolve(process.env.STORAGE_DIR, "models", "togetherAi")
+    : path.resolve(__dirname, `../../../storage/models/togetherAi`)
+);
+
+async function togetherAiModels(apiKey = null) {
+  const cacheModelPath = path.resolve(cacheFolder, "models.json");
+  const cacheAtPath = path.resolve(cacheFolder, ".cached_at");
+
+  // If cache exists and is less than 1 week old, use it
+  if (fs.existsSync(cacheModelPath) && fs.existsSync(cacheAtPath)) {
+    const now = Number(new Date());
+    const timestampMs = Number(fs.readFileSync(cacheAtPath));
+    if (now - timestampMs <= 6.048e8) {
+      // 1 Week in MS
+      return safeJsonParse(
+        fs.readFileSync(cacheModelPath, { encoding: "utf-8" }),
+        []
+      );
+    }
+  }
+
+  try {
+    const { OpenAI: OpenAIApi } = require("openai");
+    const openai = new OpenAIApi({
+      baseURL: "https://api.together.xyz/v1",
+      apiKey: apiKey || process.env.TOGETHER_AI_API_KEY || null,
+    });
+
+    const response = await openai.models.list();
+
+    // Filter and transform models into the expected format
+    // Only include chat models
+    const validModels = response.body
+      .filter((model) => ["chat"].includes(model.type))
+      .map((model) => ({
+        id: model.id,
+        name: model.display_name || model.id,
+        organization: model.organization || "Unknown",
+        type: model.type,
+        maxLength: model.context_length || 4096,
+      }));
+
+    // Cache the results
+    if (!fs.existsSync(cacheFolder))
+      fs.mkdirSync(cacheFolder, { recursive: true });
+    fs.writeFileSync(cacheModelPath, JSON.stringify(validModels), {
+      encoding: "utf-8",
+    });
+    fs.writeFileSync(cacheAtPath, String(Number(new Date())), {
+      encoding: "utf-8",
+    });
+
+    return validModels;
+  } catch (error) {
+    console.error("Error fetching Together AI models:", error);
+    // If cache exists but is stale, still use it as fallback
+    if (fs.existsSync(cacheModelPath)) {
+      return safeJsonParse(
+        fs.readFileSync(cacheModelPath, { encoding: "utf-8" }),
+        []
+      );
+    }
+    return [];
+  }
 }

 class TogetherAiLLM {
@@ -60,29 +126,34 @@ class TogetherAiLLM {
    return content.flat();
  }

-  allModelInformation() {
-    return togetherAiModels();
+  async allModelInformation() {
+    const models = await togetherAiModels();
+    return models.reduce((acc, model) => {
+      acc[model.id] = model;
+      return acc;
+    }, {});
  }

  streamingEnabled() {
    return "streamGetChatCompletion" in this;
  }

-  static promptWindowLimit(modelName) {
-    const availableModels = togetherAiModels();
-    return availableModels[modelName]?.maxLength || 4096;
+  static async promptWindowLimit(modelName) {
+    const models = await togetherAiModels();
+    const model = models.find((m) => m.id === modelName);
+    return model?.maxLength || 4096;
  }

-  // Ensure the user set a value for the token limit
-  // and if undefined - assume 4096 window.
-  promptWindowLimit() {
-    const availableModels = this.allModelInformation();
-    return availableModels[this.model]?.maxLength || 4096;
+  async promptWindowLimit() {
+    const models = await togetherAiModels();
+    const model = models.find((m) => m.id === this.model);
+    return model?.maxLength || 4096;
  }

  async isValidChatCompletionModel(model = "") {
-    const availableModels = this.allModelInformation();
-    return availableModels.hasOwnProperty(model);
+    const models = await togetherAiModels();
+    const foundModel = models.find((m) => m.id === model);
+    return foundModel && foundModel.type === "chat";
  }

  constructPrompt({
--- a/server/utils/AiProviders/togetherAi/models.js
+++ b/server/utils/AiProviders/togetherAi/models.js
@@ -1,622 +0,0 @@
-const MODELS = {
-  "zero-one-ai/Yi-34B-Chat": {
-    id: "zero-one-ai/Yi-34B-Chat",
-    organization: "01.AI",
-    name: "01-ai Yi Chat (34B)",
-    maxLength: 4096,
-  },
-  "allenai/OLMo-7B-Instruct": {
-    id: "allenai/OLMo-7B-Instruct",
-    organization: "AllenAI",
-    name: "OLMo Instruct (7B)",
-    maxLength: 2048,
-  },
-  "Austism/chronos-hermes-13b": {
-    id: "Austism/chronos-hermes-13b",
-    organization: "Austism",
-    name: "Chronos Hermes (13B)",
-    maxLength: 2048,
-  },
-  "carson/ml318br": {
-    id: "carson/ml318br",
-    organization: "carson",
-    name: "carson ml318br",
-    maxLength: 8192,
-  },
-  "cognitivecomputations/dolphin-2.5-mixtral-8x7b": {
-    id: "cognitivecomputations/dolphin-2.5-mixtral-8x7b",
-    organization: "cognitivecomputations",
-    name: "Dolphin 2.5 Mixtral 8x7b",
-    maxLength: 32768,
-  },
-  "databricks/dbrx-instruct": {
-    id: "databricks/dbrx-instruct",
-    organization: "Databricks",
-    name: "DBRX Instruct",
-    maxLength: 32768,
-  },
-  "deepseek-ai/deepseek-llm-67b-chat": {
-    id: "deepseek-ai/deepseek-llm-67b-chat",
-    organization: "DeepSeek",
-    name: "DeepSeek LLM Chat (67B)",
-    maxLength: 4096,
-  },
-  "deepseek-ai/deepseek-coder-33b-instruct": {
-    id: "deepseek-ai/deepseek-coder-33b-instruct",
-    organization: "DeepSeek",
-    name: "Deepseek Coder Instruct (33B)",
-    maxLength: 16384,
-  },
-  "garage-bAInd/Platypus2-70B-instruct": {
-    id: "garage-bAInd/Platypus2-70B-instruct",
-    organization: "garage-bAInd",
-    name: "Platypus2 Instruct (70B)",
-    maxLength: 4096,
-  },
-  "google/gemma-2-9b-it": {
-    id: "google/gemma-2-9b-it",
-    organization: "google",
-    name: "Gemma-2 Instruct (9B)",
-    maxLength: 8192,
-  },
-  "google/gemma-2b-it": {
-    id: "google/gemma-2b-it",
-    organization: "Google",
-    name: "Gemma Instruct (2B)",
-    maxLength: 8192,
-  },
-  "google/gemma-2-27b-it": {
-    id: "google/gemma-2-27b-it",
-    organization: "Google",
-    name: "Gemma-2 Instruct (27B)",
-    maxLength: 8192,
-  },
-  "google/gemma-7b-it": {
-    id: "google/gemma-7b-it",
-    organization: "Google",
-    name: "Gemma Instruct (7B)",
-    maxLength: 8192,
-  },
-  "gradientai/Llama-3-70B-Instruct-Gradient-1048k": {
-    id: "gradientai/Llama-3-70B-Instruct-Gradient-1048k",
-    organization: "gradientai",
-    name: "Llama-3 70B Instruct Gradient 1048K",
-    maxLength: 1048576,
-  },
-  "Gryphe/MythoMax-L2-13b": {
-    id: "Gryphe/MythoMax-L2-13b",
-    organization: "Gryphe",
-    name: "MythoMax-L2 (13B)",
-    maxLength: 4096,
-  },
-  "Gryphe/MythoMax-L2-13b-Lite": {
-    id: "Gryphe/MythoMax-L2-13b-Lite",
-    organization: "Gryphe",
-    name: "Gryphe MythoMax L2 Lite (13B)",
-    maxLength: 4096,
-  },
-  "llava-hf/llava-v1.6-mistral-7b-hf": {
-    id: "llava-hf/llava-v1.6-mistral-7b-hf",
-    organization: "Haotian Liu",
-    name: "LLaVa-Next (Mistral-7B)",
-    maxLength: 4096,
-  },
-  "HuggingFaceH4/zephyr-7b-beta": {
-    id: "HuggingFaceH4/zephyr-7b-beta",
-    organization: "HuggingFace",
-    name: "Zephyr-7B-ß",
-    maxLength: 32768,
-  },
-  "togethercomputer/Koala-7B": {
-    id: "togethercomputer/Koala-7B",
-    organization: "LM Sys",
-    name: "Koala (7B)",
-    maxLength: 2048,
-  },
-  "lmsys/vicuna-7b-v1.3": {
-    id: "lmsys/vicuna-7b-v1.3",
-    organization: "LM Sys",
-    name: "Vicuna v1.3 (7B)",
-    maxLength: 2048,
-  },
-  "lmsys/vicuna-13b-v1.5-16k": {
-    id: "lmsys/vicuna-13b-v1.5-16k",
-    organization: "LM Sys",
-    name: "Vicuna v1.5 16K (13B)",
-    maxLength: 16384,
-  },
-  "lmsys/vicuna-13b-v1.5": {
-    id: "lmsys/vicuna-13b-v1.5",
-    organization: "LM Sys",
-    name: "Vicuna v1.5 (13B)",
-    maxLength: 4096,
-  },
-  "lmsys/vicuna-13b-v1.3": {
-    id: "lmsys/vicuna-13b-v1.3",
-    organization: "LM Sys",
-    name: "Vicuna v1.3 (13B)",
-    maxLength: 2048,
-  },
-  "togethercomputer/Koala-13B": {
-    id: "togethercomputer/Koala-13B",
-    organization: "LM Sys",
-    name: "Koala (13B)",
-    maxLength: 2048,
-  },
-  "lmsys/vicuna-7b-v1.5": {
-    id: "lmsys/vicuna-7b-v1.5",
-    organization: "LM Sys",
-    name: "Vicuna v1.5 (7B)",
-    maxLength: 4096,
-  },
-  "codellama/CodeLlama-34b-Instruct-hf": {
-    id: "codellama/CodeLlama-34b-Instruct-hf",
-    organization: "Meta",
-    name: "Code Llama Instruct (34B)",
-    maxLength: 16384,
-  },
-  "togethercomputer/Llama-3-8b-chat-hf-int4": {
-    id: "togethercomputer/Llama-3-8b-chat-hf-int4",
-    organization: "Meta",
-    name: "Llama3 8B Chat HF INT4",
-    maxLength: 8192,
-  },
-  "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo": {
-    id: "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
-    organization: "Meta",
-    name: "Llama 3.2 90B Vision Instruct Turbo",
-    maxLength: 131072,
-  },
-  "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo": {
-    id: "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-    organization: "Meta",
-    name: "Llama 3.2 11B Vision Instruct Turbo",
-    maxLength: 131072,
-  },
-  "meta-llama/Llama-3.2-3B-Instruct-Turbo": {
-    id: "meta-llama/Llama-3.2-3B-Instruct-Turbo",
-    organization: "Meta",
-    name: "Meta Llama 3.2 3B Instruct Turbo",
-    maxLength: 131072,
-  },
-  "togethercomputer/Llama-3-8b-chat-hf-int8": {
-    id: "togethercomputer/Llama-3-8b-chat-hf-int8",
-    organization: "Meta",
-    name: "Togethercomputer Llama3 8B Instruct Int8",
-    maxLength: 8192,
-  },
-  "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {
-    id: "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
-    organization: "Meta",
-    name: "Meta Llama 3.1 70B Instruct Turbo",
-    maxLength: 32768,
-  },
-  "meta-llama/Llama-2-13b-chat-hf": {
-    id: "meta-llama/Llama-2-13b-chat-hf",
-    organization: "Meta",
-    name: "LLaMA-2 Chat (13B)",
-    maxLength: 4096,
-  },
-  "meta-llama/Meta-Llama-3-70B-Instruct-Lite": {
-    id: "meta-llama/Meta-Llama-3-70B-Instruct-Lite",
-    organization: "Meta",
-    name: "Meta Llama 3 70B Instruct Lite",
-    maxLength: 8192,
-  },
-  "meta-llama/Llama-3-8b-chat-hf": {
-    id: "meta-llama/Llama-3-8b-chat-hf",
-    organization: "Meta",
-    name: "Meta Llama 3 8B Instruct Reference",
-    maxLength: 8192,
-  },
-  "meta-llama/Llama-3-70b-chat-hf": {
-    id: "meta-llama/Llama-3-70b-chat-hf",
-    organization: "Meta",
-    name: "Meta Llama 3 70B Instruct Reference",
-    maxLength: 8192,
-  },
-  "meta-llama/Meta-Llama-3-8B-Instruct-Turbo": {
-    id: "meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
-    organization: "Meta",
-    name: "Meta Llama 3 8B Instruct Turbo",
-    maxLength: 8192,
-  },
-  "meta-llama/Meta-Llama-3-8B-Instruct-Lite": {
-    id: "meta-llama/Meta-Llama-3-8B-Instruct-Lite",
-    organization: "Meta",
-    name: "Meta Llama 3 8B Instruct Lite",
-    maxLength: 8192,
-  },
-  "meta-llama/Meta-Llama-3.1-405B-Instruct-Lite-Pro": {
-    id: "meta-llama/Meta-Llama-3.1-405B-Instruct-Lite-Pro",
-    organization: "Meta",
-    name: "Meta Llama 3.1 405B Instruct Turbo",
-    maxLength: 4096,
-  },
-  "meta-llama/Llama-2-7b-chat-hf": {
-    id: "meta-llama/Llama-2-7b-chat-hf",
-    organization: "Meta",
-    name: "LLaMA-2 Chat (7B)",
-    maxLength: 4096,
-  },
-  "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": {
-    id: "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
-    organization: "Meta",
-    name: "Meta Llama 3.1 405B Instruct Turbo",
-    maxLength: 130815,
-  },
-  "meta-llama/Llama-Vision-Free": {
-    id: "meta-llama/Llama-Vision-Free",
-    organization: "Meta",
-    name: "(Free) Llama 3.2 11B Vision Instruct Turbo",
-    maxLength: 131072,
-  },
-  "meta-llama/Meta-Llama-3-70B-Instruct-Turbo": {
-    id: "meta-llama/Meta-Llama-3-70B-Instruct-Turbo",
-    organization: "Meta",
-    name: "Meta Llama 3 70B Instruct Turbo",
-    maxLength: 8192,
-  },
-  "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {
-    id: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-    organization: "Meta",
-    name: "Meta Llama 3.1 8B Instruct Turbo",
-    maxLength: 32768,
-  },
-  "togethercomputer/CodeLlama-7b-Instruct": {
-    id: "togethercomputer/CodeLlama-7b-Instruct",
-    organization: "Meta",
-    name: "Code Llama Instruct (7B)",
-    maxLength: 16384,
-  },
-  "togethercomputer/CodeLlama-34b-Instruct": {
-    id: "togethercomputer/CodeLlama-34b-Instruct",
-    organization: "Meta",
-    name: "Code Llama Instruct (34B)",
-    maxLength: 16384,
-  },
-  "codellama/CodeLlama-13b-Instruct-hf": {
-    id: "codellama/CodeLlama-13b-Instruct-hf",
-    organization: "Meta",
-    name: "Code Llama Instruct (13B)",
-    maxLength: 16384,
-  },
-  "togethercomputer/CodeLlama-13b-Instruct": {
-    id: "togethercomputer/CodeLlama-13b-Instruct",
-    organization: "Meta",
-    name: "Code Llama Instruct (13B)",
-    maxLength: 16384,
-  },
-  "togethercomputer/llama-2-13b-chat": {
-    id: "togethercomputer/llama-2-13b-chat",
-    organization: "Meta",
-    name: "LLaMA-2 Chat (13B)",
-    maxLength: 4096,
-  },
-  "togethercomputer/llama-2-7b-chat": {
-    id: "togethercomputer/llama-2-7b-chat",
-    organization: "Meta",
-    name: "LLaMA-2 Chat (7B)",
-    maxLength: 4096,
-  },
-  "meta-llama/Meta-Llama-3-8B-Instruct": {
-    id: "meta-llama/Meta-Llama-3-8B-Instruct",
-    organization: "Meta",
-    name: "Meta Llama 3 8B Instruct",
-    maxLength: 8192,
-  },
-  "meta-llama/Meta-Llama-3-70B-Instruct": {
-    id: "meta-llama/Meta-Llama-3-70B-Instruct",
-    organization: "Meta",
-    name: "Meta Llama 3 70B Instruct",
-    maxLength: 8192,
-  },
-  "codellama/CodeLlama-70b-Instruct-hf": {
-    id: "codellama/CodeLlama-70b-Instruct-hf",
-    organization: "Meta",
-    name: "Code Llama Instruct (70B)",
-    maxLength: 4096,
-  },
-  "togethercomputer/llama-2-70b-chat": {
-    id: "togethercomputer/llama-2-70b-chat",
-    organization: "Meta",
-    name: "LLaMA-2 Chat (70B)",
-    maxLength: 4096,
-  },
-  "codellama/CodeLlama-7b-Instruct-hf": {
-    id: "codellama/CodeLlama-7b-Instruct-hf",
-    organization: "Meta",
-    name: "Code Llama Instruct (7B)",
-    maxLength: 16384,
-  },
-  "meta-llama/Llama-2-70b-chat-hf": {
-    id: "meta-llama/Llama-2-70b-chat-hf",
-    organization: "Meta",
-    name: "LLaMA-2 Chat (70B)",
-    maxLength: 4096,
-  },
-  "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference": {
-    id: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
-    organization: "Meta",
-    name: "Meta Llama 3.1 8B Instruct",
-    maxLength: 16384,
-  },
-  "albert/meta-llama-3-1-70b-instruct-turbo": {
-    id: "albert/meta-llama-3-1-70b-instruct-turbo",
-    organization: "Meta",
-    name: "Meta Llama 3.1 70B Instruct Turbo",
-    maxLength: 131072,
-  },
-  "meta-llama/Meta-Llama-3.1-70B-Instruct-Reference": {
-    id: "meta-llama/Meta-Llama-3.1-70B-Instruct-Reference",
-    organization: "Meta",
-    name: "Meta Llama 3.1 70B Instruct",
-    maxLength: 8192,
-  },
-  "microsoft/WizardLM-2-8x22B": {
-    id: "microsoft/WizardLM-2-8x22B",
-    organization: "microsoft",
-    name: "WizardLM-2 (8x22B)",
-    maxLength: 65536,
-  },
-  "mistralai/Mistral-7B-Instruct-v0.1": {
-    id: "mistralai/Mistral-7B-Instruct-v0.1",
-    organization: "mistralai",
-    name: "Mistral (7B) Instruct",
-    maxLength: 4096,
-  },
-  "mistralai/Mistral-7B-Instruct-v0.2": {
-    id: "mistralai/Mistral-7B-Instruct-v0.2",
-    organization: "mistralai",
-    name: "Mistral (7B) Instruct v0.2",
-    maxLength: 32768,
-  },
-  "mistralai/Mistral-7B-Instruct-v0.3": {
-    id: "mistralai/Mistral-7B-Instruct-v0.3",
-    organization: "mistralai",
-    name: "Mistral (7B) Instruct v0.3",
-    maxLength: 32768,
-  },
-  "mistralai/Mixtral-8x7B-Instruct-v0.1": {
-    id: "mistralai/Mixtral-8x7B-Instruct-v0.1",
-    organization: "mistralai",
-    name: "Mixtral-8x7B Instruct v0.1",
-    maxLength: 32768,
-  },
-  "mistralai/Mixtral-8x22B-Instruct-v0.1": {
-    id: "mistralai/Mixtral-8x22B-Instruct-v0.1",
-    organization: "mistralai",
-    name: "Mixtral-8x22B Instruct v0.1",
-    maxLength: 65536,
-  },
-  "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": {
-    id: "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-    organization: "NousResearch",
-    name: "Nous Hermes 2 - Mixtral 8x7B-DPO",
-    maxLength: 32768,
-  },
-  "NousResearch/Nous-Hermes-Llama2-70b": {
-    id: "NousResearch/Nous-Hermes-Llama2-70b",
-    organization: "NousResearch",
-    name: "Nous Hermes LLaMA-2 (70B)",
-    maxLength: 4096,
-  },
-  "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT": {
-    id: "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT",
-    organization: "NousResearch",
-    name: "Nous Hermes 2 - Mixtral 8x7B-SFT",
-    maxLength: 32768,
-  },
-  "NousResearch/Nous-Hermes-Llama2-13b": {
-    id: "NousResearch/Nous-Hermes-Llama2-13b",
-    organization: "NousResearch",
-    name: "Nous Hermes Llama-2 (13B)",
-    maxLength: 4096,
-  },
-  "NousResearch/Nous-Hermes-2-Mistral-7B-DPO": {
-    id: "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
-    organization: "NousResearch",
-    name: "Nous Hermes 2 - Mistral DPO (7B)",
-    maxLength: 32768,
-  },
-  "NousResearch/Nous-Hermes-llama-2-7b": {
-    id: "NousResearch/Nous-Hermes-llama-2-7b",
-    organization: "NousResearch",
-    name: "Nous Hermes LLaMA-2 (7B)",
-    maxLength: 4096,
-  },
-  "NousResearch/Nous-Capybara-7B-V1p9": {
-    id: "NousResearch/Nous-Capybara-7B-V1p9",
-    organization: "NousResearch",
-    name: "Nous Capybara v1.9 (7B)",
-    maxLength: 8192,
-  },
-  "NousResearch/Hermes-2-Theta-Llama-3-70B": {
-    id: "NousResearch/Hermes-2-Theta-Llama-3-70B",
-    organization: "NousResearch",
-    name: "Hermes 2 Theta Llama-3 70B",
-    maxLength: 8192,
-  },
-  "openchat/openchat-3.5-1210": {
-    id: "openchat/openchat-3.5-1210",
-    organization: "OpenChat",
-    name: "OpenChat 3.5",
-    maxLength: 8192,
-  },
-  "Open-Orca/Mistral-7B-OpenOrca": {
-    id: "Open-Orca/Mistral-7B-OpenOrca",
-    organization: "OpenOrca",
-    name: "OpenOrca Mistral (7B) 8K",
-    maxLength: 8192,
-  },
-  "Qwen/Qwen2-72B-Instruct": {
-    id: "Qwen/Qwen2-72B-Instruct",
-    organization: "Qwen",
-    name: "Qwen 2 Instruct (72B)",
-    maxLength: 32768,
-  },
-  "Qwen/Qwen2.5-72B-Instruct-Turbo": {
-    id: "Qwen/Qwen2.5-72B-Instruct-Turbo",
-    organization: "Qwen",
-    name: "Qwen2.5 72B Instruct Turbo",
-    maxLength: 32768,
-  },
-  "Qwen/Qwen2.5-7B-Instruct-Turbo": {
-    id: "Qwen/Qwen2.5-7B-Instruct-Turbo",
-    organization: "Qwen",
-    name: "Qwen2.5 7B Instruct Turbo",
-    maxLength: 32768,
-  },
-  "Qwen/Qwen1.5-110B-Chat": {
-    id: "Qwen/Qwen1.5-110B-Chat",
-    organization: "Qwen",
-    name: "Qwen 1.5 Chat (110B)",
-    maxLength: 32768,
-  },
-  "Qwen/Qwen1.5-72B-Chat": {
-    id: "Qwen/Qwen1.5-72B-Chat",
-    organization: "Qwen",
-    name: "Qwen 1.5 Chat (72B)",
-    maxLength: 32768,
-  },
-  "Qwen/Qwen2-1.5B-Instruct": {
-    id: "Qwen/Qwen2-1.5B-Instruct",
-    organization: "Qwen",
-    name: "Qwen 2 Instruct (1.5B)",
-    maxLength: 32768,
-  },
-  "Qwen/Qwen2-7B-Instruct": {
-    id: "Qwen/Qwen2-7B-Instruct",
-    organization: "Qwen",
-    name: "Qwen 2 Instruct (7B)",
-    maxLength: 32768,
-  },
-  "Qwen/Qwen1.5-14B-Chat": {
-    id: "Qwen/Qwen1.5-14B-Chat",
-    organization: "Qwen",
-    name: "Qwen 1.5 Chat (14B)",
-    maxLength: 32768,
-  },
-  "Qwen/Qwen1.5-1.8B-Chat": {
-    id: "Qwen/Qwen1.5-1.8B-Chat",
-    organization: "Qwen",
-    name: "Qwen 1.5 Chat (1.8B)",
-    maxLength: 32768,
-  },
-  "Qwen/Qwen1.5-32B-Chat": {
-    id: "Qwen/Qwen1.5-32B-Chat",
-    organization: "Qwen",
-    name: "Qwen 1.5 Chat (32B)",
-    maxLength: 32768,
-  },
-  "Qwen/Qwen1.5-7B-Chat": {
-    id: "Qwen/Qwen1.5-7B-Chat",
-    organization: "Qwen",
-    name: "Qwen 1.5 Chat (7B)",
-    maxLength: 32768,
-  },
-  "Qwen/Qwen1.5-0.5B-Chat": {
-    id: "Qwen/Qwen1.5-0.5B-Chat",
-    organization: "Qwen",
-    name: "Qwen 1.5 Chat (0.5B)",
-    maxLength: 32768,
-  },
-  "Qwen/Qwen1.5-4B-Chat": {
-    id: "Qwen/Qwen1.5-4B-Chat",
-    organization: "Qwen",
-    name: "Qwen 1.5 Chat (4B)",
-    maxLength: 32768,
-  },
-  "snorkelai/Snorkel-Mistral-PairRM-DPO": {
-    id: "snorkelai/Snorkel-Mistral-PairRM-DPO",
-    organization: "Snorkel AI",
-    name: "Snorkel Mistral PairRM DPO (7B)",
-    maxLength: 32768,
-  },
-  "Snowflake/snowflake-arctic-instruct": {
-    id: "Snowflake/snowflake-arctic-instruct",
-    organization: "Snowflake",
-    name: "Snowflake Arctic Instruct",
-    maxLength: 4096,
-  },
-  "togethercomputer/alpaca-7b": {
-    id: "togethercomputer/alpaca-7b",
-    organization: "Stanford",
-    name: "Alpaca (7B)",
-    maxLength: 2048,
-  },
-  "teknium/OpenHermes-2-Mistral-7B": {
-    id: "teknium/OpenHermes-2-Mistral-7B",
-    organization: "teknium",
-    name: "OpenHermes-2-Mistral (7B)",
-    maxLength: 8192,
-  },
-  "teknium/OpenHermes-2p5-Mistral-7B": {
-    id: "teknium/OpenHermes-2p5-Mistral-7B",
-    organization: "teknium",
-    name: "OpenHermes-2.5-Mistral (7B)",
-    maxLength: 8192,
-  },
-  "test/test11": {
-    id: "test/test11",
-    organization: "test",
-    name: "Test 11",
-    maxLength: 4096,
-  },
-  "togethercomputer/guanaco-65b": {
-    id: "togethercomputer/guanaco-65b",
-    organization: "Tim Dettmers",
-    name: "Guanaco (65B)",
-    maxLength: 2048,
-  },
-  "togethercomputer/guanaco-13b": {
-    id: "togethercomputer/guanaco-13b",
-    organization: "Tim Dettmers",
-    name: "Guanaco (13B)",
-    maxLength: 2048,
-  },
-  "togethercomputer/guanaco-33b": {
-    id: "togethercomputer/guanaco-33b",
-    organization: "Tim Dettmers",
-    name: "Guanaco (33B)",
-    maxLength: 2048,
-  },
-  "togethercomputer/guanaco-7b": {
-    id: "togethercomputer/guanaco-7b",
-    organization: "Tim Dettmers",
-    name: "Guanaco (7B)",
-    maxLength: 2048,
-  },
-  "Undi95/ReMM-SLERP-L2-13B": {
-    id: "Undi95/ReMM-SLERP-L2-13B",
-    organization: "Undi95",
-    name: "ReMM SLERP L2 (13B)",
-    maxLength: 4096,
-  },
-  "Undi95/Toppy-M-7B": {
-    id: "Undi95/Toppy-M-7B",
-    organization: "Undi95",
-    name: "Toppy M (7B)",
-    maxLength: 4096,
-  },
-  "upstage/SOLAR-10.7B-Instruct-v1.0": {
-    id: "upstage/SOLAR-10.7B-Instruct-v1.0",
-    organization: "upstage",
-    name: "Upstage SOLAR Instruct v1 (11B)",
-    maxLength: 4096,
-  },
-  "togethercomputer/SOLAR-10.7B-Instruct-v1.0-int4": {
-    id: "togethercomputer/SOLAR-10.7B-Instruct-v1.0-int4",
-    organization: "upstage",
-    name: "Upstage SOLAR Instruct v1 (11B)-Int4",
-    maxLength: 4096,
-  },
-  "WizardLM/WizardLM-13B-V1.2": {
-    id: "WizardLM/WizardLM-13B-V1.2",
-    organization: "WizardLM",
-    name: "WizardLM v1.2 (13B)",
-    maxLength: 4096,
-  },
-};
-
-module.exports.MODELS = MODELS;
--- a/server/utils/AiProviders/togetherAi/scripts/.gitignore
+++ b/server/utils/AiProviders/togetherAi/scripts/.gitignore
@@ -1 +0,0 @@
-*.json
--- a/server/utils/AiProviders/togetherAi/scripts/chat_models.txt
+++ b/server/utils/AiProviders/togetherAi/scripts/chat_models.txt
@ -1,108 +0,0 @@
-| Organization          | Model Name                               | API Model String                                 | Context length | Quantization |
-| :-------------------- | :--------------------------------------- | :----------------------------------------------- | :------------- | :----------- |
-| 01.AI                 | 01-ai Yi Chat (34B)                      | zero-one-ai/Yi-34B-Chat                          | 4096           | FP16         |
-| AllenAI               | OLMo Instruct (7B)                       | allenai/OLMo-7B-Instruct                         | 2048           | FP16         |
-| Austism               | Chronos Hermes (13B)                     | Austism/chronos-hermes-13b                       | 2048           | FP16         |
-| carson                | carson ml318br                           | carson/ml318br                                   | 8192           | FP16         |
-| cognitivecomputations | Dolphin 2.5 Mixtral 8x7b                 | cognitivecomputations/dolphin-2.5-mixtral-8x7b   | 32768          | FP16         |
-| Databricks            | DBRX Instruct                            | databricks/dbrx-instruct                         | 32768          | FP16         |
-| DeepSeek              | DeepSeek LLM Chat (67B)                  | deepseek-ai/deepseek-llm-67b-chat                | 4096           | FP16         |
-| DeepSeek              | Deepseek Coder Instruct (33B)            | deepseek-ai/deepseek-coder-33b-instruct          | 16384          | FP16         |
-| garage-bAInd          | Platypus2 Instruct (70B)                 | garage-bAInd/Platypus2-70B-instruct              | 4096           | FP16         |
-| google                | Gemma-2 Instruct (9B)                    | google/gemma-2-9b-it                             | 8192           | FP16         |
-| Google                | Gemma Instruct (2B)                      | google/gemma-2b-it                               | 8192           | FP16         |
-| Google                | Gemma-2 Instruct (27B)                   | google/gemma-2-27b-it                            | 8192           | FP16         |
-| Google                | Gemma Instruct (7B)                      | google/gemma-7b-it                               | 8192           | FP16         |
-| gradientai            | Llama-3 70B Instruct Gradient 1048K      | gradientai/Llama-3-70B-Instruct-Gradient-1048k   | 1048576        | FP16         |
-| Gryphe                | MythoMax-L2 (13B)                        | Gryphe/MythoMax-L2-13b                           | 4096           | FP16         |
-| Gryphe                | Gryphe MythoMax L2 Lite (13B)            | Gryphe/MythoMax-L2-13b-Lite                      | 4096           | FP16         |
-| Haotian Liu           | LLaVa-Next (Mistral-7B)                  | llava-hf/llava-v1.6-mistral-7b-hf                | 4096           | FP16         |
-| HuggingFace           | Zephyr-7B-ß                              | HuggingFaceH4/zephyr-7b-beta                     | 32768          | FP16         |
-| LM Sys                | Koala (7B)                               | togethercomputer/Koala-7B                        | 2048           | FP16         |
-| LM Sys                | Vicuna v1.3 (7B)                         | lmsys/vicuna-7b-v1.3                             | 2048           | FP16         |
-| LM Sys                | Vicuna v1.5 16K (13B)                    | lmsys/vicuna-13b-v1.5-16k                        | 16384          | FP16         |
-| LM Sys                | Vicuna v1.5 (13B)                        | lmsys/vicuna-13b-v1.5                            | 4096           | FP16         |
-| LM Sys                | Vicuna v1.3 (13B)                        | lmsys/vicuna-13b-v1.3                            | 2048           | FP16         |
-| LM Sys                | Koala (13B)                              | togethercomputer/Koala-13B                       | 2048           | FP16         |
-| LM Sys                | Vicuna v1.5 (7B)                         | lmsys/vicuna-7b-v1.5                             | 4096           | FP16         |
-| Meta                  | Code Llama Instruct (34B)                | codellama/CodeLlama-34b-Instruct-hf              | 16384          | FP16         |
-| Meta                  | Llama3 8B Chat HF INT4                   | togethercomputer/Llama-3-8b-chat-hf-int4         | 8192           | FP16         |
-| Meta                  | Meta Llama 3.2 90B Vision Instruct Turbo | meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo   | 131072         | FP16         |
-| Meta                  | Meta Llama 3.2 11B Vision Instruct Turbo | meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo   | 131072         | FP16         |
-| Meta                  | Meta Llama 3.2 3B Instruct Turbo         | meta-llama/Llama-3.2-3B-Instruct-Turbo           | 131072         | FP16         |
-| Meta                  | Togethercomputer Llama3 8B Instruct Int8 | togethercomputer/Llama-3-8b-chat-hf-int8         | 8192           | FP16         |
-| Meta                  | Meta Llama 3.1 70B Instruct Turbo        | meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo     | 32768          | FP8          |
-| Meta                  | LLaMA-2 Chat (13B)                       | meta-llama/Llama-2-13b-chat-hf                   | 4096           | FP16         |
-| Meta                  | Meta Llama 3 70B Instruct Lite           | meta-llama/Meta-Llama-3-70B-Instruct-Lite        | 8192           | INT4         |
-| Meta                  | Meta Llama 3 8B Instruct Reference       | meta-llama/Llama-3-8b-chat-hf                    | 8192           | FP16         |
-| Meta                  | Meta Llama 3 70B Instruct Reference      | meta-llama/Llama-3-70b-chat-hf                   | 8192           | FP16         |
-| Meta                  | Meta Llama 3 8B Instruct Turbo           | meta-llama/Meta-Llama-3-8B-Instruct-Turbo        | 8192           | FP8          |
-| Meta                  | Meta Llama 3 8B Instruct Lite            | meta-llama/Meta-Llama-3-8B-Instruct-Lite         | 8192           | INT4         |
-| Meta                  | Meta Llama 3.1 405B Instruct Turbo       | meta-llama/Meta-Llama-3.1-405B-Instruct-Lite-Pro | 4096           | FP16         |
-| Meta                  | LLaMA-2 Chat (7B)                        | meta-llama/Llama-2-7b-chat-hf                    | 4096           | FP16         |
-| Meta                  | Meta Llama 3.1 405B Instruct Turbo       | meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo    | 130815         | FP8          |
-| Meta                  | Meta Llama Vision Free                   | meta-llama/Llama-Vision-Free                     | 131072         | FP16         |
-| Meta                  | Meta Llama 3 70B Instruct Turbo          | meta-llama/Meta-Llama-3-70B-Instruct-Turbo       | 8192           | FP8          |
-| Meta                  | Meta Llama 3.1 8B Instruct Turbo         | meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo      | 32768          | FP8          |
-| Meta                  | Code Llama Instruct (7B)                 | togethercomputer/CodeLlama-7b-Instruct           | 16384          | FP16         |
-| Meta                  | Code Llama Instruct (34B)                | togethercomputer/CodeLlama-34b-Instruct          | 16384          | FP16         |
-| Meta                  | Code Llama Instruct (13B)                | codellama/CodeLlama-13b-Instruct-hf              | 16384          | FP16         |
-| Meta                  | Code Llama Instruct (13B)                | togethercomputer/CodeLlama-13b-Instruct          | 16384          | FP16         |
-| Meta                  | LLaMA-2 Chat (13B)                       | togethercomputer/llama-2-13b-chat                | 4096           | FP16         |
-| Meta                  | LLaMA-2 Chat (7B)                        | togethercomputer/llama-2-7b-chat                 | 4096           | FP16         |
-| Meta                  | Meta Llama 3 8B Instruct                 | meta-llama/Meta-Llama-3-8B-Instruct              | 8192           | FP16         |
-| Meta                  | Meta Llama 3 70B Instruct                | meta-llama/Meta-Llama-3-70B-Instruct             | 8192           | FP16         |
-| Meta                  | Code Llama Instruct (70B)                | codellama/CodeLlama-70b-Instruct-hf              | 4096           | FP16         |
-| Meta                  | LLaMA-2 Chat (70B)                       | togethercomputer/llama-2-70b-chat                | 4096           | FP16         |
-| Meta                  | Code Llama Instruct (7B)                 | codellama/CodeLlama-7b-Instruct-hf               | 16384          | FP16         |
-| Meta                  | LLaMA-2 Chat (70B)                       | meta-llama/Llama-2-70b-chat-hf                   | 4096           | FP16         |
-| Meta                  | Meta Llama 3.1 8B Instruct               | meta-llama/Meta-Llama-3.1-8B-Instruct-Reference  | 16384          | FP16         |
-| Meta                  | Meta Llama 3.1 70B Instruct Turbo        | albert/meta-llama-3-1-70b-instruct-turbo         | 131072         | FP16         |
-| Meta                  | Meta Llama 3.1 70B Instruct              | meta-llama/Meta-Llama-3.1-70B-Instruct-Reference | 8192           | FP16         |
-| Meta                  | (Free) Llama 3.2 11B Vision Instruct Turbo | meta-llama/Llama-Vision-Free                   | 131072         | FP16         |
-| Meta                  | Llama 3.2 11B Vision Instruct Turbo       | meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo | 131072         | FP16         |
-| Meta                  | Llama 3.2 90B Vision Instruct Turbo       | meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo | 131072         | FP16         |
-| microsoft             | WizardLM-2 (8x22B)                       | microsoft/WizardLM-2-8x22B                       | 65536          | FP16         |
-| mistralai             | Mistral (7B) Instruct                    | mistralai/Mistral-7B-Instruct-v0.1               | 4096           | FP16         |
-| mistralai             | Mistral (7B) Instruct v0.2               | mistralai/Mistral-7B-Instruct-v0.2               | 32768          | FP16         |
-| mistralai             | Mistral (7B) Instruct v0.3               | mistralai/Mistral-7B-Instruct-v0.3               | 32768          | FP16         |
-| mistralai             | Mixtral-8x7B Instruct v0.1               | mistralai/Mixtral-8x7B-Instruct-v0.1             | 32768          | FP16         |
-| mistralai             | Mixtral-8x22B Instruct v0.1              | mistralai/Mixtral-8x22B-Instruct-v0.1            | 65536          | FP16         |
-| NousResearch          | Nous Hermes 2 - Mixtral 8x7B-DPO         | NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO      | 32768          | FP16         |
-| NousResearch          | Nous Hermes LLaMA-2 (70B)                | NousResearch/Nous-Hermes-Llama2-70b              | 4096           | FP16         |
-| NousResearch          | Nous Hermes 2 - Mixtral 8x7B-SFT         | NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT      | 32768          | FP16         |
-| NousResearch          | Nous Hermes Llama-2 (13B)                | NousResearch/Nous-Hermes-Llama2-13b              | 4096           | FP16         |
-| NousResearch          | Nous Hermes 2 - Mistral DPO (7B)         | NousResearch/Nous-Hermes-2-Mistral-7B-DPO        | 32768          | FP16         |
-| NousResearch          | Nous Hermes LLaMA-2 (7B)                 | NousResearch/Nous-Hermes-llama-2-7b              | 4096           | FP16         |
-| NousResearch          | Nous Capybara v1.9 (7B)                  | NousResearch/Nous-Capybara-7B-V1p9               | 8192           | FP16         |
-| NousResearch          | Hermes 2 Theta Llama-3 70B               | NousResearch/Hermes-2-Theta-Llama-3-70B          | 8192           | FP16         |
-| OpenChat              | OpenChat 3.5                             | openchat/openchat-3.5-1210                       | 8192           | FP16         |
-| OpenOrca              | OpenOrca Mistral (7B) 8K                 | Open-Orca/Mistral-7B-OpenOrca                    | 8192           | FP16         |
-| Qwen                  | Qwen 2 Instruct (72B)                    | Qwen/Qwen2-72B-Instruct                          | 32768          | FP16         |
-| Qwen                  | Qwen2.5 72B Instruct Turbo               | Qwen/Qwen2.5-72B-Instruct-Turbo                  | 32768          | FP8          |
-| Qwen                  | Qwen2.5 7B Instruct Turbo                | Qwen/Qwen2.5-7B-Instruct-Turbo                   | 32768          | FP8          |
-| Qwen                  | Qwen 1.5 Chat (110B)                     | Qwen/Qwen1.5-110B-Chat                           | 32768          | FP16         |
-| Qwen                  | Qwen 1.5 Chat (72B)                      | Qwen/Qwen1.5-72B-Chat                            | 32768          | FP16         |
-| Qwen                  | Qwen 2 Instruct (1.5B)                   | Qwen/Qwen2-1.5B-Instruct                         | 32768          | FP16         |
-| Qwen                  | Qwen 2 Instruct (7B)                     | Qwen/Qwen2-7B-Instruct                           | 32768          | FP16         |
-| Qwen                  | Qwen 1.5 Chat (14B)                      | Qwen/Qwen1.5-14B-Chat                            | 32768          | FP16         |
-| Qwen                  | Qwen 1.5 Chat (1.8B)                     | Qwen/Qwen1.5-1.8B-Chat                           | 32768          | FP16         |
-| Qwen                  | Qwen 1.5 Chat (32B)                      | Qwen/Qwen1.5-32B-Chat                            | 32768          | FP16         |
-| Qwen                  | Qwen 1.5 Chat (7B)                       | Qwen/Qwen1.5-7B-Chat                             | 32768          | FP16         |
-| Qwen                  | Qwen 1.5 Chat (0.5B)                     | Qwen/Qwen1.5-0.5B-Chat                           | 32768          | FP16         |
-| Qwen                  | Qwen 1.5 Chat (4B)                       | Qwen/Qwen1.5-4B-Chat                             | 32768          | FP16         |
-| Snorkel AI            | Snorkel Mistral PairRM DPO (7B)          | snorkelai/Snorkel-Mistral-PairRM-DPO             | 32768          | FP16         |
-| Snowflake             | Snowflake Arctic Instruct                | Snowflake/snowflake-arctic-instruct              | 4096           | FP16         |
-| Stanford              | Alpaca (7B)                              | togethercomputer/alpaca-7b                       | 2048           | FP16         |
-| teknium               | OpenHermes-2-Mistral (7B)                | teknium/OpenHermes-2-Mistral-7B                  | 8192           | FP16         |
-| teknium               | OpenHermes-2.5-Mistral (7B)              | teknium/OpenHermes-2p5-Mistral-7B                | 8192           | FP16         |
-| test                  | Test 11                                  | test/test11                                      | 4096           | FP16         |
-| Tim Dettmers          | Guanaco (65B)                            | togethercomputer/guanaco-65b                     | 2048           | FP16         |
-| Tim Dettmers          | Guanaco (13B)                            | togethercomputer/guanaco-13b                     | 2048           | FP16         |
-| Tim Dettmers          | Guanaco (33B)                            | togethercomputer/guanaco-33b                     | 2048           | FP16         |
-| Tim Dettmers          | Guanaco (7B)                             | togethercomputer/guanaco-7b                      | 2048           | FP16         |
-| Undi95                | ReMM SLERP L2 (13B)                      | Undi95/ReMM-SLERP-L2-13B                         | 4096           | FP16         |
-| Undi95                | Toppy M (7B)                             | Undi95/Toppy-M-7B                                | 4096           | FP16         |
-| upstage               | Upstage SOLAR Instruct v1 (11B)          | upstage/SOLAR-10.7B-Instruct-v1.0                | 4096           | FP16         |
-| upstage               | Upstage SOLAR Instruct v1 (11B)-Int4     | togethercomputer/SOLAR-10.7B-Instruct-v1.0-int4  | 4096           | FP16         |
-| WizardLM              | WizardLM v1.2 (13B)                      | WizardLM/WizardLM-13B-V1.2                       | 4096           | FP16         |
--- a/server/utils/AiProviders/togetherAi/scripts/parse.mjs
+++ b/server/utils/AiProviders/togetherAi/scripts/parse.mjs
@ -1,43 +0,0 @@
-// Together AI does not provide a simple REST API to get models,
-// so we have a table which we copy from their documentation
-// https://docs.together.ai/edit/inference-models that we can
-// then parse and get all models from in a format that makes sense
-// Why this does not exist is so bizarre, but whatever.
-
-// To run, cd into this directory and run `node parse.mjs`
-// copy outputs into the export in ../models.js
-
-// Update the date below if you run this again because TogetherAI added new models.
-// Last Collected: Nov 20, 2024
-// Since last collection Together's docs are broken. I just copied the HTML table
-// and had claude3 convert to markdown and it works well enough.
-
-import fs from "fs";
-
-function parseChatModels() {
-  const fixed = {};
-  const tableString = fs.readFileSync("chat_models.txt", { encoding: "utf-8" });
-  const rows = tableString.split("\n").slice(2);
-
-  rows.forEach((row) => {
-    const [provider, name, id, maxLength] = row.split("|").slice(1, -1);
-    const data = {
-      provider: provider.trim(),
-      name: name.trim(),
-      id: id.trim(),
-      maxLength: Number(maxLength.trim()),
-    };
-
-    fixed[data.id] = {
-      id: data.id,
-      organization: data.provider,
-      name: data.name,
-      maxLength: data.maxLength,
-    };
-  });
-
-  fs.writeFileSync("chat_models.json", JSON.stringify(fixed, null, 2), "utf-8");
-  return fixed;
-}
-
-parseChatModels();
--- a/server/utils/helpers/customModels.js
+++ b/server/utils/helpers/customModels.js
@ -44,7 +44,7 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
    case "ollama":
      return await ollamaAIModels(basePath);
    case "togetherai":
-      return await getTogetherAiModels();
+      return await getTogetherAiModels(apiKey);
    case "fireworksai":
      return await getFireworksAiModels(apiKey);
    case "mistral":
@ -327,19 +327,21 @@ async function ollamaAIModels(basePath = null) {
  return { models, error: null };
 }

-async function getTogetherAiModels() {
-  const knownModels = togetherAiModels();
-  if (!Object.keys(knownModels).length === 0)
-    return { models: [], error: null };
-
-  const models = Object.values(knownModels).map((model) => {
-    return {
-      id: model.id,
-      organization: model.organization,
-      name: model.name,
-    };
-  });
-  return { models, error: null };
+/**
+ * Fetch the list of Together AI models via `togetherAiModels` from
+ * "../AiProviders/togetherAi".
+ *
+ * @param {string|boolean|null} apiKey - Key to use for the lookup. When it is
+ *   exactly `true`, the key is read from `process.env.TOGETHER_AI_API_KEY`;
+ *   otherwise the given value is used, falling back to that same env variable
+ *   and finally to `null`.
+ *   NOTE(review): `true` looks like a sentinel from the caller meaning "use the
+ *   already-stored key" - confirm against the callers of getCustomModels.
+ * @returns {Promise<{models: Array, error: string|null}>} The models with
+ *   `error: null` on success, or `{ models: [], error: "Failed to fetch
+ *   Together AI models" }` when anything in the lookup throws.
+ */
+async function getTogetherAiModels(apiKey = null) {
+  // Resolve the effective key: explicit argument first, env variable second.
+  const _apiKey =
+    apiKey === true
+      ? process.env.TOGETHER_AI_API_KEY
+      : apiKey || process.env.TOGETHER_AI_API_KEY || null;
+  try {
+    // The provider module is required here, inside the try, so a failure to
+    // load it is handled by the catch below like any other lookup error.
+    const { togetherAiModels } = require("../AiProviders/togetherAi");
+    const models = await togetherAiModels(_apiKey);
+    // Only write the key back to the process environment when the lookup
+    // returned at least one model and a key was actually resolved.
+    if (models.length > 0 && !!_apiKey)
+      process.env.TOGETHER_AI_API_KEY = _apiKey;
+    return { models, error: null };
+  } catch (error) {
+    // Log the underlying error server-side; callers only get a generic message.
+    console.error("Error in getTogetherAiModels:", error);
+    return { models: [], error: "Failed to fetch Together AI models" };
+  }
 }

 async function getFireworksAiModels() {