Mirror of https://github.com/Mintplex-Labs/anything-llm.git
Add Support for NVIDIA NIM (#2766)
* Add Support for NVIDIA NIM
* update README
* linting
parent 6c9e234227 · commit b2dd35fe15
23 changed files with 626 additions and 4 deletions
Changed files:
- README.md
- docker
- frontend/src
  - components/LLMSelection/NvidiaNimOptions
  - hooks
  - media/llmprovider
  - pages
    - GeneralSettings/LLMPreference
    - OnboardingFlow/Steps
    - WorkspaceSettings/AgentConfig/AgentLLMSelection
  - utils
- server
README.md
@@ -78,6 +78,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace
 - [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service)
 - [AWS Bedrock](https://aws.amazon.com/bedrock/)
 - [Anthropic](https://www.anthropic.com/)
+- [NVIDIA NIM (chat models)](https://build.nvidia.com/explore/discover)
 - [Google Gemini Pro](https://ai.google.dev/)
 - [Hugging Face (chat models)](https://huggingface.co/)
 - [Ollama (chat models)](https://ollama.ai/)
docker/.env.example
@@ -117,6 +117,10 @@ GID='1000'
 # XAI_LLM_API_KEY='xai-your-api-key-here'
 # XAI_LLM_MODEL_PREF='grok-beta'
 
+# LLM_PROVIDER='nvidia-nim'
+# NVIDIA_NIM_LLM_BASE_PATH='http://127.0.0.1:8000'
+# NVIDIA_NIM_LLM_MODEL_PREF='meta/llama-3.2-3b-instruct'
+
 ###########################################
 ######## Embedding API SElECTION ##########
 ###########################################
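Before committing these values, it can help to confirm the container actually answers on the OpenAI-compatible `/v1` routes the rest of this diff relies on. A minimal smoke test, not part of this commit, assuming a NIM container on 127.0.0.1:8000 as in the commented defaults above:

```js
// Hypothetical check for NVIDIA_NIM_LLM_BASE_PATH -- not part of this diff.
// Assumes a NIM container at http://127.0.0.1:8000 exposing the
// OpenAI-compatible /v1/models route the server code below queries.
const basePath = "http://127.0.0.1:8000/v1";

(async () => {
  const res = await fetch(`${basePath}/models`);
  if (!res.ok) throw new Error(`NIM endpoint not reachable: ${res.status}`);
  const { data } = await res.json();
  // Any id printed here is a valid NVIDIA_NIM_LLM_MODEL_PREF value.
  for (const model of data) console.log(model.id);
})();
```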
frontend/src/components/LLMSelection/NvidiaNimOptions/index.jsx (new file, 11 lines)
@@ -0,0 +1,11 @@
+import RemoteNvidiaNimOptions from "./remote";
+import ManagedNvidiaNimOptions from "./managed";
+
+export default function NvidiaNimOptions({ settings }) {
+  const version = "remote"; // static to "remote" when in docker version.
+  return version === "remote" ? (
+    <RemoteNvidiaNimOptions settings={settings} />
+  ) : (
+    <ManagedNvidiaNimOptions settings={settings} />
+  );
+}
frontend/src/components/LLMSelection/NvidiaNimOptions/managed.jsx (new file, 7 lines)
@@ -0,0 +1,7 @@
+/**
+ * This component is used to select, start, and manage NVIDIA NIM
+ * containers and images via docker management tools.
+ */
+export default function ManagedNvidiaNimOptions({ settings }) {
+  return null;
+}
frontend/src/components/LLMSelection/NvidiaNimOptions/remote.jsx (new file, 130 lines)
@@ -0,0 +1,130 @@
+import PreLoader from "@/components/Preloader";
+import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery";
+import System from "@/models/system";
+import { NVIDIA_NIM_COMMON_URLS } from "@/utils/constants";
+import { useState, useEffect } from "react";
+
+/**
+ * This component is used to select a remote Nvidia NIM model endpoint
+ * This is the default component and way to connect to NVIDIA NIM
+ * as the "managed" provider can only work in the Desktop context.
+ */
+export default function RemoteNvidiaNimOptions({ settings }) {
+  const {
+    autoDetecting: loading,
+    basePath,
+    basePathValue,
+    handleAutoDetectClick,
+  } = useProviderEndpointAutoDiscovery({
+    provider: "nvidia-nim",
+    initialBasePath: settings?.NvidiaNimLLMBasePath,
+    ENDPOINTS: NVIDIA_NIM_COMMON_URLS,
+  });
+
+  return (
+    <div className="flex gap-[36px] mt-1.5">
+      <div className="flex flex-col w-60">
+        <div className="flex justify-between items-center mb-2">
+          <label className="text-white text-sm font-semibold">
+            Nvidia Nim Base URL
+          </label>
+          {loading ? (
+            <PreLoader size="6" />
+          ) : (
+            <>
+              {!basePathValue.value && (
+                <button
+                  onClick={handleAutoDetectClick}
+                  className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]"
+                >
+                  Auto-Detect
+                </button>
+              )}
+            </>
+          )}
+        </div>
+        <input
+          type="url"
+          name="NvidiaNimLLMBasePath"
+          className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+          placeholder="http://localhost:8000/v1"
+          value={basePathValue.value}
+          required={true}
+          autoComplete="off"
+          spellCheck={false}
+          onChange={basePath.onChange}
+          onBlur={basePath.onBlur}
+        />
+        <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+          Enter the URL where Nvidia NIM is running.
+        </p>
+      </div>
+      {!settings?.credentialsOnly && (
+        <NvidiaNimModelSelection
+          settings={settings}
+          basePath={basePath.value}
+        />
+      )}
+    </div>
+  );
+}
+
+function NvidiaNimModelSelection({ settings, basePath }) {
+  const [models, setModels] = useState([]);
+  const [loading, setLoading] = useState(true);
+
+  useEffect(() => {
+    async function findCustomModels() {
+      setLoading(true);
+      const { models } = await System.customModels(
+        "nvidia-nim",
+        null,
+        basePath
+      );
+      setModels(models);
+      setLoading(false);
+    }
+    findCustomModels();
+  }, [basePath]);
+
+  if (loading || models.length === 0) {
+    return (
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-3">
+          Chat Model Selection
+        </label>
+        <select
+          name="NvidiaNimLLMModelPref"
+          disabled={true}
+          className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+        >
+          <option disabled={true} selected={true}>
+            -- loading available models --
+          </option>
+        </select>
+      </div>
+    );
+  }
+
+  return (
+    <div className="flex flex-col w-60">
+      <label className="text-white text-sm font-semibold block mb-3">
+        Chat Model Selection
+      </label>
+      <select
+        name="NvidiaNimLLMModelPref"
+        required={true}
+        className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+      >
+        {models.map((model) => (
+          <option
+            key={model.id}
+            value={model.id}
+            selected={settings?.NvidiaNimLLMModelPref === model.id}
+          >
+            {model.name}
+          </option>
+        ))}
+      </select>
+    </div>
+  );
+}
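For context, the `NvidiaNimModelSelection` dropdown above is fed by `System.customModels("nvidia-nim", null, basePath)`, which resolves to the `{ models, error }` shape produced by the server-side `getNvidiaNimModels` helper later in this diff. A sketch of the expected payload, with an illustrative model id:

```js
// Illustrative resolved value of System.customModels("nvidia-nim", null, basePath).
// Field names mirror getNvidiaNimModels() below; the values are assumptions.
const exampleResponse = {
  models: [
    {
      id: "meta/llama-3.2-3b-instruct", // used as the <option> value
      name: "meta/llama-3.2-3b-instruct", // used as the <option> label
      organization: "meta", // mapped from the endpoint's owned_by field
    },
  ],
  error: null,
};
```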
@@ -43,6 +43,7 @@ const PROVIDER_DEFAULT_MODELS = {
   ollama: [],
   togetherai: [],
   fireworksai: [],
+  "nvidia-nim": [],
   groq: [],
   native: [],
   cohere: [
BIN frontend/src/media/llmprovider/nvidia-nim.png (new binary file, 64 KiB; not shown)
|
@ -29,6 +29,7 @@ import AWSBedrockLogo from "@/media/llmprovider/bedrock.png";
|
|||
import DeepSeekLogo from "@/media/llmprovider/deepseek.png";
|
||||
import APIPieLogo from "@/media/llmprovider/apipie.png";
|
||||
import XAILogo from "@/media/llmprovider/xai.png";
|
||||
import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png";
|
||||
|
||||
import PreLoader from "@/components/Preloader";
|
||||
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
|
||||
|
@ -56,6 +57,7 @@ import AWSBedrockLLMOptions from "@/components/LLMSelection/AwsBedrockLLMOptions
|
|||
import DeepSeekOptions from "@/components/LLMSelection/DeepSeekOptions";
|
||||
import ApiPieLLMOptions from "@/components/LLMSelection/ApiPieOptions";
|
||||
import XAILLMOptions from "@/components/LLMSelection/XAiLLMOptions";
|
||||
import NvidiaNimOptions from "@/components/LLMSelection/NvidiaNimOptions";
|
||||
|
||||
import LLMItem from "@/components/LLMSelection/LLMItem";
|
||||
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
|
||||
|
@ -94,6 +96,15 @@ export const AVAILABLE_LLM_PROVIDERS = [
|
|||
description: "Google's largest and most capable AI model",
|
||||
requiredConfig: ["GeminiLLMApiKey"],
|
||||
},
|
||||
{
|
||||
name: "Nvidia NIM",
|
||||
value: "nvidia-nim",
|
||||
logo: NvidiaNimLogo,
|
||||
options: (settings) => <NvidiaNimOptions settings={settings} />,
|
||||
description:
|
||||
"Run full parameter LLMs directly on your GPU using Nvidia's inference microservice via Docker.",
|
||||
requiredConfig: ["NvidiaNimLLMBasePath"],
|
||||
},
|
||||
{
|
||||
name: "HuggingFace",
|
||||
value: "huggingface",
|
||||
|
|
|
@@ -9,6 +9,7 @@ import GeminiLogo from "@/media/llmprovider/gemini.png";
 import OllamaLogo from "@/media/llmprovider/ollama.png";
 import TogetherAILogo from "@/media/llmprovider/togetherai.png";
 import FireworksAILogo from "@/media/llmprovider/fireworksai.jpeg";
+import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png";
 import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
 import MistralLogo from "@/media/llmprovider/mistral.jpeg";

@@ -76,6 +77,13 @@ export const LLM_SELECTION_PRIVACY = {
     ],
     logo: GeminiLogo,
   },
+  "nvidia-nim": {
+    name: "Nvidia NIM",
+    description: [
+      "Your model and chats are only accessible on the machine running the Nvidia NIM service",
+    ],
+    logo: NvidiaNimLogo,
+  },
   lmstudio: {
     name: "LMStudio",
     description: [
@@ -24,7 +24,7 @@ import DeepSeekLogo from "@/media/llmprovider/deepseek.png";
 import APIPieLogo from "@/media/llmprovider/apipie.png";
 import NovitaLogo from "@/media/llmprovider/novita.png";
 import XAILogo from "@/media/llmprovider/xai.png";
-
+import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png";
 import CohereLogo from "@/media/llmprovider/cohere.png";
 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
 import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";

@@ -51,6 +51,7 @@ import DeepSeekOptions from "@/components/LLMSelection/DeepSeekOptions";
 import ApiPieLLMOptions from "@/components/LLMSelection/ApiPieOptions";
 import NovitaLLMOptions from "@/components/LLMSelection/NovitaLLMOptions";
 import XAILLMOptions from "@/components/LLMSelection/XAiLLMOptions";
+import NvidiaNimOptions from "@/components/LLMSelection/NvidiaNimOptions";
 
 import LLMItem from "@/components/LLMSelection/LLMItem";
 import System from "@/models/system";

@@ -91,6 +92,14 @@ const LLMS = [
     options: (settings) => <GeminiLLMOptions settings={settings} />,
     description: "Google's largest and most capable AI model",
   },
+  {
+    name: "Nvidia NIM",
+    value: "nvidia-nim",
+    logo: NvidiaNimLogo,
+    options: (settings) => <NvidiaNimOptions settings={settings} />,
+    description:
+      "Run full parameter LLMs directly on your GPU using Nvidia's inference microservice via Docker.",
+  },
   {
     name: "HuggingFace",
     value: "huggingface",
@@ -28,6 +28,7 @@ const ENABLED_PROVIDERS = [
   "litellm",
   "apipie",
   "xai",
+  "nvidia-nim",
   // TODO: More agent support.
   // "cohere", // Has tool calling and will need to build explicit support
   // "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested.
frontend/src/utils/constants.js
@@ -37,6 +37,13 @@ export const LOCALAI_COMMON_URLS = [
   "http://172.17.0.1:8080/v1",
 ];
 
+export const NVIDIA_NIM_COMMON_URLS = [
+  "http://127.0.0.1:8000/v1/version",
+  "http://localhost:8000/v1/version",
+  "http://host.docker.internal:8000/v1/version",
+  "http://172.17.0.1:8000/v1/version",
+];
+
 export function fullApiUrl() {
   if (API_BASE !== "/api") return API_BASE;
   return `${window.location.origin}/api`;
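These candidates end in `/v1/version` because that route is cheap to probe; the `useProviderEndpointAutoDiscovery` hook used by `remote.jsx` above tries each URL and keeps the first origin that answers. A rough sketch of that probing idea, offered as an illustration rather than the hook's actual implementation:

```js
// Sketch of endpoint auto-discovery over NVIDIA_NIM_COMMON_URLS.
// Assumes a reachable NIM answers GET /v1/version; the real logic lives
// in useProviderEndpointAutoDiscovery.
async function firstReachableNimOrigin(endpoints) {
  for (const url of endpoints) {
    try {
      const res = await fetch(url, { signal: AbortSignal.timeout(2000) });
      if (res.ok) return new URL(url).origin; // e.g. "http://127.0.0.1:8000"
    } catch {
      // unreachable candidate -- try the next one
    }
  }
  return null;
}
```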
server/.env.example
@@ -107,6 +107,10 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
 # XAI_LLM_API_KEY='xai-your-api-key-here'
 # XAI_LLM_MODEL_PREF='grok-beta'
 
+# LLM_PROVIDER='nvidia-nim'
+# NVIDIA_NIM_LLM_BASE_PATH='http://127.0.0.1:8000'
+# NVIDIA_NIM_LLM_MODEL_PREF='meta/llama-3.2-3b-instruct'
+
 ###########################################
 ######## Embedding API SElECTION ##########
 ###########################################
@@ -547,6 +547,11 @@ const SystemSettings = {
       // xAI LLM API Keys
       XAIApiKey: !!process.env.XAI_LLM_API_KEY,
       XAIModelPref: process.env.XAI_LLM_MODEL_PREF,
+
+      // Nvidia NIM Keys
+      NvidiaNimLLMBasePath: process.env.NVIDIA_NIM_LLM_BASE_PATH,
+      NvidiaNimLLMModelPref: process.env.NVIDIA_NIM_LLM_MODEL_PREF,
+      NvidiaNimLLMTokenLimit: process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT,
     };
   },
 
server/utils/AiProviders/nvidiaNim/index.js (new file, 220 lines)
@@ -0,0 +1,220 @@
+const { NativeEmbedder } = require("../../EmbeddingEngines/native");
+const {
+  handleDefaultStreamResponseV2,
+} = require("../../helpers/chat/responses");
+
+class NvidiaNimLLM {
+  constructor(embedder = null, modelPreference = null) {
+    if (!process.env.NVIDIA_NIM_LLM_BASE_PATH)
+      throw new Error("No Nvidia NIM API Base Path was set.");
+
+    const { OpenAI: OpenAIApi } = require("openai");
+    this.nvidiaNim = new OpenAIApi({
+      baseURL: parseNvidiaNimBasePath(process.env.NVIDIA_NIM_LLM_BASE_PATH),
+      apiKey: null,
+    });
+
+    this.model = modelPreference || process.env.NVIDIA_NIM_LLM_MODEL_PREF;
+    this.limits = {
+      history: this.promptWindowLimit() * 0.15,
+      system: this.promptWindowLimit() * 0.15,
+      user: this.promptWindowLimit() * 0.7,
+    };
+
+    this.embedder = embedder ?? new NativeEmbedder();
+    this.defaultTemp = 0.7;
+    this.#log(
+      `Loaded with model: ${this.model} with context window: ${this.promptWindowLimit()}`
+    );
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
+  }
+
+  #appendContext(contextTexts = []) {
+    if (!contextTexts || !contextTexts.length) return "";
+    return (
+      "\nContext:\n" +
+      contextTexts
+        .map((text, i) => {
+          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+        })
+        .join("")
+    );
+  }
+
+  /**
+   * Set the model token limit `NVIDIA_NIM_LLM_TOKEN_LIMIT` for the given model ID
+   * @param {string} modelId
+   * @param {string} basePath
+   * @returns {Promise<void>}
+   */
+  static async setModelTokenLimit(modelId, basePath = null) {
+    if (!modelId) return;
+    const { OpenAI: OpenAIApi } = require("openai");
+    const openai = new OpenAIApi({
+      baseURL: parseNvidiaNimBasePath(
+        basePath || process.env.NVIDIA_NIM_LLM_BASE_PATH
+      ),
+      apiKey: null,
+    });
+    const model = await openai.models
+      .list()
+      .then((results) => results.data)
+      .catch(() => {
+        return [];
+      });
+
+    if (!model.length) return;
+    const modelInfo = model.find((model) => model.id === modelId);
+    if (!modelInfo) return;
+    process.env.NVIDIA_NIM_LLM_TOKEN_LIMIT = Number(
+      modelInfo.max_model_len || 4096
+    );
+  }
+
+  streamingEnabled() {
+    return "streamGetChatCompletion" in this;
+  }
+
+  static promptWindowLimit(_modelName) {
+    const limit = process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No Nvidia NIM token context limit was set.");
+    return Number(limit);
+  }
+
+  // Ensure the user set a value for the token limit
+  // and if undefined - assume 4096 window.
+  promptWindowLimit() {
+    const limit = process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No Nvidia NIM token context limit was set.");
+    return Number(limit);
+  }
+
+  async isValidChatCompletionModel(_ = "") {
+    return true;
+  }
+
+  /**
+   * Generates appropriate content array for a message + attachments.
+   * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
+   * @returns {string|object[]}
+   */
+  #generateContent({ userPrompt, attachments = [] }) {
+    if (!attachments.length) {
+      return userPrompt;
+    }
+
+    const content = [{ type: "text", text: userPrompt }];
+    for (let attachment of attachments) {
+      content.push({
+        type: "image_url",
+        image_url: {
+          url: attachment.contentString,
+          detail: "auto",
+        },
+      });
+    }
+    return content.flat();
+  }
+
+  /**
+   * Construct the user prompt for this model.
+   * @param {{attachments: import("../../helpers").Attachment[]}} param0
+   * @returns
+   */
+  constructPrompt({
+    systemPrompt = "",
+    contextTexts = [],
+    chatHistory = [],
+    userPrompt = "",
+    attachments = [],
+  }) {
+    const prompt = {
+      role: "system",
+      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
+    };
+    return [
+      prompt,
+      ...chatHistory,
+      {
+        role: "user",
+        content: this.#generateContent({ userPrompt, attachments }),
+      },
+    ];
+  }
+
+  async getChatCompletion(messages = null, { temperature = 0.7 }) {
+    if (!this.model)
+      throw new Error(
+        `Nvidia NIM chat: ${this.model} is not valid or defined model for chat completion!`
+      );
+
+    const result = await this.nvidiaNim.chat.completions.create({
+      model: this.model,
+      messages,
+      temperature,
+    });
+
+    if (!result.hasOwnProperty("choices") || result.choices.length === 0)
+      return null;
+    return result.choices[0].message.content;
+  }
+
+  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
+    if (!this.model)
+      throw new Error(
+        `Nvidia NIM chat: ${this.model} is not valid or defined model for chat completion!`
+      );
+
+    const streamRequest = await this.nvidiaNim.chat.completions.create({
+      model: this.model,
+      stream: true,
+      messages,
+      temperature,
+    });
+    return streamRequest;
+  }
+
+  handleStream(response, stream, responseProps) {
+    return handleDefaultStreamResponseV2(response, stream, responseProps);
+  }
+
+  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
+  async embedTextInput(textInput) {
+    return await this.embedder.embedTextInput(textInput);
+  }
+  async embedChunks(textChunks = []) {
+    return await this.embedder.embedChunks(textChunks);
+  }
+
+  async compressMessages(promptArgs = {}, rawHistory = []) {
+    const { messageArrayCompressor } = require("../../helpers/chat");
+    const messageArray = this.constructPrompt(promptArgs);
+    return await messageArrayCompressor(this, messageArray, rawHistory);
+  }
+}
+
+/**
+ * Parse the base path for the Nvidia NIM container API. Since the base path must end in /v1 and cannot have a trailing slash,
+ * and the user can possibly set it to anything and likely incorrectly due to pasting behaviors, we need to ensure it is in the correct format.
+ * @param {string} basePath
+ * @returns {string}
+ */
+function parseNvidiaNimBasePath(providedBasePath = "") {
+  try {
+    const baseURL = new URL(providedBasePath);
+    const basePath = `${baseURL.origin}/v1`;
+    return basePath;
+  } catch (e) {
+    return providedBasePath;
+  }
+}
+
+module.exports = {
+  NvidiaNimLLM,
+  parseNvidiaNimBasePath,
+};
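Two details worth noting in the file above. First, `parseNvidiaNimBasePath` collapses whatever the user pasted down to `<origin>/v1`, so trailing slashes and stray suffixes are harmless; its expected behavior, using the same require path the helpers below use:

```js
// Expected normalization behavior of parseNvidiaNimBasePath (defined above).
const { parseNvidiaNimBasePath } = require("../AiProviders/nvidiaNim");

parseNvidiaNimBasePath("http://127.0.0.1:8000"); // "http://127.0.0.1:8000/v1"
parseNvidiaNimBasePath("http://127.0.0.1:8000/v1/"); // "http://127.0.0.1:8000/v1"
parseNvidiaNimBasePath("http://127.0.0.1:8000/v1/version"); // "http://127.0.0.1:8000/v1"
parseNvidiaNimBasePath("not-a-url"); // "not-a-url" (invalid input passes through)
```

Second, `setModelTokenLimit` writes `process.env.NVIDIA_NIM_LLM_TOKEN_LIMIT`, while both `promptWindowLimit` implementations (and the `SystemSettings` hunk earlier) read `NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT`; as committed, the auto-detected `max_model_len` never reaches the window check, which falls back to 4096 unless `NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT` is set by other means.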
@@ -783,6 +783,8 @@ ${this.getHistory({ to: route.to })
         return new Providers.AWSBedrockProvider({});
       case "fireworksai":
         return new Providers.FireworksAIProvider({ model: config.model });
+      case "nvidia-nim":
+        return new Providers.NvidiaNimProvider({ model: config.model });
       case "deepseek":
         return new Providers.DeepSeekProvider({ model: config.model });
       case "litellm":
@@ -155,6 +155,14 @@ class Provider {
         apiKey: process.env.XAI_LLM_API_KEY ?? null,
         ...config,
       });
+      case "novita":
+        return new ChatOpenAI({
+          configuration: {
+            baseURL: "https://api.novita.ai/v3/openai",
+          },
+          apiKey: process.env.NOVITA_LLM_API_KEY ?? null,
+          ...config,
+        });
 
       // OSS Model Runners
       // case "anythingllm_ollama":

@@ -207,12 +215,12 @@ class Provider {
         apiKey: process.env.LITE_LLM_API_KEY ?? null,
         ...config,
       });
-      case "novita":
+      case "nvidia-nim":
        return new ChatOpenAI({
          configuration: {
-            baseURL: "https://api.novita.ai/v3/openai",
+            baseURL: process.env.NVIDIA_NIM_LLM_BASE_PATH,
          },
-          apiKey: process.env.NOVITA_LLM_API_KEY ?? null,
+          apiKey: null,
          ...config,
        });
 
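In isolation, the `"nvidia-nim"` branch above just points the stock LangChain `ChatOpenAI` client this file already uses at the NIM container, with no API key. A minimal sketch of what gets constructed; the model id and import specifier are assumptions:

```js
// Sketch of the client the "nvidia-nim" case wires up -- not a new API.
const { ChatOpenAI } = require("@langchain/openai");

const chat = new ChatOpenAI({
  configuration: { baseURL: process.env.NVIDIA_NIM_LLM_BASE_PATH },
  apiKey: null, // local NIM containers require no key by default
  model: "meta/llama-3.2-3b-instruct", // hypothetical model id
});
```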
@@ -19,6 +19,7 @@ const LiteLLMProvider = require("./litellm.js");
 const ApiPieProvider = require("./apipie.js");
 const XAIProvider = require("./xai.js");
 const NovitaProvider = require("./novita.js");
+const NvidiaNimProvider = require("./nvidiaNim.js");
 
 module.exports = {
   OpenAIProvider,

@@ -42,4 +43,5 @@ module.exports = {
   ApiPieProvider,
   XAIProvider,
   NovitaProvider,
+  NvidiaNimProvider,
 };
server/utils/agents/aibitat/providers/nvidiaNim.js (new file, 117 lines)
@@ -0,0 +1,117 @@
+const OpenAI = require("openai");
+const Provider = require("./ai-provider.js");
+const InheritMultiple = require("./helpers/classes.js");
+const UnTooled = require("./helpers/untooled.js");
+
+/**
+ * The agent provider for the Nvidia NIM provider.
+ * We wrap Nvidia NIM in UnTooled because its tool-calling may not be supported for specific models and this normalizes that.
+ */
+class NvidiaNimProvider extends InheritMultiple([Provider, UnTooled]) {
+  model;
+
+  constructor(config = {}) {
+    const { model } = config;
+    super();
+    const client = new OpenAI({
+      baseURL: process.env.NVIDIA_NIM_LLM_BASE_PATH,
+      apiKey: null,
+      maxRetries: 0,
+    });
+
+    this._client = client;
+    this.model = model;
+    this.verbose = true;
+  }
+
+  get client() {
+    return this._client;
+  }
+
+  async #handleFunctionCallChat({ messages = [] }) {
+    return await this.client.chat.completions
+      .create({
+        model: this.model,
+        temperature: 0,
+        messages,
+      })
+      .then((result) => {
+        if (!result.hasOwnProperty("choices"))
+          throw new Error("Nvidia NIM chat: No results!");
+        if (result.choices.length === 0)
+          throw new Error("Nvidia NIM chat: No results length!");
+        return result.choices[0].message.content;
+      })
+      .catch((_) => {
+        return null;
+      });
+  }
+
+  /**
+   * Create a completion based on the received messages.
+   *
+   * @param messages A list of messages to send to the API.
+   * @param functions
+   * @returns The completion.
+   */
+  async complete(messages, functions = null) {
+    try {
+      let completion;
+      if (functions.length > 0) {
+        const { toolCall, text } = await this.functionCall(
+          messages,
+          functions,
+          this.#handleFunctionCallChat.bind(this)
+        );
+
+        if (toolCall !== null) {
+          this.providerLog(`Valid tool call found - running ${toolCall.name}.`);
+          this.deduplicator.trackRun(toolCall.name, toolCall.arguments);
+          return {
+            result: null,
+            functionCall: {
+              name: toolCall.name,
+              arguments: toolCall.arguments,
+            },
+            cost: 0,
+          };
+        }
+        completion = { content: text };
+      }
+
+      if (!completion?.content) {
+        this.providerLog(
+          "Will assume chat completion without tool call inputs."
+        );
+        const response = await this.client.chat.completions.create({
+          model: this.model,
+          messages: this.cleanMsgs(messages),
+        });
+        completion = response.choices[0].message;
+      }
+
+      // The UnTooled class inherited Deduplicator is mostly useful to prevent the agent
+      // from calling the exact same function over and over in a loop within a single chat exchange
+      // _but_ we should enable it to call previously used tools in a new chat interaction.
+      this.deduplicator.reset("runs");
+      return {
+        result: completion.content,
+        cost: 0,
+      };
+    } catch (error) {
+      throw error;
+    }
+  }
+
+  /**
+   * Get the cost of the completion.
+   *
+   * @param _usage The completion to get the cost for.
+   * @returns The cost of the completion.
+   */
+  getCost(_usage) {
+    return 0;
+  }
+}
+
+module.exports = NvidiaNimProvider;
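One contract note on `complete` above: `functions` defaults to `null` but is immediately dereferenced via `functions.length`, so callers should pass an array. A hypothetical call shape, with made-up message content and model id:

```js
// Hypothetical usage of the NvidiaNimProvider defined above.
const provider = new NvidiaNimProvider({ model: "meta/llama-3.2-3b-instruct" });

(async () => {
  // Passing [] (rather than omitting the argument) skips tool-calling and
  // falls through to a plain chat completion.
  const { result, functionCall, cost } = await provider.complete(
    [{ role: "user", content: "Summarize the attached workspace docs." }],
    []
  );
  console.log({ result, functionCall, cost });
})();
```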
@@ -177,6 +177,12 @@ class AgentHandler {
         if (!process.env.NOVITA_LLM_API_KEY)
           throw new Error("Novita API Key must be provided to use agents.");
         break;
+      case "nvidia-nim":
+        if (!process.env.NVIDIA_NIM_LLM_BASE_PATH)
+          throw new Error(
+            "Nvidia NIM base path must be provided to use agents."
+          );
+        break;
 
       default:
         throw new Error(

@@ -240,6 +246,8 @@ class AgentHandler {
         return process.env.XAI_LLM_MODEL_PREF ?? "grok-beta";
       case "novita":
         return process.env.NOVITA_LLM_MODEL_PREF ?? "gryphe/mythomax-l2-13b";
+      case "nvidia-nim":
+        return process.env.NVIDIA_NIM_LLM_MODEL_PREF ?? null;
       default:
         return null;
     }
@@ -6,6 +6,8 @@ const { fireworksAiModels } = require("../AiProviders/fireworksAi");
 const { ElevenLabsTTS } = require("../TextToSpeech/elevenLabs");
 const { fetchNovitaModels } = require("../AiProviders/novita");
 const { parseLMStudioBasePath } = require("../AiProviders/lmStudio");
+const { parseNvidiaNimBasePath } = require("../AiProviders/nvidiaNim");
 
 const SUPPORT_CUSTOM_MODELS = [
   "openai",
   "localai",

@@ -13,6 +15,7 @@ const SUPPORT_CUSTOM_MODELS = [
   "native-llm",
   "togetherai",
   "fireworksai",
+  "nvidia-nim",
   "mistral",
   "perplexity",
   "openrouter",

@@ -68,6 +71,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
       return await getNovitaModels();
     case "xai":
       return await getXAIModels(apiKey);
+    case "nvidia-nim":
+      return await getNvidiaNimModels(basePath);
     default:
       return { models: [], error: "Invalid provider for custom models" };
   }
@@ -520,6 +525,37 @@ async function getXAIModels(_apiKey = null) {
   return { models, error: null };
 }
 
+async function getNvidiaNimModels(basePath = null) {
+  try {
+    const { OpenAI: OpenAIApi } = require("openai");
+    const openai = new OpenAIApi({
+      baseURL: parseNvidiaNimBasePath(
+        basePath ?? process.env.NVIDIA_NIM_LLM_BASE_PATH
+      ),
+      apiKey: null,
+    });
+    const modelResponse = await openai.models
+      .list()
+      .then((results) => results.data)
+      .catch((e) => {
+        throw new Error(e.message);
+      });
+
+    const models = modelResponse.map((model) => {
+      return {
+        id: model.id,
+        name: model.id,
+        organization: model.owned_by,
+      };
+    });
+
+    return { models, error: null };
+  } catch (e) {
+    console.error(`Nvidia NIM:getNvidiaNimModels`, e.message);
+    return { models: [], error: "Could not fetch Nvidia NIM Models" };
+  }
+}
+
 module.exports = {
   getCustomModels,
 };
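A hypothetical call into the new branch; the base path is normalized by `parseNvidiaNimBasePath` before the request, so a bare origin works:

```js
// Hypothetical usage of the new "nvidia-nim" branch in getCustomModels.
const { getCustomModels } = require("./customModels"); // require path assumed

(async () => {
  const { models, error } = await getCustomModels(
    "nvidia-nim",
    null, // NIM needs no API key
    "http://127.0.0.1:8000" // normalized to http://127.0.0.1:8000/v1
  );
  if (error) throw new Error(error);
  console.log(models.map((m) => m.id));
})();
```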
@@ -171,6 +171,9 @@ function getLLMProvider({ provider = null, model = null } = {}) {
     case "xai":
       const { XAiLLM } = require("../AiProviders/xai");
       return new XAiLLM(embedder, model);
+    case "nvidia-nim":
+      const { NvidiaNimLLM } = require("../AiProviders/nvidiaNim");
+      return new NvidiaNimLLM(embedder, model);
     default:
       throw new Error(
         `ENV: No valid LLM_PROVIDER value found in environment! Using ${process.env.LLM_PROVIDER}`

@@ -309,6 +312,9 @@ function getLLMProviderClass({ provider = null } = {}) {
     case "xai":
       const { XAiLLM } = require("../AiProviders/xai");
       return XAiLLM;
+    case "nvidia-nim":
+      const { NvidiaNimLLM } = require("../AiProviders/nvidiaNim");
+      return NvidiaNimLLM;
     default:
       return null;
   }
@@ -578,6 +578,29 @@ const KEY_MAPPING = {
     envKey: "XAI_LLM_MODEL_PREF",
     checks: [isNotEmpty],
   },
+
+  // Nvidia NIM Options
+  NvidiaNimLLMBasePath: {
+    envKey: "NVIDIA_NIM_LLM_BASE_PATH",
+    checks: [isValidURL],
+    postUpdate: [
+      (_, __, nextValue) => {
+        const { parseNvidiaNimBasePath } = require("../AiProviders/nvidiaNim");
+        process.env.NVIDIA_NIM_LLM_BASE_PATH =
+          parseNvidiaNimBasePath(nextValue);
+      },
+    ],
+  },
+  NvidiaNimLLMModelPref: {
+    envKey: "NVIDIA_NIM_LLM_MODEL_PREF",
+    checks: [],
+    postUpdate: [
+      async (_, __, nextValue) => {
+        const { NvidiaNimLLM } = require("../AiProviders/nvidiaNim");
+        await NvidiaNimLLM.setModelTokenLimit(nextValue);
+      },
+    ],
+  },
 };
 
 function isNotEmpty(input = "") {

@@ -684,6 +707,7 @@ function supportedLLM(input = "") {
     "deepseek",
     "apipie",
     "xai",
+    "nvidia-nim",
   ].includes(input);
   return validSelection ? null : `${input} is not a valid LLM provider.`;
 }