Mirror of https://github.com/Mintplex-Labs/anything-llm.git
Add Support for NVIDIA NIM (#2766)
* Add Support for NVIDIA NIM
* update README
* linting
parent 6c9e234227 · commit b2dd35fe15
23 changed files with 626 additions and 4 deletions
Changed files:
- README.md
- docker
- frontend/src
  - components/LLMSelection/NvidiaNimOptions
  - hooks
  - media/llmprovider
  - pages
    - GeneralSettings/LLMPreference
    - OnboardingFlow/Steps
    - WorkspaceSettings/AgentConfig/AgentLLMSelection
  - utils
- server
README.md
@@ -78,6 +78,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace
 - [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service)
 - [AWS Bedrock](https://aws.amazon.com/bedrock/)
 - [Anthropic](https://www.anthropic.com/)
+- [NVIDIA NIM (chat models)](https://build.nvidia.com/explore/discover)
 - [Google Gemini Pro](https://ai.google.dev/)
 - [Hugging Face (chat models)](https://huggingface.co/)
 - [Ollama (chat models)](https://ollama.ai/)
docker/.env.example
@@ -117,6 +117,10 @@ GID='1000'
 # XAI_LLM_API_KEY='xai-your-api-key-here'
 # XAI_LLM_MODEL_PREF='grok-beta'
 
+# LLM_PROVIDER='nvidia-nim'
+# NVIDIA_NIM_LLM_BASE_PATH='http://127.0.0.1:8000'
+# NVIDIA_NIM_LLM_MODEL_PREF='meta/llama-3.2-3b-instruct'
+
 ###########################################
 ######## Embedding API SElECTION ##########
 ###########################################
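Before committing these values, it can help to confirm the container actually answers on the OpenAI-compatible `/v1` routes the rest of this diff relies on. A minimal smoke test, not part of this commit, assuming a NIM container on 127.0.0.1:8000 as in the commented defaults above:

```js
// Hypothetical check for NVIDIA_NIM_LLM_BASE_PATH -- not part of this diff.
// Assumes a NIM container at http://127.0.0.1:8000 exposing the
// OpenAI-compatible /v1/models route the server code below queries.
const basePath = "http://127.0.0.1:8000/v1";

(async () => {
  const res = await fetch(`${basePath}/models`);
  if (!res.ok) throw new Error(`NIM endpoint not reachable: ${res.status}`);
  const { data } = await res.json();
  // Any id printed here is a valid NVIDIA_NIM_LLM_MODEL_PREF value.
  for (const model of data) console.log(model.id);
})();
```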
frontend/src/components/LLMSelection/NvidiaNimOptions/index.jsx (new file, 11 lines)
@@ -0,0 +1,11 @@
+import RemoteNvidiaNimOptions from "./remote";
+import ManagedNvidiaNimOptions from "./managed";
+
+export default function NvidiaNimOptions({ settings }) {
+  const version = "remote"; // static to "remote" when in docker version.
+  return version === "remote" ? (
+    <RemoteNvidiaNimOptions settings={settings} />
+  ) : (
+    <ManagedNvidiaNimOptions settings={settings} />
+  );
+}
frontend/src/components/LLMSelection/NvidiaNimOptions/managed.jsx (new file, 7 lines)
@@ -0,0 +1,7 @@
+/**
+ * This component is used to select, start, and manage NVIDIA NIM
+ * containers and images via docker management tools.
+ */
+export default function ManagedNvidiaNimOptions({ settings }) {
+  return null;
+}
frontend/src/components/LLMSelection/NvidiaNimOptions/remote.jsx (new file, 130 lines)
@@ -0,0 +1,130 @@
+import PreLoader from "@/components/Preloader";
+import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery";
+import System from "@/models/system";
+import { NVIDIA_NIM_COMMON_URLS } from "@/utils/constants";
+import { useState, useEffect } from "react";
+
+/**
+ * This component is used to select a remote Nvidia NIM model endpoint
+ * This is the default component and way to connect to NVIDIA NIM
+ * as the "managed" provider can only work in the Desktop context.
+ */
+export default function RemoteNvidiaNimOptions({ settings }) {
+  const {
+    autoDetecting: loading,
+    basePath,
+    basePathValue,
+    handleAutoDetectClick,
+  } = useProviderEndpointAutoDiscovery({
+    provider: "nvidia-nim",
+    initialBasePath: settings?.NvidiaNimLLMBasePath,
+    ENDPOINTS: NVIDIA_NIM_COMMON_URLS,
+  });
+
+  return (
+    <div className="flex gap-[36px] mt-1.5">
+      <div className="flex flex-col w-60">
+        <div className="flex justify-between items-center mb-2">
+          <label className="text-white text-sm font-semibold">
+            Nvidia Nim Base URL
+          </label>
+          {loading ? (
+            <PreLoader size="6" />
+          ) : (
+            <>
+              {!basePathValue.value && (
+                <button
+                  onClick={handleAutoDetectClick}
+                  className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]"
+                >
+                  Auto-Detect
+                </button>
+              )}
+            </>
+          )}
+        </div>
+        <input
+          type="url"
+          name="NvidiaNimLLMBasePath"
+          className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+          placeholder="http://localhost:8000/v1"
+          value={basePathValue.value}
+          required={true}
+          autoComplete="off"
+          spellCheck={false}
+          onChange={basePath.onChange}
+          onBlur={basePath.onBlur}
+        />
+        <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+          Enter the URL where Nvidia NIM is running.
+        </p>
+      </div>
+      {!settings?.credentialsOnly && (
+        <NvidiaNimModelSelection
+          settings={settings}
+          basePath={basePath.value}
+        />
+      )}
+    </div>
+  );
+}
+
+function NvidiaNimModelSelection({ settings, basePath }) {
+  const [models, setModels] = useState([]);
+  const [loading, setLoading] = useState(true);
+
+  useEffect(() => {
+    async function findCustomModels() {
+      setLoading(true);
+      const { models } = await System.customModels(
+        "nvidia-nim",
+        null,
+        basePath
+      );
+      setModels(models);
+      setLoading(false);
+    }
+    findCustomModels();
+  }, [basePath]);
+
+  if (loading || models.length === 0) {
+    return (
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-3">
+          Chat Model Selection
+        </label>
+        <select
+          name="NvidiaNimLLMModelPref"
+          disabled={true}
+          className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+        >
+          <option disabled={true} selected={true}>
+            -- loading available models --
+          </option>
+        </select>
+      </div>
+    );
+  }
+
+  return (
+    <div className="flex flex-col w-60">
+      <label className="text-white text-sm font-semibold block mb-3">
+        Chat Model Selection
+      </label>
+      <select
+        name="NvidiaNimLLMModelPref"
+        required={true}
+        className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+      >
+        {models.map((model) => (
+          <option
+            key={model.id}
+            value={model.id}
+            selected={settings?.NvidiaNimLLMModelPref === model.id}
+          >
+            {model.name}
+          </option>
+        ))}
+      </select>
+    </div>
+  );
+}
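For context, the `NvidiaNimModelSelection` dropdown above is fed by `System.customModels("nvidia-nim", null, basePath)`, which resolves to the `{ models, error }` shape produced by the server-side `getNvidiaNimModels` helper later in this diff. A sketch of the expected payload, with an illustrative model id:

```js
// Illustrative resolved value of System.customModels("nvidia-nim", null, basePath).
// Field names mirror getNvidiaNimModels() below; the values are assumptions.
const exampleResponse = {
  models: [
    {
      id: "meta/llama-3.2-3b-instruct", // used as the <option> value
      name: "meta/llama-3.2-3b-instruct", // used as the <option> label
      organization: "meta", // mapped from the endpoint's owned_by field
    },
  ],
  error: null,
};
```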
@@ -43,6 +43,7 @@ const PROVIDER_DEFAULT_MODELS = {
   ollama: [],
   togetherai: [],
   fireworksai: [],
+  "nvidia-nim": [],
   groq: [],
   native: [],
   cohere: [
BIN frontend/src/media/llmprovider/nvidia-nim.png (new binary file, 64 KiB; not shown)
|
@ -29,6 +29,7 @@ import AWSBedrockLogo from "@/media/llmprovider/bedrock.png";
|
|||
import DeepSeekLogo from "@/media/llmprovider/deepseek.png";
|
||||
import APIPieLogo from "@/media/llmprovider/apipie.png";
|
||||
import XAILogo from "@/media/llmprovider/xai.png";
|
||||
import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png";
|
||||
|
||||
import PreLoader from "@/components/Preloader";
|
||||
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
|
||||
|
@ -56,6 +57,7 @@ import AWSBedrockLLMOptions from "@/components/LLMSelection/AwsBedrockLLMOptions
|
|||
import DeepSeekOptions from "@/components/LLMSelection/DeepSeekOptions";
|
||||
import ApiPieLLMOptions from "@/components/LLMSelection/ApiPieOptions";
|
||||
import XAILLMOptions from "@/components/LLMSelection/XAiLLMOptions";
|
||||
import NvidiaNimOptions from "@/components/LLMSelection/NvidiaNimOptions";
|
||||
|
||||
import LLMItem from "@/components/LLMSelection/LLMItem";
|
||||
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
|
||||
|
@ -94,6 +96,15 @@ export const AVAILABLE_LLM_PROVIDERS = [
|
|||
description: "Google's largest and most capable AI model",
|
||||
requiredConfig: ["GeminiLLMApiKey"],
|
||||
},
|
||||
{
|
||||
name: "Nvidia NIM",
|
||||
value: "nvidia-nim",
|
||||
logo: NvidiaNimLogo,
|
||||
options: (settings) => <NvidiaNimOptions settings={settings} />,
|
||||
description:
|
||||
"Run full parameter LLMs directly on your GPU using Nvidia's inference microservice via Docker.",
|
||||
requiredConfig: ["NvidiaNimLLMBasePath"],
|
||||
},
|
||||
{
|
||||
name: "HuggingFace",
|
||||
value: "huggingface",
|
||||
|
|
|
@@ -9,6 +9,7 @@ import GeminiLogo from "@/media/llmprovider/gemini.png";
 import OllamaLogo from "@/media/llmprovider/ollama.png";
 import TogetherAILogo from "@/media/llmprovider/togetherai.png";
 import FireworksAILogo from "@/media/llmprovider/fireworksai.jpeg";
+import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png";
 import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
 import MistralLogo from "@/media/llmprovider/mistral.jpeg";

@@ -76,6 +77,13 @@ export const LLM_SELECTION_PRIVACY = {
     ],
     logo: GeminiLogo,
   },
+  "nvidia-nim": {
+    name: "Nvidia NIM",
+    description: [
+      "Your model and chats are only accessible on the machine running the Nvidia NIM service",
+    ],
+    logo: NvidiaNimLogo,
+  },
   lmstudio: {
     name: "LMStudio",
     description: [
@@ -24,7 +24,7 @@ import DeepSeekLogo from "@/media/llmprovider/deepseek.png";
 import APIPieLogo from "@/media/llmprovider/apipie.png";
 import NovitaLogo from "@/media/llmprovider/novita.png";
 import XAILogo from "@/media/llmprovider/xai.png";
-
+import NvidiaNimLogo from "@/media/llmprovider/nvidia-nim.png";
 import CohereLogo from "@/media/llmprovider/cohere.png";
 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
 import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";

@@ -51,6 +51,7 @@ import DeepSeekOptions from "@/components/LLMSelection/DeepSeekOptions";
 import ApiPieLLMOptions from "@/components/LLMSelection/ApiPieOptions";
 import NovitaLLMOptions from "@/components/LLMSelection/NovitaLLMOptions";
 import XAILLMOptions from "@/components/LLMSelection/XAiLLMOptions";
+import NvidiaNimOptions from "@/components/LLMSelection/NvidiaNimOptions";
 
 import LLMItem from "@/components/LLMSelection/LLMItem";
 import System from "@/models/system";

@@ -91,6 +92,14 @@ const LLMS = [
     options: (settings) => <GeminiLLMOptions settings={settings} />,
     description: "Google's largest and most capable AI model",
   },
+  {
+    name: "Nvidia NIM",
+    value: "nvidia-nim",
+    logo: NvidiaNimLogo,
+    options: (settings) => <NvidiaNimOptions settings={settings} />,
+    description:
+      "Run full parameter LLMs directly on your GPU using Nvidia's inference microservice via Docker.",
+  },
   {
     name: "HuggingFace",
     value: "huggingface",
@@ -28,6 +28,7 @@ const ENABLED_PROVIDERS = [
   "litellm",
   "apipie",
   "xai",
+  "nvidia-nim",
   // TODO: More agent support.
   // "cohere", // Has tool calling and will need to build explicit support
   // "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested.
frontend/src/utils/constants.js
@@ -37,6 +37,13 @@ export const LOCALAI_COMMON_URLS = [
   "http://172.17.0.1:8080/v1",
 ];
 
+export const NVIDIA_NIM_COMMON_URLS = [
+  "http://127.0.0.1:8000/v1/version",
+  "http://localhost:8000/v1/version",
+  "http://host.docker.internal:8000/v1/version",
+  "http://172.17.0.1:8000/v1/version",
+];
+
 export function fullApiUrl() {
   if (API_BASE !== "/api") return API_BASE;
   return `${window.location.origin}/api`;
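These candidates end in `/v1/version` because that route is cheap to probe; the `useProviderEndpointAutoDiscovery` hook used by `remote.jsx` above tries each URL and keeps the first origin that answers. A rough sketch of that probing idea, offered as an illustration rather than the hook's actual implementation:

```js
// Sketch of endpoint auto-discovery over NVIDIA_NIM_COMMON_URLS.
// Assumes a reachable NIM answers GET /v1/version; the real logic lives
// in useProviderEndpointAutoDiscovery.
async function firstReachableNimOrigin(endpoints) {
  for (const url of endpoints) {
    try {
      const res = await fetch(url, { signal: AbortSignal.timeout(2000) });
      if (res.ok) return new URL(url).origin; // e.g. "http://127.0.0.1:8000"
    } catch {
      // unreachable candidate -- try the next one
    }
  }
  return null;
}
```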
server/.env.example
@@ -107,6 +107,10 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
 # XAI_LLM_API_KEY='xai-your-api-key-here'
 # XAI_LLM_MODEL_PREF='grok-beta'
 
+# LLM_PROVIDER='nvidia-nim'
+# NVIDIA_NIM_LLM_BASE_PATH='http://127.0.0.1:8000'
+# NVIDIA_NIM_LLM_MODEL_PREF='meta/llama-3.2-3b-instruct'
+
 ###########################################
 ######## Embedding API SElECTION ##########
 ###########################################
@@ -547,6 +547,11 @@ const SystemSettings = {
       // xAI LLM API Keys
       XAIApiKey: !!process.env.XAI_LLM_API_KEY,
       XAIModelPref: process.env.XAI_LLM_MODEL_PREF,
+
+      // Nvidia NIM Keys
+      NvidiaNimLLMBasePath: process.env.NVIDIA_NIM_LLM_BASE_PATH,
+      NvidiaNimLLMModelPref: process.env.NVIDIA_NIM_LLM_MODEL_PREF,
+      NvidiaNimLLMTokenLimit: process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT,
     };
   },
 
server/utils/AiProviders/nvidiaNim/index.js (new file, 220 lines)
@@ -0,0 +1,220 @@
+const { NativeEmbedder } = require("../../EmbeddingEngines/native");
+const {
+  handleDefaultStreamResponseV2,
+} = require("../../helpers/chat/responses");
+
+class NvidiaNimLLM {
+  constructor(embedder = null, modelPreference = null) {
+    if (!process.env.NVIDIA_NIM_LLM_BASE_PATH)
+      throw new Error("No Nvidia NIM API Base Path was set.");
+
+    const { OpenAI: OpenAIApi } = require("openai");
+    this.nvidiaNim = new OpenAIApi({
+      baseURL: parseNvidiaNimBasePath(process.env.NVIDIA_NIM_LLM_BASE_PATH),
+      apiKey: null,
+    });
+
+    this.model = modelPreference || process.env.NVIDIA_NIM_LLM_MODEL_PREF;
+    this.limits = {
+      history: this.promptWindowLimit() * 0.15,
+      system: this.promptWindowLimit() * 0.15,
+      user: this.promptWindowLimit() * 0.7,
+    };
+
+    this.embedder = embedder ?? new NativeEmbedder();
+    this.defaultTemp = 0.7;
+    this.#log(
+      `Loaded with model: ${this.model} with context window: ${this.promptWindowLimit()}`
+    );
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
+  }
+
+  #appendContext(contextTexts = []) {
+    if (!contextTexts || !contextTexts.length) return "";
+    return (
+      "\nContext:\n" +
+      contextTexts
+        .map((text, i) => {
+          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+        })
+        .join("")
+    );
+  }
+
+  /**
+   * Set the model token limit `NVIDIA_NIM_LLM_TOKEN_LIMIT` for the given model ID
+   * @param {string} modelId
+   * @param {string} basePath
+   * @returns {Promise<void>}
+   */
+  static async setModelTokenLimit(modelId, basePath = null) {
+    if (!modelId) return;
+    const { OpenAI: OpenAIApi } = require("openai");
+    const openai = new OpenAIApi({
+      baseURL: parseNvidiaNimBasePath(
+        basePath || process.env.NVIDIA_NIM_LLM_BASE_PATH
+      ),
+      apiKey: null,
+    });
+    const model = await openai.models
+      .list()
+      .then((results) => results.data)
+      .catch(() => {
+        return [];
+      });
+
+    if (!model.length) return;
+    const modelInfo = model.find((model) => model.id === modelId);
+    if (!modelInfo) return;
+    process.env.NVIDIA_NIM_LLM_TOKEN_LIMIT = Number(
+      modelInfo.max_model_len || 4096
+    );
+  }
+
+  streamingEnabled() {
+    return "streamGetChatCompletion" in this;
+  }
+
+  static promptWindowLimit(_modelName) {
+    const limit = process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No Nvidia NIM token context limit was set.");
+    return Number(limit);
+  }
+
+  // Ensure the user set a value for the token limit
+  // and if undefined - assume 4096 window.
+  promptWindowLimit() {
+    const limit = process.env.NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT || 4096;
+    if (!limit || isNaN(Number(limit)))
+      throw new Error("No Nvidia NIM token context limit was set.");
+    return Number(limit);
+  }
+
+  async isValidChatCompletionModel(_ = "") {
+    return true;
+  }
+
+  /**
+   * Generates appropriate content array for a message + attachments.
+   * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
+   * @returns {string|object[]}
+   */
+  #generateContent({ userPrompt, attachments = [] }) {
+    if (!attachments.length) {
+      return userPrompt;
+    }
+
+    const content = [{ type: "text", text: userPrompt }];
+    for (let attachment of attachments) {
+      content.push({
+        type: "image_url",
+        image_url: {
+          url: attachment.contentString,
+          detail: "auto",
+        },
+      });
+    }
+    return content.flat();
+  }
+
+  /**
+   * Construct the user prompt for this model.
+   * @param {{attachments: import("../../helpers").Attachment[]}} param0
+   * @returns
+   */
+  constructPrompt({
+    systemPrompt = "",
+    contextTexts = [],
+    chatHistory = [],
+    userPrompt = "",
+    attachments = [],
+  }) {
+    const prompt = {
+      role: "system",
+      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
+    };
+    return [
+      prompt,
+      ...chatHistory,
+      {
+        role: "user",
+        content: this.#generateContent({ userPrompt, attachments }),
+      },
+    ];
+  }
+
+  async getChatCompletion(messages = null, { temperature = 0.7 }) {
+    if (!this.model)
+      throw new Error(
+        `Nvidia NIM chat: ${this.model} is not valid or defined model for chat completion!`
+      );
+
+    const result = await this.nvidiaNim.chat.completions.create({
+      model: this.model,
+      messages,
+      temperature,
+    });
+
+    if (!result.hasOwnProperty("choices") || result.choices.length === 0)
+      return null;
+    return result.choices[0].message.content;
+  }
+
+  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
+    if (!this.model)
+      throw new Error(
+        `Nvidia NIM chat: ${this.model} is not valid or defined model for chat completion!`
+      );
+
+    const streamRequest = await this.nvidiaNim.chat.completions.create({
+      model: this.model,
+      stream: true,
+      messages,
+      temperature,
+    });
+    return streamRequest;
+  }
+
+  handleStream(response, stream, responseProps) {
+    return handleDefaultStreamResponseV2(response, stream, responseProps);
+  }
+
+  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
+  async embedTextInput(textInput) {
+    return await this.embedder.embedTextInput(textInput);
+  }
+  async embedChunks(textChunks = []) {
+    return await this.embedder.embedChunks(textChunks);
+  }
+
+  async compressMessages(promptArgs = {}, rawHistory = []) {
+    const { messageArrayCompressor } = require("../../helpers/chat");
+    const messageArray = this.constructPrompt(promptArgs);
+    return await messageArrayCompressor(this, messageArray, rawHistory);
+  }
+}
+
+/**
+ * Parse the base path for the Nvidia NIM container API. Since the base path must end in /v1 and cannot have a trailing slash,
+ * and the user can possibly set it to anything and likely incorrectly due to pasting behaviors, we need to ensure it is in the correct format.
+ * @param {string} basePath
+ * @returns {string}
+ */
+function parseNvidiaNimBasePath(providedBasePath = "") {
+  try {
+    const baseURL = new URL(providedBasePath);
+    const basePath = `${baseURL.origin}/v1`;
+    return basePath;
+  } catch (e) {
+    return providedBasePath;
+  }
+}
+
+module.exports = {
+  NvidiaNimLLM,
+  parseNvidiaNimBasePath,
+};
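Two details worth noting in the file above. First, `parseNvidiaNimBasePath` collapses whatever the user pasted down to `<origin>/v1`, so trailing slashes and stray suffixes are harmless; its expected behavior, using the same require path the helpers below use:

```js
// Expected normalization behavior of parseNvidiaNimBasePath (defined above).
const { parseNvidiaNimBasePath } = require("../AiProviders/nvidiaNim");

parseNvidiaNimBasePath("http://127.0.0.1:8000"); // "http://127.0.0.1:8000/v1"
parseNvidiaNimBasePath("http://127.0.0.1:8000/v1/"); // "http://127.0.0.1:8000/v1"
parseNvidiaNimBasePath("http://127.0.0.1:8000/v1/version"); // "http://127.0.0.1:8000/v1"
parseNvidiaNimBasePath("not-a-url"); // "not-a-url" (invalid input passes through)
```

Second, `setModelTokenLimit` writes `process.env.NVIDIA_NIM_LLM_TOKEN_LIMIT`, while both `promptWindowLimit` implementations (and the `SystemSettings` hunk earlier) read `NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT`; as committed, the auto-detected `max_model_len` never reaches the window check, which falls back to 4096 unless `NVIDIA_NIM_LLM_MODEL_TOKEN_LIMIT` is set by other means.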
@@ -783,6 +783,8 @@ ${this.getHistory({ to: route.to })
         return new Providers.AWSBedrockProvider({});
       case "fireworksai":
         return new Providers.FireworksAIProvider({ model: config.model });
+      case "nvidia-nim":
+        return new Providers.NvidiaNimProvider({ model: config.model });
       case "deepseek":
         return new Providers.DeepSeekProvider({ model: config.model });
       case "litellm":
@@ -155,6 +155,14 @@ class Provider {
         apiKey: process.env.XAI_LLM_API_KEY ?? null,
         ...config,
       });
+      case "novita":
+        return new ChatOpenAI({
+          configuration: {
+            baseURL: "https://api.novita.ai/v3/openai",
+          },
+          apiKey: process.env.NOVITA_LLM_API_KEY ?? null,
+          ...config,
+        });
 
       // OSS Model Runners
       // case "anythingllm_ollama":

@@ -207,12 +215,12 @@ class Provider {
         apiKey: process.env.LITE_LLM_API_KEY ?? null,
         ...config,
       });
-      case "novita":
+      case "nvidia-nim":
        return new ChatOpenAI({
          configuration: {
-            baseURL: "https://api.novita.ai/v3/openai",
+            baseURL: process.env.NVIDIA_NIM_LLM_BASE_PATH,
          },
-          apiKey: process.env.NOVITA_LLM_API_KEY ?? null,
+          apiKey: null,
          ...config,
        });
 
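In isolation, the `"nvidia-nim"` branch above just points the stock LangChain `ChatOpenAI` client this file already uses at the NIM container, with no API key. A minimal sketch of what gets constructed; the model id and import specifier are assumptions:

```js
// Sketch of the client the "nvidia-nim" case wires up -- not a new API.
const { ChatOpenAI } = require("@langchain/openai");

const chat = new ChatOpenAI({
  configuration: { baseURL: process.env.NVIDIA_NIM_LLM_BASE_PATH },
  apiKey: null, // local NIM containers require no key by default
  model: "meta/llama-3.2-3b-instruct", // hypothetical model id
});
```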
@@ -19,6 +19,7 @@ const LiteLLMProvider = require("./litellm.js");
 const ApiPieProvider = require("./apipie.js");
 const XAIProvider = require("./xai.js");
 const NovitaProvider = require("./novita.js");
+const NvidiaNimProvider = require("./nvidiaNim.js");
 
 module.exports = {
   OpenAIProvider,

@@ -42,4 +43,5 @@ module.exports = {
   ApiPieProvider,
   XAIProvider,
   NovitaProvider,
+  NvidiaNimProvider,
 };
server/utils/agents/aibitat/providers/nvidiaNim.js (new file, 117 lines)
@@ -0,0 +1,117 @@
+const OpenAI = require("openai");
+const Provider = require("./ai-provider.js");
+const InheritMultiple = require("./helpers/classes.js");
+const UnTooled = require("./helpers/untooled.js");
+
+/**
+ * The agent provider for the Nvidia NIM provider.
+ * We wrap Nvidia NIM in UnTooled because its tool-calling may not be supported for specific models and this normalizes that.
+ */
+class NvidiaNimProvider extends InheritMultiple([Provider, UnTooled]) {
+  model;
+
+  constructor(config = {}) {
+    const { model } = config;
+    super();
+    const client = new OpenAI({
+      baseURL: process.env.NVIDIA_NIM_LLM_BASE_PATH,
+      apiKey: null,
+      maxRetries: 0,
+    });
+
+    this._client = client;
+    this.model = model;
+    this.verbose = true;
+  }
+
+  get client() {
+    return this._client;
+  }
+
+  async #handleFunctionCallChat({ messages = [] }) {
+    return await this.client.chat.completions
+      .create({
+        model: this.model,
+        temperature: 0,
+        messages,
+      })
+      .then((result) => {
+        if (!result.hasOwnProperty("choices"))
+          throw new Error("Nvidia NIM chat: No results!");
+        if (result.choices.length === 0)
+          throw new Error("Nvidia NIM chat: No results length!");
+        return result.choices[0].message.content;
+      })
+      .catch((_) => {
+        return null;
+      });
+  }
+
+  /**
+   * Create a completion based on the received messages.
+   *
+   * @param messages A list of messages to send to the API.
+   * @param functions
+   * @returns The completion.
+   */
+  async complete(messages, functions = null) {
+    try {
+      let completion;
+      if (functions.length > 0) {
+        const { toolCall, text } = await this.functionCall(
+          messages,
+          functions,
+          this.#handleFunctionCallChat.bind(this)
+        );
+
+        if (toolCall !== null) {
+          this.providerLog(`Valid tool call found - running ${toolCall.name}.`);
+          this.deduplicator.trackRun(toolCall.name, toolCall.arguments);
+          return {
+            result: null,
+            functionCall: {
+              name: toolCall.name,
+              arguments: toolCall.arguments,
+            },
+            cost: 0,
+          };
+        }
+        completion = { content: text };
+      }
+
+      if (!completion?.content) {
+        this.providerLog(
+          "Will assume chat completion without tool call inputs."
+        );
+        const response = await this.client.chat.completions.create({
+          model: this.model,
+          messages: this.cleanMsgs(messages),
+        });
+        completion = response.choices[0].message;
+      }
+
+      // The UnTooled class inherited Deduplicator is mostly useful to prevent the agent
+      // from calling the exact same function over and over in a loop within a single chat exchange
+      // _but_ we should enable it to call previously used tools in a new chat interaction.
+      this.deduplicator.reset("runs");
+      return {
+        result: completion.content,
+        cost: 0,
+      };
+    } catch (error) {
+      throw error;
+    }
+  }
+
+  /**
+   * Get the cost of the completion.
+   *
+   * @param _usage The completion to get the cost for.
+   * @returns The cost of the completion.
+   */
+  getCost(_usage) {
+    return 0;
+  }
+}
+
+module.exports = NvidiaNimProvider;
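One contract note on `complete` above: `functions` defaults to `null` but is immediately dereferenced via `functions.length`, so callers should pass an array. A hypothetical call shape, with made-up message content and model id:

```js
// Hypothetical usage of the NvidiaNimProvider defined above.
const provider = new NvidiaNimProvider({ model: "meta/llama-3.2-3b-instruct" });

(async () => {
  // Passing [] (rather than omitting the argument) skips tool-calling and
  // falls through to a plain chat completion.
  const { result, functionCall, cost } = await provider.complete(
    [{ role: "user", content: "Summarize the attached workspace docs." }],
    []
  );
  console.log({ result, functionCall, cost });
})();
```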
@@ -177,6 +177,12 @@ class AgentHandler {
         if (!process.env.NOVITA_LLM_API_KEY)
           throw new Error("Novita API Key must be provided to use agents.");
         break;
+      case "nvidia-nim":
+        if (!process.env.NVIDIA_NIM_LLM_BASE_PATH)
+          throw new Error(
+            "Nvidia NIM base path must be provided to use agents."
+          );
+        break;
 
       default:
         throw new Error(

@@ -240,6 +246,8 @@ class AgentHandler {
         return process.env.XAI_LLM_MODEL_PREF ?? "grok-beta";
       case "novita":
         return process.env.NOVITA_LLM_MODEL_PREF ?? "gryphe/mythomax-l2-13b";
+      case "nvidia-nim":
+        return process.env.NVIDIA_NIM_LLM_MODEL_PREF ?? null;
       default:
         return null;
     }
@@ -6,6 +6,8 @@ const { fireworksAiModels } = require("../AiProviders/fireworksAi");
 const { ElevenLabsTTS } = require("../TextToSpeech/elevenLabs");
 const { fetchNovitaModels } = require("../AiProviders/novita");
 const { parseLMStudioBasePath } = require("../AiProviders/lmStudio");
+const { parseNvidiaNimBasePath } = require("../AiProviders/nvidiaNim");
 
 const SUPPORT_CUSTOM_MODELS = [
   "openai",
   "localai",

@@ -13,6 +15,7 @@ const SUPPORT_CUSTOM_MODELS = [
   "native-llm",
   "togetherai",
   "fireworksai",
+  "nvidia-nim",
   "mistral",
   "perplexity",
   "openrouter",

@@ -68,6 +71,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
       return await getNovitaModels();
     case "xai":
       return await getXAIModels(apiKey);
+    case "nvidia-nim":
+      return await getNvidiaNimModels(basePath);
     default:
       return { models: [], error: "Invalid provider for custom models" };
   }
@@ -520,6 +525,37 @@ async function getXAIModels(_apiKey = null) {
   return { models, error: null };
 }
 
+async function getNvidiaNimModels(basePath = null) {
+  try {
+    const { OpenAI: OpenAIApi } = require("openai");
+    const openai = new OpenAIApi({
+      baseURL: parseNvidiaNimBasePath(
+        basePath ?? process.env.NVIDIA_NIM_LLM_BASE_PATH
+      ),
+      apiKey: null,
+    });
+    const modelResponse = await openai.models
+      .list()
+      .then((results) => results.data)
+      .catch((e) => {
+        throw new Error(e.message);
+      });
+
+    const models = modelResponse.map((model) => {
+      return {
+        id: model.id,
+        name: model.id,
+        organization: model.owned_by,
+      };
+    });
+
+    return { models, error: null };
+  } catch (e) {
+    console.error(`Nvidia NIM:getNvidiaNimModels`, e.message);
+    return { models: [], error: "Could not fetch Nvidia NIM Models" };
+  }
+}
+
 module.exports = {
   getCustomModels,
 };
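A hypothetical call into the new branch; the base path is normalized by `parseNvidiaNimBasePath` before the request, so a bare origin works:

```js
// Hypothetical usage of the new "nvidia-nim" branch in getCustomModels.
const { getCustomModels } = require("./customModels"); // require path assumed

(async () => {
  const { models, error } = await getCustomModels(
    "nvidia-nim",
    null, // NIM needs no API key
    "http://127.0.0.1:8000" // normalized to http://127.0.0.1:8000/v1
  );
  if (error) throw new Error(error);
  console.log(models.map((m) => m.id));
})();
```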
@@ -171,6 +171,9 @@ function getLLMProvider({ provider = null, model = null } = {}) {
     case "xai":
       const { XAiLLM } = require("../AiProviders/xai");
       return new XAiLLM(embedder, model);
+    case "nvidia-nim":
+      const { NvidiaNimLLM } = require("../AiProviders/nvidiaNim");
+      return new NvidiaNimLLM(embedder, model);
     default:
       throw new Error(
         `ENV: No valid LLM_PROVIDER value found in environment! Using ${process.env.LLM_PROVIDER}`

@@ -309,6 +312,9 @@ function getLLMProviderClass({ provider = null } = {}) {
     case "xai":
       const { XAiLLM } = require("../AiProviders/xai");
       return XAiLLM;
+    case "nvidia-nim":
+      const { NvidiaNimLLM } = require("../AiProviders/nvidiaNim");
+      return NvidiaNimLLM;
     default:
       return null;
   }
@@ -578,6 +578,29 @@ const KEY_MAPPING = {
     envKey: "XAI_LLM_MODEL_PREF",
     checks: [isNotEmpty],
   },
+
+  // Nvidia NIM Options
+  NvidiaNimLLMBasePath: {
+    envKey: "NVIDIA_NIM_LLM_BASE_PATH",
+    checks: [isValidURL],
+    postUpdate: [
+      (_, __, nextValue) => {
+        const { parseNvidiaNimBasePath } = require("../AiProviders/nvidiaNim");
+        process.env.NVIDIA_NIM_LLM_BASE_PATH =
+          parseNvidiaNimBasePath(nextValue);
+      },
+    ],
+  },
+  NvidiaNimLLMModelPref: {
+    envKey: "NVIDIA_NIM_LLM_MODEL_PREF",
+    checks: [],
+    postUpdate: [
+      async (_, __, nextValue) => {
+        const { NvidiaNimLLM } = require("../AiProviders/nvidiaNim");
+        await NvidiaNimLLM.setModelTokenLimit(nextValue);
+      },
+    ],
+  },
 };
 
 function isNotEmpty(input = "") {

@@ -684,6 +707,7 @@ function supportedLLM(input = "") {
     "deepseek",
     "apipie",
     "xai",
+    "nvidia-nim",
   ].includes(input);
   return validSelection ? null : `${input} is not a valid LLM provider.`;
 }