Mirror of https://github.com/Mintplex-Labs/anything-llm.git
Ollama performance mode option (#2014)
* Ollama performance mode option
* Change ENV prop
* Move perf setting to advanced

Co-authored-by: timothycarambat <rambat1010@gmail.com>
parent 8cfe855bc8
commit 7273c892a1
4 changed files with 62 additions and 8 deletions
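In outline, the four changes below: the frontend OllamaLLMOptions component gains a Performance Mode select (saved as OllamaLLMPerformanceMode) behind the advanced-settings toggle; the SystemSettings model echoes the saved value back to the UI with a "base" default; the OllamaAILLM provider reads OLLAMA_PERFORMANCE_MODE and, in "maximum" mode, pins the request's numCtx to the configured token limit; and the KEY_MAPPING table in the env-update helper wires the new field to its env var. A minimal sketch of the resulting behavior, using only names that appear in the diff (the temperature value is illustrative):

    // How the new setting resolves at request time.
    const performanceMode = process.env.OLLAMA_PERFORMANCE_MODE ?? "base";
    const tokenLimit = Number(process.env.OLLAMA_MODEL_TOKEN_LIMIT ?? 4096);
    const chatOptions = {
      temperature: 0.7,
      // "base": omit numCtx so Ollama keeps its default 2048-token context.
      // "maximum": request the full configured window.
      ...(performanceMode === "base" ? {} : { numCtx: tokenLimit }),
    };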
Changed files:
frontend/src/components/LLMSelection/OllamaLLMOptions
server
@@ -2,8 +2,9 @@ import React, { useEffect, useState } from "react";
 import System from "@/models/system";
 import PreLoader from "@/components/Preloader";
 import { OLLAMA_COMMON_URLS } from "@/utils/constants";
-import { CaretDown, CaretUp } from "@phosphor-icons/react";
+import { CaretDown, CaretUp, Info } from "@phosphor-icons/react";
 import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery";
+import { Tooltip } from "react-tooltip";

 export default function OllamaLLMOptions({ settings }) {
   const {
@@ -18,15 +19,13 @@ export default function OllamaLLMOptions({ settings }) {
     initialBasePath: settings?.OllamaLLMBasePath,
     ENDPOINTS: OLLAMA_COMMON_URLS,
   });

+  const [performanceMode, setPerformanceMode] = useState(
+    settings?.OllamaLLMPerformanceMode || "base"
+  );
   const [maxTokens, setMaxTokens] = useState(
     settings?.OllamaLLMTokenLimit || 4096
   );
-
-  const handleMaxTokensChange = (e) => {
-    setMaxTokens(Number(e.target.value));
-  };

   return (
     <div className="w-full flex flex-col gap-y-7">
       <div className="w-full flex items-start gap-[36px] mt-1.5">
@@ -46,7 +45,7 @@ export default function OllamaLLMOptions({ settings }) {
             defaultChecked="4096"
             min={1}
             value={maxTokens}
-            onChange={handleMaxTokensChange}
+            onChange={(e) => setMaxTokens(Number(e.target.value))}
             onScroll={(e) => e.target.blur()}
             required={true}
             autoComplete="off"
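A note on the hunk above: the inline onChange coerces the input's string value with Number(), and onScroll blurs the field, a common guard for numeric inputs where a mouse wheel over a focused field silently changes the value. (The defaultChecked="4096" in the context lines applies only to checkboxes/radios and looks like a pre-existing slip for defaultValue; it predates this commit.) The pattern in isolation, as a sketch that assumes the input is type="number" in the full file:

    function MaxTokensInput({ maxTokens, setMaxTokens }) {
      return (
        <input
          type="number"
          min={1}
          value={maxTokens}
          onChange={(e) => setMaxTokens(Number(e.target.value))}
          onScroll={(e) => e.target.blur()}
        />
      );
    }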
@@ -64,7 +63,7 @@ export default function OllamaLLMOptions({ settings }) {
           }}
           className="text-white hover:text-white/70 flex items-center text-sm"
         >
-          {showAdvancedControls ? "Hide" : "Show"} Manual Endpoint Input
+          {showAdvancedControls ? "Hide" : "Show"} advanced settings
           {showAdvancedControls ? (
             <CaretUp size={14} className="ml-1" />
           ) : (
@@ -134,12 +133,57 @@ export default function OllamaLLMOptions({ settings }) {
               className="underline text-blue-300"
               href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
               target="_blank"
               rel="noreferrer"
             >
               {" "}
               Learn more &rarr;
             </a>
           </p>
         </div>
+
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold mb-2 flex items-center">
+            Performance Mode
+            <Info
+              size={16}
+              className="ml-2 text-white"
+              data-tooltip-id="performance-mode-tooltip"
+            />
+          </label>
+          <select
+            name="OllamaLLMPerformanceMode"
+            required={true}
+            className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+            value={performanceMode}
+            onChange={(e) => setPerformanceMode(e.target.value)}
+          >
+            <option value="base">Base (Default)</option>
+            <option value="maximum">Maximum</option>
+          </select>
+          <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+            Choose the performance mode for the Ollama model.
+          </p>
+          <Tooltip
+            id="performance-mode-tooltip"
+            place="bottom"
+            className="tooltip !text-xs max-w-xs"
+          >
+            <p className="text-red-500">
+              <strong>Note:</strong> Only change this setting if you
+              understand its implications on performance and resource usage.
+            </p>
+            <br />
+            <p>
+              <strong>Base:</strong> Ollama automatically limits the context
+              to 2048 tokens, reducing VRAM usage. Suitable for most users.
+            </p>
+            <br />
+            <p>
+              <strong>Maximum:</strong> Uses the full context window (up to
+              Max Tokens). May increase VRAM usage significantly.
+            </p>
+          </Tooltip>
+        </div>
       </div>
     </div>
   </div>
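The tooltip is wired through react-tooltip's id-based API: the <Info> icon carries data-tooltip-id="performance-mode-tooltip", and the <Tooltip> with the matching id attaches to it, rendering its JSX children as the tooltip body. A minimal standalone sketch of the same pattern (component name and copy are illustrative):

    import React from "react";
    import { Tooltip } from "react-tooltip";

    function InfoWithTip() {
      return (
        <>
          {/* Any element with a matching data-tooltip-id becomes a trigger. */}
          <span data-tooltip-id="demo-tooltip">Hover me</span>
          <Tooltip id="demo-tooltip" place="bottom">
            Tooltip content can be arbitrary JSX, as in the diff above.
          </Tooltip>
        </>
      );
    }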
@@ -412,6 +412,7 @@ const SystemSettings = {
       OllamaLLMModelPref: process.env.OLLAMA_MODEL_PREF,
       OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT,
       OllamaLLMKeepAliveSeconds: process.env.OLLAMA_KEEP_ALIVE_TIMEOUT ?? 300,
+      OllamaLLMPerformanceMode: process.env.OLLAMA_PERFORMANCE_MODE ?? "base",

       // TogetherAI Keys
       TogetherAiApiKey: !!process.env.TOGETHER_AI_API_KEY,
@@ -13,6 +13,7 @@ class OllamaAILLM {

     this.basePath = process.env.OLLAMA_BASE_PATH;
     this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
+    this.performanceMode = process.env.OLLAMA_PERFORMANCE_MODE || "base";
     this.keepAlive = process.env.OLLAMA_KEEP_ALIVE_TIMEOUT
       ? Number(process.env.OLLAMA_KEEP_ALIVE_TIMEOUT)
       : 300; // Default 5-minute timeout for Ollama model loading.
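A small inconsistency worth flagging: the SystemSettings hunk above defaults with ?? while this constructor defaults with ||. The two agree except when the env var is set to an empty string:

    const mode = ""; // e.g. OLLAMA_PERFORMANCE_MODE saved as an empty string
    console.log(mode ?? "base"); // ""     -- ?? only replaces null/undefined
    console.log(mode || "base"); // "base" -- || replaces any falsy value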
@@ -33,6 +34,10 @@ class OllamaAILLM {
       model: this.model,
       keepAlive: this.keepAlive,
       useMLock: true,
+      // There are currently only two performance settings, so if it's not "base" it's max context.
+      ...(this.performanceMode === "base"
+        ? {}
+        : { numCtx: this.promptWindowLimit() }),
       temperature,
     });
   }
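The conditional spread above is the whole mechanism: spreading an empty object adds no keys, so "base" mode omits numCtx entirely (leaving Ollama at its default 2048-token context, per the tooltip copy), while any other value pins numCtx to promptWindowLimit(). The idiom in isolation:

    const base = { temperature: 0.7, ...(true ? {} : { numCtx: 4096 }) };
    const max = { temperature: 0.7, ...(false ? {} : { numCtx: 4096 }) };
    console.log(base); // { temperature: 0.7 }
    console.log(max);  // { temperature: 0.7, numCtx: 4096 }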
@@ -101,6 +101,10 @@ const KEY_MAPPING = {
     envKey: "OLLAMA_MODEL_TOKEN_LIMIT",
     checks: [nonZero],
   },
+  OllamaLLMPerformanceMode: {
+    envKey: "OLLAMA_PERFORMANCE_MODE",
+    checks: [],
+  },
   OllamaLLMKeepAliveSeconds: {
     envKey: "OLLAMA_KEEP_ALIVE_TIMEOUT",
     checks: [isInteger],
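The empty checks array means any submitted value is written to OLLAMA_PERFORMANCE_MODE unvalidated. Assuming this file's convention that a check returns an error string or null (as the nonZero and isInteger entries suggest), a stricter entry could look like the following hypothetical sketch, which is not part of the commit:

    function validOllamaPerformanceMode(input = "") {
      return ["base", "maximum"].includes(input)
        ? null
        : "Performance mode must be either 'base' or 'maximum'.";
    }

    // Wired in as:
    // OllamaLLMPerformanceMode: {
    //   envKey: "OLLAMA_PERFORMANCE_MODE",
    //   checks: [validOllamaPerformanceMode],
    // },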