diff --git a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
index 41b5c3e75..d04f7cb62 100644
--- a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
+++ b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
@@ -169,18 +169,22 @@ export default function OllamaLLMOptions({ settings }) {
             className="tooltip !text-xs max-w-xs"
           >
             <p className="text-red-500">
-              <strong>Note:</strong> Only change this setting if you
-              understand its implications on performance and resource usage.
+              <strong>Note:</strong> Be careful with the Maximum mode. It may
+              increase resource usage significantly.
             </p>
             <br />
             <p>
               <strong>Base:</strong> Ollama automatically limits the context
-              to 2048 tokens, reducing VRAM usage. Suitable for most users.
+              to 2048 tokens, keeping resource usage low while maintaining
+              good performance. Suitable for most users and models.
             </p>
             <br />
             <p>
               <strong>Maximum:</strong> Uses the full context window (up to
-              Max Tokens). May increase VRAM usage significantly.
+              Max Tokens). Will result in increased resource usage but allows
+              for larger context conversations. <br />
+              <br />
+              This is not recommended for most users.
             </p>
           </Tooltip>
         </div>
diff --git a/server/utils/AiProviders/ollama/index.js b/server/utils/AiProviders/ollama/index.js
index b62c80929..3ba8ad1d2 100644
--- a/server/utils/AiProviders/ollama/index.js
+++ b/server/utils/AiProviders/ollama/index.js
@@ -29,6 +29,13 @@ class OllamaAILLM {
     this.client = new Ollama({ host: this.basePath });
     this.embedder = embedder ?? new NativeEmbedder();
     this.defaultTemp = 0.7;
+    this.#log(
+      `OllamaAILLM initialized with\nmodel: ${this.model}\nperf: ${this.performanceMode}\nn_ctx: ${this.promptWindowLimit()}`
+    );
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[32m[Ollama]\x1b[0m ${text}`, ...args);
   }
 
   #appendContext(contextTexts = []) {
@@ -131,11 +138,11 @@ class OllamaAILLM {
         keep_alive: this.keepAlive,
         options: {
           temperature,
-          useMLock: true,
+          use_mlock: true,
           // There are currently only two performance settings so if its not "base" - its max context.
           ...(this.performanceMode === "base"
             ? {}
-            : { numCtx: this.promptWindowLimit() }),
+            : { num_ctx: this.promptWindowLimit() }),
         },
       })
       .then((res) => {
@@ -179,11 +186,11 @@ class OllamaAILLM {
         keep_alive: this.keepAlive,
         options: {
           temperature,
-          useMLock: true,
+          use_mlock: true,
           // There are currently only two performance settings so if its not "base" - its max context.
           ...(this.performanceMode === "base"
             ? {}
-            : { numCtx: this.promptWindowLimit() }),
+            : { num_ctx: this.promptWindowLimit() }),
         },
       }),
       messages,
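
Context on the server-side change: the `ollama` JS client forwards the `options` object to Ollama's REST API, which expects the snake_case parameter names documented for the Modelfile (`num_ctx`, `use_mlock`, and so on). CamelCase keys like `numCtx` and `useMLock` do not map to any runtime parameter, so before this fix the selected performance mode never actually took effect. Below is a minimal standalone sketch of the corrected request shape, assuming a local Ollama server on the default port; the model name, `MAX_TOKENS`, and `performanceMode` values are illustrative placeholders, not code from this PR:

```js
import { Ollama } from "ollama";

const client = new Ollama({ host: "http://127.0.0.1:11434" });

// Illustrative stand-ins; the PR reads these from instance state.
const MAX_TOKENS = 8192; // stand-in for this.promptWindowLimit()
const performanceMode = "maximum"; // or "base"

const response = await client.chat({
  model: "llama3.1:8b",
  messages: [{ role: "user", content: "Hello!" }],
  keep_alive: "5m",
  options: {
    temperature: 0.7,
    // Ollama's API expects snake_case keys; `useMLock` is not recognized.
    use_mlock: true,
    // "base" omits num_ctx, so Ollama falls back to its 2048-token default;
    // anything else requests the full configured context window.
    ...(performanceMode === "base" ? {} : { num_ctx: MAX_TOKENS }),
  },
});
console.log(response.message.content);
```

The new `#log` call in the constructor prints the resolved `n_ctx` at startup, which makes it easy to confirm from the server logs that the selected performance mode is actually being applied.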