update ollama performance mode ()

Timothy Carambat 2024-12-18 11:21:35 -08:00 committed by GitHub
parent af703427c7
commit a51de73aaa
2 changed files with 19 additions and 8 deletions
frontend/src/components/LLMSelection/OllamaLLMOptions
server/utils/AiProviders/ollama

frontend/src/components/LLMSelection/OllamaLLMOptions

@@ -169,18 +169,22 @@ export default function OllamaLLMOptions({ settings }) {
           className="tooltip !text-xs max-w-xs"
         >
           <p className="text-red-500">
-            <strong>Note:</strong> Only change this setting if you
-            understand its implications on performance and resource usage.
+            <strong>Note:</strong> Be careful with the Maximum mode. It may
+            increase resource usage significantly.
           </p>
           <br />
           <p>
             <strong>Base:</strong> Ollama automatically limits the context
-            to 2048 tokens, reducing VRAM usage. Suitable for most users.
+            to 2048 tokens, keeping resource usage low while maintaining
+            good performance. Suitable for most users and models.
           </p>
           <br />
           <p>
             <strong>Maximum:</strong> Uses the full context window (up to
-            Max Tokens). May increase VRAM usage significantly.
+            Max Tokens). Will result in increased resource usage but allows
+            for larger context conversations. <br />
+            <br />
+            This is not recommended for most users.
           </p>
         </Tooltip>
       </div>
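
For reference, the two modes the tooltip describes differ only in whether num_ctx is sent to Ollama. A minimal sketch of that mapping (illustrative, not the shipped code; promptWindowLimit stands in for the server provider's helper of the same name):

  // "base"    -> omit num_ctx, so Ollama falls back to its 2048-token default
  // "maximum" -> pin num_ctx to the model's full context window
  function contextOptions(performanceMode, promptWindowLimit) {
    return performanceMode === "base" ? {} : { num_ctx: promptWindowLimit };
  }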

server/utils/AiProviders/ollama

@@ -29,6 +29,13 @@ class OllamaAILLM {
     this.client = new Ollama({ host: this.basePath });
     this.embedder = embedder ?? new NativeEmbedder();
     this.defaultTemp = 0.7;
+    this.#log(
+      `OllamaAILLM initialized with\nmodel: ${this.model}\nperf: ${this.performanceMode}\nn_ctx: ${this.promptWindowLimit()}`
+    );
   }
 
+  #log(text, ...args) {
+    console.log(`\x1b[32m[Ollama]\x1b[0m ${text}`, ...args);
+  }
+
   #appendContext(contextTexts = []) {
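
The new #log call announces the provider's effective settings at construction time, so the chosen performance mode and context size are visible in the server console. The \x1b[32m ... \x1b[0m pair is an ANSI escape that prints the [Ollama] prefix in green and then resets the color. The output would look roughly like the following (model, mode, and window size are all illustrative):

  [Ollama] OllamaAILLM initialized with
  model: llama3.1:8b
  perf: maximum
  n_ctx: 131072
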
@@ -131,11 +138,11 @@ class OllamaAILLM {
         keep_alive: this.keepAlive,
         options: {
           temperature,
-          useMLock: true,
+          use_mlock: true,
           // There are currently only two performance settings so if its not "base" - its max context.
           ...(this.performanceMode === "base"
             ? {}
-            : { numCtx: this.promptWindowLimit() }),
+            : { num_ctx: this.promptWindowLimit() }),
         },
       })
       .then((res) => {
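
The key rename is the substantive fix in this hunk: the Ollama JS client passes the options object through to the HTTP API, which expects Modelfile-style snake_case keys, so the camelCase useMLock and numCtx had no effect. A minimal standalone sketch using the corrected keys (host, model, and values are illustrative):

  import { Ollama } from "ollama";

  const client = new Ollama({ host: "http://127.0.0.1:11434" });
  const res = await client.chat({
    model: "llama3.1:8b", // illustrative
    messages: [{ role: "user", content: "Hello" }],
    options: {
      temperature: 0.7,
      use_mlock: true, // recognized; "useMLock" is not a known option
      num_ctx: 8192, // recognized; "numCtx" is not a known option
    },
  });
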
@@ -179,11 +186,11 @@ class OllamaAILLM {
         keep_alive: this.keepAlive,
         options: {
           temperature,
-          useMLock: true,
+          use_mlock: false,
           // There are currently only two performance settings so if its not "base" - its max context.
           ...(this.performanceMode === "base"
             ? {}
-            : { numCtx: this.promptWindowLimit() }),
+            : { num_ctx: this.promptWindowLimit() }),
         },
       }),
       messages,
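
This streaming path builds the same options shape as the blocking path above, differing only in the use_mlock value. use_mlock asks the OS to pin the model's memory so it cannot be swapped out, trading higher resident RAM for steadier latency. A hypothetical private helper could keep the two call sites in sync (a sketch only, not part of this commit):

  // Hypothetical refactor; the commit keeps both option blocks inline.
  #chatOptions(temperature, useMlock) {
    return {
      temperature,
      use_mlock: useMlock,
      // Only two performance settings exist, so anything other than
      // "base" gets the full context window.
      ...(this.performanceMode === "base"
        ? {}
        : { num_ctx: this.promptWindowLimit() }),
    };
  }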