Mirror of https://github.com/Mintplex-Labs/anything-llm.git
Ollama performance mode option (#2014)
* Ollama performance mode option
* Change ENV prop
* Move perf setting to advanced

Co-authored-by: timothycarambat <rambat1010@gmail.com>
parent 8cfe855bc8
commit 7273c892a1
4 changed files with 62 additions and 8 deletions
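In outline, the four changes below: the frontend OllamaLLMOptions component gains a Performance Mode select (saved as OllamaLLMPerformanceMode) behind the advanced-settings toggle; the SystemSettings model echoes the saved value back to the UI with a "base" default; the OllamaAILLM provider reads OLLAMA_PERFORMANCE_MODE and, in "maximum" mode, pins the request's numCtx to the configured token limit; and the KEY_MAPPING table in the env-update helper wires the new field to its env var. A minimal sketch of the resulting behavior, using only names that appear in the diff (the temperature value is illustrative):

    // How the new setting resolves at request time.
    const performanceMode = process.env.OLLAMA_PERFORMANCE_MODE ?? "base";
    const tokenLimit = Number(process.env.OLLAMA_MODEL_TOKEN_LIMIT ?? 4096);
    const chatOptions = {
      temperature: 0.7,
      // "base": omit numCtx so Ollama keeps its default 2048-token context.
      // "maximum": request the full configured window.
      ...(performanceMode === "base" ? {} : { numCtx: tokenLimit }),
    };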
Changed files:
frontend/src/components/LLMSelection/OllamaLLMOptions
server
@@ -2,8 +2,9 @@ import React, { useEffect, useState } from "react";
 import System from "@/models/system";
 import PreLoader from "@/components/Preloader";
 import { OLLAMA_COMMON_URLS } from "@/utils/constants";
-import { CaretDown, CaretUp } from "@phosphor-icons/react";
+import { CaretDown, CaretUp, Info } from "@phosphor-icons/react";
 import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery";
+import { Tooltip } from "react-tooltip";

 export default function OllamaLLMOptions({ settings }) {
   const {
@@ -18,15 +19,13 @@ export default function OllamaLLMOptions({ settings }) {
     initialBasePath: settings?.OllamaLLMBasePath,
     ENDPOINTS: OLLAMA_COMMON_URLS,
   });

+  const [performanceMode, setPerformanceMode] = useState(
+    settings?.OllamaLLMPerformanceMode || "base"
+  );
   const [maxTokens, setMaxTokens] = useState(
     settings?.OllamaLLMTokenLimit || 4096
   );
-
-  const handleMaxTokensChange = (e) => {
-    setMaxTokens(Number(e.target.value));
-  };

   return (
     <div className="w-full flex flex-col gap-y-7">
       <div className="w-full flex items-start gap-[36px] mt-1.5">
@@ -46,7 +45,7 @@ export default function OllamaLLMOptions({ settings }) {
             defaultChecked="4096"
             min={1}
             value={maxTokens}
-            onChange={handleMaxTokensChange}
+            onChange={(e) => setMaxTokens(Number(e.target.value))}
             onScroll={(e) => e.target.blur()}
             required={true}
             autoComplete="off"
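A note on the hunk above: the inline onChange coerces the input's string value with Number(), and onScroll blurs the field, a common guard for numeric inputs where a mouse wheel over a focused field silently changes the value. (The defaultChecked="4096" in the context lines applies only to checkboxes/radios and looks like a pre-existing slip for defaultValue; it predates this commit.) The pattern in isolation, as a sketch that assumes the input is type="number" in the full file:

    function MaxTokensInput({ maxTokens, setMaxTokens }) {
      return (
        <input
          type="number"
          min={1}
          value={maxTokens}
          onChange={(e) => setMaxTokens(Number(e.target.value))}
          onScroll={(e) => e.target.blur()}
        />
      );
    }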
@@ -64,7 +63,7 @@ export default function OllamaLLMOptions({ settings }) {
           }}
           className="text-white hover:text-white/70 flex items-center text-sm"
         >
-          {showAdvancedControls ? "Hide" : "Show"} Manual Endpoint Input
+          {showAdvancedControls ? "Hide" : "Show"} advanced settings
           {showAdvancedControls ? (
             <CaretUp size={14} className="ml-1" />
           ) : (
@@ -134,12 +133,57 @@ export default function OllamaLLMOptions({ settings }) {
               className="underline text-blue-300"
               href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
               target="_blank"
               rel="noreferrer"
             >
               {" "}
               Learn more &rarr;
             </a>
           </p>
         </div>
+
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold mb-2 flex items-center">
+            Performance Mode
+            <Info
+              size={16}
+              className="ml-2 text-white"
+              data-tooltip-id="performance-mode-tooltip"
+            />
+          </label>
+          <select
+            name="OllamaLLMPerformanceMode"
+            required={true}
+            className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+            value={performanceMode}
+            onChange={(e) => setPerformanceMode(e.target.value)}
+          >
+            <option value="base">Base (Default)</option>
+            <option value="maximum">Maximum</option>
+          </select>
+          <p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
+            Choose the performance mode for the Ollama model.
+          </p>
+          <Tooltip
+            id="performance-mode-tooltip"
+            place="bottom"
+            className="tooltip !text-xs max-w-xs"
+          >
+            <p className="text-red-500">
+              <strong>Note:</strong> Only change this setting if you
+              understand its implications on performance and resource usage.
+            </p>
+            <br />
+            <p>
+              <strong>Base:</strong> Ollama automatically limits the context
+              to 2048 tokens, reducing VRAM usage. Suitable for most users.
+            </p>
+            <br />
+            <p>
+              <strong>Maximum:</strong> Uses the full context window (up to
+              Max Tokens). May increase VRAM usage significantly.
+            </p>
+          </Tooltip>
+        </div>
       </div>
     </div>
   </div>
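The tooltip is wired through react-tooltip's id-based API: the <Info> icon carries data-tooltip-id="performance-mode-tooltip", and the <Tooltip> with the matching id attaches to it, rendering its JSX children as the tooltip body. A minimal standalone sketch of the same pattern (component name and copy are illustrative):

    import React from "react";
    import { Tooltip } from "react-tooltip";

    function InfoWithTip() {
      return (
        <>
          {/* Any element with a matching data-tooltip-id becomes a trigger. */}
          <span data-tooltip-id="demo-tooltip">Hover me</span>
          <Tooltip id="demo-tooltip" place="bottom">
            Tooltip content can be arbitrary JSX, as in the diff above.
          </Tooltip>
        </>
      );
    }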
@@ -412,6 +412,7 @@ const SystemSettings = {
       OllamaLLMModelPref: process.env.OLLAMA_MODEL_PREF,
       OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT,
       OllamaLLMKeepAliveSeconds: process.env.OLLAMA_KEEP_ALIVE_TIMEOUT ?? 300,
+      OllamaLLMPerformanceMode: process.env.OLLAMA_PERFORMANCE_MODE ?? "base",

       // TogetherAI Keys
       TogetherAiApiKey: !!process.env.TOGETHER_AI_API_KEY,
@@ -13,6 +13,7 @@ class OllamaAILLM {

     this.basePath = process.env.OLLAMA_BASE_PATH;
     this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
+    this.performanceMode = process.env.OLLAMA_PERFORMANCE_MODE || "base";
     this.keepAlive = process.env.OLLAMA_KEEP_ALIVE_TIMEOUT
       ? Number(process.env.OLLAMA_KEEP_ALIVE_TIMEOUT)
       : 300; // Default 5-minute timeout for Ollama model loading.
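A small inconsistency worth flagging: the SystemSettings hunk above defaults with ?? while this constructor defaults with ||. The two agree except when the env var is set to an empty string:

    const mode = ""; // e.g. OLLAMA_PERFORMANCE_MODE saved as an empty string
    console.log(mode ?? "base"); // ""     -- ?? only replaces null/undefined
    console.log(mode || "base"); // "base" -- || replaces any falsy value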
@@ -33,6 +34,10 @@ class OllamaAILLM {
       model: this.model,
       keepAlive: this.keepAlive,
       useMLock: true,
+      // There are currently only two performance settings, so if it's not "base" it's max context.
+      ...(this.performanceMode === "base"
+        ? {}
+        : { numCtx: this.promptWindowLimit() }),
       temperature,
     });
   }
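The conditional spread above is the whole mechanism: spreading an empty object adds no keys, so "base" mode omits numCtx entirely (leaving Ollama at its default 2048-token context, per the tooltip copy), while any other value pins numCtx to promptWindowLimit(). The idiom in isolation:

    const base = { temperature: 0.7, ...(true ? {} : { numCtx: 4096 }) };
    const max = { temperature: 0.7, ...(false ? {} : { numCtx: 4096 }) };
    console.log(base); // { temperature: 0.7 }
    console.log(max);  // { temperature: 0.7, numCtx: 4096 }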
@@ -101,6 +101,10 @@ const KEY_MAPPING = {
     envKey: "OLLAMA_MODEL_TOKEN_LIMIT",
     checks: [nonZero],
   },
+  OllamaLLMPerformanceMode: {
+    envKey: "OLLAMA_PERFORMANCE_MODE",
+    checks: [],
+  },
   OllamaLLMKeepAliveSeconds: {
     envKey: "OLLAMA_KEEP_ALIVE_TIMEOUT",
     checks: [isInteger],
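The empty checks array means any submitted value is written to OLLAMA_PERFORMANCE_MODE unvalidated. Assuming this file's convention that a check returns an error string or null (as the nonZero and isInteger entries suggest), a stricter entry could look like the following hypothetical sketch, which is not part of the commit:

    function validOllamaPerformanceMode(input = "") {
      return ["base", "maximum"].includes(input)
        ? null
        : "Performance mode must be either 'base' or 'maximum'.";
    }

    // Wired in as:
    // OllamaLLMPerformanceMode: {
    //   envKey: "OLLAMA_PERFORMANCE_MODE",
    //   checks: [validOllamaPerformanceMode],
    // },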