Implement support for selecting basic keep_alive times for Ollama

This commit is contained in:
Timothy Carambat 2024-07-22 14:44:47 -07:00 committed by GitHub
parent c646351347
commit 76aa2a4fd4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 43 additions and 0 deletions
frontend/src/components/LLMSelection/OllamaLLMOptions
server
models
utils
AiProviders/ollama
helpers

View file

@ -111,6 +111,35 @@ export default function OllamaLLMOptions({ settings }) {
Enter the URL where Ollama is running.
</p>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-2">
Ollama Keep Alive
</label>
<select
name="OllamaLLMKeepAliveSeconds"
required={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
defaultValue={settings?.OllamaLLMKeepAliveSeconds ?? "300"}
>
<option value="0">No cache</option>
<option value="300">5 minutes</option>
<option value="3600">1 hour</option>
<option value="-1">Forever</option>
</select>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Choose how long Ollama should keep your model in memory before
unloading.
<a
className="underline text-blue-300"
href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
target="_blank"
>
{" "}
Learn more &rarr;
</a>
</p>
</div>
</div>
</div>
</div>

View file

@ -411,6 +411,7 @@ const SystemSettings = {
OllamaLLMBasePath: process.env.OLLAMA_BASE_PATH,
OllamaLLMModelPref: process.env.OLLAMA_MODEL_PREF,
OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT,
OllamaLLMKeepAliveSeconds: process.env.OLLAMA_KEEP_ALIVE_TIMEOUT ?? 300,
// TogetherAI Keys
TogetherAiApiKey: !!process.env.TOGETHER_AI_API_KEY,

View file

@ -13,6 +13,9 @@ class OllamaAILLM {
this.basePath = process.env.OLLAMA_BASE_PATH;
this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
this.keepAlive = process.env.OLLAMA_KEEP_ALIVE_TIMEOUT
? Number(process.env.OLLAMA_KEEP_ALIVE_TIMEOUT)
: 300; // Default 5-minute timeout for Ollama model loading.
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
@ -28,6 +31,7 @@ class OllamaAILLM {
return new ChatOllama({
baseUrl: this.basePath,
model: this.model,
keepAlive: this.keepAlive,
useMLock: true,
temperature,
});

View file

@ -101,6 +101,10 @@ const KEY_MAPPING = {
envKey: "OLLAMA_MODEL_TOKEN_LIMIT",
checks: [nonZero],
},
OllamaLLMKeepAliveSeconds: {
envKey: "OLLAMA_KEEP_ALIVE_TIMEOUT",
checks: [isInteger],
},
// Mistral AI API Settings
MistralApiKey: {
@ -454,6 +458,11 @@ function nonZero(input = "") {
// Validation check for env updates: returns an error string when the value
// is not strictly positive, or null when the value is acceptable.
return Number(input) <= 0 ? "Value must be greater than zero" : null;
}
/**
 * Validation check for env updates: ensures the submitted value is an integer
 * (e.g. an Ollama keep-alive time in seconds, where -1 and 0 are valid).
 * Follows the same contract as the sibling `nonZero` check: returns an error
 * message string when invalid, and `null` when the value is acceptable.
 * (The original returned `Number(input)` on success, which is inconsistent
 * with the other checks and truthy for any non-zero value; it also accepted
 * non-integer numbers like "3.5" despite the function's name.)
 * @param {string} input - raw value submitted for the env key.
 * @returns {string|null} error message, or null when input is an integer.
 */
function isInteger(input = "") {
  if (isNaN(Number(input))) return "Value must be a number";
  if (!Number.isInteger(Number(input))) return "Value must be an integer";
  return null;
}
function isValidURL(input = "") {
try {
new URL(input);