add token context limit for native llm settings ()

Co-authored-by: timothycarambat <rambat1010@gmail.com>
Sean Hatfield, 2024-01-17 16:25:30 -08:00 (committed by GitHub)
parent 56dc49966d
commit 3fe7a25759
4 changed files with 50 additions and 28 deletions
frontend/src/components/LLMSelection/NativeLLMOptions
server
  models
  utils
    AiProviders/native
    helpers
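Net effect of the change: the token context window for the native LLM provider becomes user-configurable through a single environment variable rather than being fixed in code. A sketch of the resulting server-side entry (the key comes from this diff; the 4096 value is only the fallback visible below, shown here as an example):

# server .env — illustrative; set this to whatever context window your local model supports
NATIVE_LLM_MODEL_TOKEN_LIMIT=4096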

frontend/src/components/LLMSelection/NativeLLMOptions

@@ -54,31 +54,49 @@ function NativeModelSelection({ settings }) {
   }
 
   return (
-    <div className="flex flex-col w-60">
-      <label className="text-white text-sm font-semibold block mb-4">
-        Model Selection
-      </label>
-      <select
-        name="NativeLLMModelPref"
-        required={true}
-        className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
-      >
-        {customModels.length > 0 && (
-          <optgroup label="Your loaded models">
-            {customModels.map((model) => {
-              return (
-                <option
-                  key={model.id}
-                  value={model.id}
-                  selected={settings.NativeLLMModelPref === model.id}
-                >
-                  {model.id}
-                </option>
-              );
-            })}
-          </optgroup>
-        )}
-      </select>
-    </div>
+    <>
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-4">
+          Model Selection
+        </label>
+        <select
+          name="NativeLLMModelPref"
+          required={true}
+          className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+        >
+          {customModels.length > 0 && (
+            <optgroup label="Your loaded models">
+              {customModels.map((model) => {
+                return (
+                  <option
+                    key={model.id}
+                    value={model.id}
+                    selected={settings.NativeLLMModelPref === model.id}
+                  >
+                    {model.id}
+                  </option>
+                );
+              })}
+            </optgroup>
+          )}
+        </select>
+      </div>
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-4">
+          Token context window
+        </label>
+        <input
+          type="number"
+          name="NativeLLMTokenLimit"
+          className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
+          placeholder="4096"
+          min={1}
+          onScroll={(e) => e.target.blur()}
+          defaultValue={settings?.NativeLLMTokenLimit}
+          required={true}
+          autoComplete="off"
+        />
+      </div>
+    </>
   );
 }
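A side note on the new number input: onScroll={(e) => e.target.blur()} is a guard meant to keep a stray scroll from changing the value while the field is focused. The more commonly seen form of this trick listens for onWheel, since number inputs change value on wheel events rather than scroll events. A minimal standalone sketch of the pattern (component name is hypothetical, not part of this diff):

// Hypothetical reproduction of the scroll-guard pattern used above.
// Blurring on wheel stops the browser from treating wheel movement as an
// increment/decrement of the focused number input.
function TokenLimitInput({ defaultValue }) {
  return (
    <input
      type="number"
      min={1}
      defaultValue={defaultValue}
      onWheel={(e) => e.target.blur()}
    />
  );
}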

server/models

@@ -174,6 +174,7 @@ const SystemSettings = {
       ...(llmProvider === "native"
         ? {
             NativeLLMModelPref: process.env.NATIVE_LLM_MODEL_PREF,
+            NativeLLMTokenLimit: process.env.NATIVE_LLM_MODEL_TOKEN_LIMIT,
 
             // For embedding credentials when ollama is selected.
             OpenAiKey: !!process.env.OPEN_AI_KEY,
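This is the read path that round-trips the saved value back to the UI: the settings payload now carries NativeLLMTokenLimit, which the new frontend input consumes via defaultValue={settings?.NativeLLMTokenLimit}. A minimal sketch of that flow (the endpoint path is an assumption, not shown in this diff):

// Assumed client-side hydration; the fetch URL here is hypothetical.
const settings = await fetch("/api/system/settings").then((r) => r.json());
// e.g. "4096" — fed into the number input's defaultValue in the first hunk
console.log(settings?.NativeLLMTokenLimit);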

server/utils/AiProviders/native

@@ -94,8 +94,6 @@ class NativeLLM {
   }
 
   // Ensure the user set a value for the token limit
-  // and if undefined - assume 4096 window.
-  // DEV: Currently this ENV is not configurable.
   promptWindowLimit() {
     const limit = process.env.NATIVE_LLM_MODEL_TOKEN_LIMIT || 4096;
     if (!limit || isNaN(Number(limit)))
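The hunk is cut off by the file view; a sketch of how the visible guard plausibly completes, assuming the method throws on a non-numeric limit and otherwise returns it as a number (the error message is invented):

// Sketch only — continuation inferred from the visible lines, not from the diff.
promptWindowLimit() {
  const limit = process.env.NATIVE_LLM_MODEL_TOKEN_LIMIT || 4096;
  if (!limit || isNaN(Number(limit)))
    throw new Error("No valid token context limit was set."); // hypothetical message
  return Number(limit);
}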

server/utils/helpers

@@ -110,6 +110,11 @@ const KEY_MAPPING = {
     checks: [isDownloadedModel],
   },
+  NativeLLMTokenLimit: {
+    envKey: "NATIVE_LLM_MODEL_TOKEN_LIMIT",
+    checks: [nonZero],
+  },
+
   EmbeddingEngine: {
     envKey: "EMBEDDING_ENGINE",
     checks: [supportedEmbeddingModel],
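nonZero is referenced but not defined in this hunk. Assuming each entry in checks is a function that returns an error string on failure and null on success (an assumption; the hunk only shows the registration), a plausible shape for the validator is:

// Hypothetical implementation; the real nonZero lives elsewhere in
// server/utils/helpers and may differ in wording and edge cases.
function nonZero(input = "") {
  if (isNaN(Number(input)) || Number(input) <= 0)
    return "Value must be a number greater than zero.";
  return null;
}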