update ollama performance mode ()

Timothy Carambat 2024-12-18 11:21:35 -08:00 committed by GitHub
parent af703427c7
commit a51de73aaa
2 changed files with 19 additions and 8 deletions
frontend/src/components/LLMSelection/OllamaLLMOptions
server/utils/AiProviders/ollama

frontend/src/components/LLMSelection/OllamaLLMOptions

@@ -169,18 +169,22 @@ export default function OllamaLLMOptions({ settings }) {
           className="tooltip !text-xs max-w-xs"
         >
           <p className="text-red-500">
-            <strong>Note:</strong> Only change this setting if you
-            understand its implications on performance and resource usage.
+            <strong>Note:</strong> Be careful with the Maximum mode. It may
+            increase resource usage significantly.
           </p>
           <br />
           <p>
             <strong>Base:</strong> Ollama automatically limits the context
-            to 2048 tokens, reducing VRAM usage. Suitable for most users.
+            to 2048 tokens, keeping resource usage low while maintaining
+            good performance. Suitable for most users and models.
           </p>
           <br />
           <p>
             <strong>Maximum:</strong> Uses the full context window (up to
-            Max Tokens). May increase VRAM usage significantly.
+            Max Tokens). Will result in increased resource usage but allows
+            for larger context conversations. <br />
+            <br />
+            This is not recommended for most users.
           </p>
         </Tooltip>
       </div>
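
For reference, the two modes the tooltip describes differ only in whether num_ctx is sent to Ollama. A minimal sketch of that mapping (illustrative, not the shipped code; promptWindowLimit stands in for the server provider's helper of the same name):

  // "base"    -> omit num_ctx, so Ollama falls back to its 2048-token default
  // "maximum" -> pin num_ctx to the model's full context window
  function contextOptions(performanceMode, promptWindowLimit) {
    return performanceMode === "base" ? {} : { num_ctx: promptWindowLimit };
  }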

server/utils/AiProviders/ollama

@@ -29,6 +29,13 @@ class OllamaAILLM {
     this.client = new Ollama({ host: this.basePath });
     this.embedder = embedder ?? new NativeEmbedder();
     this.defaultTemp = 0.7;
+    this.#log(
+      `OllamaAILLM initialized with\nmodel: ${this.model}\nperf: ${this.performanceMode}\nn_ctx: ${this.promptWindowLimit()}`
+    );
   }
 
+  #log(text, ...args) {
+    console.log(`\x1b[32m[Ollama]\x1b[0m ${text}`, ...args);
+  }
+
   #appendContext(contextTexts = []) {
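
The new #log call announces the provider's effective settings at construction time, so the chosen performance mode and context size are visible in the server console. The \x1b[32m ... \x1b[0m pair is an ANSI escape that prints the [Ollama] prefix in green and then resets the color. The output would look roughly like the following (model, mode, and window size are all illustrative):

  [Ollama] OllamaAILLM initialized with
  model: llama3.1:8b
  perf: maximum
  n_ctx: 131072
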
@@ -131,11 +138,11 @@ class OllamaAILLM {
         keep_alive: this.keepAlive,
         options: {
           temperature,
-          useMLock: true,
+          use_mlock: true,
           // There are currently only two performance settings so if its not "base" - its max context.
           ...(this.performanceMode === "base"
             ? {}
-            : { numCtx: this.promptWindowLimit() }),
+            : { num_ctx: this.promptWindowLimit() }),
         },
       })
       .then((res) => {
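
The key rename is the substantive fix in this hunk: the Ollama JS client passes the options object through to the HTTP API, which expects Modelfile-style snake_case keys, so the camelCase useMLock and numCtx had no effect. A minimal standalone sketch using the corrected keys (host, model, and values are illustrative):

  import { Ollama } from "ollama";

  const client = new Ollama({ host: "http://127.0.0.1:11434" });
  const res = await client.chat({
    model: "llama3.1:8b", // illustrative
    messages: [{ role: "user", content: "Hello" }],
    options: {
      temperature: 0.7,
      use_mlock: true, // recognized; "useMLock" is not a known option
      num_ctx: 8192, // recognized; "numCtx" is not a known option
    },
  });
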
@@ -179,11 +186,11 @@ class OllamaAILLM {
         keep_alive: this.keepAlive,
         options: {
           temperature,
-          useMLock: true,
+          use_mlock: false,
           // There are currently only two performance settings so if its not "base" - its max context.
           ...(this.performanceMode === "base"
             ? {}
-            : { numCtx: this.promptWindowLimit() }),
+            : { num_ctx: this.promptWindowLimit() }),
         },
       }),
       messages,
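
This streaming path builds the same options shape as the blocking path above, differing only in the use_mlock value. use_mlock asks the OS to pin the model's memory so it cannot be swapped out, trading higher resident RAM for steadier latency. A hypothetical private helper could keep the two call sites in sync (a sketch only, not part of this commit):

  // Hypothetical refactor; the commit keeps both option blocks inline.
  #chatOptions(temperature, useMlock) {
    return {
      temperature,
      use_mlock: useMlock,
      // Only two performance settings exist, so anything other than
      // "base" gets the full context window.
      ...(this.performanceMode === "base"
        ? {}
        : { num_ctx: this.promptWindowLimit() }),
    };
  }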