mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2025-04-17 18:18:11 +00:00
update ollama performance mode (#2874)
This commit is contained in:
parent
af703427c7
commit
a51de73aaa
2 changed files with 19 additions and 8 deletions
|
@ -169,18 +169,22 @@ export default function OllamaLLMOptions({ settings }) {
|
|||
className="tooltip !text-xs max-w-xs"
|
||||
>
|
||||
<p className="text-red-500">
|
||||
<strong>Note:</strong> Only change this setting if you
|
||||
understand its implications on performance and resource usage.
|
||||
<strong>Note:</strong> Be careful with the Maximum mode. It may
|
||||
increase resource usage significantly.
|
||||
</p>
|
||||
<br />
|
||||
<p>
|
||||
<strong>Base:</strong> Ollama automatically limits the context
|
||||
to 2048 tokens, reducing VRAM usage. Suitable for most users.
|
||||
to 2048 tokens, keeping resources usage low while maintaining
|
||||
good performance. Suitable for most users and models.
|
||||
</p>
|
||||
<br />
|
||||
<p>
|
||||
<strong>Maximum:</strong> Uses the full context window (up to
|
||||
Max Tokens). May increase VRAM usage significantly.
|
||||
Max Tokens). Will result in increased resource usage but allows
|
||||
for larger context conversations. <br />
|
||||
<br />
|
||||
This is not recommended for most users.
|
||||
</p>
|
||||
</Tooltip>
|
||||
</div>
|
||||
|
|
|
@ -29,6 +29,13 @@ class OllamaAILLM {
|
|||
this.client = new Ollama({ host: this.basePath });
|
||||
this.embedder = embedder ?? new NativeEmbedder();
|
||||
this.defaultTemp = 0.7;
|
||||
this.#log(
|
||||
`OllamaAILLM initialized with\nmodel: ${this.model}\nperf: ${this.performanceMode}\nn_ctx: ${this.promptWindowLimit()}`
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Private logging helper: prints a message through console.log, prefixed with
 * an "[Ollama]" tag wrapped in ANSI color codes (\x1b[32m = green, \x1b[0m = reset).
 *
 * @param {string} text - Message interpolated after the tag in the format string.
 * @param {...any} args - Additional values forwarded to console.log unchanged.
 */
#log(text, ...args) {
|
||||
console.log(`\x1b[32m[Ollama]\x1b[0m ${text}`, ...args);
|
||||
}
|
||||
|
||||
#appendContext(contextTexts = []) {
|
||||
|
@ -131,11 +138,11 @@ class OllamaAILLM {
|
|||
keep_alive: this.keepAlive,
|
||||
options: {
|
||||
temperature,
|
||||
useMLock: true,
|
||||
use_mlock: true,
|
||||
// There are currently only two performance settings, so if it's not "base", it's max context.
|
||||
...(this.performanceMode === "base"
|
||||
? {}
|
||||
: { numCtx: this.promptWindowLimit() }),
|
||||
: { num_ctx: this.promptWindowLimit() }),
|
||||
},
|
||||
})
|
||||
.then((res) => {
|
||||
|
@ -179,11 +186,11 @@ class OllamaAILLM {
|
|||
keep_alive: this.keepAlive,
|
||||
options: {
|
||||
temperature,
|
||||
useMLock: true,
|
||||
use_mlock: false,
|
||||
// There are currently only two performance settings, so if it's not "base", it's max context.
|
||||
...(this.performanceMode === "base"
|
||||
? {}
|
||||
: { numCtx: this.promptWindowLimit() }),
|
||||
: { num_ctx: this.promptWindowLimit() }),
|
||||
},
|
||||
}),
|
||||
messages,
|
||||
|
|
Loading…
Add table
Reference in a new issue