Mirror of https://github.com/khoj-ai/khoj.git, synced 2024-11-30 19:03:01 +01:00
Fix infer_max_tokens func when configured_max_tokens is set to None

parent 60658a8037 · commit 8e77b3dc82
1 changed file with 2 additions and 1 deletion
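Why the commit guards the cap, in brief: infer_max_tokens ends in a min() call, and Python 3 refuses to order None against numbers, so a None cap has to be coerced before the comparison. A minimal illustration in plain Python (no khoj imports; the cap variable is hypothetical):

import math

# min() cannot compare None with an int in Python 3:
#   min(None, 4096)  -> TypeError: '<' not supported between instances of 'int' and 'NoneType'
# Coercing None to math.inf makes "no cap" an identity element for min():
cap = None
cap = cap or math.inf  # the fallback this commit adds
print(min(cap, 4096))  # 4096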
@@ -65,8 +65,9 @@ def load_model_from_cache(repo_id: str, filename: str, repo_type="models"):
     return None
 
 
-def infer_max_tokens(model_context_window: int, configured_max_tokens=math.inf) -> int:
+def infer_max_tokens(model_context_window: int, configured_max_tokens=None) -> int:
     """Infer max prompt size based on device memory and max context window supported by the model"""
     configured_max_tokens = math.inf if configured_max_tokens is None else configured_max_tokens
     vram_based_n_ctx = int(get_device_memory() / 2e6)  # based on heuristic
+    configured_max_tokens = configured_max_tokens or math.inf  # do not use if set to None
     return min(configured_max_tokens, vram_based_n_ctx, model_context_window)
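Putting the hunk together, the patched function reads as below. This is a runnable sketch, not khoj's module verbatim: get_device_memory is stubbed here (khoj's real helper reports actual device memory), and the 8 GiB figure is an assumption for the example:

import math


def get_device_memory() -> int:
    # Stub for illustration only; assumes an 8 GiB device.
    return 8 * 1024**3


def infer_max_tokens(model_context_window: int, configured_max_tokens=None) -> int:
    """Infer max prompt size based on device memory and max context window supported by the model"""
    configured_max_tokens = math.inf if configured_max_tokens is None else configured_max_tokens
    vram_based_n_ctx = int(get_device_memory() / 2e6)  # based on heuristic
    configured_max_tokens = configured_max_tokens or math.inf  # do not use if set to None
    return min(configured_max_tokens, vram_based_n_ctx, model_context_window)


# 8 GiB / 2e6 ~= 4294 tokens, so an unset cap resolves to the VRAM heuristic:
print(infer_max_tokens(8192))        # 4294
print(infer_max_tokens(8192, 2000))  # 2000, an explicit cap wins
print(infer_max_tokens(2048))        # 2048, the model window caps everything

The result is always the tightest of the three limits: the operator-configured cap, the memory-based heuristic, and the model's own context window.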