diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py
index 84b0081b..82f68259 100644
--- a/src/khoj/processor/conversation/openai/utils.py
+++ b/src/khoj/processor/conversation/openai/utils.py
@@ -97,7 +97,8 @@ def completion_with_backoff(
         # Calculate cost of chat
         input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
         output_tokens = chunk.usage.completion_tokens if hasattr(chunk, "usage") and chunk.usage else 0
-        tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"))
+        cost = (chunk.usage.model_extra.get("estimated_cost") or 0) if hasattr(chunk, "usage") and chunk.usage else 0  # Estimated costs returned by DeepInfra API
+        tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"), cost)
 
         # Save conversation trace
         tracer["chat_model"] = model_name
@@ -208,7 +209,8 @@ def llm_thread(
         # Calculate cost of chat
         input_tokens = chunk.usage.prompt_tokens if hasattr(chunk, "usage") and chunk.usage else 0
         output_tokens = chunk.usage.completion_tokens if hasattr(chunk, "usage") and chunk.usage else 0
-        tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"))
+        cost = (chunk.usage.model_extra.get("estimated_cost") or 0) if hasattr(chunk, "usage") and chunk.usage else 0  # Estimated costs returned by DeepInfra API
+        tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"), cost)
 
         # Save conversation trace
         tracer["chat_model"] = model_name
diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py
index 6fafa6d9..6214e5f5 100644
--- a/src/khoj/utils/helpers.py
+++ b/src/khoj/utils/helpers.py
@@ -584,13 +584,15 @@ def get_cost_of_chat_message(model_name: str, input_tokens: int = 0, output_toke
     return input_cost + output_cost + prev_cost
 
 
-def get_chat_usage_metrics(model_name: str, input_tokens: int = 0, output_tokens: int = 0, usage: dict = {}):
+def get_chat_usage_metrics(
+    model_name: str, input_tokens: int = 0, output_tokens: int = 0, usage: dict = {}, cost: float = 0.0
+):
     """
-    Get usage metrics for chat message based on input and output tokens
+    Get usage metrics for chat message based on input and output tokens and cost
     """
     prev_usage = usage or {"input_tokens": 0, "output_tokens": 0, "cost": 0.0}
     return {
         "input_tokens": prev_usage["input_tokens"] + input_tokens,
         "output_tokens": prev_usage["output_tokens"] + output_tokens,
-        "cost": get_cost_of_chat_message(model_name, input_tokens, output_tokens, prev_cost=prev_usage["cost"]),
+        "cost": prev_usage["cost"] + cost if cost else get_cost_of_chat_message(model_name, input_tokens, output_tokens, prev_cost=prev_usage["cost"]),
     }