Only evaluate non-empty responses to reduce eval script latency, cost

An empty response from Khoj is always incorrect, so there is no need to call the evaluator agent to check it.
2024-11-23 15:38:55 +01:00 · 2024-11-07 15:23:30 -08:00 · 2024-11-07 15:23:30 -08:00 · 84a8088c2b
commit 84a8088c2b
parent ceb29eae74
1 changed files with 5 additions and 1 deletions
--- a/tests/eval_frames.py
+++ b/tests/eval_frames.py
@ -113,7 +113,11 @@ def process_batch(batch, counter, results, dataset_length):
        agent_response = get_agent_response(prompt)

        # Evaluate response
-        evaluation = evaluate_response(prompt, agent_response, answer)
+        if agent_response is None or agent_response.strip() == "":
+            evaluation["decision"] = False
+            evaluation["explanation"] = "Agent response is empty. This maybe due to a service error."
+        else:
+            evaluation = evaluate_response(prompt, agent_response, answer)

        # Store results
        results.append(