Only evaluate non-empty responses to reduce eval script latency, cost

Empty responses from Khoj are always incorrect, so there is no need to call an evaluator agent to check them
2024-11-27 09:25:06 +01:00 · 2024-11-07 15:23:30 -08:00 · 2024-11-07 15:23:30 -08:00 · d7fc4a91d5
commit d7fc4a91d5
parent 4cad96ded6
1 changed files with 5 additions and 1 deletions
--- a/tests/eval_frames.py
+++ b/tests/eval_frames.py
@ -113,7 +113,11 @@ def process_batch(batch, counter, results, dataset_length):
        agent_response = get_agent_response(prompt)

        # Evaluate response
-        evaluation = evaluate_response(prompt, agent_response, answer)
+        if agent_response is None or agent_response.strip() == "":
+            evaluation["decision"] = False
+            evaluation["explanation"] = "Agent response is empty. This maybe due to a service error."
+        else:
+            evaluation = evaluate_response(prompt, agent_response, answer)

        # Store results
        results.append(