diff --git a/tests/eval_frames.py b/tests/eval_frames.py
index 8a1d849e..ae44f63e 100644
--- a/tests/eval_frames.py
+++ b/tests/eval_frames.py
@@ -113,7 +113,11 @@ def process_batch(batch, counter, results, dataset_length):
         agent_response = get_agent_response(prompt)
 
         # Evaluate response
-        evaluation = evaluate_response(prompt, agent_response, answer)
+        if agent_response is None or agent_response.strip() == "":
+            # Build a fresh result: `evaluation` is not assigned before this branch in the visible code.
+            evaluation = {"decision": False, "explanation": "Agent response is empty. This maybe due to a service error."}
+        else:
+            evaluation = evaluate_response(prompt, agent_response, answer)
 
         # Store results
         results.append(