Fix and improve offline chat actor, director tests

- Use updated references schema with compiled key
- Enable director tests that are now expected to pass and that do pass (with Gemma 2 at least)
2024-11-23 15:38:55 +01:00 · 2024-07-18 03:07:16 +05:30 · 2024-07-18 03:07:16 +05:30 · e9f86e320b
commit e9f86e320b
parent b0ee78586c
2 changed files with 51 additions and 34 deletions
--- a/tests/test_offline_chat_actors.py
+++ b/tests/test_offline_chat_actors.py
@ -286,7 +286,7 @@ def test_answer_from_chat_history_and_currently_retrieved_content(loaded_model):
    # Act
    response_gen = converse_offline(
        references=[
-            "Testatron was born on 1st April 1984 in Testville."
+            {"compiled": "Testatron was born on 1st April 1984 in Testville."}
        ],  # Assume context retrieved from notes for the user_query
        user_query="Where was I born?",
        conversation_log=populate_chat_history(message_list),
@ -341,14 +341,22 @@ def test_answer_requires_current_date_awareness(loaded_model):
    "Chat actor should be able to answer questions relative to current date using provided notes"
    # Arrange
    context = [
-        f"""{datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
-Expenses:Food:Dining  10.00 USD""",
-        f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
-Expenses:Food:Dining  10.00 USD""",
-        f"""2020-04-01 "SuperMercado" "Bananas"
-Expenses:Food:Groceries  10.00 USD""",
-        f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
-Expenses:Food:Dining  10.00 USD""",
+        {
+            "compiled": f"""{datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
+Expenses:Food:Dining  10.00 USD"""
+        },
+        {
+            "compiled": f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
+Expenses:Food:Dining  10.00 USD"""
+        },
+        {
+            "compiled": f"""2020-04-01 "SuperMercado" "Bananas"
+Expenses:Food:Groceries  10.00 USD"""
+        },
+        {
+            "compiled": f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
+Expenses:Food:Dining  10.00 USD"""
+        },
    ]

    # Act
@ -373,14 +381,22 @@ def test_answer_requires_date_aware_aggregation_across_provided_notes(loaded_mod
    "Chat actor should be able to answer questions that require date aware aggregation across multiple notes"
    # Arrange
    context = [
-        f"""# {datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
-Expenses:Food:Dining  10.00 USD""",
-        f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
-Expenses:Food:Dining  10.00 USD""",
-        f"""2020-04-01 "SuperMercado" "Bananas"
-Expenses:Food:Groceries  10.00 USD""",
-        f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
-Expenses:Food:Dining  10.00 USD""",
+        {
+            "compiled": f"""# {datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
+Expenses:Food:Dining  10.00 USD"""
+        },
+        {
+            "compiled": f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
+Expenses:Food:Dining  10.00 USD"""
+        },
+        {
+            "compiled": f"""2020-04-01 "SuperMercado" "Bananas"
+Expenses:Food:Groceries  10.00 USD"""
+        },
+        {
+            "compiled": f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
+Expenses:Food:Dining  10.00 USD"""
+        },
    ]

    # Act
@ -430,12 +446,18 @@ def test_ask_for_clarification_if_not_enough_context_in_question(loaded_model):
    "Chat actor should ask for clarification if question cannot be answered unambiguously with the provided context"
    # Arrange
    context = [
-        f"""# Ramya
-My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani.""",
-        f"""# Fang
-My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li.""",
-        f"""# Aiyla
-My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet.""",
+        {
+            "compiled": f"""# Ramya
+My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani."""
+        },
+        {
+            "compiled": f"""# Fang
+My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li."""
+        },
+        {
+            "compiled": f"""# Aiyla
+My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet."""
+        },
    ]

    # Act
@ -459,9 +481,9 @@ def test_agent_prompt_should_be_used(loaded_model, offline_agent):
    "Chat actor should ask be tuned to think like an accountant based on the agent definition"
    # Arrange
    context = [
-        f"""I went to the store and bought some bananas for 2.20""",
-        f"""I went to the store and bought some apples for 1.30""",
-        f"""I went to the store and bought some oranges for 6.00""",
+        {"compiled": f"""I went to the store and bought some bananas for 2.20"""},
+        {"compiled": f"""I went to the store and bought some apples for 1.30"""},
+        {"compiled": f"""I went to the store and bought some oranges for 6.00"""},
    ]

    # Act
@ -499,7 +521,7 @@ def test_chat_does_not_exceed_prompt_size(loaded_model):
    "Ensure chat context and response together do not exceed max prompt size for the model"
    # Arrange
    prompt_size_exceeded_error = "ERROR: The prompt size exceeds the context window size and cannot be processed"
-    context = [" ".join([f"{number}" for number in range(2043)])]
+    context = [{"compiled": " ".join([f"{number}" for number in range(2043)])}]

    # Act
    response_gen = converse_offline(
@ -530,7 +552,7 @@ def test_filter_questions():
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.anyio
@pytest.mark.django_db(transaction=True)
-async def test_use_default_response_mode(client_offline_chat):
+async def test_use_text_response_mode(client_offline_chat):
    # Arrange
    user_query = "What's the latest in the Israel/Palestine conflict?"

@ -538,7 +560,7 @@ async def test_use_default_response_mode(client_offline_chat):
    mode = await aget_relevant_output_modes(user_query, {})

    # Assert
-    assert mode.value == "default"
+    assert mode.value == "text"


 # ----------------------------------------------------------------------------------------------------
--- a/tests/test_offline_chat_director.py
+++ b/tests/test_offline_chat_director.py
@ -45,7 +45,6 @@ def create_conversation(message_list, user, agent=None):

 # Tests
 # ----------------------------------------------------------------------------------------------------
-@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
@pytest.mark.chatquality
@pytest.mark.django_db(transaction=True)
 def test_offline_chat_with_no_chat_history_or_retrieved_content(client_offline_chat):
@ -179,10 +178,6 @@ def test_answer_from_chat_history_and_previously_retrieved_content(client_offlin


 # ----------------------------------------------------------------------------------------------------
-@pytest.mark.xfail(
-    AssertionError,
-    reason="Chat director not capable of answering this question yet because it requires extract_questions",
-)
@pytest.mark.chatquality
@pytest.mark.django_db(transaction=True)
 def test_answer_from_chat_history_and_currently_retrieved_content(client_offline_chat, default_user2):