From 95acb1583d97eee6bfd8e4459fdfa425efb6aaa8 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 1 Aug 2023 20:13:47 -0700 Subject: [PATCH] Update local Chat Actor and Director tests expected to fail --- tests/test_gpt4all_chat_actors.py | 9 +++++---- tests/test_gpt4all_chat_director.py | 6 ++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/test_gpt4all_chat_actors.py b/tests/test_gpt4all_chat_actors.py index 32e7e941..92b3f956 100644 --- a/tests/test_gpt4all_chat_actors.py +++ b/tests/test_gpt4all_chat_actors.py @@ -35,6 +35,7 @@ freezegun.configure(extend_ignore_list=["transformers"]) # Test # ---------------------------------------------------------------------------------------------------- +@pytest.mark.xfail(reason="Search actor isn't very date aware nor capable of formatting") @pytest.mark.chatquality @freeze_time("1984-04-02") def test_extract_question_with_date_filter_from_relative_day(loaded_model): @@ -54,7 +55,7 @@ def test_extract_question_with_date_filter_from_relative_day(loaded_model): # ---------------------------------------------------------------------------------------------------- -@pytest.mark.xfail(reason="Chat actor still isn't very date aware nor capable of formatting") +@pytest.mark.xfail(reason="Search actor still isn't very date aware nor capable of formatting") @pytest.mark.chatquality @freeze_time("1984-04-02") def test_extract_question_with_date_filter_from_relative_month(loaded_model): @@ -168,7 +169,6 @@ def test_generate_search_query_using_question_from_chat_history(loaded_model): # ---------------------------------------------------------------------------------------------------- -# @pytest.mark.xfail(reason="Chat actor does not consistently follow template instructions.") @pytest.mark.chatquality def test_generate_search_query_using_answer_from_chat_history(loaded_model): # Arrange @@ -198,7 +198,7 @@ def test_generate_search_query_using_answer_from_chat_history(loaded_model): # 
---------------------------------------------------------------------------------------------------- -@pytest.mark.xfail(reason="Chat actor is not sufficiently date-aware") +@pytest.mark.xfail(reason="Search actor unable to create date filter using chat history and notes as context") @pytest.mark.chatquality def test_generate_search_query_with_date_and_context_from_chat_history(loaded_model): # Arrange @@ -239,7 +239,7 @@ def test_chat_with_no_chat_history_or_retrieved_content(loaded_model): response = "".join([response_chunk for response_chunk in response_gen]) # Assert - expected_responses = ["Khoj", "khoj", "khooj", "Khooj", "KHOJ"] + expected_responses = ["Khoj", "khoj", "KHOJ"] assert len(response) > 0 assert any([expected_response in response for expected_response in expected_responses]), ( "Expected assistants name, [K|k]hoj, in response but got: " + response @@ -426,6 +426,7 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content(loaded # ---------------------------------------------------------------------------------------------------- +@pytest.mark.xfail(reason="Chat actor doesn't ask clarifying questions when context is insufficient") @pytest.mark.chatquality def test_ask_for_clarification_if_not_enough_context_in_question(loaded_model): "Chat actor should ask for clarification if question cannot be answered unambiguously with the provided context" diff --git a/tests/test_gpt4all_chat_director.py b/tests/test_gpt4all_chat_director.py index fe009708..d7386405 100644 --- a/tests/test_gpt4all_chat_director.py +++ b/tests/test_gpt4all_chat_director.py @@ -36,6 +36,7 @@ def populate_chat_history(message_list): # Tests # ---------------------------------------------------------------------------------------------------- +@pytest.mark.xfail(raises=AssertionError, reason="Chat director not capable of answering this question yet") @pytest.mark.chatquality def test_chat_with_no_chat_history_or_retrieved_content_gpt4all(client_offline_chat): # Act @@ 
-73,6 +74,7 @@ def test_answer_from_chat_history(client_offline_chat): # ---------------------------------------------------------------------------------------------------- +@pytest.mark.xfail(raises=AssertionError, reason="Chat director not capable of answering this question yet") @pytest.mark.chatquality def test_answer_from_currently_retrieved_content(client_offline_chat): # Arrange @@ -145,6 +147,7 @@ def test_answer_from_chat_history_and_currently_retrieved_content(client_offline # ---------------------------------------------------------------------------------------------------- +@pytest.mark.xfail(raises=AssertionError, reason="Chat director not capable of answering this question yet") @pytest.mark.chatquality def test_no_answer_in_chat_history_or_retrieved_content(client_offline_chat): "Chat director should say don't know as not enough contexts in chat history or retrieved to answer question" @@ -186,6 +189,7 @@ def test_answer_requires_current_date_awareness(client_offline_chat): # ---------------------------------------------------------------------------------------------------- +@pytest.mark.xfail(raises=AssertionError, reason="Chat director not capable of answering this question yet") @pytest.mark.chatquality @freeze_time("2023-04-01") def test_answer_requires_date_aware_aggregation_across_provided_notes(client_offline_chat): @@ -200,6 +204,7 @@ def test_answer_requires_date_aware_aggregation_across_provided_notes(client_off # ---------------------------------------------------------------------------------------------------- +@pytest.mark.xfail(raises=AssertionError, reason="Chat director not capable of answering this question yet") @pytest.mark.chatquality def test_answer_general_question_not_in_chat_history_or_retrieved_content(client_offline_chat): # Arrange @@ -287,6 +292,7 @@ def test_answer_chat_history_very_long(client_offline_chat): # ---------------------------------------------------------------------------------------------------- +@pytest.mark.xfail(raises=AssertionError, 
reason="Chat director not capable of answering this question yet") @pytest.mark.chatquality def test_answer_requires_multiple_independent_searches(client_offline_chat): "Chat director should be able to answer by doing multiple independent searches for required information"