Update which local Chat Actor and Director tests are marked as expected to fail

This commit is contained in:
Debanjum Singh Solanky 2023-08-01 20:13:47 -07:00
parent c2b7a14ed5
commit 95acb1583d
2 changed files with 11 additions and 4 deletions

View file

@@ -35,6 +35,7 @@ freezegun.configure(extend_ignore_list=["transformers"])
# Test # Test
# ---------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(reason="Search actor isn't very date aware nor capable of formatting")
@pytest.mark.chatquality @pytest.mark.chatquality
@freeze_time("1984-04-02") @freeze_time("1984-04-02")
def test_extract_question_with_date_filter_from_relative_day(loaded_model): def test_extract_question_with_date_filter_from_relative_day(loaded_model):
@@ -54,7 +55,7 @@ def test_extract_question_with_date_filter_from_relative_day(loaded_model):
# ---------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(reason="Chat actor still isn't very date aware nor capable of formatting") @pytest.mark.xfail(reason="Search actor still isn't very date aware nor capable of formatting")
@pytest.mark.chatquality @pytest.mark.chatquality
@freeze_time("1984-04-02") @freeze_time("1984-04-02")
def test_extract_question_with_date_filter_from_relative_month(loaded_model): def test_extract_question_with_date_filter_from_relative_month(loaded_model):
@@ -168,7 +169,6 @@ def test_generate_search_query_using_question_from_chat_history(loaded_model):
# ---------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------
# @pytest.mark.xfail(reason="Chat actor does not consistently follow template instructions.")
@pytest.mark.chatquality @pytest.mark.chatquality
def test_generate_search_query_using_answer_from_chat_history(loaded_model): def test_generate_search_query_using_answer_from_chat_history(loaded_model):
# Arrange # Arrange
@@ -198,7 +198,7 @@ def test_generate_search_query_using_answer_from_chat_history(loaded_model):
# ---------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(reason="Chat actor is not sufficiently date-aware") @pytest.mark.xfail(reason="Search actor unable to create date filter using chat history and notes as context")
@pytest.mark.chatquality @pytest.mark.chatquality
def test_generate_search_query_with_date_and_context_from_chat_history(loaded_model): def test_generate_search_query_with_date_and_context_from_chat_history(loaded_model):
# Arrange # Arrange
@@ -239,7 +239,7 @@ def test_chat_with_no_chat_history_or_retrieved_content(loaded_model):
response = "".join([response_chunk for response_chunk in response_gen]) response = "".join([response_chunk for response_chunk in response_gen])
# Assert # Assert
expected_responses = ["Khoj", "khoj", "khooj", "Khooj", "KHOJ"] expected_responses = ["Khoj", "khoj", "KHOJ"]
assert len(response) > 0 assert len(response) > 0
assert any([expected_response in response for expected_response in expected_responses]), ( assert any([expected_response in response for expected_response in expected_responses]), (
"Expected assistants name, [K|k]hoj, in response but got: " + response "Expected assistants name, [K|k]hoj, in response but got: " + response
@@ -426,6 +426,7 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content(loaded
# ---------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(reason="Chat actor doesn't ask clarifying questions when context is insufficient")
@pytest.mark.chatquality @pytest.mark.chatquality
def test_ask_for_clarification_if_not_enough_context_in_question(loaded_model): def test_ask_for_clarification_if_not_enough_context_in_question(loaded_model):
"Chat actor should ask for clarification if question cannot be answered unambiguously with the provided context" "Chat actor should ask for clarification if question cannot be answered unambiguously with the provided context"

View file

@@ -36,6 +36,7 @@ def populate_chat_history(message_list):
# Tests # Tests
# ---------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
@pytest.mark.chatquality @pytest.mark.chatquality
def test_chat_with_no_chat_history_or_retrieved_content_gpt4all(client_offline_chat): def test_chat_with_no_chat_history_or_retrieved_content_gpt4all(client_offline_chat):
# Act # Act
@@ -73,6 +74,7 @@ def test_answer_from_chat_history(client_offline_chat):
# ---------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
@pytest.mark.chatquality @pytest.mark.chatquality
def test_answer_from_currently_retrieved_content(client_offline_chat): def test_answer_from_currently_retrieved_content(client_offline_chat):
# Arrange # Arrange
@@ -145,6 +147,7 @@ def test_answer_from_chat_history_and_currently_retrieved_content(client_offline
# ---------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
@pytest.mark.chatquality @pytest.mark.chatquality
def test_no_answer_in_chat_history_or_retrieved_content(client_offline_chat): def test_no_answer_in_chat_history_or_retrieved_content(client_offline_chat):
"Chat director should say don't know as not enough contexts in chat history or retrieved to answer question" "Chat director should say don't know as not enough contexts in chat history or retrieved to answer question"
@@ -186,6 +189,7 @@ def test_answer_requires_current_date_awareness(client_offline_chat):
# ---------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
@pytest.mark.chatquality @pytest.mark.chatquality
@freeze_time("2023-04-01") @freeze_time("2023-04-01")
def test_answer_requires_date_aware_aggregation_across_provided_notes(client_offline_chat): def test_answer_requires_date_aware_aggregation_across_provided_notes(client_offline_chat):
@@ -200,6 +204,7 @@ def test_answer_requires_date_aware_aggregation_across_provided_notes(client_off
# ---------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
@pytest.mark.chatquality @pytest.mark.chatquality
def test_answer_general_question_not_in_chat_history_or_retrieved_content(client_offline_chat): def test_answer_general_question_not_in_chat_history_or_retrieved_content(client_offline_chat):
# Arrange # Arrange
@@ -287,6 +292,7 @@ def test_answer_chat_history_very_long(client_offline_chat):
# ---------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
@pytest.mark.chatquality @pytest.mark.chatquality
def test_answer_requires_multiple_independent_searches(client_offline_chat): def test_answer_requires_multiple_independent_searches(client_offline_chat):
"Chat director should be able to answer by doing multiple independent searches for required information" "Chat director should be able to answer by doing multiple independent searches for required information"