mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 23:48:56 +01:00
Update xfail markers for local Chat Actor and Director tests expected to fail
This commit is contained in:
parent
c2b7a14ed5
commit
95acb1583d
2 changed files with 11 additions and 4 deletions
|
@ -35,6 +35,7 @@ freezegun.configure(extend_ignore_list=["transformers"])
|
||||||
|
|
||||||
# Test
|
# Test
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@pytest.mark.xfail(reason="Search actor isn't very date aware nor capable of formatting")
|
||||||
@pytest.mark.chatquality
|
@pytest.mark.chatquality
|
||||||
@freeze_time("1984-04-02")
|
@freeze_time("1984-04-02")
|
||||||
def test_extract_question_with_date_filter_from_relative_day(loaded_model):
|
def test_extract_question_with_date_filter_from_relative_day(loaded_model):
|
||||||
|
@ -54,7 +55,7 @@ def test_extract_question_with_date_filter_from_relative_day(loaded_model):
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
@pytest.mark.xfail(reason="Chat actor still isn't very date aware nor capable of formatting")
|
@pytest.mark.xfail(reason="Search actor still isn't very date aware nor capable of formatting")
|
||||||
@pytest.mark.chatquality
|
@pytest.mark.chatquality
|
||||||
@freeze_time("1984-04-02")
|
@freeze_time("1984-04-02")
|
||||||
def test_extract_question_with_date_filter_from_relative_month(loaded_model):
|
def test_extract_question_with_date_filter_from_relative_month(loaded_model):
|
||||||
|
@ -168,7 +169,6 @@ def test_generate_search_query_using_question_from_chat_history(loaded_model):
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
# @pytest.mark.xfail(reason="Chat actor does not consistently follow template instructions.")
|
|
||||||
@pytest.mark.chatquality
|
@pytest.mark.chatquality
|
||||||
def test_generate_search_query_using_answer_from_chat_history(loaded_model):
|
def test_generate_search_query_using_answer_from_chat_history(loaded_model):
|
||||||
# Arrange
|
# Arrange
|
||||||
|
@ -198,7 +198,7 @@ def test_generate_search_query_using_answer_from_chat_history(loaded_model):
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
@pytest.mark.xfail(reason="Chat actor is not sufficiently date-aware")
|
@pytest.mark.xfail(reason="Search actor unable to create date filter using chat history and notes as context")
|
||||||
@pytest.mark.chatquality
|
@pytest.mark.chatquality
|
||||||
def test_generate_search_query_with_date_and_context_from_chat_history(loaded_model):
|
def test_generate_search_query_with_date_and_context_from_chat_history(loaded_model):
|
||||||
# Arrange
|
# Arrange
|
||||||
|
@ -239,7 +239,7 @@ def test_chat_with_no_chat_history_or_retrieved_content(loaded_model):
|
||||||
response = "".join([response_chunk for response_chunk in response_gen])
|
response = "".join([response_chunk for response_chunk in response_gen])
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
expected_responses = ["Khoj", "khoj", "khooj", "Khooj", "KHOJ"]
|
expected_responses = ["Khoj", "khoj", "KHOJ"]
|
||||||
assert len(response) > 0
|
assert len(response) > 0
|
||||||
assert any([expected_response in response for expected_response in expected_responses]), (
|
assert any([expected_response in response for expected_response in expected_responses]), (
|
||||||
"Expected assistants name, [K|k]hoj, in response but got: " + response
|
"Expected assistants name, [K|k]hoj, in response but got: " + response
|
||||||
|
@ -426,6 +426,7 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content(loaded
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@pytest.mark.xfail(reason="Chat actor doesn't ask clarifying questions when context is insufficient")
|
||||||
@pytest.mark.chatquality
|
@pytest.mark.chatquality
|
||||||
def test_ask_for_clarification_if_not_enough_context_in_question(loaded_model):
|
def test_ask_for_clarification_if_not_enough_context_in_question(loaded_model):
|
||||||
"Chat actor should ask for clarification if question cannot be answered unambiguously with the provided context"
|
"Chat actor should ask for clarification if question cannot be answered unambiguously with the provided context"
|
||||||
|
|
|
@ -36,6 +36,7 @@ def populate_chat_history(message_list):
|
||||||
|
|
||||||
# Tests
|
# Tests
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
|
||||||
@pytest.mark.chatquality
|
@pytest.mark.chatquality
|
||||||
def test_chat_with_no_chat_history_or_retrieved_content_gpt4all(client_offline_chat):
|
def test_chat_with_no_chat_history_or_retrieved_content_gpt4all(client_offline_chat):
|
||||||
# Act
|
# Act
|
||||||
|
@ -73,6 +74,7 @@ def test_answer_from_chat_history(client_offline_chat):
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
|
||||||
@pytest.mark.chatquality
|
@pytest.mark.chatquality
|
||||||
def test_answer_from_currently_retrieved_content(client_offline_chat):
|
def test_answer_from_currently_retrieved_content(client_offline_chat):
|
||||||
# Arrange
|
# Arrange
|
||||||
|
@ -145,6 +147,7 @@ def test_answer_from_chat_history_and_currently_retrieved_content(client_offline
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
|
||||||
@pytest.mark.chatquality
|
@pytest.mark.chatquality
|
||||||
def test_no_answer_in_chat_history_or_retrieved_content(client_offline_chat):
|
def test_no_answer_in_chat_history_or_retrieved_content(client_offline_chat):
|
||||||
"Chat director should say don't know as not enough contexts in chat history or retrieved to answer question"
|
"Chat director should say don't know as not enough contexts in chat history or retrieved to answer question"
|
||||||
|
@ -186,6 +189,7 @@ def test_answer_requires_current_date_awareness(client_offline_chat):
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
|
||||||
@pytest.mark.chatquality
|
@pytest.mark.chatquality
|
||||||
@freeze_time("2023-04-01")
|
@freeze_time("2023-04-01")
|
||||||
def test_answer_requires_date_aware_aggregation_across_provided_notes(client_offline_chat):
|
def test_answer_requires_date_aware_aggregation_across_provided_notes(client_offline_chat):
|
||||||
|
@ -200,6 +204,7 @@ def test_answer_requires_date_aware_aggregation_across_provided_notes(client_off
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
|
||||||
@pytest.mark.chatquality
|
@pytest.mark.chatquality
|
||||||
def test_answer_general_question_not_in_chat_history_or_retrieved_content(client_offline_chat):
|
def test_answer_general_question_not_in_chat_history_or_retrieved_content(client_offline_chat):
|
||||||
# Arrange
|
# Arrange
|
||||||
|
@ -287,6 +292,7 @@ def test_answer_chat_history_very_long(client_offline_chat):
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
|
||||||
@pytest.mark.chatquality
|
@pytest.mark.chatquality
|
||||||
def test_answer_requires_multiple_independent_searches(client_offline_chat):
|
def test_answer_requires_multiple_independent_searches(client_offline_chat):
|
||||||
"Chat director should be able to answer by doing multiple independent searches for required information"
|
"Chat director should be able to answer by doing multiple independent searches for required information"
|
||||||
|
|
Loading…
Reference in a new issue