From 7ac241b766b22e95da8087d17573dab8d5863431 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Tue, 22 Oct 2024 00:34:49 -0700
Subject: [PATCH 01/10] Improve format of notes, online context passed to chat
 models in prompt

Improve separation of note snippets and show each snippet's origin file
in the notes prompt to share more readable, contextualized text with the
model. Previously the references dict was passed directly as a string,
so the documents didn't look well formatted and were less intelligible.

- Passing the file path along with the note snippets will help
  contextualize the notes better.
- Better formatting should make the notes more readable for the chat
  model.
---
 src/khoj/processor/conversation/anthropic/anthropic_chat.py | 3 +--
 src/khoj/processor/conversation/google/gemini_chat.py | 3 +--
 src/khoj/processor/conversation/openai/gpt.py | 3 +--
 src/khoj/processor/conversation/prompts.py | 4 ++++
 src/khoj/processor/conversation/utils.py | 6 +++++-
 5 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/src/khoj/processor/conversation/anthropic/anthropic_chat.py b/src/khoj/processor/conversation/anthropic/anthropic_chat.py
index cb51abb4..5fb900c9 100644
--- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py
+++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py
@@ -142,9 +142,8 @@ def converse_anthropic(
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# {item}" for item in references})
-
     conversation_primer = prompts.query_prompt.format(query=user_query)
+    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
 
     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py
index 7359b3eb..f7cfad31 100644
--- a/src/khoj/processor/conversation/google/gemini_chat.py
+++ b/src/khoj/processor/conversation/google/gemini_chat.py
@@ -139,9 +139,8 @@ def converse_gemini(
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# {item}" for item in references})
-
     conversation_primer = prompts.query_prompt.format(query=user_query)
+    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
 
     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
diff --git a/src/khoj/processor/conversation/openai/gpt.py b/src/khoj/processor/conversation/openai/gpt.py
index ad02b10e..293bdacd 100644
--- a/src/khoj/processor/conversation/openai/gpt.py
+++ b/src/khoj/processor/conversation/openai/gpt.py
@@ -143,9 +143,8 @@ def converse(
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# {item['compiled']}" for item in references})
-
     conversation_primer = prompts.query_prompt.format(query=user_query)
+    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
 
     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py
index ad164c8d..fb6a105b 100644
--- a/src/khoj/processor/conversation/prompts.py
+++ b/src/khoj/processor/conversation/prompts.py
@@ -118,6 +118,7 @@ Use my personal notes and our past conversations to inform your response.
 Ask crisp follow-up questions to get additional context, when a helpful response cannot be provided from the provided notes or past conversations.
 
 User's Notes:
+-----
 {references}
 """.strip()
 )
@@ -127,6 +128,7 @@ notes_conversation_offline = PromptTemplate.from_template(
 Use my personal notes and our past conversations to inform your response.
 
 User's Notes:
+-----
 {references}
 """.strip()
 )
@@ -184,6 +186,7 @@ Use this up-to-date information from the internet to inform your response.
 Ask crisp follow-up questions to get additional context, when a helpful response cannot be provided from the online data or past conversations.
 
 Information from the internet:
+-----
 {online_results}
 """.strip()
 )
@@ -193,6 +196,7 @@ online_search_conversation_offline = PromptTemplate.from_template(
 Use this up-to-date information from the internet to inform your response.
 
 Information from the internet:
+-----
 {online_results}
 """.strip()
 )
diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py
index e841c484..56e9e9db 100644
--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@@ -178,7 +178,11 @@ def generate_chatml_messages_with_context(
     # Extract Chat History for Context
     chatml_messages: List[ChatMessage] = []
     for chat in conversation_log.get("chat", []):
-        message_notes = f'\n\n Notes:\n{chat.get("context")}' if chat.get("context") else "\n"
+        references = "\n\n".join(
+            {f"# File: {item['file']}\n## {item['compiled']}\n" for item in chat.get("context") or []}
+        )
+        message_notes = f"\n\n Notes:\n{references}" if chat.get("context") else "\n"
+
         role = "user" if chat["by"] == "you" else "assistant"
 
         message_content = chat["message"] + message_notes

From 0c52a1169a3c9691f8aeac0fd73e1592975a7df4 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Tue, 22 Oct 2024 01:06:00 -0700
Subject: [PATCH 02/10] Put context into separate user message before sending
 to chat model

The document and online search context are now passed as separate user
messages to the chat model, instead of being added to the final user
message. This will improve:

- The model's ability to differentiate data from the user query. That
  should improve response quality and reduce the probability of prompt
  injection
- Truncation logic, by making it simpler and more robust. When the
  context window is hit, messages can simply be popped to auto-truncate
  context in order of context, user, assistant message for each
  conversation turn in history until the current user query is reached.
  The complex, brittle logic to extract the user query from the context
  in the last user message isn't required.

Marking the context message with the assistant role doesn't translate
well across chat models. E.g.
- Gemini can't handle consecutive messages by role = model well
- Claude will merge consecutive messages by the same role. With the
  current message ordering, the context message would get merged into
  the previous assistant response. And if the context message were
  moved after the user query, the truncation logic would have to hop
  and skip while doing deletions
- GPT seems to handle consecutive roles of any type fine

Using context role = user generalizes better across chat models for now
and aligns with previous behavior.
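
For illustration, the simpler truncation this enables could look roughly
like the sketch below. This is a minimal sketch, assuming messages are
ordered newest first and a token counter is supplied; the helper names
are illustrative, not the actual khoj implementation:

    from typing import Callable

    from langchain.schema import ChatMessage

    def truncate_history(
        messages: list[ChatMessage], max_tokens: int, count_tokens: Callable[[str], int]
    ) -> list[ChatMessage]:
        # messages ordered newest first: current user query, its context
        # message, then assistant/user/context messages from history
        while len(messages) > 1 and sum(count_tokens(m.content) for m in messages) > max_tokens:
            # Drop the oldest message, whether context, user or assistant.
            # No user query needs to be dug out of a combined context message.
            messages.pop()
        return messages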
--- .../conversation/anthropic/anthropic_chat.py | 13 ++++----- .../conversation/google/gemini_chat.py | 13 ++++----- src/khoj/processor/conversation/openai/gpt.py | 13 ++++----- src/khoj/processor/conversation/utils.py | 29 ++++++++++++------- 4 files changed, 36 insertions(+), 32 deletions(-) diff --git a/src/khoj/processor/conversation/anthropic/anthropic_chat.py b/src/khoj/processor/conversation/anthropic/anthropic_chat.py index 5fb900c9..826f0fa4 100644 --- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py +++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py @@ -142,7 +142,6 @@ def converse_anthropic( """ # Initialize Variables current_date = datetime.now() - conversation_primer = prompts.query_prompt.format(query=user_query) compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references}) if agent and agent.personality: @@ -174,16 +173,16 @@ def converse_anthropic( completion_func(chat_response=prompts.no_online_results_found.format()) return iter([prompts.no_online_results_found.format()]) - if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands: - conversation_primer = ( - f"{prompts.online_search_conversation.format(online_results=str(online_results))}\n{conversation_primer}" - ) + context_message = "" if not is_none_or_empty(compiled_references): - conversation_primer = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n{conversation_primer}" + context_message = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n" + if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands: + context_message += f"{prompts.online_search_conversation.format(online_results=str(online_results))}" # Setup Prompt with Primer or Conversation History messages = generate_chatml_messages_with_context( - conversation_primer, + user_query, + context_message=context_message, conversation_log=conversation_log, model_name=model, max_prompt_size=max_prompt_size, diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py index f7cfad31..4221aeb3 100644 --- a/src/khoj/processor/conversation/google/gemini_chat.py +++ b/src/khoj/processor/conversation/google/gemini_chat.py @@ -139,7 +139,6 @@ def converse_gemini( """ # Initialize Variables current_date = datetime.now() - conversation_primer = prompts.query_prompt.format(query=user_query) compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references}) if agent and agent.personality: @@ -172,16 +171,16 @@ def converse_gemini( completion_func(chat_response=prompts.no_online_results_found.format()) return iter([prompts.no_online_results_found.format()]) - if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands: - conversation_primer = ( - f"{prompts.online_search_conversation.format(online_results=str(online_results))}\n{conversation_primer}" - ) + context_message = "" if not is_none_or_empty(compiled_references): - conversation_primer = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n{conversation_primer}" + context_message = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n" + if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in 
conversation_commands: + context_message += f"{prompts.online_search_conversation.format(online_results=str(online_results))}" # Setup Prompt with Primer or Conversation History messages = generate_chatml_messages_with_context( - conversation_primer, + user_query, + context_message=context_message, conversation_log=conversation_log, model_name=model, max_prompt_size=max_prompt_size, diff --git a/src/khoj/processor/conversation/openai/gpt.py b/src/khoj/processor/conversation/openai/gpt.py index 293bdacd..2f5045c2 100644 --- a/src/khoj/processor/conversation/openai/gpt.py +++ b/src/khoj/processor/conversation/openai/gpt.py @@ -143,7 +143,6 @@ def converse( """ # Initialize Variables current_date = datetime.now() - conversation_primer = prompts.query_prompt.format(query=user_query) compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references}) if agent and agent.personality: @@ -175,18 +174,18 @@ def converse( completion_func(chat_response=prompts.no_online_results_found.format()) return iter([prompts.no_online_results_found.format()]) - if not is_none_or_empty(online_results): - conversation_primer = ( - f"{prompts.online_search_conversation.format(online_results=str(online_results))}\n{conversation_primer}" - ) + context_message = "" if not is_none_or_empty(compiled_references): - conversation_primer = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n{conversation_primer}" + context_message = f"{prompts.notes_conversation.format(references=compiled_references)}\n\n" + if not is_none_or_empty(online_results): + context_message += f"{prompts.online_search_conversation.format(online_results=str(online_results))}" # Setup Prompt with Primer or Conversation History messages = generate_chatml_messages_with_context( - conversation_primer, + user_query, system_prompt, conversation_log, + context_message=context_message, model_name=model, max_prompt_size=max_prompt_size, tokenizer_name=tokenizer_name, diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 56e9e9db..75f17963 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -12,6 +12,7 @@ from transformers import AutoTokenizer from khoj.database.adapters import ConversationAdapters from khoj.database.models import ChatModelOptions, ClientApplication, KhojUser +from khoj.processor.conversation import prompts from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens from khoj.utils import state from khoj.utils.helpers import is_none_or_empty, merge_dicts @@ -163,6 +164,7 @@ def generate_chatml_messages_with_context( uploaded_image_url=None, vision_enabled=False, model_type="", + context_message="", ): """Generate messages for ChatGPT with context from previous conversation""" # Set max prompt size from user config or based on pre-configured for model and machine specs @@ -178,24 +180,22 @@ def generate_chatml_messages_with_context( # Extract Chat History for Context chatml_messages: List[ChatMessage] = [] for chat in conversation_log.get("chat", []): - references = "\n\n".join( - {f"# File: {item['file']}\n## {item['compiled']}\n" for item in chat.get("context") or []} - ) - message_notes = f"\n\n Notes:\n{references}" if chat.get("context") else "\n" + if not is_none_or_empty(chat.get("context")): + references = "\n\n".join( + {f"# File: {item['file']}\n## {item['compiled']}\n" for item in chat.get("context") or []} + ) + 
message_context = f"{prompts.notes_conversation.format(references=references)}\n\n"
+            reconstructed_context_message = ChatMessage(content=message_context, role="context")
+            chatml_messages.insert(0, reconstructed_context_message)
 
         role = "user" if chat["by"] == "you" else "assistant"
-
-        message_content = chat["message"] + message_notes
         message_content = construct_structured_message(
-            message_content, chat.get("uploadedImageData"), model_type, vision_enabled
+            chat["message"], chat.get("uploadedImageData"), model_type, vision_enabled
         )
 
-        reconstructed_message = ChatMessage(content=message_content, role=role)
-        chatml_messages.insert(0, reconstructed_message)
+        reconstructed_message = ChatMessage(content=message_content, role=role)
+        chatml_messages.insert(0, reconstructed_message)
 
-        if len(chatml_messages) >= 2 * lookback_turns:
+        if len(chatml_messages) >= 3 * lookback_turns:
             break
 
     messages = []
@@ -206,6 +206,8 @@ def generate_chatml_messages_with_context(
             role="user",
         )
     )
+    if not is_none_or_empty(context_message):
+        messages.append(ChatMessage(content=context_message, role="context"))
     if len(chatml_messages) > 0:
         messages += chatml_messages
     if not is_none_or_empty(system_message):
@@ -214,6 +216,11 @@ def generate_chatml_messages_with_context(
     # Truncate oldest messages from conversation history until under max supported prompt size by model
     messages = truncate_messages(messages, max_prompt_size, model_name, loaded_model, tokenizer_name)
 
+    # Reset context message role to assistant
+    for message in messages:
+        if message.role == "context":
+            message.role = "user"
+
     # Return message in chronological order
     return messages[::-1]

From 0847fb010247ef44dbfb6c7dcf81430c98cc5381 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Tue, 22 Oct 2024 02:32:34 -0700
Subject: [PATCH 03/10] Pass online context from chat history to chat model
 for response

Previously only notes context from chat history was included. This
change includes online context from chat history as well, for the model
to use when generating its response.

This can reduce the need for online lookups by reusing previous online
context, giving faster responses. But it will increase overall response
time when past online context isn't reused, as context builds up faster
per conversation.

Unsure if inclusion of this context is preferable. If not, both notes
and online context should be removed.
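
For reference, a conversation log turn with both context types has
roughly the shape below. The by, message, context, onlineContext and
webpages keys mirror the ones read by the code; the nested webpage
fields and all values are made up for illustration:

    chat_turn = {
        "by": "you",
        "message": "What's the weather forecast for tomorrow?",
        # notes context, reformatted into the notes_conversation prompt
        "context": [{"file": "weather.org", "compiled": "Prefer cycling when it is sunny."}],
        # online context, reformatted into the online_search_conversation prompt
        "onlineContext": {"weather tomorrow": {"webpages": [{"link": "https://example.com/forecast", "snippet": "Sunny, 24C"}]}},
    }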
--- src/khoj/processor/conversation/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 75f17963..3aee61c9 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -180,11 +180,15 @@ def generate_chatml_messages_with_context( # Extract Chat History for Context chatml_messages: List[ChatMessage] = [] for chat in conversation_log.get("chat", []): + message_context = "" if not is_none_or_empty(chat.get("context")): references = "\n\n".join( {f"# File: {item['file']}\n## {item['compiled']}\n" for item in chat.get("context") or []} ) message_context = f"{prompts.notes_conversation.format(references=references)}\n\n" + if not is_none_or_empty(chat.get("onlineContext")): + message_context += f"{prompts.online_search_conversation.format(online_results=chat.get('onlineContext'))}" + if not is_none_or_empty(chat.get("context")) or not is_none_or_empty(chat.get("onlineContext")): reconstructed_context_message = ChatMessage(content=message_context, role="context") chatml_messages.insert(0, reconstructed_context_message) From 39a613d3bcef85522f99c8625203a1feb40bb062 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 22 Oct 2024 02:58:34 -0700 Subject: [PATCH 04/10] Fix up openai chat actor tests --- tests/test_openai_chat_actors.py | 23 ++++++++++++++--------- tests/test_openai_chat_director.py | 16 ++++++++-------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/tests/test_openai_chat_actors.py b/tests/test_openai_chat_actors.py index fc253b50..b2ae2d34 100644 --- a/tests/test_openai_chat_actors.py +++ b/tests/test_openai_chat_actors.py @@ -214,7 +214,7 @@ def test_answer_from_chat_history_and_previously_retrieved_content(): ( "When was I born?", "You were born on 1st April 1984.", - ["Testatron was born on 1st April 1984 in Testville."], + [{"compiled": "Testatron was born on 1st April 1984 in Testville.", "file": "birth.org"}], ), ] @@ -415,15 +415,18 @@ def test_ask_for_clarification_if_not_enough_context_in_question(): context = [ { "compiled": f"""# Ramya -My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani.""" +My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani.""", + "file": "Family.md", }, { "compiled": f"""# Fang -My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li.""" +My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li.""", + "file": "Family.md", }, { "compiled": f"""# Aiyla -My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet.""" +My sister, Aiyla is married to Tolga. 
They have 3 kids, Yildiz, Ali and Ahmet.""", + "file": "Family.md", }, ] @@ -608,9 +611,11 @@ async def test_infer_webpage_urls_actor_extracts_correct_links(chat_client, defa ), ], ) -async def test_infer_task_scheduling_request(chat_client, user_query, expected_crontime, expected_qs, unexpected_qs): +async def test_infer_task_scheduling_request( + chat_client, user_query, expected_crontime, expected_qs, unexpected_qs, default_user2 +): # Act - crontime, inferred_query, _ = await schedule_query(user_query, {}) + crontime, inferred_query, _ = await schedule_query(user_query, {}, default_user2) inferred_query = inferred_query.lower() # Assert @@ -630,7 +635,7 @@ async def test_infer_task_scheduling_request(chat_client, user_query, expected_c "scheduling_query, executing_query, generated_response, expected_should_notify", [ ( - "Notify me if it is going to rain tomorrow?", + "Notify me only if it is going to rain tomorrow?", "What's the weather forecast for tomorrow?", "It is sunny and warm tomorrow.", False, @@ -656,10 +661,10 @@ async def test_infer_task_scheduling_request(chat_client, user_query, expected_c ], ) def test_decision_on_when_to_notify_scheduled_task_results( - chat_client, scheduling_query, executing_query, generated_response, expected_should_notify + chat_client, default_user2, scheduling_query, executing_query, generated_response, expected_should_notify ): # Act - generated_should_notify = should_notify(scheduling_query, executing_query, generated_response) + generated_should_notify = should_notify(scheduling_query, executing_query, generated_response, default_user2) # Assert assert generated_should_notify == expected_should_notify diff --git a/tests/test_openai_chat_director.py b/tests/test_openai_chat_director.py index 279d6e37..7d460408 100644 --- a/tests/test_openai_chat_director.py +++ b/tests/test_openai_chat_director.py @@ -307,7 +307,7 @@ def test_summarize_one_file(chat_client, default_user2: KhojUser): json={"filename": summarization_file, "conversation_id": str(conversation.id)}, ) query = "/summarize" - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)}) response_message = response.json()["response"] # Assert assert response_message != "" @@ -339,7 +339,7 @@ def test_summarize_extra_text(chat_client, default_user2: KhojUser): json={"filename": summarization_file, "conversation_id": str(conversation.id)}, ) query = "/summarize tell me about Xiu" - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)}) response_message = response.json()["response"] # Assert assert response_message != "" @@ -367,7 +367,7 @@ def test_summarize_multiple_files(chat_client, default_user2: KhojUser): ) query = "/summarize" - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)}) response_message = response.json()["response"] # Assert @@ -383,7 +383,7 @@ def test_summarize_no_files(chat_client, default_user2: KhojUser): # Act query = "/summarize" - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)}) 
response_message = response.json()["response"] # Assert @@ -418,11 +418,11 @@ def test_summarize_different_conversation(chat_client, default_user2: KhojUser): # Act query = "/summarize" - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation2.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation2.id)}) response_message_conv2 = response.json()["response"] # now make sure that the file filter is still in conversation 1 - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation1.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation1.id)}) response_message_conv1 = response.json()["response"] # Assert @@ -449,7 +449,7 @@ def test_summarize_nonexistant_file(chat_client, default_user2: KhojUser): json={"filename": "imaginary.markdown", "conversation_id": str(conversation.id)}, ) query = urllib.parse.quote("/summarize") - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)}) response_message = response.json()["response"] # Assert assert response_message == "No files selected for summarization. Please add files using the section on the left." @@ -481,7 +481,7 @@ def test_summarize_diff_user_file(chat_client, default_user: KhojUser, pdf_confi # Act query = "/summarize" - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)}) response_message = response.json()["response"] # Assert From 3b978b9b6765fee055b80da0631924e0173dddd2 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 23 Oct 2024 03:40:01 -0700 Subject: [PATCH 05/10] Fix chat history construction when generating chatml msgs with context --- src/khoj/processor/conversation/utils.py | 2 +- src/khoj/routers/api_chat.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index bc5b2c2e..1ccc7594 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -203,7 +203,7 @@ def generate_chatml_messages_with_context( chatml_messages.insert(0, reconstructed_context_message) role = "user" if chat["by"] == "you" else "assistant" - message_content = construct_structured_message(message_content, chat.get("images"), model_type, vision_enabled) + message_content = construct_structured_message(chat["message"], chat.get("images"), model_type, vision_enabled) reconstructed_message = ChatMessage(content=message_content, role=role) chatml_messages.insert(0, reconstructed_message) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 09ea9eea..3cc541b1 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -112,7 +112,7 @@ def add_files_filter(request: Request, filter: FilesFilterRequest): file_filters = ConversationAdapters.add_files_to_filter(request.user.object, conversation_id, files_filter) return Response(content=json.dumps(file_filters), media_type="application/json", status_code=200) except Exception as e: - logger.error(f"Error adding file filter {filter.filename}: {e}", exc_info=True) + logger.error(f"Error adding file filter {filter.filenames}: {e}", exc_info=True) raise 
HTTPException(status_code=422, detail=str(e)) From a691ce4aa68225f1c300a50938ea00300d810aa3 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Sun, 27 Oct 2024 20:43:41 -0700 Subject: [PATCH 06/10] Batch entries into smaller groups to process --- .../commands/change_default_model.py | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/khoj/database/management/commands/change_default_model.py b/src/khoj/database/management/commands/change_default_model.py index cfa78581..d9a6359f 100644 --- a/src/khoj/database/management/commands/change_default_model.py +++ b/src/khoj/database/management/commands/change_default_model.py @@ -19,6 +19,8 @@ from khoj.processor.embeddings import EmbeddingsModel logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +BATCH_SIZE = 1000 # Define an appropriate batch size + class Command(BaseCommand): help = "Convert all existing Entry objects to use a new default Search model." @@ -42,22 +44,24 @@ class Command(BaseCommand): def handle(self, *args, **options): @transaction.atomic def regenerate_entries(entry_filter: Q, embeddings_model: EmbeddingsModel, search_model: SearchModelConfig): - entries = Entry.objects.filter(entry_filter).all() - compiled_entries = [entry.compiled for entry in entries] - updated_entries: List[Entry] = [] - try: - embeddings = embeddings_model.embed_documents(compiled_entries) + total_entries = Entry.objects.filter(entry_filter).count() + for start in tqdm(range(0, total_entries, BATCH_SIZE)): + end = start + BATCH_SIZE + entries = Entry.objects.filter(entry_filter)[start:end] + compiled_entries = [entry.compiled for entry in entries] + updated_entries: List[Entry] = [] + try: + embeddings = embeddings_model.embed_documents(compiled_entries) + except Exception as e: + logger.error(f"Error embedding documents: {e}") + return - except Exception as e: - logger.error(f"Error embedding documents: {e}") - return + for i, entry in enumerate(entries): + entry.embeddings = embeddings[i] + entry.search_model_id = search_model.id + updated_entries.append(entry) - for i, entry in enumerate(tqdm(entries)): - entry.embeddings = embeddings[i] - entry.search_model_id = search_model.id - updated_entries.append(entry) - - Entry.objects.bulk_update(updated_entries, ["embeddings", "search_model_id", "file_path"]) + Entry.objects.bulk_update(updated_entries, ["embeddings", "search_model_id", "file_path"]) search_model_config_id = options.get("search_model_id") apply = options.get("apply") From 4e39088f5b312475d9dbc58f1fd7c1d07408c10f Mon Sep 17 00:00:00 2001 From: Debanjum Date: Sun, 27 Oct 2024 23:02:27 -0700 Subject: [PATCH 07/10] Make agent name in home page carousel not text wrap on mobile --- src/interface/web/app/page.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interface/web/app/page.tsx b/src/interface/web/app/page.tsx index b570e86c..eafd5373 100644 --- a/src/interface/web/app/page.tsx +++ b/src/interface/web/app/page.tsx @@ -361,7 +361,7 @@ function ChatBodyData(props: ChatBodyDataProps) { className={`${selectedAgent === agents[index].slug ? 
convertColorToBorderClass(agents[index].color) : "border-muted text-muted-foreground"} hover:cursor-pointer`} > openAgentEditCard(agents[index].slug) }

From ee0789eb3dfa80060c21fb11b8ffa3d0390581c4 Mon Sep 17 00:00:00 2001
From: Debanjum
Date: Sun, 27 Oct 2024 23:55:47 -0700
Subject: [PATCH 08/10] Mark context messages with user role as context role
 isn't being used

Context role was added to allow changing the message truncation order
based on the context role as well. Revert it for now since this is not
currently being done.
---
 src/khoj/processor/conversation/utils.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py
index 1ccc7594..9946b4e9 100644
--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@@ -174,7 +174,7 @@ def generate_chatml_messages_with_context(
     model_type="",
     context_message="",
 ):
-    """Generate messages for ChatGPT with context from previous conversation"""
+    """Generate chat messages with appropriate context from previous conversation to send to the chat model"""
     # Set max prompt size from user config or based on pre-configured for model and machine specs
     if not max_prompt_size:
         if loaded_model:
@@ -199,7 +199,7 @@ def generate_chatml_messages_with_context(
             if not is_none_or_empty(chat.get("onlineContext")):
                 message_context += f"{prompts.online_search_conversation.format(online_results=chat.get('onlineContext'))}"
             if not is_none_or_empty(message_context):
-                reconstructed_context_message = ChatMessage(content=message_context, role="context")
+                reconstructed_context_message = ChatMessage(content=message_context, role="user")
                 chatml_messages.insert(0, reconstructed_context_message)
 
             role = "user" if chat["by"] == "you" else "assistant"
@@ -220,7 +220,7 @@ def generate_chatml_messages_with_context(
         )
     )
     if not is_none_or_empty(context_message):
-        messages.append(ChatMessage(content=context_message, role="context"))
+        messages.append(ChatMessage(content=context_message, role="user"))
     if len(chatml_messages) > 0:
         messages += chatml_messages
     if not is_none_or_empty(system_message):
@@ -229,11 +229,6 @@ def generate_chatml_messages_with_context(
     # Truncate oldest messages from conversation history until under max supported prompt size by model
     messages = truncate_messages(messages, max_prompt_size, model_name, loaded_model, tokenizer_name)
 
-    # Reset context message role to assistant
-    for message in messages:
-        if message.role == "context":
-            message.role = "user"
-
     # Return message in chronological order
     return messages[::-1]

From 8ddd70f3a9be25693021f778fb76be3d5082089e Mon Sep 17 00:00:00 2001
From: Debanjum
Date: Mon, 28 Oct 2024 00:22:21 -0700
Subject: [PATCH 09/10] Put context into separate message before sending to
 offline chat model

Align the context passed to the offline chat model with the other chat
models:
- Pass context in a separate message for better separation between the
  user query and the shared context
- Pass the filename in the context
- Add online results for the webpage conversation command
---
 .../conversation/offline/chat_model.py | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/khoj/processor/conversation/offline/chat_model.py b/src/khoj/processor/conversation/offline/chat_model.py
index 4eafae00..d9cbd507 100644
--- a/src/khoj/processor/conversation/offline/chat_model.py
+++ b/src/khoj/processor/conversation/offline/chat_model.py
@@ -153,7 +153,7 @@ def converse_offline(
     # Initialize 
Variables assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured" offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size) - compiled_references_message = "\n\n".join({f"{item['compiled']}" for item in references}) + compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references}) current_date = datetime.now() @@ -170,8 +170,6 @@ def converse_offline( day_of_week=current_date.strftime("%A"), ) - conversation_primer = prompts.query_prompt.format(query=user_query) - if location_data: location_prompt = prompts.user_location.format(location=f"{location_data}") system_prompt = f"{system_prompt}\n{location_prompt}" @@ -181,27 +179,31 @@ def converse_offline( system_prompt = f"{system_prompt}\n{user_name_prompt}" # Get Conversation Primer appropriate to Conversation Type - if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references_message): + if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references): return iter([prompts.no_notes_found.format()]) elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results): completion_func(chat_response=prompts.no_online_results_found.format()) return iter([prompts.no_online_results_found.format()]) - if ConversationCommand.Online in conversation_commands: + context_message = "" + if not is_none_or_empty(compiled_references): + context_message += f"{prompts.notes_conversation_offline.format(references=compiled_references)}\n\n" + if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands: simplified_online_results = online_results.copy() for result in online_results: if online_results[result].get("webpages"): simplified_online_results[result] = online_results[result]["webpages"] - conversation_primer = f"{prompts.online_search_conversation_offline.format(online_results=str(simplified_online_results))}\n{conversation_primer}" - if not is_none_or_empty(compiled_references_message): - conversation_primer = f"{prompts.notes_conversation_offline.format(references=compiled_references_message)}\n\n{conversation_primer}" + context_message += ( + f"{prompts.online_search_conversation_offline.format(online_results=str(simplified_online_results))}" + ) # Setup Prompt with Primer or Conversation History messages = generate_chatml_messages_with_context( - conversation_primer, + user_query, system_prompt, conversation_log, + context_message=context_message, model_name=model, loaded_model=offline_chat_model, max_prompt_size=max_prompt_size, From aad7528d1bfe99cc3d1f3c89aaa769322431efab Mon Sep 17 00:00:00 2001 From: Debanjum Date: Mon, 28 Oct 2024 01:55:24 -0700 Subject: [PATCH 10/10] Render slash commands popup below chat input text area on home page --- .../web/app/components/chatInputArea/chatInputArea.tsx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/interface/web/app/components/chatInputArea/chatInputArea.tsx b/src/interface/web/app/components/chatInputArea/chatInputArea.tsx index 7f2baf1d..92a3b3ae 100644 --- a/src/interface/web/app/components/chatInputArea/chatInputArea.tsx +++ b/src/interface/web/app/components/chatInputArea/chatInputArea.tsx @@ -367,6 +367,11 @@ export const ChatInputArea = forwardRef((pr e.preventDefault()} className={`${props.isMobileWidth ? 
"w-[100vw]" : "w-full"} rounded-md`} + side="top" + align="center" + /* Offset below text area on home page (i.e where conversationId is unset) */ + sideOffset={props.conversationId ? 0 : 80} + alignOffset={0} >