Use Slash Commands and Add Notes Slash Command (#463)

* Store conversation command options in an Enum

* Move to slash commands instead of using @ to specify general commands

* Calculate conversation command once & pass it as arg to child funcs

* Add /notes command to respond using only knowledge base as context

This prevents the chat model from trying to respond using only its general
world knowledge, without any references pulled from the indexed
knowledge base

* Test general and notes slash commands in openai chat director tests

* Update gpt4all tests to use md configuration

* Add a /help tooltip

* Add dynamic support for describing slash commands. Remove the explicit default command and treat notes as the default type (a minimal sketch of the resulting command flow follows below)
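
A minimal, self-contained sketch of the command flow this commit introduces. Names mirror the ConversationCommand enum in khoj/utils/helpers.py and the get_conversation_command helper in khoj/routers/helpers.py from the diff below; the demo queries are illustrative only:

from enum import Enum

class ConversationCommand(str, Enum):
    General = "general"
    Notes = "notes"
    Help = "help"

def get_conversation_command(query: str, any_references: bool = False) -> ConversationCommand:
    # Explicit slash commands take precedence over inference
    if query.startswith("/notes"):
        return ConversationCommand.Notes
    elif query.startswith("/general"):
        return ConversationCommand.General
    elif query.startswith("/help"):
        return ConversationCommand.Help
    # Fall back to general chat when retrieval found no relevant notes
    elif not any_references:
        return ConversationCommand.General
    else:
        return ConversationCommand.Notes

# Illustrative queries, not taken from the test suite
assert get_conversation_command("/general Who won the World Cup?") == ConversationCommand.General
assert get_conversation_command("What did I write yesterday?", any_references=True) == ConversationCommand.Notes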

---------

Co-authored-by: sabaimran <narmiabas@gmail.com>
Debanjum 2023-08-26 18:11:18 -07:00 committed by GitHub
parent e64357698d
commit 7919787fb7
14 changed files with 237 additions and 33 deletions

View file

@@ -9,6 +9,7 @@
<link rel="stylesheet" href="/static/assets/khoj.css">
</head>
<script>
let chatOptions = [];
function copyProgrammaticOutput(event) {
// Remove the first 4 characters which are the "Copy" button
const programmaticOutput = event.target.parentNode.textContent.trim().slice(4);
@@ -109,6 +110,9 @@
const reader = response.body.getReader();
const decoder = new TextDecoder();
let chatTooltip = document.getElementById("chat-tooltip");
chatTooltip.style.display = "none";
function readStream() {
reader.read().then(({ done, value }) => {
if (done) {
@@ -159,6 +163,35 @@
}
}
function onChatInput() {
let chatInput = document.getElementById("chat-input");
if (chatInput.value === "/") {
let chatTooltip = document.getElementById("chat-tooltip");
chatTooltip.style.display = "block";
let helpText = "<div>";
for (let key in chatOptions) {
helpText += "<b>/" + key + "</b>: " + chatOptions[key] + "<br>";
}
chatTooltip.innerHTML = helpText;
} else if (chatInput.value.startsWith("/")) {
const firstWord = chatInput.value.split(" ")[0];
if (firstWord.substring(1) in chatOptions) {
// Show the active command mode in the tooltip.
let chatTooltip = document.getElementById("chat-tooltip");
chatTooltip.style.display = "block";
chatTooltip.innerHTML = "Mode: " + firstWord.substring(1);
} else {
let chatTooltip = document.getElementById("chat-tooltip");
chatTooltip.style.display = "none";
}
} else {
let chatTooltip = document.getElementById("chat-tooltip");
chatTooltip.style.display = "none";
}
autoResize();
}
function autoResize() {
const textarea = document.getElementById('chat-input');
const scrollTop = textarea.scrollTop;
@@ -196,6 +229,18 @@
return;
});
fetch('/api/chat/options')
.then(response => response.json())
.then(data => {
// Render chat options, if any
if (data) {
chatOptions = data;
}
})
.catch(err => {
return;
});
// Fill query field with value passed in URL query parameters, if any.
var query_via_url = new URLSearchParams(window.location.search).get("q");
if (query_via_url) {
@@ -242,7 +287,8 @@
<!-- Chat Footer -->
<div id="chat-footer">
<textarea id="chat-input" class="option" oninput="autoResize()" onkeyup=incrementalChat(event) autofocus="autofocus" placeholder="What is the meaning of life?"></textarea>
<div id="chat-tooltip" style="display: none;"></div>
<textarea id="chat-input" class="option" oninput="onChatInput()" onkeyup=incrementalChat(event) autofocus="autofocus" placeholder="What is the meaning of life?"></textarea>
</div>
</body>
@@ -450,6 +496,10 @@
padding: 10px;
}
div#chat-tooltip {
text-align: left;
}
@keyframes gradient {
0% {
background-position: 0% 50%;
@@ -509,7 +559,7 @@
khojBannerSubmit?.addEventListener("click", function(event) {
event.preventDefault();
var email = document.getElementById("khoj-banner-email").value;
fetch("https://lantern.khoj.dev/beta/users/", {
fetch("https://app.khoj.dev/beta/users/", {
method: "POST",
body: JSON.stringify({
email: email

View file

@@ -467,7 +467,7 @@
khojBannerSubmit?.addEventListener("click", function(event) {
event.preventDefault();
var email = document.getElementById("khoj-banner-email").value;
fetch("https://lantern.khoj.dev/beta/users/", {
fetch("https://app.khoj.dev/beta/users/", {
method: "POST",
body: JSON.stringify({
email: email

View file

@@ -14,6 +14,7 @@ import threading
import warnings
from platform import system
import webbrowser
from importlib.metadata import version
# Ignore non-actionable warnings
warnings.filterwarnings("ignore", message=r"snapshot_download.py has been made private", category=FutureWarning)
@@ -153,6 +154,7 @@ def set_state(args):
state.host = args.host
state.port = args.port
state.demo = args.demo
state.khoj_version = version("khoj-assistant")
def start_server(app, host=None, port=None, socket=None):

View file

@@ -1,4 +1,4 @@
from typing import Union, List
from typing import Iterator, Union, List
from datetime import datetime
import logging
from threading import Thread
@@ -11,6 +11,7 @@ from khoj.processor.conversation.utils import ThreadedGenerator, generate_chatml
from khoj.processor.conversation import prompts
from khoj.utils.constants import empty_escape_sequences
from khoj.utils import state
from khoj.utils.helpers import ConversationCommand, is_none_or_empty
logger = logging.getLogger(__name__)
@@ -118,7 +119,8 @@
model: str = "llama-2-7b-chat.ggmlv3.q4_K_S.bin",
loaded_model: Union[GPT4All, None] = None,
completion_func=None,
) -> ThreadedGenerator:
conversation_command=ConversationCommand.Notes,
) -> Union[ThreadedGenerator, Iterator[str]]:
"""
Converse with user using Llama
"""
@@ -127,8 +129,10 @@
compiled_references_message = "\n\n".join({f"{item}" for item in references})
# Get Conversation Primer appropriate to Conversation Type
if compiled_references_message == "":
if conversation_command == ConversationCommand.General:
conversation_primer = user_query
elif conversation_command == ConversationCommand.Notes and is_none_or_empty(compiled_references_message):
return iter([prompts.no_notes_found.format()])
else:
conversation_primer = prompts.notes_conversation_llamav2.format(
query=user_query, references=compiled_references_message

View file

@@ -14,6 +14,7 @@ from khoj.processor.conversation.openai.utils import (
completion_with_backoff,
)
from khoj.processor.conversation.utils import generate_chatml_messages_with_context
from khoj.utils.helpers import ConversationCommand, is_none_or_empty
logger = logging.getLogger(__name__)
@@ -108,6 +109,7 @@
api_key: Optional[str] = None,
temperature: float = 0.2,
completion_func=None,
conversation_command=ConversationCommand.Notes,
):
"""
Converse with user using OpenAI's ChatGPT
@@ -117,8 +119,10 @@
compiled_references = "\n\n".join({f"# {item}" for item in references})
# Get Conversation Primer appropriate to Conversation Type
if compiled_references == "":
if conversation_command == ConversationCommand.General:
conversation_primer = prompts.general_conversation.format(current_date=current_date, query=user_query)
elif conversation_command == ConversationCommand.Notes and is_none_or_empty(compiled_references):
return iter([prompts.no_notes_found.format()])
else:
conversation_primer = prompts.notes_conversation.format(
current_date=current_date, query=user_query, references=compiled_references

View file

@@ -17,6 +17,11 @@ Current Date: {current_date}
Question: {query}
""".strip()
)
no_notes_found = PromptTemplate.from_template(
"""
I'm sorry, I couldn't find any relevant notes to respond to your message.
""".strip()
)
system_prompt_message_llamav2 = f"""You are Khoj, a friendly, smart and helpful personal assistant.
Using your general knowledge and our past conversations as context, answer the following question.
@@ -225,3 +230,17 @@ A:{ "search-type": "notes" }
Q:When did I go surfing last?
A:{ "search-type": "notes" }
Q:"""
# System messages to user
# --
help_message = PromptTemplate.from_template(
"""
**/help**: Show this help message.
**/notes**: Search only against the information in your knowledge base. This is the default method.
**/general**: Search general knowledge with the LLM. This will not search against your notes.
You are using the **{model}** model. To change the model, go to your <a href="/config">settings</a> page.
**version**: {version}
""".strip()
)
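
For context, the /help branch added to /api/chat (later in this diff) renders this template roughly as below. This is a sketch; the model name and version are placeholder values, not ones from the commit:

# Sketch: formatting the help prompt as the /help branch of /api/chat does
model_type = "offline"  # "openai" when an OpenAI chat model is configured
formatted_help = help_message.format(model=model_type, version="0.10.0")  # placeholder version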

View file

@@ -18,7 +18,7 @@ from khoj.search_filter.date_filter import DateFilter
from khoj.search_filter.file_filter import FileFilter
from khoj.search_filter.word_filter import WordFilter
from khoj.utils.config import TextSearchModel
from khoj.utils.helpers import timer
from khoj.utils.helpers import ConversationCommand, is_none_or_empty, timer, command_descriptions
from khoj.utils.rawconfig import (
ContentConfig,
FullConfig,
@@ -36,7 +36,13 @@ from khoj.utils.state import SearchType
from khoj.utils import state, constants
from khoj.utils.yaml import save_config_to_file_updated_state
from fastapi.responses import StreamingResponse, Response
from khoj.routers.helpers import perform_chat_checks, generate_chat_response, update_telemetry_state
from khoj.routers.helpers import (
get_conversation_command,
perform_chat_checks,
generate_chat_response,
update_telemetry_state,
)
from khoj.processor.conversation.prompts import help_message
from khoj.processor.conversation.openai.gpt import extract_questions
from khoj.processor.conversation.gpt4all.chat_model import extract_questions_offline
from fastapi.requests import Request
@@ -659,6 +665,30 @@ def chat_history(
return {"status": "ok", "response": meta_log.get("chat", [])}
@api.get("/chat/options", response_class=Response)
async def chat_options(
request: Request,
client: Optional[str] = None,
user_agent: Optional[str] = Header(None),
referer: Optional[str] = Header(None),
host: Optional[str] = Header(None),
) -> Response:
cmd_options = {}
for cmd in ConversationCommand:
cmd_options[cmd.value] = command_descriptions[cmd]
update_telemetry_state(
request=request,
telemetry_type="api",
api="chat_options",
client=client,
user_agent=user_agent,
referer=referer,
host=host,
)
return Response(content=json.dumps(cmd_options), media_type="application/json", status_code=200)
@api.get("/chat", response_class=Response)
async def chat(
request: Request,
@@ -671,7 +701,15 @@ async def chat(
host: Optional[str] = Header(None),
) -> Response:
perform_chat_checks()
compiled_references, inferred_queries = await extract_references_and_questions(request, q, (n or 5))
conversation_command = get_conversation_command(query=q, any_references=True)
compiled_references, inferred_queries = await extract_references_and_questions(
request, q, (n or 5), conversation_command
)
conversation_command = get_conversation_command(query=q, any_references=not is_none_or_empty(compiled_references))
if conversation_command == ConversationCommand.Help:
model_type = "offline" if state.processor_config.conversation.enable_offline_chat else "openai"
formatted_help = help_message.format(model=model_type, version=state.khoj_version)
return StreamingResponse(iter([formatted_help]), media_type="text/event-stream", status_code=200)
# Get the (streamed) chat response from the LLM of choice.
llm_response = generate_chat_response(
@@ -679,6 +717,7 @@
meta_log=state.processor_config.conversation.meta_log,
compiled_references=compiled_references,
inferred_queries=inferred_queries,
conversation_command=conversation_command,
)
if llm_response is None:
@@ -716,12 +755,12 @@ async def extract_references_and_questions(
request: Request,
q: str,
n: int,
conversation_type: ConversationCommand = ConversationCommand.Notes,
):
# Load Conversation History
meta_log = state.processor_config.conversation.meta_log
# Initialize Variables
conversation_type = "general" if q.startswith("@general") else "notes"
compiled_references: List[Any] = []
inferred_queries: List[str] = []
@@ -731,7 +770,7 @@
)
return compiled_references, inferred_queries
if conversation_type == "notes":
if conversation_type != ConversationCommand.General:
# Infer search queries from user message
with timer("Extracting search queries took", logger):
# If we've reached here, either the user has enabled offline chat or the openai model is enabled.

View file

@@ -1,12 +1,12 @@
import logging
from datetime import datetime
from functools import partial
from typing import List, Optional
from typing import Iterator, List, Optional, Union
from fastapi import HTTPException, Request
from khoj.utils import state
from khoj.utils.helpers import timer, log_telemetry
from khoj.utils.helpers import ConversationCommand, timer, log_telemetry
from khoj.processor.conversation.openai.gpt import converse
from khoj.processor.conversation.gpt4all.chat_model import converse_offline
from khoj.processor.conversation.utils import reciprocal_conversation_to_chatml, message_to_log, ThreadedGenerator
@@ -57,12 +57,27 @@
]
def get_conversation_command(query: str, any_references: bool = False) -> ConversationCommand:
if query.startswith("/notes"):
return ConversationCommand.Notes
elif query.startswith("/general"):
return ConversationCommand.General
elif query.startswith("/help"):
return ConversationCommand.Help
# If no relevant notes found for the given query
elif not any_references:
return ConversationCommand.General
else:
return ConversationCommand.Notes
def generate_chat_response(
q: str,
meta_log: dict,
compiled_references: List[str] = [],
inferred_queries: List[str] = [],
) -> ThreadedGenerator:
conversation_command: ConversationCommand = ConversationCommand.Notes,
) -> Union[ThreadedGenerator, Iterator[str]]:
def _save_to_conversation_log(
q: str,
chat_response: str,
@@ -85,12 +100,8 @@ def generate_chat_response(
# Initialize Variables
user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
conversation_type = "general" if q.startswith("@general") else "notes"
# Switch to general conversation type if no relevant notes found for the given query
conversation_type = "notes" if compiled_references else "general"
logger.debug(f"Conversation Type: {conversation_type}")
chat_response = None
logger.debug(f"Conversation Type: {conversation_command.name}")
try:
partial_completion = partial(
@@ -110,6 +121,7 @@
loaded_model=loaded_model,
conversation_log=meta_log,
completion_func=partial_completion,
conversation_command=conversation_command,
)
elif state.processor_config.conversation.openai_model:
@@ -122,6 +134,7 @@
model=chat_model,
api_key=api_key,
completion_func=partial_completion,
conversation_command=conversation_command,
)
except Exception as e:

View file

@@ -2,6 +2,7 @@
from __future__ import annotations # to avoid quoting type hints
from collections import OrderedDict
import datetime
from enum import Enum
from importlib import import_module
from importlib.metadata import version
import logging
@@ -210,3 +211,16 @@ def log_telemetry(
# Log telemetry data to telemetry endpoint
return request_body
class ConversationCommand(str, Enum):
General = "general"
Notes = "notes"
Help = "help"
command_descriptions = {
ConversationCommand.General: "This command allows you to talk with the LLM without including context from your knowledge base.",
ConversationCommand.Notes: "This command allows you to talk with the LLM while including context from your knowledge base.",
ConversationCommand.Help: "This command displays a help message.",
}
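
The new /api/chat/options endpoint (in the routers diff above) serializes this mapping for the client tooltip. Roughly, reusing the enum and dict this file defines:

import json

from khoj.utils.helpers import ConversationCommand, command_descriptions

# Build {command value -> description}, as the new /api/chat/options endpoint does
cmd_options = {cmd.value: command_descriptions[cmd] for cmd in ConversationCommand}
print(json.dumps(cmd_options, indent=2))  # the chat page stores this response in chatOptions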

View file

@@ -30,6 +30,7 @@ SearchType = utils_config.SearchType
telemetry: List[Dict[str, str]] = []
previous_query: str = None
demo: bool = False
khoj_version: str = None
if torch.cuda.is_available():
# Use CUDA GPU

View file

@@ -233,22 +233,25 @@ def client(content_config: ContentConfig, search_config: SearchConfig, processor
@pytest.fixture(scope="function")
def client_offline_chat(
content_config: ContentConfig, search_config: SearchConfig, processor_config_offline_chat: ProcessorConfig
md_content_config: ContentConfig, search_config: SearchConfig, processor_config_offline_chat: ProcessorConfig
):
state.config.content_type = content_config
# Initialize app state
state.config.content_type = md_content_config
state.config.search_type = search_config
state.SearchType = configure_search_types(state.config)
# These lines help us Mock the Search models for these search types
# Index Markdown Content for Search
filters = [DateFilter(), WordFilter(), FileFilter()]
state.search_models.text_search = text_search.initialize_model(search_config.asymmetric)
state.search_models.image_search = image_search.initialize_model(search_config.image)
state.content_index.org = text_search.setup(
OrgToJsonl, content_config.org, state.search_models.text_search.bi_encoder, regenerate=False
)
state.content_index.image = image_search.setup(
content_config.image, state.search_models.image_search, regenerate=False
state.content_index.markdown = text_search.setup(
MarkdownToJsonl,
md_content_config.markdown,
state.search_models.text_search.bi_encoder,
regenerate=False,
filters=filters,
)
# Initialize Processor from Config
state.processor_config = configure_processor(processor_config_offline_chat)
configure_routes(app)

View file

@@ -209,7 +209,7 @@ def test_notes_search(client, content_config: ContentConfig, search_config: Sear
assert response.status_code == 200
# assert actual_data contains "Khoj via Emacs" entry
search_result = response.json()[0]["entry"]
assert "git clone" in search_result
assert "git clone https://github.com/khoj-ai/khoj" in search_result
# ----------------------------------------------------------------------------------------------------
@@ -267,6 +267,6 @@ def test_notes_search_with_exclude_filter(client, content_config: ContentConfig,
# Assert
assert response.status_code == 200
# assert actual_data does not contain the word "Emacs"
# assert actual_data does not contain the word "clone"
search_result = response.json()[0]["entry"]
assert "clone" not in search_result

View file

@@ -74,7 +74,6 @@ def test_answer_from_chat_history(client_offline_chat):
# ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
@pytest.mark.chatquality
def test_answer_from_currently_retrieved_content(client_offline_chat):
# Arrange
@@ -123,7 +122,10 @@ def test_answer_from_chat_history_and_previously_retrieved_content(client_offlin
# ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
@pytest.mark.xfail(
AssertionError,
reason="Chat director not capable of answering this question yet because it requires extract_questions",
)
@pytest.mark.chatquality
def test_answer_from_chat_history_and_currently_retrieved_content(client_offline_chat):
# Arrange

View file

@@ -1,9 +1,11 @@
# Standard Packages
import os
import urllib.parse
# External Packages
import pytest
from freezegun import freeze_time
from khoj.processor.conversation import prompts
# Internal Packages
from khoj.processor.conversation.utils import message_to_log
@@ -168,6 +170,57 @@ def test_no_answer_in_chat_history_or_retrieved_content(chat_client):
)
# ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
def test_answer_using_general_command(chat_client):
# Arrange
query = urllib.parse.quote("/general Where was Xi Li born?")
message_list = []
populate_chat_history(message_list)
# Act
response = chat_client.get(f"/api/chat?q={query}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
assert response.status_code == 200
assert "Fujiang" not in response_message
# ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
def test_answer_from_retrieved_content_using_notes_command(chat_client):
# Arrange
query = urllib.parse.quote("/notes Where was Xi Li born?")
message_list = []
populate_chat_history(message_list)
# Act
response = chat_client.get(f"/api/chat?q={query}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
assert response.status_code == 200
assert "Fujiang" in response_message
# ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
def test_answer_not_known_using_notes_command(chat_client):
# Arrange
query = urllib.parse.quote("/notes Where was Testatron born?")
message_list = []
populate_chat_history(message_list)
# Act
response = chat_client.get(f"/api/chat?q={query}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
assert response.status_code == 200
assert response_message == prompts.no_notes_found.format()
# ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering time aware questions yet")
@pytest.mark.chatquality