Use Slash Commands and Add Notes Slash Command (#463)

* Store conversation command options in an Enum

* Move to slash commands instead of using @ to specify general commands

* Calculate conversation command once & pass it as arg to child funcs

* Add /notes command to respond using only knowledge base as context

This prevents the chat model from trying to respond using only its general
world knowledge, without any references pulled from the indexed
knowledge base

* Test general and notes slash commands in openai chat director tests

* Update gpt4all tests to use md configuration

* Add a /help tooltip

* Add dynamic support for describing slash commands. Remove the explicit default command and treat notes as the default type (a minimal sketch of the resulting command flow follows below)
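
A minimal, self-contained sketch of the command flow this commit introduces. Names mirror the ConversationCommand enum in khoj/utils/helpers.py and the get_conversation_command helper in khoj/routers/helpers.py from the diff below; the demo queries are illustrative only:

from enum import Enum

class ConversationCommand(str, Enum):
    General = "general"
    Notes = "notes"
    Help = "help"

def get_conversation_command(query: str, any_references: bool = False) -> ConversationCommand:
    # Explicit slash commands take precedence over inference
    if query.startswith("/notes"):
        return ConversationCommand.Notes
    elif query.startswith("/general"):
        return ConversationCommand.General
    elif query.startswith("/help"):
        return ConversationCommand.Help
    # Fall back to general chat when retrieval found no relevant notes
    elif not any_references:
        return ConversationCommand.General
    else:
        return ConversationCommand.Notes

# Illustrative queries, not taken from the test suite
assert get_conversation_command("/general Who won the World Cup?") == ConversationCommand.General
assert get_conversation_command("What did I write yesterday?", any_references=True) == ConversationCommand.Notes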

---------

Co-authored-by: sabaimran <narmiabas@gmail.com>
Debanjum 2023-08-26 18:11:18 -07:00 committed by GitHub
parent e64357698d
commit 7919787fb7
14 changed files with 237 additions and 33 deletions

View file

@@ -9,6 +9,7 @@
<link rel="stylesheet" href="/static/assets/khoj.css">
</head>
<script>
let chatOptions = [];
function copyProgrammaticOutput(event) {
// Remove the first 4 characters which are the "Copy" button
const programmaticOutput = event.target.parentNode.textContent.trim().slice(4);
@@ -109,6 +110,9 @@
const reader = response.body.getReader();
const decoder = new TextDecoder();
let chatTooltip = document.getElementById("chat-tooltip");
chatTooltip.style.display = "none";
function readStream() {
reader.read().then(({ done, value }) => {
if (done) {
@@ -159,6 +163,35 @@
}
}
function onChatInput() {
let chatInput = document.getElementById("chat-input");
if (chatInput.value === "/") {
let chatTooltip = document.getElementById("chat-tooltip");
chatTooltip.style.display = "block";
let helpText = "<div>";
for (let key in chatOptions) {
helpText += "<b>/" + key + "</b>: " + chatOptions[key] + "<br>";
}
chatTooltip.innerHTML = helpText;
} else if (chatInput.value.startsWith("/")) {
const firstWord = chatInput.value.split(" ")[0];
if (firstWord.substring(1) in chatOptions) {
// Show the active command mode in the tooltip.
let chatTooltip = document.getElementById("chat-tooltip");
chatTooltip.style.display = "block";
chatTooltip.innerHTML = "Mode: " + firstWord.substring(1);
} else {
let chatTooltip = document.getElementById("chat-tooltip");
chatTooltip.style.display = "none";
}
} else {
let chatTooltip = document.getElementById("chat-tooltip");
chatTooltip.style.display = "none";
}
autoResize();
}
function autoResize() {
const textarea = document.getElementById('chat-input');
const scrollTop = textarea.scrollTop;
@@ -196,6 +229,18 @@
return;
});
fetch('/api/chat/options')
.then(response => response.json())
.then(data => {
// Render chat options, if any
if (data) {
chatOptions = data;
}
})
.catch(err => {
return;
});
// Fill query field with value passed in URL query parameters, if any.
var query_via_url = new URLSearchParams(window.location.search).get("q");
if (query_via_url) {
@@ -242,7 +287,8 @@
<!-- Chat Footer -->
<div id="chat-footer">
<textarea id="chat-input" class="option" oninput="autoResize()" onkeyup=incrementalChat(event) autofocus="autofocus" placeholder="What is the meaning of life?"></textarea>
<div id="chat-tooltip" style="display: none;"></div>
<textarea id="chat-input" class="option" oninput="onChatInput()" onkeyup=incrementalChat(event) autofocus="autofocus" placeholder="What is the meaning of life?"></textarea>
</div>
</body>
@@ -450,6 +496,10 @@
padding: 10px;
}
div#chat-tooltip {
text-align: left;
}
@keyframes gradient {
0% {
background-position: 0% 50%;
@@ -509,7 +559,7 @@
khojBannerSubmit?.addEventListener("click", function(event) {
event.preventDefault();
var email = document.getElementById("khoj-banner-email").value;
fetch("https://lantern.khoj.dev/beta/users/", {
fetch("https://app.khoj.dev/beta/users/", {
method: "POST",
body: JSON.stringify({
email: email

View file

@@ -467,7 +467,7 @@
khojBannerSubmit?.addEventListener("click", function(event) {
event.preventDefault();
var email = document.getElementById("khoj-banner-email").value;
fetch("https://lantern.khoj.dev/beta/users/", {
fetch("https://app.khoj.dev/beta/users/", {
method: "POST",
body: JSON.stringify({
email: email

View file

@@ -14,6 +14,7 @@ import threading
import warnings
from platform import system
import webbrowser
from importlib.metadata import version
# Ignore non-actionable warnings
warnings.filterwarnings("ignore", message=r"snapshot_download.py has been made private", category=FutureWarning)
@@ -153,6 +154,7 @@ def set_state(args):
state.host = args.host
state.port = args.port
state.demo = args.demo
state.khoj_version = version("khoj-assistant")
def start_server(app, host=None, port=None, socket=None):

View file

@@ -1,4 +1,4 @@
from typing import Union, List
from typing import Iterator, Union, List
from datetime import datetime
import logging
from threading import Thread
@@ -11,6 +11,7 @@ from khoj.processor.conversation.utils import ThreadedGenerator, generate_chatml
from khoj.processor.conversation import prompts
from khoj.utils.constants import empty_escape_sequences
from khoj.utils import state
from khoj.utils.helpers import ConversationCommand, is_none_or_empty
logger = logging.getLogger(__name__)
@@ -118,7 +119,8 @@
model: str = "llama-2-7b-chat.ggmlv3.q4_K_S.bin",
loaded_model: Union[GPT4All, None] = None,
completion_func=None,
) -> ThreadedGenerator:
conversation_command=ConversationCommand.Notes,
) -> Union[ThreadedGenerator, Iterator[str]]:
"""
Converse with user using Llama
"""
@@ -127,8 +129,10 @@
compiled_references_message = "\n\n".join({f"{item}" for item in references})
# Get Conversation Primer appropriate to Conversation Type
if compiled_references_message == "":
if conversation_command == ConversationCommand.General:
conversation_primer = user_query
elif conversation_command == ConversationCommand.Notes and is_none_or_empty(compiled_references_message):
return iter([prompts.no_notes_found.format()])
else:
conversation_primer = prompts.notes_conversation_llamav2.format(
query=user_query, references=compiled_references_message

View file

@@ -14,6 +14,7 @@ from khoj.processor.conversation.openai.utils import (
completion_with_backoff,
)
from khoj.processor.conversation.utils import generate_chatml_messages_with_context
from khoj.utils.helpers import ConversationCommand, is_none_or_empty
logger = logging.getLogger(__name__)
@@ -108,6 +109,7 @@
api_key: Optional[str] = None,
temperature: float = 0.2,
completion_func=None,
conversation_command=ConversationCommand.Notes,
):
"""
Converse with user using OpenAI's ChatGPT
@@ -117,8 +119,10 @@
compiled_references = "\n\n".join({f"# {item}" for item in references})
# Get Conversation Primer appropriate to Conversation Type
if compiled_references == "":
if conversation_command == ConversationCommand.General:
conversation_primer = prompts.general_conversation.format(current_date=current_date, query=user_query)
elif conversation_command == ConversationCommand.Notes and is_none_or_empty(compiled_references):
return iter([prompts.no_notes_found.format()])
else:
conversation_primer = prompts.notes_conversation.format(
current_date=current_date, query=user_query, references=compiled_references

View file

@@ -17,6 +17,11 @@ Current Date: {current_date}
Question: {query}
""".strip()
)
no_notes_found = PromptTemplate.from_template(
"""
I'm sorry, I couldn't find any relevant notes to respond to your message.
""".strip()
)
system_prompt_message_llamav2 = f"""You are Khoj, a friendly, smart and helpful personal assistant.
Using your general knowledge and our past conversations as context, answer the following question.
@@ -225,3 +230,17 @@ A:{ "search-type": "notes" }
Q:When did I go surfing last?
A:{ "search-type": "notes" }
Q:"""
# System messages to user
# --
help_message = PromptTemplate.from_template(
"""
**/help**: Show this help message.
**/notes**: Search only against the information in your knowledge base. This is the default method.
**/general**: Search general knowledge with the LLM. This will not search against your notes.
You are using the **{model}** model. To change the model, go to your <a href="/config">settings</a> page.
**version**: {version}
""".strip()
)
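
For context, the /help branch added to /api/chat (later in this diff) renders this template roughly as below. This is a sketch; the model name and version are placeholder values, not ones from the commit:

# Sketch: formatting the help prompt as the /help branch of /api/chat does
model_type = "offline"  # "openai" when an OpenAI chat model is configured
formatted_help = help_message.format(model=model_type, version="0.10.0")  # placeholder version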

View file

@@ -18,7 +18,7 @@ from khoj.search_filter.date_filter import DateFilter
from khoj.search_filter.file_filter import FileFilter
from khoj.search_filter.word_filter import WordFilter
from khoj.utils.config import TextSearchModel
from khoj.utils.helpers import timer
from khoj.utils.helpers import ConversationCommand, is_none_or_empty, timer, command_descriptions
from khoj.utils.rawconfig import (
ContentConfig,
FullConfig,
@@ -36,7 +36,13 @@ from khoj.utils.state import SearchType
from khoj.utils import state, constants
from khoj.utils.yaml import save_config_to_file_updated_state
from fastapi.responses import StreamingResponse, Response
from khoj.routers.helpers import perform_chat_checks, generate_chat_response, update_telemetry_state
from khoj.routers.helpers import (
get_conversation_command,
perform_chat_checks,
generate_chat_response,
update_telemetry_state,
)
from khoj.processor.conversation.prompts import help_message
from khoj.processor.conversation.openai.gpt import extract_questions
from khoj.processor.conversation.gpt4all.chat_model import extract_questions_offline
from fastapi.requests import Request
@@ -659,6 +665,30 @@ def chat_history(
return {"status": "ok", "response": meta_log.get("chat", [])}
@api.get("/chat/options", response_class=Response)
async def chat_options(
request: Request,
client: Optional[str] = None,
user_agent: Optional[str] = Header(None),
referer: Optional[str] = Header(None),
host: Optional[str] = Header(None),
) -> Response:
cmd_options = {}
for cmd in ConversationCommand:
cmd_options[cmd.value] = command_descriptions[cmd]
update_telemetry_state(
request=request,
telemetry_type="api",
api="chat_options",
client=client,
user_agent=user_agent,
referer=referer,
host=host,
)
return Response(content=json.dumps(cmd_options), media_type="application/json", status_code=200)
@api.get("/chat", response_class=Response)
async def chat(
request: Request,
@@ -671,7 +701,15 @@ async def chat(
host: Optional[str] = Header(None),
) -> Response:
perform_chat_checks()
compiled_references, inferred_queries = await extract_references_and_questions(request, q, (n or 5))
conversation_command = get_conversation_command(query=q, any_references=True)
compiled_references, inferred_queries = await extract_references_and_questions(
request, q, (n or 5), conversation_command
)
conversation_command = get_conversation_command(query=q, any_references=not is_none_or_empty(compiled_references))
if conversation_command == ConversationCommand.Help:
model_type = "offline" if state.processor_config.conversation.enable_offline_chat else "openai"
formatted_help = help_message.format(model=model_type, version=state.khoj_version)
return StreamingResponse(iter([formatted_help]), media_type="text/event-stream", status_code=200)
# Get the (streamed) chat response from the LLM of choice.
llm_response = generate_chat_response(
@@ -679,6 +717,7 @@
meta_log=state.processor_config.conversation.meta_log,
compiled_references=compiled_references,
inferred_queries=inferred_queries,
conversation_command=conversation_command,
)
if llm_response is None:
@@ -716,12 +755,12 @@ async def extract_references_and_questions(
request: Request,
q: str,
n: int,
conversation_type: ConversationCommand = ConversationCommand.Notes,
):
# Load Conversation History
meta_log = state.processor_config.conversation.meta_log
# Initialize Variables
conversation_type = "general" if q.startswith("@general") else "notes"
compiled_references: List[Any] = []
inferred_queries: List[str] = []
@@ -731,7 +770,7 @@
)
return compiled_references, inferred_queries
if conversation_type == "notes":
if conversation_type != ConversationCommand.General:
# Infer search queries from user message
with timer("Extracting search queries took", logger):
# If we've reached here, either the user has enabled offline chat or the openai model is enabled.

View file

@@ -1,12 +1,12 @@
import logging
from datetime import datetime
from functools import partial
from typing import List, Optional
from typing import Iterator, List, Optional, Union
from fastapi import HTTPException, Request
from khoj.utils import state
from khoj.utils.helpers import timer, log_telemetry
from khoj.utils.helpers import ConversationCommand, timer, log_telemetry
from khoj.processor.conversation.openai.gpt import converse
from khoj.processor.conversation.gpt4all.chat_model import converse_offline
from khoj.processor.conversation.utils import reciprocal_conversation_to_chatml, message_to_log, ThreadedGenerator
@@ -57,12 +57,27 @@
]
def get_conversation_command(query: str, any_references: bool = False) -> ConversationCommand:
if query.startswith("/notes"):
return ConversationCommand.Notes
elif query.startswith("/general"):
return ConversationCommand.General
elif query.startswith("/help"):
return ConversationCommand.Help
# If no relevant notes found for the given query
elif not any_references:
return ConversationCommand.General
else:
return ConversationCommand.Notes
def generate_chat_response(
q: str,
meta_log: dict,
compiled_references: List[str] = [],
inferred_queries: List[str] = [],
) -> ThreadedGenerator:
conversation_command: ConversationCommand = ConversationCommand.Notes,
) -> Union[ThreadedGenerator, Iterator[str]]:
def _save_to_conversation_log(
q: str,
chat_response: str,
@@ -85,12 +100,8 @@ def generate_chat_response(
# Initialize Variables
user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
conversation_type = "general" if q.startswith("@general") else "notes"
# Switch to general conversation type if no relevant notes found for the given query
conversation_type = "notes" if compiled_references else "general"
logger.debug(f"Conversation Type: {conversation_type}")
chat_response = None
logger.debug(f"Conversation Type: {conversation_command.name}")
try:
partial_completion = partial(
@@ -110,6 +121,7 @@
loaded_model=loaded_model,
conversation_log=meta_log,
completion_func=partial_completion,
conversation_command=conversation_command,
)
elif state.processor_config.conversation.openai_model:
@@ -122,6 +134,7 @@
model=chat_model,
api_key=api_key,
completion_func=partial_completion,
conversation_command=conversation_command,
)
except Exception as e:

View file

@@ -2,6 +2,7 @@
from __future__ import annotations # to avoid quoting type hints
from collections import OrderedDict
import datetime
from enum import Enum
from importlib import import_module
from importlib.metadata import version
import logging
@@ -210,3 +211,16 @@ def log_telemetry(
# Log telemetry data to telemetry endpoint
return request_body
class ConversationCommand(str, Enum):
General = "general"
Notes = "notes"
Help = "help"
command_descriptions = {
ConversationCommand.General: "This command allows you to talk with the LLM without including context from your knowledge base.",
ConversationCommand.Notes: "This command allows you to talk with the LLM while including context from your knowledge base.",
ConversationCommand.Help: "This command displays a help message.",
}
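
The new /api/chat/options endpoint (in the routers diff above) serializes this mapping for the client tooltip. Roughly, reusing the enum and dict this file defines:

import json

from khoj.utils.helpers import ConversationCommand, command_descriptions

# Build {command value -> description}, as the new /api/chat/options endpoint does
cmd_options = {cmd.value: command_descriptions[cmd] for cmd in ConversationCommand}
print(json.dumps(cmd_options, indent=2))  # the chat page stores this response in chatOptions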

View file

@@ -30,6 +30,7 @@ SearchType = utils_config.SearchType
telemetry: List[Dict[str, str]] = []
previous_query: str = None
demo: bool = False
khoj_version: str = None
if torch.cuda.is_available():
# Use CUDA GPU

View file

@@ -233,22 +233,25 @@ def client(content_config: ContentConfig, search_config: SearchConfig, processor
@pytest.fixture(scope="function")
def client_offline_chat(
content_config: ContentConfig, search_config: SearchConfig, processor_config_offline_chat: ProcessorConfig
md_content_config: ContentConfig, search_config: SearchConfig, processor_config_offline_chat: ProcessorConfig
):
state.config.content_type = content_config
# Initialize app state
state.config.content_type = md_content_config
state.config.search_type = search_config
state.SearchType = configure_search_types(state.config)
# These lines help us Mock the Search models for these search types
# Index Markdown Content for Search
filters = [DateFilter(), WordFilter(), FileFilter()]
state.search_models.text_search = text_search.initialize_model(search_config.asymmetric)
state.search_models.image_search = image_search.initialize_model(search_config.image)
state.content_index.org = text_search.setup(
OrgToJsonl, content_config.org, state.search_models.text_search.bi_encoder, regenerate=False
)
state.content_index.image = image_search.setup(
content_config.image, state.search_models.image_search, regenerate=False
state.content_index.markdown = text_search.setup(
MarkdownToJsonl,
md_content_config.markdown,
state.search_models.text_search.bi_encoder,
regenerate=False,
filters=filters,
)
# Initialize Processor from Config
state.processor_config = configure_processor(processor_config_offline_chat)
configure_routes(app)

View file

@@ -209,7 +209,7 @@ def test_notes_search(client, content_config: ContentConfig, search_config: Sear
assert response.status_code == 200
# assert actual_data contains "Khoj via Emacs" entry
search_result = response.json()[0]["entry"]
assert "git clone" in search_result
assert "git clone https://github.com/khoj-ai/khoj" in search_result
# ----------------------------------------------------------------------------------------------------
@@ -267,6 +267,6 @@ def test_notes_search_with_exclude_filter(client, content_config: ContentConfig,
# Assert
assert response.status_code == 200
# assert actual_data does not contain the word "Emacs"
# assert actual_data does not contain the word "clone"
search_result = response.json()[0]["entry"]
assert "clone" not in search_result

View file

@@ -74,7 +74,6 @@ def test_answer_from_chat_history(client_offline_chat):
# ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
@pytest.mark.chatquality
def test_answer_from_currently_retrieved_content(client_offline_chat):
# Arrange
@@ -123,7 +122,10 @@ def test_answer_from_chat_history_and_previously_retrieved_content(client_offlin
# ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
@pytest.mark.xfail(
AssertionError,
reason="Chat director not capable of answering this question yet because it requires extract_questions",
)
@pytest.mark.chatquality
def test_answer_from_chat_history_and_currently_retrieved_content(client_offline_chat):
# Arrange

View file

@@ -1,9 +1,11 @@
# Standard Packages
import os
import urllib.parse
# External Packages
import pytest
from freezegun import freeze_time
from khoj.processor.conversation import prompts
# Internal Packages
from khoj.processor.conversation.utils import message_to_log
@@ -168,6 +170,57 @@ def test_no_answer_in_chat_history_or_retrieved_content(chat_client):
)
# ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
def test_answer_using_general_command(chat_client):
# Arrange
query = urllib.parse.quote("/general Where was Xi Li born?")
message_list = []
populate_chat_history(message_list)
# Act
response = chat_client.get(f"/api/chat?q={query}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
assert response.status_code == 200
assert "Fujiang" not in response_message
# ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
def test_answer_from_retrieved_content_using_notes_command(chat_client):
# Arrange
query = urllib.parse.quote("/notes Where was Xi Li born?")
message_list = []
populate_chat_history(message_list)
# Act
response = chat_client.get(f"/api/chat?q={query}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
assert response.status_code == 200
assert "Fujiang" in response_message
# ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
def test_answer_not_known_using_notes_command(chat_client):
# Arrange
query = urllib.parse.quote("/notes Where was Testatron born?")
message_list = []
populate_chat_history(message_list)
# Act
response = chat_client.get(f"/api/chat?q={query}&stream=true")
response_message = response.content.decode("utf-8")
# Assert
assert response.status_code == 200
assert response_message == prompts.no_notes_found.format()
# ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering time aware questions yet")
@pytest.mark.chatquality