From 8f491d72decd47b365a31776ff07496307a9fff3 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 4 Jul 2023 10:14:39 -0700 Subject: [PATCH 01/14] Initial code with chat streaming working (warning: messy code) --- src/khoj/interface/web/chat.html | 47 ++++++++++++-- src/khoj/processor/conversation/gpt.py | 12 +++- src/khoj/processor/conversation/utils.py | 55 +++++++++++++++- src/khoj/routers/api.py | 82 +++++++++++++++++------- 4 files changed, 163 insertions(+), 33 deletions(-) diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index bb5d355a..213f20ad 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -64,14 +64,47 @@ // Generate backend API URL to execute query let url = `/api/chat?q=${encodeURIComponent(query)}&client=web`; - // Call specified Khoj API + let chat_body = document.getElementById("chat-body"); + let new_response = document.createElement("div"); + new_response.classList.add("chat-message", "khoj"); + new_response.attributes["data-meta"] = "🏮 Khoj at " + formatDate(new Date()); + chat_body.appendChild(new_response); + + let new_response_text = document.createElement("div"); + new_response_text.classList.add("chat-message-text", "khoj"); + new_response.appendChild(new_response_text); + + // Call specified Khoj API which returns a streamed response of type text/plain fetch(url) - .then(response => response.json()) - .then(data => { - // Render message by Khoj to chat body - console.log(data.response); - renderMessageWithReference(data.response, "khoj", data.context); + .then(response => { + console.log(response); + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + + function readStream() { + reader.read().then(({ done, value }) => { + if (done) { + console.log("Stream complete"); + return; + } + + const chunk = decoder.decode(value, { stream: true }); + new_response_text.innerHTML += chunk; + console.log(`Received ${chunk.length} bytes of data`); + console.log(`Chunk: ${chunk}`); + readStream(); + }); + } + readStream(); }); + + + // fetch(url) + // .then(data => { + // // Render message by Khoj to chat body + // console.log(data.response); + // renderMessageWithReference(data.response, "khoj", data.context); + // }); } function incrementalChat(event) { @@ -82,7 +115,7 @@ } window.onload = function () { - fetch('/api/chat?client=web') + fetch('/api/chat/init?client=web') .then(response => response.json()) .then(data => { if (data.detail) { diff --git a/src/khoj/processor/conversation/gpt.py b/src/khoj/processor/conversation/gpt.py index 2f29b2ef..74f13305 100644 --- a/src/khoj/processor/conversation/gpt.py +++ b/src/khoj/processor/conversation/gpt.py @@ -170,12 +170,18 @@ def converse(references, user_query, conversation_log={}, model="gpt-3.5-turbo", # Get Response from GPT logger.debug(f"Conversation Context for GPT: {messages}") - response = chat_completion_with_backoff( + return chat_completion_with_backoff( messages=messages, model_name=model, temperature=temperature, openai_api_key=api_key, ) - # Extract, Clean Message from GPT's Response - return response.strip(empty_escape_sequences) + # async for tokens in chat_completion_with_backoff( + # messages=messages, + # model_name=model, + # temperature=temperature, + # openai_api_key=api_key, + # ): + # logger.info(f"Tokens from GPT: {tokens}") + # yield tokens diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 03b6d9b1..0730f3f2 100644 --- 
a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -2,11 +2,19 @@ import os import logging from datetime import datetime +from typing import Any, Optional +from uuid import UUID +import asyncio +from threading import Thread # External Packages from langchain.chat_models import ChatOpenAI from langchain.llms import OpenAI from langchain.schema import ChatMessage +from langchain.callbacks.base import BaseCallbackHandler +from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler +from langchain.callbacks import AsyncIteratorCallbackHandler +from langchain.callbacks.base import BaseCallbackManager, AsyncCallbackHandler import openai import tiktoken from tenacity import ( @@ -20,12 +28,43 @@ from tenacity import ( # Internal Packages from khoj.utils.helpers import merge_dicts +import queue logger = logging.getLogger(__name__) max_prompt_size = {"gpt-3.5-turbo": 4096, "gpt-4": 8192} +class ThreadedGenerator: + def __init__(self): + self.queue = queue.Queue() + + def __iter__(self): + return self + + def __next__(self): + item = self.queue.get() + if item is StopIteration: + raise item + return item + + def send(self, data): + self.queue.put(data) + + def close(self): + self.queue.put(StopIteration) + + +class StreamingChatCallbackHandler(StreamingStdOutCallbackHandler): + def __init__(self, gen: ThreadedGenerator): + super().__init__() + self.gen = gen + + def on_llm_new_token(self, token: str, **kwargs) -> Any: + logger.debug(f"New Token: {token}") + self.gen.send(token) + + @retry( retry=( retry_if_exception_type(openai.error.Timeout) @@ -63,14 +102,28 @@ def completion_with_backoff(**kwargs): reraise=True, ) def chat_completion_with_backoff(messages, model_name, temperature, openai_api_key=None): + g = ThreadedGenerator() + t = Thread(target=llm_thread, args=(g, messages, model_name, temperature, openai_api_key)) + t.start() + return g + + +def llm_thread(g, messages, model_name, temperature, openai_api_key=None): + callback_handler = StreamingChatCallbackHandler(g) chat = ChatOpenAI( + streaming=True, + verbose=True, + callback_manager=BaseCallbackManager([callback_handler]), model_name=model_name, temperature=temperature, openai_api_key=openai_api_key or os.getenv("OPENAI_API_KEY"), request_timeout=20, max_retries=1, ) - return chat(messages).content + + chat(messages=messages) + + g.close() def generate_chatml_messages_with_context( diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index a32f420b..2d77d2ef 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -34,6 +34,7 @@ from khoj.utils.rawconfig import ( from khoj.utils.state import SearchType from khoj.utils import state, constants from khoj.utils.yaml import save_config_to_file_updated_state +from fastapi.responses import StreamingResponse # Initialize Router api = APIRouter() @@ -393,8 +394,8 @@ def update( return {"status": "ok", "message": "khoj reloaded"} -@api.get("/chat") -async def chat( +@api.get("/chat/init") +def chat_init( request: Request, q: Optional[str] = None, client: Optional[str] = None, @@ -411,13 +412,52 @@ async def chat( status_code=500, detail="Set your OpenAI API key via Khoj settings and restart it to use Khoj Chat." 
) + # Load Conversation History + meta_log = state.processor_config.conversation.meta_log + + user_state = { + "client_host": request.client.host, + "user_agent": user_agent or "unknown", + "referer": referer or "unknown", + "host": host or "unknown", + } + + state.telemetry += [ + log_telemetry( + telemetry_type="api", api="chat", client=client, app_config=state.config.app, properties=user_state + ) + ] + + # If user query is empty, return chat history + if not q: + return {"status": "ok", "response": meta_log.get("chat", [])} + + +@api.get("/chat", response_class=StreamingResponse) +async def chat( + request: Request, + q: Optional[str] = None, + client: Optional[str] = None, + user_agent: Optional[str] = Header(None), + referer: Optional[str] = Header(None), + host: Optional[str] = Header(None), +) -> StreamingResponse: + if ( + state.processor_config is None + or state.processor_config.conversation is None + or state.processor_config.conversation.openai_api_key is None + ): + raise HTTPException( + status_code=500, detail="Set your OpenAI API key via Khoj settings and restart it to use Khoj Chat." + ) + # Load Conversation History chat_session = state.processor_config.conversation.chat_session meta_log = state.processor_config.conversation.meta_log # If user query is empty, return chat history if not q: - return {"status": "ok", "response": meta_log.get("chat", [])} + return StreamingResponse(None) # Initialize Variables api_key = state.processor_config.conversation.openai_api_key @@ -446,24 +486,6 @@ async def chat( conversation_type = "notes" if compiled_references else "general" logger.debug(f"Conversation Type: {conversation_type}") - try: - with timer("Generating chat response took", logger): - gpt_response = converse(compiled_references, q, meta_log, model=chat_model, api_key=api_key) - status = "ok" - except Exception as e: - gpt_response = str(e) - status = "error" - - # Update Conversation History - state.processor_config.conversation.chat_session = message_to_prompt(q, chat_session, gpt_message=gpt_response) - state.processor_config.conversation.meta_log["chat"] = message_to_log( - q, - gpt_response, - user_message_metadata={"created": user_message_time}, - khoj_message_metadata={"context": compiled_references, "intent": {"inferred-queries": inferred_queries}}, - conversation_log=meta_log.get("chat", []), - ) - user_state = { "client_host": request.client.host, "user_agent": user_agent or "unknown", @@ -477,4 +499,20 @@ async def chat( ) ] - return {"status": status, "response": gpt_response, "context": compiled_references} + try: + with timer("Generating chat response took", logger): + gpt_response = converse(compiled_references, q, meta_log, model=chat_model, api_key=api_key) + except Exception as e: + gpt_response = str(e) + + # Update Conversation History + # state.processor_config.conversation.chat_session = message_to_prompt(q, chat_session, gpt_message=gpt_response) + # state.processor_config.conversation.meta_log["chat"] = message_to_log( + # q, + # gpt_response, + # user_message_metadata={"created": user_message_time}, + # khoj_message_metadata={"context": compiled_references, "intent": {"inferred-queries": inferred_queries}}, + # conversation_log=meta_log.get("chat", []), + # ) + + return StreamingResponse(gpt_response, media_type="text/event-stream", status_code=200) From afd162de01b05e021a872a7b0357dd5dee79e668 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 4 Jul 2023 12:47:50 -0700 Subject: [PATCH 02/14] Add reference notes to result response from GPT when 
streaming is completed - NOTE: results are still not being saved to conversation history --- src/khoj/interface/web/chat.html | 19 +++++++++++++++---- src/khoj/processor/conversation/gpt.py | 1 + src/khoj/processor/conversation/utils.py | 12 ++++++++---- src/khoj/routers/api.py | 12 ++++++++++-- 4 files changed, 34 insertions(+), 10 deletions(-) diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index 213f20ad..7d93c836 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -89,10 +89,21 @@ } const chunk = decoder.decode(value, { stream: true }); - new_response_text.innerHTML += chunk; - console.log(`Received ${chunk.length} bytes of data`); - console.log(`Chunk: ${chunk}`); - readStream(); + + if (chunk.startsWith("### compiled references:")) { + const rawReference = chunk.split("### compiled references:")[1]; + const rawReferenceAsJson = JSON.parse(rawReference); + let polishedReference = rawReferenceAsJson.map((reference, index) => generateReference(reference, index)) + .join(","); + + new_response_text.innerHTML += polishedReference; + } else { + new_response_text.innerHTML += chunk; + console.log(`Received ${chunk.length} bytes of data`); + console.log(`Chunk: ${chunk}`); + document.getElementById("chat-body").scrollTop = document.getElementById("chat-body").scrollHeight; + readStream(); + } }); } readStream(); diff --git a/src/khoj/processor/conversation/gpt.py b/src/khoj/processor/conversation/gpt.py index 74f13305..1c993c37 100644 --- a/src/khoj/processor/conversation/gpt.py +++ b/src/khoj/processor/conversation/gpt.py @@ -172,6 +172,7 @@ def converse(references, user_query, conversation_log={}, model="gpt-3.5-turbo", logger.debug(f"Conversation Context for GPT: {messages}") return chat_completion_with_backoff( messages=messages, + compiled_references=references, model_name=model, temperature=temperature, openai_api_key=api_key, diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 0730f3f2..f2a87b3c 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -6,6 +6,7 @@ from typing import Any, Optional from uuid import UUID import asyncio from threading import Thread +import json # External Packages from langchain.chat_models import ChatOpenAI @@ -36,8 +37,9 @@ max_prompt_size = {"gpt-3.5-turbo": 4096, "gpt-4": 8192} class ThreadedGenerator: - def __init__(self): + def __init__(self, compiled_references): self.queue = queue.Queue() + self.compiled_references = compiled_references def __iter__(self): return self @@ -45,13 +47,15 @@ class ThreadedGenerator: def __next__(self): item = self.queue.get() if item is StopIteration: - raise item + raise StopIteration return item def send(self, data): self.queue.put(data) def close(self): + if self.compiled_references and len(self.compiled_references) > 0: + self.queue.put(f"### compiled references:{json.dumps(self.compiled_references)}") self.queue.put(StopIteration) @@ -101,8 +105,8 @@ def completion_with_backoff(**kwargs): before_sleep=before_sleep_log(logger, logging.DEBUG), reraise=True, ) -def chat_completion_with_backoff(messages, model_name, temperature, openai_api_key=None): - g = ThreadedGenerator() +def chat_completion_with_backoff(messages, compiled_references, model_name, temperature, openai_api_key=None): + g = ThreadedGenerator(compiled_references) t = Thread(target=llm_thread, args=(g, messages, model_name, temperature, openai_api_key)) t.start() return g diff --git 
a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 2d77d2ef..3acbd62a 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -478,7 +478,7 @@ async def chat( result_list = [] for query in inferred_queries: result_list.extend( - await search(query, request=request, n=5, r=True, score_threshold=-5.0, dedupe=False) + await search(query, request=request, n=5, r=False, score_threshold=-5.0, dedupe=False) ) compiled_references = [item.additional["compiled"] for item in result_list] @@ -501,7 +501,15 @@ async def chat( try: with timer("Generating chat response took", logger): - gpt_response = converse(compiled_references, q, meta_log, model=chat_model, api_key=api_key) + gpt_response = converse( + compiled_references, + q, + meta_log, + model=chat_model, + api_key=api_key, + chat_session=chat_session, + inferred_queries=inferred_queries, + ) except Exception as e: gpt_response = str(e) From 79b1b1d35018ad92d574fb4d6608c18838555620 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 4 Jul 2023 17:33:52 -0700 Subject: [PATCH 03/14] Save streamed chat conversations via partial function passed to the ThreadGenerator --- src/khoj/processor/conversation/gpt.py | 11 +++++- src/khoj/processor/conversation/utils.py | 14 +++++--- src/khoj/routers/api.py | 46 +++++++++++++++--------- 3 files changed, 50 insertions(+), 21 deletions(-) diff --git a/src/khoj/processor/conversation/gpt.py b/src/khoj/processor/conversation/gpt.py index 1c993c37..14dcf87b 100644 --- a/src/khoj/processor/conversation/gpt.py +++ b/src/khoj/processor/conversation/gpt.py @@ -144,7 +144,15 @@ def extract_search_type(text, model, api_key=None, temperature=0.5, max_tokens=1 return json.loads(response.strip(empty_escape_sequences)) -def converse(references, user_query, conversation_log={}, model="gpt-3.5-turbo", api_key=None, temperature=0.2): +def converse( + references, + user_query, + conversation_log={}, + model="gpt-3.5-turbo", + api_key=None, + temperature=0.2, + completion_func=None, +): """ Converse with user using OpenAI's ChatGPT """ @@ -176,6 +184,7 @@ def converse(references, user_query, conversation_log={}, model="gpt-3.5-turbo", model_name=model, temperature=temperature, openai_api_key=api_key, + completion_func=completion_func, ) # async for tokens in chat_completion_with_backoff( diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index f2a87b3c..4305999f 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -37,9 +37,11 @@ max_prompt_size = {"gpt-3.5-turbo": 4096, "gpt-4": 8192} class ThreadedGenerator: - def __init__(self, compiled_references): + def __init__(self, compiled_references, completion_func=None): self.queue = queue.Queue() self.compiled_references = compiled_references + self.completion_func = completion_func + self.response = "" def __iter__(self): return self @@ -47,10 +49,13 @@ class ThreadedGenerator: def __next__(self): item = self.queue.get() if item is StopIteration: + if self.completion_func: + self.completion_func(gpt_response=self.response) raise StopIteration return item def send(self, data): + self.response += data self.queue.put(data) def close(self): @@ -65,7 +70,6 @@ class StreamingChatCallbackHandler(StreamingStdOutCallbackHandler): self.gen = gen def on_llm_new_token(self, token: str, **kwargs) -> Any: - logger.debug(f"New Token: {token}") self.gen.send(token) @@ -105,8 +109,10 @@ def completion_with_backoff(**kwargs): before_sleep=before_sleep_log(logger, logging.DEBUG), 
reraise=True, ) -def chat_completion_with_backoff(messages, compiled_references, model_name, temperature, openai_api_key=None): - g = ThreadedGenerator(compiled_references) +def chat_completion_with_backoff( + messages, compiled_references, model_name, temperature, openai_api_key=None, completion_func=None +): + g = ThreadedGenerator(compiled_references, completion_func=completion_func) t = Thread(target=llm_thread, args=(g, messages, model_name, temperature, openai_api_key)) t.start() return g diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 3acbd62a..d1fbbdb5 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -6,6 +6,7 @@ import yaml import logging from datetime import datetime from typing import List, Optional, Union +from functools import partial # External Packages from fastapi import APIRouter, HTTPException, Header, Request @@ -442,6 +443,24 @@ async def chat( referer: Optional[str] = Header(None), host: Optional[str] = Header(None), ) -> StreamingResponse: + def _save_to_conversation_log( + q: str, + gpt_response: str, + user_message_time: str, + compiled_references: List[str], + inferred_queries: List[str], + chat_session: str, + meta_log, + ): + state.processor_config.conversation.chat_session = message_to_prompt(q, chat_session, gpt_message=gpt_response) + state.processor_config.conversation.meta_log["chat"] = message_to_log( + q, + gpt_response, + user_message_metadata={"created": user_message_time}, + khoj_message_metadata={"context": compiled_references, "intent": {"inferred-queries": inferred_queries}}, + conversation_log=meta_log.get("chat", []), + ) + if ( state.processor_config is None or state.processor_config.conversation is None @@ -501,26 +520,21 @@ async def chat( try: with timer("Generating chat response took", logger): - gpt_response = converse( - compiled_references, + partial_completion = partial( + _save_to_conversation_log, q, - meta_log, - model=chat_model, - api_key=api_key, - chat_session=chat_session, + user_message_time=user_message_time, + compiled_references=compiled_references, inferred_queries=inferred_queries, + chat_session=chat_session, + meta_log=meta_log, ) + + gpt_response = converse( + compiled_references, q, meta_log, model=chat_model, api_key=api_key, completion_func=partial_completion + ) + except Exception as e: gpt_response = str(e) - # Update Conversation History - # state.processor_config.conversation.chat_session = message_to_prompt(q, chat_session, gpt_message=gpt_response) - # state.processor_config.conversation.meta_log["chat"] = message_to_log( - # q, - # gpt_response, - # user_message_metadata={"created": user_message_time}, - # khoj_message_metadata={"context": compiled_references, "intent": {"inferred-queries": inferred_queries}}, - # conversation_log=meta_log.get("chat", []), - # ) - return StreamingResponse(gpt_response, media_type="text/event-stream", status_code=200) From 67a8795b1fb2211582d832dc1eed55cdf60d43f4 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 5 Jul 2023 11:24:40 -0700 Subject: [PATCH 04/14] Clean-up commented out code --- src/khoj/interface/web/chat.html | 61 ++++++++++-------------- src/khoj/processor/conversation/utils.py | 9 ++-- src/khoj/routers/api.py | 7 +-- 3 files changed, 31 insertions(+), 46 deletions(-) diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index 7d93c836..d5d55956 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -84,7 +84,6 @@ function readStream() { reader.read().then(({ 
done, value }) => { if (done) { - console.log("Stream complete"); return; } @@ -99,8 +98,6 @@ new_response_text.innerHTML += polishedReference; } else { new_response_text.innerHTML += chunk; - console.log(`Received ${chunk.length} bytes of data`); - console.log(`Chunk: ${chunk}`); document.getElementById("chat-body").scrollTop = document.getElementById("chat-body").scrollHeight; readStream(); } @@ -108,14 +105,6 @@ } readStream(); }); - - - // fetch(url) - // .then(data => { - // // Render message by Khoj to chat body - // console.log(data.response); - // renderMessageWithReference(data.response, "khoj", data.context); - // }); } function incrementalChat(event) { @@ -428,31 +417,33 @@ diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 4305999f..6f6f1b70 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -2,9 +2,7 @@ import os import logging from datetime import datetime -from typing import Any, Optional -from uuid import UUID -import asyncio +from typing import Any from threading import Thread import json @@ -12,10 +10,8 @@ import json from langchain.chat_models import ChatOpenAI from langchain.llms import OpenAI from langchain.schema import ChatMessage -from langchain.callbacks.base import BaseCallbackHandler from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler -from langchain.callbacks import AsyncIteratorCallbackHandler -from langchain.callbacks.base import BaseCallbackManager, AsyncCallbackHandler +from langchain.callbacks.base import BaseCallbackManager import openai import tiktoken from tenacity import ( @@ -50,6 +46,7 @@ class ThreadedGenerator: item = self.queue.get() if item is StopIteration: if self.completion_func: + # The completion func effective acts as a callback. It adds the aggregated response to the conversation history. It's constructed in api.py. 
self.completion_func(gpt_response=self.response) raise StopIteration return item diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index d1fbbdb5..87f428da 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -398,7 +398,6 @@ def update( @api.get("/chat/init") def chat_init( request: Request, - q: Optional[str] = None, client: Optional[str] = None, user_agent: Optional[str] = Header(None), referer: Optional[str] = Header(None), @@ -429,9 +428,7 @@ def chat_init( ) ] - # If user query is empty, return chat history - if not q: - return {"status": "ok", "response": meta_log.get("chat", [])} + return {"status": "ok", "response": meta_log.get("chat", [])} @api.get("/chat", response_class=StreamingResponse) @@ -474,7 +471,7 @@ async def chat( chat_session = state.processor_config.conversation.chat_session meta_log = state.processor_config.conversation.meta_log - # If user query is empty, return chat history + # If user query is empty, return nothing if not q: return StreamingResponse(None) From 68e635cc328aebc8a3364e66e3530663090dfc4d Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 5 Jul 2023 11:33:56 -0700 Subject: [PATCH 05/14] Remove additional comments and debug statements --- src/khoj/processor/conversation/gpt.py | 10 ---------- src/khoj/processor/conversation/utils.py | 3 ++- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/src/khoj/processor/conversation/gpt.py b/src/khoj/processor/conversation/gpt.py index 14dcf87b..e1b54303 100644 --- a/src/khoj/processor/conversation/gpt.py +++ b/src/khoj/processor/conversation/gpt.py @@ -177,7 +177,6 @@ def converse( ) # Get Response from GPT - logger.debug(f"Conversation Context for GPT: {messages}") return chat_completion_with_backoff( messages=messages, compiled_references=references, @@ -186,12 +185,3 @@ def converse( openai_api_key=api_key, completion_func=completion_func, ) - - # async for tokens in chat_completion_with_backoff( - # messages=messages, - # model_name=model, - # temperature=temperature, - # openai_api_key=api_key, - # ): - # logger.info(f"Tokens from GPT: {tokens}") - # yield tokens diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 6f6f1b70..cab4ef19 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -46,7 +46,8 @@ class ThreadedGenerator: item = self.queue.get() if item is StopIteration: if self.completion_func: - # The completion func effective acts as a callback. It adds the aggregated response to the conversation history. It's constructed in api.py. + # The completion func effective acts as a callback. + # It adds the aggregated response to the conversation history. It's constructed in api.py. 
self.completion_func(gpt_response=self.response) raise StopIteration return item From 3ff5074cf5970d1285b4a49d1b7221593f0e8a99 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 5 Jul 2023 14:59:44 -0700 Subject: [PATCH 06/14] Log the end-to-end time of generating a streamed response from OpenAI --- src/khoj/processor/conversation/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index cab4ef19..4c3e7619 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -2,6 +2,7 @@ import os import logging from datetime import datetime +from time import perf_counter from typing import Any from threading import Thread import json @@ -38,6 +39,7 @@ class ThreadedGenerator: self.compiled_references = compiled_references self.completion_func = completion_func self.response = "" + self.start_time = perf_counter() def __iter__(self): return self @@ -45,6 +47,8 @@ class ThreadedGenerator: def __next__(self): item = self.queue.get() if item is StopIteration: + time_to_response = perf_counter() - self.start_time + logger.info(f"Time to stream full response: {time_to_response:.3f}") if self.completion_func: # The completion func effective acts as a callback. # It adds the aggregated response to the conversation history. It's constructed in api.py. From 4e6b66b139a097c358c462c7b71740f4c78c32cd Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 5 Jul 2023 15:01:22 -0700 Subject: [PATCH 07/14] Add support for streaming chat response from OpenAI to Obsidian - I needed to installed node-fetch to accomplish this, as the built-in request object from Obsidian doesn't seem to support streaming and the built-in fetch object is very sensitive to any and all cross origin requests --- src/interface/obsidian/package.json | 11 +++- src/interface/obsidian/src/chat_modal.ts | 79 +++++++++++++++++++---- src/interface/obsidian/yarn.lock | 82 ++++++++++++++++++++++++ src/khoj/interface/web/chat.html | 1 - 4 files changed, 159 insertions(+), 14 deletions(-) diff --git a/src/interface/obsidian/package.json b/src/interface/obsidian/package.json index a9d47535..9604a786 100644 --- a/src/interface/obsidian/package.json +++ b/src/interface/obsidian/package.json @@ -8,7 +8,12 @@ "build": "tsc -noEmit -skipLibCheck && node esbuild.config.mjs production", "version": "node version-bump.mjs && git add manifest.json versions.json" }, - "keywords": ["search", "chat", "AI", "assistant"], + "keywords": [ + "search", + "chat", + "AI", + "assistant" + ], "author": "Debanjum Singh Solanky", "license": "GPL-3.0-or-later", "devDependencies": { @@ -20,5 +25,9 @@ "obsidian": "latest", "tslib": "2.4.0", "typescript": "4.7.4" + }, + "dependencies": { + "@types/node-fetch": "^2.6.4", + "node-fetch": "3.0.0" } } diff --git a/src/interface/obsidian/src/chat_modal.ts b/src/interface/obsidian/src/chat_modal.ts index acca8813..0b5624f8 100644 --- a/src/interface/obsidian/src/chat_modal.ts +++ b/src/interface/obsidian/src/chat_modal.ts @@ -1,6 +1,6 @@ import { App, Modal, request, Setting } from 'obsidian'; import { KhojSetting } from 'src/settings'; - +import fetch from "node-fetch"; export class KhojChatModal extends Modal { result: string; @@ -34,13 +34,8 @@ export class KhojChatModal extends Modal { // Create area for chat logs contentEl.createDiv({ attr: { id: "khoj-chat-body", class: "khoj-chat-body" } }); - // Get conversation history from Khoj backend - let chatUrl = 
`${this.setting.khojUrl}/api/chat?client=obsidian`; - let response = await request(chatUrl); - let chatLogs = JSON.parse(response).response; - chatLogs.forEach((chatLog: any) => { - this.renderMessageWithReferences(chatLog.message, chatLog.by, chatLog.context, new Date(chatLog.created)); - }); + // Get chat history from Khoj backend + await this.getChatHistory(); // Add chat input field contentEl.createEl("input", @@ -104,6 +99,35 @@ export class KhojChatModal extends Modal { return chat_message_el } + createKhojResponseDiv(dt?: Date): HTMLDivElement { + let message_time = this.formatDate(dt ?? new Date()); + + // Append message to conversation history HTML element. + // The chat logs should display above the message input box to follow standard UI semantics + let chat_body_el = this.contentEl.getElementsByClassName("khoj-chat-body")[0]; + let chat_message_el = chat_body_el.createDiv({ + attr: { + "data-meta": `🏮 Khoj at ${message_time}`, + class: `khoj-chat-message khoj` + }, + }).createDiv({ + attr: { + class: `khoj-chat-message-text khoj` + }, + }) + + // Scroll to bottom after inserting chat messages + this.modalEl.scrollTop = this.modalEl.scrollHeight; + + return chat_message_el + } + + renderIncrementalMessage(htmlElement: HTMLDivElement, additionalMessage: string) { + htmlElement.innerHTML += additionalMessage; + // Scroll to bottom of modal, till the send message input box + this.modalEl.scrollTop = this.modalEl.scrollHeight; + } + formatDate(date: Date): string { // Format date in HH:MM, DD MMM YYYY format let time_string = date.toLocaleTimeString('en-IN', { hour: '2-digit', minute: '2-digit', hour12: false }); @@ -111,6 +135,17 @@ export class KhojChatModal extends Modal { return `${time_string}, ${date_string}`; } + + async getChatHistory(): Promise { + // Get chat history from Khoj backend + let chatUrl = `${this.setting.khojUrl}/api/chat/init?client=obsidian`; + let response = await request(chatUrl); + let chatLogs = JSON.parse(response).response; + chatLogs.forEach((chatLog: any) => { + this.renderMessageWithReferences(chatLog.message, chatLog.by, chatLog.context, new Date(chatLog.created)); + }); + } + async getChatResponse(query: string | undefined | null): Promise { // Exit if query is empty if (!query || query === "") return; @@ -121,10 +156,30 @@ export class KhojChatModal extends Modal { // Get chat response from Khoj backend let encodedQuery = encodeURIComponent(query); let chatUrl = `${this.setting.khojUrl}/api/chat?q=${encodedQuery}&client=obsidian`; - let response = await request(chatUrl); - let data = JSON.parse(response); - // Render Khoj response as chat message - this.renderMessage(data.response, "khoj"); + let response = await fetch(chatUrl, { + method: "GET", + headers: { + "Access-Control-Allow-Origin": "*", + "Content-Type": "text/event-stream" + }, + }) + let responseElemeent = this.createKhojResponseDiv(); + + try { + if (response.body == null) { + throw new Error("Response body is null"); + } + + for await (const chunk of response.body) { + const responseText = chunk.toString(); + if (responseText.startsWith("### compiled references:")) { + return; + } + this.renderIncrementalMessage(responseElemeent, responseText); + } + } catch (err) { + this.renderIncrementalMessage(responseElemeent, "Sorry, unable to get response from Khoj backend ❤️‍🩹. 
Contact developer for help at team@khoj.dev or in Discord") + } } } diff --git a/src/interface/obsidian/yarn.lock b/src/interface/obsidian/yarn.lock index a11ea15e..c5ffbb28 100644 --- a/src/interface/obsidian/yarn.lock +++ b/src/interface/obsidian/yarn.lock @@ -40,6 +40,19 @@ resolved "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.11.tgz" integrity sha512-wOuvG1SN4Us4rez+tylwwwCV1psiNVOkJeM3AUWUNWg/jDQY2+HE/444y5gc+jBmRqASOm2Oeh5c1axHobwRKQ== +"@types/node-fetch@^2.6.4": + version "2.6.4" + resolved "https://registry.yarnpkg.com/@types/node-fetch/-/node-fetch-2.6.4.tgz#1bc3a26de814f6bf466b25aeb1473fa1afe6a660" + integrity sha512-1ZX9fcN4Rvkvgv4E6PAY5WXUFWFcRWxZa3EW83UjycOB9ljJCedb2CupIP4RZMEwF/M3eTcCihbBRgwtGbg5Rg== + dependencies: + "@types/node" "*" + form-data "^3.0.0" + +"@types/node@*": + version "20.3.3" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.3.3.tgz#329842940042d2b280897150e023e604d11657d6" + integrity sha512-wheIYdr4NYML61AjC8MKj/2jrR/kDQri/CIpVoZwldwhnIrD/j9jIU5bJ8yBKuB2VhpFV7Ab6G2XkBjv9r9Zzw== + "@types/node@^16.11.6": version "16.18.12" resolved "https://registry.npmjs.org/@types/node/-/node-16.18.12.tgz" @@ -137,6 +150,11 @@ array-union@^2.1.0: resolved "https://registry.npmjs.org/array-union/-/array-union-2.1.0.tgz" integrity sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw== +asynckit@^0.4.0: + version "0.4.0" + resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79" + integrity sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q== + braces@^3.0.2: version "3.0.2" resolved "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz" @@ -149,6 +167,18 @@ builtin-modules@3.3.0: resolved "https://registry.npmjs.org/builtin-modules/-/builtin-modules-3.3.0.tgz" integrity sha512-zhaCDicdLuWN5UbN5IMnFqNMhNfo919sH85y2/ea+5Yg9TsTkeZxpL+JLbp6cgYFS4sRLp3YV4S6yDuqVWHYOw== +combined-stream@^1.0.8: + version "1.0.8" + resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f" + integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg== + dependencies: + delayed-stream "~1.0.0" + +data-uri-to-buffer@^3.0.1: + version "3.0.1" + resolved "https://registry.yarnpkg.com/data-uri-to-buffer/-/data-uri-to-buffer-3.0.1.tgz#594b8973938c5bc2c33046535785341abc4f3636" + integrity sha512-WboRycPNsVw3B3TL559F7kuBUM4d8CgMEvk6xEJlOp7OBPjt6G7z8WMWlD2rOFZLk6OYfFIUGsCOWzcQH9K2og== + debug@^4.3.4: version "4.3.4" resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz" @@ -156,6 +186,11 @@ debug@^4.3.4: dependencies: ms "2.1.2" +delayed-stream@~1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619" + integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ== + dir-glob@^3.0.1: version "3.0.1" resolved "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz" @@ -349,6 +384,14 @@ fastq@^1.6.0: dependencies: reusify "^1.0.4" +fetch-blob@^3.1.2: + version "3.2.0" + resolved "https://registry.yarnpkg.com/fetch-blob/-/fetch-blob-3.2.0.tgz#f09b8d4bbd45adc6f0c20b7e787e793e309dcce9" + integrity sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ== + dependencies: + node-domexception "^1.0.0" + web-streams-polyfill "^3.0.3" + fill-range@^7.0.1: 
version "7.0.1" resolved "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz" @@ -356,6 +399,15 @@ fill-range@^7.0.1: dependencies: to-regex-range "^5.0.1" +form-data@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/form-data/-/form-data-3.0.1.tgz#ebd53791b78356a99af9a300d4282c4d5eb9755f" + integrity sha512-RHkBKtLWUVwd7SqRIvCZMEvAMoGUp0XU+seQiZejj0COz3RI3hWP4sCv3gZWWLjJTd7rGwcsF5eKZGii0r/hbg== + dependencies: + asynckit "^0.4.0" + combined-stream "^1.0.8" + mime-types "^2.1.12" + functional-red-black-tree@^1.0.1: version "1.0.1" resolved "https://registry.npmjs.org/functional-red-black-tree/-/functional-red-black-tree-1.0.1.tgz" @@ -422,6 +474,18 @@ micromatch@^4.0.4: braces "^3.0.2" picomatch "^2.3.1" +mime-db@1.52.0: + version "1.52.0" + resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.52.0.tgz#bbabcdc02859f4987301c856e3387ce5ec43bf70" + integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg== + +mime-types@^2.1.12: + version "2.1.35" + resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.35.tgz#381a871b62a734450660ae3deee44813f70d959a" + integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw== + dependencies: + mime-db "1.52.0" + moment@2.29.4: version "2.29.4" resolved "https://registry.npmjs.org/moment/-/moment-2.29.4.tgz" @@ -432,6 +496,19 @@ ms@2.1.2: resolved "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz" integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== +node-domexception@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/node-domexception/-/node-domexception-1.0.0.tgz#6888db46a1f71c0b76b3f7555016b63fe64766e5" + integrity sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ== + +node-fetch@3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-3.0.0.tgz#79da7146a520036f2c5f644e4a26095f17e411ea" + integrity sha512-bKMI+C7/T/SPU1lKnbQbwxptpCrG9ashG+VkytmXCPZyuM9jB6VU+hY0oi4lC8LxTtAeWdckNCTa3nrGsAdA3Q== + dependencies: + data-uri-to-buffer "^3.0.1" + fetch-blob "^3.1.2" + obsidian@latest: version "1.1.1" resolved "https://registry.npmjs.org/obsidian/-/obsidian-1.1.1.tgz" @@ -513,6 +590,11 @@ typescript@4.7.4: resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.7.4.tgz#1a88596d1cf47d59507a1bcdfb5b9dfe4d488235" integrity sha512-C0WQT0gezHuw6AdY1M2jxUO83Rjf0HP7Sk1DtXj6j1EwkQNZrHAg2XPWlq62oqEhYvONq5pkC2Y9oPljWToLmQ== +web-streams-polyfill@^3.0.3: + version "3.2.1" + resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-3.2.1.tgz#71c2718c52b45fd49dbeee88634b3a60ceab42a6" + integrity sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q== + yallist@^4.0.0: version "4.0.0" resolved "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz" diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index d5d55956..293852b0 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -77,7 +77,6 @@ // Call specified Khoj API which returns a streamed response of type text/plain fetch(url) .then(response => { - console.log(response); const reader = response.body.getReader(); const decoder = new TextDecoder(); From 257a421e45a086664c5fea6a8c4f488fc56b6ff1 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 5 Jul 2023 15:12:18 -0700 Subject: [PATCH 08/14] Bonus: add try-catch 
logic around telemetry upload in case of JSON serializability issues --- src/khoj/configure.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/khoj/configure.py b/src/khoj/configure.py index ecd8345e..ff67fc19 100644 --- a/src/khoj/configure.py +++ b/src/khoj/configure.py @@ -253,6 +253,13 @@ def upload_telemetry(): try: logger.debug(f"📡 Upload usage telemetry to {constants.telemetry_server}:\n{state.telemetry}") + for log in state.telemetry: + for field in log: + # Check if the value for the field is JSON serializable + try: + json.dumps(log[field]) + except TypeError: + log[field] = str(log[field]) requests.post(constants.telemetry_server, json=state.telemetry) except Exception as e: logger.error(f"📡 Error uploading telemetry: {e}") From c12ec1fd03d81a1f3adaaea1bc8863a207aa6bad Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 5 Jul 2023 17:19:39 -0700 Subject: [PATCH 09/14] Show temp status message in Khoj web chat while Khoj is thinking - Scroll to bottom after adding temporary status message and references too --- src/khoj/interface/web/chat.html | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index 293852b0..26818771 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -74,6 +74,10 @@ new_response_text.classList.add("chat-message-text", "khoj"); new_response.appendChild(new_response_text); + // Temporary status message to indicate that Khoj is thinking + new_response_text.innerHTML = "🤔"; + document.getElementById("chat-body").scrollTop = document.getElementById("chat-body").scrollHeight; + // Call specified Khoj API which returns a streamed response of type text/plain fetch(url) .then(response => { @@ -96,10 +100,17 @@ new_response_text.innerHTML += polishedReference; } else { + // Clear temporary status message + if (new_response_text.innerHTML === "🤔") { + new_response_text.innerHTML = ""; + } + new_response_text.innerHTML += chunk; - document.getElementById("chat-body").scrollTop = document.getElementById("chat-body").scrollHeight; readStream(); } + + // Scroll to bottom of chat window as chat response is streamed + document.getElementById("chat-body").scrollTop = document.getElementById("chat-body").scrollHeight; }); } readStream(); From 8271abe72964f1ea5a666b15ed9bf4fd0ad36a0b Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 5 Jul 2023 17:24:42 -0700 Subject: [PATCH 10/14] Use optional chaining operator to extract khojBannerSubmit from conditional --- src/khoj/interface/web/chat.html | 50 +++++++++++++++----------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index 26818771..8ec43140 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -427,33 +427,31 @@ From 0ba838b53a9f0e66ccb9d127aa9cf85c926f1959 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 5 Jul 2023 18:01:49 -0700 Subject: [PATCH 11/14] Show temp status message in Khoj Obsidian chat while Khoj is thinking - Scroll to bottom after adding temporary status message and references too --- src/interface/obsidian/src/chat_modal.ts | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/interface/obsidian/src/chat_modal.ts b/src/interface/obsidian/src/chat_modal.ts index 0b5624f8..cbc18e31 100644 --- a/src/interface/obsidian/src/chat_modal.ts +++ 
b/src/interface/obsidian/src/chat_modal.ts @@ -156,6 +156,10 @@ export class KhojChatModal extends Modal { // Get chat response from Khoj backend let encodedQuery = encodeURIComponent(query); let chatUrl = `${this.setting.khojUrl}/api/chat?q=${encodedQuery}&client=obsidian`; + let responseElement = this.createKhojResponseDiv(); + + // Temporary status message to indicate that Khoj is thinking + this.renderIncrementalMessage(responseElement, "🤔"); let response = await fetch(chatUrl, { method: "GET", @@ -164,22 +168,25 @@ export class KhojChatModal extends Modal { "Content-Type": "text/event-stream" }, }) - let responseElemeent = this.createKhojResponseDiv(); try { if (response.body == null) { throw new Error("Response body is null"); } + // Clear thinking status message + if (responseElement.innerHTML === "🤔") { + responseElement.innerHTML = ""; + } for await (const chunk of response.body) { const responseText = chunk.toString(); if (responseText.startsWith("### compiled references:")) { return; } - this.renderIncrementalMessage(responseElemeent, responseText); + this.renderIncrementalMessage(responseElement, responseText); } } catch (err) { - this.renderIncrementalMessage(responseElemeent, "Sorry, unable to get response from Khoj backend ❤️‍🩹. Contact developer for help at team@khoj.dev or in Discord") + this.renderIncrementalMessage(responseElement, "Sorry, unable to get response from Khoj backend ❤️‍🩹. Contact developer for help at team@khoj.dev or in Discord") } } } From 46269ddfd38b24c63cad72bced48b7d87ab4abdd Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 5 Jul 2023 18:27:06 -0700 Subject: [PATCH 12/14] Fix chat logging messages to get context without flooding logs --- src/khoj/processor/conversation/gpt.py | 2 ++ src/khoj/processor/conversation/utils.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/khoj/processor/conversation/gpt.py b/src/khoj/processor/conversation/gpt.py index e1b54303..2ee93053 100644 --- a/src/khoj/processor/conversation/gpt.py +++ b/src/khoj/processor/conversation/gpt.py @@ -175,6 +175,8 @@ def converse( conversation_log, model, ) + truncated_messages = "\n".join({f"{message.content[:40]}..." for message in messages}) + logger.debug(f"Conversation Context for GPT: {truncated_messages}") # Get Response from GPT return chat_completion_with_backoff( diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 4c3e7619..99084bf0 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -48,7 +48,7 @@ class ThreadedGenerator: item = self.queue.get() if item is StopIteration: time_to_response = perf_counter() - self.start_time - logger.info(f"Time to stream full response: {time_to_response:.3f}") + logger.info(f"Chat streaming took: {time_to_response:.3f} seconds") if self.completion_func: # The completion func effective acts as a callback. # It adds the aggregated response to the conversation history. It's constructed in api.py. 
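Taken together, patches 01-12 assemble a queue-backed producer/consumer bridge: LangChain's streaming callback pushes tokens from a worker thread into a ThreadedGenerator, FastAPI's StreamingResponse iterates that generator on the request side, and a completion callback (bound with functools.partial in api.py) persists the aggregated response once the stream closes. Below is a minimal, self-contained sketch of that pattern, runnable on its own — fake_llm is a stand-in for the real ChatOpenAI call and is not code from this series:

    import queue
    from threading import Thread

    class ThreadedGenerator:
        """Iterator fed by a worker thread via a queue, so a streaming
        response can consume tokens as they are produced."""

        def __init__(self, completion_func=None):
            self.queue = queue.Queue()
            self.completion_func = completion_func
            self.response = ""

        def __iter__(self):
            return self

        def __next__(self):
            item = self.queue.get()  # blocks until the producer sends a token
            if item is StopIteration:
                if self.completion_func:
                    # Fires once with the full aggregated response,
                    # e.g. to save it to conversation history
                    self.completion_func(gpt_response=self.response)
                raise StopIteration
            return item

        def send(self, data):
            self.response += data
            self.queue.put(data)

        def close(self):
            self.queue.put(StopIteration)

    def fake_llm(g: ThreadedGenerator):
        # Stand-in for StreamingChatCallbackHandler.on_llm_new_token()
        for token in ["Streamed ", "tokens ", "arrive ", "incrementally."]:
            g.send(token)
        g.close()

    g = ThreadedGenerator(completion_func=lambda gpt_response: print(f"\nSaved: {gpt_response}"))
    Thread(target=fake_llm, args=(g,)).start()
    for token in g:
        print(token, end="", flush=True)

Note the design choice the patches rely on: the StopIteration class object itself doubles as the end-of-stream sentinel on the queue, which is why __next__ compares with `is` before re-raising.
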
From e562114f6b4d0ec71c6d6940ffe60b6bd76ed0e3 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 5 Jul 2023 18:53:49 -0700 Subject: [PATCH 13/14] Improve comments, var names in js for chat streaming on web interface --- src/khoj/interface/web/chat.html | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index 8ec43140..1b9f5cb1 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -90,21 +90,22 @@ return; } + // Decode message chunk from stream const chunk = decoder.decode(value, { stream: true }); - if (chunk.startsWith("### compiled references:")) { - const rawReference = chunk.split("### compiled references:")[1]; - const rawReferenceAsJson = JSON.parse(rawReference); - let polishedReference = rawReferenceAsJson.map((reference, index) => generateReference(reference, index)) + // Display references used to generate response + const rawReferences = chunk.split("### compiled references:")[1]; + const rawReferencesAsJson = JSON.parse(rawReferences); + let polishedReferences = rawReferencesAsJson + .map((reference, index) => generateReference(reference, index)) .join(","); - - new_response_text.innerHTML += polishedReference; + new_response_text.innerHTML += polishedReferences; } else { - // Clear temporary status message + // Display response from Khoj if (new_response_text.innerHTML === "🤔") { + // Clear temporary status message new_response_text.innerHTML = ""; } - new_response_text.innerHTML += chunk; readStream(); } From e111eda6aef487e34573d02bfa5b9321ce721676 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 5 Jul 2023 18:55:14 -0700 Subject: [PATCH 14/14] Make client, app_config optional in telemetry logger for correct typing --- src/khoj/utils/helpers.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index 13895fb8..7a6cf378 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -175,7 +175,11 @@ def get_server_id(): def log_telemetry( - telemetry_type: str, api: str = None, client: str = None, app_config: AppConfig = None, properties: dict = None + telemetry_type: str, + api: str = None, + client: Optional[str] = None, + app_config: Optional[AppConfig] = None, + properties: dict = None, ): """Log basic app usage telemetry like client, os, api called""" # Do not log usage telemetry, if telemetry is disabled via app config
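
With patch 14 applied, the chat API contract this series leaves behind is: GET /api/chat/init returns the conversation history as JSON, while GET /api/chat?q=... streams raw response tokens (the endpoint advertises text/event-stream but does not frame chunks as SSE events), terminated by a "### compiled references:" sentinel followed by a JSON array of references — the same sentinel both the web and Obsidian clients split on. A hedged sketch of a third-party Python client for that contract follows; the host/port and the choice of the requests library are assumptions, and, like the bundled clients, it assumes the sentinel arrives at the start of its own chunk rather than split across chunk boundaries:

    import json

    import requests  # assumed dependency; any HTTP client with streaming works

    KHOJ_URL = "http://localhost:8000"  # adjust to wherever your Khoj server runs
    SENTINEL = "### compiled references:"

    def stream_chat(query: str) -> list:
        """Print response tokens as they arrive; return the compiled references."""
        params = {"q": query, "client": "web"}
        with requests.get(f"{KHOJ_URL}/api/chat", params=params, stream=True) as r:
            r.raise_for_status()
            r.encoding = r.encoding or "utf-8"  # ensure chunks decode to str
            for chunk in r.iter_content(chunk_size=None, decode_unicode=True):
                if chunk.startswith(SENTINEL):
                    # References trail the response as JSON after the sentinel
                    return json.loads(chunk[len(SENTINEL):])
                print(chunk, end="", flush=True)
        return []

    references = stream_chat("What did I write about streaming?")
    print(f"\n{len(references)} reference(s) used")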