From 2bb2ff27a4983e40d41f3981827645927b256723 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Mon, 11 Nov 2024 11:21:26 -0800 Subject: [PATCH] Rename attached_files to query_files. Update relevant backend and client-side code. --- src/interface/web/app/chat/page.tsx | 26 ++++--- .../components/chatHistory/chatHistory.tsx | 2 +- .../components/chatMessage/chatMessage.tsx | 25 +++--- .../processor/content/pdf/pdf_to_entries.py | 10 ++- .../conversation/anthropic/anthropic_chat.py | 8 +- .../conversation/google/gemini_chat.py | 8 +- .../conversation/offline/chat_model.py | 8 +- src/khoj/processor/conversation/openai/gpt.py | 8 +- src/khoj/processor/conversation/utils.py | 44 +++++------ src/khoj/processor/image/generate.py | 4 +- src/khoj/processor/tools/online_search.py | 8 +- src/khoj/processor/tools/run_code.py | 8 +- src/khoj/routers/api.py | 10 +-- src/khoj/routers/api_chat.py | 44 +++++------ src/khoj/routers/helpers.py | 76 +++++++++---------- src/khoj/routers/research.py | 16 ++-- 16 files changed, 159 insertions(+), 146 deletions(-) diff --git a/src/interface/web/app/chat/page.tsx b/src/interface/web/app/chat/page.tsx index e069c2e3..966cdd17 100644 --- a/src/interface/web/app/chat/page.tsx +++ b/src/interface/web/app/chat/page.tsx @@ -83,19 +83,21 @@ function ChatBodyData(props: ChatBodyDataProps) { } const storedUploadedFiles = localStorage.getItem("uploadedFiles"); - const parsedFiles = storedUploadedFiles ? JSON.parse(storedUploadedFiles) : []; - const uploadedFiles: AttachedFileText[] = []; - for (const file of parsedFiles) { - uploadedFiles.push({ - name: file.name, - file_type: file.file_type, - content: file.content, - size: file.size, - }); + if (storedUploadedFiles) { + const parsedFiles = storedUploadedFiles ? JSON.parse(storedUploadedFiles) : []; + const uploadedFiles: AttachedFileText[] = []; + for (const file of parsedFiles) { + uploadedFiles.push({ + name: file.name, + file_type: file.file_type, + content: file.content, + size: file.size, + }); + } + localStorage.removeItem("uploadedFiles"); + props.setUploadedFiles(uploadedFiles); } - localStorage.removeItem("uploadedFiles"); - props.setUploadedFiles(uploadedFiles); }, [setQueryToProcess, props.setImages, conversationId]); useEffect(() => { @@ -212,7 +214,7 @@ export default function Chat() { timestamp: new Date().toISOString(), rawQuery: queryToProcess || "", images: images, - attachedFiles: uploadedFiles, + queryFiles: uploadedFiles, }; setMessages((prevMessages) => [...prevMessages, newStreamMessage]); setProcessQuerySignal(true); diff --git a/src/interface/web/app/components/chatHistory/chatHistory.tsx b/src/interface/web/app/components/chatHistory/chatHistory.tsx index 37b44f3a..ea566df4 100644 --- a/src/interface/web/app/components/chatHistory/chatHistory.tsx +++ b/src/interface/web/app/components/chatHistory/chatHistory.tsx @@ -373,7 +373,7 @@ export default function ChatHistory(props: ChatHistoryProps) { images: message.images, conversationId: props.conversationId, turnId: messageTurnId, - attachedFiles: message.attachedFiles, + queryFiles: message.queryFiles, }} customClassName="fullHistory" borderLeftColor={`${data?.agent?.color}-500`} diff --git a/src/interface/web/app/components/chatMessage/chatMessage.tsx b/src/interface/web/app/components/chatMessage/chatMessage.tsx index bfe9e9e4..aae271b1 100644 --- a/src/interface/web/app/components/chatMessage/chatMessage.tsx +++ b/src/interface/web/app/components/chatMessage/chatMessage.tsx @@ -161,7 +161,7 @@ export interface SingleChatMessage { images?: string[]; conversationId: string; turnId?: string; - attachedFiles?: AttachedFileText[]; + queryFiles?: AttachedFileText[]; } export interface StreamMessage { @@ -178,7 +178,7 @@ export interface StreamMessage { intentType?: string; inferredQueries?: string[]; turnId?: string; - attachedFiles?: AttachedFileText[]; + queryFiles?: AttachedFileText[]; } export interface ChatHistoryData { @@ -708,16 +708,21 @@ const ChatMessage = forwardRef((props, ref) => onMouseLeave={(event) => setIsHovering(false)} onMouseEnter={(event) => setIsHovering(true)} > - {props.chatMessage.attachedFiles && props.chatMessage.attachedFiles.length > 0 && ( -
- {props.chatMessage.attachedFiles.map((file, index) => ( + {props.chatMessage.queryFiles && props.chatMessage.queryFiles.length > 0 && ( +
+ {props.chatMessage.queryFiles.map((file, index) => ( - -
- {getIconFromFilename(file.file_type)} - {file.name} + +
+
+ {getIconFromFilename(file.file_type)} +
+ {file.name} {file.size && ( - + ({convertBytesToText(file.size)}) )} diff --git a/src/khoj/processor/content/pdf/pdf_to_entries.py b/src/khoj/processor/content/pdf/pdf_to_entries.py index 39685996..f1ac5104 100644 --- a/src/khoj/processor/content/pdf/pdf_to_entries.py +++ b/src/khoj/processor/content/pdf/pdf_to_entries.py @@ -103,9 +103,17 @@ class PdfToEntries(TextToEntries): pdf_entries_per_file = loader.load() # Convert the loaded entries into the desired format - pdf_entry_by_pages = [page.page_content for page in pdf_entries_per_file] + pdf_entry_by_pages = [PdfToEntries.clean_text(page.page_content) for page in pdf_entries_per_file] except Exception as e: logger.warning(f"Unable to process file: {pdf_file}. This file will not be indexed.") logger.warning(e, exc_info=True) return pdf_entry_by_pages + + @staticmethod + def clean_text(text: str) -> str: + # Remove null bytes + text = text.replace("\x00", "") + # Replace invalid Unicode + text = text.encode("utf-8", errors="ignore").decode("utf-8") + return text diff --git a/src/khoj/processor/conversation/anthropic/anthropic_chat.py b/src/khoj/processor/conversation/anthropic/anthropic_chat.py index fac8dfa4..c934292a 100644 --- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py +++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py @@ -36,7 +36,7 @@ def extract_questions_anthropic( query_images: Optional[list[str]] = None, vision_enabled: bool = False, personality_context: Optional[str] = None, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ): """ @@ -83,7 +83,7 @@ def extract_questions_anthropic( images=query_images, model_type=ChatModelOptions.ModelType.ANTHROPIC, vision_enabled=vision_enabled, - attached_file_context=attached_files, + attached_file_context=query_files, ) messages = [] @@ -152,7 +152,7 @@ def converse_anthropic( agent: Agent = None, query_images: Optional[list[str]] = None, vision_available: bool = False, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ): """ @@ -210,7 +210,7 @@ def converse_anthropic( query_images=query_images, vision_enabled=vision_available, model_type=ChatModelOptions.ModelType.ANTHROPIC, - attached_files=attached_files, + query_files=query_files, ) messages, system_prompt = format_messages_for_anthropic(messages, system_prompt) diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py index b7a7739f..db6f3c4e 100644 --- a/src/khoj/processor/conversation/google/gemini_chat.py +++ b/src/khoj/processor/conversation/google/gemini_chat.py @@ -37,7 +37,7 @@ def extract_questions_gemini( query_images: Optional[list[str]] = None, vision_enabled: bool = False, personality_context: Optional[str] = None, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ): """ @@ -84,7 +84,7 @@ def extract_questions_gemini( images=query_images, model_type=ChatModelOptions.ModelType.GOOGLE, vision_enabled=vision_enabled, - attached_file_context=attached_files, + attached_file_context=query_files, ) messages = [] @@ -162,7 +162,7 @@ def converse_gemini( agent: Agent = None, query_images: Optional[list[str]] = None, vision_available: bool = False, - attached_files: str = None, + query_files: str = None, tracer={}, ): """ @@ -221,7 +221,7 @@ def converse_gemini( query_images=query_images, vision_enabled=vision_available, model_type=ChatModelOptions.ModelType.GOOGLE, - attached_files=attached_files, + query_files=query_files, ) messages, system_prompt = format_messages_for_gemini(messages, system_prompt) diff --git a/src/khoj/processor/conversation/offline/chat_model.py b/src/khoj/processor/conversation/offline/chat_model.py index 6a25e258..c41c847b 100644 --- a/src/khoj/processor/conversation/offline/chat_model.py +++ b/src/khoj/processor/conversation/offline/chat_model.py @@ -37,7 +37,7 @@ def extract_questions_offline( max_prompt_size: int = None, temperature: float = 0.7, personality_context: Optional[str] = None, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ) -> List[str]: """ @@ -88,7 +88,7 @@ def extract_questions_offline( loaded_model=offline_chat_model, max_prompt_size=max_prompt_size, model_type=ChatModelOptions.ModelType.OFFLINE, - attached_files=attached_files, + query_files=query_files, ) state.chat_lock.acquire() @@ -154,7 +154,7 @@ def converse_offline( location_data: LocationData = None, user_name: str = None, agent: Agent = None, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ) -> Union[ThreadedGenerator, Iterator[str]]: """ @@ -219,7 +219,7 @@ def converse_offline( max_prompt_size=max_prompt_size, tokenizer_name=tokenizer_name, model_type=ChatModelOptions.ModelType.OFFLINE, - attached_files=attached_files, + query_files=query_files, ) truncated_messages = "\n".join({f"{message.content[:70]}..." for message in messages}) diff --git a/src/khoj/processor/conversation/openai/gpt.py b/src/khoj/processor/conversation/openai/gpt.py index 70d208d8..c4cb0c67 100644 --- a/src/khoj/processor/conversation/openai/gpt.py +++ b/src/khoj/processor/conversation/openai/gpt.py @@ -34,7 +34,7 @@ def extract_questions( query_images: Optional[list[str]] = None, vision_enabled: bool = False, personality_context: Optional[str] = None, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ): """ @@ -80,7 +80,7 @@ def extract_questions( images=query_images, model_type=ChatModelOptions.ModelType.OPENAI, vision_enabled=vision_enabled, - attached_file_context=attached_files, + attached_file_context=query_files, ) messages = [] @@ -151,7 +151,7 @@ def converse( agent: Agent = None, query_images: Optional[list[str]] = None, vision_available: bool = False, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ): """ @@ -210,7 +210,7 @@ def converse( query_images=query_images, vision_enabled=vision_available, model_type=ChatModelOptions.ModelType.OPENAI, - attached_files=attached_files, + query_files=query_files, ) truncated_messages = "\n".join({f"{message.content[:70]}..." for message in messages}) logger.debug(f"Conversation Context for GPT: {truncated_messages}") diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 7fe83d06..b007be9a 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -157,14 +157,14 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A chat_history += f"User: {chat['intent']['query']}\n" chat_history += f"{agent_name}: {chat['intent']['inferred-queries'][0]}\n" elif chat["by"] == "you": - raw_attached_files = chat.get("attachedFiles") - if raw_attached_files: - attached_files: Dict[str, str] = {} - for file in raw_attached_files: - attached_files[file["name"]] = file["content"] + raw_query_files = chat.get("queryFiles") + if raw_query_files: + query_files: Dict[str, str] = {} + for file in raw_query_files: + query_files[file["name"]] = file["content"] - attached_file_context = gather_raw_attached_files(attached_files) - chat_history += f"User: {attached_file_context}\n" + query_file_context = gather_raw_query_files(query_files) + chat_history += f"User: {query_file_context}\n" return chat_history @@ -254,7 +254,7 @@ def save_to_conversation_log( conversation_id: str = None, automation_id: str = None, query_images: List[str] = None, - raw_attached_files: List[FileAttachment] = [], + raw_query_files: List[FileAttachment] = [], train_of_thought: List[Any] = [], tracer: Dict[str, Any] = {}, ): @@ -267,7 +267,7 @@ def save_to_conversation_log( "created": user_message_time, "images": query_images, "turnId": turn_id, - "attachedFiles": [file.model_dump(mode="json") for file in raw_attached_files], + "queryFiles": [file.model_dump(mode="json") for file in raw_query_files], }, khoj_message_metadata={ "context": compiled_references, @@ -330,18 +330,18 @@ def construct_structured_message( return message -def gather_raw_attached_files( - attached_files: Dict[str, str], +def gather_raw_query_files( + query_files: Dict[str, str], ): """ Gather contextual data from the given (raw) files """ - if len(attached_files) == 0: + if len(query_files) == 0: return "" contextual_data = " ".join( - [f"File: {file_name}\n\n{file_content}\n\n" for file_name, file_content in attached_files.items()] + [f"File: {file_name}\n\n{file_content}\n\n" for file_name, file_content in query_files.items()] ) return f"I have attached the following files:\n\n{contextual_data}" @@ -358,7 +358,7 @@ def generate_chatml_messages_with_context( vision_enabled=False, model_type="", context_message="", - attached_files: str = None, + query_files: str = None, ): """Generate chat messages with appropriate context from previous conversation to send to the chat model""" # Set max prompt size from user config or based on pre-configured for model and machine specs @@ -389,13 +389,13 @@ def generate_chatml_messages_with_context( ) message_context += f"{prompts.notes_conversation.format(references=references)}\n\n" - if chat.get("attachedFiles"): - raw_attached_files = chat.get("attachedFiles") - attached_files_dict = dict() - for file in raw_attached_files: - attached_files_dict[file["name"]] = file["content"] + if chat.get("queryFiles"): + raw_query_files = chat.get("queryFiles") + query_files_dict = dict() + for file in raw_query_files: + query_files_dict[file["name"]] = file["content"] - message_attached_files = gather_raw_attached_files(attached_files_dict) + message_attached_files = gather_raw_query_files(query_files_dict) chatml_messages.append(ChatMessage(content=message_attached_files, role="user")) if not is_none_or_empty(chat.get("onlineContext")): @@ -407,7 +407,7 @@ def generate_chatml_messages_with_context( role = "user" if chat["by"] == "you" else "assistant" message_content = construct_structured_message( - chat["message"], chat.get("images"), model_type, vision_enabled, attached_file_context=attached_files + chat["message"], chat.get("images"), model_type, vision_enabled, attached_file_context=query_files ) reconstructed_message = ChatMessage(content=message_content, role=role) @@ -421,7 +421,7 @@ def generate_chatml_messages_with_context( messages.append( ChatMessage( content=construct_structured_message( - user_message, query_images, model_type, vision_enabled, attached_files + user_message, query_images, model_type, vision_enabled, query_files ), role="user", ) diff --git a/src/khoj/processor/image/generate.py b/src/khoj/processor/image/generate.py index 003dae4d..6c1f71b6 100644 --- a/src/khoj/processor/image/generate.py +++ b/src/khoj/processor/image/generate.py @@ -28,7 +28,7 @@ async def text_to_image( send_status_func: Optional[Callable] = None, query_images: Optional[List[str]] = None, agent: Agent = None, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ): status_code = 200 @@ -70,7 +70,7 @@ async def text_to_image( query_images=query_images, user=user, agent=agent, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) diff --git a/src/khoj/processor/tools/online_search.py b/src/khoj/processor/tools/online_search.py index c6fbd607..7fdb82d4 100644 --- a/src/khoj/processor/tools/online_search.py +++ b/src/khoj/processor/tools/online_search.py @@ -68,7 +68,7 @@ async def search_online( query_images: List[str] = None, previous_subqueries: Set = set(), agent: Agent = None, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ): query += " ".join(custom_filters) @@ -86,7 +86,7 @@ async def search_online( query_images=query_images, agent=agent, tracer=tracer, - attached_files=attached_files, + query_files=query_files, ) subqueries = list(new_subqueries - previous_subqueries) response_dict: Dict[str, Dict[str, List[Dict] | Dict]] = {} @@ -178,7 +178,7 @@ async def read_webpages( query_images: List[str] = None, agent: Agent = None, max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ): "Infer web pages to read from the query and extract relevant information from them" @@ -190,7 +190,7 @@ async def read_webpages( user, query_images, agent=agent, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) diff --git a/src/khoj/processor/tools/run_code.py b/src/khoj/processor/tools/run_code.py index c4e2ace1..e24b3b53 100644 --- a/src/khoj/processor/tools/run_code.py +++ b/src/khoj/processor/tools/run_code.py @@ -36,7 +36,7 @@ async def run_code( query_images: List[str] = None, agent: Agent = None, sandbox_url: str = SANDBOX_URL, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ): # Generate Code @@ -54,7 +54,7 @@ async def run_code( query_images, agent, tracer, - attached_files, + query_files, ) except Exception as e: raise ValueError(f"Failed to generate code for {query} with error: {e}") @@ -84,7 +84,7 @@ async def generate_python_code( query_images: List[str] = None, agent: Agent = None, tracer: dict = {}, - attached_files: str = None, + query_files: str = None, ) -> List[str]: location = f"{location_data}" if location_data else "Unknown" username = prompts.user_name.format(name=user.get_full_name()) if user.get_full_name() else "" @@ -112,7 +112,7 @@ async def generate_python_code( response_type="json_object", user=user, tracer=tracer, - attached_files=attached_files, + query_files=query_files, ) # Validate that the response is a non-empty, JSON-serializable list diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 747c7e66..f66fbce8 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -351,7 +351,7 @@ async def extract_references_and_questions( query_images: Optional[List[str]] = None, previous_inferred_queries: Set = set(), agent: Agent = None, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ): user = request.user.object if request.user.is_authenticated else None @@ -426,7 +426,7 @@ async def extract_references_and_questions( user=user, max_prompt_size=conversation_config.max_prompt_size, personality_context=personality_context, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) elif conversation_config.model_type == ChatModelOptions.ModelType.OPENAI: @@ -445,7 +445,7 @@ async def extract_references_and_questions( query_images=query_images, vision_enabled=vision_enabled, personality_context=personality_context, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) elif conversation_config.model_type == ChatModelOptions.ModelType.ANTHROPIC: @@ -461,7 +461,7 @@ async def extract_references_and_questions( user=user, vision_enabled=vision_enabled, personality_context=personality_context, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) elif conversation_config.model_type == ChatModelOptions.ModelType.GOOGLE: @@ -478,7 +478,7 @@ async def extract_references_and_questions( user=user, vision_enabled=vision_enabled, personality_context=personality_context, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index bb642267..33476a97 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -50,7 +50,7 @@ from khoj.routers.helpers import ( aget_relevant_output_modes, construct_automation_created_message, create_automation, - gather_raw_attached_files, + gather_raw_query_files, generate_excalidraw_diagram, generate_summary_from_files, get_conversation_command, @@ -602,7 +602,7 @@ async def chat( country_code = body.country_code or get_country_code_from_timezone(body.timezone) timezone = body.timezone raw_images = body.images - raw_attached_files = body.files + raw_query_files = body.files async def event_generator(q: str, images: list[str]): start_time = time.perf_counter() @@ -614,7 +614,7 @@ async def chat( q = unquote(q) train_of_thought = [] nonlocal conversation_id - nonlocal raw_attached_files + nonlocal raw_query_files tracer: dict = { "mid": turn_id, @@ -634,10 +634,10 @@ async def chat( if uploaded_image: uploaded_images.append(uploaded_image) - attached_files: Dict[str, str] = {} - if raw_attached_files: - for file in raw_attached_files: - attached_files[file.name] = file.content + query_files: Dict[str, str] = {} + if raw_query_files: + for file in raw_query_files: + query_files[file.name] = file.content async def send_event(event_type: ChatEvent, data: str | dict): nonlocal connection_alive, ttft, train_of_thought @@ -750,7 +750,7 @@ async def chat( compiled_references: List[Any] = [] inferred_queries: List[Any] = [] file_filters = conversation.file_filters if conversation and conversation.file_filters else [] - attached_file_context = gather_raw_attached_files(attached_files) + attached_file_context = gather_raw_query_files(query_files) if conversation_commands == [ConversationCommand.Default] or is_automated_task: conversation_commands = await aget_relevant_information_sources( @@ -760,7 +760,7 @@ async def chat( user=user, query_images=uploaded_images, agent=agent, - attached_files=attached_file_context, + query_files=attached_file_context, tracer=tracer, ) @@ -806,7 +806,7 @@ async def chat( user_name=user_name, location=location, file_filters=conversation.file_filters if conversation else [], - attached_files=attached_file_context, + query_files=attached_file_context, tracer=tracer, ): if isinstance(research_result, InformationCollectionIteration): @@ -855,7 +855,7 @@ async def chat( query_images=uploaded_images, agent=agent, send_status_func=partial(send_event, ChatEvent.STATUS), - attached_files=attached_file_context, + query_files=attached_file_context, tracer=tracer, ): if isinstance(response, dict) and ChatEvent.STATUS in response: @@ -877,7 +877,7 @@ async def chat( conversation_id=conversation_id, query_images=uploaded_images, train_of_thought=train_of_thought, - raw_attached_files=raw_attached_files, + raw_query_files=raw_query_files, tracer=tracer, ) return @@ -923,7 +923,7 @@ async def chat( automation_id=automation.id, query_images=uploaded_images, train_of_thought=train_of_thought, - raw_attached_files=raw_attached_files, + raw_query_files=raw_query_files, tracer=tracer, ) async for result in send_llm_response(llm_response): @@ -946,7 +946,7 @@ async def chat( partial(send_event, ChatEvent.STATUS), query_images=uploaded_images, agent=agent, - attached_files=attached_file_context, + query_files=attached_file_context, tracer=tracer, ): if isinstance(result, dict) and ChatEvent.STATUS in result: @@ -992,7 +992,7 @@ async def chat( custom_filters, query_images=uploaded_images, agent=agent, - attached_files=attached_file_context, + query_files=attached_file_context, tracer=tracer, ): if isinstance(result, dict) and ChatEvent.STATUS in result: @@ -1018,7 +1018,7 @@ async def chat( partial(send_event, ChatEvent.STATUS), query_images=uploaded_images, agent=agent, - attached_files=attached_file_context, + query_files=attached_file_context, tracer=tracer, ): if isinstance(result, dict) and ChatEvent.STATUS in result: @@ -1059,7 +1059,7 @@ async def chat( partial(send_event, ChatEvent.STATUS), query_images=uploaded_images, agent=agent, - attached_files=attached_file_context, + query_files=attached_file_context, tracer=tracer, ): if isinstance(result, dict) and ChatEvent.STATUS in result: @@ -1100,7 +1100,7 @@ async def chat( send_status_func=partial(send_event, ChatEvent.STATUS), query_images=uploaded_images, agent=agent, - attached_files=attached_file_context, + query_files=attached_file_context, tracer=tracer, ): if isinstance(result, dict) and ChatEvent.STATUS in result: @@ -1134,7 +1134,7 @@ async def chat( query_images=uploaded_images, train_of_thought=train_of_thought, attached_file_context=attached_file_context, - raw_attached_files=raw_attached_files, + raw_query_files=raw_query_files, tracer=tracer, ) content_obj = { @@ -1164,7 +1164,7 @@ async def chat( user=user, agent=agent, send_status_func=partial(send_event, ChatEvent.STATUS), - attached_files=attached_file_context, + query_files=attached_file_context, tracer=tracer, ): if isinstance(result, dict) and ChatEvent.STATUS in result: @@ -1195,7 +1195,7 @@ async def chat( query_images=uploaded_images, train_of_thought=train_of_thought, attached_file_context=attached_file_context, - raw_attached_files=raw_attached_files, + raw_query_files=raw_query_files, tracer=tracer, ) @@ -1224,7 +1224,7 @@ async def chat( uploaded_images, train_of_thought, attached_file_context, - raw_attached_files, + raw_query_files, tracer, ) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 69f77e91..e645478d 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -256,18 +256,18 @@ async def agenerate_chat_response(*args): return await loop.run_in_executor(executor, generate_chat_response, *args) -def gather_raw_attached_files( - attached_files: Dict[str, str], +def gather_raw_query_files( + query_files: Dict[str, str], ): """ Gather contextual data from the given (raw) files """ - if len(attached_files) == 0: + if len(query_files) == 0: return "" contextual_data = " ".join( - [f"File: {file_name}\n\n{file_content}\n\n" for file_name, file_content in attached_files.items()] + [f"File: {file_name}\n\n{file_content}\n\n" for file_name, file_content in query_files.items()] ) return f"I have attached the following files:\n\n{contextual_data}" @@ -334,7 +334,7 @@ async def aget_relevant_information_sources( user: KhojUser, query_images: List[str] = None, agent: Agent = None, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ): """ @@ -372,7 +372,7 @@ async def aget_relevant_information_sources( relevant_tools_prompt, response_type="json_object", user=user, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) @@ -482,7 +482,7 @@ async def infer_webpage_urls( user: KhojUser, query_images: List[str] = None, agent: Agent = None, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ) -> List[str]: """ @@ -512,7 +512,7 @@ async def infer_webpage_urls( query_images=query_images, response_type="json_object", user=user, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) @@ -538,7 +538,7 @@ async def generate_online_subqueries( user: KhojUser, query_images: List[str] = None, agent: Agent = None, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ) -> Set[str]: """ @@ -568,7 +568,7 @@ async def generate_online_subqueries( query_images=query_images, response_type="json_object", user=user, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) @@ -691,7 +691,7 @@ async def generate_summary_from_files( query_images: List[str] = None, agent: Agent = None, send_status_func: Optional[Callable] = None, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ): try: @@ -701,17 +701,15 @@ async def generate_summary_from_files( if len(file_names) > 0: file_objects = await FileObjectAdapters.async_get_file_objects_by_name(None, file_names.pop(), agent) - if (file_objects and len(file_objects) == 0 and not attached_files) or ( - not file_objects and not attached_files - ): + if (file_objects and len(file_objects) == 0 and not query_files) or (not file_objects and not query_files): response_log = "Sorry, I couldn't find anything to summarize." yield response_log return contextual_data = " ".join([f"File: {file.file_name}\n\n{file.raw_text}" for file in file_objects]) - if attached_files: - contextual_data += f"\n\n{attached_files}" + if query_files: + contextual_data += f"\n\n{query_files}" if not q: q = "Create a general summary of the file" @@ -754,7 +752,7 @@ async def generate_excalidraw_diagram( user: KhojUser = None, agent: Agent = None, send_status_func: Optional[Callable] = None, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ): if send_status_func: @@ -770,7 +768,7 @@ async def generate_excalidraw_diagram( query_images=query_images, user=user, agent=agent, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) @@ -797,7 +795,7 @@ async def generate_better_diagram_description( query_images: List[str] = None, user: KhojUser = None, agent: Agent = None, - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ) -> str: """ @@ -839,7 +837,7 @@ async def generate_better_diagram_description( improve_diagram_description_prompt, query_images=query_images, user=user, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) response = response.strip() @@ -887,7 +885,7 @@ async def generate_better_image_prompt( query_images: Optional[List[str]] = None, user: KhojUser = None, agent: Agent = None, - attached_files: str = "", + query_files: str = "", tracer: dict = {}, ) -> str: """ @@ -936,7 +934,7 @@ async def generate_better_image_prompt( with timer("Chat actor: Generate contextual image prompt", logger): response = await send_message_to_model_wrapper( - image_prompt, query_images=query_images, user=user, attached_files=attached_files, tracer=tracer + image_prompt, query_images=query_images, user=user, query_files=query_files, tracer=tracer ) response = response.strip() if response.startswith(('"', "'")) and response.endswith(('"', "'")): @@ -952,7 +950,7 @@ async def send_message_to_model_wrapper( user: KhojUser = None, query_images: List[str] = None, context: str = "", - attached_files: str = None, + query_files: str = None, tracer: dict = {}, ): conversation_config: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config(user) @@ -992,7 +990,7 @@ async def send_message_to_model_wrapper( max_prompt_size=max_tokens, vision_enabled=vision_available, model_type=conversation_config.model_type, - attached_files=attached_files, + query_files=query_files, ) return send_message_to_model_offline( @@ -1019,7 +1017,7 @@ async def send_message_to_model_wrapper( vision_enabled=vision_available, query_images=query_images, model_type=conversation_config.model_type, - attached_files=attached_files, + query_files=query_files, ) return send_message_to_model( @@ -1042,7 +1040,7 @@ async def send_message_to_model_wrapper( vision_enabled=vision_available, query_images=query_images, model_type=conversation_config.model_type, - attached_files=attached_files, + query_files=query_files, ) return anthropic_send_message_to_model( @@ -1064,7 +1062,7 @@ async def send_message_to_model_wrapper( vision_enabled=vision_available, query_images=query_images, model_type=conversation_config.model_type, - attached_files=attached_files, + query_files=query_files, ) return gemini_send_message_to_model( @@ -1079,7 +1077,7 @@ def send_message_to_model_wrapper_sync( system_message: str = "", response_type: str = "text", user: KhojUser = None, - attached_files: str = "", + query_files: str = "", tracer: dict = {}, ): conversation_config: ChatModelOptions = ConversationAdapters.get_default_conversation_config(user) @@ -1104,7 +1102,7 @@ def send_message_to_model_wrapper_sync( max_prompt_size=max_tokens, vision_enabled=vision_available, model_type=conversation_config.model_type, - attached_files=attached_files, + query_files=query_files, ) return send_message_to_model_offline( @@ -1126,7 +1124,7 @@ def send_message_to_model_wrapper_sync( max_prompt_size=max_tokens, vision_enabled=vision_available, model_type=conversation_config.model_type, - attached_files=attached_files, + query_files=query_files, ) openai_response = send_message_to_model( @@ -1148,7 +1146,7 @@ def send_message_to_model_wrapper_sync( max_prompt_size=max_tokens, vision_enabled=vision_available, model_type=conversation_config.model_type, - attached_files=attached_files, + query_files=query_files, ) return anthropic_send_message_to_model( @@ -1168,7 +1166,7 @@ def send_message_to_model_wrapper_sync( max_prompt_size=max_tokens, vision_enabled=vision_available, model_type=conversation_config.model_type, - attached_files=attached_files, + query_files=query_files, ) return gemini_send_message_to_model( @@ -1199,8 +1197,8 @@ def generate_chat_response( meta_research: str = "", query_images: Optional[List[str]] = None, train_of_thought: List[Any] = [], - attached_files: str = None, - raw_attached_files: List[FileAttachment] = None, + query_files: str = None, + raw_query_files: List[FileAttachment] = None, tracer: dict = {}, ) -> Tuple[Union[ThreadedGenerator, Iterator[str]], Dict[str, str]]: # Initialize Variables @@ -1223,7 +1221,7 @@ def generate_chat_response( conversation_id=conversation_id, query_images=query_images, train_of_thought=train_of_thought, - raw_attached_files=raw_attached_files, + raw_query_files=raw_query_files, tracer=tracer, ) @@ -1258,7 +1256,7 @@ def generate_chat_response( location_data=location_data, user_name=user_name, agent=agent, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) @@ -1284,7 +1282,7 @@ def generate_chat_response( user_name=user_name, agent=agent, vision_available=vision_available, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) @@ -1307,7 +1305,7 @@ def generate_chat_response( user_name=user_name, agent=agent, vision_available=vision_available, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) elif conversation_config.model_type == ChatModelOptions.ModelType.GOOGLE: @@ -1329,7 +1327,7 @@ def generate_chat_response( agent=agent, query_images=query_images, vision_available=vision_available, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) diff --git a/src/khoj/routers/research.py b/src/khoj/routers/research.py index b9d1fcff..abf8f96c 100644 --- a/src/khoj/routers/research.py +++ b/src/khoj/routers/research.py @@ -46,7 +46,7 @@ async def apick_next_tool( max_iterations: int = 5, send_status_func: Optional[Callable] = None, tracer: dict = {}, - attached_files: str = None, + query_files: str = None, ): """Given a query, determine which of the available tools the agent should use in order to answer appropriately.""" @@ -92,7 +92,7 @@ async def apick_next_tool( response_type="json_object", user=user, query_images=query_images, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ) except Exception as e: @@ -152,7 +152,7 @@ async def execute_information_collection( location: LocationData = None, file_filters: List[str] = [], tracer: dict = {}, - attached_files: str = None, + query_files: str = None, ): current_iteration = 0 MAX_ITERATIONS = 5 @@ -176,7 +176,7 @@ async def execute_information_collection( MAX_ITERATIONS, send_status_func, tracer=tracer, - attached_files=attached_files, + query_files=query_files, ): if isinstance(result, dict) and ChatEvent.STATUS in result: yield result[ChatEvent.STATUS] @@ -207,7 +207,7 @@ async def execute_information_collection( previous_inferred_queries=previous_inferred_queries, agent=agent, tracer=tracer, - attached_files=attached_files, + query_files=query_files, ): if isinstance(result, dict) and ChatEvent.STATUS in result: yield result[ChatEvent.STATUS] @@ -269,7 +269,7 @@ async def execute_information_collection( query_images=query_images, agent=agent, tracer=tracer, - attached_files=attached_files, + query_files=query_files, ): if isinstance(result, dict) and ChatEvent.STATUS in result: yield result[ChatEvent.STATUS] @@ -300,7 +300,7 @@ async def execute_information_collection( send_status_func, query_images=query_images, agent=agent, - attached_files=attached_files, + query_files=query_files, tracer=tracer, ): if isinstance(result, dict) and ChatEvent.STATUS in result: @@ -326,7 +326,7 @@ async def execute_information_collection( query_images=query_images, agent=agent, send_status_func=send_status_func, - attached_files=attached_files, + query_files=query_files, ): if isinstance(result, dict) and ChatEvent.STATUS in result: yield result[ChatEvent.STATUS]