From e2abc1a257b3b58e1a0355ea23aef20b86869eb6 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 17 Oct 2024 23:05:43 -0700 Subject: [PATCH 01/24] Handle multiple images shared in query to chat API Previously Khoj could respond to a single shared image at a time. This changes updates the chat API to accept multiple images shared by the user and send it to the appropriate chat actors including the openai response generation chat actor for getting an image aware response --- .../conversation/google/gemini_chat.py | 3 +- src/khoj/processor/conversation/openai/gpt.py | 8 +-- src/khoj/processor/conversation/utils.py | 30 +++++---- src/khoj/processor/image/generate.py | 4 +- src/khoj/processor/tools/online_search.py | 8 +-- src/khoj/routers/api.py | 4 +- src/khoj/routers/api_chat.py | 62 +++++++++---------- src/khoj/routers/helpers.py | 52 ++++++++-------- 8 files changed, 90 insertions(+), 81 deletions(-) diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py index 7359b3eb..e8848806 100644 --- a/src/khoj/processor/conversation/google/gemini_chat.py +++ b/src/khoj/processor/conversation/google/gemini_chat.py @@ -6,7 +6,7 @@ from typing import Dict, Optional from langchain.schema import ChatMessage -from khoj.database.models import Agent, KhojUser +from khoj.database.models import Agent, ChatModelOptions, KhojUser from khoj.processor.conversation import prompts from khoj.processor.conversation.google.utils import ( format_messages_for_gemini, @@ -187,6 +187,7 @@ def converse_gemini( model_name=model, max_prompt_size=max_prompt_size, tokenizer_name=tokenizer_name, + model_type=ChatModelOptions.ModelType.GOOGLE, ) messages, system_prompt = format_messages_for_gemini(messages, system_prompt) diff --git a/src/khoj/processor/conversation/openai/gpt.py b/src/khoj/processor/conversation/openai/gpt.py index ad02b10e..4a656fac 100644 --- a/src/khoj/processor/conversation/openai/gpt.py +++ b/src/khoj/processor/conversation/openai/gpt.py @@ -30,7 +30,7 @@ def extract_questions( api_base_url=None, location_data: LocationData = None, user: KhojUser = None, - uploaded_image_url: Optional[str] = None, + query_images: Optional[list[str]] = None, vision_enabled: bool = False, personality_context: Optional[str] = None, ): @@ -74,7 +74,7 @@ def extract_questions( prompt = construct_structured_message( message=prompt, - image_url=uploaded_image_url, + images=query_images, model_type=ChatModelOptions.ModelType.OPENAI, vision_enabled=vision_enabled, ) @@ -135,7 +135,7 @@ def converse( location_data: LocationData = None, user_name: str = None, agent: Agent = None, - image_url: Optional[str] = None, + query_images: Optional[list[str]] = None, vision_available: bool = False, ): """ @@ -191,7 +191,7 @@ def converse( model_name=model, max_prompt_size=max_prompt_size, tokenizer_name=tokenizer_name, - uploaded_image_url=image_url, + query_images=query_images, vision_enabled=vision_available, model_type=ChatModelOptions.ModelType.OPENAI, ) diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index e841c484..8d799745 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -109,7 +109,7 @@ def save_to_conversation_log( client_application: ClientApplication = None, conversation_id: str = None, automation_id: str = None, - uploaded_image_url: str = None, + query_images: List[str] = None, ): user_message_time = user_message_time or 
datetime.now().strftime("%Y-%m-%d %H:%M:%S") updated_conversation = message_to_log( @@ -117,7 +117,7 @@ def save_to_conversation_log( chat_response=chat_response, user_message_metadata={ "created": user_message_time, - "uploadedImageData": uploaded_image_url, + "images": query_images, }, khoj_message_metadata={ "context": compiled_references, @@ -145,10 +145,18 @@ Khoj: "{inferred_queries if ("text-to-image" in intent_type) else chat_response} ) -# Format user and system messages to chatml format -def construct_structured_message(message, image_url, model_type, vision_enabled): - if image_url and vision_enabled and model_type == ChatModelOptions.ModelType.OPENAI: - return [{"type": "text", "text": message}, {"type": "image_url", "image_url": {"url": image_url}}] +def construct_structured_message(message: str, images: list[str], model_type: str, vision_enabled: bool): + """ + Format messages into appropriate multimedia format for supported chat model types + """ + if not images or not vision_enabled: + return message + + if model_type == ChatModelOptions.ModelType.OPENAI: + return [ + {"type": "text", "text": message}, + *[{"type": "image_url", "image_url": {"url": image}} for image in images], + ] return message @@ -160,7 +168,7 @@ def generate_chatml_messages_with_context( loaded_model: Optional[Llama] = None, max_prompt_size=None, tokenizer_name=None, - uploaded_image_url=None, + query_images=None, vision_enabled=False, model_type="", ): @@ -183,9 +191,7 @@ def generate_chatml_messages_with_context( message_content = chat["message"] + message_notes - message_content = construct_structured_message( - message_content, chat.get("uploadedImageData"), model_type, vision_enabled - ) + message_content = construct_structured_message(message_content, chat.get("images"), model_type, vision_enabled) reconstructed_message = ChatMessage(content=message_content, role=role) @@ -198,7 +204,7 @@ def generate_chatml_messages_with_context( if not is_none_or_empty(user_message): messages.append( ChatMessage( - content=construct_structured_message(user_message, uploaded_image_url, model_type, vision_enabled), + content=construct_structured_message(user_message, query_images, model_type, vision_enabled), role="user", ) ) @@ -222,7 +228,6 @@ def truncate_messages( tokenizer_name=None, ) -> list[ChatMessage]: """Truncate messages to fit within max prompt size supported by model""" - default_tokenizer = "gpt-4o" try: @@ -252,6 +257,7 @@ def truncate_messages( system_message = messages.pop(idx) break + # TODO: Handle truncation of multi-part message.content, i.e when message.content is a list[dict] rather than a string system_message_tokens = ( len(encoder.encode(system_message.content)) if system_message and type(system_message.content) == str else 0 ) diff --git a/src/khoj/processor/image/generate.py b/src/khoj/processor/image/generate.py index 59073731..ee39bdc5 100644 --- a/src/khoj/processor/image/generate.py +++ b/src/khoj/processor/image/generate.py @@ -26,7 +26,7 @@ async def text_to_image( references: List[Dict[str, Any]], online_results: Dict[str, Any], send_status_func: Optional[Callable] = None, - uploaded_image_url: Optional[str] = None, + query_images: Optional[List[str]] = None, agent: Agent = None, ): status_code = 200 @@ -65,7 +65,7 @@ async def text_to_image( note_references=references, online_results=online_results, model_type=text_to_image_config.model_type, - uploaded_image_url=uploaded_image_url, + query_images=query_images, user=user, agent=agent, ) diff --git 
a/src/khoj/processor/tools/online_search.py b/src/khoj/processor/tools/online_search.py index 70972eac..fdf1ba9f 100644 --- a/src/khoj/processor/tools/online_search.py +++ b/src/khoj/processor/tools/online_search.py @@ -62,7 +62,7 @@ async def search_online( user: KhojUser, send_status_func: Optional[Callable] = None, custom_filters: List[str] = [], - uploaded_image_url: str = None, + query_images: List[str] = None, agent: Agent = None, ): query += " ".join(custom_filters) @@ -73,7 +73,7 @@ async def search_online( # Breakdown the query into subqueries to get the correct answer subqueries = await generate_online_subqueries( - query, conversation_history, location, user, uploaded_image_url=uploaded_image_url, agent=agent + query, conversation_history, location, user, query_images=query_images, agent=agent ) response_dict = {} @@ -151,7 +151,7 @@ async def read_webpages( location: LocationData, user: KhojUser, send_status_func: Optional[Callable] = None, - uploaded_image_url: str = None, + query_images: List[str] = None, agent: Agent = None, ): "Infer web pages to read from the query and extract relevant information from them" @@ -159,7 +159,7 @@ async def read_webpages( if send_status_func: async for event in send_status_func(f"**Inferring web pages to read**"): yield {ChatEvent.STATUS: event} - urls = await infer_webpage_urls(query, conversation_history, location, user, uploaded_image_url) + urls = await infer_webpage_urls(query, conversation_history, location, user, query_images) logger.info(f"Reading web pages at: {urls}") if send_status_func: diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 59948b47..075c8c47 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -340,7 +340,7 @@ async def extract_references_and_questions( conversation_commands: List[ConversationCommand] = [ConversationCommand.Default], location_data: LocationData = None, send_status_func: Optional[Callable] = None, - uploaded_image_url: Optional[str] = None, + query_images: Optional[List[str]] = None, agent: Agent = None, ): user = request.user.object if request.user.is_authenticated else None @@ -431,7 +431,7 @@ async def extract_references_and_questions( conversation_log=meta_log, location_data=location_data, user=user, - uploaded_image_url=uploaded_image_url, + query_images=query_images, vision_enabled=vision_enabled, personality_context=personality_context, ) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index d57b5530..ee84c554 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -535,7 +535,7 @@ class ChatRequestBody(BaseModel): country: Optional[str] = None country_code: Optional[str] = None timezone: Optional[str] = None - image: Optional[str] = None + images: Optional[list[str]] = None create_new: Optional[bool] = False @@ -564,9 +564,9 @@ async def chat( country = body.country or get_country_name_from_timezone(body.timezone) country_code = body.country_code or get_country_code_from_timezone(body.timezone) timezone = body.timezone - image = body.image + raw_images = body.images - async def event_generator(q: str, image: str): + async def event_generator(q: str, images: list[str]): start_time = time.perf_counter() ttft = None chat_metadata: dict = {} @@ -576,16 +576,16 @@ async def chat( q = unquote(q) nonlocal conversation_id - uploaded_image_url = None - if image: - decoded_string = unquote(image) - base64_data = decoded_string.split(",", 1)[1] - image_bytes = base64.b64decode(base64_data) - webp_image_bytes = 
convert_image_to_webp(image_bytes) - try: - uploaded_image_url = upload_image_to_bucket(webp_image_bytes, request.user.object.id) - except: - uploaded_image_url = None + uploaded_images: list[str] = [] + if images: + for image in images: + decoded_string = unquote(image) + base64_data = decoded_string.split(",", 1)[1] + image_bytes = base64.b64decode(base64_data) + webp_image_bytes = convert_image_to_webp(image_bytes) + uploaded_image = upload_image_to_bucket(webp_image_bytes, request.user.object.id) + if uploaded_image: + uploaded_images.append(uploaded_image) async def send_event(event_type: ChatEvent, data: str | dict): nonlocal connection_alive, ttft @@ -692,7 +692,7 @@ async def chat( meta_log, is_automated_task, user=user, - uploaded_image_url=uploaded_image_url, + query_images=uploaded_images, agent=agent, ) conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands]) @@ -701,7 +701,7 @@ async def chat( ): yield result - mode = await aget_relevant_output_modes(q, meta_log, is_automated_task, user, uploaded_image_url, agent) + mode = await aget_relevant_output_modes(q, meta_log, is_automated_task, user, uploaded_images, agent) async for result in send_event(ChatEvent.STATUS, f"**Decided Response Mode:** {mode.value}"): yield result if mode not in conversation_commands: @@ -764,7 +764,7 @@ async def chat( q, contextual_data, conversation_history=meta_log, - uploaded_image_url=uploaded_image_url, + query_images=uploaded_images, user=user, agent=agent, ) @@ -785,7 +785,7 @@ async def chat( intent_type="summarize", client_application=request.user.client_app, conversation_id=conversation_id, - uploaded_image_url=uploaded_image_url, + query_images=uploaded_images, ) return @@ -828,7 +828,7 @@ async def chat( conversation_id=conversation_id, inferred_queries=[query_to_run], automation_id=automation.id, - uploaded_image_url=uploaded_image_url, + query_images=uploaded_images, ) async for result in send_llm_response(llm_response): yield result @@ -848,7 +848,7 @@ async def chat( conversation_commands, location, partial(send_event, ChatEvent.STATUS), - uploaded_image_url=uploaded_image_url, + query_images=uploaded_images, agent=agent, ): if isinstance(result, dict) and ChatEvent.STATUS in result: @@ -892,7 +892,7 @@ async def chat( user, partial(send_event, ChatEvent.STATUS), custom_filters, - uploaded_image_url=uploaded_image_url, + query_images=uploaded_images, agent=agent, ): if isinstance(result, dict) and ChatEvent.STATUS in result: @@ -916,7 +916,7 @@ async def chat( location, user, partial(send_event, ChatEvent.STATUS), - uploaded_image_url=uploaded_image_url, + query_images=uploaded_images, agent=agent, ): if isinstance(result, dict) and ChatEvent.STATUS in result: @@ -966,20 +966,20 @@ async def chat( references=compiled_references, online_results=online_results, send_status_func=partial(send_event, ChatEvent.STATUS), - uploaded_image_url=uploaded_image_url, + query_images=uploaded_images, agent=agent, ): if isinstance(result, dict) and ChatEvent.STATUS in result: yield result[ChatEvent.STATUS] else: - image, status_code, improved_image_prompt, intent_type = result + generated_image, status_code, improved_image_prompt, intent_type = result - if image is None or status_code != 200: + if generated_image is None or status_code != 200: content_obj = { "content-type": "application/json", "intentType": intent_type, "detail": improved_image_prompt, - "image": image, + "image": None, } async for result in send_llm_response(json.dumps(content_obj)): yield result @@ -987,7 
+987,7 @@ async def chat( await sync_to_async(save_to_conversation_log)( q, - image, + generated_image, user, meta_log, user_message_time, @@ -997,12 +997,12 @@ async def chat( conversation_id=conversation_id, compiled_references=compiled_references, online_results=online_results, - uploaded_image_url=uploaded_image_url, + query_images=uploaded_images, ) content_obj = { "intentType": intent_type, "inferredQueries": [improved_image_prompt], - "image": image, + "image": generated_image, } async for result in send_llm_response(json.dumps(content_obj)): yield result @@ -1024,7 +1024,7 @@ async def chat( conversation_id, location, user_name, - uploaded_image_url, + uploaded_images, ) # Send Response @@ -1050,9 +1050,9 @@ async def chat( ## Stream Text Response if stream: - return StreamingResponse(event_generator(q, image=image), media_type="text/plain") + return StreamingResponse(event_generator(q, images=raw_images), media_type="text/plain") ## Non-Streaming Text Response else: - response_iterator = event_generator(q, image=image) + response_iterator = event_generator(q, images=raw_images) response_data = await read_chat_stream(response_iterator) return Response(content=json.dumps(response_data), media_type="application/json", status_code=200) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 12616e36..7ed9c72d 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -290,7 +290,7 @@ async def aget_relevant_information_sources( conversation_history: dict, is_task: bool, user: KhojUser, - uploaded_image_url: str = None, + query_images: List[str] = None, agent: Agent = None, ): """ @@ -309,8 +309,8 @@ async def aget_relevant_information_sources( chat_history = construct_chat_history(conversation_history) - if uploaded_image_url: - query = f"[placeholder for user attached image]\n{query}" + if query_images: + query = f"[placeholder for {len(query_images)} user attached images]\n{query}" personality_context = ( prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else "" @@ -367,7 +367,7 @@ async def aget_relevant_output_modes( conversation_history: dict, is_task: bool = False, user: KhojUser = None, - uploaded_image_url: str = None, + query_images: List[str] = None, agent: Agent = None, ): """ @@ -389,8 +389,8 @@ async def aget_relevant_output_modes( chat_history = construct_chat_history(conversation_history) - if uploaded_image_url: - query = f"[placeholder for user attached image]\n{query}" + if query_images: + query = f"[placeholder for {len(query_images)} user attached images]\n{query}" personality_context = ( prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else "" @@ -433,7 +433,7 @@ async def infer_webpage_urls( conversation_history: dict, location_data: LocationData, user: KhojUser, - uploaded_image_url: str = None, + query_images: List[str] = None, agent: Agent = None, ) -> List[str]: """ @@ -459,7 +459,7 @@ async def infer_webpage_urls( with timer("Chat actor: Infer webpage urls to read", logger): response = await send_message_to_model_wrapper( - online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object", user=user + online_queries_prompt, query_images=query_images, response_type="json_object", user=user ) # Validate that the response is a non-empty, JSON-serializable list of URLs @@ -479,7 +479,7 @@ async def generate_online_subqueries( conversation_history: dict, location_data: LocationData, user: KhojUser, - 
uploaded_image_url: str = None, + query_images: List[str] = None, agent: Agent = None, ) -> List[str]: """ @@ -505,7 +505,7 @@ async def generate_online_subqueries( with timer("Chat actor: Generate online search subqueries", logger): response = await send_message_to_model_wrapper( - online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object", user=user + online_queries_prompt, query_images=query_images, response_type="json_object", user=user ) # Validate that the response is a non-empty, JSON-serializable list @@ -524,7 +524,7 @@ async def generate_online_subqueries( async def schedule_query( - q: str, conversation_history: dict, user: KhojUser, uploaded_image_url: str = None + q: str, conversation_history: dict, user: KhojUser, query_images: List[str] = None ) -> Tuple[str, ...]: """ Schedule the date, time to run the query. Assume the server timezone is UTC. @@ -537,7 +537,7 @@ async def schedule_query( ) raw_response = await send_message_to_model_wrapper( - crontime_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object", user=user + crontime_prompt, query_images=query_images, response_type="json_object", user=user ) # Validate that the response is a non-empty, JSON-serializable list @@ -583,7 +583,7 @@ async def extract_relevant_summary( q: str, corpus: str, conversation_history: dict, - uploaded_image_url: str = None, + query_images: List[str] = None, user: KhojUser = None, agent: Agent = None, ) -> Union[str, None]: @@ -612,7 +612,7 @@ async def extract_relevant_summary( extract_relevant_information, prompts.system_prompt_extract_relevant_summary, user=user, - uploaded_image_url=uploaded_image_url, + query_images=query_images, ) return response.strip() @@ -624,7 +624,7 @@ async def generate_better_image_prompt( note_references: List[Dict[str, Any]], online_results: Optional[dict] = None, model_type: Optional[str] = None, - uploaded_image_url: Optional[str] = None, + query_images: Optional[List[str]] = None, user: KhojUser = None, agent: Agent = None, ) -> str: @@ -676,7 +676,7 @@ async def generate_better_image_prompt( ) with timer("Chat actor: Generate contextual image prompt", logger): - response = await send_message_to_model_wrapper(image_prompt, uploaded_image_url=uploaded_image_url, user=user) + response = await send_message_to_model_wrapper(image_prompt, query_images=query_images, user=user) response = response.strip() if response.startswith(('"', "'")) and response.endswith(('"', "'")): response = response[1:-1] @@ -689,11 +689,11 @@ async def send_message_to_model_wrapper( system_message: str = "", response_type: str = "text", user: KhojUser = None, - uploaded_image_url: str = None, + query_images: List[str] = None, ): conversation_config: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config(user) vision_available = conversation_config.vision_enabled - if not vision_available and uploaded_image_url: + if not vision_available and query_images: vision_enabled_config = await ConversationAdapters.aget_vision_enabled_config() if vision_enabled_config: conversation_config = vision_enabled_config @@ -746,7 +746,7 @@ async def send_message_to_model_wrapper( max_prompt_size=max_tokens, tokenizer_name=tokenizer, vision_enabled=vision_available, - uploaded_image_url=uploaded_image_url, + query_images=query_images, model_type=conversation_config.model_type, ) @@ -766,7 +766,7 @@ async def send_message_to_model_wrapper( max_prompt_size=max_tokens, tokenizer_name=tokenizer, vision_enabled=vision_available, - 
uploaded_image_url=uploaded_image_url, + query_images=query_images, model_type=conversation_config.model_type, ) @@ -784,7 +784,8 @@ async def send_message_to_model_wrapper( max_prompt_size=max_tokens, tokenizer_name=tokenizer, vision_enabled=vision_available, - uploaded_image_url=uploaded_image_url, + query_images=query_images, + model_type=conversation_config.model_type, ) return gemini_send_message_to_model( @@ -875,6 +876,7 @@ def send_message_to_model_wrapper_sync( model_name=chat_model, max_prompt_size=max_tokens, vision_enabled=vision_available, + model_type=conversation_config.model_type, ) return gemini_send_message_to_model( @@ -900,7 +902,7 @@ def generate_chat_response( conversation_id: str = None, location_data: LocationData = None, user_name: Optional[str] = None, - uploaded_image_url: Optional[str] = None, + query_images: Optional[List[str]] = None, ) -> Tuple[Union[ThreadedGenerator, Iterator[str]], Dict[str, str]]: # Initialize Variables chat_response = None @@ -919,12 +921,12 @@ def generate_chat_response( inferred_queries=inferred_queries, client_application=client_application, conversation_id=conversation_id, - uploaded_image_url=uploaded_image_url, + query_images=query_images, ) conversation_config = ConversationAdapters.get_valid_conversation_config(user, conversation) vision_available = conversation_config.vision_enabled - if not vision_available and uploaded_image_url: + if not vision_available and query_images: vision_enabled_config = ConversationAdapters.get_vision_enabled_config() if vision_enabled_config: conversation_config = vision_enabled_config @@ -955,7 +957,7 @@ def generate_chat_response( chat_response = converse( compiled_references, q, - image_url=uploaded_image_url, + query_images=query_images, online_results=online_results, conversation_log=meta_log, model=chat_model, From 0d6a54c10fe18efc615eb053b5966205ce0de5e5 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 17 Oct 2024 23:08:20 -0700 Subject: [PATCH 02/24] Allow sharing multiple images as part of user query from the web app Previously the web app only expected a single image to be shared by the user as part of their query. This change allows sharing multiple images from the web app. 
Closes #921 --- src/interface/web/app/chat/page.tsx | 39 +++++---- .../components/chatHistory/chatHistory.tsx | 3 +- .../chatInputArea/chatInputArea.tsx | 87 ++++++++++++------- .../components/chatMessage/chatMessage.tsx | 17 ++-- src/interface/web/app/page.tsx | 13 +-- src/interface/web/app/share/chat/page.tsx | 40 ++++++--- 6 files changed, 122 insertions(+), 77 deletions(-) diff --git a/src/interface/web/app/chat/page.tsx b/src/interface/web/app/chat/page.tsx index 7d87fd81..c9c38870 100644 --- a/src/interface/web/app/chat/page.tsx +++ b/src/interface/web/app/chat/page.tsx @@ -17,8 +17,6 @@ import { useIPLocationData, useIsMobileWidth, welcomeConsole } from "../common/u import ChatInputArea, { ChatOptions } from "../components/chatInputArea/chatInputArea"; import { useAuthenticatedData } from "../common/auth"; import { AgentData } from "../agents/page"; -import { DotsThreeVertical } from "@phosphor-icons/react"; -import { Button } from "@/components/ui/button"; interface ChatBodyDataProps { chatOptionsData: ChatOptions | null; @@ -29,14 +27,14 @@ interface ChatBodyDataProps { setUploadedFiles: (files: string[]) => void; isMobileWidth?: boolean; isLoggedIn: boolean; - setImage64: (image64: string) => void; + setImages: (images: string[]) => void; } function ChatBodyData(props: ChatBodyDataProps) { const searchParams = useSearchParams(); const conversationId = searchParams.get("conversationId"); const [message, setMessage] = useState(""); - const [image, setImage] = useState(null); + const [images, setImages] = useState([]); const [processingMessage, setProcessingMessage] = useState(false); const [agentMetadata, setAgentMetadata] = useState(null); @@ -44,17 +42,20 @@ function ChatBodyData(props: ChatBodyDataProps) { const onConversationIdChange = props.onConversationIdChange; useEffect(() => { - if (image) { - props.setImage64(encodeURIComponent(image)); + if (images.length > 0) { + const encodedImages = images.map((image) => encodeURIComponent(image)); + props.setImages(encodedImages); } - }, [image, props.setImage64]); + }, [images, props.setImages]); useEffect(() => { - const storedImage = localStorage.getItem("image"); - if (storedImage) { - setImage(storedImage); - props.setImage64(encodeURIComponent(storedImage)); - localStorage.removeItem("image"); + const storedImages = localStorage.getItem("images"); + if (storedImages) { + const parsedImages: string[] = JSON.parse(storedImages); + setImages(parsedImages); + const encodedImages = parsedImages.map((img: string) => encodeURIComponent(img)); + props.setImages(encodedImages); + localStorage.removeItem("images"); } const storedMessage = localStorage.getItem("message"); @@ -62,7 +63,7 @@ function ChatBodyData(props: ChatBodyDataProps) { setProcessingMessage(true); setQueryToProcess(storedMessage); } - }, [setQueryToProcess]); + }, [setQueryToProcess, props.setImages]); useEffect(() => { if (message) { @@ -112,7 +113,7 @@ function ChatBodyData(props: ChatBodyDataProps) { agentColor={agentMetadata?.color} isLoggedIn={props.isLoggedIn} sendMessage={(message) => setMessage(message)} - sendImage={(image) => setImage(image)} + sendImage={(image) => setImages((prevImages) => [...prevImages, image])} sendDisabled={processingMessage} chatOptionsData={props.chatOptionsData} conversationId={conversationId} @@ -134,7 +135,7 @@ export default function Chat() { const [queryToProcess, setQueryToProcess] = useState(""); const [processQuerySignal, setProcessQuerySignal] = useState(false); const [uploadedFiles, setUploadedFiles] = useState([]); - const 
[image64, setImage64] = useState(""); + const [images, setImages] = useState([]); const locationData = useIPLocationData() || { timezone: Intl.DateTimeFormat().resolvedOptions().timeZone, @@ -170,7 +171,7 @@ export default function Chat() { completed: false, timestamp: new Date().toISOString(), rawQuery: queryToProcess || "", - uploadedImageData: decodeURIComponent(image64), + images: images, }; setMessages((prevMessages) => [...prevMessages, newStreamMessage]); setProcessQuerySignal(true); @@ -201,7 +202,7 @@ export default function Chat() { if (done) { setQueryToProcess(""); setProcessQuerySignal(false); - setImage64(""); + setImages([]); break; } @@ -249,7 +250,7 @@ export default function Chat() { country_code: locationData.countryCode, timezone: locationData.timezone, }), - ...(image64 && { image: image64 }), + ...(images.length > 0 && { images: images }), }; const response = await fetch(chatAPI, { @@ -331,7 +332,7 @@ export default function Chat() { setUploadedFiles={setUploadedFiles} isMobileWidth={isMobileWidth} onConversationIdChange={handleConversationIdChange} - setImage64={setImage64} + setImages={setImages} /> diff --git a/src/interface/web/app/components/chatHistory/chatHistory.tsx b/src/interface/web/app/components/chatHistory/chatHistory.tsx index 1a7c90c0..fc37ba7d 100644 --- a/src/interface/web/app/components/chatHistory/chatHistory.tsx +++ b/src/interface/web/app/components/chatHistory/chatHistory.tsx @@ -298,7 +298,7 @@ export default function ChatHistory(props: ChatHistoryProps) { created: message.timestamp, by: "you", automationId: "", - uploadedImageData: message.uploadedImageData, + images: message.images, }} customClassName="fullHistory" borderLeftColor={`${data?.agent?.color}-500`} @@ -341,7 +341,6 @@ export default function ChatHistory(props: ChatHistoryProps) { created: new Date().getTime().toString(), by: "you", automationId: "", - uploadedImageData: props.pendingMessage, }} customClassName="fullHistory" borderLeftColor={`${data?.agent?.color}-500`} diff --git a/src/interface/web/app/components/chatInputArea/chatInputArea.tsx b/src/interface/web/app/components/chatInputArea/chatInputArea.tsx index d85d6a54..fde23a0d 100644 --- a/src/interface/web/app/components/chatInputArea/chatInputArea.tsx +++ b/src/interface/web/app/components/chatInputArea/chatInputArea.tsx @@ -78,10 +78,11 @@ export default function ChatInputArea(props: ChatInputProps) { const [loginRedirectMessage, setLoginRedirectMessage] = useState(null); const [showLoginPrompt, setShowLoginPrompt] = useState(false); - const [recording, setRecording] = useState(false); const [imageUploaded, setImageUploaded] = useState(false); - const [imagePath, setImagePath] = useState(""); - const [imageData, setImageData] = useState(null); + const [imagePaths, setImagePaths] = useState([]); + const [imageData, setImageData] = useState([]); + + const [recording, setRecording] = useState(false); const [mediaRecorder, setMediaRecorder] = useState(null); const [progressValue, setProgressValue] = useState(0); @@ -106,27 +107,31 @@ export default function ChatInputArea(props: ChatInputProps) { useEffect(() => { async function fetchImageData() { - if (imagePath) { - const response = await fetch(imagePath); - const blob = await response.blob(); - const reader = new FileReader(); - reader.onload = function () { - const base64data = reader.result; - setImageData(base64data as string); - }; - reader.readAsDataURL(blob); + if (imagePaths.length > 0) { + const newImageData = await Promise.all( + imagePaths.map(async (path) => { 
+ const response = await fetch(path); + const blob = await response.blob(); + return new Promise((resolve) => { + const reader = new FileReader(); + reader.onload = () => resolve(reader.result as string); + reader.readAsDataURL(blob); + }); + }), + ); + setImageData(newImageData); } setUploading(false); } setUploading(true); fetchImageData(); - }, [imagePath]); + }, [imagePaths]); function onSendMessage() { if (imageUploaded) { setImageUploaded(false); - setImagePath(""); - props.sendImage(imageData || ""); + setImagePaths([]); + imageData.forEach((data) => props.sendImage(data)); } if (!message.trim()) return; @@ -172,18 +177,23 @@ export default function ChatInputArea(props: ChatInputProps) { setShowLoginPrompt(true); return; } - // check for image file + // check for image files const image_endings = ["jpg", "jpeg", "png", "webp"]; + const newImagePaths: string[] = []; for (let i = 0; i < files.length; i++) { const file = files[i]; const file_extension = file.name.split(".").pop(); if (image_endings.includes(file_extension || "")) { - setImageUploaded(true); - setImagePath(DOMPurify.sanitize(URL.createObjectURL(file))); - return; + newImagePaths.push(DOMPurify.sanitize(URL.createObjectURL(file))); } } + if (newImagePaths.length > 0) { + setImageUploaded(true); + setImagePaths((prevPaths) => [...prevPaths, ...newImagePaths]); + return; + } + uploadDataForIndexing( files, setWarning, @@ -288,9 +298,12 @@ export default function ChatInputArea(props: ChatInputProps) { setIsDragAndDropping(false); } - function removeImageUpload() { - setImageUploaded(false); - setImagePath(""); + function removeImageUpload(index: number) { + setImagePaths((prevPaths) => prevPaths.filter((_, i) => i !== index)); + setImageData((prevData) => prevData.filter((_, i) => i !== index)); + if (imagePaths.length === 1) { + setImageUploaded(false); + } } return ( @@ -413,16 +426,24 @@ export default function ChatInputArea(props: ChatInputProps) { onDrop={handleDragAndDropFiles} > {imageUploaded && ( -
-
- img -
-
- -
+
+ {imagePaths.map((path, index) => ( +
+ {`img-${index}`} + +
+ ))}
)} { if (e.key === "Enter" && !e.shiftKey) { setImageUploaded(false); - setImagePath(""); + setImagePaths([]); e.preventDefault(); onSendMessage(); } diff --git a/src/interface/web/app/components/chatMessage/chatMessage.tsx b/src/interface/web/app/components/chatMessage/chatMessage.tsx index 23371512..e0d0f09c 100644 --- a/src/interface/web/app/components/chatMessage/chatMessage.tsx +++ b/src/interface/web/app/components/chatMessage/chatMessage.tsx @@ -114,7 +114,7 @@ export interface SingleChatMessage { rawQuery?: string; intent?: Intent; agent?: AgentData; - uploadedImageData?: string; + images?: string[]; } export interface StreamMessage { @@ -126,7 +126,7 @@ export interface StreamMessage { rawQuery: string; timestamp: string; agent?: AgentData; - uploadedImageData?: string; + images?: string[]; } export interface ChatHistoryData { @@ -208,7 +208,6 @@ interface ChatMessageProps { borderLeftColor?: string; isLastMessage?: boolean; agent?: AgentData; - uploadedImageData?: string; } interface TrainOfThoughtProps { @@ -328,8 +327,14 @@ const ChatMessage = forwardRef((props, ref) => .replace(/\\\[/g, "LEFTBRACKET") .replace(/\\\]/g, "RIGHTBRACKET"); - if (props.chatMessage.uploadedImageData) { - message = `![uploaded image](${props.chatMessage.uploadedImageData})\n\n${message}`; + if (props.chatMessage.images && props.chatMessage.images.length > 0) { + const imagesInMd = props.chatMessage.images + .map( + (image) => + `![uploaded image](${image.startsWith("data%3Aimage") ? decodeURIComponent(image) : image})`, + ) + .join("\n\n"); + message = `${imagesInMd}\n\n${message}`; } if (props.chatMessage.intent && props.chatMessage.intent.type == "text-to-image") { @@ -364,7 +369,7 @@ const ChatMessage = forwardRef((props, ref) => // Sanitize and set the rendered markdown setMarkdownRendered(DOMPurify.sanitize(markdownRendered)); - }, [props.chatMessage.message, props.chatMessage.intent]); + }, [props.chatMessage.message, props.chatMessage.images, props.chatMessage.intent]); useEffect(() => { if (copySuccess) { diff --git a/src/interface/web/app/page.tsx b/src/interface/web/app/page.tsx index 158b6fb7..7002a340 100644 --- a/src/interface/web/app/page.tsx +++ b/src/interface/web/app/page.tsx @@ -44,7 +44,7 @@ function FisherYatesShuffle(array: any[]) { function ChatBodyData(props: ChatBodyDataProps) { const [message, setMessage] = useState(""); - const [image, setImage] = useState(null); + const [images, setImages] = useState([]); const [processingMessage, setProcessingMessage] = useState(false); const [greeting, setGreeting] = useState(""); const [shuffledOptions, setShuffledOptions] = useState([]); @@ -140,18 +140,19 @@ function ChatBodyData(props: ChatBodyDataProps) { onConversationIdChange?.(newConversationId); window.location.href = `/chat?conversationId=${newConversationId}`; localStorage.setItem("message", message); - if (image) { - localStorage.setItem("image", image); + if (images.length > 0) { + localStorage.setItem("images", JSON.stringify(images)); } } catch (error) { console.error("Error creating new conversation:", error); setProcessingMessage(false); } setMessage(""); + setImages([]); } }; processMessage(); - if (message) { + if (message || images.length > 0) { setProcessingMessage(true); } }, [selectedAgent, message, processingMessage, onConversationIdChange]); @@ -232,7 +233,7 @@ function ChatBodyData(props: ChatBodyDataProps) { setMessage(message)} - sendImage={(image) => setImage(image)} + sendImage={(image) => setImages((prevImages) => [...prevImages, image])} 
sendDisabled={processingMessage} chatOptionsData={props.chatOptionsData} conversationId={null} @@ -313,7 +314,7 @@ function ChatBodyData(props: ChatBodyDataProps) { setMessage(message)} - sendImage={(image) => setImage(image)} + sendImage={(image) => setImages((prevImages) => [...prevImages, image])} sendDisabled={processingMessage} chatOptionsData={props.chatOptionsData} conversationId={null} diff --git a/src/interface/web/app/share/chat/page.tsx b/src/interface/web/app/share/chat/page.tsx index 9bc5f12d..b1b92034 100644 --- a/src/interface/web/app/share/chat/page.tsx +++ b/src/interface/web/app/share/chat/page.tsx @@ -28,22 +28,40 @@ interface ChatBodyDataProps { isLoggedIn: boolean; conversationId?: string; setQueryToProcess: (query: string) => void; - setImage64: (image64: string) => void; + setImages: (images: string[]) => void; } function ChatBodyData(props: ChatBodyDataProps) { const [message, setMessage] = useState(""); - const [image, setImage] = useState(null); + const [images, setImages] = useState([]); const [processingMessage, setProcessingMessage] = useState(false); const [agentMetadata, setAgentMetadata] = useState(null); const setQueryToProcess = props.setQueryToProcess; const streamedMessages = props.streamedMessages; useEffect(() => { - if (image) { - props.setImage64(encodeURIComponent(image)); + if (images.length > 0) { + const encodedImages = images.map((image) => encodeURIComponent(image)); + props.setImages(encodedImages); } - }, [image, props.setImage64]); + }, [images, props.setImages]); + + useEffect(() => { + const storedImages = localStorage.getItem("images"); + if (storedImages) { + const parsedImages: string[] = JSON.parse(storedImages); + setImages(parsedImages); + const encodedImages = parsedImages.map((img: string) => encodeURIComponent(img)); + props.setImages(encodedImages); + localStorage.removeItem("images"); + } + + const storedMessage = localStorage.getItem("message"); + if (storedMessage) { + setProcessingMessage(true); + setQueryToProcess(storedMessage); + } + }, [setQueryToProcess, props.setImages]); useEffect(() => { if (message) { @@ -86,7 +104,7 @@ function ChatBodyData(props: ChatBodyDataProps) { setMessage(message)} - sendImage={(image) => setImage(image)} + sendImage={(image) => setImages((prevImages) => [...prevImages, image])} sendDisabled={processingMessage} chatOptionsData={props.chatOptionsData} conversationId={props.conversationId} @@ -109,7 +127,7 @@ export default function SharedChat() { const [processQuerySignal, setProcessQuerySignal] = useState(false); const [uploadedFiles, setUploadedFiles] = useState([]); const [paramSlug, setParamSlug] = useState(undefined); - const [image64, setImage64] = useState(""); + const [images, setImages] = useState([]); const locationData = useIPLocationData() || { timezone: Intl.DateTimeFormat().resolvedOptions().timeZone, @@ -167,7 +185,7 @@ export default function SharedChat() { completed: false, timestamp: new Date().toISOString(), rawQuery: queryToProcess || "", - uploadedImageData: decodeURIComponent(image64), + images: images, }; setMessages((prevMessages) => [...prevMessages, newStreamMessage]); setProcessQuerySignal(true); @@ -194,7 +212,7 @@ export default function SharedChat() { if (done) { setQueryToProcess(""); setProcessQuerySignal(false); - setImage64(""); + setImages([]); break; } @@ -236,7 +254,7 @@ export default function SharedChat() { country_code: locationData.countryCode, timezone: locationData.timezone, }), - ...(image64 && { image: image64 }), + ...(images.length > 0 && { image: 
images }), }; const response = await fetch(chatAPI, { @@ -286,7 +304,7 @@ export default function SharedChat() { setTitle={setTitle} setUploadedFiles={setUploadedFiles} isMobileWidth={isMobileWidth} - setImage64={setImage64} + setImages={setImages} />
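For context, the construct_structured_message helper updated in the first patch is what turns a user query with several attached images into the multi-part chatml content that vision-enabled chat models expect. A minimal usage sketch, assuming vision is enabled on an OpenAI-type chat model; the query text and base64 data URLs below are placeholders, not real uploads:

from khoj.database.models import ChatModelOptions
from khoj.processor.conversation.utils import construct_structured_message

# Placeholder data URLs standing in for the uploaded, webp-converted images
query_images = [
    "data:image/webp;base64,UklGRg==",
    "data:image/webp;base64,UklGRh==",
]

message = construct_structured_message(
    message="What do these two screenshots have in common?",
    images=query_images,
    model_type=ChatModelOptions.ModelType.OPENAI,
    vision_enabled=True,
)

# message is now a list with one text part followed by one image_url part per
# attached image; with no images or vision disabled it stays a plain string.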
From 3e39fac455f58f390113d7fc34439c0387e159b1 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Fri, 18 Oct 2024 19:13:06 -0700 Subject: [PATCH 03/24] Add vision support for Gemini models in Khoj --- .../conversation/google/gemini_chat.py | 38 ++++++++++----- .../processor/conversation/google/utils.py | 46 ++++++++++++++----- src/khoj/processor/conversation/utils.py | 2 +- src/khoj/routers/api.py | 2 + src/khoj/routers/helpers.py | 6 ++- 5 files changed, 67 insertions(+), 27 deletions(-) diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py index e8848806..10af8b4d 100644 --- a/src/khoj/processor/conversation/google/gemini_chat.py +++ b/src/khoj/processor/conversation/google/gemini_chat.py @@ -13,7 +13,10 @@ from khoj.processor.conversation.google.utils import ( gemini_chat_completion_with_backoff, gemini_completion_with_backoff, ) -from khoj.processor.conversation.utils import generate_chatml_messages_with_context +from khoj.processor.conversation.utils import ( + construct_structured_message, + generate_chatml_messages_with_context, +) from khoj.utils.helpers import ConversationCommand, is_none_or_empty from khoj.utils.rawconfig import LocationData @@ -29,6 +32,8 @@ def extract_questions_gemini( max_tokens=None, location_data: LocationData = None, user: KhojUser = None, + query_images: Optional[list[str]] = None, + vision_enabled: bool = False, personality_context: Optional[str] = None, ): """ @@ -70,17 +75,17 @@ def extract_questions_gemini( text=text, ) - messages = [ChatMessage(content=prompt, role="user")] + prompt = construct_structured_message( + message=prompt, + images=query_images, + model_type=ChatModelOptions.ModelType.GOOGLE, + vision_enabled=vision_enabled, + ) - model_kwargs = {"response_mime_type": "application/json"} + messages = [ChatMessage(content=prompt, role="user"), ChatMessage(content=system_prompt, role="system")] - response = gemini_completion_with_backoff( - messages=messages, - system_prompt=system_prompt, - model_name=model, - temperature=temperature, - api_key=api_key, - model_kwargs=model_kwargs, + response = gemini_send_message_to_model( + messages, api_key, model, response_type="json_object", temperature=temperature ) # Extract, Clean Message from Gemini's Response @@ -102,7 +107,7 @@ def extract_questions_gemini( return questions -def gemini_send_message_to_model(messages, api_key, model, response_type="text"): +def gemini_send_message_to_model(messages, api_key, model, response_type="text", temperature=0, model_kwargs=None): """ Send message to model """ @@ -114,7 +119,12 @@ def gemini_send_message_to_model(messages, api_key, model, response_type="text") # Get Response from Gemini return gemini_completion_with_backoff( - messages=messages, system_prompt=system_prompt, model_name=model, api_key=api_key, model_kwargs=model_kwargs + messages=messages, + system_prompt=system_prompt, + model_name=model, + api_key=api_key, + temperature=temperature, + model_kwargs=model_kwargs, ) @@ -133,6 +143,8 @@ def converse_gemini( location_data: LocationData = None, user_name: str = None, agent: Agent = None, + query_images: Optional[list[str]] = None, + vision_available: bool = False, ): """ Converse with user using Google's Gemini @@ -187,6 +199,8 @@ def converse_gemini( model_name=model, max_prompt_size=max_prompt_size, tokenizer_name=tokenizer_name, + query_images=query_images, + vision_enabled=vision_available, model_type=ChatModelOptions.ModelType.GOOGLE, ) diff --git 
a/src/khoj/processor/conversation/google/utils.py b/src/khoj/processor/conversation/google/utils.py index 5679ba4d..d19b02f2 100644 --- a/src/khoj/processor/conversation/google/utils.py +++ b/src/khoj/processor/conversation/google/utils.py @@ -1,8 +1,11 @@ import logging import random +from io import BytesIO from threading import Thread import google.generativeai as genai +import PIL.Image +import requests from google.generativeai.types.answer_types import FinishReason from google.generativeai.types.generation_types import StopCandidateException from google.generativeai.types.safety_types import ( @@ -53,14 +56,14 @@ def gemini_completion_with_backoff( }, ) - formatted_messages = [{"role": message.role, "parts": [message.content]} for message in messages] + formatted_messages = [{"role": message.role, "parts": message.content} for message in messages] # Start chat session. All messages up to the last are considered to be part of the chat history chat_session = model.start_chat(history=formatted_messages[0:-1]) try: # Generate the response. The last message is considered to be the current prompt - aggregated_response = chat_session.send_message(formatted_messages[-1]["parts"][0]) + aggregated_response = chat_session.send_message(formatted_messages[-1]["parts"]) return aggregated_response.text except StopCandidateException as e: response_message, _ = handle_gemini_response(e.args) @@ -117,11 +120,11 @@ def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_k }, ) - formatted_messages = [{"role": message.role, "parts": [message.content]} for message in messages] + formatted_messages = [{"role": message.role, "parts": message.content} for message in messages] # all messages up to the last are considered to be part of the chat history chat_session = model.start_chat(history=formatted_messages[0:-1]) # the last message is considered to be the current prompt - for chunk in chat_session.send_message(formatted_messages[-1]["parts"][0], stream=True): + for chunk in chat_session.send_message(formatted_messages[-1]["parts"], stream=True): message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback) message = message or chunk.text g.send(message) @@ -191,14 +194,6 @@ def generate_safety_response(safety_ratings): def format_messages_for_gemini(messages: list[ChatMessage], system_prompt: str = None) -> tuple[list[str], str]: - if len(messages) == 1: - messages[0].role = "user" - return messages, system_prompt - - for message in messages: - if message.role == "assistant": - message.role = "model" - # Extract system message system_prompt = system_prompt or "" for message in messages.copy(): @@ -207,4 +202,31 @@ def format_messages_for_gemini(messages: list[ChatMessage], system_prompt: str = messages.remove(message) system_prompt = None if is_none_or_empty(system_prompt) else system_prompt + for message in messages: + # Convert message content to string list from chatml dictionary list + if isinstance(message.content, list): + # Convert image_urls to PIL.Image and place them at beginning of list (better for Gemini) + message.content = [ + get_image_from_url(item["image_url"]["url"]) if item["type"] == "image_url" else item["text"] + for item in sorted(message.content, key=lambda x: 0 if x["type"] == "image_url" else 1) + ] + elif isinstance(message.content, str): + message.content = [message.content] + + if message.role == "assistant": + message.role = "model" + + if len(messages) == 1: + messages[0].role = "user" + return messages, system_prompt + + +def 
get_image_from_url(image_url: str) -> PIL.Image: + try: + response = requests.get(image_url) + response.raise_for_status() # Check if the request was successful + return PIL.Image.open(BytesIO(response.content)) + except requests.exceptions.RequestException as e: + logger.error(f"Failed to get image from URL {image_url}: {e}") + return None diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 8d799745..789be3a5 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -152,7 +152,7 @@ def construct_structured_message(message: str, images: list[str], model_type: st if not images or not vision_enabled: return message - if model_type == ChatModelOptions.ModelType.OPENAI: + if model_type in [ChatModelOptions.ModelType.OPENAI, ChatModelOptions.ModelType.GOOGLE]: return [ {"type": "text", "text": message}, *[{"type": "image_url", "image_url": {"url": image}} for image in images], diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 075c8c47..33edd61f 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -452,12 +452,14 @@ async def extract_references_and_questions( chat_model = conversation_config.chat_model inferred_queries = extract_questions_gemini( defiltered_query, + query_images=query_images, model=chat_model, api_key=api_key, conversation_log=meta_log, location_data=location_data, max_tokens=conversation_config.max_prompt_size, user=user, + vision_enabled=vision_enabled, personality_context=personality_context, ) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 7ed9c72d..739a3ad6 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -995,8 +995,9 @@ def generate_chat_response( chat_response = converse_gemini( compiled_references, q, - online_results, - meta_log, + query_images=query_images, + online_results=online_results, + conversation_log=meta_log, model=conversation_config.chat_model, api_key=api_key, completion_func=partial_completion, @@ -1006,6 +1007,7 @@ def generate_chat_response( location_data=location_data, user_name=user_name, agent=agent, + vision_available=vision_available, ) metadata.update({"chat_model": conversation_config.chat_model}) From 58a331227dab6fcfa60cad63edef519766bcff9d Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sat, 19 Oct 2024 16:29:45 -0700 Subject: [PATCH 04/24] Display the attached images inside the chat input area on the web app - Put the attached images display div inside the same parent div as the text area - Keep the attachment, microphone/send message buttons aligned with the text area. So the attached images just show up at the top of the text area but everything else stays at the same horizontal height as before. - This improves the UX by - Ensuring that the attached images do not obscure the agents pane above the chat input area - The attached images visually look like they are inside the actual input area, rather than floating above it. 
So the visual aligns with the semantics --- .../chatInputArea/chatInputArea.tsx | 167 +++++++++--------- 1 file changed, 85 insertions(+), 82 deletions(-) diff --git a/src/interface/web/app/components/chatInputArea/chatInputArea.tsx b/src/interface/web/app/components/chatInputArea/chatInputArea.tsx index fde23a0d..35e34f99 100644 --- a/src/interface/web/app/components/chatInputArea/chatInputArea.tsx +++ b/src/interface/web/app/components/chatInputArea/chatInputArea.tsx @@ -420,32 +420,11 @@ export default function ChatInputArea(props: ChatInputProps) { )}
- {imageUploaded && ( -
- {imagePaths.map((path, index) => ( -
- {`img-${index}`} - -
- ))} -
- )} - -
+
+ +
+
+
+ {imageUploaded && + imagePaths.map((path, index) => ( +
+ {`img-${index}`} + +
+ ))} +