Merge branch 'master' into multi-image-chat-and-vision-for-gemini

This commit is contained in:
Debanjum Singh Solanky 2024-10-22 18:29:44 -07:00
commit 6c393800cc
43 changed files with 1080 additions and 147 deletions

View file

@ -1,7 +1,7 @@
{
"id": "khoj",
"name": "Khoj",
"version": "1.26.0",
"version": "1.26.4",
"minAppVersion": "0.15.0",
"description": "Your Second Brain",
"author": "Khoj Inc.",

View file

@ -62,7 +62,7 @@ dependencies = [
"requests >= 2.26.0",
"tenacity == 8.3.0",
"anyio == 3.7.1",
"pymupdf >= 1.23.5",
"pymupdf == 1.24.11",
"django == 5.0.9",
"authlib == 1.2.1",
"llama-cpp-python == 0.2.88",

View file

@ -326,7 +326,7 @@
entries.forEach(entry => {
// If the element is in the viewport, fetch the remaining message and unobserve the element
if (entry.isIntersecting) {
fetchRemainingChatMessages(chatHistoryUrl, headers);
fetchRemainingChatMessages(chatHistoryUrl, headers, chatBody.dataset.conversation_id, hostURL);
observer.unobserve(entry.target);
}
});
@ -342,7 +342,11 @@
new Date(chat_log.created),
chat_log.onlineContext,
chat_log.intent?.type,
chat_log.intent?.["inferred-queries"]);
chat_log.intent?.["inferred-queries"],
chatBody.dataset.conversationId ?? "",
hostURL,
);
chatBody.appendChild(messageElement);
// When the 4th oldest message is within viewing distance (~60% scrolled up)
@ -421,7 +425,7 @@
}
}
function fetchRemainingChatMessages(chatHistoryUrl, headers) {
function fetchRemainingChatMessages(chatHistoryUrl, headers, conversationId, hostURL) {
// Create a new IntersectionObserver
let observer = new IntersectionObserver((entries, observer) => {
entries.forEach(entry => {
@ -435,7 +439,9 @@
new Date(chat_log.created),
chat_log.onlineContext,
chat_log.intent?.type,
chat_log.intent?.["inferred-queries"]
chat_log.intent?.["inferred-queries"],
chatBody.dataset.conversationId ?? "",
hostURL,
);
entry.target.replaceWith(messageElement);

View file

@ -189,11 +189,19 @@ function processOnlineReferences(referenceSection, onlineContext) { //same
return numOnlineReferences;
}
function renderMessageWithReference(message, by, context=null, dt=null, onlineContext=null, intentType=null, inferredQueries=null) { //same
function renderMessageWithReference(message, by, context=null, dt=null, onlineContext=null, intentType=null, inferredQueries=null, conversationId=null, hostURL=null) {
let chatEl;
if (intentType?.includes("text-to-image")) {
let imageMarkdown = generateImageMarkdown(message, intentType, inferredQueries);
chatEl = renderMessage(imageMarkdown, by, dt, null, false, "return");
} else if (intentType === "excalidraw") {
let domain = hostURL ?? "https://app.khoj.dev/";
if (!domain.endsWith("/")) domain += "/";
let excalidrawMessage = `Hey, I'm not ready to show you diagrams yet here. But you can view it in the web app at ${domain}chat?conversationId=${conversationId}`;
chatEl = renderMessage(excalidrawMessage, by, dt, null, false, "return");
} else {
chatEl = renderMessage(message, by, dt, null, false, "return");
}
@ -312,7 +320,6 @@ function formatHTMLMessage(message, raw=false, willReplace=true) { //same
}
function createReferenceSection(references, createLinkerSection=false) {
console.log("linker data: ", createLinkerSection);
let referenceSection = document.createElement('div');
referenceSection.classList.add("reference-section");
referenceSection.classList.add("collapsed");
@ -417,7 +424,11 @@ function handleImageResponse(imageJson, rawResponse) {
rawResponse += `![generated_image](${imageJson.image})`;
} else if (imageJson.intentType === "text-to-image-v3") {
rawResponse = `![](data:image/webp;base64,${imageJson.image})`;
} else if (imageJson.intentType === "excalidraw") {
const redirectMessage = `Hey, I'm not ready to show you diagrams yet here. But you can view it in the web app`;
rawResponse += redirectMessage;
}
if (inferredQuery) {
rawResponse += `\n\n**Inferred Query**:\n\n${inferredQuery}`;
}

View file

@ -1,6 +1,6 @@
{
"name": "Khoj",
"version": "1.26.0",
"version": "1.26.4",
"description": "Your Second Brain",
"author": "Khoj Inc. <team@khoj.dev>",
"license": "GPL-3.0-or-later",

View file

@ -6,7 +6,7 @@
;; Saba Imran <saba@khoj.dev>
;; Description: Your Second Brain
;; Keywords: search, chat, ai, org-mode, outlines, markdown, pdf, image
;; Version: 1.26.0
;; Version: 1.26.4
;; Package-Requires: ((emacs "27.1") (transient "0.3.0") (dash "2.19.1"))
;; URL: https://github.com/khoj-ai/khoj/tree/master/src/interface/emacs
@ -127,6 +127,11 @@
(const "image")
(const "pdf")))
;; User-facing option; `khoj--selected-agent' takes this as its initial value.
(defcustom khoj-default-agent "khoj"
"The default agent to chat with. See https://app.khoj.dev/agents for available options."
:group 'khoj
:type 'string)
;; --------------------------
;; Khoj Dynamic Configuration
@ -144,6 +149,9 @@
(defconst khoj--chat-buffer-name "*🏮 Khoj Chat*"
"Name of chat buffer for Khoj.")
;; Starts out as `khoj-default-agent'; the :reader of `khoj--agent-switch'
;; overwrites it with the slug of the agent the user picks.
(defvar khoj--selected-agent khoj-default-agent
"Currently selected Khoj agent.")
(defvar khoj--content-type "org"
"The type of content to perform search on.")
@ -656,13 +664,15 @@ Simplified fork of `org-cycle-content' from Emacs 29.1 to work with >=27.1."
;; --------------
;; Query Khoj API
;; --------------
(defun khoj--call-api (path &optional method params callback &rest cbargs)
"Sync call API at PATH with METHOD and query PARAMS as kv assoc list.
(defun khoj--call-api (path &optional method params body callback &rest cbargs)
"Sync call API at PATH with METHOD, query PARAMS and BODY as kv assoc list.
Optionally apply CALLBACK with JSON parsed response and CBARGS."
(let* ((url-request-method (or method "GET"))
(url-request-extra-headers `(("Authorization" . ,(format "Bearer %s" khoj-api-key))))
(param-string (if params (url-build-query-string params) ""))
(query-url (format "%s%s?%s&client=emacs" khoj-server-url path param-string))
(url-request-extra-headers `(("Authorization" . ,(format "Bearer %s" khoj-api-key)) ("Content-Type" . "application/json")))
(url-request-data (if body (json-encode body) nil))
(param-string (url-build-query-string (append params '((client "emacs")))))
(query-url (format "%s%s?%s" khoj-server-url path param-string))
(cbargs (if (and (listp cbargs) (listp (car cbargs))) (car cbargs) cbargs))) ; normalize cbargs to (a b) from ((a b)) if required
(with-temp-buffer
(condition-case ex
@ -682,8 +692,8 @@ Optionally apply CALLBACK with JSON parsed response and CBARGS."
(url-request-extra-headers `(("Authorization" . ,(format "Bearer %s" khoj-api-key)) ("Content-Type" . "application/json")))
(url-request-data (if body (json-encode body) nil))
(param-string (url-build-query-string (append params '((client "emacs")))))
(cbargs (if (and (listp cbargs) (listp (car cbargs))) (car cbargs) cbargs)) ; normalize cbargs to (a b) from ((a b)) if required
(query-url (format "%s%s?%s" khoj-server-url path param-string)))
(query-url (format "%s%s?%s" khoj-server-url path param-string))
(cbargs (if (and (listp cbargs) (listp (car cbargs))) (car cbargs) cbargs))) ; normalize cbargs to (a b) from ((a b)) if required
(url-retrieve query-url
(lambda (status)
(if (plist-get status :error)
@ -699,7 +709,7 @@ Optionally apply CALLBACK with JSON parsed response and CBARGS."
(defun khoj--get-enabled-content-types ()
"Get content types enabled for search from API."
(khoj--call-api "/api/content/types" "GET" nil `(lambda (item) (mapcar #'intern item))))
(khoj--call-api "/api/content/types" "GET" nil nil `(lambda (item) (mapcar #'intern item))))
(defun khoj--query-search-api-and-render-results (query content-type buffer-name &optional rerank is-find-similar)
"Query Khoj Search API with QUERY, CONTENT-TYPE and RERANK as query params.
@ -913,14 +923,16 @@ Call CALLBACK func with response and CBARGS."
(let ((selected-session-id (khoj--select-conversation-session "Open")))
(khoj--load-chat-session khoj--chat-buffer-name selected-session-id)))
(defun khoj--create-chat-session ()
"Create new chat session."
(khoj--call-api "/api/chat/sessions" "POST"))
(defun khoj--create-chat-session (&optional agent)
  "Create a new chat session via the Khoj API.
When AGENT is non-nil, send it as the `agent_slug' query parameter."
  (let ((query-params (and agent (list (list "agent_slug" agent)))))
    (khoj--call-api "/api/chat/sessions" "POST" query-params)))
(defun khoj--new-conversation-session ()
"Create new Khoj conversation session."
(defun khoj--new-conversation-session (&optional agent)
"Create new Khoj conversation session with AGENT."
(thread-last
(khoj--create-chat-session)
(khoj--create-chat-session agent)
(assoc 'conversation_id)
(cdr)
(khoj--chat)))
@ -935,6 +947,15 @@ Call CALLBACK func with response and CBARGS."
(khoj--select-conversation-session "Delete")
(khoj--delete-chat-session)))
(defun khoj--get-agents ()
  "Fetch the available Khoj agents as an alist of (NAME . SLUG) pairs."
  ;; Each entry of the API response is itself an alist; keep only the
  ;; display name and the slug used to address the agent.
  (mapcar (lambda (entry)
            (cons (cdr (assoc 'name entry))
                  (cdr (assoc 'slug entry))))
          (khoj--call-api "/api/agents" "GET")))
(defun khoj--render-chat-message (message sender &optional receive-date)
"Render chat messages as `org-mode' list item.
MESSAGE is the text of the chat message.
@ -1246,6 +1267,20 @@ Paragraph only starts at first text after blank line."
;; dynamically set choices to content types enabled on khoj backend
:choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "image")))
;; Infix argument that carries the chosen agent as "--agent=<slug>";
;; `khoj--new-conversation-session-command' reads that value back.
;; NOTE(review): the class is `transient-switches' while a custom :reader is
;; also supplied -- confirm transient actually invokes the reader for this class.
(transient-define-argument khoj--agent-switch ()
:class 'transient-switches
:argument-format "--agent=%s"
:argument-regexp ".+"
;; Seed the infix value from the currently selected agent.
:init-value (lambda (obj)
(oset obj value (format "--agent=%s" khoj--selected-agent)))
;; Agent slugs from `khoj--get-agents'; falls back to just "khoj" when that
;; lookup signals an error or returns nil.
:choices (or (ignore-errors (mapcar #'cdr (khoj--get-agents))) '("khoj"))
;; Prompt with the agent names, map the chosen name back to its slug, store
;; the slug in `khoj--selected-agent' and return it.
;; Unlike :choices, this lookup is not wrapped in `ignore-errors'.
:reader (lambda (prompt initial-input history)
(let* ((agents (khoj--get-agents))
(selected (completing-read prompt agents nil t initial-input history))
(slug (cdr (assoc selected agents))))
(setq khoj--selected-agent slug)
slug)))
(transient-define-suffix khoj--search-command (&optional args)
(interactive (list (transient-args transient-current-command)))
(progn
@ -1287,10 +1322,11 @@ Paragraph only starts at first text after blank line."
(interactive (list (transient-args transient-current-command)))
(khoj--open-conversation-session))
(transient-define-suffix khoj--new-conversation-session-command (&optional _)
(transient-define-suffix khoj--new-conversation-session-command (&optional args)
"Command to select Khoj conversation sessions to open."
(interactive (list (transient-args transient-current-command)))
(khoj--new-conversation-session))
(let ((agent-slug (transient-arg-value "--agent=" args)))
(khoj--new-conversation-session agent-slug)))
(transient-define-suffix khoj--delete-conversation-session-command (&optional _)
"Command to select Khoj conversation sessions to delete."
@ -1298,14 +1334,15 @@ Paragraph only starts at first text after blank line."
(khoj--delete-conversation-session))
(transient-define-prefix khoj--chat-menu ()
"Open the Khoj chat menu."
["Act"
("c" "Chat" khoj--chat-command)
("o" "Open Conversation" khoj--open-conversation-session-command)
("n" "New Conversation" khoj--new-conversation-session-command)
("d" "Delete Conversation" khoj--delete-conversation-session-command)
("q" "Quit" transient-quit-one)
])
"Create the Khoj Chat Menu and Execute Commands."
[["Configure"
("a" "Select Agent" khoj--agent-switch)]]
[["Act"
("c" "Chat" khoj--chat-command)
("o" "Open Conversation" khoj--open-conversation-session-command)
("n" "New Conversation" khoj--new-conversation-session-command)
("d" "Delete Conversation" khoj--delete-conversation-session-command)
("q" "Quit" transient-quit-one)]])
(transient-define-prefix khoj--menu ()
"Create Khoj Menu to Configure and Execute Commands."

View file

@ -1,7 +1,7 @@
{
"id": "khoj",
"name": "Khoj",
"version": "1.26.0",
"version": "1.26.4",
"minAppVersion": "0.15.0",
"description": "Your Second Brain",
"author": "Khoj Inc.",

View file

@ -1,6 +1,6 @@
{
"name": "Khoj",
"version": "1.26.0",
"version": "1.26.4",
"description": "Your Second Brain",
"author": "Debanjum Singh Solanky, Saba Imran <team@khoj.dev>",
"license": "GPL-3.0-or-later",

View file

@ -484,12 +484,13 @@ export class KhojChatView extends KhojPaneView {
dt?: Date,
intentType?: string,
inferredQueries?: string[],
conversationId?: string,
) {
if (!message) return;
let chatMessageEl;
if (intentType?.includes("text-to-image")) {
let imageMarkdown = this.generateImageMarkdown(message, intentType, inferredQueries);
if (intentType?.includes("text-to-image") || intentType === "excalidraw") {
let imageMarkdown = this.generateImageMarkdown(message, intentType, inferredQueries, conversationId);
chatMessageEl = this.renderMessage(chatEl, imageMarkdown, sender, dt);
} else {
chatMessageEl = this.renderMessage(chatEl, message, sender, dt);
@ -509,7 +510,7 @@ export class KhojChatView extends KhojPaneView {
chatMessageBodyEl.appendChild(this.createReferenceSection(references));
}
generateImageMarkdown(message: string, intentType: string, inferredQueries?: string[]) {
generateImageMarkdown(message: string, intentType: string, inferredQueries?: string[], conversationId?: string): string {
let imageMarkdown = "";
if (intentType === "text-to-image") {
imageMarkdown = `![](data:image/png;base64,${message})`;
@ -517,6 +518,10 @@ export class KhojChatView extends KhojPaneView {
imageMarkdown = `![](${message})`;
} else if (intentType === "text-to-image-v3") {
imageMarkdown = `![](data:image/webp;base64,${message})`;
} else if (intentType === "excalidraw") {
const domain = this.setting.khojUrl.endsWith("/") ? this.setting.khojUrl : `${this.setting.khojUrl}/`;
const redirectMessage = `Hey, I'm not ready to show you diagrams yet here. But you can view it in ${domain}chat?conversationId=${conversationId}`;
imageMarkdown = redirectMessage;
}
if (inferredQueries) {
imageMarkdown += "\n\n**Inferred Query**:";
@ -884,6 +889,7 @@ export class KhojChatView extends KhojPaneView {
new Date(chatLog.created),
chatLog.intent?.type,
chatLog.intent?.["inferred-queries"],
chatBodyEl.dataset.conversationId ?? "",
);
// push the user messages to the chat history
if(chatLog.by === "you"){
@ -1354,6 +1360,10 @@ export class KhojChatView extends KhojPaneView {
rawResponse += `![generated_image](${imageJson.image})`;
} else if (imageJson.intentType === "text-to-image-v3") {
rawResponse = `![](data:image/webp;base64,${imageJson.image})`;
} else if (imageJson.intentType === "excalidraw") {
const domain = this.setting.khojUrl.endsWith("/") ? this.setting.khojUrl : `${this.setting.khojUrl}/`;
const redirectMessage = `Hey, I'm not ready to show you diagrams yet here. But you can view it in ${domain}`;
rawResponse += redirectMessage;
}
if (inferredQuery) {
rawResponse += `\n\n**Inferred Query**:\n\n${inferredQuery}`;

View file

@ -78,5 +78,9 @@
"1.24.0": "0.15.0",
"1.24.1": "0.15.0",
"1.25.0": "0.15.0",
"1.26.0": "0.15.0"
"1.26.0": "0.15.0",
"1.26.1": "0.15.0",
"1.26.2": "0.15.0",
"1.26.3": "0.15.0",
"1.26.4": "0.15.0"
}

View file

@ -79,7 +79,7 @@ div.titleBar {
div.chatBoxBody {
display: grid;
height: 100%;
width: 70%;
width: 95%;
margin: auto;
}

View file

@ -47,7 +47,14 @@ export default function RootLayout({
child-src 'none';
object-src 'none';"
></meta>
<body className={inter.className}>{children}</body>
<body className={inter.className}>
{children}
<script
dangerouslySetInnerHTML={{
__html: `window.EXCALIDRAW_ASSET_PATH = 'https://assets.khoj.dev/@excalidraw/excalidraw/dist/';`,
}}
/>
</body>
</html>
);
}

View file

@ -41,6 +41,8 @@ function ChatBodyData(props: ChatBodyDataProps) {
const setQueryToProcess = props.setQueryToProcess;
const onConversationIdChange = props.onConversationIdChange;
const chatHistoryCustomClassName = props.isMobileWidth ? "w-full" : "w-4/6";
useEffect(() => {
if (images.length > 0) {
const encodedImages = images.map((image) => encodeURIComponent(image));
@ -105,10 +107,11 @@ function ChatBodyData(props: ChatBodyDataProps) {
setAgent={setAgentMetadata}
pendingMessage={processingMessage ? message : ""}
incomingMessages={props.streamedMessages}
customClassName={chatHistoryCustomClassName}
/>
</div>
<div
className={`${styles.inputBox} p-1 md:px-2 shadow-md bg-background align-middle items-center justify-center dark:bg-neutral-700 dark:border-0 dark:shadow-sm rounded-t-2xl rounded-b-none md:rounded-xl h-fit`}
className={`${styles.inputBox} p-1 md:px-2 shadow-md bg-background align-middle items-center justify-center dark:bg-neutral-700 dark:border-0 dark:shadow-sm rounded-t-2xl rounded-b-none md:rounded-xl h-fit ${chatHistoryCustomClassName} mr-auto ml-auto`}
>
<ChatInputArea
agentColor={agentMetadata?.color}

View file

@ -5,10 +5,10 @@ export interface RawReferenceData {
onlineContext?: OnlineContext;
}
export interface ResponseWithReferences {
context?: Context[];
online?: OnlineContext;
response?: string;
export interface ResponseWithIntent {
intentType: string;
response: string;
inferredQueries?: string[];
}
interface MessageChunk {
@ -49,10 +49,14 @@ export function convertMessageChunkToJson(chunk: string): MessageChunk {
function handleJsonResponse(chunkData: any) {
const jsonData = chunkData as any;
if (jsonData.image || jsonData.detail) {
let responseWithReference = handleImageResponse(chunkData, true);
if (responseWithReference.response) return responseWithReference.response;
let responseWithIntent = handleImageResponse(chunkData, true);
return responseWithIntent;
} else if (jsonData.response) {
return jsonData.response;
return {
response: jsonData.response,
intentType: "",
inferredQueries: [],
};
} else {
throw new Error("Invalid JSON response");
}
@ -80,8 +84,18 @@ export function processMessageChunk(
return { context, onlineContext };
} else if (chunk.type === "message") {
const chunkData = chunk.data;
// Here, handle if the response is a JSON response with an image, but the intentType is excalidraw
if (chunkData !== null && typeof chunkData === "object") {
currentMessage.rawResponse += handleJsonResponse(chunkData);
let responseWithIntent = handleJsonResponse(chunkData);
if (responseWithIntent.intentType && responseWithIntent.intentType === "excalidraw") {
currentMessage.rawResponse = responseWithIntent.response;
} else {
currentMessage.rawResponse += responseWithIntent.response;
}
currentMessage.intentType = responseWithIntent.intentType;
currentMessage.inferredQueries = responseWithIntent.inferredQueries;
} else if (
typeof chunkData === "string" &&
chunkData.trim()?.startsWith("{") &&
@ -89,7 +103,10 @@ export function processMessageChunk(
) {
try {
const jsonData = JSON.parse(chunkData.trim());
currentMessage.rawResponse += handleJsonResponse(jsonData);
let responseWithIntent = handleJsonResponse(jsonData);
currentMessage.rawResponse += responseWithIntent.response;
currentMessage.intentType = responseWithIntent.intentType;
currentMessage.inferredQueries = responseWithIntent.inferredQueries;
} catch (e) {
currentMessage.rawResponse += JSON.stringify(chunkData);
}
@ -111,42 +128,26 @@ export function processMessageChunk(
return { context, onlineContext };
}
export function handleImageResponse(imageJson: any, liveStream: boolean): ResponseWithReferences {
export function handleImageResponse(imageJson: any, liveStream: boolean): ResponseWithIntent {
let rawResponse = "";
if (imageJson.image) {
const inferredQuery = imageJson.inferredQueries?.[0] ?? "generated image";
// If response has image field, response is a generated image.
if (imageJson.intentType === "text-to-image") {
rawResponse += `![generated_image](data:image/png;base64,${imageJson.image})`;
} else if (imageJson.intentType === "text-to-image2") {
rawResponse += `![generated_image](${imageJson.image})`;
} else if (imageJson.intentType === "text-to-image-v3") {
rawResponse = `![](data:image/webp;base64,${imageJson.image})`;
}
if (inferredQuery && !liveStream) {
rawResponse += `\n\n${inferredQuery}`;
}
// If response has image field, response may be a generated image
rawResponse = imageJson.image;
}
let reference: ResponseWithReferences = {};
let responseWithIntent: ResponseWithIntent = {
intentType: imageJson.intentType,
response: rawResponse,
inferredQueries: imageJson.inferredQueries,
};
if (imageJson.context && imageJson.context.length > 0) {
const rawReferenceAsJson = imageJson.context;
if (rawReferenceAsJson instanceof Array) {
reference.context = rawReferenceAsJson;
} else if (typeof rawReferenceAsJson === "object" && rawReferenceAsJson !== null) {
reference.online = rawReferenceAsJson;
}
}
if (imageJson.detail) {
// The detail field contains the improved image prompt
rawResponse += imageJson.detail;
}
reference.response = rawResponse;
return reference;
return responseWithIntent;
}
export function modifyFileFilterForConversation(

View file

@ -48,6 +48,7 @@ import {
Oven,
Gavel,
Broadcast,
KeyReturn,
} from "@phosphor-icons/react";
import { Markdown, OrgMode, Pdf, Word } from "@/app/components/logo/fileLogo";
@ -193,6 +194,10 @@ export function getIconForSlashCommand(command: string, customClassName: string
}
if (command.includes("default")) {
return <KeyReturn className={className} />;
}
if (command.includes("diagram")) {
return <Shapes className={className} />;
}

View file

@ -2,12 +2,7 @@ div.chatHistory {
display: flex;
flex-direction: column;
height: 100%;
}
div.chatLayout {
height: 80vh;
overflow-y: auto;
margin: 0 auto;
margin: auto;
}
div.agentIndicator a {

View file

@ -37,6 +37,7 @@ interface ChatHistoryProps {
pendingMessage?: string;
publicConversationSlug?: string;
setAgent: (agent: AgentData) => void;
customClassName?: string;
}
function constructTrainOfThought(
@ -255,7 +256,7 @@ export default function ChatHistory(props: ChatHistoryProps) {
return (
<ScrollArea className={`h-[80vh] relative`} ref={scrollAreaRef}>
<div>
<div className={styles.chatHistory}>
<div className={`${styles.chatHistory} ${props.customClassName}`}>
<div ref={sentinelRef} style={{ height: "1px" }}>
{fetchingData && (
<InlineLoading message="Loading Conversation" className="opacity-50" />
@ -322,6 +323,12 @@ export default function ChatHistory(props: ChatHistoryProps) {
by: "khoj",
automationId: "",
rawQuery: message.rawQuery,
intent: {
type: message.intentType || "",
query: message.rawQuery,
"memory-type": "",
"inferred-queries": message.inferredQueries || [],
},
}}
customClassName="fullHistory"
borderLeftColor={`${data?.agent?.color}-500`}
@ -365,18 +372,20 @@ export default function ChatHistory(props: ChatHistoryProps) {
</div>
)}
</div>
{!isNearBottom && (
<button
title="Scroll to bottom"
className="absolute bottom-4 right-5 bg-white dark:bg-[hsl(var(--background))] text-neutral-500 dark:text-white p-2 rounded-full shadow-xl"
onClick={() => {
scrollToBottom();
setIsNearBottom(true);
}}
>
<ArrowDown size={24} />
</button>
)}
<div className={`${props.customClassName} fixed bottom-[15%] z-10`}>
{!isNearBottom && (
<button
title="Scroll to bottom"
className="absolute bottom-0 right-0 bg-white dark:bg-[hsl(var(--background))] text-neutral-500 dark:text-white p-2 rounded-full shadow-xl"
onClick={() => {
scrollToBottom();
setIsNearBottom(true);
}}
>
<ArrowDown size={24} />
</button>
)}
</div>
</div>
</ScrollArea>
);

View file

@ -26,6 +26,7 @@ import {
Palette,
ClipboardText,
Check,
Shapes,
} from "@phosphor-icons/react";
import DOMPurify from "dompurify";
@ -35,6 +36,7 @@ import { AgentData } from "@/app/agents/page";
import renderMathInElement from "katex/contrib/auto-render";
import "katex/dist/katex.min.css";
import ExcalidrawComponent from "../excalidraw/excalidraw";
const md = new markdownIt({
html: true,
@ -127,6 +129,8 @@ export interface StreamMessage {
timestamp: string;
agent?: AgentData;
images?: string[];
intentType?: string;
inferredQueries?: string[];
}
export interface ChatHistoryData {
@ -251,6 +255,10 @@ function chooseIconFromHeader(header: string, iconColor: string) {
return <Aperture className={`${classNames}`} />;
}
if (compareHeader.includes("diagram")) {
return <Shapes className={`${classNames}`} />;
}
if (compareHeader.includes("paint")) {
return <Palette className={`${classNames}`} />;
}
@ -282,6 +290,7 @@ const ChatMessage = forwardRef<HTMLDivElement, ChatMessageProps>((props, ref) =>
const [markdownRendered, setMarkdownRendered] = useState<string>("");
const [isPlaying, setIsPlaying] = useState<boolean>(false);
const [interrupted, setInterrupted] = useState<boolean>(false);
const [excalidrawData, setExcalidrawData] = useState<string>("");
const interruptedRef = useRef<boolean>(false);
const messageRef = useRef<HTMLDivElement>(null);
@ -320,6 +329,11 @@ const ChatMessage = forwardRef<HTMLDivElement, ChatMessageProps>((props, ref) =>
useEffect(() => {
let message = props.chatMessage.message;
if (props.chatMessage.intent && props.chatMessage.intent.type == "excalidraw") {
message = props.chatMessage.intent["inferred-queries"][0];
setExcalidrawData(props.chatMessage.message);
}
// Replace LaTeX delimiters with placeholders
message = message
.replace(/\\\(/g, "LEFTPAREN")
@ -337,22 +351,27 @@ const ChatMessage = forwardRef<HTMLDivElement, ChatMessageProps>((props, ref) =>
message = `<div class="${styles.imagesContainer}">${imagesInMd}</div>${message}`;
}
if (props.chatMessage.intent && props.chatMessage.intent.type == "text-to-image") {
message = `![generated image](data:image/png;base64,${message})`;
} else if (props.chatMessage.intent && props.chatMessage.intent.type == "text-to-image2") {
message = `![generated image](${message})`;
} else if (
props.chatMessage.intent &&
props.chatMessage.intent.type == "text-to-image-v3"
) {
message = `![generated image](data:image/webp;base64,${message})`;
}
if (
props.chatMessage.intent &&
props.chatMessage.intent.type.includes("text-to-image") &&
props.chatMessage.intent["inferred-queries"]?.length > 0
) {
message += `\n\n${props.chatMessage.intent["inferred-queries"][0]}`;
const intentTypeHandlers = {
"text-to-image": (msg: string) => `![generated image](data:image/png;base64,${msg})`,
"text-to-image2": (msg: string) => `![generated image](${msg})`,
"text-to-image-v3": (msg: string) =>
`![generated image](data:image/webp;base64,${msg})`,
excalidraw: (msg: string) => {
return msg;
},
};
if (props.chatMessage.intent) {
const { type, "inferred-queries": inferredQueries } = props.chatMessage.intent;
console.log("intent type", type);
if (type in intentTypeHandlers) {
message = intentTypeHandlers[type as keyof typeof intentTypeHandlers](message);
}
if (type.includes("text-to-image") && inferredQueries?.length > 0) {
message += `\n\n${inferredQueries[0]}`;
}
}
setTextRendered(message);
@ -559,6 +578,7 @@ const ChatMessage = forwardRef<HTMLDivElement, ChatMessageProps>((props, ref) =>
className={styles.chatMessage}
dangerouslySetInnerHTML={{ __html: markdownRendered }}
/>
{excalidrawData && <ExcalidrawComponent data={excalidrawData} />}
</div>
<div className={styles.teaserReferencesContainer}>
<TeaserReferencesSection

View file

@ -0,0 +1,24 @@
"use client";
import dynamic from "next/dynamic";
import { Suspense } from "react";
import Loading from "../../components/loading/loading";
// Client components are still prerendered on the server, so the Excalidraw
// wrapper is loaded lazily with SSR switched off.
const ExcalidrawWrapper = dynamic(
    async () => (await import("./excalidrawWrapper")).default,
    { ssr: false },
);

interface ExcalidrawComponentProps {
    data: any;
}

/**
 * Renders the lazily loaded Excalidraw wrapper for `data`, showing the shared
 * loading indicator while it is suspended.
 */
export default function ExcalidrawComponent({ data }: ExcalidrawComponentProps) {
    const loadingFallback = <Loading />;

    return (
        <Suspense fallback={loadingFallback}>
            <ExcalidrawWrapper data={data} />
        </Suspense>
    );
}

View file

@ -0,0 +1,149 @@
"use client";
import { useState, useEffect } from "react";
import dynamic from "next/dynamic";
import { ExcalidrawProps } from "@excalidraw/excalidraw/types/types";
import { ExcalidrawElement } from "@excalidraw/excalidraw/types/element/types";
import { ExcalidrawElementSkeleton } from "@excalidraw/excalidraw/types/data/transform";
const Excalidraw = dynamic<ExcalidrawProps>(
async () => (await import("@excalidraw/excalidraw")).Excalidraw,
{
ssr: false,
},
);
import { convertToExcalidrawElements } from "@excalidraw/excalidraw";
import { Button } from "@/components/ui/button";
import { ArrowsInSimple, ArrowsOutSimple } from "@phosphor-icons/react";
interface ExcalidrawWrapperProps {
data: ExcalidrawElementSkeleton[];
}
/** Returns true when a skeleton carries the minimum fields used below: position, id and type. */
function isValidExcalidrawElement(element: ExcalidrawElementSkeleton): boolean {
    return (
        element.x !== undefined &&
        element.y !== undefined &&
        element.id !== undefined &&
        element.type !== undefined
    );
}

/**
 * Filters raw skeletons down to the ones that are safe to hand to
 * `convertToExcalidrawElements`.
 *
 * - Anything that is not an array yields an empty list.
 * - Entries missing x, y, id or type are dropped.
 * - Arrows are kept only when both endpoints refer to entries that passed the basic check.
 * - Frames are kept only when at least one child survived; their children are
 *   trimmed to the surviving ids.
 */
function sanitizeSkeletons(data: ExcalidrawElementSkeleton[]): ExcalidrawElementSkeleton[] {
    if (!Array.isArray(data)) {
        return [];
    }

    const basicValidSkeletons = data.filter(
        (element) => element != null && isValidExcalidrawElement(element),
    );

    const validSkeletons: ExcalidrawElementSkeleton[] = [];

    // First pass: everything except frames, which depend on the final set of children.
    for (const element of basicValidSkeletons) {
        if (element.type === "frame") {
            continue;
        }
        if (element.type === "arrow") {
            const start = basicValidSkeletons.find((child) => child.id === element.start?.id);
            const end = basicValidSkeletons.find((child) => child.id === element.end?.id);
            if (start && end) {
                validSkeletons.push(element);
            }
            continue;
        }
        validSkeletons.push(element);
    }

    // Second pass: frames, restricted to children that made it through the first pass
    // (or frames already accepted earlier in this pass).
    for (const element of basicValidSkeletons) {
        if (element.type !== "frame") {
            continue;
        }
        const children = element.children?.map((childId) => {
            return validSkeletons.find((child) => child.id === childId);
        });
        const validChildrenIds: readonly string[] = children
            ?.map((child) => child?.id)
            .filter((id) => id !== undefined) as string[];

        if (validChildrenIds === undefined || validChildrenIds.length === 0) {
            continue;
        }
        validSkeletons.push({
            ...element,
            children: validChildrenIds,
        });
    }

    return validSkeletons;
}

/**
 * Shows a diagram described by Excalidraw element skeletons, with a button to
 * toggle between the inline view and an expanded overlay (Escape collapses it).
 */
export default function ExcalidrawWrapper(props: ExcalidrawWrapperProps) {
    const [excalidrawElements, setExcalidrawElements] = useState<ExcalidrawElement[]>([]);
    const [expanded, setExpanded] = useState<boolean>(false);

    // Collapse on Escape while expanded. A scoped listener with cleanup is used
    // instead of assigning the global `onkeydown`, so other handlers are not
    // clobbered and nothing is left behind when the component unmounts.
    useEffect(() => {
        if (!expanded) {
            return;
        }
        const handleKeyDown = (event: KeyboardEvent) => {
            if (event.key === "Escape") {
                setExpanded(false);
            }
        };
        window.addEventListener("keydown", handleKeyDown);
        return () => window.removeEventListener("keydown", handleKeyDown);
    }, [expanded]);

    // Trigger a resize event to make Excalidraw adjust its size. Done in an
    // effect so it fires after the new container size has been committed.
    useEffect(() => {
        window.dispatchEvent(new Event("resize"));
    }, [expanded]);

    // Rebuild the element list whenever the incoming skeletons change.
    useEffect(() => {
        const validSkeletons = sanitizeSkeletons(props.data);
        setExcalidrawElements(convertToExcalidrawElements(validSkeletons));
    }, [props.data]);

    return (
        <div className="relative">
            <div
                className={`${expanded ? "fixed inset-0 bg-black bg-opacity-50 backdrop-blur-sm z-50 flex items-center justify-center" : ""}`}
            >
                <Button
                    onClick={() => setExpanded((previous) => !previous)}
                    variant={"outline"}
                    className={`${expanded ? "absolute top-2 left-2 z-[60]" : ""}`}
                >
                    {expanded ? (
                        <ArrowsInSimple className="h-4 w-4" />
                    ) : (
                        <ArrowsOutSimple className="h-4 w-4" />
                    )}
                </Button>
                <div
                    className={`
                        ${expanded ? "w-[80vw] h-[80vh]" : "w-full h-[500px]"}
                        bg-white overflow-hidden rounded-lg relative
                    `}
                >
                    <Excalidraw
                        initialData={{
                            elements: excalidrawElements,
                            appState: { zenModeEnabled: true },
                            scrollToContent: true,
                        }}
                        // TODO - Create a common function to detect if the theme is dark?
                        theme={localStorage.getItem("theme") === "dark" ? "dark" : "light"}
                        validateEmbeddable={true}
                        renderTopRightUI={() => <></>}
                    />
                </div>
            </div>
        </div>
    );
}

View file

@ -27,7 +27,14 @@ export default function RootLayout({
child-src 'none';
object-src 'none';"
></meta>
<body className={inter.className}>{children}</body>
<body className={inter.className}>
{children}
<script
dangerouslySetInnerHTML={{
__html: `window.EXCALIDRAW_ASSET_PATH = 'https://assets.khoj.dev/@excalidraw/excalidraw/dist/';`,
}}
/>
</body>
</html>
);
}

View file

@ -39,6 +39,8 @@ function ChatBodyData(props: ChatBodyDataProps) {
const setQueryToProcess = props.setQueryToProcess;
const streamedMessages = props.streamedMessages;
const chatHistoryCustomClassName = props.isMobileWidth ? "w-full" : "w-4/6";
useEffect(() => {
if (images.length > 0) {
const encodedImages = images.map((image) => encodeURIComponent(image));
@ -96,10 +98,11 @@ function ChatBodyData(props: ChatBodyDataProps) {
setTitle={props.setTitle}
pendingMessage={processingMessage ? message : ""}
incomingMessages={props.streamedMessages}
customClassName={chatHistoryCustomClassName}
/>
</div>
<div
className={`${styles.inputBox} p-1 md:px-2 shadow-md bg-background align-middle items-center justify-center dark:bg-neutral-700 dark:border-0 dark:shadow-sm rounded-t-2xl rounded-b-none md:rounded-xl`}
className={`${styles.inputBox} p-1 md:px-2 shadow-md bg-background align-middle items-center justify-center dark:bg-neutral-700 dark:border-0 dark:shadow-sm rounded-t-2xl rounded-b-none md:rounded-xl h-fit ${chatHistoryCustomClassName} mr-auto ml-auto`}
>
<ChatInputArea
isLoggedIn={props.isLoggedIn}
@ -293,6 +296,19 @@ export default function SharedChat() {
<div className={styles.chatBox}>
<div className={styles.chatBoxBody}>
{!isMobileWidth && title && (
<div
className={`${styles.chatTitleWrapper} text-nowrap text-ellipsis overflow-hidden max-w-screen-md grid items-top font-bold mr-8 pt-6 col-auto h-fit`}
>
{title && (
<h2
className={`text-lg text-ellipsis whitespace-nowrap overflow-x-hidden`}
>
{title}
</h2>
)}
</div>
)}
<Suspense fallback={<Loading />}>
<ChatBodyData
conversationId={conversationId}

View file

@ -75,7 +75,7 @@ div.titleBar {
div.chatBoxBody {
display: grid;
height: 100%;
width: 70%;
width: 95%;
margin: auto;
}

View file

@ -1,6 +1,6 @@
{
"name": "khoj-ai",
"version": "1.26.0",
"version": "1.26.4",
"private": true,
"scripts": {
"dev": "next dev",
@ -63,7 +63,8 @@
"swr": "^2.2.5",
"typescript": "^5",
"vaul": "^0.9.1",
"zod": "^3.23.8"
"zod": "^3.23.8",
"@excalidraw/excalidraw": "^0.17.6"
},
"devDependencies": {
"@types/dompurify": "^3.0.5",

View file

@ -286,6 +286,11 @@
resolved "https://registry.yarnpkg.com/@eslint/js/-/js-8.57.1.tgz#de633db3ec2ef6a3c89e2f19038063e8a122e2c2"
integrity sha512-d9zaMRSTIKDLhctzH12MtXvJKSSUhaHcjV+2Z+GK+EEY7XKpP5yR4x+N3TAcHTcu963nIr+TMcCb4DBCYX1z6Q==
"@excalidraw/excalidraw@^0.17.6":
version "0.17.6"
resolved "https://registry.yarnpkg.com/@excalidraw/excalidraw/-/excalidraw-0.17.6.tgz#5fd208ce69d33ca712d1804b50d7d06d5c46ac4d"
integrity sha512-fyCl+zG/Z5yhHDh5Fq2ZGmphcrALmuOdtITm8gN4d8w4ntnaopTXcTfnAAaU3VleDC6LhTkoLOTG6P5kgREiIg==
"@floating-ui/core@^1.6.0":
version "1.6.8"
resolved "https://registry.yarnpkg.com/@floating-ui/core/-/core-1.6.8.tgz#aa43561be075815879305965020f492cdb43da12"

View file

@ -172,7 +172,7 @@ class UserAuthenticationBackend(AuthenticationBackend):
request=request,
telemetry_type="api",
api="create_user",
metadata={"user_id": str(user.uuid)},
metadata={"server_id": str(user.uuid)},
)
logger.log(logging.INFO, f"🥳 New User Created: {user.uuid}")
else:

View file

@ -622,6 +622,8 @@ class AgentAdapters:
@staticmethod
def get_all_accessible_agents(user: KhojUser = None):
public_query = Q(privacy_level=Agent.PrivacyLevel.PUBLIC)
# TODO Update this to allow any public agent that's officially approved once that experience is launched
public_query &= Q(managed_by_admin=True)
if user:
return (
Agent.objects.filter(public_query | Q(creator=user))
@ -640,6 +642,16 @@ class AgentAdapters:
agents = await sync_to_async(AgentAdapters.get_all_accessible_agents)(user)
return await sync_to_async(list)(agents)
@staticmethod
async def ais_agent_accessible(agent: Agent, user: KhojUser) -> bool:
    """
    Check whether `user` is allowed to use `agent`.

    Public and protected agents are accessible to every user. Any other agent
    is accessible only to the user that created it.

    Returns:
        True when the agent is accessible to the user, False otherwise.
    """
    if agent.privacy_level in (Agent.PrivacyLevel.PUBLIC, Agent.PrivacyLevel.PROTECTED):
        return True

    # Compare the stored foreign key value instead of dereferencing `agent.creator`.
    # Loading an uncached related object issues a synchronous database query,
    # which Django refuses to run from async code (SynchronousOnlyOperation).
    # NOTE(review): an agent without a creator is now never matched to a missing user.
    creator_id = agent.creator_id
    user_id = getattr(user, "pk", None)
    return creator_id is not None and creator_id == user_id
@staticmethod
def get_conversation_agent_by_id(agent_id: int):
agent = Agent.objects.filter(id=agent_id).first()
@ -1463,12 +1475,15 @@ class EntryAdapters:
file_filters = EntryAdapters.file_filter.get_filter_terms(query)
date_filters = EntryAdapters.date_filter.get_query_date_range(query)
user_or_agent = Q(user=user)
owner_filter = Q()
if user != None:
owner_filter = Q(user=user)
if agent != None:
user_or_agent |= Q(agent=agent)
owner_filter |= Q(agent=agent)
if len(word_filters) == 0 and len(file_filters) == 0 and len(date_filters) == 0:
return Entry.objects.filter(user_or_agent)
return Entry.objects.filter(owner_filter)
for term in word_filters:
if term.startswith("+"):
@ -1504,7 +1519,7 @@ class EntryAdapters:
formatted_max_date = date.fromtimestamp(max_date).strftime("%Y-%m-%d")
q_filter_terms &= Q(embeddings_dates__date__lte=formatted_max_date)
relevant_entries = Entry.objects.filter(user_or_agent).filter(q_filter_terms)
relevant_entries = Entry.objects.filter(owner_filter).filter(q_filter_terms)
if file_type_filter:
relevant_entries = relevant_entries.filter(file_type=file_type_filter)
return relevant_entries
@ -1519,13 +1534,18 @@ class EntryAdapters:
max_distance: float = math.inf,
agent: Agent = None,
):
user_or_agent = Q(user=user)
owner_filter = Q()
if user != None:
owner_filter = Q(user=user)
if agent != None:
user_or_agent |= Q(agent=agent)
owner_filter |= Q(agent=agent)
if owner_filter == Q():
return Entry.objects.none()
relevant_entries = EntryAdapters.apply_filters(user, raw_query, file_type_filter, agent)
relevant_entries = relevant_entries.filter(user_or_agent).annotate(
relevant_entries = relevant_entries.filter(owner_filter).annotate(
distance=CosineDistance("embeddings", embeddings)
)
relevant_entries = relevant_entries.filter(distance__lte=max_distance)

View file

@ -0,0 +1,46 @@
# Generated by Django 5.0.8 on 2024-10-21 05:16
import django.contrib.postgres.fields
from django.db import migrations, models
# Schema migration for the Agent model: redefines the `input_tools` and
# `output_modes` array fields so that they accept null and blank values.
class Migration(migrations.Migration):
# Must be applied after migration 0069 of the "database" app
dependencies = [
("database", "0069_webscraper_serverchatsettings_web_scraper"),
]
operations = [
# input_tools: array of tool names restricted to the listed choices; defaults to an empty list
migrations.AlterField(
model_name="agent",
name="input_tools",
field=django.contrib.postgres.fields.ArrayField(
base_field=models.CharField(
choices=[
("general", "General"),
("online", "Online"),
("notes", "Notes"),
("summarize", "Summarize"),
("webpage", "Webpage"),
],
max_length=200,
),
blank=True,
default=list,
null=True,
size=None,
),
),
# output_modes: array of output mode names restricted to the listed choices; defaults to an empty list
migrations.AlterField(
model_name="agent",
name="output_modes",
field=django.contrib.postgres.fields.ArrayField(
base_field=models.CharField(
choices=[("text", "Text"), ("image", "Image"), ("automation", "Automation")], max_length=200
),
blank=True,
default=list,
null=True,
size=None,
),
),
]

View file

@ -180,8 +180,12 @@ class Agent(BaseModel):
) # Creator will only be null when the agents are managed by admin
name = models.CharField(max_length=200)
personality = models.TextField()
input_tools = ArrayField(models.CharField(max_length=200, choices=InputToolOptions.choices), default=list)
output_modes = ArrayField(models.CharField(max_length=200, choices=OutputModeOptions.choices), default=list)
input_tools = ArrayField(
models.CharField(max_length=200, choices=InputToolOptions.choices), default=list, null=True, blank=True
)
output_modes = ArrayField(
models.CharField(max_length=200, choices=OutputModeOptions.choices), default=list, null=True, blank=True
)
managed_by_admin = models.BooleanField(default=False)
chat_model = models.ForeignKey(ChatModelOptions, on_delete=models.CASCADE)
slug = models.CharField(max_length=200, unique=True)

View file

@ -67,7 +67,7 @@ class PdfToEntries(TextToEntries):
bytes = pdf_files[pdf_file]
f.write(bytes)
try:
loader = PyMuPDFLoader(f"{tmp_file}", extract_images=True)
loader = PyMuPDFLoader(f"{tmp_file}", extract_images=False)
pdf_entries_per_file = [page.page_content for page in loader.load()]
except ImportError:
loader = PyMuPDFLoader(f"{tmp_file}")

View file

@ -176,6 +176,150 @@ Improved Prompt:
""".strip()
)
## Diagram Generation
## --
# Prompt that rewrites the user's query into a plain-language diagram description
# built from a fixed set of drawing primitives.
# Template variables: personality_context, current_date, location, references,
# online_results, chat_history, query.
improve_diagram_description_prompt = PromptTemplate.from_template(
"""
you are an architect working with a novice artist using a diagramming tool.
{personality_context}
you need to convert the user's query to a description format that the novice artist can use very well. you are allowed to use primitives like
- text
- rectangle
- diamond
- ellipse
- line
- arrow
- frame
use these primitives to describe what sort of diagram the drawer should create. the artist must recreate the diagram every time, so include all relevant prior information in your description.
use simple, concise language.
Today's Date: {current_date}
User's Location: {location}
User's Notes:
{references}
Online References:
{online_results}
Conversation Log:
{chat_history}
Query: {query}
""".strip()
)
# Prompt that turns a diagram description into a list of Excalidraw element skeletons.
# Template variables: personality_context, query.
# Literal braces are doubled ({{ }}) so the template engine leaves them in the output.
# The example response must itself be strictly valid JSON (no trailing commas),
# since the model's answer is parsed as JSON by the caller.
excalidraw_diagram_generation_prompt = PromptTemplate.from_template(
"""
You are a program manager with the ability to describe diagrams to compose in professional, fine detail.
{personality_context}
You need to create a declarative description of the diagram and relevant components, using this base schema. Use the `label` property to specify the text to be rendered in the respective elements. Always use light colors for the `backgroundColor` property, like white, or light blue, green, red. "type", "x", "y", "id", are required properties for all elements.
{{
type: string,
x: number,
y: number,
strokeColor: string,
backgroundColor: string,
width: number,
height: number,
id: string,
label: {{
text: string,
}}
}}
Valid types:
- text
- rectangle
- diamond
- ellipse
- line
- arrow
For arrows and lines, you can use the `points` property to specify the start and end points of the arrow. You may also use the `label` property to specify the text to be rendered. You may use the `start` and `end` properties to connect the linear elements to other elements. The start and end point can either be the ID to map to an existing object, or the `type` to create a new object. Mapping to an existing object is useful if you want to connect it to multiple objects. Lines and arrows can only start and end at rectangle, text, diamond, or ellipse elements.
{{
type: "arrow",
id: string,
x: number,
y: number,
width: number,
height: number,
strokeColor: string,
start: {{
id: string,
type: string,
}},
end: {{
id: string,
type: string,
}},
label: {{
text: string,
}}
points: [
[number, number],
[number, number],
]
}}
For text, you must use the `text` property to specify the text to be rendered. You may also use `fontSize` property to specify the font size of the text. Only use the `text` element for titles, subtitles, and overviews. For labels, use the `label` property in the respective elements.
{{
type: "text",
id: string,
x: number,
y: number,
fontSize: number,
text: string,
}}
For frames, use the `children` property to specify the elements that are inside the frame by their ids.
{{
type: "frame",
id: string,
x: number,
y: number,
width: number,
height: number,
name: string,
children: [
string
]
}}
Here's an example of a valid diagram:
Design Description: Create a diagram describing a circular development process with 3 stages: design, implementation and feedback. The design stage is connected to the implementation stage and the implementation stage is connected to the feedback stage and the feedback stage is connected to the design stage. Each stage should be labeled with the stage name.
Response:
[
{{"type":"text","x":-150,"y":50,"width":300,"height":40,"id":"title_text","text":"Circular Development Process","fontSize":24}},
{{"type":"ellipse","x":-169,"y":113,"width":188,"height":202,"id":"design_ellipse", "label": {{"text": "Design"}}}},
{{"type":"ellipse","x":62,"y":394,"width":186,"height":188,"id":"implement_ellipse", "label": {{"text": "Implement"}}}},
{{"type":"ellipse","x":-348,"y":430,"width":184,"height":170,"id":"feedback_ellipse", "label": {{"text": "Feedback"}}}},
{{"type":"arrow","x":21,"y":273,"id":"design_to_implement_arrow","points":[[0,0],[86,105]],"start":{{"id":"design_ellipse"}}, "end":{{"id":"implement_ellipse"}}}},
{{"type":"arrow","x":50,"y":519,"id":"implement_to_feedback_arrow","points":[[0,0],[-198,-6]],"start":{{"id":"implement_ellipse"}}, "end":{{"id":"feedback_ellipse"}}}},
{{"type":"arrow","x":-228,"y":417,"id":"feedback_to_design_arrow","points":[[0,0],[85,-123]],"start":{{"id":"feedback_ellipse"}}, "end":{{"id":"design_ellipse"}}}}
]
Create a detailed diagram from the provided context and user prompt below. Return a valid JSON object:
Diagram Description: {query}
""".strip()
)
## Online Search Conversation
## --
online_search_conversation = PromptTemplate.from_template(

View file

@ -189,7 +189,10 @@ def generate_chatml_messages_with_context(
message_notes = f'\n\n Notes:\n{chat.get("context")}' if chat.get("context") else "\n"
role = "user" if chat["by"] == "you" else "assistant"
message_content = chat["message"] + message_notes
if chat["by"] == "khoj" and "excalidraw" in chat["intent"].get("type"):
message_content = chat.get("intent").get("inferred-queries")[0] + message_notes
else:
message_content = chat["message"] + message_notes
message_content = construct_structured_message(message_content, chat.get("images"), model_type, vision_enabled)

View file

@ -21,6 +21,7 @@ from starlette.authentication import has_required_scope, requires
from khoj.configure import initialize_content
from khoj.database import adapters
from khoj.database.adapters import (
AgentAdapters,
AutomationAdapters,
ConversationAdapters,
EntryAdapters,
@ -114,10 +115,16 @@ async def execute_search(
dedupe: Optional[bool] = True,
agent: Optional[Agent] = None,
):
start_time = time.time()
# Run validation checks
results: List[SearchResponse] = []
start_time = time.time()
# Ensure the agent, if present, is accessible by the user
if user and agent and not await AgentAdapters.ais_agent_accessible(agent, user):
logger.error(f"Agent {agent.slug} is not accessible by user {user}")
return results
if q is None or q == "":
logger.warning(f"No query param (q) passed in API call to initiate search")
return results

View file

@ -42,6 +42,7 @@ from khoj.routers.helpers import (
construct_automation_created_message,
create_automation,
extract_relevant_summary,
generate_excalidraw_diagram,
get_conversation_command,
is_query_empty,
is_ready_to_chat,
@ -846,7 +847,7 @@ async def chat(
defiltered_query = result[2]
except Exception as e:
error_message = f"Error searching knowledge base: {e}. Attempting to respond without document references."
logger.warning(error_message)
logger.error(error_message, exc_info=True)
async for result in send_event(
ChatEvent.STATUS, "Document search failed. I'll try respond without document references"
):
@ -995,6 +996,57 @@ async def chat(
yield result
return
if ConversationCommand.Diagram in conversation_commands:
async for result in send_event(ChatEvent.STATUS, f"Creating diagram"):
yield result
intent_type = "excalidraw"
inferred_queries = []
diagram_description = ""
async for result in generate_excalidraw_diagram(
q=defiltered_query,
conversation_history=meta_log,
location_data=location,
note_references=compiled_references,
online_results=online_results,
query_images=uploaded_images,
user=user,
agent=agent,
send_status_func=partial(send_event, ChatEvent.STATUS),
):
if isinstance(result, dict) and ChatEvent.STATUS in result:
yield result[ChatEvent.STATUS]
else:
better_diagram_description_prompt, excalidraw_diagram_description = result
inferred_queries.append(better_diagram_description_prompt)
diagram_description = excalidraw_diagram_description
content_obj = {
"intentType": intent_type,
"inferredQueries": inferred_queries,
"image": diagram_description,
}
await sync_to_async(save_to_conversation_log)(
q,
excalidraw_diagram_description,
user,
meta_log,
user_message_time,
intent_type="excalidraw",
inferred_queries=[better_diagram_description_prompt],
client_application=request.user.client_app,
conversation_id=conversation_id,
compiled_references=compiled_references,
online_results=online_results,
query_images=uploaded_images,
)
async for result in send_llm_response(json.dumps(content_obj)):
yield result
return
## Generate Text Output
async for result in send_event(ChatEvent.STATUS, f"**Generating a well-informed response**"):
yield result

View file

@ -90,7 +90,7 @@ async def login_magic_link(request: Request, form: MagicLinkForm):
request=request,
telemetry_type="api",
api="create_user",
metadata={"user_id": str(user.uuid)},
metadata={"server_id": str(user.uuid)},
)
logger.log(logging.INFO, f"🥳 New User Created: {user.uuid}")
@ -175,7 +175,7 @@ async def auth(request: Request):
request=request,
telemetry_type="api",
api="create_user",
metadata={"user_id": str(khoj_user.uuid)},
metadata={"server_id": str(khoj_user.uuid)},
)
logger.log(logging.INFO, f"🥳 New User Created: {khoj_user.uuid}")
return RedirectResponse(url=next_url, status_code=HTTP_302_FOUND)

View file

@ -15,6 +15,7 @@ from typing import (
Annotated,
Any,
AsyncGenerator,
Callable,
Dict,
Iterator,
List,
@ -217,6 +218,9 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")):
chat_history += f"User: {chat['intent']['query']}\n"
chat_history += f"{agent_name}: [generated image redacted for space]\n"
elif chat["by"] == "khoj" and ("excalidraw" in chat["intent"].get("type")):
chat_history += f"User: {chat['intent']['query']}\n"
chat_history += f"{agent_name}: {chat['intent']['inferred-queries'][0]}\n"
return chat_history
@ -237,6 +241,8 @@ def get_conversation_command(query: str, any_references: bool = False) -> Conver
return ConversationCommand.AutomatedTask
elif query.startswith("/summarize"):
return ConversationCommand.Summarize
elif query.startswith("/diagram"):
return ConversationCommand.Diagram
# If no relevant notes found for the given query
elif not any_references:
return ConversationCommand.General
@ -619,6 +625,129 @@ async def extract_relevant_summary(
return response.strip()
async def generate_excalidraw_diagram(
    q: str,
    conversation_history: Dict[str, Any],
    location_data: LocationData,
    note_references: List[Dict[str, Any]],
    online_results: Optional[dict] = None,
    query_images: List[str] = None,
    user: KhojUser = None,
    agent: Agent = None,
    send_status_func: Optional[Callable] = None,
):
    """
    Create an Excalidraw diagram for the query in two model calls.

    First the query is rewritten into an improved diagram description, then that
    description is converted into the diagram itself.

    Yields:
        `{ChatEvent.STATUS: event}` dicts for every status event produced by
        `send_status_func` (when one is given), followed by a final
        `(improved description, diagram)` tuple.
    """

    async def status_events(message: str):
        # Status updates are optional; emit nothing when no sender was supplied
        if not send_status_func:
            return
        async for status in send_status_func(message):
            yield {ChatEvent.STATUS: status}

    async for status_event in status_events("**Enhancing the Diagramming Prompt**"):
        yield status_event

    improved_description = await generate_better_diagram_description(
        q=q,
        conversation_history=conversation_history,
        location_data=location_data,
        note_references=note_references,
        online_results=online_results,
        query_images=query_images,
        user=user,
        agent=agent,
    )

    async for status_event in status_events(f"**Diagram to Create:**:\n{improved_description}"):
        yield status_event

    diagram = await generate_excalidraw_diagram_from_description(
        q=improved_description,
        user=user,
        agent=agent,
    )

    yield improved_description, diagram
async def generate_better_diagram_description(
    q: str,
    conversation_history: Dict[str, Any],
    location_data: LocationData,
    note_references: List[Dict[str, Any]],
    online_results: Optional[dict] = None,
    query_images: List[str] = None,
    user: KhojUser = None,
    agent: Agent = None,
) -> str:
    """
    Rewrite the user's query into a diagram description using the available context.

    The prompt is filled with the current UTC date, the user's location, the
    compiled note references, a reduced view of the online results, the chat
    history and, when the agent has one, its personality.

    Returns:
        The model's response, stripped of surrounding whitespace and of one pair
        of enclosing quote characters if present.
    """
    current_date = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d, %A")

    personality_context = ""
    if agent and agent.personality:
        personality_context = prompts.personality_context.format(personality=agent.personality)

    location_prompt = prompts.user_location.format(location=f"{location_data}") if location_data else "Unknown"

    user_references = "\n\n".join(f"# {note['compiled']}" for note in note_references)
    chat_history = construct_chat_history(conversation_history)

    # Keep only the answer box, or failing that the webpages, of each online result
    simplified_online_results = {}
    for subquery, details in (online_results or {}).items():
        relevant = details.get("answerBox") or details.get("webpages")
        if relevant:
            simplified_online_results[subquery] = relevant

    improve_diagram_description_prompt = prompts.improve_diagram_description_prompt.format(
        query=q,
        chat_history=chat_history,
        location=location_prompt,
        current_date=current_date,
        references=user_references,
        online_results=simplified_online_results,
        personality_context=personality_context,
    )

    with timer("Chat actor: Generate better diagram description", logger):
        raw_description = await send_message_to_model_wrapper(
            improve_diagram_description_prompt, query_images=query_images, user=user
        )
        description = raw_description.strip()
        quote_chars = ('"', "'")
        if description.startswith(quote_chars) and description.endswith(quote_chars):
            description = description[1:-1]

    return description
async def generate_excalidraw_diagram_from_description(
    q: str,
    user: KhojUser = None,
    agent: Agent = None,
) -> List[Dict[str, Any]]:
    """
    Ask the chat model to turn a diagram description into Excalidraw elements.

    Args:
        q: The diagram description to convert.
        user: The user on whose behalf the model is queried.
        agent: Optional agent whose personality, if set, is added to the prompt.

    Returns:
        The parsed model response: a non-empty list whose first item is a dict.

    Raises:
        AssertionError: If the parsed response is empty, is not a list, or its
            first item is not a dict.
        json.JSONDecodeError: If the model response is not valid JSON.
    """
    personality_context = (
        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
    )

    excalidraw_diagram_generation = prompts.excalidraw_diagram_generation_prompt.format(
        personality_context=personality_context,
        query=q,
    )

    with timer("Chat actor: Generate excalidraw diagram", logger):
        raw_response = await send_message_to_model_wrapper(message=excalidraw_diagram_generation, user=user)
        raw_response = raw_response.strip()
        # Drop a surrounding JSON code fence, if any, before parsing
        raw_response = remove_json_codeblock(raw_response)
        response: List[Dict[str, Any]] = json.loads(raw_response)
        if not response or not isinstance(response, list) or not isinstance(response[0], dict):
            # TODO Some additional validation here that it's a valid Excalidraw diagram
            raise AssertionError(f"Invalid response for improving diagram description: {response}")

    return response
async def generate_better_image_prompt(
q: str,
conversation_history: str,

View file

@ -82,7 +82,7 @@ async def subscribe(request: Request):
request=request,
telemetry_type="api",
api="create_user",
metadata={"user_id": str(user.user.uuid)},
metadata={"server_id": str(user.user.uuid)},
)
logger.log(logging.INFO, f"🥳 New User Created: {user.user.uuid}")

View file

@ -51,17 +51,6 @@ def chat_page(request: Request):
)
@web_client.get("/experimental", response_class=FileResponse)
@requires(["authenticated"], redirect="login_page")
def experimental_page(request: Request):
return templates.TemplateResponse(
"index.html",
context={
"request": request,
},
)
@web_client.get("/factchecker", response_class=FileResponse)
def fact_checker_page(request: Request):
return templates.TemplateResponse(

View file

@ -318,6 +318,7 @@ class ConversationCommand(str, Enum):
Automation = "automation"
AutomatedTask = "automated_task"
Summarize = "summarize"
Diagram = "diagram"
command_descriptions = {
@ -326,10 +327,11 @@ command_descriptions = {
ConversationCommand.Default: "The default command when no command specified. It intelligently auto-switches between general and notes mode.",
ConversationCommand.Online: "Search for information on the internet.",
ConversationCommand.Webpage: "Get information from webpage suggested by you.",
ConversationCommand.Image: "Generate images by describing your imagination in words.",
ConversationCommand.Image: "Generate illustrative, creative images by describing your imagination in words.",
ConversationCommand.Automation: "Automatically run your query at a specified time or interval.",
ConversationCommand.Help: "Get help with how to use or setup Khoj from the documentation",
ConversationCommand.Summarize: "Get help with a question pertaining to an entire document.",
ConversationCommand.Diagram: "Draw a flowchart, diagram, or any other visual representation best expressed with primitives like lines, rectangles, and text.",
}
command_descriptions_for_agent = {
@ -353,12 +355,14 @@ mode_descriptions_for_llm = {
ConversationCommand.Image: "Use this if the user is requesting you to create a new picture based on their description.",
ConversationCommand.Automation: "Use this if you are confident the user is requesting a response at a scheduled date, time and frequency",
ConversationCommand.Text: "Use this if a normal text response would be sufficient for accurately responding to the query.",
ConversationCommand.Diagram: "Use this if the user is requesting a visual representation that requires primitives like lines, rectangles, and text.",
}
mode_descriptions_for_agent = {
ConversationCommand.Image: "Agent can generate image in response.",
ConversationCommand.Automation: "Agent can schedule a task to run at a scheduled date, time and frequency in response.",
ConversationCommand.Text: "Agent can generate text in response.",
ConversationCommand.Diagram: "Agent can generate a visual representation that requires primitives like lines, rectangles, and text.",
}

View file

@ -178,6 +178,13 @@ def api_user4(default_user4):
)
@pytest.mark.django_db
@pytest.fixture
def default_openai_chat_model_option():
    """Provide a chat model option with chat_model "gpt-4o-mini" and model_type "openai"."""
    return ChatModelOptionsFactory(chat_model="gpt-4o-mini", model_type="openai")
@pytest.mark.django_db
@pytest.fixture
def offline_agent():

211
tests/test_agents.py Normal file
View file

@ -0,0 +1,211 @@
# tests/test_agents.py
import os
import pytest
from asgiref.sync import sync_to_async
from khoj.database.adapters import AgentAdapters
from khoj.database.models import Agent, ChatModelOptions, Entry, KhojUser
from khoj.routers.api import execute_search
from khoj.utils.helpers import get_absolute_path
from tests.helpers import ChatModelOptionsFactory
# The default agent should be created with no input tools or output modes,
# public privacy level and be managed by admin.
def test_create_default_agent(default_user: KhojUser):
# A chat model option is created first, before the default agent is requested
ChatModelOptionsFactory()
agent = AgentAdapters.create_default_agent(default_user)
assert agent is not None
assert agent.input_tools == []
assert agent.output_modes == []
assert agent.privacy_level == Agent.PrivacyLevel.PUBLIC
assert agent.managed_by_admin == True
# Creating a private agent without any files should return an agent carrying
# the given name and privacy level, with the calling user as its creator.
@pytest.mark.anyio
@pytest.mark.django_db(transaction=True)
async def test_create_or_update_agent(default_user: KhojUser, default_openai_chat_model_option: ChatModelOptions):
new_agent = await AgentAdapters.aupdate_agent(
default_user,
"Test Agent",
"Test Personality",
Agent.PrivacyLevel.PRIVATE,
"icon",
"color",
default_openai_chat_model_option.chat_model,
[],
[],
[],
)
assert new_agent is not None
assert new_agent.name == "Test Agent"
assert new_agent.privacy_level == Agent.PrivacyLevel.PRIVATE
assert new_agent.creator == default_user
# Creating an agent with a single file should produce entries linked to that
# agent, all of which come from the given file.
@pytest.mark.anyio
@pytest.mark.django_db(transaction=True)
async def test_create_or_update_agent_with_knowledge_base(
default_user2: KhojUser, default_openai_chat_model_option: ChatModelOptions, chat_client
):
full_filename = get_absolute_path("tests/data/markdown/having_kids.markdown")
new_agent = await AgentAdapters.aupdate_agent(
default_user2,
"Test Agent",
"Test Personality",
Agent.PrivacyLevel.PRIVATE,
"icon",
"color",
default_openai_chat_model_option.chat_model,
[full_filename],
[],
[],
)
entries = await sync_to_async(list)(Entry.objects.filter(agent=new_agent))
# Collect the distinct file paths of the agent's entries
file_names = set()
for entry in entries:
file_names.add(entry.file_path)
assert new_agent is not None
assert new_agent.name == "Test Agent"
assert new_agent.privacy_level == Agent.PrivacyLevel.PRIVATE
assert new_agent.creator == default_user2
assert len(entries) > 0
assert full_filename in file_names
assert len(file_names) == 1
# The creator of a private agent should get search results when searching
# with that agent.
@pytest.mark.anyio
@pytest.mark.django_db(transaction=True)
async def test_create_or_update_agent_with_knowledge_base_and_search(
default_user2: KhojUser, default_openai_chat_model_option: ChatModelOptions, chat_client
):
full_filename = get_absolute_path("tests/data/markdown/having_kids.markdown")
new_agent = await AgentAdapters.aupdate_agent(
default_user2,
"Test Agent",
"Test Personality",
Agent.PrivacyLevel.PRIVATE,
"icon",
"color",
default_openai_chat_model_option.chat_model,
[full_filename],
[],
[],
)
# Search as the agent's creator
search_result = await execute_search(user=default_user2, q="having kids", agent=new_agent)
assert len(search_result) == 5
# A user who did not create a public agent should still get search results
# when searching with that agent.
@pytest.mark.anyio
@pytest.mark.django_db(transaction=True)
async def test_agent_with_knowledge_base_and_search_not_creator(
default_user2: KhojUser, default_openai_chat_model_option: ChatModelOptions, chat_client, default_user3: KhojUser
):
full_filename = get_absolute_path("tests/data/markdown/having_kids.markdown")
new_agent = await AgentAdapters.aupdate_agent(
default_user2,
"Test Agent",
"Test Personality",
Agent.PrivacyLevel.PUBLIC,
"icon",
"color",
default_openai_chat_model_option.chat_model,
[full_filename],
[],
[],
)
# Search as a different user than the agent's creator
search_result = await execute_search(user=default_user3, q="having kids", agent=new_agent)
assert len(search_result) == 5
# A user who did not create a private agent should get no search results
# when searching with that agent.
@pytest.mark.anyio
@pytest.mark.django_db(transaction=True)
async def test_agent_with_knowledge_base_and_search_not_creator_and_private(
default_user2: KhojUser, default_openai_chat_model_option: ChatModelOptions, chat_client, default_user3: KhojUser
):
full_filename = get_absolute_path("tests/data/markdown/having_kids.markdown")
new_agent = await AgentAdapters.aupdate_agent(
default_user2,
"Test Agent",
"Test Personality",
Agent.PrivacyLevel.PRIVATE,
"icon",
"color",
default_openai_chat_model_option.chat_model,
[full_filename],
[],
[],
)
# Search as a different user than the agent's creator
search_result = await execute_search(user=default_user3, q="having kids", agent=new_agent)
assert len(search_result) == 0
# Searching with a private agent while passing no user is expected to return results.
# NOTE(review): the test name reads as if nobody should have access, yet the
# assertion expects 5 results when user is None - confirm this is intended.
@pytest.mark.anyio
@pytest.mark.django_db(transaction=True)
async def test_agent_with_knowledge_base_and_search_not_creator_and_private_accessible_to_none(
default_user2: KhojUser, default_openai_chat_model_option: ChatModelOptions, chat_client
):
full_filename = get_absolute_path("tests/data/markdown/having_kids.markdown")
new_agent = await AgentAdapters.aupdate_agent(
default_user2,
"Test Agent",
"Test Personality",
Agent.PrivacyLevel.PRIVATE,
"icon",
"color",
default_openai_chat_model_option.chat_model,
[full_filename],
[],
[],
)
# Search without a user
search_result = await execute_search(user=None, q="having kids", agent=new_agent)
assert len(search_result) == 5
# With two public agents that each have a different file, searching with the
# second agent should only return results for the query about its own file.
@pytest.mark.anyio
@pytest.mark.django_db(transaction=True)
async def test_multiple_agents_with_knowledge_base_and_users(
default_user2: KhojUser, default_openai_chat_model_option: ChatModelOptions, chat_client, default_user3: KhojUser
):
# First agent: created with the "having_kids" file
full_filename = get_absolute_path("tests/data/markdown/having_kids.markdown")
new_agent = await AgentAdapters.aupdate_agent(
default_user2,
"Test Agent",
"Test Personality",
Agent.PrivacyLevel.PUBLIC,
"icon",
"color",
default_openai_chat_model_option.chat_model,
[full_filename],
[],
[],
)
# Second agent: created with the "Namita" file
full_filename2 = get_absolute_path("tests/data/markdown/Namita.markdown")
new_agent2 = await AgentAdapters.aupdate_agent(
default_user2,
"Test Agent 2",
"Test Personality",
Agent.PrivacyLevel.PUBLIC,
"icon",
"color",
default_openai_chat_model_option.chat_model,
[full_filename2],
[],
[],
)
# Both searches use the second agent, run by a user who created neither agent
search_result = await execute_search(user=default_user3, q="having kids", agent=new_agent2)
search_result2 = await execute_search(user=default_user3, q="Namita", agent=new_agent2)
assert len(search_result) == 0
assert len(search_result2) == 1

View file

@ -1,6 +1,8 @@
import os
import re
import pytest
from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
from khoj.utils.fs_syncer import get_pdf_files
from khoj.utils.rawconfig import TextContentConfig
@ -37,6 +39,7 @@ def test_multi_page_pdf_to_jsonl():
assert len(entries[1]) == 6
@pytest.mark.skip(reason="Temporarily disabled OCR due to performance issues")
def test_ocr_page_pdf_to_jsonl():
"Convert multiple pages from single PDF file to jsonl."
# Arrange

View file

@ -78,5 +78,9 @@
"1.24.0": "0.15.0",
"1.24.1": "0.15.0",
"1.25.0": "0.15.0",
"1.26.0": "0.15.0"
"1.26.0": "0.15.0",
"1.26.1": "0.15.0",
"1.26.2": "0.15.0",
"1.26.3": "0.15.0",
"1.26.4": "0.15.0"
}