mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 23:48:56 +01:00
Enable Passing External Documents for Analysis in Code Sandbox (#960)
- Allow passing user files as input into the code sandbox for analysis
- Update the prompt to give more examples of complex, multi-line code
- Simplify logic for the model. Run one program at a time, instead of allowing the model to run multiple programs in parallel
- Show code-generated charts and docs in the Reference pane of the web app and make them downloadable
This commit is contained in:
commit
48862a8400
7 changed files with 270 additions and 166 deletions
|
@ -139,33 +139,6 @@ export function processMessageChunk(
|
|||
if (onlineContext) currentMessage.onlineContext = onlineContext;
|
||||
if (context) currentMessage.context = context;
|
||||
|
||||
// Replace file links with base64 data
|
||||
currentMessage.rawResponse = renderCodeGenImageInline(
|
||||
currentMessage.rawResponse,
|
||||
codeContext,
|
||||
);
|
||||
|
||||
// Add code context files to the message
|
||||
if (codeContext) {
|
||||
Object.entries(codeContext).forEach(([key, value]) => {
|
||||
value.results.output_files?.forEach((file) => {
|
||||
if (file.filename.endsWith(".png") || file.filename.endsWith(".jpg")) {
|
||||
// Don't add the image again if it's already in the message!
|
||||
if (!currentMessage.rawResponse.includes(`![${file.filename}](`)) {
|
||||
currentMessage.rawResponse += `\n\n![${file.filename}](data:image/png;base64,${file.b64_data})`;
|
||||
}
|
||||
} else if (
|
||||
file.filename.endsWith(".txt") ||
|
||||
file.filename.endsWith(".org") ||
|
||||
file.filename.endsWith(".md")
|
||||
) {
|
||||
const decodedText = atob(file.b64_data);
|
||||
currentMessage.rawResponse += `\n\n\`\`\`\n${decodedText}\n\`\`\``;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Mark current message streaming as completed
|
||||
currentMessage.completed = true;
|
||||
}
|
||||
|
@ -200,9 +173,13 @@ export function renderCodeGenImageInline(message: string, codeContext: CodeConte
|
|||
Object.values(codeContext).forEach((contextData) => {
|
||||
contextData.results.output_files?.forEach((file) => {
|
||||
const regex = new RegExp(`!?\\[.*?\\]\\(.*${file.filename}\\)`, "g");
|
||||
if (file.filename.match(/\.(png|jpg|jpeg|gif|webp)$/i)) {
|
||||
if (file.filename.match(/\.(png|jpg|jpeg)$/i)) {
|
||||
const replacement = `![${file.filename}](data:image/${file.filename.split(".").pop()};base64,${file.b64_data})`;
|
||||
message = message.replace(regex, replacement);
|
||||
} else if (file.filename.match(/\.(txt|org|md|csv|json)$/i)) {
|
||||
// render output files generated by codegen as downloadable links
|
||||
const replacement = `![${file.filename}](data:text/plain;base64,${file.b64_data})`;
|
||||
message = message.replace(regex, replacement);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
|
|
@ -40,7 +40,6 @@ import {
|
|||
Leaf,
|
||||
NewspaperClipping,
|
||||
OrangeSlice,
|
||||
Rainbow,
|
||||
SmileyMelting,
|
||||
YinYang,
|
||||
SneakerMove,
|
||||
|
@ -247,6 +246,13 @@ function getIconFromFilename(
|
|||
case "doc":
|
||||
case "docx":
|
||||
return <MicrosoftWordLogo className={className} />;
|
||||
case "csv":
|
||||
case "json":
|
||||
return <MathOperations className={className} />;
|
||||
case "txt":
|
||||
return <Notebook className={className} />;
|
||||
case "py":
|
||||
return <Code className={className} />;
|
||||
case "jpg":
|
||||
case "jpeg":
|
||||
case "png":
|
||||
|
|
|
@ -420,6 +420,24 @@ const ChatMessage = forwardRef<HTMLDivElement, ChatMessageProps>((props, ref) =>
|
|||
message += `\n\n${inferredQueries[0]}`;
|
||||
}
|
||||
}
|
||||
|
||||
// Replace file links with base64 data
|
||||
message = renderCodeGenImageInline(message, props.chatMessage.codeContext);
|
||||
|
||||
// Add code context files to the message
|
||||
if (props.chatMessage.codeContext) {
|
||||
Object.entries(props.chatMessage.codeContext).forEach(([key, value]) => {
|
||||
value.results.output_files?.forEach((file) => {
|
||||
if (file.filename.endsWith(".png") || file.filename.endsWith(".jpg")) {
|
||||
// Don't add the image again if it's already in the message!
|
||||
if (!message.includes(`![${file.filename}](`)) {
|
||||
message += `\n\n![${file.filename}](data:image/png;base64,${file.b64_data})`;
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Handle user attached images rendering
|
||||
let messageForClipboard = message;
|
||||
let messageToRender = message;
|
||||
|
@ -445,48 +463,6 @@ const ChatMessage = forwardRef<HTMLDivElement, ChatMessageProps>((props, ref) =>
|
|||
messageToRender = `${userImagesInHtml}${messageToRender}`;
|
||||
}
|
||||
|
||||
if (props.chatMessage.intent && props.chatMessage.intent.type == "text-to-image") {
|
||||
message = `![generated image](data:image/png;base64,${message})`;
|
||||
} else if (props.chatMessage.intent && props.chatMessage.intent.type == "text-to-image2") {
|
||||
message = `![generated image](${message})`;
|
||||
} else if (
|
||||
props.chatMessage.intent &&
|
||||
props.chatMessage.intent.type == "text-to-image-v3"
|
||||
) {
|
||||
message = `![generated image](data:image/webp;base64,${message})`;
|
||||
}
|
||||
if (
|
||||
props.chatMessage.intent &&
|
||||
props.chatMessage.intent.type.includes("text-to-image") &&
|
||||
props.chatMessage.intent["inferred-queries"]?.length > 0
|
||||
) {
|
||||
message += `\n\n${props.chatMessage.intent["inferred-queries"][0]}`;
|
||||
}
|
||||
|
||||
// Replace file links with base64 data
|
||||
message = renderCodeGenImageInline(message, props.chatMessage.codeContext);
|
||||
|
||||
// Add code context files to the message
|
||||
if (props.chatMessage.codeContext) {
|
||||
Object.entries(props.chatMessage.codeContext).forEach(([key, value]) => {
|
||||
value.results.output_files?.forEach((file) => {
|
||||
if (file.filename.endsWith(".png") || file.filename.endsWith(".jpg")) {
|
||||
// Don't add the image again if it's already in the message!
|
||||
if (!message.includes(`![${file.filename}](`)) {
|
||||
message += `\n\n![${file.filename}](data:image/png;base64,${file.b64_data})`;
|
||||
}
|
||||
} else if (
|
||||
file.filename.endsWith(".txt") ||
|
||||
file.filename.endsWith(".org") ||
|
||||
file.filename.endsWith(".md")
|
||||
) {
|
||||
const decodedText = atob(file.b64_data);
|
||||
message += `\n\n\`\`\`\n${decodedText}\n\`\`\``;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Set the message text
|
||||
setTextRendered(messageForClipboard);
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
import { useEffect, useState } from "react";
|
||||
|
||||
import { ArrowRight } from "@phosphor-icons/react";
|
||||
import { ArrowCircleDown, ArrowRight } from "@phosphor-icons/react";
|
||||
|
||||
import markdownIt from "markdown-it";
|
||||
const md = new markdownIt({
|
||||
|
@ -11,7 +11,13 @@ const md = new markdownIt({
|
|||
typographer: true,
|
||||
});
|
||||
|
||||
import { Context, WebPage, OnlineContext, CodeContext } from "../chatMessage/chatMessage";
|
||||
import {
|
||||
Context,
|
||||
WebPage,
|
||||
OnlineContext,
|
||||
CodeContext,
|
||||
CodeContextFile,
|
||||
} from "../chatMessage/chatMessage";
|
||||
import { Card } from "@/components/ui/card";
|
||||
|
||||
import {
|
||||
|
@ -51,6 +57,7 @@ function NotesContextReferenceCard(props: NotesContextReferenceCardProps) {
|
|||
props.title || ".txt",
|
||||
"w-6 h-6 text-muted-foreground inline-flex mr-2",
|
||||
);
|
||||
const fileName = props.title.split("/").pop() || props.title;
|
||||
const snippet = extractSnippet(props);
|
||||
const [isHovering, setIsHovering] = useState(false);
|
||||
|
||||
|
@ -61,30 +68,30 @@ function NotesContextReferenceCard(props: NotesContextReferenceCardProps) {
|
|||
<Card
|
||||
onMouseEnter={() => setIsHovering(true)}
|
||||
onMouseLeave={() => setIsHovering(false)}
|
||||
className={`${props.showFullContent ? "w-auto" : "w-[200px]"} overflow-hidden break-words text-balance rounded-lg p-2 bg-muted border-none`}
|
||||
className={`${props.showFullContent ? "w-auto" : "w-[200px]"} overflow-hidden break-words text-balance rounded-lg border-none p-2 bg-muted`}
|
||||
>
|
||||
<h3
|
||||
className={`${props.showFullContent ? "block" : "line-clamp-1"} text-muted-foreground}`}
|
||||
>
|
||||
{fileIcon}
|
||||
{props.title}
|
||||
{props.showFullContent ? props.title : fileName}
|
||||
</h3>
|
||||
<p
|
||||
className={`${props.showFullContent ? "block" : "overflow-hidden line-clamp-2"}`}
|
||||
className={`text-sm ${props.showFullContent ? "overflow-x-auto block" : "overflow-hidden line-clamp-2"}`}
|
||||
dangerouslySetInnerHTML={{ __html: snippet }}
|
||||
></p>
|
||||
</Card>
|
||||
</PopoverTrigger>
|
||||
<PopoverContent className="w-[400px] mx-2">
|
||||
<Card
|
||||
className={`w-auto overflow-hidden break-words text-balance rounded-lg p-2 border-none`}
|
||||
className={`w-auto overflow-hidden break-words text-balance rounded-lg border-none p-2`}
|
||||
>
|
||||
<h3 className={`line-clamp-2 text-muted-foreground}`}>
|
||||
{fileIcon}
|
||||
{props.title}
|
||||
</h3>
|
||||
<p
|
||||
className={`overflow-hidden line-clamp-3`}
|
||||
className={`border-t mt-1 pt-1 text-sm overflow-hidden line-clamp-5`}
|
||||
dangerouslySetInnerHTML={{ __html: snippet }}
|
||||
></p>
|
||||
</Card>
|
||||
|
@ -97,14 +104,98 @@ function NotesContextReferenceCard(props: NotesContextReferenceCardProps) {
|
|||
interface CodeContextReferenceCardProps {
|
||||
code: string;
|
||||
output: string;
|
||||
output_files: CodeContextFile[];
|
||||
error: string;
|
||||
showFullContent: boolean;
|
||||
}
|
||||
|
||||
function CodeContextReferenceCard(props: CodeContextReferenceCardProps) {
|
||||
const fileIcon = getIconFromFilename(".py", "w-6 h-6 text-muted-foreground inline-flex mr-2");
|
||||
const snippet = DOMPurify.sanitize(props.code);
|
||||
const fileIcon = getIconFromFilename(".py", "!w-4 h-4 text-muted-foreground flex-shrink-0");
|
||||
const sanitizedCodeSnippet = DOMPurify.sanitize(props.code);
|
||||
const [isHovering, setIsHovering] = useState(false);
|
||||
const [isDownloadHover, setIsDownloadHover] = useState(false);
|
||||
|
||||
const handleDownload = (file: CodeContextFile) => {
|
||||
// Determine MIME type
|
||||
let mimeType = "text/plain";
|
||||
let byteString = file.b64_data;
|
||||
if (file.filename.match(/\.(png|jpg|jpeg|webp)$/)) {
|
||||
mimeType = `image/${file.filename.split(".").pop()}`;
|
||||
byteString = atob(file.b64_data);
|
||||
} else if (file.filename.endsWith(".json")) {
|
||||
mimeType = "application/json";
|
||||
} else if (file.filename.endsWith(".csv")) {
|
||||
mimeType = "text/csv";
|
||||
}
|
||||
|
||||
const arrayBuffer = new ArrayBuffer(byteString.length);
|
||||
const bytes = new Uint8Array(arrayBuffer);
|
||||
|
||||
for (let i = 0; i < byteString.length; i++) {
|
||||
bytes[i] = byteString.charCodeAt(i);
|
||||
}
|
||||
|
||||
const blob = new Blob([arrayBuffer], { type: mimeType });
|
||||
const url = URL.createObjectURL(blob);
|
||||
const a = document.createElement("a");
|
||||
a.href = url;
|
||||
a.download = file.filename;
|
||||
document.body.appendChild(a);
|
||||
a.click();
|
||||
document.body.removeChild(a);
|
||||
URL.revokeObjectURL(url);
|
||||
};
|
||||
|
||||
const renderOutputFiles = (files: CodeContextFile[], hoverCard: boolean) => {
|
||||
if (files?.length == 0) return null;
|
||||
return (
|
||||
<div
|
||||
className={`${hoverCard || props.showFullContent ? "border-t mt-1 pt-1" : undefined}`}
|
||||
>
|
||||
{files.slice(0, props.showFullContent ? undefined : 1).map((file, index) => {
|
||||
return (
|
||||
<div key={`${file.filename}-${index}`}>
|
||||
<h4 className="text-sm text-muted-foreground flex items-center">
|
||||
<span
|
||||
className={`overflow-hidden mr-2 font-bold ${props.showFullContent ? undefined : "line-clamp-1"}`}
|
||||
>
|
||||
{file.filename}
|
||||
</span>
|
||||
<button
|
||||
className={`${hoverCard ? "hidden" : undefined}`}
|
||||
onClick={(e) => {
|
||||
e.preventDefault();
|
||||
handleDownload(file);
|
||||
}}
|
||||
onMouseEnter={() => setIsDownloadHover(true)}
|
||||
onMouseLeave={() => setIsDownloadHover(false)}
|
||||
title={`Download file: ${file.filename}`}
|
||||
>
|
||||
<ArrowCircleDown
|
||||
className={`w-4 h-4`}
|
||||
weight={isDownloadHover ? "fill" : "regular"}
|
||||
/>
|
||||
</button>
|
||||
</h4>
|
||||
{file.filename.match(/\.(txt|org|md|csv|json)$/) ? (
|
||||
<pre
|
||||
className={`${props.showFullContent ? "block" : "line-clamp-2"} text-sm mt-1 p-1 bg-background rounded overflow-x-auto`}
|
||||
>
|
||||
{file.b64_data}
|
||||
</pre>
|
||||
) : file.filename.match(/\.(png|jpg|jpeg|webp)$/) ? (
|
||||
<img
|
||||
src={`data:image/${file.filename.split(".").pop()};base64,${file.b64_data}`}
|
||||
alt={file.filename}
|
||||
className="mt-1 max-h-32 rounded"
|
||||
/>
|
||||
) : null}
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
return (
|
||||
<>
|
||||
|
@ -113,30 +204,44 @@ function CodeContextReferenceCard(props: CodeContextReferenceCardProps) {
|
|||
<Card
|
||||
onMouseEnter={() => setIsHovering(true)}
|
||||
onMouseLeave={() => setIsHovering(false)}
|
||||
className={`${props.showFullContent ? "w-auto" : "w-[200px]"} overflow-hidden break-words text-balance rounded-lg p-2 bg-muted border-none`}
|
||||
>
|
||||
<h3
|
||||
className={`${props.showFullContent ? "block" : "line-clamp-1"} text-muted-foreground}`}
|
||||
className={`${props.showFullContent ? "w-auto" : "w-[200px]"} overflow-hidden break-words text-balance rounded-lg border-none p-2 bg-muted`}
|
||||
>
|
||||
<div className="flex flex-col px-1">
|
||||
<div className="flex items-center gap-2">
|
||||
{fileIcon}
|
||||
Code
|
||||
</h3>
|
||||
<p
|
||||
className={`${props.showFullContent ? "block" : "overflow-hidden line-clamp-2"}`}
|
||||
<h3
|
||||
className={`overflow-hidden ${props.showFullContent ? "block" : "line-clamp-1"} text-muted-foreground flex-grow`}
|
||||
>
|
||||
{snippet}
|
||||
</p>
|
||||
code {props.output_files?.length > 0 ? "artifacts" : ""}
|
||||
</h3>
|
||||
</div>
|
||||
<pre
|
||||
className={`text-xs pb-2 ${props.showFullContent ? "block overflow-x-auto" : props.output_files?.length > 0 ? "hidden" : "overflow-hidden line-clamp-3"}`}
|
||||
>
|
||||
{sanitizedCodeSnippet}
|
||||
</pre>
|
||||
{renderOutputFiles(props.output_files, false)}
|
||||
</div>
|
||||
</Card>
|
||||
</PopoverTrigger>
|
||||
<PopoverContent className="w-[400px] mx-2">
|
||||
<Card
|
||||
className={`w-auto overflow-hidden break-words text-balance rounded-lg p-2 border-none`}
|
||||
className={`w-auto overflow-hidden break-words text-balance rounded-lg border-none p-2`}
|
||||
>
|
||||
<h3 className={`line-clamp-2 text-muted-foreground}`}>
|
||||
<div className="flex items-center gap-2">
|
||||
{fileIcon}
|
||||
Code
|
||||
<h3
|
||||
className={`overflow-hidden ${props.showFullContent ? "block" : "line-clamp-1"} text-muted-foreground flex-grow`}
|
||||
>
|
||||
code {props.output_files?.length > 0 ? "artifact" : ""}
|
||||
</h3>
|
||||
<p className={`overflow-hidden line-clamp-3`}>{snippet}</p>
|
||||
</div>
|
||||
{(props.output_files.length > 0 &&
|
||||
renderOutputFiles(props.output_files?.slice(0, 1), true)) || (
|
||||
<pre className="text-xs border-t mt-1 pt-1 verflow-hidden line-clamp-10">
|
||||
{sanitizedCodeSnippet}
|
||||
</pre>
|
||||
)}
|
||||
</Card>
|
||||
</PopoverContent>
|
||||
</Popover>
|
||||
|
@ -144,14 +249,10 @@ function CodeContextReferenceCard(props: CodeContextReferenceCardProps) {
|
|||
);
|
||||
}
|
||||
|
||||
export interface ReferencePanelData {
|
||||
notesReferenceCardData: NotesContextReferenceData[];
|
||||
onlineReferenceCardData: OnlineReferenceData[];
|
||||
}
|
||||
|
||||
export interface CodeReferenceData {
|
||||
code: string;
|
||||
output: string;
|
||||
output_files: CodeContextFile[];
|
||||
error: string;
|
||||
}
|
||||
|
||||
|
@ -197,21 +298,17 @@ function GenericOnlineReferenceCard(props: OnlineReferenceCardProps) {
|
|||
<Card
|
||||
onMouseEnter={handleMouseEnter}
|
||||
onMouseLeave={handleMouseLeave}
|
||||
className={`${props.showFullContent ? "w-auto" : "w-[200px]"} overflow-hidden break-words rounded-lg text-balance p-2 bg-muted border-none`}
|
||||
className={`${props.showFullContent ? "w-auto" : "w-[200px]"} overflow-hidden break-words text-balance rounded-lg border-none p-2 bg-muted`}
|
||||
>
|
||||
<div className="flex flex-col">
|
||||
<a
|
||||
href={props.link}
|
||||
target="_blank"
|
||||
rel="noreferrer"
|
||||
className="!no-underline p-2"
|
||||
className="!no-underline px-1"
|
||||
>
|
||||
<div className="flex items-center gap-2">
|
||||
<img
|
||||
src={favicon}
|
||||
alt=""
|
||||
className="!w-4 h-4 mr-2 flex-shrink-0"
|
||||
/>
|
||||
<img src={favicon} alt="" className="!w-4 h-4 flex-shrink-0" />
|
||||
<h3
|
||||
className={`overflow-hidden ${props.showFullContent ? "block" : "line-clamp-1"} text-muted-foreground flex-grow`}
|
||||
>
|
||||
|
@ -224,7 +321,7 @@ function GenericOnlineReferenceCard(props: OnlineReferenceCardProps) {
|
|||
{props.title}
|
||||
</h3>
|
||||
<p
|
||||
className={`overflow-hidden ${props.showFullContent ? "block" : "line-clamp-2"}`}
|
||||
className={`overflow-hidden text-sm ${props.showFullContent ? "block" : "line-clamp-2"}`}
|
||||
>
|
||||
{props.description}
|
||||
</p>
|
||||
|
@ -241,23 +338,23 @@ function GenericOnlineReferenceCard(props: OnlineReferenceCardProps) {
|
|||
href={props.link}
|
||||
target="_blank"
|
||||
rel="noreferrer"
|
||||
className="!no-underline p-2"
|
||||
className="!no-underline px-1"
|
||||
>
|
||||
<div className="flex items-center">
|
||||
<img src={favicon} alt="" className="!w-4 h-4 mr-2" />
|
||||
<div className="flex items-center gap-2">
|
||||
<img src={favicon} alt="" className="!w-4 h-4 flex-shrink-0" />
|
||||
<h3
|
||||
className={`overflow-hidden ${props.showFullContent ? "block" : "line-clamp-2"} text-muted-foreground`}
|
||||
className={`overflow-hidden ${props.showFullContent ? "block" : "line-clamp-2"} text-muted-foreground flex-grow`}
|
||||
>
|
||||
{domain}
|
||||
</h3>
|
||||
</div>
|
||||
<h3
|
||||
className={`overflow-hidden ${props.showFullContent ? "block" : "line-clamp-2"} font-bold`}
|
||||
className={`border-t mt-1 pt-1 overflow-hidden ${props.showFullContent ? "block" : "line-clamp-2"} font-bold`}
|
||||
>
|
||||
{props.title}
|
||||
</h3>
|
||||
<p
|
||||
className={`overflow-hidden ${props.showFullContent ? "block" : "line-clamp-3"}`}
|
||||
className={`overflow-hidden text-sm ${props.showFullContent ? "block" : "line-clamp-5"}`}
|
||||
>
|
||||
{props.description}
|
||||
</p>
|
||||
|
@ -287,6 +384,7 @@ export function constructAllReferences(
|
|||
codeReferences.push({
|
||||
code: value.code,
|
||||
output: value.results.std_out,
|
||||
output_files: value.results.output_files,
|
||||
error: value.results.std_err,
|
||||
});
|
||||
}
|
||||
|
@ -390,10 +488,10 @@ export function TeaserReferencesSection(props: TeaserReferenceSectionProps) {
|
|||
setNumTeaserSlots(props.isMobileWidth ? 1 : 3);
|
||||
}, [props.isMobileWidth]);
|
||||
|
||||
const notesDataToShow = props.notesReferenceCardData.slice(0, numTeaserSlots);
|
||||
const codeDataToShow = props.codeReferenceCardData.slice(
|
||||
const codeDataToShow = props.codeReferenceCardData.slice(0, numTeaserSlots);
|
||||
const notesDataToShow = props.notesReferenceCardData.slice(
|
||||
0,
|
||||
numTeaserSlots - notesDataToShow.length,
|
||||
numTeaserSlots - codeDataToShow.length,
|
||||
);
|
||||
const onlineDataToShow =
|
||||
notesDataToShow.length + codeDataToShow.length < numTeaserSlots
|
||||
|
@ -424,15 +522,6 @@ export function TeaserReferencesSection(props: TeaserReferenceSectionProps) {
|
|||
<p className="text-gray-400 m-2">{numReferences} sources</p>
|
||||
</h3>
|
||||
<div className={`flex flex-wrap gap-2 w-auto mt-2`}>
|
||||
{notesDataToShow.map((note, index) => {
|
||||
return (
|
||||
<NotesContextReferenceCard
|
||||
showFullContent={false}
|
||||
{...note}
|
||||
key={`${note.title}-${index}`}
|
||||
/>
|
||||
);
|
||||
})}
|
||||
{codeDataToShow.map((code, index) => {
|
||||
return (
|
||||
<CodeContextReferenceCard
|
||||
|
@ -442,6 +531,15 @@ export function TeaserReferencesSection(props: TeaserReferenceSectionProps) {
|
|||
/>
|
||||
);
|
||||
})}
|
||||
{notesDataToShow.map((note, index) => {
|
||||
return (
|
||||
<NotesContextReferenceCard
|
||||
showFullContent={false}
|
||||
{...note}
|
||||
key={`${note.title}-${index}`}
|
||||
/>
|
||||
);
|
||||
})}
|
||||
{onlineDataToShow.map((online, index) => {
|
||||
return (
|
||||
<GenericOnlineReferenceCard
|
||||
|
@ -486,6 +584,15 @@ export default function ReferencePanel(props: ReferencePanelDataProps) {
|
|||
<SheetDescription>View all references for this response</SheetDescription>
|
||||
</SheetHeader>
|
||||
<div className="flex flex-wrap gap-2 w-auto mt-2">
|
||||
{props.codeReferenceCardData.map((code, index) => {
|
||||
return (
|
||||
<CodeContextReferenceCard
|
||||
showFullContent={true}
|
||||
{...code}
|
||||
key={`code-${index}`}
|
||||
/>
|
||||
);
|
||||
})}
|
||||
{props.notesReferenceCardData.map((note, index) => {
|
||||
return (
|
||||
<NotesContextReferenceCard
|
||||
|
@ -504,15 +611,6 @@ export default function ReferencePanel(props: ReferencePanelDataProps) {
|
|||
/>
|
||||
);
|
||||
})}
|
||||
{props.codeReferenceCardData.map((code, index) => {
|
||||
return (
|
||||
<CodeContextReferenceCard
|
||||
showFullContent={true}
|
||||
{...code}
|
||||
key={`code-${index}`}
|
||||
/>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</SheetContent>
|
||||
</Sheet>
|
||||
|
|
|
@ -870,25 +870,40 @@ Khoj:
|
|||
# --
|
||||
python_code_generation_prompt = PromptTemplate.from_template(
|
||||
"""
|
||||
You are Khoj, an advanced python programmer. You are tasked with constructing **up to three** python programs to best answer the user query.
|
||||
You are Khoj, an advanced python programmer. You are tasked with constructing a python program to best answer the user query.
|
||||
- The python program will run in a pyodide python sandbox with no network access.
|
||||
- You can write programs to run complex calculations, analyze data, create charts, generate documents to meticulously answer the query
|
||||
- The sandbox has access to the standard library, matplotlib, panda, numpy, scipy, bs4, sympy, brotli, cryptography, fast-parquet
|
||||
- You can write programs to run complex calculations, analyze data, create charts, generate documents to meticulously answer the query.
|
||||
- The sandbox has access to the standard library, matplotlib, panda, numpy, scipy, bs4, sympy, brotli, cryptography, fast-parquet.
|
||||
- List known file paths to required user documents in "input_files" and known links to required documents from the web in the "input_links" field.
|
||||
- The python program should be self-contained. It can only read data generated by the program itself and from provided input_files, input_links by their basename (i.e filename excluding file path).
|
||||
- Do not try display images or plots in the code directly. The code should save the image or plot to a file instead.
|
||||
- Write any document, charts etc. to be shared with the user to file. These files can be seen by the user.
|
||||
- Use as much context from the previous questions and answers as required to generate your code.
|
||||
{personality_context}
|
||||
What code will you need to write, if any, to answer the user's question?
|
||||
Provide code programs as a list of strings in a JSON object with key "codes".
|
||||
What code will you need to write to answer the user's question?
|
||||
|
||||
Current Date: {current_date}
|
||||
User's Location: {location}
|
||||
{username}
|
||||
|
||||
The JSON schema is of the form {{"codes": ["code1", "code2", "code3"]}}
|
||||
For example:
|
||||
{{"codes": ["print('Hello, World!')", "print('Goodbye, World!')"]}}
|
||||
The response JSON schema is of the form {{"code": "<python_code>", "input_files": ["file_path_1", "file_path_2"], "input_links": ["link_1", "link_2"]}}
|
||||
Examples:
|
||||
---
|
||||
{{
|
||||
"code": "# Input values\\nprincipal = 43235\\nrate = 5.24\\nyears = 5\\n\\n# Convert rate to decimal\\nrate_decimal = rate / 100\\n\\n# Calculate final amount\\nfinal_amount = principal * (1 + rate_decimal) ** years\\n\\n# Calculate interest earned\\ninterest_earned = final_amount - principal\\n\\n# Print results with formatting\\nprint(f"Interest Earned: ${{interest_earned:,.2f}}")\\nprint(f"Final Amount: ${{final_amount:,.2f}}")"
|
||||
}}
|
||||
|
||||
Now it's your turn to construct python programs to answer the user's question. Provide them as a list of strings in a JSON object. Do not say anything else.
|
||||
{{
|
||||
"code": "import re\\n\\n# Read org file\\nfile_path = 'tasks.org'\\nwith open(file_path, 'r') as f:\\n content = f.read()\\n\\n# Get today's date in YYYY-MM-DD format\\ntoday = datetime.now().strftime('%Y-%m-%d')\\npattern = r'\*+\s+.*\\n.*SCHEDULED:\s+<' + today + r'.*>'\\n\\n# Find all matches using multiline mode\\nmatches = re.findall(pattern, content, re.MULTILINE)\\ncount = len(matches)\\n\\n# Display count\\nprint(f'Count of scheduled tasks for today: {{count}}')",
|
||||
"input_files": ["/home/linux/tasks.org"]
|
||||
}}
|
||||
|
||||
{{
|
||||
"code": "import pandas as pd\\nimport matplotlib.pyplot as plt\\n\\n# Load the CSV file\\ndf = pd.read_csv('world_population_by_year.csv')\\n\\n# Plot the data\\nplt.figure(figsize=(10, 6))\\nplt.plot(df['Year'], df['Population'], marker='o')\\n\\n# Add titles and labels\\nplt.title('Population by Year')\\nplt.xlabel('Year')\\nplt.ylabel('Population')\\n\\n# Save the plot to a file\\nplt.savefig('population_by_year_plot.png')",
|
||||
"input_links": ["https://population.un.org/world_population_by_year.csv"]
|
||||
}}
|
||||
|
||||
Now it's your turn to construct a python program to answer the user's question. Provide the code, required input files and input links in a JSON object. Do not say anything else.
|
||||
Context:
|
||||
---
|
||||
{context}
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
import asyncio
|
||||
import base64
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
from typing import Any, Callable, List, Optional
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, List, NamedTuple, Optional
|
||||
|
||||
import aiohttp
|
||||
|
||||
from khoj.database.adapters import ais_user_subscribed
|
||||
from khoj.database.models import Agent, KhojUser
|
||||
from khoj.database.adapters import FileObjectAdapters
|
||||
from khoj.database.models import Agent, FileObject, KhojUser
|
||||
from khoj.processor.conversation import prompts
|
||||
from khoj.processor.conversation.utils import (
|
||||
ChatEvent,
|
||||
|
@ -17,7 +19,7 @@ from khoj.processor.conversation.utils import (
|
|||
construct_chat_history,
|
||||
)
|
||||
from khoj.routers.helpers import send_message_to_model_wrapper
|
||||
from khoj.utils.helpers import timer
|
||||
from khoj.utils.helpers import is_none_or_empty, timer
|
||||
from khoj.utils.rawconfig import LocationData
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -26,6 +28,12 @@ logger = logging.getLogger(__name__)
|
|||
SANDBOX_URL = os.getenv("KHOJ_TERRARIUM_URL", "http://localhost:8080")
|
||||
|
||||
|
||||
class GeneratedCode(NamedTuple):
|
||||
code: str
|
||||
input_files: List[str]
|
||||
input_links: List[str]
|
||||
|
||||
|
||||
async def run_code(
|
||||
query: str,
|
||||
conversation_history: dict,
|
||||
|
@ -41,11 +49,11 @@ async def run_code(
|
|||
):
|
||||
# Generate Code
|
||||
if send_status_func:
|
||||
async for event in send_status_func(f"**Generate code snippets** for {query}"):
|
||||
async for event in send_status_func(f"**Generate code snippet** for {query}"):
|
||||
yield {ChatEvent.STATUS: event}
|
||||
try:
|
||||
with timer("Chat actor: Generate programs to execute", logger):
|
||||
codes = await generate_python_code(
|
||||
generated_code = await generate_python_code(
|
||||
query,
|
||||
conversation_history,
|
||||
context,
|
||||
|
@ -59,15 +67,26 @@ async def run_code(
|
|||
except Exception as e:
|
||||
raise ValueError(f"Failed to generate code for {query} with error: {e}")
|
||||
|
||||
# Prepare Input Data
|
||||
input_data = []
|
||||
user_input_files: List[FileObject] = []
|
||||
for input_file in generated_code.input_files:
|
||||
user_input_files += await FileObjectAdapters.aget_file_objects_by_name(user, input_file)
|
||||
for f in user_input_files:
|
||||
input_data.append(
|
||||
{
|
||||
"filename": os.path.basename(f.file_name),
|
||||
"b64_data": base64.b64encode(f.raw_text.encode("utf-8")).decode("utf-8"),
|
||||
}
|
||||
)
|
||||
|
||||
# Run Code
|
||||
if send_status_func:
|
||||
async for event in send_status_func(f"**Running {len(codes)} code snippets**"):
|
||||
async for event in send_status_func(f"**Running code snippet**"):
|
||||
yield {ChatEvent.STATUS: event}
|
||||
try:
|
||||
tasks = [execute_sandboxed_python(code, sandbox_url) for code in codes]
|
||||
with timer("Chat actor: Execute generated programs", logger):
|
||||
results = await asyncio.gather(*tasks)
|
||||
for result in results:
|
||||
with timer("Chat actor: Execute generated program", logger, log_level=logging.INFO):
|
||||
result = await execute_sandboxed_python(generated_code.code, input_data, sandbox_url)
|
||||
code = result.pop("code")
|
||||
logger.info(f"Executed Code:\n--@@--\n{code}\n--@@--Result:\n--@@--\n{result}\n--@@--")
|
||||
yield {query: {"code": code, "results": result}}
|
||||
|
@ -81,14 +100,13 @@ async def generate_python_code(
|
|||
context: str,
|
||||
location_data: LocationData,
|
||||
user: KhojUser,
|
||||
query_images: List[str] = None,
|
||||
query_images: list[str] = None,
|
||||
agent: Agent = None,
|
||||
tracer: dict = {},
|
||||
query_files: str = None,
|
||||
) -> List[str]:
|
||||
) -> GeneratedCode:
|
||||
location = f"{location_data}" if location_data else "Unknown"
|
||||
username = prompts.user_name.format(name=user.get_full_name()) if user.get_full_name() else ""
|
||||
subscribed = await ais_user_subscribed(user)
|
||||
chat_history = construct_chat_history(conversation_history)
|
||||
|
||||
utc_date = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
|
||||
|
@ -118,27 +136,39 @@ async def generate_python_code(
|
|||
# Validate that the response is a non-empty, JSON-serializable list
|
||||
response = clean_json(response)
|
||||
response = json.loads(response)
|
||||
codes = [code.strip() for code in response["codes"] if code.strip()]
|
||||
code = response.get("code", "").strip()
|
||||
input_files = response.get("input_files", [])
|
||||
input_links = response.get("input_links", [])
|
||||
|
||||
if not isinstance(codes, list) or not codes or len(codes) == 0:
|
||||
if not isinstance(code, str) or is_none_or_empty(code):
|
||||
raise ValueError
|
||||
return codes
|
||||
return GeneratedCode(code, input_files, input_links)
|
||||
|
||||
|
||||
async def execute_sandboxed_python(code: str, sandbox_url: str = SANDBOX_URL) -> dict[str, Any]:
|
||||
async def execute_sandboxed_python(code: str, input_data: list[dict], sandbox_url: str = SANDBOX_URL) -> dict[str, Any]:
|
||||
"""
|
||||
Takes code to run as a string and calls the terrarium API to execute it.
|
||||
Returns the result of the code execution as a dictionary.
|
||||
|
||||
Reference data i/o format based on Terrarium example client code at:
|
||||
https://github.com/cohere-ai/cohere-terrarium/blob/main/example-clients/python/terrarium_client.py
|
||||
"""
|
||||
headers = {"Content-Type": "application/json"}
|
||||
cleaned_code = clean_code_python(code)
|
||||
data = {"code": cleaned_code}
|
||||
data = {"code": cleaned_code, "files": input_data}
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(sandbox_url, json=data, headers=headers) as response:
|
||||
if response.status == 200:
|
||||
result: dict[str, Any] = await response.json()
|
||||
result["code"] = cleaned_code
|
||||
# Store decoded output files
|
||||
for output_file in result.get("output_files", []):
|
||||
# Decode text files as UTF-8
|
||||
if mimetypes.guess_type(output_file["filename"])[0].startswith("text/") or Path(
|
||||
output_file["filename"]
|
||||
).suffix in [".org", ".md", ".json"]:
|
||||
output_file["b64_data"] = base64.b64decode(output_file["b64_data"]).decode("utf-8")
|
||||
return result
|
||||
else:
|
||||
return {
|
||||
|
|
|
@ -1131,6 +1131,7 @@ async def chat(
|
|||
conversation_id=conversation_id,
|
||||
compiled_references=compiled_references,
|
||||
online_results=online_results,
|
||||
code_results=code_results,
|
||||
query_images=uploaded_images,
|
||||
train_of_thought=train_of_thought,
|
||||
raw_query_files=raw_query_files,
|
||||
|
@ -1191,6 +1192,7 @@ async def chat(
|
|||
conversation_id=conversation_id,
|
||||
compiled_references=compiled_references,
|
||||
online_results=online_results,
|
||||
code_results=code_results,
|
||||
query_images=uploaded_images,
|
||||
train_of_thought=train_of_thought,
|
||||
raw_query_files=raw_query_files,
|
||||
|
|
Loading…
Reference in a new issue