Create Settings Page in new Web App (#872)

- Details
  - Add Profile Client, Content Sections
  - Make Multi Step Cards for Whatsapp, Files, Notion Integrations
  - Align Settings page with new Baraabar UX
This commit is contained in:
Debanjum 2024-07-30 06:59:42 -07:00 committed by GitHub
commit 60870a7a3e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 1422 additions and 9621 deletions

View file

@ -11,15 +11,69 @@ export interface UserProfile {
detail: string;
}
/**
 * Requests `/api/v1/user` and resolves with the parsed JSON body.
 * Any failure is logged via `console.log` and the promise resolves with `undefined`.
 */
const userFetcher = () => {
    const pending = window.fetch('/api/v1/user');
    return pending
        .then((response) => response.json())
        .catch((reason) => console.log(reason));
};
/**
 * Generic JSON fetcher: requests `url` and resolves with the parsed JSON body.
 * Any failure is reported via `console.warn` and the promise resolves with `undefined`.
 */
const fetcher = (url: string) => {
    const request = window.fetch(url);
    const parsed = request.then((response) => response.json());
    return parsed.catch((reason) => console.warn(reason));
};
export function useAuthenticatedData() {
const { data, error } = useSWR<UserProfile>('/api/v1/user', fetcher, { revalidateOnFocus: false });
const { data, error } = useSWR<UserProfile>('/api/v1/user', userFetcher, { revalidateOnFocus: false });
if (error) return null;
if (!data) return null;
if (data.detail === 'Forbidden') return null;
if (error || !data || data.detail === 'Forbidden') return null;
return data;
}
/** One selectable model entry: an `id` paired with the `name` shown for it. */
export interface ModelOptions {
id: number;
name: string;
}
/** Per-source flags telling whether the user has content enabled from that source. */
export interface SyncedContent {
computer: boolean;
github: boolean;
notion: boolean;
}
/**
 * Shape of the settings payload that `useUserConfig` requests from `/api/settings`.
 * Fields are grouped as: user info, content settings, model settings, billing info, server settings.
 */
export interface UserConfig {
// user info
username: string;
user_photo: string | null;
is_active: boolean;
given_name: string;
phone_number: string;
is_phone_number_verified: boolean;
// user content settings
enabled_content_source: SyncedContent;
has_documents: boolean;
// NOTE(review): the server-side get_user_config sends an empty string, not null,
// when the user has no Notion config — confirm which of the two consumers should expect.
notion_token: string | null;
// user model settings
search_model_options: ModelOptions[];
selected_search_model_config: number;
chat_model_options: ModelOptions[];
selected_chat_model_config: number;
paint_model_options: ModelOptions[];
// NOTE(review): the server sends None (null) here when no paint model is selected;
// the `number` type does not reflect that — confirm and widen if so.
selected_paint_model_config: number;
voice_model_options: ModelOptions[];
// NOTE(review): the server sends the voice model's `model_id`, or None (null) when
// no voice model is selected — confirm that `number` is the right type.
selected_voice_model_config: number;
// user billing info
subscription_state: string;
subscription_renewal_date: string;
// server settings
khoj_cloud_subscription_url: string | undefined;
billing_enabled: boolean;
is_eleven_labs_enabled: boolean;
is_twilio_enabled: boolean;
khoj_version: string;
anonymous_mode: boolean;
notion_oauth_url: string;
// Compared against 'Forbidden' by useUserConfig to detect a rejected request.
detail: string;
}
/**
 * SWR-backed hook that loads the user's settings from `/api/settings`.
 *
 * @param detailed - forwarded as the `detailed` query parameter of the request.
 * @returns the loaded config, or `null` while loading, on error, or when the
 *          response's `detail` is `'Forbidden'`.
 */
export function useUserConfig(detailed: boolean = false) {
    const endpoint = `/api/settings?detailed=${detailed}`;
    const response = useSWR<UserConfig>(endpoint, fetcher, { revalidateOnFocus: false });
    const config = response.data;

    if (response.error) return null;
    if (!config) return null;
    if (config.detail === 'Forbidden') return null;

    return config;
}

View file

@ -13,6 +13,8 @@ export interface LocationData {
/**
 * Requests `https://ipapi.co/json` and resolves with the parsed JSON body.
 * Any failure is logged via `console.log` and the promise resolves with `undefined`.
 */
const locationFetcher = () => {
    const lookup = window.fetch("https://ipapi.co/json");
    return lookup
        .then((response) => response.json())
        .catch((reason) => console.log(reason));
};
/**
 * Title-cases a string: for every run that starts at a word character and extends
 * to the next whitespace, the first character is upper-cased and the rest lower-cased.
 * Text outside those runs (whitespace, leading punctuation) is left untouched.
 */
export const toTitleCase = (str: string) =>
    str.replace(/\w\S*/g, (word) => {
        const head = word.charAt(0).toUpperCase();
        const tail = word.slice(1).toLowerCase();
        return head + tail;
    });
export function welcomeConsole() {
console.log(`%c %s`, "font-family:monospace", `
__ __ __ __ ______ __ _____ __

View file

@ -2,7 +2,7 @@
import './globals.css';
import styles from './page.module.css';
import React, { Suspense, useEffect, useState, useMemo } from 'react';
import React, { useEffect, useState } from 'react';
import SuggestionCard from './components/suggestions/suggestionCard';
import SidePanel from './components/sidePanel/chatHistorySidePanel';
@ -16,7 +16,7 @@ import 'katex/dist/katex.min.css';
import ChatInputArea, { ChatOptions } from './components/chatInputArea/chatInputArea';
import { useAuthenticatedData } from './common/auth';
import { Card, CardTitle } from '@/components/ui/card';
import { converColorToBgGradient, colorMap, convertColorToBorderClass } from './common/colorUtils';
import { convertColorToBorderClass } from './common/colorUtils';
import { getIconFromIconName } from './common/iconUtils';
import { ClockCounterClockwise } from '@phosphor-icons/react';
import { AgentData } from './agents/page';
@ -150,10 +150,6 @@ function ChatBodyData(props: ChatBodyDataProps) {
}
}
function getTailwindBorderClass(color: string): string {
return colorMap[color] || 'border-black'; // Default to black if color not found
}
return (
<div className={`${styles.chatBoxBody}`}>
<div className="w-full text-center">
@ -186,7 +182,7 @@ function ChatBodyData(props: ChatBodyDataProps) {
<div className={`ml-auto mr-auto ${props.isMobileWidth ? 'w-full' : 'w-fit'}`}>
{
!props.isMobileWidth &&
<div className={`w-full ${styles.inputBox} shadow-lg bg-background align-middle items-center justify-center p-3 dark:bg-neutral-700 border-stone-100 dark:border-none dark:shadow-none`}>
<div className={`w-full ${styles.inputBox} shadow-lg bg-background align-middle items-center justify-center px-3 py-1 dark:bg-neutral-700 border-stone-100 dark:border-none dark:shadow-none`}>
<ChatInputArea
isLoggedIn={props.isLoggedIn}
sendMessage={(message) => setMessage(message)}

View file

@ -0,0 +1,38 @@
import type { Metadata } from "next";
import { Noto_Sans } from "next/font/google";
import "../globals.css";
import { Toaster } from "@/components/ui/toaster";
// Loads the Noto Sans font (latin subset); its `className` is applied to <body> in RootLayout.
// NOTE(review): the binding is named `inter` although the font loaded is Noto Sans.
const inter = Noto_Sans({ subsets: ["latin"] });
// Page metadata exported by this layout: title, description and favicon path.
export const metadata: Metadata = {
title: "Khoj AI - Settings",
description: "Configure Khoj to get personalized, deeper assistance.",
icons: {
icon: "/static/favicon.ico",
},
};
/**
 * Layout wrapper for the settings page.
 *
 * Renders the `<html lang="en">` document with a Content-Security-Policy `<meta>` tag,
 * then a `<body>` carrying the font class that wraps `children` followed by the `Toaster`.
 *
 * @param children - the page content to render inside `<body>`.
 */
export default function RootLayout({
children,
}: Readonly<{
children: React.ReactNode;
}>) {
return (
<html lang="en">
<meta httpEquiv="Content-Security-Policy"
content="default-src 'self' https://assets.khoj.dev;
script-src 'self' https://assets.khoj.dev 'unsafe-inline' 'unsafe-eval';
connect-src 'self' https://ipapi.co/json ws://localhost:42110;
style-src 'self' https://assets.khoj.dev 'unsafe-inline' https://fonts.googleapis.com;
img-src 'self' data: https://*.khoj.dev https://*.googleusercontent.com;
font-src 'self' https://assets.khoj.dev https://fonts.gstatic.com;
child-src 'none';
object-src 'none';"></meta>
<body className={inter.className}>
{children}
<Toaster />
</body>
</html>
);
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,11 @@
/* Page shell: two-column grid (content-sized first column, second column takes the
   remaining width) spanning the full viewport height, using the theme foreground color. */
div.page {
display: grid;
grid-template-columns: auto 1fr;
gap: 1rem;
height: 100vh;
color: hsla(var(--foreground));
}
/* Content area: a grid container with auto margins on every side. */
div.contentBody {
display: grid;
margin: auto;
}

View file

@ -0,0 +1,71 @@
"use client"
import * as React from "react"
import { OTPInput, OTPInputContext } from "input-otp"
import { Dot } from "lucide-react"
import { cn } from "@/lib/utils"
/**
 * Ref-forwarding wrapper around `OTPInput` that merges default container and input
 * classes with the caller's `containerClassName` / `className`. All other props pass through.
 */
const InputOTP = React.forwardRef<
    React.ElementRef<typeof OTPInput>,
    React.ComponentPropsWithoutRef<typeof OTPInput>
>(({ className, containerClassName, ...rest }, forwardedRef) => {
    const containerClasses = cn(
        "flex items-center gap-2 has-[:disabled]:opacity-50",
        containerClassName
    )
    const inputClasses = cn("disabled:cursor-not-allowed", className)

    return (
        <OTPInput
            ref={forwardedRef}
            containerClassName={containerClasses}
            className={inputClasses}
            {...rest}
        />
    )
})
InputOTP.displayName = "InputOTP"
/** Ref-forwarding `<div>` that lays its children out in a centered flex row. */
const InputOTPGroup = React.forwardRef<
    React.ElementRef<"div">,
    React.ComponentPropsWithoutRef<"div">
>(({ className, ...rest }, forwardedRef) => {
    const groupClasses = cn("flex items-center", className)
    return <div ref={forwardedRef} className={groupClasses} {...rest} />
})
InputOTPGroup.displayName = "InputOTPGroup"
/**
 * Renders one character cell of the OTP input.
 *
 * Reads the slot at `index` from `OTPInputContext` and shows its character, a ring
 * highlight while the slot is active, and a blinking fake caret when the context asks for one.
 */
const InputOTPSlot = React.forwardRef<
    React.ElementRef<"div">,
    React.ComponentPropsWithoutRef<"div"> & { index: number }
>(({ index, className, ...rest }, forwardedRef) => {
    const { slots } = React.useContext(OTPInputContext)
    const { char, hasFakeCaret, isActive } = slots[index]

    const slotClasses = cn(
        "relative flex h-10 w-10 items-center justify-center border-y border-r border-input text-sm transition-all first:rounded-l-md first:border-l last:rounded-r-md",
        isActive && "z-10 ring-2 ring-ring ring-offset-background",
        className
    )
    const caret = hasFakeCaret && (
        <div className="pointer-events-none absolute inset-0 flex items-center justify-center">
            <div className="h-4 w-px animate-caret-blink bg-foreground duration-1000" />
        </div>
    )

    return (
        <div ref={forwardedRef} className={slotClasses} {...rest}>
            {char}
            {caret}
        </div>
    )
})
InputOTPSlot.displayName = "InputOTPSlot"
/** Ref-forwarding `<div role="separator">` that renders a dot icon between slot groups. */
const InputOTPSeparator = React.forwardRef<
    React.ElementRef<"div">,
    React.ComponentPropsWithoutRef<"div">
>((rest, forwardedRef) => {
    return (
        <div ref={forwardedRef} role="separator" {...rest}>
            <Dot />
        </div>
    )
})
InputOTPSeparator.displayName = "InputOTPSeparator"
export { InputOTP, InputOTPGroup, InputOTPSlot, InputOTPSeparator }

View file

@ -0,0 +1,117 @@
import * as React from "react"
import { cn } from "@/lib/utils"
/**
 * Ref-forwarding `<table>` with default sizing classes, wrapped in a scrollable
 * full-width container `<div>`. The ref and extra props go to the `<table>` itself.
 */
const Table = React.forwardRef<
    HTMLTableElement,
    React.HTMLAttributes<HTMLTableElement>
>(({ className, ...rest }, forwardedRef) => {
    const tableClasses = cn("w-full caption-bottom text-sm", className)
    return (
        <div className="relative w-full overflow-auto">
            <table ref={forwardedRef} className={tableClasses} {...rest} />
        </div>
    )
})
Table.displayName = "Table"
/** Ref-forwarding `<thead>` that gives its rows a bottom border. */
const TableHeader = React.forwardRef<
    HTMLTableSectionElement,
    React.HTMLAttributes<HTMLTableSectionElement>
>(({ className, ...rest }, forwardedRef) => {
    const headerClasses = cn("[&_tr]:border-b", className)
    return <thead ref={forwardedRef} className={headerClasses} {...rest} />
})
TableHeader.displayName = "TableHeader"
/** Ref-forwarding `<tbody>` that removes the border from its last row. */
const TableBody = React.forwardRef<
    HTMLTableSectionElement,
    React.HTMLAttributes<HTMLTableSectionElement>
>(({ className, ...rest }, forwardedRef) => {
    const bodyClasses = cn("[&_tr:last-child]:border-0", className)
    return <tbody ref={forwardedRef} className={bodyClasses} {...rest} />
})
TableBody.displayName = "TableBody"
/** Ref-forwarding `<tfoot>` with a top border, muted background and medium font weight. */
const TableFooter = React.forwardRef<
    HTMLTableSectionElement,
    React.HTMLAttributes<HTMLTableSectionElement>
>(({ className, ...rest }, forwardedRef) => {
    const footerClasses = cn(
        "border-t bg-muted/50 font-medium [&>tr]:last:border-b-0",
        className
    )
    return <tfoot ref={forwardedRef} className={footerClasses} {...rest} />
})
TableFooter.displayName = "TableFooter"
/** Ref-forwarding `<tr>` with a bottom border plus hover and `data-state=selected` backgrounds. */
const TableRow = React.forwardRef<
    HTMLTableRowElement,
    React.HTMLAttributes<HTMLTableRowElement>
>(({ className, ...rest }, forwardedRef) => {
    const rowClasses = cn(
        "border-b transition-colors hover:bg-muted/50 data-[state=selected]:bg-muted",
        className
    )
    return <tr ref={forwardedRef} className={rowClasses} {...rest} />
})
TableRow.displayName = "TableRow"
/** Ref-forwarding `<th>` header cell with fixed height, left-aligned muted text and horizontal padding. */
const TableHead = React.forwardRef<
    HTMLTableCellElement,
    React.ThHTMLAttributes<HTMLTableCellElement>
>(({ className, ...rest }, forwardedRef) => {
    const headClasses = cn(
        "h-12 px-4 text-left align-middle font-medium text-muted-foreground [&:has([role=checkbox])]:pr-0",
        className
    )
    return <th ref={forwardedRef} className={headClasses} {...rest} />
})
TableHead.displayName = "TableHead"
/** Ref-forwarding `<td>` data cell with padding and middle vertical alignment. */
const TableCell = React.forwardRef<
    HTMLTableCellElement,
    React.TdHTMLAttributes<HTMLTableCellElement>
>(({ className, ...rest }, forwardedRef) => {
    const cellClasses = cn("p-4 align-middle [&:has([role=checkbox])]:pr-0", className)
    return <td ref={forwardedRef} className={cellClasses} {...rest} />
})
TableCell.displayName = "TableCell"
/** Ref-forwarding `<caption>` rendered as small muted text with a top margin. */
const TableCaption = React.forwardRef<
    HTMLTableCaptionElement,
    React.HTMLAttributes<HTMLTableCaptionElement>
>(({ className, ...rest }, forwardedRef) => {
    const captionClasses = cn("mt-4 text-sm text-muted-foreground", className)
    return <caption ref={forwardedRef} className={captionClasses} {...rest} />
})
TableCaption.displayName = "TableCaption"
export {
Table,
TableHeader,
TableBody,
TableFooter,
TableHead,
TableRow,
TableCell,
TableCaption,
}

View file

@ -10,6 +10,10 @@ const nextConfig = {
source: '/api/:path*',
destination: 'http://localhost:42110/api/:path*',
},
{
source: '/auth/:path*',
destination: 'http://localhost:42110/auth/:path*',
},
];
},
trailingSlash: true,

File diff suppressed because it is too large Load diff

View file

@ -45,7 +45,9 @@
"dompurify": "^3.1.6",
"eslint": "^8",
"eslint-config-next": "14.2.3",
"input-otp": "^1.2.4",
"katex": "^0.16.10",
"libphonenumber-js": "^1.11.4",
"lucide-react": "^0.397.0",
"markdown-it": "^14.1.0",
"markdown-it-highlightjs": "^4.1.0",

View file

@ -89,10 +89,15 @@ const config = {
from: { height: "var(--radix-accordion-content-height)" },
to: { height: "0" },
},
"caret-blink": {
"0%,70%,100%": { opacity: "1" },
"20%,50%": { opacity: "0" },
},
},
animation: {
"accordion-down": "accordion-down 0.2s ease-out",
"accordion-up": "accordion-up 0.2s ease-out",
"caret-blink": "caret-blink 1.25s ease-out infinite",
},
},
},

View file

@ -2968,6 +2968,11 @@ inherits@2, inherits@^2.0.3, inherits@^2.0.4:
resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c"
integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
input-otp@^1.2.4:
version "1.2.4"
resolved "https://registry.yarnpkg.com/input-otp/-/input-otp-1.2.4.tgz#9834af8675ac72c7f1b7c010f181b3b4ffdd0f72"
integrity sha512-md6rhmD+zmMnUh5crQNSQxq3keBRYvE3odbr4Qb9g2NWzQv9azi+t1a3X4TBTbh98fsGHgEEJlzbe1q860uGCA==
internal-slot@^1.0.4, internal-slot@^1.0.7:
version "1.0.7"
resolved "https://registry.yarnpkg.com/internal-slot/-/internal-slot-1.0.7.tgz#c06dcca3ed874249881007b0a5523b172a190802"
@ -3357,6 +3362,11 @@ levn@^0.4.1:
prelude-ls "^1.2.1"
type-check "~0.4.0"
libphonenumber-js@^1.11.4:
version "1.11.4"
resolved "https://registry.yarnpkg.com/libphonenumber-js/-/libphonenumber-js-1.11.4.tgz#e63fe553f45661b30bb10bb8c82c9cf2b22ec32a"
integrity sha512-F/R50HQuWWYcmU/esP5jrH5LiWYaN7DpN0a/99U8+mnGGtnx8kmRE+649dQh3v+CowXXZc8vpkf5AmYkO0AQ7Q==
lilconfig@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/lilconfig/-/lilconfig-2.1.0.tgz#78e23ac89ebb7e1bfbf25b18043de756548e7f52"

View file

@ -1106,6 +1106,16 @@ class EntryAdapters:
async def adelete_entry_by_file(user: KhojUser, file_path: str):
return await Entry.objects.filter(user=user, file_path=file_path).adelete()
@staticmethod
async def adelete_entries_by_filenames(user: KhojUser, filenames: List[str], batch_size=1000):
    """
    Delete the user's entries whose file_path is in `filenames`, one batch at a time.

    The filenames are split into consecutive slices of at most `batch_size` items and
    each slice is deleted with a single filtered delete. Returns the sum of the
    deleted-object counts reported by each batch.
    """
    batches = [filenames[start : start + batch_size] for start in range(0, len(filenames), batch_size)]
    deleted_count = 0
    for batch in batches:
        removed, _ = await Entry.objects.filter(user=user, file_path__in=batch).adelete()
        deleted_count += removed
    return deleted_count
@staticmethod
def get_all_filenames_by_source(user: KhojUser, file_source: str):
return (

View file

@ -235,7 +235,7 @@
<div class="card-description-row">
<select id="voice-models">
{% for option in voice_model_options %}
<option value="{{ option.id }}" {% if option.id == selected_voice_config %}selected{% endif %}>{{ option.name }}</option>
<option value="{{ option.id }}" {% if option.id == selected_voice_model_config %}selected{% endif %}>{{ option.name }}</option>
{% endfor %}
</select>
</div>

View file

@ -81,7 +81,7 @@ class IndexerInput(BaseModel):
@requires(["authenticated"])
async def put_content(
request: Request,
files: list[UploadFile],
files: List[UploadFile] = [],
t: Optional[Union[state.SearchType, str]] = state.SearchType.All,
client: Optional[str] = None,
user_agent: Optional[str] = Header(None),
@ -90,7 +90,7 @@ async def put_content(
indexed_data_limiter: ApiIndexedDataLimiter = Depends(
ApiIndexedDataLimiter(
incoming_entries_size_limit=10,
subscribed_incoming_entries_size_limit=25,
subscribed_incoming_entries_size_limit=75,
total_entries_size_limit=10,
subscribed_total_entries_size_limit=100,
)
@ -103,7 +103,7 @@ async def put_content(
@requires(["authenticated"])
async def patch_content(
request: Request,
files: list[UploadFile],
files: List[UploadFile] = [],
t: Optional[Union[state.SearchType, str]] = state.SearchType.All,
client: Optional[str] = None,
user_agent: Optional[str] = Header(None),
@ -112,7 +112,7 @@ async def patch_content(
indexed_data_limiter: ApiIndexedDataLimiter = Depends(
ApiIndexedDataLimiter(
incoming_entries_size_limit=10,
subscribed_incoming_entries_size_limit=25,
subscribed_incoming_entries_size_limit=75,
total_entries_size_limit=10,
subscribed_total_entries_size_limit=100,
)
@ -236,37 +236,9 @@ async def set_content_notion(
return {"status": "ok"}
@api_content.delete("/{content_source}", status_code=200)
@requires(["authenticated"])
async def delete_content_source(
request: Request,
content_source: str,
client: Optional[str] = None,
):
user = request.user.object
update_telemetry_state(
request=request,
telemetry_type="api",
api="delete_content_config",
client=client,
metadata={"content_source": content_source},
)
content_object = map_config_to_object(content_source)
if content_object is None:
raise ValueError(f"Invalid content source: {content_source}")
elif content_object != "Computer":
await content_object.objects.filter(user=user).adelete()
await sync_to_async(EntryAdapters.delete_all_entries)(user, file_source=content_source)
enabled_content = await sync_to_async(EntryAdapters.get_unique_file_types)(user)
return {"status": "ok"}
@api_content.delete("/file", status_code=201)
@requires(["authenticated"])
async def delete_content_file(
async def delete_content_files(
request: Request,
filename: str,
client: Optional[str] = None,
@ -285,6 +257,31 @@ async def delete_content_file(
return {"status": "ok"}
# Request body of the bulk delete endpoint ("/files"): the file paths whose entries
# should be removed. The list is passed to EntryAdapters.adelete_entries_by_filenames.
class DeleteFilesRequest(BaseModel):
files: List[str]
@api_content.delete("/files", status_code=201)
@requires(["authenticated"])
async def delete_content_file(
    request: Request,
    files: DeleteFilesRequest,
    client: Optional[str] = None,
):
    # Bulk delete: removes the authenticated user's entries for every file path listed
    # in the request body and reports how many objects were deleted.
    # NOTE(review): this handler for "/files" has a singular name, while the single-file
    # "/file" handler in this module is named delete_content_files — the names look swapped.
    current_user = request.user.object

    telemetry = {
        "request": request,
        "telemetry_type": "api",
        "api": "delete_file",
        "client": client,
    }
    update_telemetry_state(**telemetry)

    removed = await EntryAdapters.adelete_entries_by_filenames(current_user, files.files)
    return {"status": "ok", "deleted_count": removed}
@api_content.get("/size", response_model=Dict[str, int])
@requires(["authenticated"])
async def get_content_size(request: Request, common: CommonQueryParams, client: Optional[str] = None):
@ -331,6 +328,39 @@ async def get_content_source(
return await sync_to_async(list)(EntryAdapters.get_all_filenames_by_source(user, content_source)) # type: ignore[call-arg]
@api_content.delete("/{content_source}", status_code=200)
@requires(["authenticated"])
async def delete_content_source(
    request: Request,
    content_source: str,
    client: Optional[str] = None,
):
    # Removes everything the authenticated user has indexed from one content source:
    # the source's config object (unless the source maps to "Computer"), all entries
    # tagged with that source, and the Notion/Github config for those two sources.
    owner = request.user.object

    source_model = map_config_to_object(content_source)
    if source_model is None:
        raise ValueError(f"Invalid content source: {content_source}")
    if source_model != "Computer":
        await source_model.objects.filter(user=owner).adelete()

    await sync_to_async(EntryAdapters.delete_all_entries)(owner, file_source=content_source)

    # NOTE(review): these deletes look redundant with the source_model delete above
    # when map_config_to_object already returns the matching config model — confirm.
    if content_source == DbEntry.EntrySource.NOTION:
        await NotionConfig.objects.filter(user=owner).adelete()
    elif content_source == DbEntry.EntrySource.GITHUB:
        await GithubConfig.objects.filter(user=owner).adelete()

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="delete_content_config",
        client=client,
        metadata={"content_source": content_source},
    )

    # NOTE(review): the result of this lookup is never used; kept to preserve behavior.
    await sync_to_async(EntryAdapters.get_unique_file_types)(owner)
    return {"status": "ok"}
async def indexer(
request: Request,
files: list[UploadFile],

View file

@ -46,6 +46,7 @@ from khoj.database.adapters import (
create_khoj_token,
get_khoj_tokens,
get_user_name,
get_user_notion_config,
get_user_subscription_state,
run_with_process_lock,
)
@ -987,14 +988,15 @@ class ApiIndexedDataLimiter:
self.total_entries_size_limit = total_entries_size_limit
self.subscribed_total_entries_size = subscribed_total_entries_size_limit
def __call__(self, request: Request, files: List[UploadFile]):
def __call__(self, request: Request, files: List[UploadFile] = None):
if state.billing_enabled is False:
return
subscribed = has_required_scope(request, ["premium"])
incoming_data_size_mb = 0.0
deletion_file_names = set()
if not request.user.is_authenticated:
if not request.user.is_authenticated or not files:
return
user: KhojUser = request.user.object
@ -1254,6 +1256,10 @@ def get_user_config(user: KhojUser, request: Request, is_detailed: bool = False)
"notion": ("notion" in enabled_content_sources_set),
}
notion_oauth_url = get_notion_auth_url(user)
current_notion_config = get_user_notion_config(user)
notion_token = current_notion_config.token if current_notion_config else ""
selected_chat_model_config = ConversationAdapters.get_conversation_config(user)
chat_models = ConversationAdapters.get_conversation_processor_options().all()
chat_model_options = list()
@ -1273,10 +1279,6 @@ def get_user_config(user: KhojUser, request: Request, is_detailed: bool = False)
for paint_model in paint_model_options:
all_paint_model_options.append({"name": paint_model.model_name, "id": paint_model.id})
notion_oauth_url = get_notion_auth_url(user)
eleven_labs_enabled = is_eleven_labs_enabled()
voice_models = ConversationAdapters.get_voice_model_options()
voice_model_options = list()
for voice_model in voice_models:
@ -1284,8 +1286,10 @@ def get_user_config(user: KhojUser, request: Request, is_detailed: bool = False)
if len(voice_model_options) == 0:
eleven_labs_enabled = False
else:
eleven_labs_enabled = is_eleven_labs_enabled()
selected_voice_config = ConversationAdapters.get_voice_model_config(user)
selected_voice_model_config = ConversationAdapters.get_voice_model_config(user)
return {
"request": request,
@ -1296,9 +1300,11 @@ def get_user_config(user: KhojUser, request: Request, is_detailed: bool = False)
"given_name": given_name,
"phone_number": user.phone_number,
"is_phone_number_verified": user.verified_phone_number,
# user content, model settings
# user content settings
"enabled_content_source": enabled_content_sources,
"has_documents": has_documents,
"notion_token": notion_token,
# user model settings
"search_model_options": all_search_model_options,
"selected_search_model_config": current_search_model_option.id,
"chat_model_options": chat_model_options,
@ -1306,7 +1312,7 @@ def get_user_config(user: KhojUser, request: Request, is_detailed: bool = False)
"paint_model_options": all_paint_model_options,
"selected_paint_model_config": selected_paint_model_config.id if selected_paint_model_config else None,
"voice_model_options": voice_model_options,
"selected_voice_config": selected_voice_config.model_id if selected_voice_config else None,
"selected_voice_model_config": selected_voice_model_config.model_id if selected_voice_model_config else None,
# user billing info
"subscription_state": user_subscription_state,
"subscription_renewal_date": subscription_renewal_date,

View file

@ -1,349 +0,0 @@
import asyncio
import logging
from typing import Dict, Optional, Union
from fastapi import APIRouter, Depends, Header, Request, Response, UploadFile
from pydantic import BaseModel
from starlette.authentication import requires
from khoj.database.models import GithubConfig, KhojUser, NotionConfig
from khoj.processor.content.docx.docx_to_entries import DocxToEntries
from khoj.processor.content.github.github_to_entries import GithubToEntries
from khoj.processor.content.images.image_to_entries import ImageToEntries
from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
from khoj.processor.content.notion.notion_to_entries import NotionToEntries
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
from khoj.routers.helpers import ApiIndexedDataLimiter, update_telemetry_state
from khoj.search_type import text_search
from khoj.utils import constants, state
from khoj.utils.config import SearchModels
from khoj.utils.helpers import LRU, get_file_type
from khoj.utils.rawconfig import ContentConfig, FullConfig, SearchConfig
from khoj.utils.yaml import save_config_to_file_updated_state
logger = logging.getLogger(__name__)
indexer = APIRouter()
# A single file in a batch request: its path and its content (text or raw bytes).
class File(BaseModel):
path: str
content: Union[str, bytes]
# A batch of File objects.
class IndexBatchRequest(BaseModel):
files: list[File]
# Files to index, grouped by content type. The `update` endpoint fills each field
# with a mapping of filename to file content (decoded text, or raw bytes when no
# encoding was detected).
class IndexerInput(BaseModel):
org: Optional[dict[str, str]] = None
markdown: Optional[dict[str, str]] = None
pdf: Optional[dict[str, bytes]] = None
plaintext: Optional[dict[str, str]] = None
image: Optional[dict[str, bytes]] = None
docx: Optional[dict[str, bytes]] = None
@indexer.post("/update")
@requires(["authenticated"])
async def update(
    request: Request,
    files: list[UploadFile],
    force: bool = False,
    t: Optional[Union[state.SearchType, str]] = state.SearchType.All,
    client: Optional[str] = None,
    user_agent: Optional[str] = Header(None),
    referer: Optional[str] = Header(None),
    host: Optional[str] = Header(None),
    indexed_data_limiter: ApiIndexedDataLimiter = Depends(
        ApiIndexedDataLimiter(
            incoming_entries_size_limit=10,
            subscribed_incoming_entries_size_limit=75,
            total_entries_size_limit=10,
            subscribed_total_entries_size_limit=100,
        )
    ),
):
    # Index the uploaded files for the authenticated user.
    #
    # Each upload is sorted into a per-content-type bucket (unsupported types are skipped
    # with a warning), the buckets are handed to configure_content, and telemetry is
    # recorded. Responds 200 with a comma-separated list of the indexed filenames, or
    # 500 with the body "Failed" when anything in the indexing step raises or fails.
    #
    # `force` is passed to configure_content as `regenerate`; `t` limits which content
    # type gets indexed. NOTE(review): the limiter sizes are presumably megabytes — confirm.
    user = request.user.object
    index_files: Dict[str, Dict[str, str]] = {
        "org": {},
        "markdown": {},
        "pdf": {},
        "plaintext": {},
        "image": {},
        "docx": {},
    }
    try:
        logger.info(f"📬 Updating content index via API call by {client} client")
        for file in files:
            file_content = file.file.read()
            file_type, encoding = get_file_type(file.content_type, file_content)
            if file_type in index_files:
                # Store decoded text when an encoding was detected, raw bytes otherwise
                index_files[file_type][file.filename] = file_content.decode(encoding) if encoding else file_content
            else:
                logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file.filename}")

        indexer_input = IndexerInput(
            org=index_files["org"],
            markdown=index_files["markdown"],
            pdf=index_files["pdf"],
            plaintext=index_files["plaintext"],
            image=index_files["image"],
            docx=index_files["docx"],
        )

        if state.config is None:
            logger.info("📬 Initializing content index on first run.")
            default_full_config = FullConfig(
                content_type=None,
                search_type=SearchConfig.model_validate(constants.default_config["search-type"]),
                processor=None,
            )
            state.config = default_full_config
            default_content_config = ContentConfig(
                org=None,
                markdown=None,
                pdf=None,
                docx=None,
                image=None,
                github=None,
                notion=None,
                plaintext=None,
            )
            state.config.content_type = default_content_config
            save_config_to_file_updated_state()
            configure_search(state.search_models, state.config.search_type)

        # configure_content is synchronous; run it in the default executor and await the result.
        # Inside a coroutine the running loop is the one to use.
        loop = asyncio.get_running_loop()
        success = await loop.run_in_executor(
            None,
            configure_content,
            indexer_input.model_dump(),
            force,
            t,
            False,
            user,
        )
        if not success:
            raise RuntimeError("Failed to update content index")
        logger.info("Finished processing batch indexing request")
    except Exception as e:
        # Log the failure once, with the request context and the stack trace
        logger.error(
            f'🚨 Failed to {"force " if force else ""}update {t} content index triggered via API call by {client} client: {e}',
            exc_info=True,
        )
        return Response(content="Failed", status_code=500)

    indexing_metadata = {
        "num_org": len(index_files["org"]),
        "num_markdown": len(index_files["markdown"]),
        "num_pdf": len(index_files["pdf"]),
        "num_plaintext": len(index_files["plaintext"]),
        "num_image": len(index_files["image"]),
        "num_docx": len(index_files["docx"]),
    }

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="index/update",
        client=client,
        user_agent=user_agent,
        referer=referer,
        host=host,
        metadata=indexing_metadata,
    )

    logger.info(f"📪 Content index updated via API call by {client} client")

    indexed_filenames = ",".join(file for ctype in index_files for file in index_files[ctype]) or ""
    return Response(content=indexed_filenames, status_code=200)
def configure_search(search_models: SearchModels, search_config: Optional[SearchConfig]) -> Optional[SearchModels]:
    """
    Return `search_models`, substituting a fresh `SearchModels()` when it is None.

    `search_config` is accepted but not used.
    """
    return SearchModels() if search_models is None else search_models
def configure_content(
    files: Optional[dict[str, dict[str, str]]],
    regenerate: bool = False,
    t: Optional[state.SearchType] = state.SearchType.All,
    full_corpus: bool = True,
    user: KhojUser = None,
) -> bool:
    """
    Set up search for the given files, one content type at a time.

    Args:
        files: Mapping of content type ("org", "markdown", "pdf", "plaintext", "image",
            "docx") to a mapping of filename to content. None means nothing to process.
        regenerate: Forwarded to `text_search.setup` for every content type.
        t: Content type to index, as a `state.SearchType` or its string value.
            None is treated as `state.SearchType.All`.
        full_corpus: Forwarded to `text_search.setup` for every content type.
        user: User the content is indexed for; their query cache is reset at the end.

    Returns:
        True when every attempted content type was set up without raising (or when
        `files` is None); False when `t` is not a valid search type or any step failed.

    Github and Notion are only set up when no files were supplied for any content type
    and the user has the corresponding config. Each content type is set up in its own
    try/except so a failure in one does not stop the others.
    """
    success = True

    # Normalize the requested search type to a state.SearchType member
    if t is None:
        t = state.SearchType.All
    if not isinstance(t, state.SearchType):
        # A raw string must be validated before conversion; reading `.value` off a
        # plain string would raise AttributeError instead of reporting the bad type.
        if t not in [search_type.value for search_type in state.SearchType]:
            logger.warning(f"🚨 Invalid search type: {t}")
            return False
        t = state.SearchType(t)
    search_type = t.value

    # Check for missing files before touching `files`
    if files is None:
        logger.warning(f"🚨 No files to process for {search_type} search.")
        return True

    no_documents = all(not files.get(file_type) for file_type in files)

    def is_requested(content_type: state.SearchType) -> bool:
        # A content type is indexed when everything or specifically that type was requested
        return search_type in (state.SearchType.All.value, content_type.value)

    try:
        # Initialize Org Notes Search
        if is_requested(state.SearchType.Org) and files["org"]:
            logger.info("🦄 Setting up search for orgmode notes")
            # Extract Entries, Generate Notes Embeddings
            text_search.setup(
                OrgToEntries,
                files.get("org"),
                regenerate=regenerate,
                full_corpus=full_corpus,
                user=user,
            )
    except Exception as e:
        logger.error(f"🚨 Failed to setup org: {e}", exc_info=True)
        success = False

    try:
        # Initialize Markdown Search
        if is_requested(state.SearchType.Markdown) and files["markdown"]:
            logger.info("💎 Setting up search for markdown notes")
            # Extract Entries, Generate Markdown Embeddings
            text_search.setup(
                MarkdownToEntries,
                files.get("markdown"),
                regenerate=regenerate,
                full_corpus=full_corpus,
                user=user,
            )
    except Exception as e:
        logger.error(f"🚨 Failed to setup markdown: {e}", exc_info=True)
        success = False

    try:
        # Initialize PDF Search
        if is_requested(state.SearchType.Pdf) and files["pdf"]:
            logger.info("🖨️ Setting up search for pdf")
            # Extract Entries, Generate PDF Embeddings
            text_search.setup(
                PdfToEntries,
                files.get("pdf"),
                regenerate=regenerate,
                full_corpus=full_corpus,
                user=user,
            )
    except Exception as e:
        logger.error(f"🚨 Failed to setup PDF: {e}", exc_info=True)
        success = False

    try:
        # Initialize Plaintext Search
        if is_requested(state.SearchType.Plaintext) and files["plaintext"]:
            logger.info("📄 Setting up search for plaintext")
            # Extract Entries, Generate Plaintext Embeddings
            text_search.setup(
                PlaintextToEntries,
                files.get("plaintext"),
                regenerate=regenerate,
                full_corpus=full_corpus,
                user=user,
            )
    except Exception as e:
        logger.error(f"🚨 Failed to setup plaintext: {e}", exc_info=True)
        success = False

    try:
        # Initialize Github Search, only when no files were supplied
        if no_documents:
            github_config = GithubConfig.objects.filter(user=user).prefetch_related("githubrepoconfig").first()
            if is_requested(state.SearchType.Github) and github_config is not None:
                logger.info("🐙 Setting up search for github")
                # Extract Entries, Generate Github Embeddings
                text_search.setup(
                    GithubToEntries,
                    None,
                    regenerate=regenerate,
                    full_corpus=full_corpus,
                    user=user,
                    config=github_config,
                )
    except Exception as e:
        logger.error(f"🚨 Failed to setup GitHub: {e}", exc_info=True)
        success = False

    try:
        # Initialize Notion Search, only when no files were supplied
        if no_documents:
            notion_config = NotionConfig.objects.filter(user=user).first()
            if is_requested(state.SearchType.Notion) and notion_config:
                logger.info("🔌 Setting up search for notion")
                text_search.setup(
                    NotionToEntries,
                    None,
                    regenerate=regenerate,
                    full_corpus=full_corpus,
                    user=user,
                    config=notion_config,
                )
    except Exception as e:
        logger.error(f"🚨 Failed to setup Notion: {e}", exc_info=True)
        success = False

    try:
        # Initialize Image Search
        if is_requested(state.SearchType.Image) and files["image"]:
            logger.info("🖼️ Setting up search for images")
            # Extract Entries, Generate Image Embeddings
            text_search.setup(
                ImageToEntries,
                files.get("image"),
                regenerate=regenerate,
                full_corpus=full_corpus,
                user=user,
            )
    except Exception as e:
        logger.error(f"🚨 Failed to setup images: {e}", exc_info=True)
        success = False

    try:
        # Initialize Docx Search
        if is_requested(state.SearchType.Docx) and files["docx"]:
            logger.info("📄 Setting up search for docx")
            text_search.setup(
                DocxToEntries,
                files.get("docx"),
                regenerate=regenerate,
                full_corpus=full_corpus,
                user=user,
            )
    except Exception as e:
        logger.error(f"🚨 Failed to setup docx: {e}", exc_info=True)
        success = False

    # Invalidate Query Cache
    if user:
        state.query_cache[user.uuid] = LRU()

    return success