Merge with features/internet-enabled-search

This commit is contained in:
sabaimran 2023-11-22 07:25:24 -08:00
commit 256e8de40a
88 changed files with 365 additions and 316 deletions

View file

@ -48,7 +48,7 @@ jobs:
- name: 🌡️ Validate Python Package
run: |
# Validate PyPI Package
pipx run check-wheel-contents dist/*.whl
pipx run check-wheel-contents dist/*.whl --ignore W004
pipx run twine check dist/*
- name: ⏫ Upload Python Package Artifacts

View file

@ -3,7 +3,7 @@ name: test
on:
pull_request:
paths:
- src/**
- src/khoj/**
- tests/**
- config/**
- pyproject.toml

View file

@ -17,7 +17,7 @@ RUN sed -i 's/dynamic = \["version"\]/version = "0.0.0"/' pyproject.toml && \
COPY . .
# Set the PYTHONPATH environment variable in order for it to find the Django app.
ENV PYTHONPATH=/app/src:$PYTHONPATH
ENV PYTHONPATH=/app/src/khoj:$PYTHONPATH
# Run the Application
# There are more arguments required for the application to run,

View file

@ -6,5 +6,5 @@
"description": "An AI copilot for your Second Brain",
"author": "Khoj Inc.",
"authorUrl": "https://github.com/khoj-ai",
"isDesktopOnly": true
"isDesktopOnly": false
}

View file

@ -46,7 +46,7 @@ dependencies = [
"tiktoken >= 0.3.2",
"tenacity >= 8.2.2",
"pillow == 9.3.0",
"pydantic >= 1.10.10",
"pydantic >= 2.0.0",
"pyyaml == 6.0",
"rich >= 13.3.1",
"schedule == 1.1.0",
@ -128,6 +128,9 @@ warn_unused_ignores = false
[tool.black]
line-length = 120
[tool.isort]
profile = "black"
[tool.pytest.ini_options]
addopts = "--strict-markers"
markers = [

View file

@ -1,5 +1,5 @@
[pytest]
DJANGO_SETTINGS_MODULE = app.settings
DJANGO_SETTINGS_MODULE = khoj.app.settings
pythonpath = . src
testpaths = tests
markers =

View file

@ -120,7 +120,8 @@
// Create a new div for the chat message text and append it to the chat message
let chatMessageText = document.createElement('div');
chatMessageText.className = `chat-message-text ${by}`;
chatMessageText.innerHTML = formattedMessage;
let textNode = document.createTextNode(formattedMessage);
chatMessageText.appendChild(textNode);
chatMessage.appendChild(chatMessageText);
// Append annotations div to the chat message

View file

@ -112,14 +112,14 @@
} else if (
item.additional.file.endsWith(".md") ||
item.additional.file.endsWith(".markdown") ||
(item.additional.file.includes("issues") && item.additional.file.includes("github.com")) ||
(item.additional.file.includes("commit") && item.additional.file.includes("github.com"))
(item.additional.file.includes("issues") && item.additional.source === "github") ||
(item.additional.file.includes("commit") && item.additional.source === "github")
)
{
html += render_markdown(query, [item]);
} else if (item.additional.file.endsWith(".pdf")) {
html += render_pdf(query, [item]);
} else if (item.additional.file.includes("notion.so")) {
} else if (item.additional.source == "notion") {
html += `<div class="results-notion">` + `<b><a href="${item.additional.file}">${item.additional.heading}</a></b>` + `<p>${item.entry}</p>` + `</div>`;
} else if (item.additional.file.endsWith(".html")) {
html += render_html(query, [item]);

View file

@ -6,5 +6,5 @@
"description": "An AI copilot for your Second Brain",
"author": "Khoj Inc.",
"authorUrl": "https://github.com/khoj-ai",
"isDesktopOnly": true
"isDesktopOnly": false
}

View file

@ -73,21 +73,19 @@ export default class Khoj extends Plugin {
// Check if khoj backend is configured, note if cannot connect to backend
let headers = { "Authorization": `Bearer ${this.settings.khojApiKey}` };
if (this.settings.khojUrl === "https://app.khoj.dev") {
if (this.settings.khojApiKey === "") {
new Notice(`Khoj API key is not configured. Please visit https://app.khoj.dev/config#clients to get an API key.`);
return;
}
await request({ url: this.settings.khojUrl ,method: "GET", headers: headers })
.then(response => {
this.settings.connectedToBackend = true;
})
.catch(error => {
this.settings.connectedToBackend = false;
new Notice(`Ensure Khoj backend is running and Khoj URL is pointing to it in the plugin settings.\n\n${error}`);
});
if (this.settings.khojApiKey === "" && this.settings.khojUrl === "https://app.khoj.dev") {
new Notice(`Khoj API key is not configured. Please visit https://app.khoj.dev/config#clients to get an API key.`);
return;
}
await request({ url: this.settings.khojUrl ,method: "GET", headers: headers })
.then(response => {
this.settings.connectedToBackend = true;
})
.catch(error => {
this.settings.connectedToBackend = false;
new Notice(`Ensure Khoj backend is running and Khoj URL is pointing to it in the plugin settings.\n\n${error}`);
});
}
async saveSettings() {

View file

@ -87,27 +87,18 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
}
async getSuggestions(query: string): Promise<SearchResult[]> {
// Query Khoj backend for search results
// Set up the query to the Khoj backend for search results
let encodedQuery = encodeURIComponent(query);
let searchUrl = `${this.setting.khojUrl}/api/search?q=${encodedQuery}&n=${this.setting.resultsCount}&r=${this.rerank}&client=obsidian`;
let headers = { 'Authorization': `Bearer ${this.setting.khojApiKey}` }
// Get search results for markdown and pdf files
let mdResponse = await request({ url: `${searchUrl}&t=markdown`, headers: headers });
let pdfResponse = await request({ url: `${searchUrl}&t=pdf`, headers: headers });
// Get search results from Khoj backend
let response = await request({ url: `${searchUrl}`, headers: headers });
// Parse search results
let mdData = JSON.parse(mdResponse)
let results = JSON.parse(response)
.filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path))
.map((result: any) => { return { entry: result.entry, score: result.score, file: result.additional.file }; });
let pdfData = JSON.parse(pdfResponse)
.filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path))
.map((result: any) => { return { entry: `## ${result.additional.compiled}`, score: result.score, file: result.additional.file } as SearchResult; })
// Combine markdown and PDF results and sort them by score
let results = mdData.concat(pdfData)
.sort((a: any, b: any) => b.score - a.score)
.map((result: any) => { return { entry: result.entry, file: result.file } as SearchResult; })
.map((result: any) => { return { entry: result.entry, file: result.additional.file } as SearchResult; });
this.query = query;
return results;

View file

@ -14,7 +14,7 @@ from pathlib import Path
import os
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent.parent
BASE_DIR = Path(__file__).resolve().parent.parent
# Quick-start development settings - unsuitable for production
@ -24,15 +24,15 @@ BASE_DIR = Path(__file__).resolve().parent.parent.parent
SECRET_KEY = os.getenv("KHOJ_DJANGO_SECRET_KEY")
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = os.getenv("KHOJ_DEBUG", "False") == "True"
DEBUG = os.getenv("KHOJ_DEBUG") == "True"
ALLOWED_HOSTS = [".khoj.dev", "localhost", "127.0.0.1", "[::1]", "beta.khoj.dev"]
# All Subdomains of KHOJ_DOMAIN are trusted
KHOJ_DOMAIN = os.getenv("KHOJ_DOMAIN", "khoj.dev")
ALLOWED_HOSTS = [f".{KHOJ_DOMAIN}", "localhost", "127.0.0.1", "[::1]"]
CSRF_TRUSTED_ORIGINS = [
"https://app.khoj.dev",
"https://beta.khoj.dev",
"https://khoj.dev",
"https://*.khoj.dev",
f"https://*.{KHOJ_DOMAIN}",
f"https://{KHOJ_DOMAIN}",
]
COOKIE_SAMESITE = "None"
@ -40,8 +40,8 @@ if DEBUG:
SESSION_COOKIE_DOMAIN = "localhost"
CSRF_COOKIE_DOMAIN = "localhost"
else:
SESSION_COOKIE_DOMAIN = "khoj.dev"
CSRF_COOKIE_DOMAIN = "khoj.dev"
SESSION_COOKIE_DOMAIN = KHOJ_DOMAIN
CSRF_COOKIE_DOMAIN = KHOJ_DOMAIN
SESSION_COOKIE_SECURE = True
CSRF_COOKIE_SECURE = True
@ -53,7 +53,7 @@ SESSION_COOKIE_SAMESITE = "None"
INSTALLED_APPS = [
"django.contrib.auth",
"django.contrib.contenttypes",
"database.apps.DatabaseConfig",
"khoj.database.apps.DatabaseConfig",
"django.contrib.admin",
"django.contrib.sessions",
"django.contrib.messages",
@ -143,7 +143,7 @@ USE_TZ = True
# https://docs.djangoproject.com/en/4.2/howto/static-files/
STATIC_ROOT = BASE_DIR / "static"
STATICFILES_DIRS = [BASE_DIR / "src/khoj/interface/web"]
STATICFILES_DIRS = [BASE_DIR / "interface/web"]
STATIC_URL = "/static/"
# Default primary key field type

View file

@ -20,8 +20,8 @@ from starlette.authentication import (
)
# Internal Packages
from database.models import KhojUser, Subscription
from database.adapters import get_all_users, get_or_create_search_model
from khoj.database.models import KhojUser, Subscription
from khoj.database.adapters import get_all_users, get_or_create_search_model
from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
from khoj.routers.indexer import configure_content, load_content, configure_search
from khoj.utils import constants, state
@ -45,7 +45,7 @@ class UserAuthenticationBackend(AuthenticationBackend):
def __init__(
self,
):
from database.models import KhojUser, KhojApiUser
from khoj.database.models import KhojUser, KhojApiUser
self.khojuser_manager = KhojUser.objects
self.khojapiuser_manager = KhojApiUser.objects

View file

@ -1,32 +1,29 @@
import math
from typing import Optional, Type, List
from datetime import date, datetime
import secrets
from typing import Type, List
from datetime import date, timezone
from datetime import date, datetime, timezone
from typing import List, Optional, Type
from django.db import models
from asgiref.sync import sync_to_async
from django.contrib.sessions.backends.db import SessionStore
from pgvector.django import CosineDistance
from django.db.models.manager import BaseManager
from django.db import models
from django.db.models import Q
from django.db.models.manager import BaseManager
from fastapi import HTTPException
from pgvector.django import CosineDistance
from torch import Tensor
# Import sync_to_async from Django Channels
from asgiref.sync import sync_to_async
from fastapi import HTTPException
from database.models import (
KhojUser,
from khoj.database.models import (
ChatModelOptions,
Conversation,
Entry,
GithubConfig,
GithubRepoConfig,
GoogleUser,
KhojApiUser,
KhojUser,
NotionConfig,
GithubConfig,
Entry,
GithubRepoConfig,
Conversation,
ChatModelOptions,
OfflineChatProcessorConversationConfig,
OpenAIProcessorConversationConfig,
SearchModelConfig,
Subscription,
UserConversationConfig,
@ -34,12 +31,12 @@ from database.models import (
OfflineChatProcessorConversationConfig,
ReflectiveQuestion,
)
from khoj.utils.helpers import generate_random_name
from khoj.search_filter.date_filter import DateFilter
from khoj.search_filter.file_filter import FileFilter
from khoj.search_filter.word_filter import WordFilter
from khoj.utils import state
from khoj.utils.config import GPT4AllProcessorModel
from khoj.search_filter.word_filter import WordFilter
from khoj.search_filter.file_filter import FileFilter
from khoj.search_filter.date_filter import DateFilter
from khoj.utils.helpers import generate_random_name
async def set_notion_config(token: str, user: KhojUser):

View file

@ -3,7 +3,7 @@ from django.contrib.auth.admin import UserAdmin
# Register your models here.
from database.models import (
from khoj.database.models import (
KhojUser,
ChatModelOptions,
OpenAIProcessorConversationConfig,

View file

@ -3,4 +3,4 @@ from django.apps import AppConfig
class DatabaseConfig(AppConfig):
default_auto_field = "django.db.models.BigAutoField"
name = "database"
name = "khoj.database"

View file

@ -2,6 +2,8 @@
from django.db import migrations
from typing import List, Any
class Migration(migrations.Migration):
dependencies = [
@ -9,4 +11,4 @@ class Migration(migrations.Migration):
("database", "0010_rename_embeddings_entry_and_more"),
]
operations = []
operations: List[Any] = []

View file

@ -112,14 +112,14 @@
} else if (
item.additional.file.endsWith(".md") ||
item.additional.file.endsWith(".markdown") ||
(item.additional.file.includes("issues") && item.additional.file.includes("github.com")) ||
(item.additional.file.includes("commit") && item.additional.file.includes("github.com"))
(item.additional.file.includes("issues") && item.additional.source === "github") ||
(item.additional.file.includes("commit") && item.additional.source === "github")
)
{
html += render_markdown(query, [item]);
} else if (item.additional.file.endsWith(".pdf")) {
html += render_pdf(query, [item]);
} else if (item.additional.file.includes("notion.so")) {
} else if (item.additional.source === "notion") {
html += `<div class="results-notion">` + `<b><a href="${item.additional.file}">${item.additional.heading}</a></b>` + `<p>${item.entry}</p>` + `</div>`;
} else if (item.additional.file.endsWith(".html")) {
html += render_html(query, [item]);

View file

@ -1,4 +1,10 @@
""" Main module for Khoj Assistant
isort:skip_file
"""
# Standard Packages
from contextlib import redirect_stdout
import io
import os
import sys
import locale
@ -25,14 +31,18 @@ from django.core.asgi import get_asgi_application
from django.core.management import call_command
# Initialize Django
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "app.settings")
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "khoj.app.settings")
django.setup()
# Initialize Django Database
call_command("migrate", "--noinput")
db_migrate_output = io.StringIO()
with redirect_stdout(db_migrate_output):
call_command("migrate", "--noinput")
# Initialize Django Static Files
call_command("collectstatic", "--noinput")
collectstatic_output = io.StringIO()
with redirect_stdout(collectstatic_output):
call_command("collectstatic", "--noinput")
# Initialize the Application Server
app = FastAPI()
@ -41,9 +51,16 @@ app = FastAPI()
django_app = get_asgi_application()
# Add CORS middleware
KHOJ_DOMAIN = os.getenv("KHOJ_DOMAIN", "app.khoj.dev")
app.add_middleware(
CORSMiddleware,
allow_origins=["app://obsidian.md", "http://localhost:*", "https://app.khoj.dev/*", "app://khoj.dev"],
allow_origins=[
"app://obsidian.md",
"http://localhost:*",
"http://127.0.0.1:*",
f"https://{KHOJ_DOMAIN}",
"app://khoj.dev",
],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
@ -75,14 +92,16 @@ def run(should_start_server=True):
args = cli(state.cli_args)
set_state(args)
logger.info(f"🚒 Initializing Khoj v{state.khoj_version}")
# Set Logging Level
if args.verbose == 0:
logger.setLevel(logging.INFO)
elif args.verbose >= 1:
logger.setLevel(logging.DEBUG)
logger.info(f"🚒 Initializing Khoj v{state.khoj_version}")
logger.info(f"📦 Initializing DB:\n{db_migrate_output.getvalue().strip()}")
logger.debug(f"🌍 Initializing Web Client:\n{collectstatic_output.getvalue().strip()}")
initialization()
# Create app directory, if it doesn't exist
@ -103,10 +122,10 @@ def run(should_start_server=True):
# Mount Django and Static Files
app.mount("/server", django_app, name="server")
static_dir = "static"
static_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
if not os.path.exists(static_dir):
os.mkdir(static_dir)
app.mount(f"/{static_dir}", StaticFiles(directory=static_dir), name=static_dir)
app.mount(f"/static", StaticFiles(directory=static_dir), name=static_dir)
# Configure Middleware
configure_middleware(app)

View file

@ -6,7 +6,7 @@ import sys
def main():
"""Run administrative tasks."""
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "app.settings")
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "khoj.app.settings")
try:
from django.core.management import execute_from_command_line
except ImportError as exc:

View file

@ -60,7 +60,7 @@ import logging
from packaging import version
from khoj.utils.yaml import load_config_from_file, save_config_to_file
from database.models import (
from khoj.database.models import (
OpenAIProcessorConversationConfig,
OfflineChatProcessorConversationConfig,
ChatModelOptions,

View file

@ -2,19 +2,20 @@
import logging
import time
from datetime import datetime
from typing import Dict, List, Union, Tuple
from typing import Dict, List, Tuple, Union
# External Packages
import requests
from khoj.database.models import Entry as DbEntry
from khoj.database.models import GithubConfig, KhojUser
from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
from khoj.processor.content.text_to_entries import TextToEntries
# Internal Packages
from khoj.utils.helpers import timer
from khoj.utils.rawconfig import Entry, GithubContentConfig, GithubRepoConfig
from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
from khoj.processor.text_to_entries import TextToEntries
from database.models import Entry as DbEntry, GithubConfig, KhojUser
logger = logging.getLogger(__name__)

View file

@ -1,17 +1,19 @@
# Standard Packages
import logging
import re
import urllib3
from pathlib import Path
from typing import Tuple, List
from typing import List, Tuple
import urllib3
from khoj.database.models import Entry as DbEntry
from khoj.database.models import KhojUser
# Internal Packages
from khoj.processor.text_to_entries import TextToEntries
from khoj.utils.helpers import timer
from khoj.processor.content.text_to_entries import TextToEntries
from khoj.utils.constants import empty_escape_sequences
from khoj.utils.helpers import timer
from khoj.utils.rawconfig import Entry
from database.models import Entry as DbEntry, KhojUser
logger = logging.getLogger(__name__)

View file

@ -1,19 +1,18 @@
# Standard Packages
import logging
from enum import Enum
from typing import Tuple
# External Packages
import requests
from khoj.database.models import Entry as DbEntry
from khoj.database.models import KhojUser, NotionConfig
from khoj.processor.content.text_to_entries import TextToEntries
# Internal Packages
from khoj.utils.helpers import timer
from khoj.utils.rawconfig import Entry, NotionContentConfig
from khoj.processor.text_to_entries import TextToEntries
from khoj.utils.rawconfig import Entry
from database.models import Entry as DbEntry, KhojUser, NotionConfig
from enum import Enum
logger = logging.getLogger(__name__)

View file

@ -3,14 +3,15 @@ import logging
from pathlib import Path
from typing import Iterable, List, Tuple
from khoj.database.models import Entry as DbEntry
from khoj.database.models import KhojUser
# Internal Packages
from khoj.processor.data_sources.org_mode import orgnode
from khoj.processor.text_to_entries import TextToEntries
from khoj.processor.content.org_mode import orgnode
from khoj.processor.content.text_to_entries import TextToEntries
from khoj.utils import state
from khoj.utils.helpers import timer
from khoj.utils.rawconfig import Entry
from khoj.utils import state
from database.models import Entry as DbEntry, KhojUser
logger = logging.getLogger(__name__)

View file

@ -1,18 +1,19 @@
# Standard Packages
import os
import logging
from typing import List, Tuple
import base64
import logging
import os
from typing import List, Tuple
# External Packages
from langchain.document_loaders import PyMuPDFLoader
from khoj.database.models import Entry as DbEntry
from khoj.database.models import KhojUser
# Internal Packages
from khoj.processor.text_to_entries import TextToEntries
from khoj.processor.content.text_to_entries import TextToEntries
from khoj.utils.helpers import timer
from khoj.utils.rawconfig import Entry
from database.models import Entry as DbEntry, KhojUser
logger = logging.getLogger(__name__)

View file

@ -2,15 +2,16 @@
import logging
from pathlib import Path
from typing import List, Tuple
from bs4 import BeautifulSoup
from khoj.database.models import Entry as DbEntry
from khoj.database.models import KhojUser
# Internal Packages
from khoj.processor.text_to_entries import TextToEntries
from khoj.processor.content.text_to_entries import TextToEntries
from khoj.utils.helpers import timer
from khoj.utils.rawconfig import Entry
from database.models import Entry as DbEntry, KhojUser
logger = logging.getLogger(__name__)

View file

@ -13,8 +13,8 @@ from khoj.utils.helpers import is_none_or_empty, timer, batcher
# Internal Packages
from khoj.utils.rawconfig import Entry
from khoj.search_filter.date_filter import DateFilter
from database.models import KhojUser, Entry as DbEntry, EntryDates
from database.adapters import EntryAdapters
from khoj.database.models import KhojUser, Entry as DbEntry, EntryDates
from khoj.database.adapters import EntryAdapters
logger = logging.getLogger(__name__)

View file

@ -10,7 +10,7 @@ You are Khoj, a smart, inquisitive and helpful personal assistant.
Use your general knowledge and the past conversation with the user as context to inform your responses.
You were created by Khoj Inc. with the following capabilities:
- You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you. They can share files with you using the Khoj desktop application.
- You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you. They can share files with you using any Khoj client, including the native Desktop app, the Obsidian or Emacs plugins, or the web app.
- You cannot set reminders.
- Say "I don't know" or "I don't understand" if you don't know what to say or if you don't know the answer to a question.
- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided notes or past conversations.
@ -128,8 +128,9 @@ The user has a question which you can use the internet to respond to. Can you br
Today's date in UTC: {current_date}
Here are some examples of questions and subqueries:
Q: What is the weather like in New York?
A: ["weather in new york"]
Q: Posts about vector databases on Hacker News
A: ["site:news.ycombinator.com vector database"]
Q: What is the weather like in New York and San Francisco?
A: ["weather in new york", "weather in san francisco"]

View file

@ -1,64 +1,63 @@
# Standard Packages
import concurrent.futures
import json
import logging
import math
import time
import logging
import json
from typing import List, Optional, Union, Any, Dict
from typing import Any, Dict, List, Optional, Union
from asgiref.sync import sync_to_async
# External Packages
from fastapi import APIRouter, Depends, HTTPException, Header, Request
from fastapi import APIRouter, Depends, Header, HTTPException, Request
from fastapi.requests import Request
from fastapi.responses import Response, StreamingResponse
from starlette.authentication import requires
from asgiref.sync import sync_to_async
# Internal Packages
from khoj.configure import configure_server
from khoj.search_type import image_search, text_search
from khoj.search_filter.date_filter import DateFilter
from khoj.search_filter.file_filter import FileFilter
from khoj.search_filter.word_filter import WordFilter
from khoj.utils.config import TextSearchModel, GPT4AllProcessorModel
from khoj.utils.helpers import ConversationCommand, is_none_or_empty, timer, command_descriptions
from khoj.utils.rawconfig import (
FullConfig,
SearchConfig,
SearchResponse,
GithubContentConfig,
NotionContentConfig,
)
from khoj.utils.state import SearchType
from khoj.utils import state, constants
from khoj.utils.helpers import AsyncIteratorWrapper, get_device
from fastapi.responses import StreamingResponse, Response
from khoj.routers.helpers import (
CommonQueryParams,
get_conversation_command,
validate_conversation_config,
agenerate_chat_response,
update_telemetry_state,
is_ready_to_chat,
ApiUserRateLimiter,
)
from khoj.processor.conversation.prompts import help_message, no_entries_found
from khoj.processor.conversation.openai.gpt import extract_questions
from khoj.processor.conversation.gpt4all.chat_model import extract_questions_offline
from khoj.processor.tools.online_search import search_with_google
from fastapi.requests import Request
from database import adapters
from database.adapters import EntryAdapters, ConversationAdapters
from database.models import (
from khoj.database import adapters
from khoj.database.adapters import ConversationAdapters, EntryAdapters
from khoj.database.models import ChatModelOptions
from khoj.database.models import Entry as DbEntry
from khoj.database.models import (
GithubConfig,
KhojUser,
LocalMarkdownConfig,
LocalOrgConfig,
LocalPdfConfig,
LocalPlaintextConfig,
KhojUser,
Entry as DbEntry,
GithubConfig,
NotionConfig,
ChatModelOptions,
)
from khoj.processor.conversation.gpt4all.chat_model import extract_questions_offline
from khoj.processor.conversation.openai.gpt import extract_questions
from khoj.processor.conversation.prompts import help_message, no_entries_found
from khoj.processor.tools.online_search import search_with_google
from khoj.routers.helpers import (
ApiUserRateLimiter,
CommonQueryParams,
agenerate_chat_response,
get_conversation_command,
is_ready_to_chat,
update_telemetry_state,
validate_conversation_config,
)
from khoj.search_filter.date_filter import DateFilter
from khoj.search_filter.file_filter import FileFilter
from khoj.search_filter.word_filter import WordFilter
from khoj.search_type import image_search, text_search
from khoj.utils import constants, state
from khoj.utils.config import GPT4AllProcessorModel, TextSearchModel
from khoj.utils.helpers import (
AsyncIteratorWrapper,
ConversationCommand,
command_descriptions,
get_device,
is_none_or_empty,
timer,
)
from khoj.utils.rawconfig import FullConfig, GithubContentConfig, NotionContentConfig, SearchConfig, SearchResponse
from khoj.utils.state import SearchType
# Initialize Router
api = APIRouter()

View file

@ -15,8 +15,8 @@ from google.oauth2 import id_token
from google.auth.transport import requests as google_requests
# Internal Packages
from database.adapters import get_khoj_tokens, get_or_create_user, create_khoj_token, delete_khoj_token
from database.models import KhojApiUser
from khoj.database.adapters import get_khoj_tokens, get_or_create_user, create_khoj_token, delete_khoj_token
from khoj.database.models import KhojApiUser
from khoj.routers.helpers import update_telemetry_state
from khoj.utils import state

View file

@ -1,31 +1,28 @@
# Standard Packages
import asyncio
import json
import logging
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from functools import partial
import logging
from time import time
import json
from typing import Annotated, Iterator, List, Optional, Union, Tuple, Dict, Any
from datetime import datetime
from khoj.processor.conversation import prompts
from typing import Annotated, Any, Dict, Iterator, List, Optional, Tuple, Union
# External Packages
from fastapi import HTTPException, Header, Request, Depends
from fastapi import Depends, Header, HTTPException, Request
from khoj.database.adapters import ConversationAdapters
from khoj.database.models import KhojUser, Subscription
from khoj.processor.conversation import prompts
from khoj.processor.conversation.gpt4all.chat_model import converse_offline, send_message_to_model_offline
from khoj.processor.conversation.openai.gpt import converse, send_message_to_model
from khoj.processor.conversation.utils import ThreadedGenerator, message_to_log
# Internal Packages
from khoj.utils import state
from khoj.utils.config import GPT4AllProcessorModel
from khoj.utils.helpers import ConversationCommand, log_telemetry
from khoj.processor.conversation.openai.gpt import converse, send_message_to_model
from khoj.processor.conversation.gpt4all.chat_model import converse_offline, send_message_to_model_offline
from khoj.processor.conversation.utils import message_to_log, ThreadedGenerator
from database.models import KhojUser, Subscription, ChatModelOptions
from database.adapters import ConversationAdapters
logger = logging.getLogger(__name__)

View file

@ -1,40 +1,25 @@
# Standard Packages
import logging
from typing import Optional, Union, Dict
import asyncio
import logging
from typing import Dict, Optional, Union
# External Packages
from fastapi import APIRouter, Header, Request, Response, UploadFile
from pydantic import BaseModel
from starlette.authentication import requires
# Internal Packages
from khoj.utils import state, constants
from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
from khoj.processor.data_sources.pdf.pdf_to_entries import PdfToEntries
from khoj.processor.data_sources.github.github_to_entries import GithubToEntries
from khoj.processor.data_sources.notion.notion_to_entries import NotionToEntries
from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries
from khoj.search_type import text_search, image_search
from khoj.database.models import GithubConfig, KhojUser, NotionConfig
from khoj.processor.content.github.github_to_entries import GithubToEntries
from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
from khoj.processor.content.notion.notion_to_entries import NotionToEntries
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
from khoj.routers.helpers import update_telemetry_state
from khoj.utils.yaml import save_config_to_file_updated_state
from khoj.utils.config import SearchModels
from khoj.search_type import image_search, text_search
from khoj.utils import constants, state
from khoj.utils.config import ContentIndex, SearchModels
from khoj.utils.helpers import LRU, get_file_type
from khoj.utils.rawconfig import (
ContentConfig,
FullConfig,
SearchConfig,
)
from khoj.utils.config import (
ContentIndex,
SearchModels,
)
from database.models import (
KhojUser,
GithubConfig,
NotionConfig,
)
from khoj.utils.rawconfig import ContentConfig, FullConfig, SearchConfig
from khoj.utils.yaml import save_config_to_file_updated_state
logger = logging.getLogger(__name__)
@ -189,6 +174,9 @@ def configure_content(
content_index = ContentIndex()
success = True
if t == None:
t = state.SearchType.All
if t is not None and t in [type.value for type in state.SearchType]:
t = state.SearchType(t)
@ -315,7 +303,7 @@ def configure_content(
# Initialize Notion Search
notion_config = NotionConfig.objects.filter(user=user).first()
if (
search_type == state.SearchType.All.value or search_type in state.SearchType.Notion.value
search_type == state.SearchType.All.value or search_type == state.SearchType.Notion.value
) and notion_config:
logger.info("🔌 Setting up search for notion")
text_search.setup(
@ -328,7 +316,7 @@ def configure_content(
)
except Exception as e:
logger.error(f"🚨 Failed to setup GitHub: {e}", exc_info=True)
logger.error(f"🚨 Failed to setup Notion: {e}", exc_info=True)
success = False
# Invalidate Query Cache

View file

@ -10,7 +10,7 @@ from starlette.authentication import requires
import stripe
# Internal Packages
from database import adapters
from khoj.database import adapters
# Stripe integration for Khoj Cloud Subscription

View file

@ -8,8 +8,8 @@ from fastapi import Request
from fastapi.responses import HTMLResponse, FileResponse, RedirectResponse
from fastapi.templating import Jinja2Templates
from starlette.authentication import requires
from database import adapters
from database.models import KhojUser
from khoj.database import adapters
from khoj.database.models import KhojUser
from khoj.utils.rawconfig import (
GithubContentConfig,
GithubRepoConfig,
@ -18,7 +18,7 @@ from khoj.utils.rawconfig import (
# Internal Packages
from khoj.utils import constants, state
from database.adapters import (
from khoj.database.adapters import (
EntryAdapters,
get_user_github_config,
get_user_notion_config,

View file

@ -12,7 +12,6 @@ from sentence_transformers import SentenceTransformer, util
from PIL import Image
from tqdm import trange
import torch
from khoj.utils import state
# Internal Packages
from khoj.utils.helpers import get_absolute_path, get_from_dict, resolve_absolute_path, load_model, timer
@ -26,9 +25,6 @@ logger = logging.getLogger(__name__)
def initialize_model(search_config: ImageSearchConfig):
# Initialize Model
torch.set_num_threads(4)
# Convert model directory to absolute path
search_config.model_directory = resolve_absolute_path(search_config.model_directory)

View file

@ -18,9 +18,9 @@ from khoj.utils.models import BaseEncoder
from khoj.utils.state import SearchType
from khoj.utils.rawconfig import SearchResponse, Entry
from khoj.utils.jsonl import load_jsonl
from khoj.processor.text_to_entries import TextToEntries
from database.adapters import EntryAdapters
from database.models import KhojUser, Entry as DbEntry
from khoj.processor.content.text_to_entries import TextToEntries
from khoj.database.adapters import EntryAdapters
from khoj.database.models import KhojUser, Entry as DbEntry
logger = logging.getLogger(__name__)
@ -141,12 +141,13 @@ def collate_results(hits, dedupe=True):
else:
hit_ids.add(hit.corpus_id)
yield SearchResponse.parse_obj(
yield SearchResponse.model_validate(
{
"entry": hit.raw,
"score": hit.distance,
"corpus_id": str(hit.corpus_id),
"additional": {
"source": hit.file_source,
"file": hit.file_path,
"compiled": hit.compiled,
"heading": hit.heading,
@ -169,6 +170,7 @@ def deduplicated_search_responses(hits: List[SearchResponse]):
"score": hit.score,
"corpus_id": hit.corpus_id,
"additional": {
"source": hit.additional["source"],
"file": hit.additional["file"],
"compiled": hit.additional["compiled"],
"heading": hit.additional["heading"],

View file

@ -7,6 +7,7 @@ app_env_filepath = "~/.khoj/env"
telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
content_directory = "~/.khoj/content/"
default_offline_chat_model = "mistral-7b-instruct-v0.1.Q4_0.gguf"
default_online_chat_model = "gpt-4"
empty_config = {
"search-type": {

View file

@ -7,7 +7,7 @@ from bs4 import BeautifulSoup
from khoj.utils.helpers import get_absolute_path, is_none_or_empty
from khoj.utils.rawconfig import TextContentConfig
from khoj.utils.config import SearchType
from database.models import LocalMarkdownConfig, LocalOrgConfig, LocalPdfConfig, LocalPlaintextConfig
from khoj.database.models import LocalMarkdownConfig, LocalOrgConfig, LocalPdfConfig, LocalPlaintextConfig
logger = logging.getLogger(__name__)

View file

@ -1,16 +1,17 @@
import logging
import os
from database.models import (
from khoj.database.models import (
KhojUser,
OfflineChatProcessorConversationConfig,
OpenAIProcessorConversationConfig,
ChatModelOptions,
)
from khoj.utils.constants import default_offline_chat_model
from khoj.utils.constants import default_offline_chat_model, default_online_chat_model
from khoj.processor.conversation.utils import model_to_prompt_size, model_to_tokenizer
from database.adapters import ConversationAdapters
from khoj.database.adapters import ConversationAdapters
logger = logging.getLogger(__name__)
@ -30,11 +31,6 @@ def initialization():
logger.info(
"🗣️ Configure chat models available to your server. You can always update these at /server/admin using the credentials of your admin account"
)
try:
# Some environments don't support interactive input. We catch the exception and return if that's the case. The admin can still configure their settings from the admin page.
input()
except EOFError:
return
try:
# Note: gpt4all package is not available on all devices.
@ -47,15 +43,27 @@ def initialization():
OfflineChatProcessorConversationConfig.objects.create(enabled=True)
offline_chat_model = input(
f"Enter the name of the offline chat model you want to use, based on the models in HuggingFace (press enter to use the default: {default_offline_chat_model}): "
f"Enter the offline chat model you want to use, See GPT4All for supported models (default: {default_offline_chat_model}): "
)
if offline_chat_model == "":
ChatModelOptions.objects.create(
chat_model=default_offline_chat_model, model_type=ChatModelOptions.ModelType.OFFLINE
)
else:
max_tokens = input("Enter the maximum number of tokens to use for the offline chat model:")
tokenizer = input("Enter the tokenizer to use for the offline chat model:")
default_max_tokens = model_to_prompt_size.get(offline_chat_model, 2000)
max_tokens = input(
f"Enter the maximum number of tokens to use for the offline chat model (default {default_max_tokens}):"
)
max_tokens = max_tokens or default_max_tokens
default_tokenizer = model_to_tokenizer.get(
offline_chat_model, "hf-internal-testing/llama-tokenizer"
)
tokenizer = input(
f"Enter the tokenizer to use for the offline chat model (default: {default_tokenizer}):"
)
tokenizer = tokenizer or default_tokenizer
ChatModelOptions.objects.create(
chat_model=offline_chat_model,
model_type=ChatModelOptions.ModelType.OFFLINE,
@ -71,10 +79,19 @@ def initialization():
logger.info("🗣️ Setting up OpenAI chat model")
api_key = input("Enter your OpenAI API key: ")
OpenAIProcessorConversationConfig.objects.create(api_key=api_key)
openai_chat_model = input("Enter the name of the OpenAI chat model you want to use: ")
max_tokens = input("Enter the maximum number of tokens to use for the OpenAI chat model:")
openai_chat_model = input(
f"Enter the OpenAI chat model you want to use (default: {default_online_chat_model}): "
)
openai_chat_model = openai_chat_model or default_online_chat_model
default_max_tokens = model_to_prompt_size.get(openai_chat_model, 2000)
max_tokens = input(
f"Enter the maximum number of tokens to use for the OpenAI chat model (default: {default_max_tokens}): "
)
max_tokens = max_tokens or default_max_tokens
ChatModelOptions.objects.create(
chat_model=openai_chat_model, model_type=ChatModelOptions.ModelType.OPENAI, max_tokens=max_tokens
chat_model=openai_chat_model, model_type=ChatModelOptions.ModelType.OPENAI, max_prompt_size=max_tokens
)
logger.info("🗣️ Chat model configuration complete")
@ -94,5 +111,8 @@ def initialization():
try:
_create_chat_configuration()
break
# Some environments don't support interactive input. We catch the exception and return if that's the case. The admin can still configure their settings from the admin page.
except EOFError:
return
except Exception as e:
logger.error(f"🚨 Failed to create chat configuration: {e}", exc_info=True)

View file

@ -72,6 +72,9 @@ class ImageSearchConfig(ConfigBase):
encoder_type: Optional[str] = None
model_directory: Optional[Path] = None
class Config:
protected_namespaces = ()
class SearchConfig(ConfigBase):
image: Optional[ImageSearchConfig] = None

View file

@ -1,48 +1,40 @@
# External Packages
import os
from fastapi.testclient import TestClient
from pathlib import Path
import pytest
from fastapi.staticfiles import StaticFiles
from fastapi import FastAPI
import os
from fastapi import FastAPI
import pytest
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from fastapi.testclient import TestClient
# Internal Packages
from khoj.configure import configure_routes, configure_search_types, configure_middleware
from khoj.configure import configure_middleware, configure_routes, configure_search_types
from khoj.database.models import (
GithubConfig,
GithubRepoConfig,
KhojApiUser,
KhojUser,
LocalMarkdownConfig,
LocalOrgConfig,
LocalPlaintextConfig,
)
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries
from khoj.routers.indexer import configure_content
from khoj.search_type import image_search, text_search
from khoj.utils import fs_syncer, state
from khoj.utils.config import SearchModels
from khoj.utils.constants import web_directory
from khoj.utils.helpers import resolve_absolute_path
from khoj.utils.rawconfig import (
ContentConfig,
ImageContentConfig,
SearchConfig,
ImageSearchConfig,
)
from khoj.utils import state, fs_syncer
from khoj.routers.indexer import configure_content
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
from database.models import (
KhojApiUser,
LocalOrgConfig,
LocalMarkdownConfig,
LocalPlaintextConfig,
GithubConfig,
KhojUser,
GithubRepoConfig,
)
from khoj.utils.rawconfig import ContentConfig, ImageContentConfig, ImageSearchConfig, SearchConfig
from tests.helpers import (
UserFactory,
ChatModelOptionsFactory,
OpenAIProcessorConversationConfigFactory,
OfflineChatProcessorConversationConfigFactory,
UserConversationProcessorConfigFactory,
OpenAIProcessorConversationConfigFactory,
SubscriptionFactory,
UserConversationProcessorConfigFactory,
UserFactory,
)

View file

@ -1,7 +1,7 @@
import factory
import os
from database.models import (
from khoj.database.models import (
KhojUser,
KhojApiUser,
ChatModelOptions,

View file

@ -1,23 +1,23 @@
# Standard Modules
from io import BytesIO
from PIL import Image
from urllib.parse import quote
import pytest
from fastapi import FastAPI
# External Packages
from fastapi.testclient import TestClient
from fastapi import FastAPI
import pytest
from PIL import Image
# Internal Packages
from khoj.configure import configure_routes, configure_search_types
from khoj.database.adapters import EntryAdapters
from khoj.database.models import KhojApiUser, KhojUser
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
from khoj.search_type import image_search, text_search
from khoj.utils import state
from khoj.utils.state import search_models, content_index, config
from khoj.search_type import text_search, image_search
from khoj.utils.rawconfig import ContentConfig, SearchConfig
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
from database.models import KhojUser, KhojApiUser
from database.adapters import EntryAdapters
from khoj.utils.state import config, content_index, search_models
# Test

View file

@ -1,5 +1,6 @@
# Standard Packages
import urllib.parse
from urllib.parse import quote
# External Packages
import pytest
@ -54,6 +55,26 @@ def test_chat_with_no_chat_history_or_retrieved_content_gpt4all(client_offline_c
)
# ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
@pytest.mark.django_db(transaction=True)
def test_chat_with_online_content(chat_client):
    """Ask chat an `/online` question and expect the essay's link in the reply.

    Sends the query to `/api/chat` with `stream=true`, keeps only the text that
    precedes the "### compiled references" marker, and checks that the expected
    URL appears in it.
    """
    # Act
    q = "/online give me the link to paul graham's essay how to do great work"
    # safe="" percent-encodes every reserved character, so the whole query stays one URL parameter
    encoded_q = quote(q, safe="")
    response = chat_client.get(f"/api/chat?q={encoded_q}&stream=true")
    response_message = response.content.decode("utf-8")
    # Drop everything from the references marker onwards; only the answer text is checked
    response_message = response_message.split("### compiled references")[0]

    # Assert
    expected_responses = ["http://www.paulgraham.com/greatwork.html"]
    assert response.status_code == 200
    # The failure message names what this test actually looks for (the essay link)
    assert any(expected_response in response_message for expected_response in expected_responses), (
        f"Expected one of {expected_responses} in response but got: {response_message}"
    )
# ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
@pytest.mark.django_db(transaction=True)

View file

@ -4,7 +4,7 @@ from pathlib import Path
import os
# Internal Packages
from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries
from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
from khoj.utils.fs_syncer import get_markdown_files
from khoj.utils.rawconfig import TextContentConfig

View file

@ -1,24 +1,14 @@
# Standard Modules
from io import BytesIO
from PIL import Image
from urllib.parse import quote
import pytest
# External Packages
from fastapi.testclient import TestClient
from fastapi import FastAPI, UploadFile
from io import BytesIO
import pytest
from khoj.database.models import KhojApiUser, KhojUser
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
# Internal Packages
from khoj.configure import configure_routes, configure_search_types
from khoj.utils import state
from khoj.utils.state import search_models, content_index, config
from khoj.search_type import text_search, image_search
from khoj.utils.rawconfig import ContentConfig, SearchConfig
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
from database.models import KhojUser, KhojApiUser
from database.adapters import EntryAdapters
from khoj.search_type import text_search
# ----------------------------------------------------------------------------------------------------

View file

@ -1,6 +1,7 @@
# Standard Packages
import os
import urllib.parse
from urllib.parse import quote
# External Packages
import pytest
@ -10,7 +11,7 @@ from khoj.processor.conversation import prompts
# Internal Packages
from khoj.processor.conversation.utils import message_to_log
from tests.helpers import ConversationFactory
from database.models import KhojUser
from khoj.database.models import KhojUser
# Initialize variables for tests
api_key = os.getenv("OPENAI_API_KEY")
@ -54,6 +55,26 @@ def test_chat_with_no_chat_history_or_retrieved_content(chat_client):
)
# ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
@pytest.mark.django_db(transaction=True)
def test_chat_with_online_content(chat_client):
    """Ask chat an `/online` question and expect the essay's link in the reply.

    Sends the query to `/api/chat` with `stream=true`, keeps only the text that
    precedes the "### compiled references" marker, and checks that the expected
    URL appears in it.
    """
    # Act
    q = "/online give me the link to paul graham's essay how to do great work"
    # safe="" percent-encodes every reserved character, so the whole query stays one URL parameter
    encoded_q = quote(q, safe="")
    response = chat_client.get(f"/api/chat?q={encoded_q}&stream=true")
    response_message = response.content.decode("utf-8")
    # Drop everything from the references marker onwards; only the answer text is checked
    response_message = response_message.split("### compiled references")[0]

    # Assert
    expected_responses = ["http://www.paulgraham.com/greatwork.html"]
    assert response.status_code == 200
    # The failure message names what this test actually looks for (the essay link)
    assert any(expected_response in response_message for expected_response in expected_responses), (
        f"Expected one of {expected_responses} in response but got: {response_message}"
    )
# ----------------------------------------------------------------------------------------------------
@pytest.mark.django_db(transaction=True)
@pytest.mark.chatquality

View file

@ -3,8 +3,8 @@ import json
import os
# Internal Packages
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
from khoj.processor.text_to_entries import TextToEntries
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
from khoj.processor.content.text_to_entries import TextToEntries
from khoj.utils.helpers import is_none_or_empty
from khoj.utils.rawconfig import Entry
from khoj.utils.fs_syncer import get_org_files

View file

@ -2,7 +2,7 @@
import datetime
# Internal Packages
from khoj.processor.data_sources.org_mode import orgnode
from khoj.processor.content.org_mode import orgnode
# Test

View file

@ -3,7 +3,7 @@ import json
import os
# Internal Packages
from khoj.processor.data_sources.pdf.pdf_to_entries import PdfToEntries
from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
from khoj.utils.fs_syncer import get_pdf_files
from khoj.utils.rawconfig import TextContentConfig

View file

@ -3,11 +3,12 @@ import json
import os
from pathlib import Path
from khoj.database.models import KhojUser, LocalPlaintextConfig
from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
# Internal Packages
from khoj.utils.fs_syncer import get_plaintext_files
from khoj.utils.rawconfig import TextContentConfig
from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries
from database.models import LocalPlaintextConfig, KhojUser
def test_plaintext_file(tmp_path):

View file

@ -1,19 +1,20 @@
# System Packages
import logging
from pathlib import Path
import os
import asyncio
import logging
import os
from pathlib import Path
# External Packages
import pytest
from khoj.database.models import Entry, GithubConfig, KhojUser, LocalOrgConfig
from khoj.processor.content.github.github_to_entries import GithubToEntries
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
# Internal Packages
from khoj.search_type import text_search
from khoj.utils.rawconfig import ContentConfig, SearchConfig
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
from khoj.processor.data_sources.github.github_to_entries import GithubToEntries
from khoj.utils.fs_syncer import collect_files, get_org_files
from database.models import LocalOrgConfig, KhojUser, Entry, GithubConfig
from khoj.utils.rawconfig import ContentConfig, SearchConfig
logger = logging.getLogger(__name__)