mirror of
https://github.com/khoj-ai/khoj.git
synced 2025-02-17 08:04:21 +00:00
Merge with features/internet-enabled-search
This commit is contained in:
commit
256e8de40a
88 changed files with 365 additions and 316 deletions
2
.github/workflows/pypi.yml
vendored
2
.github/workflows/pypi.yml
vendored
|
@ -48,7 +48,7 @@ jobs:
|
|||
- name: 🌡️ Validate Python Package
|
||||
run: |
|
||||
# Validate PyPi Package
|
||||
pipx run check-wheel-contents dist/*.whl
|
||||
pipx run check-wheel-contents dist/*.whl --ignore W004
|
||||
pipx run twine check dist/*
|
||||
|
||||
- name: ⏫ Upload Python Package Artifacts
|
||||
|
|
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
|
@ -3,7 +3,7 @@ name: test
|
|||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- src/**
|
||||
- src/khoj/**
|
||||
- tests/**
|
||||
- config/**
|
||||
- pyproject.toml
|
||||
|
|
|
@ -17,7 +17,7 @@ RUN sed -i 's/dynamic = \["version"\]/version = "0.0.0"/' pyproject.toml && \
|
|||
COPY . .
|
||||
|
||||
# Set the PYTHONPATH environment variable in order for it to find the Django app.
|
||||
ENV PYTHONPATH=/app/src:$PYTHONPATH
|
||||
ENV PYTHONPATH=/app/src/khoj:$PYTHONPATH
|
||||
|
||||
# Run the Application
|
||||
# There are more arguments required for the application to run,
|
||||
|
|
|
@ -6,5 +6,5 @@
|
|||
"description": "An AI copilot for your Second Brain",
|
||||
"author": "Khoj Inc.",
|
||||
"authorUrl": "https://github.com/khoj-ai",
|
||||
"isDesktopOnly": true
|
||||
"isDesktopOnly": false
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ dependencies = [
|
|||
"tiktoken >= 0.3.2",
|
||||
"tenacity >= 8.2.2",
|
||||
"pillow == 9.3.0",
|
||||
"pydantic >= 1.10.10",
|
||||
"pydantic >= 2.0.0",
|
||||
"pyyaml == 6.0",
|
||||
"rich >= 13.3.1",
|
||||
"schedule == 1.1.0",
|
||||
|
@ -128,6 +128,9 @@ warn_unused_ignores = false
|
|||
[tool.black]
|
||||
line-length = 120
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
addopts = "--strict-markers"
|
||||
markers = [
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[pytest]
|
||||
DJANGO_SETTINGS_MODULE = app.settings
|
||||
DJANGO_SETTINGS_MODULE = khoj.app.settings
|
||||
pythonpath = . src
|
||||
testpaths = tests
|
||||
markers =
|
||||
|
|
|
@ -120,7 +120,8 @@
|
|||
// Create a new div for the chat message text and append it to the chat message
|
||||
let chatMessageText = document.createElement('div');
|
||||
chatMessageText.className = `chat-message-text ${by}`;
|
||||
chatMessageText.innerHTML = formattedMessage;
|
||||
let textNode = document.createTextNode(formattedMessage);
|
||||
chatMessageText.appendChild(textNode);
|
||||
chatMessage.appendChild(chatMessageText);
|
||||
|
||||
// Append annotations div to the chat message
|
||||
|
|
|
@ -112,14 +112,14 @@
|
|||
} else if (
|
||||
item.additional.file.endsWith(".md") ||
|
||||
item.additional.file.endsWith(".markdown") ||
|
||||
(item.additional.file.includes("issues") && item.additional.file.includes("github.com")) ||
|
||||
(item.additional.file.includes("commit") && item.additional.file.includes("github.com"))
|
||||
(item.additional.file.includes("issues") && item.additional.source === "github") ||
|
||||
(item.additional.file.includes("commit") && item.additional.source === "github")
|
||||
)
|
||||
{
|
||||
html += render_markdown(query, [item]);
|
||||
} else if (item.additional.file.endsWith(".pdf")) {
|
||||
html += render_pdf(query, [item]);
|
||||
} else if (item.additional.file.includes("notion.so")) {
|
||||
} else if (item.additional.source == "notion") {
|
||||
html += `<div class="results-notion">` + `<b><a href="${item.additional.file}">${item.additional.heading}</a></b>` + `<p>${item.entry}</p>` + `</div>`;
|
||||
} else if (item.additional.file.endsWith(".html")) {
|
||||
html += render_html(query, [item]);
|
||||
|
|
|
@ -6,5 +6,5 @@
|
|||
"description": "An AI copilot for your Second Brain",
|
||||
"author": "Khoj Inc.",
|
||||
"authorUrl": "https://github.com/khoj-ai",
|
||||
"isDesktopOnly": true
|
||||
"isDesktopOnly": false
|
||||
}
|
||||
|
|
|
@ -73,21 +73,19 @@ export default class Khoj extends Plugin {
|
|||
// Check if khoj backend is configured, note if cannot connect to backend
|
||||
let headers = { "Authorization": `Bearer ${this.settings.khojApiKey}` };
|
||||
|
||||
if (this.settings.khojUrl === "https://app.khoj.dev") {
|
||||
if (this.settings.khojApiKey === "") {
|
||||
new Notice(`❗️Khoj API key is not configured. Please visit https://app.khoj.dev/config#clients to get an API key.`);
|
||||
return;
|
||||
}
|
||||
|
||||
await request({ url: this.settings.khojUrl ,method: "GET", headers: headers })
|
||||
.then(response => {
|
||||
this.settings.connectedToBackend = true;
|
||||
})
|
||||
.catch(error => {
|
||||
this.settings.connectedToBackend = false;
|
||||
new Notice(`❗️Ensure Khoj backend is running and Khoj URL is pointing to it in the plugin settings.\n\n${error}`);
|
||||
});
|
||||
if (this.settings.khojApiKey === "" && this.settings.khojUrl === "https://app.khoj.dev") {
|
||||
new Notice(`❗️Khoj API key is not configured. Please visit https://app.khoj.dev/config#clients to get an API key.`);
|
||||
return;
|
||||
}
|
||||
|
||||
await request({ url: this.settings.khojUrl ,method: "GET", headers: headers })
|
||||
.then(response => {
|
||||
this.settings.connectedToBackend = true;
|
||||
})
|
||||
.catch(error => {
|
||||
this.settings.connectedToBackend = false;
|
||||
new Notice(`❗️Ensure Khoj backend is running and Khoj URL is pointing to it in the plugin settings.\n\n${error}`);
|
||||
});
|
||||
}
|
||||
|
||||
async saveSettings() {
|
||||
|
|
|
@ -87,27 +87,18 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
|
|||
}
|
||||
|
||||
async getSuggestions(query: string): Promise<SearchResult[]> {
|
||||
// Query Khoj backend for search results
|
||||
// Setup Query Khoj backend for search results
|
||||
let encodedQuery = encodeURIComponent(query);
|
||||
let searchUrl = `${this.setting.khojUrl}/api/search?q=${encodedQuery}&n=${this.setting.resultsCount}&r=${this.rerank}&client=obsidian`;
|
||||
let headers = { 'Authorization': `Bearer ${this.setting.khojApiKey}` }
|
||||
|
||||
// Get search results for markdown and pdf files
|
||||
let mdResponse = await request({ url: `${searchUrl}&t=markdown`, headers: headers });
|
||||
let pdfResponse = await request({ url: `${searchUrl}&t=pdf`, headers: headers });
|
||||
// Get search results from Khoj backend
|
||||
let response = await request({ url: `${searchUrl}`, headers: headers });
|
||||
|
||||
// Parse search results
|
||||
let mdData = JSON.parse(mdResponse)
|
||||
let results = JSON.parse(response)
|
||||
.filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path))
|
||||
.map((result: any) => { return { entry: result.entry, score: result.score, file: result.additional.file }; });
|
||||
let pdfData = JSON.parse(pdfResponse)
|
||||
.filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path))
|
||||
.map((result: any) => { return { entry: `## ${result.additional.compiled}`, score: result.score, file: result.additional.file } as SearchResult; })
|
||||
|
||||
// Combine markdown and PDF results and sort them by score
|
||||
let results = mdData.concat(pdfData)
|
||||
.sort((a: any, b: any) => b.score - a.score)
|
||||
.map((result: any) => { return { entry: result.entry, file: result.file } as SearchResult; })
|
||||
.map((result: any) => { return { entry: result.entry, file: result.additional.file } as SearchResult; });
|
||||
|
||||
this.query = query;
|
||||
return results;
|
||||
|
|
|
@ -14,7 +14,7 @@ from pathlib import Path
|
|||
import os
|
||||
|
||||
# Build paths inside the project like this: BASE_DIR / 'subdir'.
|
||||
BASE_DIR = Path(__file__).resolve().parent.parent.parent
|
||||
BASE_DIR = Path(__file__).resolve().parent.parent
|
||||
|
||||
|
||||
# Quick-start development settings - unsuitable for production
|
||||
|
@ -24,15 +24,15 @@ BASE_DIR = Path(__file__).resolve().parent.parent.parent
|
|||
SECRET_KEY = os.getenv("KHOJ_DJANGO_SECRET_KEY")
|
||||
|
||||
# SECURITY WARNING: don't run with debug turned on in production!
|
||||
DEBUG = os.getenv("KHOJ_DEBUG", "False") == "True"
|
||||
DEBUG = os.getenv("KHOJ_DEBUG") == "True"
|
||||
|
||||
ALLOWED_HOSTS = [".khoj.dev", "localhost", "127.0.0.1", "[::1]", "beta.khoj.dev"]
|
||||
# All Subdomains of KHOJ_DOMAIN are trusted
|
||||
KHOJ_DOMAIN = os.getenv("KHOJ_DOMAIN", "khoj.dev")
|
||||
ALLOWED_HOSTS = [f".{KHOJ_DOMAIN}", "localhost", "127.0.0.1", "[::1]"]
|
||||
|
||||
CSRF_TRUSTED_ORIGINS = [
|
||||
"https://app.khoj.dev",
|
||||
"https://beta.khoj.dev",
|
||||
"https://khoj.dev",
|
||||
"https://*.khoj.dev",
|
||||
f"https://*.{KHOJ_DOMAIN}",
|
||||
f"https://{KHOJ_DOMAIN}",
|
||||
]
|
||||
|
||||
COOKIE_SAMESITE = "None"
|
||||
|
@ -40,8 +40,8 @@ if DEBUG:
|
|||
SESSION_COOKIE_DOMAIN = "localhost"
|
||||
CSRF_COOKIE_DOMAIN = "localhost"
|
||||
else:
|
||||
SESSION_COOKIE_DOMAIN = "khoj.dev"
|
||||
CSRF_COOKIE_DOMAIN = "khoj.dev"
|
||||
SESSION_COOKIE_DOMAIN = KHOJ_DOMAIN
|
||||
CSRF_COOKIE_DOMAIN = KHOJ_DOMAIN
|
||||
|
||||
SESSION_COOKIE_SECURE = True
|
||||
CSRF_COOKIE_SECURE = True
|
||||
|
@ -53,7 +53,7 @@ SESSION_COOKIE_SAMESITE = "None"
|
|||
INSTALLED_APPS = [
|
||||
"django.contrib.auth",
|
||||
"django.contrib.contenttypes",
|
||||
"database.apps.DatabaseConfig",
|
||||
"khoj.database.apps.DatabaseConfig",
|
||||
"django.contrib.admin",
|
||||
"django.contrib.sessions",
|
||||
"django.contrib.messages",
|
||||
|
@ -143,7 +143,7 @@ USE_TZ = True
|
|||
# https://docs.djangoproject.com/en/4.2/howto/static-files/
|
||||
|
||||
STATIC_ROOT = BASE_DIR / "static"
|
||||
STATICFILES_DIRS = [BASE_DIR / "src/khoj/interface/web"]
|
||||
STATICFILES_DIRS = [BASE_DIR / "interface/web"]
|
||||
STATIC_URL = "/static/"
|
||||
|
||||
# Default primary key field type
|
|
@ -20,8 +20,8 @@ from starlette.authentication import (
|
|||
)
|
||||
|
||||
# Internal Packages
|
||||
from database.models import KhojUser, Subscription
|
||||
from database.adapters import get_all_users, get_or_create_search_model
|
||||
from khoj.database.models import KhojUser, Subscription
|
||||
from khoj.database.adapters import get_all_users, get_or_create_search_model
|
||||
from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
|
||||
from khoj.routers.indexer import configure_content, load_content, configure_search
|
||||
from khoj.utils import constants, state
|
||||
|
@ -45,7 +45,7 @@ class UserAuthenticationBackend(AuthenticationBackend):
|
|||
def __init__(
|
||||
self,
|
||||
):
|
||||
from database.models import KhojUser, KhojApiUser
|
||||
from khoj.database.models import KhojUser, KhojApiUser
|
||||
|
||||
self.khojuser_manager = KhojUser.objects
|
||||
self.khojapiuser_manager = KhojApiUser.objects
|
||||
|
|
|
@ -1,32 +1,29 @@
|
|||
import math
|
||||
from typing import Optional, Type, List
|
||||
from datetime import date, datetime
|
||||
import secrets
|
||||
from typing import Type, List
|
||||
from datetime import date, timezone
|
||||
from datetime import date, datetime, timezone
|
||||
from typing import List, Optional, Type
|
||||
|
||||
from django.db import models
|
||||
from asgiref.sync import sync_to_async
|
||||
from django.contrib.sessions.backends.db import SessionStore
|
||||
from pgvector.django import CosineDistance
|
||||
from django.db.models.manager import BaseManager
|
||||
from django.db import models
|
||||
from django.db.models import Q
|
||||
from django.db.models.manager import BaseManager
|
||||
from fastapi import HTTPException
|
||||
from pgvector.django import CosineDistance
|
||||
from torch import Tensor
|
||||
|
||||
# Import sync_to_async from Django Channels
|
||||
from asgiref.sync import sync_to_async
|
||||
|
||||
from fastapi import HTTPException
|
||||
|
||||
from database.models import (
|
||||
KhojUser,
|
||||
from khoj.database.models import (
|
||||
ChatModelOptions,
|
||||
Conversation,
|
||||
Entry,
|
||||
GithubConfig,
|
||||
GithubRepoConfig,
|
||||
GoogleUser,
|
||||
KhojApiUser,
|
||||
KhojUser,
|
||||
NotionConfig,
|
||||
GithubConfig,
|
||||
Entry,
|
||||
GithubRepoConfig,
|
||||
Conversation,
|
||||
ChatModelOptions,
|
||||
OfflineChatProcessorConversationConfig,
|
||||
OpenAIProcessorConversationConfig,
|
||||
SearchModelConfig,
|
||||
Subscription,
|
||||
UserConversationConfig,
|
||||
|
@ -34,12 +31,12 @@ from database.models import (
|
|||
OfflineChatProcessorConversationConfig,
|
||||
ReflectiveQuestion,
|
||||
)
|
||||
from khoj.utils.helpers import generate_random_name
|
||||
from khoj.search_filter.date_filter import DateFilter
|
||||
from khoj.search_filter.file_filter import FileFilter
|
||||
from khoj.search_filter.word_filter import WordFilter
|
||||
from khoj.utils import state
|
||||
from khoj.utils.config import GPT4AllProcessorModel
|
||||
from khoj.search_filter.word_filter import WordFilter
|
||||
from khoj.search_filter.file_filter import FileFilter
|
||||
from khoj.search_filter.date_filter import DateFilter
|
||||
from khoj.utils.helpers import generate_random_name
|
||||
|
||||
|
||||
async def set_notion_config(token: str, user: KhojUser):
|
|
@ -3,7 +3,7 @@ from django.contrib.auth.admin import UserAdmin
|
|||
|
||||
# Register your models here.
|
||||
|
||||
from database.models import (
|
||||
from khoj.database.models import (
|
||||
KhojUser,
|
||||
ChatModelOptions,
|
||||
OpenAIProcessorConversationConfig,
|
|
@ -3,4 +3,4 @@ from django.apps import AppConfig
|
|||
|
||||
class DatabaseConfig(AppConfig):
|
||||
default_auto_field = "django.db.models.BigAutoField"
|
||||
name = "database"
|
||||
name = "khoj.database"
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
from django.db import migrations
|
||||
|
||||
from typing import List, Any
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
dependencies = [
|
||||
|
@ -9,4 +11,4 @@ class Migration(migrations.Migration):
|
|||
("database", "0010_rename_embeddings_entry_and_more"),
|
||||
]
|
||||
|
||||
operations = []
|
||||
operations: List[Any] = []
|
|
@ -112,14 +112,14 @@
|
|||
} else if (
|
||||
item.additional.file.endsWith(".md") ||
|
||||
item.additional.file.endsWith(".markdown") ||
|
||||
(item.additional.file.includes("issues") && item.additional.file.includes("github.com")) ||
|
||||
(item.additional.file.includes("commit") && item.additional.file.includes("github.com"))
|
||||
(item.additional.file.includes("issues") && item.additional.source === "github") ||
|
||||
(item.additional.file.includes("commit") && item.additional.source === "github")
|
||||
)
|
||||
{
|
||||
html += render_markdown(query, [item]);
|
||||
} else if (item.additional.file.endsWith(".pdf")) {
|
||||
html += render_pdf(query, [item]);
|
||||
} else if (item.additional.file.includes("notion.so")) {
|
||||
} else if (item.additional.source === "notion") {
|
||||
html += `<div class="results-notion">` + `<b><a href="${item.additional.file}">${item.additional.heading}</a></b>` + `<p>${item.entry}</p>` + `</div>`;
|
||||
} else if (item.additional.file.endsWith(".html")) {
|
||||
html += render_html(query, [item]);
|
||||
|
|
|
@ -1,4 +1,10 @@
|
|||
""" Main module for Khoj Assistant
|
||||
isort:skip_file
|
||||
"""
|
||||
|
||||
# Standard Packages
|
||||
from contextlib import redirect_stdout
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import locale
|
||||
|
@ -25,14 +31,18 @@ from django.core.asgi import get_asgi_application
|
|||
from django.core.management import call_command
|
||||
|
||||
# Initialize Django
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "app.settings")
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "khoj.app.settings")
|
||||
django.setup()
|
||||
|
||||
# Initialize Django Database
|
||||
call_command("migrate", "--noinput")
|
||||
db_migrate_output = io.StringIO()
|
||||
with redirect_stdout(db_migrate_output):
|
||||
call_command("migrate", "--noinput")
|
||||
|
||||
# Initialize Django Static Files
|
||||
call_command("collectstatic", "--noinput")
|
||||
collectstatic_output = io.StringIO()
|
||||
with redirect_stdout(collectstatic_output):
|
||||
call_command("collectstatic", "--noinput")
|
||||
|
||||
# Initialize the Application Server
|
||||
app = FastAPI()
|
||||
|
@ -41,9 +51,16 @@ app = FastAPI()
|
|||
django_app = get_asgi_application()
|
||||
|
||||
# Add CORS middleware
|
||||
KHOJ_DOMAIN = os.getenv("KHOJ_DOMAIN", "app.khoj.dev")
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["app://obsidian.md", "http://localhost:*", "https://app.khoj.dev/*", "app://khoj.dev"],
|
||||
allow_origins=[
|
||||
"app://obsidian.md",
|
||||
"http://localhost:*",
|
||||
"http://127.0.0.1:*",
|
||||
f"https://{KHOJ_DOMAIN}",
|
||||
"app://khoj.dev",
|
||||
],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
|
@ -75,14 +92,16 @@ def run(should_start_server=True):
|
|||
args = cli(state.cli_args)
|
||||
set_state(args)
|
||||
|
||||
logger.info(f"🚒 Initializing Khoj v{state.khoj_version}")
|
||||
|
||||
# Set Logging Level
|
||||
if args.verbose == 0:
|
||||
logger.setLevel(logging.INFO)
|
||||
elif args.verbose >= 1:
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
logger.info(f"🚒 Initializing Khoj v{state.khoj_version}")
|
||||
logger.info(f"📦 Initializing DB:\n{db_migrate_output.getvalue().strip()}")
|
||||
logger.debug(f"🌍 Initializing Web Client:\n{collectstatic_output.getvalue().strip()}")
|
||||
|
||||
initialization()
|
||||
|
||||
# Create app directory, if it doesn't exist
|
||||
|
@ -103,10 +122,10 @@ def run(should_start_server=True):
|
|||
|
||||
# Mount Django and Static Files
|
||||
app.mount("/server", django_app, name="server")
|
||||
static_dir = "static"
|
||||
static_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
|
||||
if not os.path.exists(static_dir):
|
||||
os.mkdir(static_dir)
|
||||
app.mount(f"/{static_dir}", StaticFiles(directory=static_dir), name=static_dir)
|
||||
app.mount(f"/static", StaticFiles(directory=static_dir), name=static_dir)
|
||||
|
||||
# Configure Middleware
|
||||
configure_middleware(app)
|
||||
|
|
|
@ -6,7 +6,7 @@ import sys
|
|||
|
||||
def main():
|
||||
"""Run administrative tasks."""
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "app.settings")
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "khoj.app.settings")
|
||||
try:
|
||||
from django.core.management import execute_from_command_line
|
||||
except ImportError as exc:
|
|
@ -60,7 +60,7 @@ import logging
|
|||
from packaging import version
|
||||
|
||||
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
||||
from database.models import (
|
||||
from khoj.database.models import (
|
||||
OpenAIProcessorConversationConfig,
|
||||
OfflineChatProcessorConversationConfig,
|
||||
ChatModelOptions,
|
||||
|
|
|
@ -2,19 +2,20 @@
|
|||
import logging
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Union, Tuple
|
||||
from typing import Dict, List, Tuple, Union
|
||||
|
||||
# External Packages
|
||||
import requests
|
||||
|
||||
from khoj.database.models import Entry as DbEntry
|
||||
from khoj.database.models import GithubConfig, KhojUser
|
||||
from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
|
||||
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
|
||||
from khoj.processor.content.text_to_entries import TextToEntries
|
||||
|
||||
# Internal Packages
|
||||
from khoj.utils.helpers import timer
|
||||
from khoj.utils.rawconfig import Entry, GithubContentConfig, GithubRepoConfig
|
||||
from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries
|
||||
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
|
||||
from khoj.processor.text_to_entries import TextToEntries
|
||||
from database.models import Entry as DbEntry, GithubConfig, KhojUser
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -1,17 +1,19 @@
|
|||
# Standard Packages
|
||||
import logging
|
||||
import re
|
||||
import urllib3
|
||||
from pathlib import Path
|
||||
from typing import Tuple, List
|
||||
from typing import List, Tuple
|
||||
|
||||
import urllib3
|
||||
|
||||
from khoj.database.models import Entry as DbEntry
|
||||
from khoj.database.models import KhojUser
|
||||
|
||||
# Internal Packages
|
||||
from khoj.processor.text_to_entries import TextToEntries
|
||||
from khoj.utils.helpers import timer
|
||||
from khoj.processor.content.text_to_entries import TextToEntries
|
||||
from khoj.utils.constants import empty_escape_sequences
|
||||
from khoj.utils.helpers import timer
|
||||
from khoj.utils.rawconfig import Entry
|
||||
from database.models import Entry as DbEntry, KhojUser
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -1,19 +1,18 @@
|
|||
# Standard Packages
|
||||
import logging
|
||||
from enum import Enum
|
||||
from typing import Tuple
|
||||
|
||||
# External Packages
|
||||
import requests
|
||||
|
||||
from khoj.database.models import Entry as DbEntry
|
||||
from khoj.database.models import KhojUser, NotionConfig
|
||||
from khoj.processor.content.text_to_entries import TextToEntries
|
||||
|
||||
# Internal Packages
|
||||
from khoj.utils.helpers import timer
|
||||
from khoj.utils.rawconfig import Entry, NotionContentConfig
|
||||
from khoj.processor.text_to_entries import TextToEntries
|
||||
from khoj.utils.rawconfig import Entry
|
||||
from database.models import Entry as DbEntry, KhojUser, NotionConfig
|
||||
|
||||
from enum import Enum
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -3,14 +3,15 @@ import logging
|
|||
from pathlib import Path
|
||||
from typing import Iterable, List, Tuple
|
||||
|
||||
from khoj.database.models import Entry as DbEntry
|
||||
from khoj.database.models import KhojUser
|
||||
|
||||
# Internal Packages
|
||||
from khoj.processor.data_sources.org_mode import orgnode
|
||||
from khoj.processor.text_to_entries import TextToEntries
|
||||
from khoj.processor.content.org_mode import orgnode
|
||||
from khoj.processor.content.text_to_entries import TextToEntries
|
||||
from khoj.utils import state
|
||||
from khoj.utils.helpers import timer
|
||||
from khoj.utils.rawconfig import Entry
|
||||
from khoj.utils import state
|
||||
from database.models import Entry as DbEntry, KhojUser
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -1,18 +1,19 @@
|
|||
# Standard Packages
|
||||
import os
|
||||
import logging
|
||||
from typing import List, Tuple
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
from typing import List, Tuple
|
||||
|
||||
# External Packages
|
||||
from langchain.document_loaders import PyMuPDFLoader
|
||||
|
||||
from khoj.database.models import Entry as DbEntry
|
||||
from khoj.database.models import KhojUser
|
||||
|
||||
# Internal Packages
|
||||
from khoj.processor.text_to_entries import TextToEntries
|
||||
from khoj.processor.content.text_to_entries import TextToEntries
|
||||
from khoj.utils.helpers import timer
|
||||
from khoj.utils.rawconfig import Entry
|
||||
from database.models import Entry as DbEntry, KhojUser
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -2,15 +2,16 @@
|
|||
import logging
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from khoj.database.models import Entry as DbEntry
|
||||
from khoj.database.models import KhojUser
|
||||
|
||||
# Internal Packages
|
||||
from khoj.processor.text_to_entries import TextToEntries
|
||||
from khoj.processor.content.text_to_entries import TextToEntries
|
||||
from khoj.utils.helpers import timer
|
||||
from khoj.utils.rawconfig import Entry
|
||||
from database.models import Entry as DbEntry, KhojUser
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -13,8 +13,8 @@ from khoj.utils.helpers import is_none_or_empty, timer, batcher
|
|||
# Internal Packages
|
||||
from khoj.utils.rawconfig import Entry
|
||||
from khoj.search_filter.date_filter import DateFilter
|
||||
from database.models import KhojUser, Entry as DbEntry, EntryDates
|
||||
from database.adapters import EntryAdapters
|
||||
from khoj.database.models import KhojUser, Entry as DbEntry, EntryDates
|
||||
from khoj.database.adapters import EntryAdapters
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
|
@ -10,7 +10,7 @@ You are Khoj, a smart, inquisitive and helpful personal assistant.
|
|||
Use your general knowledge and the past conversation with the user as context to inform your responses.
|
||||
You were created by Khoj Inc. with the following capabilities:
|
||||
|
||||
- You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you. They can share files with you using the Khoj desktop application.
|
||||
- You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you. They can share files with you using any Khoj client, including the native Desktop app, the Obsidian or Emacs plugins, or the web app.
|
||||
- You cannot set reminders.
|
||||
- Say "I don't know" or "I don't understand" if you don't know what to say or if you don't know the answer to a question.
|
||||
- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided notes or past conversations.
|
||||
|
@ -128,8 +128,9 @@ The user has a question which you can use the internet to respond to. Can you br
|
|||
Today's date in UTC: {current_date}
|
||||
|
||||
Here are some examples of questions and subqueries:
|
||||
Q: What is the weather like in New York?
|
||||
A: ["weather in new york"]
|
||||
|
||||
Q: Posts about vector databases on Hacker News
|
||||
A: ["site:"news.ycombinator.com vector database"]
|
||||
|
||||
Q: What is the weather like in New York and San Francisco?
|
||||
A: ["weather in new york", "weather in san francisco"]
|
||||
|
|
|
@ -1,64 +1,63 @@
|
|||
# Standard Packages
|
||||
import concurrent.futures
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import time
|
||||
import logging
|
||||
import json
|
||||
from typing import List, Optional, Union, Any, Dict
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from asgiref.sync import sync_to_async
|
||||
|
||||
# External Packages
|
||||
from fastapi import APIRouter, Depends, HTTPException, Header, Request
|
||||
from fastapi import APIRouter, Depends, Header, HTTPException, Request
|
||||
from fastapi.requests import Request
|
||||
from fastapi.responses import Response, StreamingResponse
|
||||
from starlette.authentication import requires
|
||||
from asgiref.sync import sync_to_async
|
||||
|
||||
# Internal Packages
|
||||
from khoj.configure import configure_server
|
||||
from khoj.search_type import image_search, text_search
|
||||
from khoj.search_filter.date_filter import DateFilter
|
||||
from khoj.search_filter.file_filter import FileFilter
|
||||
from khoj.search_filter.word_filter import WordFilter
|
||||
from khoj.utils.config import TextSearchModel, GPT4AllProcessorModel
|
||||
from khoj.utils.helpers import ConversationCommand, is_none_or_empty, timer, command_descriptions
|
||||
from khoj.utils.rawconfig import (
|
||||
FullConfig,
|
||||
SearchConfig,
|
||||
SearchResponse,
|
||||
GithubContentConfig,
|
||||
NotionContentConfig,
|
||||
)
|
||||
from khoj.utils.state import SearchType
|
||||
from khoj.utils import state, constants
|
||||
from khoj.utils.helpers import AsyncIteratorWrapper, get_device
|
||||
from fastapi.responses import StreamingResponse, Response
|
||||
from khoj.routers.helpers import (
|
||||
CommonQueryParams,
|
||||
get_conversation_command,
|
||||
validate_conversation_config,
|
||||
agenerate_chat_response,
|
||||
update_telemetry_state,
|
||||
is_ready_to_chat,
|
||||
ApiUserRateLimiter,
|
||||
)
|
||||
from khoj.processor.conversation.prompts import help_message, no_entries_found
|
||||
from khoj.processor.conversation.openai.gpt import extract_questions
|
||||
from khoj.processor.conversation.gpt4all.chat_model import extract_questions_offline
|
||||
from khoj.processor.tools.online_search import search_with_google
|
||||
from fastapi.requests import Request
|
||||
|
||||
from database import adapters
|
||||
from database.adapters import EntryAdapters, ConversationAdapters
|
||||
from database.models import (
|
||||
from khoj.database import adapters
|
||||
from khoj.database.adapters import ConversationAdapters, EntryAdapters
|
||||
from khoj.database.models import ChatModelOptions
|
||||
from khoj.database.models import Entry as DbEntry
|
||||
from khoj.database.models import (
|
||||
GithubConfig,
|
||||
KhojUser,
|
||||
LocalMarkdownConfig,
|
||||
LocalOrgConfig,
|
||||
LocalPdfConfig,
|
||||
LocalPlaintextConfig,
|
||||
KhojUser,
|
||||
Entry as DbEntry,
|
||||
GithubConfig,
|
||||
NotionConfig,
|
||||
ChatModelOptions,
|
||||
)
|
||||
|
||||
from khoj.processor.conversation.gpt4all.chat_model import extract_questions_offline
|
||||
from khoj.processor.conversation.openai.gpt import extract_questions
|
||||
from khoj.processor.conversation.prompts import help_message, no_entries_found
|
||||
from khoj.processor.tools.online_search import search_with_google
|
||||
from khoj.routers.helpers import (
|
||||
ApiUserRateLimiter,
|
||||
CommonQueryParams,
|
||||
agenerate_chat_response,
|
||||
get_conversation_command,
|
||||
is_ready_to_chat,
|
||||
update_telemetry_state,
|
||||
validate_conversation_config,
|
||||
)
|
||||
from khoj.search_filter.date_filter import DateFilter
|
||||
from khoj.search_filter.file_filter import FileFilter
|
||||
from khoj.search_filter.word_filter import WordFilter
|
||||
from khoj.search_type import image_search, text_search
|
||||
from khoj.utils import constants, state
|
||||
from khoj.utils.config import GPT4AllProcessorModel, TextSearchModel
|
||||
from khoj.utils.helpers import (
|
||||
AsyncIteratorWrapper,
|
||||
ConversationCommand,
|
||||
command_descriptions,
|
||||
get_device,
|
||||
is_none_or_empty,
|
||||
timer,
|
||||
)
|
||||
from khoj.utils.rawconfig import FullConfig, GithubContentConfig, NotionContentConfig, SearchConfig, SearchResponse
|
||||
from khoj.utils.state import SearchType
|
||||
|
||||
# Initialize Router
|
||||
api = APIRouter()
|
||||
|
|
|
@ -15,8 +15,8 @@ from google.oauth2 import id_token
|
|||
from google.auth.transport import requests as google_requests
|
||||
|
||||
# Internal Packages
|
||||
from database.adapters import get_khoj_tokens, get_or_create_user, create_khoj_token, delete_khoj_token
|
||||
from database.models import KhojApiUser
|
||||
from khoj.database.adapters import get_khoj_tokens, get_or_create_user, create_khoj_token, delete_khoj_token
|
||||
from khoj.database.models import KhojApiUser
|
||||
from khoj.routers.helpers import update_telemetry_state
|
||||
from khoj.utils import state
|
||||
|
||||
|
|
|
@ -1,31 +1,28 @@
|
|||
# Standard Packages
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import datetime
|
||||
from functools import partial
|
||||
import logging
|
||||
from time import time
|
||||
import json
|
||||
from typing import Annotated, Iterator, List, Optional, Union, Tuple, Dict, Any
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from khoj.processor.conversation import prompts
|
||||
from typing import Annotated, Any, Dict, Iterator, List, Optional, Tuple, Union
|
||||
|
||||
# External Packages
|
||||
from fastapi import HTTPException, Header, Request, Depends
|
||||
from fastapi import Depends, Header, HTTPException, Request
|
||||
|
||||
from khoj.database.adapters import ConversationAdapters
|
||||
from khoj.database.models import KhojUser, Subscription
|
||||
from khoj.processor.conversation import prompts
|
||||
from khoj.processor.conversation.gpt4all.chat_model import converse_offline, send_message_to_model_offline
|
||||
from khoj.processor.conversation.openai.gpt import converse, send_message_to_model
|
||||
from khoj.processor.conversation.utils import ThreadedGenerator, message_to_log
|
||||
|
||||
# Internal Packages
|
||||
from khoj.utils import state
|
||||
from khoj.utils.config import GPT4AllProcessorModel
|
||||
from khoj.utils.helpers import ConversationCommand, log_telemetry
|
||||
from khoj.processor.conversation.openai.gpt import converse, send_message_to_model
|
||||
from khoj.processor.conversation.gpt4all.chat_model import converse_offline, send_message_to_model_offline
|
||||
from khoj.processor.conversation.utils import message_to_log, ThreadedGenerator
|
||||
from database.models import KhojUser, Subscription, ChatModelOptions
|
||||
from database.adapters import ConversationAdapters
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
@ -1,40 +1,25 @@
|
|||
# Standard Packages
|
||||
import logging
|
||||
from typing import Optional, Union, Dict
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Dict, Optional, Union
|
||||
|
||||
# External Packages
|
||||
from fastapi import APIRouter, Header, Request, Response, UploadFile
|
||||
from pydantic import BaseModel
|
||||
from starlette.authentication import requires
|
||||
|
||||
# Internal Packages
|
||||
from khoj.utils import state, constants
|
||||
from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries
|
||||
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
|
||||
from khoj.processor.data_sources.pdf.pdf_to_entries import PdfToEntries
|
||||
from khoj.processor.data_sources.github.github_to_entries import GithubToEntries
|
||||
from khoj.processor.data_sources.notion.notion_to_entries import NotionToEntries
|
||||
from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries
|
||||
from khoj.search_type import text_search, image_search
|
||||
from khoj.database.models import GithubConfig, KhojUser, NotionConfig
|
||||
from khoj.processor.content.github.github_to_entries import GithubToEntries
|
||||
from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
|
||||
from khoj.processor.content.notion.notion_to_entries import NotionToEntries
|
||||
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
|
||||
from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
|
||||
from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
|
||||
from khoj.routers.helpers import update_telemetry_state
|
||||
from khoj.utils.yaml import save_config_to_file_updated_state
|
||||
from khoj.utils.config import SearchModels
|
||||
from khoj.search_type import image_search, text_search
|
||||
from khoj.utils import constants, state
|
||||
from khoj.utils.config import ContentIndex, SearchModels
|
||||
from khoj.utils.helpers import LRU, get_file_type
|
||||
from khoj.utils.rawconfig import (
|
||||
ContentConfig,
|
||||
FullConfig,
|
||||
SearchConfig,
|
||||
)
|
||||
from khoj.utils.config import (
|
||||
ContentIndex,
|
||||
SearchModels,
|
||||
)
|
||||
from database.models import (
|
||||
KhojUser,
|
||||
GithubConfig,
|
||||
NotionConfig,
|
||||
)
|
||||
from khoj.utils.rawconfig import ContentConfig, FullConfig, SearchConfig
|
||||
from khoj.utils.yaml import save_config_to_file_updated_state
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -189,6 +174,9 @@ def configure_content(
|
|||
content_index = ContentIndex()
|
||||
|
||||
success = True
|
||||
if t == None:
|
||||
t = state.SearchType.All
|
||||
|
||||
if t is not None and t in [type.value for type in state.SearchType]:
|
||||
t = state.SearchType(t)
|
||||
|
||||
|
@ -315,7 +303,7 @@ def configure_content(
|
|||
# Initialize Notion Search
|
||||
notion_config = NotionConfig.objects.filter(user=user).first()
|
||||
if (
|
||||
search_type == state.SearchType.All.value or search_type in state.SearchType.Notion.value
|
||||
search_type == state.SearchType.All.value or search_type == state.SearchType.Notion.value
|
||||
) and notion_config:
|
||||
logger.info("🔌 Setting up search for notion")
|
||||
text_search.setup(
|
||||
|
@ -328,7 +316,7 @@ def configure_content(
|
|||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"🚨 Failed to setup GitHub: {e}", exc_info=True)
|
||||
logger.error(f"🚨 Failed to setup Notion: {e}", exc_info=True)
|
||||
success = False
|
||||
|
||||
# Invalidate Query Cache
|
||||
|
|
|
@ -10,7 +10,7 @@ from starlette.authentication import requires
|
|||
import stripe
|
||||
|
||||
# Internal Packages
|
||||
from database import adapters
|
||||
from khoj.database import adapters
|
||||
|
||||
|
||||
# Stripe integration for Khoj Cloud Subscription
|
||||
|
|
|
@ -8,8 +8,8 @@ from fastapi import Request
|
|||
from fastapi.responses import HTMLResponse, FileResponse, RedirectResponse
|
||||
from fastapi.templating import Jinja2Templates
|
||||
from starlette.authentication import requires
|
||||
from database import adapters
|
||||
from database.models import KhojUser
|
||||
from khoj.database import adapters
|
||||
from khoj.database.models import KhojUser
|
||||
from khoj.utils.rawconfig import (
|
||||
GithubContentConfig,
|
||||
GithubRepoConfig,
|
||||
|
@ -18,7 +18,7 @@ from khoj.utils.rawconfig import (
|
|||
|
||||
# Internal Packages
|
||||
from khoj.utils import constants, state
|
||||
from database.adapters import (
|
||||
from khoj.database.adapters import (
|
||||
EntryAdapters,
|
||||
get_user_github_config,
|
||||
get_user_notion_config,
|
||||
|
|
|
@ -12,7 +12,6 @@ from sentence_transformers import SentenceTransformer, util
|
|||
from PIL import Image
|
||||
from tqdm import trange
|
||||
import torch
|
||||
from khoj.utils import state
|
||||
|
||||
# Internal Packages
|
||||
from khoj.utils.helpers import get_absolute_path, get_from_dict, resolve_absolute_path, load_model, timer
|
||||
|
@ -26,9 +25,6 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
|
||||
def initialize_model(search_config: ImageSearchConfig):
|
||||
# Initialize Model
|
||||
torch.set_num_threads(4)
|
||||
|
||||
# Convert model directory to absolute path
|
||||
search_config.model_directory = resolve_absolute_path(search_config.model_directory)
|
||||
|
||||
|
|
|
@ -18,9 +18,9 @@ from khoj.utils.models import BaseEncoder
|
|||
from khoj.utils.state import SearchType
|
||||
from khoj.utils.rawconfig import SearchResponse, Entry
|
||||
from khoj.utils.jsonl import load_jsonl
|
||||
from khoj.processor.text_to_entries import TextToEntries
|
||||
from database.adapters import EntryAdapters
|
||||
from database.models import KhojUser, Entry as DbEntry
|
||||
from khoj.processor.content.text_to_entries import TextToEntries
|
||||
from khoj.database.adapters import EntryAdapters
|
||||
from khoj.database.models import KhojUser, Entry as DbEntry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -141,12 +141,13 @@ def collate_results(hits, dedupe=True):
|
|||
|
||||
else:
|
||||
hit_ids.add(hit.corpus_id)
|
||||
yield SearchResponse.parse_obj(
|
||||
yield SearchResponse.model_validate(
|
||||
{
|
||||
"entry": hit.raw,
|
||||
"score": hit.distance,
|
||||
"corpus_id": str(hit.corpus_id),
|
||||
"additional": {
|
||||
"source": hit.file_source,
|
||||
"file": hit.file_path,
|
||||
"compiled": hit.compiled,
|
||||
"heading": hit.heading,
|
||||
|
@ -169,6 +170,7 @@ def deduplicated_search_responses(hits: List[SearchResponse]):
|
|||
"score": hit.score,
|
||||
"corpus_id": hit.corpus_id,
|
||||
"additional": {
|
||||
"source": hit.additional["source"],
|
||||
"file": hit.additional["file"],
|
||||
"compiled": hit.additional["compiled"],
|
||||
"heading": hit.additional["heading"],
|
||||
|
|
|
@ -7,6 +7,7 @@ app_env_filepath = "~/.khoj/env"
|
|||
telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
|
||||
content_directory = "~/.khoj/content/"
|
||||
default_offline_chat_model = "mistral-7b-instruct-v0.1.Q4_0.gguf"
|
||||
default_online_chat_model = "gpt-4"
|
||||
|
||||
empty_config = {
|
||||
"search-type": {
|
||||
|
|
|
@ -7,7 +7,7 @@ from bs4 import BeautifulSoup
|
|||
from khoj.utils.helpers import get_absolute_path, is_none_or_empty
|
||||
from khoj.utils.rawconfig import TextContentConfig
|
||||
from khoj.utils.config import SearchType
|
||||
from database.models import LocalMarkdownConfig, LocalOrgConfig, LocalPdfConfig, LocalPlaintextConfig
|
||||
from khoj.database.models import LocalMarkdownConfig, LocalOrgConfig, LocalPdfConfig, LocalPlaintextConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
@ -1,16 +1,17 @@
|
|||
import logging
|
||||
import os
|
||||
|
||||
from database.models import (
|
||||
from khoj.database.models import (
|
||||
KhojUser,
|
||||
OfflineChatProcessorConversationConfig,
|
||||
OpenAIProcessorConversationConfig,
|
||||
ChatModelOptions,
|
||||
)
|
||||
|
||||
from khoj.utils.constants import default_offline_chat_model
|
||||
from khoj.utils.constants import default_offline_chat_model, default_online_chat_model
|
||||
from khoj.processor.conversation.utils import model_to_prompt_size, model_to_tokenizer
|
||||
|
||||
from database.adapters import ConversationAdapters
|
||||
from khoj.database.adapters import ConversationAdapters
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -30,11 +31,6 @@ def initialization():
|
|||
logger.info(
|
||||
"🗣️ Configure chat models available to your server. You can always update these at /server/admin using the credentials of your admin account"
|
||||
)
|
||||
try:
|
||||
# Some environments don't support interactive input. We catch the exception and return if that's the case. The admin can still configure their settings from the admin page.
|
||||
input()
|
||||
except EOFError:
|
||||
return
|
||||
|
||||
try:
|
||||
# Note: gpt4all package is not available on all devices.
|
||||
|
@ -47,15 +43,27 @@ def initialization():
|
|||
OfflineChatProcessorConversationConfig.objects.create(enabled=True)
|
||||
|
||||
offline_chat_model = input(
|
||||
f"Enter the name of the offline chat model you want to use, based on the models in HuggingFace (press enter to use the default: {default_offline_chat_model}): "
|
||||
f"Enter the offline chat model you want to use, See GPT4All for supported models (default: {default_offline_chat_model}): "
|
||||
)
|
||||
if offline_chat_model == "":
|
||||
ChatModelOptions.objects.create(
|
||||
chat_model=default_offline_chat_model, model_type=ChatModelOptions.ModelType.OFFLINE
|
||||
)
|
||||
else:
|
||||
max_tokens = input("Enter the maximum number of tokens to use for the offline chat model:")
|
||||
tokenizer = input("Enter the tokenizer to use for the offline chat model:")
|
||||
default_max_tokens = model_to_prompt_size.get(offline_chat_model, 2000)
|
||||
max_tokens = input(
|
||||
f"Enter the maximum number of tokens to use for the offline chat model (default {default_max_tokens}):"
|
||||
)
|
||||
max_tokens = max_tokens or default_max_tokens
|
||||
|
||||
default_tokenizer = model_to_tokenizer.get(
|
||||
offline_chat_model, "hf-internal-testing/llama-tokenizer"
|
||||
)
|
||||
tokenizer = input(
|
||||
f"Enter the tokenizer to use for the offline chat model (default: {default_tokenizer}):"
|
||||
)
|
||||
tokenizer = tokenizer or default_tokenizer
|
||||
|
||||
ChatModelOptions.objects.create(
|
||||
chat_model=offline_chat_model,
|
||||
model_type=ChatModelOptions.ModelType.OFFLINE,
|
||||
|
@ -71,10 +79,19 @@ def initialization():
|
|||
logger.info("🗣️ Setting up OpenAI chat model")
|
||||
api_key = input("Enter your OpenAI API key: ")
|
||||
OpenAIProcessorConversationConfig.objects.create(api_key=api_key)
|
||||
openai_chat_model = input("Enter the name of the OpenAI chat model you want to use: ")
|
||||
max_tokens = input("Enter the maximum number of tokens to use for the OpenAI chat model:")
|
||||
|
||||
openai_chat_model = input(
|
||||
f"Enter the OpenAI chat model you want to use (default: {default_online_chat_model}): "
|
||||
)
|
||||
openai_chat_model = openai_chat_model or default_online_chat_model
|
||||
|
||||
default_max_tokens = model_to_prompt_size.get(openai_chat_model, 2000)
|
||||
max_tokens = input(
|
||||
f"Enter the maximum number of tokens to use for the OpenAI chat model (default: {default_max_tokens}): "
|
||||
)
|
||||
max_tokens = max_tokens or default_max_tokens
|
||||
ChatModelOptions.objects.create(
|
||||
chat_model=openai_chat_model, model_type=ChatModelOptions.ModelType.OPENAI, max_tokens=max_tokens
|
||||
chat_model=openai_chat_model, model_type=ChatModelOptions.ModelType.OPENAI, max_prompt_size=max_tokens
|
||||
)
|
||||
|
||||
logger.info("🗣️ Chat model configuration complete")
|
||||
|
@ -94,5 +111,8 @@ def initialization():
|
|||
try:
|
||||
_create_chat_configuration()
|
||||
break
|
||||
# Some environments don't support interactive input. We catch the exception and return if that's the case. The admin can still configure their settings from the admin page.
|
||||
except EOFError:
|
||||
return
|
||||
except Exception as e:
|
||||
logger.error(f"🚨 Failed to create chat configuration: {e}", exc_info=True)
|
||||
|
|
|
@ -72,6 +72,9 @@ class ImageSearchConfig(ConfigBase):
|
|||
encoder_type: Optional[str] = None
|
||||
model_directory: Optional[Path] = None
|
||||
|
||||
class Config:
|
||||
protected_namespaces = ()
|
||||
|
||||
|
||||
class SearchConfig(ConfigBase):
|
||||
image: Optional[ImageSearchConfig] = None
|
||||
|
|
|
@ -1,48 +1,40 @@
|
|||
# External Packages
|
||||
import os
|
||||
from fastapi.testclient import TestClient
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi import FastAPI
|
||||
import os
|
||||
from fastapi import FastAPI
|
||||
|
||||
import pytest
|
||||
from fastapi import FastAPI
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
# Internal Packages
|
||||
from khoj.configure import configure_routes, configure_search_types, configure_middleware
|
||||
from khoj.configure import configure_middleware, configure_routes, configure_search_types
|
||||
from khoj.database.models import (
|
||||
GithubConfig,
|
||||
GithubRepoConfig,
|
||||
KhojApiUser,
|
||||
KhojUser,
|
||||
LocalMarkdownConfig,
|
||||
LocalOrgConfig,
|
||||
LocalPlaintextConfig,
|
||||
)
|
||||
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
|
||||
from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
|
||||
from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
|
||||
from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries
|
||||
from khoj.routers.indexer import configure_content
|
||||
from khoj.search_type import image_search, text_search
|
||||
from khoj.utils import fs_syncer, state
|
||||
from khoj.utils.config import SearchModels
|
||||
from khoj.utils.constants import web_directory
|
||||
from khoj.utils.helpers import resolve_absolute_path
|
||||
from khoj.utils.rawconfig import (
|
||||
ContentConfig,
|
||||
ImageContentConfig,
|
||||
SearchConfig,
|
||||
ImageSearchConfig,
|
||||
)
|
||||
from khoj.utils import state, fs_syncer
|
||||
from khoj.routers.indexer import configure_content
|
||||
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
|
||||
from database.models import (
|
||||
KhojApiUser,
|
||||
LocalOrgConfig,
|
||||
LocalMarkdownConfig,
|
||||
LocalPlaintextConfig,
|
||||
GithubConfig,
|
||||
KhojUser,
|
||||
GithubRepoConfig,
|
||||
)
|
||||
|
||||
from khoj.utils.rawconfig import ContentConfig, ImageContentConfig, ImageSearchConfig, SearchConfig
|
||||
from tests.helpers import (
|
||||
UserFactory,
|
||||
ChatModelOptionsFactory,
|
||||
OpenAIProcessorConversationConfigFactory,
|
||||
OfflineChatProcessorConversationConfigFactory,
|
||||
UserConversationProcessorConfigFactory,
|
||||
OpenAIProcessorConversationConfigFactory,
|
||||
SubscriptionFactory,
|
||||
UserConversationProcessorConfigFactory,
|
||||
UserFactory,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import factory
|
||||
import os
|
||||
|
||||
from database.models import (
|
||||
from khoj.database.models import (
|
||||
KhojUser,
|
||||
KhojApiUser,
|
||||
ChatModelOptions,
|
||||
|
|
|
@ -1,23 +1,23 @@
|
|||
# Standard Modules
|
||||
from io import BytesIO
|
||||
from PIL import Image
|
||||
from urllib.parse import quote
|
||||
|
||||
import pytest
|
||||
from fastapi import FastAPI
|
||||
|
||||
# External Packages
|
||||
from fastapi.testclient import TestClient
|
||||
from fastapi import FastAPI
|
||||
import pytest
|
||||
from PIL import Image
|
||||
|
||||
# Internal Packages
|
||||
from khoj.configure import configure_routes, configure_search_types
|
||||
from khoj.database.adapters import EntryAdapters
|
||||
from khoj.database.models import KhojApiUser, KhojUser
|
||||
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
|
||||
from khoj.search_type import image_search, text_search
|
||||
from khoj.utils import state
|
||||
from khoj.utils.state import search_models, content_index, config
|
||||
from khoj.search_type import text_search, image_search
|
||||
from khoj.utils.rawconfig import ContentConfig, SearchConfig
|
||||
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
|
||||
from database.models import KhojUser, KhojApiUser
|
||||
from database.adapters import EntryAdapters
|
||||
from khoj.utils.state import config, content_index, search_models
|
||||
|
||||
|
||||
# Test
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
# Standard Packages
|
||||
import urllib.parse
|
||||
from urllib.parse import quote
|
||||
|
||||
# External Packages
|
||||
import pytest
|
||||
|
@ -54,6 +55,26 @@ def test_chat_with_no_chat_history_or_retrieved_content_gpt4all(client_offline_c
|
|||
)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
@pytest.mark.chatquality
|
||||
@pytest.mark.django_db(transaction=True)
|
||||
def test_chat_with_online_content(chat_client):
|
||||
# Act
|
||||
q = "/online give me the link to paul graham's essay how to do great work"
|
||||
encoded_q = quote(q, safe="")
|
||||
response = chat_client.get(f"/api/chat?q={encoded_q}&stream=true")
|
||||
response_message = response.content.decode("utf-8")
|
||||
|
||||
response_message = response_message.split("### compiled references")[0]
|
||||
|
||||
# Assert
|
||||
expected_responses = ["http://www.paulgraham.com/greatwork.html"]
|
||||
assert response.status_code == 200
|
||||
assert any([expected_response in response_message for expected_response in expected_responses]), (
|
||||
"Expected assistants name, [K|k]hoj, in response but got: " + response_message
|
||||
)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
@pytest.mark.chatquality
|
||||
@pytest.mark.django_db(transaction=True)
|
||||
|
|
|
@ -4,7 +4,7 @@ from pathlib import Path
|
|||
import os
|
||||
|
||||
# Internal Packages
|
||||
from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries
|
||||
from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
|
||||
from khoj.utils.fs_syncer import get_markdown_files
|
||||
from khoj.utils.rawconfig import TextContentConfig
|
||||
|
||||
|
|
|
@ -1,24 +1,14 @@
|
|||
# Standard Modules
|
||||
from io import BytesIO
|
||||
from PIL import Image
|
||||
from urllib.parse import quote
|
||||
import pytest
|
||||
|
||||
# External Packages
|
||||
from fastapi.testclient import TestClient
|
||||
from fastapi import FastAPI, UploadFile
|
||||
from io import BytesIO
|
||||
import pytest
|
||||
|
||||
from khoj.database.models import KhojApiUser, KhojUser
|
||||
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
|
||||
|
||||
# Internal Packages
|
||||
from khoj.configure import configure_routes, configure_search_types
|
||||
from khoj.utils import state
|
||||
from khoj.utils.state import search_models, content_index, config
|
||||
from khoj.search_type import text_search, image_search
|
||||
from khoj.utils.rawconfig import ContentConfig, SearchConfig
|
||||
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
|
||||
from database.models import KhojUser, KhojApiUser
|
||||
from database.adapters import EntryAdapters
|
||||
from khoj.search_type import text_search
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# Standard Packages
|
||||
import os
|
||||
import urllib.parse
|
||||
from urllib.parse import quote
|
||||
|
||||
# External Packages
|
||||
import pytest
|
||||
|
@ -10,7 +11,7 @@ from khoj.processor.conversation import prompts
|
|||
# Internal Packages
|
||||
from khoj.processor.conversation.utils import message_to_log
|
||||
from tests.helpers import ConversationFactory
|
||||
from database.models import KhojUser
|
||||
from khoj.database.models import KhojUser
|
||||
|
||||
# Initialize variables for tests
|
||||
api_key = os.getenv("OPENAI_API_KEY")
|
||||
|
@ -54,6 +55,26 @@ def test_chat_with_no_chat_history_or_retrieved_content(chat_client):
|
|||
)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
@pytest.mark.chatquality
|
||||
@pytest.mark.django_db(transaction=True)
|
||||
def test_chat_with_online_content(chat_client):
|
||||
# Act
|
||||
q = "/online give me the link to paul graham's essay how to do great work"
|
||||
encoded_q = quote(q, safe="")
|
||||
response = chat_client.get(f"/api/chat?q={encoded_q}&stream=true")
|
||||
response_message = response.content.decode("utf-8")
|
||||
|
||||
response_message = response_message.split("### compiled references")[0]
|
||||
|
||||
# Assert
|
||||
expected_responses = ["http://www.paulgraham.com/greatwork.html"]
|
||||
assert response.status_code == 200
|
||||
assert any([expected_response in response_message for expected_response in expected_responses]), (
|
||||
"Expected assistants name, [K|k]hoj, in response but got: " + response_message
|
||||
)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
@pytest.mark.django_db(transaction=True)
|
||||
@pytest.mark.chatquality
|
||||
|
|
|
@ -3,8 +3,8 @@ import json
|
|||
import os
|
||||
|
||||
# Internal Packages
|
||||
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
|
||||
from khoj.processor.text_to_entries import TextToEntries
|
||||
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
|
||||
from khoj.processor.content.text_to_entries import TextToEntries
|
||||
from khoj.utils.helpers import is_none_or_empty
|
||||
from khoj.utils.rawconfig import Entry
|
||||
from khoj.utils.fs_syncer import get_org_files
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
import datetime
|
||||
|
||||
# Internal Packages
|
||||
from khoj.processor.data_sources.org_mode import orgnode
|
||||
from khoj.processor.content.org_mode import orgnode
|
||||
|
||||
|
||||
# Test
|
||||
|
|
|
@ -3,7 +3,7 @@ import json
|
|||
import os
|
||||
|
||||
# Internal Packages
|
||||
from khoj.processor.data_sources.pdf.pdf_to_entries import PdfToEntries
|
||||
from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
|
||||
|
||||
from khoj.utils.fs_syncer import get_pdf_files
|
||||
from khoj.utils.rawconfig import TextContentConfig
|
||||
|
|
|
@ -3,11 +3,12 @@ import json
|
|||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from khoj.database.models import KhojUser, LocalPlaintextConfig
|
||||
from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
|
||||
|
||||
# Internal Packages
|
||||
from khoj.utils.fs_syncer import get_plaintext_files
|
||||
from khoj.utils.rawconfig import TextContentConfig
|
||||
from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries
|
||||
from database.models import LocalPlaintextConfig, KhojUser
|
||||
|
||||
|
||||
def test_plaintext_file(tmp_path):
|
||||
|
|
|
@ -1,19 +1,20 @@
|
|||
# System Packages
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import os
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# External Packages
|
||||
import pytest
|
||||
|
||||
from khoj.database.models import Entry, GithubConfig, KhojUser, LocalOrgConfig
|
||||
from khoj.processor.content.github.github_to_entries import GithubToEntries
|
||||
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
|
||||
|
||||
# Internal Packages
|
||||
from khoj.search_type import text_search
|
||||
from khoj.utils.rawconfig import ContentConfig, SearchConfig
|
||||
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
|
||||
from khoj.processor.data_sources.github.github_to_entries import GithubToEntries
|
||||
from khoj.utils.fs_syncer import collect_files, get_org_files
|
||||
from database.models import LocalOrgConfig, KhojUser, Entry, GithubConfig
|
||||
from khoj.utils.rawconfig import ContentConfig, SearchConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue