From ec06d2c446e466b9f45af570990344faa4b96028 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Thu, 16 Nov 2023 17:19:55 -0800 Subject: [PATCH 01/16] Move data indexer files into a separate folder under processor. Update assoc UTs --- .../processor/{github => data_sources}/__init__.py | 0 .../{markdown => data_sources/github}/__init__.py | 0 .../{ => data_sources}/github/github_to_entries.py | 4 ++-- .../{org_mode => data_sources/markdown}/__init__.py | 0 .../markdown/markdown_to_entries.py | 0 .../{ => data_sources}/notion/notion_to_entries.py | 0 .../{pdf => data_sources/org_mode}/__init__.py | 0 .../{ => data_sources}/org_mode/org_to_entries.py | 2 +- .../processor/{ => data_sources}/org_mode/orgnode.py | 0 .../{plaintext => data_sources/pdf}/__init__.py | 0 .../{ => data_sources}/pdf/pdf_to_entries.py | 0 .../processor/data_sources/plaintext/__init__.py | 0 .../plaintext/plaintext_to_entries.py | 0 src/khoj/routers/indexer.py | 12 ++++++------ src/khoj/utils/helpers.py | 1 + tests/conftest.py | 4 ++-- tests/data/config.yml | 2 +- tests/test_client.py | 2 +- tests/test_markdown_to_entries.py | 2 +- tests/test_multiple_users.py | 2 +- tests/test_org_to_entries.py | 2 +- tests/test_orgnode.py | 2 +- tests/test_pdf_to_entries.py | 2 +- tests/test_plaintext_to_entries.py | 2 +- tests/test_text_search.py | 4 ++-- 25 files changed, 22 insertions(+), 21 deletions(-) rename src/khoj/processor/{github => data_sources}/__init__.py (100%) rename src/khoj/processor/{markdown => data_sources/github}/__init__.py (100%) rename src/khoj/processor/{ => data_sources}/github/github_to_entries.py (98%) rename src/khoj/processor/{org_mode => data_sources/markdown}/__init__.py (100%) rename src/khoj/processor/{ => data_sources}/markdown/markdown_to_entries.py (100%) rename src/khoj/processor/{ => data_sources}/notion/notion_to_entries.py (100%) rename src/khoj/processor/{pdf => data_sources/org_mode}/__init__.py (100%) rename src/khoj/processor/{ => data_sources}/org_mode/org_to_entries.py (99%) rename src/khoj/processor/{ => data_sources}/org_mode/orgnode.py (100%) rename src/khoj/processor/{plaintext => data_sources/pdf}/__init__.py (100%) rename src/khoj/processor/{ => data_sources}/pdf/pdf_to_entries.py (100%) create mode 100644 src/khoj/processor/data_sources/plaintext/__init__.py rename src/khoj/processor/{ => data_sources}/plaintext/plaintext_to_entries.py (100%) diff --git a/src/khoj/processor/github/__init__.py b/src/khoj/processor/data_sources/__init__.py similarity index 100% rename from src/khoj/processor/github/__init__.py rename to src/khoj/processor/data_sources/__init__.py diff --git a/src/khoj/processor/markdown/__init__.py b/src/khoj/processor/data_sources/github/__init__.py similarity index 100% rename from src/khoj/processor/markdown/__init__.py rename to src/khoj/processor/data_sources/github/__init__.py diff --git a/src/khoj/processor/github/github_to_entries.py b/src/khoj/processor/data_sources/github/github_to_entries.py similarity index 98% rename from src/khoj/processor/github/github_to_entries.py rename to src/khoj/processor/data_sources/github/github_to_entries.py index 56279453..592cfcf0 100644 --- a/src/khoj/processor/github/github_to_entries.py +++ b/src/khoj/processor/data_sources/github/github_to_entries.py @@ -10,8 +10,8 @@ import requests # Internal Packages from khoj.utils.helpers import timer from khoj.utils.rawconfig import Entry, GithubContentConfig, GithubRepoConfig -from khoj.processor.markdown.markdown_to_entries import MarkdownToEntries -from khoj.processor.org_mode.org_to_entries import OrgToEntries +from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries +from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries from khoj.processor.text_to_entries import TextToEntries from database.models import Entry as DbEntry, GithubConfig, KhojUser diff --git a/src/khoj/processor/org_mode/__init__.py b/src/khoj/processor/data_sources/markdown/__init__.py similarity index 100% rename from src/khoj/processor/org_mode/__init__.py rename to src/khoj/processor/data_sources/markdown/__init__.py diff --git a/src/khoj/processor/markdown/markdown_to_entries.py b/src/khoj/processor/data_sources/markdown/markdown_to_entries.py similarity index 100% rename from src/khoj/processor/markdown/markdown_to_entries.py rename to src/khoj/processor/data_sources/markdown/markdown_to_entries.py diff --git a/src/khoj/processor/notion/notion_to_entries.py b/src/khoj/processor/data_sources/notion/notion_to_entries.py similarity index 100% rename from src/khoj/processor/notion/notion_to_entries.py rename to src/khoj/processor/data_sources/notion/notion_to_entries.py diff --git a/src/khoj/processor/pdf/__init__.py b/src/khoj/processor/data_sources/org_mode/__init__.py similarity index 100% rename from src/khoj/processor/pdf/__init__.py rename to src/khoj/processor/data_sources/org_mode/__init__.py diff --git a/src/khoj/processor/org_mode/org_to_entries.py b/src/khoj/processor/data_sources/org_mode/org_to_entries.py similarity index 99% rename from src/khoj/processor/org_mode/org_to_entries.py rename to src/khoj/processor/data_sources/org_mode/org_to_entries.py index 04ce97e4..0aef9b67 100644 --- a/src/khoj/processor/org_mode/org_to_entries.py +++ b/src/khoj/processor/data_sources/org_mode/org_to_entries.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Iterable, List, Tuple # Internal Packages -from khoj.processor.org_mode import orgnode +from khoj.processor.data_sources.org_mode import orgnode from khoj.processor.text_to_entries import TextToEntries from khoj.utils.helpers import timer from khoj.utils.rawconfig import Entry diff --git a/src/khoj/processor/org_mode/orgnode.py b/src/khoj/processor/data_sources/org_mode/orgnode.py similarity index 100% rename from src/khoj/processor/org_mode/orgnode.py rename to src/khoj/processor/data_sources/org_mode/orgnode.py diff --git a/src/khoj/processor/plaintext/__init__.py b/src/khoj/processor/data_sources/pdf/__init__.py similarity index 100% rename from src/khoj/processor/plaintext/__init__.py rename to src/khoj/processor/data_sources/pdf/__init__.py diff --git a/src/khoj/processor/pdf/pdf_to_entries.py b/src/khoj/processor/data_sources/pdf/pdf_to_entries.py similarity index 100% rename from src/khoj/processor/pdf/pdf_to_entries.py rename to src/khoj/processor/data_sources/pdf/pdf_to_entries.py diff --git a/src/khoj/processor/data_sources/plaintext/__init__.py b/src/khoj/processor/data_sources/plaintext/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/khoj/processor/plaintext/plaintext_to_entries.py b/src/khoj/processor/data_sources/plaintext/plaintext_to_entries.py similarity index 100% rename from src/khoj/processor/plaintext/plaintext_to_entries.py rename to src/khoj/processor/data_sources/plaintext/plaintext_to_entries.py diff --git a/src/khoj/routers/indexer.py b/src/khoj/routers/indexer.py index a7a1249d..a2cb0381 100644 --- a/src/khoj/routers/indexer.py +++ b/src/khoj/routers/indexer.py @@ -10,12 +10,12 @@ from starlette.authentication import requires # Internal Packages from khoj.utils import state, constants -from khoj.processor.markdown.markdown_to_entries import MarkdownToEntries -from khoj.processor.org_mode.org_to_entries import OrgToEntries -from khoj.processor.pdf.pdf_to_entries import PdfToEntries -from khoj.processor.github.github_to_entries import GithubToEntries -from khoj.processor.notion.notion_to_entries import NotionToEntries -from khoj.processor.plaintext.plaintext_to_entries import PlaintextToEntries +from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries +from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries +from khoj.processor.data_sources.pdf.pdf_to_entries import PdfToEntries +from khoj.processor.data_sources.github.github_to_entries import GithubToEntries +from khoj.processor.data_sources.notion.notion_to_entries import NotionToEntries +from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries from khoj.search_type import text_search, image_search from khoj.routers.helpers import update_telemetry_state from khoj.utils.yaml import save_config_to_file_updated_state diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index 3bce67a0..a41de361 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -272,6 +272,7 @@ class ConversationCommand(str, Enum): General = "general" Notes = "notes" Help = "help" + Online = "online" command_descriptions = { diff --git a/tests/conftest.py b/tests/conftest.py index d90bae95..16f0ef1b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,7 +15,7 @@ app = FastAPI() # Internal Packages from khoj.configure import configure_routes, configure_search_types, configure_middleware from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel -from khoj.processor.plaintext.plaintext_to_entries import PlaintextToEntries +from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries from khoj.search_type import image_search, text_search from khoj.utils.config import SearchModels from khoj.utils.constants import web_directory @@ -28,7 +28,7 @@ from khoj.utils.rawconfig import ( ) from khoj.utils import state, fs_syncer from khoj.routers.indexer import configure_content -from khoj.processor.org_mode.org_to_entries import OrgToEntries +from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries from database.models import ( KhojApiUser, LocalOrgConfig, diff --git a/tests/data/config.yml b/tests/data/config.yml index 2d642a09..bb6736ab 100644 --- a/tests/data/config.yml +++ b/tests/data/config.yml @@ -14,4 +14,4 @@ search-type: asymmetric: cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2 encoder: sentence-transformers/msmarco-MiniLM-L-6-v3 -version: 0.14.0 +version: 0.15.0 diff --git a/tests/test_client.py b/tests/test_client.py index f642a727..9c02a05a 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -15,7 +15,7 @@ from khoj.utils import state from khoj.utils.state import search_models, content_index, config from khoj.search_type import text_search, image_search from khoj.utils.rawconfig import ContentConfig, SearchConfig -from khoj.processor.org_mode.org_to_entries import OrgToEntries +from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries from database.models import KhojUser, KhojApiUser from database.adapters import EntryAdapters diff --git a/tests/test_markdown_to_entries.py b/tests/test_markdown_to_entries.py index 4593b23a..9ec88382 100644 --- a/tests/test_markdown_to_entries.py +++ b/tests/test_markdown_to_entries.py @@ -4,7 +4,7 @@ from pathlib import Path import os # Internal Packages -from khoj.processor.markdown.markdown_to_entries import MarkdownToEntries +from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries from khoj.utils.fs_syncer import get_markdown_files from khoj.utils.rawconfig import TextContentConfig diff --git a/tests/test_multiple_users.py b/tests/test_multiple_users.py index 95a2535f..2b1eb2f4 100644 --- a/tests/test_multiple_users.py +++ b/tests/test_multiple_users.py @@ -16,7 +16,7 @@ from khoj.utils import state from khoj.utils.state import search_models, content_index, config from khoj.search_type import text_search, image_search from khoj.utils.rawconfig import ContentConfig, SearchConfig -from khoj.processor.org_mode.org_to_entries import OrgToEntries +from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries from database.models import KhojUser, KhojApiUser from database.adapters import EntryAdapters diff --git a/tests/test_org_to_entries.py b/tests/test_org_to_entries.py index 1eddcf95..3b80873a 100644 --- a/tests/test_org_to_entries.py +++ b/tests/test_org_to_entries.py @@ -3,7 +3,7 @@ import json import os # Internal Packages -from khoj.processor.org_mode.org_to_entries import OrgToEntries +from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries from khoj.processor.text_to_entries import TextToEntries from khoj.utils.helpers import is_none_or_empty from khoj.utils.rawconfig import Entry diff --git a/tests/test_orgnode.py b/tests/test_orgnode.py index c6ed3447..7ee948e2 100644 --- a/tests/test_orgnode.py +++ b/tests/test_orgnode.py @@ -2,7 +2,7 @@ import datetime # Internal Packages -from khoj.processor.org_mode import orgnode +from khoj.processor.data_sources.org_mode import orgnode # Test diff --git a/tests/test_pdf_to_entries.py b/tests/test_pdf_to_entries.py index 3ab44639..ebf53025 100644 --- a/tests/test_pdf_to_entries.py +++ b/tests/test_pdf_to_entries.py @@ -3,7 +3,7 @@ import json import os # Internal Packages -from khoj.processor.pdf.pdf_to_entries import PdfToEntries +from khoj.processor.data_sources.pdf.pdf_to_entries import PdfToEntries from khoj.utils.fs_syncer import get_pdf_files from khoj.utils.rawconfig import TextContentConfig diff --git a/tests/test_plaintext_to_entries.py b/tests/test_plaintext_to_entries.py index 23b0d652..91a6569c 100644 --- a/tests/test_plaintext_to_entries.py +++ b/tests/test_plaintext_to_entries.py @@ -6,7 +6,7 @@ from pathlib import Path # Internal Packages from khoj.utils.fs_syncer import get_plaintext_files from khoj.utils.rawconfig import TextContentConfig -from khoj.processor.plaintext.plaintext_to_entries import PlaintextToEntries +from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries from database.models import LocalPlaintextConfig, KhojUser diff --git a/tests/test_text_search.py b/tests/test_text_search.py index 3d729ab5..ac24c9a0 100644 --- a/tests/test_text_search.py +++ b/tests/test_text_search.py @@ -10,8 +10,8 @@ import pytest # Internal Packages from khoj.search_type import text_search from khoj.utils.rawconfig import ContentConfig, SearchConfig -from khoj.processor.org_mode.org_to_entries import OrgToEntries -from khoj.processor.github.github_to_entries import GithubToEntries +from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries +from khoj.processor.data_sources.github.github_to_entries import GithubToEntries from khoj.utils.fs_syncer import collect_files, get_org_files from database.models import LocalOrgConfig, KhojUser, Entry, GithubConfig From a0b12b001a34afda3fedfaef3f324a84e74bed75 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Fri, 17 Nov 2023 11:04:36 -0800 Subject: [PATCH 02/16] Provide in-line rendering when output matches certain views --- src/interface/desktop/chat.html | 12 ++++++++++++ src/khoj/interface/web/chat.html | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/interface/desktop/chat.html b/src/interface/desktop/chat.html index ebf93195..09bc6ff7 100644 --- a/src/interface/desktop/chat.html +++ b/src/interface/desktop/chat.html @@ -139,6 +139,8 @@ newHTML = newHTML.replace(/__([\s\S]*?)__/g, '$1'); // Remove any text between [INST] and tags. These are spurious instructions for the AI chat model. newHTML = newHTML.replace(/\[INST\].+(<\/s>)?/g, ''); + // For any text that has single backticks, replace them with tags + newHTML = newHTML.replace(/`([^`]+)`/g, '$1'); return newHTML; } @@ -573,6 +575,16 @@ margin: 10px; } + code.chat-response { + background: var(--primary-hover); + color: var(--primary-inverse); + border-radius: 5px; + padding: 5px; + font-size: 14px; + font-weight: 300; + line-height: 1.5em; + } + button.reference-button { background: var(--background-color); color: var(--main-text-color); diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index 82e3233d..fb7c0b05 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -148,6 +148,8 @@ To get started, just start typing below. You can also type / to see a list of co newHTML = newHTML.replace(/__([\s\S]*?)__/g, '$1'); // Remove any text between [INST] and tags. These are spurious instructions for the AI chat model. newHTML = newHTML.replace(/\[INST\].+(<\/s>)?/g, ''); + // For any text that has single backticks, replace them with tags + newHTML = newHTML.replace(/`([^`]+)`/g, '$1'); return newHTML; } @@ -490,6 +492,16 @@ To get started, just start typing below. You can also type / to see a list of co background: var(--primary-hover); } + code.chat-response { + background: var(--primary-hover); + color: var(--primary-inverse); + border-radius: 5px; + padding: 5px; + font-size: 14px; + font-weight: 300; + line-height: 1.5em; + } + #chat-body { font-size: medium; margin: 0px; From 9ddf3b58c3ac119dd7db914a7192250cffc747a0 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Fri, 17 Nov 2023 12:14:02 -0800 Subject: [PATCH 03/16] Use the markdown parser for rendering the chat messages in the web interface --- src/khoj/interface/web/chat.html | 48 +++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index fb7c0b05..95118895 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -9,6 +9,7 @@ +