From ec06d2c446e466b9f45af570990344faa4b96028 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Thu, 16 Nov 2023 17:19:55 -0800 Subject: [PATCH] Move data indexer files into a separate folder under processor. Update assoc UTs --- .../processor/{github => data_sources}/__init__.py | 0 .../{markdown => data_sources/github}/__init__.py | 0 .../{ => data_sources}/github/github_to_entries.py | 4 ++-- .../{org_mode => data_sources/markdown}/__init__.py | 0 .../markdown/markdown_to_entries.py | 0 .../{ => data_sources}/notion/notion_to_entries.py | 0 .../{pdf => data_sources/org_mode}/__init__.py | 0 .../{ => data_sources}/org_mode/org_to_entries.py | 2 +- .../processor/{ => data_sources}/org_mode/orgnode.py | 0 .../{plaintext => data_sources/pdf}/__init__.py | 0 .../{ => data_sources}/pdf/pdf_to_entries.py | 0 .../processor/data_sources/plaintext/__init__.py | 0 .../plaintext/plaintext_to_entries.py | 0 src/khoj/routers/indexer.py | 12 ++++++------ src/khoj/utils/helpers.py | 1 + tests/conftest.py | 4 ++-- tests/data/config.yml | 2 +- tests/test_client.py | 2 +- tests/test_markdown_to_entries.py | 2 +- tests/test_multiple_users.py | 2 +- tests/test_org_to_entries.py | 2 +- tests/test_orgnode.py | 2 +- tests/test_pdf_to_entries.py | 2 +- tests/test_plaintext_to_entries.py | 2 +- tests/test_text_search.py | 4 ++-- 25 files changed, 22 insertions(+), 21 deletions(-) rename src/khoj/processor/{github => data_sources}/__init__.py (100%) rename src/khoj/processor/{markdown => data_sources/github}/__init__.py (100%) rename src/khoj/processor/{ => data_sources}/github/github_to_entries.py (98%) rename src/khoj/processor/{org_mode => data_sources/markdown}/__init__.py (100%) rename src/khoj/processor/{ => data_sources}/markdown/markdown_to_entries.py (100%) rename src/khoj/processor/{ => data_sources}/notion/notion_to_entries.py (100%) rename src/khoj/processor/{pdf => data_sources/org_mode}/__init__.py (100%) rename src/khoj/processor/{ => data_sources}/org_mode/org_to_entries.py (99%) rename src/khoj/processor/{ => data_sources}/org_mode/orgnode.py (100%) rename src/khoj/processor/{plaintext => data_sources/pdf}/__init__.py (100%) rename src/khoj/processor/{ => data_sources}/pdf/pdf_to_entries.py (100%) create mode 100644 src/khoj/processor/data_sources/plaintext/__init__.py rename src/khoj/processor/{ => data_sources}/plaintext/plaintext_to_entries.py (100%) diff --git a/src/khoj/processor/github/__init__.py b/src/khoj/processor/data_sources/__init__.py similarity index 100% rename from src/khoj/processor/github/__init__.py rename to src/khoj/processor/data_sources/__init__.py diff --git a/src/khoj/processor/markdown/__init__.py b/src/khoj/processor/data_sources/github/__init__.py similarity index 100% rename from src/khoj/processor/markdown/__init__.py rename to src/khoj/processor/data_sources/github/__init__.py diff --git a/src/khoj/processor/github/github_to_entries.py b/src/khoj/processor/data_sources/github/github_to_entries.py similarity index 98% rename from src/khoj/processor/github/github_to_entries.py rename to src/khoj/processor/data_sources/github/github_to_entries.py index 56279453..592cfcf0 100644 --- a/src/khoj/processor/github/github_to_entries.py +++ b/src/khoj/processor/data_sources/github/github_to_entries.py @@ -10,8 +10,8 @@ import requests # Internal Packages from khoj.utils.helpers import timer from khoj.utils.rawconfig import Entry, GithubContentConfig, GithubRepoConfig -from khoj.processor.markdown.markdown_to_entries import MarkdownToEntries -from khoj.processor.org_mode.org_to_entries import OrgToEntries +from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries +from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries from khoj.processor.text_to_entries import TextToEntries from database.models import Entry as DbEntry, GithubConfig, KhojUser diff --git a/src/khoj/processor/org_mode/__init__.py b/src/khoj/processor/data_sources/markdown/__init__.py similarity index 100% rename from src/khoj/processor/org_mode/__init__.py rename to src/khoj/processor/data_sources/markdown/__init__.py diff --git a/src/khoj/processor/markdown/markdown_to_entries.py b/src/khoj/processor/data_sources/markdown/markdown_to_entries.py similarity index 100% rename from src/khoj/processor/markdown/markdown_to_entries.py rename to src/khoj/processor/data_sources/markdown/markdown_to_entries.py diff --git a/src/khoj/processor/notion/notion_to_entries.py b/src/khoj/processor/data_sources/notion/notion_to_entries.py similarity index 100% rename from src/khoj/processor/notion/notion_to_entries.py rename to src/khoj/processor/data_sources/notion/notion_to_entries.py diff --git a/src/khoj/processor/pdf/__init__.py b/src/khoj/processor/data_sources/org_mode/__init__.py similarity index 100% rename from src/khoj/processor/pdf/__init__.py rename to src/khoj/processor/data_sources/org_mode/__init__.py diff --git a/src/khoj/processor/org_mode/org_to_entries.py b/src/khoj/processor/data_sources/org_mode/org_to_entries.py similarity index 99% rename from src/khoj/processor/org_mode/org_to_entries.py rename to src/khoj/processor/data_sources/org_mode/org_to_entries.py index 04ce97e4..0aef9b67 100644 --- a/src/khoj/processor/org_mode/org_to_entries.py +++ b/src/khoj/processor/data_sources/org_mode/org_to_entries.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Iterable, List, Tuple # Internal Packages -from khoj.processor.org_mode import orgnode +from khoj.processor.data_sources.org_mode import orgnode from khoj.processor.text_to_entries import TextToEntries from khoj.utils.helpers import timer from khoj.utils.rawconfig import Entry diff --git a/src/khoj/processor/org_mode/orgnode.py b/src/khoj/processor/data_sources/org_mode/orgnode.py similarity index 100% rename from src/khoj/processor/org_mode/orgnode.py rename to src/khoj/processor/data_sources/org_mode/orgnode.py diff --git a/src/khoj/processor/plaintext/__init__.py b/src/khoj/processor/data_sources/pdf/__init__.py similarity index 100% rename from src/khoj/processor/plaintext/__init__.py rename to src/khoj/processor/data_sources/pdf/__init__.py diff --git a/src/khoj/processor/pdf/pdf_to_entries.py b/src/khoj/processor/data_sources/pdf/pdf_to_entries.py similarity index 100% rename from src/khoj/processor/pdf/pdf_to_entries.py rename to src/khoj/processor/data_sources/pdf/pdf_to_entries.py diff --git a/src/khoj/processor/data_sources/plaintext/__init__.py b/src/khoj/processor/data_sources/plaintext/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/khoj/processor/plaintext/plaintext_to_entries.py b/src/khoj/processor/data_sources/plaintext/plaintext_to_entries.py similarity index 100% rename from src/khoj/processor/plaintext/plaintext_to_entries.py rename to src/khoj/processor/data_sources/plaintext/plaintext_to_entries.py diff --git a/src/khoj/routers/indexer.py b/src/khoj/routers/indexer.py index a7a1249d..a2cb0381 100644 --- a/src/khoj/routers/indexer.py +++ b/src/khoj/routers/indexer.py @@ -10,12 +10,12 @@ from starlette.authentication import requires # Internal Packages from khoj.utils import state, constants -from khoj.processor.markdown.markdown_to_entries import MarkdownToEntries -from khoj.processor.org_mode.org_to_entries import OrgToEntries -from khoj.processor.pdf.pdf_to_entries import PdfToEntries -from khoj.processor.github.github_to_entries import GithubToEntries -from khoj.processor.notion.notion_to_entries import NotionToEntries -from khoj.processor.plaintext.plaintext_to_entries import PlaintextToEntries +from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries +from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries +from khoj.processor.data_sources.pdf.pdf_to_entries import PdfToEntries +from khoj.processor.data_sources.github.github_to_entries import GithubToEntries +from khoj.processor.data_sources.notion.notion_to_entries import NotionToEntries +from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries from khoj.search_type import text_search, image_search from khoj.routers.helpers import update_telemetry_state from khoj.utils.yaml import save_config_to_file_updated_state diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index 3bce67a0..a41de361 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -272,6 +272,7 @@ class ConversationCommand(str, Enum): General = "general" Notes = "notes" Help = "help" + Online = "online" command_descriptions = { diff --git a/tests/conftest.py b/tests/conftest.py index d90bae95..16f0ef1b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,7 +15,7 @@ app = FastAPI() # Internal Packages from khoj.configure import configure_routes, configure_search_types, configure_middleware from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel -from khoj.processor.plaintext.plaintext_to_entries import PlaintextToEntries +from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries from khoj.search_type import image_search, text_search from khoj.utils.config import SearchModels from khoj.utils.constants import web_directory @@ -28,7 +28,7 @@ from khoj.utils.rawconfig import ( ) from khoj.utils import state, fs_syncer from khoj.routers.indexer import configure_content -from khoj.processor.org_mode.org_to_entries import OrgToEntries +from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries from database.models import ( KhojApiUser, LocalOrgConfig, diff --git a/tests/data/config.yml b/tests/data/config.yml index 2d642a09..bb6736ab 100644 --- a/tests/data/config.yml +++ b/tests/data/config.yml @@ -14,4 +14,4 @@ search-type: asymmetric: cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2 encoder: sentence-transformers/msmarco-MiniLM-L-6-v3 -version: 0.14.0 +version: 0.15.0 diff --git a/tests/test_client.py b/tests/test_client.py index f642a727..9c02a05a 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -15,7 +15,7 @@ from khoj.utils import state from khoj.utils.state import search_models, content_index, config from khoj.search_type import text_search, image_search from khoj.utils.rawconfig import ContentConfig, SearchConfig -from khoj.processor.org_mode.org_to_entries import OrgToEntries +from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries from database.models import KhojUser, KhojApiUser from database.adapters import EntryAdapters diff --git a/tests/test_markdown_to_entries.py b/tests/test_markdown_to_entries.py index 4593b23a..9ec88382 100644 --- a/tests/test_markdown_to_entries.py +++ b/tests/test_markdown_to_entries.py @@ -4,7 +4,7 @@ from pathlib import Path import os # Internal Packages -from khoj.processor.markdown.markdown_to_entries import MarkdownToEntries +from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries from khoj.utils.fs_syncer import get_markdown_files from khoj.utils.rawconfig import TextContentConfig diff --git a/tests/test_multiple_users.py b/tests/test_multiple_users.py index 95a2535f..2b1eb2f4 100644 --- a/tests/test_multiple_users.py +++ b/tests/test_multiple_users.py @@ -16,7 +16,7 @@ from khoj.utils import state from khoj.utils.state import search_models, content_index, config from khoj.search_type import text_search, image_search from khoj.utils.rawconfig import ContentConfig, SearchConfig -from khoj.processor.org_mode.org_to_entries import OrgToEntries +from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries from database.models import KhojUser, KhojApiUser from database.adapters import EntryAdapters diff --git a/tests/test_org_to_entries.py b/tests/test_org_to_entries.py index 1eddcf95..3b80873a 100644 --- a/tests/test_org_to_entries.py +++ b/tests/test_org_to_entries.py @@ -3,7 +3,7 @@ import json import os # Internal Packages -from khoj.processor.org_mode.org_to_entries import OrgToEntries +from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries from khoj.processor.text_to_entries import TextToEntries from khoj.utils.helpers import is_none_or_empty from khoj.utils.rawconfig import Entry diff --git a/tests/test_orgnode.py b/tests/test_orgnode.py index c6ed3447..7ee948e2 100644 --- a/tests/test_orgnode.py +++ b/tests/test_orgnode.py @@ -2,7 +2,7 @@ import datetime # Internal Packages -from khoj.processor.org_mode import orgnode +from khoj.processor.data_sources.org_mode import orgnode # Test diff --git a/tests/test_pdf_to_entries.py b/tests/test_pdf_to_entries.py index 3ab44639..ebf53025 100644 --- a/tests/test_pdf_to_entries.py +++ b/tests/test_pdf_to_entries.py @@ -3,7 +3,7 @@ import json import os # Internal Packages -from khoj.processor.pdf.pdf_to_entries import PdfToEntries +from khoj.processor.data_sources.pdf.pdf_to_entries import PdfToEntries from khoj.utils.fs_syncer import get_pdf_files from khoj.utils.rawconfig import TextContentConfig diff --git a/tests/test_plaintext_to_entries.py b/tests/test_plaintext_to_entries.py index 23b0d652..91a6569c 100644 --- a/tests/test_plaintext_to_entries.py +++ b/tests/test_plaintext_to_entries.py @@ -6,7 +6,7 @@ from pathlib import Path # Internal Packages from khoj.utils.fs_syncer import get_plaintext_files from khoj.utils.rawconfig import TextContentConfig -from khoj.processor.plaintext.plaintext_to_entries import PlaintextToEntries +from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries from database.models import LocalPlaintextConfig, KhojUser diff --git a/tests/test_text_search.py b/tests/test_text_search.py index 3d729ab5..ac24c9a0 100644 --- a/tests/test_text_search.py +++ b/tests/test_text_search.py @@ -10,8 +10,8 @@ import pytest # Internal Packages from khoj.search_type import text_search from khoj.utils.rawconfig import ContentConfig, SearchConfig -from khoj.processor.org_mode.org_to_entries import OrgToEntries -from khoj.processor.github.github_to_entries import GithubToEntries +from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries +from khoj.processor.data_sources.github.github_to_entries import GithubToEntries from khoj.utils.fs_syncer import collect_files, get_org_files from database.models import LocalOrgConfig, KhojUser, Entry, GithubConfig