From c9db5321e7d63a3c6c619d9e30e499b72e7f2fdf Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 2 Jul 2023 16:21:21 -0700 Subject: [PATCH] Remove unused org-music as an indexable content type from Khoj Org-music was just a custom content type that worked with org-music. It was mostly only useful for me. Cleaning up that code will reduce the number of content types for khoj to manage. --- docker-compose.yml | 1 - src/interface/emacs/khoj.el | 20 +++++--------------- src/khoj/configure.py | 12 ------------ src/khoj/interface/web/index.html | 8 -------- src/khoj/processor/conversation/prompts.py | 6 +----- src/khoj/routers/api.py | 14 -------------- src/khoj/utils/config.py | 2 -- src/khoj/utils/constants.py | 6 ------ src/khoj/utils/rawconfig.py | 1 - tests/test_client.py | 6 +++--- 10 files changed, 9 insertions(+), 67 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 84f8d3f3..ec9af160 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,7 +19,6 @@ services: - ./tests/data/org/:/data/org/ - ./tests/data/images/:/data/images/ - ./tests/data/ledger/:/data/ledger/ - - ./tests/data/music/:/data/music/ - ./tests/data/markdown/:/data/markdown/ - ./tests/data/pdf/:/data/pdf/ # Embeddings and models are populated after the first run diff --git a/src/interface/emacs/khoj.el b/src/interface/emacs/khoj.el index 6f08ab9c..ae625f9f 100644 --- a/src/interface/emacs/khoj.el +++ b/src/interface/emacs/khoj.el @@ -95,8 +95,7 @@ (const "markdown") (const "ledger") (const "image") - (const "pdf") - (const "music"))) + (const "pdf"))) ;; -------------------------- @@ -122,7 +121,6 @@ (declare-function org-element-type "org-mode" (ELEMENT)) (declare-function beancount-mode "beancount" ()) (declare-function markdown-mode "markdown-mode" ()) -(declare-function org-music-mode "org-music" ()) (declare-function which-key--show-keymap "which-key" (KEYMAP-NAME KEYMAP &optional PRIOR-ARGS ALL NO-PAGING FILTER)) @@ -142,9 +140,7 @@ NO-PAGING FILTER)) 
(when (member 'image enabled-content-types) "C-x i | image\n") (when (member 'pdf enabled-content-types) - "C-x p | pdf\n") - (when (member 'music enabled-content-types) - "C-x M | music\n")))) + "C-x p | pdf\n")))) (defvar khoj--rerank nil "Track when re-rank of results triggered.") (defvar khoj--reference-count 0 "Track number of references currently in chat bufffer.") @@ -152,7 +148,6 @@ NO-PAGING FILTER)) (defun khoj--search-org () "Set content-type to `org-mode'." (interactive) (setq khoj--content-type "org")) (defun khoj--search-ledger () "Set content-type to `ledger'." (interactive) (setq khoj--content-type "ledger")) (defun khoj--search-images () "Set content-type to image." (interactive) (setq khoj--content-type "image")) -(defun khoj--search-music () "Set content-type to music." (interactive) (setq khoj--content-type "music")) (defun khoj--search-pdf () "Set content-type to pdf." (interactive) (setq khoj--content-type "pdf")) (defun khoj--improve-rank () "Use cross-encoder to rerank search results." (interactive) (khoj--incremental-search t)) (defun khoj--make-search-keymap (&optional existing-keymap) @@ -170,8 +165,6 @@ NO-PAGING FILTER)) (define-key kmap (kbd "C-x i") #'khoj--search-images)) (when (member 'pdf enabled-content-types) (define-key kmap (kbd "C-x p") #'khoj--search-pdf)) - (when (member 'music enabled-content-types) - (define-key kmap (kbd "C-x M") #'khoj--search-music)) kmap)) (defvar khoj--keymap nil "Track Khoj keymap in this variable.") @@ -621,7 +614,6 @@ CONFIG is json obtained from Khoj config API." 
(let ((enabled-content-types (khoj--get-enabled-content-types)) (file-extension (file-name-extension buffer-name))) (cond - ((and (member 'music enabled-content-types) (equal buffer-name "Music.org")) "music") ((and (member 'ledger enabled-content-types) (or (equal file-extension "bean") (equal file-extension "beancount"))) "ledger") ((and (member 'org enabled-content-types) (equal file-extension "org")) "org") ((and (member 'org enabled-content-types) (equal file-extension "pdf")) "pdf") @@ -678,7 +670,7 @@ Render results in BUFFER-NAME using QUERY, CONTENT-TYPE." (json-response (json-parse-buffer :object-type 'alist))) (erase-buffer) (insert - (cond ((or (equal content-type "org") (equal content-type "music")) (khoj--extract-entries-as-org json-response query)) + (cond ((equal content-type "org") (khoj--extract-entries-as-org json-response query)) ((equal content-type "markdown") (khoj--extract-entries-as-markdown json-response query)) ((equal content-type "pdf") (khoj--extract-entries-as-pdf json-response query)) ((equal content-type "ledger") (khoj--extract-entries-as-ledger json-response query)) @@ -697,8 +689,6 @@ Render results in BUFFER-NAME using QUERY, CONTENT-TYPE." ((equal content-type "markdown") (progn (markdown-mode) (visual-line-mode))) ((equal content-type "ledger") (beancount-mode)) - ((equal content-type "music") (progn (org-mode) - (org-music-mode))) ((equal content-type "image") (progn (shr-render-region (point-min) (point-max)) (goto-char (point-min)))) (t (fundamental-mode)))) @@ -920,7 +910,7 @@ RECEIVE-DATE is the message receive date." (remove-hook 'minibuffer-exit-hook #'khoj--teardown-incremental-search)) (defun khoj-incremental () - "Natural, Incremental Search for your personal notes, transactions and music." + "Natural, Incremental Search for your personal notes and documents." 
(interactive) (let* ((khoj-buffer-name (get-buffer-create khoj--search-buffer-name))) ;; switch to khoj results buffer @@ -1014,7 +1004,7 @@ Paragraph only starts at first text after blank line." ;; set content type to: last used > based on current buffer > default type :init-value (lambda (obj) (oset obj value (format "--content-type=%s" (or khoj--content-type (khoj--buffer-name-to-content-type (buffer-name)))))) ;; dynamically set choices to content types enabled on khoj backend - :choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "ledger" "music" "image"))) + :choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "ledger" "image"))) (transient-define-suffix khoj--search-command (&optional args) (interactive (list (transient-args transient-current-command))) diff --git a/src/khoj/configure.py b/src/khoj/configure.py index 19a07d44..8167d672 100644 --- a/src/khoj/configure.py +++ b/src/khoj/configure.py @@ -106,18 +106,6 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, filters=[DateFilter(), WordFilter(), FileFilter()], ) - # Initialize Org Music Search - if (t == state.SearchType.Music or t == None) and config.content_type.music and config.search_type.asymmetric: - logger.info("🎺 Setting up search for org-music") - # Extract Entries, Generate Music Embeddings - model.music_search = text_search.setup( - OrgToJsonl, - config.content_type.music, - search_config=config.search_type.asymmetric, - regenerate=regenerate, - filters=[DateFilter(), WordFilter()], - ) - # Initialize Markdown Search if ( (t == state.SearchType.Markdown or t == None) diff --git a/src/khoj/interface/web/index.html b/src/khoj/interface/web/index.html index fca3b858..4f95f0e2 100644 --- a/src/khoj/interface/web/index.html +++ b/src/khoj/interface/web/index.html @@ -88,8 +88,6 @@ results = render_markdown(query, data); } else if (type === 
"org") { results = render_org(query, data, "org-"); - } else if (type === "music") { - results = render_org(query, data, "music-"); } else if (type === "image") { results = data.map(render_image).join(''); } else if (type === "ledger") { @@ -371,17 +369,14 @@ .results-github { text-align: left; } - .results-music, .results-org { text-align: left; white-space: pre-line; } - .results-music h3, .results-org h3 { margin: 20px 0 0 0; font-size: larger; } - span.music-task-status, span.org-task-status { color: white; padding: 3.5px 3.5px 0; @@ -390,15 +385,12 @@ background-color: #eab308; font-size: medium; } - span.music-task-status.todo, span.org-task-status.todo { background-color: #3b82f6 } - span.music-task-status.done, span.org-task-status.done { background-color: #22c55e; } - span.music-task-tag, span.org-task-tag { color: white; padding: 3.5px 3.5px 0; diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index f40ed171..b9fa5fdd 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -143,19 +143,15 @@ search_type = """ Objective: Extract search type from user query and return information as JSON Allowed search types are listed below: - - search-type=["notes","ledger","image","music", "pdf"] + - search-type=["notes","ledger","image", "pdf"] Some examples are given below for reference: Q:What fiction book was I reading last week about AI starship? A:{ "search-type": "notes" } Q: What did the lease say about early termination A: { "search-type": "pdf" } -Q:Play some calm classical music? -A:{ "search-type": "music" } Q:How much did I spend at Subway for dinner last time? A:{ "search-type": "ledger" } -Q:What was that popular Sri lankan song that Alex had mentioned? -A:{ "search-type": "music" } Q:Can you recommend a movie to watch from my notes? A:{ "search-type": "notes" } Q:When did I buy Groceries last? 
diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 91a67589..6e3818d1 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -257,20 +257,6 @@ async def search( ) ] - if (t == SearchType.Music or t == SearchType.All) and state.model.music_search: - # query music library - search_futures += [ - executor.submit( - text_search.query, - user_query, - state.model.music_search, - question_embedding=encoded_asymmetric_query, - rank_results=r or False, - score_threshold=score_threshold, - dedupe=dedupe or True, - ) - ] - if (t == SearchType.Image) and state.model.image_search: # query images search_futures += [ diff --git a/src/khoj/utils/config.py b/src/khoj/utils/config.py index e3bea7b9..b15fe811 100644 --- a/src/khoj/utils/config.py +++ b/src/khoj/utils/config.py @@ -20,7 +20,6 @@ class SearchType(str, Enum): All = "all" Org = "org" Ledger = "ledger" - Music = "music" Markdown = "markdown" Image = "image" Pdf = "pdf" @@ -62,7 +61,6 @@ class ImageSearchModel: class SearchModels: org_search: TextSearchModel = None ledger_search: TextSearchModel = None - music_search: TextSearchModel = None markdown_search: TextSearchModel = None pdf_search: TextSearchModel = None image_search: ImageSearchModel = None diff --git a/src/khoj/utils/constants.py b/src/khoj/utils/constants.py index cbdcdf82..49a55761 100644 --- a/src/khoj/utils/constants.py +++ b/src/khoj/utils/constants.py @@ -41,12 +41,6 @@ default_config = { "batch-size": 50, "use-xmp-metadata": False, }, - "music": { - "input-files": None, - "input-filter": None, - "compressed-jsonl": "~/.khoj/content/music/music.jsonl.gz", - "embeddings-file": "~/.khoj/content/music/music_embeddings.pt", - }, "github": { "pat-token": None, "repos": [], diff --git a/src/khoj/utils/rawconfig.py b/src/khoj/utils/rawconfig.py index 5d9dcff4..cf3b4f29 100644 --- a/src/khoj/utils/rawconfig.py +++ b/src/khoj/utils/rawconfig.py @@ -74,7 +74,6 @@ class ContentConfig(ConfigBase): org: Optional[TextContentConfig] 
ledger: Optional[TextContentConfig] image: Optional[ImageContentConfig] - music: Optional[TextContentConfig] markdown: Optional[TextContentConfig] pdf: Optional[TextContentConfig] github: Optional[GithubContentConfig] diff --git a/tests/test_client.py b/tests/test_client.py index 57ea08de..976b6770 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -34,7 +34,7 @@ def test_search_with_invalid_content_type(client): # ---------------------------------------------------------------------------------------------------- def test_search_with_valid_content_type(client): - for content_type in ["all", "org", "markdown", "ledger", "image", "music", "pdf", "plugin1"]: + for content_type in ["all", "org", "markdown", "ledger", "image", "pdf", "plugin1"]: # Act response = client.get(f"/api/search?q=random&t={content_type}") # Assert @@ -52,7 +52,7 @@ def test_update_with_invalid_content_type(client): # ---------------------------------------------------------------------------------------------------- def test_update_with_valid_content_type(client): - for content_type in ["org", "markdown", "ledger", "image", "music", "pdf", "plugin1"]: + for content_type in ["org", "markdown", "ledger", "image", "pdf", "plugin1"]: # Act response = client.get(f"/api/update?t={content_type}") # Assert @@ -70,7 +70,7 @@ def test_regenerate_with_invalid_content_type(client): # ---------------------------------------------------------------------------------------------------- def test_regenerate_with_valid_content_type(client): - for content_type in ["org", "markdown", "ledger", "image", "music", "pdf", "plugin1"]: + for content_type in ["org", "markdown", "ledger", "image", "pdf", "plugin1"]: # Act response = client.get(f"/api/update?force=true&t={content_type}") # Assert