Remove unused org-music as an indexable content type from Khoj

Org-music was a custom content type built to work with the org-music
Emacs library. It was mostly only useful to me.

Cleaning up that code reduces the number of content types Khoj has to
manage.
This commit is contained in:
Debanjum Singh Solanky 2023-07-02 16:21:21 -07:00
parent 30459ee4ba
commit c9db5321e7
10 changed files with 9 additions and 67 deletions

View file

@ -19,7 +19,6 @@ services:
- ./tests/data/org/:/data/org/
- ./tests/data/images/:/data/images/
- ./tests/data/ledger/:/data/ledger/
- ./tests/data/music/:/data/music/
- ./tests/data/markdown/:/data/markdown/
- ./tests/data/pdf/:/data/pdf/
# Embeddings and models are populated after the first run

View file

@ -95,8 +95,7 @@
(const "markdown")
(const "ledger")
(const "image")
(const "pdf")
(const "music")))
(const "pdf")))
;; --------------------------
@ -122,7 +121,6 @@
(declare-function org-element-type "org-mode" (ELEMENT))
(declare-function beancount-mode "beancount" ())
(declare-function markdown-mode "markdown-mode" ())
(declare-function org-music-mode "org-music" ())
(declare-function which-key--show-keymap "which-key" (KEYMAP-NAME KEYMAP &optional PRIOR-ARGS ALL
NO-PAGING FILTER))
@ -142,9 +140,7 @@ NO-PAGING FILTER))
(when (member 'image enabled-content-types)
"C-x i | image\n")
(when (member 'pdf enabled-content-types)
"C-x p | pdf\n")
(when (member 'music enabled-content-types)
"C-x M | music\n"))))
"C-x p | pdf\n"))))
(defvar khoj--rerank nil "Track when re-rank of results triggered.")
(defvar khoj--reference-count 0 "Track number of references currently in chat buffer.")
@ -152,7 +148,6 @@ NO-PAGING FILTER))
(defun khoj--search-org () "Set content-type to `org-mode'." (interactive) (setq khoj--content-type "org"))
(defun khoj--search-ledger () "Set content-type to `ledger'." (interactive) (setq khoj--content-type "ledger"))
(defun khoj--search-images () "Set content-type to image." (interactive) (setq khoj--content-type "image"))
(defun khoj--search-music () "Set content-type to music." (interactive) (setq khoj--content-type "music"))
(defun khoj--search-pdf () "Set content-type to pdf." (interactive) (setq khoj--content-type "pdf"))
(defun khoj--improve-rank () "Use cross-encoder to rerank search results." (interactive) (khoj--incremental-search t))
(defun khoj--make-search-keymap (&optional existing-keymap)
@ -170,8 +165,6 @@ NO-PAGING FILTER))
(define-key kmap (kbd "C-x i") #'khoj--search-images))
(when (member 'pdf enabled-content-types)
(define-key kmap (kbd "C-x p") #'khoj--search-pdf))
(when (member 'music enabled-content-types)
(define-key kmap (kbd "C-x M") #'khoj--search-music))
kmap))
(defvar khoj--keymap nil "Track Khoj keymap in this variable.")
@ -621,7 +614,6 @@ CONFIG is json obtained from Khoj config API."
(let ((enabled-content-types (khoj--get-enabled-content-types))
(file-extension (file-name-extension buffer-name)))
(cond
((and (member 'music enabled-content-types) (equal buffer-name "Music.org")) "music")
((and (member 'ledger enabled-content-types) (or (equal file-extension "bean") (equal file-extension "beancount"))) "ledger")
((and (member 'org enabled-content-types) (equal file-extension "org")) "org")
((and (member 'org enabled-content-types) (equal file-extension "pdf")) "pdf")
@ -678,7 +670,7 @@ Render results in BUFFER-NAME using QUERY, CONTENT-TYPE."
(json-response (json-parse-buffer :object-type 'alist)))
(erase-buffer)
(insert
(cond ((or (equal content-type "org") (equal content-type "music")) (khoj--extract-entries-as-org json-response query))
(cond ((equal content-type "org") (khoj--extract-entries-as-org json-response query))
((equal content-type "markdown") (khoj--extract-entries-as-markdown json-response query))
((equal content-type "pdf") (khoj--extract-entries-as-pdf json-response query))
((equal content-type "ledger") (khoj--extract-entries-as-ledger json-response query))
@ -697,8 +689,6 @@ Render results in BUFFER-NAME using QUERY, CONTENT-TYPE."
((equal content-type "markdown") (progn (markdown-mode)
(visual-line-mode)))
((equal content-type "ledger") (beancount-mode))
((equal content-type "music") (progn (org-mode)
(org-music-mode)))
((equal content-type "image") (progn (shr-render-region (point-min) (point-max))
(goto-char (point-min))))
(t (fundamental-mode))))
@ -920,7 +910,7 @@ RECEIVE-DATE is the message receive date."
(remove-hook 'minibuffer-exit-hook #'khoj--teardown-incremental-search))
(defun khoj-incremental ()
"Natural, Incremental Search for your personal notes, transactions and music."
"Natural, Incremental Search for your personal notes and documents."
(interactive)
(let* ((khoj-buffer-name (get-buffer-create khoj--search-buffer-name)))
;; switch to khoj results buffer
@ -1014,7 +1004,7 @@ Paragraph only starts at first text after blank line."
;; set content type to: last used > based on current buffer > default type
:init-value (lambda (obj) (oset obj value (format "--content-type=%s" (or khoj--content-type (khoj--buffer-name-to-content-type (buffer-name))))))
;; dynamically set choices to content types enabled on khoj backend
:choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "ledger" "music" "image")))
:choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "ledger" "image")))
(transient-define-suffix khoj--search-command (&optional args)
(interactive (list (transient-args transient-current-command)))

View file

@ -106,18 +106,6 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
filters=[DateFilter(), WordFilter(), FileFilter()],
)
# Initialize Org Music Search
if (t == state.SearchType.Music or t == None) and config.content_type.music and config.search_type.asymmetric:
logger.info("🎺 Setting up search for org-music")
# Extract Entries, Generate Music Embeddings
model.music_search = text_search.setup(
OrgToJsonl,
config.content_type.music,
search_config=config.search_type.asymmetric,
regenerate=regenerate,
filters=[DateFilter(), WordFilter()],
)
# Initialize Markdown Search
if (
(t == state.SearchType.Markdown or t == None)

View file

@ -88,8 +88,6 @@
results = render_markdown(query, data);
} else if (type === "org") {
results = render_org(query, data, "org-");
} else if (type === "music") {
results = render_org(query, data, "music-");
} else if (type === "image") {
results = data.map(render_image).join('');
} else if (type === "ledger") {
@ -371,17 +369,14 @@
.results-github {
text-align: left;
}
.results-music,
.results-org {
text-align: left;
white-space: pre-line;
}
.results-music h3,
.results-org h3 {
margin: 20px 0 0 0;
font-size: larger;
}
span.music-task-status,
span.org-task-status {
color: white;
padding: 3.5px 3.5px 0;
@ -390,15 +385,12 @@
background-color: #eab308;
font-size: medium;
}
span.music-task-status.todo,
span.org-task-status.todo {
background-color: #3b82f6
}
span.music-task-status.done,
span.org-task-status.done {
background-color: #22c55e;
}
span.music-task-tag,
span.org-task-tag {
color: white;
padding: 3.5px 3.5px 0;

View file

@ -143,19 +143,15 @@ search_type = """
Objective: Extract search type from user query and return information as JSON
Allowed search types are listed below:
- search-type=["notes","ledger","image","music", "pdf"]
- search-type=["notes","ledger","image", "pdf"]
Some examples are given below for reference:
Q:What fiction book was I reading last week about AI starship?
A:{ "search-type": "notes" }
Q: What did the lease say about early termination
A: { "search-type": "pdf" }
Q:Play some calm classical music?
A:{ "search-type": "music" }
Q:How much did I spend at Subway for dinner last time?
A:{ "search-type": "ledger" }
Q:What was that popular Sri lankan song that Alex had mentioned?
A:{ "search-type": "music" }
Q:Can you recommend a movie to watch from my notes?
A:{ "search-type": "notes" }
Q:When did I buy Groceries last?

View file

@ -257,20 +257,6 @@ async def search(
)
]
if (t == SearchType.Music or t == SearchType.All) and state.model.music_search:
# query music library
search_futures += [
executor.submit(
text_search.query,
user_query,
state.model.music_search,
question_embedding=encoded_asymmetric_query,
rank_results=r or False,
score_threshold=score_threshold,
dedupe=dedupe or True,
)
]
if (t == SearchType.Image) and state.model.image_search:
# query images
search_futures += [

View file

@ -20,7 +20,6 @@ class SearchType(str, Enum):
All = "all"
Org = "org"
Ledger = "ledger"
Music = "music"
Markdown = "markdown"
Image = "image"
Pdf = "pdf"
@ -62,7 +61,6 @@ class ImageSearchModel:
class SearchModels:
org_search: TextSearchModel = None
ledger_search: TextSearchModel = None
music_search: TextSearchModel = None
markdown_search: TextSearchModel = None
pdf_search: TextSearchModel = None
image_search: ImageSearchModel = None

View file

@ -41,12 +41,6 @@ default_config = {
"batch-size": 50,
"use-xmp-metadata": False,
},
"music": {
"input-files": None,
"input-filter": None,
"compressed-jsonl": "~/.khoj/content/music/music.jsonl.gz",
"embeddings-file": "~/.khoj/content/music/music_embeddings.pt",
},
"github": {
"pat-token": None,
"repos": [],

View file

@ -74,7 +74,6 @@ class ContentConfig(ConfigBase):
org: Optional[TextContentConfig]
ledger: Optional[TextContentConfig]
image: Optional[ImageContentConfig]
music: Optional[TextContentConfig]
markdown: Optional[TextContentConfig]
pdf: Optional[TextContentConfig]
github: Optional[GithubContentConfig]

View file

@ -34,7 +34,7 @@ def test_search_with_invalid_content_type(client):
# ----------------------------------------------------------------------------------------------------
def test_search_with_valid_content_type(client):
for content_type in ["all", "org", "markdown", "ledger", "image", "music", "pdf", "plugin1"]:
for content_type in ["all", "org", "markdown", "ledger", "image", "pdf", "plugin1"]:
# Act
response = client.get(f"/api/search?q=random&t={content_type}")
# Assert
@ -52,7 +52,7 @@ def test_update_with_invalid_content_type(client):
# ----------------------------------------------------------------------------------------------------
def test_update_with_valid_content_type(client):
for content_type in ["org", "markdown", "ledger", "image", "music", "pdf", "plugin1"]:
for content_type in ["org", "markdown", "ledger", "image", "pdf", "plugin1"]:
# Act
response = client.get(f"/api/update?t={content_type}")
# Assert
@ -70,7 +70,7 @@ def test_regenerate_with_invalid_content_type(client):
# ----------------------------------------------------------------------------------------------------
def test_regenerate_with_valid_content_type(client):
for content_type in ["org", "markdown", "ledger", "image", "music", "pdf", "plugin1"]:
for content_type in ["org", "markdown", "ledger", "image", "pdf", "plugin1"]:
# Act
response = client.get(f"/api/update?force=true&t={content_type}")
# Assert