From 732b2d287ff0826b0f4f140ed0de8f3f27cc36df Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 19 Jul 2022 18:26:16 +0400 Subject: [PATCH] Give the project a short, less generic name. Rename it to Khoj - Semantic Search was just a placeholder used to test the idea out Didn't want to get into naming at that point of time --- Dockerfile | 4 +- README.org | 30 +++++----- config/environment.yml | 2 +- config/environment_osx-arm64.yml | 4 +- src/interface/emacs/README.org | 26 ++++---- .../emacs/{semantic-search.el => khoj.el} | 59 ++++++++++--------- src/interface/web/index.html | 4 +- src/utils/cli.py | 2 +- src/utils/install.py | 8 +-- tests/data/notes/interface_emacs_readme.org | 26 ++++---- tests/data/notes/main_readme.org | 20 +++---- tests/test_asymmetric_search.py | 2 +- tests/test_client.py | 4 +- 13 files changed, 96 insertions(+), 95 deletions(-) rename src/interface/emacs/{semantic-search.el => khoj.el} (71%) diff --git a/Dockerfile b/Dockerfile index 8437d38e..baba33ec 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,11 +19,11 @@ EXPOSE ${PORT} RUN conda env create -f config/environment.yml # Use the conda environment we created to run the application. -# To enable the conda env, we cannot simply RUN `conda activate semantic-search`, +# To enable the conda env, we cannot simply RUN `conda activate khoj`, # since each RUN command in a Dockerfile is a separate bash shell. # The environment would not carry forward. # Instead, we'll use `conda run` to run the application. # There are more arguments required for the script to run, # but these should be passed in through the docker-compose.yml file. 
-ENTRYPOINT ["conda", "run", "--no-capture-output", "--name", "semantic-search", \ +ENTRYPOINT ["conda", "run", "--no-capture-output", "--name", "khoj", \ "python3", "-m", "src.main"] diff --git a/README.org b/README.org index be0865fe..428adfb0 100644 --- a/README.org +++ b/README.org @@ -1,9 +1,9 @@ -[[https://github.com/debanjum/semantic-search/actions/workflows/test.yml/badge.svg]] [[https://github.com/debanjum/semantic-search/actions/workflows/build.yml/badge.svg]] +[[https://github.com/debanjum/khoj/actions/workflows/test.yml/badge.svg]] [[https://github.com/debanjum/khoj/actions/workflows/build.yml/badge.svg]] -* Semantic Search +* Khoj /Allow natural language search on user content like notes, images, transactions using transformer ML models/ - User can interface with semantic-search via the API or [[./src/interface/emacs/semantic-search.el][Emacs]]. All search is done locally[[https://github.com/debanjum/semantic-search#miscellaneous][*]] + User can interface with Khoj via the API or [[./src/interface/emacs/khoj.el][Emacs]]. All search is done locally[[https://github.com/debanjum/khoj#miscellaneous][*]] ** Demo https://user-images.githubusercontent.com/6413477/168417719-8a8bc4e5-8404-42b2-89a7-4493e3d2582c.mp4 @@ -12,7 +12,7 @@ *** 1. Clone #+begin_src shell - git clone https://github.com/debanjum/semantic-search && cd semantic-search + git clone https://github.com/debanjum/khoj && cd khoj #+end_src *** 2. Configure @@ -27,15 +27,15 @@ /Note: The first run will take time. 
Let it run, it's mostly not hung, just generating embeddings/ ** Use - - *Semantic Search via API* - - See [[http://localhost:8000/docs][Semantic Search API Docs]] + - *Khoj via API* + - See [[http://localhost:8000/docs][Khoj API Docs]] - [[http://localhost:8000/search?q=%22what%20is%20the%20meaning%20of%20life%22][Query]] - [[http://localhost:8000/regenerate?t=ledger][Regenerate Embeddings]] - [[https://localhost:8000/ui][Configure Application]] - - *Semantic Search via Emacs* - - [[https://github.com/debanjum/semantic-search/tree/master/src/interface/emacs#installation][Install]] [[./src/interface/emacs/semantic-search.el][semantic-search.el]] - - Run ~M-x semantic-search ~ + - *Khoj via Emacs* + - [[https://github.com/debanjum/khoj/tree/master/src/interface/emacs#installation][Install]] [[./src/interface/emacs/khoj.el][khoj.el]] + - Run ~M-x khoj ~ ** Run Unit tests @@ -71,11 +71,11 @@ sudo apt-get -y install libimage-exiftool-perl #+end_src -**** 2. Install Semantic Search +**** 2. Install Khoj #+begin_src shell - git clone https://github.com/debanjum/semantic-search && cd semantic-search + git clone https://github.com/debanjum/khoj && cd khoj conda env create -f config/environment.yml - conda activate semantic-search + conda activate khoj #+end_src **** 3. 
Configure @@ -92,11 +92,11 @@ *** Upgrade On Local Machine #+begin_src shell - cd semantic-search + cd khoj git pull origin master - conda deactivate semantic-search + conda deactivate conda env update -f config/environment.yml - conda activate semantic-search + conda activate khoj #+end_src ** Acknowledgments diff --git a/config/environment.yml b/config/environment.yml index bf3bb82a..a21b692c 100644 --- a/config/environment.yml +++ b/config/environment.yml @@ -1,4 +1,4 @@ -name: semantic-search +name: khoj channels: - conda-forge dependencies: diff --git a/config/environment_osx-arm64.yml b/config/environment_osx-arm64.yml index d7e0667d..395dd619 100644 --- a/config/environment_osx-arm64.yml +++ b/config/environment_osx-arm64.yml @@ -1,4 +1,4 @@ -name: semantic-search +name: khoj channels: - conda-forge dependencies: @@ -113,4 +113,4 @@ dependencies: - zipp=3.5.0=pyhd8ed1ab_0 - zlib=1.2.11=h31e879b_1009 - zstd=1.5.0=h861e0a7_0 -prefix: /opt/homebrew/Caskroom/miniforge/base/envs/semantic-search +prefix: /opt/homebrew/Caskroom/miniforge/base/envs/khoj diff --git a/src/interface/emacs/README.org b/src/interface/emacs/README.org index b89e1e1e..e0f8bb78 100644 --- a/src/interface/emacs/README.org +++ b/src/interface/emacs/README.org @@ -1,33 +1,33 @@ -* Emacs Semantic Search - /An Emacs interface for [[https://github.com/debanjum/semantic-search][semantic-search]]/ +* Emacs Khoj + /An Emacs interface for [[https://github.com/debanjum/khoj][Khoj]]/ ** Requirements - - Install and Run [[https://github.com/debanjum/semantic-search][semantic-search]] + - Install and Run [[https://github.com/debanjum/khoj][Khoj]] ** Installation - Direct Install - - Put ~semantic-search.el~ in your Emacs load path. For e.g ~/.emacs.d/lisp + - Put ~khoj.el~ in your Emacs load path. 
For e.g ~/.emacs.d/lisp - Load via ~use-package~ in your ~/.emacs.d/init.el or .emacs file by adding below snippet #+begin_src elisp - ;; Org-Semantic Search Library - (use-package semantic-search - :load-path "~/.emacs.d/lisp/semantic-search.el" - :bind ("C-c s" . 'semantic-search)) + ;; Khoj Package + (use-package khoj + :load-path "~/.emacs.d/lisp/khoj.el" + :bind ("C-c s" . 'khoj)) #+end_src - Use [[https://github.com/quelpa/quelpa#installation][Quelpa]] - Ensure [[https://github.com/quelpa/quelpa#installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed - Add below snippet to your ~/.emacs.d/init.el or .emacs config file and execute it. #+begin_src elisp - ;; Org-Semantic Search Library - (use-package semantic-search - :quelpa (semantic-search :fetcher url :url "https://raw.githubusercontent.com/debanjum/semantic-search/master/interface/emacs/semantic-search.el") - :bind ("C-c s" . 'semantic-search)) + ;; Khoj Package + (use-package khoj + :quelpa (khoj :fetcher url :url "https://raw.githubusercontent.com/debanjum/khoj/master/src/interface/emacs/khoj.el") + :bind ("C-c s" . 'khoj)) #+end_src ** Usage - 1. Call ~semantic-search~ using keybinding ~C-c s~ or ~M-x semantic-search~ + 1. Call ~khoj~ using keybinding ~C-c s~ or ~M-x khoj~ 2. 
Enter Query in Natural Language diff --git a/src/interface/emacs/semantic-search.el b/src/interface/emacs/khoj.el similarity index 71% rename from src/interface/emacs/semantic-search.el rename to src/interface/emacs/khoj.el index 4dad77b5..fc6b785e 100644 --- a/src/interface/emacs/semantic-search.el +++ b/src/interface/emacs/khoj.el @@ -1,11 +1,11 @@ -;;; semantic-search.el --- Semantic search via Emacs +;;; khoj.el --- Natural Search via Emacs ;; Copyright (C) 2021-2022 Debanjum Singh Solanky ;; Author: Debanjum Singh Solanky ;; Version: 0.1 ;; Keywords: search, org-mode, outlines -;; URL: http://github.com/debanjum/semantic-search/interface/emacs +;; URL: http://github.com/debanjum/khoj/interface/emacs ;; This file is NOT part of GNU Emacs. @@ -26,26 +26,27 @@ ;;; Commentary: -;; This package provides semantic search on org-mode files -;; It is a wrapper that interfaces with transformer based ML model -;; The models semantic search capabilities are exposed via an HTTP API +;; This package provides natural language search on org-mode notes, +;; beancount transactions and images. +;; It is a wrapper that interfaces with transformer based ML models. +;; The models' search capabilities are exposed via the Khoj HTTP API ;;; Code: (require 'url) (require 'json) -(defcustom semantic-search--server-url "http://localhost:8000" - "Location of semantic search API server." - :group 'semantic-search +(defcustom khoj--server-url "http://localhost:8000" + "Location of Khoj API server." 
+ :group 'khoj :type 'string) -(defcustom semantic-search--image-width 156 - "Width of rendered images returned by semantic search" - :group 'semantic-search +(defcustom khoj--image-width 156 + "Width of rendered images returned by Khoj" + :group 'khoj :type 'integer) -(defun semantic-search--extract-entries-as-org (json-response query) +(defun khoj--extract-entries-as-org (json-response query) "Convert json response from API to org-mode entries" ;; remove leading (, ) or SPC from extracted entries string (replace-regexp-in-string @@ -57,7 +58,7 @@ (lambda (args) (format "%s" (cdr (assoc 'Entry args)))) json-response)))) -(defun semantic-search--extract-entries-as-images (json-response query) +(defun khoj--extract-entries-as-images (json-response query) "Convert json response from API to org-mode entries with images" ;; remove leading (, ) or SPC from extracted entries string (replace-regexp-in-string @@ -74,14 +75,14 @@ (cdr (assoc 'score args)) (cdr (assoc 'metadata_score args)) (cdr (assoc 'image_score args)) - semantic-search--server-url + khoj--server-url (cdr (assoc 'entry args)) - semantic-search--server-url + khoj--server-url (cdr (assoc 'entry args)) (random 10000))) json-response))))) -(defun semantic-search--extract-entries-as-ledger (json-response query) +(defun khoj--extract-entries-as-ledger (json-response query) "Convert json response from API to ledger entries" ;; remove leading (, ) or SPC from extracted entries string (replace-regexp-in-string @@ -96,7 +97,7 @@ (format "%s\n\n" (cdr (assoc 'Entry args)))) json-response))))) -(defun semantic-search--buffer-name-to-search-type (buffer-name) +(defun khoj--buffer-name-to-search-type (buffer-name) (let ((file-extension (file-name-extension buffer-name))) (cond ((equal buffer-name "Music.org") "music") @@ -104,18 +105,18 @@ ((equal file-extension "org") "notes") (t "notes")))) -(defun semantic-search--construct-api-query (query search-type) +(defun khoj--construct-api-query (query search-type) (let 
((encoded-query (url-hexify-string query))) - (format "%s/search?q=%s&t=%s" semantic-search--server-url encoded-query search-type))) + (format "%s/search?q=%s&t=%s" khoj--server-url encoded-query search-type))) ;;;###autoload -(defun semantic-search (query) - "Semantic search on org-mode content via semantic-search API" +(defun khoj (query) + "Khoj on org-mode content via khoj API" (interactive "sQuery: ") - (let* ((default-type (semantic-search--buffer-name-to-search-type (buffer-name))) + (let* ((default-type (khoj--buffer-name-to-search-type (buffer-name))) (search-type (completing-read "Type: " '("notes" "ledger" "music" "image") nil t default-type)) - (url (semantic-search--construct-api-query query search-type)) - (buff (get-buffer-create (format "*Semantic Search (q:%s t:%s)*" query search-type)))) + (url (khoj--construct-api-query query search-type)) + (buff (get-buffer-create (format "*Khoj (q:%s t:%s)*" query search-type)))) ;; get json response from api (with-current-buffer buff (let ((inhibit-read-only t)) @@ -127,9 +128,9 @@ (json-response (json-parse-buffer :object-type 'alist))) (erase-buffer) (insert - (cond ((or (equal search-type "notes") (equal search-type "music")) (semantic-search--extract-entries-as-org json-response query)) - ((equal search-type "ledger") (semantic-search--extract-entries-as-ledger json-response query)) - ((equal search-type "image") (semantic-search--extract-entries-as-images json-response query)) + (cond ((or (equal search-type "notes") (equal search-type "music")) (khoj--extract-entries-as-org json-response query)) + ((equal search-type "ledger") (khoj--extract-entries-as-ledger json-response query)) + ((equal search-type "image") (khoj--extract-entries-as-images json-response query)) (t (format "%s" json-response)))) (cond ((equal search-type "notes") (org-mode)) ((equal search-type "ledger") (beancount-mode)) @@ -141,6 +142,6 @@ (read-only-mode t)) (switch-to-buffer buff))) -(provide 'semantic-search) +(provide 'khoj) 
-;;; semantic-search.el ends here +;;; khoj.el ends here diff --git a/src/interface/web/index.html b/src/interface/web/index.html index c14c7ca2..c096ce4c 100644 --- a/src/interface/web/index.html +++ b/src/interface/web/index.html @@ -1,7 +1,7 @@ - Semantic Search + Khoj -

Semantic Search

+

Khoj

diff --git a/src/utils/cli.py b/src/utils/cli.py index 1c1ec9dc..a426ba8c 100644 --- a/src/utils/cli.py +++ b/src/utils/cli.py @@ -15,7 +15,7 @@ def cli(args=None): return None # Setup Argument Parser for the Commandline Interface - parser = argparse.ArgumentParser(description="Expose API for Semantic Search") + parser = argparse.ArgumentParser(description="Expose API for Khoj") parser.add_argument('--org-files', '-i', nargs='*', help="List of org-mode files to process") parser.add_argument('--org-filter', type=str, default=None, help="Regex filter for org-mode files to process") parser.add_argument('--config-file', '-c', type=pathlib.Path, help="YAML file with user configuration") diff --git a/src/utils/install.py b/src/utils/install.py index 9f291992..aceef466 100644 --- a/src/utils/install.py +++ b/src/utils/install.py @@ -22,7 +22,7 @@ def create_script(filepath, content): if __name__ == '__main__': # Setup Argument Parser - parser = argparse.ArgumentParser(description="Setup the semantic search program") + parser = argparse.ArgumentParser(description="Setup the Khoj program") parser.add_argument('--script-dir', '-s', default="./", type=pathlib.Path, help="The project directory. Default: Current Directory") parser.add_argument('--install-dir', '-i', default="./", type=pathlib.Path, help="The directory to install the script. Default: Current Directory") parser.add_argument('--model-dir', '-m', default="./", type=pathlib.Path, help="The directory to store the model in. 
Default: Current Directory") @@ -32,7 +32,7 @@ if __name__ == '__main__': # Arrange eval "$(conda shell.bash hook)" -conda activate semantic-search +conda activate khoj cd {get_absolute(args.script_dir)} # Act @@ -43,7 +43,7 @@ python3 search_types/asymmetric.py -j {get_absolute(args.model_dir)}/notes.jsonl # Arrange eval "$(conda shell.bash hook)" -conda activate semantic-search +conda activate khoj cd {get_absolute(args.script_dir)} # Act @@ -54,4 +54,4 @@ python3 main.py -j {get_absolute(args.model_dir)}/notes.jsonl.gz -e {get_absolut create_script(f"{args.install_path}run_server"), run_server_content) # Create single command for interactive queries over commandline - create_script(f"{args.install_path}semantic-search"), search_cmd_content) + create_script(f"{args.install_path}khoj"), search_cmd_content) diff --git a/tests/data/notes/interface_emacs_readme.org b/tests/data/notes/interface_emacs_readme.org index b89e1e1e..8eecd36c 100644 --- a/tests/data/notes/interface_emacs_readme.org +++ b/tests/data/notes/interface_emacs_readme.org @@ -1,33 +1,33 @@ -* Emacs Semantic Search - /An Emacs interface for [[https://github.com/debanjum/semantic-search][semantic-search]]/ +* Emacs Khoj + /An Emacs interface for [[https://github.com/debanjum/khoj][khoj]]/ ** Requirements - - Install and Run [[https://github.com/debanjum/semantic-search][semantic-search]] + - Install and Run [[https://github.com/debanjum/khoj][khoj]] ** Installation - Direct Install - - Put ~semantic-search.el~ in your Emacs load path. For e.g ~/.emacs.d/lisp + - Put ~khoj.el~ in your Emacs load path. For e.g ~/.emacs.d/lisp - Load via ~use-package~ in your ~/.emacs.d/init.el or .emacs file by adding below snippet #+begin_src elisp - ;; Org-Semantic Search Library - (use-package semantic-search - :load-path "~/.emacs.d/lisp/semantic-search.el" - :bind ("C-c s" . 'semantic-search)) + ;; Khoj Package + (use-package khoj + :load-path "~/.emacs.d/lisp/khoj.el" + :bind ("C-c s" . 
'khoj)) #+end_src - Use [[https://github.com/quelpa/quelpa#installation][Quelpa]] - Ensure [[https://github.com/quelpa/quelpa#installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed - Add below snippet to your ~/.emacs.d/init.el or .emacs config file and execute it. #+begin_src elisp - ;; Org-Semantic Search Library - (use-package semantic-search - :quelpa (semantic-search :fetcher url :url "https://raw.githubusercontent.com/debanjum/semantic-search/master/interface/emacs/semantic-search.el") - :bind ("C-c s" . 'semantic-search)) + ;; Khoj Package + (use-package khoj + :quelpa (khoj :fetcher url :url "https://raw.githubusercontent.com/debanjum/khoj/master/interface/emacs/khoj.el") + :bind ("C-c s" . 'khoj)) #+end_src ** Usage - 1. Call ~semantic-search~ using keybinding ~C-c s~ or ~M-x semantic-search~ + 1. Call ~khoj~ using keybinding ~C-c s~ or ~M-x khoj~ 2. Enter Query in Natural Language diff --git a/tests/data/notes/main_readme.org b/tests/data/notes/main_readme.org index d343288a..917562e2 100644 --- a/tests/data/notes/main_readme.org +++ b/tests/data/notes/main_readme.org @@ -1,7 +1,7 @@ -* Semantic Search +* Khoj /Allow natural language search on user content like notes, images using transformer based models/ - All data is processed locally. User can interface with semantic-search app via [[./interface/emacs/semantic-search.el][Emacs]], API or Commandline + All data is processed locally. 
User can interface with khoj app via [[./interface/emacs/khoj.el][Emacs]], API or Commandline ** Dependencies - Python3 @@ -9,9 +9,9 @@ ** Install #+begin_src shell - git clone https://github.com/debanjum/semantic-search && cd semantic-search + git clone https://github.com/debanjum/khoj && cd khoj conda env create -f environment.yml - conda activate semantic-search + conda activate khoj #+end_src ** Run @@ -22,16 +22,16 @@ #+end_src ** Use - - *Semantic Search via Emacs* - - [[https://github.com/debanjum/semantic-search/tree/master/interface/emacs#installation][Install]] [[./interface/emacs/semantic-search.el][semantic-search.el]] - - Run ~M-x semantic-search ~ or Call ~C-c C-s~ + - *Khoj via Emacs* + - [[https://github.com/debanjum/khoj/tree/master/interface/emacs#installation][Install]] [[./interface/emacs/khoj.el][khoj.el]] + - Run ~M-x khoj ~ or Call ~C-c C-s~ - - *Semantic Search via API* + - *Khoj via API* - Query: ~GET~ [[http://localhost:8000/search?q=%22what%20is%20the%20meaning%20of%20life%22][http://localhost:8000/search?q="What is the meaning of life"]] - Regenerate Embeddings: ~GET~ [[http://localhost:8000/regenerate][http://localhost:8000/regenerate]] - - [[http://localhost:8000/docs][Semantic Search API Docs]] + - [[http://localhost:8000/docs][Khoj API Docs]] - - *Call Semantic Search via Python Script Directly* + - *Call Khoj via Python Script Directly* #+begin_src shell python3 search_types/asymmetric.py \ --compressed-jsonl .notes.jsonl.gz \ diff --git a/tests/test_asymmetric_search.py b/tests/test_asymmetric_search.py index 6e3eb871..760d2153 100644 --- a/tests/test_asymmetric_search.py +++ b/tests/test_asymmetric_search.py @@ -36,7 +36,7 @@ def test_asymmetric_search(content_config: ContentConfig, search_config: SearchC count=1) # Assert - # Actual_data should contain "Semantic Search via Emacs" entry + # Actual_data should contain "Khoj via Emacs" entry search_result = results[0]["Entry"] assert "git clone" in search_result diff --git 
a/tests/test_client.py b/tests/test_client.py index ff1a7e98..e6d9d7ed 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -20,7 +20,7 @@ client = TestClient(app) # ---------------------------------------------------------------------------------------------------- def test_search_with_invalid_content_type(): # Arrange - user_query = "How to call semantic search from Emacs?" + user_query = "How to call Khoj from Emacs?" # Act response = client.get(f"/search?q={user_query}&t=invalid_content_type") @@ -122,7 +122,7 @@ def test_notes_search(content_config: ContentConfig, search_config: SearchConfig # Assert assert response.status_code == 200 - # assert actual_data contains "Semantic Search via Emacs" entry + # assert actual_data contains "Khoj via Emacs" entry search_result = response.json()[0]["Entry"] assert "git clone" in search_result