From 569e30b1c83ec2d3fee9ad5912f07f8c1726c63e Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 17 Aug 2021 03:27:16 -0700 Subject: [PATCH] Create a few basic tests --- environment.yml | 3 +- src/tests/__init__.py | 0 src/tests/data/interface_emacs_readme.org | 44 +++++++++++++++ src/tests/data/main_readme.org | 47 ++++++++++++++++ src/tests/test_main.py | 65 +++++++++++++++++++++++ 5 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 src/tests/__init__.py create mode 100644 src/tests/data/interface_emacs_readme.org create mode 100644 src/tests/data/main_readme.org create mode 100644 src/tests/test_main.py diff --git a/environment.yml b/environment.yml index 86b02c5c..bca8ab1e 100644 --- a/environment.yml +++ b/environment.yml @@ -8,4 +8,5 @@ dependencies: - transformers - sentence-transformers - fastapi - - uvicorn \ No newline at end of file + - uvicorn + - pytest \ No newline at end of file diff --git a/src/tests/__init__.py b/src/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/tests/data/interface_emacs_readme.org b/src/tests/data/interface_emacs_readme.org new file mode 100644 index 00000000..b89e1e1e --- /dev/null +++ b/src/tests/data/interface_emacs_readme.org @@ -0,0 +1,44 @@ +* Emacs Semantic Search + /An Emacs interface for [[https://github.com/debanjum/semantic-search][semantic-search]]/ + +** Requirements + - Install and Run [[https://github.com/debanjum/semantic-search][semantic-search]] + +** Installation + - Direct Install + - Put ~semantic-search.el~ in your Emacs load path. For e.g ~/.emacs.d/lisp + + - Load via ~use-package~ in your ~/.emacs.d/init.el or .emacs file by adding below snippet + #+begin_src elisp + ;; Org-Semantic Search Library + (use-package semantic-search + :load-path "~/.emacs.d/lisp/semantic-search.el" + :bind ("C-c s" . 'semantic-search)) + #+end_src + + - Use [[https://github.com/quelpa/quelpa#installation][Quelpa]] + - Ensure [[https://github.com/quelpa/quelpa#installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed + - Add below snippet to your ~/.emacs.d/init.el or .emacs config file and execute it. + #+begin_src elisp + ;; Org-Semantic Search Library + (use-package semantic-search + :quelpa (semantic-search :fetcher url :url "https://raw.githubusercontent.com/debanjum/semantic-search/master/interface/emacs/semantic-search.el") + :bind ("C-c s" . 'semantic-search)) + #+end_src + +** Usage + 1. Call ~semantic-search~ using keybinding ~C-c s~ or ~M-x semantic-search~ + + 2. Enter Query in Natural Language + + e.g "What is the meaning of life?" "What are my life goals?" + + 3. Wait for results + + *Note: It takes about 15s on a Mac M1 and a ~100K lines corpus of org-mode files* + + 4. (Optional) Narrow down results further + + Include/Exclude specific words from results by adding to query + + e.g "What is the meaning of life? -god +none" diff --git a/src/tests/data/main_readme.org b/src/tests/data/main_readme.org new file mode 100644 index 00000000..3e0b479d --- /dev/null +++ b/src/tests/data/main_readme.org @@ -0,0 +1,47 @@ +* Semantic Search + /Allow natural language search on user content like notes, images using transformer based models/ + + All data is processed locally. User can interface with semantic-search app via [[./interface/emacs/semantic-search.el][Emacs]], API or Commandline + +** Dependencies + - Python3 + - [[https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links][Miniconda]] + +** Install + #+begin_src shell + git clone https://github.com/debanjum/semantic-search && cd semantic-search + conda env create -f environment.yml + conda activate semantic-search + #+end_src + +** Run + Load ML model, generate embeddings and expose API to query specified org-mode files + + #+begin_src shell + python3 main.py --input-files ~/Notes/Schedule.org ~/Notes/Incoming.org --verbose + #+end_src + +** Use + - *Semantic Search via Emacs* + - [[https://github.com/debanjum/semantic-search/tree/master/interface/emacs#installation][Install]] [[./interface/emacs/semantic-search.el][semantic-search.el]] + - Run ~M-x semantic-search ~ or Call ~C-c C-s~ + + - *Semantic Search via API* + - Query: ~GET~ [[http://localhost:8000/search?q=%22what%20is%20the%20meaning%20of%20life%22][http://localhost:8000/search?q="What is the meaning of life"]] + - Regenerate Embeddings: ~GET~ [[http://localhost:8000/regenerate][http://localhost:8000/regenerate]] + - [[http://localhost:8000/docs][Semantic Search API Docs]] + + - *Call Semantic Search via Python Script Directly* + #+begin_src shell + python3 search_types/asymmetric.py \ + --compressed-jsonl .notes.jsonl.gz \ + --embeddings .notes_embeddings.pt \ + --results-count 5 \ + --verbose \ + --interactive + #+end_src + +** Acknowledgments + - [[https://huggingface.co/sentence-transformers/msmarco-MiniLM-L-6-v3][MiniLM Model]] for Asymmetric Text Search. See [[https://www.sbert.net/examples/applications/retrieve_rerank/README.html][SBert Documentation]] + - [[https://github.com/openai/CLIP][OpenAI CLIP Model]] for Image Search. See [[https://www.sbert.net/examples/applications/image-search/README.html][SBert Documentation]] + - Charles Cave for [[http://members.optusnet.com.au/~charles57/GTD/orgnode.html][OrgNode Parser]] diff --git a/src/tests/test_main.py b/src/tests/test_main.py new file mode 100644 index 00000000..ddb33483 --- /dev/null +++ b/src/tests/test_main.py @@ -0,0 +1,65 @@ +# Standard Modules +from pathlib import Path + +# External Packages +import pytest +from fastapi.testclient import TestClient + +# Internal Packages +from main import app, cli +from search_type import asymmetric + + +# Arrange +# ---------------------------------------------------------------------------------------------------- +client = TestClient(app) + +input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')] +input_filter = None +compressed_jsonl = Path('tests/data/.test.jsonl.gz') +embeddings = Path('tests/data/.test_embeddings.pt') +regenerate = False +verbose = 1 + + +# ---------------------------------------------------------------------------------------------------- +def test_asymmetric_setup(): + # Act + entries, corpus_embeddings, bi_encoder, cross_encoder, top_k = asymmetric.setup(input_files, input_filter, compressed_jsonl, embeddings, regenerate, verbose) + + # Assert + assert len(entries) == 10 + assert len(corpus_embeddings) == 10 + + +# ---------------------------------------------------------------------------------------------------- +def test_cli_default(): + # Act + args = cli(['--input-files=tests/data/test.org']) + + # Assert + assert args.input_files == ['tests/data/test.org'] + assert args.input_filter == None + assert args.compressed_jsonl == Path('.notes.jsonl.gz') + assert args.embeddings == Path('.notes_embeddings.pt') + assert args.regenerate == False + assert args.verbose == 0 + + +# ---------------------------------------------------------------------------------------------------- +def test_cli_set_by_user(): + # Act + actual_args = cli(['--input-files=tests/data/test.org', + '--input-filter=tests/data/*.org', + '--compressed-jsonl=tests/data/.test.jsonl.gz', + '--embeddings=tests/data/.test_embeddings.pt', + '--regenerate', + '-vvv']) + + # Assert + assert actual_args.input_files == ['tests/data/test.org'] + assert actual_args.input_filter == 'tests/data/*.org' + assert actual_args.compressed_jsonl == Path('tests/data/.test.jsonl.gz') + assert actual_args.embeddings == Path('tests/data/.test_embeddings.pt') + assert actual_args.regenerate == True + assert actual_args.verbose == 3