mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-27 17:35:07 +01:00
Create a few basic tests
This commit is contained in:
parent
af9660f28e
commit
569e30b1c8
5 changed files with 158 additions and 1 deletions
|
@ -9,3 +9,4 @@ dependencies:
|
||||||
- sentence-transformers
|
- sentence-transformers
|
||||||
- fastapi
|
- fastapi
|
||||||
- uvicorn
|
- uvicorn
|
||||||
|
- pytest
|
0
src/tests/__init__.py
Normal file
0
src/tests/__init__.py
Normal file
44
src/tests/data/interface_emacs_readme.org
Normal file
44
src/tests/data/interface_emacs_readme.org
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
* Emacs Semantic Search
|
||||||
|
/An Emacs interface for [[https://github.com/debanjum/semantic-search][semantic-search]]/
|
||||||
|
|
||||||
|
** Requirements
|
||||||
|
- Install and Run [[https://github.com/debanjum/semantic-search][semantic-search]]
|
||||||
|
|
||||||
|
** Installation
|
||||||
|
- Direct Install
|
||||||
|
- Put ~semantic-search.el~ in your Emacs load path, e.g. ~/.emacs.d/lisp
|
||||||
|
|
||||||
|
- Load via ~use-package~ in your ~/.emacs.d/init.el or .emacs file by adding below snippet
|
||||||
|
#+begin_src elisp
|
||||||
|
;; Org-Semantic Search Library
|
||||||
|
(use-package semantic-search
|
||||||
|
:load-path "~/.emacs.d/lisp/semantic-search.el"
|
||||||
|
:bind ("C-c s" . 'semantic-search))
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
- Use [[https://github.com/quelpa/quelpa#installation][Quelpa]]
|
||||||
|
- Ensure [[https://github.com/quelpa/quelpa#installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed
|
||||||
|
- Add below snippet to your ~/.emacs.d/init.el or .emacs config file and execute it.
|
||||||
|
#+begin_src elisp
|
||||||
|
;; Org-Semantic Search Library
|
||||||
|
(use-package semantic-search
|
||||||
|
:quelpa (semantic-search :fetcher url :url "https://raw.githubusercontent.com/debanjum/semantic-search/master/interface/emacs/semantic-search.el")
|
||||||
|
:bind ("C-c s" . 'semantic-search))
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
** Usage
|
||||||
|
1. Call ~semantic-search~ using keybinding ~C-c s~ or ~M-x semantic-search~
|
||||||
|
|
||||||
|
2. Enter Query in Natural Language
|
||||||
|
|
||||||
|
e.g "What is the meaning of life?" "What are my life goals?"
|
||||||
|
|
||||||
|
3. Wait for results
|
||||||
|
|
||||||
|
*Note: It takes about 15s on a Mac M1 and a ~100K lines corpus of org-mode files*
|
||||||
|
|
||||||
|
4. (Optional) Narrow down results further
|
||||||
|
|
||||||
|
Include/Exclude specific words from results by adding to query
|
||||||
|
|
||||||
|
e.g "What is the meaning of life? -god +none"
|
47
src/tests/data/main_readme.org
Normal file
47
src/tests/data/main_readme.org
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
* Semantic Search
|
||||||
|
/Allow natural language search on user content like notes and images using transformer-based models/
|
||||||
|
|
||||||
|
All data is processed locally. Users can interact with the semantic-search app via [[./interface/emacs/semantic-search.el][Emacs]], the API or the command line
|
||||||
|
|
||||||
|
** Dependencies
|
||||||
|
- Python3
|
||||||
|
- [[https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links][Miniconda]]
|
||||||
|
|
||||||
|
** Install
|
||||||
|
#+begin_src shell
|
||||||
|
git clone https://github.com/debanjum/semantic-search && cd semantic-search
|
||||||
|
conda env create -f environment.yml
|
||||||
|
conda activate semantic-search
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
** Run
|
||||||
|
Load ML model, generate embeddings and expose API to query specified org-mode files
|
||||||
|
|
||||||
|
#+begin_src shell
|
||||||
|
python3 main.py --input-files ~/Notes/Schedule.org ~/Notes/Incoming.org --verbose
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
** Use
|
||||||
|
- *Semantic Search via Emacs*
|
||||||
|
- [[https://github.com/debanjum/semantic-search/tree/master/interface/emacs#installation][Install]] [[./interface/emacs/semantic-search.el][semantic-search.el]]
|
||||||
|
- Run ~M-x semantic-search <user-query>~ or Call ~C-c s~
|
||||||
|
|
||||||
|
- *Semantic Search via API*
|
||||||
|
- Query: ~GET~ [[http://localhost:8000/search?q=%22what%20is%20the%20meaning%20of%20life%22][http://localhost:8000/search?q="What is the meaning of life"]]
|
||||||
|
- Regenerate Embeddings: ~GET~ [[http://localhost:8000/regenerate][http://localhost:8000/regenerate]]
|
||||||
|
- [[http://localhost:8000/docs][Semantic Search API Docs]]
|
||||||
|
|
||||||
|
- *Call Semantic Search via Python Script Directly*
|
||||||
|
#+begin_src shell
|
||||||
|
python3 search_types/asymmetric.py \
|
||||||
|
--compressed-jsonl .notes.jsonl.gz \
|
||||||
|
--embeddings .notes_embeddings.pt \
|
||||||
|
--results-count 5 \
|
||||||
|
--verbose \
|
||||||
|
--interactive
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
** Acknowledgments
|
||||||
|
- [[https://huggingface.co/sentence-transformers/msmarco-MiniLM-L-6-v3][MiniLM Model]] for Asymmetric Text Search. See [[https://www.sbert.net/examples/applications/retrieve_rerank/README.html][SBert Documentation]]
|
||||||
|
- [[https://github.com/openai/CLIP][OpenAI CLIP Model]] for Image Search. See [[https://www.sbert.net/examples/applications/image-search/README.html][SBert Documentation]]
|
||||||
|
- Charles Cave for [[http://members.optusnet.com.au/~charles57/GTD/orgnode.html][OrgNode Parser]]
|
65
src/tests/test_main.py
Normal file
65
src/tests/test_main.py
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
# Standard Modules
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# External Packages
|
||||||
|
import pytest
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
# Internal Packages
|
||||||
|
from main import app, cli
|
||||||
|
from search_type import asymmetric
|
||||||
|
|
||||||
|
|
||||||
|
# Arrange
# ----------------------------------------------------------------------------------------------------
# Module-level values shared by the tests below.
client = TestClient(app)

# Flags handed to asymmetric.setup by test_asymmetric_setup.
input_filter = None
regenerate = False
verbose = 1

# All paths are relative, so they resolve against the directory the tests are launched from.
input_files = [
    Path('tests/data/main_readme.org'),
    Path('tests/data/interface_emacs_readme.org'),
]
compressed_jsonl = Path('tests/data/.test.jsonl.gz')
embeddings = Path('tests/data/.test_embeddings.pt')
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
def test_asymmetric_setup():
    """Run asymmetric.setup on the two README fixtures and check the result sizes.

    Only the entries and the corpus embeddings are inspected; the remaining
    three values returned by setup are unpacked but not asserted on.
    """
    # Act
    setup_result = asymmetric.setup(
        input_files,
        input_filter,
        compressed_jsonl,
        embeddings,
        regenerate,
        verbose,
    )
    entries, corpus_embeddings, _bi_encoder, _cross_encoder, _top_k = setup_result

    # Assert
    expected_entry_count = 10
    assert len(entries) == expected_entry_count
    assert len(corpus_embeddings) == expected_entry_count
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
def test_cli_default():
    """Check the values `cli` returns when only `--input-files` is supplied."""
    # Act
    args = cli(['--input-files=tests/data/test.org'])

    # Assert
    assert args.input_files == ['tests/data/test.org']
    # Compare against the None singleton by identity rather than equality (PEP 8)
    assert args.input_filter is None
    assert args.compressed_jsonl == Path('.notes.jsonl.gz')
    assert args.embeddings == Path('.notes_embeddings.pt')
    # Truthiness check instead of `== False` (PEP 8)
    assert not args.regenerate
    assert args.verbose == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
def test_cli_set_by_user():
    """Check that every option passed to `cli` is reflected in the returned args."""
    # Act
    actual_args = cli(['--input-files=tests/data/test.org',
                       '--input-filter=tests/data/*.org',
                       '--compressed-jsonl=tests/data/.test.jsonl.gz',
                       '--embeddings=tests/data/.test_embeddings.pt',
                       '--regenerate',
                       '-vvv'])

    # Assert
    assert actual_args.input_files == ['tests/data/test.org']
    assert actual_args.input_filter == 'tests/data/*.org'
    assert actual_args.compressed_jsonl == Path('tests/data/.test.jsonl.gz')
    assert actual_args.embeddings == Path('tests/data/.test_embeddings.pt')
    # Truthiness check instead of `== True` (PEP 8)
    assert actual_args.regenerate
    # The test passes `-vvv` and expects a verbosity of 3
    assert actual_args.verbose == 3
|
Loading…
Reference in a new issue