From d75df54385a0cace3c436db544b413f4ef296299 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Sun, 15 Aug 2021 17:50:08 -0700
Subject: [PATCH] Create API interface for Semantic Search

Use FastAPI, Uvicorn to create app with API endpoint at /search
Example Query: http://localhost:8000/search?q=why%20sleep%3F&t=notes&n=5
---
Review notes (text between the "---" line and the diff is not part of the
commit message):
- Line structure was restored from a whitespace-collapsed copy. The
  indentation of unchanged context lines in asymmetric.py and environment.yml
  is assumed -- TODO confirm against the target tree before applying.
- The commit id and the post-image blob ids on the "index" lines are those of
  the original commit; they do not describe the revised content below.
- Example query now targets /search and passes t unquoted: quotes in the
  query string would be compared literally against 'notes' and never match.
- collate_results treats a negative count as "no results" rather than slicing
  from the end of hits.
- main.py imports argparse and pathlib explicitly instead of relying on the
  wildcard import from asymmetric to provide them.

 asymmetric.py   | 15 +++++++++++
 environment.yml |  2 ++
 main.py         | 69 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+)
 create mode 100644 main.py

diff --git a/asymmetric.py b/asymmetric.py
index 449b137f..0b0ddcc3 100644
--- a/asymmetric.py
+++ b/asymmetric.py
@@ -130,6 +130,21 @@ def render_results(hits, entries, count=5, display_biencoder_results=False):
         print(f"CrossScore: {hit['cross-score']:.3f}\n-----------------\n{entries[hit['corpus_id']]}")
 
 
+def collate_results(hits, entries, count=5, verbose=False):
+    "Collate the first count hits into a list of dicts with each hit's entry and its 'cross-score' (3 decimals); verbose is currently unused"
+    # A negative count would slice from the end of hits; treat it as a request for no results
+    if count is not None and count < 0:
+        count = 0
+
+    return [
+        {
+            "Entry": entries[hit['corpus_id']],
+            "Score": f"{hit['cross-score']:.3f}"
+        }
+        for hit
+        in hits[0:count]]
+
+
 if __name__ == '__main__':
     # Setup Argument Parser
     parser = argparse.ArgumentParser(description="Map Org-Mode notes into JSONL format")
diff --git a/environment.yml b/environment.yml
index 74a79201..86b02c5c 100644
--- a/environment.yml
+++ b/environment.yml
@@ -7,3 +7,5 @@ dependencies:
   - pytorch
   - transformers
   - sentence-transformers
+  - fastapi
+  - uvicorn
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 00000000..f25b40a1
--- /dev/null
+++ b/main.py
@@ -0,0 +1,69 @@
+import argparse
+import pathlib
+from typing import Optional
+
+from fastapi import FastAPI
+import uvicorn
+
+from asymmetric import *
+
+app = FastAPI()
+
+
+def create_search_notes(corpus_embeddings, entries, bi_encoder, cross_encoder, top_k):
+    "Closure to create search_notes method from initialized model, entries and embeddings"
+    def search_notes(query):
+        return query_notes(
+            query,
+            corpus_embeddings,
+            entries,
+            bi_encoder,
+            cross_encoder,
+            top_k)
+
+    return search_notes
+
+
+@app.get('/search')
+def search(q: str, n: Optional[int] = 5, t: Optional[str] = 'notes'):
+    "Search entries of type t for query q, returning at most n results ({} if q is empty or t is not 'notes')"
+    if not q:
+        print('No query param (q) passed in API call to initiate search')
+        return {}
+
+    user_query = q
+    results_count = n
+
+    if t == 'notes':
+        # query notes; search_notes and entries are globals assigned in the __main__ block below
+        hits = search_notes(user_query)
+
+        # collate and return results
+        return collate_results(hits, entries, results_count)
+
+    else:
+        return {}
+
+
+if __name__ == '__main__':
+    # Setup Argument Parser
+    parser = argparse.ArgumentParser(description="Expose API for Semantic Search")
+    parser.add_argument('--jsonl-file', '-j', required=True, type=pathlib.Path, help="Input file for compressed JSONL formatted notes to compute embeddings from")
+    parser.add_argument('--embeddings-file', '-e', type=pathlib.Path, help="File to save/load model embeddings to/from. Default: ./embeddings.pt")
+    parser.add_argument('--verbose', action='store_true', default=False, help="Show verbose conversion logs. Default: false")
+    args = parser.parse_args()
+
+    # Initialize Model
+    bi_encoder, cross_encoder, top_k = initialize_model()
+
+    # Extract Entries
+    entries = extract_entries(args.jsonl_file, args.verbose)
+
+    # Compute or Load Embeddings
+    corpus_embeddings = compute_embeddings(entries, bi_encoder, args.embeddings_file, args.verbose)
+
+    # Generate search_notes method from initialized model, entries and embeddings
+    search_notes = create_search_notes(corpus_embeddings, entries, bi_encoder, cross_encoder, top_k)
+
+    # Start Application Server
+    uvicorn.run(app)