2021-08-17 12:59:58 +02:00
# Standard Packages
import sys
import argparse
import pathlib
2021-08-16 02:50:08 +02:00
from typing import Optional
2021-08-17 12:59:58 +02:00
# External Packages
import uvicorn
2021-08-16 02:50:08 +02:00
from fastapi import FastAPI
2021-08-17 12:59:58 +02:00
# Internal Packages
2021-08-17 01:04:45 +02:00
from search_type import asymmetric
2021-08-17 03:52:38 +02:00
from processor . org_mode . org_to_jsonl import org_to_jsonl
from utils . helpers import is_none_or_empty
2021-08-17 12:59:58 +02:00
2021-08-16 02:50:08 +02:00
# FastAPI application instance; the @app.get route decorators below register their handlers on it
app = FastAPI ( )
@app.get('/search')
def search(q: str, n: Optional[int] = 5, t: Optional[str] = 'notes'):
    """Handle GET /search: query the indexed entries and return collated results.

    Relies on the module-level globals ``corpus_embeddings``, ``entries``,
    ``bi_encoder``, ``cross_encoder`` and ``top_k``, which are assigned in the
    ``__main__`` block before the server starts.

    Args:
        q: The user's query string.
        n: Results count forwarded to ``asymmetric.collate_results``.
        t: Search type. Only ``'notes'`` is handled.

    Returns:
        The value of ``asymmetric.collate_results`` for a notes search, or an
        empty dict when the query is missing/blank or the type is unsupported.
    """
    # Guard: a missing, empty or whitespace-only query cannot be searched
    if q is None or not q.strip():
        print('No query param (q) passed in API call to initiate search')
        return {}

    # Guard: notes is the only search type implemented
    if t != 'notes':
        return {}

    # query notes
    hits = asymmetric.query_notes(
        q,
        corpus_embeddings,
        entries,
        bi_encoder,
        cross_encoder,
        top_k)

    # collate and return results
    return asymmetric.collate_results(hits, entries, n)
2021-08-17 03:52:38 +02:00
@app.get('/regenerate')
def regenerate():
    """Handle GET /regenerate: rebuild entries and embeddings, then swap them in.

    Reads the parsed command line arguments from the module-level ``args``
    (assigned in the ``__main__`` block) and rebinds the module-level
    ``entries`` and ``corpus_embeddings`` used by the search endpoint.

    Returns:
        A dict with ``status`` and ``message`` keys reporting completion.
    """
    global entries, corpus_embeddings

    # Extract Entries, Generate Embeddings
    # The last three values returned by setup are not needed here and are discarded
    extracted_entries, computed_embeddings, _, _, _ = asymmetric.setup(
        args.input_files,
        args.input_filter,
        args.compressed_jsonl,
        args.embeddings,
        regenerate=True,
        verbose=args.verbose)

    # Now Update State
    # update state variables only after regeneration is complete, to
    # minimize time the application is in inconsistent, partially updated state
    entries = extracted_entries
    corpus_embeddings = computed_embeddings

    return {'status': 'ok', 'message': 'regeneration completed'}
2021-08-17 03:52:38 +02:00
2021-08-16 02:50:08 +02:00
if __name__ == '__main__':
    # Setup Argument Parser
    parser = argparse.ArgumentParser(description="Expose API for Semantic Search")
    parser.add_argument(
        '--input-files', '-i', nargs='*',
        help="List of org-mode files to process")
    parser.add_argument(
        '--input-filter', type=str, default=None,
        help="Regex filter for org-mode files to process")
    parser.add_argument(
        '--compressed-jsonl', '-j', type=pathlib.Path,
        default=pathlib.Path(".notes.jsonl.gz"),
        help="Compressed JSONL formatted notes file to compute embeddings from")
    parser.add_argument(
        '--embeddings', '-e', type=pathlib.Path,
        default=pathlib.Path(".notes_embeddings.pt"),
        help="File to save/load model embeddings to/from")
    parser.add_argument(
        '--regenerate', action='store_true', default=False,
        help="Regenerate embeddings from org-mode files. Default: false")
    parser.add_argument(
        '--verbose', action='count', default=0,
        help="Show verbose conversion logs. Default: 0")
    args = parser.parse_args()

    # Initialize the module-level state read by the /search and /regenerate handlers.
    # These names must stay at module scope: the handlers access them as globals.
    entries, corpus_embeddings, bi_encoder, cross_encoder, top_k = asymmetric.setup(
        args.input_files,
        args.input_filter,
        args.compressed_jsonl,
        args.embeddings,
        regenerate=args.regenerate,
        verbose=args.verbose)

    # Start Application Server
    uvicorn.run(app)