2021-08-17 12:59:58 +02:00
# Standard Packages
import sys
import argparse
import pathlib
2021-08-16 02:50:08 +02:00
from typing import Optional
2021-08-17 12:59:58 +02:00
# External Packages
import uvicorn
2021-08-16 02:50:08 +02:00
from fastapi import FastAPI
2021-08-17 12:59:58 +02:00
# Internal Packages
2021-08-17 01:04:45 +02:00
from search_type import asymmetric
2021-08-17 03:52:38 +02:00
from processor . org_mode . org_to_jsonl import org_to_jsonl
from utils . helpers import is_none_or_empty
2021-08-17 12:59:58 +02:00
2021-08-16 02:50:08 +02:00
# ASGI application object; the route handlers in this module register on it
app = FastAPI()
@app.get('/search')
def search(q: str, n: Optional[int] = 5, t: Optional[str] = None):
    """Handle a search request against the loaded notes.

    Args:
        q: The user's search query.
        n: Result count forwarded to ``asymmetric.collate_results``.
        t: Search type. Only ``'notes'`` (or an omitted value) is handled.

    Returns:
        The value of ``asymmetric.collate_results`` for the query hits, or
        an empty dict when the query is empty or the search type is not
        handled.
    """
    # Guard: without a query there is nothing to search for
    if not q:
        print('No query param (q) passed in API call to initiate search')
        return {}

    # Guard: notes is the only search type handled here
    if t is not None and t != 'notes':
        return {}

    # query notes
    # corpus_embeddings, entries, bi_encoder, cross_encoder and top_k are
    # module-level names assigned in the __main__ section of this file
    hits = asymmetric.query_notes(
        q,
        corpus_embeddings,
        entries,
        bi_encoder,
        cross_encoder,
        top_k)

    # collate and return results
    return asymmetric.collate_results(hits, entries, n)
2021-08-17 03:52:38 +02:00
@app.get('/regenerate')
def regenerate(t: Optional[str] = None):
    """Rebuild the notes entries and embeddings on request.

    Args:
        t: Content type to regenerate. Only ``'notes'`` (or an omitted
            value) triggers any work.

    Returns:
        A dict with ``status`` and ``message`` keys.
    """
    # Declared up front: the rebuilt values replace the module-level state
    global corpus_embeddings, entries

    if t is None or t == 'notes':
        # Extract Entries, Generate Embeddings
        # The last three values returned by setup are not needed here
        entries, corpus_embeddings, _, _, _ = asymmetric.setup(
            args.input_files,
            args.input_filter,
            args.compressed_jsonl,
            args.embeddings,
            regenerate=True,
            verbose=args.verbose)

    # NOTE(review): the same success response is sent when `t` names an
    # unhandled type and nothing was regenerated - confirm this is intended
    return {'status': 'ok', 'message': 'regeneration completed'}
2021-08-17 03:52:38 +02:00
2021-08-17 13:00:45 +02:00
def cli(args=None):
    """Parse the command-line options of the semantic search server.

    Args:
        args: Sequence of argument strings to parse. When ``None`` the
            process arguments (``sys.argv[1:]``) are used. An explicitly
            passed empty list is parsed as-is and yields the defaults.

    Returns:
        The ``argparse.Namespace`` with ``input_files``, ``input_filter``,
        ``compressed_jsonl``, ``embeddings``, ``regenerate`` and ``verbose``.
    """
    # Only fall back to the process arguments when the caller supplied
    # nothing; a truthiness check would also discard an explicit empty list
    if args is None:
        args = sys.argv[1:]

    # Setup Argument Parser for the Commandline Interface
    parser = argparse.ArgumentParser(description="Expose API for Semantic Search")
    parser.add_argument(
        '--input-files', '-i',
        nargs='*',
        help="List of org-mode files to process")
    parser.add_argument(
        '--input-filter',
        type=str,
        default=None,
        help="Regex filter for org-mode files to process")
    parser.add_argument(
        '--compressed-jsonl', '-j',
        type=pathlib.Path,
        default=pathlib.Path(".notes.jsonl.gz"),
        help="Compressed JSONL formatted notes file to compute embeddings from")
    parser.add_argument(
        '--embeddings', '-e',
        type=pathlib.Path,
        default=pathlib.Path(".notes_embeddings.pt"),
        help="File to save/load model embeddings to/from")
    parser.add_argument(
        '--regenerate',
        action='store_true',
        default=False,
        help="Regenerate embeddings from org-mode files. Default: false")
    parser.add_argument(
        '--verbose', '-v',
        action='count',
        default=0,
        help="Show verbose conversion logs. Default: 0")

    return parser.parse_args(args)
if __name__ == '__main__':
    # Command-line options; kept at module level because the /regenerate
    # handler reads `args` as a global
    args = cli()

    # Initial setup; these module-level names are what the /search and
    # /regenerate handlers read and update
    (
        entries,
        corpus_embeddings,
        bi_encoder,
        cross_encoder,
        top_k,
    ) = asymmetric.setup(
        args.input_files,
        args.input_filter,
        args.compressed_jsonl,
        args.embeddings,
        args.regenerate,
        args.verbose,
    )

    # Start Application Server
    uvicorn.run(app)