# Standard Packages
import argparse
import pathlib
2021-11-28 17:57:33 +01:00
import json
# External Packages
import yaml
# Internal Packages
from src . utils . helpers import is_none_or_empty , get_absolute_path , resolve_absolute_path , merge_dicts
from src . utils . rawconfig import FullConfig
2021-08-22 04:21:38 +02:00
def cli(args=None):
    """Parse command-line arguments and attach the resolved configuration.

    Configuration is resolved with the priority
    command-line flags > config file > ``default_config``.

    Args:
        args: List of command-line argument strings (e.g. ``sys.argv[1:]``).

    Returns:
        ``None`` when ``args`` is none or empty; otherwise the parsed
        ``argparse.Namespace`` with an extra ``config`` attribute holding the
        result of ``FullConfig.parse_obj`` applied to the merged settings.

    Raises:
        SystemExit: With code 1 when none of ``--org-files``, ``--org-filter``
            or ``--config-file`` is supplied. ``argparse`` itself also exits
            on malformed arguments.
    """
    # Function-scope import keeps this block self-contained.
    from copy import deepcopy

    if is_none_or_empty(args):
        return None

    # Setup Argument Parser for the Commandline Interface
    parser = argparse.ArgumentParser(description="Expose API for Semantic Search")
    parser.add_argument('--org-files', '-i', nargs='*', help="List of org-mode files to process")
    parser.add_argument('--org-filter', type=str, default=None, help="Regex filter for org-mode files to process")
    parser.add_argument('--config-file', '-c', type=pathlib.Path, help="YAML file with user configuration")
    parser.add_argument('--regenerate', action='store_true', default=False, help="Regenerate model embeddings from source files. Default: false")
    parser.add_argument('--verbose', '-v', action='count', default=0, help="Show verbose conversion logs. Default: 0")
    parser.add_argument('--host', type=str, default='127.0.0.1', help="Host address of the server. Default: 127.0.0.1")
    parser.add_argument('--port', '-p', type=int, default=8000, help="Port of the server. Default: 8000")
    parser.add_argument('--socket', type=pathlib.Path, help="Path to UNIX socket for server. Use to run server behind reverse proxy. Default: /tmp/uvicorn.sock")

    args = parser.parse_args(args)

    # The message promises that any one of these three flags is enough, so
    # the check accepts --org-filter on its own as well.
    if not (args.config_file or args.org_files or args.org_filter):
        print("Require at least 1 of --org-files, --org-filter or --config-file flags to be passed from commandline")
        raise SystemExit(1)

    # Config Priority: Cmd Args > Config File > Default Config
    # Work on a copy so overrides never leak into the module-level defaults.
    config = deepcopy(default_config)

    if args.config_file and resolve_absolute_path(args.config_file).exists():
        with open(get_absolute_path(args.config_file), 'r', encoding='utf-8') as config_file:
            # An empty YAML document loads as None; treat it as "no overrides".
            config_from_file = yaml.safe_load(config_file) or {}
        config = merge_dicts(priority_dict=config_from_file, default_dict=config)

    # Apply command-line overrides while the config is still a plain dict:
    # the object returned by FullConfig.parse_obj cannot be indexed with
    # these dashed keys.
    if args.org_files:
        config['content-type']['org']['input-files'] = args.org_files
    if args.org_filter:
        config['content-type']['org']['input-filter'] = args.org_filter

    # Validate once, after every source has been merged.
    args.config = FullConfig.parse_obj(config)

    return args
# Baseline configuration. cli() starts from these values and layers the
# config file and command-line flags on top of them.
default_config = {
    # Per content type: file names for the generated artifacts.
    'content-type':
    {
        'org':
        {
            'compressed-jsonl': '.notes.jsonl.gz',
            'embeddings-file': '.note_embeddings.pt'
        },
        'ledger':
        {
            'compressed-jsonl': '.transactions.jsonl.gz',
            'embeddings-file': '.transaction_embeddings.pt'
        },
        'image':
        {
            'embeddings-file': '.image_embeddings.pt',
            'batch-size': 50,
            'use-xmp-metadata': 'no'
        },
        'music':
        {
            'compressed-jsonl': '.songs.jsonl.gz',
            'embeddings-file': '.song_embeddings.pt'
        },
    },
    # Per search type: names of the models to use.
    'search-type':
    {
        'asymmetric':
        {
            'encoder': "sentence-transformers/msmarco-MiniLM-L-6-v3",
            'cross-encoder': "cross-encoder/ms-marco-MiniLM-L-6-v2"
        },
        'image':
        {
            'encoder': "clip-ViT-B-32"
        },
    },
    # Settings for the conversation processor; the API key and history are
    # empty by default.
    'processor':
    {
        'conversation':
        {
            'openai-api-key': "",
            'conversation-logfile': ".conversation_logs.json",
            'conversation-history': ""
        },
    }
}