From 97263b82098df2b544c6cc79c710fe97acb498cf Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Sat, 21 Aug 2021 19:21:38 -0700
Subject: [PATCH] Move CLI into a separate module. Move CLI tests into a separate file

---
Reviewer notes (commentary only; not part of the commit message or the diff):
 * cli() now returns None when is_none_or_empty(args) is true instead of
   falling back to sys.argv[1:], while main.py reads args.config right after
   calling cli(sys.argv[1:]). Presumably a run with no flags now fails on None
   rather than printing the "Require at least 1 of ..." hint -- confirm
   against is_none_or_empty in utils.helpers.
 * args.config is bound to the module-level default_config; when only
   --org-files/--org-filter are passed (no config file), the item assignments
   write into default_config itself, so the values are shared by later cli()
   calls in the same process.
 * The usage hint names --org-file, but the flag defined is --org-files.

 src/main.py            | 61 ++-----------------------------
 src/tests/test_cli.py  | 70 ++++++++++++++++++++++++++++++++++++
 src/tests/test_main.py | 81 ++++++------------------------------------
 src/utils/cli.py       | 61 +++++++++++++++++++++++++++++++
 4 files changed, 144 insertions(+), 129 deletions(-)
 create mode 100644 src/tests/test_cli.py
 create mode 100644 src/utils/cli.py

diff --git a/src/main.py b/src/main.py
index dd815f62..002300f9 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,18 +1,16 @@
 # Standard Packages
 import sys
-import argparse
 import pathlib
 from typing import Optional
 
 # External Packages
 import uvicorn
-import yaml
 from fastapi import FastAPI
 
 # Internal Packages
 from search_type import asymmetric
-from processor.org_mode.org_to_jsonl import org_to_jsonl
-from utils.helpers import is_none_or_empty, get_absolute_path, get_from_dict, merge_dicts
+from utils.helpers import get_from_dict
+from utils.cli import cli
 
 app = FastAPI()
 
@@ -62,61 +60,8 @@ def regenerate(t: Optional[str] = None):
     return {'status': 'ok', 'message': 'regeneration completed'}
 
 
-def cli(args=None):
-    if is_none_or_empty(args):
-        args = sys.argv[1:]
-
-    # Setup Argument Parser for the Commandline Interface
-    parser = argparse.ArgumentParser(description="Expose API for Semantic Search")
-    parser.add_argument('--org-files', '-i', nargs='*', help="List of org-mode files to process")
-    parser.add_argument('--org-filter', type=str, default=None, help="Regex filter for org-mode files to process")
-    parser.add_argument('--config-file', '-c', type=pathlib.Path, help="YAML file with user configuration")
-    parser.add_argument('--regenerate', action='store_true', default=False, help="Regenerate model embeddings from source files. Default: false")
-    parser.add_argument('--verbose', '-v', action='count', default=0, help="Show verbose conversion logs. Default: 0")
-    args = parser.parse_args(args)
-
-    if not (args.config_file or args.org_files):
-        print(f"Require at least 1 of --org-file, --org-filter or --config-file flags to be passed from commandline")
-        exit(1)
-
-    # Config Priority: Cmd Args > Config File > Default Config
-    args.config = default_config
-    if args.config_file and args.config_file.exists():
-        with open(get_absolute_path(args.config_file), 'r', encoding='utf-8') as config_file:
-            config_from_file = yaml.safe_load(config_file)
-            args.config = merge_dicts(priority_dict=config_from_file, default_dict=args.config)
-
-    if args.org_files:
-        args.config['content-type']['org']['input-files'] = args.org_files
-
-    if args.org_filter:
-        args.config['content-type']['org']['input-filter'] = args.org_filter
-
-    return args
-
-
-default_config = {
-    'content-type':
-    {
-        'org':
-        {
-            'compressed-jsonl': '.notes.jsonl.gz',
-            'embeddings-file': '.note_embeddings.pt'
-        }
-    },
-    'search-type':
-    {
-        'asymmetric':
-        {
-            'encoder': "sentence-transformers/msmarco-MiniLM-L-6-v3",
-            'cross-encoder': "cross-encoder/ms-marco-MiniLM-L-6-v2"
-        }
-    }
-}
-
-
 if __name__ == '__main__':
-    args = cli()
+    args = cli(sys.argv[1:])
     org_config = get_from_dict(args.config, 'content-type', 'org')
 
     notes_search_enabled = False
diff --git a/src/tests/test_cli.py b/src/tests/test_cli.py
new file mode 100644
index 00000000..98f6f96a
--- /dev/null
+++ b/src/tests/test_cli.py
@@ -0,0 +1,70 @@
+# Standard Modules
+from pathlib import Path
+
+# Internal Packages
+from utils.cli import cli
+
+
+# Test
+# ----------------------------------------------------------------------------------------------------
+def test_cli_minimal_default():
+    # Act
+    actual_args = cli(['--config-file=tests/data/config.yml'])
+
+    # Assert
+    assert actual_args.config_file == Path('tests/data/config.yml')
+    assert actual_args.regenerate == False
+    assert actual_args.verbose == 0
+
+# ----------------------------------------------------------------------------------------------------
+def test_cli_flags():
+    # Act
+    actual_args = cli(['--config-file=tests/data/config.yml',
+                       '--regenerate',
+                       '-vvv'])
+
+    # Assert
+    assert actual_args.config_file == Path('tests/data/config.yml')
+    assert actual_args.regenerate == True
+    assert actual_args.verbose == 3
+
+
+# ----------------------------------------------------------------------------------------------------
+def test_cli_config_from_file():
+    # Act
+    actual_args = cli(['--config-file=tests/data/config.yml',
+                       '--regenerate',
+                       '-vvv'])
+
+    # Assert
+    assert actual_args.config_file == Path('tests/data/config.yml')
+    assert actual_args.regenerate == True
+    assert actual_args.config is not None
+    assert actual_args.config['content-type']['org']['input-files'] == ['~/first_from_config.org', '~/second_from_config.org']
+    assert actual_args.verbose == 3
+
+
+# ----------------------------------------------------------------------------------------------------
+def test_cli_config_from_cmd_args():
+    ""
+    # Act
+    actual_args = cli(['--org-files=first.org'])
+
+    # Assert
+    assert actual_args.org_files == ['first.org']
+    assert actual_args.config_file is None
+    assert actual_args.config is not None
+    assert actual_args.config['content-type']['org']['input-files'] == ['first.org']
+
+
+# ----------------------------------------------------------------------------------------------------
+def test_cli_config_from_cmd_args_override_config_file():
+    # Act
+    actual_args = cli(['--config-file=tests/data/config.yml',
+                       '--org-files=first.org'])
+
+    # Assert
+    assert actual_args.org_files == ['first.org']
+    assert actual_args.config_file == Path('tests/data/config.yml')
+    assert actual_args.config is not None
+    assert actual_args.config['content-type']['org']['input-files'] == ['first.org']
diff --git a/src/tests/test_main.py b/src/tests/test_main.py
index b6cdf224..059d9190 100644
--- a/src/tests/test_main.py
+++ b/src/tests/test_main.py
@@ -6,7 +6,7 @@ import pytest
 from fastapi.testclient import TestClient
 
 # Internal Packages
-from main import app, cli
+from main import app
 from search_type import asymmetric
 
 
@@ -14,82 +14,21 @@ from search_type import asymmetric
 # ----------------------------------------------------------------------------------------------------
 client = TestClient(app)
 
-input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')]
-input_filter = None
-compressed_jsonl = Path('tests/data/.test.jsonl.gz')
-embeddings = Path('tests/data/.test_embeddings.pt')
-regenerate = False
-verbose = 1
-
 
+# Test
 # ----------------------------------------------------------------------------------------------------
 def test_asymmetric_setup():
+    # Arrange
+    input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')]
+    input_filter = None
+    compressed_jsonl = Path('tests/data/.test.jsonl.gz')
+    embeddings = Path('tests/data/.test_embeddings.pt')
+    regenerate = False
+    verbose = 1
+
     # Act
     entries, corpus_embeddings, bi_encoder, cross_encoder, top_k = asymmetric.setup(input_files, input_filter, compressed_jsonl, embeddings, regenerate, verbose)
 
     # Assert
     assert len(entries) == 10
     assert len(corpus_embeddings) == 10
-
-
-def test_cli_minimal_default():
-    # Act
-    actual_args = cli(['--config-file=tests/data/config.yml'])
-
-    # Assert
-    assert actual_args.config_file == Path('tests/data/config.yml')
-    assert actual_args.regenerate == False
-    assert actual_args.verbose == 0
-
-# ----------------------------------------------------------------------------------------------------
-def test_cli_flags():
-    # Act
-    actual_args = cli(['--config-file=tests/data/config.yml',
-                       '--regenerate',
-                       '-vvv'])
-
-    # Assert
-    assert actual_args.config_file == Path('tests/data/config.yml')
-    assert actual_args.regenerate == True
-    assert actual_args.verbose == 3
-
-
-# ----------------------------------------------------------------------------------------------------
-def test_cli_config_from_file():
-    # Act
-    actual_args = cli(['--config-file=tests/data/config.yml',
-                       '--regenerate',
-                       '-vvv'])
-
-    # Assert
-    assert actual_args.config_file == Path('tests/data/config.yml')
-    assert actual_args.regenerate == True
-    assert actual_args.config is not None
-    assert actual_args.config['content-type']['org']['input-files'] == ['~/first_from_config.org', '~/second_from_config.org']
-    assert actual_args.verbose == 3
-
-
-# ----------------------------------------------------------------------------------------------------
-def test_cli_config_from_cmd_args():
-    ""
-    # Act
-    actual_args = cli(['--org-files=first.org'])
-
-    # Assert
-    assert actual_args.org_files == ['first.org']
-    assert actual_args.config_file is None
-    assert actual_args.config is not None
-    assert actual_args.config['content-type']['org']['input-files'] == ['first.org']
-
-
-# ----------------------------------------------------------------------------------------------------
-def test_cli_config_from_cmd_args_override_config_file():
-    # Act
-    actual_args = cli(['--config-file=tests/data/config.yml',
-                       '--org-files=first.org'])
-
-    # Assert
-    assert actual_args.org_files == ['first.org']
-    assert actual_args.config_file == Path('tests/data/config.yml')
-    assert actual_args.config is not None
-    assert actual_args.config['content-type']['org']['input-files'] == ['first.org']
diff --git a/src/utils/cli.py b/src/utils/cli.py
new file mode 100644
index 00000000..d70cb168
--- /dev/null
+++ b/src/utils/cli.py
@@ -0,0 +1,61 @@
+# Standard Packages
+import argparse
+import pathlib
+
+# External Packages
+import yaml
+
+# Internal Packages
+from utils.helpers import is_none_or_empty, get_absolute_path, get_from_dict, merge_dicts
+
+def cli(args=None):
+    if is_none_or_empty(args):
+        return None
+
+    # Setup Argument Parser for the Commandline Interface
+    parser = argparse.ArgumentParser(description="Expose API for Semantic Search")
+    parser.add_argument('--org-files', '-i', nargs='*', help="List of org-mode files to process")
+    parser.add_argument('--org-filter', type=str, default=None, help="Regex filter for org-mode files to process")
+    parser.add_argument('--config-file', '-c', type=pathlib.Path, help="YAML file with user configuration")
+    parser.add_argument('--regenerate', action='store_true', default=False, help="Regenerate model embeddings from source files. Default: false")
+    parser.add_argument('--verbose', '-v', action='count', default=0, help="Show verbose conversion logs. Default: 0")
+    args = parser.parse_args(args)
+
+    if not (args.config_file or args.org_files):
+        print(f"Require at least 1 of --org-file, --org-filter or --config-file flags to be passed from commandline")
+        exit(1)
+
+    # Config Priority: Cmd Args > Config File > Default Config
+    args.config = default_config
+    if args.config_file and args.config_file.exists():
+        with open(get_absolute_path(args.config_file), 'r', encoding='utf-8') as config_file:
+            config_from_file = yaml.safe_load(config_file)
+            args.config = merge_dicts(priority_dict=config_from_file, default_dict=args.config)
+
+    if args.org_files:
+        args.config['content-type']['org']['input-files'] = args.org_files
+
+    if args.org_filter:
+        args.config['content-type']['org']['input-filter'] = args.org_filter
+
+    return args
+
+
+default_config = {
+    'content-type':
+    {
+        'org':
+        {
+            'compressed-jsonl': '.notes.jsonl.gz',
+            'embeddings-file': '.note_embeddings.pt'
+        }
+    },
+    'search-type':
+    {
+        'asymmetric':
+        {
+            'encoder': "sentence-transformers/msmarco-MiniLM-L-6-v3",
+            'cross-encoder': "cross-encoder/ms-marco-MiniLM-L-6-v2"
+        }
+    }
+}