From d835467f2c8964c4bd439d7b09db5ec79a97c35b Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sat, 10 Sep 2022 13:05:21 +0300 Subject: [PATCH] Throw exception if no valid entries found in specified content files - Previously we were failing if no valid entries while computing embeddings. This was obscuring the actual issue of no valid entries found in the specified content files - Throwing an exception early with clear message when no entries found should make clarify the issue to be fixed - See issue #83 for details --- src/search_type/text_search.py | 4 +++- tests/test_text_search.py | 25 ++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/search_type/text_search.py b/src/search_type/text_search.py index 8666056c..7a80c296 100644 --- a/src/search_type/text_search.py +++ b/src/search_type/text_search.py @@ -11,7 +11,7 @@ from src.search_filter.base_filter import BaseFilter # Internal Packages from src.utils import state -from src.utils.helpers import get_absolute_path, resolve_absolute_path, load_model +from src.utils.helpers import get_absolute_path, is_none_or_empty, resolve_absolute_path, load_model from src.utils.config import TextSearchModel from src.utils.rawconfig import TextSearchConfig, TextContentConfig from src.utils.jsonl import load_jsonl @@ -174,6 +174,8 @@ def setup(text_to_jsonl, config: TextContentConfig, search_config: TextSearchCon # Extract Entries entries = extract_entries(config.compressed_jsonl) + if is_none_or_empty(entries): + raise ValueError(f"No valid entries found in specified files: {config.input_files} or {config.input_filter}") top_k = min(len(entries), top_k) # top_k hits can't be more than the total entries in corpus # Compute or Load Embeddings diff --git a/tests/test_text_search.py b/tests/test_text_search.py index 39fed92e..6a1471c9 100644 --- a/tests/test_text_search.py +++ b/tests/test_text_search.py @@ -1,6 +1,10 @@ # System Packages +from copy import deepcopy from pathlib import Path +# External Packages +import pytest + # Internal Packages from src.utils.state import model from src.search_type import text_search @@ -9,6 +13,25 @@ from src.processor.org_mode.org_to_jsonl import org_to_jsonl # Test +# ---------------------------------------------------------------------------------------------------- +def test_asymmetric_setup_with_empty_file_raises_error(content_config: ContentConfig, search_config: SearchConfig): + # Arrange + file_to_index = Path(content_config.org.input_filter).parent / "new_file_to_index.org" + file_to_index.touch() + new_org_content_config = deepcopy(content_config.org) + new_org_content_config.input_files = [f'{file_to_index}'] + new_org_content_config.input_filter = None + + # Act + # Generate notes embeddings during asymmetric setup + with pytest.raises(ValueError, match=r'^No valid entries found*'): + text_search.setup(org_to_jsonl, new_org_content_config, search_config.asymmetric, regenerate=True) + + # Cleanup + # delete created test file + file_to_index.unlink() + + # ---------------------------------------------------------------------------------------------------- def test_asymmetric_setup(content_config: ContentConfig, search_config: SearchConfig): # Act @@ -23,7 +46,7 @@ def test_asymmetric_setup(content_config: ContentConfig, search_config: SearchCo # ---------------------------------------------------------------------------------------------------- def test_asymmetric_search(content_config: ContentConfig, search_config: SearchConfig): # Arrange - model.notes_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False) + model.notes_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=True) query = "How to git install application?" # Act