diff --git a/src/khoj/utils/rawconfig.py b/src/khoj/utils/rawconfig.py index 4fbe6543..389e80f6 100644 --- a/src/khoj/utils/rawconfig.py +++ b/src/khoj/utils/rawconfig.py @@ -1,7 +1,7 @@ # System Packages import json from pathlib import Path -from typing import List, Optional +from typing import List, Dict, Optional # External Packages from pydantic import BaseModel, validator @@ -56,6 +56,7 @@ class ContentConfig(ConfigBase): image: Optional[ImageContentConfig] music: Optional[TextContentConfig] markdown: Optional[TextContentConfig] + plugins: Optional[Dict[str, TextContentConfig]] class TextSearchConfig(ConfigBase): diff --git a/tests/data/config.yml b/tests/data/config.yml index 41603972..6d3aa35b 100644 --- a/tests/data/config.yml +++ b/tests/data/config.yml @@ -6,6 +6,17 @@ content-type: embeddings-file: ".note_embeddings.pt" index-header-entries: true + plugins: + content_plugin_1: + input-files: [ "content_plugin_1_new.jsonl.gz" ] + compressed-jsonl: "content_plugin_1.jsonl.gz" + embeddings-file: "content_plugin_1_embeddings.pt" + + content_plugin_2: + input-filter: [ "*2_new.jsonl.gz" ] + compressed-jsonl: "content_plugin_2.jsonl.gz" + embeddings-file: "content_plugin_2_embeddings.pt" + search-type: asymmetric: encoder: "sentence-transformers/msmarco-MiniLM-L-6-v3" diff --git a/tests/test_cli.py b/tests/test_cli.py index b7e18460..98b2353c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -43,8 +43,21 @@ def test_cli_config_from_file(): assert actual_args.no_gui == True assert actual_args.regenerate == True assert actual_args.config is not None + assert actual_args.verbose == 3 + + # Ensure content config is loaded from file assert actual_args.config.content_type.org.input_files == [ Path("~/first_from_config.org"), Path("~/second_from_config.org"), ] - assert actual_args.verbose == 3 + assert len(actual_args.config.content_type.plugins.keys()) == 2 + assert actual_args.config.content_type.plugins["content_plugin_1"].input_files == [ + Path("content_plugin_1_new.jsonl.gz") + ] + assert actual_args.config.content_type.plugins["content_plugin_2"].input_filter == ["*2_new.jsonl.gz"] + assert actual_args.config.content_type.plugins["content_plugin_1"].compressed_jsonl == Path( + "content_plugin_1.jsonl.gz" + ) + assert actual_args.config.content_type.plugins["content_plugin_2"].embeddings_file == Path( + "content_plugin_2_embeddings.pt" + )