Read content plugin configs from Khoj config YAML

Configure external text content plugins via the Khoj YAML
Reuse existing TextContentConfig definition for external text content plugins
This commit is contained in:
Debanjum Singh Solanky 2023-02-22 14:47:22 -06:00
parent f57d7bf5ad
commit fcbbe8c759
3 changed files with 27 additions and 2 deletions

View file

@@ -1,7 +1,7 @@
# System Packages # System Packages
import json import json
from pathlib import Path from pathlib import Path
from typing import List, Optional from typing import List, Dict, Optional
# External Packages # External Packages
from pydantic import BaseModel, validator from pydantic import BaseModel, validator
@@ -56,6 +56,7 @@ class ContentConfig(ConfigBase):
image: Optional[ImageContentConfig] image: Optional[ImageContentConfig]
music: Optional[TextContentConfig] music: Optional[TextContentConfig]
markdown: Optional[TextContentConfig] markdown: Optional[TextContentConfig]
plugins: Optional[Dict[str, TextContentConfig]]
class TextSearchConfig(ConfigBase): class TextSearchConfig(ConfigBase):

View file

@@ -6,6 +6,17 @@ content-type:
embeddings-file: ".note_embeddings.pt" embeddings-file: ".note_embeddings.pt"
index-header-entries: true index-header-entries: true
plugins:
content_plugin_1:
input-files: [ "content_plugin_1_new.jsonl.gz" ]
compressed-jsonl: "content_plugin_1.jsonl.gz"
embeddings-file: "content_plugin_1_embeddings.pt"
content_plugin_2:
input-filter: [ "*2_new.jsonl.gz" ]
compressed-jsonl: "content_plugin_2.jsonl.gz"
embeddings-file: "content_plugin_2_embeddings.pt"
search-type: search-type:
asymmetric: asymmetric:
encoder: "sentence-transformers/msmarco-MiniLM-L-6-v3" encoder: "sentence-transformers/msmarco-MiniLM-L-6-v3"

View file

@@ -43,8 +43,21 @@ def test_cli_config_from_file():
assert actual_args.no_gui == True assert actual_args.no_gui == True
assert actual_args.regenerate == True assert actual_args.regenerate == True
assert actual_args.config is not None assert actual_args.config is not None
assert actual_args.verbose == 3
# Ensure content config is loaded from file
assert actual_args.config.content_type.org.input_files == [ assert actual_args.config.content_type.org.input_files == [
Path("~/first_from_config.org"), Path("~/first_from_config.org"),
Path("~/second_from_config.org"), Path("~/second_from_config.org"),
] ]
assert actual_args.verbose == 3 assert len(actual_args.config.content_type.plugins.keys()) == 2
assert actual_args.config.content_type.plugins["content_plugin_1"].input_files == [
Path("content_plugin_1_new.jsonl.gz")
]
assert actual_args.config.content_type.plugins["content_plugin_2"].input_filter == ["*2_new.jsonl.gz"]
assert actual_args.config.content_type.plugins["content_plugin_1"].compressed_jsonl == Path(
"content_plugin_1.jsonl.gz"
)
assert actual_args.config.content_type.plugins["content_plugin_2"].embeddings_file == Path(
"content_plugin_2_embeddings.pt"
)