Read content plugin configs from Khoj config YAML

- Configure external text content plugins via the Khoj config YAML
- Reuse the existing TextContentConfig definition for external text content plugins
This commit is contained in:
Debanjum Singh Solanky 2023-02-22 14:47:22 -06:00
parent f57d7bf5ad
commit fcbbe8c759
3 changed files with 27 additions and 2 deletions

View file

@ -1,7 +1,7 @@
# System Packages
import json
from pathlib import Path
from typing import List, Optional
from typing import List, Dict, Optional
# External Packages
from pydantic import BaseModel, validator
@ -56,6 +56,7 @@ class ContentConfig(ConfigBase):
image: Optional[ImageContentConfig]
music: Optional[TextContentConfig]
markdown: Optional[TextContentConfig]
plugins: Optional[Dict[str, TextContentConfig]]
class TextSearchConfig(ConfigBase):

View file

@ -6,6 +6,17 @@ content-type:
embeddings-file: ".note_embeddings.pt"
index-header-entries: true
plugins:
content_plugin_1:
input-files: [ "content_plugin_1_new.jsonl.gz" ]
compressed-jsonl: "content_plugin_1.jsonl.gz"
embeddings-file: "content_plugin_1_embeddings.pt"
content_plugin_2:
input-filter: [ "*2_new.jsonl.gz" ]
compressed-jsonl: "content_plugin_2.jsonl.gz"
embeddings-file: "content_plugin_2_embeddings.pt"
search-type:
asymmetric:
encoder: "sentence-transformers/msmarco-MiniLM-L-6-v3"

View file

@ -43,8 +43,21 @@ def test_cli_config_from_file():
assert actual_args.no_gui == True
assert actual_args.regenerate == True
assert actual_args.config is not None
assert actual_args.verbose == 3
# Ensure content config is loaded from file
assert actual_args.config.content_type.org.input_files == [
Path("~/first_from_config.org"),
Path("~/second_from_config.org"),
]
assert actual_args.verbose == 3
assert len(actual_args.config.content_type.plugins.keys()) == 2
assert actual_args.config.content_type.plugins["content_plugin_1"].input_files == [
Path("content_plugin_1_new.jsonl.gz")
]
assert actual_args.config.content_type.plugins["content_plugin_2"].input_filter == ["*2_new.jsonl.gz"]
assert actual_args.config.content_type.plugins["content_plugin_1"].compressed_jsonl == Path(
"content_plugin_1.jsonl.gz"
)
assert actual_args.config.content_type.plugins["content_plugin_2"].embeddings_file == Path(
"content_plugin_2_embeddings.pt"
)