mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-27 17:35:07 +01:00
Read content plugin configs from Khoj config YAML
Configure external text content plugins via the Khoj YAML. Reuse existing TextContentConfig definition for external text content plugins.
This commit is contained in:
parent
f57d7bf5ad
commit
fcbbe8c759
3 changed files with 27 additions and 2 deletions
|
@ -1,7 +1,7 @@
|
|||
# System Packages
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
# External Packages
|
||||
from pydantic import BaseModel, validator
|
||||
|
@ -56,6 +56,7 @@ class ContentConfig(ConfigBase):
|
|||
image: Optional[ImageContentConfig]
|
||||
music: Optional[TextContentConfig]
|
||||
markdown: Optional[TextContentConfig]
|
||||
plugins: Optional[Dict[str, TextContentConfig]]
|
||||
|
||||
|
||||
class TextSearchConfig(ConfigBase):
|
||||
|
|
|
@ -6,6 +6,17 @@ content-type:
|
|||
embeddings-file: ".note_embeddings.pt"
|
||||
index-header-entries: true
|
||||
|
||||
plugins:
|
||||
content_plugin_1:
|
||||
input-files: [ "content_plugin_1_new.jsonl.gz" ]
|
||||
compressed-jsonl: "content_plugin_1.jsonl.gz"
|
||||
embeddings-file: "content_plugin_1_embeddings.pt"
|
||||
|
||||
content_plugin_2:
|
||||
input-filter: [ "*2_new.jsonl.gz" ]
|
||||
compressed-jsonl: "content_plugin_2.jsonl.gz"
|
||||
embeddings-file: "content_plugin_2_embeddings.pt"
|
||||
|
||||
search-type:
|
||||
asymmetric:
|
||||
encoder: "sentence-transformers/msmarco-MiniLM-L-6-v3"
|
||||
|
|
|
@ -43,8 +43,21 @@ def test_cli_config_from_file():
|
|||
assert actual_args.no_gui == True
|
||||
assert actual_args.regenerate == True
|
||||
assert actual_args.config is not None
|
||||
assert actual_args.verbose == 3
|
||||
|
||||
# Ensure content config is loaded from file
|
||||
assert actual_args.config.content_type.org.input_files == [
|
||||
Path("~/first_from_config.org"),
|
||||
Path("~/second_from_config.org"),
|
||||
]
|
||||
assert actual_args.verbose == 3
|
||||
assert len(actual_args.config.content_type.plugins.keys()) == 2
|
||||
assert actual_args.config.content_type.plugins["content_plugin_1"].input_files == [
|
||||
Path("content_plugin_1_new.jsonl.gz")
|
||||
]
|
||||
assert actual_args.config.content_type.plugins["content_plugin_2"].input_filter == ["*2_new.jsonl.gz"]
|
||||
assert actual_args.config.content_type.plugins["content_plugin_1"].compressed_jsonl == Path(
|
||||
"content_plugin_1.jsonl.gz"
|
||||
)
|
||||
assert actual_args.config.content_type.plugins["content_plugin_2"].embeddings_file == Path(
|
||||
"content_plugin_2_embeddings.pt"
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue