Mark all required fields for config. Throw if no input_* field specified

- Add custom validator to throw if neither input_filter nor
  input_<files|directories> is specified

- Set fields expecting paths to type Path

- Now that default_config isn't used in code, we can update
  fields in rawconfig to specify whether they're required or not.
  This lets pydantic validate the config file and throw an appropriate error
This commit is contained in:
Debanjum Singh Solanky 2022-08-04 23:26:31 +03:00
parent f78d6ae754
commit 1374065092
2 changed files with 25 additions and 13 deletions

View file

@ -3,7 +3,7 @@ from pathlib import Path
from typing import List, Optional from typing import List, Optional
# External Packages # External Packages
from pydantic import BaseModel from pydantic import BaseModel, validator
# Internal Packages # Internal Packages
from src.utils.helpers import to_snake_case_from_dash from src.utils.helpers import to_snake_case_from_dash
@ -14,17 +14,29 @@ class ConfigBase(BaseModel):
allow_population_by_field_name = True allow_population_by_field_name = True
class TextContentConfig(ConfigBase): class TextContentConfig(ConfigBase):
compressed_jsonl: Optional[Path] input_files: Optional[List[Path]]
input_files: Optional[List[str]]
input_filter: Optional[str] input_filter: Optional[str]
embeddings_file: Optional[Path] compressed_jsonl: Path
embeddings_file: Path
@validator('input_filter')
def input_filter_or_files_required(cls, input_filter, values, **kwargs):
if input_filter is None and ('input_files' not in values or values["input_files"] is None):
raise ValueError("Either input_filter or input_files required in all content-type.<text_search> section of Khoj config file")
return input_filter
class ImageContentConfig(ConfigBase): class ImageContentConfig(ConfigBase):
use_xmp_metadata: Optional[bool]
batch_size: Optional[int]
input_directories: Optional[List[Path]] input_directories: Optional[List[Path]]
input_filter: Optional[str] input_filter: Optional[str]
embeddings_file: Optional[Path] embeddings_file: Path
use_xmp_metadata: bool
batch_size: int
@validator('input_filter')
def input_filter_or_directories_required(cls, input_filter, values, **kwargs):
if input_filter is None and ('input_directories' not in values or values["input_directories"] is None):
raise ValueError("Either input_filter or input_directories required in all content-type.image section of Khoj config file")
return input_filter
class ContentConfig(ConfigBase): class ContentConfig(ConfigBase):
org: Optional[TextContentConfig] org: Optional[TextContentConfig]
@ -34,12 +46,12 @@ class ContentConfig(ConfigBase):
markdown: Optional[TextContentConfig] markdown: Optional[TextContentConfig]
class TextSearchConfig(ConfigBase): class TextSearchConfig(ConfigBase):
encoder: Optional[str] encoder: str
cross_encoder: Optional[str] cross_encoder: str
model_directory: Optional[Path] model_directory: Optional[Path]
class ImageSearchConfig(ConfigBase): class ImageSearchConfig(ConfigBase):
encoder: Optional[str] encoder: str
model_directory: Optional[Path] model_directory: Optional[Path]
class SearchConfig(ConfigBase): class SearchConfig(ConfigBase):
@ -48,8 +60,8 @@ class SearchConfig(ConfigBase):
image: Optional[ImageSearchConfig] image: Optional[ImageSearchConfig]
class ConversationProcessorConfig(ConfigBase): class ConversationProcessorConfig(ConfigBase):
openai_api_key: Optional[str] openai_api_key: str
conversation_logfile: Optional[str] conversation_logfile: Path
class ProcessorConfig(ConfigBase): class ProcessorConfig(ConfigBase):
conversation: Optional[ConversationProcessorConfig] conversation: Optional[ConversationProcessorConfig]

View file

@ -40,5 +40,5 @@ def test_cli_config_from_file():
assert actual_args.config_file == Path('tests/data/config.yml') assert actual_args.config_file == Path('tests/data/config.yml')
assert actual_args.regenerate == True assert actual_args.regenerate == True
assert actual_args.config is not None assert actual_args.config is not None
assert actual_args.config.content_type.org.input_files == ['~/first_from_config.org', '~/second_from_config.org'] assert actual_args.config.content_type.org.input_files == [Path('~/first_from_config.org'), Path('~/second_from_config.org')]
assert actual_args.verbose == 3 assert actual_args.verbose == 3