mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-12-03 20:33:00 +01:00
1374065092
- Add custom validator to throw if neither input_filter nor input_<files|directories> is specified - Set fields expecting paths to type Path - Now that default_config isn't used in code, we can update fields in rawconfig to specify whether they're required or not. This lets pydantic validate the config file and throw an appropriate error
72 lines
2.4 KiB
Python
72 lines
2.4 KiB
Python
# System Packages
|
|
from pathlib import Path
|
|
from typing import List, Optional
|
|
|
|
# External Packages
|
|
from pydantic import BaseModel, validator
|
|
|
|
# Internal Packages
|
|
from src.utils.helpers import to_snake_case_from_dash
|
|
|
|
class ConfigBase(BaseModel):
    """Shared pydantic base class for the config models in this file.

    Subclasses inherit the settings declared in the nested ``Config`` class.
    """

    class Config:
        # Field aliases are generated from field names by this helper.
        alias_generator = to_snake_case_from_dash
        # Accept input keyed by the field name as well as by its alias.
        allow_population_by_field_name = True
|
|
|
|
class TextContentConfig(ConfigBase):
    """Settings for one text content-type section of the Khoj config file.

    ``compressed_jsonl`` and ``embeddings_file`` are required paths. At
    least one of ``input_files`` or ``input_filter`` must be provided.
    """

    input_files: Optional[List[Path]]
    input_filter: Optional[str]
    compressed_jsonl: Path
    embeddings_file: Path

    # always=True is needed because pydantic skips validators for fields
    # that are absent from the input; without it a section omitting both
    # input_filter and input_files would pass validation unnoticed.
    @validator('input_filter', always=True)
    def input_filter_or_files_required(cls, input_filter, values, **kwargs):
        """Ensure the section specifies input_filter or input_files.

        Args:
            input_filter: Value supplied for ``input_filter`` (``None`` when
                the key is missing or null).
            values: Previously validated fields; ``input_files`` is declared
                before ``input_filter`` so it is available here unless it
                failed its own validation.

        Returns:
            The unchanged ``input_filter`` value.

        Raises:
            ValueError: If both ``input_filter`` and ``input_files`` are unset.
        """
        input_files = values.get("input_files")
        if input_filter is None and input_files is None:
            raise ValueError("Either input_filter or input_files required in all content-type.<text_search> section of Khoj config file")
        return input_filter
|
|
|
|
class ImageContentConfig(ConfigBase):
    """Settings for the image content-type section of the Khoj config file.

    ``embeddings_file``, ``use_xmp_metadata`` and ``batch_size`` are
    required. At least one of ``input_directories`` or ``input_filter``
    must be provided.
    """

    input_directories: Optional[List[Path]]
    input_filter: Optional[str]
    embeddings_file: Path
    use_xmp_metadata: bool
    batch_size: int

    # always=True is needed because pydantic skips validators for fields
    # that are absent from the input; without it a section omitting both
    # input_filter and input_directories would pass validation unnoticed.
    @validator('input_filter', always=True)
    def input_filter_or_directories_required(cls, input_filter, values, **kwargs):
        """Ensure the section specifies input_filter or input_directories.

        Args:
            input_filter: Value supplied for ``input_filter`` (``None`` when
                the key is missing or null).
            values: Previously validated fields; ``input_directories`` is
                declared before ``input_filter`` so it is available here
                unless it failed its own validation.

        Returns:
            The unchanged ``input_filter`` value.

        Raises:
            ValueError: If both ``input_filter`` and ``input_directories``
                are unset.
        """
        input_directories = values.get("input_directories")
        if input_filter is None and input_directories is None:
            raise ValueError("Either input_filter or input_directories required in all content-type.image section of Khoj config file")
        return input_filter
|
|
|
|
class ContentConfig(ConfigBase):
    """Container for the per-content-type sections; each one is optional."""

    # Text-based content types share the TextContentConfig schema.
    org: Optional[TextContentConfig]
    ledger: Optional[TextContentConfig]
    # Images use their own schema.
    image: Optional[ImageContentConfig]
    music: Optional[TextContentConfig]
    markdown: Optional[TextContentConfig]
|
|
|
|
class TextSearchConfig(ConfigBase):
    """Settings for a text search type.

    ``encoder`` and ``cross_encoder`` are required strings (presumably model
    identifiers -- confirm against callers); ``model_directory`` is optional.
    """

    encoder: str
    cross_encoder: str
    model_directory: Optional[Path]
|
|
|
|
class ImageSearchConfig(ConfigBase):
    """Settings for image search.

    ``encoder`` is a required string (presumably a model identifier --
    confirm against callers); ``model_directory`` is optional.
    """

    encoder: str
    model_directory: Optional[Path]
|
|
|
|
class SearchConfig(ConfigBase):
    """Container for the per-search-type sections; each one is optional."""

    # Both text search variants share the TextSearchConfig schema.
    asymmetric: Optional[TextSearchConfig]
    symmetric: Optional[TextSearchConfig]
    image: Optional[ImageSearchConfig]
|
|
|
|
class ConversationProcessorConfig(ConfigBase):
    """Settings for the conversation processor; both fields are required."""

    openai_api_key: str
    conversation_logfile: Path
|
|
|
|
class ProcessorConfig(ConfigBase):
    """Container for processor sections; ``conversation`` is optional."""

    conversation: Optional[ConversationProcessorConfig]
|
|
|
|
class FullConfig(ConfigBase):
    """Top-level model of the config file; every section is optional."""

    content_type: Optional[ContentConfig]
    search_type: Optional[SearchConfig]
    processor: Optional[ProcessorConfig]
|