mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-12-02 20:03:01 +01:00
918af5e6f8
- Default to using `text-davinci-003` if the conversation model is not explicitly configured by the user. Stop using the older `davinci` and `davinci-instruct` models - Use `model` instead of `engine` as the parameter. Usage of the `engine` parameter in the OpenAI API is deprecated
106 lines
No EOL
3.3 KiB
Python
106 lines
No EOL
3.3 KiB
Python
# System Packages
|
|
import json
|
|
from pathlib import Path
|
|
from typing import List, Optional
|
|
|
|
# External Packages
|
|
from pydantic import BaseModel, validator
|
|
|
|
# Internal Packages
|
|
from src.utils.helpers import to_snake_case_from_dash, is_none_or_empty
|
|
|
|
class ConfigBase(BaseModel):
    """Common pydantic base for the models declared in this module."""

    class Config:
        """Pydantic model settings inherited by every subclass."""

        # Field aliases are produced by the project helper; its exact
        # name mapping lives in src.utils.helpers and is not visible here.
        alias_generator = to_snake_case_from_dash
        # Accept the Python field name as well as the generated alias.
        allow_population_by_field_name = True
|
|
|
|
class TextContentConfig(ConfigBase):
    """Settings for one text content source.

    At least one of ``input_files`` or ``input_filter`` must be supplied;
    the requirement is enforced by ``input_filter_or_files_required``.
    """

    # Must stay declared before input_filter: the validator below reads
    # the already-validated input_files out of `values`.
    input_files: Optional[List[Path]]
    input_filter: Optional[List[str]]
    compressed_jsonl: Path
    embeddings_file: Path
    index_heading_entries: Optional[bool] = False

    # always=True makes pydantic run this check even when input_filter is
    # absent from the input. Without it the validator is skipped for a
    # missing field, so a config omitting both keys was silently accepted.
    @validator('input_filter', always=True)
    def input_filter_or_files_required(cls, input_filter, values, **kwargs):
        """Reject configs that provide neither input_filter nor input_files.

        Args:
            input_filter: Value supplied for ``input_filter`` (``None`` when omitted).
            values: Fields validated so far; ``input_files`` is absent from it
                when that field itself failed validation.

        Returns:
            ``input_filter`` unchanged.

        Raises:
            ValueError: If ``input_filter`` is none-or-empty (per
                ``is_none_or_empty``) and ``input_files`` is missing or ``None``.
        """
        if is_none_or_empty(input_filter) and values.get("input_files") is None:
            raise ValueError("Either input_filter or input_files required in all content-type.<text_search> section of Khoj config file")
        return input_filter
|
|
|
|
class ImageContentConfig(ConfigBase):
    """Settings for the image content source.

    At least one of ``input_directories`` or ``input_filter`` must be
    supplied; the requirement is enforced by
    ``input_filter_or_directories_required``.
    """

    # Must stay declared before input_filter: the validator below reads
    # the already-validated input_directories out of `values`.
    input_directories: Optional[List[Path]]
    input_filter: Optional[List[str]]
    embeddings_file: Path
    use_xmp_metadata: bool
    batch_size: int

    # always=True makes pydantic run this check even when input_filter is
    # absent from the input. Without it the validator is skipped for a
    # missing field, so a config omitting both keys was silently accepted.
    @validator('input_filter', always=True)
    def input_filter_or_directories_required(cls, input_filter, values, **kwargs):
        """Reject configs that provide neither input_filter nor input_directories.

        Args:
            input_filter: Value supplied for ``input_filter`` (``None`` when omitted).
            values: Fields validated so far; ``input_directories`` is absent
                from it when that field itself failed validation.

        Returns:
            ``input_filter`` unchanged.

        Raises:
            ValueError: If ``input_filter`` is none-or-empty (per
                ``is_none_or_empty``) and ``input_directories`` is missing
                or ``None``.
        """
        if is_none_or_empty(input_filter) and values.get("input_directories") is None:
            raise ValueError("Either input_filter or input_directories required in all content-type.image section of Khoj config file")
        return input_filter
|
|
|
|
class ContentConfig(ConfigBase):
    """Per-content-type settings; every entry is optional."""

    # Text-based sources all use TextContentConfig; images have their own model.
    org: Optional[TextContentConfig]
    ledger: Optional[TextContentConfig]
    image: Optional[ImageContentConfig]
    music: Optional[TextContentConfig]
    markdown: Optional[TextContentConfig]
|
|
|
|
class TextSearchConfig(ConfigBase):
    """Model settings for a text search type."""

    # Required model identifiers.
    encoder: str
    cross_encoder: str
    # Optional extras.
    encoder_type: Optional[str]
    model_directory: Optional[Path]
|
|
|
|
class ImageSearchConfig(ConfigBase):
    """Model settings for image search."""

    # Required model identifier.
    encoder: str
    # Optional extras.
    encoder_type: Optional[str]
    model_directory: Optional[Path]
|
|
|
|
class SearchConfig(ConfigBase):
    """Settings for each search type; every entry is optional."""

    # Both text search flavours share the TextSearchConfig model.
    asymmetric: Optional[TextSearchConfig]
    symmetric: Optional[TextSearchConfig]
    image: Optional[ImageSearchConfig]
|
|
|
|
class ConversationProcessorConfig(ConfigBase):
    """Settings for the conversation processor."""

    openai_api_key: str
    conversation_logfile: Path
    # Used when the config does not name a model explicitly.
    model: Optional[str] = "text-davinci-003"
|
|
|
|
class ProcessorConfig(ConfigBase):
    """Settings for the available processors; currently only conversation."""

    conversation: Optional[ConversationProcessorConfig]
|
|
|
|
class FullConfig(ConfigBase):
    """Top-level config model combining content, search and processor settings."""

    # Every section is optional.
    content_type: Optional[ContentConfig]
    search_type: Optional[SearchConfig]
    processor: Optional[ProcessorConfig]
|
|
|
|
class SearchResponse(ConfigBase):
    """Model of a single search result."""

    entry: str
    # NOTE: the score is declared as a string, not a number.
    score: str
    # Free-form extra data; its keys are not constrained here.
    additional: Optional[dict]
|
|
|
|
class Entry:
    """Plain container holding ``raw``, ``compiled`` and an optional ``file`` value.

    All three attributes default to ``None`` when not passed to the
    constructor, so they are annotated as optional.
    """

    raw: Optional[str]
    compiled: Optional[str]
    file: Optional[str]

    def __init__(
        self,
        raw: Optional[str] = None,
        compiled: Optional[str] = None,
        file: Optional[str] = None,
    ):
        """Store the given values on the instance unchanged."""
        self.raw = raw
        self.compiled = compiled
        self.file = file

    def to_json(self) -> str:
        """Return the instance attributes serialized as a JSON object string.

        Non-ASCII characters are written as-is rather than escaped.
        """
        return json.dumps(self.__dict__, ensure_ascii=False)

    def __repr__(self) -> str:
        """Return the repr of the instance's attribute dictionary."""
        return repr(self.__dict__)

    @classmethod
    def from_dict(cls, dictionary: dict):
        """Build an instance from a mapping.

        Args:
            dictionary: Must contain ``'raw'`` and ``'compiled'``;
                ``'file'`` is optional and becomes ``None`` when absent.

        Raises:
            KeyError: If ``'raw'`` or ``'compiled'`` is missing.
        """
        return cls(
            raw=dictionary['raw'],
            compiled=dictionary['compiled'],
            file=dictionary.get('file'),
        )