Update to re-use the raw config base models in config.py as well

commit da52433d89 (parent 6292fe4481)

3 changed files with 25 additions and 61 deletions

src/main.py: 14 lines changed
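
Note on the change: main.py, the CLI, and config.py now consume the pydantic "raw config" base models (FullConfig and the per-section models imported from src/utils/rawconfig) instead of probing nested dictionaries with get_from_dict. The rawconfig module itself is not part of this diff; the sketch below is a hypothetical reconstruction of the relevant pieces, assuming pydantic v1 style models and kebab-case aliases for YAML keys such as 'content-type' and 'input-files'. Only the class names and the attributes accessed in the hunks below are taken from the commit.

# Hypothetical sketch, not part of this commit: what the raw config base models in
# src/utils/rawconfig.py might look like. Field types, optionality, and the kebab-case
# alias handling are assumptions; only class names and accessed fields come from the diff.
from typing import List, Optional
from pydantic import BaseModel


def to_kebab_case(field_name: str) -> str:
    # YAML keys such as 'input-files' and 'content-type' use dashes,
    # so map snake_case model fields onto them via aliases
    return field_name.replace('_', '-')


class RawConfigBase(BaseModel):
    class Config:
        alias_generator = to_kebab_case
        allow_population_by_field_name = True  # pydantic v1 setting


class TextSearchConfigTest(RawConfigBase):
    input_files: Optional[List[str]] = None
    input_filter: Optional[str] = None
    compressed_jsonl: Optional[str] = None
    embeddings_file: Optional[str] = None


class ContentTypeConfig(RawConfigBase):
    # main.py also reads an 'image' section (ImageSearchConfigTest); omitted here for brevity
    org: Optional[TextSearchConfigTest] = None
    music: Optional[TextSearchConfigTest] = None
    ledger: Optional[TextSearchConfigTest] = None


class FullConfig(RawConfigBase):
    # a 'processor' section (config.processor.conversation) is also accessed; omitted here
    content_type: Optional[ContentTypeConfig] = None
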
@@ -122,38 +122,38 @@ def chat(q: str):
     return {'status': 'ok', 'response': gpt_response}


-def initialize_search(config, regenerate, verbose):
+def initialize_search(config: FullConfig, regenerate, verbose):
     model = SearchModels()
     search_config = SearchConfig()

     # Initialize Org Notes Search
-    search_config.notes = TextSearchConfig.create_from_dictionary(config, ('content-type', 'org'), verbose)
+    search_config.notes = TextSearchConfig(config.content_type.org, verbose) if config.content_type.org else None
     if search_config.notes:
         model.notes_search = asymmetric.setup(search_config.notes, regenerate=regenerate)

     # Initialize Org Music Search
-    search_config.music = TextSearchConfig.create_from_dictionary(config, ('content-type', 'music'), verbose)
+    search_config.music = TextSearchConfig(config.content_type.music, verbose) if config.content_type.music else None
     if search_config.music:
         model.music_search = asymmetric.setup(search_config.music, regenerate=regenerate)

     # Initialize Ledger Search
-    search_config.ledger = TextSearchConfig.create_from_dictionary(config, ('content-type', 'ledger'), verbose)
+    search_config.ledger = TextSearchConfig(config.content_type.ledger, verbose) if config.content_type.ledger else None
     if search_config.ledger:
         model.ledger_search = symmetric_ledger.setup(search_config.ledger, regenerate=regenerate)

     # Initialize Image Search
-    search_config.image = ImageSearchConfig.create_from_dictionary(config, ('content-type', 'image'), verbose)
+    search_config.image = ImageSearchConfig(config.content_type.image, verbose) if config.content_type.image else None
     if search_config.image:
         model.image_search = image_search.setup(search_config.image, regenerate=regenerate)

     return model, search_config


-def initialize_processor(config, verbose):
+def initialize_processor(config: FullConfig, verbose):
     processor_config = ProcessorConfig()

     # Initialize Conversation Processor
-    processor_config.conversation = ConversationProcessorConfig.create_from_dictionary(config, ('processor', 'conversation'), verbose)
+    processor_config.conversation = ConversationProcessorConfig(config.processor.conversation, verbose)

     # Load or Initialize Conversation History from Disk
     conversation_logfile = processor_config.conversation.conversation_logfile
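
Each content type in initialize_search now follows the same guard-and-wrap pattern: construct the typed search config only if its raw section is present. A hypothetical helper that would factor out that repetition is sketched below; it is not part of the commit, and the name wrap_if_set is invented.

# Hypothetical refactoring sketch, not in the commit: factor out the repeated
# `Wrapper(raw, verbose) if raw else None` pattern used for notes, music, ledger and image.
from typing import Callable, Optional, TypeVar

Raw = TypeVar('Raw')          # a raw pydantic config section, e.g. TextSearchConfigTest
Wrapped = TypeVar('Wrapped')  # the runtime config it is wrapped into, e.g. TextSearchConfig


def wrap_if_set(raw_section: Optional[Raw], wrapper: Callable[[Raw, bool], Wrapped], verbose: bool) -> Optional[Wrapped]:
    # Return None when the section is absent from the user's config, mirroring the diff above
    return wrapper(raw_section, verbose) if raw_section else None


# Usage would then read, for example:
#   search_config.notes = wrap_if_set(config.content_type.org, TextSearchConfig, verbose)
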
@@ -1,6 +1,7 @@
 # Standard Packages
 import argparse
 import pathlib
+import json

 # External Packages
 import yaml
@@ -36,7 +37,7 @@ def cli(args=None):
         with open(get_absolute_path(args.config_file), 'r', encoding='utf-8') as config_file:
             config_from_file = yaml.safe_load(config_file)
             args.config = merge_dicts(priority_dict=config_from_file, default_dict=args.config)
-            args.config = FullConfig(args.config)
+            args.config = FullConfig.parse_raw(json.dumps(args.config))

     if args.org_files:
         args.config['content-type']['org']['input-files'] = args.org_files
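
The cli() change above serializes the merged dict back to JSON so that pydantic can validate it into a FullConfig. A stand-alone illustration of that round-trip with a stand-in model (DemoConfig is illustrative, not from the repo):

# Stand-in demonstration of the FullConfig.parse_raw(json.dumps(...)) pattern; DemoConfig
# is hypothetical, only the parse_raw/json.dumps round-trip mirrors the diff above.
import json
from pydantic import BaseModel


class DemoConfig(BaseModel):
    name: str
    verbose: int = 0


merged = {'name': 'notes', 'verbose': 1}           # e.g. the dict returned by merge_dicts()
config = DemoConfig.parse_raw(json.dumps(merged))  # serialize to JSON, then validate and parse
assert config.name == 'notes' and config.verbose == 1

# In pydantic v1, DemoConfig.parse_obj(merged) validates the dict directly and would
# avoid the JSON round-trip; parse_raw is shown here to match the commit.
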
@@ -6,6 +6,8 @@ from pathlib import Path
 # Internal Packages
 from src.utils.helpers import get_from_dict
+from src.utils.rawconfig import TextSearchConfigTest, ImageSearchConfigTest, ProcessorConversationConfig
+


 class SearchType(str, Enum):
     Notes = "notes"
@@ -43,49 +45,22 @@ class SearchModels():


 class TextSearchConfig():
-    def __init__(self, input_files, input_filter, compressed_jsonl, embeddings_file, verbose):
-        self.input_files = input_files
-        self.input_filter = input_filter
-        self.compressed_jsonl = Path(compressed_jsonl)
-        self.embeddings_file = Path(embeddings_file)
+    def __init__(self, text_search_config: TextSearchConfigTest, verbose: bool):
+        self.input_files = text_search_config.input_files
+        self.input_filter = text_search_config.input_filter
+        self.compressed_jsonl = Path(text_search_config.compressed_jsonl)
+        self.embeddings_file = Path(text_search_config.embeddings_file)
         self.verbose = verbose

-    def create_from_dictionary(config, key_tree, verbose):
-        text_config = get_from_dict(config, *key_tree)
-        search_enabled = text_config and ('input-files' in text_config or 'input-filter' in text_config)
-        if not search_enabled:
-            return None
-
-        return TextSearchConfig(
-            input_files = text_config['input-files'],
-            input_filter = text_config['input-filter'],
-            compressed_jsonl = Path(text_config['compressed-jsonl']),
-            embeddings_file = Path(text_config['embeddings-file']),
-            verbose = verbose)
-

 class ImageSearchConfig():
-    def __init__(self, input_directory, embeddings_file, batch_size, use_xmp_metadata, verbose):
-        self.input_directory = input_directory
-        self.embeddings_file = Path(embeddings_file)
-        self.batch_size = batch_size
-        self.use_xmp_metadata = use_xmp_metadata
+    def __init__(self, image_search_config: ImageSearchConfigTest, verbose):
+        self.input_directory = Path(image_search_config.input_directory)
+        self.embeddings_file = Path(image_search_config.embeddings_file)
+        self.batch_size = image_search_config.batch_size
+        self.use_xmp_metadata = image_search_config.use_xmp_metadata
         self.verbose = verbose

-    def create_from_dictionary(config, key_tree, verbose):
-        image_config = get_from_dict(config, *key_tree)
-        search_enabled = image_config and 'input-directory' in image_config
-        if not search_enabled:
-            return None
-
-        return ImageSearchConfig(
-            input_directory = Path(image_config['input-directory']),
-            embeddings_file = Path(image_config['embeddings-file']),
-            batch_size = image_config['batch-size'],
-            use_xmp_metadata = {'yes': True, 'no': False}[image_config['use-xmp-metadata']],
-            verbose = verbose)
-

 @dataclass
 class SearchConfig():
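
The ImageSearchConfigTest raw model consumed by the new ImageSearchConfig.__init__ lives in src/utils/rawconfig.py and is not shown in this diff. A hedged sketch, based only on the attributes read above:

# Hypothetical sketch of the raw image search model; only the attribute names used in
# the new __init__ come from the diff, the types and optionality are assumptions.
from typing import Optional
from pydantic import BaseModel


class ImageSearchConfigTest(BaseModel):
    input_directory: Optional[str] = None    # wrapped in Path() by ImageSearchConfig
    embeddings_file: Optional[str] = None    # wrapped in Path() by ImageSearchConfig
    batch_size: Optional[int] = None
    use_xmp_metadata: Optional[bool] = None  # previously parsed from a 'yes'/'no' string in the dict config
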
@@ -96,24 +71,12 @@ class SearchConfig():


 class ConversationProcessorConfig():
-    def __init__(self, conversation_logfile, conversation_history, openai_api_key, verbose):
-        self.openai_api_key = openai_api_key
-        self.conversation_logfile = conversation_logfile
-        self.conversation_history = conversation_history
+    def __init__(self, processor_config: ProcessorConversationConfig, verbose: bool):
+        self.openai_api_key = processor_config.open_api_key
+        self.conversation_logfile = Path(processor_config.conversation_logfile)
+        self.conversation_history = Path(processor_config.conversation_history)
         self.verbose = verbose

-    def create_from_dictionary(config, key_tree, verbose):
-        conversation_config = get_from_dict(config, *key_tree)
-        if not conversation_config:
-            return None
-
-        return ConversationProcessorConfig(
-            openai_api_key = conversation_config['openai-api-key'],
-            conversation_history = '',
-            conversation_logfile = Path(conversation_config['conversation-logfile']),
-            verbose = verbose)
-

 @dataclass
 class ProcessorConfig():
     conversation: ConversationProcessorConfig = None
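
Similarly, ProcessorConversationConfig is imported from src/utils/rawconfig.py but not defined in this diff. A hedged sketch consistent with the attributes the new ConversationProcessorConfig.__init__ reads (note it accesses open_api_key, not openai_api_key):

# Hypothetical sketch of the raw conversation processor model; attribute names are taken
# from the accesses above, everything else (types, defaults) is assumed.
from typing import Optional
from pydantic import BaseModel


class ProcessorConversationConfig(BaseModel):
    open_api_key: Optional[str] = None           # read as processor_config.open_api_key above
    conversation_logfile: Optional[str] = None   # wrapped in Path() by ConversationProcessorConfig
    conversation_history: Optional[str] = None   # wrapped in Path() by ConversationProcessorConfig
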