Mirror of https://github.com/khoj-ai/khoj.git (synced 2024-11-23 23:48:56 +01:00)
Update to re-use the raw config base models in config.py as well
This commit is contained in:
parent 6292fe4481
commit da52433d89
3 changed files with 25 additions and 61 deletions

14  src/main.py
src/main.py
@@ -122,38 +122,38 @@ def chat(q: str):
     return {'status': 'ok', 'response': gpt_response}
 
 
-def initialize_search(config, regenerate, verbose):
+def initialize_search(config: FullConfig, regenerate, verbose):
     model = SearchModels()
     search_config = SearchConfig()
 
     # Initialize Org Notes Search
-    search_config.notes = TextSearchConfig.create_from_dictionary(config, ('content-type', 'org'), verbose)
+    search_config.notes = TextSearchConfig(config.content_type.org, verbose) if config.content_type.org else None
     if search_config.notes:
         model.notes_search = asymmetric.setup(search_config.notes, regenerate=regenerate)
 
     # Initialize Org Music Search
-    search_config.music = TextSearchConfig.create_from_dictionary(config, ('content-type', 'music'), verbose)
+    search_config.music = TextSearchConfig(config.content_type.music, verbose) if config.content_type.music else None
     if search_config.music:
         model.music_search = asymmetric.setup(search_config.music, regenerate=regenerate)
 
     # Initialize Ledger Search
-    search_config.ledger = TextSearchConfig.create_from_dictionary(config, ('content-type', 'ledger'), verbose)
+    search_config.ledger = TextSearchConfig(config.content_type.ledger, verbose) if config.content_type.ledger else None
     if search_config.ledger:
         model.ledger_search = symmetric_ledger.setup(search_config.ledger, regenerate=regenerate)
 
     # Initialize Image Search
-    search_config.image = ImageSearchConfig.create_from_dictionary(config, ('content-type', 'image'), verbose)
+    search_config.image = ImageSearchConfig(config.content_type.image, verbose) if config.content_type.image else None
     if search_config.image:
         model.image_search = image_search.setup(search_config.image, regenerate=regenerate)
 
     return model, search_config
 
 
-def initialize_processor(config, verbose):
+def initialize_processor(config: FullConfig, verbose):
     processor_config = ProcessorConfig()
 
     # Initialize Conversation Processor
-    processor_config.conversation = ConversationProcessorConfig.create_from_dictionary(config, ('processor', 'conversation'), verbose)
+    processor_config.conversation = ConversationProcessorConfig(config.processor.conversation, verbose)
 
     # Load or Initialize Conversation History from Disk
     conversation_logfile = processor_config.conversation.conversation_logfile
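Note: initialize_search and initialize_processor now consume the parsed FullConfig, so a missing YAML section shows up as a None attribute instead of a failed dictionary lookup, and each search type is only set up when its section exists. A minimal sketch of the nested shape this implies, assuming pydantic v1 models; DemoFullConfig, ContentTypeSection and OrgNotesSection are illustrative stand-ins, only FullConfig, TextSearchConfigTest, ImageSearchConfigTest and ProcessorConversationConfig appear in this commit:

    # Stand-in models; the real ones live in src.utils.rawconfig
    from typing import Optional
    from pydantic import BaseModel

    class OrgNotesSection(BaseModel):           # placeholder for the raw per-type model
        compressed_jsonl: Optional[str] = None
        embeddings_file: Optional[str] = None

    class ContentTypeSection(BaseModel):        # hypothetical grouping behind config.content_type
        org: Optional[OrgNotesSection] = None
        music: Optional[OrgNotesSection] = None
        ledger: Optional[OrgNotesSection] = None

    class DemoFullConfig(BaseModel):            # stand-in for the real FullConfig
        content_type: Optional[ContentTypeSection] = None

    config = DemoFullConfig(content_type=ContentTypeSection(org=OrgNotesSection()))
    print(config.content_type.org)    # a model instance, so notes search would be initialized
    print(config.content_type.music)  # None, so music search would be skipped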
@@ -1,6 +1,7 @@
 # Standard Packages
 import argparse
 import pathlib
+import json
 
 # External Packages
 import yaml
@@ -36,7 +37,7 @@ def cli(args=None):
     with open(get_absolute_path(args.config_file), 'r', encoding='utf-8') as config_file:
         config_from_file = yaml.safe_load(config_file)
     args.config = merge_dicts(priority_dict=config_from_file, default_dict=args.config)
-    args.config = FullConfig(args.config)
+    args.config = FullConfig.parse_raw(json.dumps(args.config))
 
     if args.org_files:
         args.config['content-type']['org']['input-files'] = args.org_files
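Note on the cli change above: the merged dict still carries the dashed YAML keys, so it is serialized to JSON and handed to pydantic's parse_raw, which validates it into a FullConfig. A small self-contained sketch of the same round-trip with a stand-in model (MiniConfig and the 'org-files' key are illustrative only); pydantic v1 also provides parse_obj, which accepts the dict directly and skips the serialization step:

    import json
    from typing import List, Optional
    from pydantic import BaseModel, Field

    class MiniConfig(BaseModel):                # stand-in for the real FullConfig
        # the alias maps a dashed YAML key onto a snake_case attribute
        org_files: Optional[List[str]] = Field(default=None, alias='org-files')

    merged = {'org-files': ['~/notes/todo.org']}          # dashed keys, as in the YAML config
    config = MiniConfig.parse_raw(json.dumps(merged))     # JSON round-trip, as in the diff
    same = MiniConfig.parse_obj(merged)                   # equivalent, without serializing
    print(config.org_files, same.org_files)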
@@ -6,6 +6,8 @@ from pathlib import Path
 # Internal Packages
 from src.utils.helpers import get_from_dict
+from src.utils.rawconfig import TextSearchConfigTest, ImageSearchConfigTest, ProcessorConversationConfig
 
 
 class SearchType(str, Enum):
     Notes = "notes"
@@ -43,49 +45,22 @@ class SearchModels():
 
 
 class TextSearchConfig():
-    def __init__(self, input_files, input_filter, compressed_jsonl, embeddings_file, verbose):
-        self.input_files = input_files
-        self.input_filter = input_filter
-        self.compressed_jsonl = Path(compressed_jsonl)
-        self.embeddings_file = Path(embeddings_file)
+    def __init__(self, text_search_config: TextSearchConfigTest, verbose: bool):
+        self.input_files = text_search_config.input_files
+        self.input_filter = text_search_config.input_filter
+        self.compressed_jsonl = Path(text_search_config.compressed_jsonl)
+        self.embeddings_file = Path(text_search_config.embeddings_file)
         self.verbose = verbose
 
-    def create_from_dictionary(config, key_tree, verbose):
-        text_config = get_from_dict(config, *key_tree)
-        search_enabled = text_config and ('input-files' in text_config or 'input-filter' in text_config)
-        if not search_enabled:
-            return None
-
-        return TextSearchConfig(
-            input_files = text_config['input-files'],
-            input_filter = text_config['input-filter'],
-            compressed_jsonl = Path(text_config['compressed-jsonl']),
-            embeddings_file = Path(text_config['embeddings-file']),
-            verbose = verbose)
-
 
 class ImageSearchConfig():
-    def __init__(self, input_directory, embeddings_file, batch_size, use_xmp_metadata, verbose):
-        self.input_directory = input_directory
-        self.embeddings_file = Path(embeddings_file)
-        self.batch_size = batch_size
-        self.use_xmp_metadata = use_xmp_metadata
+    def __init__(self, image_search_config: ImageSearchConfigTest, verbose):
+        self.input_directory = Path(image_search_config.input_directory)
+        self.embeddings_file = Path(image_search_config.embeddings_file)
+        self.batch_size = image_search_config.batch_size
+        self.use_xmp_metadata = image_search_config.use_xmp_metadata
         self.verbose = verbose
 
-    def create_from_dictionary(config, key_tree, verbose):
-        image_config = get_from_dict(config, *key_tree)
-        search_enabled = image_config and 'input-directory' in image_config
-        if not search_enabled:
-            return None
-
-        return ImageSearchConfig(
-            input_directory = Path(image_config['input-directory']),
-            embeddings_file = Path(image_config['embeddings-file']),
-            batch_size = image_config['batch-size'],
-            use_xmp_metadata = {'yes': True, 'no': False}[image_config['use-xmp-metadata']],
-            verbose = verbose)
-
 
 @dataclass
 class SearchConfig():
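With construction moved onto the raw models, the manual key lookups and the {'yes': True, 'no': False} mapping in the removed create_from_dictionary factories become unnecessary: pydantic v1 resolves the dashed YAML keys through field aliases and its bool parsing accepts 'yes'/'no' strings. A sketch of what the imported raw models could look like; field types and aliases are inferred from the attributes and keys visible in this diff, not copied from src.utils.rawconfig:

    from typing import List, Optional
    from pydantic import BaseModel, Field

    class TextSearchConfigTest(BaseModel):       # inferred shape, not the repo's definition
        input_files: Optional[List[str]] = Field(default=None, alias='input-files')
        input_filter: Optional[str] = Field(default=None, alias='input-filter')
        compressed_jsonl: Optional[str] = Field(default=None, alias='compressed-jsonl')
        embeddings_file: Optional[str] = Field(default=None, alias='embeddings-file')

    class ImageSearchConfigTest(BaseModel):      # inferred shape, not the repo's definition
        input_directory: Optional[str] = Field(default=None, alias='input-directory')
        embeddings_file: Optional[str] = Field(default=None, alias='embeddings-file')
        batch_size: Optional[int] = Field(default=None, alias='batch-size')
        use_xmp_metadata: Optional[bool] = Field(default=None, alias='use-xmp-metadata')

    raw = ImageSearchConfigTest.parse_obj(
        {'input-directory': '~/photos', 'batch-size': 50, 'use-xmp-metadata': 'yes'})
    print(raw.use_xmp_metadata)   # True; pydantic coerces 'yes'/'no' into a bool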
@@ -96,24 +71,12 @@ class SearchConfig():
 
 
 class ConversationProcessorConfig():
-    def __init__(self, conversation_logfile, conversation_history, openai_api_key, verbose):
-        self.openai_api_key = openai_api_key
-        self.conversation_logfile = conversation_logfile
-        self.conversation_history = conversation_history
+    def __init__(self, processor_config: ProcessorConversationConfig, verbose: bool):
+        self.openai_api_key = processor_config.open_api_key
+        self.conversation_logfile = Path(processor_config.conversation_logfile)
+        self.conversation_history = Path(processor_config.conversation_history)
         self.verbose = verbose
 
-    def create_from_dictionary(config, key_tree, verbose):
-        conversation_config = get_from_dict(config, *key_tree)
-        if not conversation_config:
-            return None
-
-        return ConversationProcessorConfig(
-            openai_api_key = conversation_config['openai-api-key'],
-            conversation_history = '',
-            conversation_logfile = Path(conversation_config['conversation-logfile']),
-            verbose = verbose)
-
 
 @dataclass
 class ProcessorConfig():
     conversation: ConversationProcessorConfig = None
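One behavioural difference worth noting: the removed factory always passed conversation_history = '', whereas the new constructor reads both conversation_logfile and conversation_history from the raw ProcessorConversationConfig and wraps them in Path. Downstream, initialize_processor can then load prior chat sessions straight from the configured logfile; a hedged sketch of that step (load_conversation_history and the empty-dict default are assumptions, not part of this commit):

    import json
    from pathlib import Path

    def load_conversation_history(logfile: Path) -> dict:
        # Read prior chat sessions if the logfile exists, otherwise start a fresh history
        logfile = logfile.expanduser()
        if logfile.exists():
            with open(logfile, 'r', encoding='utf-8') as f:
                return json.load(f)
        return {}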