2021-09-30 04:18:33 +02:00
|
|
|
# System Packages
|
2021-09-30 04:02:55 +02:00
|
|
|
from enum import Enum
|
2021-09-30 04:18:33 +02:00
|
|
|
from dataclasses import dataclass
|
2021-09-30 11:04:04 +02:00
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
# Internal Packages
|
2021-09-30 13:12:14 +02:00
|
|
|
from src.utils.helpers import get_from_dict
|
2021-09-30 04:02:55 +02:00
|
|
|
|
|
|
|
|
|
|
|
class SearchType(str, Enum):
    """Closed set of search types known to this module.

    Members also subclass ``str``, so each one compares equal to its
    lowercase string value (e.g. ``SearchType.Notes == "notes"``).
    """

    # Text-like search types
    Notes = "notes"
    Ledger = "ledger"
    Music = "music"

    # Image search type
    Image = "image"
|
|
|
|
|
2021-09-30 04:18:33 +02:00
|
|
|
|
2021-09-30 11:04:04 +02:00
|
|
|
class TextSearchModel:
    """Bundle of the objects needed to run one text search.

    Every constructor argument is stored unchanged on the instance under the
    same name; nothing is validated, copied or converted here.
    """

    def __init__(self, entries, corpus_embeddings, bi_encoder, cross_encoder, top_k, verbose):
        # Searchable corpus and its embeddings.
        self.entries, self.corpus_embeddings = entries, corpus_embeddings
        # The two encoder objects (presumably retrieval and re-ranking models;
        # confirm against the code that builds this object).
        self.bi_encoder, self.cross_encoder = bi_encoder, cross_encoder
        # Query-time settings.
        self.top_k, self.verbose = top_k, verbose
|
2021-09-30 06:09:42 +02:00
|
|
|
|
|
|
|
|
|
|
|
class ImageSearchModel():
    """Bundle of the objects needed to run one image search.

    Every constructor argument is stored unchanged on the instance under the
    same name; nothing is validated, copied or converted here.
    """

    def __init__(self, image_names, image_embeddings, image_metadata_embeddings, image_encoder, verbose):
        # Each attribute is assigned exactly once.
        self.image_encoder = image_encoder
        self.image_names = image_names
        self.image_embeddings = image_embeddings
        self.image_metadata_embeddings = image_metadata_embeddings
        self.verbose = verbose
|
2021-09-30 06:09:42 +02:00
|
|
|
|
|
|
|
|
2021-09-30 05:24:27 +02:00
|
|
|
@dataclass
class SearchModels:
    """Holder for one loaded model per search type.

    Every field defaults to ``None``; this class itself attaches no further
    meaning to a ``None`` slot.
    """

    # Text search models
    notes_search: TextSearchModel = None
    ledger_search: TextSearchModel = None
    music_search: TextSearchModel = None

    # Image search model
    image_search: ImageSearchModel = None
|
2021-09-30 11:04:04 +02:00
|
|
|
|
|
|
|
|
|
|
|
class TextSearchConfig():
    """Settings for one text search type.

    ``input_files``, ``input_filter`` and ``verbose`` are stored as passed.
    ``compressed_jsonl`` and ``embeddings_file`` are converted to
    ``pathlib.Path``.
    """

    def __init__(self, input_files, input_filter, compressed_jsonl, embeddings_file, verbose):
        self.input_files = input_files
        self.input_filter = input_filter
        self.compressed_jsonl = Path(compressed_jsonl)
        self.embeddings_file = Path(embeddings_file)
        self.verbose = verbose

    @staticmethod
    def create_from_dictionary(config, key_tree, verbose):
        """Build a ``TextSearchConfig`` from a nested configuration dictionary.

        Args:
            config: Configuration dictionary handed to ``get_from_dict``.
            key_tree: Sequence of keys, unpacked into ``get_from_dict`` to
                locate this search type's sub-dictionary.
            verbose: Stored unchanged on the resulting config.

        Returns:
            A ``TextSearchConfig``, or ``None`` when the looked-up section is
            falsy or contains neither ``'input-files'`` nor ``'input-filter'``.

        Raises:
            KeyError: If the section is enabled but lacks ``'compressed-jsonl'``
                or ``'embeddings-file'``.
        """
        text_config = get_from_dict(config, *key_tree)
        search_enabled = text_config and ('input-files' in text_config or 'input-filter' in text_config)
        if not search_enabled:
            return None

        return TextSearchConfig(
            # Only one of the two input keys has to be present for the search
            # to count as enabled, so the other one may be missing: read both
            # with .get() instead of raising KeyError.
            input_files=text_config.get('input-files'),
            input_filter=text_config.get('input-filter'),
            # Both paths are required; __init__ converts them to Path.
            compressed_jsonl=text_config['compressed-jsonl'],
            embeddings_file=text_config['embeddings-file'],
            verbose=verbose,
        )
|
|
|
|
|
|
|
|
|
|
|
|
class ImageSearchConfig():
    """Settings for image search.

    All arguments are stored as passed, except ``embeddings_file``, which is
    converted to ``pathlib.Path``.
    """

    def __init__(self, input_directory, embeddings_file, batch_size, use_xmp_metadata, verbose):
        self.input_directory = input_directory
        self.embeddings_file = Path(embeddings_file)
        self.batch_size = batch_size
        self.use_xmp_metadata = use_xmp_metadata
        self.verbose = verbose

    @staticmethod
    def create_from_dictionary(config, key_tree, verbose):
        """Build an ``ImageSearchConfig`` from a nested configuration dictionary.

        Args:
            config: Configuration dictionary handed to ``get_from_dict``.
            key_tree: Sequence of keys, unpacked into ``get_from_dict`` to
                locate the image search sub-dictionary.
            verbose: Stored unchanged on the resulting config.

        Returns:
            An ``ImageSearchConfig``, or ``None`` when the looked-up section is
            falsy or has no ``'input-directory'`` key.

        Raises:
            KeyError: If an enabled section lacks ``'embeddings-file'``,
                ``'batch-size'`` or ``'use-xmp-metadata'``, or if
                ``'use-xmp-metadata'`` is neither a bool nor ``'yes'``/``'no'``.
        """
        image_config = get_from_dict(config, *key_tree)
        search_enabled = image_config and 'input-directory' in image_config
        if not search_enabled:
            return None

        # The flag is expected as the string 'yes' or 'no'. A loader that has
        # already turned an unquoted yes/no into a real bool (YAML 1.1 does
        # this) would otherwise fail the string lookup, so pass bools through.
        use_xmp_metadata = image_config['use-xmp-metadata']
        if not isinstance(use_xmp_metadata, bool):
            use_xmp_metadata = {'yes': True, 'no': False}[use_xmp_metadata]

        return ImageSearchConfig(
            input_directory=Path(image_config['input-directory']),
            # __init__ converts the embeddings file to Path.
            embeddings_file=image_config['embeddings-file'],
            batch_size=image_config['batch-size'],
            use_xmp_metadata=use_xmp_metadata,
            verbose=verbose,
        )
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class SearchConfig:
    """Holder for one configuration object per search type.

    Every field defaults to ``None``; this class itself attaches no further
    meaning to a ``None`` slot.
    """

    # Text search configurations
    notes: TextSearchConfig = None
    ledger: TextSearchConfig = None
    music: TextSearchConfig = None

    # Image search configuration
    image: ImageSearchConfig = None
|
2021-11-26 20:56:26 +01:00
|
|
|
|
|
|
|
|
|
|
|
class ConversationProcessorConfig():
    """Settings and in-memory logs for the conversation processor.

    All constructor arguments are stored unchanged on the instance under the
    same name.
    """

    def __init__(self, conversation_logfile, chat_log, meta_log, openai_api_key, verbose):
        self.openai_api_key = openai_api_key
        self.conversation_logfile = conversation_logfile
        self.chat_log = chat_log
        self.meta_log = meta_log
        self.verbose = verbose

    @staticmethod
    def create_from_dictionary(config, key_tree, verbose):
        """Build a ``ConversationProcessorConfig`` from a nested configuration dictionary.

        Args:
            config: Configuration dictionary handed to ``get_from_dict``.
            key_tree: Sequence of keys, unpacked into ``get_from_dict`` to
                locate the conversation sub-dictionary.
            verbose: Stored unchanged on the resulting config.

        Returns:
            A ``ConversationProcessorConfig`` with an empty ``chat_log`` string
            and a new empty ``meta_log`` list, or ``None`` when the looked-up
            section is falsy.

        Raises:
            KeyError: If the section lacks ``'openai-api-key'`` or
                ``'conversation-logfile'``.
        """
        conversation_config = get_from_dict(config, *key_tree)
        if not conversation_config:
            return None

        return ConversationProcessorConfig(
            openai_api_key=conversation_config['openai-api-key'],
            # Both logs start out empty; a new list is created on every call.
            chat_log='',
            meta_log=[],
            conversation_logfile=Path(conversation_config['conversation-logfile']),
            verbose=verbose,
        )
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class ProcessorConfig:
    """Holder for processor configuration objects.

    The single field defaults to ``None``; this class itself attaches no
    further meaning to a ``None`` slot.
    """

    # Conversation processor configuration
    conversation: ConversationProcessorConfig = None
|