Consolidate the search config models and pass verbose as a top level flag

This commit is contained in:
Saba 2021-12-04 11:43:48 -05:00
parent 43e647835b
commit 10e4065e05
6 changed files with 52 additions and 90 deletions

View file

@ -13,16 +13,16 @@ from fastapi.templating import Jinja2Templates
from src.search_type import asymmetric, symmetric_ledger, image_search
from src.utils.helpers import get_absolute_path
from src.utils.cli import cli
from src.utils.config import SearchType, SearchModels, TextSearchConfig, ImageSearchConfig, SearchConfig, ProcessorConfig, ConversationProcessorConfig
from src.utils.rawconfig import FullConfig
from src.utils.config import SearchType, SearchModels, ProcessorConfig, ConversationProcessorConfigDTO
from src.utils.rawconfig import FullConfigModel
from src.processor.conversation.gpt import converse, message_to_log, message_to_prompt, understand
# Application Global State
model = SearchModels()
search_config = SearchConfig()
processor_config = ProcessorConfig()
config = {}
config_file = ""
verbose = 0
app = FastAPI()
app.mount("/views", StaticFiles(directory="views"), name="views")
@ -32,12 +32,12 @@ templates = Jinja2Templates(directory="views/")
def ui(request: Request):
return templates.TemplateResponse("config.html", context={'request': request})
@app.get('/config', response_model=FullConfig)
@app.get('/config', response_model=FullConfigModel)
def config():
return config
@app.post('/config')
async def config(updated_config: FullConfig):
async def config(updated_config: FullConfigModel):
global config
config = updated_config
with open(config_file, 'w') as outfile:
@ -83,7 +83,7 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None):
return image_search.collate_results(
hits,
model.image_search.image_names,
search_config.image.input_directory,
config.content_type.image.input_directory,
results_count)
else:
@ -92,22 +92,7 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None):
@app.get('/regenerate')
def regenerate(t: Optional[SearchType] = None):
if (t == SearchType.Notes or t == None) and search_config.notes:
# Extract Entries, Generate Embeddings
model.notes_search = asymmetric.setup(search_config.notes, regenerate=True)
if (t == SearchType.Music or t == None) and search_config.music:
# Extract Entries, Generate Song Embeddings
model.music_search = asymmetric.setup(search_config.music, regenerate=True)
if (t == SearchType.Ledger or t == None) and search_config.ledger:
# Extract Entries, Generate Embeddings
model.ledger_search = symmetric_ledger.setup(search_config.ledger, regenerate=True)
if (t == SearchType.Image or t == None) and search_config.image:
# Extract Images, Generate Embeddings
model.image_search = image_search.setup(search_config.image, regenerate=True)
initialize_search(regenerate=True)
return {'status': 'ok', 'message': 'regeneration completed'}
@ -128,41 +113,40 @@ def chat(q: str):
return {'status': 'ok', 'response': gpt_response}
def initialize_search(regenerate, verbose):
def initialize_search(regenerate: bool, t: SearchType = None):
model = SearchModels()
search_config = SearchConfig()
# Initialize Org Notes Search
if config.content_type.org:
search_config.notes = TextSearchConfig(config.content_type.org, verbose)
model.notes_search = asymmetric.setup(search_config.notes, regenerate=regenerate)
if (t == SearchType.Notes or t == None) and config.content_type.org:
# Extract Entries, Generate Notes Embeddings
model.notes_search = asymmetric.setup(config.content_type.org, regenerate=regenerate, verbose=verbose)
# Initialize Org Music Search
if config.content_type.music:
search_config.music = TextSearchConfig(config.content_type.music, verbose)
model.music_search = asymmetric.setup(search_config.music, regenerate=regenerate)
if (t == SearchType.Music or t == None) and config.content_type.music:
# Extract Entries, Generate Music Embeddings
model.music_search = asymmetric.setup(config.content_type.music, regenerate=regenerate, verbose=verbose)
# Initialize Ledger Search
if config.content_type.ledger:
search_config.ledger = TextSearchConfig(config.content_type.org, verbose)
model.ledger_search = symmetric_ledger.setup(search_config.ledger, regenerate=regenerate)
if (t == SearchType.Ledger or t == None) and config.content_type.ledger:
# Extract Entries, Generate Ledger Embeddings
model.ledger_search = symmetric_ledger.setup(config.content_type.ledger, regenerate=regenerate, verbose=verbose)
# Initialize Image Search
if config.content_type.image:
search_config.image = ImageSearchConfig(config.content_type.image, verbose)
model.image_search = image_search.setup(search_config.image, regenerate=regenerate)
if (t == SearchType.Image or t == None) and config.content_type.image:
# Extract Entries, Generate Image Embeddings
model.image_search = image_search.setup(config.content_type.image, regenerate=regenerate, verbose=verbose)
return model, search_config
return model
def initialize_processor(verbose):
def initialize_processor():
if not config.processor:
return
processor_config = ProcessorConfig()
# Initialize Conversation Processor
processor_config.conversation = ConversationProcessorConfig(config.processor.conversation, verbose)
processor_config.conversation = ConversationProcessorConfigDTO(config.processor.conversation, verbose)
conversation_logfile = processor_config.conversation.conversation_logfile
if processor_config.conversation.verbose:
@ -211,14 +195,17 @@ if __name__ == '__main__':
# Stores the file path to the config file.
config_file = args.config_file
# Store the verbose flag
verbose = args.verbose
# Store the raw config data.
config = args.config
# Initialize Search from Config
model, search_config = initialize_search(args.regenerate, args.verbose)
# Initialize the search model from Config
model = initialize_search(args.regenerate)
# Initialize Processor from Config
processor_config = initialize_processor(args.verbose)
processor_config = initialize_processor()
# Start Application Server
if args.socket:

View file

@ -14,7 +14,8 @@ from sentence_transformers import SentenceTransformer, CrossEncoder, util
# Internal Packages
from src.utils.helpers import get_absolute_path, resolve_absolute_path
from src.processor.org_mode.org_to_jsonl import org_to_jsonl
from src.utils.config import TextSearchModel, TextSearchConfig
from src.utils.config import TextSearchModel
from src.utils.rawconfig import TextSearchConfigModel
def initialize_model():
@ -148,22 +149,22 @@ def collate_results(hits, entries, count=5):
in hits[0:count]]
def setup(config: TextSearchConfig, regenerate: bool) -> TextSearchModel:
def setup(config: TextSearchConfigModel, regenerate: bool, verbose: bool) -> TextSearchModel:
# Initialize Model
bi_encoder, cross_encoder, top_k = initialize_model()
# Map notes in Org-Mode files to (compressed) JSONL formatted file
if not resolve_absolute_path(config.compressed_jsonl).exists() or regenerate:
org_to_jsonl(config.input_files, config.input_filter, config.compressed_jsonl, config.verbose)
org_to_jsonl(config.input_files, config.input_filter, config.compressed_jsonl, verbose)
# Extract Entries
entries = extract_entries(config.compressed_jsonl, config.verbose)
entries = extract_entries(config.compressed_jsonl, verbose)
top_k = min(len(entries), top_k) # top_k hits can't be more than the total entries in corpus
# Compute or Load Embeddings
corpus_embeddings = compute_embeddings(entries, bi_encoder, config.embeddings_file, regenerate=regenerate, verbose=config.verbose)
corpus_embeddings = compute_embeddings(entries, bi_encoder, config.embeddings_file, regenerate=regenerate, verbose=verbose)
return TextSearchModel(entries, corpus_embeddings, bi_encoder, cross_encoder, top_k, verbose=config.verbose)
return TextSearchModel(entries, corpus_embeddings, bi_encoder, cross_encoder, top_k, verbose=verbose)
if __name__ == '__main__':

View file

@ -10,9 +10,10 @@ from tqdm import trange
import torch
# Internal Packages
from src.utils.helpers import get_absolute_path, resolve_absolute_path
from src.utils.helpers import resolve_absolute_path
import src.utils.exiftool as exiftool
from src.utils.config import ImageSearchModel, ImageSearchConfig
from src.utils.config import ImageSearchModel
from src.utils.rawconfig import ImageSearchConfigModel
def initialize_model():
@ -153,7 +154,7 @@ def collate_results(hits, image_names, image_directory, count=5):
in hits[0:count]]
def setup(config: ImageSearchConfig, regenerate: bool) -> ImageSearchModel:
def setup(config: ImageSearchConfigModel, regenerate: bool, verbose: bool) -> ImageSearchModel:
# Initialize Model
encoder = initialize_model()
@ -170,13 +171,13 @@ def setup(config: ImageSearchConfig, regenerate: bool) -> ImageSearchModel:
batch_size=config.batch_size,
regenerate=regenerate,
use_xmp_metadata=config.use_xmp_metadata,
verbose=config.verbose)
verbose=verbose)
return ImageSearchModel(image_names,
image_embeddings,
image_metadata_embeddings,
encoder,
config.verbose)
verbose)
if __name__ == '__main__':

View file

@ -12,7 +12,8 @@ from sentence_transformers import SentenceTransformer, CrossEncoder, util
# Internal Packages
from src.utils.helpers import get_absolute_path, resolve_absolute_path
from src.processor.ledger.beancount_to_jsonl import beancount_to_jsonl
from src.utils.config import TextSearchModel, TextSearchConfig
from src.utils.config import TextSearchModel
from src.utils.rawconfig import TextSearchConfigModel
def initialize_model():
@ -140,7 +141,7 @@ def collate_results(hits, entries, count=5):
in hits[0:count]]
def setup(config: TextSearchConfig, regenerate: bool) -> TextSearchModel:
def setup(config: TextSearchConfigModel, regenerate: bool, verbose: bool) -> TextSearchModel:
# Initialize Model
bi_encoder, cross_encoder, top_k = initialize_model()
@ -153,9 +154,9 @@ def setup(config: TextSearchConfig, regenerate: bool) -> TextSearchModel:
top_k = min(len(entries), top_k)
# Compute or Load Embeddings
corpus_embeddings = compute_embeddings(entries, bi_encoder, config.embeddings_file, regenerate=regenerate, verbose=config.verbose)
corpus_embeddings = compute_embeddings(entries, bi_encoder, config.embeddings_file, regenerate=regenerate, verbose=verbose)
return TextSearchModel(entries, corpus_embeddings, bi_encoder, cross_encoder, top_k, verbose=config.verbose)
return TextSearchModel(entries, corpus_embeddings, bi_encoder, cross_encoder, top_k, verbose=verbose)
if __name__ == '__main__':

View file

@ -8,7 +8,7 @@ import yaml
# Internal Packages
from src.utils.helpers import is_none_or_empty, get_absolute_path, resolve_absolute_path, merge_dicts
from src.utils.rawconfig import FullConfig
from src.utils.rawconfig import FullConfigModel
def cli(args=None):
if is_none_or_empty(args):
@ -37,7 +37,7 @@ def cli(args=None):
with open(get_absolute_path(args.config_file), 'r', encoding='utf-8') as config_file:
config_from_file = yaml.safe_load(config_file)
args.config = merge_dicts(priority_dict=config_from_file, default_dict=args.config)
args.config = FullConfig.parse_obj(args.config)
args.config = FullConfigModel.parse_obj(args.config)
if args.org_files:
args.config['content-type']['org']['input-files'] = args.org_files

View file

@ -4,9 +4,7 @@ from dataclasses import dataclass
from pathlib import Path
# Internal Packages
from src.utils.helpers import get_from_dict
from src.utils.rawconfig import TextSearchConfigModel, ImageSearchConfigModel, ProcessorConversationConfigModel
from src.utils.rawconfig import ProcessorConversationConfigModel
class SearchType(str, Enum):
@ -44,33 +42,7 @@ class SearchModels():
image_search: ImageSearchModel = None
class TextSearchConfigModel():
def __init__(self, text_search_config: TextSearchConfigModel, verbose: bool):
self.input_files = text_search_config.input_files
self.input_filter = text_search_config.input_filter
self.compressed_jsonl = Path(text_search_config.compressed_jsonl)
self.embeddings_file = Path(text_search_config.embeddings_file)
self.verbose = verbose
class ImageSearchConfigModel():
def __init__(self, image_search_config: ImageSearchConfigModel, verbose):
self.input_directory = Path(image_search_config.input_directory)
self.embeddings_file = Path(image_search_config.embeddings_file)
self.batch_size = image_search_config.batch_size
self.use_xmp_metadata = image_search_config.use_xmp_metadata
self.verbose = verbose
@dataclass
class SearchConfig():
notes: TextSearchConfigModel = None
ledger: TextSearchConfigModel = None
music: TextSearchConfigModel = None
image: ImageSearchConfigModel = None
class ConversationProcessorConfig():
class ConversationProcessorConfigDTO():
def __init__(self, processor_config: ProcessorConversationConfigModel, verbose: bool):
self.openai_api_key = processor_config.open_api_key
self.conversation_logfile = Path(processor_config.conversation_logfile)
@ -81,4 +53,4 @@ class ConversationProcessorConfig():
@dataclass
class ProcessorConfig():
conversation: ConversationProcessorConfig = None
conversation: ConversationProcessorConfigDTO = None