2022-08-10 23:13:14 +02:00
|
|
|
# System Packages
|
|
|
|
import sys
|
2022-09-03 13:43:32 +02:00
|
|
|
import logging
|
2022-08-10 23:13:14 +02:00
|
|
|
|
2022-08-06 01:37:52 +02:00
|
|
|
# External Packages
|
2022-08-10 21:30:13 +02:00
|
|
|
import json
|
2022-08-06 01:37:52 +02:00
|
|
|
|
|
|
|
# Internal Packages
|
2022-09-14 09:53:43 +02:00
|
|
|
from src.processor.ledger.beancount_to_jsonl import BeancountToJsonl
|
|
|
|
from src.processor.markdown.markdown_to_jsonl import MarkdownToJsonl
|
|
|
|
from src.processor.org_mode.org_to_jsonl import OrgToJsonl
|
2022-08-06 01:37:52 +02:00
|
|
|
from src.search_type import image_search, text_search
|
|
|
|
from src.utils.config import SearchType, SearchModels, ProcessorConfigModel, ConversationProcessorConfigModel
|
2022-08-06 02:05:35 +02:00
|
|
|
from src.utils import state
|
2022-09-14 01:58:49 +02:00
|
|
|
from src.utils.helpers import LRU, resolve_absolute_path
|
2022-08-10 21:30:13 +02:00
|
|
|
from src.utils.rawconfig import FullConfig, ProcessorConfig
|
2022-09-05 00:05:13 +02:00
|
|
|
from src.search_filter.date_filter import DateFilter
|
|
|
|
from src.search_filter.word_filter import WordFilter
|
|
|
|
from src.search_filter.file_filter import FileFilter
|
2022-08-06 01:37:52 +02:00
|
|
|
|
|
|
|
|
2022-09-03 13:43:32 +02:00
|
|
|
# Module-level logger, named after this module's import path
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2022-08-10 23:13:14 +02:00
|
|
|
def configure_server(args, required=False):
    """Store the application config in shared state and build search/processor state.

    Args:
        args: Object exposing ``config`` and ``regenerate`` attributes
            (presumably the parsed command line arguments; confirm with caller).
        required: When true, a missing config is fatal: a message is written
            to stderr and the process exits with status 1. When false, a
            missing config makes this function return without doing anything.

    Returns:
        None. Results are written to ``state.config``, ``state.model`` and
        ``state.processor_config``.

    Raises:
        SystemExit: With code 1 when ``args.config`` is None and ``required`` is true.
    """
    if args.config is None:
        if not required:
            # Nothing to configure; leave shared state untouched
            return
        # Fatal diagnostic belongs on stderr, not mixed into regular output
        print('Exiting as Khoj is not configured. Configure the application to use it.', file=sys.stderr)
        sys.exit(1)

    state.config = args.config

    # Initialize the search model from Config
    state.model = configure_search(state.model, state.config, args.regenerate)

    # Initialize Processor from Config
    state.processor_config = configure_processor(state.config.processor)
|
2022-08-06 01:37:52 +02:00
|
|
|
|
|
|
|
|
2022-09-03 13:43:32 +02:00
|
|
|
def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, t: SearchType = None):
    """Set up the search models for the content types enabled in ``config``.

    Args:
        model: Container whose ``orgmode_search``, ``music_search``,
            ``markdown_search``, ``ledger_search`` and ``image_search``
            attributes are assigned in place.
        config: Full configuration. ``config.content_type`` decides which
            content types are set up; ``config.search_type`` supplies the
            search settings passed to each ``setup`` call.
        regenerate: Forwarded unchanged to every ``setup`` call.
        t: Restrict setup to a single search type. ``None`` (the default)
            sets up every content type that is configured.

    Returns:
        The same ``model`` object that was passed in.

    Side effects:
        Replaces ``state.query_cache`` with a new ``LRU`` on every call,
        regardless of which search types were set up.
    """
    def is_selected(search_type):
        # No explicit type requested means every type is selected
        return t is None or t == search_type

    # Initialize Org Notes Search
    if is_selected(SearchType.Org) and config.content_type.org:
        # Extract Entries, Generate Notes Embeddings
        model.orgmode_search = text_search.setup(
            OrgToJsonl,
            config.content_type.org,
            search_config=config.search_type.asymmetric,
            regenerate=regenerate,
            filters=[DateFilter(), WordFilter(), FileFilter()])

    # Initialize Org Music Search
    if is_selected(SearchType.Music) and config.content_type.music:
        # Extract Entries, Generate Music Embeddings
        # Music reuses the org processor and is set up without a file filter
        model.music_search = text_search.setup(
            OrgToJsonl,
            config.content_type.music,
            search_config=config.search_type.asymmetric,
            regenerate=regenerate,
            filters=[DateFilter(), WordFilter()])

    # Initialize Markdown Search
    if is_selected(SearchType.Markdown) and config.content_type.markdown:
        # Extract Entries, Generate Markdown Embeddings
        model.markdown_search = text_search.setup(
            MarkdownToJsonl,
            config.content_type.markdown,
            search_config=config.search_type.asymmetric,
            regenerate=regenerate,
            filters=[DateFilter(), WordFilter(), FileFilter()])

    # Initialize Ledger Search
    if is_selected(SearchType.Ledger) and config.content_type.ledger:
        # Extract Entries, Generate Ledger Embeddings
        # Ledger is the only text search configured with the symmetric settings
        model.ledger_search = text_search.setup(
            BeancountToJsonl,
            config.content_type.ledger,
            search_config=config.search_type.symmetric,
            regenerate=regenerate,
            filters=[DateFilter(), WordFilter(), FileFilter()])

    # Initialize Image Search
    if is_selected(SearchType.Image) and config.content_type.image:
        # Extract Entries, Generate Image Embeddings
        model.image_search = image_search.setup(
            config.content_type.image,
            search_config=config.search_type.image,
            regenerate=regenerate)

    # Invalidate Query Cache
    state.query_cache = LRU()

    return model
|
|
|
|
|
|
|
|
|
2022-09-03 13:43:32 +02:00
|
|
|
def configure_processor(processor_config: ProcessorConfig):
    """Build the processor model described by ``processor_config``.

    Args:
        processor_config: Processor section of the configuration. A falsy
            value means no processor is configured.

    Returns:
        A new ``ProcessorConfigModel``, with ``conversation`` populated when
        ``processor_config.conversation`` is truthy; ``None`` when
        ``processor_config`` is falsy.
    """
    if processor_config:
        processor_model = ProcessorConfigModel()

        # Initialize Conversation Processor
        conversation_config = processor_config.conversation
        if conversation_config:
            processor_model.conversation = configure_conversation_processor(conversation_config)

        return processor_model

    return None
|
|
|
|
|
|
|
|
|
2022-09-03 13:43:32 +02:00
|
|
|
def configure_conversation_processor(conversation_processor_config):
    """Build the conversation processor model and restore its logs from disk if present.

    Args:
        conversation_processor_config: Conversation section of the processor
            configuration, passed straight to ``ConversationProcessorConfigModel``.

    Returns:
        The ``ConversationProcessorConfigModel`` instance. Its ``meta_log`` is
        the parsed JSON content of the conversation logfile when that file
        exists; otherwise ``meta_log`` is an empty dict and ``chat_session``
        is an empty string.

    Raises:
        json.JSONDecodeError: If the logfile exists but is not valid JSON.
    """
    conversation_processor = ConversationProcessorConfigModel(conversation_processor_config)
    # NOTE(review): assumes resolve_absolute_path returns a pathlib.Path - confirm
    conversation_logfile = resolve_absolute_path(conversation_processor.conversation_logfile)

    if conversation_logfile.is_file():
        # Load Metadata Logs from Conversation Logfile
        # Decode explicitly as UTF-8 instead of relying on the platform default encoding
        with conversation_logfile.open('r', encoding='utf-8') as f:
            conversation_processor.meta_log = json.load(f)
        logger.info('Conversation logs loaded from disk.')
    else:
        # Initialize Conversation Logs
        conversation_processor.meta_log = {}
        conversation_processor.chat_session = ""

    return conversation_processor
|