Show user friendly information logs by default for context
- Use emojis to make info logs easier to read
- Inform when khoj is ready to use
- Provide information on what khoj is doing while starting up
- Inform when content/search types and processors are setup
- Inform when models are being loaded from the web as this step can take time
- Convert all other info logs to be only shown in verbose mode
parent fe03ba3dce
commit 1ae40163a9

14 changed files with 43 additions and 36 deletions
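The verbosity change in the `run()` hunk below inverts the old defaults: info logs are now shown without any flag, and a single `-v` switches to debug. A minimal sketch of the resulting behavior, assuming a standalone script with an argparse count-style `--verbose` flag (not khoj's actual entrypoint):

import argparse
import logging

logging.basicConfig(format="%(message)s")
logger = logging.getLogger("khoj")

parser = argparse.ArgumentParser()
parser.add_argument("-v", "--verbose", action="count", default=0)
args = parser.parse_args()

# After this commit: INFO is the default; previously the default was WARN,
# with -v for INFO and -vv for DEBUG.
if args.verbose == 0:
    logger.setLevel(logging.INFO)   # user-friendly, emoji-prefixed logs
elif args.verbose >= 1:
    logger.setLevel(logging.DEBUG)  # detailed logs, verbose mode only

logger.info("🌘 Starting Khoj")          # shown by default
logger.debug("Processing files: [...]")  # shown only with -v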
@@ -66,7 +66,7 @@ def update_search_index():
     state.search_index_lock.acquire()
     state.model = configure_search(state.model, state.config, regenerate=False)
     state.search_index_lock.release()
-    logger.info("Search Index updated via Scheduler")
+    logger.info("📬 Search index updated via Scheduler")


 def configure_search_types(config: FullConfig):
@@ -84,6 +84,7 @@ def configure_search_types(config: FullConfig):
 def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, t: state.SearchType = None):
     # Initialize Org Notes Search
     if (t == state.SearchType.Org or t == None) and config.content_type.org:
+        logger.info("🦄 Setting up search for orgmode notes")
         # Extract Entries, Generate Notes Embeddings
         model.orgmode_search = text_search.setup(
             OrgToJsonl,
@@ -95,6 +96,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
     # Initialize Org Music Search
     if (t == state.SearchType.Music or t == None) and config.content_type.music:
+        logger.info("🎺 Setting up search for org-music")
         # Extract Entries, Generate Music Embeddings
         model.music_search = text_search.setup(
             OrgToJsonl,
@@ -106,6 +108,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
     # Initialize Markdown Search
     if (t == state.SearchType.Markdown or t == None) and config.content_type.markdown:
+        logger.info("💎 Setting up search for markdown notes")
         # Extract Entries, Generate Markdown Embeddings
         model.markdown_search = text_search.setup(
             MarkdownToJsonl,
@@ -117,6 +120,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
     # Initialize Ledger Search
     if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger:
+        logger.info("💸 Setting up search for ledger")
         # Extract Entries, Generate Ledger Embeddings
         model.ledger_search = text_search.setup(
             BeancountToJsonl,
@@ -128,6 +132,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
     # Initialize Image Search
     if (t == state.SearchType.Image or t == None) and config.content_type.image:
+        logger.info("🌄 Setting up search for images")
         # Extract Entries, Generate Image Embeddings
         model.image_search = image_search.setup(
             config.content_type.image, search_config=config.search_type.image, regenerate=regenerate
@@ -135,6 +140,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
     # Initialize External Plugin Search
     if (t == None or t in state.SearchType) and config.content_type.plugins:
+        logger.info("🔌 Setting up search for plugins")
         model.plugin_search = {}
         for plugin_type, plugin_config in config.content_type.plugins.items():
             model.plugin_search[plugin_type] = text_search.setup(
@@ -159,6 +165,7 @@ def configure_processor(processor_config: ProcessorConfig):
     # Initialize Conversation Processor
     if processor_config.conversation:
+        logger.info("💬 Setting up conversation processor")
         processor.conversation = configure_conversation_processor(processor_config.conversation)

     return processor
@@ -172,7 +179,7 @@ def configure_conversation_processor(conversation_processor_config):
         # Load Metadata Logs from Conversation Logfile
         with conversation_logfile.open("r") as f:
             conversation_processor.meta_log = json.load(f)
-        logger.info("Conversation logs loaded from disk.")
+        logger.debug(f"Loaded conversation logs from {conversation_logfile}")
     else:
         # Initialize Conversation Logs
         conversation_processor.meta_log = {}
@@ -52,10 +52,8 @@ def run():
     # Set Logging Level
     if args.verbose == 0:
-        logger.setLevel(logging.WARN)
-    elif args.verbose == 1:
         logger.setLevel(logging.INFO)
-    elif args.verbose >= 2:
+    elif args.verbose >= 1:
         logger.setLevel(logging.DEBUG)

     # Set Log File
@@ -63,7 +61,7 @@ def run():
     fh.setLevel(logging.DEBUG)
     logger.addHandler(fh)

-    logger.info("Starting Khoj...")
+    logger.info("🌘 Starting Khoj")

     if args.no_gui:
         # Setup task scheduler
@@ -122,7 +120,6 @@ def run():

 def sigint_handler(*args):
-    print("\nShutting down Khoj...")
     QtWidgets.QApplication.quit()


@@ -135,10 +132,12 @@ def set_state(args):

 def start_server(app, host=None, port=None, socket=None):
+    logger.info("🌖 Khoj is ready to use")
     if socket:
         uvicorn.run(app, proxy_headers=True, uds=socket, log_level="debug", use_colors=True, log_config=None)
     else:
         uvicorn.run(app, host=host, port=port, log_level="debug", use_colors=True, log_config=None)
+    logger.info("🌒 Stopping Khoj")


 def poll_task_scheduler():
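The two logs added around `uvicorn.run` in `start_server` bracket the server's lifetime because `uvicorn.run` blocks until shutdown. A minimal sketch of that lifecycle, with a hypothetical app and logger setup (not khoj's actual wiring); note the ready message is necessarily emitted just before the server starts listening, not after:

import logging

import uvicorn
from fastapi import FastAPI

logger = logging.getLogger(__name__)
app = FastAPI()

def start_server(app, host="127.0.0.1", port=8000):
    logger.info("🌖 Khoj is ready to use")  # just before the server starts listening
    uvicorn.run(app, host=host, port=port)  # blocks until shutdown (e.g. Ctrl+C)
    logger.info("🌒 Stopping Khoj")         # runs only after uvicorn exits

if __name__ == "__main__":
    start_server(app)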
@@ -82,7 +82,7 @@ class JsonlToJsonl(TextToJsonl):
         if any(files_with_non_jsonl_extensions):
             print(f"[Warning] There maybe non jsonl files in the input set: {files_with_non_jsonl_extensions}")

-        logger.info(f"Processing files: {all_jsonl_files}")
+        logger.debug(f"Processing files: {all_jsonl_files}")

         return all_jsonl_files
@@ -88,7 +88,7 @@ class BeancountToJsonl(TextToJsonl):
         if any(files_with_non_beancount_extensions):
             print(f"[Warning] There maybe non beancount files in the input set: {files_with_non_beancount_extensions}")

-        logger.info(f"Processing files: {all_beancount_files}")
+        logger.debug(f"Processing files: {all_beancount_files}")

         return all_beancount_files
@@ -123,7 +123,7 @@ class BeancountToJsonl(TextToJsonl):
                 Entry(compiled=parsed_entry, raw=parsed_entry, file=f"{transaction_to_file_map[parsed_entry]}")
             )

-        logger.info(f"Converted {len(parsed_entries)} transactions to dictionaries")
+        logger.debug(f"Converted {len(parsed_entries)} transactions to dictionaries")

         return entries
@@ -92,7 +92,7 @@ class MarkdownToJsonl(TextToJsonl):
                 f"[Warning] There maybe non markdown-mode files in the input set: {files_with_non_markdown_extensions}"
             )

-        logger.info(f"Processing files: {all_markdown_files}")
+        logger.debug(f"Processing files: {all_markdown_files}")

         return all_markdown_files
@@ -126,7 +126,7 @@ class MarkdownToJsonl(TextToJsonl):
         for parsed_entry in parsed_entries:
             entries.append(Entry(compiled=parsed_entry, raw=parsed_entry, file=f"{entry_to_file_map[parsed_entry]}"))

-        logger.info(f"Converted {len(parsed_entries)} markdown entries to dictionaries")
+        logger.debug(f"Converted {len(parsed_entries)} markdown entries to dictionaries")

         return entries
@@ -86,7 +86,7 @@ class OrgToJsonl(TextToJsonl):
         if any(files_with_non_org_extensions):
             logger.warn(f"There maybe non org-mode files in the input set: {files_with_non_org_extensions}")

-        logger.info(f"Processing files: {all_org_files}")
+        logger.debug(f"Processing files: {all_org_files}")

         return all_org_files
@@ -56,7 +56,7 @@ async def set_config_data(updated_config: FullConfig):
 def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None, r: Optional[bool] = False):
     results: List[SearchResponse] = []
     if q is None or q == "":
-        logger.info(f"No query param (q) passed in API call to initiate search")
+        logger.warn(f"No query param (q) passed in API call to initiate search")
         return results

     # initialize variables
@@ -66,7 +66,7 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None, r: Opti
     # return cached results, if available
     query_cache_key = f"{user_query}-{n}-{t}-{r}"
     if query_cache_key in state.query_cache:
-        logger.info(f"Return response from query cache")
+        logger.debug(f"Return response from query cache")
         return state.query_cache[query_cache_key]

     if (t == SearchType.Org or t == None) and state.model.orgmode_search:
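The cache hunk above builds its key from every request parameter that affects the response: the query, result count `n`, search type `t`, and regenerate flag `r`. A minimal sketch of that keying scheme (simplified and hypothetical; `expensive_search` stands in for the real semantic search):

from typing import Optional

query_cache: dict = {}

def expensive_search(q, n, t, r):
    # Hypothetical stand-in for the actual search
    return [f"result for {q!r}"][:n]

def search(q: str, n: Optional[int] = 5, t: Optional[str] = None, r: Optional[bool] = False):
    # Every parameter that changes the response is part of the key, so a
    # cached result is never returned for different settings.
    query_cache_key = f"{q}-{n}-{t}-{r}"
    if query_cache_key in query_cache:
        return query_cache[query_cache_key]

    results = expensive_search(q, n, t, r)
    query_cache[query_cache_key] = results
    return results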
@@ -151,7 +151,7 @@ def update(t: Optional[SearchType] = None, force: Optional[bool] = False):
         logger.error(e)
         raise HTTPException(status_code=500, detail=str(e))
     else:
-        logger.info("Search Index updated via API call")
+        logger.info("📬 Search index updated via API")

     try:
         state.processor_config = configure_processor(state.config.processor)
@@ -159,6 +159,6 @@ def update(t: Optional[SearchType] = None, force: Optional[bool] = False):
         logger.error(e)
         raise HTTPException(status_code=500, detail=str(e))
     else:
-        logger.info("Processor reconfigured via API call")
+        logger.info("📬 Processor reconfigured via API")

     return {"status": "ok", "message": "khoj reloaded"}
@@ -152,7 +152,6 @@ def save_chat_session():
         conversation_log["session"].append(session)
     else:
         conversation_log["session"] = [session]
-    logger.info("Added new chat session to conversation logs")

     # Save Conversation Metadata Logs to Disk
     conversation_logfile = resolve_absolute_path(state.processor_config.conversation.conversation_logfile)
@@ -161,4 +160,4 @@ def save_chat_session():
         json.dump(conversation_log, logfile)

     state.processor_config.conversation.chat_session = None
-    logger.info("Saved updated conversation logs to disk.")
+    logger.info("📩 Saved current chat session to conversation logs")
@@ -49,7 +49,7 @@ class FileFilter(BaseFilter):
         query = re.sub(self.file_filter_regex, "", query).strip()
         cache_key = tuple(files_to_search)
         if cache_key in self.cache:
-            logger.info(f"Return file filter results from cache")
+            logger.debug(f"Return file filter results from cache")
             included_entry_indices = self.cache[cache_key]
             return query, included_entry_indices
@@ -58,7 +58,7 @@ class WordFilter(BaseFilter):
         # Return item from cache if exists
         cache_key = tuple(sorted(required_words)), tuple(sorted(blocked_words))
         if cache_key in self.cache:
-            logger.info(f"Return word filter results from cache")
+            logger.debug(f"Return word filter results from cache")
             included_entry_indices = self.cache[cache_key]
             return query, included_entry_indices
@@ -3,7 +3,6 @@ import glob
 import pathlib
 import copy
 import shutil
-import time
 import logging
 from typing import List

@@ -50,9 +49,9 @@ def extract_entries(image_directories):
         image_names.extend(list(image_directory.glob("*.jpg")))
         image_names.extend(list(image_directory.glob("*.jpeg")))

-    if logger.level >= logging.INFO:
+    if logger.level >= logging.DEBUG:
         image_directory_names = ", ".join([str(image_directory) for image_directory in image_directories])
-        logger.info(f"Found {len(image_names)} images in {image_directory_names}")
+        logger.debug(f"Found {len(image_names)} images in {image_directory_names}")
     return sorted(image_names)
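The guard in `extract_entries` above only exists to skip building the joined directory string when it would not be logged. The standard-library idiom for that guard is `Logger.isEnabledFor`, which also accounts for levels inherited from parent loggers; a small sketch of the equivalent (hypothetical helper name, offered as an alternative to the level comparison the commit uses):

import logging

logger = logging.getLogger(__name__)

def log_found_images(image_names, image_directories):
    # Build the potentially long directory listing only if debug logging is on
    if logger.isEnabledFor(logging.DEBUG):
        image_directory_names = ", ".join(str(d) for d in image_directories)
        logger.debug(f"Found {len(image_names)} images in {image_directory_names}")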
@@ -71,7 +70,7 @@ def compute_image_embeddings(image_names, encoder, embeddings_file, batch_size=5
     # Load pre-computed image embeddings from file if exists
     if resolve_absolute_path(embeddings_file).exists() and not regenerate:
         image_embeddings = torch.load(embeddings_file)
-        logger.info(f"Loaded {len(image_embeddings)} image embeddings from {embeddings_file}")
+        logger.debug(f"Loaded {len(image_embeddings)} image embeddings from {embeddings_file}")
     # Else compute the image embeddings from scratch, which can take a while
     else:
         image_embeddings = []
@@ -89,7 +88,7 @@ def compute_image_embeddings(image_names, encoder, embeddings_file, batch_size=5

     # Save computed image embeddings to file
     torch.save(image_embeddings, embeddings_file)
-    logger.info(f"Saved computed embeddings to {embeddings_file}")
+    logger.info(f"📩 Saved computed image embeddings to {embeddings_file}")

     return image_embeddings
@@ -102,7 +101,7 @@ def compute_metadata_embeddings(
     # Load pre-computed image metadata embedding file if exists
     if use_xmp_metadata and resolve_absolute_path(f"{embeddings_file}_metadata").exists() and not regenerate:
         image_metadata_embeddings = torch.load(f"{embeddings_file}_metadata")
-        logger.info(f"Loaded pre-computed embeddings from {embeddings_file}_metadata")
+        logger.debug(f"Loaded image metadata embeddings from {embeddings_file}_metadata")

     # Else compute the image metadata embeddings from scratch, which can take a while
     if use_xmp_metadata and image_metadata_embeddings is None:
@@ -121,7 +120,7 @@ def compute_metadata_embeddings(
             )
             continue
     torch.save(image_metadata_embeddings, f"{embeddings_file}_metadata")
-    logger.info(f"Saved computed metadata embeddings to {embeddings_file}_metadata")
+    logger.info(f"📩 Saved computed image metadata embeddings to {embeddings_file}_metadata")

     return image_metadata_embeddings
@@ -149,12 +148,12 @@ def query(raw_query, count, model: ImageSearchModel):
         query_imagepath = resolve_absolute_path(pathlib.Path(raw_query[5:]), strict=True)
         query = copy.deepcopy(Image.open(query_imagepath))
         query.thumbnail((640, query.height))  # scale down image for faster processing
-        logger.info(f"Find Images by Image: {query_imagepath}")
+        logger.info(f"🔎 Find Images by Image: {query_imagepath}")
     else:
         # Truncate words in query to stay below max_tokens supported by ML model
         max_words = 20
         query = " ".join(raw_query.split()[:max_words])
-        logger.info(f"Find Images by Text: {query}")
+        logger.info(f"🔎 Find Images by Text: {query}")

     # Now we encode the query (which can either be an image or a text string)
     with timer("Query Encode Time", logger):
@@ -1,7 +1,6 @@
 # Standard Packages
 import logging
 from pathlib import Path
-import time
 from typing import List, Tuple, Type

 # External Packages
@@ -68,7 +67,7 @@ def compute_embeddings(
     # Load pre-computed embeddings from file if exists and update them if required
     if embeddings_file.exists() and not regenerate:
         corpus_embeddings = torch.load(get_absolute_path(embeddings_file), map_location=state.device)
-        logger.info(f"Loaded embeddings from {embeddings_file}")
+        logger.debug(f"Loaded {len(corpus_embeddings)} text embeddings from {embeddings_file}")

     # Encode any new entries in the corpus and update corpus embeddings
     new_entries = [entry.compiled for id, entry in entries_with_ids if id == -1]
@@ -95,7 +94,7 @@ def compute_embeddings(
     if new_entries:
         corpus_embeddings = util.normalize_embeddings(corpus_embeddings)
         torch.save(corpus_embeddings, embeddings_file)
-        logger.info(f"Computed embeddings and saved them to {embeddings_file}")
+        logger.info(f"📩 Saved computed text embeddings to {embeddings_file}")

     return corpus_embeddings
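The `compute_embeddings` hunks above sit inside an incremental update: cached embeddings are loaded from disk, only entries marked new (id == -1 in the context lines) are encoded, and the merged tensor is saved back. A minimal sketch of that pattern, with a hypothetical `encode` callable standing in for the bi-encoder and the deletion/normalization handling omitted:

from pathlib import Path
from typing import Callable, List, Tuple

import torch

def compute_embeddings(
    entries_with_ids: List[Tuple[int, str]],      # (id, text); id == -1 marks a new entry
    encode: Callable[[List[str]], torch.Tensor],  # hypothetical stand-in for the bi-encoder
    embeddings_file: Path,
    regenerate: bool = False,
) -> torch.Tensor:
    if embeddings_file.exists() and not regenerate:
        # Reuse cached embeddings; encode only the new entries
        corpus_embeddings = torch.load(embeddings_file)
        new_texts = [text for id, text in entries_with_ids if id == -1]
        if new_texts:
            corpus_embeddings = torch.cat([corpus_embeddings, encode(new_texts)])
            torch.save(corpus_embeddings, embeddings_file)
    else:
        # Cold start or forced regeneration: encode everything
        corpus_embeddings = encode([text for _, text in entries_with_ids])
        torch.save(corpus_embeddings, embeddings_file)
    return corpus_embeddings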
@@ -58,16 +58,20 @@ def merge_dicts(priority_dict: dict, default_dict: dict):
 def load_model(model_name: str, model_type, model_dir=None, device: str = None) -> Union[BaseEncoder, CrossEncoder]:
     "Load model from disk or huggingface"
     # Construct model path
+    logger = logging.getLogger(__name__)
     model_path = join(model_dir, model_name.replace("/", "_")) if model_dir is not None else None

     # Load model from model_path if it exists there
     model_type_class = get_class_by_name(model_type) if isinstance(model_type, str) else model_type
     if model_path is not None and resolve_absolute_path(model_path).exists():
+        logger.debug(f"Loading {model_name} model from disk")
         model = model_type_class(get_absolute_path(model_path), device=device)
     # Else load the model from the model_name
     else:
+        logger.info(f"🤖 Downloading {model_name} model from web")
         model = model_type_class(model_name, device=device)
         if model_path is not None:
+            logger.info(f"📩 Saved {model_name} model to disk")
             model.save(model_path)

     return model
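`load_model` above follows a cache-or-download pattern: reuse the copy saved under `model_dir` if present, otherwise download by name and persist it for next time. That is why only the download branch logs at info level; it is the slow step the commit message calls out. A minimal sketch with sentence-transformers (hypothetical simplification; the real function also resolves the model class dynamically):

import logging
from os.path import join
from pathlib import Path

from sentence_transformers import SentenceTransformer

logger = logging.getLogger(__name__)

def load_model(model_name: str, model_dir: str = None) -> SentenceTransformer:
    "Load model from disk or huggingface"
    model_path = join(model_dir, model_name.replace("/", "_")) if model_dir else None

    if model_path and Path(model_path).expanduser().exists():
        logger.debug(f"Loading {model_name} model from disk")  # fast path: debug only
        return SentenceTransformer(model_path)

    logger.info(f"🤖 Downloading {model_name} model from web")  # slow path: surface to user
    model = SentenceTransformer(model_name)
    if model_path:
        model.save(model_path)
        logger.info(f"📩 Saved {model_name} model to disk")
    return model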
@@ -31,7 +31,7 @@ def load_jsonl(input_path):
     jsonl_file.close()

     # Log JSONL entries loaded
-    logger.info(f"Loaded {len(data)} records from {input_path}")
+    logger.debug(f"Loaded {len(data)} records from {input_path}")

     return data
@@ -44,7 +44,7 @@ def dump_jsonl(jsonl_data, output_path):
     with open(output_path, "w", encoding="utf-8") as f:
         f.write(jsonl_data)

-    logger.info(f"Wrote jsonl data to {output_path}")
+    logger.debug(f"Wrote jsonl data to {output_path}")


 def compress_jsonl_data(jsonl_data, output_path):
@@ -54,4 +54,4 @@ def compress_jsonl_data(jsonl_data, output_path):
     with gzip.open(output_path, "wt", encoding="utf-8") as gzip_file:
         gzip_file.write(jsonl_data)

-    logger.info(f"Wrote jsonl data to gzip compressed jsonl at {output_path}")
+    logger.debug(f"Wrote jsonl data to gzip compressed jsonl at {output_path}")