Show user friendly information logs by default for context
- Use emojis to make info logs easier to read
- Inform when khoj is ready to use
- Provide information on what khoj is doing while starting up
- Inform when content/search types and processors are setup
- Inform when models are being loaded from the web as this step can take time
- Convert all other info logs to be only shown in verbose mode
parent fe03ba3dce
commit 1ae40163a9

14 changed files with 43 additions and 36 deletions
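The verbosity change in the `run()` hunk below inverts the old defaults: info logs are now shown without any flag, and a single `-v` switches to debug. A minimal sketch of the resulting behavior, assuming a standalone script with an argparse count-style `--verbose` flag (not khoj's actual entrypoint):

import argparse
import logging

logging.basicConfig(format="%(message)s")
logger = logging.getLogger("khoj")

parser = argparse.ArgumentParser()
parser.add_argument("-v", "--verbose", action="count", default=0)
args = parser.parse_args()

# After this commit: INFO is the default; previously the default was WARN,
# with -v for INFO and -vv for DEBUG.
if args.verbose == 0:
    logger.setLevel(logging.INFO)   # user-friendly, emoji-prefixed logs
elif args.verbose >= 1:
    logger.setLevel(logging.DEBUG)  # detailed logs, verbose mode only

logger.info("🌘 Starting Khoj")          # shown by default
logger.debug("Processing files: [...]")  # shown only with -v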
@@ -66,7 +66,7 @@ def update_search_index():
     state.search_index_lock.acquire()
     state.model = configure_search(state.model, state.config, regenerate=False)
     state.search_index_lock.release()
-    logger.info("Search Index updated via Scheduler")
+    logger.info("📬 Search index updated via Scheduler")


 def configure_search_types(config: FullConfig):
@@ -84,6 +84,7 @@ def configure_search_types(config: FullConfig):
 def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, t: state.SearchType = None):
     # Initialize Org Notes Search
     if (t == state.SearchType.Org or t == None) and config.content_type.org:
+        logger.info("🦄 Setting up search for orgmode notes")
         # Extract Entries, Generate Notes Embeddings
         model.orgmode_search = text_search.setup(
             OrgToJsonl,
@@ -95,6 +96,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
     # Initialize Org Music Search
     if (t == state.SearchType.Music or t == None) and config.content_type.music:
+        logger.info("🎺 Setting up search for org-music")
         # Extract Entries, Generate Music Embeddings
         model.music_search = text_search.setup(
             OrgToJsonl,
@@ -106,6 +108,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
     # Initialize Markdown Search
     if (t == state.SearchType.Markdown or t == None) and config.content_type.markdown:
+        logger.info("💎 Setting up search for markdown notes")
         # Extract Entries, Generate Markdown Embeddings
         model.markdown_search = text_search.setup(
             MarkdownToJsonl,
@@ -117,6 +120,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
     # Initialize Ledger Search
     if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger:
+        logger.info("💸 Setting up search for ledger")
         # Extract Entries, Generate Ledger Embeddings
         model.ledger_search = text_search.setup(
             BeancountToJsonl,
@@ -128,6 +132,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
     # Initialize Image Search
     if (t == state.SearchType.Image or t == None) and config.content_type.image:
+        logger.info("🌄 Setting up search for images")
         # Extract Entries, Generate Image Embeddings
         model.image_search = image_search.setup(
             config.content_type.image, search_config=config.search_type.image, regenerate=regenerate
@@ -135,6 +140,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
     # Initialize External Plugin Search
     if (t == None or t in state.SearchType) and config.content_type.plugins:
+        logger.info("🔌 Setting up search for plugins")
         model.plugin_search = {}
         for plugin_type, plugin_config in config.content_type.plugins.items():
             model.plugin_search[plugin_type] = text_search.setup(
@@ -159,6 +165,7 @@ def configure_processor(processor_config: ProcessorConfig):
     # Initialize Conversation Processor
     if processor_config.conversation:
+        logger.info("💬 Setting up conversation processor")
         processor.conversation = configure_conversation_processor(processor_config.conversation)

     return processor
@@ -172,7 +179,7 @@ def configure_conversation_processor(conversation_processor_config):
         # Load Metadata Logs from Conversation Logfile
         with conversation_logfile.open("r") as f:
             conversation_processor.meta_log = json.load(f)
-        logger.info("Conversation logs loaded from disk.")
+        logger.debug(f"Loaded conversation logs from {conversation_logfile}")
     else:
         # Initialize Conversation Logs
         conversation_processor.meta_log = {}
@@ -52,10 +52,8 @@ def run():
     # Set Logging Level
     if args.verbose == 0:
-        logger.setLevel(logging.WARN)
-    elif args.verbose == 1:
         logger.setLevel(logging.INFO)
-    elif args.verbose >= 2:
+    elif args.verbose >= 1:
         logger.setLevel(logging.DEBUG)

     # Set Log File
@@ -63,7 +61,7 @@ def run():
     fh.setLevel(logging.DEBUG)
     logger.addHandler(fh)

-    logger.info("Starting Khoj...")
+    logger.info("🌘 Starting Khoj")

     if args.no_gui:
         # Setup task scheduler
@@ -122,7 +120,6 @@ def run():

 def sigint_handler(*args):
-    print("\nShutting down Khoj...")
     QtWidgets.QApplication.quit()


@@ -135,10 +132,12 @@ def set_state(args):

 def start_server(app, host=None, port=None, socket=None):
+    logger.info("🌖 Khoj is ready to use")
     if socket:
         uvicorn.run(app, proxy_headers=True, uds=socket, log_level="debug", use_colors=True, log_config=None)
     else:
         uvicorn.run(app, host=host, port=port, log_level="debug", use_colors=True, log_config=None)
+    logger.info("🌒 Stopping Khoj")


 def poll_task_scheduler():
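The two logs added around `uvicorn.run` in `start_server` bracket the server's lifetime because `uvicorn.run` blocks until shutdown. A minimal sketch of that lifecycle, with a hypothetical app and logger setup (not khoj's actual wiring); note the ready message is necessarily emitted just before the server starts listening, not after:

import logging

import uvicorn
from fastapi import FastAPI

logger = logging.getLogger(__name__)
app = FastAPI()

def start_server(app, host="127.0.0.1", port=8000):
    logger.info("🌖 Khoj is ready to use")  # just before the server starts listening
    uvicorn.run(app, host=host, port=port)  # blocks until shutdown (e.g. Ctrl+C)
    logger.info("🌒 Stopping Khoj")         # runs only after uvicorn exits

if __name__ == "__main__":
    start_server(app)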
@@ -82,7 +82,7 @@ class JsonlToJsonl(TextToJsonl):
         if any(files_with_non_jsonl_extensions):
             print(f"[Warning] There maybe non jsonl files in the input set: {files_with_non_jsonl_extensions}")

-        logger.info(f"Processing files: {all_jsonl_files}")
+        logger.debug(f"Processing files: {all_jsonl_files}")

         return all_jsonl_files
@@ -88,7 +88,7 @@ class BeancountToJsonl(TextToJsonl):
         if any(files_with_non_beancount_extensions):
             print(f"[Warning] There maybe non beancount files in the input set: {files_with_non_beancount_extensions}")

-        logger.info(f"Processing files: {all_beancount_files}")
+        logger.debug(f"Processing files: {all_beancount_files}")

         return all_beancount_files
@@ -123,7 +123,7 @@ class BeancountToJsonl(TextToJsonl):
                 Entry(compiled=parsed_entry, raw=parsed_entry, file=f"{transaction_to_file_map[parsed_entry]}")
             )

-        logger.info(f"Converted {len(parsed_entries)} transactions to dictionaries")
+        logger.debug(f"Converted {len(parsed_entries)} transactions to dictionaries")

         return entries
@@ -92,7 +92,7 @@ class MarkdownToJsonl(TextToJsonl):
                 f"[Warning] There maybe non markdown-mode files in the input set: {files_with_non_markdown_extensions}"
             )

-        logger.info(f"Processing files: {all_markdown_files}")
+        logger.debug(f"Processing files: {all_markdown_files}")

         return all_markdown_files
@@ -126,7 +126,7 @@ class MarkdownToJsonl(TextToJsonl):
         for parsed_entry in parsed_entries:
             entries.append(Entry(compiled=parsed_entry, raw=parsed_entry, file=f"{entry_to_file_map[parsed_entry]}"))

-        logger.info(f"Converted {len(parsed_entries)} markdown entries to dictionaries")
+        logger.debug(f"Converted {len(parsed_entries)} markdown entries to dictionaries")

         return entries
@@ -86,7 +86,7 @@ class OrgToJsonl(TextToJsonl):
         if any(files_with_non_org_extensions):
             logger.warn(f"There maybe non org-mode files in the input set: {files_with_non_org_extensions}")

-        logger.info(f"Processing files: {all_org_files}")
+        logger.debug(f"Processing files: {all_org_files}")

         return all_org_files
@@ -56,7 +56,7 @@ async def set_config_data(updated_config: FullConfig):
 def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None, r: Optional[bool] = False):
     results: List[SearchResponse] = []
     if q is None or q == "":
-        logger.info(f"No query param (q) passed in API call to initiate search")
+        logger.warn(f"No query param (q) passed in API call to initiate search")
         return results

     # initialize variables
@@ -66,7 +66,7 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None, r: Opti
     # return cached results, if available
     query_cache_key = f"{user_query}-{n}-{t}-{r}"
     if query_cache_key in state.query_cache:
-        logger.info(f"Return response from query cache")
+        logger.debug(f"Return response from query cache")
         return state.query_cache[query_cache_key]

     if (t == SearchType.Org or t == None) and state.model.orgmode_search:
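The cache hunk above builds its key from every request parameter that affects the response: the query, result count `n`, search type `t`, and regenerate flag `r`. A minimal sketch of that keying scheme (simplified and hypothetical; `expensive_search` stands in for the real semantic search):

from typing import Optional

query_cache: dict = {}

def expensive_search(q, n, t, r):
    # Hypothetical stand-in for the actual search
    return [f"result for {q!r}"][:n]

def search(q: str, n: Optional[int] = 5, t: Optional[str] = None, r: Optional[bool] = False):
    # Every parameter that changes the response is part of the key, so a
    # cached result is never returned for different settings.
    query_cache_key = f"{q}-{n}-{t}-{r}"
    if query_cache_key in query_cache:
        return query_cache[query_cache_key]

    results = expensive_search(q, n, t, r)
    query_cache[query_cache_key] = results
    return results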
@@ -151,7 +151,7 @@ def update(t: Optional[SearchType] = None, force: Optional[bool] = False):
         logger.error(e)
         raise HTTPException(status_code=500, detail=str(e))
     else:
-        logger.info("Search Index updated via API call")
+        logger.info("📬 Search index updated via API")

     try:
         state.processor_config = configure_processor(state.config.processor)
@@ -159,6 +159,6 @@ def update(t: Optional[SearchType] = None, force: Optional[bool] = False):
         logger.error(e)
         raise HTTPException(status_code=500, detail=str(e))
     else:
-        logger.info("Processor reconfigured via API call")
+        logger.info("📬 Processor reconfigured via API")

     return {"status": "ok", "message": "khoj reloaded"}
@@ -152,7 +152,6 @@ def save_chat_session():
         conversation_log["session"].append(session)
     else:
         conversation_log["session"] = [session]
-    logger.info("Added new chat session to conversation logs")

     # Save Conversation Metadata Logs to Disk
     conversation_logfile = resolve_absolute_path(state.processor_config.conversation.conversation_logfile)
@@ -161,4 +160,4 @@ def save_chat_session():
         json.dump(conversation_log, logfile)

     state.processor_config.conversation.chat_session = None
-    logger.info("Saved updated conversation logs to disk.")
+    logger.info("📩 Saved current chat session to conversation logs")
@@ -49,7 +49,7 @@ class FileFilter(BaseFilter):
         query = re.sub(self.file_filter_regex, "", query).strip()
         cache_key = tuple(files_to_search)
         if cache_key in self.cache:
-            logger.info(f"Return file filter results from cache")
+            logger.debug(f"Return file filter results from cache")
             included_entry_indices = self.cache[cache_key]
             return query, included_entry_indices
@@ -58,7 +58,7 @@ class WordFilter(BaseFilter):
         # Return item from cache if exists
         cache_key = tuple(sorted(required_words)), tuple(sorted(blocked_words))
         if cache_key in self.cache:
-            logger.info(f"Return word filter results from cache")
+            logger.debug(f"Return word filter results from cache")
             included_entry_indices = self.cache[cache_key]
             return query, included_entry_indices
@@ -3,7 +3,6 @@ import glob
 import pathlib
 import copy
 import shutil
-import time
 import logging
 from typing import List

@@ -50,9 +49,9 @@ def extract_entries(image_directories):
         image_names.extend(list(image_directory.glob("*.jpg")))
         image_names.extend(list(image_directory.glob("*.jpeg")))

-    if logger.level >= logging.INFO:
+    if logger.level >= logging.DEBUG:
         image_directory_names = ", ".join([str(image_directory) for image_directory in image_directories])
-        logger.info(f"Found {len(image_names)} images in {image_directory_names}")
+        logger.debug(f"Found {len(image_names)} images in {image_directory_names}")
     return sorted(image_names)
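The guard in `extract_entries` above only exists to skip building the joined directory string when it would not be logged. The standard-library idiom for that guard is `Logger.isEnabledFor`, which also accounts for levels inherited from parent loggers; a small sketch of the equivalent (hypothetical helper name, offered as an alternative to the level comparison the commit uses):

import logging

logger = logging.getLogger(__name__)

def log_found_images(image_names, image_directories):
    # Build the potentially long directory listing only if debug logging is on
    if logger.isEnabledFor(logging.DEBUG):
        image_directory_names = ", ".join(str(d) for d in image_directories)
        logger.debug(f"Found {len(image_names)} images in {image_directory_names}")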
@@ -71,7 +70,7 @@ def compute_image_embeddings(image_names, encoder, embeddings_file, batch_size=5
     # Load pre-computed image embeddings from file if exists
     if resolve_absolute_path(embeddings_file).exists() and not regenerate:
         image_embeddings = torch.load(embeddings_file)
-        logger.info(f"Loaded {len(image_embeddings)} image embeddings from {embeddings_file}")
+        logger.debug(f"Loaded {len(image_embeddings)} image embeddings from {embeddings_file}")
     # Else compute the image embeddings from scratch, which can take a while
     else:
         image_embeddings = []
@@ -89,7 +88,7 @@ def compute_image_embeddings(image_names, encoder, embeddings_file, batch_size=5

     # Save computed image embeddings to file
     torch.save(image_embeddings, embeddings_file)
-    logger.info(f"Saved computed embeddings to {embeddings_file}")
+    logger.info(f"📩 Saved computed image embeddings to {embeddings_file}")

     return image_embeddings
@@ -102,7 +101,7 @@ def compute_metadata_embeddings(
     # Load pre-computed image metadata embedding file if exists
     if use_xmp_metadata and resolve_absolute_path(f"{embeddings_file}_metadata").exists() and not regenerate:
         image_metadata_embeddings = torch.load(f"{embeddings_file}_metadata")
-        logger.info(f"Loaded pre-computed embeddings from {embeddings_file}_metadata")
+        logger.debug(f"Loaded image metadata embeddings from {embeddings_file}_metadata")

     # Else compute the image metadata embeddings from scratch, which can take a while
     if use_xmp_metadata and image_metadata_embeddings is None:
@@ -121,7 +120,7 @@ def compute_metadata_embeddings(
             )
             continue
     torch.save(image_metadata_embeddings, f"{embeddings_file}_metadata")
-    logger.info(f"Saved computed metadata embeddings to {embeddings_file}_metadata")
+    logger.info(f"📩 Saved computed image metadata embeddings to {embeddings_file}_metadata")

     return image_metadata_embeddings
@@ -149,12 +148,12 @@ def query(raw_query, count, model: ImageSearchModel):
         query_imagepath = resolve_absolute_path(pathlib.Path(raw_query[5:]), strict=True)
         query = copy.deepcopy(Image.open(query_imagepath))
         query.thumbnail((640, query.height))  # scale down image for faster processing
-        logger.info(f"Find Images by Image: {query_imagepath}")
+        logger.info(f"🔎 Find Images by Image: {query_imagepath}")
     else:
         # Truncate words in query to stay below max_tokens supported by ML model
         max_words = 20
         query = " ".join(raw_query.split()[:max_words])
-        logger.info(f"Find Images by Text: {query}")
+        logger.info(f"🔎 Find Images by Text: {query}")

     # Now we encode the query (which can either be an image or a text string)
     with timer("Query Encode Time", logger):
@@ -1,7 +1,6 @@
 # Standard Packages
 import logging
 from pathlib import Path
-import time
 from typing import List, Tuple, Type

 # External Packages
@@ -68,7 +67,7 @@ def compute_embeddings(
     # Load pre-computed embeddings from file if exists and update them if required
     if embeddings_file.exists() and not regenerate:
         corpus_embeddings = torch.load(get_absolute_path(embeddings_file), map_location=state.device)
-        logger.info(f"Loaded embeddings from {embeddings_file}")
+        logger.debug(f"Loaded {len(corpus_embeddings)} text embeddings from {embeddings_file}")

     # Encode any new entries in the corpus and update corpus embeddings
     new_entries = [entry.compiled for id, entry in entries_with_ids if id == -1]
@@ -95,7 +94,7 @@ def compute_embeddings(
     if new_entries:
         corpus_embeddings = util.normalize_embeddings(corpus_embeddings)
         torch.save(corpus_embeddings, embeddings_file)
-        logger.info(f"Computed embeddings and saved them to {embeddings_file}")
+        logger.info(f"📩 Saved computed text embeddings to {embeddings_file}")

     return corpus_embeddings
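The `compute_embeddings` hunks above sit inside an incremental update: cached embeddings are loaded from disk, only entries marked new (id == -1 in the context lines) are encoded, and the merged tensor is saved back. A minimal sketch of that pattern, with a hypothetical `encode` callable standing in for the bi-encoder and the deletion/normalization handling omitted:

from pathlib import Path
from typing import Callable, List, Tuple

import torch

def compute_embeddings(
    entries_with_ids: List[Tuple[int, str]],      # (id, text); id == -1 marks a new entry
    encode: Callable[[List[str]], torch.Tensor],  # hypothetical stand-in for the bi-encoder
    embeddings_file: Path,
    regenerate: bool = False,
) -> torch.Tensor:
    if embeddings_file.exists() and not regenerate:
        # Reuse cached embeddings; encode only the new entries
        corpus_embeddings = torch.load(embeddings_file)
        new_texts = [text for id, text in entries_with_ids if id == -1]
        if new_texts:
            corpus_embeddings = torch.cat([corpus_embeddings, encode(new_texts)])
            torch.save(corpus_embeddings, embeddings_file)
    else:
        # Cold start or forced regeneration: encode everything
        corpus_embeddings = encode([text for _, text in entries_with_ids])
        torch.save(corpus_embeddings, embeddings_file)
    return corpus_embeddings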
@@ -58,16 +58,20 @@ def merge_dicts(priority_dict: dict, default_dict: dict):
 def load_model(model_name: str, model_type, model_dir=None, device: str = None) -> Union[BaseEncoder, CrossEncoder]:
     "Load model from disk or huggingface"
     # Construct model path
+    logger = logging.getLogger(__name__)
     model_path = join(model_dir, model_name.replace("/", "_")) if model_dir is not None else None

     # Load model from model_path if it exists there
     model_type_class = get_class_by_name(model_type) if isinstance(model_type, str) else model_type
     if model_path is not None and resolve_absolute_path(model_path).exists():
+        logger.debug(f"Loading {model_name} model from disk")
         model = model_type_class(get_absolute_path(model_path), device=device)
     # Else load the model from the model_name
     else:
+        logger.info(f"🤖 Downloading {model_name} model from web")
         model = model_type_class(model_name, device=device)
         if model_path is not None:
+            logger.info(f"📩 Saved {model_name} model to disk")
             model.save(model_path)

     return model
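`load_model` above follows a cache-or-download pattern: reuse the copy saved under `model_dir` if present, otherwise download by name and persist it for next time. That is why only the download branch logs at info level; it is the slow step the commit message calls out. A minimal sketch with sentence-transformers (hypothetical simplification; the real function also resolves the model class dynamically):

import logging
from os.path import join
from pathlib import Path

from sentence_transformers import SentenceTransformer

logger = logging.getLogger(__name__)

def load_model(model_name: str, model_dir: str = None) -> SentenceTransformer:
    "Load model from disk or huggingface"
    model_path = join(model_dir, model_name.replace("/", "_")) if model_dir else None

    if model_path and Path(model_path).expanduser().exists():
        logger.debug(f"Loading {model_name} model from disk")  # fast path: debug only
        return SentenceTransformer(model_path)

    logger.info(f"🤖 Downloading {model_name} model from web")  # slow path: surface to user
    model = SentenceTransformer(model_name)
    if model_path:
        model.save(model_path)
        logger.info(f"📩 Saved {model_name} model to disk")
    return model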
@@ -31,7 +31,7 @@ def load_jsonl(input_path):
     jsonl_file.close()

     # Log JSONL entries loaded
-    logger.info(f"Loaded {len(data)} records from {input_path}")
+    logger.debug(f"Loaded {len(data)} records from {input_path}")

     return data
@@ -44,7 +44,7 @@ def dump_jsonl(jsonl_data, output_path):
     with open(output_path, "w", encoding="utf-8") as f:
         f.write(jsonl_data)

-    logger.info(f"Wrote jsonl data to {output_path}")
+    logger.debug(f"Wrote jsonl data to {output_path}")


 def compress_jsonl_data(jsonl_data, output_path):
@@ -54,4 +54,4 @@ def compress_jsonl_data(jsonl_data, output_path):
     with gzip.open(output_path, "wt", encoding="utf-8") as gzip_file:
         gzip_file.write(jsonl_data)

-    logger.info(f"Wrote jsonl data to gzip compressed jsonl at {output_path}")
+    logger.debug(f"Wrote jsonl data to gzip compressed jsonl at {output_path}")