From 1ae40163a9a5b394413239ad09da09c81b72766b Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Wed, 1 Mar 2023 15:22:14 -0600
Subject: [PATCH] Show user friendly information logs by default for context

- Use emojis to make info logs easier to read
- Inform when khoj is ready to use
- Provide information on what khoj is doing while starting up
- Inform when content/search types and processors are setup
- Inform when models are being loaded from the web as this step can
  take time
- Convert all other info logs to be only shown in verbose mode
---
 src/khoj/configure.py                           | 11 +++++++++--
 src/khoj/main.py                                |  9 ++++-----
 src/khoj/processor/jsonl/jsonl_to_jsonl.py      |  2 +-
 src/khoj/processor/ledger/beancount_to_jsonl.py |  4 ++--
 .../processor/markdown/markdown_to_jsonl.py     |  4 ++--
 src/khoj/processor/org_mode/org_to_jsonl.py     |  2 +-
 src/khoj/routers/api.py                         |  8 ++++----
 src/khoj/routers/api_beta.py                    |  3 +--
 src/khoj/search_filter/file_filter.py           |  2 +-
 src/khoj/search_filter/word_filter.py           |  2 +-
 src/khoj/search_type/image_search.py            | 17 ++++++++---------
 src/khoj/search_type/text_search.py             |  5 ++---
 src/khoj/utils/helpers.py                       |  4 ++++
 src/khoj/utils/jsonl.py                         |  6 +++---
 14 files changed, 43 insertions(+), 36 deletions(-)

diff --git a/src/khoj/configure.py b/src/khoj/configure.py
index 27067a4b..09c63781 100644
--- a/src/khoj/configure.py
+++ b/src/khoj/configure.py
@@ -66,7 +66,7 @@ def update_search_index():
     state.search_index_lock.acquire()
     state.model = configure_search(state.model, state.config, regenerate=False)
     state.search_index_lock.release()
-    logger.info("Search Index updated via Scheduler")
+    logger.info("📬 Search index updated via Scheduler")
 
 
 def configure_search_types(config: FullConfig):
@@ -84,6 +84,7 @@ def configure_search_types(config: FullConfig):
 def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, t: state.SearchType = None):
     # Initialize Org Notes Search
     if (t == state.SearchType.Org or t == None) and config.content_type.org:
+        logger.info("🦄 Setting up search for orgmode notes")
         # Extract Entries, Generate Notes Embeddings
         model.orgmode_search = text_search.setup(
             OrgToJsonl,
@@ -95,6 +96,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
 
     # Initialize Org Music Search
     if (t == state.SearchType.Music or t == None) and config.content_type.music:
+        logger.info("🎺 Setting up search for org-music")
         # Extract Entries, Generate Music Embeddings
         model.music_search = text_search.setup(
             OrgToJsonl,
@@ -106,6 +108,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
 
     # Initialize Markdown Search
     if (t == state.SearchType.Markdown or t == None) and config.content_type.markdown:
+        logger.info("💎 Setting up search for markdown notes")
         # Extract Entries, Generate Markdown Embeddings
         model.markdown_search = text_search.setup(
             MarkdownToJsonl,
@@ -117,6 +120,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
 
     # Initialize Ledger Search
     if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger:
+        logger.info("💸 Setting up search for ledger")
         # Extract Entries, Generate Ledger Embeddings
         model.ledger_search = text_search.setup(
             BeancountToJsonl,
@@ -128,6 +132,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
 
     # Initialize Image Search
     if (t == state.SearchType.Image or t == None) and config.content_type.image:
+        logger.info("🌄 Setting up search for images")
         # Extract Entries, Generate Image Embeddings
         model.image_search = image_search.setup(
             config.content_type.image, search_config=config.search_type.image, regenerate=regenerate
@@ -135,6 +140,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
 
     # Initialize External Plugin Search
     if (t == None or t in state.SearchType) and config.content_type.plugins:
+        logger.info("🔌 Setting up search for plugins")
         model.plugin_search = {}
         for plugin_type, plugin_config in config.content_type.plugins.items():
             model.plugin_search[plugin_type] = text_search.setup(
@@ -159,6 +165,7 @@ def configure_processor(processor_config: ProcessorConfig):
 
     # Initialize Conversation Processor
     if processor_config.conversation:
+        logger.info("💬 Setting up conversation processor")
         processor.conversation = configure_conversation_processor(processor_config.conversation)
 
     return processor
@@ -172,7 +179,7 @@ def configure_conversation_processor(conversation_processor_config):
         # Load Metadata Logs from Conversation Logfile
         with conversation_logfile.open("r") as f:
             conversation_processor.meta_log = json.load(f)
-        logger.info("Conversation logs loaded from disk.")
+        logger.debug(f"Loaded conversation logs from {conversation_logfile}")
     else:
         # Initialize Conversation Logs
         conversation_processor.meta_log = {}
diff --git a/src/khoj/main.py b/src/khoj/main.py
index e9cfec78..a54b2b4d 100644
--- a/src/khoj/main.py
+++ b/src/khoj/main.py
@@ -52,10 +52,8 @@ def run():
 
     # Set Logging Level
     if args.verbose == 0:
-        logger.setLevel(logging.WARN)
-    elif args.verbose == 1:
         logger.setLevel(logging.INFO)
-    elif args.verbose >= 2:
+    elif args.verbose >= 1:
         logger.setLevel(logging.DEBUG)
 
     # Set Log File
@@ -63,7 +61,7 @@ def run():
     fh.setLevel(logging.DEBUG)
     logger.addHandler(fh)
 
-    logger.info("Starting Khoj...")
+    logger.info("🌘 Starting Khoj")
 
     if args.no_gui:
         # Setup task scheduler
@@ -122,7 +120,6 @@ def run():
 
 
 def sigint_handler(*args):
-    print("\nShutting down Khoj...")
     QtWidgets.QApplication.quit()
 
 
@@ -135,10 +132,12 @@ def set_state(args):
 
 
 def start_server(app, host=None, port=None, socket=None):
+    logger.info("🌖 Khoj is ready to use")
     if socket:
         uvicorn.run(app, proxy_headers=True, uds=socket, log_level="debug", use_colors=True, log_config=None)
     else:
         uvicorn.run(app, host=host, port=port, log_level="debug", use_colors=True, log_config=None)
+    logger.info("🌒 Stopping Khoj")
 
 
 def poll_task_scheduler():
diff --git a/src/khoj/processor/jsonl/jsonl_to_jsonl.py b/src/khoj/processor/jsonl/jsonl_to_jsonl.py
index 0268a659..83c82374 100644
--- a/src/khoj/processor/jsonl/jsonl_to_jsonl.py
+++ b/src/khoj/processor/jsonl/jsonl_to_jsonl.py
@@ -82,7 +82,7 @@ class JsonlToJsonl(TextToJsonl):
         if any(files_with_non_jsonl_extensions):
             print(f"[Warning] There maybe non jsonl files in the input set: {files_with_non_jsonl_extensions}")
 
-        logger.info(f"Processing files: {all_jsonl_files}")
+        logger.debug(f"Processing files: {all_jsonl_files}")
 
         return all_jsonl_files
 
diff --git a/src/khoj/processor/ledger/beancount_to_jsonl.py b/src/khoj/processor/ledger/beancount_to_jsonl.py
index 006d0d70..49c43301 100644
--- a/src/khoj/processor/ledger/beancount_to_jsonl.py
+++ b/src/khoj/processor/ledger/beancount_to_jsonl.py
@@ -88,7 +88,7 @@ class BeancountToJsonl(TextToJsonl):
         if any(files_with_non_beancount_extensions):
             print(f"[Warning] There maybe non beancount files in the input set: {files_with_non_beancount_extensions}")
 
-        logger.info(f"Processing files: {all_beancount_files}")
+        logger.debug(f"Processing files: {all_beancount_files}")
 
         return all_beancount_files
 
@@ -123,7 +123,7 @@ class BeancountToJsonl(TextToJsonl):
                 Entry(compiled=parsed_entry, raw=parsed_entry, file=f"{transaction_to_file_map[parsed_entry]}")
             )
 
-        logger.info(f"Converted {len(parsed_entries)} transactions to dictionaries")
+        logger.debug(f"Converted {len(parsed_entries)} transactions to dictionaries")
 
         return entries
 
diff --git a/src/khoj/processor/markdown/markdown_to_jsonl.py b/src/khoj/processor/markdown/markdown_to_jsonl.py
index c661c1aa..045eea65 100644
--- a/src/khoj/processor/markdown/markdown_to_jsonl.py
+++ b/src/khoj/processor/markdown/markdown_to_jsonl.py
@@ -92,7 +92,7 @@ class MarkdownToJsonl(TextToJsonl):
                 f"[Warning] There maybe non markdown-mode files in the input set: {files_with_non_markdown_extensions}"
             )
 
-        logger.info(f"Processing files: {all_markdown_files}")
+        logger.debug(f"Processing files: {all_markdown_files}")
 
         return all_markdown_files
 
@@ -126,7 +126,7 @@ class MarkdownToJsonl(TextToJsonl):
         for parsed_entry in parsed_entries:
             entries.append(Entry(compiled=parsed_entry, raw=parsed_entry, file=f"{entry_to_file_map[parsed_entry]}"))
 
-        logger.info(f"Converted {len(parsed_entries)} markdown entries to dictionaries")
+        logger.debug(f"Converted {len(parsed_entries)} markdown entries to dictionaries")
 
         return entries
 
diff --git a/src/khoj/processor/org_mode/org_to_jsonl.py b/src/khoj/processor/org_mode/org_to_jsonl.py
index 05eff771..0950a089 100644
--- a/src/khoj/processor/org_mode/org_to_jsonl.py
+++ b/src/khoj/processor/org_mode/org_to_jsonl.py
@@ -86,7 +86,7 @@ class OrgToJsonl(TextToJsonl):
         if any(files_with_non_org_extensions):
             logger.warn(f"There maybe non org-mode files in the input set: {files_with_non_org_extensions}")
 
-        logger.info(f"Processing files: {all_org_files}")
+        logger.debug(f"Processing files: {all_org_files}")
 
         return all_org_files
 
diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index 033ea220..2d5329ea 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -56,7 +56,7 @@ async def set_config_data(updated_config: FullConfig):
 def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None, r: Optional[bool] = False):
     results: List[SearchResponse] = []
     if q is None or q == "":
-        logger.info(f"No query param (q) passed in API call to initiate search")
+        logger.warn(f"No query param (q) passed in API call to initiate search")
         return results
 
     # initialize variables
@@ -66,7 +66,7 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None, r: Opti
     # return cached results, if available
     query_cache_key = f"{user_query}-{n}-{t}-{r}"
     if query_cache_key in state.query_cache:
-        logger.info(f"Return response from query cache")
+        logger.debug(f"Return response from query cache")
         return state.query_cache[query_cache_key]
 
     if (t == SearchType.Org or t == None) and state.model.orgmode_search:
@@ -151,7 +151,7 @@ def update(t: Optional[SearchType] = None, force: Optional[bool] = False):
         logger.error(e)
         raise HTTPException(status_code=500, detail=str(e))
     else:
-        logger.info("Search Index updated via API call")
+        logger.info("📬 Search index updated via API")
 
     try:
         state.processor_config = configure_processor(state.config.processor)
@@ -159,6 +159,6 @@ def update(t: Optional[SearchType] = None, force: Optional[bool] = False):
         logger.error(e)
         raise HTTPException(status_code=500, detail=str(e))
     else:
-        logger.info("Processor reconfigured via API call")
+        logger.info("📬 Processor reconfigured via API")
 
     return {"status": "ok", "message": "khoj reloaded"}
diff --git a/src/khoj/routers/api_beta.py b/src/khoj/routers/api_beta.py
index eefd09fa..2f523917 100644
--- a/src/khoj/routers/api_beta.py
+++ b/src/khoj/routers/api_beta.py
@@ -152,7 +152,6 @@ def save_chat_session():
         conversation_log["session"].append(session)
     else:
         conversation_log["session"] = [session]
-    logger.info("Added new chat session to conversation logs")
 
     # Save Conversation Metadata Logs to Disk
     conversation_logfile = resolve_absolute_path(state.processor_config.conversation.conversation_logfile)
@@ -161,4 +160,4 @@ def save_chat_session():
         json.dump(conversation_log, logfile)
 
     state.processor_config.conversation.chat_session = None
-    logger.info("Saved updated conversation logs to disk.")
+    logger.info("📩 Saved current chat session to conversation logs")
diff --git a/src/khoj/search_filter/file_filter.py b/src/khoj/search_filter/file_filter.py
index 39bc10ba..a0e9d790 100644
--- a/src/khoj/search_filter/file_filter.py
+++ b/src/khoj/search_filter/file_filter.py
@@ -49,7 +49,7 @@ class FileFilter(BaseFilter):
         query = re.sub(self.file_filter_regex, "", query).strip()
         cache_key = tuple(files_to_search)
         if cache_key in self.cache:
-            logger.info(f"Return file filter results from cache")
+            logger.debug(f"Return file filter results from cache")
             included_entry_indices = self.cache[cache_key]
             return query, included_entry_indices
 
diff --git a/src/khoj/search_filter/word_filter.py b/src/khoj/search_filter/word_filter.py
index 9aab259a..0c868b0c 100644
--- a/src/khoj/search_filter/word_filter.py
+++ b/src/khoj/search_filter/word_filter.py
@@ -58,7 +58,7 @@ class WordFilter(BaseFilter):
         # Return item from cache if exists
         cache_key = tuple(sorted(required_words)), tuple(sorted(blocked_words))
         if cache_key in self.cache:
-            logger.info(f"Return word filter results from cache")
+            logger.debug(f"Return word filter results from cache")
             included_entry_indices = self.cache[cache_key]
             return query, included_entry_indices
 
diff --git a/src/khoj/search_type/image_search.py b/src/khoj/search_type/image_search.py
index 34264189..024dc79d 100644
--- a/src/khoj/search_type/image_search.py
+++ b/src/khoj/search_type/image_search.py
@@ -3,7 +3,6 @@ import glob
 import pathlib
 import copy
 import shutil
-import time
 import logging
 from typing import List
 
@@ -50,9 +49,9 @@ def extract_entries(image_directories):
         image_names.extend(list(image_directory.glob("*.jpg")))
         image_names.extend(list(image_directory.glob("*.jpeg")))
 
-    if logger.level >= logging.INFO:
+    if logger.level >= logging.DEBUG:
         image_directory_names = ", ".join([str(image_directory) for image_directory in image_directories])
-        logger.info(f"Found {len(image_names)} images in {image_directory_names}")
+        logger.debug(f"Found {len(image_names)} images in {image_directory_names}")
     return sorted(image_names)
 
 
@@ -71,7 +70,7 @@ def compute_image_embeddings(image_names, encoder, embeddings_file, batch_size=5
     # Load pre-computed image embeddings from file if exists
     if resolve_absolute_path(embeddings_file).exists() and not regenerate:
         image_embeddings = torch.load(embeddings_file)
-        logger.info(f"Loaded {len(image_embeddings)} image embeddings from {embeddings_file}")
+        logger.debug(f"Loaded {len(image_embeddings)} image embeddings from {embeddings_file}")
     # Else compute the image embeddings from scratch, which can take a while
     else:
         image_embeddings = []
@@ -89,7 +88,7 @@ def compute_image_embeddings(image_names, encoder, embeddings_file, batch_size=5
 
         # Save computed image embeddings to file
         torch.save(image_embeddings, embeddings_file)
-        logger.info(f"Saved computed embeddings to {embeddings_file}")
+        logger.info(f"📩 Saved computed image embeddings to {embeddings_file}")
 
     return image_embeddings
 
@@ -102,7 +101,7 @@ def compute_metadata_embeddings(
     # Load pre-computed image metadata embedding file if exists
     if use_xmp_metadata and resolve_absolute_path(f"{embeddings_file}_metadata").exists() and not regenerate:
         image_metadata_embeddings = torch.load(f"{embeddings_file}_metadata")
-        logger.info(f"Loaded pre-computed embeddings from {embeddings_file}_metadata")
+        logger.debug(f"Loaded image metadata embeddings from {embeddings_file}_metadata")
 
     # Else compute the image metadata embeddings from scratch, which can take a while
     if use_xmp_metadata and image_metadata_embeddings is None:
@@ -121,7 +120,7 @@ def compute_metadata_embeddings(
                 )
                 continue
         torch.save(image_metadata_embeddings, f"{embeddings_file}_metadata")
-        logger.info(f"Saved computed metadata embeddings to {embeddings_file}_metadata")
+        logger.info(f"📩 Saved computed image metadata embeddings to {embeddings_file}_metadata")
 
     return image_metadata_embeddings
 
@@ -149,12 +148,12 @@ def query(raw_query, count, model: ImageSearchModel):
         query_imagepath = resolve_absolute_path(pathlib.Path(raw_query[5:]), strict=True)
         query = copy.deepcopy(Image.open(query_imagepath))
         query.thumbnail((640, query.height))  # scale down image for faster processing
-        logger.info(f"Find Images by Image: {query_imagepath}")
+        logger.info(f"🔎 Find Images by Image: {query_imagepath}")
     else:
         # Truncate words in query to stay below max_tokens supported by ML model
         max_words = 20
         query = " ".join(raw_query.split()[:max_words])
-        logger.info(f"Find Images by Text: {query}")
+        logger.info(f"🔎 Find Images by Text: {query}")
 
     # Now we encode the query (which can either be an image or a text string)
     with timer("Query Encode Time", logger):
diff --git a/src/khoj/search_type/text_search.py b/src/khoj/search_type/text_search.py
index 882bcb64..af805c42 100644
--- a/src/khoj/search_type/text_search.py
+++ b/src/khoj/search_type/text_search.py
@@ -1,7 +1,6 @@
 # Standard Packages
 import logging
 from pathlib import Path
-import time
 from typing import List, Tuple, Type
 
 # External Packages
@@ -68,7 +67,7 @@ def compute_embeddings(
     # Load pre-computed embeddings from file if exists and update them if required
     if embeddings_file.exists() and not regenerate:
         corpus_embeddings = torch.load(get_absolute_path(embeddings_file), map_location=state.device)
-        logger.info(f"Loaded embeddings from {embeddings_file}")
+        logger.debug(f"Loaded {len(corpus_embeddings)} text embeddings from {embeddings_file}")
 
         # Encode any new entries in the corpus and update corpus embeddings
         new_entries = [entry.compiled for id, entry in entries_with_ids if id == -1]
@@ -95,7 +94,7 @@ def compute_embeddings(
     if new_entries:
         corpus_embeddings = util.normalize_embeddings(corpus_embeddings)
         torch.save(corpus_embeddings, embeddings_file)
-        logger.info(f"Computed embeddings and saved them to {embeddings_file}")
+        logger.info(f"📩 Saved computed text embeddings to {embeddings_file}")
 
     return corpus_embeddings
 
diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py
index d2f87126..521b7b65 100644
--- a/src/khoj/utils/helpers.py
+++ b/src/khoj/utils/helpers.py
@@ -58,16 +58,20 @@ def merge_dicts(priority_dict: dict, default_dict: dict):
 def load_model(model_name: str, model_type, model_dir=None, device: str = None) -> Union[BaseEncoder, CrossEncoder]:
     "Load model from disk or huggingface"
     # Construct model path
+    logger = logging.getLogger(__name__)
     model_path = join(model_dir, model_name.replace("/", "_")) if model_dir is not None else None
 
     # Load model from model_path if it exists there
     model_type_class = get_class_by_name(model_type) if isinstance(model_type, str) else model_type
     if model_path is not None and resolve_absolute_path(model_path).exists():
+        logger.debug(f"Loading {model_name} model from disk")
         model = model_type_class(get_absolute_path(model_path), device=device)
     # Else load the model from the model_name
     else:
+        logger.info(f"🤖 Downloading {model_name} model from web")
         model = model_type_class(model_name, device=device)
         if model_path is not None:
+            logger.info(f"📩 Saved {model_name} model to disk")
             model.save(model_path)
 
     return model
diff --git a/src/khoj/utils/jsonl.py b/src/khoj/utils/jsonl.py
index a98910fb..c9576810 100644
--- a/src/khoj/utils/jsonl.py
+++ b/src/khoj/utils/jsonl.py
@@ -31,7 +31,7 @@ def load_jsonl(input_path):
     jsonl_file.close()
 
     # Log JSONL entries loaded
-    logger.info(f"Loaded {len(data)} records from {input_path}")
+    logger.debug(f"Loaded {len(data)} records from {input_path}")
 
     return data
 
@@ -44,7 +44,7 @@ def dump_jsonl(jsonl_data, output_path):
     with open(output_path, "w", encoding="utf-8") as f:
         f.write(jsonl_data)
 
-    logger.info(f"Wrote jsonl data to {output_path}")
+    logger.debug(f"Wrote jsonl data to {output_path}")
 
 
 def compress_jsonl_data(jsonl_data, output_path):
@@ -54,4 +54,4 @@ def compress_jsonl_data(jsonl_data, output_path):
     with gzip.open(output_path, "wt", encoding="utf-8") as gzip_file:
         gzip_file.write(jsonl_data)
 
-    logger.info(f"Wrote jsonl data to gzip compressed jsonl at {output_path}")
+    logger.debug(f"Wrote jsonl data to gzip compressed jsonl at {output_path}")