From a08b15235851432fbf1aa0f6e31f18b7a8ac7538 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 6 Nov 2023 19:26:54 -0800 Subject: [PATCH] Improve log messages in text_entries and memory leak unit test --- src/khoj/processor/text_to_entries.py | 2 +- tests/test_helpers.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/khoj/processor/text_to_entries.py b/src/khoj/processor/text_to_entries.py index 501ef5d3..4661fd9b 100644 --- a/src/khoj/processor/text_to_entries.py +++ b/src/khoj/processor/text_to_entries.py @@ -93,7 +93,7 @@ class TextToEntries(ABC): num_deleted_entries = 0 if regenerate: - with timer("Prepared dataset for regeneration in", logger): + with timer("Cleared existing dataset for regeneration in", logger): logger.debug(f"Deleting all entries for file type {file_type}") num_deleted_entries = EntryAdapters.delete_all_entries(user, file_type) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 30499049..fdd29b02 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -64,6 +64,7 @@ def test_encode_docs_memory_leak(): batch_size = 20 embeddings_model = EmbeddingsModel() memory_usage_trend = [] + device = f"{helpers.get_device()}".upper() # Act # Encode random strings repeatedly and record memory usage trend @@ -76,8 +77,9 @@ def test_encode_docs_memory_leak(): # Calculate slope of line fitting memory usage history memory_usage_trend = np.array(memory_usage_trend) slope, _, _, _, _ = linregress(np.arange(len(memory_usage_trend)), memory_usage_trend) + print(f"Memory usage increased at ~{slope:.2f} MB per iteration on {device}") # Assert # If slope is positive memory utilization is increasing # Positive threshold of 2, from observing memory usage trend on MPS vs CPU device - assert slope < 2, f"Memory usage increasing at ~{slope:.2f} MB per iteration" + assert slope < 2, f"Memory leak suspected on {device}. Memory usage increased at ~{slope:.2f} MB per iteration"