From ffb8e3988e2a8a185c9032ae826b3c94aaf906ea Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sat, 3 Sep 2022 22:14:37 +0300 Subject: [PATCH] Use Python Logging Framework to Time Performance of Explicit Filter --- src/search_filter/explicit_filter.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/search_filter/explicit_filter.py b/src/search_filter/explicit_filter.py index 2cf82d70..09580e4a 100644 --- a/src/search_filter/explicit_filter.py +++ b/src/search_filter/explicit_filter.py @@ -2,6 +2,7 @@ import re import time import pickle +import logging # External Packages import torch @@ -11,6 +12,9 @@ from src.utils.helpers import resolve_absolute_path from src.utils.config import SearchType +logger = logging.getLogger(__name__) + + class ExplicitFilter: def __init__(self, filter_directory, search_type: SearchType, entry_key='raw'): self.filter_file = resolve_absolute_path(filter_directory / f"{search_type.name.lower()}_explicit_filter_entry_word_sets.pkl") @@ -24,7 +28,7 @@ class ExplicitFilter: with self.filter_file.open('rb') as f: entries_by_word_set = pickle.load(f) end = time.time() - print(f"Load {self.search_type} entries by word set from file: {end - start} seconds") + logger.debug(f"Load {self.search_type} entries by word set from file: {end - start} seconds") else: start = time.time() entry_splitter = r',|\.| |\]|\[\(|\)|\{|\}|\t|\n|\:' @@ -36,7 +40,7 @@ class ExplicitFilter: with self.filter_file.open('wb') as f: pickle.dump(entries_by_word_set, f) end = time.time() - print(f"Convert all {self.search_type} entries to word sets: {end - start} seconds") + logger.debug(f"Convert all {self.search_type} entries to word sets: {end - start} seconds") return entries_by_word_set @@ -58,7 +62,7 @@ class ExplicitFilter: required_words = set([word[1:].lower() for word in raw_query.split() if word.startswith("+")]) blocked_words = set([word[1:].lower() for word in raw_query.split() if word.startswith("-")]) end = time.time() - print(f"Time to extract required, blocked words: {end - start} seconds") + logger.debug(f"Time to extract required, blocked words: {end - start} seconds") if len(required_words) == 0 and len(blocked_words) == 0: return query, entries, embeddings @@ -82,7 +86,7 @@ class ExplicitFilter: if words_in_entry.intersection(blocked_words): entries_to_exclude.add(id) end = time.time() - print(f"Mark entries to filter: {end - start} seconds") + logger.debug(f"Mark entries to filter: {end - start} seconds") # delete entries (and their embeddings) marked for exclusion start = time.time() @@ -90,6 +94,6 @@ class ExplicitFilter: del entries[id] embeddings = torch.cat((embeddings[:id], embeddings[id+1:])) end = time.time() - print(f"Remove entries to filter from embeddings: {end - start} seconds") + logger.debug(f"Remove entries to filter from embeddings: {end - start} seconds") return query, entries, embeddings