From e00bb53336b77f289da203f1e4bec7bbb25b1dd8 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sat, 10 Sep 2022 12:16:53 +0300 Subject: [PATCH] Init word filter dictionary with default value as set to simplify code --- src/search_filter/word_filter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/search_filter/word_filter.py b/src/search_filter/word_filter.py index c7c5d059..a177ba38 100644 --- a/src/search_filter/word_filter.py +++ b/src/search_filter/word_filter.py @@ -3,6 +3,7 @@ import re import time import pickle import logging +from collections import defaultdict # Internal Packages from src.search_filter.base_filter import BaseFilter @@ -37,19 +38,18 @@ class WordFilter(BaseFilter): start = time.time() self.cache = {} # Clear cache on (re-)generating entries_by_word_set entry_splitter = r',|\.| |\]|\[\(|\)|\{|\}|\t|\n|\:' + self.word_to_entry_index = defaultdict(set) # Create map of words to entries they exist in for entry_index, entry in enumerate(entries): for word in re.split(entry_splitter, entry[self.entry_key].lower()): if word == '': continue - if word not in self.word_to_entry_index: - self.word_to_entry_index[word] = set() self.word_to_entry_index[word].add(entry_index) with self.filter_file.open('wb') as f: pickle.dump(self.word_to_entry_index, f) end = time.time() - logger.debug(f"Index {self.search_type} for word filter to {self.filter_file}: {end - start} seconds") + logger.debug(f"Indexed {len(self.word_to_entry_index)} words of {self.search_type} type for word filter to {self.filter_file}: {end - start} seconds") return self.word_to_entry_index