Put entry splitting regex in explicit filter into a variable for code readability

This commit is contained in:
Debanjum Singh Solanky 2022-07-14 22:00:10 +04:00
parent 3aac3c7d52
commit c3b3e8959d

View file

@ -15,11 +15,11 @@ def explicit_filter(raw_query, entries, embeddings):
return query, entries, embeddings
# convert each entry to a set of words
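# split on fullstop, comma, colon, space, tab, newline or brackets
# NOTE(review): the pattern has `\[\(` with no `|` between them, so only the sequence "[(" splits; a lone "[" or "(" does not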
entry_splitter = r',|\.| |\]|\[\(|\)|\{|\}|\t|\n|\:'
entries_by_word_set = [set(word.lower()
for word
in re.split(
r',|\.| |\]|\[\(|\)|\{|\}|\t|\n|\:', # split on fullstop, comma or any brackets
entry[1])
in re.split(entry_splitter, entry[1])
if word != "")
for entry in entries]