From 9de2097182cc2b5e0951cd633fb48a3ea19d7307 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 14 Jul 2022 21:32:58 +0400 Subject: [PATCH] Fix date filter usage with multi word queries. Simplify date regex --- src/search_filter/date_filter.py | 24 ++++++++++-------------- tests/test_date_filter.py | 23 +++++++++++++---------- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/src/search_filter/date_filter.py b/src/search_filter/date_filter.py index aec28e01..578366ff 100644 --- a/src/search_filter/date_filter.py +++ b/src/search_filter/date_filter.py @@ -11,11 +11,10 @@ import dateparser as dtparse # Date Range Filter Regexes # Example filter queries: -# - dt>=yesterday dt<"tomorrow" +# - dt>="yesterday" dt<"tomorrow" # - dt>="last week" -# - dt:"next year" -date_regex = r'(?:dt([:><=]{1,2})\"?([\w\-\/]+))?\"?' -date_range_regex=f'.*?\s+{date_regex}\s*{date_regex}.*?' +# - dt:"2 years ago" +date_regex = r"dt([:><=]{1,2})\"(.*?)\"" def date_filter(query, entries, embeddings): @@ -54,20 +53,17 @@ def date_filter(query, entries, embeddings): def extract_date_range(query): # find date range filter in query - date_range_match = re.search(date_range_regex, query) - if not date_range_match or date_range_match.groups() == (None, None, None, None): - return None + date_range_matches = re.findall(date_regex, query) - # extract comparators (e.g >,<,=) applied on dates in date filter - date_comparators = [date_cmp for date_cmp in date_range_match.groups()[0::2] if date_cmp] + if len(date_range_matches) == 0: + return None # extract, parse natural dates ranges from date range filter passed in query # e.g today maps to (start_of_day, start_of_tomorrow) - query_dtranges = [] - for date_str in date_range_match.groups()[1::2]: - if date_str and parse(date_str): + for index, (cmp, date_str) in enumerate(date_range_matches): + if parse(date_str): dt_start, dt_end = parse(date_str) - query_dtranges.append((dt_start.timestamp(), dt_end.timestamp())) + date_range_matches[index] = [cmp, (dt_start.timestamp(), dt_end.timestamp())] # Combine dates with their comparators to form date range intervals # For e.g @@ -76,7 +72,7 @@ def extract_date_range(query): # --- effective_date_range = [0, inf] date_range_considering_comparator = [] - for ((dtrange_start, dtrange_end), cmp) in zip(query_dtranges, date_comparators): + for cmp, (dtrange_start, dtrange_end) in date_range_matches: if cmp == '>': date_range_considering_comparator += [[dtrange_end, inf]] elif cmp == '>=': diff --git a/tests/test_date_filter.py b/tests/test_date_filter.py index 7426e14e..2224b865 100644 --- a/tests/test_date_filter.py +++ b/tests/test_date_filter.py @@ -94,17 +94,20 @@ def test_parse(): def test_date_filter_regex(): - dtrange_match = re.search(date_filter.date_range_regex, 'head dt>"today" dt:"1984-01-01" tail') - assert dtrange_match.groups() == ('>', 'today', ':', '1984-01-01') + dtrange_match = re.findall(date_filter.date_regex, 'multi word head dt>"today" dt:"1984-01-01"') + assert dtrange_match == [('>', 'today'), (':', '1984-01-01')] - dtrange_match = re.search(date_filter.date_range_regex, 'head dt>="today" dt="1984-01-01"') - assert dtrange_match.groups() == ('>=', 'today', '=', '1984-01-01') + dtrange_match = re.findall(date_filter.date_regex, 'head dt>"today" dt:"1984-01-01" multi word tail') + assert dtrange_match == [('>', 'today'), (':', '1984-01-01')] - dtrange_match = re.search(date_filter.date_range_regex, 'head dt<"today" tail') - assert dtrange_match.groups() == ('<', 'today', None, None) + dtrange_match = re.findall(date_filter.date_regex, 'multi word head dt>="today" dt="1984-01-01"') + assert dtrange_match == [('>=', 'today'), ('=', '1984-01-01')] - dtrange_match = re.search(date_filter.date_range_regex, 'head dt<="today"') - assert dtrange_match.groups() == ('<=', 'today', None, None) + dtrange_match = re.findall(date_filter.date_regex, 'dt<"multi word date" multi word tail') + assert dtrange_match == [('<', 'multi word date')] - dtrange_match = re.search(date_filter.date_range_regex, 'head tail') - assert dtrange_match.groups() == (None, None, None, None) + dtrange_match = re.findall(date_filter.date_regex, 'head dt<="multi word date"') + assert dtrange_match == [('<=', 'multi word date')] + + dtrange_match = re.findall(date_filter.date_regex, 'head tail') + assert dtrange_match == [] \ No newline at end of file