diff --git a/src/processor/ledger/beancount_to_jsonl.py b/src/processor/ledger/beancount_to_jsonl.py index b2f26b7b..3af45c7a 100644 --- a/src/processor/ledger/beancount_to_jsonl.py +++ b/src/processor/ledger/beancount_to_jsonl.py @@ -88,7 +88,7 @@ def extract_beancount_transactions(beancount_files): # Initialize Regex for extracting Beancount Entries transaction_regex = r'^\n?\d{4}-\d{2}-\d{2} [\*|\!] ' - empty_newline = f'^[{empty_escape_sequences}]*$' + empty_newline = f'^[\n\r\t\ ]*$' entries = [] transaction_to_file_map = [] diff --git a/src/processor/markdown/markdown_to_jsonl.py b/src/processor/markdown/markdown_to_jsonl.py index 7a91533b..e7fc2779 100644 --- a/src/processor/markdown/markdown_to_jsonl.py +++ b/src/processor/markdown/markdown_to_jsonl.py @@ -97,7 +97,8 @@ def extract_markdown_entries(markdown_files): markdown_content = f.read() markdown_entries_per_file = [f'#{entry.strip(empty_escape_sequences)}' for entry - in re.split(markdown_heading_regex, markdown_content, flags=re.MULTILINE)] + in re.split(markdown_heading_regex, markdown_content, flags=re.MULTILINE) + if entry.strip(empty_escape_sequences) != ''] entry_to_file_map += zip(markdown_entries_per_file, [markdown_file]*len(markdown_entries_per_file)) entries.extend(markdown_entries_per_file) diff --git a/src/utils/constants.py b/src/utils/constants.py index 84c3dfbb..8b443944 100644 --- a/src/utils/constants.py +++ b/src/utils/constants.py @@ -2,7 +2,7 @@ from pathlib import Path app_root_directory = Path(__file__).parent.parent.parent web_directory = app_root_directory / 'src/interface/web/' -empty_escape_sequences = r'\n|\r\t ' +empty_escape_sequences = '\n|\r|\t| ' # default app config to use default_config = { diff --git a/src/utils/jsonl.py b/src/utils/jsonl.py index 77b5af11..8a034acd 100644 --- a/src/utils/jsonl.py +++ b/src/utils/jsonl.py @@ -54,4 +54,4 @@ def compress_jsonl_data(jsonl_data, output_path): with gzip.open(output_path, 'wt') as gzip_file: gzip_file.write(jsonl_data) - logger.info(f'Wrote {len(jsonl_data)} lines to gzip compressed jsonl at {output_path}') \ No newline at end of file + logger.info(f'Wrote jsonl data to gzip compressed jsonl at {output_path}') \ No newline at end of file