Fix stripping empty escape sequences from strings

- Fix log message on jsonl write
This commit is contained in:
Debanjum Singh Solanky 2022-09-10 23:55:09 +03:00
parent a7cf6c8458
commit 2e1bbe0cac
4 changed files with 5 additions and 4 deletions

View file

@ -88,7 +88,7 @@ def extract_beancount_transactions(beancount_files):
# Initialize Regex for extracting Beancount Entries
transaction_regex = r'^\n?\d{4}-\d{2}-\d{2} [\*|\!] '
empty_newline = f'^[{empty_escape_sequences}]*$'
empty_newline = f'^[\n\r\t\ ]*$'
entries = []
transaction_to_file_map = []

View file

@ -97,7 +97,8 @@ def extract_markdown_entries(markdown_files):
markdown_content = f.read()
markdown_entries_per_file = [f'#{entry.strip(empty_escape_sequences)}'
for entry
in re.split(markdown_heading_regex, markdown_content, flags=re.MULTILINE)]
in re.split(markdown_heading_regex, markdown_content, flags=re.MULTILINE)
if entry.strip(empty_escape_sequences) != '']
entry_to_file_map += zip(markdown_entries_per_file, [markdown_file]*len(markdown_entries_per_file))
entries.extend(markdown_entries_per_file)

View file

@ -2,7 +2,7 @@ from pathlib import Path
app_root_directory = Path(__file__).parent.parent.parent
web_directory = app_root_directory / 'src/interface/web/'
empty_escape_sequences = r'\n|\r\t '
empty_escape_sequences = '\n|\r|\t| '
# default app config to use
default_config = {

View file

@ -54,4 +54,4 @@ def compress_jsonl_data(jsonl_data, output_path):
with gzip.open(output_path, 'wt') as gzip_file:
gzip_file.write(jsonl_data)
logger.info(f'Wrote {len(jsonl_data)} lines to gzip compressed jsonl at {output_path}')
logger.info(f'Wrote jsonl data to gzip compressed jsonl at {output_path}')