Log performance metrics for beancount, markdown to jsonl conversion

Debanjum Singh Solanky 2022-09-10 22:47:54 +03:00
parent ebd5039bd1
commit 4eb84c7f51
2 changed files with 21 additions and 5 deletions


@@ -7,6 +7,7 @@ import pathlib
 import glob
 import re
 import logging
+import time
 # Internal Packages
 from src.utils.helpers import get_absolute_path, is_none_or_empty, mark_entries_for_update
@@ -28,18 +29,23 @@ def beancount_to_jsonl(beancount_files, beancount_file_filter, output_file, prev
     beancount_files = get_beancount_files(beancount_files, beancount_file_filter)
     # Extract Entries from specified Beancount files
+    start = time.time()
     extracted_transactions, transaction_to_file_map = extract_beancount_entries(beancount_files)
-    # Convert Extracted Transactions to Dictionaries
     current_entries = convert_transactions_to_maps(extracted_transactions, transaction_to_file_map)
+    end = time.time()
+    logger.debug(f"Parse transactions from Beancount files into dictionaries: {end - start} seconds")
     # Identify, mark and merge any new entries with previous entries
+    start = time.time()
     if not previous_entries:
         entries_with_ids = list(enumerate(current_entries))
     else:
         entries_with_ids = mark_entries_for_update(current_entries, previous_entries, key='compiled', logger=logger)
+    end = time.time()
+    logger.debug(f"Identify new or updated transaction: {end - start} seconds")
     # Process Each Entry from All Notes Files
+    start = time.time()
     entries = list(map(lambda entry: entry[1], entries_with_ids))
     jsonl_data = convert_transaction_maps_to_jsonl(entries)
@@ -48,6 +54,8 @@ def beancount_to_jsonl(beancount_files, beancount_file_filter, output_file, prev
         compress_jsonl_data(jsonl_data, output_file)
     elif output_file.suffix == ".jsonl":
         dump_jsonl(jsonl_data, output_file)
+    end = time.time()
+    logger.debug(f"Write transactions to JSONL file: {end - start} seconds")
     return entries_with_ids
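Both converters instrument each stage the same way: record time.time() before and after the stage, then log the elapsed seconds at DEBUG level. A minimal, self-contained sketch of that pattern, where parse_stage and its dummy workload are hypothetical stand-ins for the real extract/convert calls:

import logging
import time

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

def parse_stage():
    # Hypothetical stand-in for extract_beancount_entries + convert_transactions_to_maps
    return [{"compiled": f"entry {i}", "raw": f"entry {i}"} for i in range(100000)]

start = time.time()
current_entries = parse_stage()
end = time.time()
# Same message style as the commit: stage description, then elapsed seconds
logger.debug(f"Parse transactions from Beancount files into dictionaries: {end - start} seconds")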


@@ -7,6 +7,7 @@ import pathlib
 import glob
 import re
 import logging
+import time
 # Internal Packages
 from src.utils.helpers import get_absolute_path, is_none_or_empty, mark_entries_for_update
@@ -28,26 +29,33 @@ def markdown_to_jsonl(markdown_files, markdown_file_filter, output_file, previou
     markdown_files = get_markdown_files(markdown_files, markdown_file_filter)
     # Extract Entries from specified Markdown files
+    start = time.time()
     extracted_entries, entry_to_file_map = extract_markdown_entries(markdown_files)
-    # Convert Extracted Transactions to Dictionaries
     current_entries = convert_markdown_entries_to_maps(extracted_entries, entry_to_file_map)
+    end = time.time()
+    logger.debug(f"Parse entries from Markdown files into dictionaries: {end - start} seconds")
     # Identify, mark and merge any new entries with previous entries
+    start = time.time()
     if not previous_entries:
         entries_with_ids = list(enumerate(current_entries))
     else:
         entries_with_ids = mark_entries_for_update(current_entries, previous_entries, key='compiled', logger=logger)
+    end = time.time()
+    logger.debug(f"Identify new or updated entries: {end - start} seconds")
     # Process Each Entry from All Notes Files
+    start = time.time()
     entries = list(map(lambda entry: entry[1], entries_with_ids))
-    jsonl_data = convert_markdown_maps_to_jsonl(entries, entry_to_file_map)
+    jsonl_data = convert_markdown_maps_to_jsonl(entries)
     # Compress JSONL formatted Data
     if output_file.suffix == ".gz":
         compress_jsonl_data(jsonl_data, output_file)
     elif output_file.suffix == ".jsonl":
         dump_jsonl(jsonl_data, output_file)
+    end = time.time()
+    logger.debug(f"Write markdown entries to JSONL file: {end - start} seconds")
     return entries_with_ids
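The start/end/logger.debug triplet repeats for every timed stage in both files. One possible follow-up refactor, not part of this commit, is a small context manager that logs elapsed time on exit; the timer name below is an assumption for illustration:

import logging
import time
from contextlib import contextmanager

logger = logging.getLogger(__name__)

@contextmanager
def timer(message: str):
    # Log elapsed wall-clock seconds for the enclosed block at DEBUG level
    start = time.time()
    try:
        yield
    finally:
        end = time.time()
        logger.debug(f"{message}: {end - start} seconds")

Usage would then mirror the commit's stages, e.g. wrapping the markdown parse step:

with timer("Parse entries from Markdown files into dictionaries"):
    extracted_entries, entry_to_file_map = extract_markdown_entries(markdown_files)
    current_entries = convert_markdown_entries_to_maps(extracted_entries, entry_to_file_map)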