Log performance metrics for beancount, markdown to jsonl conversion

Debanjum Singh Solanky 2022-09-10 22:47:54 +03:00
parent ebd5039bd1
commit 4eb84c7f51
2 changed files with 21 additions and 5 deletions


@@ -7,6 +7,7 @@ import pathlib
 import glob
 import re
 import logging
+import time
 # Internal Packages
 from src.utils.helpers import get_absolute_path, is_none_or_empty, mark_entries_for_update
@@ -28,18 +29,23 @@ def beancount_to_jsonl(beancount_files, beancount_file_filter, output_file, prev
     beancount_files = get_beancount_files(beancount_files, beancount_file_filter)
     # Extract Entries from specified Beancount files
+    start = time.time()
     extracted_transactions, transaction_to_file_map = extract_beancount_entries(beancount_files)
-    # Convert Extracted Transactions to Dictionaries
     current_entries = convert_transactions_to_maps(extracted_transactions, transaction_to_file_map)
+    end = time.time()
+    logger.debug(f"Parse transactions from Beancount files into dictionaries: {end - start} seconds")
     # Identify, mark and merge any new entries with previous entries
+    start = time.time()
     if not previous_entries:
         entries_with_ids = list(enumerate(current_entries))
     else:
         entries_with_ids = mark_entries_for_update(current_entries, previous_entries, key='compiled', logger=logger)
+    end = time.time()
+    logger.debug(f"Identify new or updated transaction: {end - start} seconds")
     # Process Each Entry from All Notes Files
+    start = time.time()
     entries = list(map(lambda entry: entry[1], entries_with_ids))
     jsonl_data = convert_transaction_maps_to_jsonl(entries)
@@ -48,6 +54,8 @@ def beancount_to_jsonl(beancount_files, beancount_file_filter, output_file, prev
         compress_jsonl_data(jsonl_data, output_file)
     elif output_file.suffix == ".jsonl":
         dump_jsonl(jsonl_data, output_file)
+    end = time.time()
+    logger.debug(f"Write transactions to JSONL file: {end - start} seconds")
     return entries_with_ids
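Both converters instrument each stage the same way: record time.time() before and after the stage, then log the elapsed seconds at DEBUG level. A minimal, self-contained sketch of that pattern, where parse_stage and its dummy workload are hypothetical stand-ins for the real extract/convert calls:

import logging
import time

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

def parse_stage():
    # Hypothetical stand-in for extract_beancount_entries + convert_transactions_to_maps
    return [{"compiled": f"entry {i}", "raw": f"entry {i}"} for i in range(100000)]

start = time.time()
current_entries = parse_stage()
end = time.time()
# Same message style as the commit: stage description, then elapsed seconds
logger.debug(f"Parse transactions from Beancount files into dictionaries: {end - start} seconds")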


@@ -7,6 +7,7 @@ import pathlib
 import glob
 import re
 import logging
+import time
 # Internal Packages
 from src.utils.helpers import get_absolute_path, is_none_or_empty, mark_entries_for_update
@@ -28,26 +29,33 @@ def markdown_to_jsonl(markdown_files, markdown_file_filter, output_file, previou
     markdown_files = get_markdown_files(markdown_files, markdown_file_filter)
     # Extract Entries from specified Markdown files
+    start = time.time()
     extracted_entries, entry_to_file_map = extract_markdown_entries(markdown_files)
-    # Convert Extracted Transactions to Dictionaries
     current_entries = convert_markdown_entries_to_maps(extracted_entries, entry_to_file_map)
+    end = time.time()
+    logger.debug(f"Parse entries from Markdown files into dictionaries: {end - start} seconds")
     # Identify, mark and merge any new entries with previous entries
+    start = time.time()
     if not previous_entries:
         entries_with_ids = list(enumerate(current_entries))
     else:
         entries_with_ids = mark_entries_for_update(current_entries, previous_entries, key='compiled', logger=logger)
+    end = time.time()
+    logger.debug(f"Identify new or updated entries: {end - start} seconds")
     # Process Each Entry from All Notes Files
+    start = time.time()
     entries = list(map(lambda entry: entry[1], entries_with_ids))
-    jsonl_data = convert_markdown_maps_to_jsonl(entries, entry_to_file_map)
+    jsonl_data = convert_markdown_maps_to_jsonl(entries)
     # Compress JSONL formatted Data
     if output_file.suffix == ".gz":
         compress_jsonl_data(jsonl_data, output_file)
     elif output_file.suffix == ".jsonl":
         dump_jsonl(jsonl_data, output_file)
+    end = time.time()
+    logger.debug(f"Write markdown entries to JSONL file: {end - start} seconds")
     return entries_with_ids
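The start/end/logger.debug triplet repeats for every timed stage in both files. One possible follow-up refactor, not part of this commit, is a small context manager that logs elapsed time on exit; the timer name below is an assumption for illustration:

import logging
import time
from contextlib import contextmanager

logger = logging.getLogger(__name__)

@contextmanager
def timer(message: str):
    # Log elapsed wall-clock seconds for the enclosed block at DEBUG level
    start = time.time()
    try:
        yield
    finally:
        end = time.time()
        logger.debug(f"{message}: {end - start} seconds")

Usage would then mirror the commit's stages, e.g. wrapping the markdown parse step:

with timer("Parse entries from Markdown files into dictionaries"):
    extracted_entries, entry_to_file_map = extract_markdown_entries(markdown_files)
    current_entries = convert_markdown_entries_to_maps(extracted_entries, entry_to_file_map)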