From e220ecc00bd34ea0b8a29b76519528b148b17782 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 21 Jul 2022 02:43:28 +0400 Subject: [PATCH] Generate compiled form of each transaction directly in the beancount processor - The logic for compiling a beancount entry (for later encoding) now completely resides in the beancount-to-jsonl processor layer - This allows symmetric search to be generic and not be aware of beancount specific properties that were extracted by the beancount-to-jsonl processor layer - Now symmetric search just expects the jsonl to (at least) have the 'compiled' and 'raw' keys for each entry. What original text the entry was compiled from is irrelevant to it. The original text could be location, transaction, chat etc, it doesn't have to care --- src/processor/ledger/beancount_to_jsonl.py | 2 +- src/search_type/symmetric_ledger.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/processor/ledger/beancount_to_jsonl.py b/src/processor/ledger/beancount_to_jsonl.py index ed6f9b4f..aeb93653 100644 --- a/src/processor/ledger/beancount_to_jsonl.py +++ b/src/processor/ledger/beancount_to_jsonl.py @@ -132,7 +132,7 @@ def convert_beancount_entries_to_jsonl(entries, verbose=0): "Convert each Beancount transaction to JSON and collate as JSONL" jsonl = '' for entry in entries: - entry_dict = {'Title': entry} + entry_dict = {'compiled': entry, 'raw': entry} # Convert Dictionary to JSON and Append to JSONL string jsonl += f'{json.dumps(entry_dict, ensure_ascii=False)}\n' diff --git a/src/search_type/symmetric_ledger.py b/src/search_type/symmetric_ledger.py index 3a8922a3..1e0e4033 100644 --- a/src/search_type/symmetric_ledger.py +++ b/src/search_type/symmetric_ledger.py @@ -38,7 +38,7 @@ def initialize_model(search_config: SymmetricSearchConfig): def extract_entries(notesfile, verbose=0): "Load entries from compressed jsonl" - return [{'raw': f'{entry["Title"]}', 'compiled': f'{entry["Title"]}'} + return [{'compiled': 
f'{entry["compiled"]}', 'raw': f'{entry["raw"]}'} for entry in load_jsonl(notesfile, verbose=verbose)]