Generate compiled form of each transaction directly in the beancount processor

- The logic for compiling a beancount entry (for later encoding) now resides entirely in the beancount-to-jsonl processor layer
- This allows symmetric search to be generic, unaware of the beancount-specific properties that were extracted by the beancount-to-jsonl processor layer
- Symmetric search now just expects the jsonl to have (at least) the 'compiled' and 'raw' keys for each entry. What original text the entry was compiled from is irrelevant to it. The original text could be a location, transaction, chat etc.; symmetric search doesn't have to care
2024-11-27 17:35:07 +01:00 · 2022-07-21 02:43:28 +04:00 · 2022-07-21 02:43:28 +04:00 · e220ecc00b
commit e220ecc00b
parent 06cf425314
2 changed files with 2 additions and 2 deletions
--- a/src/processor/ledger/beancount_to_jsonl.py
+++ b/src/processor/ledger/beancount_to_jsonl.py
@@ -132,7 +132,7 @@ def convert_beancount_entries_to_jsonl(entries, verbose=0):
    "Convert each Beancount transaction to JSON and collate as JSONL"
    jsonl = ''
    for entry in entries:
-        entry_dict = {'Title': entry}
+        entry_dict = {'compiled': entry, 'raw': entry}
        # Convert Dictionary to JSON and Append to JSONL string
        jsonl += f'{json.dumps(entry_dict, ensure_ascii=False)}\n'

--- a/src/search_type/symmetric_ledger.py
+++ b/src/search_type/symmetric_ledger.py
@@ -38,7 +38,7 @@ def initialize_model(search_config: SymmetricSearchConfig):

 def extract_entries(notesfile, verbose=0):
    "Load entries from compressed jsonl"
-    return [{'raw': f'{entry["Title"]}', 'compiled': f'{entry["Title"]}'}
+    return [{'compiled': f'{entry["compiled"]}', 'raw': f'{entry["raw"]}'}
            for entry
            in load_jsonl(notesfile, verbose=verbose)]