mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-27 17:35:07 +01:00
Generate compiled form of each transaction directly in the beancount processor
- The logic for compiling a beancount entry (for later encoding) now resides completely in the beancount-to-jsonl processor layer
- This allows symmetric search to be generic and unaware of the beancount-specific properties that were extracted by the beancount-to-jsonl processor layer
- Symmetric search now only expects each entry in the jsonl to have (at least) the 'compiled' and 'raw' keys. What original text the entry was compiled from is irrelevant to it. The original text could be a location, transaction, chat, etc.; symmetric search doesn't have to care
This commit is contained in:
parent
06cf425314
commit
e220ecc00b
2 changed files with 2 additions and 2 deletions
|
@ -132,7 +132,7 @@ def convert_beancount_entries_to_jsonl(entries, verbose=0):
|
|||
"Convert each Beancount transaction to JSON and collate as JSONL"
|
||||
jsonl = ''
|
||||
for entry in entries:
|
||||
entry_dict = {'Title': entry}
|
||||
entry_dict = {'compiled': entry, 'raw': entry}
|
||||
# Convert Dictionary to JSON and Append to JSONL string
|
||||
jsonl += f'{json.dumps(entry_dict, ensure_ascii=False)}\n'
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@ def initialize_model(search_config: SymmetricSearchConfig):
|
|||
|
||||
def extract_entries(notesfile, verbose=0):
|
||||
"Load entries from compressed jsonl"
|
||||
return [{'raw': f'{entry["Title"]}', 'compiled': f'{entry["Title"]}'}
|
||||
return [{'compiled': f'{entry["compiled"]}', 'raw': f'{entry["raw"]}'}
|
||||
for entry
|
||||
in load_jsonl(notesfile, verbose=verbose)]
|
||||
|
||||
|
|
Loading…
Reference in a new issue