mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-27 17:35:07 +01:00
Split entries by max tokens while converting Beancount entries to JSONL
This commit is contained in:
parent
f209e30a3b
commit
17fa123b4e
1 changed file with 6 additions and 0 deletions
|
@ -35,6 +35,12 @@ class BeancountToJsonl(TextToJsonl):
|
|||
end = time.time()
|
||||
logger.debug(f"Parse transactions from Beancount files into dictionaries: {end - start} seconds")
|
||||
|
||||
# Split entries by max tokens supported by model
|
||||
start = time.time()
|
||||
current_entries = self.split_entries_by_max_tokens(current_entries, max_tokens=256)
|
||||
end = time.time()
|
||||
logger.debug(f"Split entries by max token size supported by model: {end - start} seconds")
|
||||
|
||||
# Identify, mark and merge any new entries with previous entries
|
||||
start = time.time()
|
||||
if not previous_entries:
|
||||
|
|
Loading…
Reference in a new issue