mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-30 19:03:01 +01:00
Split entries by max tokens while converting Markdown entries to JSONL
This commit is contained in:
parent
24676f95d8
commit
f209e30a3b
1 changed file with 6 additions and 0 deletions
|
@@ -35,6 +35,12 @@ class MarkdownToJsonl(TextToJsonl):
|
||||||
end = time.time()
|
end = time.time()
|
||||||
logger.debug(f"Parse entries from Markdown files into dictionaries: {end - start} seconds")
|
logger.debug(f"Parse entries from Markdown files into dictionaries: {end - start} seconds")
|
||||||
|
|
||||||
|
# Split entries by max tokens supported by model
|
||||||
|
start = time.time()
|
||||||
|
current_entries = self.split_entries_by_max_tokens(current_entries, max_tokens=256)
|
||||||
|
end = time.time()
|
||||||
|
logger.debug(f"Split entries by max token size supported by model: {end - start} seconds")
|
||||||
|
|
||||||
# Identify, mark and merge any new entries with previous entries
|
# Identify, mark and merge any new entries with previous entries
|
||||||
start = time.time()
|
start = time.time()
|
||||||
if not previous_entries:
|
if not previous_entries:
|
||||||
|
|
Loading…
Reference in a new issue