From 00620356e63c198b9742d9f63b4cb93b8082064b Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 7 Jul 2024 13:25:25 +0530 Subject: [PATCH] View each chunk of a non-hierarchical file as a separate corpus If raw_is_compiled, it means there is no inherent hierarchical structure of the document being chunked. The corpus_id shouldn't be shared for these chunks. Otherwise, all chunks of a plain text file will be shown as one during dedupe (default) search --- src/khoj/processor/content/text_to_entries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/khoj/processor/content/text_to_entries.py b/src/khoj/processor/content/text_to_entries.py index 75619443..69e90d7d 100644 --- a/src/khoj/processor/content/text_to_entries.py +++ b/src/khoj/processor/content/text_to_entries.py @@ -108,7 +108,7 @@ class TextToEntries(ABC): raw=entry.raw, heading=entry.heading, file=entry.file, - corpus_id=corpus_id, + corpus_id=uuid.uuid4() if raw_is_compiled else corpus_id, ) )