mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 15:38:55 +01:00
Use hashed value to improve deduplication of search results on server
This commit is contained in:
parent
6814dadd21
commit
2930b57c78
1 changed file with 3 additions and 1 deletion
|
@ -132,11 +132,13 @@ async def query(
|
|||
|
||||
def collate_results(hits, dedupe=True):
|
||||
hit_ids = set()
|
||||
hit_hashes = set()
|
||||
for hit in hits:
|
||||
if dedupe and hit.corpus_id in hit_ids:
|
||||
if dedupe and (hit.hashed_value in hit_hashes or hit.corpus_id in hit_ids):
|
||||
continue
|
||||
|
||||
else:
|
||||
hit_hashes.add(hit.hashed_value)
|
||||
hit_ids.add(hit.corpus_id)
|
||||
yield SearchResponse.model_validate(
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue