mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 23:48:56 +01:00
Use hashed value to improve deduplication of search results on server
This commit is contained in:
parent
6814dadd21
commit
2930b57c78
1 changed file with 3 additions and 1 deletion
|
@@ -132,11 +132,13 @@ async def query(
|
||||||
|
|
||||||
def collate_results(hits, dedupe=True):
|
def collate_results(hits, dedupe=True):
|
||||||
hit_ids = set()
|
hit_ids = set()
|
||||||
|
hit_hashes = set()
|
||||||
for hit in hits:
|
for hit in hits:
|
||||||
if dedupe and hit.corpus_id in hit_ids:
|
if dedupe and (hit.hashed_value in hit_hashes or hit.corpus_id in hit_ids):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
hit_hashes.add(hit.hashed_value)
|
||||||
hit_ids.add(hit.corpus_id)
|
hit_ids.add(hit.corpus_id)
|
||||||
yield SearchResponse.model_validate(
|
yield SearchResponse.model_validate(
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue