mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 15:38:55 +01:00
Use a more accurate model for symmetric semantic search
- The all-MiniLM-L6-v2 model is more accurate
- The exact previous model isn't benchmarked, but judging by the performance of the closest model to it, the new model may be similar in speed and size
- On a very preliminary evaluation, the new model seems faster, with pretty decent results
This commit is contained in:
parent
4a90972e38
commit
989526ae54
4 changed files with 4 additions and 4 deletions
|
@ -28,7 +28,7 @@ content-type:
|
|||
|
||||
search-type:
|
||||
symmetric:
|
||||
encoder: "sentence-transformers/paraphrase-MiniLM-L6-v2"
|
||||
encoder: "sentence-transformers/all-MiniLM-L6-v2"
|
||||
cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
|
||||
model_directory: "/data/models/symmetric"
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ if __name__ == '__main__':
|
|||
parser.add_argument('--dataset', type=str, default="./.dataset", help="Path to dataset to generate index from")
|
||||
parser.add_argument('--column', type=str, default="DATA", help="Name of dataset column to index")
|
||||
parser.add_argument('--num_results', type=int, default=10, help="Number of most suitable matches to show")
|
||||
parser.add_argument('--model_name', type=str, default='paraphrase-distilroberta-base-v1', help="Specify name of the SentenceTransformer model to use for encoding")
|
||||
parser.add_argument('--model_name', type=str, default='all-MiniLM-L6-v2', help="Specify name of the SentenceTransformer model to use for encoding")
|
||||
args = parser.parse_args()
|
||||
|
||||
model = SentenceTransformer(args.model_name)
|
||||
|
|
|
@ -79,7 +79,7 @@ default_config = {
|
|||
{
|
||||
'symmetric':
|
||||
{
|
||||
'encoder': "sentence-transformers/paraphrase-MiniLM-L6-v2",
|
||||
'encoder': "sentence-transformers/all-MiniLM-L6-v2",
|
||||
'cross-encoder': "cross-encoder/ms-marco-MiniLM-L-6-v2",
|
||||
'model_directory': None
|
||||
},
|
||||
|
|
|
@ -14,7 +14,7 @@ def search_config(tmp_path_factory):
|
|||
search_config = SearchConfig()
|
||||
|
||||
search_config.asymmetric = SymmetricSearchConfig(
|
||||
encoder = "sentence-transformers/paraphrase-MiniLM-L6-v2",
|
||||
encoder = "sentence-transformers/all-MiniLM-L6-v2",
|
||||
cross_encoder = "cross-encoder/ms-marco-MiniLM-L-6-v2",
|
||||
model_directory = model_dir
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue