khoj/config/khoj_sample.yml
Debanjum Singh Solanky d4072974d7 Use of XMP metadata in Khoj Image Search is broken. Disable by default
- CLIP Image score and XMP metadata score are not combining well.
  When combined they give non sensical results. Enable only once
  figure how best to combine the two.

- Show scores with higher precision for image search
  - Image search scores seem to be mostly be between 0.2 - 0.3 for some reason
  - Higher precision scores make it easier to understand the quality
    of returned results perceived by the model itself
2022-08-19 19:17:28 +03:00

51 lines
2.1 KiB
YAML

content-type:
org:
input-files: # ["/path/to/org-file.org"] REQUIRED IF input-filter IS NOT SET OR
input-filter: # /path/to/org/*.org REQUIRED IF input-files IS NOT SET
compressed-jsonl: "~/.khoj/content/org/org.jsonl.gz"
embeddings-file: "~/.khoj/content/org/org_embeddings.pt"
markdown:
input-files: # ["/path/to/markdown-file.md"] REQUIRED IF input-filter IS NOT SET OR
input-filter: # "/path/to/markdown/*.md" REQUIRED IF input-files IS NOT SET
compressed-jsonl: "~/.khoj/content/markdown/markdown.jsonl.gz"
embeddings-file: "~/.khoj/content/markdown/markdown_embeddings.pt"
ledger:
input-files: # ["/path/to/ledger-file.beancount"] REQUIRED IF input-filter is not set OR
input-filter: # /path/to/ledger/*.beancount REQUIRED IF input-files is not set
compressed-jsonl: "~/.khoj/content/ledger/ledger.jsonl.gz"
embeddings-file: "~/.khoj/content/ledger/ledger_embeddings.pt"
image:
input-directories: # ["/path/to/images/"] REQUIRED IF input-filter IS NOT SET OR
input-filter: # /path/to/images/*.jpg REQUIRED IF input-directories IS NOT SET
embeddings-file: "~/.khoj/content/image/image_embeddings.pt"
batch-size: 50
use-xmp-metadata: false
music:
input-files: # ["/path/to/music-file.org"] REQUIRED IF input-filter IS NOT SET OR
input-filter: # /path/to/music/*.org REQUIRED IF input-files IS NOT SET
compressed-jsonl: "~/.khoj/content/music/music.jsonl.gz"
embeddings-file: "~/.khoj/content/music/music_embeddings.pt"
search-type:
symmetric:
encoder: "sentence-transformers/all-MiniLM-L6-v2"
cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
model_directory: "~/.khoj/search/symmetric/"
asymmetric:
encoder: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
model_directory: "~/.khoj/search/asymmetric/"
image:
encoder: "sentence-transformers/clip-ViT-B-32"
model_directory: "~/.khoj/search/image/"
processor:
conversation:
openai-api-key: # "YOUR_OPENAI_API_KEY"
conversation-logfile: "~/.khoj/processor/conversation/conversation_logs.json"