khoj/config/khoj_docker.yml
Debanjum Singh Solanky 11a18cc452 Update khoj docker config to index sub directories for text content
- Khoj supports indexing subdirectories but the khoj docker config
  wasn't updated to support the same
- This should also allow khoj docker users to index multiple separate
  directory trees by mounting them into separate sub folders within
  /data/<content-type>/.
  For e.g /data/org/dir1, /data/org/dir2 etc in khoj_docker.yml
2023-02-06 21:04:50 -03:00

55 lines
No EOL
1.8 KiB
YAML

content-type:
# The /data/folder/ prefix to the folders is here because this is
# the directory to which the local files are copied in the docker-compose.
# If changing, the docker-compose volumes should also be changed to match.
org:
input-files: null
input-filter: ["/data/org/**/*.org"]
compressed-jsonl: "/data/embeddings/notes.jsonl.gz"
embeddings-file: "/data/embeddings/note_embeddings.pt"
index_heading_entries: false
markdown:
input-files: null
input-filter: ["/data/markdown/**/*.md"]
compressed-jsonl: "/data/embeddings/markdown.jsonl.gz"
embeddings-file: "/data/embeddings/markdown_embeddings.pt"
ledger:
input-files: null
input-filter: ["/data/ledger/**/*.beancount"]
compressed-jsonl: /data/embeddings/transactions.jsonl.gz
embeddings-file: /data/embeddings/transaction_embeddings.pt
image:
input-directories: ["/data/images/"]
embeddings-file: "/data/embeddings/image_embeddings.pt"
batch-size: 50
use-xmp-metadata: false
music:
input-files: ["/data/music/music.org"]
input-filter: null
compressed-jsonl: "/data/embeddings/songs.jsonl.gz"
embeddings-file: "/data/embeddings/song_embeddings.pt"
search-type:
symmetric:
encoder: "sentence-transformers/all-MiniLM-L6-v2"
cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
model_directory: "/data/models/symmetric"
asymmetric:
encoder: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
model_directory: "/data/models/asymmetric"
image:
encoder: "sentence-transformers/clip-ViT-B-32"
model_directory: "/data/models/image_encoder"
processor:
#conversation:
# openai-api-key: null
# model: "text-davinci-003"
# conversation-logfile: "/data/embeddings/conversation_logs.json"