From a876b652d88a44963ca222802e46a9ca27f99b4a Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Thu, 4 Aug 2022 22:40:16 +0300
Subject: [PATCH] Rename khoj_sample.yml to more specific khoj_docker.yml

- Update docker-compose.yml to start container using khoj_docker.yml
- Use /data/org in input-filter for content-type > org
---
 config/khoj_docker.yml | 53 ++++++++++++++++++++++++++++++++++++++++++
 docker-compose.yml     |  2 +-
 2 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100644 config/khoj_docker.yml

diff --git a/config/khoj_docker.yml b/config/khoj_docker.yml
new file mode 100644
index 00000000..b1ef7d5f
--- /dev/null
+++ b/config/khoj_docker.yml
@@ -0,0 +1,53 @@
+content-type:
+  # The /data/folder/ prefix to the folders is here because this is
+  # the directory to which the local files are copied in the docker-compose.
+  # If changing, the docker-compose volumes should also be changed to match.
+  org:
+    input-files: null
+    input-filter: "/data/org/*.org"
+    compressed-jsonl: "/data/embeddings/notes.jsonl.gz"
+    embeddings-file: "/data/embeddings/note_embeddings.pt"
+
+  markdown:
+    input-files: null
+    input-filter: "/data/markdown/*.md"
+    compressed-jsonl: "/data/embeddings/markdown.jsonl.gz"
+    embeddings-file: "/data/embeddings/markdown_embeddings.pt"
+
+  ledger:
+    input-files: null
+    input-filter: /data/ledger/*.beancount
+    compressed-jsonl: /data/embeddings/transactions.jsonl.gz
+    embeddings-file: /data/embeddings/transaction_embeddings.pt
+
+#  image:
+#    input-directories: ["/data/images/"]
+#    embeddings-file: "/data/embeddings/image_embeddings.pt"
+#    batch-size: 50
+#    use-xmp-metadata: true
+
+  music:
+    input-files: ["/data/music/music.org"]
+    input-filter: null
+    compressed-jsonl: "/data/embeddings/songs.jsonl.gz"
+    embeddings-file: "/data/embeddings/song_embeddings.pt"
+
+search-type:
+  symmetric:
+    encoder: "sentence-transformers/all-MiniLM-L6-v2"
+    cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
+    model_directory: "/data/models/symmetric"
+
+  asymmetric:
+    encoder: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
+    cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
+    model_directory: "/data/models/asymmetric"
+
+  image:
+    encoder: "sentence-transformers/clip-ViT-B-32"
+    model_directory: "/data/models/image_encoder"
+
+processor:
+  #conversation:
+  #  openai-api-key: null
+  #  conversation-logfile: "/data/embeddings/conversation_logs.json"
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 2807b744..022463f2 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -26,4 +26,4 @@ services:
       - ./tests/data/embeddings/:/data/embeddings/
       - ./tests/data/models/:/data/models/
     # Use 0.0.0.0 to explicitly set the host ip for the service on the container. https://pythonspeed.com/articles/docker-connection-refused/
-    command: --host="0.0.0.0" --port=8000 -c=config/khoj_sample.yml -vv
+    command: --host="0.0.0.0" --port=8000 -c=config/khoj_docker.yml -vv