Working example with docker-compose

This still needs quite a bit of clean-up, but it adds a working docker-compose + Dockerfile setup.
2024-11-23 23:48:56 +01:00 · 2022-01-23 23:44:38 -05:00 · 2022-01-23 23:44:38 -05:00 · 77fa8718d9
commit 77fa8718d9
parent 875188dc6f
3 changed files with 78 additions and 9 deletions
--- a/26
+++ b/26
@ -5,17 +5,25 @@ FROM ubuntu:18.04
 RUN apt-get update -y && \
    apt-get -y install libimage-exiftool-perl

-FROM continuumio/miniconda3
+FROM continuumio/miniconda3:4.10.3p0-alpine

-COPY . /src
-WORKDIR /src
+COPY . .

-COPY environment.yml .
-COPY config.yml .
+# Get the arguments from the docker-compose environment
+ARG PORT
+EXPOSE ${PORT}

+# NOTE(review): this comment originally promised runtime access to the arguments, but no ENV instruction is visible in this stage; the RUN below only creates the conda environment.
 RUN conda env create -f environment.yml

-EXPOSE 5000
-COPY . .
-# CMD python3 -m main -c=config.yml -vv
-CMD ["conda", "run", "--name", "semantic-search", "python3", "-m", "main", "-c=config.yml", "-vv"]
+# Use the conda environment we created to run the application.
+# The Docker execution process can't rely on `conda activate semantic-search`, since the activated environment would only last for that single command.
+# Instead, we'll use `conda run` to run the application.
+# Use 0.0.0.0 to explicitly set the host ip for the service on the container. https://pythonspeed.com/articles/docker-connection-refused/
+# Use sh -c to start a shell in order to use environment variables in CMD (applies only to the commented-out CMD below; the exec-form ENTRYPOINT does not expand variables).
+ENTRYPOINT ["conda", "run", "--no-capture-output", "--name", "semantic-search", \
+    "python3", "-m", "src.main"]
+
+    # "python3", "-m", "src.main", "-c=${CONFIG_FILE}", "-vv", "--host=${HOST}", "--port=${PORT}"]
+
+# CMD ["sh", "-c", "echo ${CONFIG_FILE}", "echo ${HOST}", "echo ${PORT}"]
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -0,0 +1,17 @@
+version: "3.9"
+services:
+  web:
+    build:
+      context: .
+      args:
+        - PORT=8000  # consumed by `ARG PORT` in the Dockerfile
+    ports:
+      - "8000:8000"  # host:container
+    volumes:
+      - .:/code  # NOTE(review): the Dockerfile copies the app with `COPY . .`, not into /code; confirm this mount is needed
+      - /home/saba/notes/:/data/notes/  # NOTE(review): this and the host paths below are tied to one user's machine
+      - /home/saba/embeddings/:/data/generated/
+      - /home/saba/images/:/data/images/
+      - /home/saba/ledger/:/data/ledger/
+      - /home/saba/music/:/data/music/
+    command: --host="0.0.0.0" --port=8000 -c=docker_sample_config.yml  # passed as arguments to the image's ENTRYPOINT
--- a/docker_sample_config.yml
+++ b/docker_sample_config.yml
@ -0,0 +1,44 @@
+content-type:
+  org:
+    input-files: null
+    input-filter: "/data/notes/*.org"  # /data/notes/ is a volume mounted in docker-compose.yml
+    compressed-jsonl: "/data/generated/.notes.json.gz"
+    embeddings-file: "/data/generated/.note_embeddings.pt"
+
+  ledger:  # every setting below is commented out, so this key parses as null
+    # input-files: null
+    # input-filter: /data/ledger/*.beancount
+    # compressed-jsonl: /data/generated/.transactions.jsonl.gz
+    # embeddings-file: /data/generated/.transaction_embeddings.pt
+
+  image:  # every setting below is commented out, so this key parses as null
+    # input-directory: "/data/images/"
+    # embeddings-file: "/data/generated/.image_embeddings.pt"
+    # batch-size: 50
+    # use-xmp-metadata: "no"
+
+  music:  # every setting below is commented out, so this key parses as null
+    # input-files: null
+    # input-filter: "/data/music/*.org"
+    # compressed-jsonl: "/data/generated/.songs.jsonl.gz"
+    # embeddings-file: "/data/generated/.song_embeddings.pt"
+
+search-type:
+  symmetric:
+    encoder: "sentence-transformers/paraphrase-MiniLM-L6-v2"
+    cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
+    model_directory: "/data/models/.symmetric"  # NOTE(review): /data/models is not among the volumes in docker-compose.yml; confirm where models should persist
+
+  asymmetric:
+    encoder: "sentence-transformers/msmarco-MiniLM-L-6-v3"
+    cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
+    model_directory: "/data/models/.asymmetric"
+
+  image:
+    encoder: "clip-ViT-B-32"
+    model_directory: "/data/models/.image_encoder"
+
+processor:
+  conversation:
+    openai-api-key: null
+    conversation-logfile: "/data/conversation/.conversation_logs.json"  # NOTE(review): /data/conversation is not among the volumes in docker-compose.yml