diff --git a/Dockerfile b/Dockerfile
index b3e71fa6..84504e02 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,17 +5,32 @@ FROM ubuntu:18.04
 RUN apt-get update -y && \
     apt-get -y install libimage-exiftool-perl
 
-FROM continuumio/miniconda3
+# NOTE(review): this FROM starts a new build stage and nothing is copied from the
+# ubuntu stage above, so exiftool is presumably absent from the final image - confirm.
+FROM continuumio/miniconda3:4.10.3p0-alpine
 
-COPY . /src
-WORKDIR /src
+# Keep the application in its own directory instead of the image root.
+WORKDIR /app
 
+# Copy only the dependency manifest first so the conda layer below is rebuilt
+# only when environment.yml changes, not on every source edit.
 COPY environment.yml .
-COPY config.yml .
 
 RUN conda env create -f environment.yml
 
-EXPOSE 5000
+# Application source and config files go in after the dependencies are installed.
 COPY . .
-# CMD python3 -m main -c=config.yml -vv
-CMD ["conda", "run", "--name", "semantic-search", "python3", "-m", "main", "-c=config.yml", "-vv"]
+
+# Port the service is expected to listen on. docker-compose passes it as a build
+# argument; the default keeps a plain `docker build` working. ARG values exist
+# only at build time, so the runtime port is set by the --port argument instead.
+ARG PORT=8000
+EXPOSE ${PORT}
+
+# An activated conda environment would only last for a single RUN command, so
+# the application is started through `conda run` in the semantic-search environment.
+# Runtime arguments (--host, --port, -c=<config file>) are appended to this
+# ENTRYPOINT by the docker-compose `command`. Pass --host=0.0.0.0 so the service is
+# reachable from outside the container: https://pythonspeed.com/articles/docker-connection-refused/
+ENTRYPOINT ["conda", "run", "--no-capture-output", "--name", "semantic-search", \
+            "python3", "-m", "src.main"]
diff --git a/docker-compose.yml b/docker-compose.yml
index e69de29b..40019fa5 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,23 @@
+version: "3.9"
+services:
+  web:
+    build:
+      context: .
+      args:
+        # Consumed by `ARG PORT` / `EXPOSE ${PORT}` in the Dockerfile.
+        - PORT=8000
+    ports:
+      - "8000:8000"
+    volumes:
+      # NOTE(review): /code is not referenced by the Dockerfile or by
+      # docker_sample_config.yml - confirm this mount is still needed.
+      - .:/code
+      # Host directories backing the /data paths in docker_sample_config.yml.
+      - /home/saba/notes/:/data/notes/
+      - /home/saba/embeddings/:/data/generated/
+      - /home/saba/images/:/data/images/
+      - /home/saba/ledger/:/data/ledger/
+      - /home/saba/music/:/data/music/
+    # Appended to the image ENTRYPOINT as the application's arguments; keep --port
+    # in sync with the container side of the port mapping above.
+    command: --host="0.0.0.0" --port=8000 -c=docker_sample_config.yml
diff --git a/docker_sample_config.yml b/docker_sample_config.yml
new file mode 100644
index 00000000..8abe0209
--- /dev/null
+++ b/docker_sample_config.yml
@@ -0,0 +1,50 @@
+# Sample configuration for the docker-compose setup. Paths are container-side
+# paths under /data (see the volume mappings in docker-compose.yml).
+content-type:
+  org:
+    input-files: null
+    input-filter: "/data/notes/*.org"
+    compressed-jsonl: "/data/generated/.notes.json.gz"
+    embeddings-file: "/data/generated/.note_embeddings.pt"
+
+  ledger:
+    # input-files: null
+    # input-filter: /data/ledger/*.beancount
+    # compressed-jsonl: /data/generated/.transactions.jsonl.gz
+    # embeddings-file: /data/generated/.transaction_embeddings.pt
+
+  image:
+    # input-directory: "/data/images/"
+    # embeddings-file: "/data/generated/.image_embeddings.pt"
+    # batch-size: 50
+    # use-xmp-metadata: "no"
+
+  music:
+    # input-files: null
+    # input-filter: "/data/music/*.org"
+    # compressed-jsonl: "/data/generated/.songs.jsonl.gz"
+    # embeddings-file: "/data/generated/.song_embeddings.pt"
+
+# NOTE(review): /data/models/ is not among the volumes in docker-compose.yml, so
+# models stored there presumably do not survive container re-creation - confirm.
+search-type:
+  symmetric:
+    encoder: "sentence-transformers/paraphrase-MiniLM-L6-v2"
+    cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
+    model_directory: "/data/models/.symmetric"
+
+  asymmetric:
+    encoder: "sentence-transformers/msmarco-MiniLM-L-6-v3"
+    cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
+    model_directory: "/data/models/.asymmetric"
+
+  image:
+    encoder: "clip-ViT-B-32"
+    model_directory: "/data/models/.image_encoder"
+
+# NOTE(review): /data/conversation/ is likewise not a mounted volume - confirm
+# whether the conversation log needs to persist.
+processor:
+  conversation:
+    openai-api-key: null
+    conversation-logfile: "/data/conversation/.conversation_logs.json"
\ No newline at end of file