Use Pip to set up Khoj in Docker, GitHub Workflows. Publish to PyPI Automatically

### Details

#### GitHub Workflows

- Make GitHub Workflows use `pip` instead of `conda`
- Publish Khoj as Python Package Automatically
  - Version based on [PEP440](https://peps.python.org/pep-0440/) and [Semantic Versioning](https://semver.org/) specs
  - On Push to Master: Publish a pre-release (alpha) to PyPI
  - On Push to PR: Publish a development-release on TestPyPI
  - On creating a (release) tag: Publish the latest version (Major.Minor.Patch) to PyPI

#### Docker

- Make `Dockerfile` use `pip` to build image for Khoj. Faster, more standard flow than `conda`
- Only Build Docker Image on Push to Master
  - Availability of pip package negates the need for docker image for PR testing
- Make Docker-Compose use Docker Image published to GitHub Container Registry
- Disable Image search by default. [Fix loading CLIP model](https://github.com/UKPLab/sentence-transformers/issues/1659) before re-enabling
2024-11-27 17:35:07 +01:00 · 2022-08-04 05:09:26 +03:00 · 2022-08-04 05:09:26 +03:00 · 496806452f
commit 496806452f
parent 71fcb1087f ebaf5524d1
10 changed files with 122 additions and 89 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -1,3 +1,9 @@
 .git/
 .pytest_cache/
 .vscode/
+.venv/
+docs/
+tests/
+build/
+dist/
+*.egg-info/
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -1,16 +1,6 @@
 name: build

 on:
-  pull_request:
-    branches:
-      - master
-    paths:
-      - src/**
-      - config/**
-      - setup.py
-      - Dockerfile
-      - docker-compose.yml
-      - .github/workflows/build.yml
  push:
    branches:
      - master
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@ -0,0 +1,74 @@
+name: publish
+
+on:
+  pull_request:
+    branches:
+      - 'master'
+    paths:
+      - src/**
+      - setup.py
+      - .github/workflows/publish.yml
+  push:
+    branches:
+      - 'master'
+    paths:
+      - src/**
+      - setup.py
+      - .github/workflows/publish.yml
+
+jobs:
+  publish:
+    name: Publish App to PyPI
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install build twine
+
+      - name: Install Application
+        run: |
+          pip install --upgrade .
+
+      - name: Publish Release to PyPI
+        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.PYPI_API_KEY }}
+        run: |
+          rm -rf dist
+          python -m build
+          twine check dist/*
+          twine upload dist/*
+
+      - name: Publish Master to PyPI
+        if: github.ref == 'refs/heads/master'
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.PYPI_API_KEY }}
+        run: |
+          sed -E -i "s/version=(.*)',/version=\1a$(date +%s)',/g" setup.py
+          rm -rf dist
+          python -m build
+          twine check dist/*
+          twine upload dist/*
+
+      - name: Publish PR to Test PyPI
+        if: github.event_name == 'pull_request'
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_KEY }}
+          PULL_REQUEST_NUMBER: ${{ github.event.number }}
+        run: |
+          sed -E -i "s/version=(.*)',/version=\1.dev$PULL_REQUEST_NUMBER$(date +%s)',/g" setup.py
+          rm -rf dist
+          python -m build
+          twine check dist/*
+          twine upload -r testpypi dist/*
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -22,50 +22,25 @@ on:

 jobs:
  test:
-    strategy:
-      matrix:
-        include:
-          - os: ubuntu-latest
-            label: linux-64
-            prefix: /usr/share/miniconda3/envs/test
-
-    name: ${{ matrix.label }}
-    runs-on: ${{ matrix.os }}
+    name: Run Tests
+    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3

-      - name: Install Environment Dependencies
-        shell: bash -l {0}
-        run: sudo apt-get -y install libimage-exiftool-perl
-
-      - name: Setup Mambaforge
-        uses: conda-incubator/setup-miniconda@v2
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v4
        with:
-          miniforge-variant: Mambaforge
-          miniforge-version: latest
-          activate-environment: test
-          use-mamba: true
-          environment-file: config/environment.yml
-          python-version: 3.8
-          auto-activate-base: false
-          use-only-tar-bz2: true
+          python-version: '3.10'

-      - name: Set cache date
-        run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pytest

-      - uses: actions/cache@v2
-        with:
-          path: ${{ matrix.prefix }}
-          key: ${{ matrix.label }}-conda-${{ hashFiles('config/environment.yml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }}
-        env:
-          # Increase this value to reset cache if environment.yml has not changed
-          CACHE_NUMBER: 0
-        id: cache
+      - name: Install Application
+        run: |
+          pip install --upgrade .

-      - name: Update environment
-        run: mamba env update -n test -f config/environment.yml
-        if: steps.cache.outputs.cache-hit != 'true'
-
-      - name: Run Pytest
-        shell: bash -l {0}        
-        run: python -m pytest
+      - name: Test Application
+        run: |
+          pytest 
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,7 @@
 __pycache__
 .DS_Store
 .emacs.desktop*
+*.py[cod]
 tests/data/models
 tests/data/embeddings
 src/.data
--- a/Dockerfile
+++ b/Dockerfile
@ -1,29 +1,22 @@
 # syntax=docker/dockerfile:1
-FROM continuumio/miniconda3:4.12.0
+FROM python:3.10-slim-bullseye
+LABEL org.opencontainers.image.source https://github.com/debanjum/khoj

-# Install system dependencies.
+# Install System Dependencies
 RUN apt-get update -y && \
    apt-get -y install libimage-exiftool-perl

-# Add the local code to the /app directory and set it to be the working directory.
-# Since we mount the /app directory as a volume in docker-compose.yml, this
-# allows us to automatically update the code in the Docker image when it's changed.
-ADD . /app
+# Copy Application to Container
+COPY . /app
 WORKDIR /app

-# Get the arguments from the docker-compose environment.
+# Install Python Dependencies
+RUN pip install --upgrade pip && \
+    pip install --upgrade .
+
+# Run the Application
+# There are more arguments required for the application to run,
+# but these should be passed in through the docker-compose.yml file.
 ARG PORT
 EXPOSE ${PORT}
-
-# Create the conda environment.
-RUN conda env create -f config/environment.yml
-
-# Use the conda environment we created to run the application.
-# To enable the conda env, we cannot simply RUN `conda activate khoj`,
-# since each RUN command in a Dockerfile is a separate bash shell. 
-# The environment would not carry forward.
-# Instead, we'll use `conda run` to run the application.
-# There are more arguments required for the script to run, 
-# but these should be passed in through the docker-compose.yml file.
-ENTRYPOINT ["conda", "run", "--no-capture-output", "--name", "khoj", \
-    "python3", "-m", "src.main"]
+ENTRYPOINT ["khoj"]
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -1,3 +1,5 @@
 include Readme.md
-graft docs*
-global-exclude .DS_Store *.py[cod]
+graft src/interface/*
+prune src/interface/web/images*
+prune docs*
+global-exclude .DS_Store *.py[cod]
--- a/config/khoj_sample.yml
+++ b/config/khoj_sample.yml
@ -20,11 +20,11 @@ content-type:
    compressed-jsonl: /data/embeddings/transactions.jsonl.gz
    embeddings-file: /data/embeddings/transaction_embeddings.pt

-  image:
-    input-directories: ["/data/images/"]
-    embeddings-file: "/data/embeddings/image_embeddings.pt"
-    batch-size: 50
-    use-xmp-metadata: true
+#  image:
+#    input-directories: ["/data/images/"]
+#    embeddings-file: "/data/embeddings/image_embeddings.pt"
+#    batch-size: 50
+#    use-xmp-metadata: true

  music:
    input-files: ["/data/music/music.org"]
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -1,11 +1,7 @@
 version: "3.9"
 services:
  server:
-    build:
-      context: .
-      dockerfile: Dockerfile
-      args:
-        - PORT=8000
+    image: ghcr.io/debanjum/khoj:latest
    ports:
      # If changing the local port (left hand side), no other changes required.
      # If changing the remote port (right hand side), 
@ -29,9 +25,5 @@ services:
      # You can set these volumes to point to empty directories on host
      - ./tests/data/embeddings/:/data/embeddings/
      - ./tests/data/models/:/data/models/
-    deploy:
-      resources:
-        limits:
-          memory: 8g
    # Use 0.0.0.0 to explicitly set the host ip for the service on the container. https://pythonspeed.com/articles/docker-connection-refused/
    command: --host="0.0.0.0" --port=8000 -c=config/khoj_sample.yml -vv
--- a/setup.py
+++ b/setup.py
@ -7,7 +7,7 @@ this_directory = Path(__file__).parent

 setup(
    name='khoj-assistant',
-    version='0.1.4',
+    version='0.1.5',
    description="A natural language search engine for your personal notes, transactions and images",
    long_description=(this_directory / "Readme.md").read_text(encoding="utf-8"),
    long_description_content_type="text/markdown",
@ -16,7 +16,7 @@ setup(
    url='https://github.com/debanjum/khoj',
    license="GPLv3",
    keywords="search semantic-search productivity NLP org-mode markdown beancount images",
-    python_requires=">=3.5, <4",
+    python_requires=">=3.8, <4",
    packages=find_packages(
        where=".",
        exclude=["tests*"],