Publish Khoj on PyPI

## Details

- Add `setup.py` to support pip install of khoj package
- Fix up the application to work with pip install
- Update Readme with instructions to install package via pip
- Publish Khoj as `khoj-assistant` on [PyPI](https://pypi.org/project/khoj-assistant/)
Debanjum 2022-08-03 00:29:49 +03:00 committed by GitHub
commit 1295ba90c3
7 changed files with 115 additions and 9 deletions
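
For context, a `setup.py`-based release like this is normally built and uploaded with the standard setuptools/twine flow. The exact commands used are not recorded in this commit, so the steps below are only a sketch; the `/build/`, `/dist/` and `khoj_assistant.egg-info` entries added to `.gitignore` below are the local artifacts such a flow leaves behind.

``` shell
# Sketch of a typical PyPI release flow (not part of this commit)
pip install --upgrade build twine   # packaging and upload tooling
python -m build                     # builds dist/*.tar.gz and dist/*.whl from setup.py
twine upload dist/*                 # publishes to https://pypi.org/project/khoj-assistant/
```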

.gitignore (6 changes)

@@ -4,8 +4,10 @@ __pycache__
tests/data/models
tests/data/embeddings
src/.data
/src/interface/web/images
.vscode
*.gz
*.pt
/src/interface/web/*.jpg
/src/interface/web/*.png
/build/
/dist/
/khoj_assistant.egg-info/

MANIFEST.in (new file, 3 lines)

@@ -0,0 +1,3 @@
include Readme.md
graft docs*
global-exclude .DS_Store *.py[cod]
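
`MANIFEST.in` controls what goes into the source distribution beyond the Python packages themselves: here it pulls in `Readme.md` and the `docs` tree and drops `.DS_Store` and compiled Python files. A quick way to confirm what actually ships (purely illustrative, not part of the commit):

``` shell
# Build a source distribution and list its contents
python setup.py sdist
tar -tzf dist/*.tar.gz | head -n 20
```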

Readme.md

@@ -12,9 +12,9 @@
- [Analysis](#Analysis)
- [Architecture](#Architecture)
- [Setup](#Setup)
- [Clone](#Clone)
- [Configure](#Configure)
- [Run](#Run)
- [Clone](#1.-Clone)
- [Configure](#2.-Configure)
- [Run](#3.-Run)
- [Use](#Use)
- [Upgrade](#Upgrade)
- [Troubleshoot](#Troubleshoot)
@@ -117,6 +117,34 @@ docker-compose build --pull
### Setup on Local Machine
#### Using Pip
1. Install Dependencies
   1. Python3, Pip \[Required\]
   2. Virtualenv \[Optional\]
   3. Install Exiftool \[Optional\]
      ``` shell
      sudo apt-get -y install libimage-exiftool-perl
      ```
2. Install Khoj
   ``` shell
   virtualenv -p python3 .venv && source .venv/bin/activate # Optional
   pip install khoj-assistant
   ```
3. Configure
   - Configure the files and directories to search in the `content-type` section of `sample_config.yml`
   - To run the application on test data, update file paths containing `/data/` to `tests/data/` in `sample_config.yml` (see the one-liner after this list)
     - For example, replace `/data/notes/*.org` with `tests/data/notes/*.org`
4. Run
   Loads the ML model, generates embeddings and exposes an API to query the notes, images, transactions etc. specified in the config YAML
   ``` shell
   khoj -c=config/sample_config.yml -vv
   ```
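
The test-data switch described in step 3 can be applied with a single substitution over the sample config. This one-liner is illustrative and not part of the commit (GNU `sed`; on macOS use `sed -i ''`):

``` shell
# Point the sample config at the bundled test data
sed -i 's|/data/|tests/data/|g' config/sample_config.yml
```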
#### Using Conda
1. Install Dependencies
   1. Install Python3 \[Required\]
   2. [Install Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html) \[Required\]
@@ -145,7 +173,12 @@ docker-compose build --pull
```
### Upgrade On Local Machine
#### Using Pip
``` shell
pip install --upgrade khoj-assistant
```
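
To confirm the upgrade took effect, the installed version can be checked with pip (illustrative):

``` shell
pip show khoj-assistant   # prints the installed version and location
```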
#### Using Conda
``` shell
cd khoj
git pull origin master

setup.py (new file, 57 lines)

@@ -0,0 +1,57 @@
#!/usr/bin/env python
from setuptools import find_packages, setup
from pathlib import Path
this_directory = Path(__file__).parent
setup(
    name='khoj-assistant',
    version='0.1.3',
    description="A natural language search engine for your personal notes, transactions and images",
    long_description=(this_directory / "Readme.md").read_text(encoding="utf-8"),
    long_description_content_type="text/markdown",
    author='Debanjum Singh Solanky, Saba Imran',
    author_email='debanjum+pypi@gmail.com, narmiabas@gmail.com',
    url='https://github.com/debanjum/khoj',
    license="GPLv3",
    keywords="search semantic-search productivity NLP org-mode markdown beancount images",
    python_requires=">=3.5, <4",
    packages=find_packages(
        where=".",
        exclude=["tests*"],
        include=["src*"]
    ),
    install_requires=[
        "numpy == 1.22.4",
        "torch == 1.11.0",
        "torchvision == 0.12.0",
        "transformers == 4.21.0",
        "sentence-transformers == 2.1.0",
        "openai == 0.20.0",
        "huggingface_hub == 0.8.1",
        "pydantic == 1.9.1",
        "fastapi == 0.77.1",
        "uvicorn == 0.17.6",
        "jinja2 == 3.1.2",
        "pyyaml == 6.0",
        "pytest == 7.1.2",
        "pillow == 8.4.0",
        "aiofiles == 0.8.0",
        "dateparser == 1.1.1",
    ],
    include_package_data=True,
    entry_points={"console_scripts": ["khoj = src.main:run"]},
    classifiers=[
        "Development Status :: 4 - Beta",
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.5",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
    ]
)
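
The `console_scripts` entry point above is what maps the `khoj` command to `src.main:run` once the package is installed. For local development the same wiring can be exercised without publishing to PyPI; a sketch, assuming the repository root as the working directory:

``` shell
# Editable install from the checkout, then run the generated `khoj` script
pip install -e .
khoj -c=config/sample_config.yml -vv
```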

src/main.py

@@ -2,6 +2,7 @@
import sys, json, yaml, os
import time
from typing import Optional
from pathlib import Path
# External Packages
import uvicorn
@@ -31,14 +32,15 @@ processor_config = ProcessorConfigModel()
config_file = ""
verbose = 0
app = FastAPI()
web_directory = f'src/interface/web/'
this_directory = Path(__file__).parent
web_directory = this_directory / 'interface/web/'
app.mount("/static", StaticFiles(directory=web_directory), name="static")
templates = Jinja2Templates(directory=web_directory)
@app.get("/", response_class=FileResponse)
def index():
    return FileResponse(web_directory + "index.html")
    return FileResponse(web_directory / "index.html")
@app.get('/config', response_class=HTMLResponse)
def ui(request: Request):
@@ -116,7 +118,7 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None, r: Opti
        # query images
        query_start = time.time()
        hits = image_search.query(user_query, results_count, model.image_search)
        output_directory = f'{os.getcwd()}/{web_directory}/images'
        output_directory = web_directory / 'images'
        query_end = time.time()
        # collate and return results
@@ -279,26 +281,31 @@ def shutdown_event():
    print('INFO:\tConversation logs saved to disk.')
if __name__ == '__main__':
def run():
    # Load config from CLI
    args = cli(sys.argv[1:])
    # Stores the file path to the config file.
    global config_file
    config_file = args.config_file
    # Store the verbose flag
    global verbose
    verbose = args.verbose
    # Store the raw config data.
    global config
    config = args.config
    # Set device to GPU if available
    device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    # Initialize the search model from Config
    global model
    model = initialize_search(args.config, args.regenerate, device=device)
    # Initialize Processor from Config
    global processor_config
    processor_config = initialize_processor(args.config)
    # Start Application Server
@@ -306,3 +313,7 @@ if __name__ == '__main__':
        uvicorn.run(app, proxy_headers=True, uds=args.socket)
    else:
        uvicorn.run(app, host=args.host, port=args.port)

if __name__ == '__main__':
    run()
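
The path changes in this file are what let the web assets resolve after a `pip install`: the old `src/interface/web/` string only worked when the server was started from the repository root, whereas `Path(__file__).parent` anchors lookups to wherever the package is actually installed. A minimal sketch of the difference (illustrative, not taken from the commit):

``` python
from pathlib import Path

# Old approach: resolved relative to the process working directory,
# so it breaks when khoj is launched outside the repository root
cwd_relative = "src/interface/web/" + "index.html"

# New approach: resolved relative to this module's location on disk,
# so it works for a pip-installed package regardless of os.getcwd()
this_directory = Path(__file__).parent
package_relative = this_directory / "interface/web" / "index.html"

print(cwd_relative)
print(package_relative)
```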
