diff --git a/.github/workflows/desktop.yml b/.github/workflows/desktop.yml
new file mode 100644
index 00000000..1df8b7e6
--- /dev/null
+++ b/.github/workflows/desktop.yml
@@ -0,0 +1,48 @@
+name: desktop
+
+on:
+ push:
+ tags:
+ - "*"
+ branches:
+ - 'master'
+ paths:
+ - src/interface/desktop/**
+ - .github/workflows/desktop.yml
+
+jobs:
+ build:
+ name: 🖥️ Build, Release Desktop App
+ runs-on: ubuntu-latest
+ env:
+ TODESKTOP_ACCESS_TOKEN: ${{ secrets.TODESKTOP_ACCESS_TOKEN }}
+ TODESKTOP_EMAIL: ${{ secrets.TODESKTOP_EMAIL }}
+ defaults:
+ run:
+ shell: bash
+ working-directory: src/interface/desktop
+ steps:
+ - name: ⬇️ Checkout Code
+ uses: actions/checkout@v3
+ with:
+ fetch-depth: 0
+
+ - name: ⤵️ Install Node
+ uses: actions/setup-node@v3
+ with:
+ node-version: "lts/*"
+
+ # Install JS dependencies and the ToDesktop CLI, then fill the empty "id"
+ # field in todesktop.json with the application id.
+ - name: ⚙️ Setup Desktop Build
+ env:
+ # Hand the secret to the shell as an environment variable rather than
+ # expanding it into the script text, so its value cannot alter the command.
+ TODESKTOP_ID: ${{ secrets.TODESKTOP_ID }}
+ run: |
+ yarn
+ npm install -g @todesktop/cli
+ sed -i "s/\"id\": \"\"/\"id\": \"${TODESKTOP_ID}\"/g" todesktop.json
+
+ - name: ⚙️ Build Desktop App
+ run: |
+ npx todesktop build
+
+ - name: 📦 Release Desktop App
+ if: startsWith(github.ref, 'refs/tags/')
+ run: |
+ npx todesktop release --latest --force
diff --git a/.github/workflows/dockerize_dev.yml b/.github/workflows/dockerize_dev.yml
new file mode 100644
index 00000000..288fdb8a
--- /dev/null
+++ b/.github/workflows/dockerize_dev.yml
@@ -0,0 +1,43 @@
+name: dockerize-dev
+
+on:
+ pull_request:
+ paths:
+ - src/khoj/**
+ - config/**
+ - pyproject.toml
+ - prod.Dockerfile
+ - .github/workflows/dockerize_dev.yml
+ workflow_dispatch:
+
+env:
+ DOCKER_IMAGE_TAG: 'dev'
+
+jobs:
+ build:
+ name: Build Production Docker Image, Push to Container Registry
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout Code
+ uses: actions/checkout@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v2
+
+ - name: Login to GitHub Container Registry
+ uses: docker/login-action@v2
+ with:
+ registry: ghcr.io
+ username: ${{ github.repository_owner }}
+ password: ${{ secrets.PAT }}
+
+ - name: 📦 Build and Push Docker Image
+ uses: docker/build-push-action@v2
+ with:
+ context: .
+ file: prod.Dockerfile
+ platforms: linux/amd64
+ push: true
+ tags: ghcr.io/${{ github.repository }}-cloud:${{ env.DOCKER_IMAGE_TAG }}
+ build-args: |
+ PORT=42110
diff --git a/.github/workflows/dockerize_production.yml b/.github/workflows/dockerize_production.yml
new file mode 100644
index 00000000..2e1eea4b
--- /dev/null
+++ b/.github/workflows/dockerize_production.yml
@@ -0,0 +1,47 @@
+name: dockerize-prod
+
+on:
+ push:
+ tags:
+ - "*"
+ branches:
+ - master
+ paths:
+ - src/khoj/**
+ - config/**
+ - pyproject.toml
+ - prod.Dockerfile
+ - .github/workflows/dockerize_production.yml
+ workflow_dispatch:
+
+env:
+ DOCKER_IMAGE_TAG: ${{ github.ref == 'refs/heads/master' && 'latest' || github.ref_name }}
+
+jobs:
+ build:
+ name: Build Production Docker Image, Push to Container Registry
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout Code
+ uses: actions/checkout@v3
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v2
+
+ - name: Login to GitHub Container Registry
+ uses: docker/login-action@v2
+ with:
+ registry: ghcr.io
+ username: ${{ github.repository_owner }}
+ password: ${{ secrets.PAT }}
+
+ - name: 📦 Build and Push Docker Image
+ uses: docker/build-push-action@v2
+ with:
+ context: .
+ file: prod.Dockerfile
+ platforms: linux/amd64
+ push: true
+ tags: ghcr.io/${{ github.repository }}-cloud:${{ env.DOCKER_IMAGE_TAG }}
+ build-args: |
+ PORT=42110
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
new file mode 100644
index 00000000..a571e8a1
--- /dev/null
+++ b/.github/workflows/pre-commit.yml
@@ -0,0 +1,48 @@
+name: pre-commit
+
+# Run on pull requests, and on pushes to master, that touch source, tests,
+# config, packaging, the pre-commit configuration or this workflow file.
+on:
+ pull_request:
+ paths:
+ - src/**
+ - tests/**
+ - config/**
+ - pyproject.toml
+ # pre-commit's default config file name; the run step passes no --config.
+ - .pre-commit-config.yaml
+ # Watch this workflow itself (was test.yml, copied from the test workflow).
+ - .github/workflows/pre-commit.yml
+ push:
+ branches:
+ - master
+ paths:
+ - src/khoj/**
+ - tests/**
+ - config/**
+ - pyproject.toml
+ - .pre-commit-config.yaml
+ - .github/workflows/pre-commit.yml
+
+jobs:
+ test:
+ name: Run Tests
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ steps:
+ - uses: actions/checkout@v3
+ with:
+ fetch-depth: 0
+
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ # Quoted so YAML keeps the version a string; a bare number is read as a
+ # float, which would turn a value such as 3.10 into 3.1.
+ python-version: '3.11'
+
+ - name: ⏬️ Install Dependencies
+ run: |
+ sudo apt update && sudo apt install -y libegl1
+ python -m pip install --upgrade pip
+
+ - name: ⬇️ Install Application
+ run: pip install --upgrade .[dev]
+
+ # Run pre-commit's manual-stage hooks over every file in the repository.
+ # The flag is spelled out in full instead of relying on prefix matching.
+ - name: 🌡️ Validate Application
+ run: pre-commit run --hook-stage manual --all-files
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index d8aa9be8..697579da 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -2,10 +2,8 @@ name: test
on:
pull_request:
- branches:
- - 'master'
paths:
- - src/khoj/**
+ - src/**
- tests/**
- config/**
- pyproject.toml
@@ -13,7 +11,7 @@ on:
- .github/workflows/test.yml
push:
branches:
- - 'master'
+ - master
paths:
- src/khoj/**
- tests/**
@@ -26,6 +24,7 @@ jobs:
test:
name: Run Tests
runs-on: ubuntu-latest
+ container: ubuntu:jammy
strategy:
fail-fast: false
matrix:
@@ -33,6 +32,17 @@ jobs:
- '3.9'
- '3.10'
- '3.11'
+
+ services:
+ postgres:
+ image: ankane/pgvector
+ env:
+ POSTGRES_PASSWORD: postgres
+ POSTGRES_USER: postgres
+ ports:
+ - 5432:5432
+ options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
+
steps:
- uses: actions/checkout@v3
with:
@@ -43,17 +53,37 @@ jobs:
with:
python-version: ${{ matrix.python_version }}
- - name: ⏬️ Install Dependencies
+ - name: Install Git
run: |
- sudo apt update && sudo apt install -y libegl1
+ apt update && apt install -y git
+
+ - name: ⏬️ Install Dependencies
+ env:
+ DEBIAN_FRONTEND: noninteractive
+ run: |
+ apt update && apt install -y libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6
+
+ # Install the Postgres server, client and version 14 development headers.
+ - name: ⬇️ Install Postgres
+ env:
+ # Keep apt from prompting for input during package configuration.
+ DEBIAN_FRONTEND: noninteractive
+ run: |
+ apt install -y postgresql postgresql-client postgresql-server-dev-14
+
+ - name: ⬇️ Install pip
+ run: |
+ apt install -y python3-pip
+ python -m ensurepip --upgrade
python -m pip install --upgrade pip
- name: ⬇️ Install Application
- run: pip install --upgrade .[dev]
-
- - name: 🌡️ Validate Application
- run: pre-commit run --hook-stage manual --all
+ run: sed -i 's/dynamic = \["version"\]/version = "0.0.0"/' pyproject.toml && pip install --upgrade .[dev]
- name: 🧪 Test Application
+ env:
+ POSTGRES_HOST: postgres
+ POSTGRES_PORT: 5432
+ POSTGRES_USER: postgres
+ POSTGRES_PASSWORD: postgres
+ POSTGRES_DB: postgres
run: pytest
timeout-minutes: 10
diff --git a/.gitignore b/.gitignore
index 8e99392c..35315263 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,7 +21,8 @@ todesktop.json
khoj_assistant.egg-info
/config/khoj*.yml
.pytest_cache
-khoj.log
+*.log
+static
# Obsidian plugin artifacts
# ---
diff --git a/Dockerfile b/Dockerfile
index bdf9647f..9882a236 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,14 +5,23 @@ LABEL org.opencontainers.image.source https://github.com/khoj-ai/khoj
# Install System Dependencies
RUN apt update -y && apt -y install python3-pip git
+WORKDIR /app
+
# Install Application
-COPY . .
+COPY pyproject.toml .
+COPY README.md .
RUN sed -i 's/dynamic = \["version"\]/version = "0.0.0"/' pyproject.toml && \
pip install --no-cache-dir .
+# Copy Source Code
+COPY . .
+
+# Set the PYTHONPATH environment variable in order for it to find the Django app.
+ENV PYTHONPATH=/app/src:$PYTHONPATH
+
# Run the Application
# There are more arguments required for the application to run,
# but these should be passed in through the docker-compose.yml file.
ARG PORT
EXPOSE ${PORT}
-ENTRYPOINT ["khoj"]
+ENTRYPOINT ["python3", "src/khoj/main.py"]
diff --git a/LICENSE b/LICENSE
index 94a04532..0ad25db4 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,23 +1,21 @@
- GNU GENERAL PUBLIC LICENSE
- Version 3, 29 June 2007
+ GNU AFFERO GENERAL PUBLIC LICENSE
+ Version 3, 19 November 2007
- Copyright (C) 2007 Free Software Foundation, Inc.
+ Copyright (C) 2007 Free Software Foundation, Inc.
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
- The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
+ The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
-the GNU General Public License is intended to guarantee your freedom to
+our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
-software for all its users. We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors. You can apply it to
-your programs, too.
+software for all its users.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
@@ -26,44 +24,34 @@ them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
- To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights. Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
+ Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received. You must make sure that they, too, receive
-or can get the source code. And you must show them these terms so they
-know their rights.
+ A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate. Many developers of free software are heartened and
+encouraged by the resulting cooperation. However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
- Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
+ The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community. It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server. Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
- For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software. For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
- Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so. This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software. The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable. Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products. If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
- Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary. To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
+ An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals. This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
The precise terms and conditions for copying, distribution and
modification follow.
@@ -72,7 +60,7 @@ modification follow.
0. Definitions.
- "This License" refers to version 3 of the GNU General Public License.
+ "This License" refers to version 3 of the GNU Affero General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
@@ -549,35 +537,45 @@ to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
- 13. Use with the GNU Affero General Public License.
+ 13. Remote Network Interaction; Use with the GNU General Public License.
+
+ Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software. This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
+under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
+the GNU Affero General Public License from time to time. Such new versions
+will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
-Program specifies that a certain numbered version of the GNU General
+Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
+GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
+versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
@@ -619,3 +617,45 @@ Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published
+ by the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source. For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code. There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<https://www.gnu.org/licenses/>.
diff --git a/docker-compose.yml b/docker-compose.yml
index bc3da2a9..365d2572 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,7 +1,29 @@
version: "3.9"
services:
+ database:
+ image: ankane/pgvector
+ ports:
+ - "5432:5432"
+ environment:
+ POSTGRES_USER: postgres
+ POSTGRES_PASSWORD: postgres
+ POSTGRES_DB: postgres
+ volumes:
+ - khoj_db:/var/lib/postgresql/data/
+ healthcheck:
+ test: ["CMD-SHELL", "pg_isready -U postgres"]
+ interval: 30s
+ timeout: 10s
+ retries: 5
server:
+ depends_on:
+ database:
+ condition: service_healthy
+ # Use the following line to use the latest version of khoj. Otherwise, it will build from source.
image: ghcr.io/khoj-ai/khoj:latest
+ # Uncomment the following two lines to build from source instead. This will take a few minutes. Leave them commented out if you want to use the official image.
+ # build:
+ # context: .
ports:
# If changing the local port (left hand side), no other changes required.
# If changing the remote port (right hand side),
@@ -10,26 +32,23 @@ services:
- "42110:42110"
working_dir: /app
volumes:
- - .:/app
- # These mounted volumes hold the raw data that should be indexed for search.
- # The path in your local directory (left hand side)
- # points to the files you want to index.
- # The path of the mounted directory (right hand side),
- # must match the path prefix in your config file.
- - ./tests/data/org/:/data/org/
- - ./tests/data/images/:/data/images/
- - ./tests/data/markdown/:/data/markdown/
- - ./tests/data/pdf/:/data/pdf/
- # Embeddings and models are populated after the first run
- # You can set these volumes to point to empty directories on host
- - ./tests/data/embeddings/:/root/.khoj/content/
- - ./tests/data/models/:/root/.khoj/search/
- khoj_config:/root/.khoj/
- - sentence_tranformer_models:/root/.cache/torch/sentence_transformers
+ - khoj_models:/root/.cache/torch/sentence_transformers
# Use 0.0.0.0 to explicitly set the host ip for the service on the container. https://pythonspeed.com/articles/docker-connection-refused/
- command: --host="0.0.0.0" --port=42110 -vv
+ environment:
+ - POSTGRES_DB=postgres
+ - POSTGRES_USER=postgres
+ - POSTGRES_PASSWORD=postgres
+ - POSTGRES_HOST=database
+ - POSTGRES_PORT=5432
+ - KHOJ_DJANGO_SECRET_KEY=secret
+ - KHOJ_DEBUG=True
+ - KHOJ_ADMIN_EMAIL=username@example.com
+ - KHOJ_ADMIN_PASSWORD=password
+ command: --host="0.0.0.0" --port=42110 -vv --anonymous-mode
volumes:
khoj_config:
- sentence_tranformer_models:
+ khoj_db:
+ khoj_models:
diff --git a/docs/README.md b/docs/README.md
index 06d026a4..04a2226a 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -9,7 +9,7 @@
-An AI personal assistant for your digital brain
+An AI copilot for your Second Brain
@@ -24,30 +24,29 @@
## Introduction
-Welcome to the Khoj Docs! This is the best place to [get started](./setup.md) with Khoj.
+Welcome to the Khoj Docs! This is the best place to get set up and explore Khoj's features.
-- Khoj is a desktop application to [search](./search.md) and [chat](./chat.md) with your notes, documents and images
-- It is an offline-first, open source AI personal assistant accessible from your [Emacs](./emacs.md), [Obsidian](./obsidian.md) or [Web browser](./web.md)
-- It works with jpeg, markdown, [notion](./notion_integration.md) org-mode, pdf files and [github repositories](./github_integration.md)
-- If you have more questions, check out the [FAQ](https://faq.khoj.dev/) - it's a live Khoj instance indexing our Github repository!
+- Khoj is an open source, personal AI
+- You can [chat](chat.md) with it about anything. When relevant, it'll use any notes or documents you shared with it to respond
+- Quickly [find](search.md) relevant notes and documents using natural language
+- It understands pdf, plaintext, markdown, org-mode files, [notion pages](notion_integration.md) and [github repositories](github_integration.md)
+- Access it from your [Emacs](emacs.md), [Obsidian](obsidian.md), [Web browser](web.md) or the [Khoj Desktop app](desktop.md)
+- You can self-host Khoj on your consumer hardware or share it with your family, friends or team from your private cloud
## Quickstart
-[Click here](./setup.md) for full setup instructions
-
-```shell
-pip install khoj-assistant && khoj
-```
+- [Try Khoj Cloud](https://app.khoj.dev) to get started quickly
+- [Read these instructions](./setup.md) to self-host a private instance of Khoj
## Overview
-#### [Search](./search.md)
- - **Local**: Your personal data stays local. All search and indexing is done on your machine.
+#### [Search](search.md)
+ - **Natural**: Use natural language queries to quickly find relevant notes and documents.
- **Incremental**: Incremental search for a fast, search-as-you-type experience
-#### [Chat](./chat.md)
+#### [Chat](chat.md)
- **Faster answers**: Find answers faster, smoother than search. No need to manually scan through your notes to find answers.
- **Iterative discovery**: Iteratively explore and (re-)discover your notes
- **Assisted creativity**: Smoothly weave across answers retrieval and content generation
diff --git a/docs/_sidebar.md b/docs/_sidebar.md
index 9e0b8849..348b785a 100644
--- a/docs/_sidebar.md
+++ b/docs/_sidebar.md
@@ -1,12 +1,13 @@
- Get Started
- [Overview](README.md)
- - [Install](setup.md)
+ - [Self-Host](setup.md)
- [Demos](demos.md)
- Use
- [Features](features.md)
- [Chat](chat.md)
- [Search](search.md)
- - Interfaces
+ - Clients
+ - [Desktop](desktop.md)
- [Obsidian](obsidian.md)
- [Emacs](emacs.md)
- [Web](web.md)
diff --git a/docs/advanced.md b/docs/advanced.md
index a567783f..95dacf30 100644
--- a/docs/advanced.md
+++ b/docs/advanced.md
@@ -1,63 +1,11 @@
## Advanced Usage
-### Search across Different Languages
+
+### Search across Different Languages (Self-Hosting)
To search for notes in multiple, different languages, you can use a [multi-lingual model](https://www.sbert.net/docs/pretrained_models.html#multi-lingual-models).
For example, the [paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) supports [50+ languages](https://www.sbert.net/docs/pretrained_models.html#:~:text=we%20used%20the%20following%2050%2B%20languages), has good search quality and speed. To use it:
-1. Manually update `search-type > asymmetric > encoder` to `paraphrase-multilingual-MiniLM-L12-v2` in your `~/.khoj/khoj.yml` file for now. See diff of `khoj.yml` below for illustration:
-
- ```diff
- asymmetric:
- - encoder: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
- + encoder: paraphrase-multilingual-MiniLM-L12-v2
- cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
- model_directory: "~/.khoj/search/asymmetric/"
- ```
-
-2. Regenerate your content index. For example, by opening [\/api/update?t=force](http://localhost:42110/api/update?t=force)
-
-### Access Khoj on Mobile
-1. [Setup Khoj](/#/setup) on your personal server. This can be any always-on machine, i.e an old computer, RaspberryPi(?) etc
-2. [Install](https://tailscale.com/kb/installation/) [Tailscale](tailscale.com/) on your personal server and phone
-3. Open the Khoj web interface of the server from your phone browser. It should be `http://tailscale-ip-of-server:42110` or `http://name-of-server:42110` if you've setup [MagicDNS](https://tailscale.com/kb/1081/magicdns/)
-4. Click the [Add to Homescreen](https://developer.mozilla.org/en-US/docs/Web/Progressive_web_apps/Add_to_home_screen) button
-5. Enjoy exploring your notes, documents and images from your phone!
-
-![](./assets/khoj_pwa_android.png?)
-
-### Use OpenAI Models for Search
-#### Setup
-1. Set `encoder-type`, `encoder` and `model-directory` under `asymmetric` and/or `symmetric` `search-type` in your `khoj.yml` (at `~/.khoj/khoj.yml`):
- ```diff
- asymmetric:
- - encoder: "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
- + encoder: text-embedding-ada-002
- + encoder-type: khoj.utils.models.OpenAI
- cross-encoder: "cross-encoder/ms-marco-MiniLM-L-6-v2"
- - encoder-type: sentence_transformers.SentenceTransformer
- - model_directory: "~/.khoj/search/asymmetric/"
- + model-directory: null
- ```
-2. [Setup your OpenAI API key in Khoj](/#/chat?id=setup)
-3. Restart Khoj server to generate embeddings. It will take longer than with the offline search models.
-
-#### Warnings
- This configuration *uses an online model*
- - It will **send all notes to OpenAI** to generate embeddings
- - **All queries will be sent to OpenAI** when you search with Khoj
- - You will be **charged by OpenAI** based on the total tokens processed
- - It *requires an active internet connection* to search and index
-
-### Bootstrap Khoj Search for Offline Usage later
-
-You can bootstrap Khoj pre-emptively to run on machines that do not have internet access. An example use-case would be to run Khoj on an air-gapped machine.
-Note: *Only search can currently run in fully offline mode, not chat.*
-
-- With Internet
- 1. Manually download the [asymmetric text](https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1), [symmetric text](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) and [image search](https://huggingface.co/sentence-transformers/clip-ViT-B-32) models from HuggingFace
- 2. Pip install khoj (and dependencies) in an associated virtualenv. E.g `python -m venv .venv && source .venv/bin/activate && pip install khoj-assistant`
-- Without Internet
- 1. Copy each of the search models into their respective folders, `asymmetric`, `symmetric` and `image` under the `~/.khoj/search/` directory on the air-gapped machine
- 2. Copy the khoj virtual environment directory onto the air-gapped machine, activate the environment and start and khoj as normal. E.g `source .venv/bin/activate && khoj`
+1. Manually update the search config in the server's admin settings page. Go to [the search config](http://localhost:42110/server/admin/database/searchmodelconfig/). Either create a new one, if none exists, or update the existing one. Set the bi_encoder to `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2` and the cross_encoder to `cross-encoder/ms-marco-MiniLM-L-6-v2`.
+2. Regenerate your content index from all the relevant clients. This step is very important, as you'll need to re-encode all your content with the new model.
### Query Filters
diff --git a/docs/assets/khoj_chat_on_desktop.png b/docs/assets/khoj_chat_on_desktop.png
new file mode 100644
index 00000000..e8c10718
Binary files /dev/null and b/docs/assets/khoj_chat_on_desktop.png differ
diff --git a/docs/assets/khoj_search_on_desktop.png b/docs/assets/khoj_search_on_desktop.png
new file mode 100644
index 00000000..1dc3231d
Binary files /dev/null and b/docs/assets/khoj_search_on_desktop.png differ
diff --git a/docs/chat.md b/docs/chat.md
index 2efd7b1b..4ea64c3f 100644
--- a/docs/chat.md
+++ b/docs/chat.md
@@ -1,13 +1,13 @@
-### Khoj Chat
-#### Overview
+## Khoj Chat
+### Overview
- Creates a personal assistant for you to inquire and engage with your notes
- You can choose to use Online or Offline Chat depending on your requirements
- Supports multi-turn conversations with the relevant notes for context
- Shows reference notes used to generate a response
-### Setup
+### Setup (Self-Hosting)
#### Offline Chat
-Offline chat stays completely private and works without internet. But it is slower, lower quality and more compute intensive.
+Offline chat stays completely private and works without internet using open-source models.
> **System Requirements**:
> - Minimum 8 GB RAM. Recommend **16Gb VRAM**
@@ -15,9 +15,10 @@ Offline chat stays completely private and works without internet. But it is slow
> - A CPU supporting [AVX or AVX2 instructions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) is required
> - A Mac M1+ or [Vulcan supported GPU](https://vulkan.gpuinfo.org/) should significantly speed up chat response times
-- Open your [Khoj settings](http://localhost:42110/config/) and click *Enable* on the Offline Chat card
+1. Open your [Khoj offline settings](http://localhost:42110/server/admin/database/offlinechatprocessorconversationconfig/) and click *Enable* on the Offline Chat configuration.
+2. Open your [Chat model options](http://localhost:42110/server/admin/database/chatmodeloptions/) and add a new option for the offline chat model you want to use. Make sure to use `Offline` as its type. We currently only support offline models that use the [Llama chat prompt](https://replicate.com/blog/how-to-prompt-llama#wrap-user-input-with-inst-inst-tags) format. We recommend using `mistral-7b-instruct-v0.1.Q4_0.gguf`.
-![Configure offline chat](https://user-images.githubusercontent.com/6413477/257021364-8a2029f5-dc21-4de8-9af9-9ba6100d695c.mp4 ':include :type=mp4')
+!> **Note**: Offline chat is not supported for a multi-user scenario. The host machine will encounter segmentation faults if multiple users try to use offline chat at the same time.
#### Online Chat
Online chat requires internet to use ChatGPT but is faster, higher quality and less compute intensive.
@@ -25,14 +26,12 @@ Online chat requires internet to use ChatGPT but is faster, higher quality and l
!> **Warning**: This will enable Khoj to send your chat queries and query relevant notes to OpenAI for processing
1. Get your [OpenAI API Key](https://platform.openai.com/account/api-keys)
-2. Open your [Khoj Online Chat settings](http://localhost:42110/config/processor/conversation), add your OpenAI API key, and click *Save*. Then go to your [Khoj settings](http://localhost:42110/config) and click `Configure`. This will refresh Khoj with your OpenAI API key.
-
-![Configure online chat](https://user-images.githubusercontent.com/6413477/256998908-ac26e55e-13a2-45fb-9348-3b90a62f7687.mp4 ':include :type=mp4')
-
+2. Open your [Khoj Online Chat settings](http://localhost:42110/server/admin/database/openaiprocessorconversationconfig/). Add a new setting with your OpenAI API key, and click *Save*. Only one configuration will be used, so make sure that's the only one you have.
+3. Open your [Chat model options](http://localhost:42110/server/admin/database/chatmodeloptions/) and add a new option for the OpenAI chat model you want to use. Make sure to use `OpenAI` as its type.
### Use
1. Open Khoj Chat
- - **On Web**: Open [/chat](http://localhost:42110/chat) in your web browser
+ - **On Web**: Open [/chat](https://app.khoj.dev/chat) in your web browser
- **On Obsidian**: Search for *Khoj: Chat* in the [Command Palette](https://help.obsidian.md/Plugins/Command+palette)
- **On Emacs**: Run `M-x khoj <user-query>`
2. Enter your queries to chat with Khoj. Use [slash commands](#commands) and [query filters](./advanced.md#query-filters) to change what Khoj uses to respond
diff --git a/docs/desktop.md b/docs/desktop.md
new file mode 100644
index 00000000..a28352db
--- /dev/null
+++ b/docs/desktop.md
@@ -0,0 +1,23 @@
+
Desktop
+
+> An AI copilot for your Second Brain
+
+## Features
+- **Chat**
+ - **Faster answers**: Find answers quickly, from your private notes or the public internet
+ - **Assisted creativity**: Smoothly weave across retrieving answers and generating content
+ - **Iterative discovery**: Iteratively explore and re-discover your notes
+- **Search**
+ - **Natural**: Advanced natural language understanding using Transformer based ML Models
+ - **Incremental**: Incremental search for a fast, search-as-you-type experience
+
+## Setup
+
+1. Install the [Khoj Desktop app](https://khoj.dev/downloads) for your OS
+2. Generate an API key on the [Khoj Web App](https://app.khoj.dev/config#clients)
+3. Set your Khoj API Key on the *Settings* page of the Khoj Desktop app
+4. [Optional] Add any files, folders you'd like Khoj to be aware of on the *Settings* page and click *Save*
+
+## Interface
+![](./assets/khoj_chat_on_desktop.png ':size=600px')
+![](./assets/khoj_search_on_desktop.png ':size=600px')
diff --git a/docs/desktop_installation.md b/docs/desktop_installation.md
index d79a282f..42a89383 100644
--- a/docs/desktop_installation.md
+++ b/docs/desktop_installation.md
@@ -28,5 +28,5 @@ For the Linux installation, you have to have `glibc` version 2.35 or higher. You
If you decide you want to uninstall the application, you can uninstall it like any other application on your system. For example, on MacOS, you can drag the application to the trash. On Windows, you can uninstall it from the `Add or Remove Programs` menu. On Linux, you can uninstall it with `sudo apt remove khoj`.
In addition to that, you might want to `rm -rf` the following directories:
-- `~/.khoj`
-- `~/.cache/gpt4all`
+ - `~/.khoj`
+ - `~/.cache/gpt4all`
diff --git a/docs/development.md b/docs/development.md
index dd1aad46..0d715dc4 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -25,13 +25,7 @@ pip install -e .'[dev]'
khoj -vv
```
2. Configure Khoj
- - **Via the Settings UI**: Add files, directories to index the [Khoj settings](http://localhost:42110/config) UI once Khoj has started up. Once you've saved all your settings, click `Configure`.
- - **Manually**:
- - Copy the `config/khoj_sample.yml` to `~/.khoj/khoj.yml`
- - Set `input-files` or `input-filter` in each relevant `content-type` section of `~/.khoj/khoj.yml`
- - Set `input-directories` field in `image` `content-type` section
- - Delete `content-type` and `processor` sub-section(s) irrelevant for your use-case
- - Restart khoj
+ - **Via the Desktop application**: Add files, directories to index using the settings page of your desktop application. Click "Save" to immediately trigger indexing.
Note: Wait after configuration for khoj to Load ML model, generate embeddings and expose API to query notes, images, documents etc specified in config YAML
diff --git a/docs/emacs.md b/docs/emacs.md
index 6492ecc4..d8e7e682 100644
--- a/docs/emacs.md
+++ b/docs/emacs.md
@@ -1,6 +1,6 @@
-
Emacs
+
Emacs
-> An AI personal assistance for your digital brain
+> An AI copilot for your Second Brain in Emacs
@@ -10,14 +10,13 @@
## Features
+- **Chat**
+ - **Faster answers**: Find answers quickly, from your private notes or the public internet
+ - **Assisted creativity**: Smoothly weave across retrieving answers and generating content
+ - **Iterative discovery**: Iteratively explore and re-discover your notes
- **Search**
- **Natural**: Advanced natural language understanding using Transformer based ML Models
- - **Local**: Your personal data stays local. All search, indexing is done on your machine*
- **Incremental**: Incremental search for a fast, search-as-you-type experience
-- **Chat**
- - **Faster answers**: Find answers faster than search
- - **Iterative discovery**: Iteratively explore and (re-)discover your notes
- - **Assisted creativity**: Smoothly weave across answer retrieval and content generation
## Interface
#### Search
@@ -27,79 +26,76 @@
![khoj chat on emacs](./assets/khoj_chat_on_emacs.png ':size=400px')
## Setup
-- *Make sure [python](https://realpython.com/installing-python/) and [pip](https://pip.pypa.io/en/stable/installation/) are installed on your machine*
+1. Generate an API key on the [Khoj Web App](https://app.khoj.dev/config#clients)
+2. Add below snippet to your Emacs config file, usually at `~/.emacs.d/init.el`
-- *khoj.el attempts to automatically install, start and configure the khoj server.*
- If this fails, follow [these instructions](/setup) to manually setup the khoj server.
-### Direct Install
+
+
+#### **Direct Install**
+*Khoj will index your org-agenda files, by default*
+
```elisp
+;; Install Khoj.el
M-x package-install khoj
+
+; Set your Khoj API key
+(setq khoj-api-key "YOUR_KHOJ_CLOUD_API_KEY")
```
-### Minimal Install
-Add below snippet to your Emacs config file.
-Indexes your org-agenda files, by default.
+#### **Minimal Install**
+*Khoj will index your org-agenda files, by default*
```elisp
- ;; Install Khoj Package from MELPA Stable
- (use-package khoj
- :ensure t
- :pin melpa-stable
- :bind ("C-c s" . 'khoj))
-```
-
-- Note: Install `khoj.el` from MELPA (instead of MELPA Stable) if you installed the pre-release version of khoj
- - That is, use `:pin melpa` to install khoj.el in above snippet if khoj server was installed with `--pre` flag, i.e `pip install --pre khoj-assistant`
- - Else use `:pin melpa-stable` to install khoj.el in above snippet if khoj was installed with `pip install khoj-assistant`
- - This ensures both khoj.el and khoj app are from the same version (git tagged or latest)
-
-### Standard Install
- Add below snippet to your Emacs config file.
- Indexes the specified org files, directories. Sets up OpenAI API key for Khoj Chat
-
-```elisp
-;; Install Khoj Package from MELPA Stable
+;; Install Khoj client from MELPA Stable
(use-package khoj
:ensure t
:pin melpa-stable
:bind ("C-c s" . 'khoj)
- :config (setq khoj-org-directories '("~/docs/org-roam" "~/docs/notes")
- khoj-org-files '("~/docs/todo.org" "~/docs/work.org")
- khoj-openai-api-key "YOUR_OPENAI_API_KEY")) ; required to enable chat
+ :config (setq khoj-api-key "YOUR_KHOJ_CLOUD_API_KEY"))
```
-### With [Straight.el](https://github.com/raxod502/straight.el)
-Add below snippet to your Emacs config file.
-Indexes the specified org files, directories. Sets up OpenAI API key for Khoj Chat
+#### **Standard Install**
+*Configures the specified org files, directories to be indexed by Khoj*
```elisp
- ;; Install Khoj Package using Straight.el
- (use-package khoj
- :after org
- :straight (khoj :type git :host github :repo "khoj-ai/khoj" :files (:defaults "src/interface/emacs/khoj.el"))
- :bind ("C-c s" . 'khoj)
- :config (setq khoj-org-directories '("~/docs/org-roam" "~/docs/notes")
- khoj-org-files '("~/docs/todo.org" "~/docs/work.org")
- khoj-openai-api-key "YOUR_OPENAI_API_KEY" ; required to enable chat)
- ```
+;; Install Khoj client from MELPA Stable
+(use-package khoj
+ :ensure t
+ :pin melpa-stable
+ :bind ("C-c s" . 'khoj)
+ :config (setq khoj-api-key "YOUR_KHOJ_CLOUD_API_KEY"
+ khoj-org-directories '("~/docs/org-roam" "~/docs/notes")
+ khoj-org-files '("~/docs/todo.org" "~/docs/work.org")))
+```
+#### **Straight.el**
+*Configures the specified org files, directories to be indexed by Khoj*
+
+```elisp
+;; Install Khoj client using Straight.el
+(use-package khoj
+ :after org
+ :straight (khoj :type git :host github :repo "khoj-ai/khoj" :files (:defaults "src/interface/emacs/khoj.el"))
+ :bind ("C-c s" . 'khoj)
+ :config (setq khoj-api-key "YOUR_KHOJ_CLOUD_API_KEY"
+ khoj-org-directories '("~/docs/org-roam" "~/docs/notes")
+ khoj-org-files '("~/docs/todo.org" "~/docs/work.org")))
+```
+
+
## Use
### Search
+See [Khoj Search](search.md) for details
1. Hit `C-c s s` (or `M-x khoj RET s`) to open khoj search
-
-2. Enter your query in natural language
-
- e.g "What is the meaning of life?", "My life goals for 2023"
+2. Enter your query in natural language
+ E.g *"What is the meaning of life?"*, *"My life goals for 2023"*
### Chat
+See [Khoj Chat](chat.md) for details
1. Hit `C-c s c` (or `M-x khoj RET c`) to open khoj chat
-
-2. Ask questions in a natural, conversational style
-
- E.g "When did I file my taxes last year?"
-
- See [Khoj Chat](/#/chat) for more details
+2. Ask questions in a natural, conversational style
+ E.g *"When did I file my taxes last year?"*
### Find Similar Entries
This feature finds entries similar to the one you are currently on.
@@ -108,7 +104,6 @@ This feature finds entries similar to the one you are currently on.
### Advanced Usage
- Add [query filters](https://github.com/khoj-ai/khoj/#query-filters) during search to narrow down results further
-
e.g `What is the meaning of life? -"god" +"none" dt>"last week"`
- Use `C-c C-o 2` to open the current result at cursor in its source org file
@@ -121,31 +116,21 @@ This feature finds entries similar to the one you are currently on.
![](./assets/khoj_emacs_menu.png)
Hit `C-c s` (or `M-x khoj`) to open the khoj menu above. Then:
- Hit `t` until your preferred content type is selected in the khoj menu
-
`Content Type` specifies the content to perform `Search`, `Update` or `Find Similar` actions on
- Hit `n` twice and then enter number of results you want to see
-
`Results Count` is used by the `Search` and `Find Similar` actions
- Hit `-f u` to `force` update the khoj content index
-
The `Force Update` switch is only used by the `Update` action
## Upgrade
-### Upgrade Khoj Backend
-```bash
-pip install --upgrade khoj-assistant
-```
-### Upgrade Khoj.el
Use your Emacs package manager to upgrade `khoj.el`
+
-- For `khoj.el` from MELPA
- - Method 1
- - Run `M-x package-list-packages` to list all packages
- - Press `U` on `khoj` to mark it for upgrade
- - Press `x` to execute the marked actions
- - Method 2
- - Run `M-x package-refresh-content`
- - Run `M-x package-reinstall khoj`
+#### **With MELPA**
+1. Run `M-x package-refresh-contents`
+2. Run `M-x package-reinstall khoj`
-- For `khoj.el` from Straight
- - Run `M-x straight-pull-package khoj`
+#### **With Straight.el**
+- Run `M-x straight-pull-package khoj`
+
+
diff --git a/docs/features.md b/docs/features.md
index 3bd8939f..f59e0657 100644
--- a/docs/features.md
+++ b/docs/features.md
@@ -1,10 +1,10 @@
## Features
-#### [Search](./search.md)
+#### [Search](search.md)
- **Local**: Your personal data stays local. All search and indexing is done on your machine.
- **Incremental**: Incremental search for a fast, search-as-you-type experience
-#### [Chat](./chat.md)
+#### [Chat](chat.md)
- **Faster answers**: Find answers faster, smoother than search. No need to manually scan through your notes to find answers.
- **Iterative discovery**: Iteratively explore and (re-)discover your notes
- **Assisted creativity**: Smoothly weave across answers retrieval and content generation
diff --git a/docs/github_integration.md b/docs/github_integration.md
index 6b8dce48..b7c8a4fe 100644
--- a/docs/github_integration.md
+++ b/docs/github_integration.md
@@ -1,14 +1,14 @@
-# Setup the Github integration
+# 🧑🏾‍💻 Setup the Github integration
The Github integration allows you to index as many repositories as you want. It's currently default configured to index Issues, Commits, and all Markdown/Org files in each repository. For large repositories, this takes a fairly long time, but it works well for smaller projects.
# Configure your settings
-1. Go to [http://localhost:42110/config](http://localhost:42110/config) and enter in settings for the data sources you want to index. You'll have to specify the file paths.
+1. Go to [https://app.khoj.dev/config](https://app.khoj.dev/config) and enter in settings for the data sources you want to index. You'll have to specify the file paths.
## Use the Github plugin
1. Generate a [classic PAT (personal access token)](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) from [Github](https://github.com/settings/tokens) with `repo` and `admin:org` scopes at least.
-2. Navigate to [http://localhost:42110/config/content_type/github](http://localhost:42110/config/content_type/github) to configure your Github settings. Enter in your PAT, along with details for each repository you want to index.
+2. Navigate to [https://app.khoj.dev/config/content-source/github](https://app.khoj.dev/config/content-source/github) to configure your Github settings. Enter in your PAT, along with details for each repository you want to index.
3. Click `Save`. Go back to the settings page and click `Configure`.
-4. Go to [http://localhost:42110/](http://localhost:42110/) and start searching!
+4. Go to [https://app.khoj.dev/](https://app.khoj.dev/) and start searching!
diff --git a/docs/index.html b/docs/index.html
index 33ba0735..5c1d3466 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -5,6 +5,15 @@
Document
+
+
+
+
+
+
+
+
+
@@ -17,11 +26,13 @@
repo: 'https://github.com/khoj-ai/khoj',
loadSidebar: true,
themeColor: '#c2a600',
+ auto2top: true,
// coverpage: true,
}
+
diff --git a/docs/notion_integration.md b/docs/notion_integration.md
index 5fee7ff6..6a309d41 100644
--- a/docs/notion_integration.md
+++ b/docs/notion_integration.md
@@ -8,7 +8,7 @@ We haven't setup a fancy integration with OAuth yet, so this integration still r
![setup_new_integration](https://github.com/khoj-ai/khoj/assets/65192171/b056e057-d4dc-47dc-aad3-57b59a22c68b)
3. Share all the workspaces that you want to integrate with the Khoj integration you just made in the previous step
![enable_workspace](https://github.com/khoj-ai/khoj/assets/65192171/98290303-b5b8-4cb0-b32c-f68c6923a3d0)
-4. In the first step, you generated an API key. Use the newly generated API Key in your Khoj settings, by default at http://localhost:42110/config/content_type/notion. Click `Save`.
-5. Click `Configure` in http://localhost:42110/config to index your Notion workspace(s).
+4. In the first step, you generated an API key. Use the newly generated API Key in your Khoj settings, by default at https://app.khoj.dev/config/content-source/notion. Click `Save`.
+5. Click `Configure` in https://app.khoj.dev/config to index your Notion workspace(s).
That's it! You should be ready to start searching and chatting. Make sure you've configured your OpenAI API Key for chat.
diff --git a/docs/obsidian.md b/docs/obsidian.md
index 10e65ba1..478c4c60 100644
--- a/docs/obsidian.md
+++ b/docs/obsidian.md
@@ -1,16 +1,15 @@
-
Obsidian
+
Obsidian
-> An AI personal assistant for your Digital Brain in Obsidian
+> An AI copilot for your Second Brain in Obsidian
## Features
+- **Chat**
+ - **Faster answers**: Find answers quickly, from your private notes or the public internet
+ - **Assisted creativity**: Smoothly weave across retrieving answers and generating content
+ - **Iterative discovery**: Iteratively explore and re-discover your notes
- **Search**
- **Natural**: Advanced natural language understanding using Transformer based ML Models
- - **Local**: Your personal data stays local. All search and indexing is done on your machine. *Unlike chat which requires access to GPT.*
- **Incremental**: Incremental search for a fast, search-as-you-type experience
-- **Chat**
- - **Faster answers**: Find answers faster and with less effort than search
- - **Iterative discovery**: Iteratively explore and (re-)discover your notes
- - **Assisted creativity**: Smoothly weave across answers retrieval and content generation
## Interface
![](./assets/khoj_search_on_obsidian.png ':size=400px')
@@ -18,102 +17,37 @@
## Setup
-- *Make sure [python](https://realpython.com/installing-python/) and [pip](https://pip.pypa.io/en/stable/installation/) are installed on your machine*
-- *Ensure you follow the ordering of the setup steps. Install the plugin after starting the khoj backend. This allows the plugin to configure the khoj backend*
-### 1. Setup Backend
-Open terminal/cmd and run below command to install and start the khoj backend
-- On Linux/MacOS
- ```shell
- python -m pip install khoj-assistant && khoj
- ```
-
-- On Windows
- ```shell
- py -m pip install khoj-assistant && khoj
- ```
-
-### 2. Setup Plugin
1. Open [Khoj](https://obsidian.md/plugins?id=khoj) from the *Community plugins* tab in Obsidian settings panel
2. Click *Install*, then *Enable* on the Khoj plugin page in Obsidian
- 3. [Optional] To enable Khoj Chat, set your [OpenAI API key](https://platform.openai.com/account/api-keys) in the Khoj plugin settings
+ 3. Generate an API key on the [Khoj Web App](https://app.khoj.dev/config#clients)
+ 4. Set your Khoj API Key in the Khoj plugin settings in Obsidian
-See [official Obsidian plugin docs](https://help.obsidian.md/Extending+Obsidian/Community+plugins) for details
+See the official [Obsidian Plugin Docs](https://help.obsidian.md/Extending+Obsidian/Community+plugins) for more details on installing Obsidian plugins.
## Use
### Chat
Run *Khoj: Chat* from the [Command Palette](https://help.obsidian.md/Plugins/Command+palette) and ask questions in a natural, conversational style.
-E.g "When did I file my taxes last year?"
-
-Notes:
-- *Using Khoj Chat will result in query relevant notes being shared with OpenAI for ChatGPT to respond.*
-- *To use Khoj Chat, ensure you've set your [OpenAI API key](https://platform.openai.com/account/api-keys) in the Khoj plugin settings.*
+E.g *"When did I file my taxes last year?"*
See [Khoj Chat](/chat) for more details
-### Search
-Click the *Khoj search* icon 🔎 on the [Ribbon](https://help.obsidian.md/User+interface/Workspace/Ribbon) or run *Khoj: Search* from the [Command Palette](https://help.obsidian.md/Plugins/Command+palette)
-
-*Note: Ensure the khoj server is running in the background before searching. Execute `khoj` in your terminal if it is not already running*
-
-[search_demo](https://user-images.githubusercontent.com/6413477/218801155-cd67e8b4-a770-404a-8179-d6b61caa0f93.mp4 ':include :type=mp4')
-
-#### Query Filters
-
-Use structured query syntax to filter the natural language search results
-- **Word Filter**: Get entries that include/exclude a specified term
- - Entries that contain term_to_include: `+"term_to_include"`
- - Entries that contain term_to_exclude: `-"term_to_exclude"`
-- **Date Filter**: Get entries containing dates in YYYY-MM-DD format from specified date (range)
- - Entries from April 1st 1984: `dt:"1984-04-01"`
- - Entries after March 31st 1984: `dt>="1984-04-01"`
- - Entries before April 2nd 1984 : `dt<="1984-04-01"`
-- **File Filter**: Get entries from a specified file
- - Entries from incoming.org file: `file:"incoming.org"`
-- Combined Example
- - `what is the meaning of life? file:"1984.org" dt>="1984-01-01" dt<="1985-01-01" -"big" -"brother"`
- - Adds all filters to the natural language query. It should return entries
- - from the file *1984.org*
- - containing dates from the year *1984*
- - excluding words *"big"* and *"brother"*
- - that best match the natural language query *"what is the meaning of life?"*
-
### Find Similar Notes
To see other notes similar to the current one, run *Khoj: Find Similar Notes* from the [Command Palette](https://help.obsidian.md/Plugins/Command+palette)
+### Search
+Click the *Khoj search* icon 🔎 on the [Ribbon](https://help.obsidian.md/User+interface/Workspace/Ribbon) or run *Khoj: Search* from the [Command Palette](https://help.obsidian.md/Plugins/Command+palette)
+
+See [Khoj Search](/search) for more details. Use [query filters](/advanced#query-filters) to limit entries to search
+
+[search_demo](https://user-images.githubusercontent.com/6413477/218801155-cd67e8b4-a770-404a-8179-d6b61caa0f93.mp4 ':include :type=mp4')
+
## Upgrade
-### 1. Upgrade Backend
- ```shell
- pip install --upgrade khoj-assistant
- ```
-### 2. Upgrade Plugin
1. Open *Community plugins* tab in Obsidian settings
2. Click the *Check for updates* button
3. Click the *Update* button next to Khoj, if available
-## Demo
-### Search Demo
-[demo](https://github-production-user-asset-6210df.s3.amazonaws.com/6413477/240061700-3e33d8ea-25bb-46c8-a3bf-c92f78d0f56b.mp4 ':include :type=mp4')
-
-#### Description
-
-1. Install Khoj via `pip` and start Khoj backend
- ```shell
- python -m pip install khoj-assistant && khoj
- ```
-2. Install Khoj plugin via Community Plugins settings pane on Obsidian app
- - Check the new Khoj plugin settings
- - Wait for Khoj backend to index markdown, PDF files in the current Vault
- - Open Khoj plugin on Obsidian via Search button on Left Pane
- - Search \"*Announce plugin to folks*\" in the [Obsidian Plugin docs](https://marcus.se.net/obsidian-plugin-docs/)
- - Jump to the [search result](https://marcus.se.net/obsidian-plugin-docs/publishing/submit-your-plugin)
-
-
## Troubleshooting
- Open the Khoj plugin settings pane, to configure Khoj
- Toggle Enable/Disable Khoj, if setting changes have not applied
- Click *Update* button to force index to refresh, if results are failing or stale
-
-## Current Limitations
-- The plugin loads the index of only one vault at a time.
- So notes across multiple vaults **cannot** be searched at the same time
diff --git a/docs/search.md b/docs/search.md
index 579034ec..f2387f06 100644
--- a/docs/search.md
+++ b/docs/search.md
@@ -1,7 +1,7 @@
## Khoj Search
### Use
1. Open Khoj Search
- - **On Web**: Open <http://localhost:42110/> in your web browser
+ - **On Web**: Open <https://app.khoj.dev/> in your web browser
- **On Obsidian**: Click the *Khoj search* icon 🔎 on the [Ribbon](https://help.obsidian.md/User+interface/Workspace/Ribbon) or Search for *Khoj: Search* in the [Command Palette](https://help.obsidian.md/Plugins/Command+palette)
- **On Emacs**: Run `M-x khoj <user-query>`
2. Query using natural language to find relevant entries from your knowledge base. Use [query filters](./advanced.md#query-filters) to limit entries to search
diff --git a/docs/setup.md b/docs/setup.md
index 2e02d271..a1d2c17c 100644
--- a/docs/setup.md
+++ b/docs/setup.md
@@ -3,41 +3,15 @@ These are the general setup instructions for Khoj.
- Make sure [python](https://realpython.com/installing-python/) and [pip](https://pip.pypa.io/en/stable/installation/) are installed on your machine
- Check the [Khoj Emacs docs](/emacs?id=setup) to setup Khoj with Emacs
- Its simpler as it can skip the server *install*, *run* and *configure* step below.
+ It's simpler as it can skip the server *install*, *run* and *configure* step below.
- Check the [Khoj Obsidian docs](/obsidian?id=setup) to setup Khoj with Obsidian
It's simpler as it can skip the *configure* step below.
-### 1. Install
+For Installation, you can either use Docker or install Khoj locally.
-#### 1.1 Local Server Setup
-Run the following command in your terminal to install the Khoj backend.
+### 1. Installation (Docker)
-- On Linux/MacOS
- ```shell
- python -m pip install khoj-assistant
- ```
-
-- On Windows
- ```shell
- py -m pip install khoj-assistant
- ```
-For more detailed Windows installation and troubleshooting, see [Windows Install](./windows_install.md).
-
-
-##### 1.1.1 Local Server Start
-
-Run the following command from your terminal to start the Khoj backend and open Khoj in your browser.
-
-```shell
-khoj
-```
-
-Khoj should now be running at http://localhost:42110. You can see the web UI in your browser.
-
-Note: To start Khoj automatically in the background use [Task scheduler](https://www.windowscentral.com/how-create-automated-task-using-task-scheduler-windows-10) on Windows or [Cron](https://en.wikipedia.org/wiki/Cron) on Mac, Linux (e.g with `@reboot khoj`)
-
-#### 1.2 Local Docker Setup
-Use the sample docker-compose [in Github](https://github.com/khoj-ai/khoj/blob/master/docker-compose.yml) to run Khoj in Docker. To start the container, run the following command in the same directory as the docker-compose.yml file. You'll have to configure the mounted directories to match your local knowledge base.
+Use the sample docker-compose [in Github](https://github.com/khoj-ai/khoj/blob/master/docker-compose.yml) to run Khoj in Docker. Start by configuring all the environment variables to your liking. Your admin account will automatically be created based on the admin credentials in that file, so pay attention to those. To start the container, run the following command in the same directory as the docker-compose.yml file. This will automatically set up the database and run the Khoj server.
```shell
docker-compose up
@@ -45,27 +19,131 @@ docker-compose up
Khoj should now be running at http://localhost:42110. You can see the web UI in your browser.
-#### 1.3 Download the desktop client [Optional]
+### 1. Installation (Local)
-You can use our desktop executables to select file paths and folders to index. You can simply select the folders or files, and they'll be automatically uploaded to the server. Once you specify a file or file path, you don't need to update the configuration again; it will grab any data diffs dynamically over time. This part is currently optional, but may make setup and configuration slightly easier. It removes the need for setting up custom file paths for your Khoj data configurations.
+#### Prerequisites
-**To download the desktop client, go to https://download.khoj.dev** and the correct executable for your OS will automatically start downloading. Once downloaded, you can configure your folders for indexing using the settings tab. To set your chat configuration, you'll have to use the web interface for the Khoj server you setup in the previous step.
+##### Install Postgres (with PgVector)
-### 1.4 Use (deprecated) desktop builds
+Khoj uses the `pgvector` package to store embeddings of your index in a Postgres database. In order to use this, you need to have Postgres installed.
-Before `v0.12.0``, we had self-contained desktop builds that included both the server and the client. These were difficult to maintain, but are still available as part of earlier releases. To find setup instructions, see here:
+
-- [Desktop Installation](desktop_installation.md)
-- [Windows Installation](windows_install.md)
+#### **MacOS**
-### 2. Configure
-1. Set `File`, `Folder` and hit `Save` in each Plugins you want to enable for Search on the Khoj config page
-2. Add your OpenAI API key to Chat Feature settings if you want to use Chat
-3. Click `Configure` and wait. The app will download ML models and index the content for search and (optionally) chat
+Install [Postgres.app](https://postgresapp.com/). This comes pre-installed with `pgvector` and relevant dependencies.
-![configure demo](https://user-images.githubusercontent.com/6413477/255307879-61247d3f-c69a-46ef-b058-9bc533cb5c72.mp4 ':include :type=mp4')
+#### **Windows**
-### 3. Install Interface Plugins (Optional)
+Use the [recommended installer](https://www.postgresql.org/download/windows/)
+
+#### **Linux**
+From [official instructions](https://wiki.postgresql.org/wiki/Apt)
+
+```bash
+sudo apt install -y postgresql-common
+sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh
+sudo apt install postgresql-16 postgresql-16-pgvector
+```
+
+##### **From Source**
+1. Follow instructions to [Install Postgres](https://www.postgresql.org/download/)
+2. Follow instructions to [Install PgVector](https://github.com/pgvector/pgvector#installation) in case you need to manually install it. Reproduced instructions below for convenience.
+
+```bash
+cd /tmp
+git clone --branch v0.5.1 https://github.com/pgvector/pgvector.git
+cd pgvector
+make
+make install # may need sudo
+```
+
+
+
+
+##### Create the Khoj database
+
+Make sure to update your environment variables to match your Postgres configuration if you're using a different name. The default values should work for most people.
+
+
+
+#### **MacOS**
+```bash
+createdb khoj -U postgres
+```
+
+#### **Windows**
+```bash
+createdb khoj -U postgres
+```
+
+#### **Linux**
+```bash
+sudo -u postgres createdb khoj
+```
+
+
+
+#### Install package
+
+##### Local Server Setup
+- *Make sure [python](https://realpython.com/installing-python/) and [pip](https://pip.pypa.io/en/stable/installation/) are installed on your machine*
+
+Run the following command in your terminal to install the Khoj backend.
+
+
+
+#### **MacOS**
+
+```shell
+python -m pip install khoj-assistant
+```
+
+#### **Windows**
+
+```shell
+py -m pip install khoj-assistant
+```
+For more detailed Windows installation and troubleshooting, see [Windows Install](./windows_install.md).
+
+#### **Linux**
+
+```shell
+python -m pip install khoj-assistant
+```
+
+
+
+##### Local Server Start
+
+Run the following command from your terminal to start the Khoj backend and open Khoj in your browser.
+
+```shell
+khoj --anonymous-mode
+```
+`--anonymous-mode` allows you to run the server without setting up Google credentials for login. This allows you to use any of the clients without a login wall. If you want to use Google login, you can skip this flag, but you will have to add your Google developer credentials.
+
+On the first run, you will be prompted to input credentials for your admin account and do some basic configuration for your chat model settings. Once created, you can go to http://localhost:42110/server/admin and login with the credentials you just created.
+
+Khoj should now be running at http://localhost:42110. You can see the web UI in your browser.
+
+Note: To start Khoj automatically in the background use [Task scheduler](https://www.windowscentral.com/how-create-automated-task-using-task-scheduler-windows-10) on Windows or [Cron](https://en.wikipedia.org/wiki/Cron) on Mac, Linux (e.g with `@reboot khoj`)
+
+
+### 2. Download the desktop client
+
+You can use our desktop executables to select file paths and folders to index. You can simply select the folders or files, and they'll be automatically uploaded to the server. Once you specify a file or file path, you don't need to update the configuration again; it will grab any data diffs dynamically over time.
+
+**To download the latest desktop client, go to https://download.khoj.dev** and the correct executable for your OS will automatically start downloading. Once downloaded, you can configure your folders for indexing using the settings tab. To set your chat configuration, you'll have to use the web interface for the Khoj server you set up in the previous step.
+
+To use the desktop client, you need to go to your Khoj server's settings page (http://localhost:42110/config) and copy the API key. Then, paste it into the desktop client's settings page. Once you've done that, you can select files and folders to index.
+
+### 3. Configure
+1. Go to http://localhost:42110/server/admin and login with your admin credentials. Go to the ChatModelOptions if you want to add additional models for chat.
+1. Select files and folders to index [using the desktop client](./setup.md?id=_2-download-the-desktop-client). When you click 'Save', the files will be sent to your server for indexing.
+ - Select Notion workspaces and Github repositories to index using the web interface.
+
+### 4. Install Client Plugins (Optional)
Khoj exposes a web interface to search, chat and configure by default.
The optional steps below allow using Khoj from within an existing application like Obsidian or Emacs.
@@ -75,9 +153,17 @@ The optional steps below allow using Khoj from within an existing application li
- **Khoj Emacs**:
[Install](/emacs?id=setup) khoj.el
+### 5. Use Khoj 🚀
+
+You can head to http://localhost:42110 to use the web interface. You can also use the desktop client to search and chat.
## Upgrade
### Upgrade Khoj Server
+
+
+
+#### **Local Setup**
+
```shell
pip install --upgrade khoj-assistant
```
@@ -88,6 +174,16 @@ pip install --upgrade khoj-assistant
pip install --upgrade --pre khoj-assistant
```
+#### **Docker**
+From the same directory where you have your `docker-compose` file, this will fetch the latest build and upgrade your server.
+
+```shell
+docker-compose up --build
+```
+
+
+
+
### Upgrade Khoj on Emacs
- Use your Emacs Package Manager to Upgrade
- See [khoj.el package setup](/emacs?id=setup) for details
@@ -100,8 +196,8 @@ pip install --upgrade --pre khoj-assistant
1. (Optional) Hit `Ctrl-C` in the terminal running the khoj server to stop it
2. Delete the khoj directory in your home folder (i.e `~/.khoj` on Linux, Mac or `C:\Users\<your-username>\.khoj` on Windows)
5. You might want to `rm -rf` the following directories:
-- `~/.khoj`
-- `~/.cache/gpt4all`
+ - `~/.khoj`
+ - `~/.cache/gpt4all`
3. Uninstall the khoj server with `pip uninstall khoj-assistant`
4. (Optional) Uninstall khoj.el or the khoj obsidian plugin in the standard way on Emacs, Obsidian
diff --git a/docs/telemetry.md b/docs/telemetry.md
index 11490c3a..060bcdab 100644
--- a/docs/telemetry.md
+++ b/docs/telemetry.md
@@ -1,4 +1,4 @@
-# Telemetry
+# Telemetry (self-hosting)
We collect some high level, anonymized metadata about usage of Khoj. This includes:
- Client (Web, Emacs, Obsidian)
diff --git a/docs/web.md b/docs/web.md
index 90791c6e..21571f46 100644
--- a/docs/web.md
+++ b/docs/web.md
@@ -1,19 +1,18 @@
-