mirror of
https://github.com/khoj-ai/khoj.git
synced 2025-02-17 08:04:21 +00:00
Address merge conflicts from master branch
This commit is contained in:
commit
ea62d47aa5
41 changed files with 1599 additions and 515 deletions
27
.github/workflows/publish.yml
vendored
27
.github/workflows/publish.yml
vendored
|
@ -1,14 +1,16 @@
|
|||
name: publish
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
tags:
|
||||
- v*
|
||||
branches:
|
||||
- 'master'
|
||||
paths:
|
||||
- src/**
|
||||
- setup.py
|
||||
- .github/workflows/publish.yml
|
||||
push:
|
||||
pull_request:
|
||||
branches:
|
||||
- 'master'
|
||||
paths:
|
||||
|
@ -38,11 +40,16 @@ jobs:
|
|||
pip install --upgrade .
|
||||
|
||||
- name: Publish Release to PyPI
|
||||
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
|
||||
if: startsWith(github.ref, 'refs/tags')
|
||||
env:
|
||||
TWINE_USERNAME: __token__
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_API_KEY }}
|
||||
run: |
|
||||
# Setup Environment for Reproducible Builds
|
||||
export PYTHONHASHSEED=42
|
||||
export SOURCE_DATE_EPOCH=$(git log -1 --pretty=%ct)
|
||||
|
||||
# Build and Upload PyPi Package
|
||||
rm -rf dist
|
||||
python -m build
|
||||
twine check dist/*
|
||||
|
@ -54,7 +61,14 @@ jobs:
|
|||
TWINE_USERNAME: __token__
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_API_KEY }}
|
||||
run: |
|
||||
# Set Pre-Release Version
|
||||
sed -E -i "s/version=(.*)',/version=\1a$(date +%s)',/g" setup.py
|
||||
|
||||
# Setup Environment for Reproducible Builds
|
||||
export PYTHONHASHSEED=42
|
||||
export SOURCE_DATE_EPOCH=$(git log -1 --pretty=%ct)
|
||||
|
||||
# Build and Upload PyPi Package
|
||||
rm -rf dist
|
||||
python -m build
|
||||
twine check dist/*
|
||||
|
@ -67,7 +81,14 @@ jobs:
|
|||
TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_KEY }}
|
||||
PULL_REQUEST_NUMBER: ${{ github.event.number }}
|
||||
run: |
|
||||
# Set Development Release Version
|
||||
sed -E -i "s/version=(.*)',/version=\1.dev$PULL_REQUEST_NUMBER$(date +%s)',/g" setup.py
|
||||
|
||||
# Setup Environment for Reproducible Builds
|
||||
export PYTHONHASHSEED=42
|
||||
export SOURCE_DATE_EPOCH=$(git log -1 --pretty=%ct)
|
||||
|
||||
# Build and Upload PyPi Package
|
||||
rm -rf dist
|
||||
python -m build
|
||||
twine check dist/*
|
||||
|
|
117
.github/workflows/release.yml
vendored
Normal file
117
.github/workflows/release.yml
vendored
Normal file
|
@ -0,0 +1,117 @@
|
|||
# Build the Khoj desktop app on Linux, macOS and Windows, upload each
# installer as a workflow artifact and, for tag pushes, attach it to the
# GitHub release.
name: release

on:
  # Manual run: the version input is used for the Debian package version
  workflow_dispatch:
    inputs:
      version:
        description: 'Version Number'
        required: true
        type: string
  # Automatic run on version tags
  push:
    tags:
      - 'v*'

jobs:
  publish:
    strategy:
      matrix:
        # `extension` selects the installer file uploaded for each OS
        include:
          - os: ubuntu-latest
            extension: deb
          - os: macos-latest
            extension: dmg
          - os: windows-latest
            extension: exe
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.9
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      - name: Install Dependencies
        shell: bash
        run: |
          if [ "$RUNNER_OS" == "Linux" ]; then
            # Refresh the package index first so the install does not fail
            # against a stale index on the hosted runner image
            sudo apt update
            sudo apt install libegl1 libxcb-xinerama0 python3-tk -y
          fi
          python -m pip install --upgrade pip
          pip install pyinstaller

      - name: Install Khoj App
        run: |
          pip install --upgrade .

      - name: Package Khoj App
        shell: bash
        run: |
          # Setup Environment for Reproducible Builds
          export PYTHONHASHSEED=42
          export SOURCE_DATE_EPOCH=$(git log -1 --pretty=%ct)

          pyinstaller --noconfirm Khoj.spec
          if [ "$RUNNER_OS" == "Windows" ]; then
            mv dist/Khoj.exe "dist/khoj_${GITHUB_REF_NAME}_amd64.exe"
          fi

      - name: Create Mac App DMG
        if: matrix.os == 'macos-latest'
        run: |
          # Install Mac DMG Creator
          brew install create-dmg
          # Copy app to separate dmg folder
          mkdir -p dist/dmg && cp -r dist/Khoj.app dist/dmg
          # Create disk image with the app
          # The ref name is expanded inside the quotes so the output path
          # stays a single argument
          create-dmg \
            --volname "Khoj" \
            --volicon "src/interface/web/assets/icons/favicon.icns" \
            --window-pos 200 120 \
            --window-size 600 300 \
            --icon-size 100 \
            --icon "Khoj.app" 175 120 \
            --hide-extension "Khoj.app" \
            --app-drop-link 425 120 \
            "dist/khoj_${GITHUB_REF_NAME}_amd64.dmg" \
            "dist/dmg/"

      - uses: ruby/setup-ruby@v1
        if: matrix.os == 'ubuntu-latest'
        with:
          ruby-version: '3.0'

      - name: Create Debian Package
        if: matrix.os == 'ubuntu-latest'
        shell: bash
        env:
          # Empty on tag pushes; the script then derives it from the tag name
          DEBIAN_PACKAGE_VERSION: ${{ inputs.version }}
        run: |
          # Install Debian Packager
          gem install fpm

          # Copy app files into expected output directory structure
          mkdir -p package/opt package/usr/share/applications package/usr/share/icons/hicolor/128x128/apps
          cp -r dist/Khoj package/opt/Khoj
          cp src/interface/web/assets/icons/favicon-128x128.png package/usr/share/icons/hicolor/128x128/apps/Khoj.png
          cp Khoj.desktop package/usr/share/applications

          # Fix permissions to be usable by non-root users
          find package/usr/share -type f -exec chmod 644 -- {} +
          chmod 755 package/opt/Khoj

          # Package the app
          if [ -z "$DEBIAN_PACKAGE_VERSION" ]; then
            # Strip the leading "v" from the tag name, e.g. v0.1.6 -> 0.1.6
            DEBIAN_PACKAGE_VERSION=$(echo "$GITHUB_REF_NAME" | sed -E 's/v(.*)/\1/g')
          fi
          fpm -C package -s dir -t deb -n Khoj --version "$DEBIAN_PACKAGE_VERSION" -p "dist/khoj_${GITHUB_REF_NAME}_amd64.deb"

      - uses: actions/upload-artifact@v3
        with:
          name: khoj_${{github.ref_name}}_amd64.${{matrix.extension}}
          path: dist/khoj_${{github.ref_name}}_amd64.${{matrix.extension}}

      - name: Release
        uses: softprops/action-gh-release@v1
        if: startsWith(github.ref, 'refs/tags/')
        with:
          files: dist/khoj_${{github.ref_name}}_amd64.${{matrix.extension}}
|
1
.github/workflows/test.yml
vendored
1
.github/workflows/test.yml
vendored
|
@ -34,6 +34,7 @@ jobs:
|
|||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
sudo apt install libegl1 -y
|
||||
python -m pip install --upgrade pip
|
||||
pip install pytest
|
||||
|
||||
|
|
7
Khoj.desktop
Normal file
7
Khoj.desktop
Normal file
|
@ -0,0 +1,7 @@
|
|||
[Desktop Entry]
|
||||
Type=Application
|
||||
Name=Khoj
|
||||
Comment=A natural language search engine for your personal notes, transactions and images.
|
||||
Path=/opt
|
||||
Exec=/opt/Khoj
|
||||
Icon=Khoj
|
115
Khoj.spec
Normal file
115
Khoj.spec
Normal file
|
@ -0,0 +1,115 @@
|
|||
# -*- mode: python ; coding: utf-8 -*-
|
||||
from os.path import join
|
||||
from platform import system
|
||||
from PyInstaller.utils.hooks import copy_metadata
|
||||
import sysconfig
|
||||
|
||||
# Data files to bundle: the web interface assets and the installed
# transformers package, plus metadata of the packages listed below
datas = [
    ('src/interface/web', 'src/interface/web'),
    (f'{sysconfig.get_paths()["purelib"]}/transformers', 'transformers'),
]
for package_name in ('tqdm', 'regex', 'requests', 'packaging', 'filelock', 'numpy', 'tokenizers'):
    datas += copy_metadata(package_name)

block_cipher = None

a = Analysis(
    ['src/main.py'],
    pathex=[],
    binaries=[],
    datas=datas,
    hiddenimports=['huggingface_hub.repository'],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)

# Filter out unused and/or duplicate shared libs
torch_lib_paths = {
    join('torch', 'lib', 'libtorch_cuda.so'),
    join('torch', 'lib', 'libtorch_cpu.so'),
}
os_path_separator = '\\' if system() == 'Windows' else '/'
# Substrings identifying the torch extension modules to drop from the data files
torch_extension_markers = (
    f'torch{os_path_separator}_C.cp',
    f'torch{os_path_separator}_dl.cp',
)
a.datas = [
    entry
    for entry in a.datas
    if entry[0] not in torch_lib_paths
    and not any(marker in entry[0] for marker in torch_extension_markers)
]

pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)

# Executable options shared by every platform; only the icon and the
# splash screen inputs differ between the two branches below
exe_options = dict(
    name='Khoj',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    upx_exclude=[],
    runtime_tmpdir=None,
    console=False,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch='x86_64',
    codesign_identity=None,
    entitlements_file=None,
)

if system() != 'Darwin':
    # Add Splash screen to show on app launch
    splash = Splash(
        'src/interface/web/assets/icons/favicon-144x144.png',
        binaries=a.binaries,
        datas=a.datas,
        text_pos=(10, 160),
        text_size=12,
        text_color='black',
        minify_script=True,
        always_on_top=True
    )

    exe = EXE(
        pyz,
        a.scripts,
        a.binaries,
        a.zipfiles,
        a.datas,
        splash,
        splash.binaries,
        [],
        icon='src/interface/web/assets/icons/favicon-144x144.ico',
        **exe_options,
    )
else:
    # On macOS: no splash screen; wrap the executable in a Khoj.app bundle
    exe = EXE(
        pyz,
        a.scripts,
        a.binaries,
        a.zipfiles,
        a.datas,
        [],
        icon='src/interface/web/assets/icons/favicon.icns',
        **exe_options,
    )
    app = BUNDLE(
        exe,
        name='Khoj.app',
        icon='src/interface/web/assets/icons/favicon.icns',
        bundle_identifier=None,
    )
|
145
Readme.md
145
Readme.md
|
@ -2,6 +2,7 @@
|
|||
[![build](https://github.com/debanjum/khoj/actions/workflows/build.yml/badge.svg)](https://github.com/debanjum/khoj/actions/workflows/build.yml)
|
||||
[![test](https://github.com/debanjum/khoj/actions/workflows/test.yml/badge.svg)](https://github.com/debanjum/khoj/actions/workflows/test.yml)
|
||||
[![publish](https://github.com/debanjum/khoj/actions/workflows/publish.yml/badge.svg)](https://github.com/debanjum/khoj/actions/workflows/publish.yml)
|
||||
[![release](https://github.com/debanjum/khoj/actions/workflows/release.yml/badge.svg)](https://github.com/debanjum/khoj/actions/workflows/release.yml)
|
||||
|
||||
*A natural language search engine for your personal notes, transactions and images*
|
||||
|
||||
|
@ -11,6 +12,7 @@
|
|||
- [Demo](#Demo)
|
||||
- [Description](#Description)
|
||||
- [Analysis](#Analysis)
|
||||
- [Interfaces](#Interfaces)
|
||||
- [Architecture](#Architecture)
|
||||
- [Setup](#Setup)
|
||||
- [Install](#1-Install)
|
||||
|
@ -34,7 +36,7 @@
|
|||
|
||||
## Features
|
||||
|
||||
- **Natural**: Advanced Natural language understanding using Transformer based ML Models
|
||||
- **Natural**: Advanced natural language understanding using Transformer based ML Models
|
||||
- **Local**: Your personal data stays local. All search, indexing is done on your machine[\*](https://github.com/debanjum/khoj#miscellaneous)
|
||||
- **Incremental**: Incremental search for a fast, search-as-you-type experience
|
||||
- **Pluggable**: Modular architecture makes it easy to plug in new data sources, frontends and ML models
|
||||
|
@ -43,12 +45,14 @@
|
|||
|
||||
## Demo
|
||||
|
||||
<https://user-images.githubusercontent.com/6413477/181664862-31565b0a-0e64-47e1-a79a-599dfc486c74.mp4>
|
||||
https://user-images.githubusercontent.com/6413477/184735169-92c78bf1-d827-4663-9087-a1ea194b8f4b.mp4
|
||||
|
||||
### Description
|
||||
|
||||
- User searches for \"*Setup editor*\"
|
||||
- The demo looks for the most relevant section in this readme and the [khoj.el readme](https://github.com/debanjum/khoj/tree/master/src/interface/emacs)
|
||||
- Install Khoj via pip
|
||||
- Start Khoj app
|
||||
- Add this readme and [khoj.el readme](https://github.com/debanjum/khoj/tree/master/src/interface/emacs) as org-mode for Khoj to index
|
||||
- Search \"*Setup editor*\" on the Web and Emacs. Re-rank the results for better accuracy
|
||||
- Top result is what we are looking for, the [section to Install Khoj.el on Emacs](https://github.com/debanjum/khoj/tree/master/src/interface/emacs#installation)
|
||||
|
||||
### Analysis
|
||||
|
@ -56,7 +60,11 @@
|
|||
- The results do not have any words used in the query
|
||||
- *Based on the top result it seems the re-ranking model understands that Emacs is an editor?*
|
||||
- The results incrementally update as the query is entered
|
||||
- The results are re-ranked, for better accuracy, once user is idle
|
||||
- The results are re-ranked, for better accuracy, once user hits enter
|
||||
|
||||
### Interfaces
|
||||
|
||||
![](https://github.com/debanjum/khoj/blob/master/docs/interfaces.png)
|
||||
|
||||
## Architecture
|
||||
|
||||
|
@ -64,56 +72,58 @@
|
|||
|
||||
## Setup
|
||||
### 1. Install
|
||||
``` shell
|
||||
pip install khoj-assistant
|
||||
```
|
||||
|
||||
### 2. Configure
|
||||
- Set `input-files` or `input-filter` in each relevant `content-type` section of [khoj_sample.yml](./config/khoj_sample.yml)
|
||||
- Set `input-directories` field in `content-type.image` section
|
||||
- Delete `content-type`, `processor` sub-sections irrelevant for your use-case
|
||||
```shell
|
||||
pip install khoj-assistant
|
||||
```
|
||||
|
||||
### 3. Run
|
||||
``` shell
|
||||
khoj config/khoj_sample.yml -vv
|
||||
```
|
||||
Loads ML model, generates embeddings and exposes API to search notes, images, transactions etc specified in config YAML
|
||||
### 2. Start App
|
||||
|
||||
```shell
|
||||
khoj
|
||||
```
|
||||
|
||||
### 3. Configure
|
||||
|
||||
1. Enable content types and point to files to search in the First Run Screen that pops up on app start
|
||||
2. Click configure and wait. The app will load the ML model, generate embeddings and expose the search API
|
||||
|
||||
## Use
|
||||
|
||||
- **Khoj via Web**
|
||||
- Open <http://localhost:8000/>
|
||||
- Open <http://localhost:8000/> via desktop interface or directly
|
||||
- **Khoj via Emacs**
|
||||
- [Install](https://github.com/debanjum/khoj/tree/master/src/interface/emacs#installation) [khoj.el](./src/interface/emacs/khoj.el)
|
||||
- Run `M-x khoj <user-query>`
|
||||
- **Khoj via API**
|
||||
- See [Khoj FastAPI Docs](http://localhost:8000/docs), [Khoj FastAPI ReDocs](http://localhost:8000/redocs)
|
||||
- See the Khoj FastAPI [Swagger Docs](http://localhost:8000/docs), [ReDocs](http://localhost:8000/redocs)
|
||||
|
||||
## Upgrade
|
||||
``` shell
|
||||
|
||||
```shell
|
||||
pip install --upgrade khoj-assistant
|
||||
```
|
||||
|
||||
## Troubleshoot
|
||||
|
||||
- Symptom: Errors out complaining about Tensors mismatch, null etc
|
||||
- Mitigation: Delete `content-type` > `image` section from `khoj_sample.yml`
|
||||
|
||||
- Mitigation: Disable `image` search on the desktop GUI
|
||||
- Symptom: Errors out with \"Killed\" in error message in Docker
|
||||
- Fix: Increase RAM available to Docker Containers in Docker Settings
|
||||
- Refer: [StackOverflow Solution](https://stackoverflow.com/a/50770267), [Configure Resources on Docker for Mac](https://docs.docker.com/desktop/mac/#resources)
|
||||
|
||||
## Miscellaneous
|
||||
|
||||
- The experimental [chat](localhost:8000/chat) API endpoint uses the [OpenAI API](https://openai.com/api/)
|
||||
- It is disabled by default
|
||||
- To use it add your `openai-api-key` to config.yml
|
||||
- The beta [chat](http://localhost:8000/beta/chat) and [search](http://localhost:8000/beta/search) API endpoints use [OpenAI API](https://openai.com/api/)
|
||||
- It is disabled by default
|
||||
- To use it add your `openai-api-key` via the app configure screen
|
||||
- Warning: *If you use the above beta APIs, your query and top result(s) will be sent to OpenAI for processing*
|
||||
|
||||
## Performance
|
||||
|
||||
### Query performance
|
||||
|
||||
- Semantic search using the bi-encoder is fairly fast at \<5 ms
|
||||
- Semantic search using the bi-encoder is fairly fast at \<50 ms
|
||||
- Reranking using the cross-encoder is slower at \<2s on 15 results. Tweak `top_k` to trade off speed for accuracy of results
|
||||
- Applying explicit filters is very slow currently at \~6s. This is because the filters are rudimentary. Considerable speed-ups can be achieved using indexes etc
|
||||
|
||||
|
@ -133,39 +143,48 @@ pip install --upgrade khoj-assistant
|
|||
### Setup
|
||||
#### Using Pip
|
||||
##### 1. Install
|
||||
``` shell
|
||||
git clone https://github.com/debanjum/khoj && cd khoj
|
||||
python -m venv .venv && source .venv/bin/activate
|
||||
pip install
|
||||
```
|
||||
|
||||
```shell
|
||||
git clone https://github.com/debanjum/khoj && cd khoj
|
||||
python3 -m venv .venv && source .venv/bin/activate
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
##### 2. Configure
|
||||
- Set `input-files` or `input-filter` in each relevant `content-type` section of `khoj_sample.yml`
|
||||
- Set `input-directories` field in `image` `content-type` section
|
||||
- Delete `content-type`, `processor` sub-sections irrelevant for your use-case
|
||||
|
||||
- Copy the `config/khoj_sample.yml` to `~/.khoj/khoj.yml`
|
||||
- Set `input-files` or `input-filter` in each relevant `content-type` section of `~/.khoj/khoj.yml`
|
||||
- Set `input-directories` field in `image` `content-type` section
|
||||
- Delete `content-type` and `processor` sub-section(s) irrelevant for your use-case
|
||||
|
||||
##### 3. Run
|
||||
``` shell
|
||||
khoj config/khoj_sample.yml -vv
|
||||
```
|
||||
Load ML model, generate embeddings and expose API to query notes, images, transactions etc specified in config YAML
|
||||
|
||||
```shell
|
||||
khoj -vv
|
||||
```
|
||||
Load ML model, generate embeddings and expose API to query notes, images, transactions etc specified in config YAML
|
||||
|
||||
##### 4. Upgrade
|
||||
|
||||
```shell
|
||||
# To Upgrade To Latest Stable Release
|
||||
# Maps to the latest tagged version of khoj on master branch
|
||||
pip install --upgrade khoj-assistant
|
||||
|
||||
# To Upgrade To Latest Pre-Release
|
||||
# Maps to the latest commit on the master branch
|
||||
pip install --upgrade --pre khoj-assistant
|
||||
|
||||
# To Upgrade To Specific Development Release
|
||||
pip install -r testpypi khoj-assistant==0.1.5.dev491659577806
|
||||
# To Upgrade To Specific Development Release.
|
||||
# Useful to test, review a PR.
|
||||
# Note: khoj-assistant is published to test PyPi on creating a PR
|
||||
pip install -i https://test.pypi.org/simple/ khoj-assistant==0.1.5.dev57166025766
|
||||
```
|
||||
|
||||
#### Using Docker
|
||||
##### 1. Clone
|
||||
|
||||
``` shell
|
||||
```shell
|
||||
git clone https://github.com/debanjum/khoj && cd khoj
|
||||
```
|
||||
|
||||
|
@ -176,7 +195,7 @@ git clone https://github.com/debanjum/khoj && cd khoj
|
|||
|
||||
##### 3. Run
|
||||
|
||||
``` shell
|
||||
```shell
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
|
@ -184,38 +203,39 @@ docker-compose up -d
|
|||
|
||||
##### 4. Upgrade
|
||||
|
||||
``` shell
|
||||
```shell
|
||||
docker-compose build --pull
|
||||
```
|
||||
|
||||
#### Using Conda
|
||||
##### 1. Install Dependencies
|
||||
- [Install Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html) \[Required\]
|
||||
- Install Exiftool \[Optional\]
|
||||
``` shell
|
||||
sudo apt -y install libimage-exiftool-perl
|
||||
```
|
||||
- [Install Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html) \[Required\]
|
||||
- Install Exiftool \[Optional\]
|
||||
``` shell
|
||||
sudo apt -y install libimage-exiftool-perl
|
||||
```
|
||||
|
||||
##### 2. Install Khoj
|
||||
``` shell
|
||||
git clone https://github.com/debanjum/khoj && cd khoj
|
||||
conda env create -f config/environment.yml
|
||||
conda activate khoj
|
||||
```
|
||||
```shell
|
||||
git clone https://github.com/debanjum/khoj && cd khoj
|
||||
conda env create -f config/environment.yml
|
||||
conda activate khoj
|
||||
```
|
||||
|
||||
##### 3. Configure
|
||||
- Set `input-files` or `input-filter` in each relevant `content-type` section of `khoj_sample.yml`
|
||||
- Set `input-directories` field in `image` `content-type` section
|
||||
- Delete `content-type`, `processor` sub-sections irrelevant for your use-case
|
||||
- Copy the `config/khoj_sample.yml` to `~/.khoj/khoj.yml`
|
||||
- Set `input-files` or `input-filter` in each relevant `content-type` section of `~/.khoj/khoj.yml`
|
||||
- Set `input-directories` field in `image` `content-type` section
|
||||
- Delete `content-type`, `processor` sub-sections irrelevant for your use-case
|
||||
|
||||
##### 4. Run
|
||||
``` shell
|
||||
python3 -m src.main config/khoj_sample.yml -vv
|
||||
```
|
||||
Load ML model, generate embeddings and expose API to query notes, images, transactions etc specified in config YAML
|
||||
```shell
|
||||
python3 -m src.main -vv
|
||||
```
|
||||
Load ML model, generate embeddings and expose API to query notes, images, transactions etc specified in config YAML
|
||||
|
||||
##### 5. Upgrade
|
||||
``` shell
|
||||
```shell
|
||||
cd khoj
|
||||
git pull origin master
|
||||
conda deactivate khoj
|
||||
|
@ -224,8 +244,7 @@ conda activate khoj
|
|||
```
|
||||
|
||||
### Test
|
||||
|
||||
``` shell
|
||||
```shell
|
||||
pytest
|
||||
```
|
||||
|
||||
|
|
BIN
docs/demo.mp4
BIN
docs/demo.mp4
Binary file not shown.
BIN
docs/interfaces.png
Normal file
BIN
docs/interfaces.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 606 KiB |
12
setup.py
12
setup.py
|
@ -7,7 +7,7 @@ this_directory = Path(__file__).parent
|
|||
|
||||
setup(
|
||||
name='khoj-assistant',
|
||||
version='0.1.5',
|
||||
version='0.1.6',
|
||||
description="A natural language search engine for your personal notes, transactions and images",
|
||||
long_description=(this_directory / "Readme.md").read_text(encoding="utf-8"),
|
||||
long_description_content_type="text/markdown",
|
||||
|
@ -24,8 +24,8 @@ setup(
|
|||
),
|
||||
install_requires=[
|
||||
"numpy == 1.22.4",
|
||||
"torch == 1.11.0",
|
||||
"torchvision == 0.12.0",
|
||||
"torch == 1.12.1",
|
||||
"torchvision == 0.13.1",
|
||||
"transformers == 4.21.0",
|
||||
"sentence-transformers == 2.1.0",
|
||||
"openai == 0.20.0",
|
||||
|
@ -36,9 +36,10 @@ setup(
|
|||
"jinja2 == 3.1.2",
|
||||
"pyyaml == 6.0",
|
||||
"pytest == 7.1.2",
|
||||
"pillow >= 9.0.1",
|
||||
"pillow == 9.2.0",
|
||||
"aiofiles == 0.8.0",
|
||||
"dateparser == 1.1.1",
|
||||
"pyqt6 == 6.3.1",
|
||||
],
|
||||
include_package_data=True,
|
||||
entry_points={"console_scripts": ["khoj = src.main:run"]},
|
||||
|
@ -47,9 +48,6 @@ setup(
|
|||
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.5",
|
||||
"Programming Language :: Python :: 3.6",
|
||||
"Programming Language :: Python :: 3.7",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
|
|
102
src/configure.py
Normal file
102
src/configure.py
Normal file
|
@ -0,0 +1,102 @@
|
|||
# System Packages
|
||||
import sys
|
||||
|
||||
# External Packages
|
||||
import torch
|
||||
import json
|
||||
|
||||
# Internal Packages
|
||||
from src.processor.ledger.beancount_to_jsonl import beancount_to_jsonl
|
||||
from src.processor.markdown.markdown_to_jsonl import markdown_to_jsonl
|
||||
from src.processor.panchayat.panchayat_to_jsonl import panchayat_to_jsonl
|
||||
from src.processor.org_mode.org_to_jsonl import org_to_jsonl
|
||||
from src.search_type import image_search, text_search
|
||||
from src.utils.config import SearchType, SearchModels, ProcessorConfigModel, ConversationProcessorConfigModel
|
||||
from src.utils import state
|
||||
from src.utils.helpers import get_absolute_path
|
||||
from src.utils.rawconfig import FullConfig, ProcessorConfig
|
||||
|
||||
|
||||
def configure_server(args, required=False):
    """Load the parsed config into shared state and initialize search and processors.

    Args:
        args: parsed arguments; reads ``args.config`` and ``args.regenerate``.
        required: when ``args.config`` is ``None``, exit the process with
            status 1 if true, otherwise return without doing anything.
    """
    user_config = args.config

    if user_config is None:
        if not required:
            return
        print('Exiting as Khoj is not configured. Configure the application to use it.')
        sys.exit(1)

    state.config = user_config

    # Initialize the search model from Config
    state.model = configure_search(state.model, state.config, args.regenerate, verbose=state.verbose)

    # Initialize Processor from Config
    state.processor_config = configure_processor(user_config.processor, verbose=state.verbose)
|
||||
|
||||
|
||||
def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, t: SearchType = None, verbose: int = 0):
    """Set up the search model for each configured content type.

    Args:
        model: object whose ``*_search`` attributes are assigned in place.
        config: application config. A content type is set up only when its
            entry under ``config.content_type`` is truthy; the matching entry
            under ``config.search_type`` is passed to the setup call.
        regenerate: forwarded to every setup call as ``regenerate``.
        t: restrict setup to this search type. ``None`` (the default) sets up
            every configured content type.
        verbose: forwarded to every setup call as ``verbose``.

    Returns:
        The same ``model`` object that was passed in.
    """
    def is_selected(search_type):
        # Identity check against the None singleton (PEP 8) instead of `== None`
        return t is None or t == search_type

    # Initialize Org Notes Search
    if is_selected(SearchType.Org) and config.content_type.org:
        # Extract Entries, Generate Notes Embeddings
        model.orgmode_search = text_search.setup(org_to_jsonl, config.content_type.org, search_config=config.search_type.asymmetric, regenerate=regenerate, verbose=verbose)

    # Initialize Org Music Search
    if is_selected(SearchType.Music) and config.content_type.music:
        # Extract Entries, Generate Music Embeddings
        model.music_search = text_search.setup(org_to_jsonl, config.content_type.music, search_config=config.search_type.asymmetric, regenerate=regenerate, verbose=verbose)

    # Initialize Markdown Search
    if is_selected(SearchType.Markdown) and config.content_type.markdown:
        # Extract Entries, Generate Markdown Embeddings
        model.markdown_search = text_search.setup(markdown_to_jsonl, config.content_type.markdown, search_config=config.search_type.asymmetric, regenerate=regenerate, verbose=verbose)

    # Initialize Panchayat Search
    if is_selected(SearchType.Panchayat) and config.content_type.panchayat:
        # Extract Entries, Generate Yaml Embeddings
        model.panchayat_search = text_search.setup(panchayat_to_jsonl, config.content_type.panchayat, search_config=config.search_type.asymmetric, regenerate=regenerate, verbose=verbose)

    # Initialize Ledger Search
    # Ledger is the only text type set up with the symmetric search config
    if is_selected(SearchType.Ledger) and config.content_type.ledger:
        # Extract Entries, Generate Ledger Embeddings
        model.ledger_search = text_search.setup(beancount_to_jsonl, config.content_type.ledger, search_config=config.search_type.symmetric, regenerate=regenerate, verbose=verbose)

    # Initialize Image Search
    if is_selected(SearchType.Image) and config.content_type.image:
        # Extract Entries, Generate Image Embeddings
        model.image_search = image_search.setup(config.content_type.image, search_config=config.search_type.image, regenerate=regenerate, verbose=verbose)

    return model
|
||||
|
||||
|
||||
def configure_processor(processor_config: ProcessorConfig, verbose: int):
    """Build the processor model from the processor section of the config.

    Args:
        processor_config: processor config; only ``conversation`` is read.
        verbose: passed through to the conversation processor setup.

    Returns:
        A ``ProcessorConfigModel``, with ``conversation`` assigned when
        ``processor_config.conversation`` is truthy, or ``None`` when
        ``processor_config`` itself is falsy.
    """
    if processor_config:
        processor = ProcessorConfigModel()

        # Initialize Conversation Processor
        conversation_config = processor_config.conversation
        if conversation_config:
            processor.conversation = configure_conversation_processor(conversation_config, verbose)

        return processor

    return None
|
||||
|
||||
|
||||
def configure_conversation_processor(conversation_processor_config, verbose: int):
    """Create the conversation processor and load its logs from disk if present.

    Args:
        conversation_processor_config: passed to ``ConversationProcessorConfigModel``.
        verbose: passed to ``ConversationProcessorConfigModel``.

    Returns:
        The conversation processor. ``meta_log`` holds the JSON content of the
        conversation logfile when that file exists; otherwise ``meta_log`` is
        an empty dict and ``chat_session`` an empty string.
    """
    processor = ConversationProcessorConfigModel(conversation_processor_config, verbose)
    logfile = processor.conversation_logfile

    if processor.verbose:
        print('INFO:\tLoading conversation logs from disk...')

    if not logfile.expanduser().absolute().is_file():
        # Initialize Conversation Logs
        processor.meta_log = {}
        processor.chat_session = ""
        return processor

    # Load Metadata Logs from Conversation Logfile
    with open(get_absolute_path(logfile), 'r') as log:
        processor.meta_log = json.load(log)

    # Printed regardless of the verbose setting
    print('INFO:\tConversation logs loaded from disk.')

    return processor
|
0
src/interface/desktop/__init__.py
Normal file
0
src/interface/desktop/__init__.py
Normal file
72
src/interface/desktop/file_browser.py
Normal file
72
src/interface/desktop/file_browser.py
Normal file
|
@ -0,0 +1,72 @@
|
|||
# External Packages
|
||||
from PyQt6 import QtWidgets
|
||||
from PyQt6.QtCore import QDir
|
||||
|
||||
# Internal Packages
|
||||
from src.utils.config import SearchType
|
||||
from src.utils.helpers import is_none_or_empty
|
||||
|
||||
|
||||
class FileBrowser(QtWidgets.QWidget):
    """Row widget with a label, an editable list of paths and an 'Add' button.

    The text field holds one path per line. The 'Add' button opens a folder
    chooser for image search and a file chooser for every other search type,
    then appends the selection to the paths already listed.
    """

    def __init__(self, title, search_type: SearchType=None, default_files: list=None):
        """
        Args:
            title: text shown in the label.
            search_type: decides the file dialog filter and whether files or
                a folder are picked.
            default_files: paths to show initially; ``None`` means no paths.
        """
        QtWidgets.QWidget.__init__(self)
        layout = QtWidgets.QHBoxLayout()
        self.setLayout(layout)
        self.search_type = search_type

        self.filter_name = self.getFileFilter(search_type)
        self.dirpath = QDir.homePath()

        self.label = QtWidgets.QLabel()
        self.label.setText(title)
        self.label.setFixedWidth(95)
        self.label.setWordWrap(True)
        layout.addWidget(self.label)

        self.lineEdit = QtWidgets.QPlainTextEdit(self)
        self.lineEdit.setFixedWidth(330)
        # None replaces the former mutable `[]` default argument
        self.setFiles([] if default_files is None else default_files)
        # Size the field once for the initial content, then on every edit
        self.updateFieldHeight()
        self.lineEdit.textChanged.connect(self.updateFieldHeight)
        layout.addWidget(self.lineEdit)

        self.button = QtWidgets.QPushButton('Add')
        self.button.clicked.connect(self.storeFilesSelectedInFileDialog)
        layout.addWidget(self.button)
        layout.addStretch()

    def getFileFilter(self, search_type):
        """Return the file dialog name filter for `search_type`, or None if it has none."""
        if search_type == SearchType.Org:
            return 'Org-Mode Files (*.org)'
        elif search_type == SearchType.Ledger:
            return 'Beancount Files (*.bean *.beancount)'
        elif search_type == SearchType.Markdown:
            return 'Markdown Files (*.md *.markdown)'
        elif search_type == SearchType.Music:
            return 'Org-Music Files (*.org)'
        elif search_type == SearchType.Image:
            # Name filters are glob patterns: `*.jp[e]g` matched only `.jpeg`,
            # so both extensions are listed explicitly
            return 'Images (*.jpg *.jpeg)'
        return None

    def storeFilesSelectedInFileDialog(self):
        """Open the chooser dialog and append the selection to the listed paths."""
        filepaths = self.getPaths()
        if self.search_type == SearchType.Image:
            # Image search is pointed at a folder rather than individual files
            filepaths.append(QtWidgets.QFileDialog.getExistingDirectory(self, caption='Choose Folder',
                                                                        directory=self.dirpath))
        else:
            # getOpenFileNames returns a tuple; element 0 is the list of chosen files
            filepaths.extend(QtWidgets.QFileDialog.getOpenFileNames(self, caption='Choose Files',
                                                                    directory=self.dirpath,
                                                                    filter=self.filter_name)[0])
        # setFiles drops empty entries
        self.setFiles(filepaths)

    def setFiles(self, paths: list):
        """Store the non-empty `paths` and show them one per line in the text field."""
        self.filepaths = [path for path in paths if not is_none_or_empty(path)]
        self.lineEdit.setPlainText("\n".join(self.filepaths))

    def getPaths(self) -> list:
        """Return the lines currently in the text field; an empty field gives []."""
        if self.lineEdit.toPlainText() == '':
            return []
        else:
            return self.lineEdit.toPlainText().split('\n')

    def updateFieldHeight(self):
        """Resize the text field to its line count, capped at a height of 90."""
        self.lineEdit.setFixedHeight(min(7+20*len(self.lineEdit.toPlainText().split('\n')),90))
|
27
src/interface/desktop/labelled_text_field.py
Normal file
27
src/interface/desktop/labelled_text_field.py
Normal file
|
@ -0,0 +1,27 @@
|
|||
# External Packages
|
||||
from PyQt6 import QtWidgets
|
||||
|
||||
# Internal Packages
|
||||
from src.utils.config import ProcessorType
|
||||
|
||||
|
||||
class LabelledTextField(QtWidgets.QWidget):
    """Row widget pairing a fixed-width label with a one-line-high text field."""

    def __init__(self, title, processor_type: ProcessorType=None, default_value: str=None):
        """
        Args:
            title: text shown in the label.
            processor_type: stored on the widget as ``processor_type``.
            default_value: initial content of the text field.
        """
        super().__init__()
        self.processor_type = processor_type

        row = QtWidgets.QHBoxLayout()
        self.setLayout(row)

        # Label on the left
        self.label = QtWidgets.QLabel()
        self.label.setText(title)
        self.label.setFixedWidth(95)
        self.label.setWordWrap(True)
        row.addWidget(self.label)

        # Text field on the right, pre-filled with the default value
        self.input_field = QtWidgets.QTextEdit(self)
        self.input_field.setFixedWidth(410)
        self.input_field.setFixedHeight(27)
        self.input_field.setText(default_value)
        row.addWidget(self.input_field)

        row.addStretch()
|
318
src/interface/desktop/main_window.py
Normal file
318
src/interface/desktop/main_window.py
Normal file
|
@ -0,0 +1,318 @@
|
|||
# Standard Packages
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from copy import deepcopy
|
||||
import webbrowser
|
||||
|
||||
# External Packages
|
||||
from PyQt6 import QtGui, QtWidgets
|
||||
from PyQt6.QtCore import Qt, QThread, QObject, pyqtSignal
|
||||
|
||||
# Internal Packages
|
||||
from src.configure import configure_server
|
||||
from src.interface.desktop.file_browser import FileBrowser
|
||||
from src.interface.desktop.labelled_text_field import LabelledTextField
|
||||
from src.utils import constants, state, yaml as yaml_utils
|
||||
from src.utils.cli import cli
|
||||
from src.utils.config import SearchType, ProcessorType
|
||||
from src.utils.helpers import merge_dicts, resolve_absolute_path
|
||||
|
||||
|
||||
class MainWindow(QtWidgets.QMainWindow):
    """Create Window to Configure Khoj

    Allow user to
    1. Configure content types to search
    2. Configure conversation processor
    3. Save the configuration to khoj.yml
    """

    def __init__(self, config_file: Path):
        """Build the configure window for the config stored at `config_file`.

        Loads the existing config file when it exists, otherwise starts from
        a copy of the default config. Then lays out one settings panel per
        search type, the conversation processor panel and the action buttons.

        Args:
            config_file: Path of the app config file to load from and save to.
        """
        super(MainWindow, self).__init__()
        self.config_file = config_file
        # Set regenerate flag to regenerate embeddings every time user clicks configure
        if state.cli_args:
            state.cli_args += ['--regenerate']
        else:
            state.cli_args = ['--regenerate']

        # Load config from existing config, if exists, else load from default config
        if resolve_absolute_path(self.config_file).exists():
            self.first_run = False
            self.current_config = yaml_utils.load_config_from_file(self.config_file)
        else:
            self.first_run = True
            self.current_config = deepcopy(constants.default_config)
        # NOTE(review): new_config is the same object as current_config, so edits
        # made through new_config are also visible through current_config.
        # Confirm this aliasing is intended.
        self.new_config = self.current_config

        # Initialize Configure Window
        self.setWindowTitle("Khoj")
        self.setFixedWidth(600)

        # Set Window Icon
        icon_path = constants.web_directory / 'assets/icons/favicon-144x144.png'
        self.setWindowIcon(QtGui.QIcon(f'{icon_path.absolute()}'))

        # Initialize Configure Window Layout
        self.layout = QtWidgets.QVBoxLayout()

        # Add Settings Panels for each Search Type to Configure Window Layout
        self.search_settings_panels = []
        for search_type in SearchType:
            current_content_config = self.current_config['content-type'].get(search_type, {})
            self.search_settings_panels += [self.add_settings_panel(current_content_config, search_type)]

        # Add Conversation Processor Panel to Configure Screen
        self.processor_settings_panels = []
        conversation_type = ProcessorType.Conversation
        current_conversation_config = self.current_config['processor'].get(conversation_type, {})
        self.processor_settings_panels += [self.add_processor_panel(current_conversation_config, conversation_type)]

        # Add Action Buttons Panel
        self.add_action_panel()

        # Set the central widget of the Window. Widget will expand
        # to take up all the space in the window by default.
        self.config_window = QtWidgets.QWidget()
        self.config_window.setLayout(self.layout)
        self.setCentralWidget(self.config_window)
        self.position_window()

    def add_settings_panel(self, current_content_config: dict, search_type: SearchType):
        """Add Settings Panel for specified Search Type. Toggle Editable Search Types

        Args:
            current_content_config: Config section of this search type ({} if absent).
            search_type: Search type the panel configures.

        Returns:
            The panel widget, already added to the window layout.
        """
        # Get current files from config for given search type
        if search_type == SearchType.Image:
            current_content_files = current_content_config.get('input-directories', [])
            file_input_text = f'{search_type.name} Folders'
        else:
            current_content_files = current_content_config.get('input-files', [])
            file_input_text = f'{search_type.name} Files'

        # Create widgets to display settings for given search type
        search_type_settings = QtWidgets.QWidget()
        search_type_layout = QtWidgets.QVBoxLayout(search_type_settings)
        enable_search_type = SearchCheckBox(f"Search {search_type.name}", search_type)
        # Add file browser to set input files for given search type
        input_files = FileBrowser(file_input_text, search_type, current_content_files)

        # Set enabled/disabled based on checkbox state
        enable_search_type.setChecked(current_content_files is not None and len(current_content_files) > 0)
        input_files.setEnabled(enable_search_type.isChecked())
        enable_search_type.stateChanged.connect(lambda _: input_files.setEnabled(enable_search_type.isChecked()))

        # Add setting widgets for given search type to panel
        search_type_layout.addWidget(enable_search_type)
        search_type_layout.addWidget(input_files)
        self.layout.addWidget(search_type_settings)

        return search_type_settings

    def add_processor_panel(self, current_conversation_config: dict, processor_type: ProcessorType):
        """Add Conversation Processor Panel

        Args:
            current_conversation_config: Config section of the processor ({} if absent).
            processor_type: Processor type the panel configures.

        Returns:
            The panel widget, already added to the window layout.
        """
        # Get current settings from config for given processor type
        current_openai_api_key = current_conversation_config.get('openai-api-key', None)

        # Create widgets to display settings for given processor type
        processor_type_settings = QtWidgets.QWidget()
        processor_type_layout = QtWidgets.QVBoxLayout(processor_type_settings)
        enable_conversation = ProcessorCheckBox("Conversation", processor_type)
        # Add text field to set the OpenAI API key for given processor type
        input_field = LabelledTextField("OpenAI API Key", processor_type, current_openai_api_key)

        # Set enabled/disabled based on checkbox state
        enable_conversation.setChecked(current_openai_api_key is not None)
        input_field.setEnabled(enable_conversation.isChecked())
        enable_conversation.stateChanged.connect(lambda _: input_field.setEnabled(enable_conversation.isChecked()))

        # Add setting widgets for given processor type to panel
        processor_type_layout.addWidget(enable_conversation)
        processor_type_layout.addWidget(input_field)
        self.layout.addWidget(processor_type_settings)

        return processor_type_settings

    def add_action_panel(self):
        """Add Action Panel with the Configure and Search buttons"""
        # Button to Save Settings
        action_bar = QtWidgets.QWidget()
        action_bar_layout = QtWidgets.QHBoxLayout(action_bar)

        self.configure_button = QtWidgets.QPushButton("Configure", clicked=self.configure_app)
        self.search_button = QtWidgets.QPushButton("Search", clicked=lambda: webbrowser.open(f'http://{state.host}:{state.port}/'))
        # Search stays disabled on first run, when no config file exists yet
        self.search_button.setEnabled(not self.first_run)

        action_bar_layout.addWidget(self.configure_button)
        action_bar_layout.addWidget(self.search_button)
        self.layout.addWidget(action_bar)

    def get_default_config(self, search_type:SearchType=None, processor_type:ProcessorType=None):
        """Get default config

        Returns the default section for `search_type` when given, else the
        default section for `processor_type` when given, else the whole
        default config. The returned object is not copied.
        """
        config = constants.default_config
        if search_type:
            return config['content-type'][search_type]
        elif processor_type:
            return config['processor'][processor_type]
        else:
            return config

    def add_error_message(self, message: str):
        """Add Error Message to Configure Screen

        Removes every label in the window layout whose text starts with one
        of the ErrorType prefixes, then appends `message` in red when it is
        truthy. Passing None or '' therefore only clears old errors.
        """
        # Remove any existing error messages
        for message_prefix in ErrorType:
            # Walk the layout backwards so removals do not shift pending indices
            for i in reversed(range(self.layout.count())):
                current_widget = self.layout.itemAt(i).widget()
                if isinstance(current_widget, QtWidgets.QLabel) and current_widget.text().startswith(message_prefix.value):
                    self.layout.removeWidget(current_widget)
                    current_widget.deleteLater()

        # Add new error message
        if message:
            error_message = QtWidgets.QLabel()
            error_message.setWordWrap(True)
            error_message.setText(message)
            error_message.setStyleSheet("color: red")
            self.layout.addWidget(error_message)

    def update_search_settings(self):
        """Update config with search settings from UI"""
        for settings_panel in self.search_settings_panels:
            for child in settings_panel.children():
                if not isinstance(child, (SearchCheckBox, FileBrowser)):
                    continue
                if isinstance(child, SearchCheckBox):
                    # Search Type Disabled
                    if not child.isChecked() and child.search_type in self.new_config['content-type']:
                        del self.new_config['content-type'][child.search_type]
                    # Search Type (re)-Enabled
                    if child.isChecked():
                        current_search_config = self.current_config['content-type'].get(child.search_type, {})
                        default_search_config = self.get_default_config(search_type = child.search_type)
                        self.new_config['content-type'][child.search_type.value] = merge_dicts(current_search_config, default_search_config)
                elif isinstance(child, FileBrowser) and child.search_type in self.new_config['content-type']:
                    paths = child.getPaths()
                    # Compare enum members directly, as the rest of this file does,
                    # rather than comparing the raw value against the member.
                    if child.search_type == SearchType.Image:
                        paths_key = 'input-directories'
                    else:
                        paths_key = 'input-files'
                    # An empty selection is stored as None rather than []
                    self.new_config['content-type'][child.search_type.value][paths_key] = paths if paths != [] else None

    def update_processor_settings(self):
        """Update config with conversation settings from UI"""
        for settings_panel in self.processor_settings_panels:
            for child in settings_panel.children():
                if not isinstance(child, (ProcessorCheckBox, LabelledTextField)):
                    continue
                if isinstance(child, ProcessorCheckBox):
                    # Processor Type Disabled
                    if not child.isChecked() and child.processor_type in self.new_config['processor']:
                        del self.new_config['processor'][child.processor_type]
                    # Processor Type (re)-Enabled
                    if child.isChecked():
                        current_processor_config = self.current_config['processor'].get(child.processor_type, {})
                        default_processor_config = self.get_default_config(processor_type = child.processor_type)
                        self.new_config['processor'][child.processor_type.value] = merge_dicts(current_processor_config, default_processor_config)
                elif isinstance(child, LabelledTextField) and child.processor_type in self.new_config['processor']:
                    if child.processor_type == ProcessorType.Conversation:
                        api_key = child.input_field.toPlainText()
                        # An empty field is stored as None rather than ''
                        self.new_config['processor'][child.processor_type.value]['openai-api-key'] = api_key if api_key != '' else None

    def save_settings_to_file(self) -> bool:
        """Save validated settings to file

        Returns:
            True when the new config validated and was written to the config
            file; False when validation raised, in which case the error is
            printed and shown in the window and nothing is written.
        """
        # Validate config before writing to file
        try:
            yaml_utils.parse_config_from_string(self.new_config)
        except Exception as e:
            print(f"Error validating config: {e}")
            self.add_error_message(f"{ErrorType.ConfigValidationError.value}: {e}")
            return False

        # Save the config to app config file
        self.add_error_message(None)
        yaml_utils.save_config_to_file(self.new_config, self.config_file)
        return True

    def load_updated_settings(self):
        """Hot swap to use the updated config from config file"""
        # Load parsed, validated config from app config file
        args = cli(state.cli_args)
        self.current_config = self.new_config

        # Configure server with loaded config
        configure_server(args, required=True)

    def configure_app(self):
        """Save the new settings to khoj.yml. Reload app with updated settings

        When saving succeeds, the reload runs on a background QThread and the
        buttons stay disabled until that thread finishes.
        """
        self.update_search_settings()
        self.update_processor_settings()
        if self.save_settings_to_file():
            # Setup thread to load updated settings in background
            self.thread = QThread()
            self.settings_loader = SettingsLoader(self.load_updated_settings)
            self.settings_loader.moveToThread(self.thread)

            # Connect slots and signals for thread
            self.thread.started.connect(self.settings_loader.run)
            self.settings_loader.finished.connect(self.thread.quit)
            self.settings_loader.finished.connect(self.settings_loader.deleteLater)
            self.settings_loader.error.connect(self.add_error_message)
            self.thread.finished.connect(self.thread.deleteLater)

            # Start thread
            self.thread.start()

            # Disable Save Button
            self.search_button.setEnabled(False)
            self.configure_button.setEnabled(False)
            self.configure_button.setText("Configuring...")

            # Reset UI
            self.thread.finished.connect(lambda: self.configure_button.setText("Configure"))
            self.thread.finished.connect(lambda: self.configure_button.setEnabled(True))
            self.thread.finished.connect(lambda: self.search_button.setEnabled(True))

    def position_window(self):
        """Position the window at center of X axis and near top on Y axis"""
        window_rectangle = self.geometry()
        screen_center = self.screen().availableGeometry().center()
        window_rectangle.moveCenter(screen_center)
        # Keep the centered X coordinate; pin Y to 25
        self.move(window_rectangle.topLeft().x(), 25)

    def show_on_top(self):
        """Bring Window on Top"""
        self.show()
        self.setWindowState(Qt.WindowState.WindowActive)
        self.activateWindow()  # For Bringing to Top on Windows
        self.raise_()  # For Bringing to Top from Minimized State on OSX
|
||||
|
||||
|
||||
class SettingsLoader(QObject):
    """Worker object that runs a settings-loading callable and reports back.

    Signals:
        finished: emitted once `run` has completed.
        error: emitted with an error message when the config file is
            missing, or with None when loading succeeded.
    """
    finished = pyqtSignal()
    error = pyqtSignal(str)

    def __init__(self, load_settings_func):
        super().__init__()
        self.load_settings_func = load_settings_func

    def run(self):
        """Invoke the loader, emit the outcome on `error`, then emit `finished`."""
        outcome = None
        try:
            self.load_settings_func()
        except FileNotFoundError as missing:
            outcome = f"{ErrorType.ConfigLoadingError.value}: {missing}"
        self.error.emit(outcome)
        self.finished.emit()
|
||||
|
||||
|
||||
class SearchCheckBox(QtWidgets.QCheckBox):
    """Checkbox that remembers which search type it toggles."""

    def __init__(self, text, search_type: SearchType, parent=None):
        # The search type is recorded before the Qt checkbox is initialised
        self.search_type = search_type
        super().__init__(text, parent=parent)
|
||||
|
||||
|
||||
class ProcessorCheckBox(QtWidgets.QCheckBox):
    """Checkbox that remembers which processor type it toggles."""

    def __init__(self, text, processor_type: ProcessorType, parent=None):
        # The processor type is recorded before the Qt checkbox is initialised
        self.processor_type = processor_type
        super().__init__(text, parent=parent)
|
||||
|
||||
class ErrorType(Enum):
    """Prefixes that identify the kind of error shown on the configure screen."""

    ConfigLoadingError = "Config Loading Error"
    ConfigValidationError = "Config Validation Error"
|
41
src/interface/desktop/system_tray.py
Normal file
41
src/interface/desktop/system_tray.py
Normal file
|
@ -0,0 +1,41 @@
|
|||
# Standard Packages
|
||||
import webbrowser
|
||||
|
||||
# External Packages
|
||||
from PyQt6 import QtGui, QtWidgets
|
||||
|
||||
# Internal Packages
|
||||
from src.utils import constants, state
|
||||
|
||||
|
||||
def create_system_tray(gui: QtWidgets.QApplication, main_window: QtWidgets.QMainWindow):
    """Create the system tray icon and its context menu.

    The menu offers three entries:
    1. Search: open the web interface in the browser
    2. Configure: bring the configuration window to the front
    3. Quit: quit the application

    Returns:
        The visible QSystemTrayIcon with the menu attached.
    """
    # Tray icon uses the web interface favicon
    icon_file = constants.web_directory / 'assets/icons/favicon-144x144.png'
    tray = QtWidgets.QSystemTrayIcon(QtGui.QIcon(f'{icon_file.absolute()}'))
    tray.setVisible(True)

    # Menu entries as (label, handler) pairs, in display order
    entries = (
        ('Search', lambda: webbrowser.open(f'http://{state.host}:{state.port}/')),
        ('Configure', main_window.show_on_top),
        ('Quit', gui.quit),
    )

    menu = QtWidgets.QMenu()
    for label, handler in entries:
        entry = QtGui.QAction(label, menu)
        entry.triggered.connect(handler)
        menu.addAction(entry)

    tray.setContextMenu(menu)
    return tray
|
|
@ -1,10 +1,12 @@
|
|||
;;; khoj.el --- Natural, Incremental Search via Emacs
|
||||
;;; khoj.el --- Natural, Incremental Search for your Second Brain -*- lexical-binding: t -*-
|
||||
|
||||
;; Copyright (C) 2021-2022 Debanjum Singh Solanky
|
||||
|
||||
;; Author: Debanjum Singh Solanky <debanjum@gmail.com>
|
||||
;; Version: 2.0
|
||||
;; Keywords: search, org-mode, outlines, markdown, image
|
||||
;; Description: Natural, Incremental Search for your Second Brain
|
||||
;; Keywords: search, org-mode, outlines, markdown, beancount, ledger, image
|
||||
;; Version: 0.1.6
|
||||
;; Package-Requires: ((emacs "27.1"))
|
||||
;; URL: http://github.com/debanjum/khoj/interface/emacs
|
||||
|
||||
;; This file is NOT part of GNU Emacs.
|
||||
|
@ -27,9 +29,20 @@
|
|||
;;; Commentary:
|
||||
|
||||
;; This package provides a natural, incremental search interface to your
|
||||
;; org-mode notes, markdown files, beancount transactions and images.
|
||||
;; It is a wrapper that interfaces with transformer based ML models.
|
||||
;; The models search capabilities are exposed via the Khoj HTTP API.
|
||||
;; `org-mode' notes, `markdown' files, `beancount' transactions and images.
|
||||
;; It is a wrapper that interfaces with the Khoj server.
|
||||
;; The server exposes an API for advanced search using transformer ML models.
|
||||
;; The Khoj server needs to be running to use this package.
|
||||
;; See the repository docs for detailed setup of the Khoj server.
|
||||
;;
|
||||
;; Quickstart
|
||||
;; -------------
|
||||
;; 1. Install Khoj Server
|
||||
;; pip install khoj-assistant
|
||||
;; 2. Start, Configure Khoj Server
|
||||
;; khoj
|
||||
;; 3. Install khoj.el
|
||||
;; (use-package khoj :bind ("C-c s" . 'khoj))
|
||||
|
||||
;;; Code:
|
||||
|
||||
|
@ -51,11 +64,6 @@
|
|||
:group 'khoj
|
||||
:type 'integer)
|
||||
|
||||
(defcustom khoj-rerank-after-idle-time 2.0
|
||||
"Idle time (in seconds) to trigger cross-encoder to rerank incremental search results."
|
||||
:group 'khoj
|
||||
:type 'float)
|
||||
|
||||
(defcustom khoj-results-count 5
|
||||
"Number of results to get from Khoj API for each query."
|
||||
:group 'khoj
|
||||
|
@ -69,9 +77,6 @@
|
|||
(const "ledger")
|
||||
(const "music")))
|
||||
|
||||
(defvar khoj--rerank-timer nil
|
||||
"Idle timer to make cross-encoder re-rank incremental search results if user idle.")
|
||||
|
||||
(defvar khoj--minibuffer-window nil
|
||||
"Minibuffer window being used by user to enter query.")
|
||||
|
||||
|
@ -85,6 +90,7 @@
|
|||
"The type of content to perform search on.")
|
||||
|
||||
(defun khoj--keybindings-info-message ()
|
||||
"Show available khoj keybindings in-context, when user invokes Khoj."
|
||||
(let ((enabled-content-types (khoj--get-enabled-content-types)))
|
||||
(concat
|
||||
"
|
||||
|
@ -101,15 +107,18 @@
|
|||
(when (member 'music enabled-content-types)
|
||||
"C-x M | music\n"))))
|
||||
|
||||
(defun khoj--search-markdown () (interactive) (setq khoj--search-type "markdown"))
|
||||
(defun khoj--search-org () (interactive) (setq khoj--search-type "org"))
|
||||
(defun khoj--search-ledger () (interactive) (setq khoj--search-type "ledger"))
|
||||
(defun khoj--search-images () (interactive) (setq khoj--search-type "image"))
|
||||
(defun khoj--search-music () (interactive) (setq khoj--search-type "music"))
|
||||
(defvar khoj--rerank nil "Track when re-rank of results triggered")
|
||||
(defun khoj--search-markdown () "Set search-type to 'markdown'." (interactive) (setq khoj--search-type "markdown"))
|
||||
(defun khoj--search-org () "Set search-type to 'org-mode'." (interactive) (setq khoj--search-type "org"))
|
||||
(defun khoj--search-ledger () "Set search-type to 'ledger'." (interactive) (setq khoj--search-type "ledger"))
|
||||
(defun khoj--search-images () "Set search-type to image." (interactive) (setq khoj--search-type "image"))
|
||||
(defun khoj--search-music () "Set search-type to music." (interactive) (setq khoj--search-type "music"))
|
||||
(defun khoj--improve-rank () "Use cross-encoder to rerank search results." (interactive) (khoj--incremental-search t))
|
||||
(defun khoj--make-search-keymap (&optional existing-keymap)
|
||||
"Setup keymap to configure Khoj search"
|
||||
"Setup keymap to configure Khoj search. Build of EXISTING-KEYMAP when passed."
|
||||
(let ((enabled-content-types (khoj--get-enabled-content-types))
|
||||
(kmap (or existing-keymap (make-sparse-keymap))))
|
||||
(define-key kmap (kbd "C-c RET") #'khoj--improve-rank)
|
||||
(when (member 'markdown enabled-content-types)
|
||||
(define-key kmap (kbd "C-x m") #'khoj--search-markdown))
|
||||
(when (member 'org enabled-content-types)
|
||||
|
@ -121,6 +130,8 @@
|
|||
(when (member 'music enabled-content-types)
|
||||
(define-key kmap (kbd "C-x M") #'khoj--search-music))
|
||||
kmap))
|
||||
|
||||
(defvar khoj--keymap nil "Track Khoj keymap in this variable.")
|
||||
(defun khoj--display-keybinding-info ()
|
||||
"Display information on keybindings to customize khoj search.
|
||||
Use `which-key` if available, else display simple message in echo area"
|
||||
|
@ -132,7 +143,7 @@ Use `which-key` if available, else display simple message in echo area"
|
|||
(message "%s" (khoj--keybindings-info-message))))
|
||||
|
||||
(defun khoj--extract-entries-as-markdown (json-response query)
|
||||
"Convert json response from API to markdown entries"
|
||||
"Convert JSON-RESPONSE, QUERY from API to markdown entries."
|
||||
;; remove leading (, ) or SPC from extracted entries string
|
||||
(replace-regexp-in-string
|
||||
"^[\(\) ]" ""
|
||||
|
@ -147,12 +158,12 @@ Use `which-key` if available, else display simple message in echo area"
|
|||
json-response))))
|
||||
|
||||
(defun khoj--extract-entries-as-org (json-response query)
|
||||
"Convert json response from API to org-mode entries"
|
||||
"Convert JSON-RESPONSE, QUERY from API to 'org-mode' entries."
|
||||
;; remove leading (, ) or SPC from extracted entries string
|
||||
(replace-regexp-in-string
|
||||
"^[\(\) ]" ""
|
||||
;; extract entries from response as single string and convert to entries
|
||||
(format "#+STARTUP: showall hidestars inlineimages\n* %s\n%s"
|
||||
(format "* %s\n%s\n#+STARTUP: showall hidestars inlineimages"
|
||||
query
|
||||
(mapcar
|
||||
(lambda (args)
|
||||
|
@ -162,7 +173,7 @@ Use `which-key` if available, else display simple message in echo area"
|
|||
json-response))))
|
||||
|
||||
(defun khoj--extract-entries-as-images (json-response query)
|
||||
"Convert json response from API to html with images"
|
||||
"Convert JSON-RESPONSE, QUERY from API to html with images."
|
||||
;; remove leading (, ) or SPC from extracted entries string
|
||||
(replace-regexp-in-string
|
||||
"[\(\) ]$" ""
|
||||
|
@ -188,7 +199,7 @@ Use `which-key` if available, else display simple message in echo area"
|
|||
json-response)))))
|
||||
|
||||
(defun khoj--extract-entries-as-ledger (json-response query)
|
||||
"Convert json response from API to ledger entries"
|
||||
"Convert JSON-RESPONSE, QUERY from API to ledger entries."
|
||||
;; remove leading (, ) or SPC from extracted entries string
|
||||
(replace-regexp-in-string
|
||||
"[\(\) ]$" ""
|
||||
|
@ -203,6 +214,7 @@ Use `which-key` if available, else display simple message in echo area"
|
|||
json-response)))))
|
||||
|
||||
(defun khoj--buffer-name-to-search-type (buffer-name)
|
||||
"Infer search type based on BUFFER-NAME."
|
||||
(let ((enabled-content-types (khoj--get-enabled-content-types))
|
||||
(file-extension (file-name-extension buffer-name)))
|
||||
(cond
|
||||
|
@ -213,7 +225,7 @@ Use `which-key` if available, else display simple message in echo area"
|
|||
(t khoj-default-search-type))))
|
||||
|
||||
(defun khoj--get-enabled-content-types ()
|
||||
"Get content types enabled for search from API"
|
||||
"Get content types enabled for search from API."
|
||||
(let ((config-url (format "%s/config/data" khoj-server-url)))
|
||||
(with-temp-buffer
|
||||
(erase-buffer)
|
||||
|
@ -228,11 +240,14 @@ Use `which-key` if available, else display simple message in echo area"
|
|||
content-type))))))
|
||||
|
||||
(defun khoj--construct-api-query (query search-type &optional rerank)
  "Build the Khoj search URL for QUERY of SEARCH-TYPE.
RERANK is sent as the `r' parameter and falls back to \"false\" when nil."
  (format "%s/search?q=%s&t=%s&r=%s&n=%s"
          khoj-server-url
          (url-hexify-string query)
          search-type
          (if rerank rerank "false")
          khoj-results-count))
|
||||
|
||||
(defun khoj--query-api-and-render-results (query search-type query-url buffer-name)
|
||||
"Query Khoj API using QUERY, SEARCH-TYPE, QUERY-URL.
|
||||
Render results in BUFFER-NAME."
|
||||
;; get json response from api
|
||||
(with-current-buffer buffer-name
|
||||
(let ((inhibit-read-only t))
|
||||
|
@ -260,8 +275,8 @@ Use `which-key` if available, else display simple message in echo area"
|
|||
(read-only-mode t)))
|
||||
|
||||
|
||||
;; Incremental Search on Khoj
|
||||
(defun khoj--incremental-search (&optional rerank)
|
||||
"Perform Incremental Search on Khoj. Allow optional RERANK of results."
|
||||
(let* ((rerank-str (cond (rerank "true") (t "false")))
|
||||
(khoj-buffer-name (get-buffer-create khoj--buffer-name))
|
||||
(query (minibuffer-contents-no-properties))
|
||||
|
@ -271,18 +286,27 @@ Use `which-key` if available, else display simple message in echo area"
|
|||
;; 1. user hasn't started typing query
|
||||
;; 2. during recursive edits
|
||||
;; 3. with contents of other buffers user may jump to
|
||||
(when (and (not (equal query "")) (active-minibuffer-window) (equal (current-buffer) khoj--minibuffer-window))
|
||||
;; 4. search not triggered right after rerank
|
||||
;; ignore to not overwrite reranked results before the user even sees them
|
||||
(if khoj--rerank
|
||||
(setq khoj--rerank nil)
|
||||
(when
|
||||
(and
|
||||
(not (equal query ""))
|
||||
(active-minibuffer-window)
|
||||
(equal (current-buffer) khoj--minibuffer-window))
|
||||
(progn
|
||||
(when rerank
|
||||
(setq khoj--rerank t)
|
||||
(message "Khoj: Rerank Results"))
|
||||
(khoj--query-api-and-render-results
|
||||
query
|
||||
khoj--search-type
|
||||
query-url
|
||||
khoj-buffer-name)))))
|
||||
khoj-buffer-name))))))
|
||||
|
||||
(defun delete-open-network-connections-to-khoj ()
|
||||
"Delete all network connections to khoj server"
|
||||
(defun khoj--delete-open-network-connections-to-server ()
|
||||
"Delete all network connections to khoj server."
|
||||
(dolist (proc (process-list))
|
||||
(let ((proc-buf (buffer-name (process-buffer proc)))
|
||||
(khoj-network-proc-buf (string-join (split-string khoj-server-url "://") " ")))
|
||||
|
@ -290,33 +314,24 @@ Use `which-key` if available, else display simple message in echo area"
|
|||
(delete-process proc)))))
|
||||
|
||||
(defun khoj--teardown-incremental-search ()
|
||||
"Teardown hooks used for incremental search."
|
||||
(message "Khoj: Teardown Incremental Search")
|
||||
;; remove advice to rerank results on normal exit from minibuffer
|
||||
(advice-remove 'exit-minibuffer #'khoj--minibuffer-exit-advice)
|
||||
;; unset khoj minibuffer window
|
||||
(setq khoj--minibuffer-window nil)
|
||||
;; cancel rerank timer
|
||||
(when (timerp khoj--rerank-timer)
|
||||
(cancel-timer khoj--rerank-timer))
|
||||
;; delete open connections to khoj
|
||||
(delete-open-network-connections-to-khoj)
|
||||
;; delete open connections to khoj server
|
||||
(khoj--delete-open-network-connections-to-server)
|
||||
;; remove hooks for khoj incremental query and self
|
||||
(remove-hook 'post-command-hook #'khoj--incremental-search)
|
||||
(remove-hook 'minibuffer-exit-hook #'khoj--teardown-incremental-search))
|
||||
|
||||
(defun khoj--minibuffer-exit-advice (&rest _args)
|
||||
(khoj--incremental-search t))
|
||||
|
||||
|
||||
;;;###autoload
|
||||
(defun khoj ()
|
||||
"Natural, Incremental Search for your personal notes, transactions and music using Khoj"
|
||||
"Natural, Incremental Search for your personal notes, transactions and music."
|
||||
(interactive)
|
||||
(let* ((khoj-buffer-name (get-buffer-create khoj--buffer-name)))
|
||||
;; set khoj search type to last used or based on current buffer
|
||||
(setq khoj--search-type (or khoj--search-type (khoj--buffer-name-to-search-type (buffer-name))))
|
||||
;; setup rerank to improve results once user idle for KHOJ-RERANK-AFTER-IDLE-TIME seconds
|
||||
(setq khoj--rerank-timer (run-with-idle-timer khoj-rerank-after-idle-time t 'khoj--incremental-search t))
|
||||
;; switch to khoj results buffer
|
||||
(switch-to-buffer khoj-buffer-name)
|
||||
;; open and setup minibuffer for incremental search
|
||||
|
@ -329,15 +344,13 @@ Use `which-key` if available, else display simple message in echo area"
|
|||
;; set current (mini-)buffer entered as khoj minibuffer
|
||||
;; used to query khoj API only when user in khoj minibuffer
|
||||
(setq khoj--minibuffer-window (current-buffer))
|
||||
;; rerank results on normal exit from minibuffer
|
||||
(advice-add 'exit-minibuffer :before #'khoj--minibuffer-exit-advice)
|
||||
(add-hook 'post-command-hook #'khoj--incremental-search) ; do khoj incremental search after every user action
|
||||
(add-hook 'minibuffer-exit-hook #'khoj--teardown-incremental-search)) ; teardown khoj incremental search on minibuffer exit
|
||||
(read-string khoj--query-prompt))))
|
||||
|
||||
;;;###autoload
|
||||
(defun khoj-simple (query)
|
||||
"Natural Search for QUERY in your personal notes, transactions, music and images using Khoj"
|
||||
"Natural Search for QUERY on your personal notes, transactions, music and images."
|
||||
(interactive "s🦅Khoj: ")
|
||||
(let* ((rerank "true")
|
||||
(default-type (khoj--buffer-name-to-search-type (buffer-name)))
|
||||
|
|
BIN
src/interface/web/assets/icons/favicon-128x128.png
Normal file
BIN
src/interface/web/assets/icons/favicon-128x128.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 26 KiB |
BIN
src/interface/web/assets/icons/favicon-144x144.ico
Normal file
BIN
src/interface/web/assets/icons/favicon-144x144.ico
Normal file
Binary file not shown.
After Width: | Height: | Size: 159 KiB |
BIN
src/interface/web/assets/icons/favicon.icns
Normal file
BIN
src/interface/web/assets/icons/favicon.icns
Normal file
Binary file not shown.
|
@ -8,8 +8,8 @@
|
|||
<link rel="icon" type="image/png" sizes="144x144" href="/static/assets/icons/favicon-144x144.png">
|
||||
<link rel="manifest" href="/static/khoj.webmanifest">
|
||||
</head>
|
||||
<script type="text/javascript" src="static/assets/org.js"></script>
|
||||
<script type="text/javascript" src="static/assets/markdown-it.js"></script>
|
||||
<script type="text/javascript" src="static/assets/org.min.js"></script>
|
||||
<script type="text/javascript" src="static/assets/markdown-it.min.js"></script>
|
||||
|
||||
<script>
|
||||
function render_image(item) {
|
||||
|
@ -38,6 +38,12 @@
|
|||
}).join("\n"));
|
||||
}
|
||||
|
||||
// Render ledger search results: one <p> per entry, wrapped in the
// #results-ledger container. `query` is accepted but not used here.
function render_ledger(query, data) {
    const paragraphs = data.map((item) => `<p>${item.entry}</p>`);
    return `<div id="results-ledger">${paragraphs.join("\n")}</div>`;
}
|
||||
|
||||
function render_json(data, query, type) {
|
||||
if (type === "markdown") {
|
||||
return render_markdown(query, data);
|
||||
|
@ -47,6 +53,8 @@
|
|||
return render_org(query, data, "music-");
|
||||
} else if (type === "image") {
|
||||
return data.map(render_image).join('');
|
||||
} else if (type === "ledger") {
|
||||
return render_ledger(query, data);
|
||||
} else {
|
||||
return `<pre id="json">${JSON.stringify(data, null, 2)}</pre>`;
|
||||
}
|
||||
|
@ -223,7 +231,11 @@
|
|||
#json {
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
#results-markdown {
|
||||
#results-ledger {
|
||||
white-space: pre-line;
|
||||
text-align: left;
|
||||
}
|
||||
#results-markdown {
|
||||
text-align: left;
|
||||
}
|
||||
#results-music,
|
||||
|
|
432
src/main.py
432
src/main.py
|
@ -1,344 +1,124 @@
|
|||
# Standard Packages
|
||||
import sys, json, yaml
|
||||
import time
|
||||
from typing import Optional
|
||||
from pathlib import Path
|
||||
from functools import lru_cache
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
from platform import system
|
||||
|
||||
# External Packages
|
||||
import uvicorn
|
||||
import torch
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.responses import HTMLResponse, FileResponse
|
||||
from fastapi import FastAPI
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.templating import Jinja2Templates
|
||||
from PyQt6 import QtWidgets
|
||||
from PyQt6.QtCore import QThread, QTimer
|
||||
|
||||
# Internal Packages
|
||||
from src.search_type import image_search, text_search
|
||||
from src.processor.org_mode.org_to_jsonl import org_to_jsonl
|
||||
from src.processor.ledger.beancount_to_jsonl import beancount_to_jsonl
|
||||
from src.processor.markdown.markdown_to_jsonl import markdown_to_jsonl
|
||||
from src.processor.panchayat.panchayat_to_jsonl import panchayat_to_jsonl
|
||||
from src.utils.helpers import get_absolute_path, get_from_dict
|
||||
from src.configure import configure_server
|
||||
from src.router import router
|
||||
from src.utils import constants, state
|
||||
from src.utils.cli import cli
|
||||
from src.utils.config import SearchType, SearchModels, ProcessorConfigModel, ConversationProcessorConfigModel
|
||||
from src.utils.rawconfig import FullConfig
|
||||
from src.processor.conversation.gpt import converse, extract_search_type, message_to_log, message_to_prompt, understand, summarize
|
||||
from src.search_filter.explicit_filter import ExplicitFilter
|
||||
from src.search_filter.date_filter import DateFilter
|
||||
from src.interface.desktop.main_window import MainWindow
|
||||
from src.interface.desktop.system_tray import create_system_tray
|
||||
|
||||
# Application Global State
|
||||
config = FullConfig()
|
||||
model = SearchModels()
|
||||
processor_config = ProcessorConfigModel()
|
||||
config_file = ""
|
||||
verbose = 0
|
||||
|
||||
# Initialize the Application Server
|
||||
app = FastAPI()
|
||||
this_directory = Path(__file__).parent
|
||||
web_directory = this_directory / 'interface/web/'
|
||||
|
||||
app.mount("/static", StaticFiles(directory=web_directory), name="static")
|
||||
templates = Jinja2Templates(directory=web_directory)
|
||||
|
||||
|
||||
# Controllers
|
||||
@app.get("/", response_class=FileResponse)
|
||||
def index():
|
||||
return FileResponse(web_directory / "index.html")
|
||||
|
||||
@app.get('/config', response_class=HTMLResponse)
|
||||
def config(request: Request):
|
||||
return templates.TemplateResponse("config.html", context={'request': request})
|
||||
|
||||
@app.get('/config/data', response_model=FullConfig)
|
||||
def config_data():
|
||||
return config
|
||||
|
||||
@app.post('/config/data')
|
||||
async def config_data(updated_config: FullConfig):
|
||||
global config
|
||||
config = updated_config
|
||||
with open(config_file, 'w') as outfile:
|
||||
yaml.dump(yaml.safe_load(config.json(by_alias=True)), outfile)
|
||||
outfile.close()
|
||||
return config
|
||||
|
||||
@app.get('/search')
|
||||
@lru_cache(maxsize=100)
|
||||
def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None, r: Optional[bool] = False):
|
||||
if q is None or q == '':
|
||||
print(f'No query param (q) passed in API call to initiate search')
|
||||
return {}
|
||||
|
||||
# initialize variables
|
||||
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
|
||||
user_query = q
|
||||
results_count = n
|
||||
results = {}
|
||||
query_start, query_end, collate_start, collate_end = None, None, None, None
|
||||
|
||||
if (t == SearchType.Org or t == None) and model.orgmode_search:
|
||||
# query org-mode notes
|
||||
query_start = time.time()
|
||||
hits, entries = text_search.query(user_query, model.orgmode_search, rank_results=r, device=device, filters=[DateFilter(), ExplicitFilter()], verbose=verbose)
|
||||
query_end = time.time()
|
||||
|
||||
# collate and return results
|
||||
collate_start = time.time()
|
||||
results = text_search.collate_results(hits, entries, results_count)
|
||||
collate_end = time.time()
|
||||
|
||||
if (t == SearchType.Music or t == None) and model.music_search:
|
||||
# query music library
|
||||
query_start = time.time()
|
||||
hits, entries = text_search.query(user_query, model.music_search, rank_results=r, device=device, filters=[DateFilter(), ExplicitFilter()], verbose=verbose)
|
||||
query_end = time.time()
|
||||
|
||||
# collate and return results
|
||||
collate_start = time.time()
|
||||
results = text_search.collate_results(hits, entries, results_count)
|
||||
collate_end = time.time()
|
||||
|
||||
if (t == SearchType.Markdown or t == None) and model.orgmode_search:
|
||||
# query markdown files
|
||||
query_start = time.time()
|
||||
hits, entries = text_search.query(user_query, model.markdown_search, rank_results=r, device=device, filters=[ExplicitFilter(), DateFilter()], verbose=verbose)
|
||||
query_end = time.time()
|
||||
|
||||
# collate and return results
|
||||
collate_start = time.time()
|
||||
results = text_search.collate_results(hits, entries, results_count)
|
||||
collate_end = time.time()
|
||||
|
||||
if (t == SearchType.Panchayat or t == None) and model.panchayat_search:
|
||||
# query Panchayat yaml files
|
||||
query_start = time.time()
|
||||
hits, entries = text_search.query(user_query, model.panchayat_search, rank_results=r, device=device, filters=[ExplicitFilter(), DateFilter()], verbose=verbose)
|
||||
query_end = time.time()
|
||||
|
||||
# collate and return results
|
||||
collate_start = time.time()
|
||||
results = text_search.collate_results(hits, entries, results_count)
|
||||
collate_end = time.time()
|
||||
|
||||
if (t == SearchType.Ledger or t == None) and model.ledger_search:
|
||||
# query transactions
|
||||
query_start = time.time()
|
||||
hits, entries = text_search.query(user_query, model.ledger_search, rank_results=r, device=device, filters=[ExplicitFilter(), DateFilter()], verbose=verbose)
|
||||
query_end = time.time()
|
||||
|
||||
# collate and return results
|
||||
collate_start = time.time()
|
||||
results = text_search.collate_results(hits, entries, results_count)
|
||||
collate_end = time.time()
|
||||
|
||||
if (t == SearchType.Image or t == None) and model.image_search:
|
||||
# query images
|
||||
query_start = time.time()
|
||||
hits = image_search.query(user_query, results_count, model.image_search)
|
||||
output_directory = web_directory / 'images'
|
||||
query_end = time.time()
|
||||
|
||||
# collate and return results
|
||||
collate_start = time.time()
|
||||
results = image_search.collate_results(
|
||||
hits,
|
||||
image_names=model.image_search.image_names,
|
||||
output_directory=output_directory,
|
||||
image_files_url='/static/images',
|
||||
count=results_count)
|
||||
collate_end = time.time()
|
||||
|
||||
if verbose > 1:
|
||||
if query_start and query_end:
|
||||
print(f"Query took {query_end - query_start:.3f} seconds")
|
||||
if collate_start and collate_end:
|
||||
print(f"Collating results took {collate_end - collate_start:.3f} seconds")
|
||||
|
||||
return results
|
||||
|
||||
|
||||
@app.get('/reload')
|
||||
def reload(t: Optional[SearchType] = None):
|
||||
global model
|
||||
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
|
||||
model = initialize_search(config, regenerate=False, t=t, device=device)
|
||||
return {'status': 'ok', 'message': 'reload completed'}
|
||||
|
||||
|
||||
@app.get('/regenerate')
|
||||
def regenerate(t: Optional[SearchType] = None):
|
||||
global model
|
||||
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
|
||||
model = initialize_search(config, regenerate=True, t=t, device=device)
|
||||
return {'status': 'ok', 'message': 'regeneration completed'}
|
||||
|
||||
|
||||
@app.get('/beta/search')
|
||||
def search_beta(q: str, n: Optional[int] = 1):
|
||||
# Extract Search Type using GPT
|
||||
metadata = extract_search_type(q, api_key=processor_config.conversation.openai_api_key, verbose=verbose)
|
||||
search_type = get_from_dict(metadata, "search-type")
|
||||
|
||||
# Search
|
||||
search_results = search(q, n=n, t=SearchType(search_type))
|
||||
|
||||
# Return response
|
||||
return {'status': 'ok', 'result': search_results, 'type': search_type}
|
||||
|
||||
|
||||
@app.get('/chat')
|
||||
def chat(q: str):
|
||||
# Load Conversation History
|
||||
chat_session = processor_config.conversation.chat_session
|
||||
meta_log = processor_config.conversation.meta_log
|
||||
|
||||
# Converse with OpenAI GPT
|
||||
metadata = understand(q, api_key=processor_config.conversation.openai_api_key, verbose=verbose)
|
||||
if verbose > 1:
|
||||
print(f'Understood: {get_from_dict(metadata, "intent")}')
|
||||
|
||||
if get_from_dict(metadata, "intent", "memory-type") == "notes":
|
||||
query = get_from_dict(metadata, "intent", "query")
|
||||
result_list = search(query, n=1, t=SearchType.Org)
|
||||
collated_result = "\n".join([item["entry"] for item in result_list])
|
||||
if verbose > 1:
|
||||
print(f'Semantically Similar Notes:\n{collated_result}')
|
||||
gpt_response = summarize(collated_result, summary_type="notes", user_query=q, api_key=processor_config.conversation.openai_api_key)
|
||||
else:
|
||||
gpt_response = converse(q, chat_session, api_key=processor_config.conversation.openai_api_key)
|
||||
|
||||
# Update Conversation History
|
||||
processor_config.conversation.chat_session = message_to_prompt(q, chat_session, gpt_message=gpt_response)
|
||||
processor_config.conversation.meta_log['chat'] = message_to_log(q, metadata, gpt_response, meta_log.get('chat', []))
|
||||
|
||||
return {'status': 'ok', 'response': gpt_response}
|
||||
|
||||
|
||||
def initialize_search(config: FullConfig, regenerate: bool, t: SearchType = None, device=torch.device("cpu")):
|
||||
# Initialize Org Notes Search
|
||||
if (t == SearchType.Org or t == None) and config.content_type.org:
|
||||
# Extract Entries, Generate Notes Embeddings
|
||||
model.orgmode_search = text_search.setup(org_to_jsonl, config.content_type.org, search_config=config.search_type.asymmetric, regenerate=regenerate, device=device, verbose=verbose)
|
||||
|
||||
# Initialize Org Music Search
|
||||
if (t == SearchType.Music or t == None) and config.content_type.music:
|
||||
# Extract Entries, Generate Music Embeddings
|
||||
model.music_search = text_search.setup(org_to_jsonl, config.content_type.music, search_config=config.search_type.asymmetric, regenerate=regenerate, device=device, verbose=verbose)
|
||||
|
||||
# Initialize Markdown Search
|
||||
if (t == SearchType.Markdown or t == None) and config.content_type.markdown:
|
||||
# Extract Entries, Generate Markdown Embeddings
|
||||
model.markdown_search = text_search.setup(markdown_to_jsonl, config.content_type.markdown, search_config=config.search_type.asymmetric, regenerate=regenerate, device=device, verbose=verbose)
|
||||
|
||||
# Initialize Panchayat Search
|
||||
if (t == SearchType.Panchayat or t == None) and config.content_type.panchayat:
|
||||
# Extract Entries, Generate Yaml Embeddings
|
||||
model.panchayat_search = text_search.setup(panchayat_to_jsonl, config.content_type.panchayat, search_config=config.search_type.asymmetric, regenerate=regenerate, device=device, verbose=verbose)
|
||||
|
||||
# Initialize Ledger Search
|
||||
if (t == SearchType.Ledger or t == None) and config.content_type.ledger:
|
||||
# Extract Entries, Generate Ledger Embeddings
|
||||
model.ledger_search = text_search.setup(beancount_to_jsonl, config.content_type.ledger, search_config=config.search_type.symmetric, regenerate=regenerate, verbose=verbose)
|
||||
|
||||
# Initialize Image Search
|
||||
if (t == SearchType.Image or t == None) and config.content_type.image:
|
||||
# Extract Entries, Generate Image Embeddings
|
||||
model.image_search = image_search.setup(config.content_type.image, search_config=config.search_type.image, regenerate=regenerate, verbose=verbose)
|
||||
|
||||
return model
|
||||
|
||||
|
||||
def initialize_processor(config: FullConfig):
|
||||
if not config.processor:
|
||||
return
|
||||
|
||||
processor_config = ProcessorConfigModel()
|
||||
|
||||
# Initialize Conversation Processor
|
||||
processor_config.conversation = ConversationProcessorConfigModel(config.processor.conversation, verbose)
|
||||
|
||||
conversation_logfile = processor_config.conversation.conversation_logfile
|
||||
if processor_config.conversation.verbose:
|
||||
print('INFO:\tLoading conversation logs from disk...')
|
||||
|
||||
if conversation_logfile.expanduser().absolute().is_file():
|
||||
# Load Metadata Logs from Conversation Logfile
|
||||
with open(get_absolute_path(conversation_logfile), 'r') as f:
|
||||
processor_config.conversation.meta_log = json.load(f)
|
||||
|
||||
print('INFO:\tConversation logs loaded from disk.')
|
||||
else:
|
||||
# Initialize Conversation Logs
|
||||
processor_config.conversation.meta_log = {}
|
||||
processor_config.conversation.chat_session = ""
|
||||
|
||||
return processor_config
|
||||
|
||||
|
||||
@app.on_event('shutdown')
|
||||
def shutdown_event():
|
||||
# No need to create empty log file
|
||||
if not (processor_config and processor_config.conversation and processor_config.conversation.meta_log):
|
||||
return
|
||||
elif processor_config.conversation.verbose:
|
||||
print('INFO:\tSaving conversation logs to disk...')
|
||||
|
||||
# Summarize Conversation Logs for this Session
|
||||
chat_session = processor_config.conversation.chat_session
|
||||
openai_api_key = processor_config.conversation.openai_api_key
|
||||
conversation_log = processor_config.conversation.meta_log
|
||||
session = {
|
||||
"summary": summarize(chat_session, summary_type="chat", api_key=openai_api_key),
|
||||
"session-start": conversation_log.get("session", [{"session-end": 0}])[-1]["session-end"],
|
||||
"session-end": len(conversation_log["chat"])
|
||||
}
|
||||
if 'session' in conversation_log:
|
||||
conversation_log['session'].append(session)
|
||||
else:
|
||||
conversation_log['session'] = [session]
|
||||
|
||||
# Save Conversation Metadata Logs to Disk
|
||||
conversation_logfile = get_absolute_path(processor_config.conversation.conversation_logfile)
|
||||
with open(conversation_logfile, "w+", encoding='utf-8') as logfile:
|
||||
json.dump(conversation_log, logfile)
|
||||
|
||||
print('INFO:\tConversation logs saved to disk.')
|
||||
|
||||
app.mount("/static", StaticFiles(directory=constants.web_directory), name="static")
|
||||
app.include_router(router)
|
||||
|
||||
def run():
|
||||
# Turn Tokenizers Parallelism Off. App does not support it.
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = 'false'
|
||||
|
||||
# Load config from CLI
|
||||
args = cli(sys.argv[1:])
|
||||
state.cli_args = sys.argv[1:]
|
||||
args = cli(state.cli_args)
|
||||
set_state(args)
|
||||
|
||||
# Stores the file path to the config file.
|
||||
global config_file
|
||||
config_file = args.config_file
|
||||
|
||||
# Store the raw config data.
|
||||
global config
|
||||
config = args.config
|
||||
|
||||
# Store the verbose flag
|
||||
global verbose
|
||||
verbose = args.verbose
|
||||
|
||||
# Set device to GPU if available
|
||||
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
|
||||
|
||||
# Initialize the search model from Config
|
||||
global model
|
||||
model = initialize_search(args.config, args.regenerate, device=device)
|
||||
|
||||
# Initialize Processor from Config
|
||||
global processor_config
|
||||
processor_config = initialize_processor(args.config)
|
||||
|
||||
# Start Application Server
|
||||
if args.socket:
|
||||
uvicorn.run(app, proxy_headers=True, uds=args.socket)
|
||||
if args.no_gui:
|
||||
# Start Server
|
||||
configure_server(args, required=True)
|
||||
start_server(app, host=args.host, port=args.port, socket=args.socket)
|
||||
else:
|
||||
uvicorn.run(app, host=args.host, port=args.port)
|
||||
# Setup GUI
|
||||
gui = QtWidgets.QApplication([])
|
||||
main_window = MainWindow(args.config_file)
|
||||
|
||||
# System tray is only available on Windows, MacOS.
|
||||
# On Linux (Gnome) the System tray is not supported.
|
||||
# Since only the Main Window is available
|
||||
# Quitting it should quit the application
|
||||
if system() in ['Windows', 'Darwin']:
|
||||
gui.setQuitOnLastWindowClosed(False)
|
||||
tray = create_system_tray(gui, main_window)
|
||||
tray.show()
|
||||
|
||||
# Setup Server
|
||||
configure_server(args, required=False)
|
||||
server = ServerThread(app, args.host, args.port, args.socket)
|
||||
|
||||
# Show Main Window on First Run Experience or if on Linux
|
||||
if args.config is None or system() not in ['Windows', 'Darwin']:
|
||||
main_window.show()
|
||||
|
||||
# Setup Signal Handlers
|
||||
signal.signal(signal.SIGINT, sigint_handler)
|
||||
# Invoke python Interpreter every 500ms to handle signals
|
||||
timer = QTimer()
|
||||
timer.start(500)
|
||||
timer.timeout.connect(lambda: None)
|
||||
|
||||
# Start Application
|
||||
server.start()
|
||||
gui.aboutToQuit.connect(server.terminate)
|
||||
|
||||
# Close Splash Screen if still open
|
||||
if system() != 'Darwin':
|
||||
try:
|
||||
import pyi_splash
|
||||
# Update the text on the splash screen
|
||||
pyi_splash.update_text("Khoj setup complete")
|
||||
# Close Splash Screen
|
||||
pyi_splash.close()
|
||||
except:
|
||||
pass
|
||||
|
||||
gui.exec()
|
||||
|
||||
|
||||
def sigint_handler(*args):
    """Signal handler: announce the shutdown, then ask the Qt application to quit.

    The positional arguments supplied by the signal machinery are ignored.
    """
    farewell = "\nShutting down Khoj..."
    print(farewell)
    QtWidgets.QApplication.quit()
|
||||
|
||||
|
||||
def set_state(args):
    """Copy the parsed command line settings onto the shared `state` module.

    Args:
        args: Parsed CLI arguments exposing `config_file`, `config`,
            `verbose`, `host` and `port`.
    """
    # Same fields, same order as assigning them one by one
    for field in ('config_file', 'config', 'verbose', 'host', 'port'):
        setattr(state, field, getattr(args, field))
|
||||
|
||||
|
||||
def start_server(app, host=None, port=None, socket=None):
    """Run `app` under uvicorn.

    Args:
        app: Application to serve.
        host: Host to bind when no socket is given.
        port: Port to bind when no socket is given.
        socket: When truthy, passed to uvicorn as `uds` (with proxy headers
            enabled) instead of binding to host and port.
    """
    if socket:
        server_options = {'proxy_headers': True, 'uds': socket}
    else:
        server_options = {'host': host, 'port': port}
    uvicorn.run(app, **server_options)
|
||||
|
||||
|
||||
class ServerThread(QThread):
    """QThread that runs the application server via `start_server`."""

    def __init__(self, app, host=None, port=None, socket=None):
        """Remember the application and the address (host/port or socket) to serve it on."""
        super().__init__()
        self.app, self.host, self.port, self.socket = app, host, port, socket

    def __del__(self):
        # Wait on the thread (QThread.wait) when the object is being destroyed
        self.wait()

    def run(self):
        """Thread entry point: start the server with the stored settings."""
        start_server(self.app, host=self.host, port=self.port, socket=self.socket)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
run()
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Standard Packages
|
||||
import re
|
||||
import json
|
||||
import argparse
|
||||
import pathlib
|
||||
|
@ -71,14 +72,14 @@ def extract_org_entries(org_files):
|
|||
return entries
|
||||
|
||||
|
||||
def convert_org_entries_to_jsonl(entries, verbose=0):
|
||||
def convert_org_entries_to_jsonl(entries, verbose=0) -> str:
|
||||
"Convert each Org-Mode entries to JSON and collate as JSONL"
|
||||
jsonl = ''
|
||||
for entry in entries:
|
||||
entry_dict = dict()
|
||||
|
||||
# Ignore title notes i.e notes with just headings and empty body
|
||||
if not entry.Body() or entry.Body().strip(empty_escape_sequences) == "":
|
||||
if not entry.Body() or re.sub(r'\n|\t|\r| ', '', entry.Body()) == "":
|
||||
continue
|
||||
|
||||
entry_dict["compiled"] = f'{entry.Heading()}.'
|
||||
|
|
|
@ -38,6 +38,8 @@ import datetime
|
|||
from pathlib import Path
|
||||
from os.path import relpath
|
||||
|
||||
indent_regex = re.compile(r'^\s*')
|
||||
|
||||
def normalize_filename(filename):
|
||||
file_relative_to_home = f'~/{relpath(filename, start=Path.home())}'
|
||||
escaped_filename = f'{file_relative_to_home}'.replace("[","\[").replace("]","\]")
|
||||
|
@ -370,7 +372,9 @@ class Orgnode(object):
|
|||
n = ''
|
||||
for _ in range(0, self.level):
|
||||
n = n + '*'
|
||||
n = n + ' ' + self.todo + ' '
|
||||
n = n + ' '
|
||||
if self.todo:
|
||||
n = n + self.todo + ' '
|
||||
if self.prty:
|
||||
n = n + '[#' + self.prty + '] '
|
||||
n = n + self.headline
|
||||
|
@ -382,7 +386,12 @@ class Orgnode(object):
|
|||
n = n + closecolon
|
||||
n = n + "\n"
|
||||
|
||||
# Get body indentation from first line of body
|
||||
indent = indent_regex.match(self.body).group()
|
||||
|
||||
# Output Closed Date, Scheduled Date, Deadline Date
|
||||
if self.closed or self.scheduled or self.deadline:
|
||||
n = n + indent
|
||||
if self.closed:
|
||||
n = n + f'CLOSED: [{self.closed.strftime("%Y-%m-%d %a")}] '
|
||||
if self.scheduled:
|
||||
|
@ -393,10 +402,10 @@ class Orgnode(object):
|
|||
n = n + '\n'
|
||||
|
||||
# Output Property Drawer
|
||||
n = n + ":PROPERTIES:\n"
|
||||
n = n + indent + ":PROPERTIES:\n"
|
||||
for key, value in self.properties.items():
|
||||
n = n + f":{key}: {value}\n"
|
||||
n = n + ":END:\n"
|
||||
n = n + indent + f":{key}: {value}\n"
|
||||
n = n + indent + ":END:\n"
|
||||
|
||||
n = n + self.body
|
||||
|
||||
|
|
223
src/router.py
Normal file
223
src/router.py
Normal file
|
@ -0,0 +1,223 @@
|
|||
# Standard Packages
|
||||
import yaml
|
||||
import json
|
||||
import time
|
||||
from typing import Optional
|
||||
from functools import lru_cache
|
||||
|
||||
# External Packages
|
||||
from fastapi import APIRouter
|
||||
from fastapi import Request
|
||||
from fastapi.responses import HTMLResponse, FileResponse
|
||||
from fastapi.templating import Jinja2Templates
|
||||
|
||||
# Internal Packages
|
||||
from src.configure import configure_search
|
||||
from src.search_type import image_search, text_search
|
||||
from src.processor.conversation.gpt import converse, extract_search_type, message_to_log, message_to_prompt, understand, summarize
|
||||
from src.search_filter.explicit_filter import ExplicitFilter
|
||||
from src.search_filter.date_filter import DateFilter
|
||||
from src.utils.rawconfig import FullConfig
|
||||
from src.utils.config import SearchType
|
||||
from src.utils.helpers import get_absolute_path, get_from_dict
|
||||
from src.utils import state, constants
|
||||
|
||||
# Router that the route handlers in this module register themselves on
router = APIRouter()
|
||||
|
||||
# Template loader rooted at constants.web_directory; used to render config.html
templates = Jinja2Templates(directory=constants.web_directory)
|
||||
|
||||
@router.get("/", response_class=FileResponse)
def index():
    """Serve index.html from the web directory as the landing page."""
    landing_page = constants.web_directory / "index.html"
    return FileResponse(landing_page)
|
||||
|
||||
@router.get('/config', response_class=HTMLResponse)
def config_page(request: Request):
    """Render the config.html template for the incoming request."""
    template_context = {'request': request}
    return templates.TemplateResponse("config.html", context=template_context)
|
||||
|
||||
@router.get('/config/data', response_model=FullConfig)
def config_data():
    """Return the configuration currently held in the shared application state."""
    current_config = state.config
    return current_config
|
||||
|
||||
@router.post('/config/data')
async def config_data(updated_config: FullConfig):
    """Replace the in-memory configuration and write it to the config file.

    Args:
        updated_config: The new configuration, validated against FullConfig.

    Returns:
        The configuration now stored in the shared application state.
    """
    # NOTE(review): this handler reuses the Python name of the GET handler for
    # the same path. Both routes are registered when their decorators run, but
    # the module attribute `config_data` ends up bound to this function.
    state.config = updated_config

    # Serialize through JSON (using field aliases) and reload it, so that plain
    # data rather than the config object itself is dumped as YAML.
    # The `with` block closes the file; no explicit close() is needed.
    with open(state.config_file, 'w') as outfile:
        yaml.dump(yaml.safe_load(state.config.json(by_alias=True)), outfile)

    return state.config
|
||||
|
||||
@router.get('/search')
@lru_cache(maxsize=100)
def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None, r: Optional[bool] = False):
    """Search the loaded content for the query `q`.

    Responses are memoized by `lru_cache`, keyed on the (q, n, t, r) arguments.

    Args:
        q: Query string. A missing or empty query returns an empty dict.
        n: Number of results to collate.
        t: Content type to search. When None, every content type that has a
            search model loaded is queried in turn.
        r: Forwarded to the text searches as `rank_results`.

    Returns:
        The collated results of the last content type that was searched, or an
        empty dict if none was. Each searched type overwrites the results (and
        the timings) of the one before it.
    """
    if q is None or q == '':
        print('No query param (q) passed in API call to initiate search')
        return {}

    # initialize variables
    user_query = q
    results_count = n
    results = {}
    query_start, query_end, collate_start, collate_end = None, None, None, None

    # Text based content types, in the order they are searched:
    # (search type, attribute holding its model on state.model, filters in application order)
    text_searches = (
        (SearchType.Org, 'orgmode_search', (DateFilter, ExplicitFilter)),
        (SearchType.Music, 'music_search', (DateFilter, ExplicitFilter)),
        (SearchType.Markdown, 'markdown_search', (ExplicitFilter, DateFilter)),
        (SearchType.Ledger, 'ledger_search', (ExplicitFilter, DateFilter)),
        (SearchType.Panchayat, 'panchayat_search', (ExplicitFilter, DateFilter)),
    )

    for search_type, model_attribute, filter_types in text_searches:
        # Skip content types that were not asked for
        if t is not None and t != search_type:
            continue

        # Skip content types without a loaded search model
        search_model = getattr(state.model, model_attribute)
        if not search_model:
            continue

        # query the content type
        query_start = time.time()
        hits, entries = text_search.query(
            user_query,
            search_model,
            rank_results=r,
            filters=[filter_type() for filter_type in filter_types],
            verbose=state.verbose)
        query_end = time.time()

        # collate and return results
        collate_start = time.time()
        results = text_search.collate_results(hits, entries, results_count)
        collate_end = time.time()

    if (t == SearchType.Image or t is None) and state.model.image_search:
        # query images
        query_start = time.time()
        hits = image_search.query(user_query, results_count, state.model.image_search)
        output_directory = constants.web_directory / 'images'
        query_end = time.time()

        # collate and return results
        collate_start = time.time()
        results = image_search.collate_results(
            hits,
            image_names=state.model.image_search.image_names,
            output_directory=output_directory,
            image_files_url='/static/images',
            count=results_count)
        collate_end = time.time()

    if state.verbose > 1:
        if query_start and query_end:
            print(f"Query took {query_end - query_start:.3f} seconds")
        if collate_start and collate_end:
            print(f"Collating results took {collate_end - collate_start:.3f} seconds")

    return results
|
||||
|
||||
|
||||
@router.get('/reload')
def reload(t: Optional[SearchType] = None):
    """Reconfigure the search models through `configure_search` with regenerate=False.

    Args:
        t: Content type to reconfigure; forwarded to `configure_search` as is.

    Returns:
        A status dict confirming that the reload completed.
    """
    state.model = configure_search(state.model, state.config, regenerate=False, t=t)

    # `search` memoizes its responses with lru_cache. Entries computed against
    # the previous models would otherwise keep being served for repeated queries.
    search.cache_clear()

    return {'status': 'ok', 'message': 'reload completed'}
|
||||
|
||||
|
||||
@router.get('/regenerate')
def regenerate(t: Optional[SearchType] = None):
    """Reconfigure the search models through `configure_search` with regenerate=True.

    Args:
        t: Content type to regenerate; forwarded to `configure_search` as is.

    Returns:
        A status dict confirming that the regeneration completed.
    """
    state.model = configure_search(state.model, state.config, regenerate=True, t=t)

    # `search` memoizes its responses with lru_cache. Entries computed against
    # the previous models would otherwise keep being served for repeated queries.
    search.cache_clear()

    return {'status': 'ok', 'message': 'regeneration completed'}
|
||||
|
||||
|
||||
@router.get('/beta/search')
def search_beta(q: str, n: Optional[int] = 1):
    """Search using the content type that GPT extracts from the query.

    Args:
        q: Query string.
        n: Number of results to request from `search`.

    Returns:
        A dict with the status, the search results and the extracted search type.
    """
    # Extract Search Type using GPT
    openai_api_key = state.processor_config.conversation.openai_api_key
    type_metadata = extract_search_type(q, api_key=openai_api_key, verbose=state.verbose)
    search_type = get_from_dict(type_metadata, "search-type")

    # Search
    matches = search(q, n=n, t=SearchType(search_type))

    # Return response
    return {'status': 'ok', 'result': matches, 'type': search_type}
|
||||
|
||||
|
||||
@router.get('/beta/chat')
def chat(q: str):
    """Answer the message `q` with GPT and record the exchange in the conversation state.

    When the understood intent has memory-type "notes", the top Org search
    result for the intent's query is summarized. Otherwise the message is
    answered conversationally using the chat session so far.

    Returns:
        A dict with the status and the GPT response.
    """
    # Load Conversation History
    conversation = state.processor_config.conversation
    chat_session = conversation.chat_session
    meta_log = conversation.meta_log
    api_key = conversation.openai_api_key

    # Converse with OpenAI GPT
    metadata = understand(q, api_key=api_key, verbose=state.verbose)
    if state.verbose > 1:
        print(f'Understood: {get_from_dict(metadata, "intent")}')

    wants_notes = get_from_dict(metadata, "intent", "memory-type") == "notes"
    if not wants_notes:
        gpt_response = converse(q, chat_session, api_key=api_key)
    else:
        query = get_from_dict(metadata, "intent", "query")
        result_list = search(query, n=1, t=SearchType.Org)
        collated_result = "\n".join(item["entry"] for item in result_list)
        if state.verbose > 1:
            print(f'Semantically Similar Notes:\n{collated_result}')
        gpt_response = summarize(collated_result, summary_type="notes", user_query=q, api_key=api_key)

    # Update Conversation History
    conversation.chat_session = message_to_prompt(q, chat_session, gpt_message=gpt_response)
    conversation.meta_log['chat'] = message_to_log(q, metadata, gpt_response, meta_log.get('chat', []))

    return {'status': 'ok', 'response': gpt_response}
|
||||
|
||||
|
||||
@router.on_event('shutdown')
def shutdown_event():
    """Summarize this session's chat and save the conversation log to disk on shutdown.

    Does nothing when no conversation processor is configured or its
    metadata log is empty.
    """
    # No need to create empty log file
    if not (state.processor_config and state.processor_config.conversation and state.processor_config.conversation.meta_log):
        return
    elif state.processor_config.conversation.verbose:
        print('INFO:\tSaving conversation logs to disk...')

    # Summarize Conversation Logs for this Session
    chat_session = state.processor_config.conversation.chat_session
    openai_api_key = state.processor_config.conversation.openai_api_key
    conversation_log = state.processor_config.conversation.meta_log

    # A log without prior sessions (missing or empty list) starts at 0
    previous_sessions = conversation_log.get("session") or []
    session_start = previous_sessions[-1]["session-end"] if previous_sessions else 0

    session = {
        "summary": summarize(chat_session, summary_type="chat", api_key=openai_api_key),
        "session-start": session_start,
        # The log may hold sessions but no chat messages; treat that as zero messages
        "session-end": len(conversation_log.get("chat", []))
    }
    if 'session' in conversation_log:
        conversation_log['session'].append(session)
    else:
        conversation_log['session'] = [session]

    # Save Conversation Metadata Logs to Disk
    conversation_logfile = get_absolute_path(state.processor_config.conversation.conversation_logfile)
    with open(conversation_logfile, "w+", encoding='utf-8') as logfile:
        json.dump(conversation_log, logfile)

    print('INFO:\tConversation logs saved to disk.')
|
|
@ -211,9 +211,9 @@ def collate_results(hits, image_names, output_directory, image_files_url, count=
|
|||
# Add the image metadata to the results
|
||||
results += [{
|
||||
"entry": f'{image_files_url}/{target_image_name}',
|
||||
"score": f"{hit['score']:.3f}",
|
||||
"image_score": f"{hit['image_score']:.3f}",
|
||||
"metadata_score": f"{hit['metadata_score']:.3f}",
|
||||
"score": f"{hit['score']:.9f}",
|
||||
"image_score": f"{hit['image_score']:.9f}",
|
||||
"metadata_score": f"{hit['metadata_score']:.9f}",
|
||||
}]
|
||||
|
||||
return results
|
||||
|
|
|
@ -9,6 +9,7 @@ import torch
|
|||
from sentence_transformers import SentenceTransformer, CrossEncoder, util
|
||||
|
||||
# Internal Packages
|
||||
from src.utils import state
|
||||
from src.utils.helpers import get_absolute_path, resolve_absolute_path, load_model
|
||||
from src.utils.config import TextSearchModel
|
||||
from src.utils.rawconfig import TextSearchConfig, TextContentConfig
|
||||
|
@ -32,13 +33,15 @@ def initialize_model(search_config: TextSearchConfig):
|
|||
bi_encoder = load_model(
|
||||
model_dir = search_config.model_directory,
|
||||
model_name = search_config.encoder,
|
||||
model_type = SentenceTransformer)
|
||||
model_type = SentenceTransformer,
|
||||
device=f'{state.device}')
|
||||
|
||||
# The cross-encoder re-ranks the results to improve quality
|
||||
cross_encoder = load_model(
|
||||
model_dir = search_config.model_directory,
|
||||
model_name = search_config.cross_encoder,
|
||||
model_type = CrossEncoder)
|
||||
model_type = CrossEncoder,
|
||||
device=f'{state.device}')
|
||||
|
||||
return bi_encoder, cross_encoder, top_k
|
||||
|
||||
|
@ -50,17 +53,16 @@ def extract_entries(jsonl_file, verbose=0):
|
|||
in load_jsonl(jsonl_file, verbose=verbose)]
|
||||
|
||||
|
||||
def compute_embeddings(entries, bi_encoder, embeddings_file, regenerate=False, device='cpu', verbose=0):
|
||||
def compute_embeddings(entries, bi_encoder, embeddings_file, regenerate=False, verbose=0):
|
||||
"Compute (and Save) Embeddings or Load Pre-Computed Embeddings"
|
||||
# Load pre-computed embeddings from file if exists
|
||||
if embeddings_file.exists() and not regenerate:
|
||||
corpus_embeddings = torch.load(get_absolute_path(embeddings_file))
|
||||
corpus_embeddings = torch.load(get_absolute_path(embeddings_file), map_location=state.device)
|
||||
if verbose > 0:
|
||||
print(f"Loaded embeddings from {embeddings_file}")
|
||||
|
||||
else: # Else compute the corpus_embeddings from scratch, which can take a while
|
||||
corpus_embeddings = bi_encoder.encode([entry['compiled'] for entry in entries], convert_to_tensor=True, show_progress_bar=True)
|
||||
corpus_embeddings.to(device)
|
||||
corpus_embeddings = bi_encoder.encode([entry['compiled'] for entry in entries], convert_to_tensor=True, device=state.device, show_progress_bar=True)
|
||||
corpus_embeddings = util.normalize_embeddings(corpus_embeddings)
|
||||
torch.save(corpus_embeddings, embeddings_file)
|
||||
if verbose > 0:
|
||||
|
@ -69,7 +71,7 @@ def compute_embeddings(entries, bi_encoder, embeddings_file, regenerate=False, d
|
|||
return corpus_embeddings
|
||||
|
||||
|
||||
def query(raw_query: str, model: TextSearchModel, rank_results=False, device='cpu', filters: list = [], verbose=0):
|
||||
def query(raw_query: str, model: TextSearchModel, rank_results=False, filters: list = [], verbose=0):
|
||||
"Search for entries that answer the query"
|
||||
query = raw_query
|
||||
|
||||
|
@ -99,19 +101,18 @@ def query(raw_query: str, model: TextSearchModel, rank_results=False, device='cp
|
|||
|
||||
# Encode the query using the bi-encoder
|
||||
start = time.time()
|
||||
question_embedding = model.bi_encoder.encode([query], convert_to_tensor=True)
|
||||
question_embedding.to(device)
|
||||
question_embedding = model.bi_encoder.encode([query], convert_to_tensor=True, device=state.device)
|
||||
question_embedding = util.normalize_embeddings(question_embedding)
|
||||
end = time.time()
|
||||
if verbose > 1:
|
||||
print(f"Query Encode Time: {end - start:.3f} seconds")
|
||||
print(f"Query Encode Time: {end - start:.3f} seconds on device: {state.device}")
|
||||
|
||||
# Find relevant entries for the query
|
||||
start = time.time()
|
||||
hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=model.top_k, score_function=util.dot_score)[0]
|
||||
end = time.time()
|
||||
if verbose > 1:
|
||||
print(f"Search Time: {end - start:.3f} seconds")
|
||||
print(f"Search Time: {end - start:.3f} seconds on device: {state.device}")
|
||||
|
||||
# Score all retrieved entries using the cross-encoder
|
||||
if rank_results:
|
||||
|
@ -120,7 +121,7 @@ def query(raw_query: str, model: TextSearchModel, rank_results=False, device='cp
|
|||
cross_scores = model.cross_encoder.predict(cross_inp)
|
||||
end = time.time()
|
||||
if verbose > 1:
|
||||
print(f"Cross-Encoder Predict Time: {end - start:.3f} seconds")
|
||||
print(f"Cross-Encoder Predict Time: {end - start:.3f} seconds on device: {state.device}")
|
||||
|
||||
# Store cross-encoder scores in results dictionary for ranking
|
||||
for idx in range(len(cross_scores)):
|
||||
|
@ -133,7 +134,7 @@ def query(raw_query: str, model: TextSearchModel, rank_results=False, device='cp
|
|||
hits.sort(key=lambda x: x['cross-score'], reverse=True) # sort by cross-encoder score
|
||||
end = time.time()
|
||||
if verbose > 1:
|
||||
print(f"Rank Time: {end - start:.3f} seconds")
|
||||
print(f"Rank Time: {end - start:.3f} seconds on device: {state.device}")
|
||||
|
||||
return hits, entries
|
||||
|
||||
|
@ -166,7 +167,7 @@ def collate_results(hits, entries, count=5):
|
|||
in hits[0:count]]
|
||||
|
||||
|
||||
def setup(text_to_jsonl, config: TextContentConfig, search_config: TextSearchConfig, regenerate: bool, device='cpu', verbose: bool=False) -> TextSearchModel:
|
||||
def setup(text_to_jsonl, config: TextContentConfig, search_config: TextSearchConfig, regenerate: bool, verbose: bool=False) -> TextSearchModel:
|
||||
# Initialize Model
|
||||
bi_encoder, cross_encoder, top_k = initialize_model(search_config)
|
||||
|
||||
|
@ -181,7 +182,7 @@ def setup(text_to_jsonl, config: TextContentConfig, search_config: TextSearchCon
|
|||
|
||||
# Compute or Load Embeddings
|
||||
config.embeddings_file = resolve_absolute_path(config.embeddings_file)
|
||||
corpus_embeddings = compute_embeddings(entries, bi_encoder, config.embeddings_file, regenerate=regenerate, device=device, verbose=verbose)
|
||||
corpus_embeddings = compute_embeddings(entries, bi_encoder, config.embeddings_file, regenerate=regenerate, verbose=verbose)
|
||||
|
||||
return TextSearchModel(entries, corpus_embeddings, bi_encoder, cross_encoder, top_k, verbose=verbose)
|
||||
|
||||
|
|
|
@ -2,17 +2,15 @@
|
|||
import argparse
|
||||
import pathlib
|
||||
|
||||
# External Packages
|
||||
import yaml
|
||||
|
||||
# Internal Packages
|
||||
from src.utils.helpers import is_none_or_empty, get_absolute_path, resolve_absolute_path
|
||||
from src.utils.rawconfig import FullConfig
|
||||
from src.utils.helpers import resolve_absolute_path
|
||||
from src.utils.yaml import parse_config_from_file
|
||||
|
||||
def cli(args=None):
|
||||
# Setup Argument Parser for the Commandline Interface
|
||||
parser = argparse.ArgumentParser(description="Start Khoj; A Natural Language Search Engine for your personal Notes, Transactions and Photos")
|
||||
parser.add_argument('config_file', type=pathlib.Path, help="YAML file to configure Khoj")
|
||||
parser.add_argument('--config-file', '-c', default='~/.khoj/khoj.yml', type=pathlib.Path, help="YAML file to configure Khoj")
|
||||
parser.add_argument('--no-gui', action='store_true', default=False, help="Do not show native desktop GUI. Default: false")
|
||||
parser.add_argument('--regenerate', action='store_true', default=False, help="Regenerate model embeddings from source files. Default: false")
|
||||
parser.add_argument('--verbose', '-v', action='count', default=0, help="Show verbose conversion logs. Default: 0")
|
||||
parser.add_argument('--host', type=str, default='127.0.0.1', help="Host address of the server. Default: 127.0.0.1")
|
||||
|
@ -21,15 +19,12 @@ def cli(args=None):
|
|||
|
||||
args = parser.parse_args(args)
|
||||
|
||||
if not resolve_absolute_path(args.config_file).exists():
|
||||
raise ValueError(f"Config file {args.config_file} does not exist")
|
||||
# Normalize config_file path to absolute path
|
||||
args.config_file = resolve_absolute_path(args.config_file)
|
||||
|
||||
# Read Config from YML file
|
||||
config_from_file = None
|
||||
with open(get_absolute_path(args.config_file), 'r', encoding='utf-8') as config_file:
|
||||
config_from_file = yaml.safe_load(config_file)
|
||||
if not args.config_file.exists():
|
||||
args.config = None
|
||||
else:
|
||||
args.config = parse_config_from_file(args.config_file)
|
||||
|
||||
# Parse, Validate Config in YML file
|
||||
args.config = FullConfig.parse_obj(config_from_file)
|
||||
|
||||
return args
|
||||
return args
|
||||
|
|
|
@ -16,6 +16,10 @@ class SearchType(str, Enum):
|
|||
Image = "image"
|
||||
|
||||
|
||||
class ProcessorType(str, Enum):
|
||||
Conversation = "conversation"
|
||||
|
||||
|
||||
class TextSearchModel():
|
||||
def __init__(self, entries, corpus_embeddings, bi_encoder, cross_encoder, top_k, verbose):
|
||||
self.entries = entries
|
||||
|
|
|
@ -1 +1,64 @@
|
|||
empty_escape_sequences = r'\n|\r\t '
|
||||
from pathlib import Path
|
||||
|
||||
app_root_directory = Path(__file__).parent.parent.parent
|
||||
web_directory = app_root_directory / 'src/interface/web/'
|
||||
empty_escape_sequences = r'\n|\r\t '
|
||||
|
||||
# default app config to use
|
||||
default_config = {
|
||||
'content-type': {
|
||||
'org': {
|
||||
'input-files': None,
|
||||
'input-filter': None,
|
||||
'compressed-jsonl': '~/.khoj/content/org/org.jsonl.gz',
|
||||
'embeddings-file': '~/.khoj/content/org/org_embeddings.pt'
|
||||
},
|
||||
'markdown': {
|
||||
'input-files': None,
|
||||
'input-filter': None,
|
||||
'compressed-jsonl': '~/.khoj/content/markdown/markdown.jsonl.gz',
|
||||
'embeddings-file': '~/.khoj/content/markdown/markdown_embeddings.pt'
|
||||
},
|
||||
'ledger': {
|
||||
'input-files': None,
|
||||
'input-filter': None,
|
||||
'compressed-jsonl': '~/.khoj/content/ledger/ledger.jsonl.gz',
|
||||
'embeddings-file': '~/.khoj/content/ledger/ledger_embeddings.pt'
|
||||
},
|
||||
'image': {
|
||||
'input-directories': None,
|
||||
'input-filter': None,
|
||||
'embeddings-file': '~/.khoj/content/image/image_embeddings.pt',
|
||||
'batch-size': 50,
|
||||
'use-xmp-metadata': False
|
||||
},
|
||||
'music': {
|
||||
'input-files': None,
|
||||
'input-filter': None,
|
||||
'compressed-jsonl': '~/.khoj/content/music/music.jsonl.gz',
|
||||
'embeddings-file': '~/.khoj/content/music/music_embeddings.pt'
|
||||
}
|
||||
},
|
||||
'search-type': {
|
||||
'symmetric': {
|
||||
'encoder': 'sentence-transformers/all-MiniLM-L6-v2',
|
||||
'cross-encoder': 'cross-encoder/ms-marco-MiniLM-L-6-v2',
|
||||
'model_directory': '~/.khoj/search/symmetric/'
|
||||
},
|
||||
'asymmetric': {
|
||||
'encoder': 'sentence-transformers/multi-qa-MiniLM-L6-cos-v1',
|
||||
'cross-encoder': 'cross-encoder/ms-marco-MiniLM-L-6-v2',
|
||||
'model_directory': '~/.khoj/search/asymmetric/'
|
||||
},
|
||||
'image': {
|
||||
'encoder': 'sentence-transformers/clip-ViT-B-32',
|
||||
'model_directory': '~/.khoj/search/image/'
|
||||
}
|
||||
},
|
||||
'processor': {
|
||||
'conversation': {
|
||||
'openai-api-key': None,
|
||||
'conversation-logfile': '~/.khoj/processor/conversation/conversation_logs.json'
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,10 +1,11 @@
|
|||
# Standard Packages
|
||||
import pathlib
|
||||
import sys
|
||||
from os.path import join
|
||||
|
||||
|
||||
def is_none_or_empty(item):
    """Return True if item is None or an empty sized iterable, else False.

    The empty string is covered by the length check, as are empty lists,
    tuples, dicts and sets. Iterables that cannot report a length (iterators,
    generators) are treated as non-empty rather than raising TypeError.
    """
    # Identity check: `== None` can be hijacked by a custom __eq__
    if item is None:
        return True

    # Only call len() on objects that actually support it
    is_sized_iterable = hasattr(item, '__iter__') and hasattr(item, '__len__')
    return is_sized_iterable and len(item) == 0
|
||||
|
||||
|
||||
def to_snake_case_from_dash(item: str):
|
||||
|
@ -40,18 +41,23 @@ def merge_dicts(priority_dict: dict, default_dict: dict):
|
|||
return merged_dict
|
||||
|
||||
|
||||
def load_model(model_name, model_dir, model_type, device:str=None):
    """Load model from disk or huggingface.

    model_name: name of the model; any "/" is replaced by "_" to form its directory name.
    model_dir: directory holding saved models, or None to skip the on-disk copy.
    model_type: callable that builds the model from a path or name and a `device` keyword.
    device: passed through unchanged to model_type.

    Returns the constructed model. When model_dir is given and holds no copy of
    the model yet, the freshly loaded model is saved there.
    """
    # Without a model directory there is nothing to look up or save on disk
    if model_dir is None:
        return model_type(model_name, device=device)

    # Construct model path
    model_path = join(model_dir, model_name.replace("/", "_"))

    # Load model from model_path if it exists there
    if resolve_absolute_path(model_path).exists():
        return model_type(get_absolute_path(model_path), device=device)

    # Else load the model from the model_name
    model = model_type(model_name, device=device)

    # Save to the same absolute path the load branch reads from, so the copy is found next time.
    # NOTE(review): assumes get_absolute_path and resolve_absolute_path resolve to the same location - confirm
    model.save(get_absolute_path(model_path))

    return model
|
||||
|
||||
|
||||
def is_pyinstaller_app():
    "Tell whether the app runs as a PyInstaller bundle: sys.frozen is set and sys._MEIPASS exists"
    frozen = getattr(sys, 'frozen', False)
    return frozen and hasattr(sys, '_MEIPASS')
|
28
src/utils/state.py
Normal file
28
src/utils/state.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
# Standard Packages
|
||||
from packaging import version
|
||||
# External Packages
|
||||
import torch
|
||||
from pathlib import Path
|
||||
|
||||
# Internal Packages
|
||||
from src.utils.config import SearchModels, ProcessorConfigModel
|
||||
from src.utils.rawconfig import FullConfig
|
||||
|
||||
# Application Global State
|
||||
config = FullConfig()
|
||||
model = SearchModels()
|
||||
processor_config = ProcessorConfigModel()
|
||||
config_file: Path = ""
|
||||
verbose: int = 0
|
||||
host: str = None
|
||||
port: int = None
|
||||
cli_args = None
|
||||
|
||||
# Select the compute device once, when this module is imported, in order of preference.
# The torch version is checked before torch.backends.mps is touched.
device = torch.device(
    # Use CUDA GPU
    "cuda:0" if torch.cuda.is_available()
    # Use Apple M1 Metal Acceleration
    else "mps" if version.parse(torch.__version__) >= version.parse("1.13.0.dev") and torch.backends.mps.is_available()
    else "cpu"
)
|
38
src/utils/yaml.py
Normal file
38
src/utils/yaml.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
# Standard Packages
|
||||
from pathlib import Path
|
||||
|
||||
# External Packages
|
||||
import yaml
|
||||
|
||||
# Internal Packages
|
||||
from src.utils.helpers import get_absolute_path, resolve_absolute_path
|
||||
from src.utils.rawconfig import FullConfig
|
||||
|
||||
# Do not emit tags when dumping to YAML
|
||||
yaml.emitter.Emitter.process_tag = lambda self, *args, **kwargs: None
|
||||
|
||||
def save_config_to_file(yaml_config: dict, yaml_config_file: Path):
    "Dump the config dict as YAML to the given file, creating missing parent directories first"
    # Create output directory, if it doesn't exist
    output_directory = yaml_config_file.parent
    output_directory.mkdir(parents=True, exist_ok=True)

    with yaml_config_file.open('w', encoding='utf-8') as output_stream:
        yaml.safe_dump(yaml_config, stream=output_stream, allow_unicode=True)
|
||||
|
||||
|
||||
def load_config_from_file(yaml_config_file: Path) -> dict:
    "Open the YML file as UTF-8 text and return its safely-loaded contents"
    with open(yaml_config_file, mode='r', encoding='utf-8') as input_stream:
        return yaml.safe_load(input_stream)
|
||||
|
||||
|
||||
def parse_config_from_string(yaml_config: dict) -> FullConfig:
    "Parse and validate an already-loaded config (annotated as a dict) into a FullConfig"
    parsed_config = FullConfig.parse_obj(yaml_config)
    return parsed_config
|
||||
|
||||
|
||||
def parse_config_from_file(yaml_config_file):
    "Load the YML file at the given path, then parse and validate its contents"
    raw_config = load_config_from_file(yaml_config_file)
    return parse_config_from_string(raw_config)
|
|
@ -1,11 +1,11 @@
|
|||
# Standard Packages
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
# Internal Packages
|
||||
from src.search_type import image_search, text_search
|
||||
from src.utils.rawconfig import ContentConfig, TextContentConfig, ImageContentConfig, SearchConfig, TextSearchConfig, ImageSearchConfig
|
||||
from src.processor.org_mode.org_to_jsonl import org_to_jsonl
|
||||
from src.utils import state
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
|
||||
|
@ -37,7 +37,6 @@ def search_config(tmp_path_factory):
|
|||
@pytest.fixture(scope='session')
|
||||
def model_dir(search_config):
|
||||
model_dir = search_config.asymmetric.model_directory
|
||||
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
|
||||
|
||||
# Generate Image Embeddings from Test Images
|
||||
content_config = ContentConfig()
|
||||
|
@ -56,7 +55,7 @@ def model_dir(search_config):
|
|||
compressed_jsonl = model_dir.joinpath('notes.jsonl.gz'),
|
||||
embeddings_file = model_dir.joinpath('note_embeddings.pt'))
|
||||
|
||||
text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, device=device, verbose=True)
|
||||
text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, verbose=True)
|
||||
|
||||
return model_dir
|
||||
|
||||
|
@ -73,7 +72,7 @@ def content_config(model_dir):
|
|||
content_config.image = ImageContentConfig(
|
||||
input_directories = ['tests/data/images'],
|
||||
embeddings_file = model_dir.joinpath('image_embeddings.pt'),
|
||||
batch_size = 10,
|
||||
batch_size = 1,
|
||||
use_xmp_metadata = False)
|
||||
|
||||
return content_config
|
|
@ -7,34 +7,44 @@ import pytest
|
|||
|
||||
# Internal Packages
|
||||
from src.utils.cli import cli
|
||||
from src.utils.helpers import resolve_absolute_path
|
||||
|
||||
|
||||
# Test
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_cli_minimal_default():
|
||||
# Act
|
||||
actual_args = cli(['tests/data/config.yml'])
|
||||
actual_args = cli([])
|
||||
|
||||
# Assert
|
||||
assert actual_args.config_file == Path('tests/data/config.yml')
|
||||
assert actual_args.config_file == resolve_absolute_path(Path('~/.khoj/khoj.yml'))
|
||||
assert actual_args.regenerate == False
|
||||
assert actual_args.no_gui == False
|
||||
assert actual_args.verbose == 0
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_cli_invalid_config_file_path():
|
||||
# Arrange
|
||||
non_existent_config_file = f"non-existent-khoj-{random()}.yml"
|
||||
|
||||
# Act
|
||||
with pytest.raises(ValueError):
|
||||
cli([f"non-existent-khoj-{random()}.yml"])
|
||||
actual_args = cli([f'-c={non_existent_config_file}'])
|
||||
|
||||
# Assert
|
||||
assert actual_args.config_file == resolve_absolute_path(non_existent_config_file)
|
||||
assert actual_args.config == None
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_cli_config_from_file():
|
||||
# Act
|
||||
actual_args = cli(['tests/data/config.yml',
|
||||
actual_args = cli(['-c=tests/data/config.yml',
|
||||
'--regenerate',
|
||||
'--no-gui',
|
||||
'-vvv'])
|
||||
|
||||
# Assert
|
||||
assert actual_args.config_file == Path('tests/data/config.yml')
|
||||
assert actual_args.config_file == resolve_absolute_path(Path('tests/data/config.yml'))
|
||||
assert actual_args.no_gui == True
|
||||
assert actual_args.regenerate == True
|
||||
assert actual_args.config is not None
|
||||
assert actual_args.config.content_type.org.input_files == [Path('~/first_from_config.org'), Path('~/second_from_config.org')]
|
||||
|
|
|
@ -7,7 +7,8 @@ from fastapi.testclient import TestClient
|
|||
import pytest
|
||||
|
||||
# Internal Packages
|
||||
from src.main import app, model, config
|
||||
from src.main import app
|
||||
from src.utils.state import model, config
|
||||
from src.search_type import text_search, image_search
|
||||
from src.utils.rawconfig import ContentConfig, SearchConfig
|
||||
from src.processor.org_mode import org_to_jsonl
|
||||
|
@ -37,7 +38,7 @@ def test_search_with_valid_content_type(content_config: ContentConfig, search_co
|
|||
config.search_type = search_config
|
||||
|
||||
# config.content_type.image = search_config.image
|
||||
for content_type in ["org", "markdown", "ledger", "music", "image"]:
|
||||
for content_type in ["org", "markdown", "ledger", "music"]:
|
||||
# Act
|
||||
response = client.get(f"/search?q=random&t={content_type}")
|
||||
# Assert
|
||||
|
@ -59,7 +60,7 @@ def test_reload_with_valid_content_type(content_config: ContentConfig, search_co
|
|||
config.content_type = content_config
|
||||
config.search_type = search_config
|
||||
|
||||
for content_type in ["org", "markdown", "ledger", "music", "image"]:
|
||||
for content_type in ["org", "markdown", "ledger", "music"]:
|
||||
# Act
|
||||
response = client.get(f"/reload?t={content_type}")
|
||||
# Assert
|
||||
|
@ -89,7 +90,6 @@ def test_regenerate_with_valid_content_type(content_config: ContentConfig, searc
|
|||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
@pytest.mark.skip(reason="Flaky test. Search doesn't always return expected image path.")
|
||||
def test_image_search(content_config: ContentConfig, search_config: SearchConfig):
|
||||
# Arrange
|
||||
config.content_type = content_config
|
||||
|
|
|
@ -6,7 +6,8 @@ from PIL import Image
|
|||
import pytest
|
||||
|
||||
# Internal Packages
|
||||
from src.main import model, web_directory
|
||||
from src.utils.state import model
|
||||
from src.utils.constants import web_directory
|
||||
from src.search_type import image_search
|
||||
from src.utils.helpers import resolve_absolute_path
|
||||
from src.utils.rawconfig import ContentConfig, SearchConfig
|
||||
|
@ -25,7 +26,6 @@ def test_image_search_setup(content_config: ContentConfig, search_config: Search
|
|||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
@pytest.mark.skip(reason="results inconsistent currently")
|
||||
def test_image_search(content_config: ContentConfig, search_config: SearchConfig):
|
||||
# Arrange
|
||||
output_directory = resolve_absolute_path(web_directory)
|
||||
|
|
62
tests/test_org_to_jsonl.py
Normal file
62
tests/test_org_to_jsonl.py
Normal file
|
@ -0,0 +1,62 @@
|
|||
# Standard Packages
|
||||
import json
|
||||
from posixpath import split
|
||||
|
||||
# Internal Packages
|
||||
from src.processor.org_mode.org_to_jsonl import convert_org_entries_to_jsonl, extract_org_entries
|
||||
from src.utils.helpers import is_none_or_empty
|
||||
|
||||
|
||||
def test_entry_with_empty_body_line_to_jsonl(tmp_path):
    '''Entries whose body is empty must not reach the JSONL output.
    The property drawer does not count as body, and control characters alone leave the body empty.'''
    # Arrange
    org_entry = f'''*** Heading
:PROPERTIES:
:ID: 42-42-42
:END:
\t\r\n
'''
    org_path = create_file(tmp_path, org_entry)

    # Act
    # Extract entries from the org file, then convert them to JSONL in one step
    jsonl_data = convert_org_entries_to_jsonl(extract_org_entries(org_files=[org_path]))

    # Assert
    assert is_none_or_empty(jsonl_data)
|
||||
|
||||
|
||||
def test_entry_with_body_to_jsonl(tmp_path):
    "An entry that has body text must produce exactly one JSONL record."
    # Arrange
    org_entry = f'''*** Heading
:PROPERTIES:
:ID: 42-42-42
:END:
\t\r\nBody Line 1\n
'''
    org_path = create_file(tmp_path, org_entry)

    # Act
    # Extract entries from the org file, then convert them to a JSONL string
    extracted_entries = extract_org_entries(org_files=[org_path])
    jsonl_text = convert_org_entries_to_jsonl(extracted_entries)

    # Decode each JSONL line into its own record
    records = list(map(json.loads, jsonl_text.splitlines()))

    # Assert
    assert len(records) == 1
|
||||
|
||||
|
||||
# Helper Functions
|
||||
def create_file(tmp_path, entry, filename="test.org"):
    """Write entry to tmp_path/notes/<filename> and return the path of the file.

    The notes directory is created if missing and reused if present, so the
    helper can be called several times with the same tmp_path.
    """
    org_file = tmp_path / f"notes/{filename}"
    # exist_ok: a bare mkdir() raises FileExistsError on the second call
    org_file.parent.mkdir(parents=True, exist_ok=True)
    # write_text creates the file itself, so no separate touch() is needed
    org_file.write_text(entry)
    return org_file
|
|
@ -37,7 +37,7 @@ def test_parse_complete_entry(tmp_path):
|
|||
"Test parsing of entry with all important fields"
|
||||
# Arrange
|
||||
entry = f'''
|
||||
*** [#A] Heading :Tag1:TAG2:tag3:
|
||||
*** DONE [#A] Heading :Tag1:TAG2:tag3:
|
||||
CLOSED: [1984-04-01 Sun 12:00] SCHEDULED: <1984-04-01 Sun 09:00> DEADLINE: <1984-04-01 Sun>
|
||||
:PROPERTIES:
|
||||
:ID: 123-456-789-4234-1231
|
||||
|
@ -56,6 +56,7 @@ Body Line 2'''
|
|||
# Assert
|
||||
assert len(entries) == 1
|
||||
assert entries[0].Heading() == "Heading"
|
||||
assert entries[0].Todo() == "DONE"
|
||||
assert entries[0].Tags() == {"Tag1", "TAG2", "tag3"}
|
||||
assert entries[0].Body() == "- Clocked Log 1\nBody Line 1\nBody Line 2"
|
||||
assert entries[0].Priority() == "A"
|
||||
|
@ -124,7 +125,7 @@ def test_parse_multiple_entries(tmp_path):
|
|||
"Test parsing of multiple entries"
|
||||
# Arrange
|
||||
content = f'''
|
||||
*** [#A] Heading1 :tag1:
|
||||
*** FAILED [#A] Heading1 :tag1:
|
||||
CLOSED: [1984-04-01 Sun 12:00] SCHEDULED: <1984-04-01 Sun 09:00> DEADLINE: <1984-04-01 Sun>
|
||||
:PROPERTIES:
|
||||
:ID: 123-456-789-4234-0001
|
||||
|
@ -135,7 +136,7 @@ CLOCK: [1984-04-01 Sun 09:00]--[1984-04-01 Sun 12:00] => 3:00
|
|||
:END:
|
||||
Body 1
|
||||
|
||||
*** [#A] Heading2 :tag2:
|
||||
*** CANCELLED [#A] Heading2 :tag2:
|
||||
CLOSED: [1984-04-02 Sun 12:00] SCHEDULED: <1984-04-02 Sun 09:00> DEADLINE: <1984-04-02 Sun>
|
||||
:PROPERTIES:
|
||||
:ID: 123-456-789-4234-0002
|
||||
|
@ -156,6 +157,7 @@ Body 2
|
|||
assert len(entries) == 2
|
||||
for index, entry in enumerate(entries):
|
||||
assert entry.Heading() == f"Heading{index+1}"
|
||||
# Parenthesize the conditional. Unparenthesized, the line parses as
# `(entry.Todo() == "FAILED") if index == 0 else "CANCELLED"`, which asserts a
# non-empty string for every entry after the first and so checks nothing.
assert entry.Todo() == ("FAILED" if index == 0 else "CANCELLED")
|
||||
assert entry.Tags() == {f"tag{index+1}"}
|
||||
assert entry.Body() == f"- Clocked Log {index+1}\nBody {index+1}\n\n"
|
||||
assert entry.Priority() == "A"
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
from pathlib import Path
|
||||
|
||||
# Internal Packages
|
||||
from src.main import model
|
||||
from src.utils.state import model
|
||||
from src.search_type import text_search
|
||||
from src.utils.rawconfig import ContentConfig, SearchConfig
|
||||
from src.processor.org_mode.org_to_jsonl import org_to_jsonl
|
Loading…
Add table
Reference in a new issue