mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-12-18 10:37:11 +00:00
Merge branch 'master' of github.com:khoj-ai/khoj into features/allow-multi-outputs-in-chat
This commit is contained in:
commit
c5329d76ba
38 changed files with 437 additions and 180 deletions
|
@ -1,10 +1,11 @@
|
|||
.git/
|
||||
.pytest_cache/
|
||||
.vscode/
|
||||
.venv/
|
||||
docs/
|
||||
.*
|
||||
**/__pycache__/
|
||||
*.egg-info/
|
||||
documentation/
|
||||
tests/
|
||||
build/
|
||||
dist/
|
||||
scripts/
|
||||
*.egg-info/
|
||||
src/interface/
|
||||
src/telemetry/
|
||||
!src/interface/web
|
||||
|
|
36
.github/workflows/dockerize.yml
vendored
36
.github/workflows/dockerize.yml
vendored
|
@ -38,13 +38,23 @@ env:
|
|||
jobs:
|
||||
build:
|
||||
name: Publish Khoj Docker Images
|
||||
runs-on: ubuntu-linux-x64-high
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
image:
|
||||
- 'local'
|
||||
- 'cloud'
|
||||
include:
|
||||
- image: 'local'
|
||||
platform: linux/amd64
|
||||
runner: ubuntu-latest
|
||||
- image: 'local'
|
||||
platform: linux/arm64
|
||||
runner: ubuntu-linux-arm64
|
||||
- image: 'cloud'
|
||||
platform: linux/amd64
|
||||
runner: ubuntu-latest
|
||||
- image: 'cloud'
|
||||
platform: linux/arm64
|
||||
runner: ubuntu-linux-arm64
|
||||
runs-on: ${{ matrix.runner }}
|
||||
steps:
|
||||
- name: Checkout Code
|
||||
uses: actions/checkout@v3
|
||||
|
@ -73,12 +83,12 @@ jobs:
|
|||
run: rm -rf /opt/hostedtoolcache
|
||||
|
||||
- name: 📦 Build and Push Docker Image
|
||||
uses: docker/build-push-action@v2
|
||||
uses: docker/build-push-action@v4
|
||||
if: (matrix.image == 'local' && github.event_name == 'workflow_dispatch') && github.event.inputs.khoj == 'true' || (matrix.image == 'local' && github.event_name == 'push')
|
||||
with:
|
||||
context: .
|
||||
file: Dockerfile
|
||||
platforms: linux/amd64, linux/arm64
|
||||
platforms: ${{ matrix.platform }}
|
||||
push: true
|
||||
tags: |
|
||||
ghcr.io/${{ github.repository }}:${{ env.DOCKER_IMAGE_TAG }}
|
||||
|
@ -86,14 +96,19 @@ jobs:
|
|||
build-args: |
|
||||
VERSION=${{ steps.hatch.outputs.version }}
|
||||
PORT=42110
|
||||
cache-from: type=gha,scope=${{ matrix.image }}-${{ matrix.platform }}
|
||||
cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.platform}}
|
||||
labels: |
|
||||
org.opencontainers.image.description=Khoj AI - Your second brain powered by LLMs and Neural Search
|
||||
org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }}
|
||||
|
||||
- name: 📦️⛅️ Build and Push Cloud Docker Image
|
||||
uses: docker/build-push-action@v2
|
||||
uses: docker/build-push-action@v4
|
||||
if: (matrix.image == 'cloud' && github.event_name == 'workflow_dispatch') && github.event.inputs.khoj-cloud == 'true' || (matrix.image == 'cloud' && github.event_name == 'push')
|
||||
with:
|
||||
context: .
|
||||
file: prod.Dockerfile
|
||||
platforms: linux/amd64, linux/arm64
|
||||
platforms: ${{ matrix.platform }}
|
||||
push: true
|
||||
tags: |
|
||||
ghcr.io/${{ github.repository }}-cloud:${{ env.DOCKER_IMAGE_TAG }}
|
||||
|
@ -101,3 +116,8 @@ jobs:
|
|||
build-args: |
|
||||
VERSION=${{ steps.hatch.outputs.version }}
|
||||
PORT=42110
|
||||
cache-from: type=gha,scope=${{ matrix.image }}-${{ matrix.platform }}
|
||||
cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.platform}}
|
||||
labels: |
|
||||
org.opencontainers.image.description=Khoj AI Cloud - Your second brain powered by LLMs and Neural Search
|
||||
org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }}
|
||||
|
|
7
.github/workflows/run_evals.yml
vendored
7
.github/workflows/run_evals.yml
vendored
|
@ -25,6 +25,8 @@ on:
|
|||
options:
|
||||
- frames
|
||||
- simpleqa
|
||||
- gpqa
|
||||
- math500
|
||||
sample_size:
|
||||
description: 'Number of samples to evaluate'
|
||||
required: false
|
||||
|
@ -95,8 +97,9 @@ jobs:
|
|||
KHOJ_URL: "http://localhost:42110"
|
||||
KHOJ_LLM_SEED: "42"
|
||||
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
||||
SERPER_DEV_API_KEY: ${{ secrets.SERPER_DEV_API_KEY }}
|
||||
OLOSTEP_API_KEY: ${{ secrets.OLOSTEP_API_KEY }}
|
||||
SERPER_DEV_API_KEY: ${{ matrix.dataset != 'math500' && secrets.SERPER_DEV_API_KEY }}
|
||||
OLOSTEP_API_KEY: ${{ matrix.dataset != 'math500' && secrets.OLOSTEP_API_KEY }}
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
KHOJ_ADMIN_EMAIL: khoj
|
||||
KHOJ_ADMIN_PASSWORD: khoj
|
||||
POSTGRES_HOST: localhost
|
||||
|
|
11
.github/workflows/test.yml
vendored
11
.github/workflows/test.yml
vendored
|
@ -55,15 +55,13 @@ jobs:
|
|||
with:
|
||||
python-version: ${{ matrix.python_version }}
|
||||
|
||||
- name: Install Git
|
||||
run: |
|
||||
apt update && apt install -y git
|
||||
|
||||
- name: ⏬️ Install Dependencies
|
||||
env:
|
||||
DEBIAN_FRONTEND: noninteractive
|
||||
run: |
|
||||
apt update && apt install -y libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6
|
||||
apt update && apt install -y git libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6
|
||||
# required by llama-cpp-python prebuilt wheels
|
||||
apt install -y musl-dev && ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
|
||||
|
||||
- name: ⬇️ Install Postgres
|
||||
env:
|
||||
|
@ -78,6 +76,9 @@ jobs:
|
|||
python -m pip install --upgrade pip
|
||||
|
||||
- name: ⬇️ Install Application
|
||||
env:
|
||||
PIP_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cpu https://abetlen.github.io/llama-cpp-python/whl/cpu"
|
||||
CUDA_VISIBLE_DEVICES: ""
|
||||
run: sed -i 's/dynamic = \["version"\]/version = "0.0.0"/' pyproject.toml && pip install --upgrade .[dev]
|
||||
|
||||
- name: 🧪 Test Application
|
||||
|
|
54
Dockerfile
54
Dockerfile
|
@ -1,5 +1,5 @@
|
|||
# syntax=docker/dockerfile:1
|
||||
FROM ubuntu:jammy
|
||||
FROM ubuntu:jammy AS base
|
||||
LABEL homepage="https://khoj.dev"
|
||||
LABEL repository="https://github.com/khoj-ai/khoj"
|
||||
LABEL org.opencontainers.image.source="https://github.com/khoj-ai/khoj"
|
||||
|
@ -10,44 +10,54 @@ RUN apt update -y && apt -y install \
|
|||
python3-pip \
|
||||
swig \
|
||||
curl \
|
||||
# Required by llama-cpp-python pre-built wheels. See #1628
|
||||
musl-dev \
|
||||
# Required by RapidOCR
|
||||
libgl1 \
|
||||
libglx-mesa0 \
|
||||
libglib2.0-0 && \
|
||||
# Required by Next.js Web app
|
||||
curl -sL https://deb.nodesource.com/setup_20.x | bash - && \
|
||||
curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - && \
|
||||
echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list && \
|
||||
apt update -y && apt -y --no-install-recommends install nodejs yarn && \
|
||||
apt clean && rm -rf /var/lib/apt/lists/* && \
|
||||
libglib2.0-0 \
|
||||
# Required by llama-cpp-python pre-built wheels. See #1628
|
||||
ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
|
||||
musl-dev && \
|
||||
ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1 && \
|
||||
# Clean up
|
||||
apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Application
|
||||
# Build Server
|
||||
FROM base AS server-deps
|
||||
WORKDIR /app
|
||||
COPY pyproject.toml .
|
||||
COPY README.md .
|
||||
ARG VERSION=0.0.0
|
||||
ENV PIP_EXTRA_INDEX_URL=https://abetlen.github.io/llama-cpp-python/whl/cpu
|
||||
# use the pre-built llama-cpp-python, torch cpu wheel
|
||||
ENV PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://abetlen.github.io/llama-cpp-python/whl/cpu"
|
||||
# avoid downloading unused cuda specific python packages
|
||||
ENV CUDA_VISIBLE_DEVICES=""
|
||||
RUN sed -i "s/dynamic = \\[\"version\"\\]/version = \"$VERSION\"/" pyproject.toml && \
|
||||
pip install --no-cache-dir .
|
||||
|
||||
# Copy Source Code
|
||||
COPY . .
|
||||
|
||||
# Set the PYTHONPATH environment variable in order for it to find the Django app.
|
||||
ENV PYTHONPATH=/app/src:$PYTHONPATH
|
||||
|
||||
# Go to the directory src/interface/web and export the built Next.js assets
|
||||
# Build Web App
|
||||
FROM node:20-alpine AS web-app
|
||||
# Set build optimization env vars
|
||||
ENV NODE_ENV=production
|
||||
ENV NEXT_TELEMETRY_DISABLED=1
|
||||
WORKDIR /app/src/interface/web
|
||||
RUN bash -c "yarn install --frozen-lockfile && yarn ciexport && yarn cache clean"
|
||||
# Install dependencies first (cache layer)
|
||||
COPY src/interface/web/package.json src/interface/web/yarn.lock ./
|
||||
RUN yarn install --frozen-lockfile
|
||||
# Copy source and build
|
||||
COPY src/interface/web/. ./
|
||||
RUN yarn build
|
||||
|
||||
# Merge the Server and Web App into a Single Image
|
||||
FROM base
|
||||
ENV PYTHONPATH=/app/src
|
||||
WORKDIR /app
|
||||
COPY --from=server-deps /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
|
||||
COPY --from=web-app /app/src/interface/web/out ./src/khoj/interface/built
|
||||
COPY . .
|
||||
RUN cd src && python3 khoj/manage.py collectstatic --noinput
|
||||
|
||||
# Run the Application
|
||||
# There are more arguments required for the application to run,
|
||||
# but these should be passed in through the docker-compose.yml file.
|
||||
# but those should be passed in through the docker-compose.yml file.
|
||||
ARG PORT
|
||||
EXPOSE ${PORT}
|
||||
ENTRYPOINT ["python3", "src/khoj/main.py"]
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"id": "khoj",
|
||||
"name": "Khoj",
|
||||
"version": "1.30.7",
|
||||
"version": "1.30.10",
|
||||
"minAppVersion": "0.15.0",
|
||||
"description": "Your Second Brain",
|
||||
"author": "Khoj Inc.",
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# syntax=docker/dockerfile:1
|
||||
FROM ubuntu:jammy
|
||||
FROM ubuntu:jammy AS base
|
||||
LABEL homepage="https://khoj.dev"
|
||||
LABEL repository="https://github.com/khoj-ai/khoj"
|
||||
LABEL org.opencontainers.image.source="https://github.com/khoj-ai/khoj"
|
||||
|
@ -16,38 +16,49 @@ RUN apt update -y && apt -y install \
|
|||
curl \
|
||||
# Required by llama-cpp-python pre-built wheels. See #1628
|
||||
musl-dev && \
|
||||
# Required by Next.js Web app
|
||||
curl -sL https://deb.nodesource.com/setup_20.x | bash - && \
|
||||
curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - && \
|
||||
echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list && \
|
||||
apt update -y && apt -y --no-install-recommends install nodejs yarn && \
|
||||
apt clean && rm -rf /var/lib/apt/lists/* && \
|
||||
# Required by llama-cpp-python pre-built wheels. See #1628
|
||||
ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
|
||||
ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1 && \
|
||||
# Clean up
|
||||
apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Application
|
||||
# Build Server
|
||||
FROM base AS server-deps
|
||||
WORKDIR /app
|
||||
COPY pyproject.toml .
|
||||
COPY README.md .
|
||||
ARG VERSION=0.0.0
|
||||
ENV PIP_EXTRA_INDEX_URL=https://abetlen.github.io/llama-cpp-python/whl/cpu
|
||||
# use the pre-built llama-cpp-python, torch cpu wheel
|
||||
ENV PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://abetlen.github.io/llama-cpp-python/whl/cpu"
|
||||
# avoid downloading unused cuda specific python packages
|
||||
ENV CUDA_VISIBLE_DEVICES=""
|
||||
RUN sed -i "s/dynamic = \\[\"version\"\\]/version = \"$VERSION\"/" pyproject.toml && \
|
||||
pip install --no-cache-dir -e .[prod]
|
||||
|
||||
# Copy Source Code
|
||||
COPY . .
|
||||
|
||||
# Set the PYTHONPATH environment variable in order for it to find the Django app.
|
||||
ENV PYTHONPATH=/app/src:$PYTHONPATH
|
||||
|
||||
# Go to the directory src/interface/web and export the built Next.js assets
|
||||
# Build Web App
|
||||
FROM node:20-alpine AS web-app
|
||||
# Set build optimization env vars
|
||||
ENV NODE_ENV=production
|
||||
ENV NEXT_TELEMETRY_DISABLED=1
|
||||
WORKDIR /app/src/interface/web
|
||||
RUN bash -c "yarn install --frozen-lockfile && yarn ciexport && yarn cache clean"
|
||||
# Install dependencies first (cache layer)
|
||||
COPY src/interface/web/package.json src/interface/web/yarn.lock ./
|
||||
RUN yarn install --frozen-lockfile
|
||||
# Copy source and build
|
||||
COPY src/interface/web/. ./
|
||||
RUN yarn build
|
||||
|
||||
# Merge the Server and Web App into a Single Image
|
||||
FROM base
|
||||
ENV PYTHONPATH=/app/src:$PYTHONPATH
|
||||
WORKDIR /app
|
||||
COPY --from=server-deps /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
|
||||
COPY --from=server-deps /usr/local/bin /usr/local/bin
|
||||
COPY --from=web-app /app/src/interface/web/out ./src/khoj/interface/built
|
||||
COPY . .
|
||||
RUN cd src && python3 khoj/manage.py collectstatic --noinput
|
||||
|
||||
# Run the Application
|
||||
# There are more arguments required for the application to run,
|
||||
# but these should be passed in through the docker-compose.yml file.
|
||||
# but those should be passed in through the docker-compose.yml file.
|
||||
ARG PORT
|
||||
EXPOSE ${PORT}
|
||||
ENTRYPOINT ["gunicorn", "-c", "gunicorn-config.py", "src.khoj.main:app"]
|
||||
|
|
|
@ -88,6 +88,7 @@ dependencies = [
|
|||
"anthropic == 0.26.1",
|
||||
"docx2txt == 0.8",
|
||||
"google-generativeai == 0.8.3",
|
||||
"pyjson5 == 1.6.7",
|
||||
]
|
||||
dynamic = ["version"]
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "Khoj",
|
||||
"version": "1.30.7",
|
||||
"version": "1.30.10",
|
||||
"description": "Your Second Brain",
|
||||
"author": "Khoj Inc. <team@khoj.dev>",
|
||||
"license": "GPL-3.0-or-later",
|
||||
|
@ -16,7 +16,7 @@
|
|||
"start": "yarn electron ."
|
||||
},
|
||||
"dependencies": {
|
||||
"@todesktop/runtime": "^1.6.4",
|
||||
"@todesktop/runtime": "^2.0.0",
|
||||
"axios": "^1.7.4",
|
||||
"cron": "^2.4.3",
|
||||
"electron-store": "^8.1.0"
|
||||
|
|
|
@ -50,17 +50,17 @@
|
|||
dependencies:
|
||||
defer-to-connect "^2.0.0"
|
||||
|
||||
"@todesktop/runtime@^1.6.4":
|
||||
version "1.6.4"
|
||||
resolved "https://registry.yarnpkg.com/@todesktop/runtime/-/runtime-1.6.4.tgz#a9d62a021cf2647c51371c892bfb1d4c5a29ed7e"
|
||||
integrity sha512-n6dOxhrKKsXMM+i2u9iRvoJSR2KCWw0orYK+FT9RbWNPykhuFIYd0yy8dYgYy/OuClKGyGl4SJFi2757FLhWDA==
|
||||
"@todesktop/runtime@^2.0.0":
|
||||
version "2.0.0"
|
||||
resolved "https://registry.yarnpkg.com/@todesktop/runtime/-/runtime-2.0.0.tgz#dfd409186ae664f5e28186a03b99e620ec7b7f82"
|
||||
integrity sha512-0a2tmWpIc/HJE/873xRMZKQNggfrYhoKYIchfN+k8RqKdzTPwTWa5ztur7GdCHLHBUaiMBPNRzF3h4kwHd1NCw==
|
||||
dependencies:
|
||||
del "^6.0.0"
|
||||
electron-updater "^4.6.1"
|
||||
eventemitter2 "^6.4.5"
|
||||
del "^6.1.1"
|
||||
electron-updater "^6.3.9"
|
||||
eventemitter2 "^6.4.9"
|
||||
execa "^5.0.0"
|
||||
lodash.once "^4.1.1"
|
||||
semver "^7.3.2"
|
||||
semver "^7.6.3"
|
||||
|
||||
"@types/cacheable-request@^6.0.1":
|
||||
version "6.0.3"
|
||||
|
@ -90,16 +90,16 @@
|
|||
integrity sha512-jYvz8UMLDgy3a5SkGJne8H7VA7zPV2Lwohjx0V8V31+SqAjNmurWMkk9cQhfvlcnXWudBpK9xPM1n4rljOcHYQ==
|
||||
|
||||
"@types/node@*":
|
||||
version "22.9.1"
|
||||
resolved "https://registry.yarnpkg.com/@types/node/-/node-22.9.1.tgz#bdf91c36e0e7ecfb7257b2d75bf1b206b308ca71"
|
||||
integrity sha512-p8Yy/8sw1caA8CdRIQBG5tiLHmxtQKObCijiAa9Ez+d4+PRffM4054xbju0msf+cvhJpnFEeNjxmVT/0ipktrg==
|
||||
version "22.10.1"
|
||||
resolved "https://registry.yarnpkg.com/@types/node/-/node-22.10.1.tgz#41ffeee127b8975a05f8c4f83fb89bcb2987d766"
|
||||
integrity sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==
|
||||
dependencies:
|
||||
undici-types "~6.19.8"
|
||||
undici-types "~6.20.0"
|
||||
|
||||
"@types/node@^18.11.18":
|
||||
version "18.19.64"
|
||||
resolved "https://registry.yarnpkg.com/@types/node/-/node-18.19.64.tgz#122897fb79f2a9ec9c979bded01c11461b2b1478"
|
||||
integrity sha512-955mDqvO2vFf/oL7V3WiUtiz+BugyX8uVbaT2H8oj3+8dRyH2FLiNdowe7eNqRM7IOIZvzDH76EoAT+gwm6aIQ==
|
||||
version "18.19.67"
|
||||
resolved "https://registry.yarnpkg.com/@types/node/-/node-18.19.67.tgz#77c4b01641a1e3e1509aff7e10d39e4afd5ae06d"
|
||||
integrity sha512-wI8uHusga+0ZugNp0Ol/3BqQfEcCCNfojtO6Oou9iVNGPTL6QNSdnUdqq85fRgIorLhLMuPIKpsN98QE9Nh+KQ==
|
||||
dependencies:
|
||||
undici-types "~5.26.4"
|
||||
|
||||
|
@ -110,11 +110,6 @@
|
|||
dependencies:
|
||||
"@types/node" "*"
|
||||
|
||||
"@types/semver@^7.3.6":
|
||||
version "7.5.8"
|
||||
resolved "https://registry.yarnpkg.com/@types/semver/-/semver-7.5.8.tgz#8268a8c57a3e4abd25c165ecd36237db7948a55e"
|
||||
integrity sha512-I8EUhyrgfLrcTkzV3TSsGyl1tSuPrEDzr0yd5m90UgNxQkyDXULk3b6MlQqTCpZpNtWe1K0hzclnZkTcLBe2UQ==
|
||||
|
||||
"@types/yauzl@^2.9.1":
|
||||
version "2.10.3"
|
||||
resolved "https://registry.yarnpkg.com/@types/yauzl/-/yauzl-2.10.3.tgz#e9b2808b4f109504a03cda958259876f61017999"
|
||||
|
@ -168,9 +163,9 @@ atomically@^1.7.0:
|
|||
integrity sha512-Xcz9l0z7y9yQ9rdDaxlmaI4uJHf/T8g9hOEzJcsEqX2SjCj4J20uK7+ldkDHMbpJDK76wF7xEIgxc/vSlsfw5w==
|
||||
|
||||
axios@^1.7.4:
|
||||
version "1.7.7"
|
||||
resolved "https://registry.yarnpkg.com/axios/-/axios-1.7.7.tgz#2f554296f9892a72ac8d8e4c5b79c14a91d0a47f"
|
||||
integrity sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==
|
||||
version "1.7.8"
|
||||
resolved "https://registry.yarnpkg.com/axios/-/axios-1.7.8.tgz#1997b1496b394c21953e68c14aaa51b7b5de3d6e"
|
||||
integrity sha512-Uu0wb7KNqK2t5K+YQyVCLM76prD5sRFjKHbJYCP1J7JFGEQ6nN7HWn9+04LAeiJ3ji54lgS/gZCH1oxyrf1SPw==
|
||||
dependencies:
|
||||
follow-redirects "^1.15.6"
|
||||
form-data "^4.0.0"
|
||||
|
@ -206,12 +201,12 @@ buffer-crc32@~0.2.3:
|
|||
resolved "https://registry.yarnpkg.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz#0d333e3f00eac50aa1454abd30ef8c2a5d9a7242"
|
||||
integrity sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==
|
||||
|
||||
builder-util-runtime@8.9.2:
|
||||
version "8.9.2"
|
||||
resolved "https://registry.yarnpkg.com/builder-util-runtime/-/builder-util-runtime-8.9.2.tgz#a9669ae5b5dcabfe411ded26678e7ae997246c28"
|
||||
integrity sha512-rhuKm5vh7E0aAmT6i8aoSfEjxzdYEFX7zDApK+eNgOhjofnWb74d9SRJv0H/8nsgOkos0TZ4zxW0P8J4N7xQ2A==
|
||||
builder-util-runtime@9.2.10:
|
||||
version "9.2.10"
|
||||
resolved "https://registry.yarnpkg.com/builder-util-runtime/-/builder-util-runtime-9.2.10.tgz#a0f7d9e214158402e78b74a745c8d9f870c604bc"
|
||||
integrity sha512-6p/gfG1RJSQeIbz8TK5aPNkoztgY1q5TgmGFMAXcY8itsGW6Y2ld1ALsZ5UJn8rog7hKF3zHx5iQbNQ8uLcRlw==
|
||||
dependencies:
|
||||
debug "^4.3.2"
|
||||
debug "^4.3.4"
|
||||
sax "^1.2.4"
|
||||
|
||||
cacheable-lookup@^5.0.3:
|
||||
|
@ -296,7 +291,7 @@ debounce-fn@^4.0.0:
|
|||
dependencies:
|
||||
mimic-fn "^3.0.0"
|
||||
|
||||
debug@^4.1.0, debug@^4.1.1, debug@^4.3.2:
|
||||
debug@^4.1.0, debug@^4.1.1, debug@^4.3.4:
|
||||
version "4.3.7"
|
||||
resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.7.tgz#87945b4151a011d76d95a198d7111c865c360a52"
|
||||
integrity sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==
|
||||
|
@ -333,7 +328,7 @@ define-properties@^1.2.1:
|
|||
has-property-descriptors "^1.0.0"
|
||||
object-keys "^1.1.1"
|
||||
|
||||
del@^6.0.0:
|
||||
del@^6.1.1:
|
||||
version "6.1.1"
|
||||
resolved "https://registry.yarnpkg.com/del/-/del-6.1.1.tgz#3b70314f1ec0aa325c6b14eb36b95786671edb7a"
|
||||
integrity sha512-ua8BhapfP0JUJKC/zV9yHHDW/rDoDxP4Zhn3AkA6/xT6gY7jYXJiaeyBZznYVujhZZET+UgcbZiQ7sN3WqcImg==
|
||||
|
@ -379,19 +374,19 @@ electron-store@^8.1.0:
|
|||
conf "^10.2.0"
|
||||
type-fest "^2.17.0"
|
||||
|
||||
electron-updater@^4.6.1:
|
||||
version "4.6.5"
|
||||
resolved "https://registry.yarnpkg.com/electron-updater/-/electron-updater-4.6.5.tgz#e9a75458bbfd6bb41a58a829839e150ad2eb2d3d"
|
||||
integrity sha512-kdTly8O9mSZfm9fslc1mnCY+mYOeaYRy7ERa2Fed240u01BKll3aiupzkd07qKw69KvhBSzuHroIW3mF0D8DWA==
|
||||
electron-updater@^6.3.9:
|
||||
version "6.3.9"
|
||||
resolved "https://registry.yarnpkg.com/electron-updater/-/electron-updater-6.3.9.tgz#e1e7f155624c58e6f3760f376c3a584028165ec4"
|
||||
integrity sha512-2PJNONi+iBidkoC5D1nzT9XqsE8Q1X28Fn6xRQhO3YX8qRRyJ3mkV4F1aQsuRnYPqq6Hw+E51y27W75WgDoofw==
|
||||
dependencies:
|
||||
"@types/semver" "^7.3.6"
|
||||
builder-util-runtime "8.9.2"
|
||||
fs-extra "^10.0.0"
|
||||
builder-util-runtime "9.2.10"
|
||||
fs-extra "^10.1.0"
|
||||
js-yaml "^4.1.0"
|
||||
lazy-val "^1.0.5"
|
||||
lodash.escaperegexp "^4.1.2"
|
||||
lodash.isequal "^4.5.0"
|
||||
semver "^7.3.5"
|
||||
semver "^7.6.3"
|
||||
tiny-typed-emitter "^2.1.0"
|
||||
|
||||
electron@28.2.1:
|
||||
version "28.2.1"
|
||||
|
@ -436,7 +431,7 @@ escape-string-regexp@^4.0.0:
|
|||
resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34"
|
||||
integrity sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==
|
||||
|
||||
eventemitter2@^6.4.5:
|
||||
eventemitter2@^6.4.9:
|
||||
version "6.4.9"
|
||||
resolved "https://registry.yarnpkg.com/eventemitter2/-/eventemitter2-6.4.9.tgz#41f2750781b4230ed58827bc119d293471ecb125"
|
||||
integrity sha512-JEPTiaOt9f04oa6NOkc4aH+nVp5I3wEjpHbIPqfgCdD5v5bUzy7xQqwcVO2aDQgOWhI28da57HksMrzK9HlRxg==
|
||||
|
@ -530,7 +525,7 @@ form-data@^4.0.0:
|
|||
combined-stream "^1.0.8"
|
||||
mime-types "^2.1.12"
|
||||
|
||||
fs-extra@^10.0.0:
|
||||
fs-extra@^10.1.0:
|
||||
version "10.1.0"
|
||||
resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-10.1.0.tgz#02873cfbc4084dde127eaa5f9905eef2325d1abf"
|
||||
integrity sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==
|
||||
|
@ -1115,7 +1110,7 @@ semver@^6.2.0:
|
|||
resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4"
|
||||
integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==
|
||||
|
||||
semver@^7.3.2, semver@^7.3.5:
|
||||
semver@^7.3.2, semver@^7.3.5, semver@^7.6.3:
|
||||
version "7.6.3"
|
||||
resolved "https://registry.yarnpkg.com/semver/-/semver-7.6.3.tgz#980f7b5550bc175fb4dc09403085627f9eb33143"
|
||||
integrity sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==
|
||||
|
@ -1166,6 +1161,11 @@ sumchecker@^3.0.1:
|
|||
dependencies:
|
||||
debug "^4.1.0"
|
||||
|
||||
tiny-typed-emitter@^2.1.0:
|
||||
version "2.1.0"
|
||||
resolved "https://registry.yarnpkg.com/tiny-typed-emitter/-/tiny-typed-emitter-2.1.0.tgz#b3b027fdd389ff81a152c8e847ee2f5be9fad7b5"
|
||||
integrity sha512-qVtvMxeXbVej0cQWKqVSSAHmKZEHAvxdF8HEUBFWts8h+xEo5m/lEiPakuyZ3BnCBjOD8i24kzNOiOLLgsSxhA==
|
||||
|
||||
to-regex-range@^5.0.1:
|
||||
version "5.0.1"
|
||||
resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4"
|
||||
|
@ -1188,10 +1188,10 @@ undici-types@~5.26.4:
|
|||
resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-5.26.5.tgz#bcd539893d00b56e964fd2657a4866b221a65617"
|
||||
integrity sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==
|
||||
|
||||
undici-types@~6.19.8:
|
||||
version "6.19.8"
|
||||
resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-6.19.8.tgz#35111c9d1437ab83a7cdc0abae2f26d88eda0a02"
|
||||
integrity sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==
|
||||
undici-types@~6.20.0:
|
||||
version "6.20.0"
|
||||
resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-6.20.0.tgz#8171bf22c1f588d1554d55bf204bc624af388433"
|
||||
integrity sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==
|
||||
|
||||
universalify@^0.1.0:
|
||||
version "0.1.2"
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
;; Saba Imran <saba@khoj.dev>
|
||||
;; Description: Your Second Brain
|
||||
;; Keywords: search, chat, ai, org-mode, outlines, markdown, pdf, image
|
||||
;; Version: 1.30.7
|
||||
;; Version: 1.30.10
|
||||
;; Package-Requires: ((emacs "27.1") (transient "0.3.0") (dash "2.19.1"))
|
||||
;; URL: https://github.com/khoj-ai/khoj/tree/master/src/interface/emacs
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"id": "khoj",
|
||||
"name": "Khoj",
|
||||
"version": "1.30.7",
|
||||
"version": "1.30.10",
|
||||
"minAppVersion": "0.15.0",
|
||||
"description": "Your Second Brain",
|
||||
"author": "Khoj Inc.",
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "Khoj",
|
||||
"version": "1.30.7",
|
||||
"version": "1.30.10",
|
||||
"description": "Your Second Brain",
|
||||
"author": "Debanjum Singh Solanky, Saba Imran <team@khoj.dev>",
|
||||
"license": "GPL-3.0-or-later",
|
||||
|
|
|
@ -98,5 +98,8 @@
|
|||
"1.30.4": "0.15.0",
|
||||
"1.30.5": "0.15.0",
|
||||
"1.30.6": "0.15.0",
|
||||
"1.30.7": "0.15.0"
|
||||
"1.30.7": "0.15.0",
|
||||
"1.30.8": "0.15.0",
|
||||
"1.30.9": "0.15.0",
|
||||
"1.30.10": "0.15.0"
|
||||
}
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
import type { Metadata } from "next";
|
||||
import { Noto_Sans } from "next/font/google";
|
||||
import { noto_sans, noto_sans_arabic } from "@/app/fonts";
|
||||
import "../globals.css";
|
||||
|
||||
const inter = Noto_Sans({ subsets: ["latin"] });
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: "Khoj AI - Agents",
|
||||
description: "Find a specialized agent that can help you address more specific needs.",
|
||||
|
@ -33,7 +31,7 @@ export default function RootLayout({
|
|||
children: React.ReactNode;
|
||||
}>) {
|
||||
return (
|
||||
<html lang="en">
|
||||
<html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
|
||||
<meta
|
||||
httpEquiv="Content-Security-Policy"
|
||||
content="default-src 'self' https://assets.khoj.dev;
|
||||
|
@ -46,7 +44,7 @@ export default function RootLayout({
|
|||
child-src 'none';
|
||||
object-src 'none';"
|
||||
></meta>
|
||||
<body className={inter.className}>{children}</body>
|
||||
<body>{children}</body>
|
||||
</html>
|
||||
);
|
||||
}
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
import type { Metadata } from "next";
|
||||
import { Noto_Sans } from "next/font/google";
|
||||
import { noto_sans, noto_sans_arabic } from "@/app/fonts";
|
||||
import "../globals.css";
|
||||
|
||||
const inter = Noto_Sans({ subsets: ["latin"] });
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: "Khoj AI - Chat",
|
||||
description:
|
||||
|
@ -34,7 +32,7 @@ export default function RootLayout({
|
|||
children: React.ReactNode;
|
||||
}>) {
|
||||
return (
|
||||
<html lang="en">
|
||||
<html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
|
||||
<meta
|
||||
httpEquiv="Content-Security-Policy"
|
||||
content="default-src 'self' https://assets.khoj.dev;
|
||||
|
@ -47,7 +45,7 @@ export default function RootLayout({
|
|||
child-src 'none';
|
||||
object-src 'none';"
|
||||
></meta>
|
||||
<body className={inter.className}>
|
||||
<body>
|
||||
{children}
|
||||
<script
|
||||
dangerouslySetInnerHTML={{
|
||||
|
|
13
src/interface/web/app/fonts.ts
Normal file
13
src/interface/web/app/fonts.ts
Normal file
|
@ -0,0 +1,13 @@
|
|||
import { Noto_Sans, Noto_Sans_Arabic } from "next/font/google";
|
||||
|
||||
export const noto_sans = Noto_Sans({
|
||||
subsets: ["latin", "latin-ext", "cyrillic", "cyrillic-ext", "devanagari", "vietnamese"],
|
||||
display: "swap",
|
||||
variable: "--font-noto-sans",
|
||||
});
|
||||
|
||||
export const noto_sans_arabic = Noto_Sans_Arabic({
|
||||
subsets: ["arabic"],
|
||||
display: "swap",
|
||||
variable: "--font-noto-sans-arabic",
|
||||
});
|
|
@ -1,7 +1,6 @@
|
|||
@tailwind base;
|
||||
@tailwind components;
|
||||
@tailwind utilities;
|
||||
@import url("https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@100..900&family=Noto+Sans:ital,wght@0,100..900;1,100..900&display=swap");
|
||||
|
||||
@layer base {
|
||||
:root {
|
||||
|
@ -25,7 +24,7 @@
|
|||
--input: 220 13% 91%;
|
||||
--ring: 24.6 95% 53.1%;
|
||||
--radius: 0.5rem;
|
||||
--font-family: "Noto Sans", "Noto Sans Arabic", sans-serif !important;
|
||||
--font-family: var(--font-noto-sans), var(--font-noto-sans-arabic), sans-serif !important;
|
||||
|
||||
/* Khoj Custom Colors */
|
||||
--frosted-background-color: 20 13% 95%;
|
||||
|
@ -188,7 +187,7 @@
|
|||
--border: 0 0% 9%;
|
||||
--input: 0 0% 9%;
|
||||
--ring: 20.5 90.2% 48.2%;
|
||||
--font-family: "Noto Sans", "Noto Sans Arabic", sans-serif !important;
|
||||
--font-family: var(--font-noto-sans), var(--font-noto-sans-arabic), sans-serif !important;
|
||||
|
||||
/* Imported from highlight.js */
|
||||
pre code.hljs {
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
import type { Metadata } from "next";
|
||||
import { Noto_Sans } from "next/font/google";
|
||||
import { noto_sans, noto_sans_arabic } from "@/app/fonts";
|
||||
import "./globals.css";
|
||||
|
||||
const inter = Noto_Sans({ subsets: ["latin"] });
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: "Khoj AI - Home",
|
||||
description: "Your Second Brain.",
|
||||
|
@ -39,7 +37,7 @@ export default function RootLayout({
|
|||
children: React.ReactNode;
|
||||
}>) {
|
||||
return (
|
||||
<html lang="en">
|
||||
<html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
|
||||
<meta
|
||||
httpEquiv="Content-Security-Policy"
|
||||
content="default-src 'self' https://assets.khoj.dev;
|
||||
|
@ -52,7 +50,7 @@ export default function RootLayout({
|
|||
child-src 'none';
|
||||
object-src 'none';"
|
||||
></meta>
|
||||
<body className={inter.className}>{children}</body>
|
||||
<body>{children}</body>
|
||||
</html>
|
||||
);
|
||||
}
|
||||
|
|
|
@ -1,10 +1,8 @@
|
|||
import type { Metadata } from "next";
|
||||
import { Noto_Sans } from "next/font/google";
|
||||
import { noto_sans, noto_sans_arabic } from "@/app/fonts";
|
||||
import "../globals.css";
|
||||
import { Toaster } from "@/components/ui/toaster";
|
||||
|
||||
const inter = Noto_Sans({ subsets: ["latin"] });
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: "Khoj AI - Settings",
|
||||
description: "Configure Khoj to get personalized, deeper assistance.",
|
||||
|
@ -34,7 +32,7 @@ export default function RootLayout({
|
|||
children: React.ReactNode;
|
||||
}>) {
|
||||
return (
|
||||
<html lang="en">
|
||||
<html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
|
||||
<meta
|
||||
httpEquiv="Content-Security-Policy"
|
||||
content="default-src 'self' https://assets.khoj.dev;
|
||||
|
@ -46,7 +44,7 @@ export default function RootLayout({
|
|||
child-src 'none';
|
||||
object-src 'none';"
|
||||
></meta>
|
||||
<body className={inter.className}>
|
||||
<body>
|
||||
{children}
|
||||
<Toaster />
|
||||
</body>
|
||||
|
|
|
@ -1,9 +1,7 @@
|
|||
import type { Metadata } from "next";
|
||||
import { Noto_Sans } from "next/font/google";
|
||||
import { noto_sans, noto_sans_arabic } from "@/app/fonts";
|
||||
import "../../globals.css";
|
||||
|
||||
const inter = Noto_Sans({ subsets: ["latin"] });
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: "Khoj AI - Chat",
|
||||
description: "Use this page to view a chat with Khoj AI.",
|
||||
|
@ -15,7 +13,7 @@ export default function RootLayout({
|
|||
children: React.ReactNode;
|
||||
}>) {
|
||||
return (
|
||||
<html lang="en">
|
||||
<html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
|
||||
<meta
|
||||
httpEquiv="Content-Security-Policy"
|
||||
content="default-src 'self' https://assets.khoj.dev;
|
||||
|
@ -27,7 +25,7 @@ export default function RootLayout({
|
|||
child-src 'none';
|
||||
object-src 'none';"
|
||||
></meta>
|
||||
<body className={inter.className}>
|
||||
<body>
|
||||
{children}
|
||||
<script
|
||||
dangerouslySetInnerHTML={{
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "khoj-ai",
|
||||
"version": "1.30.7",
|
||||
"version": "1.30.10",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
|
@ -62,6 +62,9 @@
|
|||
"react-hook-form": "^7.52.1",
|
||||
"shadcn-ui": "^0.8.0",
|
||||
"swr": "^2.2.5",
|
||||
"tailwind-merge": "^2.3.0",
|
||||
"tailwindcss": "^3.4.6",
|
||||
"tailwindcss-animate": "^1.0.7",
|
||||
"typescript": "^5",
|
||||
"vaul": "^0.9.1",
|
||||
"zod": "^3.23.8"
|
||||
|
@ -82,9 +85,6 @@
|
|||
"lint-staged": "^15.2.7",
|
||||
"nodemon": "^3.1.3",
|
||||
"prettier": "3.3.3",
|
||||
"tailwind-merge": "^2.3.0",
|
||||
"tailwindcss": "^3.4.6",
|
||||
"tailwindcss-animate": "^1.0.7",
|
||||
"typescript": "^5"
|
||||
},
|
||||
"prettier": {
|
||||
|
|
|
@ -55,6 +55,9 @@ const config = {
|
|||
},
|
||||
},
|
||||
extend: {
|
||||
fontFamily: {
|
||||
sans: ["var(--font-noto-sans)", "var(--font-noto-sans-arabic)"],
|
||||
},
|
||||
colors: {
|
||||
border: "hsl(var(--border))",
|
||||
input: "hsl(var(--input))",
|
||||
|
|
11
src/khoj/interface/web/.well-known/assetlinks.json
Normal file
11
src/khoj/interface/web/.well-known/assetlinks.json
Normal file
|
@ -0,0 +1,11 @@
|
|||
[{
|
||||
"relation": ["delegate_permission/common.handle_all_urls"],
|
||||
"target": {
|
||||
"namespace": "android_app",
|
||||
"package_name": "dev.khoj.app",
|
||||
"sha256_cert_fingerprints": [
|
||||
"CC:98:4A:0A:F1:CC:84:26:AC:02:86:49:AA:69:64:B9:5E:63:A3:EF:18:56:EA:CA:13:C1:3A:15:CA:49:77:46",
|
||||
"D4:5A:6F:6C:18:28:D2:1C:78:27:92:C6:AC:DB:4C:12:C4:52:A1:88:9B:A1:F5:67:D1:22:FE:A0:0F:B1:AE:92"
|
||||
]
|
||||
}
|
||||
}]
|
|
@ -1,9 +1,8 @@
|
|||
import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import pyjson5
|
||||
from langchain.schema import ChatMessage
|
||||
|
||||
from khoj.database.models import Agent, ChatModelOptions, KhojUser
|
||||
|
@ -110,7 +109,7 @@ def extract_questions_anthropic(
|
|||
# Extract, Clean Message from Claude's Response
|
||||
try:
|
||||
response = clean_json(response)
|
||||
response = json.loads(response)
|
||||
response = pyjson5.loads(response)
|
||||
response = [q.strip() for q in response["queries"] if q.strip()]
|
||||
if not isinstance(response, list) or not response:
|
||||
logger.error(f"Invalid response for constructing subqueries: {response}")
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import pyjson5
|
||||
from langchain.schema import ChatMessage
|
||||
|
||||
from khoj.database.models import Agent, ChatModelOptions, KhojUser
|
||||
|
@ -104,7 +103,7 @@ def extract_questions_gemini(
|
|||
# Extract, Clean Message from Gemini's Response
|
||||
try:
|
||||
response = clean_json(response)
|
||||
response = json.loads(response)
|
||||
response = pyjson5.loads(response)
|
||||
response = [q.strip() for q in response["queries"] if q.strip()]
|
||||
if not isinstance(response, list) or not response:
|
||||
logger.error(f"Invalid response for constructing subqueries: {response}")
|
||||
|
|
|
@ -5,6 +5,7 @@ from datetime import datetime, timedelta
|
|||
from threading import Thread
|
||||
from typing import Any, Iterator, List, Optional, Union
|
||||
|
||||
import pyjson5
|
||||
from langchain.schema import ChatMessage
|
||||
from llama_cpp import Llama
|
||||
|
||||
|
@ -13,6 +14,7 @@ from khoj.processor.conversation import prompts
|
|||
from khoj.processor.conversation.offline.utils import download_model
|
||||
from khoj.processor.conversation.utils import (
|
||||
ThreadedGenerator,
|
||||
clean_json,
|
||||
commit_conversation_trace,
|
||||
generate_chatml_messages_with_context,
|
||||
messages_to_print,
|
||||
|
@ -114,8 +116,8 @@ def extract_questions_offline(
|
|||
|
||||
# Extract and clean the chat model's response
|
||||
try:
|
||||
response = response.strip(empty_escape_sequences)
|
||||
response = json.loads(response)
|
||||
response = clean_json(empty_escape_sequences)
|
||||
response = pyjson5.loads(response)
|
||||
questions = [q.strip() for q in response["queries"] if q.strip()]
|
||||
questions = filter_questions(questions)
|
||||
except:
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import pyjson5
|
||||
from langchain.schema import ChatMessage
|
||||
|
||||
from khoj.database.models import Agent, ChatModelOptions, KhojUser
|
||||
|
@ -104,7 +104,7 @@ def extract_questions(
|
|||
# Extract, Clean Message from GPT's Response
|
||||
try:
|
||||
response = clean_json(response)
|
||||
response = json.loads(response)
|
||||
response = pyjson5.loads(response)
|
||||
response = [q.strip() for q in response["queries"] if q.strip()]
|
||||
if not isinstance(response, list) or not response:
|
||||
logger.error(f"Invalid response for constructing subqueries: {response}")
|
||||
|
|
|
@ -30,11 +30,9 @@ You were created by Khoj Inc. with the following capabilities:
|
|||
|
||||
- You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you.
|
||||
- Users can share files and other information with you using the Khoj Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window.
|
||||
- Say "I don't know" or "I don't understand" if you don't know what to say or if you don't know the answer to a question.
|
||||
- Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following
|
||||
- inline math mode : `\\(` and `\\)`
|
||||
- display math mode: insert linebreak after opening `$$`, `\\[` and before closing `$$`, `\\]`
|
||||
- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided notes or past conversations.
|
||||
- Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay".
|
||||
|
||||
Today is {day_of_week}, {current_date} in UTC.
|
||||
|
|
|
@ -5,6 +5,7 @@ import math
|
|||
import mimetypes
|
||||
import os
|
||||
import queue
|
||||
import re
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
|
@ -14,6 +15,7 @@ from time import perf_counter
|
|||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
import PIL.Image
|
||||
import pyjson5
|
||||
import requests
|
||||
import tiktoken
|
||||
import yaml
|
||||
|
@ -590,6 +592,47 @@ def clean_code_python(code: str):
|
|||
return code.strip().removeprefix("```python").removesuffix("```")
|
||||
|
||||
|
||||
def load_complex_json(json_str):
    """
    Parse a raw JSON string whose string values may contain unescaped double quotes.

    The input is first passed through clean_json. Every string value that follows a colon
    and is directly followed by a comma or closing brace then has its bare double quotes
    escaped and its singly backslash-escaped single quotes given a second backslash.
    The repaired text is handed to json.loads and then pyjson5.loads; the first result wins.

    Raises ValueError, listing each loader's error and the repaired text, if both loaders fail.
    """
    # A string value after a colon, ending at an unescaped quote that is followed by , or }
    value_pattern = re.compile(r':\s*"(.*?)(?<!\\)"(?=[,}])')

    def escape_value(found):
        inner = found.group(1)
        # Escape double quotes that are not already preceded by a backslash
        inner = re.sub(r'(?<!\\)"', '\\"', inner)
        # Double the backslash in front of single quotes escaped with a lone backslash
        inner = re.sub(r"(?<!\\)\\'", r"\\\\'", inner)
        return ': "' + inner + '"'

    repaired = value_pattern.sub(escape_value, clean_json(rf"{json_str}"))

    # Try the strict loader first, then the lenient JSON5 loader
    failures = []
    for parse in (json.loads, pyjson5.loads):
        try:
            return parse(repaired)
        except (json.JSONDecodeError, pyjson5.Json5Exception) as err:
            failures.append(f"{type(err).__name__}: {str(err)}")

    # Neither loader accepted the repaired text; report every failure together
    error_summary = "; ".join(failures)
    raise ValueError(
        f"Failed to load JSON with errors: {error_summary}\n\n"
        f"While attempting to load this cleaned JSON:\n{repaired}"
    )
|
||||
|
||||
|
||||
def defilter_query(query: str):
|
||||
"""Remove any query filters in query"""
|
||||
defiltered_query = query
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
import base64
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
|
@ -15,8 +14,8 @@ from khoj.processor.conversation import prompts
|
|||
from khoj.processor.conversation.utils import (
|
||||
ChatEvent,
|
||||
clean_code_python,
|
||||
clean_json,
|
||||
construct_chat_history,
|
||||
load_complex_json,
|
||||
)
|
||||
from khoj.routers.helpers import send_message_to_model_wrapper
|
||||
from khoj.utils.helpers import is_none_or_empty, timer, truncate_code_context
|
||||
|
@ -135,8 +134,7 @@ async def generate_python_code(
|
|||
)
|
||||
|
||||
# Validate that the response is a non-empty, JSON-serializable list
|
||||
response = clean_json(response)
|
||||
response = json.loads(response)
|
||||
response = load_complex_json(response)
|
||||
code = response.get("code", "").strip()
|
||||
input_files = response.get("input_files", [])
|
||||
input_links = response.get("input_links", [])
|
||||
|
|
|
@ -27,6 +27,7 @@ from typing import (
|
|||
from urllib.parse import parse_qs, quote, unquote, urljoin, urlparse
|
||||
|
||||
import cron_descriptor
|
||||
import pyjson5
|
||||
import pytz
|
||||
import requests
|
||||
from apscheduler.job import Job
|
||||
|
@ -541,7 +542,7 @@ async def generate_online_subqueries(
|
|||
# Validate that the response is a non-empty, JSON-serializable list
|
||||
try:
|
||||
response = clean_json(response)
|
||||
response = json.loads(response)
|
||||
response = pyjson5.loads(response)
|
||||
response = {q.strip() for q in response["queries"] if q.strip()}
|
||||
if not isinstance(response, set) or not response or len(response) == 0:
|
||||
logger.error(f"Invalid response for constructing subqueries: {response}. Returning original query: {q}")
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Callable, Dict, List, Optional
|
||||
|
@ -10,10 +9,10 @@ from khoj.database.models import Agent, KhojUser
|
|||
from khoj.processor.conversation import prompts
|
||||
from khoj.processor.conversation.utils import (
|
||||
InformationCollectionIteration,
|
||||
clean_json,
|
||||
construct_chat_history,
|
||||
construct_iteration_history,
|
||||
construct_tool_chat_history,
|
||||
load_complex_json,
|
||||
)
|
||||
from khoj.processor.tools.online_search import read_webpages, search_online
|
||||
from khoj.processor.tools.run_code import run_code
|
||||
|
@ -106,8 +105,7 @@ async def apick_next_tool(
|
|||
return
|
||||
|
||||
try:
|
||||
response = clean_json(response)
|
||||
response = json.loads(response)
|
||||
response = load_complex_json(response)
|
||||
selected_tool = response.get("tool", None)
|
||||
generated_query = response.get("query", None)
|
||||
scratchpad = response.get("scratchpad", None)
|
||||
|
|
|
@ -134,3 +134,8 @@ def automations_config_page(
|
|||
"request": request,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@web_client.get("/.well-known/assetlinks.json", response_class=FileResponse)
def assetlinks(request: Request):
    """Serve the file at constants.assetlinks_file_path for the assetlinks.json route."""
    assetlinks_path = constants.assetlinks_file_path
    return FileResponse(assetlinks_path)
|
||||
|
|
|
@ -5,6 +5,7 @@ app_root_directory = Path(__file__).parent.parent.parent
|
|||
web_directory = app_root_directory / "khoj/interface/web/"
|
||||
next_js_directory = app_root_directory / "khoj/interface/built/"
|
||||
pypi_static_directory = app_root_directory / "khoj/interface/compiled/"
|
||||
assetlinks_file_path = web_directory / ".well-known/assetlinks.json"
|
||||
empty_escape_sequences = "\n|\r|\t| "
|
||||
app_env_filepath = "~/.khoj/env"
|
||||
telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
|
||||
|
|
|
@ -3,8 +3,10 @@ import concurrent.futures
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime
|
||||
from functools import partial
|
||||
from io import StringIO
|
||||
from textwrap import dedent
|
||||
from threading import Lock
|
||||
|
@ -24,13 +26,10 @@ logger = logging.getLogger(__name__)
|
|||
KHOJ_URL = os.getenv("KHOJ_URL", "http://localhost:42110")
|
||||
KHOJ_CHAT_API_URL = f"{KHOJ_URL}/api/chat"
|
||||
KHOJ_API_KEY = os.getenv("KHOJ_API_KEY")
|
||||
KHOJ_MODE = os.getenv("KHOJ_MODE", "default") # E.g research, general, notes etc.
|
||||
KHOJ_MODE = os.getenv("KHOJ_MODE", "default").lower() # E.g research, general, notes etc.
|
||||
|
||||
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
||||
GEMINI_EVAL_MODEL = os.getenv("GEMINI_EVAL_MODEL", "gemini-1.5-pro-002")
|
||||
GEMINI_API_URL = (
|
||||
f"https://generativelanguage.googleapis.com/v1beta/models/{GEMINI_EVAL_MODEL}:generateContent?key={GEMINI_API_KEY}"
|
||||
)
|
||||
|
||||
SAMPLE_SIZE = os.getenv("SAMPLE_SIZE") # Number of examples to evaluate
|
||||
RANDOMIZE = os.getenv("RANDOMIZE", "false").lower() == "true" # Randomize examples
|
||||
|
@ -128,6 +127,99 @@ def load_simpleqa_dataset():
|
|||
return None
|
||||
|
||||
|
||||
def load_gpqa_dataset():
    """
    Load the GPQA benchmark dataset (`gpqa_diamond` configuration) from HuggingFace

    GPQA is a multiple-choice question answering benchmark. Each row provides a question,
    one correct answer and three incorrect answers. This loader shuffles the four choices
    of every row, builds a multiple-choice prompt from them and records the letter of the
    correct choice as the answer.

    ### Data Fields (after normalization)
    - Prompt: The multiple-choice prompt built from the question and its shuffled choices
    - Answer: The letter (A-D) of the correct choice in the prompt
    - reasoning_types: The `Subdomain` column of the source dataset

    Returns the dataset, shuffled if RANDOMIZE is set and limited to the first SAMPLE_SIZE
    rows if SAMPLE_SIZE is set. Returns None if loading or formatting fails.
    """
    import random

    def format_multiple_choice_question(row: Dict) -> tuple[str, str]:
        """
        Create GPQA multi-choice prompt from shuffled answer choices and question.
        Refer: https://github.com/openai/simple-evals/blob/a8e85cc8a5dea497d915f870895250e07f9cc737/common.py#L12

        Returns formatted prompt and correct answer letter.
        """
        # Gather choices
        choices = [
            row["Incorrect Answer 1"],
            row["Incorrect Answer 2"],
            row["Incorrect Answer 3"],
            row["Correct Answer"],
        ]
        # Shuffle choices so the correct answer is not always in the same position
        random.shuffle(choices)

        # Get correct answer letter from its position after the shuffle
        correct_index = choices.index(row["Correct Answer"])
        correct_letter = "ABCD"[correct_index]

        prompt = f"""
Answer the following multiple choice question. Answer should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.

{row["Question"]}

A) {choices[0]}
B) {choices[1]}
C) {choices[2]}
D) {choices[3]}
""".strip()

        return prompt, correct_letter

    try:
        dataset = load_dataset("Idavidrein/gpqa", "gpqa_diamond", split="train")

        # Create multi-choice q&a prompt from choices and correct answer
        prompts_and_answers = [format_multiple_choice_question(row) for row in dataset]

        # Normalize dataset to FRAMES format
        dataset = dataset.rename_columns({"Subdomain": "reasoning_types"})
        dataset = dataset.add_column("Prompt", [p[0] for p in prompts_and_answers])
        dataset = dataset.add_column("Answer", [p[1] for p in prompts_and_answers])

        # Sample and shuffle dataset if configured
        dataset = dataset.shuffle() if RANDOMIZE else dataset
        dataset = dataset[: int(SAMPLE_SIZE)] if SAMPLE_SIZE else dataset

        return dataset
    except Exception as e:
        logger.error(f"Error loading dataset: {e}")
        return None
|
||||
|
||||
|
||||
def load_math500_dataset():
    """
    Load the MATH500 dataset from HuggingFace and format it to match the evaluation script's structure.

    The `problem`, `answer` and `subject` columns are renamed to `Prompt`, `Answer` and
    `reasoning_types`. The function takes no arguments: the dataset is shuffled when the
    module-level RANDOMIZE is set and limited to the first SAMPLE_SIZE rows when the
    module-level SAMPLE_SIZE is set.

    Returns:
        Dataset: Formatted HuggingFace Dataset, or None if loading or formatting fails.
    """
    try:
        # Load the MATH500 dataset from HuggingFace
        dataset = load_dataset("HuggingFaceH4/MATH-500", split="test")
        dataset = dataset.rename_columns({"problem": "Prompt", "answer": "Answer", "subject": "reasoning_types"})
        dataset = dataset.shuffle() if RANDOMIZE else dataset
        if SAMPLE_SIZE:
            # Clamp so a SAMPLE_SIZE larger than the dataset selects every row instead of raising
            sample_size = min(int(SAMPLE_SIZE), len(dataset))
            dataset = dataset.select(range(sample_size))

        return dataset
    except Exception as e:
        # Report through the module logger, like the GPQA dataset loader
        logger.error(f"Error loading and formatting MATH500 dataset: {e}")
        return None
|
||||
|
||||
|
||||
def get_agent_response(prompt: str) -> Dict[str, Any]:
|
||||
"""Get response from the Khoj API"""
|
||||
# Set headers
|
||||
|
@ -152,7 +244,30 @@ def get_agent_response(prompt: str) -> Dict[str, Any]:
|
|||
return {"response": "", "usage": {}}
|
||||
|
||||
|
||||
def evaluate_response(query: str, agent_response: str, ground_truth: str) -> tuple[bool | None, str, float]:
|
||||
def evaluate_response_with_mcq_match(
|
||||
query: str, agent_response: str, ground_truth: str
|
||||
) -> tuple[bool | None, str, float]:
|
||||
"""Evaluate Khoj response against benchmark ground truth using string matching"""
|
||||
try:
|
||||
# Extract answer from agent response
|
||||
answer_pattern_multichoice = r"(?i)Answer\s*:\s*([A-D])"
|
||||
match = re.search(answer_pattern_multichoice, agent_response)
|
||||
extracted_answer = match.group(1) if match else None
|
||||
|
||||
# Check if extracted answer matches ground truth
|
||||
decision = extracted_answer == ground_truth
|
||||
explanation = f"Agent response {'matches' if decision else 'does not match'} ground truth {ground_truth}"
|
||||
|
||||
# Return decision, explanation and cost in structured form
|
||||
return decision, explanation, 0.0
|
||||
except Exception as e:
|
||||
logger.error(f"Error in evaluation: {e}")
|
||||
return None, f"Evaluation failed: {str(e)}", 0.0
|
||||
|
||||
|
||||
def evaluate_response_with_gemini(
|
||||
query: str, agent_response: str, ground_truth: str, eval_model=GEMINI_EVAL_MODEL
|
||||
) -> tuple[bool | None, str, float]:
|
||||
"""Evaluate Khoj response against benchmark ground truth using Gemini"""
|
||||
evaluation_prompt = f"""
|
||||
Compare the following agent response with the ground truth answer.
|
||||
|
@ -166,10 +281,13 @@ def evaluate_response(query: str, agent_response: str, ground_truth: str) -> tup
|
|||
Provide your evaluation in the following json format:
|
||||
{"explanation:" "[How you made the decision?)", "decision:" "(TRUE if response contains key information, FALSE otherwise)"}
|
||||
"""
|
||||
gemini_api_url = (
|
||||
f"https://generativelanguage.googleapis.com/v1beta/models/{eval_model}:generateContent?key={GEMINI_API_KEY}"
|
||||
)
|
||||
|
||||
try:
|
||||
response = requests.post(
|
||||
GEMINI_API_URL,
|
||||
gemini_api_url,
|
||||
headers={"Content-Type": "application/json"},
|
||||
json={
|
||||
"contents": [{"parts": [{"text": evaluation_prompt}]}],
|
||||
|
@ -182,7 +300,7 @@ def evaluate_response(query: str, agent_response: str, ground_truth: str) -> tup
|
|||
# Update cost of evaluation
|
||||
input_tokens = response_json["usageMetadata"]["promptTokenCount"]
|
||||
ouput_tokens = response_json["usageMetadata"]["candidatesTokenCount"]
|
||||
cost = get_cost_of_chat_message(GEMINI_EVAL_MODEL, input_tokens, ouput_tokens)
|
||||
cost = get_cost_of_chat_message(eval_model, input_tokens, ouput_tokens)
|
||||
|
||||
# Parse evaluation response
|
||||
eval_response: dict[str, str] = json.loads(
|
||||
|
@ -200,7 +318,7 @@ def evaluate_response(query: str, agent_response: str, ground_truth: str) -> tup
|
|||
return None, f"Evaluation failed: {str(e)}", 0.0
|
||||
|
||||
|
||||
def process_batch(batch, batch_start, results, dataset_length):
|
||||
def process_batch(batch, batch_start, results, dataset_length, response_evaluator):
|
||||
global running_cost
|
||||
for idx, (prompt, answer, reasoning_type) in enumerate(batch):
|
||||
current_index = batch_start + idx
|
||||
|
@ -219,7 +337,7 @@ def process_batch(batch, batch_start, results, dataset_length):
|
|||
decision = None
|
||||
explanation = "Agent response is empty. This maybe due to a service error."
|
||||
else:
|
||||
decision, explanation, eval_cost = evaluate_response(prompt, agent_response, answer)
|
||||
decision, explanation, eval_cost = response_evaluator(prompt, agent_response, answer)
|
||||
|
||||
# Store results
|
||||
results.append(
|
||||
|
@ -292,7 +410,7 @@ def parse_args():
|
|||
"--dataset",
|
||||
"-d",
|
||||
default="frames",
|
||||
choices=["frames", "simpleqa"],
|
||||
choices=["frames", "simpleqa", "gpqa", "math500"],
|
||||
help="Dataset to use for evaluation (default: frames)",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
@ -309,12 +427,24 @@ def main():
|
|||
dataset = load_frames_dataset()
|
||||
elif args.dataset == "simpleqa":
|
||||
dataset = load_simpleqa_dataset()
|
||||
elif args.dataset == "gpqa":
|
||||
dataset = load_gpqa_dataset()
|
||||
elif args.dataset == "math500":
|
||||
dataset = load_math500_dataset()
|
||||
if dataset is None:
|
||||
return
|
||||
|
||||
# Initialize variables
|
||||
results = []
|
||||
dataset_length = len(dataset["Prompt"])
|
||||
if args.dataset == "gpqa":
|
||||
response_evaluator = evaluate_response_with_mcq_match
|
||||
elif args.dataset == "math500":
|
||||
response_evaluator = partial(
|
||||
evaluate_response_with_gemini, eval_model=os.getenv("GEMINI_EVAL_MODEL", "gemini-1.5-flash-002")
|
||||
)
|
||||
else:
|
||||
response_evaluator = evaluate_response_with_gemini
|
||||
|
||||
# Process examples in batches
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
|
@ -326,7 +456,9 @@ def main():
|
|||
dataset["Answer"][i : i + BATCH_SIZE],
|
||||
dataset["reasoning_types"][i : i + BATCH_SIZE],
|
||||
)
|
||||
futures.append(executor.submit(process_batch, batch, batch_start, results, dataset_length))
|
||||
futures.append(
|
||||
executor.submit(process_batch, batch, batch_start, results, dataset_length, response_evaluator)
|
||||
)
|
||||
|
||||
# Wait for all futures to complete
|
||||
concurrent.futures.wait(futures)
|
||||
|
|
|
@ -104,6 +104,18 @@ class TestTruncateMessage:
|
|||
assert truncated_chat_history[0] != copy_big_chat_message
|
||||
|
||||
|
||||
def test_load_complex_raw_json_string():
    # Arrange: a raw JSON string whose value mixes escaped and unescaped quotes
    malformed_json = r"""{"key": "value with unescaped " and unescaped \' and escaped \" and escaped \\'"}"""
    expected = {"key": "value with unescaped \" and unescaped \\' and escaped \" and escaped \\'"}

    # Act
    loaded = utils.load_complex_json(malformed_json)

    # Assert
    assert loaded == expected
|
||||
|
||||
|
||||
def generate_content(count):
|
||||
return " ".join([f"{index}" for index, _ in enumerate(range(count))])
|
||||
|
||||
|
|
|
@ -98,5 +98,8 @@
|
|||
"1.30.4": "0.15.0",
|
||||
"1.30.5": "0.15.0",
|
||||
"1.30.6": "0.15.0",
|
||||
"1.30.7": "0.15.0"
|
||||
"1.30.7": "0.15.0",
|
||||
"1.30.8": "0.15.0",
|
||||
"1.30.9": "0.15.0",
|
||||
"1.30.10": "0.15.0"
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue