Merge branch 'master' of github.com:khoj-ai/khoj into features/allow-multi-outputs-in-chat

Commit c5329d76ba by sabaimran, 2024-11-29 14:12:03 -08:00
38 changed files with 437 additions and 180 deletions

View file

@@ -1,10 +1,11 @@
-.git/
-.pytest_cache/
-.vscode/
-.venv/
-docs/
+.*
+**/__pycache__/
+*.egg-info/
+documentation/
 tests/
 build/
 dist/
 scripts/
-*.egg-info/
+src/interface/
+src/telemetry/
+!src/interface/web

View file

@@ -38,13 +38,23 @@ env:
 jobs:
 build:
 name: Publish Khoj Docker Images
-runs-on: ubuntu-linux-x64-high
 strategy:
 fail-fast: false
 matrix:
-image:
-- 'local'
-- 'cloud'
+include:
+- image: 'local'
+platform: linux/amd64
+runner: ubuntu-latest
+- image: 'local'
+platform: linux/arm64
+runner: ubuntu-linux-arm64
+- image: 'cloud'
+platform: linux/amd64
+runner: ubuntu-latest
+- image: 'cloud'
+platform: linux/arm64
+runner: ubuntu-linux-arm64
+runs-on: ${{ matrix.runner }}
 steps:
 - name: Checkout Code
 uses: actions/checkout@v3
@@ -73,12 +83,12 @@ jobs:
 run: rm -rf /opt/hostedtoolcache
 - name: 📦 Build and Push Docker Image
-uses: docker/build-push-action@v2
+uses: docker/build-push-action@v4
 if: (matrix.image == 'local' && github.event_name == 'workflow_dispatch') && github.event.inputs.khoj == 'true' || (matrix.image == 'local' && github.event_name == 'push')
 with:
 context: .
 file: Dockerfile
-platforms: linux/amd64, linux/arm64
+platforms: ${{ matrix.platform }}
 push: true
 tags: |
 ghcr.io/${{ github.repository }}:${{ env.DOCKER_IMAGE_TAG }}
@@ -86,14 +96,19 @@ jobs:
 build-args: |
 VERSION=${{ steps.hatch.outputs.version }}
 PORT=42110
+cache-from: type=gha,scope=${{ matrix.image }}-${{ matrix.platform }}
+cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.platform}}
+labels: |
+org.opencontainers.image.description=Khoj AI - Your second brain powered by LLMs and Neural Search
+org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }}
 - name: 📦️⛅️ Build and Push Cloud Docker Image
-uses: docker/build-push-action@v2
+uses: docker/build-push-action@v4
 if: (matrix.image == 'cloud' && github.event_name == 'workflow_dispatch') && github.event.inputs.khoj-cloud == 'true' || (matrix.image == 'cloud' && github.event_name == 'push')
 with:
 context: .
 file: prod.Dockerfile
-platforms: linux/amd64, linux/arm64
+platforms: ${{ matrix.platform }}
 push: true
 tags: |
 ghcr.io/${{ github.repository }}-cloud:${{ env.DOCKER_IMAGE_TAG }}
@@ -101,3 +116,8 @@ jobs:
 build-args: |
 VERSION=${{ steps.hatch.outputs.version }}
 PORT=42110
+cache-from: type=gha,scope=${{ matrix.image }}-${{ matrix.platform }}
+cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.platform}}
+labels: |
+org.opencontainers.image.description=Khoj AI Cloud - Your second brain powered by LLMs and Neural Search
+org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }}

View file

@@ -25,6 +25,8 @@ on:
 options:
 - frames
 - simpleqa
+- gpqa
+- math500
 sample_size:
 description: 'Number of samples to evaluate'
 required: false
@@ -95,8 +97,9 @@ jobs:
 KHOJ_URL: "http://localhost:42110"
 KHOJ_LLM_SEED: "42"
 GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-SERPER_DEV_API_KEY: ${{ secrets.SERPER_DEV_API_KEY }}
-OLOSTEP_API_KEY: ${{ secrets.OLOSTEP_API_KEY }}
+SERPER_DEV_API_KEY: ${{ matrix.dataset != 'math500' && secrets.SERPER_DEV_API_KEY }}
+OLOSTEP_API_KEY: ${{ matrix.dataset != 'math500' && secrets.OLOSTEP_API_KEY }}
+HF_TOKEN: ${{ secrets.HF_TOKEN }}
 KHOJ_ADMIN_EMAIL: khoj
 KHOJ_ADMIN_PASSWORD: khoj
 POSTGRES_HOST: localhost

View file

@@ -55,15 +55,13 @@ jobs:
 with:
 python-version: ${{ matrix.python_version }}
-- name: Install Git
-run: |
-apt update && apt install -y git
 - name: ⏬️ Install Dependencies
 env:
 DEBIAN_FRONTEND: noninteractive
 run: |
-apt update && apt install -y libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6
+apt update && apt install -y git libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6
+# required by llama-cpp-python prebuilt wheels
+apt install -y musl-dev && ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
 - name: ⬇️ Install Postgres
 env:
@@ -78,6 +76,9 @@ jobs:
 python -m pip install --upgrade pip
 - name: ⬇️ Install Application
+env:
+PIP_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cpu https://abetlen.github.io/llama-cpp-python/whl/cpu"
+CUDA_VISIBLE_DEVICES: ""
 run: sed -i 's/dynamic = \["version"\]/version = "0.0.0"/' pyproject.toml && pip install --upgrade .[dev]
 - name: 🧪 Test Application

View file

@@ -1,5 +1,5 @@
 # syntax=docker/dockerfile:1
-FROM ubuntu:jammy
+FROM ubuntu:jammy AS base
 LABEL homepage="https://khoj.dev"
 LABEL repository="https://github.com/khoj-ai/khoj"
 LABEL org.opencontainers.image.source="https://github.com/khoj-ai/khoj"
@@ -10,44 +10,54 @@ RUN apt update -y && apt -y install \
 python3-pip \
 swig \
 curl \
-# Required by llama-cpp-python pre-built wheels. See #1628
-musl-dev \
 # Required by RapidOCR
 libgl1 \
 libglx-mesa0 \
-libglib2.0-0 && \
+libglib2.0-0 \
-# Required by Next.js Web app
-curl -sL https://deb.nodesource.com/setup_20.x | bash - && \
-curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - && \
-echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list && \
-apt update -y && apt -y --no-install-recommends install nodejs yarn && \
-apt clean && rm -rf /var/lib/apt/lists/* && \
 # Required by llama-cpp-python pre-built wheels. See #1628
-ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
+musl-dev && \
+ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1 && \
+# Clean up
+apt clean && rm -rf /var/lib/apt/lists/*
-# Install Application
+# Build Server
+FROM base AS server-deps
 WORKDIR /app
 COPY pyproject.toml .
 COPY README.md .
 ARG VERSION=0.0.0
-ENV PIP_EXTRA_INDEX_URL=https://abetlen.github.io/llama-cpp-python/whl/cpu
+# use the pre-built llama-cpp-python, torch cpu wheel
+ENV PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://abetlen.github.io/llama-cpp-python/whl/cpu"
+# avoid downloading unused cuda specific python packages
+ENV CUDA_VISIBLE_DEVICES=""
 RUN sed -i "s/dynamic = \\[\"version\"\\]/version = \"$VERSION\"/" pyproject.toml && \
 pip install --no-cache-dir .
-# Copy Source Code
-COPY . .
+# Build Web App
+FROM node:20-alpine AS web-app
+# Set build optimization env vars
-# Set the PYTHONPATH environment variable in order for it to find the Django app.
-ENV PYTHONPATH=/app/src:$PYTHONPATH
+ENV NODE_ENV=production
+ENV NEXT_TELEMETRY_DISABLED=1
-# Go to the directory src/interface/web and export the built Next.js assets
 WORKDIR /app/src/interface/web
-RUN bash -c "yarn install --frozen-lockfile && yarn ciexport && yarn cache clean"
+# Install dependencies first (cache layer)
+COPY src/interface/web/package.json src/interface/web/yarn.lock ./
+RUN yarn install --frozen-lockfile
+# Copy source and build
+COPY src/interface/web/. ./
+RUN yarn build
+# Merge the Server and Web App into a Single Image
+FROM base
+ENV PYTHONPATH=/app/src
 WORKDIR /app
+COPY --from=server-deps /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
+COPY --from=web-app /app/src/interface/web/out ./src/khoj/interface/built
+COPY . .
+RUN cd src && python3 khoj/manage.py collectstatic --noinput
 # Run the Application
 # There are more arguments required for the application to run,
-# but these should be passed in through the docker-compose.yml file.
+# but those should be passed in through the docker-compose.yml file.
 ARG PORT
 EXPOSE ${PORT}
 ENTRYPOINT ["python3", "src/khoj/main.py"]

View file

@@ -1,7 +1,7 @@
 {
 "id": "khoj",
 "name": "Khoj",
-"version": "1.30.7",
+"version": "1.30.10",
 "minAppVersion": "0.15.0",
 "description": "Your Second Brain",
 "author": "Khoj Inc.",

View file

@@ -1,5 +1,5 @@
 # syntax=docker/dockerfile:1
-FROM ubuntu:jammy
+FROM ubuntu:jammy AS base
 LABEL homepage="https://khoj.dev"
 LABEL repository="https://github.com/khoj-ai/khoj"
 LABEL org.opencontainers.image.source="https://github.com/khoj-ai/khoj"
@@ -16,38 +16,49 @@ RUN apt update -y && apt -y install \
 curl \
 # Required by llama-cpp-python pre-built wheels. See #1628
 musl-dev && \
-# Required by Next.js Web app
-curl -sL https://deb.nodesource.com/setup_20.x | bash - && \
-curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - && \
-echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list && \
-apt update -y && apt -y --no-install-recommends install nodejs yarn && \
-apt clean && rm -rf /var/lib/apt/lists/* && \
-# Required by llama-cpp-python pre-built wheels. See #1628
-ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
+ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1 && \
+# Clean up
+apt clean && rm -rf /var/lib/apt/lists/*
-# Install Application
+# Build Server
+FROM base AS server-deps
 WORKDIR /app
 COPY pyproject.toml .
 COPY README.md .
 ARG VERSION=0.0.0
-ENV PIP_EXTRA_INDEX_URL=https://abetlen.github.io/llama-cpp-python/whl/cpu
+# use the pre-built llama-cpp-python, torch cpu wheel
+ENV PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://abetlen.github.io/llama-cpp-python/whl/cpu"
+# avoid downloading unused cuda specific python packages
+ENV CUDA_VISIBLE_DEVICES=""
 RUN sed -i "s/dynamic = \\[\"version\"\\]/version = \"$VERSION\"/" pyproject.toml && \
 pip install --no-cache-dir -e .[prod]
-# Copy Source Code
-COPY . .
+# Build Web App
+FROM node:20-alpine AS web-app
+# Set build optimization env vars
-# Set the PYTHONPATH environment variable in order for it to find the Django app.
-ENV PYTHONPATH=/app/src:$PYTHONPATH
+ENV NODE_ENV=production
+ENV NEXT_TELEMETRY_DISABLED=1
-# Go to the directory src/interface/web and export the built Next.js assets
 WORKDIR /app/src/interface/web
-RUN bash -c "yarn install --frozen-lockfile && yarn ciexport && yarn cache clean"
+# Install dependencies first (cache layer)
+COPY src/interface/web/package.json src/interface/web/yarn.lock ./
+RUN yarn install --frozen-lockfile
+# Copy source and build
+COPY src/interface/web/. ./
+RUN yarn build
+# Merge the Server and Web App into a Single Image
+FROM base
+ENV PYTHONPATH=/app/src:$PYTHONPATH
 WORKDIR /app
+COPY --from=server-deps /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
+COPY --from=server-deps /usr/local/bin /usr/local/bin
+COPY --from=web-app /app/src/interface/web/out ./src/khoj/interface/built
+COPY . .
+RUN cd src && python3 khoj/manage.py collectstatic --noinput
 # Run the Application
 # There are more arguments required for the application to run,
-# but these should be passed in through the docker-compose.yml file.
+# but those should be passed in through the docker-compose.yml file.
 ARG PORT
 EXPOSE ${PORT}
 ENTRYPOINT ["gunicorn", "-c", "gunicorn-config.py", "src.khoj.main:app"]

View file

@@ -88,6 +88,7 @@ dependencies = [
 "anthropic == 0.26.1",
 "docx2txt == 0.8",
 "google-generativeai == 0.8.3",
+"pyjson5 == 1.6.7",
 ]
 dynamic = ["version"]

View file

@@ -1,6 +1,6 @@
 {
 "name": "Khoj",
-"version": "1.30.7",
+"version": "1.30.10",
 "description": "Your Second Brain",
 "author": "Khoj Inc. <team@khoj.dev>",
 "license": "GPL-3.0-or-later",
@@ -16,7 +16,7 @@
 "start": "yarn electron ."
 },
 "dependencies": {
-"@todesktop/runtime": "^1.6.4",
+"@todesktop/runtime": "^2.0.0",
 "axios": "^1.7.4",
 "cron": "^2.4.3",
 "electron-store": "^8.1.0"

View file

@@ -50,17 +50,17 @@
 dependencies:
 defer-to-connect "^2.0.0"
-"@todesktop/runtime@^1.6.4":
-version "1.6.4"
-resolved "https://registry.yarnpkg.com/@todesktop/runtime/-/runtime-1.6.4.tgz#a9d62a021cf2647c51371c892bfb1d4c5a29ed7e"
-integrity sha512-n6dOxhrKKsXMM+i2u9iRvoJSR2KCWw0orYK+FT9RbWNPykhuFIYd0yy8dYgYy/OuClKGyGl4SJFi2757FLhWDA==
+"@todesktop/runtime@^2.0.0":
+version "2.0.0"
+resolved "https://registry.yarnpkg.com/@todesktop/runtime/-/runtime-2.0.0.tgz#dfd409186ae664f5e28186a03b99e620ec7b7f82"
+integrity sha512-0a2tmWpIc/HJE/873xRMZKQNggfrYhoKYIchfN+k8RqKdzTPwTWa5ztur7GdCHLHBUaiMBPNRzF3h4kwHd1NCw==
 dependencies:
-del "^6.0.0"
-electron-updater "^4.6.1"
-eventemitter2 "^6.4.5"
+del "^6.1.1"
+electron-updater "^6.3.9"
+eventemitter2 "^6.4.9"
 execa "^5.0.0"
 lodash.once "^4.1.1"
-semver "^7.3.2"
+semver "^7.6.3"
 "@types/cacheable-request@^6.0.1":
 version "6.0.3"
@@ -90,16 +90,16 @@
 integrity sha512-jYvz8UMLDgy3a5SkGJne8H7VA7zPV2Lwohjx0V8V31+SqAjNmurWMkk9cQhfvlcnXWudBpK9xPM1n4rljOcHYQ==
 "@types/node@*":
-version "22.9.1"
-resolved "https://registry.yarnpkg.com/@types/node/-/node-22.9.1.tgz#bdf91c36e0e7ecfb7257b2d75bf1b206b308ca71"
-integrity sha512-p8Yy/8sw1caA8CdRIQBG5tiLHmxtQKObCijiAa9Ez+d4+PRffM4054xbju0msf+cvhJpnFEeNjxmVT/0ipktrg==
+version "22.10.1"
+resolved "https://registry.yarnpkg.com/@types/node/-/node-22.10.1.tgz#41ffeee127b8975a05f8c4f83fb89bcb2987d766"
+integrity sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==
 dependencies:
-undici-types "~6.19.8"
+undici-types "~6.20.0"
 "@types/node@^18.11.18":
-version "18.19.64"
-resolved "https://registry.yarnpkg.com/@types/node/-/node-18.19.64.tgz#122897fb79f2a9ec9c979bded01c11461b2b1478"
-integrity sha512-955mDqvO2vFf/oL7V3WiUtiz+BugyX8uVbaT2H8oj3+8dRyH2FLiNdowe7eNqRM7IOIZvzDH76EoAT+gwm6aIQ==
+version "18.19.67"
+resolved "https://registry.yarnpkg.com/@types/node/-/node-18.19.67.tgz#77c4b01641a1e3e1509aff7e10d39e4afd5ae06d"
+integrity sha512-wI8uHusga+0ZugNp0Ol/3BqQfEcCCNfojtO6Oou9iVNGPTL6QNSdnUdqq85fRgIorLhLMuPIKpsN98QE9Nh+KQ==
 dependencies:
 undici-types "~5.26.4"
@@ -110,11 +110,6 @@
 dependencies:
 "@types/node" "*"
-"@types/semver@^7.3.6":
-version "7.5.8"
-resolved "https://registry.yarnpkg.com/@types/semver/-/semver-7.5.8.tgz#8268a8c57a3e4abd25c165ecd36237db7948a55e"
-integrity sha512-I8EUhyrgfLrcTkzV3TSsGyl1tSuPrEDzr0yd5m90UgNxQkyDXULk3b6MlQqTCpZpNtWe1K0hzclnZkTcLBe2UQ==
 "@types/yauzl@^2.9.1":
 version "2.10.3"
 resolved "https://registry.yarnpkg.com/@types/yauzl/-/yauzl-2.10.3.tgz#e9b2808b4f109504a03cda958259876f61017999"
@@ -168,9 +163,9 @@ atomically@^1.7.0:
 integrity sha512-Xcz9l0z7y9yQ9rdDaxlmaI4uJHf/T8g9hOEzJcsEqX2SjCj4J20uK7+ldkDHMbpJDK76wF7xEIgxc/vSlsfw5w==
 axios@^1.7.4:
-version "1.7.7"
-resolved "https://registry.yarnpkg.com/axios/-/axios-1.7.7.tgz#2f554296f9892a72ac8d8e4c5b79c14a91d0a47f"
-integrity sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==
+version "1.7.8"
+resolved "https://registry.yarnpkg.com/axios/-/axios-1.7.8.tgz#1997b1496b394c21953e68c14aaa51b7b5de3d6e"
+integrity sha512-Uu0wb7KNqK2t5K+YQyVCLM76prD5sRFjKHbJYCP1J7JFGEQ6nN7HWn9+04LAeiJ3ji54lgS/gZCH1oxyrf1SPw==
 dependencies:
 follow-redirects "^1.15.6"
 form-data "^4.0.0"
@@ -206,12 +201,12 @@ buffer-crc32@~0.2.3:
 resolved "https://registry.yarnpkg.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz#0d333e3f00eac50aa1454abd30ef8c2a5d9a7242"
 integrity sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==
-builder-util-runtime@8.9.2:
-version "8.9.2"
-resolved "https://registry.yarnpkg.com/builder-util-runtime/-/builder-util-runtime-8.9.2.tgz#a9669ae5b5dcabfe411ded26678e7ae997246c28"
-integrity sha512-rhuKm5vh7E0aAmT6i8aoSfEjxzdYEFX7zDApK+eNgOhjofnWb74d9SRJv0H/8nsgOkos0TZ4zxW0P8J4N7xQ2A==
+builder-util-runtime@9.2.10:
+version "9.2.10"
+resolved "https://registry.yarnpkg.com/builder-util-runtime/-/builder-util-runtime-9.2.10.tgz#a0f7d9e214158402e78b74a745c8d9f870c604bc"
+integrity sha512-6p/gfG1RJSQeIbz8TK5aPNkoztgY1q5TgmGFMAXcY8itsGW6Y2ld1ALsZ5UJn8rog7hKF3zHx5iQbNQ8uLcRlw==
 dependencies:
-debug "^4.3.2"
+debug "^4.3.4"
 sax "^1.2.4"
 cacheable-lookup@^5.0.3:
@@ -296,7 +291,7 @@ debounce-fn@^4.0.0:
 dependencies:
 mimic-fn "^3.0.0"
-debug@^4.1.0, debug@^4.1.1, debug@^4.3.2:
+debug@^4.1.0, debug@^4.1.1, debug@^4.3.4:
 version "4.3.7"
 resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.7.tgz#87945b4151a011d76d95a198d7111c865c360a52"
 integrity sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==
@@ -333,7 +328,7 @@ define-properties@^1.2.1:
 has-property-descriptors "^1.0.0"
 object-keys "^1.1.1"
-del@^6.0.0:
+del@^6.1.1:
 version "6.1.1"
 resolved "https://registry.yarnpkg.com/del/-/del-6.1.1.tgz#3b70314f1ec0aa325c6b14eb36b95786671edb7a"
 integrity sha512-ua8BhapfP0JUJKC/zV9yHHDW/rDoDxP4Zhn3AkA6/xT6gY7jYXJiaeyBZznYVujhZZET+UgcbZiQ7sN3WqcImg==
@@ -379,19 +374,19 @@ electron-store@^8.1.0:
 conf "^10.2.0"
 type-fest "^2.17.0"
-electron-updater@^4.6.1:
-version "4.6.5"
-resolved "https://registry.yarnpkg.com/electron-updater/-/electron-updater-4.6.5.tgz#e9a75458bbfd6bb41a58a829839e150ad2eb2d3d"
-integrity sha512-kdTly8O9mSZfm9fslc1mnCY+mYOeaYRy7ERa2Fed240u01BKll3aiupzkd07qKw69KvhBSzuHroIW3mF0D8DWA==
+electron-updater@^6.3.9:
+version "6.3.9"
+resolved "https://registry.yarnpkg.com/electron-updater/-/electron-updater-6.3.9.tgz#e1e7f155624c58e6f3760f376c3a584028165ec4"
+integrity sha512-2PJNONi+iBidkoC5D1nzT9XqsE8Q1X28Fn6xRQhO3YX8qRRyJ3mkV4F1aQsuRnYPqq6Hw+E51y27W75WgDoofw==
 dependencies:
-"@types/semver" "^7.3.6"
-builder-util-runtime "8.9.2"
-fs-extra "^10.0.0"
+builder-util-runtime "9.2.10"
+fs-extra "^10.1.0"
 js-yaml "^4.1.0"
 lazy-val "^1.0.5"
 lodash.escaperegexp "^4.1.2"
 lodash.isequal "^4.5.0"
-semver "^7.3.5"
+semver "^7.6.3"
+tiny-typed-emitter "^2.1.0"
 electron@28.2.1:
 version "28.2.1"
@@ -436,7 +431,7 @@ escape-string-regexp@^4.0.0:
 resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34"
 integrity sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==
-eventemitter2@^6.4.5:
+eventemitter2@^6.4.9:
 version "6.4.9"
 resolved "https://registry.yarnpkg.com/eventemitter2/-/eventemitter2-6.4.9.tgz#41f2750781b4230ed58827bc119d293471ecb125"
 integrity sha512-JEPTiaOt9f04oa6NOkc4aH+nVp5I3wEjpHbIPqfgCdD5v5bUzy7xQqwcVO2aDQgOWhI28da57HksMrzK9HlRxg==
@@ -530,7 +525,7 @@ form-data@^4.0.0:
 combined-stream "^1.0.8"
 mime-types "^2.1.12"
-fs-extra@^10.0.0:
+fs-extra@^10.1.0:
 version "10.1.0"
 resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-10.1.0.tgz#02873cfbc4084dde127eaa5f9905eef2325d1abf"
 integrity sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==
@@ -1115,7 +1110,7 @@ semver@^6.2.0:
 resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4"
 integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==
-semver@^7.3.2, semver@^7.3.5:
+semver@^7.3.2, semver@^7.3.5, semver@^7.6.3:
 version "7.6.3"
 resolved "https://registry.yarnpkg.com/semver/-/semver-7.6.3.tgz#980f7b5550bc175fb4dc09403085627f9eb33143"
 integrity sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==
@@ -1166,6 +1161,11 @@ sumchecker@^3.0.1:
 dependencies:
 debug "^4.1.0"
+tiny-typed-emitter@^2.1.0:
+version "2.1.0"
+resolved "https://registry.yarnpkg.com/tiny-typed-emitter/-/tiny-typed-emitter-2.1.0.tgz#b3b027fdd389ff81a152c8e847ee2f5be9fad7b5"
+integrity sha512-qVtvMxeXbVej0cQWKqVSSAHmKZEHAvxdF8HEUBFWts8h+xEo5m/lEiPakuyZ3BnCBjOD8i24kzNOiOLLgsSxhA==
 to-regex-range@^5.0.1:
 version "5.0.1"
 resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4"
@@ -1188,10 +1188,10 @@ undici-types@~5.26.4:
 resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-5.26.5.tgz#bcd539893d00b56e964fd2657a4866b221a65617"
 integrity sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==
-undici-types@~6.19.8:
-version "6.19.8"
-resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-6.19.8.tgz#35111c9d1437ab83a7cdc0abae2f26d88eda0a02"
-integrity sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==
+undici-types@~6.20.0:
+version "6.20.0"
+resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-6.20.0.tgz#8171bf22c1f588d1554d55bf204bc624af388433"
+integrity sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==
 universalify@^0.1.0:
 version "0.1.2"

View file

@@ -6,7 +6,7 @@
 ;; Saba Imran <saba@khoj.dev>
 ;; Description: Your Second Brain
 ;; Keywords: search, chat, ai, org-mode, outlines, markdown, pdf, image
-;; Version: 1.30.7
+;; Version: 1.30.10
 ;; Package-Requires: ((emacs "27.1") (transient "0.3.0") (dash "2.19.1"))
 ;; URL: https://github.com/khoj-ai/khoj/tree/master/src/interface/emacs

View file

@@ -1,7 +1,7 @@
 {
 "id": "khoj",
 "name": "Khoj",
-"version": "1.30.7",
+"version": "1.30.10",
 "minAppVersion": "0.15.0",
 "description": "Your Second Brain",
 "author": "Khoj Inc.",

View file

@@ -1,6 +1,6 @@
 {
 "name": "Khoj",
-"version": "1.30.7",
+"version": "1.30.10",
 "description": "Your Second Brain",
 "author": "Debanjum Singh Solanky, Saba Imran <team@khoj.dev>",
 "license": "GPL-3.0-or-later",

View file

@@ -98,5 +98,8 @@
 "1.30.4": "0.15.0",
 "1.30.5": "0.15.0",
 "1.30.6": "0.15.0",
-"1.30.7": "0.15.0"
+"1.30.7": "0.15.0",
+"1.30.8": "0.15.0",
+"1.30.9": "0.15.0",
+"1.30.10": "0.15.0"
 }

View file

@@ -1,9 +1,7 @@
 import type { Metadata } from "next";
-import { Noto_Sans } from "next/font/google";
+import { noto_sans, noto_sans_arabic } from "@/app/fonts";
 import "../globals.css";
-const inter = Noto_Sans({ subsets: ["latin"] });
 export const metadata: Metadata = {
 title: "Khoj AI - Agents",
 description: "Find a specialized agent that can help you address more specific needs.",
@@ -33,7 +31,7 @@ export default function RootLayout({
 children: React.ReactNode;
 }>) {
 return (
-<html lang="en">
+<html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
 <meta
 httpEquiv="Content-Security-Policy"
 content="default-src 'self' https://assets.khoj.dev;
@@ -46,7 +44,7 @@ export default function RootLayout({
 child-src 'none';
 object-src 'none';"
 ></meta>
-<body className={inter.className}>{children}</body>
+<body>{children}</body>
 </html>
 );
 }

View file

@@ -1,9 +1,7 @@
 import type { Metadata } from "next";
-import { Noto_Sans } from "next/font/google";
+import { noto_sans, noto_sans_arabic } from "@/app/fonts";
 import "../globals.css";
-const inter = Noto_Sans({ subsets: ["latin"] });
 export const metadata: Metadata = {
 title: "Khoj AI - Chat",
 description:
@@ -34,7 +32,7 @@ export default function RootLayout({
 children: React.ReactNode;
 }>) {
 return (
-<html lang="en">
+<html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
 <meta
 httpEquiv="Content-Security-Policy"
 content="default-src 'self' https://assets.khoj.dev;
@@ -47,7 +45,7 @@ export default function RootLayout({
 child-src 'none';
 object-src 'none';"
 ></meta>
-<body className={inter.className}>
+<body>
 {children}
 <script
 dangerouslySetInnerHTML={{

View file

@@ -0,0 +1,13 @@
+import { Noto_Sans, Noto_Sans_Arabic } from "next/font/google";
+export const noto_sans = Noto_Sans({
+subsets: ["latin", "latin-ext", "cyrillic", "cyrillic-ext", "devanagari", "vietnamese"],
+display: "swap",
+variable: "--font-noto-sans",
+});
+export const noto_sans_arabic = Noto_Sans_Arabic({
+subsets: ["arabic"],
+display: "swap",
+variable: "--font-noto-sans-arabic",
+});

View file

@@ -1,7 +1,6 @@
 @tailwind base;
 @tailwind components;
 @tailwind utilities;
-@import url("https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@100..900&family=Noto+Sans:ital,wght@0,100..900;1,100..900&display=swap");
 @layer base {
 :root {
@@ -25,7 +24,7 @@
 --input: 220 13% 91%;
 --ring: 24.6 95% 53.1%;
 --radius: 0.5rem;
---font-family: "Noto Sans", "Noto Sans Arabic", sans-serif !important;
+--font-family: var(--font-noto-sans), var(--font-noto-sans-arabic), sans-serif !important;
 /* Khoj Custom Colors */
 --frosted-background-color: 20 13% 95%;
@@ -188,7 +187,7 @@
 --border: 0 0% 9%;
 --input: 0 0% 9%;
 --ring: 20.5 90.2% 48.2%;
---font-family: "Noto Sans", "Noto Sans Arabic", sans-serif !important;
+--font-family: var(--font-noto-sans), var(--font-noto-sans-arabic), sans-serif !important;
 /* Imported from highlight.js */
 pre code.hljs {

View file

@@ -1,9 +1,7 @@
 import type { Metadata } from "next";
-import { Noto_Sans } from "next/font/google";
+import { noto_sans, noto_sans_arabic } from "@/app/fonts";
 import "./globals.css";
-const inter = Noto_Sans({ subsets: ["latin"] });
 export const metadata: Metadata = {
 title: "Khoj AI - Home",
 description: "Your Second Brain.",
@@ -39,7 +37,7 @@ export default function RootLayout({
 children: React.ReactNode;
 }>) {
 return (
-<html lang="en">
+<html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
 <meta
 httpEquiv="Content-Security-Policy"
 content="default-src 'self' https://assets.khoj.dev;
@@ -52,7 +50,7 @@ export default function RootLayout({
 child-src 'none';
 object-src 'none';"
 ></meta>
-<body className={inter.className}>{children}</body>
+<body>{children}</body>
 </html>
 );
 }

View file

@@ -1,10 +1,8 @@
 import type { Metadata } from "next";
-import { Noto_Sans } from "next/font/google";
+import { noto_sans, noto_sans_arabic } from "@/app/fonts";
 import "../globals.css";
 import { Toaster } from "@/components/ui/toaster";
-const inter = Noto_Sans({ subsets: ["latin"] });
 export const metadata: Metadata = {
 title: "Khoj AI - Settings",
 description: "Configure Khoj to get personalized, deeper assistance.",
@@ -34,7 +32,7 @@ export default function RootLayout({
 children: React.ReactNode;
 }>) {
 return (
-<html lang="en">
+<html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
 <meta
 httpEquiv="Content-Security-Policy"
 content="default-src 'self' https://assets.khoj.dev;
@@ -46,7 +44,7 @@ export default function RootLayout({
 child-src 'none';
 object-src 'none';"
 ></meta>
-<body className={inter.className}>
+<body>
 {children}
 <Toaster />
 </body>

View file

@@ -1,9 +1,7 @@
 import type { Metadata } from "next";
-import { Noto_Sans } from "next/font/google";
+import { noto_sans, noto_sans_arabic } from "@/app/fonts";
 import "../../globals.css";
-const inter = Noto_Sans({ subsets: ["latin"] });
 export const metadata: Metadata = {
 title: "Khoj AI - Chat",
 description: "Use this page to view a chat with Khoj AI.",
@@ -15,7 +13,7 @@ export default function RootLayout({
 children: React.ReactNode;
 }>) {
 return (
-<html lang="en">
+<html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
 <meta
 httpEquiv="Content-Security-Policy"
 content="default-src 'self' https://assets.khoj.dev;
@@ -27,7 +25,7 @@ export default function RootLayout({
 child-src 'none';
 object-src 'none';"
 ></meta>
-<body className={inter.className}>
+<body>
 {children}
 <script
 dangerouslySetInnerHTML={{

View file

@@ -1,6 +1,6 @@
 {
 "name": "khoj-ai",
-"version": "1.30.7",
+"version": "1.30.10",
 "private": true,
 "scripts": {
 "dev": "next dev",
@@ -62,6 +62,9 @@
 "react-hook-form": "^7.52.1",
 "shadcn-ui": "^0.8.0",
 "swr": "^2.2.5",
+"tailwind-merge": "^2.3.0",
+"tailwindcss": "^3.4.6",
+"tailwindcss-animate": "^1.0.7",
 "typescript": "^5",
 "vaul": "^0.9.1",
 "zod": "^3.23.8"
@@ -82,9 +85,6 @@
 "lint-staged": "^15.2.7",
 "nodemon": "^3.1.3",
 "prettier": "3.3.3",
-"tailwind-merge": "^2.3.0",
-"tailwindcss": "^3.4.6",
-"tailwindcss-animate": "^1.0.7",
 "typescript": "^5"
 },
 "prettier": {

View file

@@ -55,6 +55,9 @@ const config = {
 },
 },
 extend: {
+fontFamily: {
+sans: ["var(--font-noto-sans)", "var(--font-noto-sans-arabic)"],
+},
 colors: {
 border: "hsl(var(--border))",
 input: "hsl(var(--input))",

View file

@@ -0,0 +1,11 @@
+[{
+"relation": ["delegate_permission/common.handle_all_urls"],
+"target": {
+"namespace": "android_app",
+"package_name": "dev.khoj.app",
+"sha256_cert_fingerprints": [
+"CC:98:4A:0A:F1:CC:84:26:AC:02:86:49:AA:69:64:B9:5E:63:A3:EF:18:56:EA:CA:13:C1:3A:15:CA:49:77:46",
+"D4:5A:6F:6C:18:28:D2:1C:78:27:92:C6:AC:DB:4C:12:C4:52:A1:88:9B:A1:F5:67:D1:22:FE:A0:0F:B1:AE:92"
+]
+}
+}]

View file

@@ -1,9 +1,8 @@
-import json
 import logging
-import re
 from datetime import datetime, timedelta
 from typing import Dict, List, Optional
+import pyjson5
 from langchain.schema import ChatMessage
 from khoj.database.models import Agent, ChatModelOptions, KhojUser
@@ -110,7 +109,7 @@ def extract_questions_anthropic(
 # Extract, Clean Message from Claude's Response
 try:
 response = clean_json(response)
-response = json.loads(response)
+response = pyjson5.loads(response)
 response = [q.strip() for q in response["queries"] if q.strip()]
 if not isinstance(response, list) or not response:
 logger.error(f"Invalid response for constructing subqueries: {response}")

View file

@@ -1,9 +1,8 @@
-import json
 import logging
-import re
 from datetime import datetime, timedelta
 from typing import Dict, List, Optional
+import pyjson5
 from langchain.schema import ChatMessage
 from khoj.database.models import Agent, ChatModelOptions, KhojUser
@@ -104,7 +103,7 @@ def extract_questions_gemini(
 # Extract, Clean Message from Gemini's Response
 try:
 response = clean_json(response)
-response = json.loads(response)
+response = pyjson5.loads(response)
 response = [q.strip() for q in response["queries"] if q.strip()]
 if not isinstance(response, list) or not response:
 logger.error(f"Invalid response for constructing subqueries: {response}")

View file

@@ -5,6 +5,7 @@ from datetime import datetime, timedelta
 from threading import Thread
 from typing import Any, Iterator, List, Optional, Union
+import pyjson5
 from langchain.schema import ChatMessage
 from llama_cpp import Llama
@@ -13,6 +14,7 @@ from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model
 from khoj.processor.conversation.utils import (
 ThreadedGenerator,
+clean_json,
 commit_conversation_trace,
 generate_chatml_messages_with_context,
 messages_to_print,
@@ -114,8 +116,8 @@ def extract_questions_offline(
 # Extract and clean the chat model's response
 try:
-response = response.strip(empty_escape_sequences)
-response = json.loads(response)
+response = clean_json(empty_escape_sequences)
+response = pyjson5.loads(response)
 questions = [q.strip() for q in response["queries"] if q.strip()]
 questions = filter_questions(questions)
 except:

View file

@@ -1,8 +1,8 @@
-import json
 import logging
 from datetime import datetime, timedelta
 from typing import Dict, List, Optional
+import pyjson5
 from langchain.schema import ChatMessage
 from khoj.database.models import Agent, ChatModelOptions, KhojUser
@@ -104,7 +104,7 @@ def extract_questions(
 # Extract, Clean Message from GPT's Response
 try:
 response = clean_json(response)
-response = json.loads(response)
+response = pyjson5.loads(response)
 response = [q.strip() for q in response["queries"] if q.strip()]
 if not isinstance(response, list) or not response:
 logger.error(f"Invalid response for constructing subqueries: {response}")

View file

@@ -30,11 +30,9 @@ You were created by Khoj Inc. with the following capabilities:
 - You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you.
 - Users can share files and other information with you using the Khoj Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window.
-- Say "I don't know" or "I don't understand" if you don't know what to say or if you don't know the answer to a question.
 - Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following
 - inline math mode : `\\(` and `\\)`
 - display math mode: insert linebreak after opening `$$`, `\\[` and before closing `$$`, `\\]`
-- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided notes or past conversations.
 - Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay".
 Today is {day_of_week}, {current_date} in UTC.

View file

@@ -5,6 +5,7 @@ import math
 import mimetypes
 import os
 import queue
+import re
 import uuid
 from dataclasses import dataclass
 from datetime import datetime
@@ -14,6 +15,7 @@ from time import perf_counter
 from typing import Any, Callable, Dict, List, Optional
 import PIL.Image
+import pyjson5
 import requests
 import tiktoken
 import yaml
@@ -590,6 +592,47 @@ def clean_code_python(code: str):
 return code.strip().removeprefix("```python").removesuffix("```")
+def load_complex_json(json_str):
+"""
+Preprocess a raw JSON string to escape unescaped double quotes within value strings,
+while preserving the JSON structure and already escaped quotes.
+"""
+def replace_unescaped_quotes(match):
+# Get the content between colons and commas/end braces
+content = match.group(1)
+# Replace unescaped double, single quotes that aren't already escaped
+# Uses negative lookbehind to avoid replacing already escaped quotes
+# Replace " with \"
+processed_dq = re.sub(r'(?<!\\)"', '\\"', content)
+# Replace \' with \\'
+processed_final = re.sub(r"(?<!\\)\\'", r"\\\\'", processed_dq)
+return f': "{processed_final}"'
+# Match content between : and either , or }
+# This pattern looks for ': ' followed by any characters until , or }
+pattern = r':\s*"(.*?)(?<!\\)"(?=[,}])'
+# Process the JSON string
+cleaned = clean_json(rf"{json_str}")
+processed = re.sub(pattern, replace_unescaped_quotes, cleaned)
+# See which json loader can load the processed JSON as valid
+errors = []
+json_loaders_to_try = [json.loads, pyjson5.loads]
+for loads in json_loaders_to_try:
+try:
+return loads(processed)
+except (json.JSONDecodeError, pyjson5.Json5Exception) as e:
+errors.append(f"{type(e).__name__}: {str(e)}")
+# If all loaders fail, raise the aggregated error
+raise ValueError(
+f"Failed to load JSON with errors: {'; '.join(errors)}\n\n"
+f"While attempting to load this cleaned JSON:\n{processed}"
+)
 def defilter_query(query: str):
 """Remove any query filters in query"""
 defiltered_query = query
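Note: a small usage sketch, not from this commit, for the load_complex_json helper added above. The raw string is hypothetical; it mimics a model emitting unescaped double quotes inside a JSON string value, which plain json.loads would reject:

from khoj.processor.conversation.utils import load_complex_json

raw = '{"code": "print("hello world")"}'
parsed = load_complex_json(raw)   # escapes the inner quotes, then parses
print(parsed["code"])             # print("hello world")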

View file

@@ -1,6 +1,5 @@
 import base64
 import datetime
-import json
 import logging
 import mimetypes
 import os
@@ -15,8 +14,8 @@ from khoj.processor.conversation import prompts
 from khoj.processor.conversation.utils import (
 ChatEvent,
 clean_code_python,
-clean_json,
 construct_chat_history,
+load_complex_json,
 )
 from khoj.routers.helpers import send_message_to_model_wrapper
 from khoj.utils.helpers import is_none_or_empty, timer, truncate_code_context
@@ -135,8 +134,7 @@ async def generate_python_code(
 )
 # Validate that the response is a non-empty, JSON-serializable list
-response = clean_json(response)
-response = json.loads(response)
+response = load_complex_json(response)
 code = response.get("code", "").strip()
 input_files = response.get("input_files", [])
 input_links = response.get("input_links", [])

View file

@@ -27,6 +27,7 @@ from typing import (
 from urllib.parse import parse_qs, quote, unquote, urljoin, urlparse
 import cron_descriptor
+import pyjson5
 import pytz
 import requests
 from apscheduler.job import Job
@@ -541,7 +542,7 @@ async def generate_online_subqueries(
 # Validate that the response is a non-empty, JSON-serializable list
 try:
 response = clean_json(response)
-response = json.loads(response)
+response = pyjson5.loads(response)
 response = {q.strip() for q in response["queries"] if q.strip()}
 if not isinstance(response, set) or not response or len(response) == 0:
 logger.error(f"Invalid response for constructing subqueries: {response}. Returning original query: {q}")

View file

@@ -1,4 +1,3 @@
-import json
 import logging
 from datetime import datetime
 from typing import Callable, Dict, List, Optional
@@ -10,10 +9,10 @@ from khoj.database.models import Agent, KhojUser
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.utils import (
 InformationCollectionIteration,
-clean_json,
 construct_chat_history,
 construct_iteration_history,
 construct_tool_chat_history,
+load_complex_json,
 )
 from khoj.processor.tools.online_search import read_webpages, search_online
 from khoj.processor.tools.run_code import run_code
@@ -106,8 +105,7 @@ async def apick_next_tool(
 return
 try:
-response = clean_json(response)
-response = json.loads(response)
+response = load_complex_json(response)
 selected_tool = response.get("tool", None)
 generated_query = response.get("query", None)
 scratchpad = response.get("scratchpad", None)

View file

@@ -134,3 +134,8 @@ def automations_config_page(
 "request": request,
 },
 )
+@web_client.get("/.well-known/assetlinks.json", response_class=FileResponse)
+def assetlinks(request: Request):
+return FileResponse(constants.assetlinks_file_path)

View file

@@ -5,6 +5,7 @@ app_root_directory = Path(__file__).parent.parent.parent
 web_directory = app_root_directory / "khoj/interface/web/"
 next_js_directory = app_root_directory / "khoj/interface/built/"
 pypi_static_directory = app_root_directory / "khoj/interface/compiled/"
+assetlinks_file_path = web_directory / ".well-known/assetlinks.json"
 empty_escape_sequences = "\n|\r|\t| "
 app_env_filepath = "~/.khoj/env"
 telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"

View file

@@ -3,8 +3,10 @@ import concurrent.futures
 import json
 import logging
 import os
+import re
 import time
 from datetime import datetime
+from functools import partial
 from io import StringIO
 from textwrap import dedent
 from threading import Lock
@@ -24,13 +26,10 @@ logger = logging.getLogger(__name__)
 KHOJ_URL = os.getenv("KHOJ_URL", "http://localhost:42110")
 KHOJ_CHAT_API_URL = f"{KHOJ_URL}/api/chat"
 KHOJ_API_KEY = os.getenv("KHOJ_API_KEY")
-KHOJ_MODE = os.getenv("KHOJ_MODE", "default")  # E.g research, general, notes etc.
+KHOJ_MODE = os.getenv("KHOJ_MODE", "default").lower()  # E.g research, general, notes etc.
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 GEMINI_EVAL_MODEL = os.getenv("GEMINI_EVAL_MODEL", "gemini-1.5-pro-002")
-GEMINI_API_URL = (
-f"https://generativelanguage.googleapis.com/v1beta/models/{GEMINI_EVAL_MODEL}:generateContent?key={GEMINI_API_KEY}"
-)
 SAMPLE_SIZE = os.getenv("SAMPLE_SIZE")  # Number of examples to evaluate
 RANDOMIZE = os.getenv("RANDOMIZE", "false").lower() == "true"  # Randomize examples
@@ -128,6 +127,99 @@ def load_simpleqa_dataset():
 return None
+def load_gpqa_dataset():
+"""
+Load the Google GPQA benchmark dataset from HuggingFace
+GPQA is a benchmark dataset to evaluate retrieval and answering capabilities of agents.
+It contains ~800 requiring multi-hop retrieval and reasoning across various topics.
+### Data Fields
+- Prompt: The question to be answered
+- Answer: The ground truth answer
+- reasoning_types: The type of reasoning required to answer the question
+"""
+import random
+def format_multiple_choice_question(row: Dict) -> tuple[str, str]:
+"""
+Create GPQA multi-choice prompt from shuffled answer choices and question.
+Refer: https://github.com/openai/simple-evals/blob/a8e85cc8a5dea497d915f870895250e07f9cc737/common.py#L12
+Returns formatted prompt and correct answer letter.
+"""
+# Gather choices
+choices = [
+row["Incorrect Answer 1"],
+row["Incorrect Answer 2"],
+row["Incorrect Answer 3"],
+row["Correct Answer"],
+]
+# Shuffle choices
+random.shuffle(choices)
+# Get correct answer letter
+correct_index = choices.index(row["Correct Answer"])
+correct_letter = "ABCD"[correct_index]
+prompt = f"""
+Answer the following multiple choice question. Answer should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.
+{row["Question"]}
+A) {choices[0]}
+B) {choices[1]}
+C) {choices[2]}
+D) {choices[3]}
+""".strip()
+return prompt, correct_letter
+try:
+dataset = load_dataset("Idavidrein/gpqa", "gpqa_diamond", split="train")
+# Create multi-choice q&a prompt from choices and correct answer
+prompts_and_answers = [format_multiple_choice_question(row) for row in dataset]
+# Normalize dataset to FRAMES format
+dataset = dataset.rename_columns({"Subdomain": "reasoning_types"})
+dataset = dataset.add_column("Prompt", [p[0] for p in prompts_and_answers])
+dataset = dataset.add_column("Answer", [p[1] for p in prompts_and_answers])
+# Sample and shuffle dataset if configured
+dataset = dataset.shuffle() if RANDOMIZE else dataset
+dataset = dataset[: int(SAMPLE_SIZE)] if SAMPLE_SIZE else dataset
+return dataset
+except Exception as e:
+logger.error(f"Error loading dataset: {e}")
+return None
+def load_math500_dataset():
+"""
+Load and format the MATH500 dataset to match the evaluation script's structure.
+Args:
+sample_size (int, optional): Number of samples to include. Defaults to None (use full dataset).
+randomize (bool, optional): Whether to randomize the dataset. Defaults to False.
+Returns:
+Dataset: Formatted HuggingFace Dataset.
+"""
+try:
+# Load the MATH500 dataset from HuggingFace
+dataset = load_dataset("HuggingFaceH4/MATH-500", split="test")
+dataset = dataset.rename_columns({"problem": "Prompt", "answer": "Answer", "subject": "reasoning_types"})
+dataset = dataset.shuffle() if RANDOMIZE else dataset
+dataset = dataset.select(range(int(SAMPLE_SIZE))) if SAMPLE_SIZE else dataset
+return dataset
+except Exception as e:
+print(f"Error loading and formatting MATH500 dataset: {e}")
+return None
 def get_agent_response(prompt: str) -> Dict[str, Any]:
 """Get response from the Khoj API"""
 # Set headers
@@ -152,7 +244,30 @@ def get_agent_response(prompt: str) -> Dict[str, Any]:
 return {"response": "", "usage": {}}
-def evaluate_response(query: str, agent_response: str, ground_truth: str) -> tuple[bool | None, str, float]:
+def evaluate_response_with_mcq_match(
+query: str, agent_response: str, ground_truth: str
+) -> tuple[bool | None, str, float]:
+"""Evaluate Khoj response against benchmark ground truth using string matching"""
+try:
+# Extract answer from agent response
+answer_pattern_multichoice = r"(?i)Answer\s*:\s*([A-D])"
+match = re.search(answer_pattern_multichoice, agent_response)
+extracted_answer = match.group(1) if match else None
+# Check if extracted answer matches ground truth
+decision = extracted_answer == ground_truth
+explanation = f"Agent response {'matches' if decision else 'does not match'} ground truth {ground_truth}"
+# Return decision, explanation and cost in structured form
+return decision, explanation, 0.0
+except Exception as e:
+logger.error(f"Error in evaluation: {e}")
+return None, f"Evaluation failed: {str(e)}", 0.0
+def evaluate_response_with_gemini(
+query: str, agent_response: str, ground_truth: str, eval_model=GEMINI_EVAL_MODEL
+) -> tuple[bool | None, str, float]:
 """Evaluate Khoj response against benchmark ground truth using Gemini"""
 evaluation_prompt = f"""
 Compare the following agent response with the ground truth answer.
@@ -166,10 +281,13 @@ def evaluate_response(query: str, agent_response: str, ground_truth: str) -> tup
 Provide your evaluation in the following json format:
 {"explanation:" "[How you made the decision?)", "decision:" "(TRUE if response contains key information, FALSE otherwise)"}
 """
+gemini_api_url = (
+f"https://generativelanguage.googleapis.com/v1beta/models/{eval_model}:generateContent?key={GEMINI_API_KEY}"
+)
 try:
 response = requests.post(
-GEMINI_API_URL,
+gemini_api_url,
 headers={"Content-Type": "application/json"},
 json={
 "contents": [{"parts": [{"text": evaluation_prompt}]}],
@@ -182,7 +300,7 @@ def evaluate_response(query: str, agent_response: str, ground_truth: str) -> tup
 # Update cost of evaluation
 input_tokens = response_json["usageMetadata"]["promptTokenCount"]
 ouput_tokens = response_json["usageMetadata"]["candidatesTokenCount"]
-cost = get_cost_of_chat_message(GEMINI_EVAL_MODEL, input_tokens, ouput_tokens)
+cost = get_cost_of_chat_message(eval_model, input_tokens, ouput_tokens)
 # Parse evaluation response
 eval_response: dict[str, str] = json.loads(
@@ -200,7 +318,7 @@ def evaluate_response(query: str, agent_response: str, ground_truth: str) -> tup
 return None, f"Evaluation failed: {str(e)}", 0.0
-def process_batch(batch, batch_start, results, dataset_length):
+def process_batch(batch, batch_start, results, dataset_length, response_evaluator):
 global running_cost
 for idx, (prompt, answer, reasoning_type) in enumerate(batch):
 current_index = batch_start + idx
@@ -219,7 +337,7 @@ def process_batch(batch, batch_start, results, dataset_length):
 decision = None
 explanation = "Agent response is empty. This maybe due to a service error."
 else:
-decision, explanation, eval_cost = evaluate_response(prompt, agent_response, answer)
+decision, explanation, eval_cost = response_evaluator(prompt, agent_response, answer)
 # Store results
 results.append(
@@ -292,7 +410,7 @@ def parse_args():
 "--dataset",
 "-d",
 default="frames",
-choices=["frames", "simpleqa"],
+choices=["frames", "simpleqa", "gpqa", "math500"],
 help="Dataset to use for evaluation (default: frames)",
 )
 return parser.parse_args()
@@ -309,12 +427,24 @@ def main():
 dataset = load_frames_dataset()
 elif args.dataset == "simpleqa":
 dataset = load_simpleqa_dataset()
+elif args.dataset == "gpqa":
+dataset = load_gpqa_dataset()
+elif args.dataset == "math500":
+dataset = load_math500_dataset()
 if dataset is None:
 return
 # Initialize variables
 results = []
 dataset_length = len(dataset["Prompt"])
+if args.dataset == "gpqa":
+response_evaluator = evaluate_response_with_mcq_match
+elif args.dataset == "math500":
+response_evaluator = partial(
+evaluate_response_with_gemini, eval_model=os.getenv("GEMINI_EVAL_MODEL", "gemini-1.5-flash-002")
+)
+else:
+response_evaluator = evaluate_response_with_gemini
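Note: functools.partial above pins the cheaper Gemini Flash model for MATH-500 grading while keeping the (query, agent_response, ground_truth) call signature every evaluator shares. A toy sketch of the pattern, not from the commit:

from functools import partial

def grade(query, agent_response, ground_truth, eval_model="gemini-1.5-pro-002"):
    # stand-in for the real evaluator; only the bound keyword differs per dataset
    return f"graded with {eval_model}"

math500_grader = partial(grade, eval_model="gemini-1.5-flash-002")
print(math500_grader("q", "agent answer", "42"))  # graded with gemini-1.5-flash-002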
 # Process examples in batches
 with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -326,7 +456,9 @@ def main():
 dataset["Answer"][i : i + BATCH_SIZE],
 dataset["reasoning_types"][i : i + BATCH_SIZE],
 )
-futures.append(executor.submit(process_batch, batch, batch_start, results, dataset_length))
+futures.append(
+executor.submit(process_batch, batch, batch_start, results, dataset_length, response_evaluator)
+)
 # Wait for all futures to complete
 concurrent.futures.wait(futures)

View file

@@ -104,6 +104,18 @@ class TestTruncateMessage:
 assert truncated_chat_history[0] != copy_big_chat_message
+def test_load_complex_raw_json_string():
+# Arrange
+raw_json = r"""{"key": "value with unescaped " and unescaped \' and escaped \" and escaped \\'"}"""
+expeced_json = {"key": "value with unescaped \" and unescaped \\' and escaped \" and escaped \\'"}
+# Act
+parsed_json = utils.load_complex_json(raw_json)
+# Assert
+assert parsed_json == expeced_json
 def generate_content(count):
 return " ".join([f"{index}" for index, _ in enumerate(range(count))])

View file

@@ -98,5 +98,8 @@
 "1.30.4": "0.15.0",
 "1.30.5": "0.15.0",
 "1.30.6": "0.15.0",
-"1.30.7": "0.15.0"
+"1.30.7": "0.15.0",
+"1.30.8": "0.15.0",
+"1.30.9": "0.15.0",
+"1.30.10": "0.15.0"
 }