Merge branch 'master' of github.com:khoj-ai/khoj into features/allow-multi-outputs-in-chat

2024-12-18 10:37:11 +00:00 · 2024-11-29 14:12:03 -08:00 · 2024-11-29 14:12:03 -08:00 · c5329d76ba
commit c5329d76ba
parent 46f647d91d 439b18c21f
38 changed files with 437 additions and 180 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -1,10 +1,11 @@
-.git/
-.pytest_cache/
-.vscode/
-.venv/
-docs/
+.*
+**/__pycache__/
+*.egg-info/
+documentation/
 tests/
 build/
 dist/
 scripts/
-*.egg-info/
+src/interface/
+src/telemetry/
+!src/interface/web
--- a/.github/workflows/dockerize.yml
+++ b/.github/workflows/dockerize.yml
@ -38,13 +38,23 @@ env:
 jobs:
  build:
    name: Publish Khoj Docker Images
-    runs-on: ubuntu-linux-x64-high
    strategy:
      fail-fast: false
      matrix:
-        image:
-          - 'local'
-          - 'cloud'
+        include:
+          - image: 'local'
+            platform: linux/amd64
+            runner: ubuntu-latest
+          - image: 'local'
+            platform: linux/arm64
+            runner: ubuntu-linux-arm64
+          - image: 'cloud'
+            platform: linux/amd64
+            runner: ubuntu-latest
+          - image: 'cloud'
+            platform: linux/arm64
+            runner: ubuntu-linux-arm64
+    runs-on: ${{ matrix.runner }}
    steps:
      - name: Checkout Code
        uses: actions/checkout@v3
@ -73,12 +83,12 @@ jobs:
        run: rm -rf /opt/hostedtoolcache

      - name: 📦 Build and Push Docker Image
-        uses: docker/build-push-action@v2
+        uses: docker/build-push-action@v4
        if: (matrix.image == 'local' && github.event_name == 'workflow_dispatch') && github.event.inputs.khoj == 'true' || (matrix.image == 'local' && github.event_name == 'push')
        with:
          context: .
          file: Dockerfile
-          platforms: linux/amd64, linux/arm64
+          platforms: ${{ matrix.platform }}
          push: true
          tags: |
            ghcr.io/${{ github.repository }}:${{ env.DOCKER_IMAGE_TAG }}
@ -86,14 +96,19 @@ jobs:
          build-args: |
            VERSION=${{ steps.hatch.outputs.version }}
            PORT=42110
+          cache-from: type=gha,scope=${{ matrix.image }}-${{ matrix.platform }}
+          cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.platform}}
+          labels: |
+            org.opencontainers.image.description=Khoj AI - Your second brain powered by LLMs and Neural Search
+            org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }}

      - name: 📦️⛅️ Build and Push Cloud Docker Image
-        uses: docker/build-push-action@v2
+        uses: docker/build-push-action@v4
        if: (matrix.image == 'cloud' && github.event_name == 'workflow_dispatch') && github.event.inputs.khoj-cloud == 'true' || (matrix.image == 'cloud' && github.event_name == 'push')
        with:
          context: .
          file: prod.Dockerfile
-          platforms: linux/amd64, linux/arm64
+          platforms: ${{ matrix.platform }}
          push: true
          tags: |
            ghcr.io/${{ github.repository }}-cloud:${{ env.DOCKER_IMAGE_TAG }}
@ -101,3 +116,8 @@ jobs:
          build-args: |
            VERSION=${{ steps.hatch.outputs.version }}
            PORT=42110
+          cache-from: type=gha,scope=${{ matrix.image }}-${{ matrix.platform }}
+          cache-to: type=gha,mode=max,scope=${{ matrix.image }}-${{ matrix.platform}}
+          labels: |
+            org.opencontainers.image.description=Khoj AI Cloud - Your second brain powered by LLMs and Neural Search
+            org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }}
--- a/.github/workflows/run_evals.yml
+++ b/.github/workflows/run_evals.yml
@ -25,6 +25,8 @@ on:
        options:
          - frames
          - simpleqa
+          - gpqa
+          - math500
      sample_size:
        description: 'Number of samples to evaluate'
        required: false
@ -95,8 +97,9 @@ jobs:
          KHOJ_URL: "http://localhost:42110"
          KHOJ_LLM_SEED: "42"
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-          SERPER_DEV_API_KEY: ${{ secrets.SERPER_DEV_API_KEY }}
-          OLOSTEP_API_KEY: ${{ secrets.OLOSTEP_API_KEY }}
+          SERPER_DEV_API_KEY: ${{ matrix.dataset != 'math500' && secrets.SERPER_DEV_API_KEY || '' }}  # `|| ''`: empty, not the string "false", when dataset is math500
+          OLOSTEP_API_KEY: ${{ matrix.dataset != 'math500' && secrets.OLOSTEP_API_KEY || '' }}  # `|| ''`: empty, not the string "false", when dataset is math500
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          KHOJ_ADMIN_EMAIL: khoj
          KHOJ_ADMIN_PASSWORD: khoj
          POSTGRES_HOST: localhost
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -55,15 +55,13 @@ jobs:
        with:
          python-version: ${{ matrix.python_version }}

-      - name: Install Git
-        run: |
-          apt update && apt install -y git
-
      - name: ⏬️ Install Dependencies
        env:
          DEBIAN_FRONTEND: noninteractive
        run: |
-          apt update && apt install -y libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6
+          apt update && apt install -y git libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6
+          # required by llama-cpp-python prebuilt wheels
+          apt install -y musl-dev && ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1

      - name: ⬇️ Install Postgres
        env:
@ -78,6 +76,9 @@ jobs:
          python -m pip install --upgrade pip

      - name: ⬇️ Install Application
+        env:
+          PIP_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cpu https://abetlen.github.io/llama-cpp-python/whl/cpu"
+          CUDA_VISIBLE_DEVICES: ""
        run: sed -i 's/dynamic = \["version"\]/version = "0.0.0"/' pyproject.toml && pip install --upgrade .[dev]

      - name: 🧪 Test Application
--- a/Dockerfile
+++ b/Dockerfile
@ -1,5 +1,5 @@
 # syntax=docker/dockerfile:1
-FROM ubuntu:jammy
+FROM ubuntu:jammy AS base
 LABEL homepage="https://khoj.dev"
 LABEL repository="https://github.com/khoj-ai/khoj"
 LABEL org.opencontainers.image.source="https://github.com/khoj-ai/khoj"
@ -10,44 +10,54 @@ RUN apt update -y && apt -y install \
    python3-pip \
    swig \
    curl \
-    # Required by llama-cpp-python pre-built wheels. See #1628
-    musl-dev \
    # Required by RapidOCR
    libgl1 \
    libglx-mesa0 \
-    libglib2.0-0 && \
-    # Required by Next.js Web app
-    curl -sL https://deb.nodesource.com/setup_20.x | bash - && \
-    curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - && \
-    echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list && \
-    apt update -y && apt -y --no-install-recommends install nodejs yarn && \
-    apt clean && rm -rf /var/lib/apt/lists/* && \
+    libglib2.0-0 \
    # Required by llama-cpp-python pre-built wheels. See #1628
-    ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
+    musl-dev && \
+    ln -s /usr/lib/$(uname -m)-linux-musl/libc.so /lib/libc.musl-$(uname -m).so.1 && \
+    # Clean up
+    apt clean && rm -rf /var/lib/apt/lists/*

-# Install Application
+# Build Server
+FROM base AS server-deps
 WORKDIR /app
 COPY pyproject.toml .
 COPY README.md .
 ARG VERSION=0.0.0
-ENV PIP_EXTRA_INDEX_URL=https://abetlen.github.io/llama-cpp-python/whl/cpu
+# use the pre-built llama-cpp-python, torch cpu wheel
+ENV PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://abetlen.github.io/llama-cpp-python/whl/cpu"
+# avoid downloading unused cuda specific python packages
+ENV CUDA_VISIBLE_DEVICES=""
 RUN sed -i "s/dynamic = \\[\"version\"\\]/version = \"$VERSION\"/" pyproject.toml && \
    pip install --no-cache-dir .

-# Copy Source Code
-COPY . .
-
-# Set the PYTHONPATH environment variable in order for it to find the Django app.
-ENV PYTHONPATH=/app/src:$PYTHONPATH
-
-# Go to the directory src/interface/web and export the built Next.js assets
+# Build Web App
+FROM node:20-alpine AS web-app
+# Set build optimization env vars
+ENV NODE_ENV=production
+ENV NEXT_TELEMETRY_DISABLED=1
 WORKDIR /app/src/interface/web
-RUN bash -c "yarn install --frozen-lockfile && yarn ciexport && yarn cache clean"
+# Install dependencies first (cache layer)
+COPY src/interface/web/package.json src/interface/web/yarn.lock ./
+RUN yarn install --frozen-lockfile
+# Copy source and build
+COPY src/interface/web/. ./
+RUN yarn build
+
+# Merge the Server and Web App into a Single Image
+FROM base
+ENV PYTHONPATH=/app/src
 WORKDIR /app
+COPY --from=server-deps /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
+COPY --from=web-app /app/src/interface/web/out ./src/khoj/interface/built
+COPY . .
+RUN cd src && python3 khoj/manage.py collectstatic --noinput

 # Run the Application
 # There are more arguments required for the application to run,
-# but these should be passed in through the docker-compose.yml file.
+# but those should be passed in through the docker-compose.yml file.
 ARG PORT
 EXPOSE ${PORT}
 ENTRYPOINT ["python3", "src/khoj/main.py"]
--- a/manifest.json
+++ b/manifest.json
@ -1,7 +1,7 @@
 {
 	"id": "khoj",
 	"name": "Khoj",
-	"version": "1.30.7",
+	"version": "1.30.10",
 	"minAppVersion": "0.15.0",
 	"description": "Your Second Brain",
 	"author": "Khoj Inc.",
--- a/prod.Dockerfile
+++ b/prod.Dockerfile
@ -1,5 +1,5 @@
 # syntax=docker/dockerfile:1
-FROM ubuntu:jammy
+FROM ubuntu:jammy AS base
 LABEL homepage="https://khoj.dev"
 LABEL repository="https://github.com/khoj-ai/khoj"
 LABEL org.opencontainers.image.source="https://github.com/khoj-ai/khoj"
@ -16,38 +16,49 @@ RUN apt update -y && apt -y install \
    curl \
    # Required by llama-cpp-python pre-built wheels. See #1628
    musl-dev && \
-    # Required by Next.js Web app
-    curl -sL https://deb.nodesource.com/setup_20.x | bash - && \
-    curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - && \
-    echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list && \
-    apt update -y && apt -y --no-install-recommends install nodejs yarn && \
-    apt clean && rm -rf /var/lib/apt/lists/* && \
-    # Required by llama-cpp-python pre-built wheels. See #1628
-    ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
+    ln -s /usr/lib/$(uname -m)-linux-musl/libc.so /lib/libc.musl-$(uname -m).so.1 && \
+    # Clean up
+    apt clean && rm -rf /var/lib/apt/lists/*

-# Install Application
+# Build Server
+FROM base AS server-deps
 WORKDIR /app
 COPY pyproject.toml .
 COPY README.md .
 ARG VERSION=0.0.0
-ENV PIP_EXTRA_INDEX_URL=https://abetlen.github.io/llama-cpp-python/whl/cpu
+# use the pre-built llama-cpp-python, torch cpu wheel
+ENV PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://abetlen.github.io/llama-cpp-python/whl/cpu"
+# avoid downloading unused cuda specific python packages
+ENV CUDA_VISIBLE_DEVICES=""
 RUN sed -i "s/dynamic = \\[\"version\"\\]/version = \"$VERSION\"/" pyproject.toml && \
    pip install --no-cache-dir -e .[prod]

-# Copy Source Code
-COPY . .
-
-# Set the PYTHONPATH environment variable in order for it to find the Django app.
-ENV PYTHONPATH=/app/src:$PYTHONPATH
-
-# Go to the directory src/interface/web and export the built Next.js assets
+# Build Web App
+FROM node:20-alpine AS web-app
+# Set build optimization env vars
+ENV NODE_ENV=production
+ENV NEXT_TELEMETRY_DISABLED=1
 WORKDIR /app/src/interface/web
-RUN bash -c "yarn install --frozen-lockfile && yarn ciexport && yarn cache clean"
+# Install dependencies first (cache layer)
+COPY src/interface/web/package.json src/interface/web/yarn.lock ./
+RUN yarn install --frozen-lockfile
+# Copy source and build
+COPY src/interface/web/. ./
+RUN yarn build
+
+# Merge the Server and Web App into a Single Image
+FROM base
+ENV PYTHONPATH=/app/src
 WORKDIR /app
+COPY --from=server-deps /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
+COPY --from=server-deps /usr/local/bin /usr/local/bin
+COPY --from=web-app /app/src/interface/web/out ./src/khoj/interface/built
+COPY . .
+RUN cd src && python3 khoj/manage.py collectstatic --noinput

 # Run the Application
 # There are more arguments required for the application to run,
-# but these should be passed in through the docker-compose.yml file.
+# but those should be passed in through the docker-compose.yml file.
 ARG PORT
 EXPOSE ${PORT}
 ENTRYPOINT ["gunicorn", "-c", "gunicorn-config.py", "src.khoj.main:app"]
--- a/pyproject.toml
+++ b/pyproject.toml
@ -88,6 +88,7 @@ dependencies = [
    "anthropic == 0.26.1",
    "docx2txt == 0.8",
    "google-generativeai == 0.8.3",
+    "pyjson5 == 1.6.7",
 ]
 dynamic = ["version"]

--- a/src/interface/desktop/package.json
+++ b/src/interface/desktop/package.json
@ -1,6 +1,6 @@
 {
  "name": "Khoj",
-  "version": "1.30.7",
+  "version": "1.30.10",
  "description": "Your Second Brain",
  "author": "Khoj Inc. <team@khoj.dev>",
  "license": "GPL-3.0-or-later",
@ -16,7 +16,7 @@
    "start": "yarn electron ."
  },
  "dependencies": {
-    "@todesktop/runtime": "^1.6.4",
+    "@todesktop/runtime": "^2.0.0",
    "axios": "^1.7.4",
    "cron": "^2.4.3",
    "electron-store": "^8.1.0"
--- a/src/interface/desktop/yarn.lock
+++ b/src/interface/desktop/yarn.lock
@ -50,17 +50,17 @@
  dependencies:
    defer-to-connect "^2.0.0"

-"@todesktop/runtime@^1.6.4":
-  version "1.6.4"
-  resolved "https://registry.yarnpkg.com/@todesktop/runtime/-/runtime-1.6.4.tgz#a9d62a021cf2647c51371c892bfb1d4c5a29ed7e"
-  integrity sha512-n6dOxhrKKsXMM+i2u9iRvoJSR2KCWw0orYK+FT9RbWNPykhuFIYd0yy8dYgYy/OuClKGyGl4SJFi2757FLhWDA==
+"@todesktop/runtime@^2.0.0":
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/@todesktop/runtime/-/runtime-2.0.0.tgz#dfd409186ae664f5e28186a03b99e620ec7b7f82"
+  integrity sha512-0a2tmWpIc/HJE/873xRMZKQNggfrYhoKYIchfN+k8RqKdzTPwTWa5ztur7GdCHLHBUaiMBPNRzF3h4kwHd1NCw==
  dependencies:
-    del "^6.0.0"
-    electron-updater "^4.6.1"
-    eventemitter2 "^6.4.5"
+    del "^6.1.1"
+    electron-updater "^6.3.9"
+    eventemitter2 "^6.4.9"
    execa "^5.0.0"
    lodash.once "^4.1.1"
-    semver "^7.3.2"
+    semver "^7.6.3"

 "@types/cacheable-request@^6.0.1":
  version "6.0.3"
@ -90,16 +90,16 @@
  integrity sha512-jYvz8UMLDgy3a5SkGJne8H7VA7zPV2Lwohjx0V8V31+SqAjNmurWMkk9cQhfvlcnXWudBpK9xPM1n4rljOcHYQ==

 "@types/node@*":
-  version "22.9.1"
-  resolved "https://registry.yarnpkg.com/@types/node/-/node-22.9.1.tgz#bdf91c36e0e7ecfb7257b2d75bf1b206b308ca71"
-  integrity sha512-p8Yy/8sw1caA8CdRIQBG5tiLHmxtQKObCijiAa9Ez+d4+PRffM4054xbju0msf+cvhJpnFEeNjxmVT/0ipktrg==
+  version "22.10.1"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-22.10.1.tgz#41ffeee127b8975a05f8c4f83fb89bcb2987d766"
+  integrity sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==
  dependencies:
-    undici-types "~6.19.8"
+    undici-types "~6.20.0"

 "@types/node@^18.11.18":
-  version "18.19.64"
-  resolved "https://registry.yarnpkg.com/@types/node/-/node-18.19.64.tgz#122897fb79f2a9ec9c979bded01c11461b2b1478"
-  integrity sha512-955mDqvO2vFf/oL7V3WiUtiz+BugyX8uVbaT2H8oj3+8dRyH2FLiNdowe7eNqRM7IOIZvzDH76EoAT+gwm6aIQ==
+  version "18.19.67"
+  resolved "https://registry.yarnpkg.com/@types/node/-/node-18.19.67.tgz#77c4b01641a1e3e1509aff7e10d39e4afd5ae06d"
+  integrity sha512-wI8uHusga+0ZugNp0Ol/3BqQfEcCCNfojtO6Oou9iVNGPTL6QNSdnUdqq85fRgIorLhLMuPIKpsN98QE9Nh+KQ==
  dependencies:
    undici-types "~5.26.4"

@ -110,11 +110,6 @@
  dependencies:
    "@types/node" "*"

-"@types/semver@^7.3.6":
-  version "7.5.8"
-  resolved "https://registry.yarnpkg.com/@types/semver/-/semver-7.5.8.tgz#8268a8c57a3e4abd25c165ecd36237db7948a55e"
-  integrity sha512-I8EUhyrgfLrcTkzV3TSsGyl1tSuPrEDzr0yd5m90UgNxQkyDXULk3b6MlQqTCpZpNtWe1K0hzclnZkTcLBe2UQ==
-
 "@types/yauzl@^2.9.1":
  version "2.10.3"
  resolved "https://registry.yarnpkg.com/@types/yauzl/-/yauzl-2.10.3.tgz#e9b2808b4f109504a03cda958259876f61017999"
@ -168,9 +163,9 @@ atomically@^1.7.0:
  integrity sha512-Xcz9l0z7y9yQ9rdDaxlmaI4uJHf/T8g9hOEzJcsEqX2SjCj4J20uK7+ldkDHMbpJDK76wF7xEIgxc/vSlsfw5w==

 axios@^1.7.4:
-  version "1.7.7"
-  resolved "https://registry.yarnpkg.com/axios/-/axios-1.7.7.tgz#2f554296f9892a72ac8d8e4c5b79c14a91d0a47f"
-  integrity sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==
+  version "1.7.8"
+  resolved "https://registry.yarnpkg.com/axios/-/axios-1.7.8.tgz#1997b1496b394c21953e68c14aaa51b7b5de3d6e"
+  integrity sha512-Uu0wb7KNqK2t5K+YQyVCLM76prD5sRFjKHbJYCP1J7JFGEQ6nN7HWn9+04LAeiJ3ji54lgS/gZCH1oxyrf1SPw==
  dependencies:
    follow-redirects "^1.15.6"
    form-data "^4.0.0"
@ -206,12 +201,12 @@ buffer-crc32@~0.2.3:
  resolved "https://registry.yarnpkg.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz#0d333e3f00eac50aa1454abd30ef8c2a5d9a7242"
  integrity sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==

-builder-util-runtime@8.9.2:
-  version "8.9.2"
-  resolved "https://registry.yarnpkg.com/builder-util-runtime/-/builder-util-runtime-8.9.2.tgz#a9669ae5b5dcabfe411ded26678e7ae997246c28"
-  integrity sha512-rhuKm5vh7E0aAmT6i8aoSfEjxzdYEFX7zDApK+eNgOhjofnWb74d9SRJv0H/8nsgOkos0TZ4zxW0P8J4N7xQ2A==
+builder-util-runtime@9.2.10:
+  version "9.2.10"
+  resolved "https://registry.yarnpkg.com/builder-util-runtime/-/builder-util-runtime-9.2.10.tgz#a0f7d9e214158402e78b74a745c8d9f870c604bc"
+  integrity sha512-6p/gfG1RJSQeIbz8TK5aPNkoztgY1q5TgmGFMAXcY8itsGW6Y2ld1ALsZ5UJn8rog7hKF3zHx5iQbNQ8uLcRlw==
  dependencies:
-    debug "^4.3.2"
+    debug "^4.3.4"
    sax "^1.2.4"

 cacheable-lookup@^5.0.3:
@ -296,7 +291,7 @@ debounce-fn@^4.0.0:
  dependencies:
    mimic-fn "^3.0.0"

-debug@^4.1.0, debug@^4.1.1, debug@^4.3.2:
+debug@^4.1.0, debug@^4.1.1, debug@^4.3.4:
  version "4.3.7"
  resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.7.tgz#87945b4151a011d76d95a198d7111c865c360a52"
  integrity sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==
@ -333,7 +328,7 @@ define-properties@^1.2.1:
    has-property-descriptors "^1.0.0"
    object-keys "^1.1.1"

-del@^6.0.0:
+del@^6.1.1:
  version "6.1.1"
  resolved "https://registry.yarnpkg.com/del/-/del-6.1.1.tgz#3b70314f1ec0aa325c6b14eb36b95786671edb7a"
  integrity sha512-ua8BhapfP0JUJKC/zV9yHHDW/rDoDxP4Zhn3AkA6/xT6gY7jYXJiaeyBZznYVujhZZET+UgcbZiQ7sN3WqcImg==
@ -379,19 +374,19 @@ electron-store@^8.1.0:
    conf "^10.2.0"
    type-fest "^2.17.0"

-electron-updater@^4.6.1:
-  version "4.6.5"
-  resolved "https://registry.yarnpkg.com/electron-updater/-/electron-updater-4.6.5.tgz#e9a75458bbfd6bb41a58a829839e150ad2eb2d3d"
-  integrity sha512-kdTly8O9mSZfm9fslc1mnCY+mYOeaYRy7ERa2Fed240u01BKll3aiupzkd07qKw69KvhBSzuHroIW3mF0D8DWA==
+electron-updater@^6.3.9:
+  version "6.3.9"
+  resolved "https://registry.yarnpkg.com/electron-updater/-/electron-updater-6.3.9.tgz#e1e7f155624c58e6f3760f376c3a584028165ec4"
+  integrity sha512-2PJNONi+iBidkoC5D1nzT9XqsE8Q1X28Fn6xRQhO3YX8qRRyJ3mkV4F1aQsuRnYPqq6Hw+E51y27W75WgDoofw==
  dependencies:
-    "@types/semver" "^7.3.6"
-    builder-util-runtime "8.9.2"
-    fs-extra "^10.0.0"
+    builder-util-runtime "9.2.10"
+    fs-extra "^10.1.0"
    js-yaml "^4.1.0"
    lazy-val "^1.0.5"
    lodash.escaperegexp "^4.1.2"
    lodash.isequal "^4.5.0"
-    semver "^7.3.5"
+    semver "^7.6.3"
+    tiny-typed-emitter "^2.1.0"

 electron@28.2.1:
  version "28.2.1"
@ -436,7 +431,7 @@ escape-string-regexp@^4.0.0:
  resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34"
  integrity sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==

-eventemitter2@^6.4.5:
+eventemitter2@^6.4.9:
  version "6.4.9"
  resolved "https://registry.yarnpkg.com/eventemitter2/-/eventemitter2-6.4.9.tgz#41f2750781b4230ed58827bc119d293471ecb125"
  integrity sha512-JEPTiaOt9f04oa6NOkc4aH+nVp5I3wEjpHbIPqfgCdD5v5bUzy7xQqwcVO2aDQgOWhI28da57HksMrzK9HlRxg==
@ -530,7 +525,7 @@ form-data@^4.0.0:
    combined-stream "^1.0.8"
    mime-types "^2.1.12"

-fs-extra@^10.0.0:
+fs-extra@^10.1.0:
  version "10.1.0"
  resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-10.1.0.tgz#02873cfbc4084dde127eaa5f9905eef2325d1abf"
  integrity sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==
@ -1115,7 +1110,7 @@ semver@^6.2.0:
  resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4"
  integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==

-semver@^7.3.2, semver@^7.3.5:
+semver@^7.3.2, semver@^7.3.5, semver@^7.6.3:
  version "7.6.3"
  resolved "https://registry.yarnpkg.com/semver/-/semver-7.6.3.tgz#980f7b5550bc175fb4dc09403085627f9eb33143"
  integrity sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==
@ -1166,6 +1161,11 @@ sumchecker@^3.0.1:
  dependencies:
    debug "^4.1.0"

+tiny-typed-emitter@^2.1.0:
+  version "2.1.0"
+  resolved "https://registry.yarnpkg.com/tiny-typed-emitter/-/tiny-typed-emitter-2.1.0.tgz#b3b027fdd389ff81a152c8e847ee2f5be9fad7b5"
+  integrity sha512-qVtvMxeXbVej0cQWKqVSSAHmKZEHAvxdF8HEUBFWts8h+xEo5m/lEiPakuyZ3BnCBjOD8i24kzNOiOLLgsSxhA==
+
 to-regex-range@^5.0.1:
  version "5.0.1"
  resolved "https://registry.yarnpkg.com/to-regex-range/-/to-regex-range-5.0.1.tgz#1648c44aae7c8d988a326018ed72f5b4dd0392e4"
@ -1188,10 +1188,10 @@ undici-types@~5.26.4:
  resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-5.26.5.tgz#bcd539893d00b56e964fd2657a4866b221a65617"
  integrity sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==

-undici-types@~6.19.8:
-  version "6.19.8"
-  resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-6.19.8.tgz#35111c9d1437ab83a7cdc0abae2f26d88eda0a02"
-  integrity sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==
+undici-types@~6.20.0:
+  version "6.20.0"
+  resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-6.20.0.tgz#8171bf22c1f588d1554d55bf204bc624af388433"
+  integrity sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==

 universalify@^0.1.0:
  version "0.1.2"
--- a/src/interface/emacs/khoj.el
+++ b/src/interface/emacs/khoj.el
@ -6,7 +6,7 @@
 ;;         Saba Imran <saba@khoj.dev>
 ;; Description: Your Second Brain
 ;; Keywords: search, chat, ai, org-mode, outlines, markdown, pdf, image
-;; Version: 1.30.7
+;; Version: 1.30.10
 ;; Package-Requires: ((emacs "27.1") (transient "0.3.0") (dash "2.19.1"))
 ;; URL: https://github.com/khoj-ai/khoj/tree/master/src/interface/emacs

--- a/src/interface/obsidian/manifest.json
+++ b/src/interface/obsidian/manifest.json
@ -1,7 +1,7 @@
 {
 	"id": "khoj",
 	"name": "Khoj",
-	"version": "1.30.7",
+	"version": "1.30.10",
 	"minAppVersion": "0.15.0",
 	"description": "Your Second Brain",
 	"author": "Khoj Inc.",
--- a/src/interface/obsidian/package.json
+++ b/src/interface/obsidian/package.json
@ -1,6 +1,6 @@
 {
    "name": "Khoj",
-    "version": "1.30.7",
+    "version": "1.30.10",
    "description": "Your Second Brain",
    "author": "Debanjum Singh Solanky, Saba Imran <team@khoj.dev>",
    "license": "GPL-3.0-or-later",
--- a/src/interface/obsidian/versions.json
+++ b/src/interface/obsidian/versions.json
@ -98,5 +98,8 @@
 	"1.30.4": "0.15.0",
 	"1.30.5": "0.15.0",
 	"1.30.6": "0.15.0",
-	"1.30.7": "0.15.0"
+	"1.30.7": "0.15.0",
+	"1.30.8": "0.15.0",
+	"1.30.9": "0.15.0",
+	"1.30.10": "0.15.0"
 }
--- a/src/interface/web/app/agents/layout.tsx
+++ b/src/interface/web/app/agents/layout.tsx
@ -1,9 +1,7 @@
 import type { Metadata } from "next";
-import { Noto_Sans } from "next/font/google";
+import { noto_sans, noto_sans_arabic } from "@/app/fonts";
 import "../globals.css";

-const inter = Noto_Sans({ subsets: ["latin"] });
-
 export const metadata: Metadata = {
    title: "Khoj AI - Agents",
    description: "Find a specialized agent that can help you address more specific needs.",
@ -33,7 +31,7 @@ export default function RootLayout({
    children: React.ReactNode;
 }>) {
    return (
-        <html lang="en">
+        <html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
            <meta
                httpEquiv="Content-Security-Policy"
                content="default-src 'self' https://assets.khoj.dev;
@ -46,7 +44,7 @@ export default function RootLayout({
                       child-src 'none';
                       object-src 'none';"
            ></meta>
-            <body className={inter.className}>{children}</body>
+            <body>{children}</body>
        </html>
    );
 }
--- a/src/interface/web/app/chat/layout.tsx
+++ b/src/interface/web/app/chat/layout.tsx
@ -1,9 +1,7 @@
 import type { Metadata } from "next";
-import { Noto_Sans } from "next/font/google";
+import { noto_sans, noto_sans_arabic } from "@/app/fonts";
 import "../globals.css";

-const inter = Noto_Sans({ subsets: ["latin"] });
-
 export const metadata: Metadata = {
    title: "Khoj AI - Chat",
    description:
@ -34,7 +32,7 @@ export default function RootLayout({
    children: React.ReactNode;
 }>) {
    return (
-        <html lang="en">
+        <html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
            <meta
                httpEquiv="Content-Security-Policy"
                content="default-src 'self' https://assets.khoj.dev;
@ -47,7 +45,7 @@ export default function RootLayout({
                       child-src 'none';
                       object-src 'none';"
            ></meta>
-            <body className={inter.className}>
+            <body>
                {children}
                <script
                    dangerouslySetInnerHTML={{
--- a/src/interface/web/app/fonts.ts
+++ b/src/interface/web/app/fonts.ts
@ -0,0 +1,13 @@
+import { Noto_Sans, Noto_Sans_Arabic } from "next/font/google";
+
+export const noto_sans = Noto_Sans({
+    subsets: ["latin", "latin-ext", "cyrillic", "cyrillic-ext", "devanagari", "vietnamese"],
+    display: "swap",
+    variable: "--font-noto-sans",
+});
+
+export const noto_sans_arabic = Noto_Sans_Arabic({
+    subsets: ["arabic"],
+    display: "swap",
+    variable: "--font-noto-sans-arabic",
+});
--- a/src/interface/web/app/globals.css
+++ b/src/interface/web/app/globals.css
@ -1,7 +1,6 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
-@import url("https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@100..900&family=Noto+Sans:ital,wght@0,100..900;1,100..900&display=swap");

@layer base {
    :root {
@ -25,7 +24,7 @@
        --input: 220 13% 91%;
        --ring: 24.6 95% 53.1%;
        --radius: 0.5rem;
-        --font-family: "Noto Sans", "Noto Sans Arabic", sans-serif !important;
+        --font-family: var(--font-noto-sans), var(--font-noto-sans-arabic), sans-serif !important;

        /* Khoj Custom Colors */
        --frosted-background-color: 20 13% 95%;
@ -188,7 +187,7 @@
        --border: 0 0% 9%;
        --input: 0 0% 9%;
        --ring: 20.5 90.2% 48.2%;
-        --font-family: "Noto Sans", "Noto Sans Arabic", sans-serif !important;
+        --font-family: var(--font-noto-sans), var(--font-noto-sans-arabic), sans-serif !important;

        /* Imported from highlight.js */
        pre code.hljs {
--- a/src/interface/web/app/layout.tsx
+++ b/src/interface/web/app/layout.tsx
@ -1,9 +1,7 @@
 import type { Metadata } from "next";
-import { Noto_Sans } from "next/font/google";
+import { noto_sans, noto_sans_arabic } from "@/app/fonts";
 import "./globals.css";

-const inter = Noto_Sans({ subsets: ["latin"] });
-
 export const metadata: Metadata = {
    title: "Khoj AI - Home",
    description: "Your Second Brain.",
@ -39,7 +37,7 @@ export default function RootLayout({
    children: React.ReactNode;
 }>) {
    return (
-        <html lang="en">
+        <html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
            <meta
                httpEquiv="Content-Security-Policy"
                content="default-src 'self' https://assets.khoj.dev;
@ -52,7 +50,7 @@ export default function RootLayout({
                       child-src 'none';
                       object-src 'none';"
            ></meta>
-            <body className={inter.className}>{children}</body>
+            <body>{children}</body>
        </html>
    );
 }
--- a/src/interface/web/app/settings/layout.tsx
+++ b/src/interface/web/app/settings/layout.tsx
@ -1,10 +1,8 @@
 import type { Metadata } from "next";
-import { Noto_Sans } from "next/font/google";
+import { noto_sans, noto_sans_arabic } from "@/app/fonts";
 import "../globals.css";
 import { Toaster } from "@/components/ui/toaster";

-const inter = Noto_Sans({ subsets: ["latin"] });
-
 export const metadata: Metadata = {
    title: "Khoj AI - Settings",
    description: "Configure Khoj to get personalized, deeper assistance.",
@ -34,7 +32,7 @@ export default function RootLayout({
    children: React.ReactNode;
 }>) {
    return (
-        <html lang="en">
+        <html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
            <meta
                httpEquiv="Content-Security-Policy"
                content="default-src 'self' https://assets.khoj.dev;
@ -46,7 +44,7 @@ export default function RootLayout({
                        child-src 'none';
                        object-src 'none';"
            ></meta>
-            <body className={inter.className}>
+            <body>
                {children}
                <Toaster />
            </body>
--- a/src/interface/web/app/share/chat/layout.tsx
+++ b/src/interface/web/app/share/chat/layout.tsx
@ -1,9 +1,7 @@
 import type { Metadata } from "next";
-import { Noto_Sans } from "next/font/google";
+import { noto_sans, noto_sans_arabic } from "@/app/fonts";
 import "../../globals.css";

-const inter = Noto_Sans({ subsets: ["latin"] });
-
 export const metadata: Metadata = {
    title: "Khoj AI - Chat",
    description: "Use this page to view a chat with Khoj AI.",
@ -15,7 +13,7 @@ export default function RootLayout({
    children: React.ReactNode;
 }>) {
    return (
-        <html lang="en">
+        <html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
            <meta
                httpEquiv="Content-Security-Policy"
                content="default-src 'self' https://assets.khoj.dev;
@ -27,7 +25,7 @@ export default function RootLayout({
                       child-src 'none';
                       object-src 'none';"
            ></meta>
-            <body className={inter.className}>
+            <body>
                {children}
                <script
                    dangerouslySetInnerHTML={{
--- a/src/interface/web/package.json
+++ b/src/interface/web/package.json
@ -1,6 +1,6 @@
 {
    "name": "khoj-ai",
-    "version": "1.30.7",
+    "version": "1.30.10",
    "private": true,
    "scripts": {
        "dev": "next dev",
@ -62,6 +62,9 @@
        "react-hook-form": "^7.52.1",
        "shadcn-ui": "^0.8.0",
        "swr": "^2.2.5",
+        "tailwind-merge": "^2.3.0",
+        "tailwindcss": "^3.4.6",
+        "tailwindcss-animate": "^1.0.7",
        "typescript": "^5",
        "vaul": "^0.9.1",
        "zod": "^3.23.8"
@ -82,9 +85,6 @@
        "lint-staged": "^15.2.7",
        "nodemon": "^3.1.3",
        "prettier": "3.3.3",
-        "tailwind-merge": "^2.3.0",
-        "tailwindcss": "^3.4.6",
-        "tailwindcss-animate": "^1.0.7",
        "typescript": "^5"
    },
    "prettier": {
--- a/src/interface/web/tailwind.config.ts
+++ b/src/interface/web/tailwind.config.ts
@ -55,6 +55,9 @@ const config = {
            },
        },
        extend: {
+            fontFamily: {
+                sans: ["var(--font-noto-sans)", "var(--font-noto-sans-arabic)"],
+            },
            colors: {
                border: "hsl(var(--border))",
                input: "hsl(var(--input))",
--- a/src/khoj/interface/web/.well-known/assetlinks.json
+++ b/src/khoj/interface/web/.well-known/assetlinks.json
@ -0,0 +1,11 @@
+[{
+ "relation": ["delegate_permission/common.handle_all_urls"],
+ "target": {
+   "namespace": "android_app",
+   "package_name": "dev.khoj.app",
+    "sha256_cert_fingerprints": [
+      "CC:98:4A:0A:F1:CC:84:26:AC:02:86:49:AA:69:64:B9:5E:63:A3:EF:18:56:EA:CA:13:C1:3A:15:CA:49:77:46",
+      "D4:5A:6F:6C:18:28:D2:1C:78:27:92:C6:AC:DB:4C:12:C4:52:A1:88:9B:A1:F5:67:D1:22:FE:A0:0F:B1:AE:92"
+    ]
+ }
+}]
--- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py
+++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py
@ -1,9 +1,8 @@
-import json
 import logging
-import re
 from datetime import datetime, timedelta
 from typing import Dict, List, Optional

+import pyjson5
 from langchain.schema import ChatMessage

 from khoj.database.models import Agent, ChatModelOptions, KhojUser
@ -110,7 +109,7 @@ def extract_questions_anthropic(
    # Extract, Clean Message from Claude's Response
    try:
        response = clean_json(response)
-        response = json.loads(response)
+        response = pyjson5.loads(response)
        response = [q.strip() for q in response["queries"] if q.strip()]
        if not isinstance(response, list) or not response:
            logger.error(f"Invalid response for constructing subqueries: {response}")
--- a/src/khoj/processor/conversation/google/gemini_chat.py
+++ b/src/khoj/processor/conversation/google/gemini_chat.py
@ -1,9 +1,8 @@
-import json
 import logging
-import re
 from datetime import datetime, timedelta
 from typing import Dict, List, Optional

+import pyjson5
 from langchain.schema import ChatMessage

 from khoj.database.models import Agent, ChatModelOptions, KhojUser
@ -104,7 +103,7 @@ def extract_questions_gemini(
    # Extract, Clean Message from Gemini's Response
    try:
        response = clean_json(response)
-        response = json.loads(response)
+        response = pyjson5.loads(response)
        response = [q.strip() for q in response["queries"] if q.strip()]
        if not isinstance(response, list) or not response:
            logger.error(f"Invalid response for constructing subqueries: {response}")
--- a/src/khoj/processor/conversation/offline/chat_model.py
+++ b/src/khoj/processor/conversation/offline/chat_model.py
@ -5,6 +5,7 @@ from datetime import datetime, timedelta
 from threading import Thread
 from typing import Any, Iterator, List, Optional, Union

+import pyjson5
 from langchain.schema import ChatMessage
 from llama_cpp import Llama

@ -13,6 +14,7 @@ from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model
 from khoj.processor.conversation.utils import (
    ThreadedGenerator,
+    clean_json,
    commit_conversation_trace,
    generate_chatml_messages_with_context,
    messages_to_print,
@ -114,8 +116,8 @@ def extract_questions_offline(

    # Extract and clean the chat model's response
    try:
-        response = response.strip(empty_escape_sequences)
-        response = json.loads(response)
+        response = clean_json(response)
+        response = pyjson5.loads(response)
        questions = [q.strip() for q in response["queries"] if q.strip()]
        questions = filter_questions(questions)
    except:
--- a/src/khoj/processor/conversation/openai/gpt.py
+++ b/src/khoj/processor/conversation/openai/gpt.py
@ -1,8 +1,8 @@
-import json
 import logging
 from datetime import datetime, timedelta
 from typing import Dict, List, Optional

+import pyjson5
 from langchain.schema import ChatMessage

 from khoj.database.models import Agent, ChatModelOptions, KhojUser
@ -104,7 +104,7 @@ def extract_questions(
    # Extract, Clean Message from GPT's Response
    try:
        response = clean_json(response)
-        response = json.loads(response)
+        response = pyjson5.loads(response)
        response = [q.strip() for q in response["queries"] if q.strip()]
        if not isinstance(response, list) or not response:
            logger.error(f"Invalid response for constructing subqueries: {response}")
--- a/src/khoj/processor/conversation/prompts.py
+++ b/src/khoj/processor/conversation/prompts.py
@ -30,11 +30,9 @@ You were created by Khoj Inc. with the following capabilities:

 - You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you.
 - Users can share files and other information with you using the Khoj Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window.
- Say "I don't know" or "I don't understand" if you don't know what to say or if you don't know the answer to a question.
 - Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following
    - inline math mode : `\\(` and `\\)`
    - display math mode: insert linebreak after opening `$$`, `\\[` and before closing `$$`, `\\]`
- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided notes or past conversations.
 - Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay".

 Today is {day_of_week}, {current_date} in UTC.
--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@ -5,6 +5,7 @@ import math
 import mimetypes
 import os
 import queue
+import re
 import uuid
 from dataclasses import dataclass
 from datetime import datetime
@ -14,6 +15,7 @@ from time import perf_counter
 from typing import Any, Callable, Dict, List, Optional

 import PIL.Image
+import pyjson5
 import requests
 import tiktoken
 import yaml
@ -590,6 +592,47 @@ def clean_code_python(code: str):
    return code.strip().removeprefix("```python").removesuffix("```")


+def load_complex_json(json_str):
+    """
+    Parse a JSON-like string whose string values may contain unescaped double quotes.
+
+    The input is first passed through clean_json. Double quotes found inside string
+    values are then backslash-escaped, leaving already escaped quotes untouched.
+    The result is parsed with json.loads, falling back to pyjson5.loads.
+
+    Returns the parsed object from the first loader that succeeds.
+    Raises ValueError, listing each loader's error, if no loader can parse the string.
+    """
+
+    def replace_unescaped_quotes(match):
+        # Text captured between the value's opening and closing double quotes
+        content = match.group(1)
+        # Escape double quotes that are not already escaped.
+        # The negative lookbehind skips quotes already preceded by a backslash.
+        # Replace " with \"
+        processed_dq = re.sub(r'(?<!\\)"', '\\"', content)
+        # Replace \' with \\' (only when the backslash is not itself escaped)
+        processed_final = re.sub(r"(?<!\\)\\'", r"\\\\'", processed_dq)
+        return f': "{processed_final}"'
+
+    # Match a double-quoted value: a colon, optional whitespace, an opening quote, then
+    # lazily everything up to an unescaped quote that is directly followed by , or }
+    pattern = r':\s*"(.*?)(?<!\\)"(?=[,}])'
+
+    # Process the JSON string
+    cleaned = clean_json(rf"{json_str}")
+    processed = re.sub(pattern, replace_unescaped_quotes, cleaned)
+
+    # Try each JSON loader in order and return the first successful parse
+    errors = []
+    json_loaders_to_try = [json.loads, pyjson5.loads]
+    for loads in json_loaders_to_try:
+        try:
+            return loads(processed)
+        except (json.JSONDecodeError, pyjson5.Json5Exception) as e:
+            errors.append(f"{type(e).__name__}: {str(e)}")
+
+    # If all loaders fail, raise the aggregated error
+    raise ValueError(
+        f"Failed to load JSON with errors: {'; '.join(errors)}\n\n"
+        f"While attempting to load this cleaned JSON:\n{processed}"
+    )
+
+
 def defilter_query(query: str):
    """Remove any query filters in query"""
    defiltered_query = query
--- a/src/khoj/processor/tools/run_code.py
+++ b/src/khoj/processor/tools/run_code.py
@ -1,6 +1,5 @@
 import base64
 import datetime
-import json
 import logging
 import mimetypes
 import os
@ -15,8 +14,8 @@ from khoj.processor.conversation import prompts
 from khoj.processor.conversation.utils import (
    ChatEvent,
    clean_code_python,
-    clean_json,
    construct_chat_history,
+    load_complex_json,
 )
 from khoj.routers.helpers import send_message_to_model_wrapper
 from khoj.utils.helpers import is_none_or_empty, timer, truncate_code_context
@ -135,8 +134,7 @@ async def generate_python_code(
    )

    # Validate that the response is a non-empty, JSON-serializable list
-    response = clean_json(response)
-    response = json.loads(response)
+    response = load_complex_json(response)
    code = response.get("code", "").strip()
    input_files = response.get("input_files", [])
    input_links = response.get("input_links", [])
--- a/src/khoj/routers/helpers.py
+++ b/src/khoj/routers/helpers.py
@ -27,6 +27,7 @@ from typing import (
 from urllib.parse import parse_qs, quote, unquote, urljoin, urlparse

 import cron_descriptor
+import pyjson5
 import pytz
 import requests
 from apscheduler.job import Job
@ -541,7 +542,7 @@ async def generate_online_subqueries(
    # Validate that the response is a non-empty, JSON-serializable list
    try:
        response = clean_json(response)
-        response = json.loads(response)
+        response = pyjson5.loads(response)
        response = {q.strip() for q in response["queries"] if q.strip()}
        if not isinstance(response, set) or not response or len(response) == 0:
            logger.error(f"Invalid response for constructing subqueries: {response}. Returning original query: {q}")
--- a/src/khoj/routers/research.py
+++ b/src/khoj/routers/research.py
@ -1,4 +1,3 @@
-import json
 import logging
 from datetime import datetime
 from typing import Callable, Dict, List, Optional
@ -10,10 +9,10 @@ from khoj.database.models import Agent, KhojUser
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.utils import (
    InformationCollectionIteration,
-    clean_json,
    construct_chat_history,
    construct_iteration_history,
    construct_tool_chat_history,
+    load_complex_json,
 )
 from khoj.processor.tools.online_search import read_webpages, search_online
 from khoj.processor.tools.run_code import run_code
@ -106,8 +105,7 @@ async def apick_next_tool(
        return

    try:
-        response = clean_json(response)
-        response = json.loads(response)
+        response = load_complex_json(response)
        selected_tool = response.get("tool", None)
        generated_query = response.get("query", None)
        scratchpad = response.get("scratchpad", None)
--- a/src/khoj/routers/web_client.py
+++ b/src/khoj/routers/web_client.py
@ -134,3 +134,8 @@ def automations_config_page(
            "request": request,
        },
    )
+
+
+@web_client.get("/.well-known/assetlinks.json", response_class=FileResponse)
+def assetlinks(request: Request):
+    """Serve the file at constants.assetlinks_file_path for GET /.well-known/assetlinks.json."""
+    return FileResponse(constants.assetlinks_file_path)
--- a/src/khoj/utils/constants.py
+++ b/src/khoj/utils/constants.py
@ -5,6 +5,7 @@ app_root_directory = Path(__file__).parent.parent.parent
 web_directory = app_root_directory / "khoj/interface/web/"
 next_js_directory = app_root_directory / "khoj/interface/built/"
 pypi_static_directory = app_root_directory / "khoj/interface/compiled/"
+assetlinks_file_path = web_directory / ".well-known/assetlinks.json"
 empty_escape_sequences = "\n|\r|\t| "
 app_env_filepath = "~/.khoj/env"
 telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
--- a/tests/evals/eval.py
+++ b/tests/evals/eval.py
@ -3,8 +3,10 @@ import concurrent.futures
 import json
 import logging
 import os
+import re
 import time
 from datetime import datetime
+from functools import partial
 from io import StringIO
 from textwrap import dedent
 from threading import Lock
@ -24,13 +26,10 @@ logger = logging.getLogger(__name__)
 KHOJ_URL = os.getenv("KHOJ_URL", "http://localhost:42110")
 KHOJ_CHAT_API_URL = f"{KHOJ_URL}/api/chat"
 KHOJ_API_KEY = os.getenv("KHOJ_API_KEY")
-KHOJ_MODE = os.getenv("KHOJ_MODE", "default")  # E.g research, general, notes etc.
+KHOJ_MODE = os.getenv("KHOJ_MODE", "default").lower()  # E.g research, general, notes etc.

 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 GEMINI_EVAL_MODEL = os.getenv("GEMINI_EVAL_MODEL", "gemini-1.5-pro-002")
-GEMINI_API_URL = (
-    f"https://generativelanguage.googleapis.com/v1beta/models/{GEMINI_EVAL_MODEL}:generateContent?key={GEMINI_API_KEY}"
-)

 SAMPLE_SIZE = os.getenv("SAMPLE_SIZE")  # Number of examples to evaluate
 RANDOMIZE = os.getenv("RANDOMIZE", "false").lower() == "true"  # Randomize examples
@ -128,6 +127,99 @@ def load_simpleqa_dataset():
        return None


+def load_gpqa_dataset():
+    """
+    Load the "gpqa_diamond" configuration (train split) of the GPQA benchmark from HuggingFace.
+
+    Each row is turned into a multiple-choice question with shuffled answer choices,
+    and the dataset is normalized to the column names used by the FRAMES dataset.
+
+    ### Data Fields
+    - Prompt: The multiple-choice question, including the four lettered choices
+    - Answer: The letter (A-D) of the correct choice
+    - reasoning_types: The GPQA "Subdomain" of the question
+
+    Returns the dataset, shuffled if RANDOMIZE is set and cut to the first SAMPLE_SIZE
+    rows if SAMPLE_SIZE is set, or None if loading fails.
+    """
+    import random
+
+    def format_multiple_choice_question(row: Dict) -> tuple[str, str]:
+        """
+        Create GPQA multi-choice prompt from shuffled answer choices and question.
+        Refer: https://github.com/openai/simple-evals/blob/a8e85cc8a5dea497d915f870895250e07f9cc737/common.py#L12
+
+        Returns formatted prompt and correct answer letter.
+        """
+        # Gather choices
+        choices = [
+            row["Incorrect Answer 1"],
+            row["Incorrect Answer 2"],
+            row["Incorrect Answer 3"],
+            row["Correct Answer"],
+        ]
+        # Shuffle choices in place (unseeded, so the order differs between runs)
+        random.shuffle(choices)
+
+        # Get correct answer letter from the position of the correct choice after shuffling
+        correct_index = choices.index(row["Correct Answer"])
+        correct_letter = "ABCD"[correct_index]
+
+        prompt = f"""
+Answer the following multiple choice question. Answer should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.
+
+{row["Question"]}
+
+A) {choices[0]}
+B) {choices[1]}
+C) {choices[2]}
+D) {choices[3]}
+        """.strip()
+
+        return prompt, correct_letter
+
+    try:
+        dataset = load_dataset("Idavidrein/gpqa", "gpqa_diamond", split="train")
+
+        # Create multi-choice q&a prompt from choices and correct answer
+        prompts_and_answers = [format_multiple_choice_question(row) for row in dataset]
+
+        # Normalize dataset to FRAMES format
+        dataset = dataset.rename_columns({"Subdomain": "reasoning_types"})
+        dataset = dataset.add_column("Prompt", [p[0] for p in prompts_and_answers])
+        dataset = dataset.add_column("Answer", [p[1] for p in prompts_and_answers])
+
+        # Sample and shuffle dataset if configured
+        dataset = dataset.shuffle() if RANDOMIZE else dataset
+        # NOTE(review): slicing presumably yields a dict of column lists rather than a Dataset - confirm
+        dataset = dataset[: int(SAMPLE_SIZE)] if SAMPLE_SIZE else dataset
+
+        return dataset
+    except Exception as e:
+        logger.error(f"Error loading dataset: {e}")
+        return None
+
+
+def load_math500_dataset():
+    """
+    Load the MATH500 dataset from HuggingFace and rename its columns to match the other loaders.
+
+    The function takes no arguments. Sampling is controlled by the module-level
+    SAMPLE_SIZE and RANDOMIZE settings.
+
+    ### Data Fields
+    - Prompt: The math problem to be solved
+    - Answer: The ground truth answer
+    - reasoning_types: The subject of the problem
+
+    Returns:
+        Dataset: Formatted HuggingFace Dataset, or None if loading fails.
+    """
+    try:
+        # Load the MATH500 dataset from HuggingFace
+        dataset = load_dataset("HuggingFaceH4/MATH-500", split="test")
+        # Rename columns to the Prompt, Answer, reasoning_types names used by the evaluation loop
+        dataset = dataset.rename_columns({"problem": "Prompt", "answer": "Answer", "subject": "reasoning_types"})
+
+        # Sample and shuffle dataset if configured
+        dataset = dataset.shuffle() if RANDOMIZE else dataset
+        if SAMPLE_SIZE:
+            # Clamp to the dataset size so an oversized SAMPLE_SIZE yields the full dataset
+            # instead of an out-of-range error from select
+            dataset = dataset.select(range(min(int(SAMPLE_SIZE), len(dataset))))
+
+        return dataset
+    except Exception as e:
+        # Log through the module logger, like the other dataset loaders
+        logger.error(f"Error loading and formatting MATH500 dataset: {e}")
+        return None
+
+
 def get_agent_response(prompt: str) -> Dict[str, Any]:
    """Get response from the Khoj API"""
    # Set headers
@ -152,7 +244,30 @@ def get_agent_response(prompt: str) -> Dict[str, Any]:
        return {"response": "", "usage": {}}


-def evaluate_response(query: str, agent_response: str, ground_truth: str) -> tuple[bool | None, str, float]:
+def evaluate_response_with_mcq_match(
+    query: str, agent_response: str, ground_truth: str
+) -> tuple[bool | None, str, float]:
+    """
+    Evaluate Khoj response against benchmark ground truth using string matching.
+
+    Finds the first "Answer: <LETTER>" in the agent response (case-insensitive, LETTER in A-D)
+    and compares that letter with the ground truth letter.
+
+    Args:
+        query: The question asked. Unused here; kept so all evaluators share one call signature.
+        agent_response: The full text of the agent's response.
+        ground_truth: The letter of the correct choice.
+
+    Returns:
+        Tuple of (decision, explanation, cost). Decision is None if evaluation raised an error.
+        Cost is always 0.0.
+    """
+    try:
+        # Extract answer from agent response
+        answer_pattern_multichoice = r"(?i)Answer\s*:\s*([A-D])"
+        match = re.search(answer_pattern_multichoice, agent_response)
+        # The pattern is case-insensitive, so normalize the captured letter to upper case
+        # before comparing it with the upper case ground truth letter
+        extracted_answer = match.group(1).upper() if match else None
+
+        # Check if extracted answer matches ground truth
+        decision = extracted_answer == ground_truth
+        explanation = f"Agent response {'matches' if decision else 'does not match'} ground truth {ground_truth}"
+
+        # Return decision, explanation and cost in structured form
+        return decision, explanation, 0.0
+    except Exception as e:
+        logger.error(f"Error in evaluation: {e}")
+        return None, f"Evaluation failed: {str(e)}", 0.0
+
+
+def evaluate_response_with_gemini(
+    query: str, agent_response: str, ground_truth: str, eval_model=GEMINI_EVAL_MODEL
+) -> tuple[bool | None, str, float]:
    """Evaluate Khoj response against benchmark ground truth using Gemini"""
    evaluation_prompt = f"""
    Compare the following agent response with the ground truth answer.
@ -166,10 +281,13 @@ def evaluate_response(query: str, agent_response: str, ground_truth: str) -> tup
    Provide your evaluation in the following json format:
    {"explanation:" "[How you made the decision?)", "decision:" "(TRUE if response contains key information, FALSE otherwise)"}
    """
+    gemini_api_url = (
+        f"https://generativelanguage.googleapis.com/v1beta/models/{eval_model}:generateContent?key={GEMINI_API_KEY}"
+    )

    try:
        response = requests.post(
-            GEMINI_API_URL,
+            gemini_api_url,
            headers={"Content-Type": "application/json"},
            json={
                "contents": [{"parts": [{"text": evaluation_prompt}]}],
@ -182,7 +300,7 @@ def evaluate_response(query: str, agent_response: str, ground_truth: str) -> tup
        # Update cost of evaluation
        input_tokens = response_json["usageMetadata"]["promptTokenCount"]
        ouput_tokens = response_json["usageMetadata"]["candidatesTokenCount"]
-        cost = get_cost_of_chat_message(GEMINI_EVAL_MODEL, input_tokens, ouput_tokens)
+        cost = get_cost_of_chat_message(eval_model, input_tokens, ouput_tokens)

        # Parse evaluation response
        eval_response: dict[str, str] = json.loads(
@ -200,7 +318,7 @@ def evaluate_response(query: str, agent_response: str, ground_truth: str) -> tup
        return None, f"Evaluation failed: {str(e)}", 0.0


-def process_batch(batch, batch_start, results, dataset_length):
+def process_batch(batch, batch_start, results, dataset_length, response_evaluator):
    global running_cost
    for idx, (prompt, answer, reasoning_type) in enumerate(batch):
        current_index = batch_start + idx
@ -219,7 +337,7 @@ def process_batch(batch, batch_start, results, dataset_length):
            decision = None
            explanation = "Agent response is empty. This maybe due to a service error."
        else:
-            decision, explanation, eval_cost = evaluate_response(prompt, agent_response, answer)
+            decision, explanation, eval_cost = response_evaluator(prompt, agent_response, answer)

        # Store results
        results.append(
@ -292,7 +410,7 @@ def parse_args():
        "--dataset",
        "-d",
        default="frames",
-        choices=["frames", "simpleqa"],
+        choices=["frames", "simpleqa", "gpqa", "math500"],
        help="Dataset to use for evaluation (default: frames)",
    )
    return parser.parse_args()
@ -309,12 +427,24 @@ def main():
            dataset = load_frames_dataset()
        elif args.dataset == "simpleqa":
            dataset = load_simpleqa_dataset()
+        elif args.dataset == "gpqa":
+            dataset = load_gpqa_dataset()
+        elif args.dataset == "math500":
+            dataset = load_math500_dataset()
    if dataset is None:
        return

    # Initialize variables
    results = []
    dataset_length = len(dataset["Prompt"])
+    if args.dataset == "gpqa":
+        response_evaluator = evaluate_response_with_mcq_match
+    elif args.dataset == "math500":
+        response_evaluator = partial(
+            evaluate_response_with_gemini, eval_model=os.getenv("GEMINI_EVAL_MODEL", "gemini-1.5-flash-002")
+        )
+    else:
+        response_evaluator = evaluate_response_with_gemini

    # Process examples in batches
    with concurrent.futures.ThreadPoolExecutor() as executor:
@ -326,7 +456,9 @@ def main():
                dataset["Answer"][i : i + BATCH_SIZE],
                dataset["reasoning_types"][i : i + BATCH_SIZE],
            )
-            futures.append(executor.submit(process_batch, batch, batch_start, results, dataset_length))
+            futures.append(
+                executor.submit(process_batch, batch, batch_start, results, dataset_length, response_evaluator)
+            )

        # Wait for all futures to complete
        concurrent.futures.wait(futures)
--- a/tests/test_conversation_utils.py
+++ b/tests/test_conversation_utils.py
@ -104,6 +104,18 @@ class TestTruncateMessage:
        assert truncated_chat_history[0] != copy_big_chat_message


+def test_load_complex_raw_json_string():
+    """load_complex_json should parse a raw string whose value mixes escaped and unescaped quotes."""
+    # Arrange: raw model output and the dictionary it is expected to parse into
+    raw_input = r"""{"key": "value with unescaped " and unescaped \' and escaped \" and escaped \\'"}"""
+    expected_json = {"key": "value with unescaped \" and unescaped \\' and escaped \" and escaped \\'"}
+
+    # Act: run the loader under test
+    loaded_json = utils.load_complex_json(raw_input)
+
+    # Assert: parsed output equals the expected dictionary
+    assert loaded_json == expected_json
+
+
 def generate_content(count):
    return " ".join([f"{index}" for index, _ in enumerate(range(count))])

--- a/versions.json
+++ b/versions.json
@ -98,5 +98,8 @@
 	"1.30.4": "0.15.0",
 	"1.30.5": "0.15.0",
 	"1.30.6": "0.15.0",
-	"1.30.7": "0.15.0"
+	"1.30.7": "0.15.0",
+	"1.30.8": "0.15.0",
+	"1.30.9": "0.15.0",
+	"1.30.10": "0.15.0"
 }