Resolve merge conflicts

2024-11-23 23:48:56 +01:00 · 2023-10-19 14:39:05 -07:00 · 2023-10-19 14:39:05 -07:00 · 963cd165eb
commit 963cd165eb
parent c125995d94 e3f8a95784
42 changed files with 941 additions and 590 deletions
--- a/docs/chat.md
+++ b/docs/chat.md
@ -7,18 +7,21 @@
 ### Setup
 #### Offline Chat
-Offline chat works without internet but it is slower, lower quality and more compute intensive.
+Offline chat stays completely private and works without internet. But it is slower, lower quality and more compute intensive.
-!> **Warning**: This will download a 3Gb+ Llama v2 chat model which can take some time
+> **System Requirements**:
 >  - Machine with at least **6 GB of RAM** and **4 GB of Disk** available
 >  - A CPU supporting [AVX or AVX2 instructions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) is required
 >  - A Mac M1+ or [Vulkan supported GPU](https://vulkan.gpuinfo.org/) should significantly speed up chat response times
- Open your [Khoj settings](http://localhost:42110/config/), click *Enable* on the Offline Chat card
+- Open your [Khoj settings](http://localhost:42110/config/) and click *Enable* on the Offline Chat card
 ![Configure offline chat](https://user-images.githubusercontent.com/6413477/257021364-8a2029f5-dc21-4de8-9af9-9ba6100d695c.mp4 ':include :type=mp4')
 #### Online Chat
 Online chat requires internet to use ChatGPT but is faster, higher quality and less compute intensive.
-!> **Warning**: This will enable Khoj to send your chat queries and notes to OpenAI for processing
+!> **Warning**: This will enable Khoj to send your chat queries and query relevant notes to OpenAI for processing
 1. Get your [OpenAI API Key](https://platform.openai.com/account/api-keys)
 2. Open your [Khoj Online Chat settings](http://localhost:42110/config/processor/conversation), add your OpenAI API key, and click *Save*. Then go to your [Khoj settings](http://localhost:42110/config) and click `Configure`. This will refresh Khoj with your OpenAI API key.
--- a/docs/emacs.md
+++ b/docs/emacs.md
@ -46,7 +46,7 @@ Indexes your org-agenda files, by default.
  (use-package khoj
    :ensure t
    :pin melpa-stable
-    :bind ("C-c s" . 'khoj)
+    :bind ("C-c s" . 'khoj))
 ```
 - Note: Install `khoj.el` from MELPA (instead of MELPA Stable) if you installed the pre-release version of khoj
--- a/manifest.json
+++ b/manifest.json
@ -1,7 +1,7 @@
 {
 	"id": "khoj",
 	"name": "Khoj",
-	"version": "0.12.3",
+	"version": "0.13.0",
 	"minAppVersion": "0.15.0",
 	"description": "An Open-Source AI Personal Assistant for your Digital Brain",
 	"author": "Khoj Inc.",
--- a/pyproject.toml
+++ b/pyproject.toml
@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "khoj-assistant"
-description = "An AI personal assistant for your Digital Brain"
+description = "An AI copilot for your Second Brain"
 readme = "README.md"
 license = "GPL-3.0-or-later"
 requires-python = ">=3.8"
@ -40,8 +40,9 @@ dependencies = [
    "dateparser >= 1.1.1",
    "defusedxml == 0.7.1",
    "fastapi == 0.77.1",
    "python-multipart >= 0.0.5",
    "jinja2 == 3.1.2",
-    "openai >= 0.27.0",
+    "openai >= 0.27.0, < 1.0.0",
    "tiktoken >= 0.3.2",
    "tenacity >= 8.2.2",
    "pillow == 9.3.0",
@ -83,6 +84,7 @@ test = [
    "freezegun >= 1.2.0",
    "factory-boy >= 3.2.1",
    "trio >= 0.22.0",
    "pytest-xdist",
 ]
 dev = [
    "khoj-assistant[test]",
--- a/scripts/bump_version.sh
+++ b/scripts/bump_version.sh
@ -9,6 +9,10 @@ do
            # Get current project version
            current_version=$OPTARG
            # Bump Desktop app to current version
            cd $project_root/src/interface/desktop
            sed -E -i.bak "s/version\": \"(.*)\",/version\": \"$current_version\",/" package.json
            # Bump Obsidian plugin to current version
            cd $project_root/src/interface/obsidian
            sed -E -i.bak "s/version\": \"(.*)\",/version\": \"$current_version\",/" package.json
--- a/src/app/main.py
+++ b/src/app/main.py
@ -14,10 +14,11 @@ warnings.filterwarnings("ignore", message=r"legacy way to download files from th
 # External Packages
 import uvicorn
 import django
 import schedule
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 import schedule
 import django
 from fastapi.staticfiles import StaticFiles
 from rich.logging import RichHandler
 from django.core.asgi import get_asgi_application
@ -41,6 +42,15 @@ app = FastAPI()
 # Get Django Application
 django_app = get_asgi_application()
 # Add CORS middleware
 # Starlette compares each `allow_origins` entry to the request's Origin header as an
 # exact string, so glob-style entries such as "http://localhost:*" never match, and an
 # Origin header never carries a path ("https://app.khoj.dev/*"). List exact origins
 # and use `allow_origin_regex` (full-matched against the Origin) for localhost on any port.
 app.add_middleware(
    CORSMiddleware,
    allow_origins=["app://obsidian.md", "https://app.khoj.dev", "app://khoj.dev"],
    allow_origin_regex=r"http://localhost(:\d+)?",
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
 )
 # Set Locale
 locale.setlocale(locale.LC_ALL, "")
--- a/src/interface/desktop/main.js
+++ b/src/interface/desktop/main.js
@ -8,7 +8,6 @@ const {dialog} = require('electron');
 const cron = require('cron').CronJob;
 const axios = require('axios');
 const { Readable } = require('stream');
 const KHOJ_URL = 'http://127.0.0.1:42110'
@ -65,7 +64,7 @@ const schema = {
 var state = {}
-const store = new Store({schema});
+const store = new Store({ schema });
 console.log(store);
@ -86,57 +85,65 @@ function handleSetTitle (event, title) {
    });
 }
 /**
  * Return the MIME type to report for `filename`.
  *
  * The lookup key is the text after the last '.' in the name (the whole name
  * when it contains no '.'). The match is case-sensitive; anything that is
  * not listed is reported as 'text/plain'.
  */
 function filenameToMimeType (filename) {
    // A Map (rather than a plain object) so that suffixes like "constructor"
    // cannot accidentally resolve to inherited properties.
    const mimeTypeBySuffix = new Map([
        ['pdf', 'application/pdf'],
        ['png', 'image/png'],
        ['jpg', 'image/jpeg'],
        ['jpeg', 'image/jpeg'],
        ['md', 'text/markdown'],
        ['markdown', 'text/markdown'],
        ['org', 'text/org'],
    ]);
    const suffix = filename.split('.').pop();
    const knownType = mimeTypeBySuffix.get(suffix);
    if (knownType !== undefined) {
        return knownType;
    }
    return 'text/plain';
 }
 function pushDataToKhoj (regenerate = false) {
    let filesToPush = [];
-    const files = store.get('files');
+    const files = store.get('files') || [];
-    const folders = store.get('folders');
+    const folders = store.get('folders') || [];
-    state = {
+    state = { completed: true }
-        completed: true
+
    // Collect paths of all configured files to index
    for (const file of files) {
        filesToPush.push(file.path);
    }
-    if (files) {
+    // Collect paths of all indexable files in configured folders
-        for (file of files) {
+    for (const folder of folders) {
-            filesToPush.push(file.path);
+        const files = fs.readdirSync(folder.path, { withFileTypes: true });
-        }
+        for (const file of files) {
-    }
+            if (file.isFile() && validFileTypes.includes(file.name.split('.').pop())) {
-    if (folders) {
+                filesToPush.push(path.join(folder.path, file.name));
        for (folder of folders) {
            const files = fs.readdirSync(folder.path, { withFileTypes: true });
            for (file of files) {
                if (file.isFile() && validFileTypes.includes(file.name.split('.').pop())) {
                    filesToPush.push(path.join(folder.path, file.name));
                }
            }
        }
    }
    let data = {
        files: []
    }
    const lastSync = store.get('lastSync') || [];
-
+    const formData = new FormData();
-    for (file of filesToPush) {
+    for (const file of filesToPush) {
        const stats = fs.statSync(file);
        if (!regenerate) {
            // Only push files that have been modified since last sync
            if (stats.mtime.toISOString() < lastSync.find((syncedFile) => syncedFile.path === file)?.datetime) {
                continue;
            }
        }
        // Collect all updated or newly created files since last sync to index on Khoj server
        try {
-            let rawData;
+            let encoding = binaryFileTypes.includes(file.split('.').pop()) ? "binary" : "utf8";
-            // If the file is a PDF or IMG file, read it as a binary file
+            let mimeType = filenameToMimeType(file) + (encoding === "utf8" ? "; charset=UTF-8" : "");
-            if (binaryFileTypes.includes(file.split('.').pop())) {
+            let fileContent = Buffer.from(fs.readFileSync(file, { encoding: encoding }), encoding);
-                rawData = fs.readFileSync(file).toString('base64');
+            let fileObj = new Blob([fileContent], { type: mimeType });
-            } else {
+            formData.append('files', fileObj, file);
                rawData = fs.readFileSync(file, 'utf8');
            }
            data.files.push({
                path: file,
                content: rawData
            });
            state[file] = {
                success: true,
            }
@ -149,46 +156,46 @@ function pushDataToKhoj (regenerate = false) {
        }
    }
    // Mark deleted files for removal from index on Khoj server
    for (const syncedFile of lastSync) {
        if (!filesToPush.includes(syncedFile.path)) {
-            data.files.push({
+            fileObj = new Blob([""], { type: filenameToMimeType(syncedFile.path) });
-                path: syncedFile.path,
+            formData.append('files', fileObj, syncedFile.path);
                content: ""
            });
        }
    }
-    const headers = { 'x-api-key': 'secret', 'Content-Type': 'application/json' };
+    // Send collected files to Khoj server for indexing
-
+    if (!!formData?.entries()?.next().value) {
-    const stream = new Readable({
+        const hostURL = store.get('hostURL') || KHOJ_URL;
-        read() {
+        const headers = {
-            this.push(JSON.stringify(data));
+            'x-api-key': 'secret'
-            this.push(null);
+        };
-        }
+        axios.post(`${hostURL}/api/v1/index/update?force=${regenerate}&client=desktop`, formData, { headers })
-    });
+            .then(response => {
-
+                console.log(response.data);
-    const hostURL = store.get('hostURL') || KHOJ_URL;
+                let lastSync = [];
-
+                for (const file of filesToPush) {
-    axios.post(`${hostURL}/v1/indexer/batch?regenerate=${regenerate}`, stream, { headers })
+                    lastSync.push({
-        .then(response => {
+                        path: file,
-            console.log(response.data);
+                        datetime: new Date().toISOString()
-            const win = BrowserWindow.getAllWindows()[0];
+                    });
-            win.webContents.send('update-state', state);
+                }
-            let lastSync = [];
+                store.set('lastSync', lastSync);
-            for (const file of filesToPush) {
+            })
-                lastSync.push({
+            .catch(error => {
-                    path: file,
+                console.error(error);
-                    datetime: new Date().toISOString()
+                state['completed'] = false
-                });
+            })
-            }
+            .finally(() => {
-            store.set('lastSync', lastSync);
+                // Syncing complete
-        })
+                const win = BrowserWindow.getAllWindows()[0];
-        .catch(error => {
+                if (win) win.webContents.send('update-state', state);
-            console.error(error);
+            });
-            state['completed'] = false
+    } else {
-            const win = BrowserWindow.getAllWindows()[0];
+        // Syncing complete
-            win.webContents.send('update-state', state);
+        const win = BrowserWindow.getAllWindows()[0];
-        });
+        if (win) win.webContents.send('update-state', state);
    }
 }
 pushDataToKhoj();
--- a/src/interface/desktop/package.json
+++ b/src/interface/desktop/package.json
@ -1,13 +1,13 @@
 {
  "name": "Khoj",
-  "homepage": ".",
+  "version": "0.13.0",
-  "productName": "Khoj",
+  "description": "An AI copilot for your Second Brain",
-  "version": "1.0.2",
+  "author": "Saba Imran, Debanjum Singh Solanky <team@khoj.dev>",
-  "description": "Scaffolding for the desktop entrypoint to Khoj",
+  "license": "GPL-3.0-or-later",
-  "main": "main.js",
+  "homepage": "https://khoj.dev",
   "repository": "https://github.com/khoj-ai/khoj",
-  "author": "Khoj <team@khoj.dev>",
+  "productName": "Khoj",
-  "license": "MIT",
+  "main": "main.js",
  "private": false,
  "devDependencies": {
    "electron": "25.8.1"
--- a/src/interface/emacs/khoj.el
+++ b/src/interface/emacs/khoj.el
@ -1,11 +1,12 @@
-;;; khoj.el --- AI personal assistant for your digital brain -*- lexical-binding: t -*-
+;;; khoj.el --- AI copilot for your Second Brain -*- lexical-binding: t -*-
-;; Copyright (C) 2021-2022 Debanjum Singh Solanky
+;; Copyright (C) 2021-2023 Khoj Inc.
-;; Author: Debanjum Singh Solanky <debanjum@gmail.com>
+;; Author: Debanjum Singh Solanky <debanjum@khoj.dev>
-;; Description: An AI personal assistant for your digital brain
+;;         Saba Imran <saba@khoj.dev>
 ;; Description: An AI copilot for your Second Brain
 ;; Keywords: search, chat, org-mode, outlines, markdown, pdf, image
-;; Version: 0.12.3
+;; Version: 0.13.0
 ;; Package-Requires: ((emacs "27.1") (transient "0.3.0") (dash "2.19.1"))
 ;; URL: https://github.com/khoj-ai/khoj/tree/master/src/interface/emacs
@ -28,8 +29,8 @@
 ;;; Commentary:
-;; Create an AI personal assistant for your `org-mode', `markdown' notes,
+;; Create an AI copilot for your `org-mode', `markdown' notes,
-;; PDFs and images. The assistant exposes 2 modes, search and chat:
+;; PDFs and images. The copilot exposes 2 modes, search and chat:
 ;;
 ;; Chat provides faster answers, iterative discovery and assisted
 ;; creativity. It requires your OpenAI API key to access GPT models
@ -87,6 +88,21 @@
  :group 'khoj
  :type 'integer)
 (defcustom khoj-search-on-idle-time 0.3
  "Idle time (in seconds) to wait before triggering search."
  :group 'khoj
  :type 'number)
 (defcustom khoj-server-api-key "secret"
  "API Key to Khoj server."
  :group 'khoj
  :type 'string)
 (defcustom khoj-index-interval 3600
  "Interval (in seconds) to wait before updating content index."
  :group 'khoj
  :type 'number)
 (defcustom khoj-default-content-type "org"
  "The default content type to perform search on."
  :group 'khoj
@ -115,6 +131,15 @@
 (defvar khoj--content-type "org"
  "The type of content to perform search on.")
 (defvar khoj--search-on-idle-timer nil
  "Idle timer to trigger incremental search.")
 (defvar khoj--index-timer nil
  "Timer to trigger content indexing.")
 (defvar khoj--indexed-files '()
  "Files that were indexed in previous content indexing run.")
 (declare-function org-element-property "org-mode" (PROPERTY ELEMENT))
 (declare-function org-element-type "org-mode" (ELEMENT))
 (declare-function markdown-mode "markdown-mode" ())
@ -236,6 +261,11 @@ for example), set this to the full interpreter path."
  :type 'boolean
  :group 'khoj)
 (defcustom khoj-offline-chat-model nil
  "Specify chat model to use for offline chat with khoj."
  :type 'string
  :group 'khoj)
 (defcustom khoj-auto-setup t
  "Automate install, configure and start of khoj server.
 Auto invokes setup steps on calling main entrypoint."
@ -365,9 +395,9 @@ CONFIG is json obtained from Khoj config API."
          (string-join "/"))))
 (defun khoj--server-configure ()
-  "Configure the the Khoj server for search and chat."
+  "Configure the Khoj server for search and chat."
  (interactive)
-  (let* ((org-directory-regexes (or (mapcar (lambda (dir) (format "%s/**/*.org" dir)) khoj-org-directories) json-null))
+  (let* ((url-request-method "GET")
         (current-config
          (with-temp-buffer
            (url-insert-file-contents (format "%s/api/config/data" khoj-server-url))
@ -376,56 +406,12 @@ CONFIG is json obtained from Khoj config API."
           (with-temp-buffer
             (url-insert-file-contents (format "%s/api/config/data/default" khoj-server-url))
             (ignore-error json-end-of-file (json-parse-buffer :object-type 'alist :array-type 'list :null-object json-null :false-object json-false))))
         (default-index-dir (khoj--get-directory-from-config default-config '(content-type org embeddings-file)))
         (default-chat-dir (khoj--get-directory-from-config default-config '(processor conversation conversation-logfile)))
         (chat-model (or khoj-chat-model (alist-get 'chat-model (alist-get 'openai (alist-get 'conversation (alist-get 'processor default-config))))))
-         (default-model (alist-get 'model (alist-get 'conversation (alist-get 'processor default-config))))
+         (enable-offline-chat (or khoj-chat-offline (alist-get 'enable-offline-chat (alist-get 'offline-chat (alist-get 'conversation (alist-get 'processor default-config))))))
-         (enable-offline-chat (or khoj-chat-offline (alist-get 'enable-offline-chat (alist-get 'conversation (alist-get 'processor default-config)))))
+         (offline-chat-model (or khoj-offline-chat-model (alist-get 'chat-model (alist-get 'offline-chat (alist-get 'conversation (alist-get 'processor default-config))))))
         (config (or current-config default-config)))
    ;; Configure content types
    (cond
     ;; If khoj backend is not configured yet
     ((not current-config)
      (message "khoj.el: Server not configured yet.")
      (setq config (delq (assoc 'content-type config) config))
      (cl-pushnew `(content-type . ((org . ((input-files . ,khoj-org-files)
                                            (input-filter . ,org-directory-regexes)
                                            (compressed-jsonl . ,(format "%s/org.jsonl.gz" default-index-dir))
                                            (embeddings-file . ,(format "%s/org.pt" default-index-dir))
                                            (index-heading-entries . ,json-false)))))
                  config))
     ;; Else if khoj config has no org content config
     ((not (alist-get 'org (alist-get 'content-type config)))
      (message "khoj.el: Org-mode content on server not configured yet.")
     (let ((new-content-type (alist-get 'content-type config)))
        (setq new-content-type (delq (assoc 'org new-content-type) new-content-type))
        (cl-pushnew `(org . ((input-files . ,khoj-org-files)
                             (input-filter . ,org-directory-regexes)
                             (compressed-jsonl . ,(format "%s/org.jsonl.gz" default-index-dir))
                             (embeddings-file . ,(format "%s/org.pt" default-index-dir))
                             (index-heading-entries . ,json-false)))
                    new-content-type)
        (setq config (delq (assoc 'content-type config) config))
        (cl-pushnew `(content-type . ,new-content-type) config)))
     ;; Else if khoj is not configured to index specified org files
     ((not (and (equal (alist-get 'input-files (alist-get 'org (alist-get 'content-type config))) khoj-org-files)
                (equal (alist-get 'input-filter (alist-get 'org (alist-get 'content-type config))) org-directory-regexes)))
      (message "khoj.el: Org-mode content on server is stale.")
      (let* ((index-directory (khoj--get-directory-from-config config '(content-type org embeddings-file)))
             (new-content-type (alist-get 'content-type config)))
        (setq new-content-type (delq (assoc 'org new-content-type) new-content-type))
        (cl-pushnew `(org . ((input-files . ,khoj-org-files)
                             (input-filter . ,org-directory-regexes)
                             (compressed-jsonl . ,(format "%s/org.jsonl.gz" index-directory))
                             (embeddings-file . ,(format "%s/org.pt" index-directory))
                             (index-heading-entries . ,json-false)))
                    new-content-type)
        (setq config (delq (assoc 'content-type config) config))
        (cl-pushnew `(content-type . ,new-content-type) config))))
    ;; Configure processors
    (cond
     ((not khoj-openai-api-key)
@ -441,10 +427,11 @@ CONFIG is json obtained from Khoj config API."
     ;; If khoj backend isn't configured yet
     ((not current-config)
-      (message "khoj.el: Chat not configured yet.")
+      (message "khoj.el: Khoj not configured yet.")
      (setq config (delq (assoc 'processor config) config))
      (cl-pushnew `(processor . ((conversation . ((conversation-logfile . ,(format "%s/conversation.json" default-chat-dir))
-                                                  (enable-offline-chat . ,enable-offline-chat)
+                                                  (offline-chat . ((enable-offline-chat . ,enable-offline-chat)
                                                                   (chat-model . ,offline-chat-model)))
                                                  (openai . ((chat-model . ,chat-model)
                                                             (api-key . ,khoj-openai-api-key)))))))
                  config))
@ -455,7 +442,8 @@ CONFIG is json obtained from Khoj config API."
       (let ((new-processor-type (alist-get 'processor config)))
         (setq new-processor-type (delq (assoc 'conversation new-processor-type) new-processor-type))
         (cl-pushnew `(conversation . ((conversation-logfile . ,(format "%s/conversation.json" default-chat-dir))
-                                       (enable-offline-chat . ,enable-offline-chat)
+                                       (offline-chat . ((enable-offline-chat . ,enable-offline-chat)
                                                        (chat-model . ,offline-chat-model)))
                                       (openai . ((chat-model . ,chat-model)
                                                  (api-key . ,khoj-openai-api-key)))))
                     new-processor-type)
@ -465,13 +453,15 @@ CONFIG is json obtained from Khoj config API."
     ;; Else if chat configuration in khoj backend has gone stale
     ((not (and (equal (alist-get 'api-key (alist-get 'openai (alist-get 'conversation (alist-get 'processor config)))) khoj-openai-api-key)
                (equal (alist-get 'chat-model (alist-get 'openai (alist-get 'conversation (alist-get 'processor config)))) khoj-chat-model)
-                (equal (alist-get 'enable-offline-chat (alist-get 'conversation (alist-get 'processor config))) enable-offline-chat)))
+                (equal (alist-get 'enable-offline-chat (alist-get 'offline-chat (alist-get 'conversation (alist-get 'processor config)))) enable-offline-chat)
                (equal (alist-get 'chat-model (alist-get 'offline-chat (alist-get 'conversation (alist-get 'processor config)))) offline-chat-model)))
      (message "khoj.el: Chat configuration has gone stale.")
      (let* ((chat-directory (khoj--get-directory-from-config config '(processor conversation conversation-logfile)))
             (new-processor-type (alist-get 'processor config)))
        (setq new-processor-type (delq (assoc 'conversation new-processor-type) new-processor-type))
        (cl-pushnew `(conversation . ((conversation-logfile . ,(format "%s/conversation.json" chat-directory))
-                                      (enable-offline-chat . ,enable-offline-chat)
+                                      (offline-chat . ((enable-offline-chat . ,enable-offline-chat)
                                                       (chat-model . ,offline-chat-model)))
                                      (openai . ((chat-model . ,khoj-chat-model)
                                                 (api-key . ,khoj-openai-api-key)))))
                    new-processor-type)
@ -509,9 +499,75 @@ CONFIG is json obtained from Khoj config API."
      (khoj--server-configure))))
-;; -----------------------------------------------
+;; -------------------
-;; Extract and Render Entries of each Content Type
+;; Khoj Index Content
-;; -----------------------------------------------
+;; -------------------
 (defun khoj--server-index-files (&optional force content-type file-paths)
   "Send files at `FILE-PATHS' to the Khoj server to index for search and chat.
 `FORCE' re-indexes all files of `CONTENT-TYPE' even if they are already indexed.
 `FORCE' may be a Lisp boolean or the string \"true\" or \"false\".
 When `FILE-PATHS' is nil, the org files found under `khoj-org-directories'
 and the files in `khoj-org-files' are sent instead.
 The files sent are remembered in `khoj--indexed-files'."
   (interactive)
   (let* ((boundary (format "-------------------------%d" (random (expt 10 10))))
          (files-to-index (or file-paths
                              (append (mapcan (lambda (dir) (directory-files-recursively dir "\\.org$")) khoj-org-directories) khoj-org-files)))
          (type-query (if (or (equal content-type "all") (not content-type)) "" (format "t=%s" content-type)))
          ;; FORCE can arrive as the string "false", which is non-nil in Lisp.
          ;; Normalize it once so the URL and the messages agree.
          (force-p (and force (not (equal force "false"))))
          (inhibit-message t)
          (message-log-max nil)
          (url-request-method "POST")
          (url-request-data (khoj--render-files-as-request-body files-to-index khoj--indexed-files boundary))
          (url-request-extra-headers `(("content-type" . ,(format "multipart/form-data; boundary=%s" boundary))
                                       ("x-api-key" . ,khoj-server-api-key))))
     (url-retrieve (format "%s/api/v1/index/update?%s&force=%s&client=emacs" khoj-server-url type-query (if force-p "true" "false"))
                   ;; render response from indexing API endpoint on server
                   (lambda (status)
                     ;; STATUS is a plist; it can be non-nil on success (e.g. :redirect),
                     ;; so only an :error entry marks the request as failed.
                     (if (not (plist-get status :error))
                         (message "khoj.el: %scontent index %supdated" (if content-type (format "%s " content-type) "") (if force-p "force " ""))
                       ;; Move past the blank line that ends the HTTP headers
                       ;; so only the response body is reported.
                       (goto-char (point-min))
                       (search-forward "\n\n" nil t)
                       (message "khoj.el: Failed to %supdate %s content index. Status: %s. Response: %s"
                                (if force-p "force " "")
                                (or content-type "all")
                                status
                                (string-trim (buffer-substring-no-properties (point) (point-max))))))
                   nil t t)
     (setq khoj--indexed-files files-to-index)))
 (defun khoj--render-files-as-request-body (files-to-index previously-indexed-files boundary)
   "Render `FILES-TO-INDEX', `PREVIOUSLY-INDEXED-FILES' as multi-part form body.
 Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request.
 Each file in `FILES-TO-INDEX' becomes a part holding its raw content.
 Each file only in `PREVIOUSLY-INDEXED-FILES' becomes a part with empty content."
   (let ((part-delimiter (format "--%s\r\n" boundary)))
     (with-temp-buffer
       (set-buffer-multibyte nil)
       (insert "\n")
       ;; One part per file to index, carrying the file content read literally
       (dolist (path files-to-index)
         (insert part-delimiter
                 (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" path)
                 "Content-Type: text/org\r\n\r\n"
                 (with-temp-buffer
                   (insert-file-contents-literally path)
                   (buffer-string))
                 "\r\n"))
       ;; One empty part per file that was indexed before but is not in this run
       (dolist (stale-path previously-indexed-files)
         (unless (member stale-path files-to-index)
           (insert part-delimiter
                   (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" stale-path)
                   "Content-Type: text/org\r\n\r\n"
                   "\r\n")))
       ;; Closing delimiter ends the multi-part body
       (insert (format "--%s--\r\n" boundary))
       (buffer-string))))
 ;; Cancel any running indexing timer, first
 (when khoj--index-timer
    (cancel-timer khoj--index-timer))
 ;; Send files to index on server every `khoj-index-interval' seconds
 (setq khoj--index-timer
      (run-with-timer 60 khoj-index-interval 'khoj--server-index-files))
 ;; -------------------------------------------
 ;; Render Response from Khoj server for Emacs
 ;; -------------------------------------------
 (defun khoj--extract-entries-as-markdown (json-response query)
  "Convert JSON-RESPONSE, QUERY from API to markdown entries."
@ -920,6 +976,9 @@ RECEIVE-DATE is the message receive date."
  (message "khoj.el: Teardown Incremental Search")
  ;; unset khoj minibuffer window
  (setq khoj--minibuffer-window nil)
  (when (and khoj--search-on-idle-timer
             (timerp khoj--search-on-idle-timer))
    (cancel-timer khoj--search-on-idle-timer))
  ;; delete open connections to khoj server
  (khoj--delete-open-network-connections-to-server)
  ;; remove hooks for khoj incremental query and self
@ -942,8 +1001,10 @@ RECEIVE-DATE is the message receive date."
          ;; set current (mini-)buffer entered as khoj minibuffer
          ;; used to query khoj API only when user in khoj minibuffer
          (setq khoj--minibuffer-window (current-buffer))
-          (add-hook 'post-command-hook #'khoj--incremental-search) ; do khoj incremental search after every user action
+          ; do khoj incremental search after idle time
-          (add-hook 'minibuffer-exit-hook #'khoj--teardown-incremental-search)) ; teardown khoj incremental search on minibuffer exit
+          (setq khoj--search-on-idle-timer (run-with-idle-timer khoj-search-on-idle-time t #'khoj--incremental-search))
          ; teardown khoj incremental search on minibuffer exit
          (add-hook 'minibuffer-exit-hook #'khoj--teardown-incremental-search))
      (read-string khoj--query-prompt))))
@ -1014,17 +1075,20 @@ Paragraph only starts at first text after blank line."
 ;; Khoj Menu
 ;; ---------
-(transient-define-argument khoj--content-type-switch ()
+(defun khoj--setup-and-show-menu ()
-  :class 'transient-switches
+  "Create Transient menu for khoj and show it."
-  :argument-format "--content-type=%s"
+  ;; Create the Khoj Transient menu
-  :argument-regexp ".+"
+  (transient-define-argument khoj--content-type-switch ()
-  ;; set content type to: last used > based on current buffer > default type
+    :class 'transient-switches
-  :init-value (lambda (obj) (oset obj value (format "--content-type=%s" (or khoj--content-type (khoj--buffer-name-to-content-type (buffer-name))))))
+    :argument-format "--content-type=%s"
-  ;; dynamically set choices to content types enabled on khoj backend
+    :argument-regexp ".+"
-  :choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "image")))
+    ;; set content type to: last used > based on current buffer > default type
    :init-value (lambda (obj) (oset obj value (format "--content-type=%s" (or khoj--content-type (khoj--buffer-name-to-content-type (buffer-name))))))
    ;; dynamically set choices to content types enabled on khoj backend
    :choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "image")))
-(transient-define-suffix khoj--search-command (&optional args)
+  (transient-define-suffix khoj--search-command (&optional args)
-  (interactive (list (transient-args transient-current-command)))
+    (interactive (list (transient-args transient-current-command)))
    (progn
      ;; set content type to: specified > last used > based on current buffer > default type
      (setq khoj--content-type (or (transient-arg-value "--content-type=" args) (khoj--buffer-name-to-content-type (buffer-name))))
@ -1033,9 +1097,9 @@ Paragraph only starts at first text after blank line."
      ;; trigger incremental search
      (call-interactively #'khoj-incremental)))
-(transient-define-suffix khoj--find-similar-command (&optional args)
+  (transient-define-suffix khoj--find-similar-command (&optional args)
-  "Find items similar to current item at point."
+    "Find items similar to current item at point."
-  (interactive (list (transient-args transient-current-command)))
+    (interactive (list (transient-args transient-current-command)))
    (progn
      ;; set content type to: specified > last used > based on current buffer > default type
      (setq khoj--content-type (or (transient-arg-value "--content-type=" args) (khoj--buffer-name-to-content-type (buffer-name))))
@ -1043,37 +1107,38 @@ Paragraph only starts at first text after blank line."
      (setq khoj-results-count (or (transient-arg-value "--results-count=" args) khoj-results-count))
      (khoj--find-similar khoj--content-type)))
-(transient-define-suffix khoj--update-command (&optional args)
+  (transient-define-suffix khoj--update-command (&optional args)
-  "Call khoj API to update index of specified content type."
+    "Call khoj API to update index of specified content type."
-  (interactive (list (transient-args transient-current-command)))
+    (interactive (list (transient-args transient-current-command)))
-  (let* ((force-update (if (member "--force-update" args) "true" "false"))
+    (let* ((force-update (if (member "--force-update" args) "true" "false"))
-         ;; set content type to: specified > last used > based on current buffer > default type
+           ;; set content type to: specified > last used > based on current buffer > default type
-         (content-type (or (transient-arg-value "--content-type=" args) (khoj--buffer-name-to-content-type (buffer-name))))
+           (content-type (or (transient-arg-value "--content-type=" args) (khoj--buffer-name-to-content-type (buffer-name))))
-         (type-query (if (equal content-type "all") "" (format "t=%s" content-type)))
+           (url-request-method "GET"))
-         (update-url (format "%s/api/update?%s&force=%s&client=emacs" khoj-server-url type-query force-update))
+      (progn
-         (url-request-method "GET"))
+        (setq khoj--content-type content-type)
-    (progn
+        (khoj--server-index-files force-update content-type))))
      (setq khoj--content-type content-type)
      (url-retrieve update-url (lambda (_) (message "khoj.el: %s index %supdated!" content-type (if (member "--force-update" args) "force " "")))))))
-(transient-define-suffix khoj--chat-command (&optional _)
+  (transient-define-suffix khoj--chat-command (&optional _)
-  "Command to Chat with Khoj."
+    "Command to Chat with Khoj."
-  (interactive (list (transient-args transient-current-command)))
+    (interactive (list (transient-args transient-current-command)))
-  (khoj--chat))
+    (khoj--chat))
-(transient-define-prefix khoj--menu ()
+  (transient-define-prefix khoj--menu ()
-  "Create Khoj Menu to Configure and Execute Commands."
+    "Create Khoj Menu to Configure and Execute Commands."
-  [["Configure Search"
+    [["Configure Search"
-    ("n" "Results Count" "--results-count=" :init-value (lambda (obj) (oset obj value (format "%s" khoj-results-count))))
+      ("n" "Results Count" "--results-count=" :init-value (lambda (obj) (oset obj value (format "%s" khoj-results-count))))
-    ("t" "Content Type" khoj--content-type-switch)]
+      ("t" "Content Type" khoj--content-type-switch)]
-   ["Configure Update"
+     ["Configure Update"
-    ("-f" "Force Update" "--force-update")]]
+      ("-f" "Force Update" "--force-update")]]
-  [["Act"
+    [["Act"
-    ("c" "Chat" khoj--chat-command)
+      ("c" "Chat" khoj--chat-command)
-    ("s" "Search" khoj--search-command)
+      ("s" "Search" khoj--search-command)
-    ("f" "Find Similar" khoj--find-similar-command)
+      ("f" "Find Similar" khoj--find-similar-command)
-    ("u" "Update" khoj--update-command)
+      ("u" "Update" khoj--update-command)
-    ("q" "Quit" transient-quit-one)]])
+      ("q" "Quit" transient-quit-one)]])
  ;; Show the Khoj Transient menu
  (khoj--menu))
 ;; ----------
@ -1086,7 +1151,7 @@ Paragraph only starts at first text after blank line."
  (interactive)
  (when khoj-auto-setup
    (khoj-setup t))
-  (khoj--menu))
+  (khoj--setup-and-show-menu))
 (provide 'khoj)
--- a/src/interface/emacs/tests/khoj-tests.el
+++ b/src/interface/emacs/tests/khoj-tests.el
@ -206,6 +206,64 @@ Rule everything\n")
      "Rule everything"))
    ))
 ;; -------------------------------------
 ;; Test Helpers to Index Content
 ;; -------------------------------------
 (ert-deftest khoj-tests--render-files-to-add-request-body ()
  "Test files are formatted into a multi-part http request body"
  ;; Create two temporary org files whose contents should show up in the rendered body
  (let ((upgrade-file (make-temp-file "upgrade" nil ".org" "# Become God\n## Upgrade\n\nPenance to Immortality\n\n"))
        (act-file (make-temp-file "act" nil ".org" "## Act\n\nRule everything\n\n")))
    ;; unwind-protect runs the cleanup forms below even if the assertion fails
    (unwind-protect
        (progn
          (should
           (equal
            ;; Render both files with an empty second list (presumably the files to delete)
            ;; and "khoj" as the multi-part boundary
            (khoj--render-files-as-request-body (list upgrade-file act-file) '() "khoj")
            ;; Expected body: one form-data part per file, closed by the "--khoj--" terminator.
            ;; The %s placeholders are filled with the temporary file paths.
            (format
            "\n--khoj\r\n\
 Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\
 Content-Type: text/org\r\n\r\n\
 # Become God\n\
 ## Upgrade\n\n\
 Penance to Immortality\n\n\r
 --khoj\r\n\
 Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\
 Content-Type: text/org\r\n\r\n\
 ## Act\n\n\
 Rule everything\n\n\r\n\
 --khoj--\r\n" upgrade-file act-file))))
      ;; Cleanup: remove the temporary files created for this test
      (delete-file upgrade-file)
      (delete-file act-file))))
 (ert-deftest khoj-tests--render-files-to-add-delete-in-request-body ()
  "Test files to add and delete are formatted into a multi-part http request body"
  ;; Create two temporary org files whose contents should show up in the rendered body
  (let ((upgrade-file (make-temp-file "upgrade" nil ".org" "# Become God\n## Upgrade\n\nPenance to Immortality\n\n"))
        (act-file (make-temp-file "act" nil ".org" "## Act\n\nRule everything\n\n")))
    ;; unwind-protect runs the cleanup forms below even if the assertion fails
    (unwind-protect
        (progn
          (should
           (equal
            ;; The second list holds both existing files plus "/tmp/deleted-file.org".
            ;; Presumably this is the previously synced list, from which files to delete are derived.
            (khoj--render-files-as-request-body (list upgrade-file act-file) (list upgrade-file act-file "/tmp/deleted-file.org") "khoj")
            ;; Expected body: each existing file appears once with its content,
            ;; followed by a part for "/tmp/deleted-file.org" with an empty body.
            (format
            "\n--khoj\r\n\
 Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\
 Content-Type: text/org\r\n\r\n\
 # Become God\n\
 ## Upgrade\n\n\
 Penance to Immortality\n\n\r
 --khoj\r\n\
 Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\
 Content-Type: text/org\r\n\r\n\
 ## Act\n\n\
 Rule everything\n\n\r
 --khoj\r\n\
 Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\
 Content-Type: text/org\r\n\r\n\
 \r
 --khoj--\r\n" upgrade-file act-file "/tmp/deleted-file.org"))))
      ;; Cleanup: remove the temporary files created for this test
      (delete-file upgrade-file)
      (delete-file act-file))))
 (provide 'khoj-tests)
--- a/src/interface/obsidian/manifest.json
+++ b/src/interface/obsidian/manifest.json
@ -1,7 +1,7 @@
 {
 	"id": "khoj",
 	"name": "Khoj",
-	"version": "0.12.3",
+	"version": "0.13.0",
 	"minAppVersion": "0.15.0",
 	"description": "An Open-Source AI Personal Assistant for your Digital Brain",
 	"author": "Khoj Inc.",
--- a/src/interface/obsidian/package.json
+++ b/src/interface/obsidian/package.json
@ -1,7 +1,9 @@
 {
    "name": "Khoj",
-    "version": "0.12.3",
+    "version": "0.13.0",
-    "description": "An AI Personal Assistant for your Digital Brain",
+    "description": "An AI copilot for your Second Brain",
    "author": "Debanjum Singh Solanky, Saba Imran <team@khoj.dev>",
    "license": "GPL-3.0-or-later",
    "main": "src/main.js",
    "scripts": {
        "dev": "node esbuild.config.mjs",
@ -14,8 +16,6 @@
        "AI",
        "assistant"
    ],
    "author": "Debanjum Singh Solanky",
    "license": "GPL-3.0-or-later",
    "devDependencies": {
        "@types/node": "^16.11.6",
        "@typescript-eslint/eslint-plugin": "5.29.0",
--- a/src/interface/obsidian/src/main.ts
+++ b/src/interface/obsidian/src/main.ts
@ -1,12 +1,13 @@
-import { Notice, Plugin } from 'obsidian';
+import { Notice, Plugin, TFile } from 'obsidian';
 import { KhojSetting, KhojSettingTab, DEFAULT_SETTINGS } from 'src/settings'
 import { KhojSearchModal } from 'src/search_modal'
 import { KhojChatModal } from 'src/chat_modal'
-import { configureKhojBackend } from './utils';
+import { configureKhojBackend, updateContentIndex } from './utils';
 export default class Khoj extends Plugin {
    settings: KhojSetting;
    indexingTimer: NodeJS.Timeout;
    async onload() {
        await this.loadSettings();
@ -54,6 +55,15 @@ export default class Khoj extends Plugin {
        // Add a settings tab so the user can configure khoj
        this.addSettingTab(new KhojSettingTab(this.app, this));
        // Add scheduled job to update index every 60 minutes
        this.indexingTimer = setInterval(async () => {
            if (this.settings.autoConfigure) {
                this.settings.lastSyncedFiles = await updateContentIndex(
                    this.app.vault, this.settings, this.settings.lastSyncedFiles
                );
            }
        }, 60 * 60 * 1000);
    }
    async loadSettings() {
@ -72,4 +82,12 @@ export default class Khoj extends Plugin {
        }
        this.saveData(this.settings);
    }
    /**
     * Plugin teardown hook: stops the periodic content index update job.
     */
    async onunload() {
        // Remove scheduled job to update index at regular cadence
        // The timer is only set in onload, so guard against it never having been created
        if (this.indexingTimer)
            clearInterval(this.indexingTimer);
        // NOTE(review): calling unload() from inside onunload() looks redundant — confirm against Obsidian's Component lifecycle
        this.unload();
    }
 }
--- a/src/interface/obsidian/src/settings.ts
+++ b/src/interface/obsidian/src/settings.ts
@ -1,5 +1,6 @@
-import { App, Notice, PluginSettingTab, request, Setting } from 'obsidian';
+import { App, Notice, PluginSettingTab, Setting, TFile } from 'obsidian';
 import Khoj from 'src/main';
 import { updateContentIndex } from './utils';
 export interface KhojSetting {
    enableOfflineChat: boolean;
@ -8,6 +9,7 @@ export interface KhojSetting {
    khojUrl: string;
    connectedToBackend: boolean;
    autoConfigure: boolean;
    lastSyncedFiles: TFile[];
 }
 export const DEFAULT_SETTINGS: KhojSetting = {
@ -17,6 +19,7 @@ export const DEFAULT_SETTINGS: KhojSetting = {
    connectedToBackend: false,
    autoConfigure: true,
    openaiApiKey: '',
    lastSyncedFiles: []
 }
 export class KhojSettingTab extends PluginSettingTab {
@ -118,8 +121,9 @@ export class KhojSettingTab extends PluginSettingTab {
                    }, 300);
                    this.plugin.registerInterval(progress_indicator);
-                    await request(`${this.plugin.settings.khojUrl}/api/update?t=markdown&force=true&client=obsidian`);
+                    this.plugin.settings.lastSyncedFiles = await updateContentIndex(
-                    await request(`${this.plugin.settings.khojUrl}/api/update?t=pdf&force=true&client=obsidian`);
+                        this.app.vault, this.plugin.settings, this.plugin.settings.lastSyncedFiles, true
                    );
                    new Notice('✅ Updated Khoj index.');
                    // Reset button once index is updated
--- a/src/interface/obsidian/src/utils.ts
+++ b/src/interface/obsidian/src/utils.ts
@ -1,4 +1,4 @@
-import { FileSystemAdapter, Notice, RequestUrlParam, request, Vault, Modal } from 'obsidian';
+import { FileSystemAdapter, Notice, RequestUrlParam, request, Vault, Modal, TFile } from 'obsidian';
 import { KhojSetting } from 'src/settings'
 export function getVaultAbsolutePath(vault: Vault): string {
@ -14,18 +14,85 @@ type OpenAIType = null | {
    "api-key": string;
 };
 // Shape of the "offline-chat" section of the conversation processor config sent to the Khoj backend.
 // May be null instead of an object.
 type OfflineChatType = null | {
    // Name of the chat model to use for offline chat
    "chat-model": string;
    // Whether offline chat is turned on
    "enable-offline-chat": boolean;
 };
 interface ProcessorData {
    conversation: {
      "conversation-logfile": string;
      openai: OpenAIType;
-      "enable-offline-chat": boolean;
+      "offline-chat": OfflineChatType;
      "tokenizer": null | string;
      "max-prompt-size": null | number;
    };
 }
 /**
  * Resolve the MIME type to report for a file, given its extension.
  *
  * @param extension File extension without the leading dot; matched exactly (case-sensitive).
  * @returns The MIME type for known extensions, or 'text/plain' for anything else.
  */
 function fileExtensionToMimeType (extension: string): string {
    // A Map (rather than a plain object) so lookups never hit inherited Object properties
    const mimeTypeByExtension = new Map<string, string>([
        ['pdf', 'application/pdf'],
        ['png', 'image/png'],
        ['jpg', 'image/jpeg'],
        ['jpeg', 'image/jpeg'],
        ['md', 'text/markdown'],
        ['markdown', 'text/markdown'],
        ['org', 'text/org'],
    ]);
    // Unknown extensions are treated as plain text
    return mimeTypeByExtension.get(extension) ?? 'text/plain';
 }
 /**
  * Send the vault's markdown and pdf files to the Khoj backend to update its content index.
  *
  * Every current file is uploaded as a multipart form-data part. Files that were synced
  * previously but no longer exist in the vault are sent as empty parts so the backend can
  * drop them from the index.
  *
  * @param vault Obsidian vault to read files from.
  * @param setting Plugin settings; `khojUrl` is used to reach the backend.
  * @param lastSyncedFiles Files returned by the previous successful sync.
  * @param regenerate When true, passed as `force=true` to the backend.
  * @returns The files synced in this run, or the previous list if the request failed,
  *          so that pending deletions are retried on the next run.
  */
 export async function updateContentIndex(vault: Vault, setting: KhojSetting, lastSyncedFiles: TFile[], regenerate: boolean = false): Promise<TFile[]> {
    // Get all markdown, pdf files in the vault
    console.log(`Khoj: Updating Khoj content index...`)
    const files = vault.getFiles().filter(file => file.extension === 'md' || file.extension === 'pdf');
    const binaryFileTypes = ['pdf', 'png', 'jpg', 'jpeg']
    const previouslySyncedFiles = lastSyncedFiles ?? [];
    let countOfFilesToIndex = 0;
    let countOfFilesToDelete = 0;
    // Add all files to index as multipart form data
    const formData = new FormData();
    for (const file of files) {
        countOfFilesToIndex++;
        const isBinary = binaryFileTypes.includes(file.extension);
        // Only text content carries a charset
        const mimeType = fileExtensionToMimeType(file.extension) + (isBinary ? "" : "; charset=UTF-8");
        const fileContent = isBinary ? await vault.readBinary(file) : await vault.read(file);
        formData.append('files', new Blob([fileContent], { type: mimeType }), file.path);
    }
    // Add any previously synced files to be deleted to multipart form data.
    // Compare by path rather than object identity: entries in the previous list are not
    // guaranteed to be the same object instances the vault returns now.
    const currentFilePaths = new Set(files.map(file => file.path));
    for (const lastSyncedFile of previouslySyncedFiles) {
        if (!currentFilePaths.has(lastSyncedFile.path)) {
            countOfFilesToDelete++;
            // An empty part marks the file as deleted
            formData.append('files', new Blob([]), lastSyncedFile.path);
        }
    }
    // Call Khoj backend to update index with all markdown, pdf files
    const response = await fetch(`${setting.khojUrl}/api/v1/index/update?force=${regenerate}&client=obsidian`, {
        method: 'POST',
        headers: {
            'x-api-key': 'secret',
        },
        body: formData,
    });
    if (!response.ok) {
        new Notice(`❗️Failed to update Khoj content index. Ensure Khoj server connected or raise issue on Khoj Discord/Github\nError: ${response.statusText}`);
        // Keep the previous sync state so deletions that were not applied are retried next time
        return previouslySyncedFiles;
    }
    console.log(`✅ Refreshed Khoj content index. Updated: ${countOfFilesToIndex} files, Deleted: ${countOfFilesToDelete} files.`);
    return files;
 }
 export async function configureKhojBackend(vault: Vault, setting: KhojSetting, notify: boolean = true) {
    let vaultPath = getVaultAbsolutePath(vault);
    let mdInVault = `${vaultPath}/**/*.md`;
    let pdfInVault = `${vaultPath}/**/*.pdf`;
    let khojConfigUrl = `${setting.khojUrl}/api/config/data`;
    // Check if khoj backend is configured, note if cannot connect to backend
@ -43,124 +110,33 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
    if (!setting.connectedToBackend) return;
    // Set index name from the path of the current vault
    let indexName = vaultPath.replace(/\//g, '_').replace(/\\/g, '_').replace(/ /g, '_').replace(/:/g, '_');
    // Get default config fields from khoj backend
    let defaultConfig = await request(`${khojConfigUrl}/default`).then(response => JSON.parse(response));
    let khojDefaultMdIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["markdown"]["embeddings-file"]);
    let khojDefaultPdfIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["pdf"]["embeddings-file"]);
    let khojDefaultChatDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["processor"]["conversation"]["conversation-logfile"]);
-    let khojDefaultChatModelName = defaultConfig["processor"]["conversation"]["openai"]["chat-model"];
+    let khojDefaultOpenAIChatModelName = defaultConfig["processor"]["conversation"]["openai"]["chat-model"];
    let khojDefaultOfflineChatModelName = defaultConfig["processor"]["conversation"]["offline-chat"]["chat-model"];
    // Get current config if khoj backend configured, else get default config from khoj backend
    await request(khoj_already_configured ? khojConfigUrl : `${khojConfigUrl}/default`)
        .then(response => JSON.parse(response))
        .then(data => {
            khoj_already_configured = data["content-type"] != null;
            // If khoj backend not configured yet
            if (!khoj_already_configured) {
                // Create khoj content-type config with only markdown configured
                data["content-type"] = {
                    "markdown": {
                        "input-filter": [mdInVault],
                        "input-files": null,
                        "embeddings-file": `${khojDefaultMdIndexDirectory}/${indexName}.pt`,
                        "compressed-jsonl": `${khojDefaultMdIndexDirectory}/${indexName}.jsonl.gz`,
                    }
                }
                const hasPdfFiles = app.vault.getFiles().some(file => file.extension === 'pdf');
                if (hasPdfFiles) {
                    data["content-type"]["pdf"] = {
                        "input-filter": [pdfInVault],
                        "input-files": null,
                        "embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
                        "compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
                    }
                }
            }
            // Else if khoj config has no markdown content config
            else if (!data["content-type"]["markdown"]) {
                // Add markdown config to khoj content-type config
                // Set markdown config to index markdown files in configured obsidian vault
                data["content-type"]["markdown"] = {
                    "input-filter": [mdInVault],
                    "input-files": null,
                    "embeddings-file": `${khojDefaultMdIndexDirectory}/${indexName}.pt`,
                    "compressed-jsonl": `${khojDefaultMdIndexDirectory}/${indexName}.jsonl.gz`,
                }
            }
            // Else if khoj is not configured to index markdown files in configured obsidian vault
            else if (
                data["content-type"]["markdown"]["input-files"] != null ||
                data["content-type"]["markdown"]["input-filter"] == null ||
                data["content-type"]["markdown"]["input-filter"].length != 1 ||
                data["content-type"]["markdown"]["input-filter"][0] !== mdInVault) {
                    // Update markdown config in khoj content-type config
                    // Set markdown config to only index markdown files in configured obsidian vault
                    let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
                    data["content-type"]["markdown"] = {
                        "input-filter": [mdInVault],
                        "input-files": null,
                        "embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`,
                        "compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`,
                    }
            }
            if (khoj_already_configured && !data["content-type"]["pdf"]) {
                const hasPdfFiles = app.vault.getFiles().some(file => file.extension === 'pdf');
                if (hasPdfFiles) {
                    data["content-type"]["pdf"] = {
                        "input-filter": [pdfInVault],
                        "input-files": null,
                        "embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
                        "compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
                    }
                } else {
                    data["content-type"]["pdf"] = null;
                }
            }
            // Else if khoj is not configured to index pdf files in configured obsidian vault
            else if (khoj_already_configured &&
                (
                    data["content-type"]["pdf"]["input-files"] != null ||
                    data["content-type"]["pdf"]["input-filter"] == null ||
                    data["content-type"]["pdf"]["input-filter"].length != 1 ||
                    data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) {
                let hasPdfFiles = app.vault.getFiles().some(file => file.extension === 'pdf');
                if (hasPdfFiles) {
                    // Update pdf config in khoj content-type config
                    // Set pdf config to only index pdf files in configured obsidian vault
                    let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]);
                    data["content-type"]["pdf"] = {
                        "input-filter": [pdfInVault],
                        "input-files": null,
                        "embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`,
                        "compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`,
                    }
                } else {
                    data["content-type"]["pdf"] = null;
                }
            }
            let conversationLogFile = data?.["processor"]?.["conversation"]?.["conversation-logfile"] ?? `${khojDefaultChatDirectory}/conversation.json`;
            let processorData: ProcessorData = {
                "conversation": {
                    "conversation-logfile": conversationLogFile,
                    "openai": null,
-                    "enable-offline-chat": setting.enableOfflineChat,
+                    "offline-chat": {
                        "chat-model": khojDefaultOfflineChatModelName,
                        "enable-offline-chat": setting.enableOfflineChat,
                    },
                    "tokenizer": null,
                    "max-prompt-size": null,
                }
            }
            // If the Open AI API Key was configured in the plugin settings
            if (!!setting.openaiApiKey) {
-
+                let openAIChatModel = data?.["processor"]?.["conversation"]?.["openai"]?.["chat-model"] ?? khojDefaultOpenAIChatModelName;
                let openAIChatModel = data?.["processor"]?.["conversation"]?.["openai"]?.["chat-model"] ?? khojDefaultChatModelName;
                processorData = {
                    "conversation": {
                        "conversation-logfile": conversationLogFile,
@ -168,7 +144,12 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
                            "chat-model": openAIChatModel,
                            "api-key": setting.openaiApiKey,
                        },
-                        "enable-offline-chat": setting.enableOfflineChat,
+                        "offline-chat": {
                            "chat-model": khojDefaultOfflineChatModelName,
                            "enable-offline-chat": setting.enableOfflineChat,
                        },
                        "tokenizer": null,
                        "max-prompt-size": null,
                    },
                }
            }
@ -197,12 +178,8 @@ export async function updateKhojBackend(khojUrl: string, khojConfig: Object) {
        method: 'POST',
        contentType: 'application/json',
    };
    // Save khojConfig on khoj backend at khojConfigUrl
-    await request(requestContent)
+    request(requestContent);
        // Refresh khoj search index after updating config
        .then(_ => request(`${khojUrl}/api/update?t=markdown`))
        .then(_ => request(`${khojUrl}/api/update?t=pdf`));
 }
 function getIndexDirectoryFromBackendConfig(filepath: string) {
--- a/src/interface/obsidian/versions.json
+++ b/src/interface/obsidian/versions.json
@ -24,5 +24,6 @@
 	"0.12.0": "0.15.0",
 	"0.12.1": "0.15.0",
 	"0.12.2": "0.15.0",
-	"0.12.3": "0.15.0"
+	"0.12.3": "0.15.0",
 	"0.13.0": "0.15.0"
 }
--- a/src/khoj/configure.py
+++ b/src/khoj/configure.py
@ -28,7 +28,7 @@ from khoj.utils.config import (
 )
 from khoj.utils.helpers import resolve_absolute_path, merge_dicts
 from khoj.utils.fs_syncer import collect_files
-from khoj.utils.rawconfig import FullConfig, ProcessorConfig, ConversationProcessorConfig
+from khoj.utils.rawconfig import FullConfig, OfflineChatProcessorConfig, ProcessorConfig, ConversationProcessorConfig
 from khoj.routers.indexer import configure_content, load_content, configure_search
@ -136,7 +136,7 @@ def configure_routes(app):
    app.include_router(api, prefix="/api")
    app.include_router(api_beta, prefix="/api/beta")
-    app.include_router(indexer, prefix="/v1/indexer")
+    app.include_router(indexer, prefix="/api/v1/index")
    app.include_router(web_client)
    app.include_router(auth_router, prefix="/auth")
@ -156,7 +156,7 @@ if not state.demo:
            state.content_index = configure_content(
                state.content_index, state.config.content_type, all_files, state.search_models
            )
-            logger.info("📬 Content index updated via Scheduler")
+            logger.info("📪 Content index updated via Scheduler")
        except Exception as e:
            logger.error(f"🚨 Error updating content index via Scheduler: {e}", exc_info=True)
@ -207,9 +207,7 @@ def configure_conversation_processor(
            conversation_config=ConversationProcessorConfig(
                conversation_logfile=conversation_logfile,
                openai=(conversation_config.openai if (conversation_config is not None) else None),
-                enable_offline_chat=(
+                offline_chat=conversation_config.offline_chat if conversation_config else OfflineChatProcessorConfig(),
                    conversation_config.enable_offline_chat if (conversation_config is not None) else False
                ),
            )
        )
    else:
--- a/src/khoj/interface/web/config.html
+++ b/src/khoj/interface/web/config.html
@ -236,7 +236,7 @@
                    </h3>
                </div>
                <div class="card-description-row">
-                <p class="card-description">Setup chat using OpenAI</p>
+                <p class="card-description">Setup online chat using OpenAI</p>
                </div>
                <div class="card-action-row">
                    <a class="card-button" href="/config/processor/conversation/openai">
@ -261,21 +261,21 @@
                    <img class="card-icon" src="/static/assets/icons/chat.svg" alt="Chat">
                    <h3 class="card-title">
                        Offline Chat
-                        <img id="configured-icon-conversation-enable-offline-chat" class="configured-icon {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.enable_offline_chat and current_model_state.conversation_gpt4all %}enabled{% else %}disabled{% endif %}" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
+                        <img id="configured-icon-conversation-enable-offline-chat" class="configured-icon {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.offline_chat.enable_offline_chat and current_model_state.conversation_gpt4all %}enabled{% else %}disabled{% endif %}" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
-                        {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.enable_offline_chat and not current_model_state.conversation_gpt4all %}
+                        {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.offline_chat.enable_offline_chat and not current_model_state.conversation_gpt4all %}
                            <img id="misconfigured-icon-conversation-enable-offline-chat" class="configured-icon" src="/static/assets/icons/question-mark-icon.svg" alt="Not Configured" title="The model was not downloaded as expected.">
                        {% endif %}
                    </h3>
                </div>
                <div class="card-description-row">
-                <p class="card-description">Setup offline chat (Llama V2)</p>
+                <p class="card-description">Setup offline chat</p>
                </div>
-                <div id="clear-enable-offline-chat" class="card-action-row {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.enable_offline_chat %}enabled{% else %}disabled{% endif %}">
+                <div id="clear-enable-offline-chat" class="card-action-row {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.offline_chat.enable_offline_chat %}enabled{% else %}disabled{% endif %}">
                    <button class="card-button" onclick="toggleEnableLocalLLLM(false)">
                        Disable
                    </button>
                </div>
-                <div id="set-enable-offline-chat" class="card-action-row {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.enable_offline_chat %}disabled{% else %}enabled{% endif %}">
+                <div id="set-enable-offline-chat" class="card-action-row {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.offline_chat.enable_offline_chat %}disabled{% else %}enabled{% endif %}">
                    <button class="card-button happy" onclick="toggleEnableLocalLLLM(true)">
                        Enable
                    </button>
@ -346,7 +346,7 @@
            featuresHintText.classList.add("show");
        }
-        fetch('/api/config/data/processor/conversation/enable_offline_chat' + '?enable_offline_chat=' + enable, {
+        fetch('/api/config/data/processor/conversation/offline_chat' + '?enable_offline_chat=' + enable, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
--- a/src/khoj/interface/web/content_type_input.html
+++ b/src/khoj/interface/web/content_type_input.html
@ -34,7 +34,7 @@
                            <input type="text" id="input-filter" name="input-filter" placeholder="~/Documents/{{content_type}}">
                        {% else %}
                            {% for input_filter in current_config['input_filter'] %}
-                                <input type="text" id="input-filter" name="input-filter" placeholder="~/Documents/{{content_type}}" value="{{ input_filter.split('/*')[0] }}">
+                                <input type="text" id="input-filter" name="input-filter" placeholder="~/Documents/{{content_type}}" value="{{ input_filter }}">
                            {% endfor %}
                        {% endif %}
                    </td>
@ -106,17 +106,18 @@
    submit.addEventListener("click", function(event) {
        event.preventDefault();
-        let globFormat = "**/*."
+        let globFormat = "**/*"
        let suffixes = [];
        if ('{{content_type}}' == "markdown")
-            suffixes = ["md", "markdown"]
+            suffixes = [".md", ".markdown"]
        else if ('{{content_type}}' == "org")
-            suffixes = ["org"]
+            suffixes = [".org"]
        else if ('{{content_type}}' === "pdf")
-            suffixes = ["pdf"]
+            suffixes = [".pdf"]
        else if ('{{content_type}}' === "plaintext")
-            suffixes = ['*']
+            suffixes = ['.*']
        let globs = suffixes.map(x => `${globFormat}${x}`)
        var inputFileNodes = document.getElementsByName("input-files");
        var inputFiles = getValidInputNodes(inputFileNodes).map(node => node.value);
@ -124,10 +125,19 @@
        var inputFilter = [];
        var nodes = getValidInputNodes(inputFilterNodes);
        // A regex that checks for globs in the path.  If they exist,
        // we are going to just not add our own globing.  If they don't,
        // then we will assume globbing should be done.
        const glob_regex = /([*?\[\]])/;
        if (nodes.length > 0) {
            for (var i = 0; i < nodes.length; i++) {
-                for (var j = 0; j < suffixes.length; j++) {
+                for (var j = 0; j < globs.length; j++) {
-                    inputFilter.push(nodes[i].value + globFormat + suffixes[j]);
+                    if (glob_regex.test(nodes[i].value)) {
                        inputFilter.push(nodes[i].value);
                    } else {
                        inputFilter.push(nodes[i].value + globs[j]);
                    }
                }
            }
        }
--- a/src/khoj/migrations/migrate_offline_chat_schema.py
+++ b/src/khoj/migrations/migrate_offline_chat_schema.py
@ -0,0 +1,83 @@
 """
 Current format of khoj.yml
 ---
 app:
    ...
 content-type:
    ...
 processor:
  conversation:
    enable-offline-chat: false
    conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
    openai:
        ...
 search-type:
    ...
 New format of khoj.yml
 ---
 app:
    ...
 content-type:
    ...
 processor:
  conversation:
    offline-chat:
        enable-offline-chat: false
        chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
    tokenizer: null
    max-prompt-size: null
    conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
    openai:
        ...
 search-type:
    ...
 """
 import logging
 from packaging import version
 from khoj.utils.yaml import load_config_from_file, save_config_to_file
 logger = logging.getLogger(__name__)
 def migrate_offline_chat_schema(args):
    """
    Migrate the conversation processor section of khoj.yml to schema version 0.12.3.

    Moves the flat `enable-offline-chat` flag into a nested `offline-chat` section
    (next to `chat-model`) and adds the `max-prompt-size` and `tokenizer` fields.
    The config file is rewritten only when a conversation processor is configured
    and the stored version is missing or older than 0.12.3.

    Args:
        args: Parsed CLI arguments; only `args.config_file` is read.

    Returns:
        The `args` object that was passed in.
    """
    schema_version = "0.12.3"
    default_chat_model = "llama-2-7b-chat.ggmlv3.q4_0.bin"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")
    # Nothing to migrate unless a conversation processor is configured.
    # A section can be present but null in the YAML, so check the value, not just the key.
    processor_config = raw_config.get("processor")
    if processor_config is None:
        return args
    conversation_config = processor_config.get("conversation")
    if conversation_config is None:
        return args
    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        logger.info(
            f"Upgrading config schema to {schema_version} from {previous_version} to make (offline) chat more configurable"
        )
        raw_config["version"] = schema_version
        # Create max-prompt-size and tokenizer fields in conversation processor schema
        conversation_config["max-prompt-size"] = None
        conversation_config["tokenizer"] = None
        # Reuse values from an already nested offline-chat section; `or {}` covers a null section
        existing_offline_chat = conversation_config.get("offline-chat") or {}
        # The legacy top-level flag wins when present; pop() also removes it from the old location
        enable_offline_chat = conversation_config.pop(
            "enable-offline-chat", existing_offline_chat.get("enable-offline-chat", False)
        )
        conversation_config["offline-chat"] = {
            "enable-offline-chat": enable_offline_chat,
            "chat-model": existing_offline_chat.get("chat-model", default_chat_model),
        }
        save_config_to_file(raw_config, args.config_file)
    return args
--- a/src/khoj/processor/conversation/gpt4all/chat_model.py
+++ b/src/khoj/processor/conversation/gpt4all/chat_model.py
@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
 def extract_questions_offline(
    text: str,
-    model: str = "llama-2-7b-chat.ggmlv3.q4_K_S.bin",
+    model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin",
    loaded_model: Union[Any, None] = None,
    conversation_log={},
    use_history: bool = True,
@ -113,7 +113,7 @@ def filter_questions(questions: List[str]):
    ]
    filtered_questions = []
    for q in questions:
-        if not any([word in q.lower() for word in hint_words]):
+        if not any([word in q.lower() for word in hint_words]) and not is_none_or_empty(q):
            filtered_questions.append(q)
    return filtered_questions
@ -123,10 +123,12 @@ def converse_offline(
    references,
    user_query,
    conversation_log={},
-    model: str = "llama-2-7b-chat.ggmlv3.q4_K_S.bin",
+    model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin",
    loaded_model: Union[Any, None] = None,
    completion_func=None,
    conversation_command=ConversationCommand.Default,
    max_prompt_size=None,
    tokenizer_name=None,
 ) -> Union[ThreadedGenerator, Iterator[str]]:
    """
    Converse with user using Llama
@ -158,6 +160,8 @@ def converse_offline(
        prompts.system_prompt_message_llamav2,
        conversation_log,
        model_name=model,
        max_prompt_size=max_prompt_size,
        tokenizer_name=tokenizer_name,
    )
    g = ThreadedGenerator(references, completion_func=completion_func)
--- a/src/khoj/processor/conversation/gpt4all/model_metadata.py
+++ b/src/khoj/processor/conversation/gpt4all/model_metadata.py
@ -1,3 +0,0 @@
 # Maps an offline chat model file name to the URL it is downloaded from
 model_name_to_url = {
    "llama-2-7b-chat.ggmlv3.q4_K_S.bin": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_K_S.bin"
 }
--- a/src/khoj/processor/conversation/gpt4all/utils.py
+++ b/src/khoj/processor/conversation/gpt4all/utils.py
@ -1,24 +1,8 @@
 import os
 import logging
 import requests
 import hashlib
 from tqdm import tqdm
 from khoj.processor.conversation.gpt4all import model_metadata
 logger = logging.getLogger(__name__)
 # Expected MD5 hex digest per model file name; compared against get_md5_checksum() of the downloaded file
 expected_checksum = {"llama-2-7b-chat.ggmlv3.q4_K_S.bin": "cfa87b15d92fb15a2d7c354b0098578b"}
 def get_md5_checksum(filename: str):
    """Return the hexadecimal MD5 digest of the file at `filename`.

    The file is read in 8192-byte chunks so it is never held in memory whole.
    """
    digest = hashlib.md5()
    with open(filename, "rb") as stream:
        while True:
            block = stream.read(8192)
            # An empty read marks end of file
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
 def download_model(model_name: str):
    try:
@ -27,57 +11,12 @@ def download_model(model_name: str):
        logger.info("There was an error importing GPT4All. Please run pip install gpt4all in order to install it.")
        raise e
-    url = model_metadata.model_name_to_url.get(model_name)
+    # Use GPU for Chat Model, if available
    model_path = os.path.expanduser(f"~/.cache/gpt4all/")
    if not url:
        logger.debug(f"Model {model_name} not found in model metadata. Skipping download.")
        return GPT4All(model_name=model_name, model_path=model_path)
    filename = os.path.expanduser(f"~/.cache/gpt4all/{model_name}")
    if os.path.exists(filename):
        # Check if the user is connected to the internet
        try:
            requests.get("https://www.google.com/", timeout=5)
        except:
            logger.debug("User is offline. Disabling allowed download flag")
            return GPT4All(model_name=model_name, model_path=model_path, allow_download=False)
        return GPT4All(model_name=model_name, model_path=model_path)
    # Download the model to a tmp file. Once the download is completed, move the tmp file to the actual file
    tmp_filename = filename + ".tmp"
    try:
-        os.makedirs(os.path.dirname(tmp_filename), exist_ok=True)
+        model = GPT4All(model_name=model_name, device="gpu")
-        logger.debug(f"Downloading model {model_name} from {url} to {filename}...")
+        logger.debug("Loaded chat model to GPU.")
-        with requests.get(url, stream=True) as r:
+    except ValueError:
-            r.raise_for_status()
+        model = GPT4All(model_name=model_name)
-            total_size = int(r.headers.get("content-length", 0))
+        logger.debug("Loaded chat model to CPU.")
            with open(tmp_filename, "wb") as f, tqdm(
                unit="B",  # unit string to be displayed.
                unit_scale=True,  # let tqdm to determine the scale in kilo, mega..etc.
                unit_divisor=1024,  # is used when unit_scale is true
                total=total_size,  # the total iteration.
                desc=model_name,  # prefix to be displayed on progress bar.
            ) as progress_bar:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
                    progress_bar.update(len(chunk))
-        # Verify the checksum
+    return model
        if expected_checksum.get(model_name) != get_md5_checksum(tmp_filename):
            logger.error(
                f"Checksum verification failed for {filename}. Removing the tmp file. Offline model will not be available."
            )
            os.remove(tmp_filename)
            raise ValueError(f"Checksum verification failed for downloading {model_name} from {url}.")
        # Move the tmp file to the actual file
        os.rename(tmp_filename, filename)
        logger.debug(f"Successfully downloaded model {model_name} from {url} to {filename}")
        return GPT4All(model_name)
    except Exception as e:
        logger.error(f"Failed to download model {model_name} from {url} to {filename}. Error: {e}", exc_info=True)
        # Remove the tmp file if it exists
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
        return None
--- a/src/khoj/processor/conversation/openai/gpt.py
+++ b/src/khoj/processor/conversation/openai/gpt.py
@ -116,6 +116,8 @@ def converse(
    temperature: float = 0.2,
    completion_func=None,
    conversation_command=ConversationCommand.Default,
    max_prompt_size=None,
    tokenizer_name=None,
 ):
    """
    Converse with user using OpenAI's ChatGPT
@ -141,6 +143,8 @@ def converse(
        prompts.personality.format(),
        conversation_log,
        model,
        max_prompt_size,
        tokenizer_name,
    )
    truncated_messages = "\n".join({f"{message.content[:40]}..." for message in messages})
    logger.debug(f"Conversation Context for GPT: {truncated_messages}")
--- a/src/khoj/processor/conversation/prompts.py
+++ b/src/khoj/processor/conversation/prompts.py
@ -23,7 +23,7 @@ no_notes_found = PromptTemplate.from_template(
    """.strip()
 )
-system_prompt_message_llamav2 = f"""You are Khoj, a friendly, smart and helpful personal assistant.
+system_prompt_message_llamav2 = f"""You are Khoj, a smart, inquisitive and helpful personal assistant.
 Using your general knowledge and our past conversations as context, answer the following question.
 If you do not know the answer, say 'I don't know.'"""
@ -51,13 +51,13 @@ extract_questions_system_prompt_llamav2 = PromptTemplate.from_template(
 general_conversation_llamav2 = PromptTemplate.from_template(
    """
-<s>[INST]{query}[/INST]
+<s>[INST] {query} [/INST]
 """.strip()
 )
 chat_history_llamav2_from_user = PromptTemplate.from_template(
    """
-<s>[INST]{message}[/INST]
+<s>[INST] {message} [/INST]
 """.strip()
 )
@ -69,7 +69,7 @@ chat_history_llamav2_from_assistant = PromptTemplate.from_template(
 conversation_llamav2 = PromptTemplate.from_template(
    """
-<s>[INST]{query}[/INST]
+<s>[INST] {query} [/INST]
 """.strip()
 )
@ -91,7 +91,7 @@ Question: {query}
 notes_conversation_llamav2 = PromptTemplate.from_template(
    """
-Notes:
+User's Notes:
 {references}
 Question: {query}
 """.strip()
@ -134,19 +134,25 @@ Answer (in second person):"""
 extract_questions_llamav2_sample = PromptTemplate.from_template(
    """
-<s>[INST]<<SYS>>Current Date: {current_date}<</SYS>>[/INST]</s>
+<s>[INST] <<SYS>>Current Date: {current_date}<</SYS>> [/INST]</s>
-<s>[INST]How was my trip to Cambodia?[/INST][]</s>
+<s>[INST] How was my trip to Cambodia? [/INST]
-<s>[INST]Who did I visit the temple with on that trip?[/INST]Who did I visit the temple with in Cambodia?</s>
+How was my trip to Cambodia?</s>
-<s>[INST]How should I take care of my plants?[/INST]What kind of plants do I have? What issues do my plants have?</s>
+<s>[INST] Who did I visit the temple with on that trip? [/INST]
-<s>[INST]How many tennis balls fit in the back of a 2002 Honda Civic?[/INST]What is the size of a tennis ball? What is the trunk size of a 2002 Honda Civic?</s>
+Who did I visit the temple with in Cambodia?</s>
-<s>[INST]What did I do for Christmas last year?[/INST]What did I do for Christmas {last_year} dt>='{last_christmas_date}' dt<'{next_christmas_date}'</s>
+<s>[INST] How should I take care of my plants? [/INST]
-<s>[INST]How are you feeling today?[/INST]</s>
+What kind of plants do I have? What issues do my plants have?</s>
-<s>[INST]Is Alice older than Bob?[/INST]When was Alice born? What is Bob's age?</s>
+<s>[INST] How many tennis balls fit in the back of a 2002 Honda Civic? [/INST]
-<s>[INST]<<SYS>>
+What is the size of a tennis ball? What is the trunk size of a 2002 Honda Civic?</s>
 <s>[INST] What did I do for Christmas last year? [/INST]
 What did I do for Christmas {last_year} dt>='{last_christmas_date}' dt<'{next_christmas_date}'</s>
 <s>[INST] How are you feeling today? [/INST]</s>
 <s>[INST] Is Alice older than Bob? [/INST]
 When was Alice born? What is Bob's age?</s>
 <s>[INST] <<SYS>>
 Use these notes from the user's previous conversations to provide a response:
 {chat_history}
-<</SYS>>[/INST]</s>
+<</SYS>> [/INST]</s>
-<s>[INST]{query}[/INST]
+<s>[INST] {query} [/INST]
 """
 )
--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@ -3,24 +3,27 @@ import logging
 from time import perf_counter
 import json
 from datetime import datetime
 import queue
 import tiktoken
 # External packages
 from langchain.schema import ChatMessage
-from transformers import LlamaTokenizerFast
+from transformers import AutoTokenizer
 # Internal Packages
 import queue
 from khoj.utils.helpers import merge_dicts
 logger = logging.getLogger(__name__)
-max_prompt_size = {
+model_to_prompt_size = {
    "gpt-3.5-turbo": 4096,
    "gpt-4": 8192,
-    "llama-2-7b-chat.ggmlv3.q4_K_S.bin": 1548,
+    "llama-2-7b-chat.ggmlv3.q4_0.bin": 1548,
    "gpt-3.5-turbo-16k": 15000,
 }
-tokenizer = {"llama-2-7b-chat.ggmlv3.q4_K_S.bin": "hf-internal-testing/llama-tokenizer"}
+model_to_tokenizer = {
    "llama-2-7b-chat.ggmlv3.q4_0.bin": "hf-internal-testing/llama-tokenizer",
 }
 class ThreadedGenerator:
@ -82,9 +85,26 @@ def message_to_log(
 def generate_chatml_messages_with_context(
-    user_message, system_message, conversation_log={}, model_name="gpt-3.5-turbo", lookback_turns=2
+    user_message,
    system_message,
    conversation_log={},
    model_name="gpt-3.5-turbo",
    max_prompt_size=None,
    tokenizer_name=None,
 ):
    """Generate messages for ChatGPT with context from previous conversation"""
    # Set max prompt size from user config, pre-configured for model or to default prompt size
    try:
        max_prompt_size = max_prompt_size or model_to_prompt_size[model_name]
    except:
        max_prompt_size = 2000
        logger.warning(
            f"Fallback to default prompt size: {max_prompt_size}.\nConfigure max_prompt_size for unsupported model: {model_name} in Khoj settings to longer context window."
        )
    # Scale lookback turns proportional to max prompt size supported by model
    lookback_turns = max_prompt_size // 750
    # Extract Chat History for Context
    chat_logs = []
    for chat in conversation_log.get("chat", []):
@ -105,19 +125,28 @@ def generate_chatml_messages_with_context(
    messages = user_chatml_message + rest_backnforths + system_chatml_message
    # Truncate oldest messages from conversation history until under max supported prompt size by model
-    messages = truncate_messages(messages, max_prompt_size[model_name], model_name)
+    messages = truncate_messages(messages, max_prompt_size, model_name, tokenizer_name)
    # Return message in chronological order
    return messages[::-1]
-def truncate_messages(messages: list[ChatMessage], max_prompt_size, model_name) -> list[ChatMessage]:
+def truncate_messages(
    messages: list[ChatMessage], max_prompt_size, model_name: str, tokenizer_name=None
 ) -> list[ChatMessage]:
    """Truncate messages to fit within max prompt size supported by model"""
-    if "llama" in model_name:
+    try:
-        encoder = LlamaTokenizerFast.from_pretrained(tokenizer[model_name])
+        if model_name.startswith("gpt-"):
-    else:
+            encoder = tiktoken.encoding_for_model(model_name)
-        encoder = tiktoken.encoding_for_model(model_name)
+        else:
            encoder = AutoTokenizer.from_pretrained(tokenizer_name or model_to_tokenizer[model_name])
    except:
        default_tokenizer = "hf-internal-testing/llama-tokenizer"
        encoder = AutoTokenizer.from_pretrained(default_tokenizer)
        logger.warning(
            f"Fallback to default chat model tokenizer: {default_tokenizer}.\nConfigure tokenizer for unsupported model: {model_name} in Khoj settings to improve context stuffing."
        )
    system_message = messages.pop()
    system_message_tokens = len(encoder.encode(system_message.content))
--- a/src/khoj/processor/pdf/pdf_to_jsonl.py
+++ b/src/khoj/processor/pdf/pdf_to_jsonl.py
@ -65,7 +65,7 @@ class PdfToJsonl(TextToJsonl):
                # Write the PDF file to a temporary file, as it is stored in byte format in the pdf_file object and the PyPDFLoader expects a file path
                tmp_file = f"tmp_pdf_file.pdf"
                with open(f"{tmp_file}", "wb") as f:
-                    bytes = base64.b64decode(pdf_files[pdf_file])
+                    bytes = pdf_files[pdf_file]
                    f.write(bytes)
                loader = PyMuPDFLoader(f"{tmp_file}")
                pdf_entries_per_file = [page.page_content for page in loader.load()]
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@ -30,6 +30,7 @@ from khoj.utils.rawconfig import (
    GithubContentConfig,
    NotionContentConfig,
    ConversationProcessorConfig,
    OfflineChatProcessorConfig,
 )
 from khoj.utils.helpers import resolve_absolute_path
 from khoj.utils.state import SearchType
@ -185,6 +186,10 @@ if not state.demo:
            state.content_index.markdown = None
        elif content_type == "org":
            state.content_index.org = None
        elif content_type == "plaintext":
            state.content_index.plaintext = None
        else:
            logger.warning(f"Request to delete unknown content type: {content_type} via API")
        try:
            save_config_to_file_updated_state()
@ -284,10 +289,11 @@ if not state.demo:
        except Exception as e:
            return {"status": "error", "message": str(e)}
-    @api.post("/config/data/processor/conversation/enable_offline_chat", status_code=200)
+    @api.post("/config/data/processor/conversation/offline_chat", status_code=200)
    async def set_processor_enable_offline_chat_config_data(
        request: Request,
        enable_offline_chat: bool,
        offline_chat_model: Optional[str] = None,
        client: Optional[str] = None,
    ):
        _initialize_config()
@ -301,7 +307,12 @@ if not state.demo:
            state.config.processor = ProcessorConfig(conversation=ConversationProcessorConfig(conversation_logfile=conversation_logfile))  # type: ignore
        assert state.config.processor.conversation is not None
-        state.config.processor.conversation.enable_offline_chat = enable_offline_chat
+        if state.config.processor.conversation.offline_chat is None:
            state.config.processor.conversation.offline_chat = OfflineChatProcessorConfig()
        state.config.processor.conversation.offline_chat.enable_offline_chat = enable_offline_chat
        if offline_chat_model is not None:
            state.config.processor.conversation.offline_chat.chat_model = offline_chat_model
        state.processor_config = configure_processor(state.config.processor, state.processor_config)
        update_telemetry_state(
@ -322,7 +333,7 @@ if not state.demo:
 # Create Routes
@api.get("/config/data/default")
 def get_default_config_data():
-    return constants.default_config
+    return constants.empty_config
@api.get("/config/types", response_model=List[str])
@ -387,7 +398,7 @@ async def search(
    # Encode query with filter terms removed
    defiltered_query = user_query
    for filter in [DateFilter(), WordFilter(), FileFilter()]:
-        defiltered_query = filter.defilter(user_query)
+        defiltered_query = filter.defilter(defiltered_query)
    encoded_asymmetric_query = None
    if t == SearchType.All or t != SearchType.Image:
@ -622,7 +633,7 @@ def update(
        if state.processor_config:
            components.append("Conversation processor")
        components_msg = ", ".join(components)
-        logger.info(f"📬 {components_msg} updated via API")
+        logger.info(f"📪 {components_msg} updated via API")
    update_telemetry_state(
        request=request,
@ -702,12 +713,18 @@ async def chat(
 ) -> Response:
    perform_chat_checks()
    conversation_command = get_conversation_command(query=q, any_references=True)
    q = q.replace(f"/{conversation_command.value}", "").strip()
    compiled_references, inferred_queries, defiltered_query = await extract_references_and_questions(
        request, q, (n or 5), conversation_command
    )
-    conversation_command = get_conversation_command(query=q, any_references=not is_none_or_empty(compiled_references))
+
    if conversation_command == ConversationCommand.Default and is_none_or_empty(compiled_references):
        conversation_command = ConversationCommand.General
    if conversation_command == ConversationCommand.Help:
-        model_type = "offline" if state.processor_config.conversation.enable_offline_chat else "openai"
+        model_type = "offline" if state.processor_config.conversation.offline_chat.enable_offline_chat else "openai"
        formatted_help = help_message.format(model=model_type, version=state.khoj_version)
        return StreamingResponse(iter([formatted_help]), media_type="text/event-stream", status_code=200)
@ -768,23 +785,21 @@ async def extract_references_and_questions(
        logger.warning(
            "No content index loaded, so cannot extract references from knowledge base. Please configure your data sources and update the index to chat with your notes."
        )
-        return compiled_references, inferred_queries
+        return compiled_references, inferred_queries, q
    if conversation_type == ConversationCommand.General:
        return compiled_references, inferred_queries, q
    # Extract filter terms from user message
    defiltered_query = q
    filter_terms = []
    for filter in [DateFilter(), WordFilter(), FileFilter()]:
-        filter_terms += filter.get_filter_terms(q)
+        defiltered_query = filter.defilter(defiltered_query)
-        defiltered_query = filter.defilter(q)
+    filters_in_query = q.replace(defiltered_query, "").strip()
    filters_in_query = " ".join(filter_terms)
    # Infer search queries from user message
    with timer("Extracting search queries took", logger):
        # If we've reached here, either the user has enabled offline chat or the openai model is enabled.
-        if state.processor_config.conversation.enable_offline_chat:
+        if state.processor_config.conversation.offline_chat.enable_offline_chat:
            loaded_model = state.processor_config.conversation.gpt4all_model.loaded_model
            inferred_queries = extract_questions_offline(
                defiltered_query, loaded_model=loaded_model, conversation_log=meta_log, should_extract_questions=False
@ -800,7 +815,7 @@ async def extract_references_and_questions(
    with timer("Searching knowledge base took", logger):
        result_list = []
        for query in inferred_queries:
-            n_items = min(n, 3) if state.processor_config.conversation.enable_offline_chat else n
+            n_items = min(n, 3) if state.processor_config.conversation.offline_chat.enable_offline_chat else n
            result_list.extend(
                await search(
                    f"{query} {filters_in_query}",
--- a/src/khoj/routers/helpers.py
+++ b/src/khoj/routers/helpers.py
@ -113,7 +113,7 @@ def generate_chat_response(
            meta_log=meta_log,
        )
-        if state.processor_config.conversation.enable_offline_chat:
+        if state.processor_config.conversation.offline_chat.enable_offline_chat:
            loaded_model = state.processor_config.conversation.gpt4all_model.loaded_model
            chat_response = converse_offline(
                references=compiled_references,
@ -122,6 +122,9 @@ def generate_chat_response(
                conversation_log=meta_log,
                completion_func=partial_completion,
                conversation_command=conversation_command,
                model=state.processor_config.conversation.offline_chat.chat_model,
                max_prompt_size=state.processor_config.conversation.max_prompt_size,
                tokenizer_name=state.processor_config.conversation.tokenizer,
            )
        elif state.processor_config.conversation.openai_model:
@ -135,6 +138,8 @@ def generate_chat_response(
                api_key=api_key,
                completion_func=partial_completion,
                conversation_command=conversation_command,
                max_prompt_size=state.processor_config.conversation.max_prompt_size,
                tokenizer_name=state.processor_config.conversation.tokenizer,
            )
    except Exception as e:
--- a/src/khoj/routers/indexer.py
+++ b/src/khoj/routers/indexer.py
@ -1,11 +1,11 @@
 # Standard Packages
 import logging
 import sys
 from typing import Optional, Union, Dict
 # External Packages
-from fastapi import APIRouter, HTTPException, Header, Request, Body, Response
+from fastapi import APIRouter, HTTPException, Header, Request, Response, UploadFile
 from pydantic import BaseModel
 from khoj.routers.helpers import update_telemetry_state
 # Internal Packages
 from khoj.utils import state, constants
@ -56,42 +56,30 @@ class IndexerInput(BaseModel):
    plaintext: Optional[dict[str, str]] = None
-@indexer.post("/batch")
+@indexer.post("/update")
-async def index_batch(
+async def update(
    request: Request,
    files: list[UploadFile],
    x_api_key: str = Header(None),
-    regenerate: bool = False,
+    force: bool = False,
-    search_type: Optional[Union[state.SearchType, str]] = None,
+    t: Optional[Union[state.SearchType, str]] = None,
    client: Optional[str] = None,
    user_agent: Optional[str] = Header(None),
    referer: Optional[str] = Header(None),
    host: Optional[str] = Header(None),
 ):
    if x_api_key != "secret":
        raise HTTPException(status_code=401, detail="Invalid API Key")
    state.config_lock.acquire()
    try:
-        logger.info(f"Received batch indexing request")
+        logger.info(f"📬 Updating content index via API call by {client} client")
        index_batch_request_acc = b""
        async for chunk in request.stream():
            index_batch_request_acc += chunk
        data_bytes = sys.getsizeof(index_batch_request_acc)
        unit = "KB"
        data_size = data_bytes / 1024
        if data_size > 1000:
            unit = "MB"
            data_size = data_size / 1024
        if data_size > 1000:
            unit = "GB"
            data_size = data_size / 1024
        data_size_metric = f"{data_size:.2f} {unit}"
        logger.info(f"Received {data_size_metric} of data")
        index_batch_request = IndexBatchRequest.parse_raw(index_batch_request_acc)
        logger.info(f"Received {len(index_batch_request.files)} files")
        org_files: Dict[str, str] = {}
        markdown_files: Dict[str, str] = {}
        pdf_files: Dict[str, str] = {}
        plaintext_files: Dict[str, str] = {}
-        for file in index_batch_request.files:
+        for file in files:
-            file_type = get_file_type(file.path)
+            file_type, encoding = get_file_type(file.content_type)
            dict_to_update = None
            if file_type == "org":
                dict_to_update = org_files
@ -103,9 +91,11 @@ async def index_batch(
                dict_to_update = plaintext_files
            if dict_to_update is not None:
-                dict_to_update[file.path] = file.content
+                dict_to_update[file.filename] = (
                    file.file.read().decode("utf-8") if encoding == "utf-8" else file.file.read()
                )
            else:
-                logger.info(f"Skipping unsupported streamed file: {file.path}")
+                logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file.filename}")
        indexer_input = IndexerInput(
            org=org_files,
@ -115,7 +105,7 @@ async def index_batch(
        )
        if state.config == None:
-            logger.info("First run, initializing state.")
+            logger.info("📬 Initializing content index on first run.")
            default_full_config = FullConfig(
                content_type=None,
                search_type=SearchConfig.parse_obj(constants.default_config["search-type"]),
@ -142,15 +132,30 @@ async def index_batch(
            state.config.content_type,
            indexer_input.dict(),
            state.search_models,
-            regenerate=regenerate,
+            regenerate=force,
-            t=search_type,
+            t=t,
            full_corpus=False,
        )
    except Exception as e:
-        logger.error(f"Failed to process batch indexing request: {e}", exc_info=True)
+        logger.error(
            f"🚨 Failed to {force} update {t} content index triggered via API call by {client} client: {e}",
            exc_info=True,
        )
    finally:
        state.config_lock.release()
    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="index/update",
        client=client,
        user_agent=user_agent,
        referer=referer,
        host=host,
    )
    logger.info(f"📪 Content index updated via API call by {client} client")
    return Response(content="OK", status_code=200)
--- a/src/khoj/utils/cli.py
+++ b/src/khoj/utils/cli.py
@ -9,6 +9,7 @@ from khoj.utils.yaml import parse_config_from_file
 from khoj.migrations.migrate_version import migrate_config_to_version
 from khoj.migrations.migrate_processor_config_openai import migrate_processor_conversation_schema
 from khoj.migrations.migrate_offline_model import migrate_offline_model
 from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema
 def cli(args=None):
@ -55,7 +56,12 @@ def cli(args=None):
 def run_migrations(args):
-    migrations = [migrate_config_to_version, migrate_processor_conversation_schema, migrate_offline_model]
+    migrations = [
        migrate_config_to_version,
        migrate_processor_conversation_schema,
        migrate_offline_model,
        migrate_offline_chat_schema,
    ]
    for migration in migrations:
        args = migration(args)
    return args
--- a/src/khoj/utils/config.py
+++ b/src/khoj/utils/config.py
@ -12,6 +12,8 @@ from khoj.processor.conversation.gpt4all.utils import download_model
 # External Packages
 import torch
 from khoj.utils.rawconfig import OfflineChatProcessorConfig
 logger = logging.getLogger(__name__)
 # Internal Packages
@ -84,7 +86,6 @@ class SearchModels:
@dataclass
 class GPT4AllProcessorConfig:
    chat_model: Optional[str] = "llama-2-7b-chat.ggmlv3.q4_K_S.bin"
    loaded_model: Union[Any, None] = None
@ -95,18 +96,20 @@ class ConversationProcessorConfigModel:
    ):
        self.openai_model = conversation_config.openai
        self.gpt4all_model = GPT4AllProcessorConfig()
-        self.enable_offline_chat = conversation_config.enable_offline_chat
+        self.offline_chat = conversation_config.offline_chat or OfflineChatProcessorConfig()
        self.max_prompt_size = conversation_config.max_prompt_size
        self.tokenizer = conversation_config.tokenizer
        self.conversation_logfile = Path(conversation_config.conversation_logfile)
        self.chat_session: List[str] = []
        self.meta_log: dict = {}
-        if self.enable_offline_chat:
+        if self.offline_chat.enable_offline_chat:
            try:
-                self.gpt4all_model.loaded_model = download_model(self.gpt4all_model.chat_model)
+                self.gpt4all_model.loaded_model = download_model(self.offline_chat.chat_model)
-            except ValueError as e:
+            except Exception as e:
                self.offline_chat.enable_offline_chat = False
                self.gpt4all_model.loaded_model = None
                logger.error(f"Error while loading offline chat model: {e}", exc_info=True)
                self.enable_offline_chat = False
        else:
            self.gpt4all_model.loaded_model = None
--- a/src/khoj/utils/constants.py
+++ b/src/khoj/utils/constants.py
@ -6,6 +6,64 @@ empty_escape_sequences = "\n|\r|\t| "
 app_env_filepath = "~/.khoj/env"
 telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
 empty_config = {
    "content-type": {
        "org": {
            "input-files": None,
            "input-filter": None,
            "compressed-jsonl": "~/.khoj/content/org/org.jsonl.gz",
            "embeddings-file": "~/.khoj/content/org/org_embeddings.pt",
            "index-heading-entries": False,
        },
        "markdown": {
            "input-files": None,
            "input-filter": None,
            "compressed-jsonl": "~/.khoj/content/markdown/markdown.jsonl.gz",
            "embeddings-file": "~/.khoj/content/markdown/markdown_embeddings.pt",
        },
        "pdf": {
            "input-files": None,
            "input-filter": None,
            "compressed-jsonl": "~/.khoj/content/pdf/pdf.jsonl.gz",
            "embeddings-file": "~/.khoj/content/pdf/pdf_embeddings.pt",
        },
        "plaintext": {
            "input-files": None,
            "input-filter": None,
            "compressed-jsonl": "~/.khoj/content/plaintext/plaintext.jsonl.gz",
            "embeddings-file": "~/.khoj/content/plaintext/plaintext_embeddings.pt",
        },
    },
    "search-type": {
        "symmetric": {
            "encoder": "sentence-transformers/all-MiniLM-L6-v2",
            "cross-encoder": "cross-encoder/ms-marco-MiniLM-L-6-v2",
            "model_directory": "~/.khoj/search/symmetric/",
        },
        "asymmetric": {
            "encoder": "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
            "cross-encoder": "cross-encoder/ms-marco-MiniLM-L-6-v2",
            "model_directory": "~/.khoj/search/asymmetric/",
        },
        "image": {"encoder": "sentence-transformers/clip-ViT-B-32", "model_directory": "~/.khoj/search/image/"},
    },
    "processor": {
        "conversation": {
            "openai": {
                "api-key": None,
                "chat-model": "gpt-3.5-turbo",
            },
            "offline-chat": {
                "enable-offline-chat": False,
                "chat-model": "llama-2-7b-chat.ggmlv3.q4_0.bin",
            },
            "tokenizer": None,
            "max-prompt-size": None,
            "conversation-logfile": "~/.khoj/processor/conversation/conversation_logs.json",
        }
    },
 }
 # default app config to use
 default_config = {
    "content-type": {
@ -72,7 +130,12 @@ default_config = {
                "api-key": None,
                "chat-model": "gpt-3.5-turbo",
            },
-            "enable-offline-chat": False,
+            "offline-chat": {
                "enable-offline-chat": False,
                "chat-model": "llama-2-7b-chat.ggmlv3.q4_0.bin",
            },
            "tokenizer": None,
            "max-prompt-size": None,
            "conversation-logfile": "~/.khoj/processor/conversation/conversation_logs.json",
        }
    },
--- a/src/khoj/utils/fs_syncer.py
+++ b/src/khoj/utils/fs_syncer.py
@ -1,6 +1,6 @@
 import logging
 import glob
-import base64
+import os
 from typing import Optional
 from bs4 import BeautifulSoup
@ -39,13 +39,13 @@ def get_plaintext_files(config: TextContentConfig) -> dict[str, str]:
        return soup.get_text(strip=True, separator="\n")
    # Extract required fields from config
-    input_files, input_filter = (
+    input_files, input_filters = (
        config.input_files,
        config.input_filter,
    )
    # Input Validation
-    if is_none_or_empty(input_files) and is_none_or_empty(input_filter):
+    if is_none_or_empty(input_files) and is_none_or_empty(input_filters):
        logger.debug("At least one of input-files or input-file-filter is required to be specified")
        return {}
@ -53,11 +53,12 @@ def get_plaintext_files(config: TextContentConfig) -> dict[str, str]:
    absolute_plaintext_files, filtered_plaintext_files = set(), set()
    if input_files:
        absolute_plaintext_files = {get_absolute_path(jsonl_file) for jsonl_file in input_files}
-    if input_filter:
+    if input_filters:
        filtered_plaintext_files = {
            filtered_file
-            for jsonl_file_filter in input_filter
+            for plaintext_file_filter in input_filters
-            for filtered_file in glob.glob(get_absolute_path(jsonl_file_filter), recursive=True)
+            for filtered_file in glob.glob(get_absolute_path(plaintext_file_filter), recursive=True)
            if os.path.isfile(filtered_file)
        }
    all_target_files = sorted(absolute_plaintext_files | filtered_plaintext_files)
@ -73,12 +74,12 @@ def get_plaintext_files(config: TextContentConfig) -> dict[str, str]:
    filename_to_content_map = {}
    for file in all_target_files:
-        with open(file, "r") as f:
+        with open(file, "r", encoding="utf8") as f:
            try:
                plaintext_content = f.read()
                if file.endswith(("html", "htm", "xml")):
                    plaintext_content = extract_html_content(plaintext_content)
-                filename_to_content_map[file] = f.read()
+                filename_to_content_map[file] = plaintext_content
            except Exception as e:
                logger.warning(f"Unable to read file: {file} as plaintext. Skipping file.")
                logger.warning(e, exc_info=True)
@ -88,13 +89,13 @@ def get_plaintext_files(config: TextContentConfig) -> dict[str, str]:
 def get_org_files(config: TextContentConfig):
    # Extract required fields from config
-    org_files, org_file_filter = (
+    org_files, org_file_filters = (
        config.input_files,
        config.input_filter,
    )
    # Input Validation
-    if is_none_or_empty(org_files) and is_none_or_empty(org_file_filter):
+    if is_none_or_empty(org_files) and is_none_or_empty(org_file_filters):
        logger.debug("At least one of org-files or org-file-filter is required to be specified")
        return {}
@ -102,11 +103,12 @@ def get_org_files(config: TextContentConfig):
    absolute_org_files, filtered_org_files = set(), set()
    if org_files:
        absolute_org_files = {get_absolute_path(org_file) for org_file in org_files}
-    if org_file_filter:
+    if org_file_filters:
        filtered_org_files = {
            filtered_file
-            for org_file_filter in org_file_filter
+            for org_file_filter in org_file_filters
            for filtered_file in glob.glob(get_absolute_path(org_file_filter), recursive=True)
            if os.path.isfile(filtered_file)
        }
    all_org_files = sorted(absolute_org_files | filtered_org_files)
@ -119,7 +121,7 @@ def get_org_files(config: TextContentConfig):
    filename_to_content_map = {}
    for file in all_org_files:
-        with open(file, "r") as f:
+        with open(file, "r", encoding="utf8") as f:
            try:
                filename_to_content_map[file] = f.read()
            except Exception as e:
@ -131,26 +133,27 @@ def get_org_files(config: TextContentConfig):
 def get_markdown_files(config: TextContentConfig):
    # Extract required fields from config
-    markdown_files, markdown_file_filter = (
+    markdown_files, markdown_file_filters = (
        config.input_files,
        config.input_filter,
    )
    # Input Validation
-    if is_none_or_empty(markdown_files) and is_none_or_empty(markdown_file_filter):
+    if is_none_or_empty(markdown_files) and is_none_or_empty(markdown_file_filters):
        logger.debug("At least one of markdown-files or markdown-file-filter is required to be specified")
        return {}
-    "Get Markdown files to process"
+    # Get markdown files to process
    absolute_markdown_files, filtered_markdown_files = set(), set()
    if markdown_files:
        absolute_markdown_files = {get_absolute_path(markdown_file) for markdown_file in markdown_files}
-    if markdown_file_filter:
+    if markdown_file_filters:
        filtered_markdown_files = {
            filtered_file
-            for markdown_file_filter in markdown_file_filter
+            for markdown_file_filter in markdown_file_filters
            for filtered_file in glob.glob(get_absolute_path(markdown_file_filter), recursive=True)
            if os.path.isfile(filtered_file)
        }
    all_markdown_files = sorted(absolute_markdown_files | filtered_markdown_files)
@ -168,7 +171,7 @@ def get_markdown_files(config: TextContentConfig):
    filename_to_content_map = {}
    for file in all_markdown_files:
-        with open(file, "r") as f:
+        with open(file, "r", encoding="utf8") as f:
            try:
                filename_to_content_map[file] = f.read()
            except Exception as e:
@ -180,13 +183,13 @@ def get_markdown_files(config: TextContentConfig):
 def get_pdf_files(config: TextContentConfig):
    # Extract required fields from config
-    pdf_files, pdf_file_filter = (
+    pdf_files, pdf_file_filters = (
        config.input_files,
        config.input_filter,
    )
    # Input Validation
-    if is_none_or_empty(pdf_files) and is_none_or_empty(pdf_file_filter):
+    if is_none_or_empty(pdf_files) and is_none_or_empty(pdf_file_filters):
        logger.debug("At least one of pdf-files or pdf-file-filter is required to be specified")
        return {}
@ -194,11 +197,12 @@ def get_pdf_files(config: TextContentConfig):
    absolute_pdf_files, filtered_pdf_files = set(), set()
    if pdf_files:
        absolute_pdf_files = {get_absolute_path(pdf_file) for pdf_file in pdf_files}
-    if pdf_file_filter:
+    if pdf_file_filters:
        filtered_pdf_files = {
            filtered_file
-            for pdf_file_filter in pdf_file_filter
+            for pdf_file_filter in pdf_file_filters
            for filtered_file in glob.glob(get_absolute_path(pdf_file_filter), recursive=True)
            if os.path.isfile(filtered_file)
        }
    all_pdf_files = sorted(absolute_pdf_files | filtered_pdf_files)
@ -214,7 +218,7 @@ def get_pdf_files(config: TextContentConfig):
    for file in all_pdf_files:
        with open(file, "rb") as f:
            try:
-                filename_to_content_map[file] = base64.b64encode(f.read()).decode("utf-8")
+                filename_to_content_map[file] = f.read()
            except Exception as e:
                logger.warning(f"Unable to read file: {file} as PDF. Skipping file.")
                logger.warning(e, exc_info=True)
--- a/src/khoj/utils/helpers.py
+++ b/src/khoj/utils/helpers.py
@ -66,20 +66,25 @@ def merge_dicts(priority_dict: dict, default_dict: dict):
    return merged_dict
-def get_file_type(filepath: str) -> str:
+def get_file_type(file_type: str) -> tuple[str, str]:
-    "Get file type from file path"
+    "Get file type from file mime type"
    file_type = Path(filepath).suffix[1:]
-    if file_type in ["md", "markdown"]:
+    encoding = file_type.split("=")[1].strip().lower() if ";" in file_type else None
-        return "markdown"
+    file_type = file_type.split(";")[0].strip() if ";" in file_type else file_type
-    elif file_type in ["org", "orgmode"]:
+    if file_type in ["text/markdown"]:
-        return "org"
+        return "markdown", encoding
-    elif file_type in ["txt", "text", "html", "xml", "htm", "rst"]:
+    elif file_type in ["text/org"]:
-        return "plaintext"
+        return "org", encoding
-    elif file_type in ["pdf"]:
+    elif file_type in ["application/pdf"]:
-        return "pdf"
+        return "pdf", encoding
-
+    elif file_type in ["image/jpeg"]:
-    return file_type
+        return "jpeg", encoding
    elif file_type in ["image/png"]:
        return "png", encoding
    elif file_type in ["text/plain", "text/html", "application/xml", "text/x-rst"]:
        return "plaintext", encoding
    else:
        return "other", encoding
 def load_model(
--- a/src/khoj/utils/rawconfig.py
+++ b/src/khoj/utils/rawconfig.py
@ -91,10 +91,17 @@ class OpenAIProcessorConfig(ConfigBase):
    chat_model: Optional[str] = "gpt-3.5-turbo"
 class OfflineChatProcessorConfig(ConfigBase):
    enable_offline_chat: Optional[bool] = False
    chat_model: Optional[str] = "llama-2-7b-chat.ggmlv3.q4_0.bin"
 class ConversationProcessorConfig(ConfigBase):
    conversation_logfile: Path
    openai: Optional[OpenAIProcessorConfig]
-    enable_offline_chat: Optional[bool] = False
+    offline_chat: Optional[OfflineChatProcessorConfig]
    max_prompt_size: Optional[int]
    tokenizer: Optional[str]
 class ProcessorConfig(ConfigBase):
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -18,6 +18,7 @@ from khoj.utils.helpers import resolve_absolute_path
 from khoj.utils.rawconfig import (
    ContentConfig,
    ConversationProcessorConfig,
    OfflineChatProcessorConfig,
    OpenAIProcessorConfig,
    ProcessorConfig,
    TextContentConfig,
@ -207,8 +208,9 @@ def processor_config_offline_chat(tmp_path_factory):
    # Setup conversation processor
    processor_config = ProcessorConfig()
    offline_chat = OfflineChatProcessorConfig(enable_offline_chat=True)
    processor_config.conversation = ConversationProcessorConfig(
-        enable_offline_chat=True,
+        offline_chat=offline_chat,
        conversation_logfile=processor_dir.joinpath("conversation_logs.json"),
    )
--- a/tests/test_client.py
+++ b/tests/test_client.py
@ -6,6 +6,7 @@ from urllib.parse import quote
 # External Packages
 from fastapi.testclient import TestClient
 import pytest
 # Internal Packages
 from app.main import app
@ -60,13 +61,13 @@ def test_regenerate_with_invalid_content_type(client):
 # ----------------------------------------------------------------------------------------------------
-def test_index_batch(client):
+def test_index_update(client):
    # Arrange
-    request_body = get_sample_files_data()
+    files = get_sample_files_data()
    headers = {"x-api-key": "secret"}
    # Act
-    response = client.post("/v1/indexer/batch", json=request_body, headers=headers)
+    response = client.post("/api/v1/index/update", files=files, headers=headers)
    # Assert
    assert response.status_code == 200
@ -76,12 +77,11 @@ def test_index_batch(client):
 def test_regenerate_with_valid_content_type(client):
    for content_type in ["all", "org", "markdown", "image", "pdf", "notion", "plugin1"]:
        # Arrange
-        request_body = get_sample_files_data()
+        files = get_sample_files_data()
        headers = {"x-api-key": "secret"}
        # Act
-        response = client.post(f"/v1/indexer/batch?search_type={content_type}", json=request_body, headers=headers)
+        response = client.post(f"/api/v1/index/update?t={content_type}", files=files, headers=headers)
        # Assert
        assert response.status_code == 200, f"Returned status: {response.status_code} for content type: {content_type}"
@ -92,17 +92,17 @@ def test_regenerate_with_github_fails_without_pat(client):
    response = client.get(f"/api/update?force=true&t=github")
    # Arrange
-    request_body = get_sample_files_data()
+    files = get_sample_files_data()
    headers = {"x-api-key": "secret"}
    # Act
-    response = client.post(f"/v1/indexer/batch?search_type=github", json=request_body, headers=headers)
+    response = client.post(f"/api/v1/index/update?t=github", files=files, headers=headers)
    # Assert
    assert response.status_code == 200, f"Returned status: {response.status_code} for content type: github"
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.skip(reason="Flaky test on parallel test runs")
 def test_get_configured_types_via_api(client):
    # Act
    response = client.get(f"/api/config/types")
@ -288,24 +288,20 @@ def test_notes_search_with_exclude_filter(
 def get_sample_files_data():
    return {
-        "org": {
+        "files": ("path/to/filename.org", "* practicing piano", "text/org"),
-            "path/to/filename.org": "* practicing piano",
+        "files": ("path/to/filename1.org", "** top 3 reasons why I moved to SF", "text/org"),
-            "path/to/filename1.org": "** top 3 reasons why I moved to SF",
+        "files": ("path/to/filename2.org", "* how to build a search engine", "text/org"),
-            "path/to/filename2.org": "* how to build a search engine",
+        "files": ("path/to/filename.pdf", "Moore's law does not apply to consumer hardware", "application/pdf"),
-        },
+        "files": ("path/to/filename1.pdf", "The sun is a ball of helium", "application/pdf"),
-        "pdf": {
+        "files": ("path/to/filename2.pdf", "Effect of sunshine on baseline human happiness", "application/pdf"),
-            "path/to/filename.pdf": "Moore's law does not apply to consumer hardware",
+        "files": ("path/to/filename.txt", "data,column,value", "text/plain"),
-            "path/to/filename1.pdf": "The sun is a ball of helium",
+        "files": ("path/to/filename1.txt", "<html>my first web page</html>", "text/plain"),
-            "path/to/filename2.pdf": "Effect of sunshine on baseline human happiness",
+        "files": ("path/to/filename2.txt", "2021-02-02 Journal Entry", "text/plain"),
-        },
+        "files": ("path/to/filename.md", "# Notes from client call", "text/markdown"),
-        "plaintext": {
+        "files": (
-            "path/to/filename.txt": "data,column,value",
+            "path/to/filename1.md",
-            "path/to/filename1.txt": "<html>my first web page</html>",
+            "## Studying anthropological records from the Fatimid caliphate",
-            "path/to/filename2.txt": "2021-02-02 Journal Entry",
+            "text/markdown",
-        },
+        ),
-        "markdown": {
+        "files": ("path/to/filename2.md", "**Understanding science through the lens of art**", "text/markdown"),
            "path/to/filename.md": "# Notes from client call",
            "path/to/filename1.md": "## Studying anthropological records from the Fatimid caliphate",
            "path/to/filename2.md": "**Understanding science through the lens of art**",
        },
    }
--- a/tests/test_gpt4all_chat_actors.py
+++ b/tests/test_gpt4all_chat_actors.py
@ -24,7 +24,7 @@ from khoj.processor.conversation.gpt4all.utils import download_model
 from khoj.processor.conversation.utils import message_to_log
-MODEL_NAME = "llama-2-7b-chat.ggmlv3.q4_K_S.bin"
+MODEL_NAME = "llama-2-7b-chat.ggmlv3.q4_0.bin"
@pytest.fixture(scope="session")
@ -128,15 +128,15 @@ def test_extract_multiple_explicit_questions_from_message(loaded_model):
@pytest.mark.chatquality
 def test_extract_multiple_implicit_questions_from_message(loaded_model):
    # Act
-    response = extract_questions_offline("Is Morpheus taller than Neo?", loaded_model=loaded_model)
+    response = extract_questions_offline("Is Carl taller than Ross?", loaded_model=loaded_model)
    # Assert
-    expected_responses = ["height", "taller", "shorter", "heights"]
+    expected_responses = ["height", "taller", "shorter", "heights", "who"]
    assert len(response) <= 3
    for question in response:
        assert any([expected_response in question.lower() for expected_response in expected_responses]), (
-            "Expected chat actor to ask follow-up questions about Morpheus and Neo, but got: " + question
+            "Expected chat actor to ask follow-up questions about Carl and Ross, but got: " + question
        )
@ -145,7 +145,7 @@ def test_extract_multiple_implicit_questions_from_message(loaded_model):
 def test_generate_search_query_using_question_from_chat_history(loaded_model):
    # Arrange
    message_list = [
-        ("What is the name of Mr. Vader's daughter?", "Princess Leia", []),
+        ("What is the name of Mr. Anderson's daughter?", "Miss Barbara", []),
    ]
    # Act
@ -156,17 +156,22 @@ def test_generate_search_query_using_question_from_chat_history(loaded_model):
        use_history=True,
    )
-    expected_responses = [
+    all_expected_in_response = [
-        "Vader",
+        "Anderson",
-        "sons",
+    ]
    any_expected_in_response = [
        "son",
-        "Darth",
+        "sons",
        "children",
    ]
    # Assert
    assert len(response) >= 1
-    assert any([expected_response in response[0] for expected_response in expected_responses]), (
+    assert all([expected_response in response[0] for expected_response in all_expected_in_response]), (
        "Expected chat actor to ask for clarification in response, but got: " + response[0]
    )
    assert any([expected_response in response[0] for expected_response in any_expected_in_response]), (
        "Expected chat actor to ask for clarification in response, but got: " + response[0]
    )
@ -176,20 +181,20 @@ def test_generate_search_query_using_question_from_chat_history(loaded_model):
 def test_generate_search_query_using_answer_from_chat_history(loaded_model):
    # Arrange
    message_list = [
-        ("What is the name of Mr. Vader's daughter?", "Princess Leia", []),
+        ("What is the name of Mr. Anderson's daughter?", "Miss Barbara", []),
    ]
    # Act
    response = extract_questions_offline(
-        "Is she a Jedi?",
+        "Is she a Doctor?",
        conversation_log=populate_chat_history(message_list),
        loaded_model=loaded_model,
        use_history=True,
    )
    expected_responses = [
-        "Leia",
+        "Barbara",
-        "Vader",
+        "Robert",
        "daughter",
    ]
--- a/tests/test_pdf_to_jsonl.py
+++ b/tests/test_pdf_to_jsonl.py
@ -1,7 +1,6 @@
 # Standard Packages
 import json
 import os
 import base64
 # Internal Packages
 from khoj.processor.pdf.pdf_to_jsonl import PdfToJsonl
@ -16,7 +15,7 @@ def test_single_page_pdf_to_jsonl():
    # Extract Entries from specified Pdf files
    # Read singlepage.pdf into memory as bytes
    with open("tests/data/pdf/singlepage.pdf", "rb") as f:
-        pdf_bytes = base64.b64encode(f.read()).decode("utf-8")
+        pdf_bytes = f.read()
    data = {"tests/data/pdf/singlepage.pdf": pdf_bytes}
    entries, entry_to_file_map = PdfToJsonl.extract_pdf_entries(pdf_files=data)
@ -36,7 +35,7 @@ def test_multi_page_pdf_to_jsonl():
    # Act
    # Extract Entries from specified Pdf files
    with open("tests/data/pdf/multipage.pdf", "rb") as f:
-        pdf_bytes = base64.b64encode(f.read()).decode("utf-8")
+        pdf_bytes = f.read()
    data = {"tests/data/pdf/multipage.pdf": pdf_bytes}
    entries, entry_to_file_map = PdfToJsonl.extract_pdf_entries(pdf_files=data)
--- a/tests/test_text_search.py
+++ b/tests/test_text_search.py
@ -1,26 +1,25 @@
 # System Packages
 import logging
 import locale
 from pathlib import Path
 import os
 # External Packages
 import pytest
 from khoj.utils.config import SearchModels
 # Internal Packages
 from khoj.utils.state import content_index, search_models
 from khoj.search_type import text_search
 from khoj.utils.rawconfig import ContentConfig, SearchConfig, TextContentConfig
 from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
 from khoj.processor.github.github_to_jsonl import GithubToJsonl
 from khoj.utils.config import SearchModels
 from khoj.utils.fs_syncer import get_org_files
 from khoj.utils.rawconfig import ContentConfig, SearchConfig, TextContentConfig
 # Test
 # ----------------------------------------------------------------------------------------------------
-def test_text_search_setup_with_missing_file_raises_error(
+def test_text_search_setup_with_missing_file_raises_error(org_config_with_only_new_file: TextContentConfig):
    org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig
 ):
    # Arrange
    # Ensure file mentioned in org.input-files is missing
    single_new_file = Path(org_config_with_only_new_file.input_files[0])
@ -29,7 +28,23 @@ def test_text_search_setup_with_missing_file_raises_error(
    # Act
    # Generate notes embeddings during asymmetric setup
    with pytest.raises(FileNotFoundError):
-        data = get_org_files(org_config_with_only_new_file)
+        get_org_files(org_config_with_only_new_file)
 # ----------------------------------------------------------------------------------------------------
 def test_get_org_files_with_org_suffixed_dir_doesnt_raise_error(tmp_path: Path):
    # Arrange
    orgfile = tmp_path / "directory.org" / "file.org"
    orgfile.parent.mkdir()
    with open(orgfile, "w") as f:
        f.write("* Heading\n- List item\n")
    org_content_config = TextContentConfig(
        input_filter=[f"{tmp_path}/**/*"], compressed_jsonl="test.jsonl", embeddings_file="test.pt"
    )
    # Act
    # should not raise IsADirectoryError and return orgfile
    assert get_org_files(org_content_config) == {f"{orgfile}": "* Heading\n- List item\n"}
 # ----------------------------------------------------------------------------------------------------
@ -48,6 +63,7 @@ def test_text_search_setup_with_empty_file_raises_error(
 def test_text_search_setup(content_config: ContentConfig, search_models: SearchModels):
    # Arrange
    data = get_org_files(content_config.org)
    # Act
    # Regenerate notes embeddings during asymmetric setup
    notes_model = text_search.setup(
--- a/versions.json
+++ b/versions.json
@ -24,5 +24,6 @@
 	"0.12.0": "0.15.0",
 	"0.12.1": "0.15.0",
 	"0.12.2": "0.15.0",
-	"0.12.3": "0.15.0"
+	"0.12.3": "0.15.0",
 	"0.13.0": "0.15.0"
 }