Resolve merge conflicts

2024-11-23 23:48:56 +01:00 · 2023-10-19 14:39:05 -07:00 · 2023-10-19 14:39:05 -07:00 · 963cd165eb
commit 963cd165eb
parent c125995d94 e3f8a95784
42 changed files with 941 additions and 590 deletions
--- a/docs/chat.md
+++ b/docs/chat.md
@ -7,18 +7,21 @@

 ### Setup
 #### Offline Chat
-Offline chat works without internet but it is slower, lower quality and more compute intensive.
+Offline chat stays completely private and works without internet. But it is slower, lower quality and more compute intensive.

-!> **Warning**: This will download a 3Gb+ Llama v2 chat model which can take some time
+> **System Requirements**:
+>  - Machine with at least **6 GB of RAM** and **4 GB of Disk** available
+>  - A CPU supporting [AVX or AVX2 instructions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) is required
+>  - A Mac M1+ or [Vulcan supported GPU](https://vulkan.gpuinfo.org/) should significantly speed up chat response times

- Open your [Khoj settings](http://localhost:42110/config/), click *Enable* on the Offline Chat card
+- Open your [Khoj settings](http://localhost:42110/config/) and click *Enable* on the Offline Chat card

 ![Configure offline chat](https://user-images.githubusercontent.com/6413477/257021364-8a2029f5-dc21-4de8-9af9-9ba6100d695c.mp4 ':include :type=mp4')

 #### Online Chat
 Online chat requires internet to use ChatGPT but is faster, higher quality and less compute intensive.

-!> **Warning**: This will enable Khoj to send your chat queries and notes to OpenAI for processing
+!> **Warning**: This will enable Khoj to send your chat queries and query relevant notes to OpenAI for processing

 1. Get your [OpenAI API Key](https://platform.openai.com/account/api-keys)
 2. Open your [Khoj Online Chat settings](http://localhost:42110/config/processor/conversation), add your OpenAI API key, and click *Save*. Then go to your [Khoj settings](http://localhost:42110/config) and click `Configure`. This will refresh Khoj with your OpenAI API key.
--- a/docs/emacs.md
+++ b/docs/emacs.md
@ -46,7 +46,7 @@ Indexes your org-agenda files, by default.
  (use-package khoj
    :ensure t
    :pin melpa-stable
-    :bind ("C-c s" . 'khoj)
+    :bind ("C-c s" . 'khoj))
 ```

 - Note: Install `khoj.el` from MELPA (instead of MELPA Stable) if you installed the pre-release version of khoj
--- a/manifest.json
+++ b/manifest.json
@ -1,7 +1,7 @@
 {
 	"id": "khoj",
 	"name": "Khoj",
-	"version": "0.12.3",
+	"version": "0.13.0",
 	"minAppVersion": "0.15.0",
 	"description": "An Open-Source AI Personal Assistant for your Digital Brain",
 	"author": "Khoj Inc.",
--- a/pyproject.toml
+++ b/pyproject.toml
@ -4,7 +4,7 @@ build-backend = "hatchling.build"

 [project]
 name = "khoj-assistant"
-description = "An AI personal assistant for your Digital Brain"
+description = "An AI copilot for your Second Brain"
 readme = "README.md"
 license = "GPL-3.0-or-later"
 requires-python = ">=3.8"
@ -40,8 +40,9 @@ dependencies = [
    "dateparser >= 1.1.1",
    "defusedxml == 0.7.1",
    "fastapi == 0.77.1",
+    "python-multipart >= 0.0.5",
    "jinja2 == 3.1.2",
-    "openai >= 0.27.0",
+    "openai >= 0.27.0, < 1.0.0",
    "tiktoken >= 0.3.2",
    "tenacity >= 8.2.2",
    "pillow == 9.3.0",
@ -83,6 +84,7 @@ test = [
    "freezegun >= 1.2.0",
    "factory-boy >= 3.2.1",
    "trio >= 0.22.0",
+    "pytest-xdist",
 ]
 dev = [
    "khoj-assistant[test]",
--- a/scripts/bump_version.sh
+++ b/scripts/bump_version.sh
@ -9,6 +9,10 @@ do
            # Get current project version
            current_version=$OPTARG

+            # Bump Desktop app to current version
+            cd $project_root/src/interface/desktop
+            sed -E -i.bak "s/version\": \"(.*)\",/version\": \"$current_version\",/" package.json
+
            # Bump Obsidian plugin to current version
            cd $project_root/src/interface/obsidian
            sed -E -i.bak "s/version\": \"(.*)\",/version\": \"$current_version\",/" package.json
--- a/src/app/main.py
+++ b/src/app/main.py
@ -14,10 +14,11 @@ warnings.filterwarnings("ignore", message=r"legacy way to download files from th

 # External Packages
 import uvicorn
-import django
-import schedule
-
 from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+import schedule
+import django
+
 from fastapi.staticfiles import StaticFiles
 from rich.logging import RichHandler
 from django.core.asgi import get_asgi_application
@ -41,6 +42,15 @@ app = FastAPI()
 # Get Django Application
 django_app = get_asgi_application()

+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["app://obsidian.md", "http://localhost:*", "https://app.khoj.dev/*", "app://khoj.dev"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
 # Set Locale
 locale.setlocale(locale.LC_ALL, "")

--- a/src/interface/desktop/main.js
+++ b/src/interface/desktop/main.js
@ -8,7 +8,6 @@ const {dialog} = require('electron');

 const cron = require('cron').CronJob;
 const axios = require('axios');
-const { Readable } = require('stream');

 const KHOJ_URL = 'http://127.0.0.1:42110'

@ -65,7 +64,7 @@ const schema = {

 var state = {}

-const store = new Store({schema});
+const store = new Store({ schema });

 console.log(store);

@ -86,57 +85,65 @@ function handleSetTitle (event, title) {
    });
 }

+function filenameToMimeType (filename) {
+    const extension = filename.split('.').pop();
+    switch (extension) {
+        case 'pdf':
+            return 'application/pdf';
+        case 'png':
+            return 'image/png';
+        case 'jpg':
+        case 'jpeg':
+            return 'image/jpeg';
+        case 'md':
+        case 'markdown':
+            return 'text/markdown';
+        case 'org':
+            return 'text/org';
+        default:
+            return 'text/plain';
+    }
+}
+
 function pushDataToKhoj (regenerate = false) {
    let filesToPush = [];
-    const files = store.get('files');
-    const folders = store.get('folders');
-    state = {
-        completed: true
+    const files = store.get('files') || [];
+    const folders = store.get('folders') || [];
+    state = { completed: true }
+
+    // Collect paths of all configured files to index
+    for (const file of files) {
+        filesToPush.push(file.path);
    }

-    if (files) {
-        for (file of files) {
-            filesToPush.push(file.path);
-        }
-    }
-    if (folders) {
-        for (folder of folders) {
-            const files = fs.readdirSync(folder.path, { withFileTypes: true });
-            for (file of files) {
-                if (file.isFile() && validFileTypes.includes(file.name.split('.').pop())) {
-                    filesToPush.push(path.join(folder.path, file.name));
-                }
+    // Collect paths of all indexable files in configured folders
+    for (const folder of folders) {
+        const files = fs.readdirSync(folder.path, { withFileTypes: true });
+        for (const file of files) {
+            if (file.isFile() && validFileTypes.includes(file.name.split('.').pop())) {
+                filesToPush.push(path.join(folder.path, file.name));
            }
        }
    }

-    let data = {
-        files: []
-    }
-
    const lastSync = store.get('lastSync') || [];
-
-    for (file of filesToPush) {
+    const formData = new FormData();
+    for (const file of filesToPush) {
        const stats = fs.statSync(file);
        if (!regenerate) {
+            // Only push files that have been modified since last sync
            if (stats.mtime.toISOString() < lastSync.find((syncedFile) => syncedFile.path === file)?.datetime) {
                continue;
            }
        }

+        // Collect all updated or newly created files since last sync to index on Khoj server
        try {
-            let rawData;
-            // If the file is a PDF or IMG file, read it as a binary file
-            if (binaryFileTypes.includes(file.split('.').pop())) {
-                rawData = fs.readFileSync(file).toString('base64');
-            } else {
-                rawData = fs.readFileSync(file, 'utf8');
-            }
-
-            data.files.push({
-                path: file,
-                content: rawData
-            });
+            let encoding = binaryFileTypes.includes(file.split('.').pop()) ? "binary" : "utf8";
+            let mimeType = filenameToMimeType(file) + (encoding === "utf8" ? "; charset=UTF-8" : "");
+            let fileContent = Buffer.from(fs.readFileSync(file, { encoding: encoding }), encoding);
+            let fileObj = new Blob([fileContent], { type: mimeType });
+            formData.append('files', fileObj, file);
            state[file] = {
                success: true,
            }
@ -149,46 +156,46 @@ function pushDataToKhoj (regenerate = false) {
        }
    }

+    // Mark deleted files for removal from index on Khoj server
    for (const syncedFile of lastSync) {
        if (!filesToPush.includes(syncedFile.path)) {
-            data.files.push({
-                path: syncedFile.path,
-                content: ""
-            });
+            fileObj = new Blob([""], { type: filenameToMimeType(syncedFile.path) });
+            formData.append('files', fileObj, syncedFile.path);
        }
    }

-    const headers = { 'x-api-key': 'secret', 'Content-Type': 'application/json' };
-
-    const stream = new Readable({
-        read() {
-            this.push(JSON.stringify(data));
-            this.push(null);
-        }
-    });
-
-    const hostURL = store.get('hostURL') || KHOJ_URL;
-
-    axios.post(`${hostURL}/v1/indexer/batch?regenerate=${regenerate}`, stream, { headers })
-        .then(response => {
-            console.log(response.data);
-            const win = BrowserWindow.getAllWindows()[0];
-            win.webContents.send('update-state', state);
-            let lastSync = [];
-            for (const file of filesToPush) {
-                lastSync.push({
-                    path: file,
-                    datetime: new Date().toISOString()
-                });
-            }
-            store.set('lastSync', lastSync);
-        })
-        .catch(error => {
-            console.error(error);
-            state['completed'] = false
-            const win = BrowserWindow.getAllWindows()[0];
-            win.webContents.send('update-state', state);
-        });
+    // Send collected files to Khoj server for indexing
+    if (!!formData?.entries()?.next().value) {
+        const hostURL = store.get('hostURL') || KHOJ_URL;
+        const headers = {
+            'x-api-key': 'secret'
+        };
+        axios.post(`${hostURL}/api/v1/index/update?force=${regenerate}&client=desktop`, formData, { headers })
+            .then(response => {
+                console.log(response.data);
+                let lastSync = [];
+                for (const file of filesToPush) {
+                    lastSync.push({
+                        path: file,
+                        datetime: new Date().toISOString()
+                    });
+                }
+                store.set('lastSync', lastSync);
+            })
+            .catch(error => {
+                console.error(error);
+                state['completed'] = false
+            })
+            .finally(() => {
+                // Syncing complete
+                const win = BrowserWindow.getAllWindows()[0];
+                if (win) win.webContents.send('update-state', state);
+            });
+    } else {
+        // Syncing complete
+        const win = BrowserWindow.getAllWindows()[0];
+        if (win) win.webContents.send('update-state', state);
+    }
 }

 pushDataToKhoj();
--- a/src/interface/desktop/package.json
+++ b/src/interface/desktop/package.json
@ -1,13 +1,13 @@
 {
  "name": "Khoj",
-  "homepage": ".",
-  "productName": "Khoj",
-  "version": "1.0.2",
-  "description": "Scaffolding for the desktop entrypoint to Khoj",
-  "main": "main.js",
+  "version": "0.13.0",
+  "description": "An AI copilot for your Second Brain",
+  "author": "Saba Imran, Debanjum Singh Solanky <team@khoj.dev>",
+  "license": "GPL-3.0-or-later",
+  "homepage": "https://khoj.dev",
  "repository": "\"https://github.com/khoj-ai/khoj\"",
-  "author": "Khoj <team@khoj.dev>",
-  "license": "MIT",
+  "productName": "Khoj",
+  "main": "main.js",
  "private": false,
  "devDependencies": {
    "electron": "25.8.1"
--- a/src/interface/emacs/khoj.el
+++ b/src/interface/emacs/khoj.el
@ -1,11 +1,12 @@
-;;; khoj.el --- AI personal assistant for your digital brain -*- lexical-binding: t -*-
+;;; khoj.el --- AI copilot for your Second Brain -*- lexical-binding: t -*-

-;; Copyright (C) 2021-2022 Debanjum Singh Solanky
+;; Copyright (C) 2021-2023 Khoj Inc.

-;; Author: Debanjum Singh Solanky <debanjum@gmail.com>
-;; Description: An AI personal assistant for your digital brain
+;; Author: Debanjum Singh Solanky <debanjum@khoj.dev>
+;;         Saba Imran <saba@khoj.dev>
+;; Description: An AI copilot for your Second Brain
 ;; Keywords: search, chat, org-mode, outlines, markdown, pdf, image
-;; Version: 0.12.3
+;; Version: 0.13.0
 ;; Package-Requires: ((emacs "27.1") (transient "0.3.0") (dash "2.19.1"))
 ;; URL: https://github.com/khoj-ai/khoj/tree/master/src/interface/emacs

@ -28,8 +29,8 @@

 ;;; Commentary:

-;; Create an AI personal assistant for your `org-mode', `markdown' notes,
-;; PDFs and images. The assistant exposes 2 modes, search and chat:
+;; Create an AI copilot to your `org-mode', `markdown' notes,
+;; PDFs and images. The copilot exposes 2 modes, search and chat:
 ;;
 ;; Chat provides faster answers, iterative discovery and assisted
 ;; creativity. It requires your OpenAI API key to access GPT models
@ -87,6 +88,21 @@
  :group 'khoj
  :type 'integer)

+(defcustom khoj-search-on-idle-time 0.3
+  "Idle time (in seconds) to wait before triggering search."
+  :group 'khoj
+  :type 'number)
+
+(defcustom khoj-server-api-key "secret"
+  "API Key to Khoj server."
+  :group 'khoj
+  :type 'string)
+
+(defcustom khoj-index-interval 3600
+  "Interval (in seconds) to wait before updating content index."
+  :group 'khoj
+  :type 'number)
+
 (defcustom khoj-default-content-type "org"
  "The default content type to perform search on."
  :group 'khoj
@ -115,6 +131,15 @@
 (defvar khoj--content-type "org"
  "The type of content to perform search on.")

+(defvar khoj--search-on-idle-timer nil
+  "Idle timer to trigger incremental search.")
+
+(defvar khoj--index-timer nil
+  "Timer to trigger content indexing.")
+
+(defvar khoj--indexed-files '()
+  "Files that were indexed in previous content indexing run.")
+
 (declare-function org-element-property "org-mode" (PROPERTY ELEMENT))
 (declare-function org-element-type "org-mode" (ELEMENT))
 (declare-function markdown-mode "markdown-mode" ())
@ -236,6 +261,11 @@ for example), set this to the full interpreter path."
  :type 'boolean
  :group 'khoj)

+(defcustom khoj-offline-chat-model nil
+  "Specify chat model to use for offline chat with khoj."
+  :type 'string
+  :group 'khoj)
+
 (defcustom khoj-auto-setup t
  "Automate install, configure and start of khoj server.
 Auto invokes setup steps on calling main entrypoint."
@ -365,9 +395,9 @@ CONFIG is json obtained from Khoj config API."
          (string-join "/"))))

 (defun khoj--server-configure ()
-  "Configure the the Khoj server for search and chat."
+  "Configure the Khoj server for search and chat."
  (interactive)
-  (let* ((org-directory-regexes (or (mapcar (lambda (dir) (format "%s/**/*.org" dir)) khoj-org-directories) json-null))
+  (let* ((url-request-method "GET")
         (current-config
          (with-temp-buffer
            (url-insert-file-contents (format "%s/api/config/data" khoj-server-url))
@ -376,56 +406,12 @@ CONFIG is json obtained from Khoj config API."
           (with-temp-buffer
             (url-insert-file-contents (format "%s/api/config/data/default" khoj-server-url))
             (ignore-error json-end-of-file (json-parse-buffer :object-type 'alist :array-type 'list :null-object json-null :false-object json-false))))
-         (default-index-dir (khoj--get-directory-from-config default-config '(content-type org embeddings-file)))
         (default-chat-dir (khoj--get-directory-from-config default-config '(processor conversation conversation-logfile)))
         (chat-model (or khoj-chat-model (alist-get 'chat-model (alist-get 'openai (alist-get 'conversation (alist-get 'processor default-config))))))
-         (default-model (alist-get 'model (alist-get 'conversation (alist-get 'processor default-config))))
-         (enable-offline-chat (or khoj-chat-offline (alist-get 'enable-offline-chat (alist-get 'conversation (alist-get 'processor default-config)))))
+         (enable-offline-chat (or khoj-chat-offline (alist-get 'enable-offline-chat (alist-get 'offline-chat (alist-get 'conversation (alist-get 'processor default-config))))))
+         (offline-chat-model (or khoj-offline-chat-model (alist-get 'chat-model (alist-get 'offline-chat (alist-get 'conversation (alist-get 'processor default-config))))))
         (config (or current-config default-config)))

-    ;; Configure content types
-    (cond
-     ;; If khoj backend is not configured yet
-     ((not current-config)
-      (message "khoj.el: Server not configured yet.")
-      (setq config (delq (assoc 'content-type config) config))
-      (cl-pushnew `(content-type . ((org . ((input-files . ,khoj-org-files)
-                                            (input-filter . ,org-directory-regexes)
-                                            (compressed-jsonl . ,(format "%s/org.jsonl.gz" default-index-dir))
-                                            (embeddings-file . ,(format "%s/org.pt" default-index-dir))
-                                            (index-heading-entries . ,json-false)))))
-                  config))
-
-     ;; Else if khoj config has no org content config
-     ((not (alist-get 'org (alist-get 'content-type config)))
-      (message "khoj.el: Org-mode content on server not configured yet.")
-     (let ((new-content-type (alist-get 'content-type config)))
-        (setq new-content-type (delq (assoc 'org new-content-type) new-content-type))
-        (cl-pushnew `(org . ((input-files . ,khoj-org-files)
-                             (input-filter . ,org-directory-regexes)
-                             (compressed-jsonl . ,(format "%s/org.jsonl.gz" default-index-dir))
-                             (embeddings-file . ,(format "%s/org.pt" default-index-dir))
-                             (index-heading-entries . ,json-false)))
-                    new-content-type)
-        (setq config (delq (assoc 'content-type config) config))
-        (cl-pushnew `(content-type . ,new-content-type) config)))
-
-     ;; Else if khoj is not configured to index specified org files
-     ((not (and (equal (alist-get 'input-files (alist-get 'org (alist-get 'content-type config))) khoj-org-files)
-                (equal (alist-get 'input-filter (alist-get 'org (alist-get 'content-type config))) org-directory-regexes)))
-      (message "khoj.el: Org-mode content on server is stale.")
-      (let* ((index-directory (khoj--get-directory-from-config config '(content-type org embeddings-file)))
-             (new-content-type (alist-get 'content-type config)))
-        (setq new-content-type (delq (assoc 'org new-content-type) new-content-type))
-        (cl-pushnew `(org . ((input-files . ,khoj-org-files)
-                             (input-filter . ,org-directory-regexes)
-                             (compressed-jsonl . ,(format "%s/org.jsonl.gz" index-directory))
-                             (embeddings-file . ,(format "%s/org.pt" index-directory))
-                             (index-heading-entries . ,json-false)))
-                    new-content-type)
-        (setq config (delq (assoc 'content-type config) config))
-        (cl-pushnew `(content-type . ,new-content-type) config))))
-
    ;; Configure processors
    (cond
     ((not khoj-openai-api-key)
@ -441,10 +427,11 @@ CONFIG is json obtained from Khoj config API."

     ;; If khoj backend isn't configured yet
     ((not current-config)
-      (message "khoj.el: Chat not configured yet.")
+      (message "khoj.el: Khoj not configured yet.")
      (setq config (delq (assoc 'processor config) config))
      (cl-pushnew `(processor . ((conversation . ((conversation-logfile . ,(format "%s/conversation.json" default-chat-dir))
-                                                  (enable-offline-chat . ,enable-offline-chat)
+                                                  (offline-chat . ((enable-offline-chat . ,enable-offline-chat)
+                                                                   (chat-model . ,offline-chat-model)))
                                                  (openai . ((chat-model . ,chat-model)
                                                             (api-key . ,khoj-openai-api-key)))))))
                  config))
@ -455,7 +442,8 @@ CONFIG is json obtained from Khoj config API."
       (let ((new-processor-type (alist-get 'processor config)))
         (setq new-processor-type (delq (assoc 'conversation new-processor-type) new-processor-type))
         (cl-pushnew `(conversation . ((conversation-logfile . ,(format "%s/conversation.json" default-chat-dir))
-                                       (enable-offline-chat . ,enable-offline-chat)
+                                       (offline-chat . ((enable-offline-chat . ,enable-offline-chat)
+                                                        (chat-model . ,offline-chat-model)))
                                       (openai . ((chat-model . ,chat-model)
                                                  (api-key . ,khoj-openai-api-key)))))
                     new-processor-type)
@ -465,13 +453,15 @@ CONFIG is json obtained from Khoj config API."
     ;; Else if chat configuration in khoj backend has gone stale
     ((not (and (equal (alist-get 'api-key (alist-get 'openai (alist-get 'conversation (alist-get 'processor config)))) khoj-openai-api-key)
                (equal (alist-get 'chat-model (alist-get 'openai (alist-get 'conversation (alist-get 'processor config)))) khoj-chat-model)
-                (equal (alist-get 'enable-offline-chat (alist-get 'conversation (alist-get 'processor config))) enable-offline-chat)))
+                (equal (alist-get 'enable-offline-chat (alist-get 'offline-chat (alist-get 'conversation (alist-get 'processor config)))) enable-offline-chat)
+                (equal (alist-get 'chat-model (alist-get 'offline-chat (alist-get 'conversation (alist-get 'processor config)))) offline-chat-model)))
      (message "khoj.el: Chat configuration has gone stale.")
      (let* ((chat-directory (khoj--get-directory-from-config config '(processor conversation conversation-logfile)))
             (new-processor-type (alist-get 'processor config)))
        (setq new-processor-type (delq (assoc 'conversation new-processor-type) new-processor-type))
        (cl-pushnew `(conversation . ((conversation-logfile . ,(format "%s/conversation.json" chat-directory))
-                                      (enable-offline-chat . ,enable-offline-chat)
+                                      (offline-chat . ((enable-offline-chat . ,enable-offline-chat)
+                                                       (chat-model . ,offline-chat-model)))
                                      (openai . ((chat-model . ,khoj-chat-model)
                                                 (api-key . ,khoj-openai-api-key)))))
                    new-processor-type)
@ -509,9 +499,75 @@ CONFIG is json obtained from Khoj config API."
      (khoj--server-configure))))


-;; -----------------------------------------------
-;; Extract and Render Entries of each Content Type
-;; -----------------------------------------------
+;; -------------------
+;; Khoj Index Content
+;; -------------------
+
+(defun khoj--server-index-files (&optional force content-type file-paths)
+  "Send files at `FILE-PATHS' to the Khoj server to index for search and chat.
+`FORCE' re-indexes all files of `CONTENT-TYPE' even if they are already indexed."
+  (interactive)
+  (let ((boundary (format "-------------------------%d" (random (expt 10 10))))
+        (files-to-index (or file-paths
+                            (append (mapcan (lambda (dir) (directory-files-recursively dir "\\.org$")) khoj-org-directories) khoj-org-files)))
+        (type-query (if (or (equal content-type "all") (not content-type)) "" (format "t=%s" content-type)))
+        (inhibit-message t)
+        (message-log-max nil))
+    (let ((url-request-method "POST")
+          (url-request-data (khoj--render-files-as-request-body files-to-index khoj--indexed-files boundary))
+          (url-request-extra-headers `(("content-type" . ,(format "multipart/form-data; boundary=%s" boundary))
+                                       ("x-api-key" . ,khoj-server-api-key))))
+      (with-current-buffer
+          (url-retrieve (format "%s/api/v1/index/update?%s&force=%s&client=emacs" khoj-server-url type-query (or force "false"))
+                        ;; render response from indexing API endpoint on server
+                        (lambda (status)
+                          (if (not status)
+                              (message "khoj.el: %scontent index %supdated" (if content-type (format "%s " content-type) "") (if force "force " ""))
+                            (with-current-buffer (current-buffer)
+                              (goto-char "\n\n")
+                              (message "khoj.el: Failed to %supdate %s content index. Status: %s. Response: %s"
+                                       (if force "force " "")
+                                       content-type
+                                       status
+                                       (string-trim (buffer-substring-no-properties (point) (point-max)))))))
+                        nil t t)))
+    (setq khoj--indexed-files files-to-index)))
+
+(defun khoj--render-files-as-request-body (files-to-index previously-indexed-files boundary)
+  "Render `FILES-TO-INDEX', `PREVIOUSLY-INDEXED-FILES' as multi-part form body.
+Use `BOUNDARY' to separate files. This is sent to Khoj server as a POST request."
+  (with-temp-buffer
+    (set-buffer-multibyte nil)
+    (insert "\n")
+    (dolist (file-to-index files-to-index)
+      (insert (format "--%s\r\n" boundary))
+      (insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
+      (insert "Content-Type: text/org\r\n\r\n")
+      (insert (with-temp-buffer
+                (insert-file-contents-literally file-to-index)
+                (buffer-string)))
+      (insert "\r\n"))
+    (dolist (file-to-index previously-indexed-files)
+      (when (not (member file-to-index files-to-index))
+        (insert (format "--%s\r\n" boundary))
+        (insert (format "Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n" file-to-index))
+        (insert "Content-Type: text/org\r\n\r\n")
+        (insert "")
+        (insert "\r\n")))
+    (insert (format "--%s--\r\n" boundary))
+    (buffer-string)))
+
+;; Cancel any running indexing timer, first
+(when khoj--index-timer
+    (cancel-timer khoj--index-timer))
+;; Send files to index on server every `khoj-index-interval' seconds
+(setq khoj--index-timer
+      (run-with-timer 60 khoj-index-interval 'khoj--server-index-files))
+
+
+;; -------------------------------------------
+;; Render Response from Khoj server for Emacs
+;; -------------------------------------------

 (defun khoj--extract-entries-as-markdown (json-response query)
  "Convert JSON-RESPONSE, QUERY from API to markdown entries."
@ -920,6 +976,9 @@ RECEIVE-DATE is the message receive date."
  (message "khoj.el: Teardown Incremental Search")
  ;; unset khoj minibuffer window
  (setq khoj--minibuffer-window nil)
+  (when (and khoj--search-on-idle-timer
+             (timerp khoj--search-on-idle-timer))
+    (cancel-timer khoj--search-on-idle-timer))
  ;; delete open connections to khoj server
  (khoj--delete-open-network-connections-to-server)
  ;; remove hooks for khoj incremental query and self
@ -942,8 +1001,10 @@ RECEIVE-DATE is the message receive date."
          ;; set current (mini-)buffer entered as khoj minibuffer
          ;; used to query khoj API only when user in khoj minibuffer
          (setq khoj--minibuffer-window (current-buffer))
-          (add-hook 'post-command-hook #'khoj--incremental-search) ; do khoj incremental search after every user action
-          (add-hook 'minibuffer-exit-hook #'khoj--teardown-incremental-search)) ; teardown khoj incremental search on minibuffer exit
+          ; do khoj incremental search after idle time
+          (setq khoj--search-on-idle-timer (run-with-idle-timer khoj-search-on-idle-time t #'khoj--incremental-search))
+          ; teardown khoj incremental search on minibuffer exit
+          (add-hook 'minibuffer-exit-hook #'khoj--teardown-incremental-search))
      (read-string khoj--query-prompt))))


@ -1014,17 +1075,20 @@ Paragraph only starts at first text after blank line."
 ;; Khoj Menu
 ;; ---------

-(transient-define-argument khoj--content-type-switch ()
-  :class 'transient-switches
-  :argument-format "--content-type=%s"
-  :argument-regexp ".+"
-  ;; set content type to: last used > based on current buffer > default type
-  :init-value (lambda (obj) (oset obj value (format "--content-type=%s" (or khoj--content-type (khoj--buffer-name-to-content-type (buffer-name))))))
-  ;; dynamically set choices to content types enabled on khoj backend
-  :choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "image")))
+(defun khoj--setup-and-show-menu ()
+  "Create Transient menu for khoj and show it."
+  ;; Create the Khoj Transient menu
+  (transient-define-argument khoj--content-type-switch ()
+    :class 'transient-switches
+    :argument-format "--content-type=%s"
+    :argument-regexp ".+"
+    ;; set content type to: last used > based on current buffer > default type
+    :init-value (lambda (obj) (oset obj value (format "--content-type=%s" (or khoj--content-type (khoj--buffer-name-to-content-type (buffer-name))))))
+    ;; dynamically set choices to content types enabled on khoj backend
+    :choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "image")))

-(transient-define-suffix khoj--search-command (&optional args)
-  (interactive (list (transient-args transient-current-command)))
+  (transient-define-suffix khoj--search-command (&optional args)
+    (interactive (list (transient-args transient-current-command)))
    (progn
      ;; set content type to: specified > last used > based on current buffer > default type
      (setq khoj--content-type (or (transient-arg-value "--content-type=" args) (khoj--buffer-name-to-content-type (buffer-name))))
@ -1033,9 +1097,9 @@ Paragraph only starts at first text after blank line."
      ;; trigger incremental search
      (call-interactively #'khoj-incremental)))

-(transient-define-suffix khoj--find-similar-command (&optional args)
-  "Find items similar to current item at point."
-  (interactive (list (transient-args transient-current-command)))
+  (transient-define-suffix khoj--find-similar-command (&optional args)
+    "Find items similar to current item at point."
+    (interactive (list (transient-args transient-current-command)))
    (progn
      ;; set content type to: specified > last used > based on current buffer > default type
      (setq khoj--content-type (or (transient-arg-value "--content-type=" args) (khoj--buffer-name-to-content-type (buffer-name))))
@ -1043,37 +1107,38 @@ Paragraph only starts at first text after blank line."
      (setq khoj-results-count (or (transient-arg-value "--results-count=" args) khoj-results-count))
      (khoj--find-similar khoj--content-type)))

-(transient-define-suffix khoj--update-command (&optional args)
-  "Call khoj API to update index of specified content type."
-  (interactive (list (transient-args transient-current-command)))
-  (let* ((force-update (if (member "--force-update" args) "true" "false"))
-         ;; set content type to: specified > last used > based on current buffer > default type
-         (content-type (or (transient-arg-value "--content-type=" args) (khoj--buffer-name-to-content-type (buffer-name))))
-         (type-query (if (equal content-type "all") "" (format "t=%s" content-type)))
-         (update-url (format "%s/api/update?%s&force=%s&client=emacs" khoj-server-url type-query force-update))
-         (url-request-method "GET"))
-    (progn
-      (setq khoj--content-type content-type)
-      (url-retrieve update-url (lambda (_) (message "khoj.el: %s index %supdated!" content-type (if (member "--force-update" args) "force " "")))))))
+  (transient-define-suffix khoj--update-command (&optional args)
+    "Call khoj API to update index of specified content type."
+    (interactive (list (transient-args transient-current-command)))
+    (let* ((force-update (if (member "--force-update" args) "true" "false"))
+           ;; set content type to: specified > last used > based on current buffer > default type
+           (content-type (or (transient-arg-value "--content-type=" args) (khoj--buffer-name-to-content-type (buffer-name))))
+           (url-request-method "GET"))
+      (progn
+        (setq khoj--content-type content-type)
+        (khoj--server-index-files force-update content-type))))

-(transient-define-suffix khoj--chat-command (&optional _)
-  "Command to Chat with Khoj."
-  (interactive (list (transient-args transient-current-command)))
-  (khoj--chat))
+  (transient-define-suffix khoj--chat-command (&optional _)
+    "Command to Chat with Khoj."
+    (interactive (list (transient-args transient-current-command)))
+    (khoj--chat))

-(transient-define-prefix khoj--menu ()
-  "Create Khoj Menu to Configure and Execute Commands."
-  [["Configure Search"
-    ("n" "Results Count" "--results-count=" :init-value (lambda (obj) (oset obj value (format "%s" khoj-results-count))))
-    ("t" "Content Type" khoj--content-type-switch)]
-   ["Configure Update"
-    ("-f" "Force Update" "--force-update")]]
-  [["Act"
-    ("c" "Chat" khoj--chat-command)
-    ("s" "Search" khoj--search-command)
-    ("f" "Find Similar" khoj--find-similar-command)
-    ("u" "Update" khoj--update-command)
-    ("q" "Quit" transient-quit-one)]])
+  (transient-define-prefix khoj--menu ()
+    "Create Khoj Menu to Configure and Execute Commands."
+    [["Configure Search"
+      ("n" "Results Count" "--results-count=" :init-value (lambda (obj) (oset obj value (format "%s" khoj-results-count))))
+      ("t" "Content Type" khoj--content-type-switch)]
+     ["Configure Update"
+      ("-f" "Force Update" "--force-update")]]
+    [["Act"
+      ("c" "Chat" khoj--chat-command)
+      ("s" "Search" khoj--search-command)
+      ("f" "Find Similar" khoj--find-similar-command)
+      ("u" "Update" khoj--update-command)
+      ("q" "Quit" transient-quit-one)]])
+
+  ;; Show the Khoj Transient menu
+  (khoj--menu))


 ;; ----------
@ -1086,7 +1151,7 @@ Paragraph only starts at first text after blank line."
  (interactive)
  (when khoj-auto-setup
    (khoj-setup t))
-  (khoj--menu))
+  (khoj--setup-and-show-menu))

 (provide 'khoj)

--- a/src/interface/emacs/tests/khoj-tests.el
+++ b/src/interface/emacs/tests/khoj-tests.el
@ -206,6 +206,64 @@ Rule everything\n")
      "Rule everything"))
    ))

+
+;; -------------------------------------
+;; Test Helpers to Index Content
+;; -------------------------------------
+
+(ert-deftest khoj-tests--render-files-to-add-request-body ()
+  "Test files are formatted into a multi-part http request body"
+  (let ((upgrade-file (make-temp-file "upgrade" nil ".org" "# Become God\n## Upgrade\n\nPenance to Immortality\n\n"))
+        (act-file (make-temp-file "act" nil ".org" "## Act\n\nRule everything\n\n")))
+    (unwind-protect
+        (progn
+          (should
+           (equal
+            (khoj--render-files-as-request-body (list upgrade-file act-file) '() "khoj")
+            (format
+            "\n--khoj\r\n\
+Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\
+Content-Type: text/org\r\n\r\n\
+# Become God\n\
+## Upgrade\n\n\
+Penance to Immortality\n\n\r
+--khoj\r\n\
+Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\
+Content-Type: text/org\r\n\r\n\
+## Act\n\n\
+Rule everything\n\n\r\n\
+--khoj--\r\n" upgrade-file act-file))))
+      (delete-file upgrade-file)
+      (delete-file act-file))))
+
+(ert-deftest khoj-tests--render-files-to-add-delete-in-request-body ()
+  "Test files are formatted into a multi-part http request body"
+  (let ((upgrade-file (make-temp-file "upgrade" nil ".org" "# Become God\n## Upgrade\n\nPenance to Immortality\n\n"))
+        (act-file (make-temp-file "act" nil ".org" "## Act\n\nRule everything\n\n")))
+    (unwind-protect
+        (progn
+          (should
+           (equal
+            (khoj--render-files-as-request-body (list upgrade-file act-file) (list upgrade-file act-file "/tmp/deleted-file.org") "khoj")
+            (format
+            "\n--khoj\r\n\
+Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\
+Content-Type: text/org\r\n\r\n\
+# Become God\n\
+## Upgrade\n\n\
+Penance to Immortality\n\n\r
+--khoj\r\n\
+Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\
+Content-Type: text/org\r\n\r\n\
+## Act\n\n\
+Rule everything\n\n\r
+--khoj\r\n\
+Content-Disposition: form-data; name=\"files\"; filename=\"%s\"\r\n\
+Content-Type: text/org\r\n\r\n\
+\r
+--khoj--\r\n" upgrade-file act-file "/tmp/deleted-file.org"))))
+      (delete-file upgrade-file)
+      (delete-file act-file))))

 (provide 'khoj-tests)

--- a/src/interface/obsidian/manifest.json
+++ b/src/interface/obsidian/manifest.json
@ -1,7 +1,7 @@
 {
 	"id": "khoj",
 	"name": "Khoj",
-	"version": "0.12.3",
+	"version": "0.13.0",
 	"minAppVersion": "0.15.0",
 	"description": "An Open-Source AI Personal Assistant for your Digital Brain",
 	"author": "Khoj Inc.",
--- a/src/interface/obsidian/package.json
+++ b/src/interface/obsidian/package.json
@ -1,7 +1,9 @@
 {
    "name": "Khoj",
-    "version": "0.12.3",
-    "description": "An AI Personal Assistant for your Digital Brain",
+    "version": "0.13.0",
+    "description": "An AI copilot for your Second Brain",
+    "author": "Debanjum Singh Solanky, Saba Imran <team@khoj.dev>",
+    "license": "GPL-3.0-or-later",
    "main": "src/main.js",
    "scripts": {
        "dev": "node esbuild.config.mjs",
@ -14,8 +16,6 @@
        "AI",
        "assistant"
    ],
-    "author": "Debanjum Singh Solanky",
-    "license": "GPL-3.0-or-later",
    "devDependencies": {
        "@types/node": "^16.11.6",
        "@typescript-eslint/eslint-plugin": "5.29.0",
--- a/src/interface/obsidian/src/main.ts
+++ b/src/interface/obsidian/src/main.ts
@ -1,12 +1,13 @@
-import { Notice, Plugin } from 'obsidian';
+import { Notice, Plugin, TFile } from 'obsidian';
 import { KhojSetting, KhojSettingTab, DEFAULT_SETTINGS } from 'src/settings'
 import { KhojSearchModal } from 'src/search_modal'
 import { KhojChatModal } from 'src/chat_modal'
-import { configureKhojBackend } from './utils';
+import { configureKhojBackend, updateContentIndex } from './utils';


 export default class Khoj extends Plugin {
    settings: KhojSetting;
+    indexingTimer: NodeJS.Timeout;

    async onload() {
        await this.loadSettings();
@ -54,6 +55,15 @@ export default class Khoj extends Plugin {

        // Add a settings tab so the user can configure khoj
        this.addSettingTab(new KhojSettingTab(this.app, this));
+
+        // Add scheduled job to update index every 60 minutes
+        this.indexingTimer = setInterval(async () => {
+            if (this.settings.autoConfigure) {
+                this.settings.lastSyncedFiles = await updateContentIndex(
+                    this.app.vault, this.settings, this.settings.lastSyncedFiles
+                );
+            }
+        }, 60 * 60 * 1000);
    }

    async loadSettings() {
@ -72,4 +82,12 @@ export default class Khoj extends Plugin {
        }
        this.saveData(this.settings);
    }
+
+    async onunload() {
+        // Remove scheduled job to update index at regular cadence
+        if (this.indexingTimer)
+            clearInterval(this.indexingTimer);
+
+        this.unload();
+    }
 }
--- a/src/interface/obsidian/src/settings.ts
+++ b/src/interface/obsidian/src/settings.ts
@ -1,5 +1,6 @@
-import { App, Notice, PluginSettingTab, request, Setting } from 'obsidian';
+import { App, Notice, PluginSettingTab, Setting, TFile } from 'obsidian';
 import Khoj from 'src/main';
+import { updateContentIndex } from './utils';

 export interface KhojSetting {
    enableOfflineChat: boolean;
@ -8,6 +9,7 @@ export interface KhojSetting {
    khojUrl: string;
    connectedToBackend: boolean;
    autoConfigure: boolean;
+    lastSyncedFiles: TFile[];
 }

 export const DEFAULT_SETTINGS: KhojSetting = {
@ -17,6 +19,7 @@ export const DEFAULT_SETTINGS: KhojSetting = {
    connectedToBackend: false,
    autoConfigure: true,
    openaiApiKey: '',
+    lastSyncedFiles: []
 }

 export class KhojSettingTab extends PluginSettingTab {
@ -118,8 +121,9 @@ export class KhojSettingTab extends PluginSettingTab {
                    }, 300);
                    this.plugin.registerInterval(progress_indicator);

-                    await request(`${this.plugin.settings.khojUrl}/api/update?t=markdown&force=true&client=obsidian`);
-                    await request(`${this.plugin.settings.khojUrl}/api/update?t=pdf&force=true&client=obsidian`);
+                    this.plugin.settings.lastSyncedFiles = await updateContentIndex(
+                        this.app.vault, this.plugin.settings, this.plugin.settings.lastSyncedFiles, true
+                    );
                    new Notice('✅ Updated Khoj index.');

                    // Reset button once index is updated
--- a/src/interface/obsidian/src/utils.ts
+++ b/src/interface/obsidian/src/utils.ts
@ -1,4 +1,4 @@
-import { FileSystemAdapter, Notice, RequestUrlParam, request, Vault, Modal } from 'obsidian';
+import { FileSystemAdapter, Notice, RequestUrlParam, request, Vault, Modal, TFile } from 'obsidian';
 import { KhojSetting } from 'src/settings'

 export function getVaultAbsolutePath(vault: Vault): string {
@ -14,18 +14,85 @@ type OpenAIType = null | {
    "api-key": string;
 };

+type OfflineChatType = null | {
+    "chat-model": string;
+    "enable-offline-chat": boolean;
+};
+
 interface ProcessorData {
    conversation: {
      "conversation-logfile": string;
      openai: OpenAIType;
-      "enable-offline-chat": boolean;
+      "offline-chat": OfflineChatType;
+      "tokenizer": null | string;
+      "max-prompt-size": null | number;
    };
 }

+function fileExtensionToMimeType (extension: string): string {
+    switch (extension) {
+        case 'pdf':
+            return 'application/pdf';
+        case 'png':
+            return 'image/png';
+        case 'jpg':
+        case 'jpeg':
+            return 'image/jpeg';
+        case 'md':
+        case 'markdown':
+            return 'text/markdown';
+        case 'org':
+            return 'text/org';
+        default:
+            return 'text/plain';
+    }
+}
+
+export async function updateContentIndex(vault: Vault, setting: KhojSetting, lastSyncedFiles: TFile[], regenerate: boolean = false): Promise<TFile[]> {
+    // Get all markdown, pdf files in the vault
+    console.log(`Khoj: Updating Khoj content index...`)
+    const files = vault.getFiles().filter(file => file.extension === 'md' || file.extension === 'pdf');
+    const binaryFileTypes = ['pdf', 'png', 'jpg', 'jpeg']
+    let countOfFilesToIndex = 0;
+    let countOfFilesToDelete = 0;
+
+    // Add all files to index as multipart form data
+    const formData = new FormData();
+    for (const file of files) {
+        countOfFilesToIndex++;
+        const encoding = binaryFileTypes.includes(file.extension) ? "binary" : "utf8";
+        const mimeType = fileExtensionToMimeType(file.extension) + (encoding === "utf8" ? "; charset=UTF-8" : "");
+        const fileContent = encoding == 'binary' ? await vault.readBinary(file) : await vault.read(file);
+        formData.append('files', new Blob([fileContent], { type: mimeType }), file.path);
+    }
+
+    // Add any previously synced files to be deleted to multipart form data
+    for (const lastSyncedFile of lastSyncedFiles) {
+        if (!files.includes(lastSyncedFile)) {
+            countOfFilesToDelete++;
+            formData.append('files', new Blob([]), lastSyncedFile.path);
+        }
+    }
+
+    // Call Khoj backend to update index with all markdown, pdf files
+    const response = await fetch(`${setting.khojUrl}/api/v1/index/update?force=${regenerate}&client=obsidian`, {
+        method: 'POST',
+        headers: {
+            'x-api-key': 'secret',
+        },
+        body: formData,
+    });
+
+    if (!response.ok) {
+        new Notice(`❗️Failed to update Khoj content index. Ensure Khoj server connected or raise issue on Khoj Discord/Github\nError: ${response.statusText}`);
+    } else {
+        console.log(`✅ Refreshed Khoj content index. Updated: ${countOfFilesToIndex} files, Deleted: ${countOfFilesToDelete} files.`);
+    }
+
+    return files;
+}
+
 export async function configureKhojBackend(vault: Vault, setting: KhojSetting, notify: boolean = true) {
-    let vaultPath = getVaultAbsolutePath(vault);
-    let mdInVault = `${vaultPath}/**/*.md`;
-    let pdfInVault = `${vaultPath}/**/*.pdf`;
    let khojConfigUrl = `${setting.khojUrl}/api/config/data`;

    // Check if khoj backend is configured, note if cannot connect to backend
@ -43,124 +110,33 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
    if (!setting.connectedToBackend) return;

    // Set index name from the path of the current vault
-    let indexName = vaultPath.replace(/\//g, '_').replace(/\\/g, '_').replace(/ /g, '_').replace(/:/g, '_');
    // Get default config fields from khoj backend
    let defaultConfig = await request(`${khojConfigUrl}/default`).then(response => JSON.parse(response));
-    let khojDefaultMdIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["markdown"]["embeddings-file"]);
-    let khojDefaultPdfIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["pdf"]["embeddings-file"]);
    let khojDefaultChatDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["processor"]["conversation"]["conversation-logfile"]);
-    let khojDefaultChatModelName = defaultConfig["processor"]["conversation"]["openai"]["chat-model"];
+    let khojDefaultOpenAIChatModelName = defaultConfig["processor"]["conversation"]["openai"]["chat-model"];
+    let khojDefaultOfflineChatModelName = defaultConfig["processor"]["conversation"]["offline-chat"]["chat-model"];

    // Get current config if khoj backend configured, else get default config from khoj backend
    await request(khoj_already_configured ? khojConfigUrl : `${khojConfigUrl}/default`)
        .then(response => JSON.parse(response))
        .then(data => {
-            khoj_already_configured = data["content-type"] != null;
-            // If khoj backend not configured yet
-            if (!khoj_already_configured) {
-                // Create khoj content-type config with only markdown configured
-                data["content-type"] = {
-                    "markdown": {
-                        "input-filter": [mdInVault],
-                        "input-files": null,
-                        "embeddings-file": `${khojDefaultMdIndexDirectory}/${indexName}.pt`,
-                        "compressed-jsonl": `${khojDefaultMdIndexDirectory}/${indexName}.jsonl.gz`,
-                    }
-                }
-
-                const hasPdfFiles = app.vault.getFiles().some(file => file.extension === 'pdf');
-
-                if (hasPdfFiles) {
-                    data["content-type"]["pdf"] = {
-                        "input-filter": [pdfInVault],
-                        "input-files": null,
-                        "embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
-                        "compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
-                    }
-                }
-            }
-            // Else if khoj config has no markdown content config
-            else if (!data["content-type"]["markdown"]) {
-                // Add markdown config to khoj content-type config
-                // Set markdown config to index markdown files in configured obsidian vault
-                data["content-type"]["markdown"] = {
-                    "input-filter": [mdInVault],
-                    "input-files": null,
-                    "embeddings-file": `${khojDefaultMdIndexDirectory}/${indexName}.pt`,
-                    "compressed-jsonl": `${khojDefaultMdIndexDirectory}/${indexName}.jsonl.gz`,
-                }
-            }
-            // Else if khoj is not configured to index markdown files in configured obsidian vault
-            else if (
-                data["content-type"]["markdown"]["input-files"] != null ||
-                data["content-type"]["markdown"]["input-filter"] == null ||
-                data["content-type"]["markdown"]["input-filter"].length != 1 ||
-                data["content-type"]["markdown"]["input-filter"][0] !== mdInVault) {
-                    // Update markdown config in khoj content-type config
-                    // Set markdown config to only index markdown files in configured obsidian vault
-                    let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
-                    data["content-type"]["markdown"] = {
-                        "input-filter": [mdInVault],
-                        "input-files": null,
-                        "embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`,
-                        "compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`,
-                    }
-            }
-
-            if (khoj_already_configured && !data["content-type"]["pdf"]) {
-                const hasPdfFiles = app.vault.getFiles().some(file => file.extension === 'pdf');
-
-                if (hasPdfFiles) {
-                    data["content-type"]["pdf"] = {
-                        "input-filter": [pdfInVault],
-                        "input-files": null,
-                        "embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
-                        "compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
-                    }
-                } else {
-                    data["content-type"]["pdf"] = null;
-                }
-            }
-            // Else if khoj is not configured to index pdf files in configured obsidian vault
-            else if (khoj_already_configured &&
-                (
-                    data["content-type"]["pdf"]["input-files"] != null ||
-                    data["content-type"]["pdf"]["input-filter"] == null ||
-                    data["content-type"]["pdf"]["input-filter"].length != 1 ||
-                    data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) {
-
-                let hasPdfFiles = app.vault.getFiles().some(file => file.extension === 'pdf');
-
-                if (hasPdfFiles) {
-                    // Update pdf config in khoj content-type config
-                    // Set pdf config to only index pdf files in configured obsidian vault
-                    let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]);
-                    data["content-type"]["pdf"] = {
-                        "input-filter": [pdfInVault],
-                        "input-files": null,
-                        "embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`,
-                        "compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`,
-                    }
-                } else {
-                    data["content-type"]["pdf"] = null;
-                }
-            }
-
            let conversationLogFile = data?.["processor"]?.["conversation"]?.["conversation-logfile"] ?? `${khojDefaultChatDirectory}/conversation.json`;
-
            let processorData: ProcessorData = {
                "conversation": {
                    "conversation-logfile": conversationLogFile,
                    "openai": null,
-                    "enable-offline-chat": setting.enableOfflineChat,
+                    "offline-chat": {
+                        "chat-model": khojDefaultOfflineChatModelName,
+                        "enable-offline-chat": setting.enableOfflineChat,
+                    },
+                    "tokenizer": null,
+                    "max-prompt-size": null,
                }
            }

            // If the Open AI API Key was configured in the plugin settings
            if (!!setting.openaiApiKey) {
-
-                let openAIChatModel = data?.["processor"]?.["conversation"]?.["openai"]?.["chat-model"] ?? khojDefaultChatModelName;
-
+                let openAIChatModel = data?.["processor"]?.["conversation"]?.["openai"]?.["chat-model"] ?? khojDefaultOpenAIChatModelName;
                processorData = {
                    "conversation": {
                        "conversation-logfile": conversationLogFile,
@ -168,7 +144,12 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
                            "chat-model": openAIChatModel,
                            "api-key": setting.openaiApiKey,
                        },
-                        "enable-offline-chat": setting.enableOfflineChat,
+                        "offline-chat": {
+                            "chat-model": khojDefaultOfflineChatModelName,
+                            "enable-offline-chat": setting.enableOfflineChat,
+                        },
+                        "tokenizer": null,
+                        "max-prompt-size": null,
                    },
                }
            }
@ -197,12 +178,8 @@ export async function updateKhojBackend(khojUrl: string, khojConfig: Object) {
        method: 'POST',
        contentType: 'application/json',
    };
-
    // Save khojConfig on khoj backend at khojConfigUrl
-    await request(requestContent)
-        // Refresh khoj search index after updating config
-        .then(_ => request(`${khojUrl}/api/update?t=markdown`))
-        .then(_ => request(`${khojUrl}/api/update?t=pdf`));
+    request(requestContent);
 }

 function getIndexDirectoryFromBackendConfig(filepath: string) {
--- a/src/interface/obsidian/versions.json
+++ b/src/interface/obsidian/versions.json
@ -24,5 +24,6 @@
 	"0.12.0": "0.15.0",
 	"0.12.1": "0.15.0",
 	"0.12.2": "0.15.0",
-	"0.12.3": "0.15.0"
+	"0.12.3": "0.15.0",
+	"0.13.0": "0.15.0"
 }
--- a/src/khoj/configure.py
+++ b/src/khoj/configure.py
@ -28,7 +28,7 @@ from khoj.utils.config import (
 )
 from khoj.utils.helpers import resolve_absolute_path, merge_dicts
 from khoj.utils.fs_syncer import collect_files
-from khoj.utils.rawconfig import FullConfig, ProcessorConfig, ConversationProcessorConfig
+from khoj.utils.rawconfig import FullConfig, OfflineChatProcessorConfig, ProcessorConfig, ConversationProcessorConfig
 from khoj.routers.indexer import configure_content, load_content, configure_search


@ -136,7 +136,7 @@ def configure_routes(app):

    app.include_router(api, prefix="/api")
    app.include_router(api_beta, prefix="/api/beta")
-    app.include_router(indexer, prefix="/v1/indexer")
+    app.include_router(indexer, prefix="/api/v1/index")
    app.include_router(web_client)
    app.include_router(auth_router, prefix="/auth")

@ -156,7 +156,7 @@ if not state.demo:
            state.content_index = configure_content(
                state.content_index, state.config.content_type, all_files, state.search_models
            )
-            logger.info("📬 Content index updated via Scheduler")
+            logger.info("📪 Content index updated via Scheduler")
        except Exception as e:
            logger.error(f"🚨 Error updating content index via Scheduler: {e}", exc_info=True)

@ -207,9 +207,7 @@ def configure_conversation_processor(
            conversation_config=ConversationProcessorConfig(
                conversation_logfile=conversation_logfile,
                openai=(conversation_config.openai if (conversation_config is not None) else None),
-                enable_offline_chat=(
-                    conversation_config.enable_offline_chat if (conversation_config is not None) else False
-                ),
+                offline_chat=conversation_config.offline_chat if conversation_config else OfflineChatProcessorConfig(),
            )
        )
    else:
--- a/src/khoj/interface/web/config.html
+++ b/src/khoj/interface/web/config.html
@ -236,7 +236,7 @@
                    </h3>
                </div>
                <div class="card-description-row">
-                <p class="card-description">Setup chat using OpenAI</p>
+                <p class="card-description">Setup online chat using OpenAI</p>
                </div>
                <div class="card-action-row">
                    <a class="card-button" href="/config/processor/conversation/openai">
@ -261,21 +261,21 @@
                    <img class="card-icon" src="/static/assets/icons/chat.svg" alt="Chat">
                    <h3 class="card-title">
                        Offline Chat
-                        <img id="configured-icon-conversation-enable-offline-chat" class="configured-icon {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.enable_offline_chat and current_model_state.conversation_gpt4all %}enabled{% else %}disabled{% endif %}" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
-                        {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.enable_offline_chat and not current_model_state.conversation_gpt4all %}
+                        <img id="configured-icon-conversation-enable-offline-chat" class="configured-icon {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.offline_chat.enable_offline_chat and current_model_state.conversation_gpt4all %}enabled{% else %}disabled{% endif %}" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
+                        {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.offline_chat.enable_offline_chat and not current_model_state.conversation_gpt4all %}
                            <img id="misconfigured-icon-conversation-enable-offline-chat" class="configured-icon" src="/static/assets/icons/question-mark-icon.svg" alt="Not Configured" title="The model was not downloaded as expected.">
                        {% endif %}
                    </h3>
                </div>
                <div class="card-description-row">
-                <p class="card-description">Setup offline chat (Llama V2)</p>
+                <p class="card-description">Setup offline chat</p>
                </div>
-                <div id="clear-enable-offline-chat" class="card-action-row {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.enable_offline_chat %}enabled{% else %}disabled{% endif %}">
+                <div id="clear-enable-offline-chat" class="card-action-row {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.offline_chat.enable_offline_chat %}enabled{% else %}disabled{% endif %}">
                    <button class="card-button" onclick="toggleEnableLocalLLLM(false)">
                        Disable
                    </button>
                </div>
-                <div id="set-enable-offline-chat" class="card-action-row {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.enable_offline_chat %}disabled{% else %}enabled{% endif %}">
+                <div id="set-enable-offline-chat" class="card-action-row {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.offline_chat.enable_offline_chat %}disabled{% else %}enabled{% endif %}">
                    <button class="card-button happy" onclick="toggleEnableLocalLLLM(true)">
                        Enable
                    </button>
@ -346,7 +346,7 @@
            featuresHintText.classList.add("show");
        }

-        fetch('/api/config/data/processor/conversation/enable_offline_chat' + '?enable_offline_chat=' + enable, {
+        fetch('/api/config/data/processor/conversation/offline_chat' + '?enable_offline_chat=' + enable, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
--- a/src/khoj/interface/web/content_type_input.html
+++ b/src/khoj/interface/web/content_type_input.html
@ -34,7 +34,7 @@
                            <input type="text" id="input-filter" name="input-filter" placeholder="~/Documents/{{content_type}}">
                        {% else %}
                            {% for input_filter in current_config['input_filter'] %}
-                                <input type="text" id="input-filter" name="input-filter" placeholder="~/Documents/{{content_type}}" value="{{ input_filter.split('/*')[0] }}">
+                                <input type="text" id="input-filter" name="input-filter" placeholder="~/Documents/{{content_type}}" value="{{ input_filter }}">
                            {% endfor %}
                        {% endif %}
                    </td>
@ -106,17 +106,18 @@

    submit.addEventListener("click", function(event) {
        event.preventDefault();
-        let globFormat = "**/*."
+        let globFormat = "**/*"
        let suffixes = [];
        if ('{{content_type}}' == "markdown")
-            suffixes = ["md", "markdown"]
+            suffixes = [".md", ".markdown"]
        else if ('{{content_type}}' == "org")
-            suffixes = ["org"]
+            suffixes = [".org"]
        else if ('{{content_type}}' === "pdf")
-            suffixes = ["pdf"]
+            suffixes = [".pdf"]
        else if ('{{content_type}}' === "plaintext")
-            suffixes = ['*']
+            suffixes = ['.*']

+        let globs = suffixes.map(x => `${globFormat}${x}`)
        var inputFileNodes = document.getElementsByName("input-files");
        var inputFiles = getValidInputNodes(inputFileNodes).map(node => node.value);

@ -124,10 +125,19 @@

        var inputFilter = [];
        var nodes = getValidInputNodes(inputFilterNodes);
+
+        // A regex that checks for globs in the path.  If they exist,
+        // we are going to just not add our own globing.  If they don't,
+        // then we will assume globbing should be done.
+        const glob_regex = /([*?\[\]])/;
        if (nodes.length > 0) {
            for (var i = 0; i < nodes.length; i++) {
-                for (var j = 0; j < suffixes.length; j++) {
-                    inputFilter.push(nodes[i].value + globFormat + suffixes[j]);
+                for (var j = 0; j < globs.length; j++) {
+                    if (glob_regex.test(nodes[i].value)) {
+                        inputFilter.push(nodes[i].value);
+                    } else {
+                        inputFilter.push(nodes[i].value + globs[j]);
+                    }
                }
            }
        }
--- a/src/khoj/migrations/migrate_offline_chat_schema.py
+++ b/src/khoj/migrations/migrate_offline_chat_schema.py
@ -0,0 +1,83 @@
+"""
+Current format of khoj.yml
+---
+app:
+    ...
+content-type:
+    ...
+processor:
+  conversation:
+    enable-offline-chat: false
+    conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
+    openai:
+        ...
+search-type:
+    ...
+
+New format of khoj.yml
+---
+app:
+    ...
+content-type:
+    ...
+processor:
+  conversation:
+    offline-chat:
+        enable-offline-chat: false
+        chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
+    tokenizer: null
+    max_prompt_size: null
+    conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
+    openai:
+        ...
+search-type:
+    ...
+"""
+import logging
+from packaging import version
+
+from khoj.utils.yaml import load_config_from_file, save_config_to_file
+
+
+logger = logging.getLogger(__name__)
+
+
+def migrate_offline_chat_schema(args):
+    schema_version = "0.12.3"
+    raw_config = load_config_from_file(args.config_file)
+    previous_version = raw_config.get("version")
+
+    if "processor" not in raw_config:
+        return args
+    if raw_config["processor"] is None:
+        return args
+    if "conversation" not in raw_config["processor"]:
+        return args
+
+    if previous_version is None or version.parse(previous_version) < version.parse("0.12.3"):
+        logger.info(
+            f"Upgrading config schema to {schema_version} from {previous_version} to make (offline) chat more configuration"
+        )
+        raw_config["version"] = schema_version
+
+        # Create max-prompt-size field in conversation processor schema
+        raw_config["processor"]["conversation"]["max-prompt-size"] = None
+        raw_config["processor"]["conversation"]["tokenizer"] = None
+
+        # Create offline chat schema based on existing enable_offline_chat field in khoj config schema
+        offline_chat_model = (
+            raw_config["processor"]["conversation"]
+            .get("offline-chat", {})
+            .get("chat-model", "llama-2-7b-chat.ggmlv3.q4_0.bin")
+        )
+        raw_config["processor"]["conversation"]["offline-chat"] = {
+            "enable-offline-chat": raw_config["processor"]["conversation"].get("enable-offline-chat", False),
+            "chat-model": offline_chat_model,
+        }
+
+        # Delete old enable-offline-chat field from conversation processor schema
+        if "enable-offline-chat" in raw_config["processor"]["conversation"]:
+            del raw_config["processor"]["conversation"]["enable-offline-chat"]
+
+        save_config_to_file(raw_config, args.config_file)
+    return args
--- a/src/khoj/processor/conversation/gpt4all/chat_model.py
+++ b/src/khoj/processor/conversation/gpt4all/chat_model.py
@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)

 def extract_questions_offline(
    text: str,
-    model: str = "llama-2-7b-chat.ggmlv3.q4_K_S.bin",
+    model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin",
    loaded_model: Union[Any, None] = None,
    conversation_log={},
    use_history: bool = True,
@ -113,7 +113,7 @@ def filter_questions(questions: List[str]):
    ]
    filtered_questions = []
    for q in questions:
-        if not any([word in q.lower() for word in hint_words]):
+        if not any([word in q.lower() for word in hint_words]) and not is_none_or_empty(q):
            filtered_questions.append(q)

    return filtered_questions
@ -123,10 +123,12 @@ def converse_offline(
    references,
    user_query,
    conversation_log={},
-    model: str = "llama-2-7b-chat.ggmlv3.q4_K_S.bin",
+    model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin",
    loaded_model: Union[Any, None] = None,
    completion_func=None,
    conversation_command=ConversationCommand.Default,
+    max_prompt_size=None,
+    tokenizer_name=None,
 ) -> Union[ThreadedGenerator, Iterator[str]]:
    """
    Converse with user using Llama
@ -158,6 +160,8 @@ def converse_offline(
        prompts.system_prompt_message_llamav2,
        conversation_log,
        model_name=model,
+        max_prompt_size=max_prompt_size,
+        tokenizer_name=tokenizer_name,
    )

    g = ThreadedGenerator(references, completion_func=completion_func)
--- a/src/khoj/processor/conversation/gpt4all/model_metadata.py
+++ b/src/khoj/processor/conversation/gpt4all/model_metadata.py
@ -1,3 +0,0 @@
-model_name_to_url = {
-    "llama-2-7b-chat.ggmlv3.q4_K_S.bin": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_K_S.bin"
-}
--- a/src/khoj/processor/conversation/gpt4all/utils.py
+++ b/src/khoj/processor/conversation/gpt4all/utils.py
@ -1,24 +1,8 @@
-import os
 import logging
-import requests
-import hashlib

-from tqdm import tqdm
-
-from khoj.processor.conversation.gpt4all import model_metadata

 logger = logging.getLogger(__name__)

-expected_checksum = {"llama-2-7b-chat.ggmlv3.q4_K_S.bin": "cfa87b15d92fb15a2d7c354b0098578b"}
-
-
-def get_md5_checksum(filename: str):
-    hash_md5 = hashlib.md5()
-    with open(filename, "rb") as f:
-        for chunk in iter(lambda: f.read(8192), b""):
-            hash_md5.update(chunk)
-    return hash_md5.hexdigest()
-

 def download_model(model_name: str):
    try:
@ -27,57 +11,12 @@ def download_model(model_name: str):
        logger.info("There was an error importing GPT4All. Please run pip install gpt4all in order to install it.")
        raise e

-    url = model_metadata.model_name_to_url.get(model_name)
-    model_path = os.path.expanduser(f"~/.cache/gpt4all/")
-    if not url:
-        logger.debug(f"Model {model_name} not found in model metadata. Skipping download.")
-        return GPT4All(model_name=model_name, model_path=model_path)
-
-    filename = os.path.expanduser(f"~/.cache/gpt4all/{model_name}")
-    if os.path.exists(filename):
-        # Check if the user is connected to the internet
-        try:
-            requests.get("https://www.google.com/", timeout=5)
-        except:
-            logger.debug("User is offline. Disabling allowed download flag")
-            return GPT4All(model_name=model_name, model_path=model_path, allow_download=False)
-        return GPT4All(model_name=model_name, model_path=model_path)
-
-    # Download the model to a tmp file. Once the download is completed, move the tmp file to the actual file
-    tmp_filename = filename + ".tmp"
-
+    # Use GPU for Chat Model, if available
    try:
-        os.makedirs(os.path.dirname(tmp_filename), exist_ok=True)
-        logger.debug(f"Downloading model {model_name} from {url} to {filename}...")
-        with requests.get(url, stream=True) as r:
-            r.raise_for_status()
-            total_size = int(r.headers.get("content-length", 0))
-            with open(tmp_filename, "wb") as f, tqdm(
-                unit="B",  # unit string to be displayed.
-                unit_scale=True,  # let tqdm to determine the scale in kilo, mega..etc.
-                unit_divisor=1024,  # is used when unit_scale is true
-                total=total_size,  # the total iteration.
-                desc=model_name,  # prefix to be displayed on progress bar.
-            ) as progress_bar:
-                for chunk in r.iter_content(chunk_size=8192):
-                    f.write(chunk)
-                    progress_bar.update(len(chunk))
+        model = GPT4All(model_name=model_name, device="gpu")
+        logger.debug("Loaded chat model to GPU.")
+    except ValueError:
+        model = GPT4All(model_name=model_name)
+        logger.debug("Loaded chat model to CPU.")

-        # Verify the checksum
-        if expected_checksum.get(model_name) != get_md5_checksum(tmp_filename):
-            logger.error(
-                f"Checksum verification failed for {filename}. Removing the tmp file. Offline model will not be available."
-            )
-            os.remove(tmp_filename)
-            raise ValueError(f"Checksum verification failed for downloading {model_name} from {url}.")
-
-        # Move the tmp file to the actual file
-        os.rename(tmp_filename, filename)
-        logger.debug(f"Successfully downloaded model {model_name} from {url} to {filename}")
-        return GPT4All(model_name)
-    except Exception as e:
-        logger.error(f"Failed to download model {model_name} from {url} to {filename}. Error: {e}", exc_info=True)
-        # Remove the tmp file if it exists
-        if os.path.exists(tmp_filename):
-            os.remove(tmp_filename)
-        return None
+    return model
--- a/src/khoj/processor/conversation/openai/gpt.py
+++ b/src/khoj/processor/conversation/openai/gpt.py
@ -116,6 +116,8 @@ def converse(
    temperature: float = 0.2,
    completion_func=None,
    conversation_command=ConversationCommand.Default,
+    max_prompt_size=None,
+    tokenizer_name=None,
 ):
    """
    Converse with user using OpenAI's ChatGPT
@ -141,6 +143,8 @@ def converse(
        prompts.personality.format(),
        conversation_log,
        model,
+        max_prompt_size,
+        tokenizer_name,
    )
    truncated_messages = "\n".join({f"{message.content[:40]}..." for message in messages})
    logger.debug(f"Conversation Context for GPT: {truncated_messages}")
--- a/src/khoj/processor/conversation/prompts.py
+++ b/src/khoj/processor/conversation/prompts.py
@ -23,7 +23,7 @@ no_notes_found = PromptTemplate.from_template(
    """.strip()
 )

-system_prompt_message_llamav2 = f"""You are Khoj, a friendly, smart and helpful personal assistant.
+system_prompt_message_llamav2 = f"""You are Khoj, a smart, inquisitive and helpful personal assistant.
 Using your general knowledge and our past conversations as context, answer the following question.
 If you do not know the answer, say 'I don't know.'"""

@ -51,13 +51,13 @@ extract_questions_system_prompt_llamav2 = PromptTemplate.from_template(

 general_conversation_llamav2 = PromptTemplate.from_template(
    """
-<s>[INST]{query}[/INST]
+<s>[INST] {query} [/INST]
 """.strip()
 )

 chat_history_llamav2_from_user = PromptTemplate.from_template(
    """
-<s>[INST]{message}[/INST]
+<s>[INST] {message} [/INST]
 """.strip()
 )

@ -69,7 +69,7 @@ chat_history_llamav2_from_assistant = PromptTemplate.from_template(

 conversation_llamav2 = PromptTemplate.from_template(
    """
-<s>[INST]{query}[/INST]
+<s>[INST] {query} [/INST]
 """.strip()
 )

@ -91,7 +91,7 @@ Question: {query}

 notes_conversation_llamav2 = PromptTemplate.from_template(
    """
-Notes:
+User's Notes:
 {references}
 Question: {query}
 """.strip()
@ -134,19 +134,25 @@ Answer (in second person):"""

 extract_questions_llamav2_sample = PromptTemplate.from_template(
    """
-<s>[INST]<<SYS>>Current Date: {current_date}<</SYS>>[/INST]</s>
-<s>[INST]How was my trip to Cambodia?[/INST][]</s>
-<s>[INST]Who did I visit the temple with on that trip?[/INST]Who did I visit the temple with in Cambodia?</s>
-<s>[INST]How should I take care of my plants?[/INST]What kind of plants do I have? What issues do my plants have?</s>
-<s>[INST]How many tennis balls fit in the back of a 2002 Honda Civic?[/INST]What is the size of a tennis ball? What is the trunk size of a 2002 Honda Civic?</s>
-<s>[INST]What did I do for Christmas last year?[/INST]What did I do for Christmas {last_year} dt>='{last_christmas_date}' dt<'{next_christmas_date}'</s>
-<s>[INST]How are you feeling today?[/INST]</s>
-<s>[INST]Is Alice older than Bob?[/INST]When was Alice born? What is Bob's age?</s>
-<s>[INST]<<SYS>>
+<s>[INST] <<SYS>>Current Date: {current_date}<</SYS>> [/INST]</s>
+<s>[INST] How was my trip to Cambodia? [/INST]
+How was my trip to Cambodia?</s>
+<s>[INST] Who did I visit the temple with on that trip? [/INST]
+Who did I visit the temple with in Cambodia?</s>
+<s>[INST] How should I take care of my plants? [/INST]
+What kind of plants do I have? What issues do my plants have?</s>
+<s>[INST] How many tennis balls fit in the back of a 2002 Honda Civic? [/INST]
+What is the size of a tennis ball? What is the trunk size of a 2002 Honda Civic?</s>
+<s>[INST] What did I do for Christmas last year? [/INST]
+What did I do for Christmas {last_year} dt>='{last_christmas_date}' dt<'{next_christmas_date}'</s>
+<s>[INST] How are you feeling today? [/INST]</s>
+<s>[INST] Is Alice older than Bob? [/INST]
+When was Alice born? What is Bob's age?</s>
+<s>[INST] <<SYS>>
 Use these notes from the user's previous conversations to provide a response:
 {chat_history}
-<</SYS>>[/INST]</s>
-<s>[INST]{query}[/INST]
+<</SYS>> [/INST]</s>
+<s>[INST] {query} [/INST]
 """
 )

--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@ -3,24 +3,27 @@ import logging
 from time import perf_counter
 import json
 from datetime import datetime
+import queue
 import tiktoken

 # External packages
 from langchain.schema import ChatMessage
-from transformers import LlamaTokenizerFast
+from transformers import AutoTokenizer

 # Internal Packages
-import queue
 from khoj.utils.helpers import merge_dicts

+
 logger = logging.getLogger(__name__)
-max_prompt_size = {
+model_to_prompt_size = {
    "gpt-3.5-turbo": 4096,
    "gpt-4": 8192,
-    "llama-2-7b-chat.ggmlv3.q4_K_S.bin": 1548,
+    "llama-2-7b-chat.ggmlv3.q4_0.bin": 1548,
    "gpt-3.5-turbo-16k": 15000,
 }
-tokenizer = {"llama-2-7b-chat.ggmlv3.q4_K_S.bin": "hf-internal-testing/llama-tokenizer"}
+model_to_tokenizer = {
+    "llama-2-7b-chat.ggmlv3.q4_0.bin": "hf-internal-testing/llama-tokenizer",
+}


 class ThreadedGenerator:
@ -82,9 +85,26 @@ def message_to_log(


 def generate_chatml_messages_with_context(
-    user_message, system_message, conversation_log={}, model_name="gpt-3.5-turbo", lookback_turns=2
+    user_message,
+    system_message,
+    conversation_log={},
+    model_name="gpt-3.5-turbo",
+    max_prompt_size=None,
+    tokenizer_name=None,
 ):
    """Generate messages for ChatGPT with context from previous conversation"""
+    # Set max prompt size from user config, pre-configured for model or to default prompt size
+    try:
+        max_prompt_size = max_prompt_size or model_to_prompt_size[model_name]
+    except:
+        max_prompt_size = 2000
+        logger.warning(
+            f"Fallback to default prompt size: {max_prompt_size}.\nConfigure max_prompt_size for unsupported model: {model_name} in Khoj settings to longer context window."
+        )
+
+    # Scale lookback turns proportional to max prompt size supported by model
+    lookback_turns = max_prompt_size // 750
+
    # Extract Chat History for Context
    chat_logs = []
    for chat in conversation_log.get("chat", []):
@ -105,19 +125,28 @@ def generate_chatml_messages_with_context(
    messages = user_chatml_message + rest_backnforths + system_chatml_message

    # Truncate oldest messages from conversation history until under max supported prompt size by model
-    messages = truncate_messages(messages, max_prompt_size[model_name], model_name)
+    messages = truncate_messages(messages, max_prompt_size, model_name, tokenizer_name)

    # Return message in chronological order
    return messages[::-1]


-def truncate_messages(messages: list[ChatMessage], max_prompt_size, model_name) -> list[ChatMessage]:
+def truncate_messages(
+    messages: list[ChatMessage], max_prompt_size, model_name: str, tokenizer_name=None
+) -> list[ChatMessage]:
    """Truncate messages to fit within max prompt size supported by model"""

-    if "llama" in model_name:
-        encoder = LlamaTokenizerFast.from_pretrained(tokenizer[model_name])
-    else:
-        encoder = tiktoken.encoding_for_model(model_name)
+    try:
+        if model_name.startswith("gpt-"):
+            encoder = tiktoken.encoding_for_model(model_name)
+        else:
+            encoder = AutoTokenizer.from_pretrained(tokenizer_name or model_to_tokenizer[model_name])
+    except:
+        default_tokenizer = "hf-internal-testing/llama-tokenizer"
+        encoder = AutoTokenizer.from_pretrained(default_tokenizer)
+        logger.warning(
+            f"Fallback to default chat model tokenizer: {default_tokenizer}.\nConfigure tokenizer for unsupported model: {model_name} in Khoj settings to improve context stuffing."
+        )

    system_message = messages.pop()
    system_message_tokens = len(encoder.encode(system_message.content))
--- a/src/khoj/processor/pdf/pdf_to_jsonl.py
+++ b/src/khoj/processor/pdf/pdf_to_jsonl.py
@ -65,7 +65,7 @@ class PdfToJsonl(TextToJsonl):
                # Write the PDF file to a temporary file, as it is stored in byte format in the pdf_file object and the PyPDFLoader expects a file path
                tmp_file = f"tmp_pdf_file.pdf"
                with open(f"{tmp_file}", "wb") as f:
-                    bytes = base64.b64decode(pdf_files[pdf_file])
+                    bytes = pdf_files[pdf_file]
                    f.write(bytes)
                loader = PyMuPDFLoader(f"{tmp_file}")
                pdf_entries_per_file = [page.page_content for page in loader.load()]
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@ -30,6 +30,7 @@ from khoj.utils.rawconfig import (
    GithubContentConfig,
    NotionContentConfig,
    ConversationProcessorConfig,
+    OfflineChatProcessorConfig,
 )
 from khoj.utils.helpers import resolve_absolute_path
 from khoj.utils.state import SearchType
@ -185,6 +186,10 @@ if not state.demo:
            state.content_index.markdown = None
        elif content_type == "org":
            state.content_index.org = None
+        elif content_type == "plaintext":
+            state.content_index.plaintext = None
+        else:
+            logger.warning(f"Request to delete unknown content type: {content_type} via API")

        try:
            save_config_to_file_updated_state()
@ -284,10 +289,11 @@ if not state.demo:
        except Exception as e:
            return {"status": "error", "message": str(e)}

-    @api.post("/config/data/processor/conversation/enable_offline_chat", status_code=200)
+    @api.post("/config/data/processor/conversation/offline_chat", status_code=200)
    async def set_processor_enable_offline_chat_config_data(
        request: Request,
        enable_offline_chat: bool,
+        offline_chat_model: Optional[str] = None,
        client: Optional[str] = None,
    ):
        _initialize_config()
@ -301,7 +307,12 @@ if not state.demo:
            state.config.processor = ProcessorConfig(conversation=ConversationProcessorConfig(conversation_logfile=conversation_logfile))  # type: ignore

        assert state.config.processor.conversation is not None
-        state.config.processor.conversation.enable_offline_chat = enable_offline_chat
+        if state.config.processor.conversation.offline_chat is None:
+            state.config.processor.conversation.offline_chat = OfflineChatProcessorConfig()
+
+        state.config.processor.conversation.offline_chat.enable_offline_chat = enable_offline_chat
+        if offline_chat_model is not None:
+            state.config.processor.conversation.offline_chat.chat_model = offline_chat_model
        state.processor_config = configure_processor(state.config.processor, state.processor_config)

        update_telemetry_state(
@ -322,7 +333,7 @@ if not state.demo:
 # Create Routes
@api.get("/config/data/default")
 def get_default_config_data():
-    return constants.default_config
+    return constants.empty_config


@api.get("/config/types", response_model=List[str])
@ -387,7 +398,7 @@ async def search(
    # Encode query with filter terms removed
    defiltered_query = user_query
    for filter in [DateFilter(), WordFilter(), FileFilter()]:
-        defiltered_query = filter.defilter(user_query)
+        defiltered_query = filter.defilter(defiltered_query)

    encoded_asymmetric_query = None
    if t == SearchType.All or t != SearchType.Image:
@ -622,7 +633,7 @@ def update(
        if state.processor_config:
            components.append("Conversation processor")
        components_msg = ", ".join(components)
-        logger.info(f"📬 {components_msg} updated via API")
+        logger.info(f"📪 {components_msg} updated via API")

    update_telemetry_state(
        request=request,
@ -702,12 +713,18 @@ async def chat(
 ) -> Response:
    perform_chat_checks()
    conversation_command = get_conversation_command(query=q, any_references=True)
+
+    q = q.replace(f"/{conversation_command.value}", "").strip()
+
    compiled_references, inferred_queries, defiltered_query = await extract_references_and_questions(
        request, q, (n or 5), conversation_command
    )
-    conversation_command = get_conversation_command(query=q, any_references=not is_none_or_empty(compiled_references))
+
+    if conversation_command == ConversationCommand.Default and is_none_or_empty(compiled_references):
+        conversation_command = ConversationCommand.General
+
    if conversation_command == ConversationCommand.Help:
-        model_type = "offline" if state.processor_config.conversation.enable_offline_chat else "openai"
+        model_type = "offline" if state.processor_config.conversation.offline_chat.enable_offline_chat else "openai"
        formatted_help = help_message.format(model=model_type, version=state.khoj_version)
        return StreamingResponse(iter([formatted_help]), media_type="text/event-stream", status_code=200)

@ -768,23 +785,21 @@ async def extract_references_and_questions(
        logger.warning(
            "No content index loaded, so cannot extract references from knowledge base. Please configure your data sources and update the index to chat with your notes."
        )
-        return compiled_references, inferred_queries
+        return compiled_references, inferred_queries, q

    if conversation_type == ConversationCommand.General:
        return compiled_references, inferred_queries, q

    # Extract filter terms from user message
    defiltered_query = q
-    filter_terms = []
    for filter in [DateFilter(), WordFilter(), FileFilter()]:
-        filter_terms += filter.get_filter_terms(q)
-        defiltered_query = filter.defilter(q)
-    filters_in_query = " ".join(filter_terms)
+        defiltered_query = filter.defilter(defiltered_query)
+    filters_in_query = q.replace(defiltered_query, "").strip()

    # Infer search queries from user message
    with timer("Extracting search queries took", logger):
        # If we've reached here, either the user has enabled offline chat or the openai model is enabled.
-        if state.processor_config.conversation.enable_offline_chat:
+        if state.processor_config.conversation.offline_chat.enable_offline_chat:
            loaded_model = state.processor_config.conversation.gpt4all_model.loaded_model
            inferred_queries = extract_questions_offline(
                defiltered_query, loaded_model=loaded_model, conversation_log=meta_log, should_extract_questions=False
@ -800,7 +815,7 @@ async def extract_references_and_questions(
    with timer("Searching knowledge base took", logger):
        result_list = []
        for query in inferred_queries:
-            n_items = min(n, 3) if state.processor_config.conversation.enable_offline_chat else n
+            n_items = min(n, 3) if state.processor_config.conversation.offline_chat.enable_offline_chat else n
            result_list.extend(
                await search(
                    f"{query} {filters_in_query}",
--- a/src/khoj/routers/helpers.py
+++ b/src/khoj/routers/helpers.py
@ -113,7 +113,7 @@ def generate_chat_response(
            meta_log=meta_log,
        )

-        if state.processor_config.conversation.enable_offline_chat:
+        if state.processor_config.conversation.offline_chat.enable_offline_chat:
            loaded_model = state.processor_config.conversation.gpt4all_model.loaded_model
            chat_response = converse_offline(
                references=compiled_references,
@ -122,6 +122,9 @@ def generate_chat_response(
                conversation_log=meta_log,
                completion_func=partial_completion,
                conversation_command=conversation_command,
+                model=state.processor_config.conversation.offline_chat.chat_model,
+                max_prompt_size=state.processor_config.conversation.max_prompt_size,
+                tokenizer_name=state.processor_config.conversation.tokenizer,
            )

        elif state.processor_config.conversation.openai_model:
@ -135,6 +138,8 @@ def generate_chat_response(
                api_key=api_key,
                completion_func=partial_completion,
                conversation_command=conversation_command,
+                max_prompt_size=state.processor_config.conversation.max_prompt_size,
+                tokenizer_name=state.processor_config.conversation.tokenizer,
            )

    except Exception as e:
--- a/src/khoj/routers/indexer.py
+++ b/src/khoj/routers/indexer.py
@ -1,11 +1,11 @@
 # Standard Packages
 import logging
-import sys
 from typing import Optional, Union, Dict

 # External Packages
-from fastapi import APIRouter, HTTPException, Header, Request, Body, Response
+from fastapi import APIRouter, HTTPException, Header, Request, Response, UploadFile
 from pydantic import BaseModel
+from khoj.routers.helpers import update_telemetry_state

 # Internal Packages
 from khoj.utils import state, constants
@ -56,42 +56,30 @@ class IndexerInput(BaseModel):
    plaintext: Optional[dict[str, str]] = None


-@indexer.post("/batch")
-async def index_batch(
+@indexer.post("/update")
+async def update(
    request: Request,
+    files: list[UploadFile],
    x_api_key: str = Header(None),
-    regenerate: bool = False,
-    search_type: Optional[Union[state.SearchType, str]] = None,
+    force: bool = False,
+    t: Optional[Union[state.SearchType, str]] = None,
+    client: Optional[str] = None,
+    user_agent: Optional[str] = Header(None),
+    referer: Optional[str] = Header(None),
+    host: Optional[str] = Header(None),
 ):
    if x_api_key != "secret":
        raise HTTPException(status_code=401, detail="Invalid API Key")
    state.config_lock.acquire()
    try:
-        logger.info(f"Received batch indexing request")
-        index_batch_request_acc = b""
-        async for chunk in request.stream():
-            index_batch_request_acc += chunk
-        data_bytes = sys.getsizeof(index_batch_request_acc)
-        unit = "KB"
-        data_size = data_bytes / 1024
-        if data_size > 1000:
-            unit = "MB"
-            data_size = data_size / 1024
-        if data_size > 1000:
-            unit = "GB"
-            data_size = data_size / 1024
-        data_size_metric = f"{data_size:.2f} {unit}"
-        logger.info(f"Received {data_size_metric} of data")
-        index_batch_request = IndexBatchRequest.parse_raw(index_batch_request_acc)
-        logger.info(f"Received {len(index_batch_request.files)} files")
-
+        logger.info(f"📬 Updating content index via API call by {client} client")
        org_files: Dict[str, str] = {}
        markdown_files: Dict[str, str] = {}
        pdf_files: Dict[str, str] = {}
        plaintext_files: Dict[str, str] = {}

-        for file in index_batch_request.files:
-            file_type = get_file_type(file.path)
+        for file in files:
+            file_type, encoding = get_file_type(file.content_type)
            dict_to_update = None
            if file_type == "org":
                dict_to_update = org_files
@ -103,9 +91,11 @@ async def index_batch(
                dict_to_update = plaintext_files

            if dict_to_update is not None:
-                dict_to_update[file.path] = file.content
+                dict_to_update[file.filename] = (
+                    file.file.read().decode("utf-8") if encoding == "utf-8" else file.file.read()
+                )
            else:
-                logger.info(f"Skipping unsupported streamed file: {file.path}")
+                logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file.filename}")

        indexer_input = IndexerInput(
            org=org_files,
@ -115,7 +105,7 @@ async def index_batch(
        )

        if state.config == None:
-            logger.info("First run, initializing state.")
+            logger.info("📬 Initializing content index on first run.")
            default_full_config = FullConfig(
                content_type=None,
                search_type=SearchConfig.parse_obj(constants.default_config["search-type"]),
@ -142,15 +132,30 @@ async def index_batch(
            state.config.content_type,
            indexer_input.dict(),
            state.search_models,
-            regenerate=regenerate,
-            t=search_type,
+            regenerate=force,
+            t=t,
            full_corpus=False,
        )

    except Exception as e:
-        logger.error(f"Failed to process batch indexing request: {e}", exc_info=True)
+        logger.error(
+            f"🚨 Failed to {force} update {t} content index triggered via API call by {client} client: {e}",
+            exc_info=True,
+        )
    finally:
        state.config_lock.release()
+
+    update_telemetry_state(
+        request=request,
+        telemetry_type="api",
+        api="index/update",
+        client=client,
+        user_agent=user_agent,
+        referer=referer,
+        host=host,
+    )
+
+    logger.info(f"📪 Content index updated via API call by {client} client")
    return Response(content="OK", status_code=200)


--- a/src/khoj/utils/cli.py
+++ b/src/khoj/utils/cli.py
@ -9,6 +9,7 @@ from khoj.utils.yaml import parse_config_from_file
 from khoj.migrations.migrate_version import migrate_config_to_version
 from khoj.migrations.migrate_processor_config_openai import migrate_processor_conversation_schema
 from khoj.migrations.migrate_offline_model import migrate_offline_model
+from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema


 def cli(args=None):
@ -55,7 +56,12 @@ def cli(args=None):


 def run_migrations(args):
-    migrations = [migrate_config_to_version, migrate_processor_conversation_schema, migrate_offline_model]
+    migrations = [
+        migrate_config_to_version,
+        migrate_processor_conversation_schema,
+        migrate_offline_model,
+        migrate_offline_chat_schema,
+    ]
    for migration in migrations:
        args = migration(args)
    return args
--- a/src/khoj/utils/config.py
+++ b/src/khoj/utils/config.py
@ -12,6 +12,8 @@ from khoj.processor.conversation.gpt4all.utils import download_model
 # External Packages
 import torch

+from khoj.utils.rawconfig import OfflineChatProcessorConfig
+
 logger = logging.getLogger(__name__)

 # Internal Packages
@ -84,7 +86,6 @@ class SearchModels:

@dataclass
 class GPT4AllProcessorConfig:
-    chat_model: Optional[str] = "llama-2-7b-chat.ggmlv3.q4_K_S.bin"
    loaded_model: Union[Any, None] = None


@ -95,18 +96,20 @@ class ConversationProcessorConfigModel:
    ):
        self.openai_model = conversation_config.openai
        self.gpt4all_model = GPT4AllProcessorConfig()
-        self.enable_offline_chat = conversation_config.enable_offline_chat
+        self.offline_chat = conversation_config.offline_chat or OfflineChatProcessorConfig()
+        self.max_prompt_size = conversation_config.max_prompt_size
+        self.tokenizer = conversation_config.tokenizer
        self.conversation_logfile = Path(conversation_config.conversation_logfile)
        self.chat_session: List[str] = []
        self.meta_log: dict = {}

-        if self.enable_offline_chat:
+        if self.offline_chat.enable_offline_chat:
            try:
-                self.gpt4all_model.loaded_model = download_model(self.gpt4all_model.chat_model)
-            except ValueError as e:
+                self.gpt4all_model.loaded_model = download_model(self.offline_chat.chat_model)
+            except Exception as e:
+                self.offline_chat.enable_offline_chat = False
                self.gpt4all_model.loaded_model = None
                logger.error(f"Error while loading offline chat model: {e}", exc_info=True)
-                self.enable_offline_chat = False
        else:
            self.gpt4all_model.loaded_model = None

--- a/src/khoj/utils/constants.py
+++ b/src/khoj/utils/constants.py
@ -6,6 +6,64 @@ empty_escape_sequences = "\n|\r|\t| "
 app_env_filepath = "~/.khoj/env"
 telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"

+empty_config = {
+    "content-type": {
+        "org": {
+            "input-files": None,
+            "input-filter": None,
+            "compressed-jsonl": "~/.khoj/content/org/org.jsonl.gz",
+            "embeddings-file": "~/.khoj/content/org/org_embeddings.pt",
+            "index-heading-entries": False,
+        },
+        "markdown": {
+            "input-files": None,
+            "input-filter": None,
+            "compressed-jsonl": "~/.khoj/content/markdown/markdown.jsonl.gz",
+            "embeddings-file": "~/.khoj/content/markdown/markdown_embeddings.pt",
+        },
+        "pdf": {
+            "input-files": None,
+            "input-filter": None,
+            "compressed-jsonl": "~/.khoj/content/pdf/pdf.jsonl.gz",
+            "embeddings-file": "~/.khoj/content/pdf/pdf_embeddings.pt",
+        },
+        "plaintext": {
+            "input-files": None,
+            "input-filter": None,
+            "compressed-jsonl": "~/.khoj/content/plaintext/plaintext.jsonl.gz",
+            "embeddings-file": "~/.khoj/content/plaintext/plaintext_embeddings.pt",
+        },
+    },
+    "search-type": {
+        "symmetric": {
+            "encoder": "sentence-transformers/all-MiniLM-L6-v2",
+            "cross-encoder": "cross-encoder/ms-marco-MiniLM-L-6-v2",
+            "model_directory": "~/.khoj/search/symmetric/",
+        },
+        "asymmetric": {
+            "encoder": "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
+            "cross-encoder": "cross-encoder/ms-marco-MiniLM-L-6-v2",
+            "model_directory": "~/.khoj/search/asymmetric/",
+        },
+        "image": {"encoder": "sentence-transformers/clip-ViT-B-32", "model_directory": "~/.khoj/search/image/"},
+    },
+    "processor": {
+        "conversation": {
+            "openai": {
+                "api-key": None,
+                "chat-model": "gpt-3.5-turbo",
+            },
+            "offline-chat": {
+                "enable-offline-chat": False,
+                "chat-model": "llama-2-7b-chat.ggmlv3.q4_0.bin",
+            },
+            "tokenizer": None,
+            "max-prompt-size": None,
+            "conversation-logfile": "~/.khoj/processor/conversation/conversation_logs.json",
+        }
+    },
+}
+
 # default app config to use
 default_config = {
    "content-type": {
@ -72,7 +130,12 @@ default_config = {
                "api-key": None,
                "chat-model": "gpt-3.5-turbo",
            },
-            "enable-offline-chat": False,
+            "offline-chat": {
+                "enable-offline-chat": False,
+                "chat-model": "llama-2-7b-chat.ggmlv3.q4_0.bin",
+            },
+            "tokenizer": None,
+            "max-prompt-size": None,
            "conversation-logfile": "~/.khoj/processor/conversation/conversation_logs.json",
        }
    },
--- a/src/khoj/utils/fs_syncer.py
+++ b/src/khoj/utils/fs_syncer.py
@ -1,6 +1,6 @@
 import logging
 import glob
-import base64
+import os
 from typing import Optional
 from bs4 import BeautifulSoup

@ -39,13 +39,13 @@ def get_plaintext_files(config: TextContentConfig) -> dict[str, str]:
        return soup.get_text(strip=True, separator="\n")

    # Extract required fields from config
-    input_files, input_filter = (
+    input_files, input_filters = (
        config.input_files,
        config.input_filter,
    )

    # Input Validation
-    if is_none_or_empty(input_files) and is_none_or_empty(input_filter):
+    if is_none_or_empty(input_files) and is_none_or_empty(input_filters):
        logger.debug("At least one of input-files or input-file-filter is required to be specified")
        return {}

@ -53,11 +53,12 @@ def get_plaintext_files(config: TextContentConfig) -> dict[str, str]:
    absolute_plaintext_files, filtered_plaintext_files = set(), set()
    if input_files:
        absolute_plaintext_files = {get_absolute_path(jsonl_file) for jsonl_file in input_files}
-    if input_filter:
+    if input_filters:
        filtered_plaintext_files = {
            filtered_file
-            for jsonl_file_filter in input_filter
-            for filtered_file in glob.glob(get_absolute_path(jsonl_file_filter), recursive=True)
+            for plaintext_file_filter in input_filters
+            for filtered_file in glob.glob(get_absolute_path(plaintext_file_filter), recursive=True)
+            if os.path.isfile(filtered_file)
        }

    all_target_files = sorted(absolute_plaintext_files | filtered_plaintext_files)
@ -73,12 +74,12 @@ def get_plaintext_files(config: TextContentConfig) -> dict[str, str]:

    filename_to_content_map = {}
    for file in all_target_files:
-        with open(file, "r") as f:
+        with open(file, "r", encoding="utf8") as f:
            try:
                plaintext_content = f.read()
                if file.endswith(("html", "htm", "xml")):
                    plaintext_content = extract_html_content(plaintext_content)
-                filename_to_content_map[file] = f.read()
+                filename_to_content_map[file] = plaintext_content
            except Exception as e:
                logger.warning(f"Unable to read file: {file} as plaintext. Skipping file.")
                logger.warning(e, exc_info=True)
@ -88,13 +89,13 @@ def get_plaintext_files(config: TextContentConfig) -> dict[str, str]:

 def get_org_files(config: TextContentConfig):
    # Extract required fields from config
-    org_files, org_file_filter = (
+    org_files, org_file_filters = (
        config.input_files,
        config.input_filter,
    )

    # Input Validation
-    if is_none_or_empty(org_files) and is_none_or_empty(org_file_filter):
+    if is_none_or_empty(org_files) and is_none_or_empty(org_file_filters):
        logger.debug("At least one of org-files or org-file-filter is required to be specified")
        return {}

@ -102,11 +103,12 @@ def get_org_files(config: TextContentConfig):
    absolute_org_files, filtered_org_files = set(), set()
    if org_files:
        absolute_org_files = {get_absolute_path(org_file) for org_file in org_files}
-    if org_file_filter:
+    if org_file_filters:
        filtered_org_files = {
            filtered_file
-            for org_file_filter in org_file_filter
+            for org_file_filter in org_file_filters
            for filtered_file in glob.glob(get_absolute_path(org_file_filter), recursive=True)
+            if os.path.isfile(filtered_file)
        }

    all_org_files = sorted(absolute_org_files | filtered_org_files)
@ -119,7 +121,7 @@ def get_org_files(config: TextContentConfig):

    filename_to_content_map = {}
    for file in all_org_files:
-        with open(file, "r") as f:
+        with open(file, "r", encoding="utf8") as f:
            try:
                filename_to_content_map[file] = f.read()
            except Exception as e:
@ -131,26 +133,27 @@ def get_org_files(config: TextContentConfig):

 def get_markdown_files(config: TextContentConfig):
    # Extract required fields from config
-    markdown_files, markdown_file_filter = (
+    markdown_files, markdown_file_filters = (
        config.input_files,
        config.input_filter,
    )

    # Input Validation
-    if is_none_or_empty(markdown_files) and is_none_or_empty(markdown_file_filter):
+    if is_none_or_empty(markdown_files) and is_none_or_empty(markdown_file_filters):
        logger.debug("At least one of markdown-files or markdown-file-filter is required to be specified")
        return {}

-    "Get Markdown files to process"
+    # Get markdown files to process
    absolute_markdown_files, filtered_markdown_files = set(), set()
    if markdown_files:
        absolute_markdown_files = {get_absolute_path(markdown_file) for markdown_file in markdown_files}

-    if markdown_file_filter:
+    if markdown_file_filters:
        filtered_markdown_files = {
            filtered_file
-            for markdown_file_filter in markdown_file_filter
+            for markdown_file_filter in markdown_file_filters
            for filtered_file in glob.glob(get_absolute_path(markdown_file_filter), recursive=True)
+            if os.path.isfile(filtered_file)
        }

    all_markdown_files = sorted(absolute_markdown_files | filtered_markdown_files)
@ -168,7 +171,7 @@ def get_markdown_files(config: TextContentConfig):

    filename_to_content_map = {}
    for file in all_markdown_files:
-        with open(file, "r") as f:
+        with open(file, "r", encoding="utf8") as f:
            try:
                filename_to_content_map[file] = f.read()
            except Exception as e:
@ -180,13 +183,13 @@ def get_markdown_files(config: TextContentConfig):

 def get_pdf_files(config: TextContentConfig):
    # Extract required fields from config
-    pdf_files, pdf_file_filter = (
+    pdf_files, pdf_file_filters = (
        config.input_files,
        config.input_filter,
    )

    # Input Validation
-    if is_none_or_empty(pdf_files) and is_none_or_empty(pdf_file_filter):
+    if is_none_or_empty(pdf_files) and is_none_or_empty(pdf_file_filters):
        logger.debug("At least one of pdf-files or pdf-file-filter is required to be specified")
        return {}

@ -194,11 +197,12 @@ def get_pdf_files(config: TextContentConfig):
    absolute_pdf_files, filtered_pdf_files = set(), set()
    if pdf_files:
        absolute_pdf_files = {get_absolute_path(pdf_file) for pdf_file in pdf_files}
-    if pdf_file_filter:
+    if pdf_file_filters:
        filtered_pdf_files = {
            filtered_file
-            for pdf_file_filter in pdf_file_filter
+            for pdf_file_filter in pdf_file_filters
            for filtered_file in glob.glob(get_absolute_path(pdf_file_filter), recursive=True)
+            if os.path.isfile(filtered_file)
        }

    all_pdf_files = sorted(absolute_pdf_files | filtered_pdf_files)
@ -214,7 +218,7 @@ def get_pdf_files(config: TextContentConfig):
    for file in all_pdf_files:
        with open(file, "rb") as f:
            try:
-                filename_to_content_map[file] = base64.b64encode(f.read()).decode("utf-8")
+                filename_to_content_map[file] = f.read()
            except Exception as e:
                logger.warning(f"Unable to read file: {file} as PDF. Skipping file.")
                logger.warning(e, exc_info=True)
--- a/src/khoj/utils/helpers.py
+++ b/src/khoj/utils/helpers.py
@ -66,20 +66,25 @@ def merge_dicts(priority_dict: dict, default_dict: dict):
    return merged_dict


-def get_file_type(filepath: str) -> str:
-    "Get file type from file path"
-    file_type = Path(filepath).suffix[1:]
+def get_file_type(file_type: str) -> tuple[str, str]:
+    "Get file type from file mime type"

-    if file_type in ["md", "markdown"]:
-        return "markdown"
-    elif file_type in ["org", "orgmode"]:
-        return "org"
-    elif file_type in ["txt", "text", "html", "xml", "htm", "rst"]:
-        return "plaintext"
-    elif file_type in ["pdf"]:
-        return "pdf"
-
-    return file_type
+    encoding = file_type.split("=")[1].strip().lower() if ";" in file_type else None
+    file_type = file_type.split(";")[0].strip() if ";" in file_type else file_type
+    if file_type in ["text/markdown"]:
+        return "markdown", encoding
+    elif file_type in ["text/org"]:
+        return "org", encoding
+    elif file_type in ["application/pdf"]:
+        return "pdf", encoding
+    elif file_type in ["image/jpeg"]:
+        return "jpeg", encoding
+    elif file_type in ["image/png"]:
+        return "png", encoding
+    elif file_type in ["text/plain", "text/html", "application/xml", "text/x-rst"]:
+        return "plaintext", encoding
+    else:
+        return "other", encoding


 def load_model(
--- a/src/khoj/utils/rawconfig.py
+++ b/src/khoj/utils/rawconfig.py
@ -91,10 +91,17 @@ class OpenAIProcessorConfig(ConfigBase):
    chat_model: Optional[str] = "gpt-3.5-turbo"


+class OfflineChatProcessorConfig(ConfigBase):
+    enable_offline_chat: Optional[bool] = False
+    chat_model: Optional[str] = "llama-2-7b-chat.ggmlv3.q4_0.bin"
+
+
 class ConversationProcessorConfig(ConfigBase):
    conversation_logfile: Path
    openai: Optional[OpenAIProcessorConfig]
-    enable_offline_chat: Optional[bool] = False
+    offline_chat: Optional[OfflineChatProcessorConfig]
+    max_prompt_size: Optional[int]
+    tokenizer: Optional[str]


 class ProcessorConfig(ConfigBase):
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -18,6 +18,7 @@ from khoj.utils.helpers import resolve_absolute_path
 from khoj.utils.rawconfig import (
    ContentConfig,
    ConversationProcessorConfig,
+    OfflineChatProcessorConfig,
    OpenAIProcessorConfig,
    ProcessorConfig,
    TextContentConfig,
@ -207,8 +208,9 @@ def processor_config_offline_chat(tmp_path_factory):

    # Setup conversation processor
    processor_config = ProcessorConfig()
+    offline_chat = OfflineChatProcessorConfig(enable_offline_chat=True)
    processor_config.conversation = ConversationProcessorConfig(
-        enable_offline_chat=True,
+        offline_chat=offline_chat,
        conversation_logfile=processor_dir.joinpath("conversation_logs.json"),
    )

--- a/tests/test_client.py
+++ b/tests/test_client.py
@ -6,6 +6,7 @@ from urllib.parse import quote

 # External Packages
 from fastapi.testclient import TestClient
+import pytest

 # Internal Packages
 from app.main import app
@ -60,13 +61,13 @@ def test_regenerate_with_invalid_content_type(client):


 # ----------------------------------------------------------------------------------------------------
-def test_index_batch(client):
+def test_index_update(client):
    # Arrange
-    request_body = get_sample_files_data()
+    files = get_sample_files_data()
    headers = {"x-api-key": "secret"}

    # Act
-    response = client.post("/v1/indexer/batch", json=request_body, headers=headers)
+    response = client.post("/api/v1/index/update", files=files, headers=headers)

    # Assert
    assert response.status_code == 200
@ -76,12 +77,11 @@ def test_index_batch(client):
 def test_regenerate_with_valid_content_type(client):
    for content_type in ["all", "org", "markdown", "image", "pdf", "notion", "plugin1"]:
        # Arrange
-        request_body = get_sample_files_data()
-
+        files = get_sample_files_data()
        headers = {"x-api-key": "secret"}

        # Act
-        response = client.post(f"/v1/indexer/batch?search_type={content_type}", json=request_body, headers=headers)
+        response = client.post(f"/api/v1/index/update?t={content_type}", files=files, headers=headers)
        # Assert
        assert response.status_code == 200, f"Returned status: {response.status_code} for content type: {content_type}"

@ -92,17 +92,17 @@ def test_regenerate_with_github_fails_without_pat(client):
    response = client.get(f"/api/update?force=true&t=github")

    # Arrange
-    request_body = get_sample_files_data()
-
+    files = get_sample_files_data()
    headers = {"x-api-key": "secret"}

    # Act
-    response = client.post(f"/v1/indexer/batch?search_type=github", json=request_body, headers=headers)
+    response = client.post(f"/api/v1/index/update?t=github", files=files, headers=headers)
    # Assert
    assert response.status_code == 200, f"Returned status: {response.status_code} for content type: github"


 # ----------------------------------------------------------------------------------------------------
+@pytest.mark.skip(reason="Flaky test on parallel test runs")
 def test_get_configured_types_via_api(client):
    # Act
    response = client.get(f"/api/config/types")
@ -288,24 +288,20 @@ def test_notes_search_with_exclude_filter(

 def get_sample_files_data():
    return {
-        "org": {
-            "path/to/filename.org": "* practicing piano",
-            "path/to/filename1.org": "** top 3 reasons why I moved to SF",
-            "path/to/filename2.org": "* how to build a search engine",
-        },
-        "pdf": {
-            "path/to/filename.pdf": "Moore's law does not apply to consumer hardware",
-            "path/to/filename1.pdf": "The sun is a ball of helium",
-            "path/to/filename2.pdf": "Effect of sunshine on baseline human happiness",
-        },
-        "plaintext": {
-            "path/to/filename.txt": "data,column,value",
-            "path/to/filename1.txt": "<html>my first web page</html>",
-            "path/to/filename2.txt": "2021-02-02 Journal Entry",
-        },
-        "markdown": {
-            "path/to/filename.md": "# Notes from client call",
-            "path/to/filename1.md": "## Studying anthropological records from the Fatimid caliphate",
-            "path/to/filename2.md": "**Understanding science through the lens of art**",
-        },
+        "files": ("path/to/filename.org", "* practicing piano", "text/org"),
+        "files": ("path/to/filename1.org", "** top 3 reasons why I moved to SF", "text/org"),
+        "files": ("path/to/filename2.org", "* how to build a search engine", "text/org"),
+        "files": ("path/to/filename.pdf", "Moore's law does not apply to consumer hardware", "application/pdf"),
+        "files": ("path/to/filename1.pdf", "The sun is a ball of helium", "application/pdf"),
+        "files": ("path/to/filename2.pdf", "Effect of sunshine on baseline human happiness", "application/pdf"),
+        "files": ("path/to/filename.txt", "data,column,value", "text/plain"),
+        "files": ("path/to/filename1.txt", "<html>my first web page</html>", "text/plain"),
+        "files": ("path/to/filename2.txt", "2021-02-02 Journal Entry", "text/plain"),
+        "files": ("path/to/filename.md", "# Notes from client call", "text/markdown"),
+        "files": (
+            "path/to/filename1.md",
+            "## Studying anthropological records from the Fatimid caliphate",
+            "text/markdown",
+        ),
+        "files": ("path/to/filename2.md", "**Understanding science through the lens of art**", "text/markdown"),
    }
--- a/tests/test_gpt4all_chat_actors.py
+++ b/tests/test_gpt4all_chat_actors.py
@ -24,7 +24,7 @@ from khoj.processor.conversation.gpt4all.utils import download_model

 from khoj.processor.conversation.utils import message_to_log

-MODEL_NAME = "llama-2-7b-chat.ggmlv3.q4_K_S.bin"
+MODEL_NAME = "llama-2-7b-chat.ggmlv3.q4_0.bin"


@pytest.fixture(scope="session")
@ -128,15 +128,15 @@ def test_extract_multiple_explicit_questions_from_message(loaded_model):
@pytest.mark.chatquality
 def test_extract_multiple_implicit_questions_from_message(loaded_model):
    # Act
-    response = extract_questions_offline("Is Morpheus taller than Neo?", loaded_model=loaded_model)
+    response = extract_questions_offline("Is Carl taller than Ross?", loaded_model=loaded_model)

    # Assert
-    expected_responses = ["height", "taller", "shorter", "heights"]
+    expected_responses = ["height", "taller", "shorter", "heights", "who"]
    assert len(response) <= 3

    for question in response:
        assert any([expected_response in question.lower() for expected_response in expected_responses]), (
-            "Expected chat actor to ask follow-up questions about Morpheus and Neo, but got: " + question
+            "Expected chat actor to ask follow-up questions about Carl and Ross, but got: " + question
        )


@ -145,7 +145,7 @@ def test_extract_multiple_implicit_questions_from_message(loaded_model):
 def test_generate_search_query_using_question_from_chat_history(loaded_model):
    # Arrange
    message_list = [
-        ("What is the name of Mr. Vader's daughter?", "Princess Leia", []),
+        ("What is the name of Mr. Anderson's daughter?", "Miss Barbara", []),
    ]

    # Act
@ -156,17 +156,22 @@ def test_generate_search_query_using_question_from_chat_history(loaded_model):
        use_history=True,
    )

-    expected_responses = [
-        "Vader",
-        "sons",
+    all_expected_in_response = [
+        "Anderson",
+    ]
+
+    any_expected_in_response = [
        "son",
-        "Darth",
+        "sons",
        "children",
    ]

    # Assert
    assert len(response) >= 1
-    assert any([expected_response in response[0] for expected_response in expected_responses]), (
+    assert all([expected_response in response[0] for expected_response in all_expected_in_response]), (
+        "Expected chat actor to ask for clarification in response, but got: " + response[0]
+    )
+    assert any([expected_response in response[0] for expected_response in any_expected_in_response]), (
        "Expected chat actor to ask for clarification in response, but got: " + response[0]
    )

@ -176,20 +181,20 @@ def test_generate_search_query_using_question_from_chat_history(loaded_model):
 def test_generate_search_query_using_answer_from_chat_history(loaded_model):
    # Arrange
    message_list = [
-        ("What is the name of Mr. Vader's daughter?", "Princess Leia", []),
+        ("What is the name of Mr. Anderson's daughter?", "Miss Barbara", []),
    ]

    # Act
    response = extract_questions_offline(
-        "Is she a Jedi?",
+        "Is she a Doctor?",
        conversation_log=populate_chat_history(message_list),
        loaded_model=loaded_model,
        use_history=True,
    )

    expected_responses = [
-        "Leia",
-        "Vader",
+        "Barbara",
+        "Robert",
        "daughter",
    ]

--- a/tests/test_pdf_to_jsonl.py
+++ b/tests/test_pdf_to_jsonl.py
@ -1,7 +1,6 @@
 # Standard Packages
 import json
 import os
-import base64

 # Internal Packages
 from khoj.processor.pdf.pdf_to_jsonl import PdfToJsonl
@ -16,7 +15,7 @@ def test_single_page_pdf_to_jsonl():
    # Extract Entries from specified Pdf files
    # Read singlepage.pdf into memory as bytes
    with open("tests/data/pdf/singlepage.pdf", "rb") as f:
-        pdf_bytes = base64.b64encode(f.read()).decode("utf-8")
+        pdf_bytes = f.read()

    data = {"tests/data/pdf/singlepage.pdf": pdf_bytes}
    entries, entry_to_file_map = PdfToJsonl.extract_pdf_entries(pdf_files=data)
@ -36,7 +35,7 @@ def test_multi_page_pdf_to_jsonl():
    # Act
    # Extract Entries from specified Pdf files
    with open("tests/data/pdf/multipage.pdf", "rb") as f:
-        pdf_bytes = base64.b64encode(f.read()).decode("utf-8")
+        pdf_bytes = f.read()

    data = {"tests/data/pdf/multipage.pdf": pdf_bytes}
    entries, entry_to_file_map = PdfToJsonl.extract_pdf_entries(pdf_files=data)
--- a/tests/test_text_search.py
+++ b/tests/test_text_search.py
@ -1,26 +1,25 @@
 # System Packages
 import logging
+import locale
 from pathlib import Path
 import os

 # External Packages
 import pytest
-from khoj.utils.config import SearchModels

 # Internal Packages
 from khoj.utils.state import content_index, search_models
 from khoj.search_type import text_search
-from khoj.utils.rawconfig import ContentConfig, SearchConfig, TextContentConfig
 from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
 from khoj.processor.github.github_to_jsonl import GithubToJsonl
+from khoj.utils.config import SearchModels
 from khoj.utils.fs_syncer import get_org_files
+from khoj.utils.rawconfig import ContentConfig, SearchConfig, TextContentConfig


 # Test
 # ----------------------------------------------------------------------------------------------------
-def test_text_search_setup_with_missing_file_raises_error(
-    org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig
-):
+def test_text_search_setup_with_missing_file_raises_error(org_config_with_only_new_file: TextContentConfig):
    # Arrange
    # Ensure file mentioned in org.input-files is missing
    single_new_file = Path(org_config_with_only_new_file.input_files[0])
@ -29,7 +28,23 @@ def test_text_search_setup_with_missing_file_raises_error(
    # Act
    # Generate notes embeddings during asymmetric setup
    with pytest.raises(FileNotFoundError):
-        data = get_org_files(org_config_with_only_new_file)
+        get_org_files(org_config_with_only_new_file)
+
+
+# ----------------------------------------------------------------------------------------------------
+def test_get_org_files_with_org_suffixed_dir_doesnt_raise_error(tmp_path: Path):
+    # Arrange
+    orgfile = tmp_path / "directory.org" / "file.org"
+    orgfile.parent.mkdir()
+    with open(orgfile, "w") as f:
+        f.write("* Heading\n- List item\n")
+    org_content_config = TextContentConfig(
+        input_filter=[f"{tmp_path}/**/*"], compressed_jsonl="test.jsonl", embeddings_file="test.pt"
+    )
+
+    # Act
+    # should not raise IsADirectoryError and return orgfile
+    assert get_org_files(org_content_config) == {f"{orgfile}": "* Heading\n- List item\n"}


 # ----------------------------------------------------------------------------------------------------
@ -48,6 +63,7 @@ def test_text_search_setup_with_empty_file_raises_error(
 def test_text_search_setup(content_config: ContentConfig, search_models: SearchModels):
    # Arrange
    data = get_org_files(content_config.org)
+
    # Act
    # Regenerate notes embeddings during asymmetric setup
    notes_model = text_search.setup(
--- a/versions.json
+++ b/versions.json
@ -24,5 +24,6 @@
 	"0.12.0": "0.15.0",
 	"0.12.1": "0.15.0",
 	"0.12.2": "0.15.0",
-	"0.12.3": "0.15.0"
+	"0.12.3": "0.15.0",
+	"0.13.0": "0.15.0"
 }