Push vault files to the Khoj server for indexing using the Khoj Obsidian plugin

Use a multipart/form-data request to sync the Markdown and PDF files in
the vault to the index on the Khoj server

Run a scheduled job to push vault updates for indexing every hour
This commit is contained in:
Debanjum Singh Solanky 2023-10-17 02:17:44 -07:00
parent 6baaaaf91a
commit f2e293a149
2 changed files with 71 additions and 3 deletions

View file

@ -1,12 +1,13 @@
import { Notice, Plugin } from 'obsidian';
import { Notice, Plugin, TFile } from 'obsidian';
import { KhojSetting, KhojSettingTab, DEFAULT_SETTINGS } from 'src/settings'
import { KhojSearchModal } from 'src/search_modal'
import { KhojChatModal } from 'src/chat_modal'
import { configureKhojBackend } from './utils';
import { configureKhojBackend, updateContentIndex } from './utils';
export default class Khoj extends Plugin {
settings: KhojSetting;
indexingTimer: NodeJS.Timeout;
async onload() {
await this.loadSettings();
@ -54,6 +55,13 @@ export default class Khoj extends Plugin {
// Add a settings tab so the user can configure khoj
this.addSettingTab(new KhojSettingTab(this.app, this));
// Add scheduled job to update index every 60 minutes
this.indexingTimer = setInterval(async () => {
if (this.settings.autoConfigure) {
this.lastSyncedFiles = await updateContentIndex(this.app.vault, this.settings);
}
}, 60 * 60 * 1000);
}
async loadSettings() {
@ -72,4 +80,12 @@ export default class Khoj extends Plugin {
}
this.saveData(this.settings);
}
/**
 * Plugin teardown hook: stops the periodic content-index job if one was
 * scheduled, then delegates to `unload()`.
 */
async onunload() {
    const scheduledJob = this.indexingTimer;
    if (scheduledJob) {
        // Cancel the recurring index update so it stops firing after teardown
        clearInterval(scheduledJob);
    }
    this.unload();
}
}

View file

@ -1,4 +1,4 @@
import { FileSystemAdapter, Notice, RequestUrlParam, request, Vault, Modal } from 'obsidian';
import { FileSystemAdapter, Notice, RequestUrlParam, request, Vault, Modal, TFile } from 'obsidian';
import { KhojSetting } from 'src/settings'
export function getVaultAbsolutePath(vault: Vault): string {
@ -22,6 +22,58 @@ interface ProcessorData {
};
}
/** MIME types for the file extensions this module knows about, keyed by lowercase extension. */
const MIME_TYPE_BY_EXTENSION: ReadonlyMap<string, string> = new Map([
    ['pdf', 'application/pdf'],
    ['png', 'image/png'],
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['md', 'text/markdown'],
    ['markdown', 'text/markdown'],
    ['org', 'text/org'],
]);

/**
 * Map a file extension (without the leading dot) to its MIME type.
 *
 * The match is exact and case-sensitive.
 *
 * @param extension - File extension such as `md` or `pdf`.
 * @returns The matching MIME type, or `text/plain` for any unrecognised extension.
 */
function fileExtensionToMimeType (extension: string): string {
    return MIME_TYPE_BY_EXTENSION.get(extension) ?? 'text/plain';
}
/**
 * Upload every Markdown and PDF file in the vault to the Khoj server so it
 * can (re)build its content index.
 *
 * All files are sent in a single multipart/form-data POST to
 * `${setting.khojUrl}/api/v1/indexer/batch`, one `files` part per file, with
 * the vault-relative path as the part's filename.
 *
 * @param vault - Vault whose files are collected and read.
 * @param setting - Plugin settings; only `khojUrl` is read here.
 * @returns The files that were included in the upload. The list is returned
 *   even when the server responds with a non-OK status.
 * @throws Propagates any rejection from reading a file or from `fetch`
 *   (e.g. the server is unreachable); only non-OK HTTP responses are turned
 *   into a user-facing Notice.
 */
export async function updateContentIndex(vault: Vault, setting: KhojSetting): Promise<TFile[]> {
    // Get all markdown, pdf files in the vault
    console.log(`Khoj: Updating Khoj content index...`)
    const files = vault.getFiles().filter(file => file.extension === 'md' || file.extension === 'pdf');
    const binaryFileTypes = ['pdf', 'png', 'jpg', 'jpeg']

    // Create multipart form data with all markdown, pdf files
    const formData = new FormData();
    for (const file of files) {
        const isBinary = binaryFileTypes.includes(file.extension);
        // Only text payloads carry a charset parameter
        const mimeType = fileExtensionToMimeType(file.extension) + (isBinary ? "" : "; charset=UTF-8");
        // Binary files must be read as raw bytes: vault.read() decodes the
        // file as text, which corrupts content such as PDFs before upload.
        const fileContent = isBinary ? await vault.readBinary(file) : await vault.read(file);
        formData.append('files', new Blob([fileContent], { type: mimeType }), file.path);
    }

    // Call Khoj backend to update index with all markdown, pdf files
    const response = await fetch(`${setting.khojUrl}/api/v1/indexer/batch`, {
        method: 'POST',
        headers: {
            // NOTE(review): hard-coded key value — confirm whether this should come from settings
            'x-api-key': 'secret',
        },
        body: formData,
    });

    if (!response.ok) {
        new Notice(`Failed to update Khoj content index. Ensure Khoj server connected or raise issue on Khoj Discord/Github\nError: ${response.statusText}`);
    } else {
        console.log(`✅ Refreshed Khoj content index.`);
    }

    return files;
}
export async function configureKhojBackend(vault: Vault, setting: KhojSetting, notify: boolean = true) {
let vaultPath = getVaultAbsolutePath(vault);
let mdInVault = `${vaultPath}/**/*.md`;