Pass any files to be deleted to the indexer API via the Khoj Obsidian plugin

- Keep state of previously synced files to identify files to be deleted
- Store the last synced files in settings so that this data persists
  across Obsidian restarts
This commit is contained in:
Debanjum Singh Solanky 2023-10-17 02:51:54 -07:00
parent f2e293a149
commit 8e627a5809
3 changed files with 20 additions and 5 deletions

View file

@ -59,7 +59,9 @@ export default class Khoj extends Plugin {
// Add scheduled job to update index every 60 minutes
this.indexingTimer = setInterval(async () => {
if (this.settings.autoConfigure) {
this.lastSyncedFiles = await updateContentIndex(this.app.vault, this.settings);
this.settings.lastSyncedFiles = await updateContentIndex(
this.app.vault, this.settings, this.settings.lastSyncedFiles
);
}
}, 60 * 60 * 1000);
}

View file

@ -1,4 +1,4 @@
import { App, Notice, PluginSettingTab, request, Setting } from 'obsidian';
import { App, Notice, PluginSettingTab, request, Setting, TFile } from 'obsidian';
import Khoj from 'src/main';
export interface KhojSetting {
@ -8,6 +8,7 @@ export interface KhojSetting {
khojUrl: string;
connectedToBackend: boolean;
autoConfigure: boolean;
lastSyncedFiles: TFile[];
}
export const DEFAULT_SETTINGS: KhojSetting = {
@ -17,6 +18,7 @@ export const DEFAULT_SETTINGS: KhojSetting = {
connectedToBackend: false,
autoConfigure: true,
openaiApiKey: '',
lastSyncedFiles: []
}
export class KhojSettingTab extends PluginSettingTab {

View file

@ -41,21 +41,32 @@ function fileExtensionToMimeType (extension: string): string {
}
}
export async function updateContentIndex(vault: Vault, setting: KhojSetting): Promise<TFile[]> {
export async function updateContentIndex(vault: Vault, setting: KhojSetting, lastSyncedFiles: TFile[]): Promise<TFile[]> {
// Get all markdown, pdf files in the vault
console.log(`Khoj: Updating Khoj content index...`)
const files = vault.getFiles().filter(file => file.extension === 'md' || file.extension === 'pdf');
const binaryFileTypes = ['pdf', 'png', 'jpg', 'jpeg']
let countOfFilesToIndex = 0;
let countOfFilesToDelete = 0;
// Create multipart form data with all markdown, pdf files
// Add all files to index as multipart form data
const formData = new FormData();
for (const file of files) {
countOfFilesToIndex++;
const encoding = binaryFileTypes.includes(file.extension) ? "binary" : "utf8";
const mimeType = fileExtensionToMimeType(file.extension) + (encoding === "utf8" ? "; charset=UTF-8" : "");
const fileContent = await vault.read(file);
formData.append('files', new Blob([fileContent], { type: mimeType }), file.path);
}
// Add any previously synced files to be deleted to multipart form data
for (const lastSyncedFile of lastSyncedFiles) {
if (!files.includes(lastSyncedFile)) {
countOfFilesToDelete++;
formData.append('files', new Blob([]), lastSyncedFile.path);
}
}
// Call Khoj backend to update index with all markdown, pdf files
const response = await fetch(`${setting.khojUrl}/api/v1/indexer/batch`, {
method: 'POST',
@ -68,7 +79,7 @@ export async function updateContentIndex(vault: Vault, setting: KhojSetting): Pr
if (!response.ok) {
new Notice(`Failed to update Khoj content index. Ensure Khoj server connected or raise issue on Khoj Discord/Github\nError: ${response.statusText}`);
} else {
console.log(`✅ Refreshed Khoj content index.`);
console.log(`✅ Refreshed Khoj content index. Updated: ${countOfFilesToIndex} files, Deleted: ${countOfFilesToDelete} files.`);
}
return files;