Render PDF search results in Khoj Obsidian interface

- Make plugin update khoj server config to index PDF files in vault too
- Make Obsidian plugin update index for PDF files in vault too
- Show PDF results in Khoj Search modal as well
  - Ensure combined results are sorted by score across both types
- Jump to PDF file when selecting a PDF search result from modal
This commit is contained in:
Debanjum Singh Solanky 2023-06-01 20:31:28 +05:30
parent e3892945d4
commit bbe3bf9733
4 changed files with 65 additions and 18 deletions

View file

@ -42,7 +42,7 @@ https://github.com/debanjum/khoj/assets/6413477/3e33d8ea-25bb-46c8-a3bf-c92f78d0
1. Install Khoj via `pip` and start Khoj backend in non-gui mode 1. Install Khoj via `pip` and start Khoj backend in non-gui mode
2. Install Khoj plugin via Community Plugins settings pane on Obsidian app 2. Install Khoj plugin via Community Plugins settings pane on Obsidian app
3. Check the new Khoj plugin settings 3. Check the new Khoj plugin settings
4. Wait for Khoj backend to index markdown files in the current Vault 4. Wait for Khoj backend to index markdown, PDF files in the current Vault
5. Open Khoj plugin on Obsidian via Search button on Left Pane 5. Open Khoj plugin on Obsidian via Search button on Left Pane
6. Search \"*Announce plugin to folks*\" in the [Obsidian Plugin docs](https://marcus.se.net/obsidian-plugin-docs/) 6. Search \"*Announce plugin to folks*\" in the [Obsidian Plugin docs](https://marcus.se.net/obsidian-plugin-docs/)
7. Jump to the [search result](https://marcus.se.net/obsidian-plugin-docs/publishing/submit-your-plugin) 7. Jump to the [search result](https://marcus.se.net/obsidian-plugin-docs/publishing/submit-your-plugin)
@ -151,7 +151,7 @@ The plugin implements the following functionality to search your notes with Khoj
- [X] Open the Khoj search modal via left ribbon icon or the *Khoj: Search* command - [X] Open the Khoj search modal via left ribbon icon or the *Khoj: Search* command
- [X] Render results as Markdown preview to improve readability - [X] Render results as Markdown preview to improve readability
- [X] Configure Khoj via the plugin setting tab on the settings page - [X] Configure Khoj via the plugin setting tab on the settings page
- Set Obsidian Vault to Index with Khoj. Defaults to all markdown files in current Vault - Set Obsidian Vault to Index with Khoj. Defaults to all markdown, PDF files in current Vault
- Set URL of Khoj backend - Set URL of Khoj backend
- Set Number of Search Results to show in Search Modal - Set Number of Search Results to show in Search Modal
- [X] Allow reranking of result to improve search quality - [X] Allow reranking of result to improve search quality

View file

@ -89,12 +89,24 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
async getSuggestions(query: string): Promise<SearchResult[]> { async getSuggestions(query: string): Promise<SearchResult[]> {
// Query Khoj backend for search results // Query Khoj backend for search results
let encodedQuery = encodeURIComponent(query); let encodedQuery = encodeURIComponent(query);
let searchUrl = `${this.setting.khojUrl}/api/search?q=${encodedQuery}&n=${this.setting.resultsCount}&r=${this.rerank}&t=markdown`; let searchUrl = `${this.setting.khojUrl}/api/search?q=${encodedQuery}&n=${this.setting.resultsCount}&r=${this.rerank}`;
let response = await request(searchUrl);
let data = JSON.parse(response); // Get search results for markdown and pdf files
let results = data let mdResponse = await request(`${searchUrl}&t=markdown`);
let pdfResponse = await request(`${searchUrl}&t=pdf`);
// Parse search results
let mdData = JSON.parse(mdResponse)
.filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path)) .filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path))
.map((result: any) => { return { entry: result.entry, file: result.additional.file } as SearchResult; }); .map((result: any) => { return { entry: result.entry, score: result.score, file: result.additional.file }; });
let pdfData = JSON.parse(pdfResponse)
.filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path))
.map((result: any) => { return { entry: `## ${result.additional.compiled}`, score: result.score, file: result.additional.file } as SearchResult; })
// Combine markdown and PDF results and sort them by score
let results = mdData.concat(pdfData)
.sort((a: any, b: any) => b.score - a.score)
.map((result: any) => { return { entry: result.entry, file: result.file } as SearchResult; })
this.query = query; this.query = query;
return results; return results;
@ -124,11 +136,12 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
} }
async onChooseSuggestion(result: SearchResult, _: MouseEvent | KeyboardEvent) { async onChooseSuggestion(result: SearchResult, _: MouseEvent | KeyboardEvent) {
// Get all markdown files in vault // Get all markdown and PDF files in vault
const mdFiles = this.app.vault.getMarkdownFiles(); const mdFiles = this.app.vault.getMarkdownFiles();
const pdfFiles = this.app.vault.getFiles().filter(file => file.extension === 'pdf');
// Find the vault file matching file of chosen search result // Find the vault file matching file of chosen search result
let file_match = mdFiles let file_match = mdFiles.concat(pdfFiles)
// Sort by descending length of path // Sort by descending length of path
// This finds longest path match when multiple files have same name // This finds longest path match when multiple files have same name
.sort((a, b) => b.path.length - a.path.length) .sort((a, b) => b.path.length - a.path.length)
@ -138,7 +151,7 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
// Open vault file at heading of chosen search result // Open vault file at heading of chosen search result
if (file_match) { if (file_match) {
let resultHeading = result.entry.split('\n', 1)[0]; let resultHeading = file_match.extension !== 'pdf' ? result.entry.split('\n', 1)[0] : '';
let linkToEntry = `${file_match.path}${resultHeading}` let linkToEntry = `${file_match.path}${resultHeading}`
this.app.workspace.openLinkText(linkToEntry, ''); this.app.workspace.openLinkText(linkToEntry, '');
console.log(`Link: ${linkToEntry}, File: ${file_match.path}, Heading: ${resultHeading}`); console.log(`Link: ${linkToEntry}, File: ${file_match.path}, Heading: ${resultHeading}`);

View file

@ -108,6 +108,7 @@ export class KhojSettingTab extends PluginSettingTab {
this.plugin.registerInterval(progress_indicator); this.plugin.registerInterval(progress_indicator);
await request(`${this.plugin.settings.khojUrl}/api/update?t=markdown&force=true`); await request(`${this.plugin.settings.khojUrl}/api/update?t=markdown&force=true`);
await request(`${this.plugin.settings.khojUrl}/api/update?t=pdf&force=true`);
new Notice('✅ Updated Khoj index.'); new Notice('✅ Updated Khoj index.');
// Reset button once index is updated // Reset button once index is updated

View file

@ -12,6 +12,7 @@ export function getVaultAbsolutePath(vault: Vault): string {
export async function configureKhojBackend(vault: Vault, setting: KhojSetting, notify: boolean = true) { export async function configureKhojBackend(vault: Vault, setting: KhojSetting, notify: boolean = true) {
let vaultPath = getVaultAbsolutePath(vault); let vaultPath = getVaultAbsolutePath(vault);
let mdInVault = `${vaultPath}/**/*.md`; let mdInVault = `${vaultPath}/**/*.md`;
let pdfInVault = `${vaultPath}/**/*.pdf`;
let khojConfigUrl = `${setting.khojUrl}/api/config/data`; let khojConfigUrl = `${setting.khojUrl}/api/config/data`;
// Check if khoj backend is configured, note if cannot connect to backend // Check if khoj backend is configured, note if cannot connect to backend
@ -32,7 +33,8 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
let indexName = vaultPath.replace(/\//g, '_').replace(/\\/g, '_').replace(/ /g, '_').replace(/:/g, '_'); let indexName = vaultPath.replace(/\//g, '_').replace(/\\/g, '_').replace(/ /g, '_').replace(/:/g, '_');
// Get default config fields from khoj backend // Get default config fields from khoj backend
let defaultConfig = await request(`${khojConfigUrl}/default`).then(response => JSON.parse(response)); let defaultConfig = await request(`${khojConfigUrl}/default`).then(response => JSON.parse(response));
let khojDefaultIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["markdown"]["embeddings-file"]); let khojDefaultMdIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["markdown"]["embeddings-file"]);
let khojDefaultPdfIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["pdf"]["embeddings-file"]);
let khojDefaultChatDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["processor"]["conversation"]["conversation-logfile"]); let khojDefaultChatDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["processor"]["conversation"]["conversation-logfile"]);
let khojDefaultChatModelName = defaultConfig["processor"]["conversation"]["model"]; let khojDefaultChatModelName = defaultConfig["processor"]["conversation"]["model"];
@ -47,8 +49,14 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
"markdown": { "markdown": {
"input-filter": [mdInVault], "input-filter": [mdInVault],
"input-files": null, "input-files": null,
"embeddings-file": `${khojDefaultIndexDirectory}/${indexName}.pt`, "embeddings-file": `${khojDefaultMdIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojDefaultIndexDirectory}/${indexName}.jsonl.gz`, "compressed-jsonl": `${khojDefaultMdIndexDirectory}/${indexName}.jsonl.gz`,
},
"pdf": {
"input-filter": [pdfInVault],
"input-files": null,
"embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
} }
} }
} }
@ -59,8 +67,8 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
data["content-type"]["markdown"] = { data["content-type"]["markdown"] = {
"input-filter": [mdInVault], "input-filter": [mdInVault],
"input-files": null, "input-files": null,
"embeddings-file": `${khojDefaultIndexDirectory}/${indexName}.pt`, "embeddings-file": `${khojDefaultMdIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojDefaultIndexDirectory}/${indexName}.jsonl.gz`, "compressed-jsonl": `${khojDefaultMdIndexDirectory}/${indexName}.jsonl.gz`,
} }
} }
// Else if khoj is not configured to index markdown files in configured obsidian vault // Else if khoj is not configured to index markdown files in configured obsidian vault
@ -68,12 +76,37 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
data["content-type"]["markdown"]["input-filter"][0] !== mdInVault) { data["content-type"]["markdown"]["input-filter"][0] !== mdInVault) {
// Update markdown config in khoj content-type config // Update markdown config in khoj content-type config
// Set markdown config to only index markdown files in configured obsidian vault // Set markdown config to only index markdown files in configured obsidian vault
let khojIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]); let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
data["content-type"]["markdown"] = { data["content-type"]["markdown"] = {
"input-filter": [mdInVault], "input-filter": [mdInVault],
"input-files": null, "input-files": null,
"embeddings-file": `${khojIndexDirectory}/${indexName}.pt`, "embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojIndexDirectory}/${indexName}.jsonl.gz`, "compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`,
}
}
if (khoj_already_configured && !data["content-type"]["pdf"]) {
// Add pdf config to khoj content-type config
// Set pdf config to index pdf files in configured obsidian vault
data["content-type"]["pdf"] = {
"input-filter": [pdfInVault],
"input-files": null,
"embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
}
}
// Else if khoj is not configured to index pdf files in configured obsidian vault
else if (khoj_already_configured &&
(data["content-type"]["pdf"]["input-filter"].length != 1 ||
data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) {
// Update pdf config in khoj content-type config
// Set pdf config to only index pdf files in configured obsidian vault
let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]);
data["content-type"]["pdf"] = {
"input-filter": [pdfInVault],
"input-files": null,
"embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`,
} }
} }