Render PDF search results in Khoj Obsidian interface

- Make plugin update khoj server config to index PDF files in vault too
- Make Obsidian plugin update index for PDF files in vault too
- Show PDF results in Khoj Search modal as well
  - Ensure combined results are sorted by score across both types
- Jump to the PDF file when its search result is selected from the modal
This commit is contained in:
Debanjum Singh Solanky 2023-06-01 20:31:28 +05:30
parent e3892945d4
commit bbe3bf9733
4 changed files with 65 additions and 18 deletions

View file

@ -42,7 +42,7 @@ https://github.com/debanjum/khoj/assets/6413477/3e33d8ea-25bb-46c8-a3bf-c92f78d0
1. Install Khoj via `pip` and start Khoj backend in non-gui mode
2. Install Khoj plugin via Community Plugins settings pane on Obsidian app
3. Check the new Khoj plugin settings
4. Wait for Khoj backend to index markdown files in the current Vault
4. Wait for Khoj backend to index markdown and PDF files in the current Vault
5. Open Khoj plugin on Obsidian via Search button on Left Pane
6. Search \"*Announce plugin to folks*\" in the [Obsidian Plugin docs](https://marcus.se.net/obsidian-plugin-docs/)
7. Jump to the [search result](https://marcus.se.net/obsidian-plugin-docs/publishing/submit-your-plugin)
@ -151,7 +151,7 @@ The plugin implements the following functionality to search your notes with Khoj
- [X] Open the Khoj search modal via left ribbon icon or the *Khoj: Search* command
- [X] Render results as Markdown preview to improve readability
- [X] Configure Khoj via the plugin setting tab on the settings page
- Set Obsidian Vault to Index with Khoj. Defaults to all markdown files in current Vault
- Set Obsidian Vault to Index with Khoj. Defaults to all markdown and PDF files in current Vault
- Set URL of Khoj backend
- Set Number of Search Results to show in Search Modal
- [X] Allow reranking of result to improve search quality

View file

@ -89,12 +89,24 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
async getSuggestions(query: string): Promise<SearchResult[]> {
// Query Khoj backend for search results
let encodedQuery = encodeURIComponent(query);
let searchUrl = `${this.setting.khojUrl}/api/search?q=${encodedQuery}&n=${this.setting.resultsCount}&r=${this.rerank}&t=markdown`;
let response = await request(searchUrl);
let data = JSON.parse(response);
let results = data
let searchUrl = `${this.setting.khojUrl}/api/search?q=${encodedQuery}&n=${this.setting.resultsCount}&r=${this.rerank}`;
// Get search results for markdown and pdf files
let mdResponse = await request(`${searchUrl}&t=markdown`);
let pdfResponse = await request(`${searchUrl}&t=pdf`);
// Parse search results
let mdData = JSON.parse(mdResponse)
.filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path))
.map((result: any) => { return { entry: result.entry, file: result.additional.file } as SearchResult; });
.map((result: any) => { return { entry: result.entry, score: result.score, file: result.additional.file }; });
let pdfData = JSON.parse(pdfResponse)
.filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path))
.map((result: any) => { return { entry: `## ${result.additional.compiled}`, score: result.score, file: result.additional.file } as SearchResult; })
// Combine markdown and PDF results and sort them by score
let results = mdData.concat(pdfData)
.sort((a: any, b: any) => b.score - a.score)
.map((result: any) => { return { entry: result.entry, file: result.file } as SearchResult; })
this.query = query;
return results;
@ -124,11 +136,12 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
}
async onChooseSuggestion(result: SearchResult, _: MouseEvent | KeyboardEvent) {
// Get all markdown files in vault
// Get all markdown and PDF files in vault
const mdFiles = this.app.vault.getMarkdownFiles();
const pdfFiles = this.app.vault.getFiles().filter(file => file.extension === 'pdf');
// Find the vault file matching file of chosen search result
let file_match = mdFiles
let file_match = mdFiles.concat(pdfFiles)
// Sort by descending length of path
// This finds longest path match when multiple files have same name
.sort((a, b) => b.path.length - a.path.length)
@ -138,7 +151,7 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
// Open vault file at heading of chosen search result
if (file_match) {
let resultHeading = result.entry.split('\n', 1)[0];
let resultHeading = file_match.extension !== 'pdf' ? result.entry.split('\n', 1)[0] : '';
let linkToEntry = `${file_match.path}${resultHeading}`
this.app.workspace.openLinkText(linkToEntry, '');
console.log(`Link: ${linkToEntry}, File: ${file_match.path}, Heading: ${resultHeading}`);

View file

@ -108,6 +108,7 @@ export class KhojSettingTab extends PluginSettingTab {
this.plugin.registerInterval(progress_indicator);
await request(`${this.plugin.settings.khojUrl}/api/update?t=markdown&force=true`);
await request(`${this.plugin.settings.khojUrl}/api/update?t=pdf&force=true`);
new Notice('✅ Updated Khoj index.');
// Reset button once index is updated

View file

@ -12,6 +12,7 @@ export function getVaultAbsolutePath(vault: Vault): string {
export async function configureKhojBackend(vault: Vault, setting: KhojSetting, notify: boolean = true) {
let vaultPath = getVaultAbsolutePath(vault);
let mdInVault = `${vaultPath}/**/*.md`;
let pdfInVault = `${vaultPath}/**/*.pdf`;
let khojConfigUrl = `${setting.khojUrl}/api/config/data`;
// Check if khoj backend is configured, note if cannot connect to backend
@ -32,7 +33,8 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
let indexName = vaultPath.replace(/\//g, '_').replace(/\\/g, '_').replace(/ /g, '_').replace(/:/g, '_');
// Get default config fields from khoj backend
let defaultConfig = await request(`${khojConfigUrl}/default`).then(response => JSON.parse(response));
let khojDefaultIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["markdown"]["embeddings-file"]);
let khojDefaultMdIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["markdown"]["embeddings-file"]);
let khojDefaultPdfIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["pdf"]["embeddings-file"]);
let khojDefaultChatDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["processor"]["conversation"]["conversation-logfile"]);
let khojDefaultChatModelName = defaultConfig["processor"]["conversation"]["model"];
@ -47,8 +49,14 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
"markdown": {
"input-filter": [mdInVault],
"input-files": null,
"embeddings-file": `${khojDefaultIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojDefaultIndexDirectory}/${indexName}.jsonl.gz`,
"embeddings-file": `${khojDefaultMdIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojDefaultMdIndexDirectory}/${indexName}.jsonl.gz`,
},
"pdf": {
"input-filter": [pdfInVault],
"input-files": null,
"embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
}
}
}
@ -59,8 +67,8 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
data["content-type"]["markdown"] = {
"input-filter": [mdInVault],
"input-files": null,
"embeddings-file": `${khojDefaultIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojDefaultIndexDirectory}/${indexName}.jsonl.gz`,
"embeddings-file": `${khojDefaultMdIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojDefaultMdIndexDirectory}/${indexName}.jsonl.gz`,
}
}
// Else if khoj is not configured to index markdown files in configured obsidian vault
@ -68,12 +76,37 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
data["content-type"]["markdown"]["input-filter"][0] !== mdInVault) {
// Update markdown config in khoj content-type config
// Set markdown config to only index markdown files in configured obsidian vault
let khojIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
data["content-type"]["markdown"] = {
"input-filter": [mdInVault],
"input-files": null,
"embeddings-file": `${khojIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojIndexDirectory}/${indexName}.jsonl.gz`,
"embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`,
}
}
if (khoj_already_configured && !data["content-type"]["pdf"]) {
// Add pdf config to khoj content-type config
// Set pdf config to index pdf files in configured obsidian vault
data["content-type"]["pdf"] = {
"input-filter": [pdfInVault],
"input-files": null,
"embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
}
}
// Else if khoj is not configured to index pdf files in configured obsidian vault
else if (khoj_already_configured &&
(data["content-type"]["pdf"]["input-filter"].length != 1 ||
data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) {
// Update pdf config in khoj content-type config
// Set pdf config to only index pdf files in configured obsidian vault
let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]);
data["content-type"]["pdf"] = {
"input-filter": [pdfInVault],
"input-files": null,
"embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`,
}
}