mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 23:48:56 +01:00
Render PDF search results in Khoj Obsidian interface
- Make plugin update khoj server config to index PDF files in vault too - Make Obsidian plugin update index for PDF files in vault too - Show PDF results in Khoj Search modal as well - Ensure combined results are sorted by score across both types - Jump to PDF file when a PDF search result is selected from the modal
This commit is contained in:
parent
e3892945d4
commit
bbe3bf9733
4 changed files with 65 additions and 18 deletions
|
@ -42,7 +42,7 @@ https://github.com/debanjum/khoj/assets/6413477/3e33d8ea-25bb-46c8-a3bf-c92f78d0
|
||||||
1. Install Khoj via `pip` and start Khoj backend in non-gui mode
|
1. Install Khoj via `pip` and start Khoj backend in non-gui mode
|
||||||
2. Install Khoj plugin via Community Plugins settings pane on Obsidian app
|
2. Install Khoj plugin via Community Plugins settings pane on Obsidian app
|
||||||
3. Check the new Khoj plugin settings
|
3. Check the new Khoj plugin settings
|
||||||
4. Wait for Khoj backend to index markdown files in the current Vault
|
4. Wait for Khoj backend to index markdown, PDF files in the current Vault
|
||||||
5. Open Khoj plugin on Obsidian via Search button on Left Pane
|
5. Open Khoj plugin on Obsidian via Search button on Left Pane
|
||||||
6. Search \"*Announce plugin to folks*\" in the [Obsidian Plugin docs](https://marcus.se.net/obsidian-plugin-docs/)
|
6. Search \"*Announce plugin to folks*\" in the [Obsidian Plugin docs](https://marcus.se.net/obsidian-plugin-docs/)
|
||||||
7. Jump to the [search result](https://marcus.se.net/obsidian-plugin-docs/publishing/submit-your-plugin)
|
7. Jump to the [search result](https://marcus.se.net/obsidian-plugin-docs/publishing/submit-your-plugin)
|
||||||
|
@ -151,7 +151,7 @@ The plugin implements the following functionality to search your notes with Khoj
|
||||||
- [X] Open the Khoj search modal via left ribbon icon or the *Khoj: Search* command
|
- [X] Open the Khoj search modal via left ribbon icon or the *Khoj: Search* command
|
||||||
- [X] Render results as Markdown preview to improve readability
|
- [X] Render results as Markdown preview to improve readability
|
||||||
- [X] Configure Khoj via the plugin setting tab on the settings page
|
- [X] Configure Khoj via the plugin setting tab on the settings page
|
||||||
- Set Obsidian Vault to Index with Khoj. Defaults to all markdown files in current Vault
|
- Set Obsidian Vault to Index with Khoj. Defaults to all markdown, PDF files in current Vault
|
||||||
- Set URL of Khoj backend
|
- Set URL of Khoj backend
|
||||||
- Set Number of Search Results to show in Search Modal
|
- Set Number of Search Results to show in Search Modal
|
||||||
- [X] Allow reranking of result to improve search quality
|
- [X] Allow reranking of result to improve search quality
|
||||||
|
|
|
@ -89,12 +89,24 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
|
||||||
async getSuggestions(query: string): Promise<SearchResult[]> {
|
async getSuggestions(query: string): Promise<SearchResult[]> {
|
||||||
// Query Khoj backend for search results
|
// Query Khoj backend for search results
|
||||||
let encodedQuery = encodeURIComponent(query);
|
let encodedQuery = encodeURIComponent(query);
|
||||||
let searchUrl = `${this.setting.khojUrl}/api/search?q=${encodedQuery}&n=${this.setting.resultsCount}&r=${this.rerank}&t=markdown`;
|
let searchUrl = `${this.setting.khojUrl}/api/search?q=${encodedQuery}&n=${this.setting.resultsCount}&r=${this.rerank}`;
|
||||||
let response = await request(searchUrl);
|
|
||||||
let data = JSON.parse(response);
|
// Get search results for markdown and pdf files
|
||||||
let results = data
|
let mdResponse = await request(`${searchUrl}&t=markdown`);
|
||||||
|
let pdfResponse = await request(`${searchUrl}&t=pdf`);
|
||||||
|
|
||||||
|
// Parse search results
|
||||||
|
let mdData = JSON.parse(mdResponse)
|
||||||
.filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path))
|
.filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path))
|
||||||
.map((result: any) => { return { entry: result.entry, file: result.additional.file } as SearchResult; });
|
.map((result: any) => { return { entry: result.entry, score: result.score, file: result.additional.file }; });
|
||||||
|
let pdfData = JSON.parse(pdfResponse)
|
||||||
|
.filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path))
|
||||||
|
.map((result: any) => { return { entry: `## ${result.additional.compiled}`, score: result.score, file: result.additional.file } as SearchResult; })
|
||||||
|
|
||||||
|
// Combine markdown and PDF results and sort them by score
|
||||||
|
let results = mdData.concat(pdfData)
|
||||||
|
.sort((a: any, b: any) => b.score - a.score)
|
||||||
|
.map((result: any) => { return { entry: result.entry, file: result.file } as SearchResult; })
|
||||||
|
|
||||||
this.query = query;
|
this.query = query;
|
||||||
return results;
|
return results;
|
||||||
|
@ -124,11 +136,12 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
|
||||||
}
|
}
|
||||||
|
|
||||||
async onChooseSuggestion(result: SearchResult, _: MouseEvent | KeyboardEvent) {
|
async onChooseSuggestion(result: SearchResult, _: MouseEvent | KeyboardEvent) {
|
||||||
// Get all markdown files in vault
|
// Get all markdown and PDF files in vault
|
||||||
const mdFiles = this.app.vault.getMarkdownFiles();
|
const mdFiles = this.app.vault.getMarkdownFiles();
|
||||||
|
const pdfFiles = this.app.vault.getFiles().filter(file => file.extension === 'pdf');
|
||||||
|
|
||||||
// Find the vault file matching file of chosen search result
|
// Find the vault file matching file of chosen search result
|
||||||
let file_match = mdFiles
|
let file_match = mdFiles.concat(pdfFiles)
|
||||||
// Sort by descending length of path
|
// Sort by descending length of path
|
||||||
// This finds longest path match when multiple files have same name
|
// This finds longest path match when multiple files have same name
|
||||||
.sort((a, b) => b.path.length - a.path.length)
|
.sort((a, b) => b.path.length - a.path.length)
|
||||||
|
@ -138,7 +151,7 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
|
||||||
|
|
||||||
// Open vault file at heading of chosen search result
|
// Open vault file at heading of chosen search result
|
||||||
if (file_match) {
|
if (file_match) {
|
||||||
let resultHeading = result.entry.split('\n', 1)[0];
|
let resultHeading = file_match.extension !== 'pdf' ? result.entry.split('\n', 1)[0] : '';
|
||||||
let linkToEntry = `${file_match.path}${resultHeading}`
|
let linkToEntry = `${file_match.path}${resultHeading}`
|
||||||
this.app.workspace.openLinkText(linkToEntry, '');
|
this.app.workspace.openLinkText(linkToEntry, '');
|
||||||
console.log(`Link: ${linkToEntry}, File: ${file_match.path}, Heading: ${resultHeading}`);
|
console.log(`Link: ${linkToEntry}, File: ${file_match.path}, Heading: ${resultHeading}`);
|
||||||
|
|
|
@ -108,6 +108,7 @@ export class KhojSettingTab extends PluginSettingTab {
|
||||||
this.plugin.registerInterval(progress_indicator);
|
this.plugin.registerInterval(progress_indicator);
|
||||||
|
|
||||||
await request(`${this.plugin.settings.khojUrl}/api/update?t=markdown&force=true`);
|
await request(`${this.plugin.settings.khojUrl}/api/update?t=markdown&force=true`);
|
||||||
|
await request(`${this.plugin.settings.khojUrl}/api/update?t=pdf&force=true`);
|
||||||
new Notice('✅ Updated Khoj index.');
|
new Notice('✅ Updated Khoj index.');
|
||||||
|
|
||||||
// Reset button once index is updated
|
// Reset button once index is updated
|
||||||
|
|
|
@ -12,6 +12,7 @@ export function getVaultAbsolutePath(vault: Vault): string {
|
||||||
export async function configureKhojBackend(vault: Vault, setting: KhojSetting, notify: boolean = true) {
|
export async function configureKhojBackend(vault: Vault, setting: KhojSetting, notify: boolean = true) {
|
||||||
let vaultPath = getVaultAbsolutePath(vault);
|
let vaultPath = getVaultAbsolutePath(vault);
|
||||||
let mdInVault = `${vaultPath}/**/*.md`;
|
let mdInVault = `${vaultPath}/**/*.md`;
|
||||||
|
let pdfInVault = `${vaultPath}/**/*.pdf`;
|
||||||
let khojConfigUrl = `${setting.khojUrl}/api/config/data`;
|
let khojConfigUrl = `${setting.khojUrl}/api/config/data`;
|
||||||
|
|
||||||
// Check if khoj backend is configured, note if cannot connect to backend
|
// Check if khoj backend is configured, note if cannot connect to backend
|
||||||
|
@ -32,7 +33,8 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
|
||||||
let indexName = vaultPath.replace(/\//g, '_').replace(/\\/g, '_').replace(/ /g, '_').replace(/:/g, '_');
|
let indexName = vaultPath.replace(/\//g, '_').replace(/\\/g, '_').replace(/ /g, '_').replace(/:/g, '_');
|
||||||
// Get default config fields from khoj backend
|
// Get default config fields from khoj backend
|
||||||
let defaultConfig = await request(`${khojConfigUrl}/default`).then(response => JSON.parse(response));
|
let defaultConfig = await request(`${khojConfigUrl}/default`).then(response => JSON.parse(response));
|
||||||
let khojDefaultIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["markdown"]["embeddings-file"]);
|
let khojDefaultMdIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["markdown"]["embeddings-file"]);
|
||||||
|
let khojDefaultPdfIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["pdf"]["embeddings-file"]);
|
||||||
let khojDefaultChatDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["processor"]["conversation"]["conversation-logfile"]);
|
let khojDefaultChatDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["processor"]["conversation"]["conversation-logfile"]);
|
||||||
let khojDefaultChatModelName = defaultConfig["processor"]["conversation"]["model"];
|
let khojDefaultChatModelName = defaultConfig["processor"]["conversation"]["model"];
|
||||||
|
|
||||||
|
@ -47,8 +49,14 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
|
||||||
"markdown": {
|
"markdown": {
|
||||||
"input-filter": [mdInVault],
|
"input-filter": [mdInVault],
|
||||||
"input-files": null,
|
"input-files": null,
|
||||||
"embeddings-file": `${khojDefaultIndexDirectory}/${indexName}.pt`,
|
"embeddings-file": `${khojDefaultMdIndexDirectory}/${indexName}.pt`,
|
||||||
"compressed-jsonl": `${khojDefaultIndexDirectory}/${indexName}.jsonl.gz`,
|
"compressed-jsonl": `${khojDefaultMdIndexDirectory}/${indexName}.jsonl.gz`,
|
||||||
|
},
|
||||||
|
"pdf": {
|
||||||
|
"input-filter": [pdfInVault],
|
||||||
|
"input-files": null,
|
||||||
|
"embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
|
||||||
|
"compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -59,8 +67,8 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
|
||||||
data["content-type"]["markdown"] = {
|
data["content-type"]["markdown"] = {
|
||||||
"input-filter": [mdInVault],
|
"input-filter": [mdInVault],
|
||||||
"input-files": null,
|
"input-files": null,
|
||||||
"embeddings-file": `${khojDefaultIndexDirectory}/${indexName}.pt`,
|
"embeddings-file": `${khojDefaultMdIndexDirectory}/${indexName}.pt`,
|
||||||
"compressed-jsonl": `${khojDefaultIndexDirectory}/${indexName}.jsonl.gz`,
|
"compressed-jsonl": `${khojDefaultMdIndexDirectory}/${indexName}.jsonl.gz`,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Else if khoj is not configured to index markdown files in configured obsidian vault
|
// Else if khoj is not configured to index markdown files in configured obsidian vault
|
||||||
|
@ -68,12 +76,37 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
|
||||||
data["content-type"]["markdown"]["input-filter"][0] !== mdInVault) {
|
data["content-type"]["markdown"]["input-filter"][0] !== mdInVault) {
|
||||||
// Update markdown config in khoj content-type config
|
// Update markdown config in khoj content-type config
|
||||||
// Set markdown config to only index markdown files in configured obsidian vault
|
// Set markdown config to only index markdown files in configured obsidian vault
|
||||||
let khojIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
|
let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
|
||||||
data["content-type"]["markdown"] = {
|
data["content-type"]["markdown"] = {
|
||||||
"input-filter": [mdInVault],
|
"input-filter": [mdInVault],
|
||||||
"input-files": null,
|
"input-files": null,
|
||||||
"embeddings-file": `${khojIndexDirectory}/${indexName}.pt`,
|
"embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`,
|
||||||
"compressed-jsonl": `${khojIndexDirectory}/${indexName}.jsonl.gz`,
|
"compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (khoj_already_configured && !data["content-type"]["pdf"]) {
|
||||||
|
// Add pdf config to khoj content-type config
|
||||||
|
// Set pdf config to index pdf files in configured obsidian vault
|
||||||
|
data["content-type"]["pdf"] = {
|
||||||
|
"input-filter": [pdfInVault],
|
||||||
|
"input-files": null,
|
||||||
|
"embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
|
||||||
|
"compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Else if khoj is not configured to index pdf files in configured obsidian vault
|
||||||
|
else if (khoj_already_configured &&
|
||||||
|
(data["content-type"]["pdf"]["input-filter"].length != 1 ||
|
||||||
|
data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) {
|
||||||
|
// Update pdf config in khoj content-type config
|
||||||
|
// Set pdf config to only index pdf files in configured obsidian vault
|
||||||
|
let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]);
|
||||||
|
data["content-type"]["pdf"] = {
|
||||||
|
"input-filter": [pdfInVault],
|
||||||
|
"input-files": null,
|
||||||
|
"embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`,
|
||||||
|
"compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue