mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 15:38:55 +01:00
Render PDF search results in Khoj Obsidian interface
- Make plugin update khoj server config to index PDF files in vault too - Make Obsidian plugin update index for PDF files in vault too - Show PDF results in Khoj Search modal as well - Ensure combined results are sorted by score across both types - Jump to PDF file when select it PDF search result from modal
This commit is contained in:
parent
e3892945d4
commit
bbe3bf9733
4 changed files with 65 additions and 18 deletions
|
@ -42,7 +42,7 @@ https://github.com/debanjum/khoj/assets/6413477/3e33d8ea-25bb-46c8-a3bf-c92f78d0
|
|||
1. Install Khoj via `pip` and start Khoj backend in non-gui mode
|
||||
2. Install Khoj plugin via Community Plugins settings pane on Obsidian app
|
||||
3. Check the new Khoj plugin settings
|
||||
4. Wait for Khoj backend to index markdown files in the current Vault
|
||||
4. Wait for Khoj backend to index markdown, PDF files in the current Vault
|
||||
5. Open Khoj plugin on Obsidian via Search button on Left Pane
|
||||
6. Search \"*Announce plugin to folks*\" in the [Obsidian Plugin docs](https://marcus.se.net/obsidian-plugin-docs/)
|
||||
7. Jump to the [search result](https://marcus.se.net/obsidian-plugin-docs/publishing/submit-your-plugin)
|
||||
|
@ -151,7 +151,7 @@ The plugin implements the following functionality to search your notes with Khoj
|
|||
- [X] Open the Khoj search modal via left ribbon icon or the *Khoj: Search* command
|
||||
- [X] Render results as Markdown preview to improve readability
|
||||
- [X] Configure Khoj via the plugin setting tab on the settings page
|
||||
- Set Obsidian Vault to Index with Khoj. Defaults to all markdown files in current Vault
|
||||
- Set Obsidian Vault to Index with Khoj. Defaults to all markdown, PDF files in current Vault
|
||||
- Set URL of Khoj backend
|
||||
- Set Number of Search Results to show in Search Modal
|
||||
- [X] Allow reranking of result to improve search quality
|
||||
|
|
|
@ -89,12 +89,24 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
|
|||
async getSuggestions(query: string): Promise<SearchResult[]> {
|
||||
// Query Khoj backend for search results
|
||||
let encodedQuery = encodeURIComponent(query);
|
||||
let searchUrl = `${this.setting.khojUrl}/api/search?q=${encodedQuery}&n=${this.setting.resultsCount}&r=${this.rerank}&t=markdown`;
|
||||
let response = await request(searchUrl);
|
||||
let data = JSON.parse(response);
|
||||
let results = data
|
||||
let searchUrl = `${this.setting.khojUrl}/api/search?q=${encodedQuery}&n=${this.setting.resultsCount}&r=${this.rerank}`;
|
||||
|
||||
// Get search results for markdown and pdf files
|
||||
let mdResponse = await request(`${searchUrl}&t=markdown`);
|
||||
let pdfResponse = await request(`${searchUrl}&t=pdf`);
|
||||
|
||||
// Parse search results
|
||||
let mdData = JSON.parse(mdResponse)
|
||||
.filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path))
|
||||
.map((result: any) => { return { entry: result.entry, file: result.additional.file } as SearchResult; });
|
||||
.map((result: any) => { return { entry: result.entry, score: result.score, file: result.additional.file }; });
|
||||
let pdfData = JSON.parse(pdfResponse)
|
||||
.filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path))
|
||||
.map((result: any) => { return { entry: `## ${result.additional.compiled}`, score: result.score, file: result.additional.file } as SearchResult; })
|
||||
|
||||
// Combine markdown and PDF results and sort them by score
|
||||
let results = mdData.concat(pdfData)
|
||||
.sort((a: any, b: any) => b.score - a.score)
|
||||
.map((result: any) => { return { entry: result.entry, file: result.file } as SearchResult; })
|
||||
|
||||
this.query = query;
|
||||
return results;
|
||||
|
@ -124,11 +136,12 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
|
|||
}
|
||||
|
||||
async onChooseSuggestion(result: SearchResult, _: MouseEvent | KeyboardEvent) {
|
||||
// Get all markdown files in vault
|
||||
// Get all markdown and PDF files in vault
|
||||
const mdFiles = this.app.vault.getMarkdownFiles();
|
||||
const pdfFiles = this.app.vault.getFiles().filter(file => file.extension === 'pdf');
|
||||
|
||||
// Find the vault file matching file of chosen search result
|
||||
let file_match = mdFiles
|
||||
let file_match = mdFiles.concat(pdfFiles)
|
||||
// Sort by descending length of path
|
||||
// This finds longest path match when multiple files have same name
|
||||
.sort((a, b) => b.path.length - a.path.length)
|
||||
|
@ -138,7 +151,7 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
|
|||
|
||||
// Open vault file at heading of chosen search result
|
||||
if (file_match) {
|
||||
let resultHeading = result.entry.split('\n', 1)[0];
|
||||
let resultHeading = file_match.extension !== 'pdf' ? result.entry.split('\n', 1)[0] : '';
|
||||
let linkToEntry = `${file_match.path}${resultHeading}`
|
||||
this.app.workspace.openLinkText(linkToEntry, '');
|
||||
console.log(`Link: ${linkToEntry}, File: ${file_match.path}, Heading: ${resultHeading}`);
|
||||
|
|
|
@ -108,6 +108,7 @@ export class KhojSettingTab extends PluginSettingTab {
|
|||
this.plugin.registerInterval(progress_indicator);
|
||||
|
||||
await request(`${this.plugin.settings.khojUrl}/api/update?t=markdown&force=true`);
|
||||
await request(`${this.plugin.settings.khojUrl}/api/update?t=pdf&force=true`);
|
||||
new Notice('✅ Updated Khoj index.');
|
||||
|
||||
// Reset button once index is updated
|
||||
|
|
|
@ -12,6 +12,7 @@ export function getVaultAbsolutePath(vault: Vault): string {
|
|||
export async function configureKhojBackend(vault: Vault, setting: KhojSetting, notify: boolean = true) {
|
||||
let vaultPath = getVaultAbsolutePath(vault);
|
||||
let mdInVault = `${vaultPath}/**/*.md`;
|
||||
let pdfInVault = `${vaultPath}/**/*.pdf`;
|
||||
let khojConfigUrl = `${setting.khojUrl}/api/config/data`;
|
||||
|
||||
// Check if khoj backend is configured, note if cannot connect to backend
|
||||
|
@ -32,7 +33,8 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
|
|||
let indexName = vaultPath.replace(/\//g, '_').replace(/\\/g, '_').replace(/ /g, '_').replace(/:/g, '_');
|
||||
// Get default config fields from khoj backend
|
||||
let defaultConfig = await request(`${khojConfigUrl}/default`).then(response => JSON.parse(response));
|
||||
let khojDefaultIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["markdown"]["embeddings-file"]);
|
||||
let khojDefaultMdIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["markdown"]["embeddings-file"]);
|
||||
let khojDefaultPdfIndexDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["content-type"]["pdf"]["embeddings-file"]);
|
||||
let khojDefaultChatDirectory = getIndexDirectoryFromBackendConfig(defaultConfig["processor"]["conversation"]["conversation-logfile"]);
|
||||
let khojDefaultChatModelName = defaultConfig["processor"]["conversation"]["model"];
|
||||
|
||||
|
@ -47,8 +49,14 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
|
|||
"markdown": {
|
||||
"input-filter": [mdInVault],
|
||||
"input-files": null,
|
||||
"embeddings-file": `${khojDefaultIndexDirectory}/${indexName}.pt`,
|
||||
"compressed-jsonl": `${khojDefaultIndexDirectory}/${indexName}.jsonl.gz`,
|
||||
"embeddings-file": `${khojDefaultMdIndexDirectory}/${indexName}.pt`,
|
||||
"compressed-jsonl": `${khojDefaultMdIndexDirectory}/${indexName}.jsonl.gz`,
|
||||
},
|
||||
"pdf": {
|
||||
"input-filter": [pdfInVault],
|
||||
"input-files": null,
|
||||
"embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
|
||||
"compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -59,8 +67,8 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
|
|||
data["content-type"]["markdown"] = {
|
||||
"input-filter": [mdInVault],
|
||||
"input-files": null,
|
||||
"embeddings-file": `${khojDefaultIndexDirectory}/${indexName}.pt`,
|
||||
"compressed-jsonl": `${khojDefaultIndexDirectory}/${indexName}.jsonl.gz`,
|
||||
"embeddings-file": `${khojDefaultMdIndexDirectory}/${indexName}.pt`,
|
||||
"compressed-jsonl": `${khojDefaultMdIndexDirectory}/${indexName}.jsonl.gz`,
|
||||
}
|
||||
}
|
||||
// Else if khoj is not configured to index markdown files in configured obsidian vault
|
||||
|
@ -68,12 +76,37 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
|
|||
data["content-type"]["markdown"]["input-filter"][0] !== mdInVault) {
|
||||
// Update markdown config in khoj content-type config
|
||||
// Set markdown config to only index markdown files in configured obsidian vault
|
||||
let khojIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
|
||||
let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
|
||||
data["content-type"]["markdown"] = {
|
||||
"input-filter": [mdInVault],
|
||||
"input-files": null,
|
||||
"embeddings-file": `${khojIndexDirectory}/${indexName}.pt`,
|
||||
"compressed-jsonl": `${khojIndexDirectory}/${indexName}.jsonl.gz`,
|
||||
"embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`,
|
||||
"compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`,
|
||||
}
|
||||
}
|
||||
|
||||
if (khoj_already_configured && !data["content-type"]["pdf"]) {
|
||||
// Add pdf config to khoj content-type config
|
||||
// Set pdf config to index pdf files in configured obsidian vault
|
||||
data["content-type"]["pdf"] = {
|
||||
"input-filter": [pdfInVault],
|
||||
"input-files": null,
|
||||
"embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
|
||||
"compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
|
||||
}
|
||||
}
|
||||
// Else if khoj is not configured to index pdf files in configured obsidian vault
|
||||
else if (khoj_already_configured &&
|
||||
(data["content-type"]["pdf"]["input-filter"].length != 1 ||
|
||||
data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) {
|
||||
// Update pdf config in khoj content-type config
|
||||
// Set pdf config to only index pdf files in configured obsidian vault
|
||||
let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]);
|
||||
data["content-type"]["pdf"] = {
|
||||
"input-filter": [pdfInVault],
|
||||
"input-files": null,
|
||||
"embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`,
|
||||
"compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue