mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 23:48:56 +01:00
Merge branch 'master' of github.com:debanjum/khoj into features/simplify-configuration-steps
This commit is contained in:
commit
a8b83da872
8 changed files with 151 additions and 123 deletions
|
@ -1,7 +1,7 @@
|
|||
[Desktop Entry]
|
||||
Type=Application
|
||||
Name=Khoj
|
||||
Comment=A natural language search engine for your personal notes, transactions and images.
|
||||
Comment=An AI personal assistant for your Digital Brain
|
||||
Path=/opt
|
||||
Exec=/opt/Khoj
|
||||
Icon=Khoj
|
||||
|
|
|
@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|||
|
||||
[project]
|
||||
name = "khoj-assistant"
|
||||
description = "A natural language search engine for your personal notes, transactions and images"
|
||||
description = "An AI personal assistant for your Digital Brain"
|
||||
readme = "README.md"
|
||||
license = "GPL-3.0-or-later"
|
||||
requires-python = ">=3.8"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
<img src="/src/khoj/interface/web/assets/icons/khoj-logo-sideways.svg" width="200" alt="Khoj Logo">Obsidian
|
||||
|
||||
> Natural language search for your Obsidian notes using [Khoj](https://github.com/khoj-ai/khoj)
|
||||
> An AI personal assistant for your Digital Brain in Obsidian
|
||||
|
||||
## Table of Contents
|
||||
|
||||
|
|
|
@ -161,7 +161,7 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
|
|||
// Open vault file at heading of chosen search result
|
||||
if (file_match) {
|
||||
let resultHeading = file_match.extension !== 'pdf' ? result.entry.split('\n', 1)[0] : '';
|
||||
let linkToEntry = `${file_match.path}${resultHeading}`
|
||||
let linkToEntry = resultHeading.startsWith('#') ? `${file_match.path}${resultHeading}` : file_match.path;
|
||||
this.app.workspace.openLinkText(linkToEntry, '');
|
||||
console.log(`Link: ${linkToEntry}, File: ${file_match.path}, Heading: ${resultHeading}`);
|
||||
}
|
||||
|
|
|
@ -72,41 +72,58 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
|
|||
}
|
||||
}
|
||||
// Else if khoj is not configured to index markdown files in configured obsidian vault
|
||||
else if (data["content-type"]["markdown"]["input-filter"].length != 1 ||
|
||||
else if (
|
||||
data["content-type"]["markdown"]["input-files"] != null ||
|
||||
data["content-type"]["markdown"]["input-filter"] == null ||
|
||||
data["content-type"]["markdown"]["input-filter"].length != 1 ||
|
||||
data["content-type"]["markdown"]["input-filter"][0] !== mdInVault) {
|
||||
// Update markdown config in khoj content-type config
|
||||
// Set markdown config to only index markdown files in configured obsidian vault
|
||||
let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
|
||||
data["content-type"]["markdown"] = {
|
||||
"input-filter": [mdInVault],
|
||||
"input-files": null,
|
||||
"embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`,
|
||||
"compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`,
|
||||
}
|
||||
// Update markdown config in khoj content-type config
|
||||
// Set markdown config to only index markdown files in configured obsidian vault
|
||||
let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
|
||||
data["content-type"]["markdown"] = {
|
||||
"input-filter": [mdInVault],
|
||||
"input-files": null,
|
||||
"embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`,
|
||||
"compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`,
|
||||
}
|
||||
}
|
||||
|
||||
if (khoj_already_configured && !data["content-type"]["pdf"]) {
|
||||
// Add pdf config to khoj content-type config
|
||||
// Set pdf config to index pdf files in configured obsidian vault
|
||||
data["content-type"]["pdf"] = {
|
||||
"input-filter": [pdfInVault],
|
||||
"input-files": null,
|
||||
"embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
|
||||
"compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
|
||||
const hasPdfFiles = app.vault.getFiles().some(file => file.extension === 'pdf');
|
||||
|
||||
if (hasPdfFiles) {
|
||||
data["content-type"]["pdf"] = {
|
||||
"input-filter": [pdfInVault],
|
||||
"input-files": null,
|
||||
"embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
|
||||
"compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
|
||||
}
|
||||
} else {
|
||||
data["content-type"]["pdf"] = null;
|
||||
}
|
||||
}
|
||||
// Else if khoj is not configured to index pdf files in configured obsidian vault
|
||||
else if (khoj_already_configured &&
|
||||
(data["content-type"]["pdf"]["input-filter"].length != 1 ||
|
||||
data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) {
|
||||
// Update pdf config in khoj content-type config
|
||||
// Set pdf config to only index pdf files in configured obsidian vault
|
||||
let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]);
|
||||
data["content-type"]["pdf"] = {
|
||||
"input-filter": [pdfInVault],
|
||||
"input-files": null,
|
||||
"embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`,
|
||||
"compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`,
|
||||
(
|
||||
data["content-type"]["pdf"]["input-files"] != null ||
|
||||
data["content-type"]["pdf"]["input-filter"] == null ||
|
||||
data["content-type"]["pdf"]["input-filter"].length != 1 ||
|
||||
data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) {
|
||||
|
||||
let hasPdfFiles = app.vault.getFiles().some(file => file.extension === 'pdf');
|
||||
|
||||
if (hasPdfFiles) {
|
||||
// Update pdf config in khoj content-type config
|
||||
// Set pdf config to only index pdf files in configured obsidian vault
|
||||
let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]);
|
||||
data["content-type"]["pdf"] = {
|
||||
"input-filter": [pdfInVault],
|
||||
"input-files": null,
|
||||
"embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`,
|
||||
"compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`,
|
||||
}
|
||||
} else {
|
||||
data["content-type"]["pdf"] = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -93,98 +93,106 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
|
|||
logger.warning("🚨 No Content or Search type is configured.")
|
||||
return
|
||||
|
||||
# Initialize Org Notes Search
|
||||
if (t == state.SearchType.Org or t == None) and config.content_type.org and config.search_type.asymmetric:
|
||||
logger.info("🦄 Setting up search for orgmode notes")
|
||||
# Extract Entries, Generate Notes Embeddings
|
||||
model.org_search = text_search.setup(
|
||||
OrgToJsonl,
|
||||
config.content_type.org,
|
||||
search_config=config.search_type.asymmetric,
|
||||
regenerate=regenerate,
|
||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||
)
|
||||
|
||||
# Initialize Org Music Search
|
||||
if (t == state.SearchType.Music or t == None) and config.content_type.music and config.search_type.asymmetric:
|
||||
logger.info("🎺 Setting up search for org-music")
|
||||
# Extract Entries, Generate Music Embeddings
|
||||
model.music_search = text_search.setup(
|
||||
OrgToJsonl,
|
||||
config.content_type.music,
|
||||
search_config=config.search_type.asymmetric,
|
||||
regenerate=regenerate,
|
||||
filters=[DateFilter(), WordFilter()],
|
||||
)
|
||||
|
||||
# Initialize Markdown Search
|
||||
if (t == state.SearchType.Markdown or t == None) and config.content_type.markdown and config.search_type.asymmetric:
|
||||
logger.info("💎 Setting up search for markdown notes")
|
||||
# Extract Entries, Generate Markdown Embeddings
|
||||
model.markdown_search = text_search.setup(
|
||||
MarkdownToJsonl,
|
||||
config.content_type.markdown,
|
||||
search_config=config.search_type.asymmetric,
|
||||
regenerate=regenerate,
|
||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||
)
|
||||
|
||||
# Initialize Ledger Search
|
||||
if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger and config.search_type.symmetric:
|
||||
logger.info("💸 Setting up search for ledger")
|
||||
# Extract Entries, Generate Ledger Embeddings
|
||||
model.ledger_search = text_search.setup(
|
||||
BeancountToJsonl,
|
||||
config.content_type.ledger,
|
||||
search_config=config.search_type.symmetric,
|
||||
regenerate=regenerate,
|
||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||
)
|
||||
|
||||
# Initialize PDF Search
|
||||
if (t == state.SearchType.Pdf or t == None) and config.content_type.pdf and config.search_type.asymmetric:
|
||||
logger.info("🖨️ Setting up search for pdf")
|
||||
# Extract Entries, Generate PDF Embeddings
|
||||
model.pdf_search = text_search.setup(
|
||||
PdfToJsonl,
|
||||
config.content_type.pdf,
|
||||
search_config=config.search_type.asymmetric,
|
||||
regenerate=regenerate,
|
||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||
)
|
||||
|
||||
# Initialize Image Search
|
||||
if (t == state.SearchType.Image or t == None) and config.content_type.image and config.search_type.image:
|
||||
logger.info("🌄 Setting up search for images")
|
||||
# Extract Entries, Generate Image Embeddings
|
||||
model.image_search = image_search.setup(
|
||||
config.content_type.image, search_config=config.search_type.image, regenerate=regenerate
|
||||
)
|
||||
|
||||
if (t == state.SearchType.Github or t == None) and config.content_type.github and config.search_type.asymmetric:
|
||||
logger.info("🐙 Setting up search for github")
|
||||
# Extract Entries, Generate Github Embeddings
|
||||
model.github_search = text_search.setup(
|
||||
GithubToJsonl,
|
||||
config.content_type.github,
|
||||
search_config=config.search_type.asymmetric,
|
||||
regenerate=regenerate,
|
||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||
)
|
||||
|
||||
# Initialize External Plugin Search
|
||||
if (t == None or t in state.SearchType) and config.content_type.plugins:
|
||||
logger.info("🔌 Setting up search for plugins")
|
||||
model.plugin_search = {}
|
||||
for plugin_type, plugin_config in config.content_type.plugins.items():
|
||||
model.plugin_search[plugin_type] = text_search.setup(
|
||||
JsonlToJsonl,
|
||||
plugin_config,
|
||||
try:
|
||||
# Initialize Org Notes Search
|
||||
if (t == state.SearchType.Org or t == None) and config.content_type.org and config.search_type.asymmetric:
|
||||
logger.info("🦄 Setting up search for orgmode notes")
|
||||
# Extract Entries, Generate Notes Embeddings
|
||||
model.org_search = text_search.setup(
|
||||
OrgToJsonl,
|
||||
config.content_type.org,
|
||||
search_config=config.search_type.asymmetric,
|
||||
regenerate=regenerate,
|
||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||
)
|
||||
|
||||
# Initialize Org Music Search
|
||||
if (t == state.SearchType.Music or t == None) and config.content_type.music and config.search_type.asymmetric:
|
||||
logger.info("🎺 Setting up search for org-music")
|
||||
# Extract Entries, Generate Music Embeddings
|
||||
model.music_search = text_search.setup(
|
||||
OrgToJsonl,
|
||||
config.content_type.music,
|
||||
search_config=config.search_type.asymmetric,
|
||||
regenerate=regenerate,
|
||||
filters=[DateFilter(), WordFilter()],
|
||||
)
|
||||
|
||||
# Initialize Markdown Search
|
||||
if (
|
||||
(t == state.SearchType.Markdown or t == None)
|
||||
and config.content_type.markdown
|
||||
and config.search_type.asymmetric
|
||||
):
|
||||
logger.info("💎 Setting up search for markdown notes")
|
||||
# Extract Entries, Generate Markdown Embeddings
|
||||
model.markdown_search = text_search.setup(
|
||||
MarkdownToJsonl,
|
||||
config.content_type.markdown,
|
||||
search_config=config.search_type.asymmetric,
|
||||
regenerate=regenerate,
|
||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||
)
|
||||
|
||||
# Initialize Ledger Search
|
||||
if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger and config.search_type.symmetric:
|
||||
logger.info("💸 Setting up search for ledger")
|
||||
# Extract Entries, Generate Ledger Embeddings
|
||||
model.ledger_search = text_search.setup(
|
||||
BeancountToJsonl,
|
||||
config.content_type.ledger,
|
||||
search_config=config.search_type.symmetric,
|
||||
regenerate=regenerate,
|
||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||
)
|
||||
|
||||
# Initialize PDF Search
|
||||
if (t == state.SearchType.Pdf or t == None) and config.content_type.pdf and config.search_type.asymmetric:
|
||||
logger.info("🖨️ Setting up search for pdf")
|
||||
# Extract Entries, Generate PDF Embeddings
|
||||
model.pdf_search = text_search.setup(
|
||||
PdfToJsonl,
|
||||
config.content_type.pdf,
|
||||
search_config=config.search_type.asymmetric,
|
||||
regenerate=regenerate,
|
||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||
)
|
||||
|
||||
# Initialize Image Search
|
||||
if (t == state.SearchType.Image or t == None) and config.content_type.image and config.search_type.image:
|
||||
logger.info("🌄 Setting up search for images")
|
||||
# Extract Entries, Generate Image Embeddings
|
||||
model.image_search = image_search.setup(
|
||||
config.content_type.image, search_config=config.search_type.image, regenerate=regenerate
|
||||
)
|
||||
|
||||
if (t == state.SearchType.Github or t == None) and config.content_type.github and config.search_type.asymmetric:
|
||||
logger.info("🐙 Setting up search for github")
|
||||
# Extract Entries, Generate Github Embeddings
|
||||
model.github_search = text_search.setup(
|
||||
GithubToJsonl,
|
||||
config.content_type.github,
|
||||
search_config=config.search_type.asymmetric,
|
||||
regenerate=regenerate,
|
||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||
)
|
||||
|
||||
# Initialize External Plugin Search
|
||||
if (t == None or t in state.SearchType) and config.content_type.plugins:
|
||||
logger.info("🔌 Setting up search for plugins")
|
||||
model.plugin_search = {}
|
||||
for plugin_type, plugin_config in config.content_type.plugins.items():
|
||||
model.plugin_search[plugin_type] = text_search.setup(
|
||||
JsonlToJsonl,
|
||||
plugin_config,
|
||||
search_config=config.search_type.asymmetric,
|
||||
regenerate=regenerate,
|
||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("🚨 Failed to setup search")
|
||||
raise e
|
||||
|
||||
# Invalidate Query Cache
|
||||
state.query_cache = LRU()
|
||||
|
||||
|
|
|
@ -384,8 +384,13 @@ def update(
|
|||
):
|
||||
try:
|
||||
state.search_index_lock.acquire()
|
||||
state.model = configure_search(state.model, state.config, regenerate=force or False, t=t)
|
||||
state.search_index_lock.release()
|
||||
try:
|
||||
state.model = configure_search(state.model, state.config, regenerate=force or False, t=t)
|
||||
except Exception as e:
|
||||
logger.error(e)
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
finally:
|
||||
state.search_index_lock.release()
|
||||
except ValueError as e:
|
||||
logger.error(e)
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
|
|
@ -10,9 +10,7 @@ from khoj.utils.yaml import parse_config_from_file
|
|||
|
||||
def cli(args=None):
|
||||
# Setup Argument Parser for the Commandline Interface
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Start Khoj; A Natural Language Search Engine for your personal Notes, Transactions and Photos"
|
||||
)
|
||||
parser = argparse.ArgumentParser(description="Start Khoj; An AI personal assistant for your Digital Brain")
|
||||
parser.add_argument(
|
||||
"--config-file", "-c", default="~/.khoj/khoj.yml", type=pathlib.Path, help="YAML file to configure Khoj"
|
||||
)
|
||||
|
|
Loading…
Reference in a new issue