Merge branch 'master' of github.com:debanjum/khoj into features/simplify-configuration-steps

This commit is contained in:
sabaimran 2023-07-02 16:21:54 -07:00
commit a8b83da872
8 changed files with 151 additions and 123 deletions

View file

@ -1,7 +1,7 @@
[Desktop Entry]
Type=Application
Name=Khoj
Comment=A natural language search engine for your personal notes, transactions and images.
Comment=An AI personal assistant for your Digital Brain
Path=/opt
Exec=/opt/Khoj
Icon=Khoj

View file

@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project]
name = "khoj-assistant"
description = "A natural language search engine for your personal notes, transactions and images"
description = "An AI personal assistant for your Digital Brain"
readme = "README.md"
license = "GPL-3.0-or-later"
requires-python = ">=3.8"

View file

@ -1,6 +1,6 @@
<img src="/src/khoj/interface/web/assets/icons/khoj-logo-sideways.svg" width="200" alt="Khoj Logo">Obsidian
> Natural language search for your Obsidian notes using [Khoj](https://github.com/khoj-ai/khoj)
> An AI personal assistant for your Digital Brain in Obsidian
## Table of Contents

View file

@ -161,7 +161,7 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
// Open vault file at heading of chosen search result
if (file_match) {
let resultHeading = file_match.extension !== 'pdf' ? result.entry.split('\n', 1)[0] : '';
let linkToEntry = `${file_match.path}${resultHeading}`
let linkToEntry = resultHeading.startsWith('#') ? `${file_match.path}${resultHeading}` : file_match.path;
this.app.workspace.openLinkText(linkToEntry, '');
console.log(`Link: ${linkToEntry}, File: ${file_match.path}, Heading: ${resultHeading}`);
}

View file

@ -72,41 +72,58 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
}
}
// Else if khoj is not configured to index markdown files in configured obsidian vault
else if (data["content-type"]["markdown"]["input-filter"].length != 1 ||
else if (
data["content-type"]["markdown"]["input-files"] != null ||
data["content-type"]["markdown"]["input-filter"] == null ||
data["content-type"]["markdown"]["input-filter"].length != 1 ||
data["content-type"]["markdown"]["input-filter"][0] !== mdInVault) {
// Update markdown config in khoj content-type config
// Set markdown config to only index markdown files in configured obsidian vault
let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
data["content-type"]["markdown"] = {
"input-filter": [mdInVault],
"input-files": null,
"embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`,
}
// Update markdown config in khoj content-type config
// Set markdown config to only index markdown files in configured obsidian vault
let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
data["content-type"]["markdown"] = {
"input-filter": [mdInVault],
"input-files": null,
"embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`,
}
}
if (khoj_already_configured && !data["content-type"]["pdf"]) {
// Add pdf config to khoj content-type config
// Set pdf config to index pdf files in configured obsidian vault
data["content-type"]["pdf"] = {
"input-filter": [pdfInVault],
"input-files": null,
"embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
const hasPdfFiles = app.vault.getFiles().some(file => file.extension === 'pdf');
if (hasPdfFiles) {
data["content-type"]["pdf"] = {
"input-filter": [pdfInVault],
"input-files": null,
"embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
}
} else {
data["content-type"]["pdf"] = null;
}
}
// Else if khoj is not configured to index pdf files in configured obsidian vault
else if (khoj_already_configured &&
(data["content-type"]["pdf"]["input-filter"].length != 1 ||
data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) {
// Update pdf config in khoj content-type config
// Set pdf config to only index pdf files in configured obsidian vault
let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]);
data["content-type"]["pdf"] = {
"input-filter": [pdfInVault],
"input-files": null,
"embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`,
(
data["content-type"]["pdf"]["input-files"] != null ||
data["content-type"]["pdf"]["input-filter"] == null ||
data["content-type"]["pdf"]["input-filter"].length != 1 ||
data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) {
let hasPdfFiles = app.vault.getFiles().some(file => file.extension === 'pdf');
if (hasPdfFiles) {
// Update pdf config in khoj content-type config
// Set pdf config to only index pdf files in configured obsidian vault
let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]);
data["content-type"]["pdf"] = {
"input-filter": [pdfInVault],
"input-files": null,
"embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`,
"compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`,
}
} else {
data["content-type"]["pdf"] = null;
}
}

View file

@ -93,98 +93,106 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
logger.warning("🚨 No Content or Search type is configured.")
return
# Initialize Org Notes Search
if (t == state.SearchType.Org or t == None) and config.content_type.org and config.search_type.asymmetric:
logger.info("🦄 Setting up search for orgmode notes")
# Extract Entries, Generate Notes Embeddings
model.org_search = text_search.setup(
OrgToJsonl,
config.content_type.org,
search_config=config.search_type.asymmetric,
regenerate=regenerate,
filters=[DateFilter(), WordFilter(), FileFilter()],
)
# Initialize Org Music Search
if (t == state.SearchType.Music or t == None) and config.content_type.music and config.search_type.asymmetric:
logger.info("🎺 Setting up search for org-music")
# Extract Entries, Generate Music Embeddings
model.music_search = text_search.setup(
OrgToJsonl,
config.content_type.music,
search_config=config.search_type.asymmetric,
regenerate=regenerate,
filters=[DateFilter(), WordFilter()],
)
# Initialize Markdown Search
if (t == state.SearchType.Markdown or t == None) and config.content_type.markdown and config.search_type.asymmetric:
logger.info("💎 Setting up search for markdown notes")
# Extract Entries, Generate Markdown Embeddings
model.markdown_search = text_search.setup(
MarkdownToJsonl,
config.content_type.markdown,
search_config=config.search_type.asymmetric,
regenerate=regenerate,
filters=[DateFilter(), WordFilter(), FileFilter()],
)
# Initialize Ledger Search
if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger and config.search_type.symmetric:
logger.info("💸 Setting up search for ledger")
# Extract Entries, Generate Ledger Embeddings
model.ledger_search = text_search.setup(
BeancountToJsonl,
config.content_type.ledger,
search_config=config.search_type.symmetric,
regenerate=regenerate,
filters=[DateFilter(), WordFilter(), FileFilter()],
)
# Initialize PDF Search
if (t == state.SearchType.Pdf or t == None) and config.content_type.pdf and config.search_type.asymmetric:
logger.info("🖨️ Setting up search for pdf")
# Extract Entries, Generate PDF Embeddings
model.pdf_search = text_search.setup(
PdfToJsonl,
config.content_type.pdf,
search_config=config.search_type.asymmetric,
regenerate=regenerate,
filters=[DateFilter(), WordFilter(), FileFilter()],
)
# Initialize Image Search
if (t == state.SearchType.Image or t == None) and config.content_type.image and config.search_type.image:
logger.info("🌄 Setting up search for images")
# Extract Entries, Generate Image Embeddings
model.image_search = image_search.setup(
config.content_type.image, search_config=config.search_type.image, regenerate=regenerate
)
if (t == state.SearchType.Github or t == None) and config.content_type.github and config.search_type.asymmetric:
logger.info("🐙 Setting up search for github")
# Extract Entries, Generate Github Embeddings
model.github_search = text_search.setup(
GithubToJsonl,
config.content_type.github,
search_config=config.search_type.asymmetric,
regenerate=regenerate,
filters=[DateFilter(), WordFilter(), FileFilter()],
)
# Initialize External Plugin Search
if (t == None or t in state.SearchType) and config.content_type.plugins:
logger.info("🔌 Setting up search for plugins")
model.plugin_search = {}
for plugin_type, plugin_config in config.content_type.plugins.items():
model.plugin_search[plugin_type] = text_search.setup(
JsonlToJsonl,
plugin_config,
try:
# Initialize Org Notes Search
if (t == state.SearchType.Org or t == None) and config.content_type.org and config.search_type.asymmetric:
logger.info("🦄 Setting up search for orgmode notes")
# Extract Entries, Generate Notes Embeddings
model.org_search = text_search.setup(
OrgToJsonl,
config.content_type.org,
search_config=config.search_type.asymmetric,
regenerate=regenerate,
filters=[DateFilter(), WordFilter(), FileFilter()],
)
# Initialize Org Music Search
if (t == state.SearchType.Music or t == None) and config.content_type.music and config.search_type.asymmetric:
logger.info("🎺 Setting up search for org-music")
# Extract Entries, Generate Music Embeddings
model.music_search = text_search.setup(
OrgToJsonl,
config.content_type.music,
search_config=config.search_type.asymmetric,
regenerate=regenerate,
filters=[DateFilter(), WordFilter()],
)
# Initialize Markdown Search
if (
(t == state.SearchType.Markdown or t == None)
and config.content_type.markdown
and config.search_type.asymmetric
):
logger.info("💎 Setting up search for markdown notes")
# Extract Entries, Generate Markdown Embeddings
model.markdown_search = text_search.setup(
MarkdownToJsonl,
config.content_type.markdown,
search_config=config.search_type.asymmetric,
regenerate=regenerate,
filters=[DateFilter(), WordFilter(), FileFilter()],
)
# Initialize Ledger Search
if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger and config.search_type.symmetric:
logger.info("💸 Setting up search for ledger")
# Extract Entries, Generate Ledger Embeddings
model.ledger_search = text_search.setup(
BeancountToJsonl,
config.content_type.ledger,
search_config=config.search_type.symmetric,
regenerate=regenerate,
filters=[DateFilter(), WordFilter(), FileFilter()],
)
# Initialize PDF Search
if (t == state.SearchType.Pdf or t == None) and config.content_type.pdf and config.search_type.asymmetric:
logger.info("🖨️ Setting up search for pdf")
# Extract Entries, Generate PDF Embeddings
model.pdf_search = text_search.setup(
PdfToJsonl,
config.content_type.pdf,
search_config=config.search_type.asymmetric,
regenerate=regenerate,
filters=[DateFilter(), WordFilter(), FileFilter()],
)
# Initialize Image Search
if (t == state.SearchType.Image or t == None) and config.content_type.image and config.search_type.image:
logger.info("🌄 Setting up search for images")
# Extract Entries, Generate Image Embeddings
model.image_search = image_search.setup(
config.content_type.image, search_config=config.search_type.image, regenerate=regenerate
)
if (t == state.SearchType.Github or t == None) and config.content_type.github and config.search_type.asymmetric:
logger.info("🐙 Setting up search for github")
# Extract Entries, Generate Github Embeddings
model.github_search = text_search.setup(
GithubToJsonl,
config.content_type.github,
search_config=config.search_type.asymmetric,
regenerate=regenerate,
filters=[DateFilter(), WordFilter(), FileFilter()],
)
# Initialize External Plugin Search
if (t == None or t in state.SearchType) and config.content_type.plugins:
logger.info("🔌 Setting up search for plugins")
model.plugin_search = {}
for plugin_type, plugin_config in config.content_type.plugins.items():
model.plugin_search[plugin_type] = text_search.setup(
JsonlToJsonl,
plugin_config,
search_config=config.search_type.asymmetric,
regenerate=regenerate,
filters=[DateFilter(), WordFilter(), FileFilter()],
)
except Exception as e:
logger.error("🚨 Failed to setup search")
raise e
# Invalidate Query Cache
state.query_cache = LRU()

View file

@ -384,8 +384,13 @@ def update(
):
try:
state.search_index_lock.acquire()
state.model = configure_search(state.model, state.config, regenerate=force or False, t=t)
state.search_index_lock.release()
try:
state.model = configure_search(state.model, state.config, regenerate=force or False, t=t)
except Exception as e:
logger.error(e)
raise HTTPException(status_code=500, detail=str(e))
finally:
state.search_index_lock.release()
except ValueError as e:
logger.error(e)
raise HTTPException(status_code=500, detail=str(e))

View file

@ -10,9 +10,7 @@ from khoj.utils.yaml import parse_config_from_file
def cli(args=None):
# Setup Argument Parser for the Commandline Interface
parser = argparse.ArgumentParser(
description="Start Khoj; A Natural Language Search Engine for your personal Notes, Transactions and Photos"
)
parser = argparse.ArgumentParser(description="Start Khoj; An AI personal assistant for your Digital Brain")
parser.add_argument(
"--config-file", "-c", default="~/.khoj/khoj.yml", type=pathlib.Path, help="YAML file to configure Khoj"
)