mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-24 07:55:07 +01:00
Merge branch 'master' of github.com:debanjum/khoj into features/simplify-configuration-steps
This commit is contained in:
commit
a8b83da872
8 changed files with 151 additions and 123 deletions
|
@ -1,7 +1,7 @@
|
||||||
[Desktop Entry]
|
[Desktop Entry]
|
||||||
Type=Application
|
Type=Application
|
||||||
Name=Khoj
|
Name=Khoj
|
||||||
Comment=A natural language search engine for your personal notes, transactions and images.
|
Comment=An AI personal assistant for your Digital Brain
|
||||||
Path=/opt
|
Path=/opt
|
||||||
Exec=/opt/Khoj
|
Exec=/opt/Khoj
|
||||||
Icon=Khoj
|
Icon=Khoj
|
||||||
|
|
|
@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "khoj-assistant"
|
name = "khoj-assistant"
|
||||||
description = "A natural language search engine for your personal notes, transactions and images"
|
description = "An AI personal assistant for your Digital Brain"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
license = "GPL-3.0-or-later"
|
license = "GPL-3.0-or-later"
|
||||||
requires-python = ">=3.8"
|
requires-python = ">=3.8"
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
<img src="/src/khoj/interface/web/assets/icons/khoj-logo-sideways.svg" width="200" alt="Khoj Logo">Obsidian
|
<img src="/src/khoj/interface/web/assets/icons/khoj-logo-sideways.svg" width="200" alt="Khoj Logo">Obsidian
|
||||||
|
|
||||||
> Natural language search for your Obsidian notes using [Khoj](https://github.com/khoj-ai/khoj)
|
> An AI personal assistant for your Digital Brain in Obsidian
|
||||||
|
|
||||||
## Table of Contents
|
## Table of Contents
|
||||||
|
|
||||||
|
|
|
@ -161,7 +161,7 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
|
||||||
// Open vault file at heading of chosen search result
|
// Open vault file at heading of chosen search result
|
||||||
if (file_match) {
|
if (file_match) {
|
||||||
let resultHeading = file_match.extension !== 'pdf' ? result.entry.split('\n', 1)[0] : '';
|
let resultHeading = file_match.extension !== 'pdf' ? result.entry.split('\n', 1)[0] : '';
|
||||||
let linkToEntry = `${file_match.path}${resultHeading}`
|
let linkToEntry = resultHeading.startsWith('#') ? `${file_match.path}${resultHeading}` : file_match.path;
|
||||||
this.app.workspace.openLinkText(linkToEntry, '');
|
this.app.workspace.openLinkText(linkToEntry, '');
|
||||||
console.log(`Link: ${linkToEntry}, File: ${file_match.path}, Heading: ${resultHeading}`);
|
console.log(`Link: ${linkToEntry}, File: ${file_match.path}, Heading: ${resultHeading}`);
|
||||||
}
|
}
|
||||||
|
|
|
@ -72,41 +72,58 @@ export async function configureKhojBackend(vault: Vault, setting: KhojSetting, n
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Else if khoj is not configured to index markdown files in configured obsidian vault
|
// Else if khoj is not configured to index markdown files in configured obsidian vault
|
||||||
else if (data["content-type"]["markdown"]["input-filter"].length != 1 ||
|
else if (
|
||||||
|
data["content-type"]["markdown"]["input-files"] != null ||
|
||||||
|
data["content-type"]["markdown"]["input-filter"] == null ||
|
||||||
|
data["content-type"]["markdown"]["input-filter"].length != 1 ||
|
||||||
data["content-type"]["markdown"]["input-filter"][0] !== mdInVault) {
|
data["content-type"]["markdown"]["input-filter"][0] !== mdInVault) {
|
||||||
// Update markdown config in khoj content-type config
|
// Update markdown config in khoj content-type config
|
||||||
// Set markdown config to only index markdown files in configured obsidian vault
|
// Set markdown config to only index markdown files in configured obsidian vault
|
||||||
let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
|
let khojMdIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["markdown"]["embeddings-file"]);
|
||||||
data["content-type"]["markdown"] = {
|
data["content-type"]["markdown"] = {
|
||||||
"input-filter": [mdInVault],
|
"input-filter": [mdInVault],
|
||||||
"input-files": null,
|
"input-files": null,
|
||||||
"embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`,
|
"embeddings-file": `${khojMdIndexDirectory}/${indexName}.pt`,
|
||||||
"compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`,
|
"compressed-jsonl": `${khojMdIndexDirectory}/${indexName}.jsonl.gz`,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (khoj_already_configured && !data["content-type"]["pdf"]) {
|
if (khoj_already_configured && !data["content-type"]["pdf"]) {
|
||||||
// Add pdf config to khoj content-type config
|
const hasPdfFiles = app.vault.getFiles().some(file => file.extension === 'pdf');
|
||||||
// Set pdf config to index pdf files in configured obsidian vault
|
|
||||||
data["content-type"]["pdf"] = {
|
if (hasPdfFiles) {
|
||||||
"input-filter": [pdfInVault],
|
data["content-type"]["pdf"] = {
|
||||||
"input-files": null,
|
"input-filter": [pdfInVault],
|
||||||
"embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
|
"input-files": null,
|
||||||
"compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
|
"embeddings-file": `${khojDefaultPdfIndexDirectory}/${indexName}.pt`,
|
||||||
|
"compressed-jsonl": `${khojDefaultPdfIndexDirectory}/${indexName}.jsonl.gz`,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
data["content-type"]["pdf"] = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Else if khoj is not configured to index pdf files in configured obsidian vault
|
// Else if khoj is not configured to index pdf files in configured obsidian vault
|
||||||
else if (khoj_already_configured &&
|
else if (khoj_already_configured &&
|
||||||
(data["content-type"]["pdf"]["input-filter"].length != 1 ||
|
(
|
||||||
data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) {
|
data["content-type"]["pdf"]["input-files"] != null ||
|
||||||
// Update pdf config in khoj content-type config
|
data["content-type"]["pdf"]["input-filter"] == null ||
|
||||||
// Set pdf config to only index pdf files in configured obsidian vault
|
data["content-type"]["pdf"]["input-filter"].length != 1 ||
|
||||||
let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]);
|
data["content-type"]["pdf"]["input-filter"][0] !== pdfInVault)) {
|
||||||
data["content-type"]["pdf"] = {
|
|
||||||
"input-filter": [pdfInVault],
|
let hasPdfFiles = app.vault.getFiles().some(file => file.extension === 'pdf');
|
||||||
"input-files": null,
|
|
||||||
"embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`,
|
if (hasPdfFiles) {
|
||||||
"compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`,
|
// Update pdf config in khoj content-type config
|
||||||
|
// Set pdf config to only index pdf files in configured obsidian vault
|
||||||
|
let khojPdfIndexDirectory = getIndexDirectoryFromBackendConfig(data["content-type"]["pdf"]["embeddings-file"]);
|
||||||
|
data["content-type"]["pdf"] = {
|
||||||
|
"input-filter": [pdfInVault],
|
||||||
|
"input-files": null,
|
||||||
|
"embeddings-file": `${khojPdfIndexDirectory}/${indexName}.pt`,
|
||||||
|
"compressed-jsonl": `${khojPdfIndexDirectory}/${indexName}.jsonl.gz`,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
data["content-type"]["pdf"] = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -93,98 +93,106 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
|
||||||
logger.warning("🚨 No Content or Search type is configured.")
|
logger.warning("🚨 No Content or Search type is configured.")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Initialize Org Notes Search
|
try:
|
||||||
if (t == state.SearchType.Org or t == None) and config.content_type.org and config.search_type.asymmetric:
|
# Initialize Org Notes Search
|
||||||
logger.info("🦄 Setting up search for orgmode notes")
|
if (t == state.SearchType.Org or t == None) and config.content_type.org and config.search_type.asymmetric:
|
||||||
# Extract Entries, Generate Notes Embeddings
|
logger.info("🦄 Setting up search for orgmode notes")
|
||||||
model.org_search = text_search.setup(
|
# Extract Entries, Generate Notes Embeddings
|
||||||
OrgToJsonl,
|
model.org_search = text_search.setup(
|
||||||
config.content_type.org,
|
OrgToJsonl,
|
||||||
search_config=config.search_type.asymmetric,
|
config.content_type.org,
|
||||||
regenerate=regenerate,
|
|
||||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Initialize Org Music Search
|
|
||||||
if (t == state.SearchType.Music or t == None) and config.content_type.music and config.search_type.asymmetric:
|
|
||||||
logger.info("🎺 Setting up search for org-music")
|
|
||||||
# Extract Entries, Generate Music Embeddings
|
|
||||||
model.music_search = text_search.setup(
|
|
||||||
OrgToJsonl,
|
|
||||||
config.content_type.music,
|
|
||||||
search_config=config.search_type.asymmetric,
|
|
||||||
regenerate=regenerate,
|
|
||||||
filters=[DateFilter(), WordFilter()],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Initialize Markdown Search
|
|
||||||
if (t == state.SearchType.Markdown or t == None) and config.content_type.markdown and config.search_type.asymmetric:
|
|
||||||
logger.info("💎 Setting up search for markdown notes")
|
|
||||||
# Extract Entries, Generate Markdown Embeddings
|
|
||||||
model.markdown_search = text_search.setup(
|
|
||||||
MarkdownToJsonl,
|
|
||||||
config.content_type.markdown,
|
|
||||||
search_config=config.search_type.asymmetric,
|
|
||||||
regenerate=regenerate,
|
|
||||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Initialize Ledger Search
|
|
||||||
if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger and config.search_type.symmetric:
|
|
||||||
logger.info("💸 Setting up search for ledger")
|
|
||||||
# Extract Entries, Generate Ledger Embeddings
|
|
||||||
model.ledger_search = text_search.setup(
|
|
||||||
BeancountToJsonl,
|
|
||||||
config.content_type.ledger,
|
|
||||||
search_config=config.search_type.symmetric,
|
|
||||||
regenerate=regenerate,
|
|
||||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Initialize PDF Search
|
|
||||||
if (t == state.SearchType.Pdf or t == None) and config.content_type.pdf and config.search_type.asymmetric:
|
|
||||||
logger.info("🖨️ Setting up search for pdf")
|
|
||||||
# Extract Entries, Generate PDF Embeddings
|
|
||||||
model.pdf_search = text_search.setup(
|
|
||||||
PdfToJsonl,
|
|
||||||
config.content_type.pdf,
|
|
||||||
search_config=config.search_type.asymmetric,
|
|
||||||
regenerate=regenerate,
|
|
||||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Initialize Image Search
|
|
||||||
if (t == state.SearchType.Image or t == None) and config.content_type.image and config.search_type.image:
|
|
||||||
logger.info("🌄 Setting up search for images")
|
|
||||||
# Extract Entries, Generate Image Embeddings
|
|
||||||
model.image_search = image_search.setup(
|
|
||||||
config.content_type.image, search_config=config.search_type.image, regenerate=regenerate
|
|
||||||
)
|
|
||||||
|
|
||||||
if (t == state.SearchType.Github or t == None) and config.content_type.github and config.search_type.asymmetric:
|
|
||||||
logger.info("🐙 Setting up search for github")
|
|
||||||
# Extract Entries, Generate Github Embeddings
|
|
||||||
model.github_search = text_search.setup(
|
|
||||||
GithubToJsonl,
|
|
||||||
config.content_type.github,
|
|
||||||
search_config=config.search_type.asymmetric,
|
|
||||||
regenerate=regenerate,
|
|
||||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Initialize External Plugin Search
|
|
||||||
if (t == None or t in state.SearchType) and config.content_type.plugins:
|
|
||||||
logger.info("🔌 Setting up search for plugins")
|
|
||||||
model.plugin_search = {}
|
|
||||||
for plugin_type, plugin_config in config.content_type.plugins.items():
|
|
||||||
model.plugin_search[plugin_type] = text_search.setup(
|
|
||||||
JsonlToJsonl,
|
|
||||||
plugin_config,
|
|
||||||
search_config=config.search_type.asymmetric,
|
search_config=config.search_type.asymmetric,
|
||||||
regenerate=regenerate,
|
regenerate=regenerate,
|
||||||
filters=[DateFilter(), WordFilter(), FileFilter()],
|
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Initialize Org Music Search
|
||||||
|
if (t == state.SearchType.Music or t == None) and config.content_type.music and config.search_type.asymmetric:
|
||||||
|
logger.info("🎺 Setting up search for org-music")
|
||||||
|
# Extract Entries, Generate Music Embeddings
|
||||||
|
model.music_search = text_search.setup(
|
||||||
|
OrgToJsonl,
|
||||||
|
config.content_type.music,
|
||||||
|
search_config=config.search_type.asymmetric,
|
||||||
|
regenerate=regenerate,
|
||||||
|
filters=[DateFilter(), WordFilter()],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Initialize Markdown Search
|
||||||
|
if (
|
||||||
|
(t == state.SearchType.Markdown or t == None)
|
||||||
|
and config.content_type.markdown
|
||||||
|
and config.search_type.asymmetric
|
||||||
|
):
|
||||||
|
logger.info("💎 Setting up search for markdown notes")
|
||||||
|
# Extract Entries, Generate Markdown Embeddings
|
||||||
|
model.markdown_search = text_search.setup(
|
||||||
|
MarkdownToJsonl,
|
||||||
|
config.content_type.markdown,
|
||||||
|
search_config=config.search_type.asymmetric,
|
||||||
|
regenerate=regenerate,
|
||||||
|
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Initialize Ledger Search
|
||||||
|
if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger and config.search_type.symmetric:
|
||||||
|
logger.info("💸 Setting up search for ledger")
|
||||||
|
# Extract Entries, Generate Ledger Embeddings
|
||||||
|
model.ledger_search = text_search.setup(
|
||||||
|
BeancountToJsonl,
|
||||||
|
config.content_type.ledger,
|
||||||
|
search_config=config.search_type.symmetric,
|
||||||
|
regenerate=regenerate,
|
||||||
|
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Initialize PDF Search
|
||||||
|
if (t == state.SearchType.Pdf or t == None) and config.content_type.pdf and config.search_type.asymmetric:
|
||||||
|
logger.info("🖨️ Setting up search for pdf")
|
||||||
|
# Extract Entries, Generate PDF Embeddings
|
||||||
|
model.pdf_search = text_search.setup(
|
||||||
|
PdfToJsonl,
|
||||||
|
config.content_type.pdf,
|
||||||
|
search_config=config.search_type.asymmetric,
|
||||||
|
regenerate=regenerate,
|
||||||
|
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Initialize Image Search
|
||||||
|
if (t == state.SearchType.Image or t == None) and config.content_type.image and config.search_type.image:
|
||||||
|
logger.info("🌄 Setting up search for images")
|
||||||
|
# Extract Entries, Generate Image Embeddings
|
||||||
|
model.image_search = image_search.setup(
|
||||||
|
config.content_type.image, search_config=config.search_type.image, regenerate=regenerate
|
||||||
|
)
|
||||||
|
|
||||||
|
if (t == state.SearchType.Github or t == None) and config.content_type.github and config.search_type.asymmetric:
|
||||||
|
logger.info("🐙 Setting up search for github")
|
||||||
|
# Extract Entries, Generate Github Embeddings
|
||||||
|
model.github_search = text_search.setup(
|
||||||
|
GithubToJsonl,
|
||||||
|
config.content_type.github,
|
||||||
|
search_config=config.search_type.asymmetric,
|
||||||
|
regenerate=regenerate,
|
||||||
|
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Initialize External Plugin Search
|
||||||
|
if (t == None or t in state.SearchType) and config.content_type.plugins:
|
||||||
|
logger.info("🔌 Setting up search for plugins")
|
||||||
|
model.plugin_search = {}
|
||||||
|
for plugin_type, plugin_config in config.content_type.plugins.items():
|
||||||
|
model.plugin_search[plugin_type] = text_search.setup(
|
||||||
|
JsonlToJsonl,
|
||||||
|
plugin_config,
|
||||||
|
search_config=config.search_type.asymmetric,
|
||||||
|
regenerate=regenerate,
|
||||||
|
filters=[DateFilter(), WordFilter(), FileFilter()],
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("🚨 Failed to setup search")
|
||||||
|
raise e
|
||||||
|
|
||||||
# Invalidate Query Cache
|
# Invalidate Query Cache
|
||||||
state.query_cache = LRU()
|
state.query_cache = LRU()
|
||||||
|
|
||||||
|
|
|
@ -384,8 +384,13 @@ def update(
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
state.search_index_lock.acquire()
|
state.search_index_lock.acquire()
|
||||||
state.model = configure_search(state.model, state.config, regenerate=force or False, t=t)
|
try:
|
||||||
state.search_index_lock.release()
|
state.model = configure_search(state.model, state.config, regenerate=force or False, t=t)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(e)
|
||||||
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
finally:
|
||||||
|
state.search_index_lock.release()
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
logger.error(e)
|
logger.error(e)
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
|
@ -10,9 +10,7 @@ from khoj.utils.yaml import parse_config_from_file
|
||||||
|
|
||||||
def cli(args=None):
|
def cli(args=None):
|
||||||
# Setup Argument Parser for the Commandline Interface
|
# Setup Argument Parser for the Commandline Interface
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(description="Start Khoj; An AI personal assistant for your Digital Brain")
|
||||||
description="Start Khoj; A Natural Language Search Engine for your personal Notes, Transactions and Photos"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--config-file", "-c", default="~/.khoj/khoj.yml", type=pathlib.Path, help="YAML file to configure Khoj"
|
"--config-file", "-c", default="~/.khoj/khoj.yml", type=pathlib.Path, help="YAML file to configure Khoj"
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in a new issue