mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-30 19:03:01 +01:00
7b907add77
* Add support for indexing plaintext files - Adds backend support for parsing plaintext files generically (.html, .txt, .xml, .csv, .md) - Add equivalent frontend views for setting up plaintext file indexing - Update config, rawconfig, default config, search API, setup endpoints * Add a nifty plaintext file icon to configure plaintext files in the Web UI * Use generic glob path for plaintext files. Skip indexing files that aren't in whitelist
28 lines
809 B
YAML
Vendored
28 lines
809 B
YAML
Vendored
---
# Configuration with three sections (content-type, search-type, version)
# plus one top-level flag (enable-offline-chat).
#
# NOTE(review): this copy had lost all indentation and carried stray `|`
# separator lines between entries. The nesting below is reconstructed from
# key order: keys at every level appear alphabetically sorted, which fixes
# where each mapping must end. Confirm against the upstream file.

content-type:
  org:
    compressed-jsonl: .notes.json.gz
    embeddings-file: .note_embeddings.pt
    index-header-entries: true
    input-files:
      - ~/first_from_config.org
      - ~/second_from_config.org
    input-filter:
      # Quoted: a plain scalar starting with `*` would be read as an alias.
      - '*.org'
      - ~/notes/*.org
  plugins:
    content_plugin_1:
      compressed-jsonl: content_plugin_1.jsonl.gz
      embeddings-file: content_plugin_1_embeddings.pt
      input-files:
        - content_plugin_1_new.jsonl.gz
    content_plugin_2:
      compressed-jsonl: content_plugin_2.jsonl.gz
      embeddings-file: content_plugin_2_embeddings.pt
      input-filter:
        - '*2_new.jsonl.gz'

enable-offline-chat: false

search-type:
  asymmetric:
    cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
    encoder: sentence-transformers/msmarco-MiniLM-L-6-v3

# Quoted so the version is unambiguously a string on every parser.
version: '0.10.1'