diff --git a/README.md b/README.md
index 5868f0a6..6a173359 100644
--- a/README.md
+++ b/README.md
@@ -64,7 +64,7 @@
- **General**
- **Natural**: Advanced natural language understanding using Transformer based ML Models
- **Pluggable**: Modular architecture makes it easy to plug in new data sources, frontends and ML models
- - **Multiple Sources**: Index your Org-mode and Markdown notes, Beancount transactions, PDF files, Github repositories, and Photos
+ - **Multiple Sources**: Index your Org-mode and Markdown notes, PDF files, GitHub repositories, and Photos
- **Multiple Interfaces**: Interact from your [Web Browser](./src/khoj/interface/web/index.html), [Emacs](./src/interface/emacs/khoj.el) or [Obsidian](./src/interface/obsidian/)
## Demos
@@ -267,7 +267,7 @@ pip install --upgrade --pre khoj-assistant
2. [Install](https://tailscale.com/kb/installation/) [Tailscale](tailscale.com/) on your personal server and phone
3. Open the Khoj web interface of the server from your phone browser.
It should be `http://tailscale-ip-of-server:8000` or `http://name-of-server:8000` if you've setup [MagicDNS](https://tailscale.com/kb/1081/magicdns/)
4. Click the [Add to Homescreen](https://developer.mozilla.org/en-US/docs/Web/Progressive_web_apps/Add_to_home_screen) button
-5. Enjoy exploring your notes, transactions and images from your phone!
+5. Enjoy exploring your notes, documents and images from your phone!
![](https://github.com/khoj-ai/khoj/blob/master/docs/khoj_pwa_android.png?)
@@ -399,7 +399,7 @@ pip install -e .[dev]
- Delete `content-type` and `processor` sub-section(s) irrelevant for your use-case
- Restart khoj
- Note: Wait after configuration for khoj to Load ML model, generate embeddings and expose API to query notes, images, transactions etc specified in config YAML
+ Note: After configuration, wait for khoj to load the ML model, generate embeddings and expose the API to query notes, images, documents etc. specified in config YAML
#### Using Docker
##### 1. Clone
@@ -410,7 +410,7 @@ git clone https://github.com/khoj-ai/khoj && cd khoj
##### 2. Configure
-- **Required**: Update [docker-compose.yml](./docker-compose.yml) to mount your images, (org-mode or markdown) notes, pdf, Github repositories, and beancount directories
+- **Required**: Update [docker-compose.yml](./docker-compose.yml) to mount your images, (org-mode or markdown) notes, PDFs and GitHub repositories
- **Optional**: Edit application configuration in [khoj_docker.yml](./config/khoj_docker.yml)
##### 3. Run
@@ -449,7 +449,7 @@ python3 -m pip install pyqt6 # As conda does not support pyqt6 yet
```shell
python3 -m src.khoj.main -vv
```
- Load ML model, generate embeddings and expose API to query notes, images, transactions etc specified in config YAML
+ Load the ML model, generate embeddings and expose the API to query notes, images, documents etc. specified in config YAML
##### 5. Upgrade
```shell
diff --git a/docker-compose.yml b/docker-compose.yml
index ec9af160..9ba95d75 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -18,7 +18,6 @@ services:
# must match the path prefix in your config file.
- ./tests/data/org/:/data/org/
- ./tests/data/images/:/data/images/
- - ./tests/data/ledger/:/data/ledger/
- ./tests/data/markdown/:/data/markdown/
- ./tests/data/pdf/:/data/pdf/
# Embeddings and models are populated after the first run
diff --git a/pyproject.toml b/pyproject.toml
index e6632574..9bcc7efd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,7 +19,6 @@ keywords = [
"AI",
"org-mode",
"markdown",
- "beancount",
"images",
"pdf",
]
diff --git a/src/interface/emacs/khoj.el b/src/interface/emacs/khoj.el
index ae625f9f..cfd59ee6 100644
--- a/src/interface/emacs/khoj.el
+++ b/src/interface/emacs/khoj.el
@@ -4,7 +4,7 @@
;; Author: Debanjum Singh Solanky
;; Description: An AI personal assistant for your digital brain
-;; Keywords: search, chat, org-mode, outlines, markdown, pdf, beancount, image
+;; Keywords: search, chat, org-mode, outlines, markdown, pdf, image
;; Version: 0.7.0
;; Package-Requires: ((emacs "27.1") (transient "0.3.0") (dash "2.19.1"))
;; URL: https://github.com/khoj-ai/khoj/tree/master/src/interface/emacs
@@ -29,8 +29,7 @@
;;; Commentary:
;; Create an AI personal assistant for your `org-mode', `markdown' notes,
-;; `beancount' transactions, PDFs and images. This package exposes
-;; two assistance modes, search and chat:
+;; PDFs and images. The assistant exposes 2 modes, search and chat:
;;
;; Chat provides faster answers, iterative discovery and assisted
;; creativity. It requires your OpenAI API key to access GPT models
@@ -93,7 +92,6 @@
:group 'khoj
:type '(choice (const "org")
(const "markdown")
- (const "ledger")
(const "image")
(const "pdf")))
@@ -119,7 +117,6 @@
(declare-function org-element-property "org-mode" (PROPERTY ELEMENT))
(declare-function org-element-type "org-mode" (ELEMENT))
-(declare-function beancount-mode "beancount" ())
(declare-function markdown-mode "markdown-mode" ())
(declare-function which-key--show-keymap "which-key" (KEYMAP-NAME KEYMAP &optional PRIOR-ARGS ALL
NO-PAGING FILTER))
@@ -135,8 +132,6 @@ NO-PAGING FILTER))
"C-x m | markdown\n")
(when (member 'org enabled-content-types)
"C-x o | org-mode\n")
- (when (member 'ledger enabled-content-types)
- "C-x l | ledger\n")
(when (member 'image enabled-content-types)
"C-x i | image\n")
(when (member 'pdf enabled-content-types)
@@ -146,7 +141,6 @@ NO-PAGING FILTER))
(defvar khoj--reference-count 0 "Track number of references currently in chat bufffer.")
(defun khoj--search-markdown () "Set content-type to `markdown'." (interactive) (setq khoj--content-type "markdown"))
(defun khoj--search-org () "Set content-type to `org-mode'." (interactive) (setq khoj--content-type "org"))
-(defun khoj--search-ledger () "Set content-type to `ledger'." (interactive) (setq khoj--content-type "ledger"))
(defun khoj--search-images () "Set content-type to image." (interactive) (setq khoj--content-type "image"))
(defun khoj--search-pdf () "Set content-type to pdf." (interactive) (setq khoj--content-type "pdf"))
(defun khoj--improve-rank () "Use cross-encoder to rerank search results." (interactive) (khoj--incremental-search t))
@@ -159,8 +153,6 @@ NO-PAGING FILTER))
(define-key kmap (kbd "C-x m") #'khoj--search-markdown))
(when (member 'org enabled-content-types)
(define-key kmap (kbd "C-x o") #'khoj--search-org))
- (when (member 'ledger enabled-content-types)
- (define-key kmap (kbd "C-x l") #'khoj--search-ledger))
(when (member 'image enabled-content-types)
(define-key kmap (kbd "C-x i") #'khoj--search-images))
(when (member 'pdf enabled-content-types)
@@ -531,18 +523,6 @@ CONFIG is json obtained from Khoj config API."
;; remove leading (, ) or SPC from extracted entries string
(replace-regexp-in-string "^[\(\) ]" "")))
-(defun khoj--extract-entries-as-ledger (json-response query)
- "Convert JSON-RESPONSE, QUERY from API to ledger entries."
- (thread-last json-response
- ;; extract and render entries from API response
- (mapcar (lambda (args) (format "%s\n\n" (cdr (assoc 'entry args)))))
- ;; Set query as heading in rendered results buffer
- (format ";; %s\n\n%s\n" query)
- ;; remove leading (, ) or SPC from extracted entries string
- (replace-regexp-in-string "^[\(\) ]" "")
- ;; remove trailing (, ) or SPC from extracted entries string
- (replace-regexp-in-string "[\(\) ]$" "")))
-
(defun khoj--extract-entries-as-pdf (json-response query)
"Convert QUERY, JSON-RESPONSE from API with PDF results to `org-mode' entries."
(thread-last
@@ -614,7 +594,6 @@ CONFIG is json obtained from Khoj config API."
(let ((enabled-content-types (khoj--get-enabled-content-types))
(file-extension (file-name-extension buffer-name)))
(cond
- ((and (member 'ledger enabled-content-types) (or (equal file-extension "bean") (equal file-extension "beancount"))) "ledger")
((and (member 'org enabled-content-types) (equal file-extension "org")) "org")
((and (member 'org enabled-content-types) (equal file-extension "pdf")) "pdf")
((and (member 'markdown enabled-content-types) (or (equal file-extension "markdown") (equal file-extension "md"))) "markdown")
@@ -673,7 +652,6 @@ Render results in BUFFER-NAME using QUERY, CONTENT-TYPE."
(cond ((equal content-type "org") (khoj--extract-entries-as-org json-response query))
((equal content-type "markdown") (khoj--extract-entries-as-markdown json-response query))
((equal content-type "pdf") (khoj--extract-entries-as-pdf json-response query))
- ((equal content-type "ledger") (khoj--extract-entries-as-ledger json-response query))
((equal content-type "image") (khoj--extract-entries-as-images json-response query))
(t (khoj--extract-entries json-response query))))
(cond ((or (equal content-type "all")
@@ -688,7 +666,6 @@ Render results in BUFFER-NAME using QUERY, CONTENT-TYPE."
(org-set-startup-visibility)))
((equal content-type "markdown") (progn (markdown-mode)
(visual-line-mode)))
- ((equal content-type "ledger") (beancount-mode))
((equal content-type "image") (progn (shr-render-region (point-min) (point-max))
(goto-char (point-min))))
(t (fundamental-mode))))
@@ -1004,7 +981,7 @@ Paragraph only starts at first text after blank line."
;; set content type to: last used > based on current buffer > default type
:init-value (lambda (obj) (oset obj value (format "--content-type=%s" (or khoj--content-type (khoj--buffer-name-to-content-type (buffer-name))))))
;; dynamically set choices to content types enabled on khoj backend
- :choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "ledger" "image")))
+ :choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "image")))
(transient-define-suffix khoj--search-command (&optional args)
(interactive (list (transient-args transient-current-command)))
@@ -1064,7 +1041,7 @@ Paragraph only starts at first text after blank line."
;;;###autoload
(defun khoj ()
- "Provide natural, search assistance for your notes, transactions and images."
+ "Provide natural, search assistance for your notes, documents and images."
(interactive)
(when khoj-auto-setup
(khoj-setup t))
diff --git a/src/interface/emacs/tests/khoj-tests.el b/src/interface/emacs/tests/khoj-tests.el
index 4e279114..8242d30b 100644
--- a/src/interface/emacs/tests/khoj-tests.el
+++ b/src/interface/emacs/tests/khoj-tests.el
@@ -112,46 +112,6 @@ Rule everything\n\
\n"))))
-(ert-deftest khoj-tests--extract-entries-as-ledger ()
- "Test `json-response', `query' from API formatted as beancount ledger."
- (let ((user-query "Become God")
- (json-response-from-khoj-backend
- (json-read-from-string
- "[\
-{\
- \"entry\": \"4242-04-01 * \\\"Penance Center\\\" \\\"Book Stay for 10,000 Years\\\"\\n Expenses:Health:Mental 15 GOLD\\n Assets:Commodities:Gold\",\
- \"score\": \"0.42\",\
- \"additional\": {\
- \"file\": \"/home/ravan/ledger.beancount\",\
- \"compiled\": \"4242-04-01 * \\\"Penance Center\\\" \\\"Book Stay for 10,000 Years\\\" Expenses:Health:Mental 15 GOLD Assets:Commodities:Gold\"\
- }\
-},\
-{\
- \"entry\": \"14242-04-01 * \\\"Brahma\\\" \\\"Boon for Invincibility from Higher Beings\\\"\\n Income:Health -1,00,00,000 LIFE\\n Assets:Commodities:Life\",\
- \"score\": \"0.42\",\
- \"additional\": {\
- \"file\": \"/home/ravan/ledger.beancount\",\
- \"compiled\": \"4242-04-01 * \\\"Brahma\\\" \\\"Boon for Invincibility from Higher Beings\\\" Income:Health -1,00,00,000 LIFE Assets:Commodities:Life\"\
- }\
-}]\
-")))
- (should
- (equal
- (khoj--extract-entries-as-ledger json-response-from-khoj-backend user-query)
- ";; Become God\n\
-\n\
-4242-04-01 * \"Penance Center\" \"Book Stay for 10,000 Years\"\n\
- Expenses:Health:Mental 15 GOLD\n\
- Assets:Commodities:Gold\n\
-\n\
-14242-04-01 * \"Brahma\" \"Boon for Invincibility from Higher Beings\"\n\
- Income:Health -1,00,00,000 LIFE\n\
- Assets:Commodities:Life\n\
-\n\
-\n\
-"))))
-
-
;; -------------------------------------
;; Test Helpers for Find Similar Feature
diff --git a/src/khoj/configure.py b/src/khoj/configure.py
index 8167d672..a1e07205 100644
--- a/src/khoj/configure.py
+++ b/src/khoj/configure.py
@@ -12,7 +12,6 @@ from fastapi.staticfiles import StaticFiles
# Internal Packages
from khoj.processor.conversation.gpt import summarize
-from khoj.processor.ledger.beancount_to_jsonl import BeancountToJsonl
from khoj.processor.jsonl.jsonl_to_jsonl import JsonlToJsonl
from khoj.processor.markdown.markdown_to_jsonl import MarkdownToJsonl
from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
@@ -122,18 +121,6 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
filters=[DateFilter(), WordFilter(), FileFilter()],
)
- # Initialize Ledger Search
- if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger and config.search_type.symmetric:
- logger.info("💸 Setting up search for ledger")
- # Extract Entries, Generate Ledger Embeddings
- model.ledger_search = text_search.setup(
- BeancountToJsonl,
- config.content_type.ledger,
- search_config=config.search_type.symmetric,
- regenerate=regenerate,
- filters=[DateFilter(), WordFilter(), FileFilter()],
- )
-
# Initialize PDF Search
if (t == state.SearchType.Pdf or t == None) and config.content_type.pdf and config.search_type.asymmetric:
logger.info("🖨️ Setting up search for pdf")
diff --git a/src/khoj/interface/web/index.html b/src/khoj/interface/web/index.html
index 4f95f0e2..949ab783 100644
--- a/src/khoj/interface/web/index.html
+++ b/src/khoj/interface/web/index.html
@@ -47,12 +47,6 @@
}).join("\n");
}
- function render_ledger(query, data) {
- return data.map(function (item) {
- return ``;
- }).join("\n");
- }
-
function render_pdf(query, data) {
return data.map(function (item) {
let compiled_lines = item.additional.compiled.split("\n");
@@ -90,8 +84,6 @@
results = render_org(query, data, "org-");
} else if (type === "image") {
results = data.map(render_image).join('');
- } else if (type === "ledger") {
- results = render_ledger(query, data);
} else if (type === "pdf") {
results = render_pdf(query, data);
} else if (type === "github" || type === "all") {
@@ -360,8 +352,7 @@
white-space: pre-wrap;
}
.results-pdf,
- .results-plugin,
- .results-ledger {
+ .results-plugin {
text-align: left;
white-space: pre-line;
}
diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py
index b9fa5fdd..2cbc9f40 100644
--- a/src/khoj/processor/conversation/prompts.py
+++ b/src/khoj/processor/conversation/prompts.py
@@ -143,19 +143,15 @@ search_type = """
Objective: Extract search type from user query and return information as JSON
Allowed search types are listed below:
- - search-type=["notes","ledger","image", "pdf"]
+ - search-type=["notes", "image", "pdf"]
Some examples are given below for reference:
Q:What fiction book was I reading last week about AI starship?
A:{ "search-type": "notes" }
Q: What did the lease say about early termination
A: { "search-type": "pdf" }
-Q:How much did I spend at Subway for dinner last time?
-A:{ "search-type": "ledger" }
Q:Can you recommend a movie to watch from my notes?
A:{ "search-type": "notes" }
-Q:When did I buy Groceries last?
-A:{ "search-type": "ledger" }
Q:When did I go surfing last?
A:{ "search-type": "notes" }
Q:"""
diff --git a/src/khoj/processor/ledger/__init__.py b/src/khoj/processor/ledger/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/khoj/processor/ledger/beancount_to_jsonl.py b/src/khoj/processor/ledger/beancount_to_jsonl.py
deleted file mode 100644
index 347012a3..00000000
--- a/src/khoj/processor/ledger/beancount_to_jsonl.py
+++ /dev/null
@@ -1,133 +0,0 @@
-# Standard Packages
-import glob
-import re
-import logging
-from typing import List
-
-# Internal Packages
-from khoj.processor.text_to_jsonl import TextToJsonl
-from khoj.utils.helpers import get_absolute_path, is_none_or_empty, timer
-from khoj.utils.constants import empty_escape_sequences
-from khoj.utils.jsonl import dump_jsonl, compress_jsonl_data
-from khoj.utils.rawconfig import Entry
-
-
-logger = logging.getLogger(__name__)
-
-
-class BeancountToJsonl(TextToJsonl):
- # Define Functions
- def process(self, previous_entries=None):
- # Extract required fields from config
- beancount_files, beancount_file_filter, output_file = (
- self.config.input_files,
- self.config.input_filter,
- self.config.compressed_jsonl,
- )
-
- # Input Validation
- if is_none_or_empty(beancount_files) and is_none_or_empty(beancount_file_filter):
- print("At least one of beancount-files or beancount-file-filter is required to be specified")
- exit(1)
-
- # Get Beancount Files to Process
- beancount_files = BeancountToJsonl.get_beancount_files(beancount_files, beancount_file_filter)
-
- # Extract Entries from specified Beancount files
- with timer("Parse transactions from Beancount files into dictionaries", logger):
- current_entries = BeancountToJsonl.convert_transactions_to_maps(
- *BeancountToJsonl.extract_beancount_transactions(beancount_files)
- )
-
- # Split entries by max tokens supported by model
- with timer("Split entries by max token size supported by model", logger):
- current_entries = self.split_entries_by_max_tokens(current_entries, max_tokens=256)
-
- # Identify, mark and merge any new entries with previous entries
- with timer("Identify new or updated transaction", logger):
- if not previous_entries:
- entries_with_ids = list(enumerate(current_entries))
- else:
- entries_with_ids = TextToJsonl.mark_entries_for_update(
- current_entries, previous_entries, key="compiled", logger=logger
- )
-
- with timer("Write transactions to JSONL file", logger):
- # Process Each Entry from All Notes Files
- entries = list(map(lambda entry: entry[1], entries_with_ids))
- jsonl_data = BeancountToJsonl.convert_transaction_maps_to_jsonl(entries)
-
- # Compress JSONL formatted Data
- if output_file.suffix == ".gz":
- compress_jsonl_data(jsonl_data, output_file)
- elif output_file.suffix == ".jsonl":
- dump_jsonl(jsonl_data, output_file)
-
- return entries_with_ids
-
- @staticmethod
- def get_beancount_files(beancount_files=None, beancount_file_filters=None):
- "Get Beancount files to process"
- absolute_beancount_files, filtered_beancount_files = set(), set()
- if beancount_files:
- absolute_beancount_files = {get_absolute_path(beancount_file) for beancount_file in beancount_files}
- if beancount_file_filters:
- filtered_beancount_files = {
- filtered_file
- for beancount_file_filter in beancount_file_filters
- for filtered_file in glob.glob(get_absolute_path(beancount_file_filter), recursive=True)
- }
-
- all_beancount_files = sorted(absolute_beancount_files | filtered_beancount_files)
-
- files_with_non_beancount_extensions = {
- beancount_file
- for beancount_file in all_beancount_files
- if not beancount_file.endswith(".bean") and not beancount_file.endswith(".beancount")
- }
- if any(files_with_non_beancount_extensions):
- print(f"[Warning] There maybe non beancount files in the input set: {files_with_non_beancount_extensions}")
-
- logger.debug(f"Processing files: {all_beancount_files}")
-
- return all_beancount_files
-
- @staticmethod
- def extract_beancount_transactions(beancount_files):
- "Extract entries from specified Beancount files"
-
- # Initialize Regex for extracting Beancount Entries
- transaction_regex = r"^\n?\d{4}-\d{2}-\d{2} [\*|\!] "
- empty_newline = f"^[\n\r\t\ ]*$"
-
- entries = []
- transaction_to_file_map = []
- for beancount_file in beancount_files:
- with open(beancount_file) as f:
- ledger_content = f.read()
- transactions_per_file = [
- entry.strip(empty_escape_sequences)
- for entry in re.split(empty_newline, ledger_content, flags=re.MULTILINE)
- if re.match(transaction_regex, entry)
- ]
- transaction_to_file_map += zip(transactions_per_file, [beancount_file] * len(transactions_per_file))
- entries.extend(transactions_per_file)
- return entries, dict(transaction_to_file_map)
-
- @staticmethod
- def convert_transactions_to_maps(parsed_entries: List[str], transaction_to_file_map) -> List[Entry]:
- "Convert each parsed Beancount transaction into a Entry"
- entries = []
- for parsed_entry in parsed_entries:
- entries.append(
- Entry(compiled=parsed_entry, raw=parsed_entry, file=f"{transaction_to_file_map[parsed_entry]}")
- )
-
- logger.debug(f"Converted {len(parsed_entries)} transactions to dictionaries")
-
- return entries
-
- @staticmethod
- def convert_transaction_maps_to_jsonl(entries: List[Entry]) -> str:
- "Convert each Beancount transaction entry to JSON and collate as JSONL"
- return "".join([f"{entry.to_json()}\n" for entry in entries])
diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index 6e3818d1..86f0e19d 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -171,11 +171,9 @@ async def search(
defiltered_query = filter.defilter(user_query)
encoded_asymmetric_query = None
- if t == SearchType.All or (t != SearchType.Ledger and t != SearchType.Image):
+ if t == SearchType.All or t != SearchType.Image:
text_search_models: List[TextSearchModel] = [
- model
- for model_name, model in state.model.__dict__.items()
- if isinstance(model, TextSearchModel) and model_name != "ledger_search"
+ model for model in state.model.__dict__.values() if isinstance(model, TextSearchModel)
]
if text_search_models:
with timer("Encoding query took", logger=logger):
@@ -244,19 +242,6 @@ async def search(
)
]
- if (t == SearchType.Ledger) and state.model.ledger_search:
- # query transactions
- search_futures += [
- executor.submit(
- text_search.query,
- user_query,
- state.model.ledger_search,
- rank_results=r or False,
- score_threshold=score_threshold,
- dedupe=dedupe or True,
- )
- ]
-
if (t == SearchType.Image) and state.model.image_search:
# query images
search_futures += [
diff --git a/src/khoj/routers/web_client.py b/src/khoj/routers/web_client.py
index 6ab94181..c5ed0b3b 100644
--- a/src/khoj/routers/web_client.py
+++ b/src/khoj/routers/web_client.py
@@ -16,7 +16,7 @@ import json
web_client = APIRouter()
templates = Jinja2Templates(directory=constants.web_directory)
-VALID_CONTENT_TYPES = ["org", "ledger", "markdown", "pdf"]
+VALID_TEXT_CONTENT_TYPES = ["org", "markdown", "pdf"]
# Create Routes
@@ -60,7 +60,7 @@ if not state.demo:
@web_client.get("/config/content_type/{content_type}", response_class=HTMLResponse)
def content_config_page(request: Request, content_type: str):
- if content_type not in VALID_CONTENT_TYPES:
+ if content_type not in VALID_TEXT_CONTENT_TYPES:
return templates.TemplateResponse("config.html", context={"request": request})
default_copy = constants.default_config.copy()
diff --git a/src/khoj/utils/config.py b/src/khoj/utils/config.py
index b15fe811..3adc6e9d 100644
--- a/src/khoj/utils/config.py
+++ b/src/khoj/utils/config.py
@@ -19,7 +19,6 @@ if TYPE_CHECKING:
class SearchType(str, Enum):
All = "all"
Org = "org"
- Ledger = "ledger"
Markdown = "markdown"
Image = "image"
Pdf = "pdf"
@@ -60,7 +59,6 @@ class ImageSearchModel:
@dataclass
class SearchModels:
org_search: TextSearchModel = None
- ledger_search: TextSearchModel = None
markdown_search: TextSearchModel = None
pdf_search: TextSearchModel = None
image_search: ImageSearchModel = None
diff --git a/src/khoj/utils/constants.py b/src/khoj/utils/constants.py
index 49a55761..caf64ac2 100644
--- a/src/khoj/utils/constants.py
+++ b/src/khoj/utils/constants.py
@@ -22,12 +22,6 @@ default_config = {
"compressed-jsonl": "~/.khoj/content/markdown/markdown.jsonl.gz",
"embeddings-file": "~/.khoj/content/markdown/markdown_embeddings.pt",
},
- "ledger": {
- "input-files": None,
- "input-filter": None,
- "compressed-jsonl": "~/.khoj/content/ledger/ledger.jsonl.gz",
- "embeddings-file": "~/.khoj/content/ledger/ledger_embeddings.pt",
- },
"pdf": {
"input-files": None,
"input-filter": None,
diff --git a/src/khoj/utils/rawconfig.py b/src/khoj/utils/rawconfig.py
index cf3b4f29..b13c7449 100644
--- a/src/khoj/utils/rawconfig.py
+++ b/src/khoj/utils/rawconfig.py
@@ -72,7 +72,6 @@ class ImageContentConfig(ConfigBase):
class ContentConfig(ConfigBase):
org: Optional[TextContentConfig]
- ledger: Optional[TextContentConfig]
image: Optional[ImageContentConfig]
markdown: Optional[TextContentConfig]
pdf: Optional[TextContentConfig]
diff --git a/tests/data/ledger/otzi.beancount b/tests/data/ledger/otzi.beancount
deleted file mode 100644
index 29682985..00000000
--- a/tests/data/ledger/otzi.beancount
+++ /dev/null
@@ -1,233 +0,0 @@
-; -*- mode: org; mode: beancount; -*-
-;; Otzi's Ledger: A 3rd Millenium B.C Mountain Shepherd's Ledger
-;;
-;; A stylized recreation of Otzi's transaction history from a few months before his death
-;; based on https://en.wikipedia.org/wiki/Otzi#Scientific_analyses
-
-* Options ; Beancount options
-#+STARTUP: content
-option "title" "Beancount Ledger"
-option "operating_currency" "COWRIE" ; The main currencies you use
-
-* Accounts ; Open all the accounts
-3300-04-01 open Equity:Sheep ANIMALS
- description: "Inheritance from Parents"
-
-3300-04-01 open Income:Hunt ANIMALS
- description: "From Hunting Animals"
-3300-04-01 open Income:Forage PLANTS
- description: "From Foraging Wild Fruits, Plants"
-3300-04-01 open Income:Market COWRIE
- description: "Assets sold at the market"
-
-3300-04-01 open Assets:Animal ANIMALS
- description: "Animals Owned Like Sheep, Goats, Cows"
-3300-04-01 open Assets:Food MEALS
- description: "Food for Consumption"
-3300-04-01 open Assets:Food:Meat MEALS
- description: "Killed Animals for Consumption"
-3300-04-01 open Assets:Food:Veggie MEALS
- description: "Procured, Foraged Fruits, Grains"
-3300-04-01 open Assets:Plant PLANTS
- description: "Procured, Foraged Plants"
-3300-04-01 open Assets:Tools TOOLS
- description: "Procured, Made Tools"
-3300-04-01 open Assets:Cash COWRIE
- description: "Cowrie Shells in Pouch"
-
-3300-04-01 open Expenses:Medicine COWRIE
- description: "Procured, Foraged Medicinals"
-3300-04-01 open Expenses:Tools:Weapons COWRIE
- description: "Bought Weapons"
-3300-04-01 open Expenses:Food
- description: "Bought, Consumed Meals"
-3300-04-01 open Expenses:Clothing COWRIE
- description: "Bought Clothes"
-3300-04-01 open Expenses:Tools COWRIE
- description: "Bought Tools"
-
-* Transactions
-3345-03-15 * "Parents" "Inheritance"
- note: "Opening Balance"
- Equity:Sheep -20 ANIMALS
- Assets:Animal
-
-3345-03-26 * "Hauslabjoch Pass, Otzal Alps" "Red Deers"
- Income:Hunt -2 ANIMALS {50 COWRIE}
- Assets:Food:Meat 10 MEALS {7.5 COWRIE, "Deer"}
- Assets:Animal 0.5 ANIMALS {50 COWRIE}
-
-3345-03-28 * "Hauslabjoch Pass, Otzal Alps" "Wild Berries"
- Income:Forage -60 PLANTS
- Assets:Food:Veggie 3 MEALS {20 PLANTS, "Berry"}
-
-3345-04-02 * "Hauslabjoch Pass, Otzal Alps" "Last Weeks Meals"
- Assets:Food:Meat -7 MEALS {7.5 COWRIE, "Deer"}
- Assets:Food:Veggie -3 MEALS {20 PLANTS, "Berry"}
- Expenses:Food
-
-3345-04-02 * "Hauslabjoch Pass, Otzal Alps" "Sloe"
- Income:Forage -50 PLANTS
- Assets:Food:Veggie 5 MEALS {10 PLANTS, "Sloe"}
-
-3345-04-05 * "Hauslabjoch Pass, Otzal Alps" "Ibex"
- Income:Hunt -2 ANIMALS {100 COWRIE}
- Assets:Food:Meat 10 MEALS {15 COWRIE, "Ibex"}
- Assets:Animal 0.5 ANIMALS {100 COWRIE}
-
-3345-04-08 * "Hauslabjoch Pass, Otzal Alps" "Birch Fungus Medicinal Mushroom"
- Income:Forage -6 PLANTS {100 COWRIE}
- Assets:Plant 6 PLANTS {100 COWRIE}
-
-3345-04-09 * "Hauslabjoch Pass, Otzal Alps" "Last Weeks Meals"
- Assets:Food:Meat -3 MEALS {7.5 COWRIE, "Deer"}
- Assets:Food:Meat -4 MEALS {15 COWRIE, "Ibex"}
- Assets:Food:Veggie -3 MEALS {10 PLANTS, "Sloe"}
- Expenses:Food
-
-3345-04-15 * "Innsbruck Farmers Market" "Sold Red Deers Skin, Antler"
- Assets:Animal -0.5 ANIMALS {50 COWRIE}
- Assets:Cash 25 COWRIE
-
-3345-04-15 * "Innsbruck Farmers Market" "Sold Ibex Skin, Antler"
- Assets:Animal -0.5 ANIMALS {100 COWRIE}
- Assets:Cash 50 COWRIE
-
-3345-04-15 * "Innsbruck Farmers Market" "Sold Birch Fungus Medicinal Mushroom"
- Assets:Plant -5 PLANTS {100 COWRIE}
- Assets:Cash 500 COWRIE
-
-3345-04-15 * "Innsbruck Farmers Market" "Snow Shoes: Bearskin, Deer hide, Tree Bark"
- note: "Expensive Bearkskin but need not want"
- Assets:Cash -90 COWRIE
- Expenses:Clothing
-
-3345-04-15 * "Innsbruck Farmers Market" "Soft Grass Socks"
- Assets:Cash -10 COWRIE
- Expenses:Clothing
-
-3345-04-15 * "Innsbruck Farmers Market" "Cattle Shoelace"
- Assets:Cash -5 COWRIE
- Expenses:Clothing
-
-3345-04-15 * "Innsbruck Farmers Market" "Einkorn Wheat Bran Bread"
- Assets:Cash -50 COWRIE
- Assets:Food:Veggie 5 MEALS {10 COWRIE, "Bread"}
-
-3345-04-16 * "Enroute to Innsbruck" "Last Weeks Meals"
- Assets:Food:Meat -6 MEALS {15 COWRIE, "Ibex"}
- Assets:Food:Veggie -2 MEALS {10 PLANTS, "Sloe"}
- Expenses:Food
-
-3345-04-16 * "Innsbruck Tools Market" "Firelighting Kit: Plants, Pyrite, Flint"
- Assets:Cash -30 COWRIE
- Expenses:Tools
-
-3345-04-16 * "Innsbruck Tools Market" "Flint Blade, Ash Handle Knife"
- Assets:Cash -50 COWRIE
- Expenses:Tools:Weapons
-
-3345-04-20 * "Tisenjoch Pass, Otzal Alps" "Chamois"
- Income:Hunt -1 ANIMALS {100 COWRIE}
- Assets:Food:Meat 5 MEALS {10 COWRIE, "Chamois"}
- Assets:Animal 0.5 ANIMALS {100 COWRIE}
-
-3345-04-22 * "Tisenjoch Pass, Otzal Alps" "Roe Deer"
- Income:Hunt -2 ANIMALS {50 COWRIE}
- Assets:Food:Meat 10 MEALS {7.5 COWRIE, "Deer"}
- Assets:Animal 0.5 ANIMALS {50 COWRIE}
-
-3345-04-23 * "Tisenjoch Pass, Otzal Alps" "Last Weeks Meals"
- Assets:Food:Veggie -4 MEALS {10 COWRIE, "Bread"}
- Assets:Food:Meat -4 MEALS {10 COWRIE, "Chamois"}
- Assets:Food:Meat -3 MEALS {7.5 COWRIE, "Deer"}
- Expenses:Food
-
-3345-04-25 * "Tisenjoch Pass, Otzal Alps" "Roe Deer Quiver"
- Assets:Animal -0.25 ANIMALS {50 COWRIE}
- Assets:Tools 1 TOOLS {12.50 COWRIE}
-
-3345-04-28 * "Tisenjoch Pass, Otzal Alps" "Wild Berries"
- Income:Forage -60 PLANTS
- Assets:Food:Veggie 3 MEALS {20 PLANTS, "Berry"}
-
-3345-04-30 * "Tisenjoch Pass, Otzal Alps" "Last Weeks Meals"
- Assets:Food:Veggie -1 MEALS {10 COWRIE, "Bread"}
- Assets:Food:Meat -1 MEALS {10 COWRIE, "Chamois"}
- Assets:Food:Meat -6 MEALS {7.5 COWRIE, "Deer"}
- Expenses:Food
-
-3345-05-02 * "Enroute to Bolzano City" "Poppy Seed"
- Income:Forage -80 PLANTS
- Assets:Food:Veggie 8 MEALS {10 PLANTS, "Poppy"}
-
-3345-05-06 * "Enroute to Bolzano City" "Barley, Flax Seeds"
- Income:Forage -80 PLANTS
- Assets:Food:Veggie 4 MEALS {10 PLANTS, "Barley"}
- Assets:Food:Veggie 4 MEALS {10 PLANTS, "Flax"}
-
-3345-05-07 * "Enroute to Bolzano City" "Last Weeks Meals"
- Assets:Food:Veggie -5 MEALS {10 PLANTS, "Poppy"}
- Assets:Food:Veggie -3 MEALS {20 PLANTS, "Berry"}
- Assets:Food:Meat -1 MEALS {7.5 COWRIE, "Deer"}
- Expenses:Food
-
-3345-05-09 * "Bolzano City Market" "Sold Roe Deers Hide"
- Assets:Animal -0.25 ANIMALS {50 COWRIE}
- Assets:Cash 12.5 COWRIE
-
-3345-05-09 * "Bolzano City Market" "Sold Chamois Hide"
- Assets:Animal -0.5 ANIMALS {100 COWRIE}
- Assets:Cash 50 COWRIE
-
-3345-05-10 * "Bolzano City Market" "Yewood Handle Copper Axe"
- note: "Expensive Bearkskin but need not want"
- Assets:Cash -140 COWRIE
- Expenses:Tools:Weapons
-
-3345-05-10 * "Bolzano City Market" "Sheepskin Hide Coat"
- Assets:Cash -40 COWRIE
- Expenses:Clothing
-
-3345-05-10 * "Bolzano City Market" "Sheepskin Loincloth"
- Assets:Cash -20 COWRIE
- Expenses:Clothing
-
-3345-05-10 * "Bolzano City Market" "Goat Skin Leggings"
- Assets:Cash -40 COWRIE
- Expenses:Clothing
-
-3345-05-10 * "Bolzano City Market" "Brown Bear Fur Hat"
- Assets:Cash -60 COWRIE
- Expenses:Clothing
-
-3345-05-10 * "Bolzano City Market" "Viburnum, Dogwood, Flint"
- note: "For Making Arrows"
- Assets:Cash -40 COWRIE
- Expenses:Tools:Weapons
-
-3345-05-10 * "Bolzano City Market" "Yew Wood"
- note: "For Making Yewood Longbow"
- Assets:Cash -32.5 COWRIE
- Expenses:Tools:Weapons
-
-3345-05-10 * "Bolzano City Market" "Birch Bark Baskets"
- note: "Need Better Containers for Storage, Carrying"
- Assets:Cash -30 COWRIE
- Expenses:Tools
-
-3345-05-13 * "Near Feldthurns, South Tyrol" "Ibex"
- Income:Hunt -2 ANIMALS {100 COWRIE}
- Assets:Food:Meat 10 MEALS {15 COWRIE, "Ibex"}
- Assets:Animal 0.5 ANIMALS {100 COWRIE}
-
-3345-05-14 * "Near Feldthurns, South Tyrol" "Last Weeks Meals"
- Assets:Food:Veggie -4 MEALS {10 PLANTS, "Barley"}
- Assets:Food:Veggie -3 MEALS {10 PLANTS, "Flax"}
- Assets:Food:Veggie -3 MEALS {10 PLANTS, "Poppy"}
- Expenses:Food
-
-3345-05-21 * "Fineilspitze Peak, Otzal Alps" "Last Weeks Meals"
- Assets:Food:Meat -7 MEALS {15 COWRIE, "Ibex"}
- Assets:Food:Veggie -1 MEALS {10 PLANTS, "Flax"}
- Expenses:Food
diff --git a/tests/data/music/music.org b/tests/data/music/music.org
deleted file mode 100644
index 4a3f2008..00000000
--- a/tests/data/music/music.org
+++ /dev/null
@@ -1,18 +0,0 @@
-* The Beatles - Across The Universe :60s:BRITISH:POP:
- :PROPERTIES:
- :TYPE: song
- :END:
- :LOGBOOK:
- ENQUEUED: [1984-04-01 Sun 00:00]
- :END:
-
-* Ram Narayan :INDIAN:CLASSICAL:SARANGI:
-** Ram Narayan - Raag Kirwani Alap
- :PROPERTIES:
- :TYPE: song
- :QUERY: Raga Kirvani (feat. Suresh Talwalkar, François Auboux) (Alap)
- :CATEGORY: youtube
- :END:
- :LOGBOOK:
- ENQUEUED: [1984-04-01 Sun 00:00]
- :END:
diff --git a/tests/test_beancount_to_jsonl.py b/tests/test_beancount_to_jsonl.py
deleted file mode 100644
index 923adb5a..00000000
--- a/tests/test_beancount_to_jsonl.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# Standard Packages
-import json
-
-# Internal Packages
-from khoj.processor.ledger.beancount_to_jsonl import BeancountToJsonl
-
-
-def test_no_transactions_in_file(tmp_path):
- "Handle file with no transactions."
- # Arrange
- entry = f"""
- - Bullet point 1
- - Bullet point 2
- """
- beancount_file = create_file(tmp_path, entry)
-
- # Act
- # Extract Entries from specified Beancount files
- entry_nodes, file_to_entries = BeancountToJsonl.extract_beancount_transactions(beancount_files=[beancount_file])
-
- # Process Each Entry from All Beancount Files
- jsonl_string = BeancountToJsonl.convert_transaction_maps_to_jsonl(
- BeancountToJsonl.convert_transactions_to_maps(entry_nodes, file_to_entries)
- )
- jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
-
- # Assert
- assert len(jsonl_data) == 0
-
-
-def test_single_beancount_transaction_to_jsonl(tmp_path):
- "Convert transaction from single file to jsonl."
- # Arrange
- entry = f"""
-1984-04-01 * "Payee" "Narration"
-Expenses:Test:Test 1.00 KES
-Assets:Test:Test -1.00 KES
- """
- beancount_file = create_file(tmp_path, entry)
-
- # Act
- # Extract Entries from specified Beancount files
- entries, entry_to_file_map = BeancountToJsonl.extract_beancount_transactions(beancount_files=[beancount_file])
-
- # Process Each Entry from All Beancount Files
- jsonl_string = BeancountToJsonl.convert_transaction_maps_to_jsonl(
- BeancountToJsonl.convert_transactions_to_maps(entries, entry_to_file_map)
- )
- jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
-
- # Assert
- assert len(jsonl_data) == 1
-
-
-def test_multiple_transactions_to_jsonl(tmp_path):
- "Convert multiple transactions from single file to jsonl."
- # Arrange
- entry = f"""
-1984-04-01 * "Payee" "Narration"
-Expenses:Test:Test 1.00 KES
-Assets:Test:Test -1.00 KES
-\t\r
-1984-04-01 * "Payee" "Narration"
-Expenses:Test:Test 1.00 KES
-Assets:Test:Test -1.00 KES
-"""
-
- beancount_file = create_file(tmp_path, entry)
-
- # Act
- # Extract Entries from specified Beancount files
- entries, entry_to_file_map = BeancountToJsonl.extract_beancount_transactions(beancount_files=[beancount_file])
-
- # Process Each Entry from All Beancount Files
- jsonl_string = BeancountToJsonl.convert_transaction_maps_to_jsonl(
- BeancountToJsonl.convert_transactions_to_maps(entries, entry_to_file_map)
- )
- jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
-
- # Assert
- assert len(jsonl_data) == 2
-
-
-def test_get_beancount_files(tmp_path):
- "Ensure Beancount files specified via input-filter, input-files extracted"
- # Arrange
- # Include via input-filter globs
- group1_file1 = create_file(tmp_path, filename="group1-file1.bean")
- group1_file2 = create_file(tmp_path, filename="group1-file2.bean")
- group2_file1 = create_file(tmp_path, filename="group2-file1.beancount")
- group2_file2 = create_file(tmp_path, filename="group2-file2.beancount")
- # Include via input-file field
- file1 = create_file(tmp_path, filename="ledger.bean")
- # Not included by any filter
- create_file(tmp_path, filename="not-included-ledger.bean")
- create_file(tmp_path, filename="not-included-text.txt")
-
- expected_files = sorted(map(str, [group1_file1, group1_file2, group2_file1, group2_file2, file1]))
-
- # Setup input-files, input-filters
- input_files = [tmp_path / "ledger.bean"]
- input_filter = [tmp_path / "group1*.bean", tmp_path / "group2*.beancount"]
-
- # Act
- extracted_org_files = BeancountToJsonl.get_beancount_files(input_files, input_filter)
-
- # Assert
- assert len(extracted_org_files) == 5
- assert extracted_org_files == expected_files
-
-
-# Helper Functions
-def create_file(tmp_path, entry=None, filename="ledger.beancount"):
- beancount_file = tmp_path / filename
- beancount_file.touch()
- if entry:
- beancount_file.write_text(entry)
- return beancount_file
diff --git a/tests/test_client.py b/tests/test_client.py
index 976b6770..81955f39 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -34,7 +34,7 @@ def test_search_with_invalid_content_type(client):
# ----------------------------------------------------------------------------------------------------
def test_search_with_valid_content_type(client):
- for content_type in ["all", "org", "markdown", "ledger", "image", "pdf", "plugin1"]:
+ for content_type in ["all", "org", "markdown", "image", "pdf", "plugin1"]:
# Act
response = client.get(f"/api/search?q=random&t={content_type}")
# Assert
@@ -52,7 +52,7 @@ def test_update_with_invalid_content_type(client):
# ----------------------------------------------------------------------------------------------------
def test_update_with_valid_content_type(client):
- for content_type in ["org", "markdown", "ledger", "image", "pdf", "plugin1"]:
+ for content_type in ["org", "markdown", "image", "pdf", "plugin1"]:
# Act
response = client.get(f"/api/update?t={content_type}")
# Assert
@@ -70,7 +70,7 @@ def test_regenerate_with_invalid_content_type(client):
# ----------------------------------------------------------------------------------------------------
def test_regenerate_with_valid_content_type(client):
- for content_type in ["org", "markdown", "ledger", "image", "pdf", "plugin1"]:
+ for content_type in ["org", "markdown", "image", "pdf", "plugin1"]:
# Act
response = client.get(f"/api/update?force=true&t={content_type}")
# Assert