diff --git a/README.md b/README.md index 5868f0a6..6a173359 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ - **General** - **Natural**: Advanced natural language understanding using Transformer based ML Models - **Pluggable**: Modular architecture makes it easy to plug in new data sources, frontends and ML models - - **Multiple Sources**: Index your Org-mode and Markdown notes, Beancount transactions, PDF files, Github repositories, and Photos + - **Multiple Sources**: Index your Org-mode and Markdown notes, PDF files, Github repositories, and Photos - **Multiple Interfaces**: Interact from your [Web Browser](./src/khoj/interface/web/index.html), [Emacs](./src/interface/emacs/khoj.el) or [Obsidian](./src/interface/obsidian/) ## Demos @@ -267,7 +267,7 @@ pip install --upgrade --pre khoj-assistant 2. [Install](https://tailscale.com/kb/installation/) [Tailscale](tailscale.com/) on your personal server and phone 3. Open the Khoj web interface of the server from your phone browser.
It should be `http://tailscale-ip-of-server:8000` or `http://name-of-server:8000` if you've setup [MagicDNS](https://tailscale.com/kb/1081/magicdns/) 4. Click the [Add to Homescreen](https://developer.mozilla.org/en-US/docs/Web/Progressive_web_apps/Add_to_home_screen) button -5. Enjoy exploring your notes, transactions and images from your phone! +5. Enjoy exploring your notes, documents and images from your phone! ![](https://github.com/khoj-ai/khoj/blob/master/docs/khoj_pwa_android.png?) @@ -399,7 +399,7 @@ pip install -e .[dev] - Delete `content-type` and `processor` sub-section(s) irrelevant for your use-case - Restart khoj - Note: Wait after configuration for khoj to Load ML model, generate embeddings and expose API to query notes, images, transactions etc specified in config YAML + Note: Wait after configuration for khoj to Load ML model, generate embeddings and expose API to query notes, images, documents etc specified in config YAML #### Using Docker ##### 1. Clone @@ -410,7 +410,7 @@ git clone https://github.com/khoj-ai/khoj && cd khoj ##### 2. Configure -- **Required**: Update [docker-compose.yml](./docker-compose.yml) to mount your images, (org-mode or markdown) notes, pdf, Github repositories, and beancount directories +- **Required**: Update [docker-compose.yml](./docker-compose.yml) to mount your images, (org-mode or markdown) notes, PDFs and Github repositories - **Optional**: Edit application configuration in [khoj_docker.yml](./config/khoj_docker.yml) ##### 3. Run @@ -449,7 +449,7 @@ python3 -m pip install pyqt6 # As conda does not support pyqt6 yet ```shell python3 -m src.khoj.main -vv ``` - Load ML model, generate embeddings and expose API to query notes, images, transactions etc specified in config YAML + Load ML model, generate embeddings and expose API to query notes, images, documents etc specified in config YAML ##### 5. Upgrade ```shell diff --git a/docker-compose.yml b/docker-compose.yml index ec9af160..9ba95d75 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,7 +18,6 @@ services: # must match the path prefix in your config file. - ./tests/data/org/:/data/org/ - ./tests/data/images/:/data/images/ - - ./tests/data/ledger/:/data/ledger/ - ./tests/data/markdown/:/data/markdown/ - ./tests/data/pdf/:/data/pdf/ # Embeddings and models are populated after the first run diff --git a/pyproject.toml b/pyproject.toml index e6632574..9bcc7efd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,6 @@ keywords = [ "AI", "org-mode", "markdown", - "beancount", "images", "pdf", ] diff --git a/src/interface/emacs/khoj.el b/src/interface/emacs/khoj.el index ae625f9f..cfd59ee6 100644 --- a/src/interface/emacs/khoj.el +++ b/src/interface/emacs/khoj.el @@ -4,7 +4,7 @@ ;; Author: Debanjum Singh Solanky ;; Description: An AI personal assistant for your digital brain -;; Keywords: search, chat, org-mode, outlines, markdown, pdf, beancount, image +;; Keywords: search, chat, org-mode, outlines, markdown, pdf, image ;; Version: 0.7.0 ;; Package-Requires: ((emacs "27.1") (transient "0.3.0") (dash "2.19.1")) ;; URL: https://github.com/khoj-ai/khoj/tree/master/src/interface/emacs @@ -29,8 +29,7 @@ ;;; Commentary: ;; Create an AI personal assistant for your `org-mode', `markdown' notes, -;; `beancount' transactions, PDFs and images. This package exposes -;; two assistance modes, search and chat: +;; PDFs and images. The assistant exposes 2 modes, search and chat: ;; ;; Chat provides faster answers, iterative discovery and assisted ;; creativity. It requires your OpenAI API key to access GPT models @@ -93,7 +92,6 @@ :group 'khoj :type '(choice (const "org") (const "markdown") - (const "ledger") (const "image") (const "pdf"))) @@ -119,7 +117,6 @@ (declare-function org-element-property "org-mode" (PROPERTY ELEMENT)) (declare-function org-element-type "org-mode" (ELEMENT)) -(declare-function beancount-mode "beancount" ()) (declare-function markdown-mode "markdown-mode" ()) (declare-function which-key--show-keymap "which-key" (KEYMAP-NAME KEYMAP &optional PRIOR-ARGS ALL NO-PAGING FILTER)) @@ -135,8 +132,6 @@ NO-PAGING FILTER)) "C-x m | markdown\n") (when (member 'org enabled-content-types) "C-x o | org-mode\n") - (when (member 'ledger enabled-content-types) - "C-x l | ledger\n") (when (member 'image enabled-content-types) "C-x i | image\n") (when (member 'pdf enabled-content-types) @@ -146,7 +141,6 @@ NO-PAGING FILTER)) (defvar khoj--reference-count 0 "Track number of references currently in chat bufffer.") (defun khoj--search-markdown () "Set content-type to `markdown'." (interactive) (setq khoj--content-type "markdown")) (defun khoj--search-org () "Set content-type to `org-mode'." (interactive) (setq khoj--content-type "org")) -(defun khoj--search-ledger () "Set content-type to `ledger'." (interactive) (setq khoj--content-type "ledger")) (defun khoj--search-images () "Set content-type to image." (interactive) (setq khoj--content-type "image")) (defun khoj--search-pdf () "Set content-type to pdf." (interactive) (setq khoj--content-type "pdf")) (defun khoj--improve-rank () "Use cross-encoder to rerank search results." (interactive) (khoj--incremental-search t)) @@ -159,8 +153,6 @@ NO-PAGING FILTER)) (define-key kmap (kbd "C-x m") #'khoj--search-markdown)) (when (member 'org enabled-content-types) (define-key kmap (kbd "C-x o") #'khoj--search-org)) - (when (member 'ledger enabled-content-types) - (define-key kmap (kbd "C-x l") #'khoj--search-ledger)) (when (member 'image enabled-content-types) (define-key kmap (kbd "C-x i") #'khoj--search-images)) (when (member 'pdf enabled-content-types) @@ -531,18 +523,6 @@ CONFIG is json obtained from Khoj config API." ;; remove leading (, ) or SPC from extracted entries string (replace-regexp-in-string "^[\(\) ]" ""))) -(defun khoj--extract-entries-as-ledger (json-response query) - "Convert JSON-RESPONSE, QUERY from API to ledger entries." - (thread-last json-response - ;; extract and render entries from API response - (mapcar (lambda (args) (format "%s\n\n" (cdr (assoc 'entry args))))) - ;; Set query as heading in rendered results buffer - (format ";; %s\n\n%s\n" query) - ;; remove leading (, ) or SPC from extracted entries string - (replace-regexp-in-string "^[\(\) ]" "") - ;; remove trailing (, ) or SPC from extracted entries string - (replace-regexp-in-string "[\(\) ]$" ""))) - (defun khoj--extract-entries-as-pdf (json-response query) "Convert QUERY, JSON-RESPONSE from API with PDF results to `org-mode' entries." (thread-last @@ -614,7 +594,6 @@ CONFIG is json obtained from Khoj config API." (let ((enabled-content-types (khoj--get-enabled-content-types)) (file-extension (file-name-extension buffer-name))) (cond - ((and (member 'ledger enabled-content-types) (or (equal file-extension "bean") (equal file-extension "beancount"))) "ledger") ((and (member 'org enabled-content-types) (equal file-extension "org")) "org") ((and (member 'org enabled-content-types) (equal file-extension "pdf")) "pdf") ((and (member 'markdown enabled-content-types) (or (equal file-extension "markdown") (equal file-extension "md"))) "markdown") @@ -673,7 +652,6 @@ Render results in BUFFER-NAME using QUERY, CONTENT-TYPE." (cond ((equal content-type "org") (khoj--extract-entries-as-org json-response query)) ((equal content-type "markdown") (khoj--extract-entries-as-markdown json-response query)) ((equal content-type "pdf") (khoj--extract-entries-as-pdf json-response query)) - ((equal content-type "ledger") (khoj--extract-entries-as-ledger json-response query)) ((equal content-type "image") (khoj--extract-entries-as-images json-response query)) (t (khoj--extract-entries json-response query)))) (cond ((or (equal content-type "all") @@ -688,7 +666,6 @@ Render results in BUFFER-NAME using QUERY, CONTENT-TYPE." (org-set-startup-visibility))) ((equal content-type "markdown") (progn (markdown-mode) (visual-line-mode))) - ((equal content-type "ledger") (beancount-mode)) ((equal content-type "image") (progn (shr-render-region (point-min) (point-max)) (goto-char (point-min)))) (t (fundamental-mode)))) @@ -1004,7 +981,7 @@ Paragraph only starts at first text after blank line." ;; set content type to: last used > based on current buffer > default type :init-value (lambda (obj) (oset obj value (format "--content-type=%s" (or khoj--content-type (khoj--buffer-name-to-content-type (buffer-name)))))) ;; dynamically set choices to content types enabled on khoj backend - :choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "ledger" "image"))) + :choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "image"))) (transient-define-suffix khoj--search-command (&optional args) (interactive (list (transient-args transient-current-command))) @@ -1064,7 +1041,7 @@ Paragraph only starts at first text after blank line." ;;;###autoload (defun khoj () - "Provide natural, search assistance for your notes, transactions and images." + "Provide natural, search assistance for your notes, documents and images." (interactive) (when khoj-auto-setup (khoj-setup t)) diff --git a/src/interface/emacs/tests/khoj-tests.el b/src/interface/emacs/tests/khoj-tests.el index 4e279114..8242d30b 100644 --- a/src/interface/emacs/tests/khoj-tests.el +++ b/src/interface/emacs/tests/khoj-tests.el @@ -112,46 +112,6 @@ Rule everything\n\ \n")))) -(ert-deftest khoj-tests--extract-entries-as-ledger () - "Test `json-response', `query' from API formatted as beancount ledger." - (let ((user-query "Become God") - (json-response-from-khoj-backend - (json-read-from-string - "[\ -{\ - \"entry\": \"4242-04-01 * \\\"Penance Center\\\" \\\"Book Stay for 10,000 Years\\\"\\n Expenses:Health:Mental 15 GOLD\\n Assets:Commodities:Gold\",\ - \"score\": \"0.42\",\ - \"additional\": {\ - \"file\": \"/home/ravan/ledger.beancount\",\ - \"compiled\": \"4242-04-01 * \\\"Penance Center\\\" \\\"Book Stay for 10,000 Years\\\" Expenses:Health:Mental 15 GOLD Assets:Commodities:Gold\"\ - }\ -},\ -{\ - \"entry\": \"14242-04-01 * \\\"Brahma\\\" \\\"Boon for Invincibility from Higher Beings\\\"\\n Income:Health -1,00,00,000 LIFE\\n Assets:Commodities:Life\",\ - \"score\": \"0.42\",\ - \"additional\": {\ - \"file\": \"/home/ravan/ledger.beancount\",\ - \"compiled\": \"4242-04-01 * \\\"Brahma\\\" \\\"Boon for Invincibility from Higher Beings\\\" Income:Health -1,00,00,000 LIFE Assets:Commodities:Life\"\ - }\ -}]\ -"))) - (should - (equal - (khoj--extract-entries-as-ledger json-response-from-khoj-backend user-query) - ";; Become God\n\ -\n\ -4242-04-01 * \"Penance Center\" \"Book Stay for 10,000 Years\"\n\ - Expenses:Health:Mental 15 GOLD\n\ - Assets:Commodities:Gold\n\ -\n\ -14242-04-01 * \"Brahma\" \"Boon for Invincibility from Higher Beings\"\n\ - Income:Health -1,00,00,000 LIFE\n\ - Assets:Commodities:Life\n\ -\n\ -\n\ -")))) - - ;; ------------------------------------- ;; Test Helpers for Find Similar Feature diff --git a/src/khoj/configure.py b/src/khoj/configure.py index 8167d672..a1e07205 100644 --- a/src/khoj/configure.py +++ b/src/khoj/configure.py @@ -12,7 +12,6 @@ from fastapi.staticfiles import StaticFiles # Internal Packages from khoj.processor.conversation.gpt import summarize -from khoj.processor.ledger.beancount_to_jsonl import BeancountToJsonl from khoj.processor.jsonl.jsonl_to_jsonl import JsonlToJsonl from khoj.processor.markdown.markdown_to_jsonl import MarkdownToJsonl from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl @@ -122,18 +121,6 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, filters=[DateFilter(), WordFilter(), FileFilter()], ) - # Initialize Ledger Search - if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger and config.search_type.symmetric: - logger.info("💸 Setting up search for ledger") - # Extract Entries, Generate Ledger Embeddings - model.ledger_search = text_search.setup( - BeancountToJsonl, - config.content_type.ledger, - search_config=config.search_type.symmetric, - regenerate=regenerate, - filters=[DateFilter(), WordFilter(), FileFilter()], - ) - # Initialize PDF Search if (t == state.SearchType.Pdf or t == None) and config.content_type.pdf and config.search_type.asymmetric: logger.info("🖨️ Setting up search for pdf") diff --git a/src/khoj/interface/web/index.html b/src/khoj/interface/web/index.html index 4f95f0e2..949ab783 100644 --- a/src/khoj/interface/web/index.html +++ b/src/khoj/interface/web/index.html @@ -47,12 +47,6 @@ }).join("\n"); } - function render_ledger(query, data) { - return data.map(function (item) { - return `
` + `

${item.entry}

` + `
`; - }).join("\n"); - } - function render_pdf(query, data) { return data.map(function (item) { let compiled_lines = item.additional.compiled.split("\n"); @@ -90,8 +84,6 @@ results = render_org(query, data, "org-"); } else if (type === "image") { results = data.map(render_image).join(''); - } else if (type === "ledger") { - results = render_ledger(query, data); } else if (type === "pdf") { results = render_pdf(query, data); } else if (type === "github" || type === "all") { @@ -360,8 +352,7 @@ white-space: pre-wrap; } .results-pdf, - .results-plugin, - .results-ledger { + .results-plugin { text-align: left; white-space: pre-line; } diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index b9fa5fdd..2cbc9f40 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -143,19 +143,15 @@ search_type = """ Objective: Extract search type from user query and return information as JSON Allowed search types are listed below: - - search-type=["notes","ledger","image", "pdf"] + - search-type=["notes", "image", "pdf"] Some examples are given below for reference: Q:What fiction book was I reading last week about AI starship? A:{ "search-type": "notes" } Q: What did the lease say about early termination A: { "search-type": "pdf" } -Q:How much did I spend at Subway for dinner last time? -A:{ "search-type": "ledger" } Q:Can you recommend a movie to watch from my notes? A:{ "search-type": "notes" } -Q:When did I buy Groceries last? -A:{ "search-type": "ledger" } Q:When did I go surfing last? A:{ "search-type": "notes" } Q:""" diff --git a/src/khoj/processor/ledger/__init__.py b/src/khoj/processor/ledger/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/khoj/processor/ledger/beancount_to_jsonl.py b/src/khoj/processor/ledger/beancount_to_jsonl.py deleted file mode 100644 index 347012a3..00000000 --- a/src/khoj/processor/ledger/beancount_to_jsonl.py +++ /dev/null @@ -1,133 +0,0 @@ -# Standard Packages -import glob -import re -import logging -from typing import List - -# Internal Packages -from khoj.processor.text_to_jsonl import TextToJsonl -from khoj.utils.helpers import get_absolute_path, is_none_or_empty, timer -from khoj.utils.constants import empty_escape_sequences -from khoj.utils.jsonl import dump_jsonl, compress_jsonl_data -from khoj.utils.rawconfig import Entry - - -logger = logging.getLogger(__name__) - - -class BeancountToJsonl(TextToJsonl): - # Define Functions - def process(self, previous_entries=None): - # Extract required fields from config - beancount_files, beancount_file_filter, output_file = ( - self.config.input_files, - self.config.input_filter, - self.config.compressed_jsonl, - ) - - # Input Validation - if is_none_or_empty(beancount_files) and is_none_or_empty(beancount_file_filter): - print("At least one of beancount-files or beancount-file-filter is required to be specified") - exit(1) - - # Get Beancount Files to Process - beancount_files = BeancountToJsonl.get_beancount_files(beancount_files, beancount_file_filter) - - # Extract Entries from specified Beancount files - with timer("Parse transactions from Beancount files into dictionaries", logger): - current_entries = BeancountToJsonl.convert_transactions_to_maps( - *BeancountToJsonl.extract_beancount_transactions(beancount_files) - ) - - # Split entries by max tokens supported by model - with timer("Split entries by max token size supported by model", logger): - current_entries = self.split_entries_by_max_tokens(current_entries, max_tokens=256) - - # Identify, mark and merge any new entries with previous entries - with timer("Identify new or updated transaction", logger): - if not previous_entries: - entries_with_ids = list(enumerate(current_entries)) - else: - entries_with_ids = TextToJsonl.mark_entries_for_update( - current_entries, previous_entries, key="compiled", logger=logger - ) - - with timer("Write transactions to JSONL file", logger): - # Process Each Entry from All Notes Files - entries = list(map(lambda entry: entry[1], entries_with_ids)) - jsonl_data = BeancountToJsonl.convert_transaction_maps_to_jsonl(entries) - - # Compress JSONL formatted Data - if output_file.suffix == ".gz": - compress_jsonl_data(jsonl_data, output_file) - elif output_file.suffix == ".jsonl": - dump_jsonl(jsonl_data, output_file) - - return entries_with_ids - - @staticmethod - def get_beancount_files(beancount_files=None, beancount_file_filters=None): - "Get Beancount files to process" - absolute_beancount_files, filtered_beancount_files = set(), set() - if beancount_files: - absolute_beancount_files = {get_absolute_path(beancount_file) for beancount_file in beancount_files} - if beancount_file_filters: - filtered_beancount_files = { - filtered_file - for beancount_file_filter in beancount_file_filters - for filtered_file in glob.glob(get_absolute_path(beancount_file_filter), recursive=True) - } - - all_beancount_files = sorted(absolute_beancount_files | filtered_beancount_files) - - files_with_non_beancount_extensions = { - beancount_file - for beancount_file in all_beancount_files - if not beancount_file.endswith(".bean") and not beancount_file.endswith(".beancount") - } - if any(files_with_non_beancount_extensions): - print(f"[Warning] There maybe non beancount files in the input set: {files_with_non_beancount_extensions}") - - logger.debug(f"Processing files: {all_beancount_files}") - - return all_beancount_files - - @staticmethod - def extract_beancount_transactions(beancount_files): - "Extract entries from specified Beancount files" - - # Initialize Regex for extracting Beancount Entries - transaction_regex = r"^\n?\d{4}-\d{2}-\d{2} [\*|\!] " - empty_newline = f"^[\n\r\t\ ]*$" - - entries = [] - transaction_to_file_map = [] - for beancount_file in beancount_files: - with open(beancount_file) as f: - ledger_content = f.read() - transactions_per_file = [ - entry.strip(empty_escape_sequences) - for entry in re.split(empty_newline, ledger_content, flags=re.MULTILINE) - if re.match(transaction_regex, entry) - ] - transaction_to_file_map += zip(transactions_per_file, [beancount_file] * len(transactions_per_file)) - entries.extend(transactions_per_file) - return entries, dict(transaction_to_file_map) - - @staticmethod - def convert_transactions_to_maps(parsed_entries: List[str], transaction_to_file_map) -> List[Entry]: - "Convert each parsed Beancount transaction into a Entry" - entries = [] - for parsed_entry in parsed_entries: - entries.append( - Entry(compiled=parsed_entry, raw=parsed_entry, file=f"{transaction_to_file_map[parsed_entry]}") - ) - - logger.debug(f"Converted {len(parsed_entries)} transactions to dictionaries") - - return entries - - @staticmethod - def convert_transaction_maps_to_jsonl(entries: List[Entry]) -> str: - "Convert each Beancount transaction entry to JSON and collate as JSONL" - return "".join([f"{entry.to_json()}\n" for entry in entries]) diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 6e3818d1..86f0e19d 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -171,11 +171,9 @@ async def search( defiltered_query = filter.defilter(user_query) encoded_asymmetric_query = None - if t == SearchType.All or (t != SearchType.Ledger and t != SearchType.Image): + if t == SearchType.All or t != SearchType.Image: text_search_models: List[TextSearchModel] = [ - model - for model_name, model in state.model.__dict__.items() - if isinstance(model, TextSearchModel) and model_name != "ledger_search" + model for model in state.model.__dict__.values() if isinstance(model, TextSearchModel) ] if text_search_models: with timer("Encoding query took", logger=logger): @@ -244,19 +242,6 @@ async def search( ) ] - if (t == SearchType.Ledger) and state.model.ledger_search: - # query transactions - search_futures += [ - executor.submit( - text_search.query, - user_query, - state.model.ledger_search, - rank_results=r or False, - score_threshold=score_threshold, - dedupe=dedupe or True, - ) - ] - if (t == SearchType.Image) and state.model.image_search: # query images search_futures += [ diff --git a/src/khoj/routers/web_client.py b/src/khoj/routers/web_client.py index 6ab94181..c5ed0b3b 100644 --- a/src/khoj/routers/web_client.py +++ b/src/khoj/routers/web_client.py @@ -16,7 +16,7 @@ import json web_client = APIRouter() templates = Jinja2Templates(directory=constants.web_directory) -VALID_CONTENT_TYPES = ["org", "ledger", "markdown", "pdf"] +VALID_TEXT_CONTENT_TYPES = ["org", "markdown", "pdf"] # Create Routes @@ -60,7 +60,7 @@ if not state.demo: @web_client.get("/config/content_type/{content_type}", response_class=HTMLResponse) def content_config_page(request: Request, content_type: str): - if content_type not in VALID_CONTENT_TYPES: + if content_type not in VALID_TEXT_CONTENT_TYPES: return templates.TemplateResponse("config.html", context={"request": request}) default_copy = constants.default_config.copy() diff --git a/src/khoj/utils/config.py b/src/khoj/utils/config.py index b15fe811..3adc6e9d 100644 --- a/src/khoj/utils/config.py +++ b/src/khoj/utils/config.py @@ -19,7 +19,6 @@ if TYPE_CHECKING: class SearchType(str, Enum): All = "all" Org = "org" - Ledger = "ledger" Markdown = "markdown" Image = "image" Pdf = "pdf" @@ -60,7 +59,6 @@ class ImageSearchModel: @dataclass class SearchModels: org_search: TextSearchModel = None - ledger_search: TextSearchModel = None markdown_search: TextSearchModel = None pdf_search: TextSearchModel = None image_search: ImageSearchModel = None diff --git a/src/khoj/utils/constants.py b/src/khoj/utils/constants.py index 49a55761..caf64ac2 100644 --- a/src/khoj/utils/constants.py +++ b/src/khoj/utils/constants.py @@ -22,12 +22,6 @@ default_config = { "compressed-jsonl": "~/.khoj/content/markdown/markdown.jsonl.gz", "embeddings-file": "~/.khoj/content/markdown/markdown_embeddings.pt", }, - "ledger": { - "input-files": None, - "input-filter": None, - "compressed-jsonl": "~/.khoj/content/ledger/ledger.jsonl.gz", - "embeddings-file": "~/.khoj/content/ledger/ledger_embeddings.pt", - }, "pdf": { "input-files": None, "input-filter": None, diff --git a/src/khoj/utils/rawconfig.py b/src/khoj/utils/rawconfig.py index cf3b4f29..b13c7449 100644 --- a/src/khoj/utils/rawconfig.py +++ b/src/khoj/utils/rawconfig.py @@ -72,7 +72,6 @@ class ImageContentConfig(ConfigBase): class ContentConfig(ConfigBase): org: Optional[TextContentConfig] - ledger: Optional[TextContentConfig] image: Optional[ImageContentConfig] markdown: Optional[TextContentConfig] pdf: Optional[TextContentConfig] diff --git a/tests/data/ledger/otzi.beancount b/tests/data/ledger/otzi.beancount deleted file mode 100644 index 29682985..00000000 --- a/tests/data/ledger/otzi.beancount +++ /dev/null @@ -1,233 +0,0 @@ -; -*- mode: org; mode: beancount; -*- -;; Otzi's Ledger: A 3rd Millenium B.C Mountain Shepherd's Ledger -;; -;; A stylized recreation of Otzi's transaction history from a few months before his death -;; based on https://en.wikipedia.org/wiki/Otzi#Scientific_analyses - -* Options ; Beancount options -#+STARTUP: content -option "title" "Beancount Ledger" -option "operating_currency" "COWRIE" ; The main currencies you use - -* Accounts ; Open all the accounts -3300-04-01 open Equity:Sheep ANIMALS - description: "Inheritance from Parents" - -3300-04-01 open Income:Hunt ANIMALS - description: "From Hunting Animals" -3300-04-01 open Income:Forage PLANTS - description: "From Foraging Wild Fruits, Plants" -3300-04-01 open Income:Market COWRIE - description: "Assets sold at the market" - -3300-04-01 open Assets:Animal ANIMALS - description: "Animals Owned Like Sheep, Goats, Cows" -3300-04-01 open Assets:Food MEALS - description: "Food for Consumption" -3300-04-01 open Assets:Food:Meat MEALS - description: "Killed Animals for Consumption" -3300-04-01 open Assets:Food:Veggie MEALS - description: "Procured, Foraged Fruits, Grains" -3300-04-01 open Assets:Plant PLANTS - description: "Procured, Foraged Plants" -3300-04-01 open Assets:Tools TOOLS - description: "Procured, Made Tools" -3300-04-01 open Assets:Cash COWRIE - description: "Cowrie Shells in Pouch" - -3300-04-01 open Expenses:Medicine COWRIE - description: "Procured, Foraged Medicinals" -3300-04-01 open Expenses:Tools:Weapons COWRIE - description: "Bought Weapons" -3300-04-01 open Expenses:Food - description: "Bought, Consumed Meals" -3300-04-01 open Expenses:Clothing COWRIE - description: "Bought Clothes" -3300-04-01 open Expenses:Tools COWRIE - description: "Bought Tools" - -* Transactions -3345-03-15 * "Parents" "Inheritance" - note: "Opening Balance" - Equity:Sheep -20 ANIMALS - Assets:Animal - -3345-03-26 * "Hauslabjoch Pass, Otzal Alps" "Red Deers" - Income:Hunt -2 ANIMALS {50 COWRIE} - Assets:Food:Meat 10 MEALS {7.5 COWRIE, "Deer"} - Assets:Animal 0.5 ANIMALS {50 COWRIE} - -3345-03-28 * "Hauslabjoch Pass, Otzal Alps" "Wild Berries" - Income:Forage -60 PLANTS - Assets:Food:Veggie 3 MEALS {20 PLANTS, "Berry"} - -3345-04-02 * "Hauslabjoch Pass, Otzal Alps" "Last Weeks Meals" - Assets:Food:Meat -7 MEALS {7.5 COWRIE, "Deer"} - Assets:Food:Veggie -3 MEALS {20 PLANTS, "Berry"} - Expenses:Food - -3345-04-02 * "Hauslabjoch Pass, Otzal Alps" "Sloe" - Income:Forage -50 PLANTS - Assets:Food:Veggie 5 MEALS {10 PLANTS, "Sloe"} - -3345-04-05 * "Hauslabjoch Pass, Otzal Alps" "Ibex" - Income:Hunt -2 ANIMALS {100 COWRIE} - Assets:Food:Meat 10 MEALS {15 COWRIE, "Ibex"} - Assets:Animal 0.5 ANIMALS {100 COWRIE} - -3345-04-08 * "Hauslabjoch Pass, Otzal Alps" "Birch Fungus Medicinal Mushroom" - Income:Forage -6 PLANTS {100 COWRIE} - Assets:Plant 6 PLANTS {100 COWRIE} - -3345-04-09 * "Hauslabjoch Pass, Otzal Alps" "Last Weeks Meals" - Assets:Food:Meat -3 MEALS {7.5 COWRIE, "Deer"} - Assets:Food:Meat -4 MEALS {15 COWRIE, "Ibex"} - Assets:Food:Veggie -3 MEALS {10 PLANTS, "Sloe"} - Expenses:Food - -3345-04-15 * "Innsbruck Farmers Market" "Sold Red Deers Skin, Antler" - Assets:Animal -0.5 ANIMALS {50 COWRIE} - Assets:Cash 25 COWRIE - -3345-04-15 * "Innsbruck Farmers Market" "Sold Ibex Skin, Antler" - Assets:Animal -0.5 ANIMALS {100 COWRIE} - Assets:Cash 50 COWRIE - -3345-04-15 * "Innsbruck Farmers Market" "Sold Birch Fungus Medicinal Mushroom" - Assets:Plant -5 PLANTS {100 COWRIE} - Assets:Cash 500 COWRIE - -3345-04-15 * "Innsbruck Farmers Market" "Snow Shoes: Bearskin, Deer hide, Tree Bark" - note: "Expensive Bearkskin but need not want" - Assets:Cash -90 COWRIE - Expenses:Clothing - -3345-04-15 * "Innsbruck Farmers Market" "Soft Grass Socks" - Assets:Cash -10 COWRIE - Expenses:Clothing - -3345-04-15 * "Innsbruck Farmers Market" "Cattle Shoelace" - Assets:Cash -5 COWRIE - Expenses:Clothing - -3345-04-15 * "Innsbruck Farmers Market" "Einkorn Wheat Bran Bread" - Assets:Cash -50 COWRIE - Assets:Food:Veggie 5 MEALS {10 COWRIE, "Bread"} - -3345-04-16 * "Enroute to Innsbruck" "Last Weeks Meals" - Assets:Food:Meat -6 MEALS {15 COWRIE, "Ibex"} - Assets:Food:Veggie -2 MEALS {10 PLANTS, "Sloe"} - Expenses:Food - -3345-04-16 * "Innsbruck Tools Market" "Firelighting Kit: Plants, Pyrite, Flint" - Assets:Cash -30 COWRIE - Expenses:Tools - -3345-04-16 * "Innsbruck Tools Market" "Flint Blade, Ash Handle Knife" - Assets:Cash -50 COWRIE - Expenses:Tools:Weapons - -3345-04-20 * "Tisenjoch Pass, Otzal Alps" "Chamois" - Income:Hunt -1 ANIMALS {100 COWRIE} - Assets:Food:Meat 5 MEALS {10 COWRIE, "Chamois"} - Assets:Animal 0.5 ANIMALS {100 COWRIE} - -3345-04-22 * "Tisenjoch Pass, Otzal Alps" "Roe Deer" - Income:Hunt -2 ANIMALS {50 COWRIE} - Assets:Food:Meat 10 MEALS {7.5 COWRIE, "Deer"} - Assets:Animal 0.5 ANIMALS {50 COWRIE} - -3345-04-23 * "Tisenjoch Pass, Otzal Alps" "Last Weeks Meals" - Assets:Food:Veggie -4 MEALS {10 COWRIE, "Bread"} - Assets:Food:Meat -4 MEALS {10 COWRIE, "Chamois"} - Assets:Food:Meat -3 MEALS {7.5 COWRIE, "Deer"} - Expenses:Food - -3345-04-25 * "Tisenjoch Pass, Otzal Alps" "Roe Deer Quiver" - Assets:Animal -0.25 ANIMALS {50 COWRIE} - Assets:Tools 1 TOOLS {12.50 COWRIE} - -3345-04-28 * "Tisenjoch Pass, Otzal Alps" "Wild Berries" - Income:Forage -60 PLANTS - Assets:Food:Veggie 3 MEALS {20 PLANTS, "Berry"} - -3345-04-30 * "Tisenjoch Pass, Otzal Alps" "Last Weeks Meals" - Assets:Food:Veggie -1 MEALS {10 COWRIE, "Bread"} - Assets:Food:Meat -1 MEALS {10 COWRIE, "Chamois"} - Assets:Food:Meat -6 MEALS {7.5 COWRIE, "Deer"} - Expenses:Food - -3345-05-02 * "Enroute to Bolzano City" "Poppy Seed" - Income:Forage -80 PLANTS - Assets:Food:Veggie 8 MEALS {10 PLANTS, "Poppy"} - -3345-05-06 * "Enroute to Bolzano City" "Barley, Flax Seeds" - Income:Forage -80 PLANTS - Assets:Food:Veggie 4 MEALS {10 PLANTS, "Barley"} - Assets:Food:Veggie 4 MEALS {10 PLANTS, "Flax"} - -3345-05-07 * "Enroute to Bolzano City" "Last Weeks Meals" - Assets:Food:Veggie -5 MEALS {10 PLANTS, "Poppy"} - Assets:Food:Veggie -3 MEALS {20 PLANTS, "Berry"} - Assets:Food:Meat -1 MEALS {7.5 COWRIE, "Deer"} - Expenses:Food - -3345-05-09 * "Bolzano City Market" "Sold Roe Deers Hide" - Assets:Animal -0.25 ANIMALS {50 COWRIE} - Assets:Cash 12.5 COWRIE - -3345-05-09 * "Bolzano City Market" "Sold Chamois Hide" - Assets:Animal -0.5 ANIMALS {100 COWRIE} - Assets:Cash 50 COWRIE - -3345-05-10 * "Bolzano City Market" "Yewood Handle Copper Axe" - note: "Expensive Bearkskin but need not want" - Assets:Cash -140 COWRIE - Expenses:Tools:Weapons - -3345-05-10 * "Bolzano City Market" "Sheepskin Hide Coat" - Assets:Cash -40 COWRIE - Expenses:Clothing - -3345-05-10 * "Bolzano City Market" "Sheepskin Loincloth" - Assets:Cash -20 COWRIE - Expenses:Clothing - -3345-05-10 * "Bolzano City Market" "Goat Skin Leggings" - Assets:Cash -40 COWRIE - Expenses:Clothing - -3345-05-10 * "Bolzano City Market" "Brown Bear Fur Hat" - Assets:Cash -60 COWRIE - Expenses:Clothing - -3345-05-10 * "Bolzano City Market" "Viburnum, Dogwood, Flint" - note: "For Making Arrows" - Assets:Cash -40 COWRIE - Expenses:Tools:Weapons - -3345-05-10 * "Bolzano City Market" "Yew Wood" - note: "For Making Yewood Longbow" - Assets:Cash -32.5 COWRIE - Expenses:Tools:Weapons - -3345-05-10 * "Bolzano City Market" "Birch Bark Baskets" - note: "Need Better Containers for Storage, Carrying" - Assets:Cash -30 COWRIE - Expenses:Tools - -3345-05-13 * "Near Feldthurns, South Tyrol" "Ibex" - Income:Hunt -2 ANIMALS {100 COWRIE} - Assets:Food:Meat 10 MEALS {15 COWRIE, "Ibex"} - Assets:Animal 0.5 ANIMALS {100 COWRIE} - -3345-05-14 * "Near Feldthurns, South Tyrol" "Last Weeks Meals" - Assets:Food:Veggie -4 MEALS {10 PLANTS, "Barley"} - Assets:Food:Veggie -3 MEALS {10 PLANTS, "Flax"} - Assets:Food:Veggie -3 MEALS {10 PLANTS, "Poppy"} - Expenses:Food - -3345-05-21 * "Fineilspitze Peak, Otzal Alps" "Last Weeks Meals" - Assets:Food:Meat -7 MEALS {15 COWRIE, "Ibex"} - Assets:Food:Veggie -1 MEALS {10 PLANTS, "Flax"} - Expenses:Food diff --git a/tests/data/music/music.org b/tests/data/music/music.org deleted file mode 100644 index 4a3f2008..00000000 --- a/tests/data/music/music.org +++ /dev/null @@ -1,18 +0,0 @@ -* The Beatles - Across The Universe :60s:BRITISH:POP: - :PROPERTIES: - :TYPE: song - :END: - :LOGBOOK: - ENQUEUED: [1984-04-01 Sun 00:00] - :END: - -* Ram Narayan :INDIAN:CLASSICAL:SARANGI: -** Ram Narayan - Raag Kirwani Alap - :PROPERTIES: - :TYPE: song - :QUERY: Raga Kirvani (feat. Suresh Talwalkar, François Auboux) (Alap) - :CATEGORY: youtube - :END: - :LOGBOOK: - ENQUEUED: [1984-04-01 Sun 00:00] - :END: diff --git a/tests/test_beancount_to_jsonl.py b/tests/test_beancount_to_jsonl.py deleted file mode 100644 index 923adb5a..00000000 --- a/tests/test_beancount_to_jsonl.py +++ /dev/null @@ -1,118 +0,0 @@ -# Standard Packages -import json - -# Internal Packages -from khoj.processor.ledger.beancount_to_jsonl import BeancountToJsonl - - -def test_no_transactions_in_file(tmp_path): - "Handle file with no transactions." - # Arrange - entry = f""" - - Bullet point 1 - - Bullet point 2 - """ - beancount_file = create_file(tmp_path, entry) - - # Act - # Extract Entries from specified Beancount files - entry_nodes, file_to_entries = BeancountToJsonl.extract_beancount_transactions(beancount_files=[beancount_file]) - - # Process Each Entry from All Beancount Files - jsonl_string = BeancountToJsonl.convert_transaction_maps_to_jsonl( - BeancountToJsonl.convert_transactions_to_maps(entry_nodes, file_to_entries) - ) - jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()] - - # Assert - assert len(jsonl_data) == 0 - - -def test_single_beancount_transaction_to_jsonl(tmp_path): - "Convert transaction from single file to jsonl." - # Arrange - entry = f""" -1984-04-01 * "Payee" "Narration" -Expenses:Test:Test 1.00 KES -Assets:Test:Test -1.00 KES - """ - beancount_file = create_file(tmp_path, entry) - - # Act - # Extract Entries from specified Beancount files - entries, entry_to_file_map = BeancountToJsonl.extract_beancount_transactions(beancount_files=[beancount_file]) - - # Process Each Entry from All Beancount Files - jsonl_string = BeancountToJsonl.convert_transaction_maps_to_jsonl( - BeancountToJsonl.convert_transactions_to_maps(entries, entry_to_file_map) - ) - jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()] - - # Assert - assert len(jsonl_data) == 1 - - -def test_multiple_transactions_to_jsonl(tmp_path): - "Convert multiple transactions from single file to jsonl." - # Arrange - entry = f""" -1984-04-01 * "Payee" "Narration" -Expenses:Test:Test 1.00 KES -Assets:Test:Test -1.00 KES -\t\r -1984-04-01 * "Payee" "Narration" -Expenses:Test:Test 1.00 KES -Assets:Test:Test -1.00 KES -""" - - beancount_file = create_file(tmp_path, entry) - - # Act - # Extract Entries from specified Beancount files - entries, entry_to_file_map = BeancountToJsonl.extract_beancount_transactions(beancount_files=[beancount_file]) - - # Process Each Entry from All Beancount Files - jsonl_string = BeancountToJsonl.convert_transaction_maps_to_jsonl( - BeancountToJsonl.convert_transactions_to_maps(entries, entry_to_file_map) - ) - jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()] - - # Assert - assert len(jsonl_data) == 2 - - -def test_get_beancount_files(tmp_path): - "Ensure Beancount files specified via input-filter, input-files extracted" - # Arrange - # Include via input-filter globs - group1_file1 = create_file(tmp_path, filename="group1-file1.bean") - group1_file2 = create_file(tmp_path, filename="group1-file2.bean") - group2_file1 = create_file(tmp_path, filename="group2-file1.beancount") - group2_file2 = create_file(tmp_path, filename="group2-file2.beancount") - # Include via input-file field - file1 = create_file(tmp_path, filename="ledger.bean") - # Not included by any filter - create_file(tmp_path, filename="not-included-ledger.bean") - create_file(tmp_path, filename="not-included-text.txt") - - expected_files = sorted(map(str, [group1_file1, group1_file2, group2_file1, group2_file2, file1])) - - # Setup input-files, input-filters - input_files = [tmp_path / "ledger.bean"] - input_filter = [tmp_path / "group1*.bean", tmp_path / "group2*.beancount"] - - # Act - extracted_org_files = BeancountToJsonl.get_beancount_files(input_files, input_filter) - - # Assert - assert len(extracted_org_files) == 5 - assert extracted_org_files == expected_files - - -# Helper Functions -def create_file(tmp_path, entry=None, filename="ledger.beancount"): - beancount_file = tmp_path / filename - beancount_file.touch() - if entry: - beancount_file.write_text(entry) - return beancount_file diff --git a/tests/test_client.py b/tests/test_client.py index 976b6770..81955f39 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -34,7 +34,7 @@ def test_search_with_invalid_content_type(client): # ---------------------------------------------------------------------------------------------------- def test_search_with_valid_content_type(client): - for content_type in ["all", "org", "markdown", "ledger", "image", "pdf", "plugin1"]: + for content_type in ["all", "org", "markdown", "image", "pdf", "plugin1"]: # Act response = client.get(f"/api/search?q=random&t={content_type}") # Assert @@ -52,7 +52,7 @@ def test_update_with_invalid_content_type(client): # ---------------------------------------------------------------------------------------------------- def test_update_with_valid_content_type(client): - for content_type in ["org", "markdown", "ledger", "image", "pdf", "plugin1"]: + for content_type in ["org", "markdown", "image", "pdf", "plugin1"]: # Act response = client.get(f"/api/update?t={content_type}") # Assert @@ -70,7 +70,7 @@ def test_regenerate_with_invalid_content_type(client): # ---------------------------------------------------------------------------------------------------- def test_regenerate_with_valid_content_type(client): - for content_type in ["org", "markdown", "ledger", "image", "pdf", "plugin1"]: + for content_type in ["org", "markdown", "image", "pdf", "plugin1"]: # Act response = client.get(f"/api/update?force=true&t={content_type}") # Assert