Add, configure and run pre-commit locally and in test workflow

Debanjum Singh Solanky 2023-02-17 12:07:59 -06:00
parent 5e83baab21
commit 051f0e3fb5
22 changed files with 183 additions and 162 deletions
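For context, the local workflow these hooks enable is presumably the standard pre-commit flow (the commands below are the stock pre-commit CLI; only the dev extra comes from this diff):

    # Install dev dependencies (pulls in pre-commit via the new dev extra)
    pip install --upgrade .[dev]
    # Register the hooks as a git pre-commit hook, then run them once over the whole tree
    pre-commit install
    pre-commit run --all-files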


@ -44,4 +44,4 @@ jobs:
push: true
tags: ghcr.io/${{ github.repository }}:${{ env.DOCKER_IMAGE_TAG }}
build-args: |
PORT=8000


@ -78,4 +78,4 @@ jobs:
uses: pypa/gh-action-pypi-publish@v1.6.4
with:
password: ${{ secrets.PYPI_API_KEY }}
repository_url: https://test.pypi.org/legacy/


@ -9,6 +9,7 @@ on:
- tests/**
- config/**
- pyproject.toml
- .pre-commit-config.yml
- .github/workflows/test.yml
push:
branches:
@ -18,6 +19,7 @@ on:
- tests/**
- config/**
- pyproject.toml
- .pre-commit-config.yml
- .github/workflows/test.yml
jobs:
@ -45,9 +47,10 @@ jobs:
python -m pip install --upgrade pip
- name: Install Application
-  run: |
-    pip install --upgrade .[dev]
+  run: pip install --upgrade .[dev]
+- name: Validate Application
+  run: pre-commit
- name: Test Application
-  run: |
-    pytest
+  run: pytest
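(Aside: invoked with no arguments, pre-commit only checks files currently staged in git; CI jobs that should lint the entire tree typically spell this step as pre-commit run --all-files.)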

.pre-commit-config.yaml (new file, 16 lines)

@ -0,0 +1,16 @@
repos:
- repo: https://github.com/psf/black
rev: 23.1.0
hooks:
- id: black
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
# Exclude elisp files to not clear page breaks
exclude: \.el$
- id: check-json
- id: check-toml
- id: check-yaml
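These hooks account for most of the churn in the files below: end-of-file-fixer ensures each file ends with exactly one newline and trailing-whitespace strips spaces at line ends, so many of the following hunks are whitespace-only fixes with no visible content change.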


@ -4,4 +4,4 @@ Name=Khoj
Comment=A natural language search engine for your personal notes, transactions and images.
Path=/opt
Exec=/opt/Khoj
Icon=Khoj


@ -619,4 +619,3 @@ Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS


@ -19,4 +19,4 @@ dependencies:
- aiofiles=0.8.0
- huggingface_hub=0.8.1
- dateparser=1.1.1
- schedule=1.1.0


@ -52,4 +52,4 @@ processor:
#conversation:
# openai-api-key: null
# model: "text-davinci-003"
# conversation-logfile: "/data/embeddings/conversation_logs.json"


@ -4,14 +4,14 @@ services:
image: ghcr.io/debanjum/khoj:latest
ports:
# If changing the local port (left hand side), no other changes required.
# If changing the remote port (right hand side),
# change the port in the args in the build section,
# as well as the port in the command section to match
- "8000:8000"
working_dir: /app
volumes:
- .:/app
# These mounted volumes hold the raw data that should be indexed for search.
# The path in your local directory (left hand side)
# points to the files you want to index.
# The path of the mounted directory (right hand side),


@ -64,10 +64,14 @@ khoj = "khoj.main:run"
[project.optional-dependencies]
test = [
"pytest >= 7.1.2",
"black >= 23.1.0",
"pytest >= 7.1.2",
]
dev = [
"khoj-assistant[test]",
"mypy >= 1.0.1",
"black >= 23.1.0",
"pre-commit >= 3.0.4",
]
dev = ["khoj-assistant[test]"]
[tool.hatch.build.targets.sdist]
include = ["src/khoj"]
@ -79,8 +83,8 @@ packages = ["src/khoj"]
files = "src/khoj"
pretty = true
strict_optional = false
-ignore_missing_imports = true
install_types = true
+ignore_missing_imports = true
non_interactive = true
show_error_codes = true
warn_unused_ignores = true
@ -91,4 +95,4 @@ exclude = [
]
[tool.black]
line-length = 120
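Worth noting: black reads its settings from [tool.black] in pyproject.toml, so the hook pinned at rev 23.1.0 in the new pre-commit config formats to the 120-character line length configured here, and a local black >= 23.1.0 run should give the same result.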


@ -1,6 +1,6 @@
* Khoj Emacs 🦅
[[https://stable.melpa.org/#/khoj][file:https://stable.melpa.org/packages/khoj-badge.svg]] [[https://melpa.org/#/khoj][file:https://melpa.org/packages/khoj-badge.svg]] [[https://github.com/debanjum/khoj/actions/workflows/build_khoj_el.yml][https://github.com/debanjum/khoj/actions/workflows/build_khoj_el.yml/badge.svg?]] [[https://github.com/debanjum/khoj/actions/workflows/test_khoj_el.yml][https://github.com/debanjum/khoj/actions/workflows/test_khoj_el.yml/badge.svg?]]
/Natural, Incremental Search for your Second Brain/
** Table of Contents


@ -1,2 +1,2 @@
npm node_modules
build


@ -9,7 +9,7 @@
"eslint:recommended",
"plugin:@typescript-eslint/eslint-recommended",
"plugin:@typescript-eslint/recommended"
],
"parserOptions": {
"sourceType": "module"
},
@ -19,5 +19,5 @@
"@typescript-eslint/ban-ts-comment": "off",
"no-prototype-builtins": "off",
"@typescript-eslint/no-empty-function": "off"
}
}


@ -1 +1 @@
tag-version-prefix=""
tag-version-prefix=""


@ -619,4 +619,3 @@ Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS


@ -7,4 +7,4 @@
"author": "Debanjum Singh Solanky",
"authorUrl": "https://github.com/debanjum",
"isDesktopOnly": false
}


@ -113,4 +113,4 @@ export async function updateKhojBackend(khojUrl: string, khojConfig: Object) {
function getIndexDirectoryFromBackendConfig(khojConfig: any) {
return khojConfig["content-type"]["markdown"]["embeddings-file"].split("/").slice(0, -1).join("/");
}


@ -3,4 +3,4 @@
"0.2.5": "0.15.0",
"0.2.6": "0.15.0",
"0.3.0": "0.15.0"
}


@ -26,4 +26,4 @@ span.config-element-value {
button {
cursor: pointer;
}


@ -56,10 +56,10 @@ regenerateButton.addEventListener("click", (event) => {
})
/**
* Adds config elements to the DOM representing the sub-components
* of one of the fields in the raw config file.
* @param {the parent element} element
* @param {the data to be rendered for this element and its children} data
*/
function processChildren(element, data) {
for (let key in data) {
@ -78,11 +78,11 @@ function processChildren(element, data) {
}
/**
* Takes an element, and replaces it with an editable
* element with the same data in place.
* @param {the original element to be replaced} original
* @param {the source data to be rendered for the new element} data
* @param {the key for this input in the source data} key
*/
function makeElementEditable(original, data, key) {
original.addEventListener("click", () => {
@ -98,8 +98,8 @@ function makeElementEditable(original, data, key) {
/**
* Creates a node corresponding to the value of a config element.
* @param {the source data} data
* @param {the key corresponding to this node's data} key
* @returns A new element which corresponds to the value in some field.
*/
function createValueNode(data, key) {
@ -111,11 +111,11 @@ function createValueNode(data, key) {
}
/**
* Replaces an existing input element with an element with the same data, which is not an input.
* If the input data for this element was changed, update the corresponding data in the raw config.
* @param {the original element to be replaced} original
* @param {the source data} data
* @param {the key corresponding to this node's data} key
*/
function fixInputOnFocusOut(original, data, key) {
original.addEventListener("blur", () => {


@ -1,6 +1,6 @@
/*! markdown-it 13.0.1 https://github.com/markdown-it/markdown-it @license MIT */
(function(global, factory) {
typeof exports === "object" && typeof module !== "undefined" ? module.exports = factory() : typeof define === "function" && define.amd ? define(factory) : (global = typeof globalThis !== "undefined" ? globalThis : global || self,
global.markdownit = factory());
})(this, (function() {
"use strict";
@ -2164,7 +2164,7 @@
var encodeCache = {};
// Create a lookup array where anything but characters in `chars` string
// and alphanumeric chars is percent-encoded.
function getEncodeCache(exclude) {
var i, ch, cache = encodeCache[exclude];
if (cache) {
@ -2187,11 +2187,11 @@
}
// Encode unsafe characters with percent-encoding, skipping already
// encoded sequences.
// - string - string to encode
// - exclude - list of characters to ignore (in addition to a-zA-Z0-9)
// - keepEscaped - don't encode '%' in a correct escape sequence (default: true)
function encode$2(string, exclude, keepEscaped) {
var i, l, code, nextCode, cache, result = "";
if (typeof exclude !== "string") {
@ -2253,7 +2253,7 @@
return cache;
}
// Decode percent-encoded string.
function decode$2(string, exclude) {
var cache;
if (typeof exclude !== "string") {
@ -2340,26 +2340,26 @@
return result;
};
// Copyright Joyent, Inc. and other Node contributors.
// Changes from joyent/node:
// 1. No leading slash in paths,
// e.g. in `url.parse('http://foo?bar')` pathname is ``, not `/`
// 2. Backslashes are not replaced with slashes,
// so `http:\\example.org\` is treated like a relative path
// 3. Trailing colon is treated like a part of the path,
// i.e. in `http://example.org:foo` pathname is `:foo`
// 4. Nothing is URL-encoded in the resulting object,
// (in joyent/node some chars in auth and paths are encoded)
// 5. `url.parse()` does not have `parseQueryString` argument
// 6. Removed extraneous result properties: `host`, `path`, `query`, etc.,
// which can be constructed using other parts of the url.
function Url() {
this.protocol = null;
this.slashes = null;
@ -2373,28 +2373,28 @@
// Reference: RFC 3986, RFC 1808, RFC 2396
// define these here so at least they only have to be
// compiled once on the first module load.
var protocolPattern = /^([a-z0-9.+-]+:)/i, portPattern = /:[0-9]*$/,
// Special case for a simple path URL
simplePathPattern = /^(\/\/?(?!\/)[^\?\s]*)(\?[^\s]*)?$/,
// RFC 2396: characters reserved for delimiting URLs.
// We actually just auto-escape these.
delims = [ "<", ">", '"', "`", " ", "\r", "\n", "\t" ],
// RFC 2396: characters not allowed for various reasons.
unwise = [ "{", "}", "|", "\\", "^", "`" ].concat(delims),
// Allowed by RFCs, but cause of XSS attacks. Always escape these.
autoEscape = [ "'" ].concat(unwise),
// Characters that are never ever allowed in a hostname.
// Note that any invalid chars are also handled, but these
// are the ones that are *expected* to be seen, so we fast-path
// them.
nonHostChars = [ "%", "/", "?", ";", "#" ].concat(autoEscape), hostEndingChars = [ "/", "?", "#" ], hostnameMaxLen = 255, hostnamePartPattern = /^[+a-z0-9A-Z_-]{0,63}$/, hostnamePartStart = /^([+a-z0-9A-Z_-]{0,63})(.*)$/,
// protocols that can allow "unsafe" and "unwise" chars.
/* eslint-disable no-script-url */
// protocols that never have a hostname.
hostlessProtocol = {
javascript: true,
"javascript:": true
},
// protocols that always contain a // bit.
slashedProtocol = {
http: true,
@ -2632,7 +2632,7 @@
return _hasOwnProperty.call(object, key);
}
// Merge objects
function assign(obj /*from1, from2, from3, ...*/) {
var sources = Array.prototype.slice.call(arguments, 1);
sources.forEach((function(source) {
@ -2798,12 +2798,12 @@
return regex$4.test(ch);
}
// Markdown ASCII punctuation characters.
// !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~
// http://spec.commonmark.org/0.15/#ascii-punctuation-character
// Don't confuse with unicode punctuation !!! It lacks some chars in ascii range.
function isMdAsciiPunct(ch) {
switch (ch) {
case 33 /* ! */ :
@ -2845,58 +2845,58 @@
}
}
// Hepler to unify [reference labels].
function normalizeReference(str) {
// Trim and collapse whitespace
str = str.trim().replace(/\s+/g, " ");
// In node v10 'ẞ'.toLowerCase() === 'Ṿ', which is presumed to be a bug
// fixed in v12 (couldn't find any details).
// So treat this one as a special case
// (remove this when node v10 is no longer supported).
if ("\u1e9e".toLowerCase() === "\u1e7e") {
str = str.replace(/\u1e9e/g, "\xdf");
}
// .toLowerCase().toUpperCase() should get rid of all differences
// between letter variants.
// Simple .toLowerCase() doesn't normalize 125 code points correctly,
// and .toUpperCase doesn't normalize 6 of them (list of exceptions:
// İ, ϴ, ẞ, Ω, K, Å - those are already uppercased, but have differently
// uppercased versions).
// Here's an example showing how it happens. Lets take greek letter omega:
// uppercase U+0398 (Θ), U+03f4 (ϴ) and lowercase U+03b8 (θ), U+03d1 (ϑ)
// Unicode entries:
// 0398;GREEK CAPITAL LETTER THETA;Lu;0;L;;;;;N;;;;03B8;
// 03B8;GREEK SMALL LETTER THETA;Ll;0;L;;;;;N;;;0398;;0398
// 03D1;GREEK THETA SYMBOL;Ll;0;L;<compat> 03B8;;;;N;GREEK SMALL LETTER SCRIPT THETA;;0398;;0398
// 03F4;GREEK CAPITAL THETA SYMBOL;Lu;0;L;<compat> 0398;;;;N;;;;03B8;
// Case-insensitive comparison should treat all of them as equivalent.
// But .toLowerCase() doesn't change ϑ (it's already lowercase),
// and .toUpperCase() doesn't change ϴ (already uppercase).
// Applying first lower then upper case normalizes any character:
// '\u0398\u03f4\u03b8\u03d1'.toLowerCase().toUpperCase() === '\u0398\u0398\u0398\u0398'
// Note: this is equivalent to unicode case folding; unicode normalization
// is a different step that is not required here.
// Final result should be uppercased, because it's later stored in an object
// (this avoid a conflict with Object.prototype members,
// most notably, `__proto__`)
return str.toLowerCase().toUpperCase();
}
////////////////////////////////////////////////////////////////////////////////
// Re-export libraries commonly used in both markdown-it and its plugins,
// so plugins won't have to depend on them explicitly, which reduces their
// bundled size (e.g. a browser build).
exports.lib = {};
exports.lib.mdurl = mdurl;
exports.lib.ucmicro = uc_micro;
@ -3129,7 +3129,7 @@
var token = tokens[idx];
// "alt" attr MUST be set, even if empty. Because it's mandatory and
// should be placed on proper position for tests.
// Replace content with actual value
token.attrs[token.attrIndex("alt")][1] = slf.renderInlineAsText(token.children, options, env);
return slf.renderToken(tokens, idx, options);
@ -3215,11 +3215,11 @@
}
// Insert a newline between hidden paragraph and subsequent opening
// block-level tag.
// For example, here we should insert a newline before blockquote:
// - a
// >
if (token.block && token.nesting !== -1 && idx && tokens[idx - 1].hidden) {
result += "\n";
}
@ -3343,16 +3343,16 @@
// }
this.__rules__ = [];
// Cached rule chains.
// First level - chain name, '' for default.
// Second level - diginal anchor for fast filtering by charcodes.
this.__cache__ = null;
}
////////////////////////////////////////////////////////////////////////////////
// Helper methods, should not be used directly
// Find rule index by name
Ruler.prototype.__find__ = function(name) {
for (var i = 0; i < this.__rules__.length; i++) {
if (this.__rules__[i].name === name) {
@ -3362,7 +3362,7 @@
return -1;
};
// Build rules lookup cache
Ruler.prototype.__compile__ = function() {
var self = this;
var chains = [ "" ];
@ -3726,7 +3726,7 @@
// Linkifier might send raw hostnames like "example.com", where url
// starts with domain name. So we prepend http:// in those cases,
// and remove it afterwards.
if (!links[ln].schema) {
urlText = state.md.normalizeLinkText("http://" + urlText).replace(/^http:\/\//, "");
} else if (links[ln].schema === "mailto:" && !/^mailto:/i.test(urlText)) {
@ -3874,7 +3874,7 @@
isSingle = t[0] === "'";
// Find previous character,
// default to space if it's the beginning of the line
lastChar = 32;
if (t.index - 1 >= 0) {
lastChar = text.charCodeAt(t.index - 1);
@ -3890,7 +3890,7 @@
}
// Find next character,
// default to space if it's the end of the line
nextChar = 32;
if (pos < max) {
nextChar = text.charCodeAt(pos);
@ -4193,7 +4193,7 @@
// re-export Token class to use in core rules
StateCore.prototype.Token = token;
var state_core = StateCore;
var _rules$2 = [ [ "normalize", normalize ], [ "block", block ], [ "inline", inline ], [ "linkify", linkify$1 ], [ "replacements", replacements ], [ "smartquotes", smartquotes ],
// `text_join` finds `text_special` tokens (for escape sequences)
// and joins them with the rest of the text
[ "text_join", text_join ] ];
@ -4590,12 +4590,12 @@
oldParentType = state.parentType;
state.parentType = "blockquote";
// Search the end of the block
// Block ends with either:
// 1. an empty line outside:
// ```
// > test
// ```
// 2. an empty line inside:
// ```
@ -4712,7 +4712,7 @@
oldTShift.push(state.tShift[nextLine]);
oldSCount.push(state.sCount[nextLine]);
// A negative indentation means that this is a paragraph continuation
state.sCount[nextLine] = -1;
}
oldIndent = state.blkIndent;
@ -4905,9 +4905,9 @@
}
token.map = listLines = [ startLine, 0 ];
token.markup = String.fromCharCode(markerCharCode);
// Iterate list items
nextLine = startLine;
prevEmptyEnd = false;
terminatorRules = state.md.block.ruler.getRules("list");
@ -4957,7 +4957,7 @@
// - example list
// ^ listIndent position will be here
// ^ blkIndent position will be here
oldListIndent = state.listIndent;
state.listIndent = state.blkIndent;
state.blkIndent = indent;
@ -4995,9 +4995,9 @@
if (nextLine >= endLine) {
break;
}
// Try to check if list is terminated or continued.
if (state.sCount[nextLine] < state.blkIndent) {
break;
}
@ -5245,7 +5245,7 @@
var HTML_OPEN_CLOSE_TAG_RE = html_re.HTML_OPEN_CLOSE_TAG_RE;
// An array of opening and corresponding closing sequences for html tags,
// last argument defines whether it can terminate a paragraph or not
var HTML_SEQUENCES = [ [ /^<(script|pre|style|textarea)(?=(\s|>|$))/i, /<\/(script|pre|style|textarea)>/i, true ], [ /^<!--/, /-->/, true ], [ /^<\?/, /\?>/, true ], [ /^<![A-Z]/, />/, true ], [ /^<!\[CDATA\[/, /\]\]>/, true ], [ new RegExp("^</?(" + html_blocks.join("|") + ")(?=(\\s|/?>|$))", "i"), /^$/, true ], [ new RegExp(HTML_OPEN_CLOSE_TAG_RE.source + "\\s*$"), /^$/, false ] ];
var html_block = function html_block(state, startLine, endLine, silent) {
var i, nextLine, token, lineText, pos = state.bMarks[startLine] + state.tShift[startLine], max = state.eMarks[startLine];
@ -5357,9 +5357,9 @@
if (state.sCount[nextLine] - state.blkIndent > 3) {
continue;
}
// Check for underline in setext header
if (state.sCount[nextLine] >= state.blkIndent) {
pos = state.bMarks[nextLine] + state.tShift[nextLine];
max = state.eMarks[nextLine];
@ -5456,9 +5456,9 @@
// link to parser instance
this.md = md;
this.env = env;
// Internal state vartiables
this.tokens = tokens;
this.bMarks = [];
// line begin offsets for fast jumps
@ -5470,14 +5470,14 @@
// indents for each line (tabs expanded)
// An amount of virtual spaces (tabs expanded) between beginning
// of each line (bMarks) and real beginning of that line.
// It exists only as a hack because blockquotes override bMarks
// losing information in the process.
// It's used only when expanding tabs, you can think about it as
// an initial tab length, e.g. bsCount=21 applied to string `\t123`
// means first tab should be expanded to 4-21%4 === 3 spaces.
this.bsCount = [];
// block parser variables
this.blkIndent = 0;
@ -5543,7 +5543,7 @@
// don't count last fake line
}
// Push new token to "stream".
StateBlock.prototype.push = function(type, tag, nesting) {
var token$1 = new token(type, tag, nesting);
token$1.block = true;
@ -5655,7 +5655,7 @@
// re-export Token class to use in block rules
StateBlock.prototype.Token = token;
var state_block = StateBlock;
var _rules$1 = [
// First 2 params - rule name & source. Secondary array - list of rules,
// which can be terminated by this one.
[ "table", table, [ "paragraph", "reference" ] ], [ "code", code ], [ "fence", fence, [ "paragraph", "reference", "blockquote", "list" ] ], [ "blockquote", blockquote, [ "paragraph", "reference", "blockquote", "list" ] ], [ "hr", hr, [ "paragraph", "reference", "blockquote", "list" ] ], [ "list", list, [ "paragraph", "reference", "blockquote" ] ], [ "reference", reference ], [ "html_block", html_block, [ "paragraph", "reference", "blockquote" ] ], [ "heading", heading, [ "paragraph", "reference", "blockquote" ] ], [ "lheading", lheading ], [ "paragraph", paragraph ] ];
@ -5675,7 +5675,7 @@
}
}
// Generate tokens for input range
ParserBlock.prototype.tokenize = function(state, startLine, endLine) {
var ok, i, rules = this.ruler.getRules(""), len = rules.length, line = startLine, hasEmptyLines = false, maxNesting = state.md.options.maxNesting;
while (line < endLine) {
@ -5696,7 +5696,7 @@
}
// Try all possible rules.
// On success, rule should:
// - update `state.line`
// - update `state.tokens`
// - return true
@ -5961,7 +5961,7 @@
};
// ~~strike through~~
// Insert each marker as a separate text token, and add it to delimiter list
var tokenize$1 = function strikethrough(state, silent) {
var i, scanned, token, len, ch, start = state.pos, marker = state.src.charCodeAt(start);
if (silent) {
@ -6027,9 +6027,9 @@
// If a marker sequence has an odd number of characters, it's splitted
// like this: `~~~~~` -> `~` + `~~` + `~~`, leaving one marker at the
// start of the sequence.
// So, we have to move all those markers after subsequent s_close tags.
while (loneMarkers.length) {
i = loneMarkers.pop();
j = i + 1;
@ -6045,7 +6045,7 @@
}
}
// Walk through delimiter list and replace text tokens with tags
var postProcess_1$1 = function strikethrough(state) {
var curr, tokens_meta = state.tokens_meta, max = state.tokens_meta.length;
postProcess$1(state, state.delimiters);
@ -6061,7 +6061,7 @@
};
// Process *this* and _that_
// Insert each marker as a separate text token, and add it to delimiter list
var tokenize = function emphasis(state, silent) {
var i, scanned, token, start = state.pos, marker = state.src.charCodeAt(start);
if (silent) {
@ -6107,12 +6107,12 @@
endDelim = delimiters[startDelim.end];
// If the previous delimiter has the same marker and is adjacent to this one,
// merge those into one strong delimiter.
// `<em><em>whatever</em></em>` -> `<strong>whatever</strong>`
isStrong = i > 0 && delimiters[i - 1].end === startDelim.end + 1 &&
// check that first two markers match and adjacent
delimiters[i - 1].marker === startDelim.marker && delimiters[i - 1].token === startDelim.token - 1 &&
// check that last two markers are adjacent (we can safely assume they match)
delimiters[startDelim.end + 1].token === endDelim.token + 1;
ch = String.fromCharCode(startDelim.marker);
@ -6136,7 +6136,7 @@
}
}
// Walk through delimiter list and replace text tokens with tags
var postProcess_1 = function emphasis(state) {
var curr, tokens_meta = state.tokens_meta, max = state.tokens_meta.length;
postProcess(state, state.delimiters);
@ -6251,10 +6251,10 @@
href = ref.href;
title = ref.title;
}
// We found the end of the link, and know for a fact it's a valid link;
// so all that's left to do is to call tokenizer.
if (!silent) {
state.pos = labelStart;
state.posMax = labelEnd;
@ -6375,10 +6375,10 @@
href = ref.href;
title = ref.title;
}
// We found the end of the link, and know for a fact it's a valid link;
// so all that's left to do is to call tokenizer.
if (!silent) {
content = state.src.slice(labelStart, labelEnd);
state.md.inline.parse(content, state.md, state.env, tokens = []);
@ -6547,7 +6547,7 @@
// markers belong to same delimiter run if:
// - they have adjacent tokens
// - AND markers are the same
if (delimiters[headerIdx].marker !== closer.marker || lastTokenIdx !== closer.token - 1) {
headerIdx = closerIdx;
}
@ -6555,7 +6555,7 @@
// Length is only used for emphasis-specific "rule of 3",
// if it's not defined (in strikethrough or 3rd party plugins),
// we can default it to 0 to disable those checks.
closer.length = closer.length || 0;
if (!closer.close) continue;
// Previously calculated lower bounds (previous fails)
@ -6574,12 +6574,12 @@
if (opener.open && opener.end < 0) {
isOddMatch = false;
// from spec:
// If one of the delimiters can both open and close emphasis, then the
// sum of the lengths of the delimiter runs containing the opening and
// closing delimiters must not be a multiple of 3 unless both lengths
// are multiples of 3.
if (opener.close || closer.open) {
if ((opener.length + closer.length) % 3 === 0) {
if (opener.length % 3 !== 0 || closer.length % 3 !== 0) {
@ -6678,7 +6678,7 @@
this.linkLevel = 0;
}
// Flush pending text
StateInline.prototype.pushPending = function() {
var token$1 = new token("text", "", 0);
token$1.content = this.pending;
@ -6689,7 +6689,7 @@
};
// Push new token to "stream".
// If pending text exists - flush it as text token
StateInline.prototype.push = function(type, tag, nesting) {
if (this.pending) {
this.pushPending();
@ -6718,10 +6718,10 @@
};
// Scan a sequence of emphasis-like markers, and determine whether
// it can start an emphasis sequence or end an emphasis sequence.
// - start - position to scan from (it should point at a valid marker);
// - canSplitWord - determine if these markers can be found inside a word
StateInline.prototype.scanDelims = function(start, canSplitWord) {
var pos = start, lastChar, nextChar, count, can_open, can_close, isLastWhiteSpace, isLastPunctChar, isNextWhiteSpace, isNextPunctChar, left_flanking = true, right_flanking = true, max = this.posMax, marker = this.src.charCodeAt(start);
// treat beginning of the line as a whitespace
@ -6771,10 +6771,10 @@
var _rules = [ [ "text", text ], [ "linkify", linkify ], [ "newline", newline ], [ "escape", _escape ], [ "backticks", backticks ], [ "strikethrough", strikethrough.tokenize ], [ "emphasis", emphasis.tokenize ], [ "link", link ], [ "image", image ], [ "autolink", autolink ], [ "html_inline", html_inline ], [ "entity", entity ] ];
// `rule2` ruleset was created specifically for emphasis/strikethrough
// post-processing and may be changed in the future.
// Don't use this for anything except pairs (plugins working with `balance_pairs`).
var _rules2 = [ [ "balance_pairs", balance_pairs ], [ "strikethrough", strikethrough.postProcess ], [ "emphasis", emphasis.postProcess ],
// rules for pairs separate '**' into its own text tokens, which may be left unused,
// rule below merges unused segments back with the rest of the text
[ "fragments_join", fragments_join ] ];
@ -6802,7 +6802,7 @@
}
// Skip single token by running all rules in validation mode;
// returns `true` if any rule reported success
ParserInline.prototype.skipToken = function(state) {
var ok, i, pos = state.pos, rules = this.ruler.getRules(""), len = rules.length, maxNesting = state.md.options.maxNesting, cache = state.cache;
if (typeof cache[pos] !== "undefined") {
@ -6837,7 +6837,7 @@
cache[pos] = state.pos;
};
// Generate tokens for input range
ParserInline.prototype.tokenize = function(state) {
var ok, i, rules = this.ruler.getRules(""), len = rules.length, end = state.posMax, maxNesting = state.md.options.maxNesting;
while (state.pos < end) {
@ -6928,11 +6928,11 @@
re.src_xn = "xn--[a-z0-9\\-]{1,59}";
// More to read about domain names
// http://serverfault.com/questions/638260/
re.src_domain_root =
// Allow letters & digits (http://test1)
"(?:" + re.src_xn + "|" + re.src_pseudo_letter + "{1,63}" + ")";
re.src_domain = "(?:" + re.src_xn + "|" + "(?:" + re.src_pseudo_letter + ")" + "|" + "(?:" + re.src_pseudo_letter + "(?:-|" + re.src_pseudo_letter + "){0,61}" + re.src_pseudo_letter + ")" + ")";
re.src_host = "(?:" +
// Don't need IP check, because digits are already allowed in normal domain names
// src_ip4 +
// '|' +
@ -6949,11 +6949,11 @@
// Rude test fuzzy links by host, for quick deny
re.tpl_host_fuzzy_test = "localhost|www\\.|\\.\\d{1,3}\\.|(?:\\.(?:%TLDS%)(?:" + re.src_ZPCc + "|>|$))";
re.tpl_email_fuzzy = "(^|" + text_separators + '|"|\\(|' + re.src_ZCc + ")" + "(" + re.src_email_name + "@" + re.tpl_host_fuzzy_strict + ")";
re.tpl_link_fuzzy =
// Fuzzy link can't be prepended with .:/\- and non punctuation.
// but can start with > (markdown blockquote)
"(^|(?![.:/\\-_@])(?:[$+<=>^`|\uff5c]|" + re.src_ZPCc + "))" + "((?![$+<=>^`|\uff5c])" + re.tpl_host_port_fuzzy_strict + re.src_path + ")";
re.tpl_link_no_ip_fuzzy =
// Fuzzy link can't be prepended with .:/\- and non punctuation.
// but can start with > (markdown blockquote)
"(^|(?![.:/\\-_@])(?:[$+<=>^`|\uff5c]|" + re.src_ZPCc + "))" + "((?![$+<=>^`|\uff5c])" + re.tpl_host_port_no_ip_fuzzy_strict + re.src_path + ")";
@ -6962,7 +6962,7 @@
////////////////////////////////////////////////////////////////////////////////
// Helpers
// Merge objects
function assign(obj /*from1, from2, from3, ...*/) {
var sources = Array.prototype.slice.call(arguments, 1);
sources.forEach((function(source) {
@ -7025,7 +7025,7 @@
var tail = text.slice(pos);
if (!self.re.no_http) {
// compile lazily, because "host"-containing variables can change on tlds update.
self.re.no_http = new RegExp("^" + self.re.src_auth +
// Don't allow single-level domains, because of false positives like '//test'
// with code comments
"(?:localhost|(?:(?:" + self.re.src_domain + ")\\.)+" + self.re.src_domain_root + ")" + self.re.src_port + self.re.src_host_terminator + self.re.src_path, "i");
@ -7082,7 +7082,7 @@
};
}
// Schemas compiler. Build regexps.
function compile(self) {
// Load & clone RE patterns.
var re$1 = self.re = re(self.__opts__);
@ -7101,9 +7101,9 @@
re$1.link_fuzzy = RegExp(untpl(re$1.tpl_link_fuzzy), "i");
re$1.link_no_ip_fuzzy = RegExp(untpl(re$1.tpl_link_no_ip_fuzzy), "i");
re$1.host_fuzzy_test = RegExp(untpl(re$1.tpl_host_fuzzy_test), "i");
// Compile each schema
var aliases = [];
self.__compiled__ = {};
// Reset compiled data
@ -7144,9 +7144,9 @@
}
schemaError(name, val);
}));
// Compile postponed aliases
aliases.forEach((function(alias) {
if (!self.__compiled__[self.__schemas__[alias]]) {
// Silently fail on missed schemas to avoid errons on disable.
@ -7156,16 +7156,16 @@
self.__compiled__[alias].validate = self.__compiled__[self.__schemas__[alias]].validate;
self.__compiled__[alias].normalize = self.__compiled__[self.__schemas__[alias]].normalize;
}));
// Fake record for guessed links
self.__compiled__[""] = {
validate: null,
normalize: createNormalizer()
};
// Build schema condition
var slist = Object.keys(self.__compiled__).filter((function(name) {
// Filter disabled & fake schemas
return name.length > 0 && self.__compiled__[name];
@ -7175,9 +7175,9 @@
self.re.schema_search = RegExp("(^|(?!_)(?:[><\uff5c]|" + re$1.src_ZPCc + "))(" + slist + ")", "ig");
self.re.schema_at_start = RegExp("^" + self.re.schema_search.source, "i");
self.re.pretest = RegExp("(" + self.re.schema_test.source + ")|(" + self.re.host_fuzzy_test.source + ")|@", "i");
// Cleanup
resetScanCache(self);
}
/**
@ -7673,7 +7673,7 @@
* @returns {String} The resulting string of Unicode symbols.
*/ function decode(input) {
// Don't use UCS-2
var output = [], inputLength = input.length, out, i = 0, n = initialN, bias = initialBias, basic, j, index, oldi, w, k, digit, t,
/** Cached calculation results */
baseMinusT;
// Handle the basic code points: let `basic` be the number of input code
@ -7738,9 +7738,9 @@
* @param {String} input The string of Unicode symbols.
* @returns {String} The resulting Punycode string of ASCII-only symbols.
*/ function encode(input) {
var n, delta, handledCPCount, basicLength, bias, j, m, q, k, t, currentValue, output = [],
/** `inputLength` will hold the number of code points in `input`. */
inputLength,
/** Cached calculation results */
handledCPCountPlusOne, baseMinusT, qMinusT;
// Convert the input in UCS-2 to Unicode
@ -7993,13 +7993,13 @@
commonmark: commonmark
};
////////////////////////////////////////////////////////////////////////////////
// This validator can prohibit more than really needed to prevent XSS. It's a
// tradeoff to keep code simple and to be secure by default.
// If you need different setup - override validator method as you wish. Or
// replace it with dummy function and use external sanitizer.
var BAD_PROTO_RE = /^(vbscript|javascript|file|data):/;
var GOOD_DATA_RE = /^data:image\/(gif|png|jpeg|webp);/;
function validateLink(url) {


@ -220,7 +220,7 @@ Body 2
def test_parse_entry_with_empty_title(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f"""#+TITLE:
entry = f"""#+TITLE:
Body Line 1"""
orgfile = create_file(tmp_path, entry)
@ -266,7 +266,7 @@ Body Line 1"""
def test_parse_entry_with_multiple_titles_and_no_headings(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f"""#+TITLE: title1
entry = f"""#+TITLE: title1
Body Line 1
#+TITLE: title2 """
orgfile = create_file(tmp_path, entry)