diff --git a/.github/workflows/dockerize.yml b/.github/workflows/dockerize.yml index 94dde0a1..1c9807b2 100644 --- a/.github/workflows/dockerize.yml +++ b/.github/workflows/dockerize.yml @@ -44,4 +44,4 @@ jobs: push: true tags: ghcr.io/${{ github.repository }}:${{ env.DOCKER_IMAGE_TAG }} build-args: | - PORT=8000 \ No newline at end of file + PORT=8000 diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index f558886d..303db178 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -78,4 +78,4 @@ jobs: uses: pypa/gh-action-pypi-publish@v1.6.4 with: password: ${{ secrets.PYPI_API_KEY }} - repository_url: https://test.pypi.org/legacy/ \ No newline at end of file + repository_url: https://test.pypi.org/legacy/ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9136b7b7..676e186b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,6 +9,7 @@ on: - tests/** - config/** - pyproject.toml + - .pre-commit-config.yaml - .github/workflows/test.yml push: branches: @@ -18,6 +19,7 @@ on: - tests/** - config/** - pyproject.toml + - .pre-commit-config.yaml - .github/workflows/test.yml jobs: @@ -45,9 +47,10 @@ jobs: python -m pip install --upgrade pip - name: Install Application - run: | - pip install --upgrade .[dev] + run: pip install --upgrade .[dev] + + - name: Validate Application + run: pre-commit run --all-files - name: Test Application - run: | - pytest \ No newline at end of file + run: pytest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..554f241e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,16 @@ +repos: +- repo: https://github.com/psf/black + rev: 23.1.0 + hooks: + - id: black + +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace + # Exclude elisp files to not clear page breaks + exclude: \.el$ + - id: check-json + - id: check-toml + - id: check-yaml diff 
--git a/Khoj.desktop b/Khoj.desktop index 1f0d924d..a9bac639 100644 --- a/Khoj.desktop +++ b/Khoj.desktop @@ -4,4 +4,4 @@ Name=Khoj Comment=A natural language search engine for your personal notes, transactions and images. Path=/opt Exec=/opt/Khoj -Icon=Khoj \ No newline at end of file +Icon=Khoj diff --git a/LICENSE b/LICENSE index e963df82..94a04532 100644 --- a/LICENSE +++ b/LICENSE @@ -619,4 +619,3 @@ Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS - diff --git a/config/environment.yml b/config/environment.yml index 01a37d8b..cb4e685e 100644 --- a/config/environment.yml +++ b/config/environment.yml @@ -19,4 +19,4 @@ dependencies: - aiofiles=0.8.0 - huggingface_hub=0.8.1 - dateparser=1.1.1 - - schedule=1.1.0 \ No newline at end of file + - schedule=1.1.0 diff --git a/config/khoj_docker.yml b/config/khoj_docker.yml index cf3535ea..6ffe59f5 100644 --- a/config/khoj_docker.yml +++ b/config/khoj_docker.yml @@ -52,4 +52,4 @@ processor: #conversation: # openai-api-key: null # model: "text-davinci-003" - # conversation-logfile: "/data/embeddings/conversation_logs.json" \ No newline at end of file + # conversation-logfile: "/data/embeddings/conversation_logs.json" diff --git a/docker-compose.yml b/docker-compose.yml index 42f2e617..5cd1763c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,14 +4,14 @@ services: image: ghcr.io/debanjum/khoj:latest ports: # If changing the local port (left hand side), no other changes required. - # If changing the remote port (right hand side), - # change the port in the args in the build section, + # If changing the remote port (right hand side), + # change the port in the args in the build section, # as well as the port in the command section to match - "8000:8000" working_dir: /app volumes: - .:/app - # These mounted volumes hold the raw data that should be indexed for search. 
+ # These mounted volumes hold the raw data that should be indexed for search. # The path in your local directory (left hand side) # points to the files you want to index. # The path of the mounted directory (right hand side), diff --git a/pyproject.toml b/pyproject.toml index f55ff8b3..8194525b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,10 +64,14 @@ khoj = "khoj.main:run" [project.optional-dependencies] test = [ - "pytest >= 7.1.2", - "black >= 23.1.0", + "pytest >= 7.1.2", +] +dev = [ + "khoj-assistant[test]", + "mypy >= 1.0.1", + "black >= 23.1.0", + "pre-commit >= 3.0.4", ] -dev = ["khoj-assistant[test]"] [tool.hatch.build.targets.sdist] include = ["src/khoj"] @@ -79,8 +83,8 @@ packages = ["src/khoj"] files = "src/khoj" pretty = true strict_optional = false -ignore_missing_imports = true install_types = true +ignore_missing_imports = true non_interactive = true show_error_codes = true warn_unused_ignores = true @@ -91,4 +95,4 @@ exclude = [ ] [tool.black] -line-length = 120 \ No newline at end of file +line-length = 120 diff --git a/src/interface/emacs/README.org b/src/interface/emacs/README.org index 114322e3..46139607 100644 --- a/src/interface/emacs/README.org +++ b/src/interface/emacs/README.org @@ -1,6 +1,6 @@ * Khoj Emacs 🦅 [[https://stable.melpa.org/#/khoj][file:https://stable.melpa.org/packages/khoj-badge.svg]] [[https://melpa.org/#/khoj][file:https://melpa.org/packages/khoj-badge.svg]] [[https://github.com/debanjum/khoj/actions/workflows/build_khoj_el.yml][https://github.com/debanjum/khoj/actions/workflows/build_khoj_el.yml/badge.svg?]] [[https://github.com/debanjum/khoj/actions/workflows/test_khoj_el.yml][https://github.com/debanjum/khoj/actions/workflows/test_khoj_el.yml/badge.svg?]] - + /Natural, Incremental Search for your Second Brain/ ** Table of Contents diff --git a/src/interface/obsidian/.eslintignore b/src/interface/obsidian/.eslintignore index 32909b2e..b178d276 100644 --- a/src/interface/obsidian/.eslintignore +++ 
b/src/interface/obsidian/.eslintignore @@ -1,2 +1,2 @@ npm node_modules -build \ No newline at end of file +build diff --git a/src/interface/obsidian/.eslintrc b/src/interface/obsidian/.eslintrc index 08072905..a3a05c3d 100644 --- a/src/interface/obsidian/.eslintrc +++ b/src/interface/obsidian/.eslintrc @@ -9,7 +9,7 @@ "eslint:recommended", "plugin:@typescript-eslint/eslint-recommended", "plugin:@typescript-eslint/recommended" - ], + ], "parserOptions": { "sourceType": "module" }, @@ -19,5 +19,5 @@ "@typescript-eslint/ban-ts-comment": "off", "no-prototype-builtins": "off", "@typescript-eslint/no-empty-function": "off" - } - } \ No newline at end of file + } + } diff --git a/src/interface/obsidian/.npmrc b/src/interface/obsidian/.npmrc index b9737525..9555e172 100644 --- a/src/interface/obsidian/.npmrc +++ b/src/interface/obsidian/.npmrc @@ -1 +1 @@ -tag-version-prefix="" \ No newline at end of file +tag-version-prefix="" diff --git a/src/interface/obsidian/LICENSE b/src/interface/obsidian/LICENSE index e963df82..94a04532 100644 --- a/src/interface/obsidian/LICENSE +++ b/src/interface/obsidian/LICENSE @@ -619,4 +619,3 @@ Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. 
END OF TERMS AND CONDITIONS - diff --git a/src/interface/obsidian/manifest.json b/src/interface/obsidian/manifest.json index a2f48c73..9c5f58f6 100644 --- a/src/interface/obsidian/manifest.json +++ b/src/interface/obsidian/manifest.json @@ -7,4 +7,4 @@ "author": "Debanjum Singh Solanky", "authorUrl": "https://github.com/debanjum", "isDesktopOnly": false -} \ No newline at end of file +} diff --git a/src/interface/obsidian/src/utils.ts b/src/interface/obsidian/src/utils.ts index b8142f6d..04e1919c 100644 --- a/src/interface/obsidian/src/utils.ts +++ b/src/interface/obsidian/src/utils.ts @@ -113,4 +113,4 @@ export async function updateKhojBackend(khojUrl: string, khojConfig: Object) { function getIndexDirectoryFromBackendConfig(khojConfig: any) { return khojConfig["content-type"]["markdown"]["embeddings-file"].split("/").slice(0, -1).join("/"); -} \ No newline at end of file +} diff --git a/src/interface/obsidian/versions.json b/src/interface/obsidian/versions.json index 12d28a86..7b6560f4 100644 --- a/src/interface/obsidian/versions.json +++ b/src/interface/obsidian/versions.json @@ -3,4 +3,4 @@ "0.2.5": "0.15.0", "0.2.6": "0.15.0", "0.3.0": "0.15.0" -} \ No newline at end of file +} diff --git a/src/khoj/interface/web/assets/config.css b/src/khoj/interface/web/assets/config.css index 23c0130d..eff882ba 100644 --- a/src/khoj/interface/web/assets/config.css +++ b/src/khoj/interface/web/assets/config.css @@ -26,4 +26,4 @@ span.config-element-value { button { cursor: pointer; -} \ No newline at end of file +} diff --git a/src/khoj/interface/web/assets/config.js b/src/khoj/interface/web/assets/config.js index 965df9bc..056faca8 100644 --- a/src/khoj/interface/web/assets/config.js +++ b/src/khoj/interface/web/assets/config.js @@ -56,10 +56,10 @@ regenerateButton.addEventListener("click", (event) => { }) /** - * Adds config elements to the DOM representing the sub-components + * Adds config elements to the DOM representing the sub-components * of one of the fields in the 
raw config file. - * @param {the parent element} element - * @param {the data to be rendered for this element and its children} data + * @param {the parent element} element + * @param {the data to be rendered for this element and its children} data */ function processChildren(element, data) { for (let key in data) { @@ -78,11 +78,11 @@ function processChildren(element, data) { } /** - * Takes an element, and replaces it with an editable + * Takes an element, and replaces it with an editable * element with the same data in place. - * @param {the original element to be replaced} original - * @param {the source data to be rendered for the new element} data - * @param {the key for this input in the source data} key + * @param {the original element to be replaced} original + * @param {the source data to be rendered for the new element} data + * @param {the key for this input in the source data} key */ function makeElementEditable(original, data, key) { original.addEventListener("click", () => { @@ -98,8 +98,8 @@ function makeElementEditable(original, data, key) { /** * Creates a node corresponding to the value of a config element. - * @param {the source data} data - * @param {the key corresponding to this node's data} key + * @param {the source data} data + * @param {the key corresponding to this node's data} key * @returns A new element which corresponds to the value in some field. */ function createValueNode(data, key) { @@ -111,11 +111,11 @@ function createValueNode(data, key) { } /** - * Replaces an existing input element with an element with the same data, which is not an input. + * Replaces an existing input element with an element with the same data, which is not an input. * If the input data for this element was changed, update the corresponding data in the raw config. 
- * @param {the original element to be replaced} original - * @param {the source data} data - * @param {the key corresponding to this node's data} key + * @param {the original element to be replaced} original + * @param {the source data} data + * @param {the key corresponding to this node's data} key */ function fixInputOnFocusOut(original, data, key) { original.addEventListener("blur", () => { diff --git a/src/khoj/interface/web/assets/markdown-it.min.js b/src/khoj/interface/web/assets/markdown-it.min.js index 938677ae..5b37e28e 100644 --- a/src/khoj/interface/web/assets/markdown-it.min.js +++ b/src/khoj/interface/web/assets/markdown-it.min.js @@ -1,6 +1,6 @@ /*! markdown-it 13.0.1 https://github.com/markdown-it/markdown-it @license MIT */ (function(global, factory) { - typeof exports === "object" && typeof module !== "undefined" ? module.exports = factory() : typeof define === "function" && define.amd ? define(factory) : (global = typeof globalThis !== "undefined" ? globalThis : global || self, + typeof exports === "object" && typeof module !== "undefined" ? module.exports = factory() : typeof define === "function" && define.amd ? define(factory) : (global = typeof globalThis !== "undefined" ? globalThis : global || self, global.markdownit = factory()); })(this, (function() { "use strict"; @@ -2164,7 +2164,7 @@ var encodeCache = {}; // Create a lookup array where anything but characters in `chars` string // and alphanumeric chars is percent-encoded. - + function getEncodeCache(exclude) { var i, ch, cache = encodeCache[exclude]; if (cache) { @@ -2187,11 +2187,11 @@ } // Encode unsafe characters with percent-encoding, skipping already // encoded sequences. 
- + // - string - string to encode // - exclude - list of characters to ignore (in addition to a-zA-Z0-9) // - keepEscaped - don't encode '%' in a correct escape sequence (default: true) - + function encode$2(string, exclude, keepEscaped) { var i, l, code, nextCode, cache, result = ""; if (typeof exclude !== "string") { @@ -2253,7 +2253,7 @@ return cache; } // Decode percent-encoded string. - + function decode$2(string, exclude) { var cache; if (typeof exclude !== "string") { @@ -2340,26 +2340,26 @@ return result; }; // Copyright Joyent, Inc. and other Node contributors. - + // Changes from joyent/node: - + // 1. No leading slash in paths, // e.g. in `url.parse('http://foo?bar')` pathname is ``, not `/` - + // 2. Backslashes are not replaced with slashes, // so `http:\\example.org\` is treated like a relative path - + // 3. Trailing colon is treated like a part of the path, // i.e. in `http://example.org:foo` pathname is `:foo` - + // 4. Nothing is URL-encoded in the resulting object, // (in joyent/node some chars in auth and paths are encoded) - + // 5. `url.parse()` does not have `parseQueryString` argument - + // 6. Removed extraneous result properties: `host`, `path`, `query`, etc., // which can be constructed using other parts of the url. - + function Url() { this.protocol = null; this.slashes = null; @@ -2373,28 +2373,28 @@ // Reference: RFC 3986, RFC 1808, RFC 2396 // define these here so at least they only have to be // compiled once on the first module load. - var protocolPattern = /^([a-z0-9.+-]+:)/i, portPattern = /:[0-9]*$/, + var protocolPattern = /^([a-z0-9.+-]+:)/i, portPattern = /:[0-9]*$/, // Special case for a simple path URL - simplePathPattern = /^(\/\/?(?!\/)[^\?\s]*)(\?[^\s]*)?$/, + simplePathPattern = /^(\/\/?(?!\/)[^\?\s]*)(\?[^\s]*)?$/, // RFC 2396: characters reserved for delimiting URLs. // We actually just auto-escape these. 
- delims = [ "<", ">", '"', "`", " ", "\r", "\n", "\t" ], + delims = [ "<", ">", '"', "`", " ", "\r", "\n", "\t" ], // RFC 2396: characters not allowed for various reasons. - unwise = [ "{", "}", "|", "\\", "^", "`" ].concat(delims), + unwise = [ "{", "}", "|", "\\", "^", "`" ].concat(delims), // Allowed by RFCs, but cause of XSS attacks. Always escape these. - autoEscape = [ "'" ].concat(unwise), + autoEscape = [ "'" ].concat(unwise), // Characters that are never ever allowed in a hostname. // Note that any invalid chars are also handled, but these // are the ones that are *expected* to be seen, so we fast-path // them. - nonHostChars = [ "%", "/", "?", ";", "#" ].concat(autoEscape), hostEndingChars = [ "/", "?", "#" ], hostnameMaxLen = 255, hostnamePartPattern = /^[+a-z0-9A-Z_-]{0,63}$/, hostnamePartStart = /^([+a-z0-9A-Z_-]{0,63})(.*)$/, + nonHostChars = [ "%", "/", "?", ";", "#" ].concat(autoEscape), hostEndingChars = [ "/", "?", "#" ], hostnameMaxLen = 255, hostnamePartPattern = /^[+a-z0-9A-Z_-]{0,63}$/, hostnamePartStart = /^([+a-z0-9A-Z_-]{0,63})(.*)$/, // protocols that can allow "unsafe" and "unwise" chars. /* eslint-disable no-script-url */ // protocols that never have a hostname. hostlessProtocol = { javascript: true, "javascript:": true - }, + }, // protocols that always contain a // bit. slashedProtocol = { http: true, @@ -2632,7 +2632,7 @@ return _hasOwnProperty.call(object, key); } // Merge objects - + function assign(obj /*from1, from2, from3, ...*/) { var sources = Array.prototype.slice.call(arguments, 1); sources.forEach((function(source) { @@ -2798,12 +2798,12 @@ return regex$4.test(ch); } // Markdown ASCII punctuation characters. - + // !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~ // http://spec.commonmark.org/0.15/#ascii-punctuation-character - + // Don't confuse with unicode punctuation !!! It lacks some chars in ascii range. - + function isMdAsciiPunct(ch) { switch (ch) { case 33 /* ! 
*/ : @@ -2845,58 +2845,58 @@ } } // Hepler to unify [reference labels]. - + function normalizeReference(str) { // Trim and collapse whitespace str = str.trim().replace(/\s+/g, " "); // In node v10 'ẞ'.toLowerCase() === 'Ṿ', which is presumed to be a bug // fixed in v12 (couldn't find any details). - + // So treat this one as a special case // (remove this when node v10 is no longer supported). - + if ("\u1e9e".toLowerCase() === "\u1e7e") { str = str.replace(/\u1e9e/g, "\xdf"); } // .toLowerCase().toUpperCase() should get rid of all differences // between letter variants. - + // Simple .toLowerCase() doesn't normalize 125 code points correctly, // and .toUpperCase doesn't normalize 6 of them (list of exceptions: // İ, ϴ, ẞ, Ω, K, Å - those are already uppercased, but have differently // uppercased versions). - + // Here's an example showing how it happens. Lets take greek letter omega: // uppercase U+0398 (Θ), U+03f4 (ϴ) and lowercase U+03b8 (θ), U+03d1 (ϑ) - + // Unicode entries: // 0398;GREEK CAPITAL LETTER THETA;Lu;0;L;;;;;N;;;;03B8; // 03B8;GREEK SMALL LETTER THETA;Ll;0;L;;;;;N;;;0398;;0398 // 03D1;GREEK THETA SYMBOL;Ll;0;L; 03B8;;;;N;GREEK SMALL LETTER SCRIPT THETA;;0398;;0398 // 03F4;GREEK CAPITAL THETA SYMBOL;Lu;0;L; 0398;;;;N;;;;03B8; - + // Case-insensitive comparison should treat all of them as equivalent. - + // But .toLowerCase() doesn't change ϑ (it's already lowercase), // and .toUpperCase() doesn't change ϴ (already uppercase). - + // Applying first lower then upper case normalizes any character: // '\u0398\u03f4\u03b8\u03d1'.toLowerCase().toUpperCase() === '\u0398\u0398\u0398\u0398' - + // Note: this is equivalent to unicode case folding; unicode normalization // is a different step that is not required here. 
- + // Final result should be uppercased, because it's later stored in an object // (this avoid a conflict with Object.prototype members, // most notably, `__proto__`) - + return str.toLowerCase().toUpperCase(); } //////////////////////////////////////////////////////////////////////////////// // Re-export libraries commonly used in both markdown-it and its plugins, // so plugins won't have to depend on them explicitly, which reduces their // bundled size (e.g. a browser build). - + exports.lib = {}; exports.lib.mdurl = mdurl; exports.lib.ucmicro = uc_micro; @@ -3129,7 +3129,7 @@ var token = tokens[idx]; // "alt" attr MUST be set, even if empty. Because it's mandatory and // should be placed on proper position for tests. - + // Replace content with actual value token.attrs[token.attrIndex("alt")][1] = slf.renderInlineAsText(token.children, options, env); return slf.renderToken(tokens, idx, options); @@ -3215,11 +3215,11 @@ } // Insert a newline between hidden paragraph and subsequent opening // block-level tag. - + // For example, here we should insert a newline before blockquote: // - a // > - + if (token.block && token.nesting !== -1 && idx && tokens[idx - 1].hidden) { result += "\n"; } @@ -3343,16 +3343,16 @@ // } this.__rules__ = []; // Cached rule chains. - + // First level - chain name, '' for default. // Second level - diginal anchor for fast filtering by charcodes. - + this.__cache__ = null; } //////////////////////////////////////////////////////////////////////////////// // Helper methods, should not be used directly // Find rule index by name - + Ruler.prototype.__find__ = function(name) { for (var i = 0; i < this.__rules__.length; i++) { if (this.__rules__[i].name === name) { @@ -3362,7 +3362,7 @@ return -1; }; // Build rules lookup cache - + Ruler.prototype.__compile__ = function() { var self = this; var chains = [ "" ]; @@ -3726,7 +3726,7 @@ // Linkifier might send raw hostnames like "example.com", where url // starts with domain name. 
So we prepend http:// in those cases, // and remove it afterwards. - + if (!links[ln].schema) { urlText = state.md.normalizeLinkText("http://" + urlText).replace(/^http:\/\//, ""); } else if (links[ln].schema === "mailto:" && !/^mailto:/i.test(urlText)) { @@ -3874,7 +3874,7 @@ isSingle = t[0] === "'"; // Find previous character, // default to space if it's the beginning of the line - + lastChar = 32; if (t.index - 1 >= 0) { lastChar = text.charCodeAt(t.index - 1); @@ -3890,7 +3890,7 @@ } // Find next character, // default to space if it's the end of the line - + nextChar = 32; if (pos < max) { nextChar = text.charCodeAt(pos); @@ -4193,7 +4193,7 @@ // re-export Token class to use in core rules StateCore.prototype.Token = token; var state_core = StateCore; - var _rules$2 = [ [ "normalize", normalize ], [ "block", block ], [ "inline", inline ], [ "linkify", linkify$1 ], [ "replacements", replacements ], [ "smartquotes", smartquotes ], + var _rules$2 = [ [ "normalize", normalize ], [ "block", block ], [ "inline", inline ], [ "linkify", linkify$1 ], [ "replacements", replacements ], [ "smartquotes", smartquotes ], // `text_join` finds `text_special` tokens (for escape sequences) // and joins them with the rest of the text [ "text_join", text_join ] ]; @@ -4590,12 +4590,12 @@ oldParentType = state.parentType; state.parentType = "blockquote"; // Search the end of the block - + // Block ends with either: // 1. an empty line outside: // ``` // > test - + // ``` // 2. 
an empty line inside: // ``` @@ -4712,7 +4712,7 @@ oldTShift.push(state.tShift[nextLine]); oldSCount.push(state.sCount[nextLine]); // A negative indentation means that this is a paragraph continuation - + state.sCount[nextLine] = -1; } oldIndent = state.blkIndent; @@ -4905,9 +4905,9 @@ } token.map = listLines = [ startLine, 0 ]; token.markup = String.fromCharCode(markerCharCode); - + // Iterate list items - + nextLine = startLine; prevEmptyEnd = false; terminatorRules = state.md.block.ruler.getRules("list"); @@ -4957,7 +4957,7 @@ // - example list // ^ listIndent position will be here // ^ blkIndent position will be here - + oldListIndent = state.listIndent; state.listIndent = state.blkIndent; state.blkIndent = indent; @@ -4995,9 +4995,9 @@ if (nextLine >= endLine) { break; } - + // Try to check if list is terminated or continued. - + if (state.sCount[nextLine] < state.blkIndent) { break; } @@ -5245,7 +5245,7 @@ var HTML_OPEN_CLOSE_TAG_RE = html_re.HTML_OPEN_CLOSE_TAG_RE; // An array of opening and corresponding closing sequences for html tags, // last argument defines whether it can terminate a paragraph or not - + var HTML_SEQUENCES = [ [ /^<(script|pre|style|textarea)(?=(\s|>|$))/i, /<\/(script|pre|style|textarea)>/i, true ], [ /^/, true ], [ /^<\?/, /\?>/, true ], [ /^/, true ], [ /^/, true ], [ new RegExp("^|$))", "i"), /^$/, true ], [ new RegExp(HTML_OPEN_CLOSE_TAG_RE.source + "\\s*$"), /^$/, false ] ]; var html_block = function html_block(state, startLine, endLine, silent) { var i, nextLine, token, lineText, pos = state.bMarks[startLine] + state.tShift[startLine], max = state.eMarks[startLine]; @@ -5357,9 +5357,9 @@ if (state.sCount[nextLine] - state.blkIndent > 3) { continue; } - + // Check for underline in setext header - + if (state.sCount[nextLine] >= state.blkIndent) { pos = state.bMarks[nextLine] + state.tShift[nextLine]; max = state.eMarks[nextLine]; @@ -5456,9 +5456,9 @@ // link to parser instance this.md = md; this.env = env; - + // Internal state 
vartiables - + this.tokens = tokens; this.bMarks = []; // line begin offsets for fast jumps @@ -5470,14 +5470,14 @@ // indents for each line (tabs expanded) // An amount of virtual spaces (tabs expanded) between beginning // of each line (bMarks) and real beginning of that line. - + // It exists only as a hack because blockquotes override bMarks // losing information in the process. - + // It's used only when expanding tabs, you can think about it as // an initial tab length, e.g. bsCount=21 applied to string `\t123` // means first tab should be expanded to 4-21%4 === 3 spaces. - + this.bsCount = []; // block parser variables this.blkIndent = 0; @@ -5543,7 +5543,7 @@ // don't count last fake line } // Push new token to "stream". - + StateBlock.prototype.push = function(type, tag, nesting) { var token$1 = new token(type, tag, nesting); token$1.block = true; @@ -5655,7 +5655,7 @@ // re-export Token class to use in block rules StateBlock.prototype.Token = token; var state_block = StateBlock; - var _rules$1 = [ + var _rules$1 = [ // First 2 params - rule name & source. Secondary array - list of rules, // which can be terminated by this one. 
[ "table", table, [ "paragraph", "reference" ] ], [ "code", code ], [ "fence", fence, [ "paragraph", "reference", "blockquote", "list" ] ], [ "blockquote", blockquote, [ "paragraph", "reference", "blockquote", "list" ] ], [ "hr", hr, [ "paragraph", "reference", "blockquote", "list" ] ], [ "list", list, [ "paragraph", "reference", "blockquote" ] ], [ "reference", reference ], [ "html_block", html_block, [ "paragraph", "reference", "blockquote" ] ], [ "heading", heading, [ "paragraph", "reference", "blockquote" ] ], [ "lheading", lheading ], [ "paragraph", paragraph ] ]; @@ -5675,7 +5675,7 @@ } } // Generate tokens for input range - + ParserBlock.prototype.tokenize = function(state, startLine, endLine) { var ok, i, rules = this.ruler.getRules(""), len = rules.length, line = startLine, hasEmptyLines = false, maxNesting = state.md.options.maxNesting; while (line < endLine) { @@ -5696,7 +5696,7 @@ } // Try all possible rules. // On success, rule should: - + // - update `state.line` // - update `state.tokens` // - return true @@ -5961,7 +5961,7 @@ }; // ~~strike through~~ // Insert each marker as a separate text token, and add it to delimiter list - + var tokenize$1 = function strikethrough(state, silent) { var i, scanned, token, len, ch, start = state.pos, marker = state.src.charCodeAt(start); if (silent) { @@ -6027,9 +6027,9 @@ // If a marker sequence has an odd number of characters, it's splitted // like this: `~~~~~` -> `~` + `~~` + `~~`, leaving one marker at the // start of the sequence. - + // So, we have to move all those markers after subsequent s_close tags. 
- + while (loneMarkers.length) { i = loneMarkers.pop(); j = i + 1; @@ -6045,7 +6045,7 @@ } } // Walk through delimiter list and replace text tokens with tags - + var postProcess_1$1 = function strikethrough(state) { var curr, tokens_meta = state.tokens_meta, max = state.tokens_meta.length; postProcess$1(state, state.delimiters); @@ -6061,7 +6061,7 @@ }; // Process *this* and _that_ // Insert each marker as a separate text token, and add it to delimiter list - + var tokenize = function emphasis(state, silent) { var i, scanned, token, start = state.pos, marker = state.src.charCodeAt(start); if (silent) { @@ -6107,12 +6107,12 @@ endDelim = delimiters[startDelim.end]; // If the previous delimiter has the same marker and is adjacent to this one, // merge those into one strong delimiter. - + // `whatever` -> `whatever` - - isStrong = i > 0 && delimiters[i - 1].end === startDelim.end + 1 && + + isStrong = i > 0 && delimiters[i - 1].end === startDelim.end + 1 && // check that first two markers match and adjacent - delimiters[i - 1].marker === startDelim.marker && delimiters[i - 1].token === startDelim.token - 1 && + delimiters[i - 1].marker === startDelim.marker && delimiters[i - 1].token === startDelim.token - 1 && // check that last two markers are adjacent (we can safely assume they match) delimiters[startDelim.end + 1].token === endDelim.token + 1; ch = String.fromCharCode(startDelim.marker); @@ -6136,7 +6136,7 @@ } } // Walk through delimiter list and replace text tokens with tags - + var postProcess_1 = function emphasis(state) { var curr, tokens_meta = state.tokens_meta, max = state.tokens_meta.length; postProcess(state, state.delimiters); @@ -6251,10 +6251,10 @@ href = ref.href; title = ref.title; } - + // We found the end of the link, and know for a fact it's a valid link; // so all that's left to do is to call tokenizer. 
- + if (!silent) { state.pos = labelStart; state.posMax = labelEnd; @@ -6375,10 +6375,10 @@ href = ref.href; title = ref.title; } - + // We found the end of the link, and know for a fact it's a valid link; // so all that's left to do is to call tokenizer. - + if (!silent) { content = state.src.slice(labelStart, labelEnd); state.md.inline.parse(content, state.md, state.env, tokens = []); @@ -6547,7 +6547,7 @@ // markers belong to same delimiter run if: // - they have adjacent tokens // - AND markers are the same - + if (delimiters[headerIdx].marker !== closer.marker || lastTokenIdx !== closer.token - 1) { headerIdx = closerIdx; } @@ -6555,7 +6555,7 @@ // Length is only used for emphasis-specific "rule of 3", // if it's not defined (in strikethrough or 3rd party plugins), // we can default it to 0 to disable those checks. - + closer.length = closer.length || 0; if (!closer.close) continue; // Previously calculated lower bounds (previous fails) @@ -6574,12 +6574,12 @@ if (opener.open && opener.end < 0) { isOddMatch = false; // from spec: - + // If one of the delimiters can both open and close emphasis, then the // sum of the lengths of the delimiter runs containing the opening and // closing delimiters must not be a multiple of 3 unless both lengths // are multiples of 3. - + if (opener.close || closer.open) { if ((opener.length + closer.length) % 3 === 0) { if (opener.length % 3 !== 0 || closer.length % 3 !== 0) { @@ -6678,7 +6678,7 @@ this.linkLevel = 0; } // Flush pending text - + StateInline.prototype.pushPending = function() { var token$1 = new token("text", "", 0); token$1.content = this.pending; @@ -6689,7 +6689,7 @@ }; // Push new token to "stream". // If pending text exists - flush it as text token - + StateInline.prototype.push = function(type, tag, nesting) { if (this.pending) { this.pushPending(); @@ -6718,10 +6718,10 @@ }; // Scan a sequence of emphasis-like markers, and determine whether // it can start an emphasis sequence or end an emphasis sequence. 
- + // - start - position to scan from (it should point at a valid marker); // - canSplitWord - determine if these markers can be found inside a word - + StateInline.prototype.scanDelims = function(start, canSplitWord) { var pos = start, lastChar, nextChar, count, can_open, can_close, isLastWhiteSpace, isLastPunctChar, isNextWhiteSpace, isNextPunctChar, left_flanking = true, right_flanking = true, max = this.posMax, marker = this.src.charCodeAt(start); // treat beginning of the line as a whitespace @@ -6771,10 +6771,10 @@ var _rules = [ [ "text", text ], [ "linkify", linkify ], [ "newline", newline ], [ "escape", _escape ], [ "backticks", backticks ], [ "strikethrough", strikethrough.tokenize ], [ "emphasis", emphasis.tokenize ], [ "link", link ], [ "image", image ], [ "autolink", autolink ], [ "html_inline", html_inline ], [ "entity", entity ] ]; // `rule2` ruleset was created specifically for emphasis/strikethrough // post-processing and may be changed in the future. - + // Don't use this for anything except pairs (plugins working with `balance_pairs`). 
- - var _rules2 = [ [ "balance_pairs", balance_pairs ], [ "strikethrough", strikethrough.postProcess ], [ "emphasis", emphasis.postProcess ], + + var _rules2 = [ [ "balance_pairs", balance_pairs ], [ "strikethrough", strikethrough.postProcess ], [ "emphasis", emphasis.postProcess ], // rules for pairs separate '**' into its own text tokens, which may be left unused, // rule below merges unused segments back with the rest of the text [ "fragments_join", fragments_join ] ]; @@ -6802,7 +6802,7 @@ } // Skip single token by running all rules in validation mode; // returns `true` if any rule reported success - + ParserInline.prototype.skipToken = function(state) { var ok, i, pos = state.pos, rules = this.ruler.getRules(""), len = rules.length, maxNesting = state.md.options.maxNesting, cache = state.cache; if (typeof cache[pos] !== "undefined") { @@ -6837,7 +6837,7 @@ cache[pos] = state.pos; }; // Generate tokens for input range - + ParserInline.prototype.tokenize = function(state) { var ok, i, rules = this.ruler.getRules(""), len = rules.length, end = state.posMax, maxNesting = state.md.options.maxNesting; while (state.pos < end) { @@ -6928,11 +6928,11 @@ re.src_xn = "xn--[a-z0-9\\-]{1,59}"; // More to read about domain names // http://serverfault.com/questions/638260/ - re.src_domain_root = + re.src_domain_root = // Allow letters & digits (http://test1) "(?:" + re.src_xn + "|" + re.src_pseudo_letter + "{1,63}" + ")"; re.src_domain = "(?:" + re.src_xn + "|" + "(?:" + re.src_pseudo_letter + ")" + "|" + "(?:" + re.src_pseudo_letter + "(?:-|" + re.src_pseudo_letter + "){0,61}" + re.src_pseudo_letter + ")" + ")"; - re.src_host = "(?:" + + re.src_host = "(?:" + // Don't need IP check, because digits are already allowed in normal domain names // src_ip4 + // '|' + @@ -6949,11 +6949,11 @@ // Rude test fuzzy links by host, for quick deny re.tpl_host_fuzzy_test = "localhost|www\\.|\\.\\d{1,3}\\.|(?:\\.(?:%TLDS%)(?:" + re.src_ZPCc + "|>|$))"; re.tpl_email_fuzzy = "(^|" + 
text_separators + '|"|\\(|' + re.src_ZCc + ")" + "(" + re.src_email_name + "@" + re.tpl_host_fuzzy_strict + ")"; - re.tpl_link_fuzzy = + re.tpl_link_fuzzy = // Fuzzy link can't be prepended with .:/\- and non punctuation. // but can start with > (markdown blockquote) "(^|(?![.:/\\-_@])(?:[$+<=>^`|\uff5c]|" + re.src_ZPCc + "))" + "((?![$+<=>^`|\uff5c])" + re.tpl_host_port_fuzzy_strict + re.src_path + ")"; - re.tpl_link_no_ip_fuzzy = + re.tpl_link_no_ip_fuzzy = // Fuzzy link can't be prepended with .:/\- and non punctuation. // but can start with > (markdown blockquote) "(^|(?![.:/\\-_@])(?:[$+<=>^`|\uff5c]|" + re.src_ZPCc + "))" + "((?![$+<=>^`|\uff5c])" + re.tpl_host_port_no_ip_fuzzy_strict + re.src_path + ")"; @@ -6962,7 +6962,7 @@ //////////////////////////////////////////////////////////////////////////////// // Helpers // Merge objects - + function assign(obj /*from1, from2, from3, ...*/) { var sources = Array.prototype.slice.call(arguments, 1); sources.forEach((function(source) { @@ -7025,7 +7025,7 @@ var tail = text.slice(pos); if (!self.re.no_http) { // compile lazily, because "host"-containing variables can change on tlds update. - self.re.no_http = new RegExp("^" + self.re.src_auth + + self.re.no_http = new RegExp("^" + self.re.src_auth + // Don't allow single-level domains, because of false positives like '//test' // with code comments "(?:localhost|(?:(?:" + self.re.src_domain + ")\\.)+" + self.re.src_domain_root + ")" + self.re.src_port + self.re.src_host_terminator + self.re.src_path, "i"); @@ -7082,7 +7082,7 @@ }; } // Schemas compiler. Build regexps. - + function compile(self) { // Load & clone RE patterns. 
var re$1 = self.re = re(self.__opts__); @@ -7101,9 +7101,9 @@ re$1.link_fuzzy = RegExp(untpl(re$1.tpl_link_fuzzy), "i"); re$1.link_no_ip_fuzzy = RegExp(untpl(re$1.tpl_link_no_ip_fuzzy), "i"); re$1.host_fuzzy_test = RegExp(untpl(re$1.tpl_host_fuzzy_test), "i"); - + // Compile each schema - + var aliases = []; self.__compiled__ = {}; // Reset compiled data @@ -7144,9 +7144,9 @@ } schemaError(name, val); })); - + // Compile postponed aliases - + aliases.forEach((function(alias) { if (!self.__compiled__[self.__schemas__[alias]]) { // Silently fail on missed schemas to avoid errons on disable. @@ -7156,16 +7156,16 @@ self.__compiled__[alias].validate = self.__compiled__[self.__schemas__[alias]].validate; self.__compiled__[alias].normalize = self.__compiled__[self.__schemas__[alias]].normalize; })); - + // Fake record for guessed links - + self.__compiled__[""] = { validate: null, normalize: createNormalizer() }; - + // Build schema condition - + var slist = Object.keys(self.__compiled__).filter((function(name) { // Filter disabled & fake schemas return name.length > 0 && self.__compiled__[name]; @@ -7175,9 +7175,9 @@ self.re.schema_search = RegExp("(^|(?!_)(?:[><\uff5c]|" + re$1.src_ZPCc + "))(" + slist + ")", "ig"); self.re.schema_at_start = RegExp("^" + self.re.schema_search.source, "i"); self.re.pretest = RegExp("(" + self.re.schema_test.source + ")|(" + self.re.host_fuzzy_test.source + ")|@", "i"); - + // Cleanup - + resetScanCache(self); } /** @@ -7673,7 +7673,7 @@ * @returns {String} The resulting string of Unicode symbols. 
*/ function decode(input) { // Don't use UCS-2 - var output = [], inputLength = input.length, out, i = 0, n = initialN, bias = initialBias, basic, j, index, oldi, w, k, digit, t, + var output = [], inputLength = input.length, out, i = 0, n = initialN, bias = initialBias, basic, j, index, oldi, w, k, digit, t, /** Cached calculation results */ baseMinusT; // Handle the basic code points: let `basic` be the number of input code @@ -7738,9 +7738,9 @@ * @param {String} input The string of Unicode symbols. * @returns {String} The resulting Punycode string of ASCII-only symbols. */ function encode(input) { - var n, delta, handledCPCount, basicLength, bias, j, m, q, k, t, currentValue, output = [], + var n, delta, handledCPCount, basicLength, bias, j, m, q, k, t, currentValue, output = [], /** `inputLength` will hold the number of code points in `input`. */ - inputLength, + inputLength, /** Cached calculation results */ handledCPCountPlusOne, baseMinusT, qMinusT; // Convert the input in UCS-2 to Unicode @@ -7993,13 +7993,13 @@ commonmark: commonmark }; //////////////////////////////////////////////////////////////////////////////// - + // This validator can prohibit more than really needed to prevent XSS. It's a // tradeoff to keep code simple and to be secure by default. - + // If you need different setup - override validator method as you wish. Or // replace it with dummy function and use external sanitizer. 
- + var BAD_PROTO_RE = /^(vbscript|javascript|file|data):/; var GOOD_DATA_RE = /^data:image\/(gif|png|jpeg|webp);/; function validateLink(url) { diff --git a/tests/test_orgnode.py b/tests/test_orgnode.py index 53dee212..bda733c8 100644 --- a/tests/test_orgnode.py +++ b/tests/test_orgnode.py @@ -220,7 +220,7 @@ Body 2 def test_parse_entry_with_empty_title(tmp_path): "Test parsing of entry with minimal fields" # Arrange - entry = f"""#+TITLE: + entry = f"""#+TITLE: Body Line 1""" orgfile = create_file(tmp_path, entry) @@ -266,7 +266,7 @@ Body Line 1""" def test_parse_entry_with_multiple_titles_and_no_headings(tmp_path): "Test parsing of entry with minimal fields" # Arrange - entry = f"""#+TITLE: title1 + entry = f"""#+TITLE: title1 Body Line 1 #+TITLE: title2 """ orgfile = create_file(tmp_path, entry)