From c56d3b15582840da2b579f788000183b83be79ba Mon Sep 17 00:00:00 2001 From: Timothy Carambat <rambat1010@gmail.com> Date: Mon, 27 Jan 2025 13:35:58 -0800 Subject: [PATCH] breakout latex plugin for delims (#3040) * Breakout LaTeX plugin for modification * backport regular markdown link --- frontend/package.json | 2 +- frontend/src/utils/chat/markdown.js | 4 +- .../src/utils/chat/plugins/markdown-katex.js | 277 ++++++++++++++++++ frontend/yarn.lock | 7 - 4 files changed, 280 insertions(+), 10 deletions(-) create mode 100644 frontend/src/utils/chat/plugins/markdown-katex.js diff --git a/frontend/package.json b/frontend/package.json index 524f90fa6..2544fbd5e 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -24,7 +24,7 @@ "js-levenshtein": "^1.1.6", "lodash.debounce": "^4.0.8", "markdown-it": "^13.0.1", - "markdown-it-katex": "^2.0.3", + "katex": "^0.6.0", "moment": "^2.30.1", "onnxruntime-web": "^1.18.0", "pluralize": "^8.0.0", diff --git a/frontend/src/utils/chat/markdown.js b/frontend/src/utils/chat/markdown.js index 3ec5e4e34..5a8e83717 100644 --- a/frontend/src/utils/chat/markdown.js +++ b/frontend/src/utils/chat/markdown.js @@ -1,6 +1,6 @@ import { encode as HTMLEncode } from "he"; import markdownIt from "markdown-it"; -import markdownItKatex from "markdown-it-katex"; +import markdownItKatexPlugin from "./plugins/markdown-katex"; import hljs from "highlight.js"; import "./themes/github-dark.css"; import "./themes/github.css"; @@ -66,7 +66,7 @@ markdown.renderer.rules.image = function (tokens, idx) { return `<div class="w-full max-w-[800px]"><img src="${src}" alt="${alt}" class="w-full h-auto" /></div>`; }; -markdown.use(markdownItKatex); +markdown.use(markdownItKatexPlugin); export default function renderMarkdown(text = "") { return markdown.render(text); diff --git a/frontend/src/utils/chat/plugins/markdown-katex.js b/frontend/src/utils/chat/plugins/markdown-katex.js new file mode 100644 index 000000000..1a590295c --- /dev/null +++ b/frontend/src/utils/chat/plugins/markdown-katex.js @@ -0,0 +1,277 @@ +import katex from "katex"; + +// Test if potential opening or closing delimieter +// Assumes that there is a "$" at state.src[pos] +function isValidDelim(state, pos) { + var prevChar, + nextChar, + max = state.posMax, + can_open = true, + can_close = true; + + prevChar = pos > 0 ? state.src.charCodeAt(pos - 1) : -1; + nextChar = pos + 1 <= max ? state.src.charCodeAt(pos + 1) : -1; + + // Only apply whitespace rules if we're dealing with $ delimiter + if (state.src[pos] === "$") { + if ( + prevChar === 0x20 /* " " */ || + prevChar === 0x09 /* \t */ || + (nextChar >= 0x30 /* "0" */ && nextChar <= 0x39) /* "9" */ + ) { + can_close = false; + } + if (nextChar === 0x20 /* " " */ || nextChar === 0x09 /* \t */) { + can_open = false; + } + } + + return { + can_open: can_open, + can_close: can_close, + }; +} + +function math_inline(state, silent) { + var start, match, token, res, pos, esc_count; + + // Only process $ and \( delimiters for inline math + if ( + state.src[state.pos] !== "$" && + (state.src[state.pos] !== "\\" || state.src[state.pos + 1] !== "(") + ) { + return false; + } + + // Handle \( ... \) case separately + if (state.src[state.pos] === "\\" && state.src[state.pos + 1] === "(") { + start = state.pos + 2; + match = start; + while ((match = state.src.indexOf("\\)", match)) !== -1) { + pos = match - 1; + while (state.src[pos] === "\\") { + pos -= 1; + } + if ((match - pos) % 2 == 1) { + break; + } + match += 1; + } + + if (match === -1) { + if (!silent) { + state.pending += "\\("; + } + state.pos = start; + return true; + } + + if (!silent) { + token = state.push("math_inline", "math", 0); + token.markup = "\\("; + token.content = state.src.slice(start, match); + } + + state.pos = match + 2; + return true; + } + + res = isValidDelim(state, state.pos); + if (!res.can_open) { + if (!silent) { + state.pending += "$"; + } + state.pos += 1; + return true; + } + + // First check for and bypass all properly escaped delimieters + // This loop will assume that the first leading backtick can not + // be the first character in state.src, which is known since + // we have found an opening delimieter already. + start = state.pos + 1; + match = start; + while ((match = state.src.indexOf("$", match)) !== -1) { + // Found potential $, look for escapes, pos will point to + // first non escape when complete + pos = match - 1; + while (state.src[pos] === "\\") { + pos -= 1; + } + + // Even number of escapes, potential closing delimiter found + if ((match - pos) % 2 == 1) { + break; + } + match += 1; + } + + // No closing delimter found. Consume $ and continue. + if (match === -1) { + if (!silent) { + state.pending += "$"; + } + state.pos = start; + return true; + } + + // Check if we have empty content, ie: $$. Do not parse. + if (match - start === 0) { + if (!silent) { + state.pending += "$$"; + } + state.pos = start + 1; + return true; + } + + // Check for valid closing delimiter + res = isValidDelim(state, match); + if (!res.can_close) { + if (!silent) { + state.pending += "$"; + } + state.pos = start; + return true; + } + + if (!silent) { + token = state.push("math_inline", "math", 0); + token.markup = "$"; + token.content = state.src.slice(start, match); + } + + state.pos = match + 1; + return true; +} + +function math_block(state, start, end, silent) { + var firstLine, + lastLine, + next, + lastPos, + found = false, + token, + pos = state.bMarks[start] + state.tShift[start], + max = state.eMarks[start]; + + // Check for $$, \[, or standalone [ as opening delimiters + if (pos + 1 > max) { + return false; + } + + let openDelim = state.src.slice(pos, pos + 2); + let isDoubleDollar = openDelim === "$$"; + let isLatexBracket = openDelim === "\\["; + + if (!isDoubleDollar && !isLatexBracket) { + return false; + } + + // Determine the closing delimiter and position adjustment + let delimiter, posAdjust; + if (isDoubleDollar) { + delimiter = "$$"; + posAdjust = 2; + } else if (isLatexBracket) { + delimiter = "\\]"; + posAdjust = 2; + } + + pos += posAdjust; + firstLine = state.src.slice(pos, max); + + if (silent) { + return true; + } + if (firstLine.trim().slice(-delimiter.length) === delimiter) { + // Single line expression + firstLine = firstLine.trim().slice(0, -delimiter.length); + found = true; + } + + for (next = start; !found; ) { + next++; + + if (next >= end) { + break; + } + + pos = state.bMarks[next] + state.tShift[next]; + max = state.eMarks[next]; + + if (pos < max && state.tShift[next] < state.blkIndent) { + // non-empty line with negative indent should stop the list: + break; + } + + if ( + state.src.slice(pos, max).trim().slice(-delimiter.length) === delimiter + ) { + lastPos = state.src.slice(0, max).lastIndexOf(delimiter); + lastLine = state.src.slice(pos, lastPos); + found = true; + } + } + + state.line = next + 1; + + token = state.push("math_block", "math", 0); + token.block = true; + token.content = + (firstLine && firstLine.trim() ? firstLine + "\n" : "") + + state.getLines(start + 1, next, state.tShift[start], true) + + (lastLine && lastLine.trim() ? lastLine : ""); + token.map = [start, state.line]; + token.markup = delimiter; + return true; +} + +export default function math_plugin(md, options) { + // Default options + options = options || {}; + + var katexInline = function (latex) { + options.displayMode = false; + try { + latex = latex + .replace(/^\[(.*)\]$/, "$1") + .replace(/^\\\((.*)\\\)$/, "$1") + .replace(/^\\\[(.*)\\\]$/, "$1"); + return katex.renderToString(latex, options); + } catch (error) { + if (options.throwOnError) { + console.log(error); + } + return latex; + } + }; + + var inlineRenderer = function (tokens, idx) { + return katexInline(tokens[idx].content); + }; + + var katexBlock = function (latex) { + options.displayMode = true; + try { + // Remove surrounding delimiters if present + latex = latex.replace(/^\[(.*)\]$/, "$1").replace(/^\\\[(.*)\\\]$/, "$1"); + return "<p>" + katex.renderToString(latex, options) + "</p>"; + } catch (error) { + if (options.throwOnError) { + console.log(error); + } + return latex; + } + }; + + var blockRenderer = function (tokens, idx) { + return katexBlock(tokens[idx].content) + "\n"; + }; + + md.inline.ruler.after("escape", "math_inline", math_inline); + md.block.ruler.after("blockquote", "math_block", math_block, { + alt: ["paragraph", "reference", "blockquote", "list"], + }); + md.renderer.rules.math_inline = inlineRenderer; + md.renderer.rules.math_block = blockRenderer; +} diff --git a/frontend/yarn.lock b/frontend/yarn.lock index 475910ac4..a5ef8ba07 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -2483,13 +2483,6 @@ lru-cache@^5.1.1: dependencies: yallist "^3.0.2" -markdown-it-katex@^2.0.3: - version "2.0.3" - resolved "https://registry.yarnpkg.com/markdown-it-katex/-/markdown-it-katex-2.0.3.tgz#d7b86a1aea0b9d6496fab4e7919a18fdef589c39" - integrity sha512-nUkkMtRWeg7OpdflamflE/Ho/pWl64Lk9wNBKOmaj33XkQdumhXAIYhI0WO03GeiycPCsxbmX536V5NEXpC3Ng== - dependencies: - katex "^0.6.0" - markdown-it@^13.0.1: version "13.0.2" resolved "https://registry.yarnpkg.com/markdown-it/-/markdown-it-13.0.2.tgz#1bc22e23379a6952e5d56217fbed881e0c94d536"