From c56d3b15582840da2b579f788000183b83be79ba Mon Sep 17 00:00:00 2001
From: Timothy Carambat <rambat1010@gmail.com>
Date: Mon, 27 Jan 2025 13:35:58 -0800
Subject: [PATCH] breakout latex plugin for delims (#3040)

* Breakout LaTeX plugin for modification

* backport regular markdown link
---
 frontend/package.json                         |   2 +-
 frontend/src/utils/chat/markdown.js           |   4 +-
 .../src/utils/chat/plugins/markdown-katex.js  | 277 ++++++++++++++++++
 frontend/yarn.lock                            |   7 -
 4 files changed, 280 insertions(+), 10 deletions(-)
 create mode 100644 frontend/src/utils/chat/plugins/markdown-katex.js

diff --git a/frontend/package.json b/frontend/package.json
index 524f90fa6..2544fbd5e 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -24,7 +24,7 @@
     "js-levenshtein": "^1.1.6",
     "lodash.debounce": "^4.0.8",
     "markdown-it": "^13.0.1",
-    "markdown-it-katex": "^2.0.3",
+    "katex": "^0.6.0",
     "moment": "^2.30.1",
     "onnxruntime-web": "^1.18.0",
     "pluralize": "^8.0.0",
diff --git a/frontend/src/utils/chat/markdown.js b/frontend/src/utils/chat/markdown.js
index 3ec5e4e34..5a8e83717 100644
--- a/frontend/src/utils/chat/markdown.js
+++ b/frontend/src/utils/chat/markdown.js
@@ -1,6 +1,6 @@
 import { encode as HTMLEncode } from "he";
 import markdownIt from "markdown-it";
-import markdownItKatex from "markdown-it-katex";
+import markdownItKatexPlugin from "./plugins/markdown-katex";
 import hljs from "highlight.js";
 import "./themes/github-dark.css";
 import "./themes/github.css";
@@ -66,7 +66,7 @@ markdown.renderer.rules.image = function (tokens, idx) {
   return `<div class="w-full max-w-[800px]"><img src="${src}" alt="${alt}" class="w-full h-auto" /></div>`;
 };
 
-markdown.use(markdownItKatex);
+markdown.use(markdownItKatexPlugin);
 
 export default function renderMarkdown(text = "") {
   return markdown.render(text);
diff --git a/frontend/src/utils/chat/plugins/markdown-katex.js b/frontend/src/utils/chat/plugins/markdown-katex.js
new file mode 100644
index 000000000..1a590295c
--- /dev/null
+++ b/frontend/src/utils/chat/plugins/markdown-katex.js
@@ -0,0 +1,277 @@
+import katex from "katex";
+
+// Decides whether the "$" at state.src[pos] may open and/or close inline math.
+// Returns { can_open, can_close }; callers pass the index of a "$".
+function isValidDelim(state, pos) {
+  var prevChar,
+    nextChar,
+    max = state.posMax,
+    can_open = true,
+    can_close = true;
+
+  prevChar = pos > 0 ? state.src.charCodeAt(pos - 1) : -1;
+  nextChar = pos + 1 <= max ? state.src.charCodeAt(pos + 1) : -1;
+
+  // A "$" after space/tab or before a digit cannot close; before space/tab it cannot open
+  if (state.src[pos] === "$") {
+    if (
+      prevChar === 0x20 /* " " */ ||
+      prevChar === 0x09 /* \t */ ||
+      (nextChar >= 0x30 /* "0" */ && nextChar <= 0x39) /* "9" */
+    ) {
+      can_close = false;
+    }
+    if (nextChar === 0x20 /* " " */ || nextChar === 0x09 /* \t */) {
+      can_open = false;
+    }
+  }
+
+  return {
+    can_open: can_open,
+    can_close: can_close,
+  };
+}
+
+function math_inline(state, silent) {
+  var start, match, token, res, pos, esc_count;
+
+  // Inline rule for $...$ and \(...\) math: decline unless the cursor is on "$" or "\("
+  if (
+    state.src[state.pos] !== "$" &&
+    (state.src[state.pos] !== "\\" || state.src[state.pos + 1] !== "(")
+  ) {
+    return false;
+  }
+
+  // \( ... \): take the first "\)" preceded by an even number of backslashes
+  if (state.src[state.pos] === "\\" && state.src[state.pos + 1] === "(") {
+    start = state.pos + 2;
+    match = start;
+    while ((match = state.src.indexOf("\\)", match)) !== -1) {
+      pos = match - 1;
+      while (state.src[pos] === "\\") {
+        pos -= 1;
+      }
+      if ((match - pos) % 2 == 1) {
+        break;
+      }
+      match += 1;
+    }
+
+    if (match === -1) {
+      if (!silent) {
+        state.pending += "\\(";
+      }
+      state.pos = start;
+      return true;
+    }
+
+    if (!silent) {
+      token = state.push("math_inline", "math", 0);
+      token.markup = "\\(";
+      token.content = state.src.slice(start, match);
+    }
+
+    state.pos = match + 2;
+    return true;
+  }
+
+  res = isValidDelim(state, state.pos);
+  if (!res.can_open) {
+    if (!silent) {
+      state.pending += "$";
+    }
+    state.pos += 1;
+    return true;
+  }
+
+  // Scan for the closing "$", skipping any that are backslash-escaped.
+  // The backward walk over backslashes below cannot run past the start of
+  // state.src, because the opening "$" sits just before `start` and is not
+  // a backslash.
+  start = state.pos + 1;
+  match = start;
+  while ((match = state.src.indexOf("$", match)) !== -1) {
+    // Candidate "$" found; walk back over the backslashes before it so that
+    // pos ends on the first non-backslash character.
+    pos = match - 1;
+    while (state.src[pos] === "\\") {
+      pos -= 1;
+    }
+
+    // An even number of preceding backslashes means this "$" is unescaped
+    if ((match - pos) % 2 == 1) {
+      break;
+    }
+    match += 1;
+  }
+
+  // No closing delimiter found. Emit the opening "$" as text and continue.
+  if (match === -1) {
+    if (!silent) {
+      state.pending += "$";
+    }
+    state.pos = start;
+    return true;
+  }
+
+  // Empty content, i.e. "$$": emit both characters as text instead of math.
+  if (match - start === 0) {
+    if (!silent) {
+      state.pending += "$$";
+    }
+    state.pos = start + 1;
+    return true;
+  }
+
+  // The closing "$" must also pass isValidDelim's can_close check
+  res = isValidDelim(state, match);
+  if (!res.can_close) {
+    if (!silent) {
+      state.pending += "$";
+    }
+    state.pos = start;
+    return true;
+  }
+
+  if (!silent) {
+    token = state.push("math_inline", "math", 0);
+    token.markup = "$";
+    token.content = state.src.slice(start, match);
+  }
+
+  state.pos = match + 1;
+  return true;
+}
+
+function math_block(state, start, end, silent) {
+  var firstLine,
+    lastLine,
+    next,
+    lastPos,
+    found = false,
+    token,
+    pos = state.bMarks[start] + state.tShift[start],
+    max = state.eMarks[start];
+
+  // Block rule: the line must start with "$$" or "\[" to open a math block
+  if (pos + 1 > max) {
+    return false;
+  }
+
+  let openDelim = state.src.slice(pos, pos + 2);
+  let isDoubleDollar = openDelim === "$$";
+  let isLatexBracket = openDelim === "\\[";
+
+  if (!isDoubleDollar && !isLatexBracket) {
+    return false;
+  }
+
+  // Choose the closing delimiter and how many opener characters to skip
+  let delimiter, posAdjust;
+  if (isDoubleDollar) {
+    delimiter = "$$";
+    posAdjust = 2;
+  } else if (isLatexBracket) {
+    delimiter = "\\]";
+    posAdjust = 2;
+  }
+
+  pos += posAdjust;
+  firstLine = state.src.slice(pos, max);
+
+  if (silent) {
+    return true;
+  }
+  if (firstLine.trim().slice(-delimiter.length) === delimiter) {
+    // Opening and closing delimiters are on the same line
+    firstLine = firstLine.trim().slice(0, -delimiter.length);
+    found = true;
+  }
+
+  for (next = start; !found; ) {
+    next++;
+
+    if (next >= end) {
+      break;
+    }
+
+    pos = state.bMarks[next] + state.tShift[next];
+    max = state.eMarks[next];
+
+    if (pos < max && state.tShift[next] < state.blkIndent) {
+      // a non-empty line whose tShift is below state.blkIndent ends the scan
+      break;
+    }
+
+    if (
+      state.src.slice(pos, max).trim().slice(-delimiter.length) === delimiter
+    ) {
+      lastPos = state.src.slice(0, max).lastIndexOf(delimiter);
+      lastLine = state.src.slice(pos, lastPos);
+      found = true;
+    }
+  }
+
+  state.line = next + 1;
+
+  token = state.push("math_block", "math", 0);
+  token.block = true;
+  token.content =
+    (firstLine && firstLine.trim() ? firstLine + "\n" : "") +
+    state.getLines(start + 1, next, state.tShift[start], true) +
+    (lastLine && lastLine.trim() ? lastLine : "");
+  token.map = [start, state.line];
+  token.markup = delimiter;
+  return true;
+}
+
+export default function math_plugin(md, options) {
+  // Default options
+  options = options || {};
+
+  var katexInline = function (latex) {
+    options.displayMode = false;
+    try {
+      latex = latex
+        .replace(/^\[(.*)\]$/, "$1")
+        .replace(/^\\\((.*)\\\)$/, "$1")
+        .replace(/^\\\[(.*)\\\]$/, "$1");
+      return katex.renderToString(latex, options);
+    } catch (error) {
+      if (options.throwOnError) {
+        console.log(error);
+      }
+      return latex;
+    }
+  };
+
+  var inlineRenderer = function (tokens, idx) {
+    return katexInline(tokens[idx].content);
+  };
+
+  var katexBlock = function (latex) {
+    options.displayMode = true;
+    try {
+      // Remove surrounding delimiters if present
+      latex = latex.replace(/^\[(.*)\]$/, "$1").replace(/^\\\[(.*)\\\]$/, "$1");
+      return "<p>" + katex.renderToString(latex, options) + "</p>";
+    } catch (error) {
+      if (options.throwOnError) {
+        console.log(error);
+      }
+      return latex;
+    }
+  };
+
+  var blockRenderer = function (tokens, idx) {
+    return katexBlock(tokens[idx].content) + "\n";
+  };
+
+  md.inline.ruler.after("escape", "math_inline", math_inline);
+  md.block.ruler.after("blockquote", "math_block", math_block, {
+    alt: ["paragraph", "reference", "blockquote", "list"],
+  });
+  md.renderer.rules.math_inline = inlineRenderer;
+  md.renderer.rules.math_block = blockRenderer;
+}
diff --git a/frontend/yarn.lock b/frontend/yarn.lock
index 475910ac4..a5ef8ba07 100644
--- a/frontend/yarn.lock
+++ b/frontend/yarn.lock
@@ -2483,13 +2483,6 @@ lru-cache@^5.1.1:
   dependencies:
     yallist "^3.0.2"
 
-markdown-it-katex@^2.0.3:
-  version "2.0.3"
-  resolved "https://registry.yarnpkg.com/markdown-it-katex/-/markdown-it-katex-2.0.3.tgz#d7b86a1aea0b9d6496fab4e7919a18fdef589c39"
-  integrity sha512-nUkkMtRWeg7OpdflamflE/Ho/pWl64Lk9wNBKOmaj33XkQdumhXAIYhI0WO03GeiycPCsxbmX536V5NEXpC3Ng==
-  dependencies:
-    katex "^0.6.0"
-
 markdown-it@^13.0.1:
   version "13.0.2"
   resolved "https://registry.yarnpkg.com/markdown-it/-/markdown-it-13.0.2.tgz#1bc22e23379a6952e5d56217fbed881e0c94d536"