fix(user-bubble): stash code fences before math to keep code-blocks literal

PR #1854 added a math stash to _renderUserFencedBlocks so backslash LaTeX
delimiters (\[..\], \(..\)) survive esc() and reach the KaTeX renderer in
user bubbles. The stash ran BEFORE the existing code-fence stash, so a
user-typed code block containing LaTeX-like syntax was extracted as
KaTeX and rendered as math inside <pre><code>:

    ```
    \[ a + b \] is wrong
    ```
  → <pre><code><div class="katex-block"> a + b </div> is wrong</code></pre>

renderMd() (assistant path) handles this correctly by running fence_stash
before math_stash. The user-bubble path got the order inverted. Fix:
stash code fences first, then run the math regexes on the
outside-of-fence text only. Both top-level math and code-fenced literals
now render correctly:

  - "math: \[ x + y \]"           → KaTeX block
  - "```\n\[ a + b \]\n```"       → literal <pre><code>\[ a + b \]</code></pre>

Adds two regression tests:
  - test_user_code_block_with_latex_syntax_renders_as_literal_code
    (fails pre-fix, asserts no KaTeX wrappers inside <pre><code>)
  - test_user_bubble_top_level_latex_still_renders_after_fence_reorder
    (sibling guard against over-correcting and disabling math entirely)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Nathan Esquenazi
2026-05-07 14:03:04 -07:00
parent 4c51521c89
commit d703959b74
2 changed files with 45 additions and 6 deletions
+10 -6
View File
@@ -81,12 +81,10 @@ function _renderUserFencedBlocks(text){
return `<span class="katex-inline" data-katex="inline">${esc(item.src)}</span>`;
});
let s=String(text||'');
// Stash math before escaping plain text; display delimiters must run before inline.
s=s.replace(/\$\$([\s\S]+?)\$\$/g,(_,m)=>stashMath('display',m));
s=s.replace(/\\\[([\s\S]+?)\\\]/g,(_,m)=>stashMath('display',m));
s=s.replace(/\$([^\s$\n][^$\n]*?[^\s$\n]|\S)\$/g,(_,m)=>stashMath('inline',m));
s=s.replace(/\\\((.+?)\\\)/g,(_,m)=>stashMath('inline',m));
// Extract fenced code blocks → stash, replace with null-token placeholder
// Extract fenced code blocks FIRST so math regexes never run inside fenced
// content. If math were stashed first, a user-typed code block containing
// \[..\] / \(..\) / $$..$$ would be rendered as a KaTeX block inside
// <pre><code> instead of as literal source. Mirrors renderMd()'s ordering.
// CommonMark §4.5 line-anchored fence: the closing run must use at least
// as many backticks as the opener, so inner triple-backtick fences remain content.
s=s.replace(/(^|\n)[ ]{0,3}(`{3,})([^\n`]*)\n(?:([\s\S]*?)\n)?[ ]{0,3}\2`*[ \t]*(?=\n|$)/g,(_,lead,_fence,info,code)=>{
@@ -111,6 +109,12 @@ function _renderUserFencedBlocks(text){
}
return lead+'\x00UF'+(stash.length-1)+'\x00';
});
// Now stash math from the OUTSIDE-of-fence text. Display delimiters must
// run before inline so $$..$$ isn't mis-parsed as $..$..$..$.
s=s.replace(/\$\$([\s\S]+?)\$\$/g,(_,m)=>stashMath('display',m));
s=s.replace(/\\\[([\s\S]+?)\\\]/g,(_,m)=>stashMath('display',m));
s=s.replace(/\$([^\s$\n][^$\n]*?[^\s$\n]|\S)\$/g,(_,m)=>stashMath('inline',m));
s=s.replace(/\\\((.+?)\\\)/g,(_,m)=>stashMath('inline',m));
// Escape remaining plain text and convert newlines to <br>
s=esc(s).replace(/\n/g,'<br>');
// Restore stashed code blocks, then math placeholders as KaTeX targets.
+35
View File
@@ -115,6 +115,41 @@ where \\(L_i(f)\\) = SPL at angle \\(i\\)."""
assert "\\(" not in html and "\\)" not in html, html
def test_user_code_block_with_latex_syntax_renders_as_literal_code():
    """A fenced code block in a user bubble keeps LaTeX-looking text literal.

    The sample is a single triple-backtick fence whose body contains
    \\[..\\], \\(..\\) and $$..$$ delimiters. The user-bubble renderer is
    expected to stash code fences BEFORE math (the same order renderMd
    uses). With the order inverted, the math pass would pull those
    delimiters out of the fence and leave a `<div class="katex-block">`
    placeholder INSIDE `<pre><code>`, silently replacing what the user typed.
    """
    fence_lines = [
        "```",
        "\\[ a + b \\] is wrong",
        "\\(L_i\\) too",
        "$$matrix$$",
        "```",
    ]
    sample = "\n".join(fence_lines)
    user_html = _run_renderers(sample)["user"]

    # The fence must still come out as a code block ...
    assert "<pre><code>" in user_html, user_html
    # ... with no KaTeX wrapper of either kind anywhere in the output.
    for katex_wrapper in ("katex-block", "katex-inline"):
        assert katex_wrapper not in user_html, user_html
    # Backslashes pass through HTML escaping untouched, so each delimiter
    # pair from the user's source must appear verbatim.
    for literal_source in ("\\[ a + b \\]", "\\(L_i\\)", "$$matrix$$"):
        assert literal_source in user_html, user_html
def test_user_bubble_top_level_latex_still_renders_after_fence_reorder():
    """Math outside any code fence must still become KaTeX targets.

    Companion to the literal-code-block regression test: after moving the
    fence stash ahead of the math stash, top-level \\[..\\] and \\(..\\)
    in a user bubble must keep rendering. This guards against an
    over-correction that turns user-bubble math rendering off entirely.
    """
    sample = "math: \\[ x + y \\]\n\nand inline \\(L_i\\)"
    user_html = _run_renderers(sample)["user"]

    # One display placeholder and one inline placeholder are expected,
    # checked in that order.
    expected_placeholders = (
        'class="katex-block" data-katex="display"',
        'class="katex-inline" data-katex="inline"',
    )
    for placeholder_attrs in expected_placeholders:
        assert placeholder_attrs in user_html, user_html
def test_katex_inline_placeholder_emitted():
"""renderMd restore pass must emit .katex-inline spans for inline math."""
assert 'katex-inline' in UI_JS, \