mirror of
https://github.com/nesquena/hermes-webui.git
synced 2026-05-25 03:00:23 +00:00
fix(user-bubble): stash code fences before math to keep code-blocks literal
PR #1854 added a math stash to _renderUserFencedBlocks so backslash LaTeX delimiters (\[..\], \(..\)) survive esc() and reach the KaTeX renderer in user bubbles.

The stash ran BEFORE the existing code-fence stash, so a user-typed code block containing LaTeX-like syntax was extracted as KaTeX and rendered as math inside <pre><code>:

    ```
    \[ a + b \] is wrong
    ```
    → <pre><code><div class="katex-block"> a + b </div> is wrong</code></pre>

renderMd() (assistant path) handles this correctly by running fence_stash before math_stash. The user-bubble path got the order inverted.

Fix: stash code fences first, then run the math regexes on the outside-of-fence text only. Both top-level math and code-fenced literals now render correctly:

- "math: \[ x + y \]" → KaTeX block
- "```\n\[ a + b \]\n```" → literal <pre><code>\[ a + b \]</code></pre>

Adds two regression tests:

- test_user_code_block_with_latex_syntax_renders_as_literal_code (fails pre-fix, asserts no KaTeX wrappers inside <pre><code>)
- test_user_bubble_top_level_latex_still_renders_after_fence_reorder (sibling guard against over-correcting and disabling math entirely)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+10
-6
@@ -81,12 +81,10 @@ function _renderUserFencedBlocks(text){
|
||||
return `<span class="katex-inline" data-katex="inline">${esc(item.src)}</span>`;
|
||||
});
|
||||
let s=String(text||'');
|
||||
// Stash math before escaping plain text; display delimiters must run before inline.
|
||||
s=s.replace(/\$\$([\s\S]+?)\$\$/g,(_,m)=>stashMath('display',m));
|
||||
s=s.replace(/\\\[([\s\S]+?)\\\]/g,(_,m)=>stashMath('display',m));
|
||||
s=s.replace(/\$([^\s$\n][^$\n]*?[^\s$\n]|\S)\$/g,(_,m)=>stashMath('inline',m));
|
||||
s=s.replace(/\\\((.+?)\\\)/g,(_,m)=>stashMath('inline',m));
|
||||
// Extract fenced code blocks → stash, replace with null-token placeholder
|
||||
// Extract fenced code blocks FIRST so math regexes never run inside fenced
|
||||
// content. If math were stashed first, a user-typed code block containing
|
||||
// \[..\] / \(..\) / $$..$$ would be rendered as a KaTeX block inside
|
||||
// <pre><code> instead of as literal source. Mirrors renderMd()'s ordering.
|
||||
// CommonMark §4.5 line-anchored fence: the closing run must use at least
|
||||
// as many backticks as the opener, so inner triple-backtick fences remain content.
|
||||
s=s.replace(/(^|\n)[ ]{0,3}(`{3,})([^\n`]*)\n(?:([\s\S]*?)\n)?[ ]{0,3}\2`*[ \t]*(?=\n|$)/g,(_,lead,_fence,info,code)=>{
|
||||
@@ -111,6 +109,12 @@ function _renderUserFencedBlocks(text){
|
||||
}
|
||||
return lead+'\x00UF'+(stash.length-1)+'\x00';
|
||||
});
|
||||
// Now stash math from the OUTSIDE-of-fence text. Display delimiters must
|
||||
// run before inline so $$..$$ isn't mis-parsed as $..$..$..$.
|
||||
s=s.replace(/\$\$([\s\S]+?)\$\$/g,(_,m)=>stashMath('display',m));
|
||||
s=s.replace(/\\\[([\s\S]+?)\\\]/g,(_,m)=>stashMath('display',m));
|
||||
s=s.replace(/\$([^\s$\n][^$\n]*?[^\s$\n]|\S)\$/g,(_,m)=>stashMath('inline',m));
|
||||
s=s.replace(/\\\((.+?)\\\)/g,(_,m)=>stashMath('inline',m));
|
||||
// Escape remaining plain text and convert newlines to <br>
|
||||
s=esc(s).replace(/\n/g,'<br>');
|
||||
// Restore stashed code blocks, then math placeholders as KaTeX targets.
|
||||
|
||||
@@ -115,6 +115,41 @@ where \\(L_i(f)\\) = SPL at angle \\(i\\)."""
|
||||
assert "\\(" not in html and "\\)" not in html, html
|
||||
|
||||
|
||||
def test_user_code_block_with_latex_syntax_renders_as_literal_code():
|
||||
"""User-bubble code blocks containing \\[..\\] / \\(..\\) / $$..$$ must
|
||||
render as literal code source, not as KaTeX. _renderUserFencedBlocks
|
||||
must stash code fences BEFORE math (mirroring renderMd's ordering); if
|
||||
math is stashed first, a user-typed code block containing LaTeX-like
|
||||
syntax gets a `<div class="katex-block">` placeholder dropped INSIDE
|
||||
`<pre><code>`, and the user's literal source is silently replaced by
|
||||
rendered math.
|
||||
"""
|
||||
sample = "```\n\\[ a + b \\] is wrong\n\\(L_i\\) too\n$$matrix$$\n```"
|
||||
rendered = _run_renderers(sample)
|
||||
user_html = rendered["user"]
|
||||
# The whole code block should remain literal, no KaTeX wrappers inside.
|
||||
assert "<pre><code>" in user_html, user_html
|
||||
assert "katex-block" not in user_html, user_html
|
||||
assert "katex-inline" not in user_html, user_html
|
||||
# Backslashes survive HTML escape unchanged; the user's source is intact.
|
||||
assert "\\[ a + b \\]" in user_html, user_html
|
||||
assert "\\(L_i\\)" in user_html, user_html
|
||||
assert "$$matrix$$" in user_html, user_html
|
||||
|
||||
|
||||
def test_user_bubble_top_level_latex_still_renders_after_fence_reorder():
|
||||
"""Sibling regression: top-level math (outside any code fence) must
|
||||
still render through KaTeX in user bubbles after the fence-first
|
||||
reorder. Guards against an over-correction that disables user-bubble
|
||||
math rendering entirely.
|
||||
"""
|
||||
sample = "math: \\[ x + y \\]\n\nand inline \\(L_i\\)"
|
||||
rendered = _run_renderers(sample)
|
||||
user_html = rendered["user"]
|
||||
assert 'class="katex-block" data-katex="display"' in user_html, user_html
|
||||
assert 'class="katex-inline" data-katex="inline"' in user_html, user_html
|
||||
|
||||
|
||||
def test_katex_inline_placeholder_emitted():
|
||||
"""renderMd restore pass must emit .katex-inline spans for inline math."""
|
||||
assert 'katex-inline' in UI_JS, \
|
||||
|
||||
Reference in New Issue
Block a user