Files
hermes-webui/tests/test_renderer_js_behaviour.py
T
nesquena-hermes 7189416969 fix: batch v0.50.234-235 — XSS hardening, workspace validation, profile switch fixes (#1206)
fix: batch v0.50.234-235 — XSS hardening, workspace validation, profile switch fixes

v0.50.235 (#1203 — profile switch workspace/model/chip, 3 bugs + flaky test):
- switch_profile now reads target profile's workspace directly (thread-local bypass)
- invalidate_models_cache() after profile switch (model dropdown staleness)
- syncTopbar() updates chip before early-return (no-session path)

v0.50.234 (#1201/#1205 — XSS hardening + workspace security):
- renderMd() full HTML attribute sanitizer replacing tag-name-only allowlist
- Delegated image lightbox (removes all inline onclick)
- macOS /etc → /private/etc symlink bypass fixed
- /System /Library added to blocked workspace roots
- Legacy /api/chat workspace trust gap closed

Both PRs independently reviewed. 2787/2787 tests. QA harness 20/20 + 11/11 API checks.

Co-authored-by: Brendan Schmid <bschmidy10@Wilson.bschmidy10>
Co-authored-by: Nathan Esquenazi <nesquena@gmail.com>
2026-04-27 21:39:30 -07:00

528 lines
22 KiB
Python

"""Behavioural tests that drive the ACTUAL renderMd() in static/ui.js via node.
The Python mirrors in test_blockquote_rendering.py and
test_renderer_comprehensive.py validate intent, but they can drift from the
JS. Twice now (PR #1073 commit 94d63d0 — phantom <br>; PR #1073 commit
04e7b53 — leading-space-in-blockquote prefix-strip regex) the Python mirror
was correct while the JS was not, so the static-mirror tests passed even
though the live UI was broken.
This file closes that gap by spawning ``node`` on the real ui.js and
asserting the rendered HTML for the most common LLM-output shapes.
Add a case here whenever the renderer fix targets a class of input the
Python mirror cannot exercise faithfully.
"""
import os
import re
import shutil
import subprocess
import sys
from pathlib import Path
import pytest
REPO_ROOT = Path(__file__).parent.parent.resolve()
UI_JS_PATH = REPO_ROOT / "static" / "ui.js"
NODE = shutil.which("node")
pytestmark = pytest.mark.skipif(NODE is None, reason="node not on PATH")
_DRIVER_SRC = r"""
const fs = require('fs');
const src = fs.readFileSync(process.argv[2], 'utf8');
global.window = {};
global.document = { createElement: () => ({ innerHTML: '', textContent: '' }) };
const esc = s => String(s ?? '').replace(/[&<>"']/g, c => (
{'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[c]));
const _IMAGE_EXTS=/\.(png|jpg|jpeg|gif|webp|bmp|ico|avif)$/i;
function extractFunc(name) {
const re = new RegExp('function\\s+' + name + '\\s*\\(');
const start = src.search(re);
if (start < 0) throw new Error(name + ' not found');
let i = src.indexOf('{', start);
let depth = 1; i++;
while (depth > 0 && i < src.length) {
if (src[i] === '{') depth++;
else if (src[i] === '}') depth--;
i++;
}
return src.slice(start, i);
}
eval(extractFunc('renderMd'));
let buf = '';
process.stdin.on('data', c => { buf += c; });
process.stdin.on('end', () => { process.stdout.write(renderMd(buf)); });
"""
@pytest.fixture(scope="module")
def driver_path(tmp_path_factory):
"""Write the node driver to a tmp file (works around `node -e` arg quirks)."""
p = tmp_path_factory.mktemp("renderer_driver") / "driver.js"
p.write_text(_DRIVER_SRC, encoding="utf-8")
return str(p)
def _render(driver_path, markdown: str) -> str:
"""Run renderMd against the actual ui.js and return the rendered HTML."""
result = subprocess.run(
[NODE, driver_path, str(UI_JS_PATH)],
input=markdown,
capture_output=True,
text=True,
timeout=10,
)
if result.returncode != 0:
raise RuntimeError(f"node driver failed: {result.stderr}")
return result.stdout
# ─────────────────────────────────────────────────────────────────────────────
# Blockquote prefix strip — the bug commit 04e7b53 introduced was a one-char
# regex regression where `^>[\t]?` (only tab) replaced `^>[ \t]?` (space or
# tab), producing leading-space artifacts and breaking lists-in-quotes
# because the list-detection regex `^( )?[-*+]` couldn't match the
# space-prefixed lines. These tests exercise the actual JS so the regex
# can't silently regress to tab-only again.
# ─────────────────────────────────────────────────────────────────────────────
class TestBlockquotePrefixStrip:
"""Drive the actual renderMd to confirm `> ` is fully stripped."""
def test_single_line_blockquote_no_leading_space(self, driver_path):
out = _render(driver_path, "> Hello world").strip()
# New shape: recursive renderMd wraps content in <p> (CommonMark-correct).
assert "<blockquote><p>Hello world</p></blockquote>" in out, (
f"`> Hello world` must render as <blockquote><p>Hello world</p></blockquote> "
f"with no leading space. Got: {out!r}."
)
def test_multiline_blockquote_no_leading_space(self, driver_path):
out = _render(driver_path, "> Line one\n> Line two").strip()
# New shape: single paragraph with <br> between soft-wrapped lines.
assert "<blockquote><p>Line one<br>Line two</p></blockquote>" in out, (
f"Multi-line blockquote must strip the space after each `>` and "
f"render as a single paragraph. Got: {out!r}"
)
# Belt-and-braces: there must be no space-after-newline-in-content
assert "\n " not in out.replace("</blockquote>", ""), (
f"Inner content of blockquote should not contain leading-space "
f"lines. Got: {out!r}"
)
def test_list_inside_blockquote_renders_as_ul(self, driver_path):
"""The PR explicitly added 'lists inside blockquotes' as a feature.
With the prefix-strip bug, the list-detection regex can't match the
space-prefixed lines, so the list never renders. This pins it."""
out = _render(driver_path, "> Steps:\n> - one\n> - two")
assert "<ul>" in out, (
f"`> - item` lines inside a blockquote must render as a <ul>. "
f"Got: {out!r}. Likely cause: prefix-strip leaves a leading "
f"space, list regex `^( )?[-*+] ` can't match one-space prefix."
)
assert "<li>one</li>" in out
assert "<li>two</li>" in out
def test_task_list_inside_blockquote(self, driver_path):
"""Task lists inside blockquotes render checkbox spans, not literal [x]."""
out = _render(driver_path, "> - [x] done\n> - [ ] todo")
assert 'class="task-done"' in out, (
f"`- [x]` inside a blockquote must produce a task-done span. "
f"Got: {out!r}"
)
assert 'class="task-todo"' in out
# ─────────────────────────────────────────────────────────────────────────────
# Common LLM output shapes — sanity-check the most frequent constructs render
# the way a user would expect.
# ─────────────────────────────────────────────────────────────────────────────
class TestRendererSanitization:
"""Raw/model-provided HTML must not survive with executable attributes or schemes."""
@pytest.mark.parametrize(
"payload, forbidden",
[
('<img src=x onerror=alert(1)>', 'onerror'),
('<span onclick=alert(1)>click</span>', 'onclick'),
('<div onmouseover=alert(1)>hover</div>', 'onmouseover'),
('<a href="javascript:alert(1)">x</a>', 'javascript:'),
],
)
def test_raw_html_dangerous_attributes_and_schemes_are_removed(self, driver_path, payload, forbidden):
out = _render(driver_path, payload).lower()
assert forbidden not in out, f"dangerous HTML survived sanitization: {out!r}"
assert 'alert(1)' not in out, f"executable payload text should not remain executable: {out!r}"
def test_generated_image_markdown_uses_delegated_lightbox_not_inline_js(self, driver_path):
out = _render(driver_path, "![capy](https://example.com/capy.png)").lower()
assert '<img' in out and 'msg-media-img' in out
assert 'onclick' not in out
assert '_openimglightbox' not in out
def test_media_token_image_uses_delegated_lightbox_not_inline_js(self, driver_path):
out = _render(driver_path, "MEDIA:https://example.com/capy.png").lower()
assert '<img' in out and 'msg-media-img' in out
assert 'onclick' not in out
assert '_openimglightbox' not in out
def test_incomplete_raw_html_tag_is_escaped_before_paragraph_wrapping(self, driver_path):
out = _render(driver_path, '<img src=x onerror=alert(1)//').lower()
assert '&lt;img' in out
assert '<img' not in out
assert 'onerror' not in out or '&lt;img' in out
class TestCommonLLMShapes:
def test_strikethrough_outside_quote(self, driver_path):
out = _render(driver_path, "This was ~~outdated~~ but is now fine.")
assert "<del>outdated</del>" in out
def test_strikethrough_inside_blockquote(self, driver_path):
out = _render(driver_path, "> This is ~~wrong~~ actually")
assert "<blockquote>" in out and "<del>wrong</del>" in out
def test_top_level_task_list(self, driver_path):
out = _render(driver_path, "- [x] done\n- [ ] todo\n- regular item")
assert 'class="task-done"' in out
assert 'class="task-todo"' in out
assert "regular item" in out
def test_nested_blockquote_recurses(self, driver_path):
out = _render(driver_path, ">>> deeply nested")
assert out.count("<blockquote>") == 3
assert out.count("</blockquote>") == 3
def test_quote_then_heading(self, driver_path):
out = _render(driver_path, "> Note this.\n\n## Heading")
assert "<blockquote><p>Note this.</p></blockquote>" in out
assert "<h2>Heading</h2>" in out
def test_crlf_does_not_leak_carriage_return(self, driver_path):
out = _render(driver_path, "Line1\r\nLine2\r\nLine3")
assert "\r" not in out, f"CRLF must be normalised; got {out!r}"
def test_llm_multiparagraph_quote_with_list(self, driver_path):
"""The shape an LLM emits when summarising decisions inside a quote."""
src = (
"> Here are the key points:\n"
">\n"
"> - Point one\n"
"> - Point two\n"
">\n"
"> And a closing remark."
)
out = _render(driver_path, src)
assert "<blockquote>" in out
assert "<ul>" in out
assert "<li>Point one</li>" in out
assert "<li>Point two</li>" in out
assert "And a closing remark." in out
# No leading-space artifacts in the quoted text
assert "\n " not in out.replace("</blockquote>", "")
# ─────────────────────────────────────────────────────────────────────────────
# Block-level constructs INSIDE blockquotes — the six bugs documented in
# blockquote-rendering-bugs.md. Each test feeds the exact input from the
# bug report and asserts the rendered HTML structure.
#
# Root cause of all six: every block-level pass (fenced code, headings, hr,
# ordered lists) used to run BEFORE the blockquote handler, on > -prefixed
# lines those passes don't recognise. The fix moved blockquote handling to a
# pre-pass that strips > prefixes and recursively renders the inner content.
# ─────────────────────────────────────────────────────────────────────────────
class TestBugFencedCodeInBlockquote:
"""Bug 1: fenced code blocks inside blockquotes leaked > prefixes inside
the rendered <pre>, broke the <blockquote> wrapper, and sometimes left
raw <pre>/<div class="pre-header"> as visible text."""
def test_fenced_code_inside_blockquote_renders_pre(self, driver_path):
src = (
"> Here is some code:\n"
">\n"
"> ```python\n"
"> x = 1\n"
"> y = 2\n"
"> ```\n"
">\n"
"> That was the code."
)
out = _render(driver_path, src)
assert "<pre>" in out and "</pre>" in out, (
f"Fenced code inside blockquote must render as <pre>: {out!r}"
)
# The > prefixes must be stripped from the code content, not preserved
# inside the <pre>.
assert "&gt; x = 1" not in out, (
f"Code content inside <pre> must not contain &gt; prefixes: {out!r}"
)
# Raw <pre> or pre-header tags must NOT appear as visible text
assert "&lt;pre&gt;" not in out
assert "&lt;div class=&quot;pre-header" not in out
# Single <blockquote> wrapping everything (not split by the <pre>)
assert out.count("<blockquote>") == 1, (
f"Expected ONE <blockquote>, got {out.count('<blockquote>')}: {out!r}"
)
def test_fenced_code_with_lang_class(self, driver_path):
src = "> ```python\n> x = 1\n> ```"
out = _render(driver_path, src)
assert 'class="language-python"' in out
assert "x = 1" in out
class TestBugBlankContinuationInBlockquote:
"""Bug 2: blank > lines between paragraphs fragmented the blockquote into
separate elements with literal > characters between them."""
def test_three_paragraphs_one_blockquote(self, driver_path):
src = (
"> First paragraph of the quote.\n"
">\n"
"> Second paragraph of the quote.\n"
">\n"
"> Third paragraph of the quote."
)
out = _render(driver_path, src)
# All three paragraphs in ONE <blockquote>
assert out.count("<blockquote>") == 1, (
f"Expected ONE <blockquote>, got {out.count('<blockquote>')}: {out!r}"
)
assert "First paragraph" in out
assert "Second paragraph" in out
assert "Third paragraph" in out
# No literal > between paragraphs (would indicate fragmented blockquote)
text_only = re.sub(r"<[^>]+>", "", out)
assert ">" not in text_only, (
f"Literal > in rendered text indicates fragmented blockquote: {text_only!r}"
)
class TestBugHeadingsInsideBlockquote:
"""Bug 3: # headings inside blockquotes rendered as literal '##' text
because the heading pass ran before the blockquote pass."""
def test_h2_inside_blockquote(self, driver_path):
src = (
"> ## Bug description\n"
">\n"
"> The widget is broken.\n"
">\n"
"> ## Steps to reproduce\n"
">\n"
"> Click the button."
)
out = _render(driver_path, src)
assert "<h2>Bug description</h2>" in out, (
f"## inside blockquote must render as <h2>: {out!r}"
)
assert "<h2>Steps to reproduce</h2>" in out
# No literal '##' as visible text
text_only = re.sub(r"<[^>]+>", "", out)
assert "##" not in text_only, (
f"Literal ## in rendered text — heading pass missed it: {text_only!r}"
)
def test_h1_h2_h3_all_render(self, driver_path):
src = "> # H1\n> ## H2\n> ### H3"
out = _render(driver_path, src)
assert "<h1>H1</h1>" in out
assert "<h2>H2</h2>" in out
assert "<h3>H3</h3>" in out
class TestBugOrderedListInsideBlockquote:
"""Bug 4: ordered (numbered) lists inside blockquotes rendered as plain
text — the OL pass had no equivalent of the UL branch in the old
blockquote handler."""
def test_ordered_list_renders_as_ol(self, driver_path):
src = (
"> Steps to reproduce:\n"
">\n"
"> 1. Open the app\n"
"> 2. Click the button\n"
"> 3. Observe the crash"
)
out = _render(driver_path, src)
assert "<ol>" in out and "</ol>" in out, (
f"Numbered list inside blockquote must render as <ol>: {out!r}"
)
# All three list items present
for item in ["Open the app", "Click the button", "Observe the crash"]:
assert f">{item}</li>" in out, (
f"Missing <li>{item}</li> in {out!r}"
)
class TestBugHorizontalRuleInsideBlockquote:
"""Bug 6: --- inside a blockquote rendered as literal text instead of <hr>."""
def test_hr_renders_inside_blockquote(self, driver_path):
src = "> Above the rule\n>\n> ---\n>\n> Below the rule"
out = _render(driver_path, src)
assert "<hr>" in out, (
f"--- inside blockquote must render as <hr>: {out!r}"
)
assert "Above the rule" in out
assert "Below the rule" in out
# No literal '---' as text
text_only = re.sub(r"<[^>]+>", "", out)
assert "---" not in text_only, (
f"Literal --- in rendered text: {text_only!r}"
)
class TestBugComplexBlockquoteAllFeatures:
"""Bug 5 (worst-case): a blockquote with headings, paragraphs, inline code,
fenced code, and an ordered list. Old behaviour collapsed the entire thing
into a monospace blob with raw markdown syntax leaking everywhere."""
def test_complex_blockquote_renders_all_constructs(self, driver_path):
src = (
"> ## Description\n"
">\n"
"> The widget is broken when X happens.\n"
">\n"
"> ## Root cause\n"
">\n"
"> The `MIME_MAP` in `api/config.py` is missing entries.\n"
">\n"
"> ## Fix\n"
">\n"
"> Add two entries:\n"
">\n"
"> ```python\n"
'> ".html": "text/html",\n'
'> ".htm": "text/html",\n'
"> ```\n"
">\n"
"> ## Workflow rules\n"
">\n"
"> 1. Never edit the file directly\n"
"> 2. Create a worktree\n"
"> 3. Run the tests\n"
">\n"
"> Target branch is `master`."
)
out = _render(driver_path, src)
# Multiple <h2> headings
assert out.count("<h2>") >= 4, (
f"Expected at least 4 <h2> headings, got {out.count('<h2>')}: {out!r}"
)
# Fenced code block
assert "<pre>" in out
assert 'class="language-python"' in out
# Ordered list
assert "<ol>" in out
# Inline code
assert "<code>MIME_MAP</code>" in out
assert "<code>api/config.py</code>" in out
assert "<code>master</code>" in out
# No literal markdown syntax leaking
text_only = re.sub(r"<[^>]+>", "", out)
assert "##" not in text_only, f"Literal ## in {text_only!r}"
# Single <blockquote> wraps everything
assert out.count("<blockquote>") == 1, (
f"Expected ONE <blockquote>, got {out.count('<blockquote>')}: {out!r}"
)
# No raw <pre>/<div class="pre-header"> as escaped text
assert "&lt;pre&gt;" not in out
assert "&lt;div class=&quot;pre-header" not in out
class TestBlockquoteRegressionsDontTouchOutsideContent:
"""Make sure the blockquote pre-pass doesn't grab > -prefixed lines that
sit inside a non-blockquote fenced code block (e.g. shell prompts in
```bash``` examples)."""
def test_shell_prompt_in_bash_fence_not_treated_as_blockquote(self, driver_path):
src = "```bash\n> echo hello\n```"
out = _render(driver_path, src)
# The > line is part of the bash code, not a blockquote
assert "<blockquote>" not in out, (
f"> line inside ```bash``` must NOT become a blockquote: {out!r}"
)
assert "<pre>" in out
# Escaped > preserved as code content
assert "&gt; echo hello" in out
def test_two_separate_blockquotes_stay_separate(self, driver_path):
src = "> First quote\n\nSome plain text.\n\n> Second quote"
out = _render(driver_path, src)
assert out.count("<blockquote>") == 2, (
f"Two separated blockquotes must stay separate: {out!r}"
)
assert "Some plain text." in out
def test_nested_double_blockquote(self, driver_path):
src = "> outer line\n> > inner line"
out = _render(driver_path, src)
# Should produce nested <blockquote><blockquote>
assert out.count("<blockquote>") == 2, (
f"Expected 2 <blockquote>: {out!r}"
)
class TestBlockquoteEntityEncodedInput:
"""Blockquotes sent as HTML-entity-encoded text must still render correctly.
LLMs sometimes emit &gt; instead of > — the entity-decode pass must run
BEFORE the blockquote pre-pass, not after it."""
def test_amp_gt_prefix_becomes_blockquote(self, driver_path):
src = "&gt; Hello quote"
out = _render(driver_path, src)
assert "<blockquote>" in out, (
f"&gt;-prefixed line must render as <blockquote>: {out!r}"
)
text_only = re.sub(r"<[^>]+>", "", out)
assert "Hello quote" in text_only
# Should not see a literal > or &gt; in the rendered text
assert "&gt;" not in out, f"&gt; should have been decoded: {out!r}"
def test_amp_gt_fenced_code_in_blockquote(self, driver_path):
src = "&gt; ```python\n&gt; x = 1\n&gt; ```"
out = _render(driver_path, src)
assert "<blockquote>" in out, (
f"Entity-encoded blockquote with fenced code must render: {out!r}"
)
assert "<pre>" in out, f"Fenced code inside entity-encoded blockquote must render: {out!r}"
class TestRawPreCodePreservation:
"""Raw <pre><code> HTML from model output should remain structurally intact."""
def test_multiline_pre_code_blocks_do_not_degrade_to_backticks(self, driver_path):
src = (
"<pre><code>line 1\n"
"line 2\n"
"</code></pre>\n\n"
"After paragraph.\n\n"
"<pre><code>line 3\n"
"line 4\n"
"</code></pre>\n\n"
"Done."
)
out = _render(driver_path, src)
assert out.count("<pre>") == 2 and out.count("</pre>") == 2, (
f"Expected two balanced <pre> blocks, got: {out!r}"
)
assert out.count("<code>") == 2 and out.count("</code>") == 2, (
f"Expected two balanced <code> blocks, got: {out!r}"
)
assert "`line 1" not in out and "line 2\n`</pre>" not in out, (
f"<code> content inside <pre> must not be rewritten to backticks: {out!r}"
)
assert "After paragraph." in out and "Done." in out