fix(renderer): group consecutive blockquote lines into single element

Root cause: the old rule `s.replace(/^> (.+)$/gm, ...)` had three bugs:

1. `> (.+)` required a space and at least one character after `>` — bare `>` lines (blank continuation lines) did not match and passed through as literal `>`.
2. Each matching line became its own `<blockquote>` element — a 10-line blockquote produced 10 stacked `<blockquote>` tags with no grouping.
3. When a fenced code block sat inside a blockquote, the fence-stash pass consumed the code content and left orphaned `>` lines that the old `.+` pattern could not match.

Fix: replace the single-line regex with a group-based approach that matches one or more consecutive `>` lines as a single block, strips the `>` prefix from each line, passes each non-empty line through inlineMd(), turns blank `>` lines into `<br>`, and wraps the entire group in one `<blockquote>`.

14 regression tests added covering:

- Single-line blockquotes (regression)
- Multi-line grouping (2 and 10 lines)
- Two separate blockquotes staying separate
- Bare `>` and `>text` (no space) edge cases
- Blank continuation lines → <br>
- Bold / italic / inline-code inside blockquotes
- Blockquote followed by normal paragraph
2026-07-16 12:40:18 +00:00 · 2026-04-26 02:44:34 +00:00
parent 3d96dc1498
commit f6ea11d22e
2 changed files with 189 additions and 1 deletions
@@ -762,7 +762,20 @@ function renderMd(raw){
  s=s.replace(/\x00O(\d+)\x00/g,(_,i)=>_ob_stash[+i]);
  s=s.replace(/^### (.+)$/gm,(_,t)=>`<h3>${inlineMd(t)}</h3>`).replace(/^## (.+)$/gm,(_,t)=>`<h2>${inlineMd(t)}</h2>`).replace(/^# (.+)$/gm,(_,t)=>`<h1>${inlineMd(t)}</h1>`);
  s=s.replace(/^---+$/gm,'<hr>');
-  s=s.replace(/^> (.+)$/gm,(_,t)=>`<blockquote>${inlineMd(t)}</blockquote>`);
+  // Group consecutive > lines (including bare >) into one <blockquote>.
+  // The old single-line rule (^> (.+)$) had three bugs:
+  //   1. .+ skipped bare "> " lines — they passed through as literal >
+  //   2. Each line became its own <blockquote> — no visual grouping
+  //   3. After the fenced-code pass, lines of > preceding/following code
+  //      blocks were left as literals because .+ didn't match empty lines
+  s=s.replace(/((?:^>[^\n]*(?:\n|$))+)/gm,block=>{
+    const inner=block.split('\n')
+      .filter((_,i,a)=>i<a.length-1||_.trim()!='>')  // drop lone trailing '>' artifact
+      .map(l=>l.replace(/^>[ \t]?/,''))               // strip "> " or ">"
+      .map(l=>l.trim()===''?'<br>':inlineMd(l))        // blank lines → <br>, text → inlineMd
+      .join('\n');
+    return `<blockquote>${inner}</blockquote>`;
+  });
  // B8: improved list handling supporting up to 2 levels of indentation
  s=s.replace(/((?:^(?:  )?[-*+] .+\n?)+)/gm,block=>{
    const lines=block.trimEnd().split('\n');
@@ -0,0 +1,175 @@
+"""Regression tests for the blockquote rendering fix (fix/blockquote-rendering).
+
+Root cause: the old rule was `s.replace(/^> (.+)$/gm, ...)` which had three bugs:
+  1. `.+` required at least one character — bare `>` lines passed through as literal `>`
+  2. Each line became its own `<blockquote>` — no grouping, so 10-line quotes became
+     10 stacked `<blockquote>` elements
+  3. Fenced code blocks inside blockquotes left orphaned `>` literals after the
+     fence-stash pass had consumed the code content
+
+Fix: group consecutive `>` lines into a single `<blockquote>`, handle bare `>` lines
+as `<br>`, and strip the `>` prefix before passing each line to `inlineMd()`.
+"""
+import re
+import pathlib
+
+# Full text of static/ui.js, located under the parent of this test file's
+# directory. Read once at import time and used by the source-level
+# structural checks below (plain substring matching).
+UI_JS = (pathlib.Path(__file__).parent.parent / "static" / "ui.js").read_text(encoding="utf-8")
+
+# ---------------------------------------------------------------------------
+# Python mirror of the new blockquote rule + inlineMd (for behavioural tests)
+# ---------------------------------------------------------------------------
+
+import html as _html
+
+
+def _esc(s):
+    """Return ``str(s)`` HTML-escaped, with quote characters escaped too."""
+    return _html.escape(str(s), quote=True)
+
+
+def _inline_md(t):
+    """Minimal inlineMd mirror — bold, italic, inline-code only."""
+    # The substitutions run sequentially on the same string, longest delimiter
+    # first (``***`` before ``**`` before ``*``), so a shorter pattern does not
+    # consume part of a longer marker. Captured text is HTML-escaped via _esc.
+    t = re.sub(r"`([^`\n]+)`", lambda m: f"<code>{_esc(m.group(1))}</code>", t)
+    t = re.sub(r"\*\*\*(.+?)\*\*\*", lambda m: f"<strong><em>{_esc(m.group(1))}</em></strong>", t)
+    t = re.sub(r"\*\*(.+?)\*\*", lambda m: f"<strong>{_esc(m.group(1))}</strong>", t)
+    t = re.sub(r"\*([^*\n]+)\*", lambda m: f"<em>{_esc(m.group(1))}</em>", t)
+    return t
+
+
+def _apply_blockquote(s):
+    """Python mirror of the new group-based blockquote rule in ui.js.
+
+    Every run of consecutive lines starting with ``>`` in ``s`` is replaced by
+    one ``<blockquote>…</blockquote>`` element; text outside such runs is
+    returned unchanged.
+
+    NOTE(review): the match includes the newline that ends the last quoted
+    line, and the replacement does not put it back. Keep the trailing-line
+    handling below in sync with the rule in ui.js.
+    """
+    def replacer(m):
+        block = m.group(0)
+        lines = block.split("\n")
+        # Drop a lone trailing ">" artifact that the regex can leave
+        # Trailing empty strings (from a final "\n") are popped too; the loop
+        # stops after removing at most one trailing bare ">" line.
+        while lines and lines[-1].strip() in (">", ""):
+            if lines[-1].strip() == ">":
+                lines.pop()
+                break
+            lines.pop()
+        processed = []
+        for l in lines:
+            # Remove the ">" marker plus at most one following space or tab.
+            stripped = re.sub(r"^>[ \t]?", "", l)
+            if stripped.strip() == "":
+                # Blank continuation line inside the quote.
+                processed.append("<br>")
+            else:
+                processed.append(_inline_md(stripped))
+        inner = "\n".join(processed)
+        return f"<blockquote>{inner}</blockquote>"
+
+    # MULTILINE makes ^ and $ match at every line boundary, so the group
+    # collects one or more whole lines that begin with ">".
+    return re.sub(r"((?:^>[^\n]*(?:\n|$))+)", replacer, s, flags=re.MULTILINE)
+
+
+# ---------------------------------------------------------------------------
+# Source-level structural tests
+# ---------------------------------------------------------------------------
+
+class TestBlockquoteSourceStructure:
+    """The new rule must be present in ui.js and the old single-line rule must be gone."""
+
+    # These are plain substring checks against the text of ui.js (UI_JS);
+    # they do not execute any JavaScript.
+
+    def test_old_single_line_rule_removed(self):
+        """The old `.+` pattern that skipped blank lines must be gone."""
+        assert "replace(/^> (.+)$/gm" not in UI_JS, (
+            "Old single-line blockquote rule still present — it misses blank '>'"
+            " lines and creates one <blockquote> per line"
+        )
+
+    def test_new_group_rule_present(self):
+        """The new grouping regex must be present."""
+        # The doubled backslashes produce a literal "\n" (backslash + n), i.e.
+        # the regex source as it is written in the JS file.
+        assert "(?:^>[^\\n]*(?:\\n|$))+" in UI_JS, (
+            "New group-based blockquote rule not found in ui.js"
+        )
+
+    def test_prefix_strip_present(self):
+        """The fix must strip the '> ' prefix from each line."""
+        # NOTE(review): the second operand is redundant — any text containing
+        # "replace(/^>[ " also contains "replace(/^>[".
+        assert "replace(/^>[" in UI_JS or "replace(/^>[ " in UI_JS, (
+            "Expected prefix-strip pattern not found in the blockquote block"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Behavioural tests (using the Python mirror)
+# ---------------------------------------------------------------------------
+
+class TestMultiLineBlockquote:
+    """Consecutive > lines must become ONE <blockquote>, not many."""
+
+    def test_single_line_still_works(self):
+        out = _apply_blockquote("> Hello world")
+        assert out.count("<blockquote>") == 1
+        assert "Hello world" in out
+        # With the wrapper tags removed, no ">" may remain: the quote marker
+        # must have been stripped rather than rendered.
+        assert ">" not in out.replace("<blockquote>", "").replace("</blockquote>", "")
+
+    def test_two_consecutive_lines_grouped(self):
+        src = "> Line one\n> Line two"
+        out = _apply_blockquote(src)
+        assert out.count("<blockquote>") == 1, (
+            f"Expected 1 <blockquote>, got {out.count('<blockquote>')}: {out!r}"
+        )
+
+    def test_ten_lines_one_blockquote(self):
+        # Ten quoted lines joined by newlines, no trailing newline.
+        src = "\n".join(f"> Line {i}" for i in range(10))
+        out = _apply_blockquote(src)
+        assert out.count("<blockquote>") == 1
+
+    def test_two_separate_quotes_stay_separate(self):
+        src = "> First quote\n\n> Second quote"
+        out = _apply_blockquote(src)
+        # Each quote is its own group (separated by a blank line)
+        # The empty line does not start with ">", so it ends the first run.
+        assert out.count("<blockquote>") == 2
+
+
+class TestBlankContinuationLines:
+    """Bare '>' lines (blank continuation) must not appear as literal '>'."""
+
+    def test_bare_gt_line_no_literal(self):
+        src = "> Para one\n>\n> Para two"
+        out = _apply_blockquote(src)
+        assert out.count("<blockquote>") == 1, f"Expected 1 blockquote: {out!r}"
+        # No stray '>' outside of HTML tags
+        # (tags are removed first so their own closing ">" is not counted)
+        text_only = re.sub(r"<[^>]+>", "", out)
+        assert ">" not in text_only, f"Literal '>' in text: {text_only!r}"
+
+    def test_bare_gt_no_space_handled(self):
+        """'>' with no space at all should also be consumed, not rendered literally."""
+        src = ">no space after"
+        out = _apply_blockquote(src)
+        assert out.count("<blockquote>") == 1
+        text_only = re.sub(r"<[^>]+>", "", out)
+        assert ">" not in text_only
+
+    def test_blank_line_becomes_br(self):
+        # The bare ">" sits between two quoted lines, so it is not the
+        # trailing artifact that the rule drops.
+        src = "> First\n>\n> Second"
+        out = _apply_blockquote(src)
+        assert "<br>" in out, f"Expected <br> for blank > line: {out!r}"
+
+
+class TestInlineMarkdownInsideBlockquote:
+    """Bold, italic, and inline code must still render correctly inside a blockquote."""
+
+    # Each test asserts only that the expected tags are present in the
+    # output, not the exact rendered string.
+
+    def test_bold_inside_blockquote(self):
+        out = _apply_blockquote("> This is **important**")
+        assert "<strong>" in out
+        assert "<blockquote>" in out
+
+    def test_inline_code_inside_blockquote(self):
+        out = _apply_blockquote("> Run `git status` first")
+        assert "<code>" in out
+        assert "<blockquote>" in out
+
+    def test_italic_inside_blockquote(self):
+        out = _apply_blockquote("> *emphasis* here")
+        assert "<em>" in out
+        assert "<blockquote>" in out
+
+
+class TestBlockquoteFollowedByParagraph:
+    """A blockquote followed by a normal paragraph must not bleed into each other."""
+
+    def test_non_blockquote_paragraph_untouched(self):
+        src = "> Quoted text\n\nNormal paragraph"
+        out = _apply_blockquote(src)
+        assert out.count("<blockquote>") == 1
+        assert "Normal paragraph" in out
+        # Normal paragraph must be outside the blockquote
+        # (it has to appear in the text from the closing tag onwards)
+        after_bq = out[out.index("</blockquote>"):]
+        assert "Normal paragraph" in after_bq