fix(chat): reset reasoning accumulator per turn and prefer reasoning_content (closes #2565)

Two confirmed bugs in the thinking/reasoning display: 1. reasoningText was initialized once when the SSE stream opened and never reset between turns. On the done event, the last assistant message received the union of every turn's reasoning. Now reset at both turn boundaries: tool (alongside existing liveReasoningText reset) and interim_assistant (the other turn boundary where prior reasoning closes). 2. ui.js renderMessages preferred m.reasoning (which could be corrupted by bug 1) over m.reasoning_content (the clean per-turn value from the backend). The fallback now reads m.reasoning_content || m.reasoning. Both fixes are needed: bug 2 alone cannot cover providers that stream reasoning events without populating reasoning_content on the final API message. Updated test_streaming_race_fix.py to scope its reconnect-accumulator guard to the _wireSSE preamble only, since turn-boundary resets inside event listeners are intentional and correct. 9 new regression tests in test_issue2565_reasoning_accumulation.py.
2026-05-25 03:00:23 +00:00 · 2026-05-23 02:42:15 +00:00
parent 1f56fad73f
commit 160cd03c18
4 changed files with 178 additions and 9 deletions
@@ -1490,6 +1490,8 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
      if(!visible){
        return;
      }
+      reasoningText='';
+      liveReasoningText='';
      if(alreadyStreamed){
        if(!S.session||S.session.session_id!==activeSid) return;
        _resetAssistantSegment();
@@ -1548,6 +1550,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
      // to be re-created below everything when reasoning resumed post-tool.
      if(typeof finalizeThinkingCard==='function') finalizeThinkingCard();
      liveReasoningText='';
+      reasoningText='';
      const oldRow=$('toolRunningRow');if(oldRow)oldRow.remove();
      appendLiveToolCard(tc);
      snapshotLiveTurn();
@@ -6098,7 +6098,7 @@ function renderMessages(options){
      thinkingText=content.filter(p=>p&&(p.type==='thinking'||p.type==='reasoning')).map(p=>p.thinking||p.reasoning||p.text||'').join('\n');
      content=content.filter(p=>p&&p.type==='text').map(p=>p.text||p.content||'').join('\n');
    }
-    if(!thinkingText && m.reasoning) thinkingText=m.reasoning;
+    if(!thinkingText && (m.reasoning_content || m.reasoning)) thinkingText=m.reasoning_content || m.reasoning;
    if(!thinkingText && typeof content==='string'){
      const thinkMatch=content.match(/^\s*<think>([\s\S]*?)<\/think>\s*/);
      if(thinkMatch){
@@ -0,0 +1,154 @@
+"""Regression tests for issue #2565: reasoning display bugs.
+
+Issue 1: reasoningText accumulates across turns within a single SSE stream.
+  - reasoningText must be reset at each turn boundary (tool and interim_assistant
+    events) so the done event only persists the current turn's reasoning.
+
+Issue 2: ui.js display prefers m.reasoning over m.reasoning_content.
+  - The rendering path must prefer m.reasoning_content (the clean per-turn value
+    from the backend) over m.reasoning (which can be corrupted by Issue 1).
+
+Both fixes are needed: Issue 2 alone cannot cover providers that stream reasoning
+events without populating reasoning_content on the final API message.
+"""
+
+import pathlib
+import re
+
+REPO = pathlib.Path(__file__).parent.parent
+
+
+def read(rel):
+    return (REPO / rel).read_text(encoding='utf-8')
+
+
+# ── Issue 1: reasoningText reset at turn boundaries ──────────────────────────
+
+
+class TestReasoningTextResetOnTool:
+    """reasoningText must be reset alongside liveReasoningText in the tool
+    listener so multi-tool-turn sessions don't accumulate reasoning across
+    turns."""
+
+    def _tool_listener_body(self):
+        """Extract the full tool listener body between the tool and
+        tool_complete addEventListener calls."""
+        src = read('static/messages.js')
+        tool_start = src.find("source.addEventListener('tool'")
+        assert tool_start >= 0, "tool listener not found"
+        tool_complete_start = src.find(
+            "source.addEventListener('tool_complete'", tool_start + 1,
+        )
+        assert tool_complete_start >= 0, "tool_complete listener not found"
+        return src[tool_start:tool_complete_start]
+
+    def test_reasoning_text_reset_in_tool_listener(self):
+        body = self._tool_listener_body()
+        assert "reasoningText=''" in body, (
+            "reasoningText must be reset to '' inside the tool listener "
+            "(Issue 1: accumulated reasoning from prior turns was assigned "
+            "to the last assistant message on the done event)"
+        )
+
+    def test_live_reasoning_text_also_reset_in_tool_listener(self):
+        body = self._tool_listener_body()
+        assert "liveReasoningText=''" in body, (
+            "liveReasoningText must also be reset in the tool listener"
+        )
+
+
+class TestReasoningTextResetOnInterimAssistant:
+    """reasoningText must be reset at the interim_assistant boundary — the
+    other turn boundary where the previous turn's reasoning closes out.
+    Without this, providers that emit reasoning before an interim_assistant
+    event will still co-mingle reasoning across turns."""
+
+    def test_reasoning_text_reset_in_interim_assistant_listener(self):
+        src = read('static/messages.js')
+        m = re.search(
+            r"source\.addEventListener\('interim_assistant'\s*,\s*(?:e|ev)\s*=>\s*\{(.*?)\n\s*\}\);",
+            src, re.DOTALL,
+        )
+        assert m, "interim_assistant listener not found in messages.js"
+        body = m.group(1)
+        assert "reasoningText=''" in body, (
+            "reasoningText must be reset to '' inside the interim_assistant "
+            "listener (Issue 1: turn boundary where prior reasoning closes)"
+        )
+
+    def test_live_reasoning_text_reset_in_interim_assistant_listener(self):
+        src = read('static/messages.js')
+        m = re.search(
+            r"source\.addEventListener\('interim_assistant'\s*,\s*(?:e|ev)\s*=>\s*\{(.*?)\n\s*\}\);",
+            src, re.DOTALL,
+        )
+        assert m
+        body = m.group(1)
+        assert "liveReasoningText=''" in body, (
+            "liveReasoningText must be reset in the interim_assistant listener"
+        )
+
+
+# ── Issue 2: reasoning_content preference on read ────────────────────────────
+
+
+class TestReasoningContentPreference:
+    """The rendering path in ui.js must prefer m.reasoning_content (the clean
+    per-turn value from the backend) over m.reasoning (which can be corrupted
+    by Issue 1's accumulation bug)."""
+
+    def test_reasoning_content_checked_before_reasoning(self):
+        src = read('static/ui.js')
+        assert 'm.reasoning_content' in src, (
+            "ui.js must reference m.reasoning_content so the clean per-turn "
+            "value from the backend is used for thinking card display"
+        )
+
+    def test_reasoning_content_preferred_in_thinking_text_fallback(self):
+        src = read('static/ui.js')
+        lines = src.splitlines()
+        for line in lines:
+            if 'thinkingText' in line and 'm.reasoning' in line:
+                if 'm.reasoning_content' not in line and 'reasoning_content' not in line:
+                    if 'Array.isArray' not in line:
+                        raise AssertionError(
+                            f"Line references m.reasoning without checking "
+                            f"m.reasoning_content first: {line.strip()}"
+                        )
+
+    def test_reasoning_content_has_priority_over_reasoning(self):
+        """The fallback expression must evaluate reasoning_content first."""
+        src = read('static/ui.js')
+        m = re.search(
+            r"thinkingText\s*=\s*(m\.reasoning_content\s*\|\|\s*m\.reasoning)",
+            src,
+        )
+        assert m, (
+            "thinkingText assignment must use m.reasoning_content || m.reasoning "
+            "so the clean backend value takes priority over the potentially "
+            "corrupted frontend-accumulated value"
+        )
+
+
+# ── Cross-cutting: done event still has the persist-on-done guard ────────────
+
+
+class TestDoneEventReasoningPersist:
+    """The done event's reasoning persistence guard must still exist —
+    the reset fixes reduce the blast radius but the guard prevents double-write
+    when the backend already populated .reasoning."""
+
+    def test_done_event_has_reasoning_guard(self):
+        src = read('static/messages.js')
+        assert '!lastAsst.reasoning' in src, (
+            "done event must guard reasoningText persistence with "
+            "!lastAsst.reasoning to avoid overwriting backend-populated values"
+        )
+
+    def test_done_event_persists_reasoning_text(self):
+        src = read('static/messages.js')
+        assert 'lastAsst.reasoning=reasoningText' in src, (
+            "done event must still persist reasoningText to lastAsst.reasoning "
+            "for providers that stream reasoning events without populating "
+            "reasoning_content on the final API message"
+        )
@@ -114,19 +114,31 @@ class TestReconnectAccumulatorPreservation:
    """

    def test_wire_sse_does_not_reset_accumulators(self):
-        """Regression guard: _wireSSE must not contain a literal
-        accumulator-reset statement.  Preserves pre-reconnect content so
-        the user sees the full response across a drop+reconnect."""
+        """Regression guard: the _wireSSE preamble (before any event
+        listeners are attached) must not contain a literal accumulator-
+        reset statement.  Preserves pre-reconnect content so the user
+        sees the full response across a drop+reconnect.
+
+        Turn-boundary resets inside event listeners (tool,
+        interim_assistant) are intentional (#2565) and not covered by
+        this guard — they prevent reasoning from accumulating across
+        multi-turn agent sessions."""
        src = read('static/messages.js')
        m = re.search(r'function _wireSSE\(source\)\{.*?\n  \}', src, re.DOTALL)
        assert m, "_wireSSE not found"
        fn = m.group(0)
-        assert "assistantText=''" not in fn and 'assistantText = ""' not in fn, (
-            "_wireSSE must NOT reset assistantText — the server does not replay "
-            "events on reconnect, so the reset would wipe valid pre-drop content"
+        # Check only the preamble before the first addEventListener — this is
+        # the reconnect path where resets would cause data loss.
+        first_listener = fn.find("source.addEventListener(")
+        assert first_listener > 0, "no addEventListener in _wireSSE"
+        preamble = fn[:first_listener]
+        assert "assistantText=''" not in preamble and 'assistantText = ""' not in preamble, (
+            "_wireSSE preamble must NOT reset assistantText — the server does "
+            "not replay events on reconnect, so the reset would wipe valid "
+            "pre-drop content"
        )
-        assert "reasoningText=''" not in fn and 'reasoningText = ""' not in fn, (
-            "_wireSSE must NOT reset reasoningText on reconnect"
+        assert "reasoningText=''" not in preamble and 'reasoningText = ""' not in preamble, (
+            "_wireSSE preamble must NOT reset reasoningText on reconnect"
        )

    def test_closure_initialises_accumulators_empty(self):