diff --git a/CHANGELOG.md b/CHANGELOG.md index e5fa89b5..7aa06161 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Fixed + +- **PR #2536** (closes #2514, refs #2535) — Stop reasoning-only Thinking entries from being replayed into provider-facing history as blank assistant turns, preventing long WebUI sessions from accumulating duplicated stale Thinking blocks and inflated Activity/tool metadata on later turns. Settled compact Activity rerenders now also clear previously inserted Thinking rows before rebuilding the visible transcript. + ## [v0.51.90] — 2026-05-18 — Release BN (stage-383 — 10-PR full sweep batch — empty-gateway messaging history fix + previous-messaging-sessions setting + Kanban board switcher layout + UI/UX demo theme controls + Slice 3c queue/goal RFC gate + keyless custom endpoints + custom-provider remote model catalog parity + auto-compression elapsed timer + new-conversation cold-start guard + Kanban drag-drop detail open fix) ### Fixed @@ -22,7 +26,6 @@ - **PR #2511** by @franksong2702 (refs #2502 / #2503) — Update the `docs/ui-ux/` demo appearance controls to initialize as `class="dark" data-skin="slate"` instead of the deprecated `data-theme`-only buttons and legacy theme names. Brings the demo pages in line with the live Theme + Skin contract referenced from the new `docs/CONTRACTS.md` so contributors following the contract-index path don't land on stale demos. - **PR #2509** by @Michaelyklam (refs #1925) — Advance the runtime-adapter RFC after the Slice 3b approval/clarify seam shipped in v0.51.89. 
The RFC now marks Slice 3b as shipped and defines the next Slice 3c queue/continue + goal control gate: route those controls through `RuntimeAdapter.queue_message(...)` / `update_goal(...)` only after pinning stable response contracts, bounded unavailable-control behavior, replayable lifecycle/status evidence, ordering/idempotency expectations, and explicit non-goals for runner/sidecar ownership or a WebUI-owned queue/goal scheduler. Docs + adapter-seam regression test only — no runtime/control routing changes in this PR. - ## [v0.51.89] — 2026-05-18 — Release BM (stage-382 — 6-PR full sweep batch — runtime adapter approval/clarify seam + SOUL.md memory panel + #1855 resolve_model_provider fast-path + PWA sidebar spinner fix + /model active-provider preference + contributor contract docs index) ### Changed diff --git a/api/routes.py b/api/routes.py index a56a2b47..261514cf 100644 --- a/api/routes.py +++ b/api/routes.py @@ -8158,6 +8158,7 @@ def _handle_chat_sync(handler, body): ) from api.streaming import ( _merge_display_messages_after_agent_result, + _restore_display_reasoning_metadata, _restore_reasoning_metadata, _sanitize_messages_for_api, _context_messages_for_new_turn, @@ -8210,7 +8211,7 @@ def _handle_chat_sync(handler, body): s.messages = _merge_display_messages_after_agent_result( _previous_messages, _previous_context_messages, - _restore_reasoning_metadata(_previous_messages, _result_messages), + _restore_display_reasoning_metadata(_previous_messages, _result_messages), msg, ) # Only auto-generate title when still default; preserves user renames diff --git a/api/streaming.py b/api/streaming.py index 2cb0daa2..560a8096 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -1932,6 +1932,45 @@ def _strip_native_image_parts_from_content(content): return clean_parts +def _content_has_reasoning_only_parts(content) -> bool: + if not isinstance(content, list) or not content: + return False + saw_reasoning = False + for part in content: + if not 
isinstance(part, dict): + continue + part_type = part.get('type') + if part_type in {'thinking', 'reasoning'}: + text = part.get('thinking') or part.get('reasoning') or part.get('text') or '' + if str(text).strip(): + saw_reasoning = True + continue + if part_type == 'text' and str(part.get('text') or part.get('content') or '').strip(): + return False + if part_type not in {'text', 'thinking', 'reasoning'}: + return False + return saw_reasoning + + +def _is_reasoning_only_assistant_message(msg) -> bool: + """Return True for display-only assistant Thinking entries. + + These entries keep partial Thinking cards visible after reload/cancel, but + they are not API-safe history: providers only see a blank assistant turn. + Visible assistant replies that also carry reasoning metadata are kept. + """ + if not isinstance(msg, dict) or msg.get('role') != 'assistant': + return False + if msg.get('tool_calls'): + return False + content = msg.get('content', '') + if _message_text(content).strip(): + return False + if str(msg.get('reasoning') or msg.get('reasoning_content') or '').strip(): + return True + return _content_has_reasoning_only_parts(content) + + def _sanitize_messages_for_api(messages, *, cfg: dict = None): """Return a deep copy of messages with only API-safe fields. @@ -1970,6 +2009,10 @@ def _sanitize_messages_for_api(messages, *, cfg: dict = None): for msg in messages: if not isinstance(msg, dict): continue + # Skip display-only Thinking entries. They are visible transcript + # metadata, not provider-facing assistant turns. + if _is_reasoning_only_assistant_message(msg): + continue # Skip persisted error markers — never send them to the LLM as prior context. 
def _restore_display_reasoning_metadata(previous_messages, updated_messages):
    """Restore display-only thinking rows for visible transcript persistence.

    First delegates to ``_restore_reasoning_metadata``, then re-inserts deep
    copies of the reasoning-only assistant entries from ``previous_messages``
    that ``_api_safe_message_positions`` left out, placing each one after the
    API-safe messages that preceded it.

    Args:
        previous_messages: Transcript as it was before the agent turn.
        updated_messages: Messages produced by the agent turn.

    Returns:
        The list returned by ``_restore_reasoning_metadata`` with the
        display-only rows inserted into it, or that value unchanged when
        either input is empty.
    """
    updated_messages = _restore_reasoning_metadata(previous_messages, updated_messages)
    if not previous_messages or not updated_messages:
        return updated_messages

    # NOTE(review): assumes each API-safe position index appears once.
    safe_indices = {idx for idx, _ in _api_safe_message_positions(previous_messages)}
    safe_seen = 0      # API-safe messages encountered so far
    display_rows = 0   # display-only rows already accounted for
    for prev_idx, prev_msg in enumerate(previous_messages):
        if prev_idx in safe_indices:
            safe_seen += 1
            continue
        if not _is_reasoning_only_assistant_message(prev_msg):
            continue
        target = safe_seen + display_rows
        existing = updated_messages[target] if target < len(updated_messages) else None
        already_present = (
            isinstance(existing, dict)
            and _is_reasoning_only_assistant_message(existing)
        )
        if not already_present:
            updated_messages.insert(target, copy.deepcopy(prev_msg))
        # Count the slot whether the row was inserted or already there;
        # otherwise later rows would land one position too early.
        display_rows += 1
    return updated_messages
def test_sanitize_messages_for_api_drops_reasoning_only_display_entries():
    """Reasoning-only assistant rows are removed; visible replies stay, minus reasoning."""
    user_turn = {"role": "user", "content": "hello"}
    field_only_thinking = {
        "role": "assistant",
        "content": "",
        "reasoning": "hidden chain",
        "_partial_tool_calls": [{"name": "read_file"}],
    }
    visible_reply = {
        "role": "assistant",
        "content": "visible answer",
        "reasoning": "display metadata",
    }
    part_only_thinking = {
        "role": "assistant",
        "content": [{"type": "reasoning", "text": "hidden"}],
    }

    api_history = _sanitize_messages_for_api(
        [user_turn, field_only_thinking, visible_reply, part_only_thinking]
    )

    expected = [
        {"role": "user", "content": "hello"},
        {"role": "assistant", "content": "visible answer"},
    ]
    assert api_history == expected


def test_restore_reasoning_metadata_does_not_reinsert_reasoning_only_display_entries():
    """The API-level restore keeps two rows and re-attaches metadata to the visible reply."""
    before_turn = [
        {"role": "user", "content": "hello", "timestamp": 1713500000},
        {"role": "assistant", "content": "", "reasoning": "old hidden thought", "timestamp": 1713500001},
        {"role": "assistant", "content": "visible answer", "reasoning": "answer thought", "timestamp": 1713500060},
    ]
    after_turn = [
        {"role": "user", "content": "hello"},
        {"role": "assistant", "content": "visible answer"},
    ]

    result = _restore_reasoning_metadata(before_turn, after_turn)

    # The old thinking row must not come back as an extra entry.
    assert len(result) == 2
    reply = result[1]
    assert reply["content"] == "visible answer"
    assert reply["reasoning"] == "answer thought"
    assert reply["timestamp"] == 1713500060
a/tests/test_ui_tool_call_cleanup.py b/tests/test_ui_tool_call_cleanup.py index 7713287f..d19a82b0 100644 --- a/tests/test_ui_tool_call_cleanup.py +++ b/tests/test_ui_tool_call_cleanup.py @@ -278,6 +278,9 @@ class TestToolCallGroupingStatic: assert "anchorParent.insertBefore(thinkingNode, anchorRow)" in render_fn, ( "Settled Thinking cards should appear before their visible assistant process text." ) + assert ".agent-activity-thinking:not([data-live-thinking=\"1\"])" in render_fn, ( + "Settled rerenders must remove previously inserted Thinking activity rows before rebuilding." + ) assert "seg.insertAdjacentHTML('beforeend', _thinkingCardHtml(thinkingText))" in render_fn, ( "The non-simplified path should preserve standalone settled thinking cards." )