From 40f69a2b75c06379cd04cc0cb9cc975d7d4244c9 Mon Sep 17 00:00:00 2001 From: Frank Song Date: Sat, 16 May 2026 11:53:13 +0800 Subject: [PATCH 1/4] Keep recovered pending turns in context --- CHANGELOG.md | 2 ++ api/models.py | 51 ++++++++++++++++++++++------ tests/test_session_sidecar_repair.py | 38 +++++++++++++++++++++ 3 files changed, 80 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bf05c1b..5cd3aab4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,6 +59,8 @@ ### Fixed +- Stale stream recovery now keeps a recovered pending user turn in the model context as well as the visible transcript. Before this fix, a server restart during an in-flight turn could restore the user's message in WebUI while omitting it from `context_messages`, so the next agent turn could forget a prompt that was visibly present just above it. + - **PR #2315** by @Michaelyklam (closes #2305, refs #749) — WebUI profile creation now seeds bundled profile skills for newly-created non-cloned profiles, matching the CLI's `hermes profile create` behaviour. Pre-fix, creating a profile via Settings → New Profile (without checking "Clone from active profile") left the profile's `skills/` directory empty, which was inconsistent with CLI-created profiles that get the full bundled-skills overlay. The fix calls `seed_profile_skills(profile_path, quiet=True)` after `profile_path.mkdir()` when `clone_from is None`. Cloned profiles still inherit skills from their source — they don't get a second bundled-skills overlay. Seed failures (e.g. `hermes_cli` unavailable in Docker fallback) are logged as warnings, not fatal — profile creation still succeeds. - **PR #2317** by @Michaelyklam (refs #2312 follow-up #2) — Appearance boot reconciliation now treats explicit `light`, `dark`, and `system` localStorage theme values as user selections when a prior Settings autosave failed. 
Pre-fix, the predicate `lsHasExplicitTheme = lsTheme === 'system'` only treated 'system' as explicit, so a user who picked `light` on a server defaulted to `dark` (or vice versa) with a failed autosave still reverted to the server default on refresh. Now broadened to `['system','light','dark'].includes(lsTheme)`. Skin handling was already correct (`lsSkin !== 'default'`). Closes follow-up item #2 from the v0.51.66 review (#2312). diff --git a/api/models.py b/api/models.py index 681df1c5..f448f7a3 100644 --- a/api/models.py +++ b/api/models.py @@ -203,6 +203,42 @@ def _active_stream_ids(): return set(STREAMS.keys()) +def _append_recovered_turn_to_context(session, recovered: dict) -> None: + context_messages = getattr(session, 'context_messages', None) + if not isinstance(context_messages, list) or not context_messages: + return + recovered_text = " ".join(str(recovered.get('content') or '').split()) + if recovered_text: + for existing in reversed(context_messages[-8:]): + if not isinstance(existing, dict) or existing.get('role') != 'user': + continue + existing_text = " ".join(str(existing.get('content') or '').split()) + if existing_text == recovered_text: + return + context_entry = {k: v for k, v in recovered.items() if k != 'timestamp'} + context_messages.append(context_entry) + + +def _append_recovered_pending_turn(session, *, timestamp: int | None = None) -> dict | None: + pending_text = str(session.pending_user_message or '') + if not pending_text: + return None + recovered_ts = int(time.time()) + if isinstance(timestamp, (int, float)) and timestamp > 0: + recovered_ts = int(timestamp) + recovered: dict = { + 'role': 'user', + 'content': session.pending_user_message, + 'timestamp': recovered_ts, + '_recovered': True, + } + if session.pending_attachments: + recovered['attachments'] = list(session.pending_attachments) + session.messages.append(recovered) + _append_recovered_turn_to_context(session, recovered) + return recovered + + def 
_is_streaming_session(active_stream_id, active_stream_ids): return bool(active_stream_id and active_stream_id in active_stream_ids) @@ -695,15 +731,16 @@ def _apply_core_sync_or_error_marker( if isinstance(session.pending_started_at, (int, float)) and session.pending_started_at > 0: _recovered_ts = int(session.pending_started_at) if not _already_checkpointed: + _append_recovered_pending_turn(session, timestamp=_recovered_ts) + else: recovered = { 'role': 'user', 'content': session.pending_user_message, - 'timestamp': _recovered_ts, '_recovered': True, } if session.pending_attachments: recovered['attachments'] = list(session.pending_attachments) - session.messages.append(recovered) + _append_recovered_turn_to_context(session, recovered) session.active_stream_id = None session.pending_user_message = None session.pending_attachments = [] @@ -752,15 +789,7 @@ def _apply_core_sync_or_error_marker( _recovered_ts = int(time.time()) if isinstance(session.pending_started_at, (int, float)) and session.pending_started_at > 0: _recovered_ts = int(session.pending_started_at) - recovered: dict = { - 'role': 'user', - 'content': session.pending_user_message, - 'timestamp': _recovered_ts, - '_recovered': True, - } - if session.pending_attachments: - recovered['attachments'] = list(session.pending_attachments) - session.messages.append(recovered) + _append_recovered_pending_turn(session, timestamp=_recovered_ts) session.active_stream_id = None session.pending_user_message = None session.pending_attachments = [] diff --git a/tests/test_session_sidecar_repair.py b/tests/test_session_sidecar_repair.py index e95efafb..4d575125 100644 --- a/tests/test_session_sidecar_repair.py +++ b/tests/test_session_sidecar_repair.py @@ -257,6 +257,44 @@ class TestDraftRecovery: f"got {user_msgs[0]['timestamp']}" ) + def test_pending_message_recovered_into_context_messages(self, hermes_home, monkeypatch): + """A recovered pending prompt must remain visible to the next agent turn. 
+ + Sessions that have been auto-compressed feed context_messages to the + model, not the full display transcript. If stale-stream repair appends + the recovered user prompt only to messages, the user can see the prompt + in WebUI but the next agent turn cannot. + """ + s = _make_session( + messages=[{"role": "user", "content": "older visible turn"}], + context_messages=[ + {"role": "user", "content": "older context turn"}, + {"role": "assistant", "content": "older context answer"}, + ], + ) + s.pending_user_message = "Clip this article https://example.com/post" + s.active_stream_id = "stream_1" + lock = config._get_session_agent_lock(s.session_id) + + with lock: + core_path = hermes_home / "sessions" / f"session_{s.session_id}.json" + result = _apply_core_sync_or_error_marker( + s, core_path, stream_id_for_recheck="stream_1", + ) + + assert result is True + assert any( + m.get("role") == "user" + and m.get("content") == "Clip this article https://example.com/post" + and m.get("_recovered") is True + for m in s.messages + ) + assert any( + m.get("role") == "user" + and m.get("content") == "Clip this article https://example.com/post" + for m in s.context_messages + ), "Recovered pending user turn must be included in model context." 
+ def test_error_marker_no_preserved_as_draft(self, hermes_home, monkeypatch): """Error marker text must NOT say 'preserved as a draft'.""" s = _make_stale_session() From dd5f3ff9b5e3b2db392a77f664fa4ff25a828da4 Mon Sep 17 00:00:00 2001 From: Frank Song Date: Sat, 16 May 2026 10:52:33 +0800 Subject: [PATCH 2/4] Suppress interim text echoes in Thinking cards --- CHANGELOG.md | 2 ++ static/messages.js | 23 +++++++++++++++++++++-- static/ui.js | 13 +++++++++++++ tests/test_ui_tool_call_cleanup.py | 27 +++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bf05c1b..0069a1b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,8 @@ ### Fixed +- **PR TBD** by @franksong2702 — Thinking cards now suppress exact snippets that are already shown as user-visible interim assistant text, avoiding duplicated progress lines when an agent emits the same sentence through both reasoning and interim-assistant callbacks. + - **PR #2322** by @Michaelyklam (refs #2271) — LAN Ollama models selected from endpoint-discovered `custom:-` / `custom::` picker entries now route through the configured `ollama` provider and base URL instead of surfacing a missing `CUSTOM_*_API_KEY` error. The picker still surfaces endpoint-discovered entries; the fix is to recognize them as UI routing hints matching the configured local-server base URL and resolve them via the actual `ollama` provider. - **PR #2326** by @Michaelyklam (closes #2232) — Legacy `hermes` CLI toolset alias is now normalized to `hermes-cli` + `hermes-api-server` when WebUI resolves CLI toolsets from shared Hermes config. Modern Hermes Agent exposes the composite under those two names; older configs that still contain the legacy `hermes` toolset name no longer surface as "unknown toolset" warnings. 
diff --git a/static/messages.js b/static/messages.js index 2e037943..fea980df 100644 --- a/static/messages.js +++ b/static/messages.js @@ -432,6 +432,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ let assistantText=''; let reasoningText=''; let liveReasoningText=''; + let visibleInterimSnippets=[]; let _latestGoalStatus=null; let _pendingGoalContinuation=null; let assistantRow=null; @@ -527,6 +528,19 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ function _closeSource(){ closeLiveStream(activeSid, streamId); } + function _stripVisibleAssistantEchoFromThinking(text, snippets){ + let out=String(text||''); + (Array.isArray(snippets)?snippets:[]).forEach(snippet=>{ + const visible=String(snippet||'').trim(); + if(visible.length<20) return; + out=out.split(visible).join(''); + }); + return out.trim(); + } + function _liveThinkingText(){ + const clean=_stripVisibleAssistantEchoFromThinking(liveReasoningText, visibleInterimSnippets); + return clean || 'Thinking…'; + } function syncInflightAssistantMessage(){ const inflight=INFLIGHT[activeSid]; if(!inflight) return; @@ -1207,9 +1221,14 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ return; } assistantText+=visible; + visibleInterimSnippets.push(visible); syncInflightAssistantMessage(); if(!S.session||S.session.session_id!==activeSid) return; const parsed=_parseStreamState(); + if(window._showThinking!==false){ + if(typeof updateThinking==='function') updateThinking(_liveThinkingText()); + else appendThinking(_liveThinkingText()); + } if(String((parsed&&parsed.displayText)||'').trim()||assistantRow) ensureAssistantRow(); _scheduleRender(); }); @@ -1226,8 +1245,8 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ // finalizeThinkingCard(). The old rAF-only path caused a race where // the thinking row was still a spinner when finalized. 
if(window._showThinking!==false){ - if(typeof updateThinking==='function') updateThinking(liveReasoningText||'Thinking…'); - else appendThinking(liveReasoningText); + if(typeof updateThinking==='function') updateThinking(_liveThinkingText()); + else appendThinking(_liveThinkingText()); } _scheduleRender(); }); diff --git a/static/ui.js b/static/ui.js index 949bf67f..0704cc33 100644 --- a/static/ui.js +++ b/static/ui.js @@ -2308,6 +2308,16 @@ function _sanitizeThinkingDisplayText(text){ return stripped.trim(); } +function _stripVisibleAssistantEchoFromThinking(thinkingText, visibleText){ + let out=String(thinkingText||''); + const visible=String(visibleText||''); + if(!out||!visible) return out.trim(); + visible.split(/\n{2,}/).map(s=>s.trim()).filter(s=>s.length>=20).forEach(snippet=>{ + out=out.split(snippet).join(''); + }); + return out.trim(); +} + function renderMd(raw){ let s=(raw||'').replace(/\r\n/g,'\n').replace(/\r/g,'\n'); // ── Entity decode: must run FIRST so > lines become > for the blockquote @@ -5402,6 +5412,9 @@ function renderMessages(options){ content='**Error:** No response received after context compression. 
Please retry.'; } const displayContent=isUser?_stripWorkspaceDisplayPrefix(content):content; + if(thinkingText&&!isUser){ + thinkingText=_stripVisibleAssistantEchoFromThinking(thinkingText, displayContent); + } const isLastAssistant=!isUser&&vi===renderVisWithIdx.length-1; const nextRendered=renderVisWithIdx[vi+1]; const isTurnFinalAssistant=!isUser&&(!nextRendered||!nextRendered.m||nextRendered.m.role!=='assistant'); diff --git a/tests/test_ui_tool_call_cleanup.py b/tests/test_ui_tool_call_cleanup.py index b350379b..2dc06ba9 100644 --- a/tests/test_ui_tool_call_cleanup.py +++ b/tests/test_ui_tool_call_cleanup.py @@ -11,6 +11,7 @@ REPO = pathlib.Path(__file__).parent.parent UI_JS = (REPO / "static" / "ui.js").read_text(encoding="utf-8") BOOT_JS = (REPO / "static" / "boot.js").read_text(encoding="utf-8") CSS = (REPO / "static" / "style.css").read_text(encoding="utf-8") +MESSAGES_JS = (REPO / "static" / "messages.js").read_text(encoding="utf-8") def _function_body(src: str, name: str) -> str: @@ -233,6 +234,32 @@ class TestToolCallGroupingStatic: "Readable progress must not reintroduce the noisy secondary tool-name list." ) + def test_live_thinking_suppresses_visible_interim_echoes(self): + interim_match = re.search(r"source\.addEventListener\('interim_assistant',e=>\{(.*?)\n\s*\}\);", MESSAGES_JS, re.S) + assert interim_match, "interim_assistant listener not found" + interim_fn = interim_match.group(1) + live_thinking_fn = _function_body(MESSAGES_JS, "_liveThinkingText") + + assert "visibleInterimSnippets.push(visible)" in interim_fn, ( + "Visible interim commentary should be remembered so the live Thinking card does not echo it." + ) + assert "_stripVisibleAssistantEchoFromThinking" in live_thinking_fn, ( + "Live Thinking text should suppress exact visible interim commentary echoes." 
+ ) + + def test_settled_thinking_suppresses_visible_assistant_echoes(self): + render_fn = _function_body(UI_JS, "renderMessages") + helper = _function_body(UI_JS, "_stripVisibleAssistantEchoFromThinking") + assert "_stripVisibleAssistantEchoFromThinking(thinkingText, displayContent)" in render_fn, ( + "Settled Thinking cards should not repeat text already rendered as visible assistant content." + ) + assert "s.length>=20" in helper, ( + "Thinking echo suppression should ignore tiny snippets to avoid over-stripping reasoning." + ) + assert "out.split(snippet).join('')" in helper, ( + "Thinking echo suppression should remove exact visible assistant snippets from reasoning display." + ) + def test_tools_and_thinking_share_one_collapsed_activity_dropdown(self): ui_min = re.sub(r"\s+", "", UI_JS) assert "functionensureActivityGroup(" in ui_min, ( From d94320b4bf7529aa495b8055d55ed5902c58fa79 Mon Sep 17 00:00:00 2001 From: Frank Song Date: Sat, 16 May 2026 10:58:59 +0800 Subject: [PATCH 3/4] Avoid duplicate Thinking echo helper names --- static/messages.js | 4 ++-- tests/test_ui_tool_call_cleanup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/static/messages.js b/static/messages.js index fea980df..d21d1144 100644 --- a/static/messages.js +++ b/static/messages.js @@ -528,7 +528,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ function _closeSource(){ closeLiveStream(activeSid, streamId); } - function _stripVisibleAssistantEchoFromThinking(text, snippets){ + function _stripLiveVisibleAssistantEchoFromThinking(text, snippets){ let out=String(text||''); (Array.isArray(snippets)?snippets:[]).forEach(snippet=>{ const visible=String(snippet||'').trim(); @@ -538,7 +538,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ return out.trim(); } function _liveThinkingText(){ - const clean=_stripVisibleAssistantEchoFromThinking(liveReasoningText, visibleInterimSnippets); + const 
clean=_stripLiveVisibleAssistantEchoFromThinking(liveReasoningText, visibleInterimSnippets); return clean || 'Thinking…'; } function syncInflightAssistantMessage(){ diff --git a/tests/test_ui_tool_call_cleanup.py b/tests/test_ui_tool_call_cleanup.py index 2dc06ba9..29fe6457 100644 --- a/tests/test_ui_tool_call_cleanup.py +++ b/tests/test_ui_tool_call_cleanup.py @@ -243,7 +243,7 @@ class TestToolCallGroupingStatic: assert "visibleInterimSnippets.push(visible)" in interim_fn, ( "Visible interim commentary should be remembered so the live Thinking card does not echo it." ) - assert "_stripVisibleAssistantEchoFromThinking" in live_thinking_fn, ( + assert "_stripLiveVisibleAssistantEchoFromThinking" in live_thinking_fn, ( "Live Thinking text should suppress exact visible interim commentary echoes." ) From 7516c9591fb45fd05a660934c3aefa3ccf8e0f66 Mon Sep 17 00:00:00 2001 From: Hermes Agent Date: Sat, 16 May 2026 04:17:51 +0000 Subject: [PATCH 4/4] stage-365: stamp CHANGELOG v0.51.72 (2-PR safe-lane batch) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v0.51.72 — Release AV: - PR #2354 (fixes #2353) — Recovered pending turn context fix - PR #2348 (fixes #2346) — Thinking card interim-text echo suppression Hit Pitfall 6 again: contributor branches predated v0.51.70+71 so their CHANGELOG entries landed in pre-existing v0.51.68/69 sections after rebase. Manually moved entries to a new v0.51.72 section above v0.51.71, with proper PR # attribution (#2348 was 'PR TBD' on the contributor branch). 
--- CHANGELOG.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fe4ebc9..a2679a4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ ## [Unreleased] +## [v0.51.72] — 2026-05-16 — Release AV (stage-365 — 2-PR safe-lane batch — #2354 recovered pending turn context fix + #2348 Thinking card interim-text echo suppression) + +### Fixed + +- **PR #2354** by @franksong2702 (fixes #2353) — Stale stream recovery now keeps a recovered pending user turn in the model context (`context_messages`) as well as the visible transcript. Pre-fix, a server restart during an in-flight turn could restore the user's message in WebUI while omitting it from `context_messages`, so the next agent turn could forget a prompt that was visibly present just above it. The repair path now appends the recovered user turn to both surfaces, first checking the last 8 `context_messages` entries so an already-checkpointed prompt is not duplicated in the model context. + +- **PR #2348** by @franksong2702 (fixes #2346) — Thinking cards now suppress exact snippets that are already shown as user-visible interim assistant text, avoiding duplicated progress lines when an agent emits the same sentence through both reasoning and interim-assistant callbacks. Records visible interim snippets (`visibleInterimSnippets`) during the live stream and strips them from the live Thinking card text via `_liveThinkingText()`; applies the same suppression in the settled-transcript path so reload/session-switch sees the cleaned-up view too. 
+ ## [v0.51.71] — 2026-05-16 — Release AU (stage-364 — 3-PR batch — #2349 stale-stream cleanup non-touching + #2343 profiles vs workspaces help card + #2283 run-event journal replay [refs #1925 RFC slice 1] — with Opus-caught replay double-render fix) ### Added @@ -36,8 +44,6 @@ ### Fixed -- **PR TBD** by @franksong2702 — Thinking cards now suppress exact snippets that are already shown as user-visible interim assistant text, avoiding duplicated progress lines when an agent emits the same sentence through both reasoning and interim-assistant callbacks. - - **PR #2322** by @Michaelyklam (refs #2271) — LAN Ollama models selected from endpoint-discovered `custom:-` / `custom::` picker entries now route through the configured `ollama` provider and base URL instead of surfacing a missing `CUSTOM_*_API_KEY` error. The picker still surfaces endpoint-discovered entries; the fix is to recognize them as UI routing hints matching the configured local-server base URL and resolve them via the actual `ollama` provider. - **PR #2326** by @Michaelyklam (closes #2232) — Legacy `hermes` CLI toolset alias is now normalized to `hermes-cli` + `hermes-api-server` when WebUI resolves CLI toolsets from shared Hermes config. Modern Hermes Agent exposes the composite under those two names; older configs that still contain the legacy `hermes` toolset name no longer surface as "unknown toolset" warnings. @@ -61,8 +67,6 @@ ### Fixed -- Stale stream recovery now keeps a recovered pending user turn in the model context as well as the visible transcript. Before this fix, a server restart during an in-flight turn could restore the user's message in WebUI while omitting it from `context_messages`, so the next agent turn could forget a prompt that was visibly present just above it. - - **PR #2315** by @Michaelyklam (closes #2305, refs #749) — WebUI profile creation now seeds bundled profile skills for newly-created non-cloned profiles, matching the CLI's `hermes profile create` behaviour. 
Pre-fix, creating a profile via Settings → New Profile (without checking "Clone from active profile") left the profile's `skills/` directory empty, which was inconsistent with CLI-created profiles that get the full bundled-skills overlay. The fix calls `seed_profile_skills(profile_path, quiet=True)` after `profile_path.mkdir()` when `clone_from is None`. Cloned profiles still inherit skills from their source — they don't get a second bundled-skills overlay. Seed failures (e.g. `hermes_cli` unavailable in Docker fallback) are logged as warnings, not fatal — profile creation still succeeds. - **PR #2317** by @Michaelyklam (refs #2312 follow-up #2) — Appearance boot reconciliation now treats explicit `light`, `dark`, and `system` localStorage theme values as user selections when a prior Settings autosave failed. Pre-fix, the predicate `lsHasExplicitTheme = lsTheme === 'system'` only treated 'system' as explicit, so a user who picked `light` on a server defaulted to `dark` (or vice versa) with a failed autosave still reverted to the server default on refresh. Now broadened to `['system','light','dark'].includes(lsTheme)`. Skin handling was already correct (`lsSkin !== 'default'`). Closes follow-up item #2 from the v0.51.66 review (#2312).