From 42b97d15f6140dc9eadfdbaf81dada1cbd7b57dd Mon Sep 17 00:00:00 2001
From: Hermes Agent
Date: Mon, 18 May 2026 10:44:36 -0500
Subject: [PATCH] fix: clear fallback streaming warnings

---
 CHANGELOG.md                        |  4 +++
 api/streaming.py                    | 38 +++++++++++++++++++++-----
 tests/test_auto_compression_card.py | 41 ++++++++++++++++++++++++++++-
 3 files changed, 76 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e5fa89b5..c20dc208 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## [Unreleased]
 
+### Fixed
+
+- Surface provider fallback and rate-limit lifecycle notices as auto-clearing fallback warnings in the streaming composer status, matching the frontend warning contract.
+
 ## [v0.51.90] — 2026-05-18 — Release BN (stage-383 — 10-PR full sweep batch — empty-gateway messaging history fix + previous-messaging-sessions setting + Kanban board switcher layout + UI/UX demo theme controls + Slice 3c queue/goal RFC gate + keyless custom endpoints + custom-provider remote model catalog parity + auto-compression elapsed timer + new-conversation cold-start guard + Kanban drag-drop detail open fix)
 
 ### Fixed
diff --git a/api/streaming.py b/api/streaming.py
index 2cb0daa2..c245087a 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -85,6 +85,22 @@ def _resolve_custom_provider_runtime_overrides(
     return resolved_provider, resolved_api_key, resolved_base_url
 
 
+def _is_fallback_lifecycle_message(kind: str, message: str) -> bool:
+    """Return True if an agent lifecycle status should surface as a fallback warning."""
+    k = str(kind or '').strip().lower()
+    m = str(message or '').strip().lower()
+    return (
+        k == 'lifecycle'
+        and (
+            'rate limited' in m
+            or 'switching to fallback' in m
+            or 'falling back' in m
+            or 'fallback activated' in m
+            or 'trying fallback' in m
+        )
+    )
+
+
 def _prewarm_skill_tool_modules():
     """Import tools.skills_tool and tools.skill_manager_tool outside any lock.
 
@@ -3012,7 +3028,12 @@ def _run_agent_streaming(
             logger.debug("Failed to put event to queue")
 
     def _agent_status_callback(kind, message):
-        """Bridge Agent lifecycle compression status into WebUI SSE."""
+        """Bridge Agent lifecycle status into WebUI SSE.
+
+        Passes compression events as 'compressing' events and rate-limit/fallback
+        events as 'warning' events so the frontend can surface them to the user.
+        All other lifecycle messages are dropped silently.
+        """
         _message = str(message or '').strip()
         _kind = str(kind or '').strip().lower()
         if not _message:
@@ -3027,12 +3048,17 @@ def _run_agent_streaming(
                 or 'context too large' in _lower
             )
         )
-        if not _is_compression_start:
+        if _is_compression_start:
+            put('compressing', {
+                'session_id': session_id,
+                'message': 'Auto-compressing context to continue...',
+            })
             return
-        put('compressing', {
-            'session_id': session_id,
-            'message': 'Auto-compressing context to continue...',
-        })
+        # Pass through rate-limit and fallback messages so the frontend can
+        # show them as warnings via the existing messages.js 'warning' listener.
+        _is_fallback_notice = _is_fallback_lifecycle_message(_kind, _message)
+        if _is_fallback_notice:
+            put('warning', {'type': 'fallback', 'message': _message})
 
     # Initialised here (before any code that may raise) so the outer `finally`
     # block can safely check `if _checkpoint_stop is not None` even when an
diff --git a/tests/test_auto_compression_card.py b/tests/test_auto_compression_card.py
index 5bb2dd30..8e4af44f 100644
--- a/tests/test_auto_compression_card.py
+++ b/tests/test_auto_compression_card.py
@@ -1,5 +1,7 @@
 from pathlib import Path
 
+from api.streaming import _is_fallback_lifecycle_message
+
 ROOT = Path(__file__).resolve().parents[1]
 
 
@@ -38,7 +40,7 @@ def test_auto_compression_running_sse_uses_active_session_running_card():
     assert "message:d.message||'Auto-compressing context...'" in block
 
 
-def test_auto_compression_running_sse_is_emitted_from_agent_lifecycle_status():
+def test_agent_status_callback_emits_compressing_and_warning_events():
     src = _read("api/streaming.py")
     start = src.find("def _agent_status_callback")
     assert start != -1, "agent status callback bridge not found"
@@ -46,6 +48,7 @@ def test_auto_compression_running_sse_is_emitted_from_agent_lifecycle_status():
     assert end != -1, "status callback block end marker not found"
     block = src[start:end]
 
+    # compressing events for compression lifecycle notices
     assert "put('compressing'" in block
     assert "'session_id': session_id" in block
     assert "'message': 'Auto-compressing context to continue...'" in block
@@ -53,11 +56,47 @@ def test_auto_compression_running_sse_is_emitted_from_agent_lifecycle_status():
     assert "'compressing'" in block
     assert "'compacting context'" in block
     assert "'context too large'" in block
+
+    # warning events with type:fallback for rate-limit/fallback lifecycle notices
+    assert "put('warning'" in block
+    assert "'type': 'fallback'" in block
+    assert "'rate limited'" in src
+    assert "'switching to fallback'" in src
+    assert "'falling back'" in src
+    assert "'fallback activated'" in src
+    assert "'trying fallback'" in src
+
+    # Verify callback is wired to agent
     assert "'status_callback' in _agent_params" in src
     assert "_agent_kwargs['status_callback'] = _agent_status_callback" in src
     assert "agent.status_callback = _agent_kwargs.get('status_callback')" in src
 
 
+def test_agent_status_callback_wiring():
+    src = _read("api/streaming.py")
+    assert "_agent_status_callback" in src
+    assert "_agent_kwargs['status_callback'] = _agent_status_callback" in src
+
+
+def test_fallback_lifecycle_message_predicate_matches_agent_emitters():
+    assert _is_fallback_lifecycle_message(
+        "lifecycle",
+        "Rate limited — switching to fallback provider...",
+    )
+    assert _is_fallback_lifecycle_message(
+        "lifecycle",
+        "Non-retryable error (HTTP 500) — trying fallback...",
+    )
+    assert not _is_fallback_lifecycle_message(
+        "tool",
+        "Rate limited — switching to fallback provider...",
+    )
+    assert not _is_fallback_lifecycle_message(
+        "lifecycle",
+        "Auto-compressing context to continue...",
+    )
+
+
 def test_auto_compression_completion_transition_is_preserved_after_running_listener():
     src = _read("static/messages.js")
     compressing_idx = src.find("source.addEventListener('compressing'")