mirror of
https://github.com/nesquena/hermes-webui.git
synced 2026-05-24 10:40:16 +00:00
@@ -7,6 +7,8 @@
|
||||
- **PR #2536** (closes #2514, refs #2535) — Stop reasoning-only Thinking entries from being replayed into provider-facing history as blank assistant turns, preventing long WebUI sessions from accumulating duplicated stale Thinking blocks and inflated Activity/tool metadata on later turns. Settled compact Activity rerenders now also clear previously inserted Thinking rows before rebuilding the visible transcript.
|
||||
|
||||
- **PR #2520** by @OneFat3 (refs #2247) — Route archive extraction (`/api/upload/extract`) through the per-session attachment inbox (`_session_attachment_dir`) instead of hardcoded `Path(s.workspace)`, matching the single-file upload path. Extracted archives now land at `<attachment_root>/<session_id>/<archive_stem>/` so session deletion cleanup covers them and per-session isolation is preserved when `HERMES_WEBUI_ATTACHMENT_DIR` is configured.
|
||||
- Surface provider fallback and rate-limit lifecycle notices as auto-clearing fallback warnings in the streaming composer status, matching the frontend warning contract.
|
||||
|
||||
## [v0.51.90] — 2026-05-18 — Release BN (stage-383 — 10-PR full sweep batch — empty-gateway messaging history fix + previous-messaging-sessions setting + Kanban board switcher layout + UI/UX demo theme controls + Slice 3c queue/goal RFC gate + keyless custom endpoints + custom-provider remote model catalog parity + auto-compression elapsed timer + new-conversation cold-start guard + Kanban drag-drop detail open fix)
|
||||
|
||||
### Fixed
|
||||
|
||||
+32
-6
@@ -85,6 +85,22 @@ def _resolve_custom_provider_runtime_overrides(
|
||||
return resolved_provider, resolved_api_key, resolved_base_url
|
||||
|
||||
|
||||
def _is_fallback_lifecycle_message(kind: str, message: str) -> bool:
|
||||
"""Return True if an agent lifecycle status should surface as a fallback warning."""
|
||||
k = str(kind or '').strip().lower()
|
||||
m = str(message or '').strip().lower()
|
||||
return (
|
||||
k == 'lifecycle'
|
||||
and (
|
||||
'rate limited' in m
|
||||
or 'switching to fallback' in m
|
||||
or 'falling back' in m
|
||||
or 'fallback activated' in m
|
||||
or 'trying fallback' in m
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def _prewarm_skill_tool_modules():
|
||||
"""Import tools.skills_tool and tools.skill_manager_tool outside any lock.
|
||||
|
||||
@@ -3066,7 +3082,12 @@ def _run_agent_streaming(
|
||||
logger.debug("Failed to put event to queue")
|
||||
|
||||
def _agent_status_callback(kind, message):
|
||||
"""Bridge Agent lifecycle compression status into WebUI SSE."""
|
||||
"""Bridge Agent lifecycle status into WebUI SSE.
|
||||
|
||||
Passes compression events as 'compressing' events and rate-limit/fallback
|
||||
events as 'warning' events so the frontend can surface them to the user.
|
||||
All other lifecycle messages are dropped silently.
|
||||
"""
|
||||
_message = str(message or '').strip()
|
||||
_kind = str(kind or '').strip().lower()
|
||||
if not _message:
|
||||
@@ -3081,12 +3102,17 @@ def _run_agent_streaming(
|
||||
or 'context too large' in _lower
|
||||
)
|
||||
)
|
||||
if not _is_compression_start:
|
||||
if _is_compression_start:
|
||||
put('compressing', {
|
||||
'session_id': session_id,
|
||||
'message': 'Auto-compressing context to continue...',
|
||||
})
|
||||
return
|
||||
put('compressing', {
|
||||
'session_id': session_id,
|
||||
'message': 'Auto-compressing context to continue...',
|
||||
})
|
||||
# Pass through rate-limit and fallback messages so the frontend can
|
||||
# show them as warnings via the existing messages.js 'warning' listener.
|
||||
_is_fallback_notice = _is_fallback_lifecycle_message(_kind, _message)
|
||||
if _is_fallback_notice:
|
||||
put('warning', {'type': 'fallback', 'message': _message})
|
||||
|
||||
# Initialised here (before any code that may raise) so the outer `finally`
|
||||
# block can safely check `if _checkpoint_stop is not None` even when an
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from pathlib import Path
|
||||
|
||||
from api.streaming import _is_fallback_lifecycle_message
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
|
||||
@@ -38,7 +40,7 @@ def test_auto_compression_running_sse_uses_active_session_running_card():
|
||||
assert "message:d.message||'Auto-compressing context...'" in block
|
||||
|
||||
|
||||
def test_auto_compression_running_sse_is_emitted_from_agent_lifecycle_status():
|
||||
def test_agent_status_callback_emits_compressing_and_warning_events():
|
||||
src = _read("api/streaming.py")
|
||||
start = src.find("def _agent_status_callback")
|
||||
assert start != -1, "agent status callback bridge not found"
|
||||
@@ -46,6 +48,7 @@ def test_auto_compression_running_sse_is_emitted_from_agent_lifecycle_status():
|
||||
assert end != -1, "status callback block end marker not found"
|
||||
block = src[start:end]
|
||||
|
||||
# compressing events for compression lifecycle notices
|
||||
assert "put('compressing'" in block
|
||||
assert "'session_id': session_id" in block
|
||||
assert "'message': 'Auto-compressing context to continue...'" in block
|
||||
@@ -53,11 +56,47 @@ def test_auto_compression_running_sse_is_emitted_from_agent_lifecycle_status():
|
||||
assert "'compressing'" in block
|
||||
assert "'compacting context'" in block
|
||||
assert "'context too large'" in block
|
||||
|
||||
# warning events with type:fallback for rate-limit/fallback lifecycle notices
|
||||
assert "put('warning'" in block
|
||||
assert "'type': 'fallback'" in block
|
||||
assert "'rate limited'" in src
|
||||
assert "'switching to fallback'" in src
|
||||
assert "'falling back'" in src
|
||||
assert "'fallback activated'" in src
|
||||
assert "'trying fallback'" in src
|
||||
|
||||
# Verify callback is wired to agent
|
||||
assert "'status_callback' in _agent_params" in src
|
||||
assert "_agent_kwargs['status_callback'] = _agent_status_callback" in src
|
||||
assert "agent.status_callback = _agent_kwargs.get('status_callback')" in src
|
||||
|
||||
|
||||
def test_agent_status_callback_wiring():
    """Check that api/streaming.py names the status callback and assigns it into the agent kwargs."""
    streaming_src = _read("api/streaming.py")
    # Source-level checks: the callback name must appear, and so must the
    # exact assignment that stores it under the 'status_callback' key.
    expected_snippets = (
        "_agent_status_callback",
        "_agent_kwargs['status_callback'] = _agent_status_callback",
    )
    for snippet in expected_snippets:
        assert snippet in streaming_src
|
||||
|
||||
|
||||
def test_fallback_lifecycle_message_predicate_matches_agent_emitters():
    """Exercise the fallback predicate with sample status messages.

    Lifecycle statuses carrying rate-limit/fallback wording must be accepted;
    the same wording under a non-lifecycle kind, and a lifecycle compression
    notice, must be rejected.
    """
    rate_limit_notice = "Rate limited — switching to fallback provider..."

    should_match = (
        ("lifecycle", rate_limit_notice),
        ("lifecycle", "Non-retryable error (HTTP 500) — trying fallback..."),
    )
    should_not_match = (
        # Right wording, wrong status kind.
        ("tool", rate_limit_notice),
        # Right kind, but a compression notice rather than a fallback one.
        ("lifecycle", "Auto-compressing context to continue..."),
    )

    for status_kind, status_text in should_match:
        assert _is_fallback_lifecycle_message(status_kind, status_text)
    for status_kind, status_text in should_not_match:
        assert not _is_fallback_lifecycle_message(status_kind, status_text)
|
||||
|
||||
|
||||
def test_auto_compression_completion_transition_is_preserved_after_running_listener():
|
||||
src = _read("static/messages.js")
|
||||
compressing_idx = src.find("source.addEventListener('compressing'")
|
||||
|
||||
Reference in New Issue
Block a user