mirror of
https://github.com/nesquena/hermes-webui.git
synced 2026-05-26 03:30:36 +00:00
444 lines
18 KiB
Python
444 lines
18 KiB
Python
"""Regression tests for #1361 — Stop/Cancel discards already-streamed content.
|
|
|
|
Three distinct data-loss paths on cancel:
|
|
|
|
§A Reasoning text accumulated in a thread-local `_reasoning_text` is never
|
|
visible to cancel_stream(), so it's lost on cancel.
|
|
§B Live tool calls accumulated in thread-local `_live_tool_calls` are lost
|
|
on cancel — only STREAM_PARTIAL_TEXT is captured.
|
|
§C When the entire streamed output is reasoning (no visible tokens),
|
|
_stripped is empty after regex cleanup, so NO partial assistant message
|
|
is appended — only the *Task cancelled.* marker survives.
|
|
|
|
All three fix the same "tokens-paid-for-data-loss" class of bug.
|
|
"""
|
|
|
|
import pathlib
|
|
import queue
|
|
import re
|
|
import threading
|
|
from unittest.mock import Mock, patch
|
|
|
|
import pytest
|
|
|
|
import api.config as config
|
|
import api.models as models
|
|
import api.streaming as streaming
|
|
from api.models import Session
|
|
from api.streaming import cancel_stream
|
|
|
|
REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
|
|
|
|
|
|
# ── Fixtures ────────────────────────────────────────────────────────────────
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def _isolate_session_dir(tmp_path, monkeypatch):
|
|
"""Redirect SESSION_DIR / SESSION_INDEX_FILE to an isolated temp dir."""
|
|
session_dir = tmp_path / "sessions"
|
|
session_dir.mkdir()
|
|
index_file = session_dir / "_index.json"
|
|
monkeypatch.setattr(models, "SESSION_DIR", session_dir)
|
|
monkeypatch.setattr(models, "SESSION_INDEX_FILE", index_file)
|
|
models.SESSIONS.clear()
|
|
yield
|
|
models.SESSIONS.clear()
|
|
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def _isolate_stream_state():
|
|
"""Clear all shared streaming dicts before/after each test."""
|
|
config.STREAMS.clear()
|
|
config.CANCEL_FLAGS.clear()
|
|
config.AGENT_INSTANCES.clear()
|
|
config.STREAM_PARTIAL_TEXT.clear()
|
|
# New shared dicts for §A and §B
|
|
if hasattr(config, 'STREAM_REASONING_TEXT'):
|
|
config.STREAM_REASONING_TEXT.clear()
|
|
if hasattr(config, 'STREAM_LIVE_TOOL_CALLS'):
|
|
config.STREAM_LIVE_TOOL_CALLS.clear()
|
|
yield
|
|
config.STREAMS.clear()
|
|
config.CANCEL_FLAGS.clear()
|
|
config.AGENT_INSTANCES.clear()
|
|
config.STREAM_PARTIAL_TEXT.clear()
|
|
if hasattr(config, 'STREAM_REASONING_TEXT'):
|
|
config.STREAM_REASONING_TEXT.clear()
|
|
if hasattr(config, 'STREAM_LIVE_TOOL_CALLS'):
|
|
config.STREAM_LIVE_TOOL_CALLS.clear()
|
|
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def _isolate_agent_locks():
|
|
config.SESSION_AGENT_LOCKS.clear()
|
|
yield
|
|
config.SESSION_AGENT_LOCKS.clear()
|
|
|
|
|
|
def _make_session(session_id="cancel_sid_1361",
|
|
pending_msg="Help me debug this",
|
|
messages=None):
|
|
"""Build a session in mid-stream state."""
|
|
s = Session(
|
|
session_id=session_id,
|
|
title="Test Session",
|
|
messages=messages or [],
|
|
)
|
|
s.pending_user_message = pending_msg
|
|
s.pending_attachments = []
|
|
s.pending_started_at = None
|
|
s.active_stream_id = "stream_1361"
|
|
s.save()
|
|
models.SESSIONS[session_id] = s
|
|
return s
|
|
|
|
|
|
def _setup_cancel_state(session_id, stream_id="stream_1361"):
|
|
"""Wire up STREAMS/CANCEL_FLAGS/AGENT_INSTANCES for cancel_stream()."""
|
|
config.STREAMS[stream_id] = queue.Queue()
|
|
config.CANCEL_FLAGS[stream_id] = threading.Event()
|
|
mock_agent = Mock()
|
|
mock_agent.session_id = session_id
|
|
mock_agent.interrupt = Mock()
|
|
config.AGENT_INSTANCES[stream_id] = mock_agent
|
|
return stream_id, mock_agent
|
|
|
|
|
|
# ── §A: Reasoning text lost on cancel ───────────────────────────────────────
|
|
|
|
class TestCancelPreservesReasoningText:
|
|
"""§A: _reasoning_text is thread-local and invisible to cancel_stream().
|
|
|
|
After fix: reasoning text should be persisted in a shared dict
|
|
(STREAM_REASONING_TEXT) keyed by stream_id, and cancel_stream()
|
|
should append it as a 'reasoning' field on the partial assistant message.
|
|
"""
|
|
|
|
def test_cancel_with_reasoning_only_preserves_reasoning(self):
|
|
"""Cancel during reasoning phase (no visible tokens) should persist reasoning."""
|
|
sid = "test_1361_a1"
|
|
stream_id = "stream_a1"
|
|
s = _make_session(session_id=sid)
|
|
_setup_cancel_state(sid, stream_id)
|
|
|
|
# Simulate: reasoning was accumulated but no visible tokens
|
|
reasoning = "Let me think about this step by step..."
|
|
config.STREAM_PARTIAL_TEXT[stream_id] = "" # no visible tokens
|
|
|
|
if hasattr(config, 'STREAM_REASONING_TEXT'):
|
|
config.STREAM_REASONING_TEXT[stream_id] = reasoning
|
|
|
|
cancel_stream(stream_id)
|
|
|
|
# Reload and check
|
|
s2 = models.SESSIONS[sid]
|
|
msgs = s2.messages
|
|
# There should be a partial assistant message with reasoning
|
|
assistant_msgs = [m for m in msgs if isinstance(m, dict) and m.get('role') == 'assistant']
|
|
has_reasoning = any(m.get('reasoning') for m in assistant_msgs)
|
|
assert has_reasoning, \
|
|
f"Expected reasoning field on partial assistant msg after cancel. Got messages: {assistant_msgs}"
|
|
|
|
def test_cancel_with_reasoning_and_partial_tokens_preserves_both(self):
|
|
"""Cancel mid-stream with both reasoning and some visible tokens."""
|
|
sid = "test_1361_a2"
|
|
stream_id = "stream_a2"
|
|
s = _make_session(session_id=sid)
|
|
_setup_cancel_state(sid, stream_id)
|
|
|
|
reasoning = "Let me analyze the code..."
|
|
partial_text = "Based on my analysis, the bug is in the"
|
|
config.STREAM_PARTIAL_TEXT[stream_id] = partial_text
|
|
|
|
if hasattr(config, 'STREAM_REASONING_TEXT'):
|
|
config.STREAM_REASONING_TEXT[stream_id] = reasoning
|
|
|
|
cancel_stream(stream_id)
|
|
|
|
s2 = models.SESSIONS[sid]
|
|
assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant']
|
|
# Should have partial content
|
|
partial_msgs = [m for m in assistant_msgs if m.get('_partial')]
|
|
has_content = any(m.get('content') for m in partial_msgs)
|
|
assert has_content, \
|
|
f"Expected partial assistant content after cancel. Got: {partial_msgs}"
|
|
|
|
def test_cancel_without_reasoning_dict_works_as_before(self):
|
|
"""If STREAM_REASONING_TEXT doesn't exist yet (pre-fix), cancel still works."""
|
|
sid = "test_1361_a3"
|
|
stream_id = "stream_a3"
|
|
s = _make_session(session_id=sid)
|
|
_setup_cancel_state(sid, stream_id)
|
|
|
|
config.STREAM_PARTIAL_TEXT[stream_id] = "Some partial text"
|
|
|
|
cancel_stream(stream_id)
|
|
|
|
s2 = models.SESSIONS[sid]
|
|
msgs = s2.messages
|
|
# Should have the cancel marker
|
|
has_cancel = any(
|
|
isinstance(m, dict) and m.get('role') == 'assistant' and m.get('_error')
|
|
for m in msgs
|
|
)
|
|
assert has_cancel, "Cancel marker should always be present"
|
|
|
|
|
|
# ── §B: Tool calls lost on cancel ───────────────────────────────────────────
|
|
|
|
class TestCancelPreservesToolCalls:
|
|
"""§B: _live_tool_calls is thread-local and invisible to cancel_stream().
|
|
|
|
After fix: tool calls should be persisted in a shared dict
|
|
(STREAM_LIVE_TOOL_CALLS) keyed by stream_id, and cancel_stream()
|
|
should append them as tool_call entries on the partial assistant message.
|
|
"""
|
|
|
|
def test_cancel_with_tool_calls_preserves_tools(self):
|
|
"""Cancel after tool execution should preserve the tool call info."""
|
|
sid = "test_1361_b1"
|
|
stream_id = "stream_b1"
|
|
s = _make_session(session_id=sid)
|
|
_setup_cancel_state(sid, stream_id)
|
|
|
|
config.STREAM_PARTIAL_TEXT[stream_id] = ""
|
|
|
|
if hasattr(config, 'STREAM_LIVE_TOOL_CALLS'):
|
|
config.STREAM_LIVE_TOOL_CALLS[stream_id] = [
|
|
{"name": "read_file", "args": {"path": "/tmp/test.py"}, "done": True},
|
|
{"name": "terminal", "args": {"command": "ls"}, "done": False},
|
|
]
|
|
|
|
cancel_stream(stream_id)
|
|
|
|
s2 = models.SESSIONS[sid]
|
|
assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant']
|
|
has_tools = any(m.get('_partial_tool_calls') or m.get('tool_calls') or m.get('tools') for m in assistant_msgs)
|
|
assert has_tools, \
|
|
f"Expected _partial_tool_calls on partial assistant msg after cancel. Got: {assistant_msgs}"
|
|
|
|
def test_cancel_with_tools_and_text_preserves_both(self):
|
|
"""Cancel after tools + partial text should keep both."""
|
|
sid = "test_1361_b2"
|
|
stream_id = "stream_b2"
|
|
s = _make_session(session_id=sid)
|
|
_setup_cancel_state(sid, stream_id)
|
|
|
|
config.STREAM_PARTIAL_TEXT[stream_id] = "Here's what I found:"
|
|
if hasattr(config, 'STREAM_LIVE_TOOL_CALLS'):
|
|
config.STREAM_LIVE_TOOL_CALLS[stream_id] = [
|
|
{"name": "web_search", "args": {"query": "test"}, "done": True},
|
|
]
|
|
|
|
cancel_stream(stream_id)
|
|
|
|
s2 = models.SESSIONS[sid]
|
|
assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant']
|
|
partial_msgs = [m for m in assistant_msgs if m.get('_partial')]
|
|
has_content = any(m.get('content') for m in partial_msgs)
|
|
assert has_content, \
|
|
f"Expected partial content with tools after cancel. Got: {partial_msgs}"
|
|
|
|
|
|
# ── §C: Empty _stripped skips entire append ─────────────────────────────────
|
|
|
|
class TestCancelWithReasoningOnlyNoText:
|
|
"""§C: When streaming was 100% reasoning (no visible tokens), _stripped is
|
|
empty after regex cleanup, so no partial assistant message is appended.
|
|
|
|
After fix: even when _stripped is empty, if reasoning or tool calls exist,
|
|
a partial assistant message should be appended (with no content, but with
|
|
reasoning and/or tool_calls fields).
|
|
"""
|
|
|
|
def test_reasoning_only_creates_partial_message(self):
|
|
"""Cancel after reasoning-only output should still create a partial msg."""
|
|
sid = "test_1361_c1"
|
|
stream_id = "stream_c1"
|
|
s = _make_session(session_id=sid)
|
|
_setup_cancel_state(sid, stream_id)
|
|
|
|
# Only reasoning, no visible tokens at all
|
|
config.STREAM_PARTIAL_TEXT[stream_id] = ""
|
|
|
|
if hasattr(config, 'STREAM_REASONING_TEXT'):
|
|
config.STREAM_REASONING_TEXT[stream_id] = "Deep reasoning here..."
|
|
|
|
cancel_stream(stream_id)
|
|
|
|
s2 = models.SESSIONS[sid]
|
|
assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant']
|
|
# Should NOT be only the cancel marker — there should be a partial msg
|
|
partial_msgs = [m for m in assistant_msgs if m.get('_partial')]
|
|
assert len(partial_msgs) > 0, \
|
|
f"Expected at least one partial assistant msg for reasoning-only cancel. Got: {assistant_msgs}"
|
|
|
|
def test_tools_only_creates_partial_message(self):
|
|
"""Cancel after tool-only output (no text, no reasoning) should still create a partial msg."""
|
|
sid = "test_1361_c2"
|
|
stream_id = "stream_c2"
|
|
s = _make_session(session_id=sid)
|
|
_setup_cancel_state(sid, stream_id)
|
|
|
|
config.STREAM_PARTIAL_TEXT[stream_id] = ""
|
|
|
|
if hasattr(config, 'STREAM_LIVE_TOOL_CALLS'):
|
|
config.STREAM_LIVE_TOOL_CALLS[stream_id] = [
|
|
{"name": "read_file", "args": {"path": "/tmp/x"}, "done": True},
|
|
]
|
|
|
|
cancel_stream(stream_id)
|
|
|
|
s2 = models.SESSIONS[sid]
|
|
assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant']
|
|
partial_msgs = [m for m in assistant_msgs if m.get('_partial')]
|
|
assert len(partial_msgs) > 0, \
|
|
f"Expected at least one partial assistant msg for tools-only cancel. Got: {assistant_msgs}"
|
|
|
|
def test_no_reasoning_no_tools_no_partial(self):
|
|
"""Cancel with no reasoning and no tools and no text = only cancel marker (no change)."""
|
|
sid = "test_1361_c3"
|
|
stream_id = "stream_c3"
|
|
s = _make_session(session_id=sid)
|
|
_setup_cancel_state(sid, stream_id)
|
|
|
|
config.STREAM_PARTIAL_TEXT[stream_id] = ""
|
|
|
|
cancel_stream(stream_id)
|
|
|
|
s2 = models.SESSIONS[sid]
|
|
assistant_msgs = [m for m in s2.messages if isinstance(m, dict) and m.get('role') == 'assistant']
|
|
# Should only have the cancel marker, no partial messages
|
|
partial_msgs = [m for m in assistant_msgs if m.get('_partial')]
|
|
cancel_msgs = [m for m in assistant_msgs if m.get('_error')]
|
|
assert len(partial_msgs) == 0, \
|
|
f"Expected no partial msg when nothing was streamed. Got partials: {partial_msgs}"
|
|
assert len(cancel_msgs) == 1, \
|
|
f"Expected exactly 1 cancel marker. Got: {cancel_msgs}"
|
|
|
|
# ── §D: Error paths must not lose pending user turn ─────────────────────────
|
|
|
|
def test_stream_error_materializes_pending_user_turn_before_clearing_runtime_state():
|
|
"""If a stream errors before normal merge, pending_user_message must become a
|
|
durable user message before the error marker is saved. Otherwise reload/server
|
|
reconcile makes the user's just-submitted prompt disappear.
|
|
"""
|
|
from api.streaming import _materialize_pending_user_turn_before_error
|
|
|
|
sid = "test_pending_error_d1"
|
|
s = _make_session(
|
|
session_id=sid,
|
|
pending_msg="please restart the WebUI",
|
|
messages=[{"role": "assistant", "content": "previous answer"}],
|
|
)
|
|
s.pending_started_at = 1778098700.0
|
|
s.pending_attachments = [{"name": "screenshot.png"}]
|
|
|
|
appended = _materialize_pending_user_turn_before_error(s)
|
|
|
|
assert appended is True
|
|
assert s.messages[-1]["role"] == "user"
|
|
assert s.messages[-1]["content"] == "please restart the WebUI"
|
|
assert s.messages[-1]["timestamp"] == 1778098700
|
|
assert s.messages[-1]["attachments"] == [{"name": "screenshot.png"}]
|
|
assert s.pending_user_message == "please restart the WebUI"
|
|
|
|
|
|
def test_stream_error_pending_materialization_does_not_duplicate_eager_checkpoint():
|
|
"""Eager session-save mode may already have checkpointed the current user turn;
|
|
the error materializer must not append the same user message again.
|
|
"""
|
|
from api.streaming import _materialize_pending_user_turn_before_error
|
|
|
|
sid = "test_pending_error_d2"
|
|
s = _make_session(
|
|
session_id=sid,
|
|
pending_msg="please restart the WebUI",
|
|
messages=[
|
|
{"role": "assistant", "content": "previous answer"},
|
|
{"role": "user", "content": "please restart the WebUI"},
|
|
],
|
|
)
|
|
|
|
appended = _materialize_pending_user_turn_before_error(s)
|
|
|
|
assert appended is False
|
|
assert [m.get("role") for m in s.messages].count("user") == 1
|
|
|
|
|
|
def test_stale_stream_cleanup_materializes_pending_turn_before_clearing_state():
|
|
"""A zombie/stale stream repair must preserve the pending user prompt.
|
|
|
|
If the process dies after chat_start saved pending_user_message but before the
|
|
agent merges the user turn, /api/session stale cleanup must not clear that
|
|
pending field without first appending a durable user message.
|
|
"""
|
|
from api.routes import _clear_stale_stream_state
|
|
|
|
sid = "test_pending_error_d3_stale"
|
|
s = _make_session(
|
|
session_id=sid,
|
|
pending_msg="please make the GUI fully usable",
|
|
messages=[{"role": "assistant", "content": "previous answer"}],
|
|
)
|
|
s.pending_started_at = 1778187755.0
|
|
s.pending_attachments = [{"name": "visible-state.png"}]
|
|
# No matching STREAMS entry: this simulates a dead worker/server restart.
|
|
|
|
cleared = _clear_stale_stream_state(s)
|
|
|
|
assert cleared is True
|
|
assert s.active_stream_id is None
|
|
assert s.pending_user_message is None
|
|
assert s.messages[-1]["role"] == "user"
|
|
assert s.messages[-1]["content"] == "please make the GUI fully usable"
|
|
assert s.messages[-1]["timestamp"] == 1778187755
|
|
assert s.messages[-1]["attachments"] == [{"name": "visible-state.png"}]
|
|
|
|
reloaded = models.get_session(sid, metadata_only=False)
|
|
assert reloaded.messages[-1]["role"] == "user"
|
|
assert reloaded.messages[-1]["content"] == "please make the GUI fully usable"
|
|
|
|
|
|
# ── Structural guard: pin call sites of the materialize helper at error branches ──
|
|
|
|
def test_materialize_helper_called_immediately_before_error_path_clears():
|
|
"""Pin call sites of _materialize_pending_user_turn_before_error.
|
|
|
|
Catches a future refactor that drops the call from the apperror-no-response
|
|
or outer-Exception paths in api/streaming.py while leaving the
|
|
`pending_user_message = None` clearing in place — which is exactly the
|
|
user-turn-data-loss regression #1361 was filed for.
|
|
|
|
Strategy: count how many `pending_user_message = None` clearings have the
|
|
helper call within the preceding 4 lines. Currently 2 (apperror at 2610,
|
|
outer-Exception at 3072). The success path (2716) and cancel path (3375)
|
|
legitimately don't need the helper. If a future refactor drops the helper
|
|
call from one of the error sites, this assertion fires.
|
|
"""
|
|
from pathlib import Path
|
|
src = Path(__file__).parent.parent.joinpath('api', 'streaming.py').read_text(encoding='utf-8')
|
|
lines = src.splitlines()
|
|
|
|
helper_name = '_materialize_pending_user_turn_before_error('
|
|
clear_sites = [(i + 1, line) for i, line in enumerate(lines)
|
|
if 'pending_user_message = None' in line]
|
|
assert len(clear_sites) >= 4, (
|
|
f"Expected ≥4 sites that clear pending_user_message; found {len(clear_sites)}. "
|
|
f"If api/streaming.py was refactored, re-audit this test."
|
|
)
|
|
|
|
sites_with_helper = []
|
|
for lineno, _ in clear_sites:
|
|
prev_block = '\n'.join(lines[max(0, lineno - 5):lineno - 1])
|
|
if helper_name in prev_block:
|
|
sites_with_helper.append(lineno)
|
|
|
|
# Concretely, PR #1760 wired up the helper at the apperror-no-response
|
|
# path and the outer-Exception path. Both must remain wired.
|
|
assert len(sites_with_helper) >= 2, (
|
|
f"Expected ≥2 clear sites preceded by {helper_name} within 4 lines; "
|
|
f"found {sites_with_helper}. PR #1760 / #1361 regression — re-wire the "
|
|
f"helper at the error-branch clear sites in api/streaming.py."
|
|
)
|