Stage 368: PR #2371 — Clarify interrupted turn recovery after WebUI restart by @franksong2702

This commit is contained in:
Hermes Agent
2026-05-16 17:19:05 +00:00
2 changed files with 25 additions and 16 deletions
+17 -13
View File
@@ -679,6 +679,20 @@ def _get_profile_home(profile) -> Path:
return Path(os.environ.get('HERMES_HOME') or '~/.hermes').expanduser()
def _interrupted_recovery_marker() -> dict:
    """Build the assistant message that marks a turn as interrupted.

    The recovery paths append this marker to ``session.messages`` after
    clearing the pending-turn fields, so both paths share one wording.

    Returns:
        A new dict on every call with these keys:

        * ``role`` -- always ``'assistant'``.
        * ``content`` -- Markdown text: a bold headline, a blank line, then
          a sentence stating that the WebUI process restarted and that the
          user message was preserved without any agent output.
        * ``timestamp`` -- current Unix time truncated to whole seconds.
        * ``_error`` -- ``True``, flagging the message as an error marker.
        * ``type`` -- ``'interrupted'``, a discriminator for this marker.
    """
    return {
        'role': 'assistant',
        'content': (
            '**Response interrupted.**\n\n'
            'The WebUI process restarted before this turn finished. '
            'The user message above was preserved, but no agent output was recovered.'
        ),
        # Evaluated per call so each marker carries its own creation time.
        'timestamp': int(time.time()),
        '_error': True,
        'type': 'interrupted',
    }
def _apply_core_sync_or_error_marker(
session,
core_path,
@@ -745,12 +759,7 @@ def _apply_core_sync_or_error_marker(
session.pending_user_message = None
session.pending_attachments = []
session.pending_started_at = None
session.messages.append({
'role': 'assistant',
'content': '**Previous turn did not complete.**',
'timestamp': int(time.time()),
'_error': True,
})
session.messages.append(_interrupted_recovery_marker())
session.save()
logger.info(
"Session %s: recovered pending user turn (messages non-empty), added error marker",
@@ -794,12 +803,7 @@ def _apply_core_sync_or_error_marker(
session.pending_user_message = None
session.pending_attachments = []
session.pending_started_at = None
session.messages.append({
'role': 'assistant',
'content': '**Previous turn did not complete.**',
'timestamp': int(time.time()),
'_error': True,
})
session.messages.append(_interrupted_recovery_marker())
session.save()
logger.info("Session %s: no core transcript found, added error marker", sid)
return True
@@ -811,7 +815,7 @@ def _apply_core_sync_or_error_marker(
# pending_user_message and STREAMS.pop(stream_id). Without this guard, any
# fast turn (e.g. command approval) that exits the thread before the on-disk
# pending clear has flushed gets misdiagnosed as a crashed turn, producing a
# spurious "Previous turn did not complete." marker.
# spurious "Response interrupted." marker.
#
# 30s covers the worst-case post-loop persistence window: LLM finishing a tool
# batch + lock contention with the checkpoint thread + a multi-MB session.save.
+8 -3
View File
@@ -231,7 +231,7 @@ class TestRepairStalePendingNoDeadlock:
class TestDraftRecovery:
"""When no core transcript exists, the pending user message is restored as
a recovered user turn (_recovered=True) and the error marker says
'Previous turn did not complete.' NOT 'preserved as a draft'."""
a clear restart interruption marker NOT 'preserved as a draft'."""
def test_pending_message_recovered_as_user_turn(self, hermes_home, monkeypatch):
"""When core transcript is missing, the pending_user_message is appended
@@ -310,7 +310,10 @@ class TestDraftRecovery:
assert "preserved as a draft" not in content, (
f"Error marker should not say 'preserved as a draft', got: {content}"
)
assert "Previous turn did not complete" in content
assert "Response interrupted" in content
assert "WebUI process restarted" in content
assert "user message above was preserved" in content
assert error_msgs[0].get("type") == "interrupted"
def test_pending_attachments_recovered(self, hermes_home, monkeypatch):
"""Attachments on the pending message are carried over to the recovered turn."""
@@ -604,7 +607,9 @@ class TestNonEmptyMessagesPendingCleared:
# Exactly one error marker
error_msgs = [m for m in s.messages if m.get("_error")]
assert len(error_msgs) == 1
assert "Previous turn did not complete" in error_msgs[0]["content"]
assert "Response interrupted" in error_msgs[0]["content"]
assert "WebUI process restarted" in error_msgs[0]["content"]
assert error_msgs[0].get("type") == "interrupted"
# Pending fields fully cleared
assert s.pending_user_message is None