hermes-webui/tests/test_silent_failure_detection.py

"""Tests for the silent-failure detection fix in api/streaming.py.

The core logic lives in the module-level helper ``_has_new_assistant_reply``,
which decides whether *new* messages (beyond the pre-turn history) contain an
assistant message with non-empty content.

These tests cover the 8 scenarios specified in the task description to ensure
that historical assistant messages don't mask a silent provider failure.
"""

import pytest

from api.streaming import _has_new_assistant_reply


# ── Helpers ──────────────────────────────────────────────────────────────────

def _msg(role: str, content: str) -> dict:
    """Shorthand for building a message dict."""
    return {"role": role, "content": content}


# ── Test scenarios ───────────────────────────────────────────────────────────

class TestHasNewAssistantReply:
    """All 8 scenarios from the task specification."""

    # Scenario 1 ──────────────────────────────────────────────────────────
    def test_history_has_assistant_but_current_turn_failed(self):
        """History has assistant content, but no new assistant was added."""
        prev = [
            _msg("user", "hi"),
            _msg("assistant", "hello"),
            _msg("user", "what's up?"),
        ]
        all_msgs = list(prev)  # same length — nothing new
        assert _has_new_assistant_reply(all_msgs, len(prev)) is False

    # Scenario 2 ──────────────────────────────────────────────────────────
    def test_history_has_assistant_and_new_reply_added(self):
        """New assistant reply was appended this turn → should detect it."""
        prev = [
            _msg("user", "hi"),
            _msg("assistant", "hello"),
            _msg("user", "what's up?"),
        ]
        all_msgs = prev + [_msg("assistant", "not much, you?")]
        assert _has_new_assistant_reply(all_msgs, len(prev)) is True

    # Scenario 3 ──────────────────────────────────────────────────────────
    def test_empty_history_empty_result(self):
        """Completely empty conversation → no assistant reply."""
        assert _has_new_assistant_reply([], 0) is False

    # Scenario 4 ──────────────────────────────────────────────────────────
    def test_new_assistant_with_empty_content(self):
        """New assistant message added but content is empty string."""
        prev = [
            _msg("user", "hello"),
            _msg("assistant", "hi there"),
        ]
        all_msgs = prev + [_msg("assistant", "")]
        assert _has_new_assistant_reply(all_msgs, len(prev)) is False

    # Scenario 5 ──────────────────────────────────────────────────────────
    def test_new_assistant_with_whitespace_content(self):
        """New assistant message added but content is only whitespace."""
        prev = [
            _msg("user", "hello"),
            _msg("assistant", "hi there"),
        ]
        all_msgs = prev + [_msg("assistant", "  \n  ")]
        assert _has_new_assistant_reply(all_msgs, len(prev)) is False

    # Scenario 6 ──────────────────────────────────────────────────────────
    def test_long_history_new_assistant_at_tail(self):
        """Many historical messages; two new ones at the end, last is assistant."""
        prev = [_msg("user", f"msg {i}") if i % 2 == 0 else _msg("assistant", f"reply {i}")
                for i in range(10)]
        # prev has 10 messages (indices 0..9)
        all_msgs = prev + [
            _msg("user", "new question"),
            _msg("assistant", "new answer with real content"),
        ]
        assert _has_new_assistant_reply(all_msgs, len(prev)) is True

    # Scenario 7 ──────────────────────────────────────────────────────────
    def test_result_length_equals_prev_len(self):
        """No new messages at all — result length == prev length."""
        prev = [
            _msg("user", "hi"),
            _msg("assistant", "hey"),
        ]
        all_msgs = list(prev)
        assert _has_new_assistant_reply(all_msgs, len(prev)) is False

    # Scenario 8 ──────────────────────────────────────────────────────────
    def test_result_shorter_than_prev_len_returns_false(self):
        """Edge-case: result messages < prev_count cannot prove a new reply.

        Shrunken result history has no reliable new-message slice. Scanning
        the shorter list can mistake an older assistant reply for a current
        turn reply, which would hide the silent-failure banner.
        """
        prev_count = 5
        # Only 3 messages in result — shorter than prev_count
        all_msgs = [
            _msg("user", "a"),
            _msg("assistant", "b"),
            _msg("user", "c"),
        ]
        assert _has_new_assistant_reply(all_msgs, prev_count) is False

        all_msgs_no_asst = [
            _msg("user", "a"),
            _msg("user", "b"),
            _msg("user", "c"),
        ]
        assert _has_new_assistant_reply(all_msgs_no_asst, prev_count) is False


# ── Additional edge-case tests ───────────────────────────────────────────────

class TestHasNewAssistantReplyEdgeCases:
    """Extra coverage for content field variants."""

    def test_content_is_none(self):
        """assistant message with content=None should not count."""
        prev = [_msg("user", "hi")]
        all_msgs = prev + [{"role": "assistant", "content": None}]
        assert _has_new_assistant_reply(all_msgs, len(prev)) is False

    def test_content_is_missing_key(self):
        """assistant message without 'content' key should not count."""
        prev = [_msg("user", "hi")]
        all_msgs = prev + [{"role": "assistant"}]
        assert _has_new_assistant_reply(all_msgs, len(prev)) is False

    def test_non_assistant_role_in_new_messages(self):
        """Only 'assistant' role counts; 'user' or 'system' in new msgs → False."""
        prev = [_msg("user", "hi")]
        all_msgs = prev + [_msg("user", "follow-up")]
        assert _has_new_assistant_reply(all_msgs, len(prev)) is False

    def test_prev_count_zero_with_assistant(self):
        """prev_count=0 with a new assistant → scans from index 0, finds it."""
        all_msgs = [_msg("assistant", "hello")]
        assert _has_new_assistant_reply(all_msgs, 0) is True

    def test_prev_count_zero_without_assistant(self):
        """prev_count=0 with only user messages → False."""
        all_msgs = [_msg("user", "hello")]
        assert _has_new_assistant_reply(all_msgs, 0) is False

    def test_multiple_new_assistant_first_empty_second_has_content(self):
        """First new assistant is empty, second has content → True."""
        prev = [_msg("user", "q")]
        all_msgs = prev + [
            _msg("assistant", ""),
            _msg("assistant", "actual content"),
        ]
        assert _has_new_assistant_reply(all_msgs, len(prev)) is True

    def test_multiple_new_assistant_all_empty(self):
        """Multiple new assistant messages, all empty → False."""
        prev = [_msg("user", "q")]
        all_msgs = prev + [
            _msg("assistant", ""),
            _msg("assistant", "   "),
        ]
        assert _has_new_assistant_reply(all_msgs, len(prev)) is False