From af381ef12cd12bd04bb156b81ef798e9b994984f Mon Sep 17 00:00:00 2001 From: samahn0601 <197455947+samahn0601@users.noreply.github.com> Date: Mon, 18 May 2026 18:03:00 +0900 Subject: [PATCH] fix(telegram): retry wrapped connect timeouts --- gateway/platforms/telegram.py | 48 ++++++++++++++-- .../gateway/test_telegram_thread_fallback.py | 57 +++++++++++++++++++ 2 files changed, 99 insertions(+), 6 deletions(-) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 25d50a8974..589eb6abe8 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -707,6 +707,34 @@ class TelegramAdapter(BasePlatformAdapter): pass return isinstance(error, OSError) + @staticmethod + def _looks_like_connect_timeout(error: Exception) -> bool: + """Return True when a Telegram TimedOut wraps a connect-timeout. + + A plain Telegram TimedOut may mean the request reached Telegram and + should not be re-sent. A ConnectTimeout means the TCP connection was + never established, so retrying is safe and prevents silent drops. + """ + seen: set[int] = set() + stack: list[BaseException] = [error] + while stack: + cur = stack.pop() + ident = id(cur) + if ident in seen: + continue + seen.add(ident) + name = cur.__class__.__name__.lower() + text = str(cur).lower() + if "connecttimeout" in name or "connect timeout" in text or "connect timed out" in text: + return True + cause = getattr(cur, "__cause__", None) + context = getattr(cur, "__context__", None) + if cause is not None: + stack.append(cause) + if context is not None: + stack.append(context) + return False + def _coerce_bool_extra(self, key: str, default: bool = False) -> bool: value = self.config.extra.get(key) if getattr(self.config, "extra", None) else None if value is None: @@ -1708,10 +1736,15 @@ class TelegramAdapter(BasePlatformAdapter): continue # Other BadRequest errors are permanent — don't retry raise - # TimedOut is also a subclass of NetworkError but - # indicates the request may have reached the server — - # retrying risks duplicate message delivery. - if _TimedOut and isinstance(send_err, _TimedOut): + # TimedOut is also a subclass of NetworkError. A + # generic timeout may have reached Telegram, so don't + # retry; a wrapped ConnectTimeout means no connection + # was established, so retrying is safe. + if ( + _TimedOut + and isinstance(send_err, _TimedOut) + and not self._looks_like_connect_timeout(send_err) + ): raise if _send_attempt < 2: wait = 2 ** _send_attempt @@ -1764,11 +1797,14 @@ class TelegramAdapter(BasePlatformAdapter): self.name, ) return SendResult(success=False, error="message_too_long") - # TimedOut means the request may have reached Telegram — + # TimedOut usually means the request may have reached Telegram — # mark as non-retryable so _send_with_retry() doesn't re-send. + # Exception: wrapped ConnectTimeout, where no connection was + # established; retrying is safe and prevents silent drops. _to = locals().get("_TimedOut") is_timeout = (_to and isinstance(e, _to)) or "timed out" in err_str - return SendResult(success=False, error=str(e), retryable=not is_timeout) + is_connect_timeout = self._looks_like_connect_timeout(e) + return SendResult(success=False, error=str(e), retryable=(is_connect_timeout or not is_timeout)) async def edit_message( self, diff --git a/tests/gateway/test_telegram_thread_fallback.py b/tests/gateway/test_telegram_thread_fallback.py index 643588cac6..d8bfc6261b 100644 --- a/tests/gateway/test_telegram_thread_fallback.py +++ b/tests/gateway/test_telegram_thread_fallback.py @@ -1095,6 +1095,63 @@ async def test_send_does_not_retry_timeout(): assert attempt[0] == 1 +@pytest.mark.asyncio +async def test_send_retries_wrapped_connect_timeout(): + """Retry TimedOut only when it wraps a TCP connect timeout. + + A generic Telegram TimedOut may have reached Telegram and must not be + retried, but an underlying ConnectTimeout means the connection was never + established. Retrying prevents a silent drop without risking duplicates. + """ + adapter = _make_adapter() + + class FakeConnectTimeout(Exception): + pass + + attempt = [0] + + async def mock_send_message(**kwargs): + attempt[0] += 1 + if attempt[0] < 3: + err = FakeTimedOut("Timed out") + err.__cause__ = FakeConnectTimeout("connect timed out") + raise err + return SimpleNamespace(message_id=201) + + adapter._bot = SimpleNamespace(send_message=mock_send_message) + + result = await adapter.send(chat_id="123", content="test message") + + assert result.success is True + assert result.message_id == "201" + assert attempt[0] == 3 + + +@pytest.mark.asyncio +async def test_send_marks_wrapped_connect_timeout_retryable_after_exhaustion(): + """Final SendResult remains retryable for outer gateway retry handling.""" + adapter = _make_adapter() + + class FakeConnectTimeout(Exception): + pass + + attempt = [0] + + async def mock_send_message(**kwargs): + attempt[0] += 1 + err = FakeTimedOut("Timed out") + err.__context__ = FakeConnectTimeout("ConnectTimeout") + raise err + + adapter._bot = SimpleNamespace(send_message=mock_send_message) + + result = await adapter.send(chat_id="123", content="test message") + + assert result.success is False + assert result.retryable is True + assert attempt[0] == 3 + + @pytest.mark.asyncio async def test_thread_fallback_only_fires_once(): """After clearing thread_id, subsequent chunks should also use None."""