diff --git a/CHANGELOG.md b/CHANGELOG.md index 328683ef..f00c155f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ - **PR #2165** by @starship-s — Pooled OpenAI Codex quota status surfaced in the Providers panel. Pre-fix, the Providers page presented Codex quota as if there were only one credential/account state, which was misleading when users authenticate through a credential pool with several usable credentials, temporarily exhausted credentials, failed probes, and different reset windows. Now the active provider quota card includes a credential-pool summary (available / exhausted / failed / checked counts), displays the best currently-available pool windows in the collapsed view as "Best of N", and exposes per-credential detail behind an expandable section. Exhausted credentials are intentionally NOT re-probed while their cooldown is active (matches credential-pool selection behavior, avoids generating failed quota calls from a status page). Manual refresh still means "probe now" but transient refresh failures preserve the last known-good snapshot. JWT decode (`_decode_jwt_claims_unverified`) is used only for token-shape classification (Codex OAuth JWT vs raw OpenAI API key), explicitly NOT for authorization — documented in the function docstring. Per-row plan labels only shown when verified account-limit data is available. Concurrent probing capped at `min(_CODEX_POOL_MAX_WORKERS=6, len(probe_items))` so page render time stays bounded on large pools. Transient `None` probe results are NOT cached (only known unavailable/exhausted states are cached); 32-test regression suite covering pool snapshot, concurrent probe, JWT detection, cache invalidation, transient-vs-known cache distinction, and i18n parity across all currently-supported locales. Scoped to OpenAI Codex (the only provider with the credential-pool/account-limit path needed to surface this accurately). +### Fixed + +- Onboarding provider endpoint probes now classify DNS-style failures more consistently as `dns`, including `getaddrinfo` failures wrapped by `URLError`/`OSError` and network failures against reserved non-resolvable TLDs such as `.invalid`, `.test`, and `.example`. + ## [v0.51.64] — 2026-05-14 — Release AN (stage-357 — 3-PR small batch — docker_init k8s whoami fallback + PWA manifest session routes (closes #2226) + aux title test coverage) ### Fixed diff --git a/api/onboarding.py b/api/onboarding.py index 806e4856..7cf1bbe1 100644 --- a/api/onboarding.py +++ b/api/onboarding.py @@ -312,6 +312,44 @@ class _NoRedirectHandler(urllib.request.HTTPRedirectHandler): _PROBE_OPENER = urllib.request.build_opener(_NoRedirectHandler()) +_DNS_ONLY_TEST_TLDS = frozenset({"invalid", "test", "example"}) + + +def _hostname_uses_reserved_dns_tld(hostname: str | None) -> bool: + host = str(hostname or "").strip().rstrip(".").lower() + if not host or "." not in host: + return False + return host.rsplit(".", 1)[-1] in _DNS_ONLY_TEST_TLDS + + +def _exception_chain_text(exc) -> str: + parts: list[str] = [] + seen: set[int] = set() + cur = exc + while cur is not None and id(cur) not in seen: + seen.add(id(cur)) + parts.append(str(cur)) + cur = getattr(cur, "__cause__", None) or getattr(cur, "__context__", None) + return " ".join(parts).lower() + + +def _probe_failure_is_dns(exc, hostname: str | None) -> bool: + if isinstance(exc, socket.gaierror): + return True + text = _exception_chain_text(exc) + if any( + marker in text + for marker in ( + "getaddrinfo", + "gaierror", + "name or service not known", + "temporary failure in name resolution", + "nodename nor servname provided", + "no address associated with hostname", + ) + ): + return True + return _hostname_uses_reserved_dns_tld(hostname) def probe_provider_endpoint( @@ -416,7 +454,7 @@ def probe_provider_endpoint( reason = exc.reason if isinstance(reason, socket.timeout) or "timed out" in str(reason).lower(): return {"ok": False, "error": "timeout", "detail": f"connection timed out after {timeout:g}s"} - if isinstance(reason, socket.gaierror): + if _probe_failure_is_dns(reason, parsed.hostname): return { "ok": False, "error": "dns", @@ -433,6 +471,12 @@ def probe_provider_endpoint( except (TimeoutError, socket.timeout): return {"ok": False, "error": "timeout", "detail": f"connection timed out after {timeout:g}s"} except Exception as exc: # pragma: no cover — defensive net + if _probe_failure_is_dns(exc, parsed.hostname): + return { + "ok": False, + "error": "dns", + "detail": f"could not resolve host '{parsed.hostname}'", + } logger.debug("probe_provider_endpoint unexpected error", exc_info=True) return {"ok": False, "error": "unreachable", "detail": str(exc)[:200]} diff --git a/tests/test_issue1499_onboarding_probe.py b/tests/test_issue1499_onboarding_probe.py index 2adefcb1..2e1e45e9 100644 --- a/tests/test_issue1499_onboarding_probe.py +++ b/tests/test_issue1499_onboarding_probe.py @@ -177,6 +177,40 @@ class TestIssue1499OnboardingProbe: assert r["ok"] is False assert r["error"] == "dns", f"Expected dns error, got {r}" + def test_dns_failure_wrapped_by_urlerror(self, monkeypatch): + """Proxy/network stacks can wrap DNS failures as generic URLError.""" + from api import onboarding + + class FakeOpener: + def open(self, *_args, **_kwargs): + raise urllib.error.URLError(OSError("getaddrinfo failed")) + + monkeypatch.setattr(onboarding, "_PROBE_OPENER", FakeOpener()) + r = onboarding.probe_provider_endpoint( + "lmstudio", + "http://model-server.example:1234/v1", + timeout=2.0, + ) + assert r["ok"] is False + assert r["error"] == "dns", f"Expected dns error, got {r}" + + def test_reserved_dns_tld_network_failure_classifies_as_dns(self, monkeypatch): + """Reserved non-resolvable TLDs stay dns even if the stack says generic.""" + from api import onboarding + + class FakeOpener: + def open(self, *_args, **_kwargs): + raise urllib.error.URLError(OSError("network is unreachable")) + + monkeypatch.setattr(onboarding, "_PROBE_OPENER", FakeOpener()) + r = onboarding.probe_provider_endpoint( + "lmstudio", + "http://this-host-definitely-does-not-exist-zxq987.invalid:1234/v1", + timeout=2.0, + ) + assert r["ok"] is False + assert r["error"] == "dns", f"Expected dns error, got {r}" + def test_connect_refused(self): """Connecting to a port nobody's listening on → error='connect_refused'.""" from api.onboarding import probe_provider_endpoint