mirror of
https://github.com/nesquena/hermes-webui.git
synced 2026-05-24 18:50:15 +00:00
fix(providers): avoid caching transient quota probe failures
This commit is contained in:
+18
-7
@@ -104,10 +104,12 @@ _ACCOUNT_USAGE_PARENT_DEATHSIG_BOOTSTRAP = (
|
||||
_account_usage_probe_semaphore: threading.BoundedSemaphore | None = None
|
||||
|
||||
# Short-lived account-usage cache. The Codex pooled probe may check multiple
|
||||
# credentials, so cache the sanitized snapshot briefly to avoid re-querying the
|
||||
# credentials, so cache sanitized snapshots briefly to avoid re-querying the
|
||||
# provider on every Settings repaint/profile-panel refresh. Pool composition
|
||||
# changes can be stale for at most this TTL; that is preferred to hammering the
|
||||
# provider usage API while the Settings panel is open.
|
||||
# provider usage API while the Settings panel is open. Transient None probe
|
||||
# results are intentionally not cached; known exhausted/unavailable states are
|
||||
# represented as non-None snapshots and remain cacheable.
|
||||
_account_usage_status_cache: dict[tuple[str, str, str], tuple[float, Any]] = {}
|
||||
_account_usage_status_cache_lock = threading.Lock()
|
||||
|
||||
@@ -461,6 +463,9 @@ def _best_remaining_by_window(rows):
|
||||
"credential_label": label,
|
||||
}
|
||||
current = best.get(window_label.lower())
|
||||
# The normalized Codex account-limit payload currently exposes
|
||||
# percentages, not absolute request/token capacity. If absolute
|
||||
# remaining capacity becomes available, prefer it here.
|
||||
if current is None or float(remaining) > float(current.get("remaining_percent") or -1):
|
||||
best[window_label.lower()] = candidate
|
||||
return list(best.values())
|
||||
@@ -713,6 +718,12 @@ def _load_env_file(env_path: Path) -> dict[str, str]:
|
||||
|
||||
|
||||
def _decode_jwt_claims_unverified(token: str) -> dict[str, Any]:
|
||||
"""Decode JWT claims for token-shape classification only.
|
||||
|
||||
The signature is intentionally not verified because this helper is not an
|
||||
authorization decision: it only prevents a Codex OAuth JWT-shaped value from
|
||||
being treated as a raw OpenAI API key in provider-card detection.
|
||||
"""
|
||||
if not isinstance(token, str) or token.count(".") != 2:
|
||||
return {}
|
||||
payload = token.split(".", 2)[1]
|
||||
@@ -1185,15 +1196,15 @@ def invalidate_account_usage_status_cache(provider_id: str | None = None) -> Non
|
||||
def _set_cached_account_usage(
|
||||
cache_key: tuple[str, str, str],
|
||||
snapshot: Any,
|
||||
*,
|
||||
preserve_non_none: bool = False,
|
||||
) -> None:
|
||||
now = time.monotonic()
|
||||
with _account_usage_status_cache_lock:
|
||||
if preserve_non_none and snapshot is None:
|
||||
if snapshot is None:
|
||||
cached = _account_usage_status_cache.get(cache_key)
|
||||
if cached is not None and cached[1] is not None:
|
||||
return
|
||||
_account_usage_status_cache.pop(cache_key, None)
|
||||
return
|
||||
_account_usage_status_cache[cache_key] = (now, snapshot)
|
||||
expired = [
|
||||
key for key, (fetched_at, _snapshot) in _account_usage_status_cache.items()
|
||||
@@ -1284,11 +1295,11 @@ def _fetch_account_usage_with_profile_context(provider: str, *, refresh: bool =
|
||||
home,
|
||||
api_key=api_key,
|
||||
)
|
||||
_set_cached_account_usage(cache_key, snapshot, preserve_non_none=refresh)
|
||||
_set_cached_account_usage(cache_key, snapshot)
|
||||
return snapshot
|
||||
except Exception:
|
||||
logger.debug("Failed to fetch account usage for %s", provider, exc_info=True)
|
||||
_set_cached_account_usage(cache_key, None, preserve_non_none=refresh)
|
||||
_set_cached_account_usage(cache_key, None)
|
||||
return None
|
||||
|
||||
|
||||
|
||||
@@ -999,18 +999,30 @@ def test_account_usage_profile_cache_invalidates_with_credential_pool_cache(monk
|
||||
|
||||
|
||||
|
||||
def test_account_usage_profile_fetch_caches_none_results(monkeypatch, tmp_path):
|
||||
"""Known-empty/unavailable account usage should be cached, not re-probed each refresh."""
|
||||
def test_account_usage_profile_fetch_caches_unavailable_snapshots(monkeypatch, tmp_path):
|
||||
"""Known unavailable account snapshots should be cached like available ones."""
|
||||
import api.providers as providers
|
||||
|
||||
monkeypatch.setattr(profiles, "get_active_hermes_home", lambda: tmp_path)
|
||||
old_cfg, old_mtime = _with_config(model={"provider": "openai-codex"})
|
||||
providers._account_usage_status_cache.clear()
|
||||
calls = []
|
||||
unavailable_snapshot = SimpleNamespace(
|
||||
provider="openai-codex",
|
||||
source="usage_api_pool",
|
||||
title="Account limits",
|
||||
plan=None,
|
||||
windows=(),
|
||||
details=("0/1 credentials available", "1 exhausted"),
|
||||
available=False,
|
||||
unavailable_reason="Credential pool exhausted until 2030-03-17T18:46:40Z.",
|
||||
fetched_at=datetime(2030, 3, 17, 12, 30, tzinfo=timezone.utc),
|
||||
pool={"total_credentials": 1, "available_credentials": 0, "exhausted_credentials": 1, "credentials": []},
|
||||
)
|
||||
|
||||
def fake_fetch(provider, home, api_key=None):
|
||||
calls.append((provider, str(home), api_key))
|
||||
return None
|
||||
return unavailable_snapshot
|
||||
|
||||
monkeypatch.setattr(providers, "_agent_fetch_account_usage_for_home", fake_fetch)
|
||||
try:
|
||||
@@ -1020,11 +1032,54 @@ def test_account_usage_profile_fetch_caches_none_results(monkeypatch, tmp_path):
|
||||
providers._account_usage_status_cache.clear()
|
||||
_restore_config(old_cfg, old_mtime)
|
||||
|
||||
assert first is None
|
||||
assert second is None
|
||||
assert first is unavailable_snapshot
|
||||
assert second is unavailable_snapshot
|
||||
assert calls == [("openai-codex", str(tmp_path), None)]
|
||||
|
||||
|
||||
def test_account_usage_profile_fetch_does_not_cache_transient_none_results(monkeypatch, tmp_path):
|
||||
"""Transient None probe results should not mask the next successful status check."""
|
||||
import api.providers as providers
|
||||
|
||||
monkeypatch.setattr(profiles, "get_active_hermes_home", lambda: tmp_path)
|
||||
old_cfg, old_mtime = _with_config(model={"provider": "openai-codex"})
|
||||
providers._account_usage_status_cache.clear()
|
||||
calls = []
|
||||
recovered_snapshot = SimpleNamespace(
|
||||
provider="openai-codex",
|
||||
source="usage_api_pool",
|
||||
title="Account limits",
|
||||
plan=None,
|
||||
windows=(),
|
||||
details=(),
|
||||
available=True,
|
||||
unavailable_reason=None,
|
||||
fetched_at=datetime(2030, 3, 17, 12, 31, tzinfo=timezone.utc),
|
||||
pool={"total_credentials": 1, "credentials": []},
|
||||
)
|
||||
|
||||
def fake_fetch(provider, home, api_key=None):
|
||||
calls.append((provider, str(home), api_key))
|
||||
return None if len(calls) == 1 else recovered_snapshot
|
||||
|
||||
monkeypatch.setattr(providers, "_agent_fetch_account_usage_for_home", fake_fetch)
|
||||
try:
|
||||
first = providers._fetch_account_usage_with_profile_context("openai-codex")
|
||||
second = providers._fetch_account_usage_with_profile_context("openai-codex")
|
||||
third = providers._fetch_account_usage_with_profile_context("openai-codex")
|
||||
finally:
|
||||
providers._account_usage_status_cache.clear()
|
||||
_restore_config(old_cfg, old_mtime)
|
||||
|
||||
assert first is None
|
||||
assert second is recovered_snapshot
|
||||
assert third is recovered_snapshot
|
||||
assert calls == [
|
||||
("openai-codex", str(tmp_path), None),
|
||||
("openai-codex", str(tmp_path), None),
|
||||
]
|
||||
|
||||
|
||||
def test_account_usage_profile_fetches_can_overlap_for_different_homes(monkeypatch, tmp_path):
|
||||
"""Different profile quota fetches should not serialize on cron's global lock."""
|
||||
import api.providers as providers
|
||||
|
||||
Reference in New Issue
Block a user