mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-21 03:39:54 +00:00
feat(openrouter): add response caching support (#19132)
Enable OpenRouter's response caching feature (beta) via X-OpenRouter-Cache
headers. When enabled, identical API requests return cached responses for
free (zero billing), reducing both latency and cost.
Configuration via config.yaml:
openrouter:
response_cache: true # default: on
response_cache_ttl: 300 # 1-86400 seconds
Changes:
- Add openrouter config section to DEFAULT_CONFIG (response_cache + TTL)
- Add build_or_headers() in auxiliary_client.py that builds attribution
headers plus optional cache headers based on config
- Replace inline _OR_HEADERS dicts with build_or_headers() at all 5 sites:
run_agent.py __init__, _apply_client_headers_for_base_url(), and
auxiliary_client.py _try_openrouter() + _to_async_client()
- Add _check_openrouter_cache_status() method to AIAgent that reads
X-OpenRouter-Cache-Status from streaming response headers and logs
HIT/MISS status
- Document in cli-config.yaml.example
- Add 28 tests (22 unit + 6 integration)
Ref: https://openrouter.ai/docs/guides/features/response-caching
This commit is contained in:
@@ -259,13 +259,68 @@ _PROVIDERS_WITHOUT_VISION: frozenset = frozenset({
|
||||
"kimi-coding-cn",
|
||||
})
|
||||
|
||||
# OpenRouter app attribution headers
|
||||
_OR_HEADERS = {
|
||||
# OpenRouter app attribution headers (base — always sent)
|
||||
_OR_HEADERS_BASE = {
|
||||
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||
}
|
||||
|
||||
# Truthy values for boolean env-var parsing.
|
||||
_TRUTHY_ENV_VALUES = frozenset({"1", "true", "yes", "on"})
|
||||
|
||||
|
||||
def build_or_headers(or_config: dict | None = None) -> dict:
|
||||
"""Build OpenRouter headers, optionally including response-cache headers.
|
||||
|
||||
Precedence for response cache: env var > config.yaml > default (enabled).
|
||||
|
||||
Environment variables:
|
||||
``HERMES_OPENROUTER_CACHE`` — truthy (``1``/``true``/``yes``/``on``)
|
||||
enables caching; ``0``/``false``/``no``/``off`` disables.
|
||||
Overrides ``openrouter.response_cache`` in config.yaml.
|
||||
``HERMES_OPENROUTER_CACHE_TTL`` — integer seconds (1-86400).
|
||||
Overrides ``openrouter.response_cache_ttl`` in config.yaml.
|
||||
|
||||
*or_config* is the ``openrouter`` section from config.yaml. When *None*,
|
||||
falls back to reading config from disk via ``load_config()``.
|
||||
"""
|
||||
headers = dict(_OR_HEADERS_BASE)
|
||||
|
||||
# Resolve config from disk if not provided.
|
||||
if or_config is None:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
or_config = load_config().get("openrouter", {})
|
||||
except Exception:
|
||||
or_config = {}
|
||||
|
||||
# Determine cache enabled: env var overrides config.
|
||||
env_cache = os.environ.get("HERMES_OPENROUTER_CACHE", "").strip().lower()
|
||||
if env_cache:
|
||||
cache_enabled = env_cache in _TRUTHY_ENV_VALUES
|
||||
else:
|
||||
cache_enabled = or_config.get("response_cache", False)
|
||||
|
||||
if not cache_enabled:
|
||||
return headers
|
||||
|
||||
headers["X-OpenRouter-Cache"] = "true"
|
||||
|
||||
# Determine TTL: env var overrides config.
|
||||
env_ttl = os.environ.get("HERMES_OPENROUTER_CACHE_TTL", "").strip()
|
||||
if env_ttl:
|
||||
if env_ttl.isdigit():
|
||||
ttl = int(env_ttl)
|
||||
if 1 <= ttl <= 86400:
|
||||
headers["X-OpenRouter-Cache-TTL"] = str(ttl)
|
||||
else:
|
||||
ttl = or_config.get("response_cache_ttl", 300)
|
||||
if isinstance(ttl, (int, float)) and 1 <= ttl <= 86400:
|
||||
headers["X-OpenRouter-Cache-TTL"] = str(int(ttl))
|
||||
|
||||
return headers
|
||||
|
||||
# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
|
||||
# referrerUrl and X-Title maps to appName in the gateway's analytics.
|
||||
from hermes_cli import __version__ as _HERMES_VERSION
|
||||
@@ -1158,14 +1213,14 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
|
||||
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
|
||||
logger.debug("Auxiliary client: OpenRouter via pool")
|
||||
return OpenAI(api_key=or_key, base_url=base_url,
|
||||
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
|
||||
default_headers=build_or_headers()), _OPENROUTER_MODEL
|
||||
|
||||
or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
|
||||
if not or_key:
|
||||
return None, None
|
||||
logger.debug("Auxiliary client: OpenRouter")
|
||||
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
|
||||
default_headers=_OR_HEADERS), _OPENROUTER_MODEL
|
||||
default_headers=build_or_headers()), _OPENROUTER_MODEL
|
||||
|
||||
|
||||
def _describe_openrouter_unavailable() -> str:
|
||||
@@ -1911,7 +1966,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
|
||||
}
|
||||
sync_base_url = str(sync_client.base_url)
|
||||
if base_url_host_matches(sync_base_url, "openrouter.ai"):
|
||||
async_kwargs["default_headers"] = dict(_OR_HEADERS)
|
||||
async_kwargs["default_headers"] = build_or_headers()
|
||||
elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
|
||||
from hermes_cli.copilot_auth import copilot_request_headers
|
||||
|
||||
|
||||
@@ -121,6 +121,18 @@ model:
|
||||
# # Data policy: "allow" (default) or "deny" to exclude providers that may store data
|
||||
# # data_collection: "deny"
|
||||
|
||||
# =============================================================================
|
||||
# OpenRouter Response Caching (only applies when using OpenRouter)
|
||||
# =============================================================================
|
||||
# Cache identical API responses at the OpenRouter edge for free instant replays.
|
||||
# When enabled, identical requests (same model, messages, parameters) return
|
||||
# cached responses with zero billing. Separate from Anthropic prompt caching.
|
||||
# See: https://openrouter.ai/docs/guides/features/response-caching
|
||||
#
|
||||
# openrouter:
|
||||
# response_cache: true # Enable response caching (default: true)
|
||||
# response_cache_ttl: 300 # Cache TTL in seconds, 1-86400 (default: 300)
|
||||
|
||||
# =============================================================================
|
||||
# Git Worktree Isolation
|
||||
# =============================================================================
|
||||
|
||||
@@ -644,6 +644,18 @@ DEFAULT_CONFIG = {
|
||||
"cache_ttl": "5m",
|
||||
},
|
||||
|
||||
# OpenRouter-specific settings.
|
||||
# response_cache: enable OpenRouter response caching (X-OpenRouter-Cache header).
|
||||
# When enabled, identical requests return cached responses for free (zero billing).
|
||||
# This is separate from Anthropic prompt caching and works alongside it.
|
||||
# See: https://openrouter.ai/docs/guides/features/response-caching
|
||||
# response_cache_ttl: how long cached responses remain valid, in seconds (1-86400).
|
||||
# Default 300 (5 minutes). Only used when response_cache is enabled.
|
||||
"openrouter": {
|
||||
"response_cache": True,
|
||||
"response_cache_ttl": 300,
|
||||
},
|
||||
|
||||
# AWS Bedrock provider configuration.
|
||||
# Only used when model.provider is "bedrock".
|
||||
"bedrock": {
|
||||
|
||||
+33
-7
@@ -1258,6 +1258,10 @@ class AIAgent:
|
||||
# after each API call. Accessed by /usage slash command.
|
||||
self._rate_limit_state: Optional["RateLimitState"] = None
|
||||
|
||||
# OpenRouter response cache hit counter — incremented when
|
||||
# X-OpenRouter-Cache-Status: HIT is seen in streaming response headers.
|
||||
self._or_cache_hits: int = 0
|
||||
|
||||
# Centralized logging — agent.log (INFO+) and errors.log (WARNING+)
|
||||
# both live under ~/.hermes/logs/. Idempotent, so gateway mode
|
||||
# (which creates a new AIAgent per message) won't duplicate handlers.
|
||||
@@ -1421,11 +1425,8 @@ class AIAgent:
|
||||
client_kwargs["args"] = self.acp_args
|
||||
effective_base = base_url
|
||||
if base_url_host_matches(effective_base, "openrouter.ai"):
|
||||
client_kwargs["default_headers"] = {
|
||||
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||
}
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
client_kwargs["default_headers"] = build_or_headers()
|
||||
elif base_url_host_matches(effective_base, "api.routermint.com"):
|
||||
client_kwargs["default_headers"] = _routermint_headers()
|
||||
elif base_url_host_matches(effective_base, "api.githubcopilot.com"):
|
||||
@@ -4580,6 +4581,28 @@ class AIAgent:
|
||||
"""Return the last captured RateLimitState, or None."""
|
||||
return self._rate_limit_state
|
||||
|
||||
def _check_openrouter_cache_status(self, http_response: Any) -> None:
|
||||
"""Read X-OpenRouter-Cache-Status from response headers and log it.
|
||||
|
||||
Increments ``_or_cache_hits`` on HIT so callers can report savings.
|
||||
"""
|
||||
if http_response is None:
|
||||
return
|
||||
headers = getattr(http_response, "headers", None)
|
||||
if not headers:
|
||||
return
|
||||
try:
|
||||
status = headers.get("x-openrouter-cache-status")
|
||||
if not status:
|
||||
return
|
||||
if status.upper() == "HIT":
|
||||
self._or_cache_hits += 1
|
||||
logger.info("OpenRouter response cache HIT (total: %d)", self._or_cache_hits)
|
||||
else:
|
||||
logger.debug("OpenRouter response cache %s", status.upper())
|
||||
except Exception:
|
||||
pass # Never let header parsing break the agent loop
|
||||
|
||||
def get_activity_summary(self) -> dict:
|
||||
"""Return a snapshot of the agent's current activity for diagnostics.
|
||||
|
||||
@@ -6157,10 +6180,10 @@ class AIAgent:
|
||||
return True
|
||||
|
||||
def _apply_client_headers_for_base_url(self, base_url: str) -> None:
|
||||
from agent.auxiliary_client import _AI_GATEWAY_HEADERS, _OR_HEADERS
|
||||
from agent.auxiliary_client import _AI_GATEWAY_HEADERS, build_or_headers
|
||||
|
||||
if base_url_host_matches(base_url, "openrouter.ai"):
|
||||
self._client_kwargs["default_headers"] = dict(_OR_HEADERS)
|
||||
self._client_kwargs["default_headers"] = build_or_headers()
|
||||
elif base_url_host_matches(base_url, "ai-gateway.vercel.sh"):
|
||||
self._client_kwargs["default_headers"] = dict(_AI_GATEWAY_HEADERS)
|
||||
elif base_url_host_matches(base_url, "api.routermint.com"):
|
||||
@@ -6780,6 +6803,9 @@ class AIAgent:
|
||||
# response via .response before any chunks are consumed.
|
||||
self._capture_rate_limits(getattr(stream, "response", None))
|
||||
|
||||
# Log OpenRouter response cache status when present.
|
||||
self._check_openrouter_cache_status(getattr(stream, "response", None))
|
||||
|
||||
content_parts: list = []
|
||||
tool_calls_acc: dict = {}
|
||||
tool_gen_notified: set = set()
|
||||
|
||||
@@ -0,0 +1,284 @@
|
||||
"""Tests for OpenRouter response caching header injection."""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# build_or_headers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestBuildOrHeaders:
|
||||
"""Test the build_or_headers() helper in agent/auxiliary_client.py."""
|
||||
|
||||
def test_base_attribution_always_present(self):
|
||||
"""Attribution headers must always be included regardless of cache setting."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
headers = build_or_headers(or_config={"response_cache": False})
|
||||
assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
|
||||
assert headers["X-OpenRouter-Title"] == "Hermes Agent"
|
||||
assert headers["X-OpenRouter-Categories"] == "productivity,cli-agent"
|
||||
|
||||
def test_cache_enabled(self):
|
||||
"""When response_cache is True, X-OpenRouter-Cache header is set."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
headers = build_or_headers(or_config={"response_cache": True})
|
||||
assert headers["X-OpenRouter-Cache"] == "true"
|
||||
|
||||
def test_cache_disabled(self):
|
||||
"""When response_cache is False, no cache header is sent."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
headers = build_or_headers(or_config={"response_cache": False})
|
||||
assert "X-OpenRouter-Cache" not in headers
|
||||
assert "X-OpenRouter-Cache-TTL" not in headers
|
||||
|
||||
def test_cache_disabled_by_default_empty_config(self):
|
||||
"""Empty config dict means no cache headers (response_cache defaults to False)."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
headers = build_or_headers(or_config={})
|
||||
assert "X-OpenRouter-Cache" not in headers
|
||||
|
||||
def test_ttl_default(self):
|
||||
"""Default TTL (300) is included when cache is enabled."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 300})
|
||||
assert headers["X-OpenRouter-Cache-TTL"] == "300"
|
||||
|
||||
def test_ttl_custom(self):
|
||||
"""Custom TTL values within range are sent."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 3600})
|
||||
assert headers["X-OpenRouter-Cache-TTL"] == "3600"
|
||||
|
||||
def test_ttl_max(self):
|
||||
"""Maximum TTL (86400) is accepted."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 86400})
|
||||
assert headers["X-OpenRouter-Cache-TTL"] == "86400"
|
||||
|
||||
def test_ttl_out_of_range_too_high(self):
|
||||
"""TTL above 86400 is silently ignored (no TTL header sent)."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 100000})
|
||||
assert "X-OpenRouter-Cache-TTL" not in headers
|
||||
# But cache is still enabled
|
||||
assert headers["X-OpenRouter-Cache"] == "true"
|
||||
|
||||
def test_ttl_out_of_range_zero(self):
|
||||
"""TTL of 0 is below minimum — no TTL header sent."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 0})
|
||||
assert "X-OpenRouter-Cache-TTL" not in headers
|
||||
|
||||
def test_ttl_negative(self):
|
||||
"""Negative TTL is ignored."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": -5})
|
||||
assert "X-OpenRouter-Cache-TTL" not in headers
|
||||
|
||||
def test_ttl_not_a_number(self):
|
||||
"""Non-numeric TTL is ignored."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": "five"})
|
||||
assert "X-OpenRouter-Cache-TTL" not in headers
|
||||
|
||||
def test_ttl_float_truncated(self):
|
||||
"""Float TTL values are truncated to int."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 600.7})
|
||||
assert headers["X-OpenRouter-Cache-TTL"] == "600"
|
||||
|
||||
def test_returns_fresh_dict(self):
|
||||
"""Each call returns a new dict so mutations don't leak."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
cfg = {"response_cache": True}
|
||||
h1 = build_or_headers(or_config=cfg)
|
||||
h2 = build_or_headers(or_config=cfg)
|
||||
assert h1 is not h2
|
||||
assert h1 == h2
|
||||
|
||||
def test_none_config_falls_back_to_load_config(self):
|
||||
"""When or_config is None, build_or_headers reads from load_config()."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
fake_cfg = {
|
||||
"openrouter": {"response_cache": True, "response_cache_ttl": 900},
|
||||
}
|
||||
with patch("hermes_cli.config.load_config", return_value=fake_cfg):
|
||||
headers = build_or_headers(or_config=None)
|
||||
assert headers["X-OpenRouter-Cache"] == "true"
|
||||
assert headers["X-OpenRouter-Cache-TTL"] == "900"
|
||||
|
||||
def test_none_config_load_config_fails_gracefully(self):
|
||||
"""When load_config() fails, build_or_headers still returns base headers."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
with patch("hermes_cli.config.load_config", side_effect=RuntimeError("boom")):
|
||||
headers = build_or_headers(or_config=None)
|
||||
# Should have base attribution but no cache headers
|
||||
assert "HTTP-Referer" in headers
|
||||
assert "X-OpenRouter-Cache" not in headers
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Environment variable overrides
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestEnvVarOverrides:
|
||||
"""Test env var precedence over config.yaml for response caching."""
|
||||
|
||||
def test_env_enables_cache(self, monkeypatch):
|
||||
"""HERMES_OPENROUTER_CACHE=true enables cache even when config disables it."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "true")
|
||||
headers = build_or_headers(or_config={"response_cache": False})
|
||||
assert headers["X-OpenRouter-Cache"] == "true"
|
||||
|
||||
def test_env_disables_cache(self, monkeypatch):
|
||||
"""HERMES_OPENROUTER_CACHE=false disables cache even when config enables it."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "false")
|
||||
headers = build_or_headers(or_config={"response_cache": True})
|
||||
assert "X-OpenRouter-Cache" not in headers
|
||||
|
||||
@pytest.mark.parametrize("value", ["1", "true", "TRUE", "yes", "Yes", "on"])
|
||||
def test_truthy_values(self, monkeypatch, value):
|
||||
"""Various truthy strings enable caching."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
monkeypatch.setenv("HERMES_OPENROUTER_CACHE", value)
|
||||
headers = build_or_headers(or_config={})
|
||||
assert headers["X-OpenRouter-Cache"] == "true"
|
||||
|
||||
@pytest.mark.parametrize("value", ["0", "false", "no", "off", "maybe", ""])
|
||||
def test_non_truthy_values(self, monkeypatch, value):
|
||||
"""Non-truthy strings do not enable caching (empty falls through to config)."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
monkeypatch.setenv("HERMES_OPENROUTER_CACHE", value)
|
||||
# Empty string falls through to config; others are explicitly non-truthy
|
||||
if value == "":
|
||||
# Empty env var falls through to config default (False)
|
||||
headers = build_or_headers(or_config={"response_cache": False})
|
||||
else:
|
||||
headers = build_or_headers(or_config={"response_cache": True})
|
||||
assert "X-OpenRouter-Cache" not in headers
|
||||
|
||||
def test_env_ttl_overrides_config(self, monkeypatch):
|
||||
"""HERMES_OPENROUTER_CACHE_TTL overrides config TTL."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "true")
|
||||
monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", "1800")
|
||||
headers = build_or_headers(or_config={"response_cache_ttl": 300})
|
||||
assert headers["X-OpenRouter-Cache-TTL"] == "1800"
|
||||
|
||||
@pytest.mark.parametrize("ttl", ["0", "86401", "abc", "-1", "12.5"])
|
||||
def test_invalid_env_ttl_dropped(self, monkeypatch, ttl):
|
||||
"""Invalid TTL env values are ignored; cache still enabled without TTL."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "1")
|
||||
monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", ttl)
|
||||
headers = build_or_headers(or_config={})
|
||||
assert headers["X-OpenRouter-Cache"] == "true"
|
||||
assert "X-OpenRouter-Cache-TTL" not in headers
|
||||
|
||||
@pytest.mark.parametrize("ttl", ["1", "300", "86400"])
|
||||
def test_valid_env_ttl_boundaries(self, monkeypatch, ttl):
|
||||
"""Boundary TTL values (1, 300, 86400) are accepted."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "yes")
|
||||
monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", ttl)
|
||||
assert build_or_headers(or_config={})["X-OpenRouter-Cache-TTL"] == ttl
|
||||
|
||||
def test_no_env_vars_falls_through_to_config(self, monkeypatch):
|
||||
"""Without env vars, config.yaml controls behavior."""
|
||||
from agent.auxiliary_client import build_or_headers
|
||||
|
||||
monkeypatch.delenv("HERMES_OPENROUTER_CACHE", raising=False)
|
||||
monkeypatch.delenv("HERMES_OPENROUTER_CACHE_TTL", raising=False)
|
||||
headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 600})
|
||||
assert headers["X-OpenRouter-Cache"] == "true"
|
||||
assert headers["X-OpenRouter-Cache-TTL"] == "600"
|
||||
|
||||
class TestDefaultConfig:
|
||||
"""Verify the openrouter config section is in DEFAULT_CONFIG."""
|
||||
|
||||
def test_openrouter_section_exists(self):
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
|
||||
assert "openrouter" in DEFAULT_CONFIG
|
||||
or_cfg = DEFAULT_CONFIG["openrouter"]
|
||||
assert or_cfg["response_cache"] is True
|
||||
assert or_cfg["response_cache_ttl"] == 300
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _check_openrouter_cache_status
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestCheckOpenrouterCacheStatus:
|
||||
"""Test the _check_openrouter_cache_status method on AIAgent."""
|
||||
|
||||
def _make_agent(self):
|
||||
"""Create a minimal AIAgent-like object with just the method under test."""
|
||||
from run_agent import AIAgent
|
||||
|
||||
# Use object.__new__ to skip __init__, then set the attributes we need
|
||||
agent = object.__new__(AIAgent)
|
||||
agent._or_cache_hits = 0
|
||||
return agent
|
||||
|
||||
def test_hit_increments_counter(self):
|
||||
agent = self._make_agent()
|
||||
resp = SimpleNamespace(headers={"x-openrouter-cache-status": "HIT"})
|
||||
agent._check_openrouter_cache_status(resp)
|
||||
assert agent._or_cache_hits == 1
|
||||
# Second hit increments
|
||||
agent._check_openrouter_cache_status(resp)
|
||||
assert agent._or_cache_hits == 2
|
||||
|
||||
def test_miss_does_not_increment(self):
|
||||
agent = self._make_agent()
|
||||
resp = SimpleNamespace(headers={"x-openrouter-cache-status": "MISS"})
|
||||
agent._check_openrouter_cache_status(resp)
|
||||
assert getattr(agent, "_or_cache_hits", 0) == 0
|
||||
|
||||
def test_no_header_is_noop(self):
|
||||
agent = self._make_agent()
|
||||
resp = SimpleNamespace(headers={})
|
||||
agent._check_openrouter_cache_status(resp)
|
||||
assert getattr(agent, "_or_cache_hits", 0) == 0
|
||||
|
||||
def test_none_response_is_safe(self):
|
||||
agent = self._make_agent()
|
||||
agent._check_openrouter_cache_status(None) # no crash
|
||||
|
||||
def test_no_headers_attr_is_safe(self):
|
||||
agent = self._make_agent()
|
||||
agent._check_openrouter_cache_status(object()) # no crash
|
||||
|
||||
def test_case_insensitive(self):
|
||||
agent = self._make_agent()
|
||||
resp = SimpleNamespace(headers={"x-openrouter-cache-status": "hit"})
|
||||
agent._check_openrouter_cache_status(resp)
|
||||
assert agent._or_cache_hits == 1
|
||||
@@ -81,3 +81,51 @@ def test_unknown_base_url_clears_default_headers(mock_openai):
|
||||
agent._apply_client_headers_for_base_url("https://api.example.com/v1")
|
||||
|
||||
assert "default_headers" not in agent._client_kwargs
|
||||
|
||||
|
||||
@patch("run_agent.OpenAI")
|
||||
def test_openrouter_headers_include_response_cache_when_enabled(mock_openai):
|
||||
"""When openrouter.response_cache is True, the cache header is injected."""
|
||||
mock_openai.return_value = MagicMock()
|
||||
agent = AIAgent(
|
||||
api_key="test-key",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
model="test/model",
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
)
|
||||
|
||||
with patch("hermes_cli.config.load_config", return_value={
|
||||
"openrouter": {"response_cache": True, "response_cache_ttl": 600},
|
||||
}):
|
||||
agent._apply_client_headers_for_base_url("https://openrouter.ai/api/v1")
|
||||
|
||||
headers = agent._client_kwargs["default_headers"]
|
||||
assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
|
||||
assert headers["X-OpenRouter-Cache"] == "true"
|
||||
assert headers["X-OpenRouter-Cache-TTL"] == "600"
|
||||
|
||||
|
||||
@patch("run_agent.OpenAI")
|
||||
def test_openrouter_headers_no_cache_when_disabled(mock_openai):
|
||||
"""When openrouter.response_cache is False, no cache headers are sent."""
|
||||
mock_openai.return_value = MagicMock()
|
||||
agent = AIAgent(
|
||||
api_key="test-key",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
model="test/model",
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
)
|
||||
|
||||
with patch("hermes_cli.config.load_config", return_value={
|
||||
"openrouter": {"response_cache": False},
|
||||
}):
|
||||
agent._apply_client_headers_for_base_url("https://openrouter.ai/api/v1")
|
||||
|
||||
headers = agent._client_kwargs["default_headers"]
|
||||
assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
|
||||
assert "X-OpenRouter-Cache" not in headers
|
||||
assert "X-OpenRouter-Cache-TTL" not in headers
|
||||
|
||||
@@ -14,6 +14,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
|
||||
|----------|-------------|
|
||||
| `OPENROUTER_API_KEY` | OpenRouter API key (recommended for flexibility) |
|
||||
| `OPENROUTER_BASE_URL` | Override the OpenRouter-compatible base URL |
|
||||
| `HERMES_OPENROUTER_CACHE` | Enable OpenRouter response caching (`1`/`true`/`yes`/`on`). Overrides `openrouter.response_cache` in config.yaml. See [Response Caching](https://openrouter.ai/docs/guides/features/response-caching). |
|
||||
| `HERMES_OPENROUTER_CACHE_TTL` | Cache TTL in seconds (1-86400). Overrides `openrouter.response_cache_ttl` in config.yaml. |
|
||||
| `NOUS_BASE_URL` | Override Nous Portal base URL (rarely needed; development/testing only) |
|
||||
| `NOUS_INFERENCE_BASE_URL` | Override Nous inference endpoint directly |
|
||||
| `AI_GATEWAY_API_KEY` | Vercel AI Gateway API key ([ai-gateway.vercel.sh](https://ai-gateway.vercel.sh)) |
|
||||
|
||||
Reference in New Issue
Block a user