feat(openrouter): add response caching support (#19132)

Enable OpenRouter's response caching feature (beta) via X-OpenRouter-Cache
headers. When enabled, identical API requests return cached responses for
free (zero billing), reducing both latency and cost.

Configuration via config.yaml:

    openrouter:
      response_cache: true       # default: on
      response_cache_ttl: 300    # 1-86400 seconds

Changes:
- Add openrouter config section to DEFAULT_CONFIG (response_cache + TTL)
- Add build_or_headers() in auxiliary_client.py that builds attribution
  headers plus optional cache headers based on config
- Replace inline _OR_HEADERS dicts with build_or_headers() at all 5 sites:
  run_agent.py __init__, _apply_client_headers_for_base_url(), and
  auxiliary_client.py _try_openrouter() + _to_async_client()
- Add _check_openrouter_cache_status() method to AIAgent that reads
  X-OpenRouter-Cache-Status from streaming response headers and logs
  HIT/MISS status
- Document in cli-config.yaml.example
- Add 28 tests (22 unit + 6 integration)

Ref: https://openrouter.ai/docs/guides/features/response-caching
2026-05-21 03:39:54 +00:00 · 2026-05-03 01:54:24 -07:00
parent 9b5b88b5e0
commit 457c7b76cd
7 changed files with 451 additions and 12 deletions
@@ -259,13 +259,68 @@ _PROVIDERS_WITHOUT_VISION: frozenset = frozenset({
    "kimi-coding-cn",
 })

-# OpenRouter app attribution headers
-_OR_HEADERS = {
+# OpenRouter app attribution headers (base — always sent)
+_OR_HEADERS_BASE = {
    "HTTP-Referer": "https://hermes-agent.nousresearch.com",
    "X-OpenRouter-Title": "Hermes Agent",
    "X-OpenRouter-Categories": "productivity,cli-agent",
 }

+# Truthy values for boolean env-var parsing.
+_TRUTHY_ENV_VALUES = frozenset({"1", "true", "yes", "on"})
+
+
+def build_or_headers(or_config: dict | None = None) -> dict:
+    """Build OpenRouter headers, optionally including response-cache headers.
+
+    Precedence for response cache: env var > config.yaml > off when the
+    ``response_cache`` key is absent (``DEFAULT_CONFIG`` ships it as ``True``).
+
+    Environment variables:
+        ``HERMES_OPENROUTER_CACHE`` — truthy (``1``/``true``/``yes``/``on``)
+            enables caching; any other non-empty value (e.g. ``0``/``false``/``no``/``off``) disables.
+            Overrides ``openrouter.response_cache`` in config.yaml.
+        ``HERMES_OPENROUTER_CACHE_TTL`` — integer seconds (1-86400).
+            Overrides ``openrouter.response_cache_ttl`` in config.yaml.
+
+    *or_config* is the ``openrouter`` section from config.yaml.  When *None*,
+    falls back to reading config from disk via ``load_config()``.
+    """
+    headers = dict(_OR_HEADERS_BASE)
+
+    # Resolve config from disk if not provided.
+    if or_config is None:
+        try:
+            from hermes_cli.config import load_config
+            or_config = load_config().get("openrouter", {})
+        except Exception:
+            or_config = {}
+
+    # Determine cache enabled: env var overrides config.
+    env_cache = os.environ.get("HERMES_OPENROUTER_CACHE", "").strip().lower()
+    if env_cache:
+        cache_enabled = env_cache in _TRUTHY_ENV_VALUES
+    else:
+        cache_enabled = or_config.get("response_cache", False)
+
+    if not cache_enabled:
+        return headers
+
+    headers["X-OpenRouter-Cache"] = "true"
+
+    # Determine TTL: env var overrides config.
+    env_ttl = os.environ.get("HERMES_OPENROUTER_CACHE_TTL", "").strip()
+    if env_ttl:
+        if env_ttl.isdigit():
+            ttl = int(env_ttl)
+            if 1 <= ttl <= 86400:
+                headers["X-OpenRouter-Cache-TTL"] = str(ttl)
+    else:
+        ttl = or_config.get("response_cache_ttl", 300)
+        if isinstance(ttl, (int, float)) and 1 <= ttl <= 86400:
+            headers["X-OpenRouter-Cache-TTL"] = str(int(ttl))
+
+    return headers
+
 # Vercel AI Gateway app attribution headers. HTTP-Referer maps to
 # referrerUrl and X-Title maps to appName in the gateway's analytics.
 from hermes_cli import __version__ as _HERMES_VERSION
@@ -1158,14 +1213,14 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
        base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
        logger.debug("Auxiliary client: OpenRouter via pool")
        return OpenAI(api_key=or_key, base_url=base_url,
-                       default_headers=_OR_HEADERS), _OPENROUTER_MODEL
+                       default_headers=build_or_headers()), _OPENROUTER_MODEL

    or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
    if not or_key:
        return None, None
    logger.debug("Auxiliary client: OpenRouter")
    return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
-                   default_headers=_OR_HEADERS), _OPENROUTER_MODEL
+                   default_headers=build_or_headers()), _OPENROUTER_MODEL


 def _describe_openrouter_unavailable() -> str:
@@ -1911,7 +1966,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
    }
    sync_base_url = str(sync_client.base_url)
    if base_url_host_matches(sync_base_url, "openrouter.ai"):
-        async_kwargs["default_headers"] = dict(_OR_HEADERS)
+        async_kwargs["default_headers"] = build_or_headers()
    elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
        from hermes_cli.copilot_auth import copilot_request_headers

@@ -121,6 +121,18 @@ model:
 #   # Data policy: "allow" (default) or "deny" to exclude providers that may store data
 #   # data_collection: "deny"

+# =============================================================================
+# OpenRouter Response Caching (only applies when using OpenRouter)
+# =============================================================================
+# Cache identical API responses at the OpenRouter edge for free instant replays.
+# When enabled, identical requests (same model, messages, parameters) return
+# cached responses with zero billing. Separate from Anthropic prompt caching.
+# See: https://openrouter.ai/docs/guides/features/response-caching
+#
+# openrouter:
+#   response_cache: true         # Enable response caching (default: true)
+#   response_cache_ttl: 300      # Cache TTL in seconds, 1-86400 (default: 300)
+
 # =============================================================================
 # Git Worktree Isolation
 # =============================================================================
@@ -644,6 +644,18 @@ DEFAULT_CONFIG = {
        "cache_ttl": "5m",
    },

+    # OpenRouter-specific settings.
+    # response_cache: enable OpenRouter response caching (X-OpenRouter-Cache header).
+    #   When enabled, identical requests return cached responses for free (zero billing).
+    #   This is separate from Anthropic prompt caching and works alongside it.
+    #   See: https://openrouter.ai/docs/guides/features/response-caching
+    # response_cache_ttl: how long cached responses remain valid, in seconds (1-86400).
+    #   Default 300 (5 minutes). Only used when response_cache is enabled.
+    "openrouter": {
+        "response_cache": True,
+        "response_cache_ttl": 300,
+    },
+
    # AWS Bedrock provider configuration.
    # Only used when model.provider is "bedrock".
    "bedrock": {
@@ -1258,6 +1258,10 @@ class AIAgent:
        # after each API call.  Accessed by /usage slash command.
        self._rate_limit_state: Optional["RateLimitState"] = None

+        # OpenRouter response cache hit counter — incremented when
+        # X-OpenRouter-Cache-Status: HIT is seen in streaming response headers.
+        self._or_cache_hits: int = 0
+
        # Centralized logging — agent.log (INFO+) and errors.log (WARNING+)
        # both live under ~/.hermes/logs/.  Idempotent, so gateway mode
        # (which creates a new AIAgent per message) won't duplicate handlers.
@@ -1421,11 +1425,8 @@ class AIAgent:
                    client_kwargs["args"] = self.acp_args
                effective_base = base_url
                if base_url_host_matches(effective_base, "openrouter.ai"):
-                    client_kwargs["default_headers"] = {
-                        "HTTP-Referer": "https://hermes-agent.nousresearch.com",
-                        "X-OpenRouter-Title": "Hermes Agent",
-                        "X-OpenRouter-Categories": "productivity,cli-agent",
-                    }
+                    from agent.auxiliary_client import build_or_headers
+                    client_kwargs["default_headers"] = build_or_headers()
                elif base_url_host_matches(effective_base, "api.routermint.com"):
                    client_kwargs["default_headers"] = _routermint_headers()
                elif base_url_host_matches(effective_base, "api.githubcopilot.com"):
@@ -4580,6 +4581,28 @@ class AIAgent:
        """Return the last captured RateLimitState, or None."""
        return self._rate_limit_state

+    def _check_openrouter_cache_status(self, http_response: Any) -> None:
+        """Read X-OpenRouter-Cache-Status from response headers and log it.
+
+        Increments ``_or_cache_hits`` on HIT so callers can report savings.
+        """
+        if http_response is None:
+            return
+        headers = getattr(http_response, "headers", None)
+        if not headers:
+            return
+        try:
+            status = headers.get("x-openrouter-cache-status")
+            if not status:
+                return
+            if status.upper() == "HIT":
+                self._or_cache_hits += 1
+                logger.info("OpenRouter response cache HIT (total: %d)", self._or_cache_hits)
+            else:
+                logger.debug("OpenRouter response cache %s", status.upper())
+        except Exception:
+            pass  # Never let header parsing break the agent loop
+
    def get_activity_summary(self) -> dict:
        """Return a snapshot of the agent's current activity for diagnostics.

@@ -6157,10 +6180,10 @@ class AIAgent:
        return True

    def _apply_client_headers_for_base_url(self, base_url: str) -> None:
-        from agent.auxiliary_client import _AI_GATEWAY_HEADERS, _OR_HEADERS
+        from agent.auxiliary_client import _AI_GATEWAY_HEADERS, build_or_headers

        if base_url_host_matches(base_url, "openrouter.ai"):
-            self._client_kwargs["default_headers"] = dict(_OR_HEADERS)
+            self._client_kwargs["default_headers"] = build_or_headers()
        elif base_url_host_matches(base_url, "ai-gateway.vercel.sh"):
            self._client_kwargs["default_headers"] = dict(_AI_GATEWAY_HEADERS)
        elif base_url_host_matches(base_url, "api.routermint.com"):
@@ -6780,6 +6803,9 @@ class AIAgent:
            # response via .response before any chunks are consumed.
            self._capture_rate_limits(getattr(stream, "response", None))

+            # Log OpenRouter response cache status when present.
+            self._check_openrouter_cache_status(getattr(stream, "response", None))
+
            content_parts: list = []
            tool_calls_acc: dict = {}
            tool_gen_notified: set = set()
@@ -0,0 +1,284 @@
+"""Tests for OpenRouter response caching header injection."""
+
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# build_or_headers
+# ---------------------------------------------------------------------------
+
+class TestBuildOrHeaders:
+    """Test the build_or_headers() helper in agent/auxiliary_client.py."""
+
+    def test_base_attribution_always_present(self):
+        """Attribution headers must always be included regardless of cache setting."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": False})
+        assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
+        assert headers["X-OpenRouter-Title"] == "Hermes Agent"
+        assert headers["X-OpenRouter-Categories"] == "productivity,cli-agent"
+
+    def test_cache_enabled(self):
+        """When response_cache is True, X-OpenRouter-Cache header is set."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True})
+        assert headers["X-OpenRouter-Cache"] == "true"
+
+    def test_cache_disabled(self):
+        """When response_cache is False, no cache header is sent."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": False})
+        assert "X-OpenRouter-Cache" not in headers
+        assert "X-OpenRouter-Cache-TTL" not in headers
+
+    def test_cache_disabled_by_default_empty_config(self):
+        """Empty config dict means no cache headers (response_cache defaults to False)."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={})
+        assert "X-OpenRouter-Cache" not in headers
+
+    def test_ttl_default(self):
+        """Default TTL (300) is included when cache is enabled."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 300})
+        assert headers["X-OpenRouter-Cache-TTL"] == "300"
+
+    def test_ttl_custom(self):
+        """Custom TTL values within range are sent."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 3600})
+        assert headers["X-OpenRouter-Cache-TTL"] == "3600"
+
+    def test_ttl_max(self):
+        """Maximum TTL (86400) is accepted."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 86400})
+        assert headers["X-OpenRouter-Cache-TTL"] == "86400"
+
+    def test_ttl_out_of_range_too_high(self):
+        """TTL above 86400 is silently ignored (no TTL header sent)."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 100000})
+        assert "X-OpenRouter-Cache-TTL" not in headers
+        # But cache is still enabled
+        assert headers["X-OpenRouter-Cache"] == "true"
+
+    def test_ttl_out_of_range_zero(self):
+        """TTL of 0 is below minimum — no TTL header sent."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 0})
+        assert "X-OpenRouter-Cache-TTL" not in headers
+
+    def test_ttl_negative(self):
+        """Negative TTL is ignored."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": -5})
+        assert "X-OpenRouter-Cache-TTL" not in headers
+
+    def test_ttl_not_a_number(self):
+        """Non-numeric TTL is ignored."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": "five"})
+        assert "X-OpenRouter-Cache-TTL" not in headers
+
+    def test_ttl_float_truncated(self):
+        """Float TTL values are truncated to int."""
+        from agent.auxiliary_client import build_or_headers
+
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 600.7})
+        assert headers["X-OpenRouter-Cache-TTL"] == "600"
+
+    def test_returns_fresh_dict(self):
+        """Each call returns a new dict so mutations don't leak."""
+        from agent.auxiliary_client import build_or_headers
+
+        cfg = {"response_cache": True}
+        h1 = build_or_headers(or_config=cfg)
+        h2 = build_or_headers(or_config=cfg)
+        assert h1 is not h2
+        assert h1 == h2
+
+    def test_none_config_falls_back_to_load_config(self):
+        """When or_config is None, build_or_headers reads from load_config()."""
+        from agent.auxiliary_client import build_or_headers
+
+        fake_cfg = {
+            "openrouter": {"response_cache": True, "response_cache_ttl": 900},
+        }
+        with patch("hermes_cli.config.load_config", return_value=fake_cfg):
+            headers = build_or_headers(or_config=None)
+        assert headers["X-OpenRouter-Cache"] == "true"
+        assert headers["X-OpenRouter-Cache-TTL"] == "900"
+
+    def test_none_config_load_config_fails_gracefully(self):
+        """When load_config() fails, build_or_headers still returns base headers."""
+        from agent.auxiliary_client import build_or_headers
+
+        with patch("hermes_cli.config.load_config", side_effect=RuntimeError("boom")):
+            headers = build_or_headers(or_config=None)
+        # Should have base attribution but no cache headers
+        assert "HTTP-Referer" in headers
+        assert "X-OpenRouter-Cache" not in headers
+
+
+# ---------------------------------------------------------------------------
+# Environment variable overrides
+# ---------------------------------------------------------------------------
+
+class TestEnvVarOverrides:
+    """Test env var precedence over config.yaml for response caching."""
+
+    def test_env_enables_cache(self, monkeypatch):
+        """HERMES_OPENROUTER_CACHE=true enables cache even when config disables it."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "true")
+        headers = build_or_headers(or_config={"response_cache": False})
+        assert headers["X-OpenRouter-Cache"] == "true"
+
+    def test_env_disables_cache(self, monkeypatch):
+        """HERMES_OPENROUTER_CACHE=false disables cache even when config enables it."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "false")
+        headers = build_or_headers(or_config={"response_cache": True})
+        assert "X-OpenRouter-Cache" not in headers
+
+    @pytest.mark.parametrize("value", ["1", "true", "TRUE", "yes", "Yes", "on"])
+    def test_truthy_values(self, monkeypatch, value):
+        """Various truthy strings enable caching."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", value)
+        headers = build_or_headers(or_config={})
+        assert headers["X-OpenRouter-Cache"] == "true"
+
+    @pytest.mark.parametrize("value", ["0", "false", "no", "off", "maybe", ""])
+    def test_non_truthy_values(self, monkeypatch, value):
+        """Non-truthy strings do not enable caching (empty falls through to config)."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", value)
+        # Empty string falls through to config; others are explicitly non-truthy
+        if value == "":
+            # Empty env var falls through to config default (False)
+            headers = build_or_headers(or_config={"response_cache": False})
+        else:
+            headers = build_or_headers(or_config={"response_cache": True})
+        assert "X-OpenRouter-Cache" not in headers
+
+    def test_env_ttl_overrides_config(self, monkeypatch):
+        """HERMES_OPENROUTER_CACHE_TTL overrides config TTL."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "true")
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", "1800")
+        headers = build_or_headers(or_config={"response_cache_ttl": 300})
+        assert headers["X-OpenRouter-Cache-TTL"] == "1800"
+
+    @pytest.mark.parametrize("ttl", ["0", "86401", "abc", "-1", "12.5"])
+    def test_invalid_env_ttl_dropped(self, monkeypatch, ttl):
+        """Invalid TTL env values are ignored; cache still enabled without TTL."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "1")
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", ttl)
+        headers = build_or_headers(or_config={})
+        assert headers["X-OpenRouter-Cache"] == "true"
+        assert "X-OpenRouter-Cache-TTL" not in headers
+
+    @pytest.mark.parametrize("ttl", ["1", "300", "86400"])
+    def test_valid_env_ttl_boundaries(self, monkeypatch, ttl):
+        """Boundary TTL values (1, 300, 86400) are accepted."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE", "yes")
+        monkeypatch.setenv("HERMES_OPENROUTER_CACHE_TTL", ttl)
+        assert build_or_headers(or_config={})["X-OpenRouter-Cache-TTL"] == ttl
+
+    def test_no_env_vars_falls_through_to_config(self, monkeypatch):
+        """Without env vars, config.yaml controls behavior."""
+        from agent.auxiliary_client import build_or_headers
+
+        monkeypatch.delenv("HERMES_OPENROUTER_CACHE", raising=False)
+        monkeypatch.delenv("HERMES_OPENROUTER_CACHE_TTL", raising=False)
+        headers = build_or_headers(or_config={"response_cache": True, "response_cache_ttl": 600})
+        assert headers["X-OpenRouter-Cache"] == "true"
+        assert headers["X-OpenRouter-Cache-TTL"] == "600"
+
+class TestDefaultConfig:
+    """Verify the openrouter config section is in DEFAULT_CONFIG."""
+
+    def test_openrouter_section_exists(self):
+        from hermes_cli.config import DEFAULT_CONFIG
+
+        assert "openrouter" in DEFAULT_CONFIG
+        or_cfg = DEFAULT_CONFIG["openrouter"]
+        assert or_cfg["response_cache"] is True
+        assert or_cfg["response_cache_ttl"] == 300
+
+
+# ---------------------------------------------------------------------------
+# _check_openrouter_cache_status
+# ---------------------------------------------------------------------------
+
+class TestCheckOpenrouterCacheStatus:
+    """Test the _check_openrouter_cache_status method on AIAgent."""
+
+    def _make_agent(self):
+        """Create a minimal AIAgent-like object with just the method under test."""
+        from run_agent import AIAgent
+
+        # Use object.__new__ to skip __init__, then set the attributes we need
+        agent = object.__new__(AIAgent)
+        agent._or_cache_hits = 0
+        return agent
+
+    def test_hit_increments_counter(self):
+        agent = self._make_agent()
+        resp = SimpleNamespace(headers={"x-openrouter-cache-status": "HIT"})
+        agent._check_openrouter_cache_status(resp)
+        assert agent._or_cache_hits == 1
+        # Second hit increments
+        agent._check_openrouter_cache_status(resp)
+        assert agent._or_cache_hits == 2
+
+    def test_miss_does_not_increment(self):
+        agent = self._make_agent()
+        resp = SimpleNamespace(headers={"x-openrouter-cache-status": "MISS"})
+        agent._check_openrouter_cache_status(resp)
+        assert getattr(agent, "_or_cache_hits", 0) == 0
+
+    def test_no_header_is_noop(self):
+        agent = self._make_agent()
+        resp = SimpleNamespace(headers={})
+        agent._check_openrouter_cache_status(resp)
+        assert getattr(agent, "_or_cache_hits", 0) == 0
+
+    def test_none_response_is_safe(self):
+        agent = self._make_agent()
+        agent._check_openrouter_cache_status(None)  # no crash
+
+    def test_no_headers_attr_is_safe(self):
+        agent = self._make_agent()
+        agent._check_openrouter_cache_status(object())  # no crash
+
+    def test_case_insensitive(self):
+        agent = self._make_agent()
+        resp = SimpleNamespace(headers={"x-openrouter-cache-status": "hit"})
+        agent._check_openrouter_cache_status(resp)
+        assert agent._or_cache_hits == 1
@@ -81,3 +81,51 @@ def test_unknown_base_url_clears_default_headers(mock_openai):
    agent._apply_client_headers_for_base_url("https://api.example.com/v1")

    assert "default_headers" not in agent._client_kwargs
+
+
+@patch("run_agent.OpenAI")
+def test_openrouter_headers_include_response_cache_when_enabled(mock_openai):
+    """When openrouter.response_cache is True, the cache header is injected."""
+    mock_openai.return_value = MagicMock()
+    agent = AIAgent(
+        api_key="test-key",
+        base_url="https://openrouter.ai/api/v1",
+        model="test/model",
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+
+    with patch("hermes_cli.config.load_config", return_value={
+        "openrouter": {"response_cache": True, "response_cache_ttl": 600},
+    }):
+        agent._apply_client_headers_for_base_url("https://openrouter.ai/api/v1")
+
+    headers = agent._client_kwargs["default_headers"]
+    assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
+    assert headers["X-OpenRouter-Cache"] == "true"
+    assert headers["X-OpenRouter-Cache-TTL"] == "600"
+
+
+@patch("run_agent.OpenAI")
+def test_openrouter_headers_no_cache_when_disabled(mock_openai):
+    """When openrouter.response_cache is False, no cache headers are sent."""
+    mock_openai.return_value = MagicMock()
+    agent = AIAgent(
+        api_key="test-key",
+        base_url="https://openrouter.ai/api/v1",
+        model="test/model",
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+
+    with patch("hermes_cli.config.load_config", return_value={
+        "openrouter": {"response_cache": False},
+    }):
+        agent._apply_client_headers_for_base_url("https://openrouter.ai/api/v1")
+
+    headers = agent._client_kwargs["default_headers"]
+    assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
+    assert "X-OpenRouter-Cache" not in headers
+    assert "X-OpenRouter-Cache-TTL" not in headers
@@ -14,6 +14,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
 |----------|-------------|
 | `OPENROUTER_API_KEY` | OpenRouter API key (recommended for flexibility) |
 | `OPENROUTER_BASE_URL` | Override the OpenRouter-compatible base URL |
+| `HERMES_OPENROUTER_CACHE` | Toggle OpenRouter response caching: `1`/`true`/`yes`/`on` enables it; any other non-empty value (e.g. `0`/`false`/`no`/`off`) disables it. Overrides `openrouter.response_cache` in config.yaml. See [Response Caching](https://openrouter.ai/docs/guides/features/response-caching). |
+| `HERMES_OPENROUTER_CACHE_TTL` | Cache TTL in whole seconds (1-86400). Overrides `openrouter.response_cache_ttl` in config.yaml. Non-integer or out-of-range values are ignored and no TTL header is sent. |
 | `NOUS_BASE_URL` | Override Nous Portal base URL (rarely needed; development/testing only) |
 | `NOUS_INFERENCE_BASE_URL` | Override Nous inference endpoint directly |
 | `AI_GATEWAY_API_KEY` | Vercel AI Gateway API key ([ai-gateway.vercel.sh](https://ai-gateway.vercel.sh)) |