diff --git a/agent/transports/codex_app_server_session.py b/agent/transports/codex_app_server_session.py index 582a3a75e7..70f8a03675 100644 --- a/agent/transports/codex_app_server_session.py +++ b/agent/transports/codex_app_server_session.py @@ -133,13 +133,22 @@ class CodexAppServerSession: client_title="Hermes Agent", client_version=_get_hermes_version(), ) - # We pass `permissions` (experimental) when we know it, falling back - # to the implicit profile baked into thread/start. The README warns - # that `sandboxPolicy` cannot be combined with `permissions`, so we - # never set the legacy field. + # Permission selection is intentionally NOT sent on thread/start. + # Two reasons (live-tested against codex 0.130.0): + # 1. `thread/start.permissions` is gated behind the experimentalApi + # capability on this codex version — we'd have to opt in during + # initialize and accept the unstable surface. + # 2. Even with experimentalApi declared and the correct shape + # (`{"type": "profile", "id": "..."}`, not `{"profileId": ...}`), + # codex requires a matching `[permissions]` table in + # ~/.codex/config.toml or it fails the request with + # 'default_permissions requires a [permissions] table'. + # Letting codex pick its default (`:read-only` unless the user has + # configured otherwise in their codex config.toml) is the standard + # codex CLI workflow and avoids fighting codex's own validation. + # Users who want a write-capable profile configure it in their + # ~/.codex/config.toml the same way they would for any codex usage. params: dict[str, Any] = {"cwd": self._cwd} - if self._permission_profile: - params["permissions"] = {"profileId": self._permission_profile} result = self._client.request("thread/start", params, timeout=15) self._thread_id = result["thread"]["id"] logger.info( @@ -292,19 +301,34 @@ class CodexAppServerSession: def _handle_server_request(self, req: dict) -> None: """Translate a codex server request (approval) into Hermes' approval - flow, then send the response.""" + flow, then send the response. + + Method names verified live against codex 0.130.0 (Apr 2026): + item/commandExecution/requestApproval — exec approvals + item/fileChange/requestApproval — apply_patch approvals + item/permissions/requestApproval — permissions changes + (we decline; user controls + permission profile in + ~/.codex/config.toml). + """ if self._client is None: return method = req.get("method", "") rid = req.get("id") params = req.get("params") or {} - if method == "execCommandApproval": + if method == "item/commandExecution/requestApproval": decision = self._decide_exec_approval(params) self._client.respond(rid, {"decision": decision}) - elif method == "applyPatchApproval": + elif method == "item/fileChange/requestApproval": decision = self._decide_apply_patch_approval(params) self._client.respond(rid, {"decision": decision}) + elif method == "item/permissions/requestApproval": + # Codex sometimes asks to escalate permissions mid-turn. We + # always decline — the user already chose their permission + # profile in ~/.codex/config.toml and surprise escalations + # shouldn't be silently accepted. + self._client.respond(rid, {"decision": "decline"}) else: # Unknown server request — codex can extend this surface. Reject # cleanly so codex doesn't hang waiting for us. @@ -315,7 +339,7 @@ class CodexAppServerSession: def _decide_exec_approval(self, params: dict) -> str: if self._routing.auto_approve_exec: - return "approved" + return "accept" command = params.get("command") or "" cwd = params.get("cwd") or "" if self._approval_callback is not None: @@ -326,12 +350,12 @@ class CodexAppServerSession: return _approval_choice_to_codex_decision(choice) except Exception: logger.exception("approval_callback raised on exec request") - return "denied" - return "denied" # fail-closed when no callback wired + return "decline" + return "decline" # fail-closed when no callback wired def _decide_apply_patch_approval(self, params: dict) -> str: if self._routing.auto_approve_apply_patch: - return "approved" + return "accept" if self._approval_callback is not None: n_changes = len(params.get("changes") or []) try: @@ -343,21 +367,24 @@ class CodexAppServerSession: return _approval_choice_to_codex_decision(choice) except Exception: logger.exception("approval_callback raised on apply_patch") - return "denied" - return "denied" + return "decline" + return "decline" def _approval_choice_to_codex_decision(choice: str) -> str: - """Map Hermes approval choices onto codex decision strings. + """Map Hermes approval choices onto codex's CommandExecutionApprovalDecision + / FileChangeApprovalDecision wire values. Hermes returns 'once', 'session', 'always', or 'deny'. - Codex expects 'approved', 'approvedForSession', or 'denied'. + Codex expects 'accept', 'acceptForSession', 'decline', or 'cancel' + (verified against codex-rs/app-server-protocol/src/protocol/v2/item.rs + on codex 0.130.0). """ if choice in ("once",): - return "approved" + return "accept" if choice in ("session", "always"): - return "approvedForSession" - return "denied" + return "acceptForSession" + return "decline" def _get_hermes_version() -> str: diff --git a/tests/agent/transports/test_codex_app_server_session.py b/tests/agent/transports/test_codex_app_server_session.py index 6d947c0a57..246a55d4ee 100644 --- a/tests/agent/transports/test_codex_app_server_session.py +++ b/tests/agent/transports/test_codex_app_server_session.py @@ -104,11 +104,11 @@ def make_session(client: FakeClient, **kwargs) -> CodexAppServerSession: class TestApprovalChoiceMapping: @pytest.mark.parametrize("choice,expected", [ - ("once", "approved"), - ("session", "approvedForSession"), - ("always", "approvedForSession"), - ("deny", "denied"), - ("anything-else", "denied"), + ("once", "accept"), + ("session", "acceptForSession"), + ("always", "acceptForSession"), + ("deny", "decline"), + ("anything-else", "decline"), ]) def test_mapping(self, choice, expected): assert _approval_choice_to_codex_decision(choice) == expected @@ -127,13 +127,17 @@ class TestLifecycle: method_calls = [m for (m, _) in client.requests if m == "thread/start"] assert len(method_calls) == 1 - def test_thread_start_passes_cwd_and_permissions(self): + def test_thread_start_passes_cwd_only(self): + """thread/start carries cwd. We intentionally do NOT pass `permissions` + on this codex version (experimentalApi-gated + requires matching + config.toml [permissions] table). Letting codex use its default + (read-only unless user configures otherwise) is the documented path.""" client = FakeClient() s = make_session(client, permission_profile="workspace-write") s.ensure_started() method, params = next(r for r in client.requests if r[0] == "thread/start") assert params["cwd"] == "/tmp" - assert params.get("permissions") == {"profileId": "workspace-write"} + assert "permissions" not in params # see session.ensure_started() comment def test_close_idempotent(self): client = FakeClient() @@ -266,7 +270,7 @@ class TestServerRequestRouting: def test_exec_approval_with_callback_approves_once(self): client = FakeClient() client.queue_server_request( - "execCommandApproval", request_id="req-1", + "item/commandExecution/requestApproval", request_id="req-1", command="ls /tmp", cwd="/tmp", ) client.queue_notification( @@ -284,12 +288,12 @@ class TestServerRequestRouting: s = make_session(client, approval_callback=cb) s.run_turn("hi", turn_timeout=1.0) assert captured["command"] == "ls /tmp" - # The session must have responded to the server request with "approved" - assert ("req-1", {"decision": "approved"}) in client.responses + # The session must have responded to the server request with "accept" + assert ("req-1", {"decision": "accept"}) in client.responses def test_exec_approval_no_callback_denies(self): client = FakeClient() - client.queue_server_request("execCommandApproval", request_id="req-1", + client.queue_server_request("item/commandExecution/requestApproval", request_id="req-1", command="rm -rf /", cwd="/") client.queue_notification( "turn/completed", threadId="t", @@ -297,12 +301,12 @@ class TestServerRequestRouting: ) s = make_session(client) # no approval_callback wired s.run_turn("hi", turn_timeout=1.0) - assert ("req-1", {"decision": "denied"}) in client.responses + assert ("req-1", {"decision": "decline"}) in client.responses def test_apply_patch_approval_session_maps_to_session_decision(self): client = FakeClient() client.queue_server_request( - "applyPatchApproval", request_id="req-2", + "item/fileChange/requestApproval", request_id="req-2", changes=[{"kind": {"type": "add"}, "path": "/tmp/x"}], ) client.queue_notification( @@ -315,7 +319,7 @@ class TestServerRequestRouting: s = make_session(client, approval_callback=cb) s.run_turn("hi", turn_timeout=1.0) - assert ("req-2", {"decision": "approvedForSession"}) in client.responses + assert ("req-2", {"decision": "acceptForSession"}) in client.responses def test_unknown_server_request_replied_with_error(self): client = FakeClient() @@ -333,7 +337,7 @@ class TestServerRequestRouting: def test_routing_auto_approve_bypass(self): client = FakeClient() - client.queue_server_request("execCommandApproval", request_id="r1", + client.queue_server_request("item/commandExecution/requestApproval", request_id="r1", command="ls", cwd="/") client.queue_notification( "turn/completed", threadId="t", @@ -343,11 +347,11 @@ class TestServerRequestRouting: s = make_session(client, request_routing=_ServerRequestRouting( auto_approve_exec=True)) s.run_turn("hi", turn_timeout=1.0) - assert ("r1", {"decision": "approved"}) in client.responses + assert ("r1", {"decision": "accept"}) in client.responses - def test_callback_raises_falls_back_to_denied(self): + def test_callback_raises_falls_back_to_decline(self): client = FakeClient() - client.queue_server_request("execCommandApproval", request_id="r1", + client.queue_server_request("item/commandExecution/requestApproval", request_id="r1", command="ls", cwd="/") client.queue_notification( "turn/completed", threadId="t", @@ -360,4 +364,4 @@ class TestServerRequestRouting: s = make_session(client, approval_callback=boom) s.run_turn("hi", turn_timeout=1.0) # Fail-closed: deny on callback exception - assert ("r1", {"decision": "denied"}) in client.responses + assert ("r1", {"decision": "decline"}) in client.responses