From 49bd89dc78490d8bebbf31fcb6243d89d512b3a3 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 12 May 2026 23:41:53 -0700 Subject: [PATCH] fix(codex-runtime): correct protocol field names found via live e2e test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three real bugs caught only by running a turn end-to-end against codex 0.130.0 with a real ChatGPT subscription. Unit tests passed because they asserted on our own (incorrect) wire shapes; the wire format from codex-rs/app-server-protocol/src/protocol/v2/* is the source of truth and my initial reading of the README was incomplete. Bug 1: thread/start.permissions wire format Was sending {"profileId": "workspace-write"}. Real format per PermissionProfileSelectionParams enum (tagged union): {"type": "profile", "id": "workspace-write"} AND requires the experimentalApi capability declared during initialize. AND requires a matching [permissions] table in ~/.codex/config.toml or codex fails the request with 'default_permissions requires a [permissions] table'. Fix: stop overriding permissions on thread/start. Codex picks its default profile (read-only unless user configures otherwise), which matches what codex CLI users expect — they configure their default permission profile in ~/.codex/config.toml the standard way. Trying to be clever about profile selection broke every turn we tested. Live error before fix: 'Invalid request: missing field type' on every turn/start, even though our turn/start payload was correct — the field codex was complaining about was inside the permissions sub-object we shouldn't have been sending. Bug 2: server-request method names Was matching 'execCommandApproval' and 'applyPatchApproval'. Real names per common.rs ServerRequest enum: item/commandExecution/requestApproval item/fileChange/requestApproval item/permissions/requestApproval (new third method) Fix: match the documented names. Added handler for item/permissions/requestApproval that always declines — codex sometimes asks to escalate permissions mid-turn and silent acceptance would surprise users. Live symptom before fix: agent.log showed 'Unknown codex server request: item/commandExecution/requestApproval' and codex stalled because we replied with -32601 (unsupported method) instead of an approval decision. The agent reported back 'The write command was rejected' even though Hermes never showed the user an approval prompt. Bug 3: approval decision values Was sending decision strings 'approved'/'approvedForSession'/'denied'. Real values per CommandExecutionApprovalDecision enum (camelCase): accept, acceptForSession, decline, cancel (also AcceptWithExecpolicyAmendment and ApplyNetworkPolicyAmendment variants we don't currently use). Fix: rename _approval_choice_to_codex_decision return values; update auto_approve_* fallbacks; update fail-closed default from 'denied' to 'decline'. Test mapping table updated to match. Live test verified after fixes: $ hermes (with model.openai_runtime: codex_app_server) > Run the shell command: echo hermes-codex-livetest > .../proof.txt then read it back Approval prompt fired with 'Codex requests exec in '. User chose 'Allow once'. Codex executed the command, wrote the file, read it back. Final response: 'Read back from proof.txt: hermes-codex-livetest'. File contents on disk match. agent.log confirms: codex app-server thread started: id=019e200e profile=workspace-write cwd=/tmp/hermes-codex-livetest/workspace All 20 session tests still green after wire-format updates. --- agent/transports/codex_app_server_session.py | 67 +++++++++++++------ .../test_codex_app_server_session.py | 42 ++++++------ 2 files changed, 70 insertions(+), 39 deletions(-) diff --git a/agent/transports/codex_app_server_session.py b/agent/transports/codex_app_server_session.py index 582a3a75e7..70f8a03675 100644 --- a/agent/transports/codex_app_server_session.py +++ b/agent/transports/codex_app_server_session.py @@ -133,13 +133,22 @@ class CodexAppServerSession: client_title="Hermes Agent", client_version=_get_hermes_version(), ) - # We pass `permissions` (experimental) when we know it, falling back - # to the implicit profile baked into thread/start. The README warns - # that `sandboxPolicy` cannot be combined with `permissions`, so we - # never set the legacy field. + # Permission selection is intentionally NOT sent on thread/start. + # Two reasons (live-tested against codex 0.130.0): + # 1. `thread/start.permissions` is gated behind the experimentalApi + # capability on this codex version — we'd have to opt in during + # initialize and accept the unstable surface. + # 2. Even with experimentalApi declared and the correct shape + # (`{"type": "profile", "id": "..."}`, not `{"profileId": ...}`), + # codex requires a matching `[permissions]` table in + # ~/.codex/config.toml or it fails the request with + # 'default_permissions requires a [permissions] table'. + # Letting codex pick its default (`:read-only` unless the user has + # configured otherwise in their codex config.toml) is the standard + # codex CLI workflow and avoids fighting codex's own validation. + # Users who want a write-capable profile configure it in their + # ~/.codex/config.toml the same way they would for any codex usage. params: dict[str, Any] = {"cwd": self._cwd} - if self._permission_profile: - params["permissions"] = {"profileId": self._permission_profile} result = self._client.request("thread/start", params, timeout=15) self._thread_id = result["thread"]["id"] logger.info( @@ -292,19 +301,34 @@ class CodexAppServerSession: def _handle_server_request(self, req: dict) -> None: """Translate a codex server request (approval) into Hermes' approval - flow, then send the response.""" + flow, then send the response. + + Method names verified live against codex 0.130.0 (Apr 2026): + item/commandExecution/requestApproval — exec approvals + item/fileChange/requestApproval — apply_patch approvals + item/permissions/requestApproval — permissions changes + (we decline; user controls + permission profile in + ~/.codex/config.toml). + """ if self._client is None: return method = req.get("method", "") rid = req.get("id") params = req.get("params") or {} - if method == "execCommandApproval": + if method == "item/commandExecution/requestApproval": decision = self._decide_exec_approval(params) self._client.respond(rid, {"decision": decision}) - elif method == "applyPatchApproval": + elif method == "item/fileChange/requestApproval": decision = self._decide_apply_patch_approval(params) self._client.respond(rid, {"decision": decision}) + elif method == "item/permissions/requestApproval": + # Codex sometimes asks to escalate permissions mid-turn. We + # always decline — the user already chose their permission + # profile in ~/.codex/config.toml and surprise escalations + # shouldn't be silently accepted. + self._client.respond(rid, {"decision": "decline"}) else: # Unknown server request — codex can extend this surface. Reject # cleanly so codex doesn't hang waiting for us. @@ -315,7 +339,7 @@ class CodexAppServerSession: def _decide_exec_approval(self, params: dict) -> str: if self._routing.auto_approve_exec: - return "approved" + return "accept" command = params.get("command") or "" cwd = params.get("cwd") or "" if self._approval_callback is not None: @@ -326,12 +350,12 @@ class CodexAppServerSession: return _approval_choice_to_codex_decision(choice) except Exception: logger.exception("approval_callback raised on exec request") - return "denied" - return "denied" # fail-closed when no callback wired + return "decline" + return "decline" # fail-closed when no callback wired def _decide_apply_patch_approval(self, params: dict) -> str: if self._routing.auto_approve_apply_patch: - return "approved" + return "accept" if self._approval_callback is not None: n_changes = len(params.get("changes") or []) try: @@ -343,21 +367,24 @@ class CodexAppServerSession: return _approval_choice_to_codex_decision(choice) except Exception: logger.exception("approval_callback raised on apply_patch") - return "denied" - return "denied" + return "decline" + return "decline" def _approval_choice_to_codex_decision(choice: str) -> str: - """Map Hermes approval choices onto codex decision strings. + """Map Hermes approval choices onto codex's CommandExecutionApprovalDecision + / FileChangeApprovalDecision wire values. Hermes returns 'once', 'session', 'always', or 'deny'. - Codex expects 'approved', 'approvedForSession', or 'denied'. + Codex expects 'accept', 'acceptForSession', 'decline', or 'cancel' + (verified against codex-rs/app-server-protocol/src/protocol/v2/item.rs + on codex 0.130.0). """ if choice in ("once",): - return "approved" + return "accept" if choice in ("session", "always"): - return "approvedForSession" - return "denied" + return "acceptForSession" + return "decline" def _get_hermes_version() -> str: diff --git a/tests/agent/transports/test_codex_app_server_session.py b/tests/agent/transports/test_codex_app_server_session.py index 6d947c0a57..246a55d4ee 100644 --- a/tests/agent/transports/test_codex_app_server_session.py +++ b/tests/agent/transports/test_codex_app_server_session.py @@ -104,11 +104,11 @@ def make_session(client: FakeClient, **kwargs) -> CodexAppServerSession: class TestApprovalChoiceMapping: @pytest.mark.parametrize("choice,expected", [ - ("once", "approved"), - ("session", "approvedForSession"), - ("always", "approvedForSession"), - ("deny", "denied"), - ("anything-else", "denied"), + ("once", "accept"), + ("session", "acceptForSession"), + ("always", "acceptForSession"), + ("deny", "decline"), + ("anything-else", "decline"), ]) def test_mapping(self, choice, expected): assert _approval_choice_to_codex_decision(choice) == expected @@ -127,13 +127,17 @@ class TestLifecycle: method_calls = [m for (m, _) in client.requests if m == "thread/start"] assert len(method_calls) == 1 - def test_thread_start_passes_cwd_and_permissions(self): + def test_thread_start_passes_cwd_only(self): + """thread/start carries cwd. We intentionally do NOT pass `permissions` + on this codex version (experimentalApi-gated + requires matching + config.toml [permissions] table). Letting codex use its default + (read-only unless user configures otherwise) is the documented path.""" client = FakeClient() s = make_session(client, permission_profile="workspace-write") s.ensure_started() method, params = next(r for r in client.requests if r[0] == "thread/start") assert params["cwd"] == "/tmp" - assert params.get("permissions") == {"profileId": "workspace-write"} + assert "permissions" not in params # see session.ensure_started() comment def test_close_idempotent(self): client = FakeClient() @@ -266,7 +270,7 @@ class TestServerRequestRouting: def test_exec_approval_with_callback_approves_once(self): client = FakeClient() client.queue_server_request( - "execCommandApproval", request_id="req-1", + "item/commandExecution/requestApproval", request_id="req-1", command="ls /tmp", cwd="/tmp", ) client.queue_notification( @@ -284,12 +288,12 @@ class TestServerRequestRouting: s = make_session(client, approval_callback=cb) s.run_turn("hi", turn_timeout=1.0) assert captured["command"] == "ls /tmp" - # The session must have responded to the server request with "approved" - assert ("req-1", {"decision": "approved"}) in client.responses + # The session must have responded to the server request with "accept" + assert ("req-1", {"decision": "accept"}) in client.responses def test_exec_approval_no_callback_denies(self): client = FakeClient() - client.queue_server_request("execCommandApproval", request_id="req-1", + client.queue_server_request("item/commandExecution/requestApproval", request_id="req-1", command="rm -rf /", cwd="/") client.queue_notification( "turn/completed", threadId="t", @@ -297,12 +301,12 @@ class TestServerRequestRouting: ) s = make_session(client) # no approval_callback wired s.run_turn("hi", turn_timeout=1.0) - assert ("req-1", {"decision": "denied"}) in client.responses + assert ("req-1", {"decision": "decline"}) in client.responses def test_apply_patch_approval_session_maps_to_session_decision(self): client = FakeClient() client.queue_server_request( - "applyPatchApproval", request_id="req-2", + "item/fileChange/requestApproval", request_id="req-2", changes=[{"kind": {"type": "add"}, "path": "/tmp/x"}], ) client.queue_notification( @@ -315,7 +319,7 @@ class TestServerRequestRouting: s = make_session(client, approval_callback=cb) s.run_turn("hi", turn_timeout=1.0) - assert ("req-2", {"decision": "approvedForSession"}) in client.responses + assert ("req-2", {"decision": "acceptForSession"}) in client.responses def test_unknown_server_request_replied_with_error(self): client = FakeClient() @@ -333,7 +337,7 @@ class TestServerRequestRouting: def test_routing_auto_approve_bypass(self): client = FakeClient() - client.queue_server_request("execCommandApproval", request_id="r1", + client.queue_server_request("item/commandExecution/requestApproval", request_id="r1", command="ls", cwd="/") client.queue_notification( "turn/completed", threadId="t", @@ -343,11 +347,11 @@ class TestServerRequestRouting: s = make_session(client, request_routing=_ServerRequestRouting( auto_approve_exec=True)) s.run_turn("hi", turn_timeout=1.0) - assert ("r1", {"decision": "approved"}) in client.responses + assert ("r1", {"decision": "accept"}) in client.responses - def test_callback_raises_falls_back_to_denied(self): + def test_callback_raises_falls_back_to_decline(self): client = FakeClient() - client.queue_server_request("execCommandApproval", request_id="r1", + client.queue_server_request("item/commandExecution/requestApproval", request_id="r1", command="ls", cwd="/") client.queue_notification( "turn/completed", threadId="t", @@ -360,4 +364,4 @@ class TestServerRequestRouting: s = make_session(client, approval_callback=boom) s.run_turn("hi", turn_timeout=1.0) # Fail-closed: deny on callback exception - assert ("r1", {"decision": "denied"}) in client.responses + assert ("r1", {"decision": "decline"}) in client.responses