From a39ec45b9f917b1fbd474221cc6b61e75e2c7a55 Mon Sep 17 00:00:00 2001 From: Hermes Agent Date: Mon, 4 May 2026 23:06:42 +0000 Subject: [PATCH] =?UTF-8?q?fix(kanban):=20protect=20dispatcher=20contract?= =?UTF-8?q?=20=E2=80=94=20reject=20raw=20status=3D'running'=20PATCH?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PATCH /api/kanban/tasks/:id endpoint allowed any status-to-any-status transition for the non-claim/complete/block/archive set via raw `UPDATE tasks SET status = ?`. This let UI users (or any client) flip a task to 'running' without going through kb.claim_task(), bypassing claim_lock + claim_expires + started_at + worker_pid. The dispatcher treats such a phantom-claimed task as orphaned and may reclaim, hide, or double-dispatch it. Match the agent dashboard plugin's contract (plugins/kanban/dashboard/plugin_api.py update_task): - status='running' via PATCH → ValueError (HTTP 400) - status='ready' from currently-blocked → kb.unblock_task() (fires 'unblocked' event) - status='ready' from anything else, plus status in {'todo', 'triage'} → new _set_status_direct() helper that nulls claim fields when leaving 'running', closes any active run with outcome='reclaimed', and appends a 'status' event row to task_events - status='done', 'blocked', 'archived' → unchanged (already structured) Frontend changes: - Drop 'running' from the .kanban-status-actions button row in the task detail pane (clicking it would always 400 anyway). - allowKanbanDrop() refuses the 'running' column as a drop target with dropEffect='none' so users see immediate visual feedback that the dispatcher/claim path owns running. Tests added (3, all passing): - test_patch_status_running_is_rejected_to_protect_dispatcher_contract - test_patch_status_done_to_running_is_rejected - test_patch_status_blocked_to_ready_routes_through_unblock_task Existing 12 tests still pass. Co-authored-by: ai-ag2026 --- api/kanban_bridge.py | 113 ++++++++++++++++++++++++++++++++---- static/panels.js | 17 +++++- tests/test_kanban_bridge.py | 59 +++++++++++++++++++ 3 files changed, 178 insertions(+), 11 deletions(-) diff --git a/api/kanban_bridge.py b/api/kanban_bridge.py index 575ae610..68dddf2d 100644 --- a/api/kanban_bridge.py +++ b/api/kanban_bridge.py @@ -9,6 +9,7 @@ added in later focused PRs. from __future__ import annotations import json +import time from dataclasses import asdict, is_dataclass from urllib.parse import parse_qs, unquote @@ -183,6 +184,72 @@ def _validate_status(status: str) -> str: return value +def _set_status_direct(conn, task_id: str, new_status: str) -> bool: + """Direct status write for drag-drop moves not covered by structured verbs. + + Used for ``todo <-> ready`` and ``running -> ready`` transitions. The + structured verbs (``complete_task``, ``block_task``, ``unblock_task``, + ``archive_task``, ``claim_task``) own their own state changes; this helper + handles the remainder while preserving the dispatcher's contract: + + - When transitioning OFF ``running`` to anything other than the terminal + verbs, claim_lock / claim_expires / worker_pid are nulled so the + dispatcher doesn't see a phantom-running task. The active run (if any) + is closed with ``outcome='reclaimed'`` so attempt history isn't + orphaned. + - When transitioning INTO ``running``, claim fields are preserved (this + function is NOT used for entering 'running' — that goes through + ``kb.claim_task()`` and the bridge rejects raw 'running' status writes + with HTTP 400). + + Mirrors the agent dashboard plugin's ``_set_status_direct`` + (plugins/kanban/dashboard/plugin_api.py) so first-party clients see + identical behaviour from either surface. + """ + kb = _kb() + with kb.write_txn(conn): + prev = conn.execute( + "SELECT status, current_run_id FROM tasks WHERE id = ?", + (task_id,), + ).fetchone() + if prev is None: + return False + was_running = prev["status"] == "running" + cur = conn.execute( + "UPDATE tasks SET status = ?, " + " claim_lock = CASE WHEN ? = 'running' THEN claim_lock ELSE NULL END, " + " claim_expires = CASE WHEN ? = 'running' THEN claim_expires ELSE NULL END, " + " worker_pid = CASE WHEN ? = 'running' THEN worker_pid ELSE NULL END " + "WHERE id = ?", + (new_status, new_status, new_status, new_status, task_id), + ) + if cur.rowcount != 1: + return False + run_id = None + if was_running and new_status != "running" and prev["current_run_id"]: + try: + run_id = kb._end_run( + conn, task_id, + outcome="reclaimed", status="reclaimed", + summary=f"status changed to {new_status} (webui/direct)", + ) + except Exception: + # _end_run is best-effort here; the status flip itself is + # what matters for sidebar rendering. + run_id = None + conn.execute( + "INSERT INTO task_events (task_id, run_id, kind, payload, created_at) " + "VALUES (?, ?, 'status', ?, ?)", + (task_id, run_id, json.dumps({"status": new_status, "source": "webui"}), int(time.time())), + ) + if new_status in ("done", "ready") and hasattr(kb, "recompute_ready"): + try: + kb.recompute_ready(conn) + except Exception: + pass + return True + + def _create_task_payload(body: dict): title = str(body.get("title") or "").strip() if not title: @@ -265,17 +332,43 @@ def _patch_task(conn, task_id: str, body: dict): elif status == "archived": if not kb.archive_task(conn, task_id): raise LookupError("task not found") - else: - task = kb.get_task(conn, task_id) - if not task: + elif status == "running": + # The 'running' state is owned by the kanban dispatcher / claim + # protocol — entering it via raw UPDATE bypasses claim_lock, + # claim_expires, started_at, and worker_pid, which leaves the task + # in a state the dispatcher treats as "phantom claimed" and may + # reclaim or hide. Match the agent dashboard plugin's contract + # (plugins/kanban/dashboard/plugin_api.py update_task) by rejecting + # this transition with HTTP 400. Workers enter 'running' via + # kb.claim_task(); UI users should use the dispatcher nudge. + raise ValueError( + "Cannot set status to 'running' directly; use the dispatcher/claim path" + ) + elif status == "ready": + # If the task is currently 'blocked', use the structured unblock + # verb so the unblocked event fires. Otherwise it's a legitimate + # drag-drop or click move (e.g. todo → ready, running → ready when + # the user yanks a stuck worker back to the queue) and we use the + # claim-aware direct status write. + current = kb.get_task(conn, task_id) + if not current: raise LookupError("task not found") - try: - setattr(task, "status", status) - except Exception: - pass - conn.execute("UPDATE tasks SET status = ? WHERE id = ?", (status, task_id)) - if hasattr(kb, "_append_event"): - kb._append_event(conn, task_id, "status", {"status": status, "source": "webui"}) + if current.status == "blocked": + if not kb.unblock_task(conn, task_id): + raise LookupError("task not found") + else: + if not _set_status_direct(conn, task_id, "ready"): + raise LookupError("task not found") + elif status in ("triage", "todo"): + # Direct status write for drag-drop moves between non-running, + # non-terminal columns. Uses the claim-aware helper that nulls out + # claim_lock / claim_expires / worker_pid when leaving 'running' + # and ends any active run with outcome='reclaimed'. + if not _set_status_direct(conn, task_id, status): + raise LookupError("task not found") + else: + # _validate_status guarantees we never reach here, but be defensive. + raise ValueError(f"unknown status: {status}") def _patch_task_payload(task_id: str, body: dict): diff --git a/static/panels.js b/static/panels.js index e7d5206d..df6efece 100644 --- a/static/panels.js +++ b/static/panels.js @@ -996,6 +996,16 @@ function dragKanbanTask(event, taskId){ } function allowKanbanDrop(event){ + // Don't accept drops into the 'running' column. Entering 'running' is owned + // by the dispatcher/claim_task path (sets claim_lock + claim_expires + + // started_at + worker_pid). A drag-drop would bypass that contract and the + // bridge would reject the resulting PATCH with HTTP 400 anyway. Refuse the + // drop visually so users see immediate feedback. + const target = event.currentTarget; + if (target && target.dataset && target.dataset.kanbanStatus === 'running') { + if (event.dataTransfer) event.dataTransfer.dropEffect = 'none'; + return; + } event.preventDefault(); if (event.dataTransfer) event.dataTransfer.dropEffect = 'move'; } @@ -1332,7 +1342,12 @@ function _kanbanRenderTaskDetail(data){ const events = data.events || []; const links = data.links || {}; const runs = data.runs || []; - const statusButtons = ['triage', 'todo', 'ready', 'running', 'blocked', 'done', 'archived'].map(status => + // Note: 'running' is intentionally absent — entering 'running' is the + // dispatcher/claim_task path's responsibility, not a user UI write. The + // bridge rejects PATCH status='running' with HTTP 400 to match the agent + // dashboard plugin's contract. UI users want to claim/promote a ready task + // via the dispatcher Nudge button, not flip it to running by hand. + const statusButtons = ['triage', 'todo', 'ready', 'blocked', 'done', 'archived'].map(status => `` ).join('') + ``; return `
diff --git a/tests/test_kanban_bridge.py b/tests/test_kanban_bridge.py index 4178dfd8..fbbd7419 100644 --- a/tests/test_kanban_bridge.py +++ b/tests/test_kanban_bridge.py @@ -424,3 +424,62 @@ def test_routes_dispatches_canonical_kanban_patch_and_delete_verbs(): assert 'parsed.path.startswith("/api/kanban/")' in src assert "handle_kanban_patch(handler, parsed, body)" in src assert "handle_kanban_delete(handler, parsed, body)" in src + + +def test_patch_status_running_is_rejected_to_protect_dispatcher_contract(monkeypatch): + """Bridge must NOT allow status='running' via PATCH. + + The 'running' state is owned by the kanban dispatcher / claim_task path + (sets claim_lock + claim_expires + started_at + worker_pid). A raw status + flip would leave the task in a phantom-claimed state the dispatcher would + treat as orphaned. Mirrors the agent dashboard plugin's contract at + plugins/kanban/dashboard/plugin_api.py update_task — both surfaces must + reject this transition. + """ + bridge = _load_bridge(monkeypatch) + bridge._OAUTH_FLOWS = getattr(bridge, '_OAUTH_FLOWS', {}) # no-op safe + # The fake board includes t_1 (ready) — try to PATCH it to 'running' + try: + bridge._patch_task_payload("t_1", {"status": "running"}) + except ValueError as exc: + assert "running" in str(exc).lower() + return + raise AssertionError("PATCH status='running' must raise ValueError") + + +def test_patch_status_done_to_running_is_rejected(monkeypatch): + """A completed task must not be resurrected to 'running' via PATCH.""" + bridge = _load_bridge(monkeypatch) + # The fake board includes t_2 (blocked); we'll PATCH any task to 'running' + try: + bridge._patch_task_payload("t_2", {"status": "running"}) + except ValueError as exc: + assert "running" in str(exc).lower() + return + raise AssertionError("PATCH status='running' must raise ValueError") + + +def test_patch_status_blocked_to_ready_routes_through_unblock_task(monkeypatch): + """blocked → ready transition must call kb.unblock_task (not raw UPDATE). + + kb.unblock_task is the structured verb that fires the 'unblocked' event + and clears any block-related state. Going through raw UPDATE would skip + that event firing, so live event polling and worker dispatchers wouldn't + see the transition. + """ + bridge = _load_bridge(monkeypatch) + # Hook into the shared FakeKanbanDB instance + kb = bridge._kb() + kb.unblock_calls = [] + original_unblock = kb.unblock_task + + def fake_unblock(conn, task_id): + kb.unblock_calls.append(task_id) + return original_unblock(conn, task_id) + + monkeypatch.setattr(kb, "unblock_task", fake_unblock, raising=False) + # t_2 is blocked in the fake fixture + bridge._patch_task_payload("t_2", {"status": "ready"}) + assert kb.unblock_calls == ["t_2"], ( + f"blocked → ready transition must call kb.unblock_task; saw: {kb.unblock_calls}" + )