fix(kanban): protect dispatcher contract — reject raw status='running' PATCH

The PATCH /api/kanban/tasks/:id endpoint allowed any status-to-any-status transition for the non-claim/complete/block/archive set via raw `UPDATE tasks SET status = ?`. This let UI users (or any client) flip a task to 'running' without going through kb.claim_task(), bypassing claim_lock + claim_expires + started_at + worker_pid. The dispatcher treats such a phantom-claimed task as orphaned and may reclaim, hide, or double-dispatch it. Match the agent dashboard plugin's contract (plugins/kanban/dashboard/plugin_api.py update_task): - status='running' via PATCH → ValueError (HTTP 400) - status='ready' from currently-blocked → kb.unblock_task() (fires 'unblocked' event) - status='ready' from anything else, plus status in {'todo', 'triage'} → new _set_status_direct() helper that nulls claim fields when leaving 'running', closes any active run with outcome='reclaimed', and appends a 'status' event row to task_events - status='done', 'blocked', 'archived' → unchanged (already structured) Frontend changes: - Drop 'running' from the .kanban-status-actions button row in the task detail pane (clicking it would always 400 anyway). - allowKanbanDrop() refuses the 'running' column as a drop target with dropEffect='none' so users see immediate visual feedback that the dispatcher/claim path owns running. Tests added (3, all passing): - test_patch_status_running_is_rejected_to_protect_dispatcher_contract - test_patch_status_done_to_running_is_rejected - test_patch_status_blocked_to_ready_routes_through_unblock_task Existing 12 tests still pass. Co-authored-by: ai-ag2026 <ai-ag2026@users.noreply.github.com>
2026-05-25 03:00:23 +00:00 · 2026-05-04 23:06:42 +00:00
parent 711e33e7db
commit a39ec45b9f
3 changed files with 178 additions and 11 deletions
@@ -9,6 +9,7 @@ added in later focused PRs.
 from __future__ import annotations

 import json
+import time
 from dataclasses import asdict, is_dataclass
 from urllib.parse import parse_qs, unquote

@@ -183,6 +184,72 @@ def _validate_status(status: str) -> str:
    return value


+def _set_status_direct(conn, task_id: str, new_status: str) -> bool:
+    """Direct status write for drag-drop moves not covered by structured verbs.
+
+    Used for ``todo <-> ready`` and ``running -> ready`` transitions. The
+    structured verbs (``complete_task``, ``block_task``, ``unblock_task``,
+    ``archive_task``, ``claim_task``) own their own state changes; this helper
+    handles the remainder while preserving the dispatcher's contract:
+
+    - When transitioning OFF ``running`` to anything other than the terminal
+      verbs, claim_lock / claim_expires / worker_pid are nulled so the
+      dispatcher doesn't see a phantom-running task. The active run (if any)
+      is closed with ``outcome='reclaimed'`` so attempt history isn't
+      orphaned.
+    - When transitioning INTO ``running``, claim fields are preserved (this
+      function is NOT used for entering 'running' — that goes through
+      ``kb.claim_task()`` and the bridge rejects raw 'running' status writes
+      with HTTP 400).
+
+    Mirrors the agent dashboard plugin's ``_set_status_direct``
+    (plugins/kanban/dashboard/plugin_api.py) so first-party clients see
+    identical behaviour from either surface.
+    """
+    kb = _kb()
+    with kb.write_txn(conn):
+        prev = conn.execute(
+            "SELECT status, current_run_id FROM tasks WHERE id = ?",
+            (task_id,),
+        ).fetchone()
+        if prev is None:
+            return False
+        was_running = prev["status"] == "running"
+        cur = conn.execute(
+            "UPDATE tasks SET status = ?, "
+            "  claim_lock = CASE WHEN ? = 'running' THEN claim_lock ELSE NULL END, "
+            "  claim_expires = CASE WHEN ? = 'running' THEN claim_expires ELSE NULL END, "
+            "  worker_pid = CASE WHEN ? = 'running' THEN worker_pid ELSE NULL END "
+            "WHERE id = ?",
+            (new_status, new_status, new_status, new_status, task_id),
+        )
+        if cur.rowcount != 1:
+            return False
+        run_id = None
+        if was_running and new_status != "running" and prev["current_run_id"]:
+            try:
+                run_id = kb._end_run(
+                    conn, task_id,
+                    outcome="reclaimed", status="reclaimed",
+                    summary=f"status changed to {new_status} (webui/direct)",
+                )
+            except Exception:
+                # _end_run is best-effort here; the status flip itself is
+                # what matters for sidebar rendering.
+                run_id = None
+        conn.execute(
+            "INSERT INTO task_events (task_id, run_id, kind, payload, created_at) "
+            "VALUES (?, ?, 'status', ?, ?)",
+            (task_id, run_id, json.dumps({"status": new_status, "source": "webui"}), int(time.time())),
+        )
+    if new_status in ("done", "ready") and hasattr(kb, "recompute_ready"):
+        try:
+            kb.recompute_ready(conn)
+        except Exception:
+            pass
+    return True
+
+
 def _create_task_payload(body: dict):
    title = str(body.get("title") or "").strip()
    if not title:
@@ -265,17 +332,43 @@ def _patch_task(conn, task_id: str, body: dict):
    elif status == "archived":
        if not kb.archive_task(conn, task_id):
            raise LookupError("task not found")
-    else:
-        task = kb.get_task(conn, task_id)
-        if not task:
+    elif status == "running":
+        # The 'running' state is owned by the kanban dispatcher / claim
+        # protocol — entering it via raw UPDATE bypasses claim_lock,
+        # claim_expires, started_at, and worker_pid, which leaves the task
+        # in a state the dispatcher treats as "phantom claimed" and may
+        # reclaim or hide. Match the agent dashboard plugin's contract
+        # (plugins/kanban/dashboard/plugin_api.py update_task) by rejecting
+        # this transition with HTTP 400. Workers enter 'running' via
+        # kb.claim_task(); UI users should use the dispatcher nudge.
+        raise ValueError(
+            "Cannot set status to 'running' directly; use the dispatcher/claim path"
+        )
+    elif status == "ready":
+        # If the task is currently 'blocked', use the structured unblock
+        # verb so the unblocked event fires. Otherwise it's a legitimate
+        # drag-drop or click move (e.g. todo → ready, running → ready when
+        # the user yanks a stuck worker back to the queue) and we use the
+        # claim-aware direct status write.
+        current = kb.get_task(conn, task_id)
+        if not current:
            raise LookupError("task not found")
-        try:
-            setattr(task, "status", status)
-        except Exception:
-            pass
-        conn.execute("UPDATE tasks SET status = ? WHERE id = ?", (status, task_id))
-        if hasattr(kb, "_append_event"):
-            kb._append_event(conn, task_id, "status", {"status": status, "source": "webui"})
+        if current.status == "blocked":
+            if not kb.unblock_task(conn, task_id):
+                raise LookupError("task not found")
+        else:
+            if not _set_status_direct(conn, task_id, "ready"):
+                raise LookupError("task not found")
+    elif status in ("triage", "todo"):
+        # Direct status write for drag-drop moves between non-running,
+        # non-terminal columns. Uses the claim-aware helper that nulls out
+        # claim_lock / claim_expires / worker_pid when leaving 'running'
+        # and ends any active run with outcome='reclaimed'.
+        if not _set_status_direct(conn, task_id, status):
+            raise LookupError("task not found")
+    else:
+        # _validate_status guarantees we never reach here, but be defensive.
+        raise ValueError(f"unknown status: {status}")


 def _patch_task_payload(task_id: str, body: dict):
@@ -996,6 +996,16 @@ function dragKanbanTask(event, taskId){
 }

 function allowKanbanDrop(event){
+  // Don't accept drops into the 'running' column. Entering 'running' is owned
+  // by the dispatcher/claim_task path (sets claim_lock + claim_expires +
+  // started_at + worker_pid). A drag-drop would bypass that contract and the
+  // bridge would reject the resulting PATCH with HTTP 400 anyway. Refuse the
+  // drop visually so users see immediate feedback.
+  const target = event.currentTarget;
+  if (target && target.dataset && target.dataset.kanbanStatus === 'running') {
+    if (event.dataTransfer) event.dataTransfer.dropEffect = 'none';
+    return;
+  }
  event.preventDefault();
  if (event.dataTransfer) event.dataTransfer.dropEffect = 'move';
 }
@@ -1332,7 +1342,12 @@ function _kanbanRenderTaskDetail(data){
  const events = data.events || [];
  const links = data.links || {};
  const runs = data.runs || [];
-  const statusButtons = ['triage', 'todo', 'ready', 'running', 'blocked', 'done', 'archived'].map(status =>
+  // Note: 'running' is intentionally absent — entering 'running' is the
+  // dispatcher/claim_task path's responsibility, not a user UI write. The
+  // bridge rejects PATCH status='running' with HTTP 400 to match the agent
+  // dashboard plugin's contract. UI users want to claim/promote a ready task
+  // via the dispatcher Nudge button, not flip it to running by hand.
+  const statusButtons = ['triage', 'todo', 'ready', 'blocked', 'done', 'archived'].map(status =>
    `<button class="btn secondary" onclick="updateKanbanTask('${esc(task.id)}',{status:'${status}'})">${esc(_kanbanColumnLabel(status))}</button>`
  ).join('') + `<button class="btn secondary" onclick="blockKanbanTask('${esc(task.id)}')">${esc(t('kanban_block'))}</button><button class="btn secondary" onclick="unblockKanbanTask('${esc(task.id)}')">${esc(t('kanban_unblock'))}</button>`;
  return `<div class="kanban-task-preview-header">
@@ -424,3 +424,62 @@ def test_routes_dispatches_canonical_kanban_patch_and_delete_verbs():
    assert 'parsed.path.startswith("/api/kanban/")' in src
    assert "handle_kanban_patch(handler, parsed, body)" in src
    assert "handle_kanban_delete(handler, parsed, body)" in src
+
+
+def test_patch_status_running_is_rejected_to_protect_dispatcher_contract(monkeypatch):
+    """Bridge must NOT allow status='running' via PATCH.
+
+    The 'running' state is owned by the kanban dispatcher / claim_task path
+    (sets claim_lock + claim_expires + started_at + worker_pid). A raw status
+    flip would leave the task in a phantom-claimed state the dispatcher would
+    treat as orphaned. Mirrors the agent dashboard plugin's contract at
+    plugins/kanban/dashboard/plugin_api.py update_task — both surfaces must
+    reject this transition.
+    """
+    bridge = _load_bridge(monkeypatch)
+    bridge._OAUTH_FLOWS = getattr(bridge, '_OAUTH_FLOWS', {})  # no-op safe
+    # The fake board includes t_1 (ready) — try to PATCH it to 'running'
+    try:
+        bridge._patch_task_payload("t_1", {"status": "running"})
+    except ValueError as exc:
+        assert "running" in str(exc).lower()
+        return
+    raise AssertionError("PATCH status='running' must raise ValueError")
+
+
+def test_patch_status_done_to_running_is_rejected(monkeypatch):
+    """A completed task must not be resurrected to 'running' via PATCH."""
+    bridge = _load_bridge(monkeypatch)
+    # The fake board includes t_2 (blocked); we'll PATCH any task to 'running'
+    try:
+        bridge._patch_task_payload("t_2", {"status": "running"})
+    except ValueError as exc:
+        assert "running" in str(exc).lower()
+        return
+    raise AssertionError("PATCH status='running' must raise ValueError")
+
+
+def test_patch_status_blocked_to_ready_routes_through_unblock_task(monkeypatch):
+    """blocked → ready transition must call kb.unblock_task (not raw UPDATE).
+
+    kb.unblock_task is the structured verb that fires the 'unblocked' event
+    and clears any block-related state. Going through raw UPDATE would skip
+    that event firing, so live event polling and worker dispatchers wouldn't
+    see the transition.
+    """
+    bridge = _load_bridge(monkeypatch)
+    # Hook into the shared FakeKanbanDB instance
+    kb = bridge._kb()
+    kb.unblock_calls = []
+    original_unblock = kb.unblock_task
+
+    def fake_unblock(conn, task_id):
+        kb.unblock_calls.append(task_id)
+        return original_unblock(conn, task_id)
+
+    monkeypatch.setattr(kb, "unblock_task", fake_unblock, raising=False)
+    # t_2 is blocked in the fake fixture
+    bridge._patch_task_payload("t_2", {"status": "ready"})
+    assert kb.unblock_calls == ["t_2"], (
+        f"blocked → ready transition must call kb.unblock_task; saw: {kb.unblock_calls}"
+    )