From a39ec45b9f917b1fbd474221cc6b61e75e2c7a55 Mon Sep 17 00:00:00 2001
From: Hermes Agent <hermes-agent@nesquena-hermes.local>
Date: Mon, 4 May 2026 23:06:42 +0000
Subject: [PATCH] =?UTF-8?q?fix(kanban):=20protect=20dispatcher=20contract?=
 =?UTF-8?q?=20=E2=80=94=20reject=20raw=20status=3D'running'=20PATCH?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PATCH /api/kanban/tasks/:id endpoint allowed any status-to-any-status
transition for the non-claim/complete/block/archive set via raw
`UPDATE tasks SET status = ?`. This let UI users (or any client) flip a
task to 'running' without going through kb.claim_task(), bypassing
claim_lock + claim_expires + started_at + worker_pid. The dispatcher
treats such a phantom-claimed task as orphaned and may reclaim, hide, or
double-dispatch it.

Match the agent dashboard plugin's contract
(plugins/kanban/dashboard/plugin_api.py update_task):

- status='running' via PATCH → ValueError (HTTP 400)
- status='ready' from currently-blocked → kb.unblock_task() (fires
  'unblocked' event)
- status='ready' from anything else, plus status in {'todo', 'triage'}
  → new _set_status_direct() helper that nulls claim fields when leaving
  'running', closes any active run with outcome='reclaimed', and
  appends a 'status' event row to task_events
- status='done', 'blocked', 'archived' → unchanged (already structured)

Frontend changes:
- Drop 'running' from the .kanban-status-actions button row in the task
  detail pane (clicking it would always 400 anyway).
- allowKanbanDrop() refuses the 'running' column as a drop target with
  dropEffect='none' so users see immediate visual feedback that the
  dispatcher/claim path owns running.

Tests added (3, all passing):
- test_patch_status_running_is_rejected_to_protect_dispatcher_contract
- test_patch_status_done_to_running_is_rejected
- test_patch_status_blocked_to_ready_routes_through_unblock_task

Existing 12 tests still pass.

Co-authored-by: ai-ag2026 <ai-ag2026@users.noreply.github.com>
---
 api/kanban_bridge.py        | 113 ++++++++++++++++++++++++++++++++----
 static/panels.js            |  17 +++++-
 tests/test_kanban_bridge.py |  59 +++++++++++++++++++
 3 files changed, 178 insertions(+), 11 deletions(-)

diff --git a/api/kanban_bridge.py b/api/kanban_bridge.py
index 575ae610..68dddf2d 100644
--- a/api/kanban_bridge.py
+++ b/api/kanban_bridge.py
@@ -9,6 +9,7 @@ added in later focused PRs.
 from __future__ import annotations
 
 import json
+import time
 from dataclasses import asdict, is_dataclass
 from urllib.parse import parse_qs, unquote
 
@@ -183,6 +184,72 @@ def _validate_status(status: str) -> str:
     return value
 
 
+def _set_status_direct(conn, task_id: str, new_status: str) -> bool:
+    """Direct status write for drag-drop moves not covered by structured verbs.
+
+    Used for ``todo <-> ready`` and ``running -> ready`` transitions. The
+    structured verbs (``complete_task``, ``block_task``, ``unblock_task``,
+    ``archive_task``, ``claim_task``) own their own state changes; this helper
+    handles the remainder while preserving the dispatcher's contract:
+
+    - When transitioning OFF ``running`` to anything other than the terminal
+      verbs, claim_lock / claim_expires / worker_pid are nulled so the
+      dispatcher doesn't see a phantom-running task. The active run (if any)
+      is closed with ``outcome='reclaimed'`` so attempt history isn't
+      orphaned.
+    - When transitioning INTO ``running``, claim fields are preserved (this
+      function is NOT used for entering 'running' — that goes through
+      ``kb.claim_task()`` and the bridge rejects raw 'running' status writes
+      with HTTP 400).
+
+    Mirrors the agent dashboard plugin's ``_set_status_direct``
+    (plugins/kanban/dashboard/plugin_api.py) so first-party clients see
+    identical behaviour from either surface.
+    """
+    kb = _kb()
+    with kb.write_txn(conn):
+        prev = conn.execute(
+            "SELECT status, current_run_id FROM tasks WHERE id = ?",
+            (task_id,),
+        ).fetchone()
+        if prev is None:
+            return False
+        was_running = prev["status"] == "running"
+        cur = conn.execute(
+            "UPDATE tasks SET status = ?, "
+            "  claim_lock = CASE WHEN ? = 'running' THEN claim_lock ELSE NULL END, "
+            "  claim_expires = CASE WHEN ? = 'running' THEN claim_expires ELSE NULL END, "
+            "  worker_pid = CASE WHEN ? = 'running' THEN worker_pid ELSE NULL END "
+            "WHERE id = ?",
+            (new_status, new_status, new_status, new_status, task_id),
+        )
+        if cur.rowcount != 1:
+            return False
+        run_id = None
+        if was_running and new_status != "running" and prev["current_run_id"]:
+            try:
+                run_id = kb._end_run(
+                    conn, task_id,
+                    outcome="reclaimed", status="reclaimed",
+                    summary=f"status changed to {new_status} (webui/direct)",
+                )
+            except Exception:
+                # _end_run is best-effort here; the status flip itself is
+                # what matters for sidebar rendering.
+                run_id = None
+        conn.execute(
+            "INSERT INTO task_events (task_id, run_id, kind, payload, created_at) "
+            "VALUES (?, ?, 'status', ?, ?)",
+            (task_id, run_id, json.dumps({"status": new_status, "source": "webui"}), int(time.time())),
+        )
+    if new_status in ("done", "ready") and hasattr(kb, "recompute_ready"):
+        try:
+            kb.recompute_ready(conn)
+        except Exception:
+            pass
+    return True
+
+
 def _create_task_payload(body: dict):
     title = str(body.get("title") or "").strip()
     if not title:
@@ -265,17 +332,43 @@ def _patch_task(conn, task_id: str, body: dict):
     elif status == "archived":
         if not kb.archive_task(conn, task_id):
             raise LookupError("task not found")
-    else:
-        task = kb.get_task(conn, task_id)
-        if not task:
+    elif status == "running":
+        # The 'running' state is owned by the kanban dispatcher / claim
+        # protocol — entering it via raw UPDATE bypasses claim_lock,
+        # claim_expires, started_at, and worker_pid, which leaves the task
+        # in a state the dispatcher treats as "phantom claimed" and may
+        # reclaim or hide. Match the agent dashboard plugin's contract
+        # (plugins/kanban/dashboard/plugin_api.py update_task) by rejecting
+        # this transition with HTTP 400. Workers enter 'running' via
+        # kb.claim_task(); UI users should use the dispatcher nudge.
+        raise ValueError(
+            "Cannot set status to 'running' directly; use the dispatcher/claim path"
+        )
+    elif status == "ready":
+        # If the task is currently 'blocked', use the structured unblock
+        # verb so the unblocked event fires. Otherwise it's a legitimate
+        # drag-drop or click move (e.g. todo → ready, running → ready when
+        # the user yanks a stuck worker back to the queue) and we use the
+        # claim-aware direct status write.
+        current = kb.get_task(conn, task_id)
+        if not current:
             raise LookupError("task not found")
-        try:
-            setattr(task, "status", status)
-        except Exception:
-            pass
-        conn.execute("UPDATE tasks SET status = ? WHERE id = ?", (status, task_id))
-        if hasattr(kb, "_append_event"):
-            kb._append_event(conn, task_id, "status", {"status": status, "source": "webui"})
+        if current.status == "blocked":
+            if not kb.unblock_task(conn, task_id):
+                raise LookupError("task not found")
+        else:
+            if not _set_status_direct(conn, task_id, "ready"):
+                raise LookupError("task not found")
+    elif status in ("triage", "todo"):
+        # Direct status write for drag-drop moves between non-running,
+        # non-terminal columns. Uses the claim-aware helper that nulls out
+        # claim_lock / claim_expires / worker_pid when leaving 'running'
+        # and ends any active run with outcome='reclaimed'.
+        if not _set_status_direct(conn, task_id, status):
+            raise LookupError("task not found")
+    else:
+        # _validate_status guarantees we never reach here, but be defensive.
+        raise ValueError(f"unknown status: {status}")
 
 
 def _patch_task_payload(task_id: str, body: dict):
diff --git a/static/panels.js b/static/panels.js
index e7d5206d..df6efece 100644
--- a/static/panels.js
+++ b/static/panels.js
@@ -996,6 +996,16 @@ function dragKanbanTask(event, taskId){
 }
 
 function allowKanbanDrop(event){
+  // Don't accept drops into the 'running' column. Entering 'running' is owned
+  // by the dispatcher/claim_task path (sets claim_lock + claim_expires +
+  // started_at + worker_pid). A drag-drop would bypass that contract and the
+  // bridge would reject the resulting PATCH with HTTP 400 anyway. Refuse the
+  // drop visually so users see immediate feedback.
+  const target = event.currentTarget;
+  if (target && target.dataset && target.dataset.kanbanStatus === 'running') {
+    if (event.dataTransfer) event.dataTransfer.dropEffect = 'none';
+    return;
+  }
   event.preventDefault();
   if (event.dataTransfer) event.dataTransfer.dropEffect = 'move';
 }
@@ -1332,7 +1342,12 @@ function _kanbanRenderTaskDetail(data){
   const events = data.events || [];
   const links = data.links || {};
   const runs = data.runs || [];
-  const statusButtons = ['triage', 'todo', 'ready', 'running', 'blocked', 'done', 'archived'].map(status =>
+  // Note: 'running' is intentionally absent — entering 'running' is the
+  // dispatcher/claim_task path's responsibility, not a user UI write. The
+  // bridge rejects PATCH status='running' with HTTP 400 to match the agent
+  // dashboard plugin's contract. UI users want to claim/promote a ready task
+  // via the dispatcher Nudge button, not flip it to running by hand.
+  const statusButtons = ['triage', 'todo', 'ready', 'blocked', 'done', 'archived'].map(status =>
     `<button class="btn secondary" onclick="updateKanbanTask('${esc(task.id)}',{status:'${status}'})">${esc(_kanbanColumnLabel(status))}</button>`
   ).join('') + `<button class="btn secondary" onclick="blockKanbanTask('${esc(task.id)}')">${esc(t('kanban_block'))}</button><button class="btn secondary" onclick="unblockKanbanTask('${esc(task.id)}')">${esc(t('kanban_unblock'))}</button>`;
   return `<div class="kanban-task-preview-header">
diff --git a/tests/test_kanban_bridge.py b/tests/test_kanban_bridge.py
index 4178dfd8..fbbd7419 100644
--- a/tests/test_kanban_bridge.py
+++ b/tests/test_kanban_bridge.py
@@ -424,3 +424,62 @@ def test_routes_dispatches_canonical_kanban_patch_and_delete_verbs():
     assert 'parsed.path.startswith("/api/kanban/")' in src
     assert "handle_kanban_patch(handler, parsed, body)" in src
     assert "handle_kanban_delete(handler, parsed, body)" in src
+
+
+def test_patch_status_running_is_rejected_to_protect_dispatcher_contract(monkeypatch):
+    """Bridge must NOT allow status='running' via PATCH.
+
+    The 'running' state is owned by the kanban dispatcher / claim_task path
+    (sets claim_lock + claim_expires + started_at + worker_pid). A raw status
+    flip would leave the task in a phantom-claimed state the dispatcher would
+    treat as orphaned. Mirrors the agent dashboard plugin's contract at
+    plugins/kanban/dashboard/plugin_api.py update_task — both surfaces must
+    reject this transition.
+    """
+    bridge = _load_bridge(monkeypatch)
+    bridge._OAUTH_FLOWS = getattr(bridge, '_OAUTH_FLOWS', {})  # no-op safe
+    # The fake board includes t_1 (ready) — try to PATCH it to 'running'
+    try:
+        bridge._patch_task_payload("t_1", {"status": "running"})
+    except ValueError as exc:
+        assert "running" in str(exc).lower()
+        return
+    raise AssertionError("PATCH status='running' must raise ValueError")
+
+
+def test_patch_status_done_to_running_is_rejected(monkeypatch):
+    """A completed task must not be resurrected to 'running' via PATCH."""
+    bridge = _load_bridge(monkeypatch)
+    # The fake board includes t_2 (blocked); we'll PATCH any task to 'running'
+    try:
+        bridge._patch_task_payload("t_2", {"status": "running"})
+    except ValueError as exc:
+        assert "running" in str(exc).lower()
+        return
+    raise AssertionError("PATCH status='running' must raise ValueError")
+
+
+def test_patch_status_blocked_to_ready_routes_through_unblock_task(monkeypatch):
+    """blocked → ready transition must call kb.unblock_task (not raw UPDATE).
+
+    kb.unblock_task is the structured verb that fires the 'unblocked' event
+    and clears any block-related state. Going through raw UPDATE would skip
+    that event firing, so live event polling and worker dispatchers wouldn't
+    see the transition.
+    """
+    bridge = _load_bridge(monkeypatch)
+    # Hook into the shared FakeKanbanDB instance
+    kb = bridge._kb()
+    kb.unblock_calls = []
+    original_unblock = kb.unblock_task
+
+    def fake_unblock(conn, task_id):
+        kb.unblock_calls.append(task_id)
+        return original_unblock(conn, task_id)
+
+    monkeypatch.setattr(kb, "unblock_task", fake_unblock, raising=False)
+    # t_2 is blocked in the fake fixture
+    bridge._patch_task_payload("t_2", {"status": "ready"})
+    assert kb.unblock_calls == ["t_2"], (
+        f"blocked → ready transition must call kb.unblock_task; saw: {kb.unblock_calls}"
+    )