fix(kanban): protect dispatcher contract — reject raw status='running' PATCH

The PATCH /api/kanban/tasks/:id endpoint allowed any status-to-any-status
transition for the non-claim/complete/block/archive set via raw
`UPDATE tasks SET status = ?`. This let UI users (or any client) flip a
task to 'running' without going through kb.claim_task(), bypassing
claim_lock + claim_expires + started_at + worker_pid. The dispatcher
treats such a phantom-claimed task as orphaned and may reclaim, hide, or
double-dispatch it.

Match the agent dashboard plugin's contract
(plugins/kanban/dashboard/plugin_api.py update_task):

- status='running' via PATCH → ValueError (HTTP 400)
- status='ready' from currently-blocked → kb.unblock_task() (fires
  'unblocked' event)
- status='ready' from anything else, plus status in {'todo', 'triage'}
  → new _set_status_direct() helper that nulls claim fields when leaving
  'running', closes any active run with outcome='reclaimed', and
  appends a 'status' event row to task_events
- status='done', 'blocked', 'archived' → unchanged (already structured)

Frontend changes:
- Drop 'running' from the .kanban-status-actions button row in the task
  detail pane (clicking it would always 400 anyway).
- allowKanbanDrop() refuses the 'running' column as a drop target with
  dropEffect='none' so users see immediate visual feedback that the
  dispatcher/claim path owns running.

Tests added (3, all passing):
- test_patch_status_running_is_rejected_to_protect_dispatcher_contract
- test_patch_status_done_to_running_is_rejected
- test_patch_status_blocked_to_ready_routes_through_unblock_task

Existing 12 tests still pass.

Co-authored-by: ai-ag2026 <ai-ag2026@users.noreply.github.com>
This commit is contained in:
Hermes Agent
2026-05-04 23:06:42 +00:00
parent 711e33e7db
commit a39ec45b9f
3 changed files with 178 additions and 11 deletions
+103 -10
View File
@@ -9,6 +9,7 @@ added in later focused PRs.
from __future__ import annotations
import json
import time
from dataclasses import asdict, is_dataclass
from urllib.parse import parse_qs, unquote
@@ -183,6 +184,72 @@ def _validate_status(status: str) -> str:
return value
def _set_status_direct(conn, task_id: str, new_status: str) -> bool:
"""Direct status write for drag-drop moves not covered by structured verbs.
Used for ``todo <-> ready`` and ``running -> ready`` transitions. The
structured verbs (``complete_task``, ``block_task``, ``unblock_task``,
``archive_task``, ``claim_task``) own their own state changes; this helper
handles the remainder while preserving the dispatcher's contract:
- When transitioning OFF ``running`` to anything other than the terminal
verbs, claim_lock / claim_expires / worker_pid are nulled so the
dispatcher doesn't see a phantom-running task. The active run (if any)
is closed with ``outcome='reclaimed'`` so attempt history isn't
orphaned.
- When transitioning INTO ``running``, claim fields are preserved (this
function is NOT used for entering 'running' — that goes through
``kb.claim_task()`` and the bridge rejects raw 'running' status writes
with HTTP 400).
Mirrors the agent dashboard plugin's ``_set_status_direct``
(plugins/kanban/dashboard/plugin_api.py) so first-party clients see
identical behaviour from either surface.
"""
kb = _kb()
with kb.write_txn(conn):
prev = conn.execute(
"SELECT status, current_run_id FROM tasks WHERE id = ?",
(task_id,),
).fetchone()
if prev is None:
return False
was_running = prev["status"] == "running"
cur = conn.execute(
"UPDATE tasks SET status = ?, "
" claim_lock = CASE WHEN ? = 'running' THEN claim_lock ELSE NULL END, "
" claim_expires = CASE WHEN ? = 'running' THEN claim_expires ELSE NULL END, "
" worker_pid = CASE WHEN ? = 'running' THEN worker_pid ELSE NULL END "
"WHERE id = ?",
(new_status, new_status, new_status, new_status, task_id),
)
if cur.rowcount != 1:
return False
run_id = None
if was_running and new_status != "running" and prev["current_run_id"]:
try:
run_id = kb._end_run(
conn, task_id,
outcome="reclaimed", status="reclaimed",
summary=f"status changed to {new_status} (webui/direct)",
)
except Exception:
# _end_run is best-effort here; the status flip itself is
# what matters for sidebar rendering.
run_id = None
conn.execute(
"INSERT INTO task_events (task_id, run_id, kind, payload, created_at) "
"VALUES (?, ?, 'status', ?, ?)",
(task_id, run_id, json.dumps({"status": new_status, "source": "webui"}), int(time.time())),
)
if new_status in ("done", "ready") and hasattr(kb, "recompute_ready"):
try:
kb.recompute_ready(conn)
except Exception:
pass
return True
def _create_task_payload(body: dict):
title = str(body.get("title") or "").strip()
if not title:
@@ -265,17 +332,43 @@ def _patch_task(conn, task_id: str, body: dict):
elif status == "archived":
if not kb.archive_task(conn, task_id):
raise LookupError("task not found")
else:
task = kb.get_task(conn, task_id)
if not task:
elif status == "running":
# The 'running' state is owned by the kanban dispatcher / claim
# protocol — entering it via raw UPDATE bypasses claim_lock,
# claim_expires, started_at, and worker_pid, which leaves the task
# in a state the dispatcher treats as "phantom claimed" and may
# reclaim or hide. Match the agent dashboard plugin's contract
# (plugins/kanban/dashboard/plugin_api.py update_task) by rejecting
# this transition with HTTP 400. Workers enter 'running' via
# kb.claim_task(); UI users should use the dispatcher nudge.
raise ValueError(
"Cannot set status to 'running' directly; use the dispatcher/claim path"
)
elif status == "ready":
# If the task is currently 'blocked', use the structured unblock
# verb so the unblocked event fires. Otherwise it's a legitimate
# drag-drop or click move (e.g. todo → ready, running → ready when
# the user yanks a stuck worker back to the queue) and we use the
# claim-aware direct status write.
current = kb.get_task(conn, task_id)
if not current:
raise LookupError("task not found")
try:
setattr(task, "status", status)
except Exception:
pass
conn.execute("UPDATE tasks SET status = ? WHERE id = ?", (status, task_id))
if hasattr(kb, "_append_event"):
kb._append_event(conn, task_id, "status", {"status": status, "source": "webui"})
if current.status == "blocked":
if not kb.unblock_task(conn, task_id):
raise LookupError("task not found")
else:
if not _set_status_direct(conn, task_id, "ready"):
raise LookupError("task not found")
elif status in ("triage", "todo"):
# Direct status write for drag-drop moves between non-running,
# non-terminal columns. Uses the claim-aware helper that nulls out
# claim_lock / claim_expires / worker_pid when leaving 'running'
# and ends any active run with outcome='reclaimed'.
if not _set_status_direct(conn, task_id, status):
raise LookupError("task not found")
else:
# _validate_status guarantees we never reach here, but be defensive.
raise ValueError(f"unknown status: {status}")
def _patch_task_payload(task_id: str, body: dict):
+16 -1
View File
@@ -996,6 +996,16 @@ function dragKanbanTask(event, taskId){
}
function allowKanbanDrop(event){
// Don't accept drops into the 'running' column. Entering 'running' is owned
// by the dispatcher/claim_task path (sets claim_lock + claim_expires +
// started_at + worker_pid). A drag-drop would bypass that contract and the
// bridge would reject the resulting PATCH with HTTP 400 anyway. Refuse the
// drop visually so users see immediate feedback.
const target = event.currentTarget;
if (target && target.dataset && target.dataset.kanbanStatus === 'running') {
if (event.dataTransfer) event.dataTransfer.dropEffect = 'none';
return;
}
event.preventDefault();
if (event.dataTransfer) event.dataTransfer.dropEffect = 'move';
}
@@ -1332,7 +1342,12 @@ function _kanbanRenderTaskDetail(data){
const events = data.events || [];
const links = data.links || {};
const runs = data.runs || [];
const statusButtons = ['triage', 'todo', 'ready', 'running', 'blocked', 'done', 'archived'].map(status =>
// Note: 'running' is intentionally absent — entering 'running' is the
// dispatcher/claim_task path's responsibility, not a user UI write. The
// bridge rejects PATCH status='running' with HTTP 400 to match the agent
// dashboard plugin's contract. UI users want to claim/promote a ready task
// via the dispatcher Nudge button, not flip it to running by hand.
const statusButtons = ['triage', 'todo', 'ready', 'blocked', 'done', 'archived'].map(status =>
`<button class="btn secondary" onclick="updateKanbanTask('${esc(task.id)}',{status:'${status}'})">${esc(_kanbanColumnLabel(status))}</button>`
).join('') + `<button class="btn secondary" onclick="blockKanbanTask('${esc(task.id)}')">${esc(t('kanban_block'))}</button><button class="btn secondary" onclick="unblockKanbanTask('${esc(task.id)}')">${esc(t('kanban_unblock'))}</button>`;
return `<div class="kanban-task-preview-header">
+59
View File
@@ -424,3 +424,62 @@ def test_routes_dispatches_canonical_kanban_patch_and_delete_verbs():
assert 'parsed.path.startswith("/api/kanban/")' in src
assert "handle_kanban_patch(handler, parsed, body)" in src
assert "handle_kanban_delete(handler, parsed, body)" in src
def test_patch_status_running_is_rejected_to_protect_dispatcher_contract(monkeypatch):
"""Bridge must NOT allow status='running' via PATCH.
The 'running' state is owned by the kanban dispatcher / claim_task path
(sets claim_lock + claim_expires + started_at + worker_pid). A raw status
flip would leave the task in a phantom-claimed state the dispatcher would
treat as orphaned. Mirrors the agent dashboard plugin's contract at
plugins/kanban/dashboard/plugin_api.py update_task both surfaces must
reject this transition.
"""
bridge = _load_bridge(monkeypatch)
bridge._OAUTH_FLOWS = getattr(bridge, '_OAUTH_FLOWS', {}) # no-op safe
# The fake board includes t_1 (ready) — try to PATCH it to 'running'
try:
bridge._patch_task_payload("t_1", {"status": "running"})
except ValueError as exc:
assert "running" in str(exc).lower()
return
raise AssertionError("PATCH status='running' must raise ValueError")
def test_patch_status_done_to_running_is_rejected(monkeypatch):
"""A completed task must not be resurrected to 'running' via PATCH."""
bridge = _load_bridge(monkeypatch)
# The fake board includes t_2 (blocked); we'll PATCH any task to 'running'
try:
bridge._patch_task_payload("t_2", {"status": "running"})
except ValueError as exc:
assert "running" in str(exc).lower()
return
raise AssertionError("PATCH status='running' must raise ValueError")
def test_patch_status_blocked_to_ready_routes_through_unblock_task(monkeypatch):
"""blocked → ready transition must call kb.unblock_task (not raw UPDATE).
kb.unblock_task is the structured verb that fires the 'unblocked' event
and clears any block-related state. Going through raw UPDATE would skip
that event firing, so live event polling and worker dispatchers wouldn't
see the transition.
"""
bridge = _load_bridge(monkeypatch)
# Hook into the shared FakeKanbanDB instance
kb = bridge._kb()
kb.unblock_calls = []
original_unblock = kb.unblock_task
def fake_unblock(conn, task_id):
kb.unblock_calls.append(task_id)
return original_unblock(conn, task_id)
monkeypatch.setattr(kb, "unblock_task", fake_unblock, raising=False)
# t_2 is blocked in the fake fixture
bridge._patch_task_payload("t_2", {"status": "ready"})
assert kb.unblock_calls == ["t_2"], (
f"blocked → ready transition must call kb.unblock_task; saw: {kb.unblock_calls}"
)