test(chat): harden WebUI prefill script hook

2026-05-28 12:40:26 +00:00 · 2026-05-24 20:20:28 -04:00
parent fa57868431
commit befee0e035
4 changed files with 49 additions and 3 deletions
@@ -5,7 +5,7 @@

 ### Added

- WebUI can now opt into a `webui_prefill_messages_script` / `HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT` hook for dynamic browser-turn prefill context from local notes or recall systems. The script output is normalized to ephemeral prefill messages and browser status still hides message bodies while redacting script errors.
+- WebUI can now opt into a `webui_prefill_messages_script` / `HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT` hook for dynamic browser-turn prefill context from local notes or recall systems. Script output is limited to 256 KiB (larger output is rejected with an error rather than truncated) and is normalized to ephemeral prefill messages; browser status still hides message bodies and redacts script errors.

 ## [v0.51.131] — 2026-05-24 — Release DC (stage-batch13 — 6-PR notes-drawer + context-parity + PWA-swipe + locale polish)

@@ -150,8 +150,9 @@ HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT_TIMEOUT=5 \

 The script may print either an OpenAI-style JSON message list, a JSON object with
 a `messages` list, or plain text; plain text is wrapped as one `system` prefill
-message. The browser only receives a compact status event (`source`, `label`,
-message count, and redacted errors), never the prefill message bodies.
+message. Script output larger than 256 KiB is rejected with an error instead of
+being parsed. The browser receives only a compact status event (`source`,
+`label`, message count, and redacted errors), never the prefill message bodies.

 The bootstrap will:

@@ -287,6 +287,9 @@ def _resolve_prefill_path(raw: str) -> Path:
    return path


+_PREFILL_SCRIPT_OUTPUT_LIMIT = 262_144
+
+
 def _prefill_not_configured() -> dict:
    return {"status": "not_configured", "source": "none", "label": "", "messages": [], "message_count": 0}

@@ -364,6 +367,15 @@ def _load_prefill_messages_script(config_data: dict) -> dict:
    if proc.returncode != 0:
        err = _redact_prefill_status_text(proc.stderr or proc.stdout or f"prefill script exited {proc.returncode}")
        return {"status": "error", "source": "script", "label": label, "messages": [], "message_count": 0, "error": err}
+    if len(proc.stdout.encode("utf-8")) > _PREFILL_SCRIPT_OUTPUT_LIMIT:
+        return {
+            "status": "error",
+            "source": "script",
+            "label": label,
+            "messages": [],
+            "message_count": 0,
+            "error": f"prefill script output exceeded {_PREFILL_SCRIPT_OUTPUT_LIMIT} bytes",
+        }
    messages = _messages_from_prefill_script_output(proc.stdout)
    return {"status": "loaded", "source": "script", "label": label, "messages": messages, "message_count": len(messages)}

@@ -113,6 +113,39 @@ def test_webui_prefill_script_takes_precedence_over_static_file(tmp_path):
    assert result["messages"] == [{"role": "system", "content": "dynamic"}]


+def test_webui_prefill_script_timeout_returns_redacted_error(tmp_path):
+    from api.streaming import _load_webui_prefill_context
+
+    script = tmp_path / "slow_recall.py"
+    script.write_text("import time\ntime.sleep(1)\nprint('too late')\n", encoding="utf-8")
+
+    result = _load_webui_prefill_context({
+        "webui_prefill_messages_script": [sys.executable, str(script)],
+        "webui_prefill_messages_script_timeout": 0.1,
+    })
+
+    assert result["status"] == "error"
+    assert result["source"] == "script"
+    assert result["messages"] == []
+    assert result["message_count"] == 0
+    assert result["error"] == "prefill script timed out"
+
+
+def test_webui_prefill_script_rejects_oversized_stdout(tmp_path):
+    from api.streaming import _load_webui_prefill_context
+
+    script = tmp_path / "large_recall.py"
+    script.write_text("print('x' * 262145)\n", encoding="utf-8")
+
+    result = _load_webui_prefill_context({"webui_prefill_messages_script": [sys.executable, str(script)]})
+
+    assert result["status"] == "error"
+    assert result["source"] == "script"
+    assert result["messages"] == []
+    assert result["message_count"] == 0
+    assert "output exceeded" in result["error"]
+
+
 def test_public_prefill_status_strips_message_bodies():
    from api.streaming import _public_prefill_context_status