From a48e47dd1cd53115eccdd802461df1ebb6a63d15 Mon Sep 17 00:00:00 2001
From: Michael Lam <michael@example.local>
Date: Sun, 17 May 2026 20:40:20 -0700
Subject: [PATCH 01/13] feat: separate CLI sessions in sidebar

---
 CHANGELOG.md                                  |   4 ++
 docs/pr-media/2351/after-source-tabs.png      | Bin 0 -> 2355 bytes
 docs/pr-media/2351/before-cli-mixed.png       | Bin 0 -> 2355 bytes
 static/sessions.js                            |  59 +++++++++++++++++-
 static/style.css                              |   5 ++
 ...est_issue2351_cli_session_source_filter.py |  31 +++++++++
 6 files changed, 96 insertions(+), 3 deletions(-)
 create mode 100644 docs/pr-media/2351/after-source-tabs.png
 create mode 100644 docs/pr-media/2351/before-cli-mixed.png
 create mode 100644 tests/test_issue2351_cli_session_source_filter.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ed15eec3..04bf9c79 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## [Unreleased]
 
+### Added
+
+- **PR #2506** by @Michaelyklam (refs #2351) — Add a read-only WebUI/CLI session source switch in the chat sidebar when agent session sync is enabled. WebUI conversations stay in the default list, while imported CLI/agent sessions are surfaced under a separate `CLI sessions` tab with counts so large CLI histories do not clutter the normal conversation list.
+
 ## [v0.51.91] — 2026-05-18 — Release BO (stage-384 — 5-PR full sweep batch — reasoning-replay history fix + archive-extract per-session inbox + fallback streaming warnings + sanitized custom-provider env hints + Slice 3c queue/goal adapter routing)
 
 ### Fixed
diff --git a/docs/pr-media/2351/after-source-tabs.png b/docs/pr-media/2351/after-source-tabs.png
new file mode 100644
index 0000000000000000000000000000000000000000..2279fd3004940274c8e3f47d4f10930c3f6cf87a
GIT binary patch
literal 2355
zcmeAS@N?(olHy`uVBq!ia0y~yU~FSxV7kD;1Qe0J>u{8Tf#Zdzi(^Q|oHtht1sN20
z7&gisk7E8I-qkBFB)D^TK7-Ubh7=7>jzA%SMJfs|o(>(88U$1s6}_08rm(b(N{j{x
zrHR2nb~G)GX0OqLWwb~gtz|~*SDLjfJ~1*d{Qp0H!p|4LrVksa&BM&lBE@=rfyVu5
PKoJH{S3j3^P6<r_rm{FV

literal 0
HcmV?d00001

diff --git a/docs/pr-media/2351/before-cli-mixed.png b/docs/pr-media/2351/before-cli-mixed.png
new file mode 100644
index 0000000000000000000000000000000000000000..2279fd3004940274c8e3f47d4f10930c3f6cf87a
GIT binary patch
literal 2355
zcmeAS@N?(olHy`uVBq!ia0y~yU~FSxV7kD;1Qe0J>u{8Tf#Zdzi(^Q|oHtht1sN20
z7&gisk7E8I-qkBFB)D^TK7-Ubh7=7>jzA%SMJfs|o(>(88U$1s6}_08rm(b(N{j{x
zrHR2nb~G)GX0OqLWwb~gtz|~*SDLjfJ~1*d{Qp0H!p|4LrVksa&BM&lBE@=rfyVu5
PKoJH{S3j3^P6<r_rm{FV

literal 0
HcmV?d00001

diff --git a/static/sessions.js b/static/sessions.js
index 19929781..5780ab9c 100644
--- a/static/sessions.js
+++ b/static/sessions.js
@@ -847,6 +847,29 @@ function _isCliSession(session) {
   return session.is_cli_session === true;
 }
 
+function _sessionSourceLabel(filter, count) {
+  const n = Number(count) || 0;
+  return filter === 'cli' ? `CLI sessions (${n})` : `WebUI sessions (${n})`;
+}
+
+function _setSessionSourceFilter(filter) {
+  const next = filter === 'cli' ? 'cli' : 'webui';
+  if (_sessionSourceFilter === next) return;
+  _sessionSourceFilter = next;
+  _activeProject = null;
+  _selectedSessions.clear();
+  _sessionSelectMode = false;
+  try { localStorage.setItem('hermes-session-source-filter', next); } catch (_e) {}
+  renderSessionListFromCache();
+}
+
+function _restoreSessionSourceFilter() {
+  try {
+    const raw = localStorage.getItem('hermes-session-source-filter');
+    if (raw === 'cli' || raw === 'webui') _sessionSourceFilter = raw;
+  } catch (_e) {}
+}
+
 function _normalizeMessageForCliImportComparison(message) {
   if (!message || typeof message !== 'object') return message;
   const clone = { ...message };
@@ -1433,6 +1456,8 @@ const NO_PROJECT_FILTER = '__none__';
 let _activeProject = null;  // project_id filter (null = show all, NO_PROJECT_FILTER = unassigned only)
 let _showAllProfiles = false;  // false = filter to active profile only
 let _otherProfileCount = 0;       // count of sessions from other profiles (server-reported)
+let _sessionSourceFilter = 'webui';  // 'webui' keeps WebUI chats separate from read-only CLI sessions
+_restoreSessionSourceFilter();
 let _sessionActionMenu = null;
 let _sessionActionAnchor = null;
 let _sessionActionSessionId = null;
@@ -2726,6 +2751,14 @@ function renderSessionListFromCache(){
     (activeSidForSidebar&&s.session_id===activeSidForSidebar) ||
     (S.session&&s.session_id===S.session.session_id&&(S.session.message_count||0)>0)
   );
+  const webuiSessionCount = withMessages.filter(s=>!_isCliSession(s)).length;
+  const cliSessionCount = withMessages.filter(s=>_isCliSession(s)).length;
+  if(_sessionSourceFilter==='cli' && !window._showCliSessions && cliSessionCount===0){
+    _sessionSourceFilter='webui';
+  }
+  const sourceFiltered = _sessionSourceFilter==='cli'
+    ? withMessages.filter(s=>_isCliSession(s))
+    : withMessages.filter(s=>!_isCliSession(s));
   // The server is authoritative for profile scoping (#1611): it filters by
   // active profile when no query param is set, and returns the aggregate when
   // we send ?all_profiles=1. The renamed-root cross-alias (a row tagged
@@ -2733,7 +2766,7 @@ function renderSessionListFromCache(){
   // in _profiles_match, and a strict-equality client filter would reject those
   // rows incorrectly. So we trust the wire data and skip the redundant client
   // filter entirely.
-  const profileFiltered=withMessages;
+  const profileFiltered=sourceFiltered;
   // Filter by active project. NO_PROJECT_FILTER sentinel asks for sessions
   // with no project_id; otherwise filter to the matching project_id, or
   // pass through when no filter is active.
@@ -2768,6 +2801,21 @@ function renderSessionListFromCache(){
   list.appendChild(batchBar);
   if(_sessionSelectMode&&_selectedSessions.size>0){batchBar.style.display='flex';_renderBatchActionBar();}
   else{batchBar.style.display='none';}
+  if(window._showCliSessions || cliSessionCount>0){
+    const sourceTabs=document.createElement('div');
+    sourceTabs.className='session-source-tabs';
+    for(const filter of ['webui','cli']){
+      const count=filter==='cli'?cliSessionCount:webuiSessionCount;
+      const btn=document.createElement('button');
+      btn.type='button';
+      btn.className='session-source-tab'+(_sessionSourceFilter===filter?' active':'');
+      btn.textContent=_sessionSourceLabel(filter,count);
+      btn.setAttribute('aria-pressed', _sessionSourceFilter===filter?'true':'false');
+      btn.onclick=()=>_setSessionSourceFilter(filter);
+      sourceTabs.appendChild(btn);
+    }
+    list.appendChild(sourceTabs);
+  }
   // Project filter bar — show when there are real projects OR there are
   // unassigned sessions (so the Unassigned chip has something to filter to).
   const hasUnprojected=profileFiltered.some(s=>!s.project_id);
@@ -2850,9 +2898,14 @@ function renderSessionListFromCache(){
     list.appendChild(toggle);
   }
   // Empty state for active project filter
-  if(_activeProject&&sessions.length===0){
+  if(_sessionSourceFilter==='cli'&&sessions.length===0){
     const empty=document.createElement('div');
-    empty.style.cssText='padding:20px 14px;color:var(--muted);font-size:12px;text-align:center;opacity:.7;';
+    empty.className='session-empty-note';
+    empty.textContent=window._showCliSessions?'No CLI sessions found.':'Enable Show agent sessions in Settings to list CLI sessions here.';
+    list.appendChild(empty);
+  } else if(_activeProject&&sessions.length===0){
+    const empty=document.createElement('div');
+    empty.className='session-empty-note';
     empty.textContent=_activeProject===NO_PROJECT_FILTER?'No unassigned sessions.':'No sessions in this project yet.';
     list.appendChild(empty);
   }
diff --git a/static/style.css b/static/style.css
index e4714e21..0921d97c 100644
--- a/static/style.css
+++ b/static/style.css
@@ -3024,6 +3024,11 @@ main.main.showing-logs > #mainLogs{display:flex;}
 .mermaid-rendered svg{max-width:100%;height:auto;}
 
 /* ── Session projects ── */
+.session-source-tabs{display:flex;gap:4px;padding:4px 10px 8px;flex-shrink:0;}
+.session-source-tab{flex:1;min-width:0;border:1px solid var(--border2);border-radius:10px;background:var(--input-bg);color:var(--muted);font-size:10px;font-weight:700;line-height:1.2;padding:5px 6px;cursor:pointer;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;transition:background .15s,color .15s,border-color .15s;}
+.session-source-tab:hover{background:rgba(255,255,255,.08);color:var(--text);}
+.session-source-tab.active{background:var(--accent-bg);color:var(--accent-text);border-color:var(--accent-bg);}
+.session-empty-note{padding:20px 14px;color:var(--muted);font-size:12px;text-align:center;opacity:.7;}
 .project-bar{display:flex;gap:4px;padding:4px 10px 8px;flex-wrap:wrap;align-items:center;flex-shrink:0;}
 .project-chip{font-size:10px;font-weight:600;padding:3px 8px;border-radius:12px;cursor:pointer;border:1px solid var(--border2);background:var(--input-bg);color:var(--muted);transition:all .15s;white-space:nowrap;display:inline-flex;align-items:center;gap:4px;}
 .project-chip:hover{background:rgba(255,255,255,.08);color:var(--text);}
diff --git a/tests/test_issue2351_cli_session_source_filter.py b/tests/test_issue2351_cli_session_source_filter.py
new file mode 100644
index 00000000..efe2a8f6
--- /dev/null
+++ b/tests/test_issue2351_cli_session_source_filter.py
@@ -0,0 +1,31 @@
+"""Regression coverage for issue #2351 CLI session list separation."""
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+SESSIONS_JS = ROOT / "static" / "sessions.js"
+STYLE_CSS = ROOT / "static" / "style.css"
+
+
+def test_sidebar_has_separate_webui_and_cli_session_source_tabs():
+    src = SESSIONS_JS.read_text(encoding="utf-8")
+    assert "let _sessionSourceFilter = 'webui'" in src
+    assert "hermes-session-source-filter" in src
+    assert "session-source-tabs" in src
+    assert "WebUI sessions" in src
+    assert "CLI sessions" in src
+    assert "_sessionSourceFilter==='cli'" in src
+
+
+def test_cli_filter_keeps_cli_rows_out_of_default_webui_list():
+    src = SESSIONS_JS.read_text(encoding="utf-8")
+    assert "const webuiSessionCount = withMessages.filter(s=>!_isCliSession(s)).length" in src
+    assert "const cliSessionCount = withMessages.filter(s=>_isCliSession(s)).length" in src
+    assert "? withMessages.filter(s=>_isCliSession(s))" in src
+    assert ": withMessages.filter(s=>!_isCliSession(s))" in src
+
+
+def test_session_source_tabs_have_dedicated_sidebar_styles():
+    css = STYLE_CSS.read_text(encoding="utf-8")
+    assert ".session-source-tabs" in css
+    assert ".session-source-tab.active" in css
+    assert ".session-empty-note" in css

From fa57868431b8dc6231ca7e962653356046c70401 Mon Sep 17 00:00:00 2001
From: AJV20 <24819659+AJV20@users.noreply.github.com>
Date: Sun, 24 May 2026 20:05:20 -0400
Subject: [PATCH 02/13] feat(chat): add WebUI prefill script hook

---
 ARCHITECTURE.md                     |   3 +
 CHANGELOG.md                        |   4 ++
 README.md                           |  32 +++++++++
 api/streaming.py                    | 106 ++++++++++++++++++++++++----
 tests/test_webui_prefill_context.py |  66 ++++++++++++++++-
 5 files changed, 196 insertions(+), 15 deletions(-)

diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index 6b865018..c9834c36 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -122,6 +122,9 @@ Environment variables controlling behavior:
     HERMES_WEBUI_DEFAULT_MODEL     Optional model override; unset means provider default
     HERMES_WEBUI_PASSWORD          Optional: enable password auth (off by default)
     HERMES_WEBUI_SKIP_ONBOARDING   Optional: bypass the first-run onboarding wizard
+    HERMES_PREFILL_MESSAGES_FILE   Optional JSON message list for browser-turn prefill context
+    HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT Optional command that prints JSON messages or text prefill context
+    HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT_TIMEOUT Optional script timeout in seconds (default 5, max 30)
     HERMES_HOME                    Base directory for Hermes state (~/.hermes by default)
 
 Test isolation environment variables (set by conftest.py):
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a1f805e7..bbc2614a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,10 @@
 
 ## [Unreleased]
 
+### Added
+
+- WebUI can now opt into a `webui_prefill_messages_script` / `HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT` hook for dynamic browser-turn prefill context from local notes or recall systems. The script output is normalized to ephemeral prefill messages and browser status still hides message bodies while redacting script errors.
+
 ## [v0.51.131] — 2026-05-24 — Release DC (stage-batch13 — 6-PR notes-drawer + context-parity + PWA-swipe + locale polish)
 
 ### Added
diff --git a/README.md b/README.md
index ccaca241..489f7b71 100644
--- a/README.md
+++ b/README.md
@@ -121,6 +121,38 @@ For self-hosted VM or homelab installs, `ctl.sh` wraps the common daemon lifecyc
 
 `ctl.sh start` runs the bootstrap in foreground/no-browser mode behind the daemon wrapper, writes logs to `~/.hermes/webui.log`, and respects `.env` plus inline overrides such as `HERMES_WEBUI_HOST=0.0.0.0 ./ctl.sh start`.
 
+### Optional session recall prefill
+
+WebUI can attach ephemeral prefill messages to new browser-originated
+agent turns. This is useful when a deployment already has a local recall script
+for Joplin, Obsidian, Notion, llm-wiki, or another third-party notes source and
+wants the browser chat to receive the same high-level context as other Hermes
+surfaces.
+
+Static JSON remains supported through `prefill_messages_file` or
+`HERMES_PREFILL_MESSAGES_FILE`. For dynamic recall, opt in explicitly with a
+WebUI-specific script hook:
+
+```yaml
+webui_prefill_messages_script:
+  - python3
+  - /path/to/notes_recall.py
+webui_prefill_messages_script_timeout: 5
+```
+
+or:
+
+```bash
+HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT="python3 /path/to/notes_recall.py" \
+HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT_TIMEOUT=5 \
+./ctl.sh restart
+```
+
+The script may print either an OpenAI-style JSON message list, a JSON object with
+a `messages` list, or plain text; plain text is wrapped as one `system` prefill
+message. The browser only receives a compact status event (`source`, `label`,
+message count, and redacted errors), never the prefill message bodies.
+
 The bootstrap will:
 
 1. Detect Hermes Agent and, if missing, attempt the official installer (`curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash`).
diff --git a/api/streaming.py b/api/streaming.py
index d319d9d7..6a46311e 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -10,7 +10,9 @@ import mimetypes
 import os
 import queue
 import re
+import shlex
 import sys
+import subprocess
 import threading
 import time
 import traceback
@@ -285,29 +287,105 @@ def _resolve_prefill_path(raw: str) -> Path:
     return path
 
 
+def _prefill_not_configured() -> dict:
+    return {"status": "not_configured", "source": "none", "label": "", "messages": [], "message_count": 0}
+
+
+def _load_prefill_messages_file(file_raw: str, *, source: str = "file", status: str = "loaded") -> dict:
+    path = _resolve_prefill_path(file_raw)
+    label = path.name or "prefill file"
+    if not path.exists():
+        return {"status": "error", "source": source, "label": label, "messages": [], "message_count": 0, "error": "prefill file not found"}
+    try:
+        messages = _valid_prefill_messages(json.loads(path.read_text(encoding="utf-8")))
+        return {"status": status, "source": source, "label": label, "messages": messages, "message_count": len(messages)}
+    except Exception as exc:
+        return {"status": "error", "source": source, "label": label, "messages": [], "message_count": 0, "error": _redact_prefill_status_text(str(exc))}
+
+
+def _prefill_script_timeout(config_data: dict) -> float:
+    raw = os.getenv("HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT_TIMEOUT", "") or str(config_data.get("webui_prefill_messages_script_timeout") or "")
+    try:
+        return max(0.1, min(float(raw or 5), 30.0))
+    except Exception:
+        return 5.0
+
+
+def _prefill_script_command(raw) -> list[str]:
+    """Return the argv for the prefill script; [] when empty or unparseable."""
+    if isinstance(raw, (list, tuple)):
+        return [str(part) for part in raw if str(part)]
+    try:
+        parts = shlex.split(str(raw or ""))
+    except ValueError:  # unbalanced quotes: surface as an empty command, not a crash
+        return []
+    if len(parts) == 1:  # lone script path resolves like prefill_messages_file
+        parts[0] = str(_resolve_prefill_path(parts[0]))
+    return parts  # multi-part commands keep their argv untouched
+
+
+def _messages_from_prefill_script_output(text: str) -> list[dict]:
+    stripped = str(text or "").strip()
+    if not stripped:
+        return []
+    try:
+        payload = json.loads(stripped)
+    except Exception:
+        payload = None
+    if isinstance(payload, dict):
+        payload = payload.get("messages")
+    messages = _valid_prefill_messages(payload)
+    if messages:
+        return messages
+    return [{"role": "system", "content": stripped}]
+
+
+def _load_prefill_messages_script(config_data: dict) -> dict:
+    script_raw = os.getenv("HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT", "") or config_data.get("webui_prefill_messages_script")
+    if not script_raw:
+        return _prefill_not_configured()
+    command = _prefill_script_command(script_raw)
+    label = Path(command[0]).name if command else "prefill script"
+    if not command:
+        return {"status": "error", "source": "script", "label": label, "messages": [], "message_count": 0, "error": "prefill script is empty"}
+    try:
+        proc = subprocess.run(
+            command,
+            text=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            timeout=_prefill_script_timeout(config_data),
+            check=False,
+        )
+    except subprocess.TimeoutExpired:
+        return {"status": "error", "source": "script", "label": label, "messages": [], "message_count": 0, "error": "prefill script timed out"}
+    except Exception as exc:
+        return {"status": "error", "source": "script", "label": label, "messages": [], "message_count": 0, "error": _redact_prefill_status_text(str(exc))}
+    if proc.returncode != 0:
+        err = _redact_prefill_status_text(proc.stderr or proc.stdout or f"prefill script exited {proc.returncode}")
+        return {"status": "error", "source": "script", "label": label, "messages": [], "message_count": 0, "error": err}
+    messages = _messages_from_prefill_script_output(proc.stdout)
+    return {"status": "loaded", "source": "script", "label": label, "messages": messages, "message_count": len(messages)}
+
+
 def _load_webui_prefill_context(
     config_data: Optional[dict] = None,
 ) -> dict:
     """Load configured WebUI session prefill messages.
 
-    Supports the same bounded JSON-file shape used by Hermes Agent.  WebUI does
-    not execute a configured prefill script here; session recall that requires
-    code execution should go through the normal MCP/tool path instead of an
-    always-on per-turn subprocess before SSE starts.
+    Supports the same bounded JSON-file shape used by Hermes Agent.  WebUI also
+    supports its own explicitly opt-in script hook so admins can bridge Joplin,
+    Obsidian, Notion, llm-wiki, or another local notes source into ephemeral
+    turn context without baking any one note provider into the WebUI.
     """
     cfg = config_data if isinstance(config_data, dict) else get_config()
+    script_context = _load_prefill_messages_script(cfg)
+    if script_context.get("status") != "not_configured":
+        return script_context
     file_raw = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or str(cfg.get("prefill_messages_file") or "")
     if file_raw:
-        path = _resolve_prefill_path(file_raw)
-        label = path.name or "prefill file"
-        if not path.exists():
-            return {"status": "error", "source": "file", "label": label, "messages": [], "message_count": 0, "error": "prefill file not found"}
-        try:
-            messages = _valid_prefill_messages(json.loads(path.read_text(encoding="utf-8")))
-            return {"status": "loaded", "source": "file", "label": label, "messages": messages, "message_count": len(messages)}
-        except Exception as exc:
-            return {"status": "error", "source": "file", "label": label, "messages": [], "message_count": 0, "error": _redact_prefill_status_text(str(exc))}
-    return {"status": "not_configured", "source": "none", "label": "", "messages": [], "message_count": 0}
+        return _load_prefill_messages_file(file_raw)
+    return _prefill_not_configured()
 
 
 def _public_prefill_context_status(prefill_context: dict) -> dict:
diff --git a/tests/test_webui_prefill_context.py b/tests/test_webui_prefill_context.py
index 06a18e0c..3584aba2 100644
--- a/tests/test_webui_prefill_context.py
+++ b/tests/test_webui_prefill_context.py
@@ -2,6 +2,8 @@
 from __future__ import annotations
 
 import json
+import sys
+from pathlib import Path
 
 
 def test_prefill_json_file_keeps_valid_roles_and_drops_invalid_items(tmp_path):
@@ -32,7 +34,7 @@ def test_prefill_json_file_keeps_valid_roles_and_drops_invalid_items(tmp_path):
     ]
 
 
-def test_prefill_script_config_is_ignored_in_webui(tmp_path):
+def test_prefill_script_config_is_not_used_without_webui_opt_in(tmp_path):
     from api.streaming import _load_webui_prefill_context
 
     script = tmp_path / "recall.py"
@@ -49,6 +51,68 @@ def test_prefill_script_config_is_ignored_in_webui(tmp_path):
     }
 
 
+def test_webui_prefill_script_loads_json_messages(tmp_path):
+    from api.streaming import _load_webui_prefill_context
+
+    script = tmp_path / "recall.py"
+    script.write_text(
+        "import json\n"
+        "print(json.dumps([{'role': 'system', 'content': 'Joplin recall'}, {'role': 'tool', 'content': 'drop me'}]))\n",
+        encoding="utf-8",
+    )
+
+    result = _load_webui_prefill_context({"webui_prefill_messages_script": [sys.executable, str(script)]})
+
+    assert result["status"] == "loaded"
+    assert result["source"] == "script"
+    assert result["label"] == Path(sys.executable).name
+    assert result["messages"] == [{"role": "system", "content": "Joplin recall"}]
+
+
+def test_webui_prefill_script_wraps_plain_text_for_any_notes_source(tmp_path):
+    from api.streaming import _load_webui_prefill_context
+
+    script = tmp_path / "obsidian_recall.py"
+    script.write_text("print('Obsidian project note context')\n", encoding="utf-8")
+
+    result = _load_webui_prefill_context({"webui_prefill_messages_script": [sys.executable, str(script)]})
+
+    assert result["status"] == "loaded"
+    assert result["source"] == "script"
+    assert result["messages"] == [{"role": "system", "content": "Obsidian project note context"}]
+
+
+def test_webui_prefill_script_errors_are_redacted(tmp_path):
+    from api.streaming import _load_webui_prefill_context
+
+    script = tmp_path / "bad_recall.py"
+    script.write_text("import sys; print('token=redaction-test-placeholder', file=sys.stderr); raise SystemExit(2)\n", encoding="utf-8")
+
+    result = _load_webui_prefill_context({"webui_prefill_messages_script": [sys.executable, str(script)]})
+
+    assert result["status"] == "error"
+    assert result["source"] == "script"
+    assert "redaction-test-placeholder" not in result["error"]
+    assert "[REDACTED]" in result["error"]
+
+
+def test_webui_prefill_script_takes_precedence_over_static_file(tmp_path):
+    from api.streaming import _load_webui_prefill_context
+
+    prefill = tmp_path / "prefill.json"
+    prefill.write_text(json.dumps([{"role": "system", "content": "static"}]), encoding="utf-8")
+    script = tmp_path / "recall.py"
+    script.write_text("print('dynamic')\n", encoding="utf-8")
+
+    result = _load_webui_prefill_context({
+        "prefill_messages_file": str(prefill),
+        "webui_prefill_messages_script": [sys.executable, str(script)],
+    })
+
+    assert result["source"] == "script"
+    assert result["messages"] == [{"role": "system", "content": "dynamic"}]
+
+
 def test_public_prefill_status_strips_message_bodies():
     from api.streaming import _public_prefill_context_status
 

From befee0e035ff6fa5d42db14e16542a5c01436e2d Mon Sep 17 00:00:00 2001
From: AJV20 <24819659+AJV20@users.noreply.github.com>
Date: Sun, 24 May 2026 20:20:28 -0400
Subject: [PATCH 03/13] test(chat): harden WebUI prefill script hook

---
 CHANGELOG.md                        |  2 +-
 README.md                           |  5 +++--
 api/streaming.py                    | 12 +++++++++++
 tests/test_webui_prefill_context.py | 33 +++++++++++++++++++++++++++++
 4 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bbc2614a..e284758e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,7 @@
 
 ### Added
 
-- WebUI can now opt into a `webui_prefill_messages_script` / `HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT` hook for dynamic browser-turn prefill context from local notes or recall systems. The script output is normalized to ephemeral prefill messages and browser status still hides message bodies while redacting script errors.
+- WebUI can now opt into a `webui_prefill_messages_script` / `HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT` hook for dynamic browser-turn prefill context from local notes or recall systems. Script output larger than 256 KiB is rejected with an error; accepted output is normalized to ephemeral prefill messages, and browser status still hides message bodies while redacting script errors.
 
 ## [v0.51.131] — 2026-05-24 — Release DC (stage-batch13 — 6-PR notes-drawer + context-parity + PWA-swipe + locale polish)
 
diff --git a/README.md b/README.md
index 489f7b71..b9066c4e 100644
--- a/README.md
+++ b/README.md
@@ -150,8 +150,9 @@ HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT_TIMEOUT=5 \
 
 The script may print either an OpenAI-style JSON message list, a JSON object with
 a `messages` list, or plain text; plain text is wrapped as one `system` prefill
-message. The browser only receives a compact status event (`source`, `label`,
-message count, and redacted errors), never the prefill message bodies.
+message. Script output larger than 256 KiB is rejected with an error rather
+than parsed. The browser only receives a compact status event (`source`,
+`label`, message count, and redacted errors), never the prefill message bodies.
 
 The bootstrap will:
 
diff --git a/api/streaming.py b/api/streaming.py
index 6a46311e..a0531da3 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -287,6 +287,9 @@ def _resolve_prefill_path(raw: str) -> Path:
     return path
 
 
+_PREFILL_SCRIPT_OUTPUT_LIMIT = 262_144
+
+
 def _prefill_not_configured() -> dict:
     return {"status": "not_configured", "source": "none", "label": "", "messages": [], "message_count": 0}
 
@@ -364,6 +367,15 @@ def _load_prefill_messages_script(config_data: dict) -> dict:
     if proc.returncode != 0:
         err = _redact_prefill_status_text(proc.stderr or proc.stdout or f"prefill script exited {proc.returncode}")
         return {"status": "error", "source": "script", "label": label, "messages": [], "message_count": 0, "error": err}
+    if len(proc.stdout.encode("utf-8")) > _PREFILL_SCRIPT_OUTPUT_LIMIT:
+        return {
+            "status": "error",
+            "source": "script",
+            "label": label,
+            "messages": [],
+            "message_count": 0,
+            "error": f"prefill script output exceeded {_PREFILL_SCRIPT_OUTPUT_LIMIT} bytes",
+        }
     messages = _messages_from_prefill_script_output(proc.stdout)
     return {"status": "loaded", "source": "script", "label": label, "messages": messages, "message_count": len(messages)}
 
diff --git a/tests/test_webui_prefill_context.py b/tests/test_webui_prefill_context.py
index 3584aba2..0ce1991b 100644
--- a/tests/test_webui_prefill_context.py
+++ b/tests/test_webui_prefill_context.py
@@ -113,6 +113,39 @@ def test_webui_prefill_script_takes_precedence_over_static_file(tmp_path):
     assert result["messages"] == [{"role": "system", "content": "dynamic"}]
 
 
+def test_webui_prefill_script_timeout_returns_redacted_error(tmp_path):
+    from api.streaming import _load_webui_prefill_context
+
+    script = tmp_path / "slow_recall.py"
+    script.write_text("import time\ntime.sleep(1)\nprint('too late')\n", encoding="utf-8")
+
+    result = _load_webui_prefill_context({
+        "webui_prefill_messages_script": [sys.executable, str(script)],
+        "webui_prefill_messages_script_timeout": 0.1,
+    })
+
+    assert result["status"] == "error"
+    assert result["source"] == "script"
+    assert result["messages"] == []
+    assert result["message_count"] == 0
+    assert result["error"] == "prefill script timed out"
+
+
+def test_webui_prefill_script_rejects_oversized_stdout(tmp_path):
+    from api.streaming import _load_webui_prefill_context
+
+    script = tmp_path / "large_recall.py"
+    script.write_text("print('x' * 262145)\n", encoding="utf-8")
+
+    result = _load_webui_prefill_context({"webui_prefill_messages_script": [sys.executable, str(script)]})
+
+    assert result["status"] == "error"
+    assert result["source"] == "script"
+    assert result["messages"] == []
+    assert result["message_count"] == 0
+    assert "output exceeded" in result["error"]
+
+
 def test_public_prefill_status_strips_message_bodies():
     from api.streaming import _public_prefill_context_status
 

From aee376323f4c93bde5ed5dc2a3f8a5a15e6bd6a2 Mon Sep 17 00:00:00 2001
From: Roberto Villegas <roberto@athas.mx>
Date: Sat, 23 May 2026 02:00:39 -0600
Subject: [PATCH 04/13] feat(cursor-acp): add cursor-acp to WebUI model picker

- Add cursor-acp to _PROVIDER_DISPLAY with label 'Cursor ACP'
- Add cursor-acp static model list to _PROVIDER_MODELS:
  cursor/composer-2.5, cursor/composer-2, cursor/default, cursor-acp
---
 api/config.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/api/config.py b/api/config.py
index 66bcb570..33a9e3c8 100644
--- a/api/config.py
+++ b/api/config.py
@@ -694,6 +694,7 @@ _PROVIDER_DISPLAY = {
     "openai-codex": "OpenAI Codex",
     "xai-oauth": "xAI Grok OAuth",
     "copilot": "GitHub Copilot",
+    "cursor-acp": "Cursor ACP",
     "zai": "Z.AI / GLM",
     "kimi-coding": "Kimi / Moonshot",
     "deepseek": "DeepSeek",
@@ -1116,6 +1117,13 @@ _PROVIDER_MODELS = {
         {"id": "claude-sonnet-4.6", "label": "Claude Sonnet 4.6"},
         {"id": "gemini-3-flash-preview", "label": "Gemini 3 Flash Preview"},
     ],
+    # Cursor ACP — models served via Cursor CLI agent acp
+    "cursor-acp": [
+        {"id": "cursor/composer-2.5", "label": "Composer 2.5"},
+        {"id": "cursor/composer-2", "label": "Composer 2"},
+        {"id": "cursor/default", "label": "Default"},
+        {"id": "cursor-acp", "label": "Cursor ACP"},
+    ],
     # OpenCode Zen — curated models via opencode.ai/zen (pay-as-you-go credits)
     "opencode-zen": [
         {"id": "gpt-5.4-pro", "label": "GPT-5.4 Pro"},

From ef5eafcceb0a3a184f83c14a2910af682b9f31e4 Mon Sep 17 00:00:00 2001
From: Roberto Villegas <roberto@athas.mx>
Date: Sat, 23 May 2026 03:59:03 -0600
Subject: [PATCH 05/13] fix(cursor-acp): route slash models and honor picker on
 new chat

Ensure cursor/composer IDs always resolve via @cursor-acp:, carry the
visible picker selection into POST /api/session/new, persist model
changes before a session exists, and evict cached agents on model switch.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 CHANGELOG.md                                  |  2 ++
 api/config.py                                 | 11 +++++++
 api/routes.py                                 |  3 ++
 static/boot.js                                |  5 ++-
 static/sessions.js                            | 14 +++++++++
 tests/test_new_chat_default_model_frontend.py | 15 +++++++++
 tests/test_provider_mismatch.py               | 31 +++++++++++++++++--
 7 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 494f2eca..1bd8350b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,12 +6,14 @@
 ## [v0.51.132] — 2026-05-24 — Release DD (stage-batch14 — 4-PR replayed-context + interrupted-response + shutdown affordance + passkey opt-in)
 
 ### Added
+- **Cursor ACP provider integration** — Add `cursor-acp` to the WebUI model picker and route slash model IDs (for example `cursor/composer-2.5`) through explicit `@cursor-acp:` provider hints so they do not fall through to the configured default HTTP provider.
 
 - **PR #2859** by @AJV20 — Optional passkey/WebAuthn sign-in for password-protected WebUI instances. Authenticated users can register/remove passkeys from Settings -> System, and `/login` shows a passwordless sign-in button only after a passkey exists. Password auth remains the default-off bootstrap and recovery path. **Opt-in default-off behind `HERMES_WEBUI_PASSKEY=1` env var or `webui_passkey_enabled: true` config flag** — when disabled, the UI block hides, all 6 `/api/auth/passkey/*` endpoints return 404, and `is_auth_enabled()` ignores any pre-existing credential file so the auth posture cannot silently flip if the flag is unset later.
 
 - **PR #2824** by @gavinssr — A "Stop server" affordance in Settings → System that gracefully shuts down the local WebUI server. Useful when WebUI was launched via `./ctl.sh start` or the native macOS/Windows app and the user wants to stop it without context-switching to a terminal. Confirmation dialog before the actual shutdown. The `/api/shutdown` route is CSRF-gated and intended for local-loopback use. Originally a title-bar button; relocated to Settings per the project's deep-UX rule (default-hidden for niche destructive actions on always-visible surfaces).
 
 ### Fixed
+- **Cursor ACP routing and new-chat defaults** — New conversations now carry the visible composer picker selection into `POST /api/session/new`, persist model changes before a session exists, and evict cached session agents when the model/provider changes mid-session.
 
 - **PR #2685** by @LumenYoung — Prevent replayed context in chat reconciliation and metering. When a WebUI session is recovered (e.g., after a process restart, network drop, or browser reload), the sidebar/`state.db` reconciliation logic walks the sidecar transcript in order and only skips rows that can actually be aligned with the remaining sidecar context. The prior set-membership check was too broad: a legitimate fresh message that happened to share a key with any older repeated short message in the sidecar was mis-classified as already-seen and dropped from the replay, leading to lost context and inconsistent metering. Also caps the per-turn live-tool-prompt token estimate at 12,000 to prevent unbounded growth on bursts of large tool reads before exact provider accounting overrides.
 
diff --git a/api/config.py b/api/config.py
index 33a9e3c8..1dc55d07 100644
--- a/api/config.py
+++ b/api/config.py
@@ -1980,6 +1980,12 @@ def resolve_custom_provider_connection(provider_id: str) -> tuple[str | None, st
     return None, None
 
 
+# Subprocess ACP transports (Cursor/Copilot CLI). Model IDs often contain '/'
+# but must still route via explicit @provider:model so they do not fall through
+# to the configured default HTTP provider (e.g. openai-codex).
+_ACP_SUBPROCESS_PROVIDERS = frozenset({"cursor-acp", "copilot-acp"})
+
+
 def model_with_provider_context(model_id: str, model_provider: str | None = None) -> str:
     """Return the model string to pass to ``resolve_model_provider()``.
 
@@ -1999,6 +2005,11 @@ def model_with_provider_context(model_id: str, model_provider: str | None = None
     if isinstance(model_cfg, dict):
         config_provider = str(model_cfg.get("provider") or "").strip().lower()
 
+    # ACP subprocess providers always need the explicit hint — their slash IDs
+    # are not OpenRouter paths and must not inherit config_provider routing.
+    if provider in _ACP_SUBPROCESS_PROVIDERS:
+        return f"@{provider}:{model}"
+
     # If the selected provider is already the configured provider, leaving the
     # model bare preserves provider-specific base_url/proxy settings.
     if provider == config_provider:
diff --git a/api/routes.py b/api/routes.py
index 0b623ed8..27ca8875 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -5307,6 +5307,9 @@ def handle_post(handler, parsed) -> bool:
                     )
                     s.threshold_tokens = 0
                     s.last_prompt_tokens = 0
+                    from api.config import _evict_session_agent
+
+                    _evict_session_agent(body["session_id"])
             s.save()
         if str(old_ws or "") != str(new_ws or ""):
             try:
diff --git a/static/boot.js b/static/boot.js
index eaadc8cd..3153a22a 100644
--- a/static/boot.js
+++ b/static/boot.js
@@ -1022,7 +1022,6 @@ function _applySessionContextMetadataUpdate(data){
 }
 
 $('modelSelect').onchange=async()=>{
-  if(!S.session)return;
   const selectedModel=$('modelSelect').value;
   const modelState=(typeof _modelStateForSelect==='function')
     ? _modelStateForSelect($('modelSelect'),selectedModel)
@@ -1030,6 +1029,10 @@ $('modelSelect').onchange=async()=>{
   if(typeof closeModelDropdown==='function') closeModelDropdown();
   if(typeof _writePersistedModelState==='function') _writePersistedModelState(modelState.model,modelState.model_provider);
   else try{localStorage.setItem('hermes-webui-model',modelState.model)}catch{}
+  if(!S.session){
+    if(typeof syncModelChip==='function') syncModelChip();
+    return;
+  }
   if(typeof _rememberPendingSessionModel==='function') _rememberPendingSessionModel(S.session.session_id,modelState.model,modelState.model_provider);
   S.session.model=modelState.model;
   S.session.model_provider=modelState.model_provider||null;
diff --git a/static/sessions.js b/static/sessions.js
index 83d8c5c4..30b9ed12 100644
--- a/static/sessions.js
+++ b/static/sessions.js
@@ -470,6 +470,20 @@ async function newSession(flash, options={}){
     if(S.session&&S.session.session_id) reqBody.prev_session_id=S.session.session_id;
     if(options&&options.worktree) reqBody.worktree=true;
     if(_activeProject&&_activeProject!==NO_PROJECT_FILTER) reqBody.project_id=_activeProject;
+    // Carry the visible picker selection into the new session. Without this,
+    // /api/session/new falls back to config.yaml defaults (e.g. gpt-5.5) even
+    // when the user already chose cursor/composer-2.5 in the composer chip.
+    const modelSelForNew=$('modelSelect');
+    let newModelState=null;
+    if(modelSelForNew&&modelSelForNew.value&&typeof _modelStateForSelect==='function'){
+      newModelState=_modelStateForSelect(modelSelForNew,modelSelForNew.value);
+    }else if(typeof _readPersistedModelState==='function'){
+      newModelState=_readPersistedModelState();
+    }
+    if(newModelState&&newModelState.model){
+      reqBody.model=newModelState.model;
+      reqBody.model_provider=newModelState.model_provider||null;
+    }
     const data=await api('/api/session/new',{method:'POST',body:JSON.stringify(reqBody)});
     S.session=data.session;S.messages=data.session.messages||[];
     S.lastUsage={...(data.session.last_usage||{})};
diff --git a/tests/test_new_chat_default_model_frontend.py b/tests/test_new_chat_default_model_frontend.py
index 267c3105..91a29e3d 100644
--- a/tests/test_new_chat_default_model_frontend.py
+++ b/tests/test_new_chat_default_model_frontend.py
@@ -94,6 +94,21 @@ def test_new_chat_does_not_send_stale_dropdown_model_when_session_has_default_mo
     assert "model_provider:S.session.model_provider||null" in MESSAGES_JS
 
 
+def test_new_session_posts_picker_model_before_server_default():
+    fn = _new_session_function()
+    assert "reqBody.model=newModelState.model" in fn
+    assert "reqBody.model_provider=newModelState.model_provider||null" in fn
+    assert "_readPersistedModelState" in fn
+
+
+def test_model_picker_persists_without_active_session():
+    boot_js = Path("static/boot.js").read_text(encoding="utf-8")
+    body = boot_js[boot_js.index("$('modelSelect').onchange=async()=>") : boot_js.index("$('msg').addEventListener", boot_js.index("$('modelSelect').onchange=async()=>"))]
+    assert "_writePersistedModelState(modelState.model,modelState.model_provider)" in body
+    assert "if(!S.session){" in body
+    assert body.index("if(!S.session){") < body.index("await api('/api/session/update'")
+
+
 def test_changelog_mentions_new_chat_default_model_provider_sync():
     unreleased = CHANGELOG.split("## [v0.51.103]", 1)[0]
     assert "New conversations now resync" in unreleased
diff --git a/tests/test_provider_mismatch.py b/tests/test_provider_mismatch.py
index dbd2c019..2ff6ad46 100644
--- a/tests/test_provider_mismatch.py
+++ b/tests/test_provider_mismatch.py
@@ -522,6 +522,31 @@ def test_non_openrouter_slash_model_provider_context_stays_unqualified():
     assert runtime_model == "anthropic/claude-sonnet-4.6"
 
 
+def test_cursor_acp_slash_model_always_gets_provider_hint():
+    """ACP subprocess models with '/' must not fall through to config default."""
+    import api.config as config
+
+    old_cfg = dict(config.cfg)
+    config.cfg["model"] = {
+        "provider": "openai-codex",
+        "default": "gpt-5.5",
+    }
+    try:
+        runtime_model = config.model_with_provider_context(
+            "cursor/composer-2.5",
+            "cursor-acp",
+        )
+        model, provider, base_url = config.resolve_model_provider(runtime_model)
+    finally:
+        config.cfg.clear()
+        config.cfg.update(old_cfg)
+
+    assert runtime_model == "@cursor-acp:cursor/composer-2.5"
+    assert model == "cursor/composer-2.5"
+    assert provider == "cursor-acp"
+    assert base_url is None
+
+
 def test_api_session_new_persists_model_provider_context():
     """POST /api/session/new returns compact session model_provider metadata."""
     created, status = _post(
@@ -1171,13 +1196,13 @@ class TestFrontendModelProviderState:
         assert "_modelStateForSelect" in src
         assert "model_provider:modelState.model_provider||null" in src
 
-    def test_new_session_lets_profile_config_choose_default_model_provider(self):
+    def test_new_session_carries_visible_picker_model_into_create_request(self):
         src = _read("static/sessions.js")
         start = src.index("async function newSession(")
         body = src[start:src.index("const data=await api('/api/session/new'", start)]
         assert "profile:S.activeProfile||'default'" in body
-        assert "model:newModelState.model" not in body
-        assert "model_provider:newModelState.model_provider||null" not in body
+        assert "reqBody.model=newModelState.model" in body
+        assert "reqBody.model_provider=newModelState.model_provider||null" in body
 
     def test_ui_has_json_model_state_storage(self):
         src = _read("static/ui.js")

From a9ce2889affdadda3bfaa785bbaca37d59a35081 Mon Sep 17 00:00:00 2001
From: Roberto Villegas <roberto@athas.mx>
Date: Sat, 23 May 2026 03:59:45 -0600
Subject: [PATCH 06/13] fix(ui): hide reasoning chip when model lacks effort
 levels

Resolve supported reasoning efforts per active model/provider. The UI
now sends `model`/`provider` query params to /api/reasoning and the
response includes `supported_efforts`, so Composer and other
non-configurable models no longer show a misleading effort picker.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 CHANGELOG.md                                  |   3 +
 api/config.py                                 | 114 +++++++++++++++++-
 api/routes.py                                 |  13 +-
 static/commands.js                            |   5 +-
 static/ui.js                                  |  56 +++++++--
 ...est_issue1103_reasoning_chip_visibility.py |   9 +-
 tests/test_reasoning_chip_btw_fixes.py        |  15 +--
 ...est_reasoning_effort_model_capabilities.py |  33 +++++
 8 files changed, 224 insertions(+), 24 deletions(-)
 create mode 100644 tests/test_reasoning_effort_model_capabilities.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1bd8350b..4c7a2501 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,9 @@
 - **PR #2824** by @gavinssr — A "Stop server" affordance in Settings → System that gracefully shuts down the local WebUI server. Useful when WebUI was launched via `./ctl.sh start` or the native macOS/Windows app and the user wants to stop it without context-switching to a terminal. Confirmation dialog before the actual shutdown. The `/api/shutdown` route is CSRF-gated and intended for local-loopback use. Originally a title-bar button; relocated to Settings per the project's deep-UX rule (default-hidden for niche destructive actions on always-visible surfaces).
 
 ### Fixed
+
+### Fixed
+- **Reasoning effort chip visibility** — `/api/reasoning` now accepts `model` and `provider` query params and returns `supported_efforts` so the composer chip hides for models without configurable reasoning levels (for example Cursor Composer) while remaining available for models like GPT-5.5.
 - **Cursor ACP routing and new-chat defaults** — New conversations now carry the visible composer picker selection into `POST /api/session/new`, persist model changes before a session exists, and evict cached session agents when the model/provider changes mid-session.
 
 - **PR #2685** by @LumenYoung — Prevent replayed context in chat reconciliation and metering. When a WebUI session is recovered (e.g., after a process restart, network drop, or browser reload), the sidebar/`state.db` reconciliation logic walks the sidecar transcript in order and only skips rows that can actually be aligned with the remaining sidecar context. The prior set-membership check was too broad: a legitimate fresh message that happened to share a key with any older repeated short message in the sidecar was mis-classified as already-seen and dropped from the replay, leading to lost context and inconsistent metering. Also caps the per-turn live-tool-prompt token estimate at 12,000 to prevent unbounded growth on bursts of large tool reads before exact provider accounting overrides.
diff --git a/api/config.py b/api/config.py
index 1dc55d07..7b701874 100644
--- a/api/config.py
+++ b/api/config.py
@@ -2073,7 +2073,112 @@ def parse_reasoning_effort(effort):
     return None
 
 
-def get_reasoning_status() -> dict:
+def _heuristic_reasoning_efforts(model_id: str, provider_id: str) -> list[str]:
+    """Fallback when hermes_cli is unavailable."""
+    model = str(model_id or "").strip().lower()
+    provider = _resolve_provider_alias(str(provider_id or "").strip().lower())
+    if not model or provider in {"cursor-acp", "copilot-acp"}:
+        return []
+    bare = model.rsplit("/", 1)[-1]
+    if provider == "openai-codex" and bare.startswith(("gpt-5", "o1", "o3", "o4")):
+        if bare.startswith(("o1", "o3", "o4")):
+            return ["low", "medium", "high"]
+        return list(VALID_REASONING_EFFORTS)
+    if provider in {"copilot", "github-copilot"}:
+        if bare.startswith(("gpt-5", "o1", "o3", "o4")):
+            if bare.startswith(("o1", "o3", "o4")):
+                return ["low", "medium", "high"]
+            return list(VALID_REASONING_EFFORTS)
+    prefixes = (
+        "deepseek/",
+        "anthropic/",
+        "openai/",
+        "x-ai/",
+        "google/gemini-2",
+        "google/gemma-4",
+        "qwen/qwen3",
+        "tencent/hy3-preview",
+        "xiaomi/",
+    )
+    if any(model.startswith(prefix) for prefix in prefixes):
+        return list(VALID_REASONING_EFFORTS)
+    return []
+
+
+def resolve_model_reasoning_efforts(
+    model_id: str | None = None,
+    provider_id: str | None = None,
+    base_url: str | None = None,
+) -> list[str]:
+    """Return supported reasoning-effort levels for *model_id*, or [] if none."""
+    model = str(model_id or "").strip()
+    if not model:
+        return []
+
+    provider = str(provider_id or "").strip().lower() if provider_id else ""
+    resolved_base_url = str(base_url or "").strip() or None
+    if not provider:
+        try:
+            _, provider, resolved_base_url = resolve_model_provider(model)
+        except Exception:
+            provider = str((cfg.get("model") or {}).get("provider") or "").strip().lower()
+
+    provider = _resolve_provider_alias(provider)
+    if provider in {"cursor-acp", "copilot-acp"}:
+        return []
+
+    try:
+        from hermes_cli.models import (
+            github_model_reasoning_efforts,
+            lmstudio_model_reasoning_options,
+        )
+    except Exception:
+        return _heuristic_reasoning_efforts(model, provider)
+
+    if provider in {"copilot", "github-copilot"}:
+        return github_model_reasoning_efforts(model)
+
+    if provider == "openai-codex":
+        bare = model.rsplit("/", 1)[-1]
+        return github_model_reasoning_efforts(bare)
+
+    if provider == "lmstudio":
+        probe_base = resolved_base_url or _get_provider_base_url(provider)
+        opts = lmstudio_model_reasoning_options(model, probe_base)
+        normalized = [str(opt).strip().lower() for opt in opts if str(opt).strip()]
+        if not normalized or set(normalized).issubset({"off"}):
+            return []
+        level_opts = [opt for opt in normalized if opt in VALID_REASONING_EFFORTS]
+        if level_opts:
+            return list(dict.fromkeys(level_opts))
+        if set(normalized).issubset({"off", "on"}):
+            return []
+        return []
+
+    model_lower = model.lower()
+    prefixes = (
+        "deepseek/",
+        "anthropic/",
+        "openai/",
+        "x-ai/",
+        "google/gemini-2",
+        "google/gemma-4",
+        "qwen/qwen3",
+        "tencent/hy3-preview",
+        "xiaomi/",
+    )
+    if any(model_lower.startswith(prefix) for prefix in prefixes):
+        return list(VALID_REASONING_EFFORTS)
+
+    return []
+
+
+def get_reasoning_status(
+    *,
+    model_id: str | None = None,
+    provider_id: str | None = None,
+    base_url: str | None = None,
+) -> dict:
     """Return current reasoning configuration from the active profile's
     config.yaml — the same source of truth the CLI reads from.
 
@@ -2086,10 +2191,17 @@ def get_reasoning_status() -> dict:
     agent_cfg = config_data.get("agent") or {}
     show_raw = display_cfg.get("show_reasoning") if isinstance(display_cfg, dict) else None
     effort_raw = agent_cfg.get("reasoning_effort") if isinstance(agent_cfg, dict) else None
+    supported_efforts = resolve_model_reasoning_efforts(
+        model_id,
+        provider_id=provider_id,
+        base_url=base_url,
+    )
     return {
         # Match CLI default (True if unset in config.yaml)
         "show_reasoning": bool(show_raw) if isinstance(show_raw, bool) else True,
         "reasoning_effort": str(effort_raw or "").strip().lower(),
+        "supported_efforts": supported_efforts,
+        "supports_reasoning_effort": bool(supported_efforts),
     }
 
 
diff --git a/api/routes.py b/api/routes.py
index 27ca8875..048579a7 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -3918,7 +3918,18 @@ def handle_get(handler, parsed) -> bool:
         # Current reasoning config (shared source of truth with the CLI —
         # reads display.show_reasoning and agent.reasoning_effort from
         # the active profile's config.yaml).
-        return j(handler, get_reasoning_status())
+        query = parse_qs(parsed.query)
+        model_id = (query.get("model", [""])[0] or "").strip() or None
+        provider_id = (query.get("provider", [""])[0] or "").strip() or None
+        base_url = (query.get("base_url", [""])[0] or "").strip() or None
+        return j(
+            handler,
+            get_reasoning_status(
+                model_id=model_id,
+                provider_id=provider_id,
+                base_url=base_url,
+            ),
+        )
 
     if parsed.path == "/api/onboarding/status":
         return j(handler, get_onboarding_status())
diff --git a/static/commands.js b/static/commands.js
index f7705546..73966304 100644
--- a/static/commands.js
+++ b/static/commands.js
@@ -1141,7 +1141,8 @@ function cmdReasoning(args){
   }
   if(!arg){
     // Status — read from the same config.yaml keys the CLI uses.
-    api('/api/reasoning').then(function(st){showToast(_fmtStatus(st));})
+    const q=(typeof _reasoningEffortQuery==='function')?_reasoningEffortQuery():'';
+    api('/api/reasoning'+q).then(function(st){showToast(_fmtStatus(st));})
       .catch(function(){showToast(BRAIN+' /reasoning — status unavailable');});
     return true;
   }
@@ -1168,7 +1169,7 @@ function cmdReasoning(args){
       .then(function(st){
         const eff=(st && st.reasoning_effort)||arg;
         showToast(BRAIN+' Reasoning effort: '+eff+' (saved; applies to next turn)');
-        if(typeof _applyReasoningChip==='function') _applyReasoningChip(eff);
+        if(typeof _applyReasoningChip==='function') _applyReasoningChip(eff, st||{});
       })
       .catch(function(e){
         showToast(BRAIN+' Failed to set effort: '+(e && e.message ? e.message : arg));
diff --git a/static/ui.js b/static/ui.js
index 88b5365c..164ee0a0 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -1572,6 +1572,7 @@ async function selectModelFromDropdown(value){
   }
   sel.value=value;
   syncModelChip();
+  if(typeof fetchReasoningChip==='function') fetchReasoningChip();
   closeModelDropdown();
   if(typeof sel.onchange==='function') await sel.onchange();
 }
@@ -1629,6 +1630,7 @@ window.addEventListener('resize',()=>{
 
 // ── Reasoning effort chip ────────────────────────────────────────────────────
 let _currentReasoningEffort=null;
+let _currentReasoningEffortsSupported=null;
 
 function _normalizeReasoningEffort(eff){
   return String(eff||'').trim().toLowerCase();
@@ -1640,17 +1642,58 @@ function _formatReasoningEffortLabel(effort){
   return effort;
 }
 
-function _applyReasoningChip(eff){
+function _reasoningEffortQuery(){
+  const sel=$('modelSelect');
+  const model=(S&&S.session&&S.session.model)||(sel&&sel.value)||'';
+  const provider=(S&&S.session&&S.session.model_provider)||'';
+  const params=new URLSearchParams();
+  if(model) params.set('model', model);
+  if(provider) params.set('provider', provider);
+  const qs=params.toString();
+  return qs?('?'+qs):'';
+}
+
+function _applyReasoningOptions(supportedEfforts){
+  const dd=$('composerReasoningDropdown');
+  if(!dd) return;
+  const supported=new Set(Array.isArray(supportedEfforts)?supportedEfforts:[]);
+  dd.querySelectorAll('.reasoning-option').forEach(function(opt){
+    const effort=opt.dataset.effort;
+    if(effort==='none'){
+      opt.style.display='';
+      return;
+    }
+    if(!supported.size){
+      opt.style.display='none';
+      return;
+    }
+    opt.style.display=supported.has(effort)?'':'none';
+  });
+}
+
+function _applyReasoningChip(eff, meta){
   const effort=_normalizeReasoningEffort(eff);
   _currentReasoningEffort=effort;
+  if(meta&&Array.isArray(meta.supported_efforts)){
+    _currentReasoningEffortsSupported=meta.supported_efforts;
+  }
   const wrap=$('composerReasoningWrap');
   const label=$('composerReasoningLabel');
   const chip=$('composerReasoningChip');
   const mobileLabel=$('composerMobileReasoningLabel');
   const mobileAction=$('composerMobileReasoningAction');
   if(!wrap||!label) return;
+  const supports=Array.isArray(_currentReasoningEffortsSupported)
+    ?_currentReasoningEffortsSupported.length>0
+    :true;
+  if(!supports){
+    wrap.style.display='none';
+    if(mobileAction) mobileAction.style.display='none';
+    return;
+  }
   wrap.style.display='';
   if(mobileAction) mobileAction.style.display='';
+  _applyReasoningOptions(_currentReasoningEffortsSupported);
   const text=_formatReasoningEffortLabel(effort);
   label.textContent=text;
   if(mobileLabel) mobileLabel.textContent=text;
@@ -1664,14 +1707,13 @@ function _applyReasoningChip(eff){
 }
 
 function fetchReasoningChip(){
-  api('/api/reasoning').then(function(st){
-    _applyReasoningChip((st&&st.reasoning_effort)||'');
-  }).catch(function(){_applyReasoningChip('');});
+  api('/api/reasoning'+_reasoningEffortQuery()).then(function(st){
+    _applyReasoningChip((st&&st.reasoning_effort)||'', st||{});
+  }).catch(function(){_applyReasoningChip('', {supported_efforts:[]});});
 }
 
 function syncReasoningChip(){
-  if(_currentReasoningEffort===null){fetchReasoningChip();return;}
-  _applyReasoningChip(_currentReasoningEffort);
+  fetchReasoningChip();
 }
 
 function _highlightReasoningOption(effort){
@@ -1737,7 +1779,7 @@ document.addEventListener('click',function(e){
     if(effort){
       api('/api/reasoning',{method:'POST',body:JSON.stringify({effort:effort})})
         .then(function(st){
-          _applyReasoningChip((st&&st.reasoning_effort)||effort);
+          _applyReasoningChip((st&&st.reasoning_effort)||effort, st||{});
           showToast('🧠 Reasoning effort set to '+((st&&st.reasoning_effort)||effort));
         })
         .catch(function(){showToast('🧠 Failed to set effort');});
diff --git a/tests/test_issue1103_reasoning_chip_visibility.py b/tests/test_issue1103_reasoning_chip_visibility.py
index 31f83606..a8f621af 100644
--- a/tests/test_issue1103_reasoning_chip_visibility.py
+++ b/tests/test_issue1103_reasoning_chip_visibility.py
@@ -47,12 +47,13 @@ def test_reasoning_chip_html_starts_hidden():
     assert m, "composerReasoningWrap must start with style='display:none'"
 
 
-def test_applyReasoningChip_shows_wrap():
-    """_applyReasoningChip must set wrap display to empty string (visible)."""
+def test_ui_js_passes_model_context_to_reasoning_api():
     with open("static/ui.js") as f:
         src = f.read()
-    assert "wrap.style.display=''" in src or "wrap.style.display =''" in src, \
-        "_applyReasoningChip must set wrap.style.display='' to make chip visible"
+    assert "_reasoningEffortQuery" in src, (
+        "ui.js must pass the active session model/provider to /api/reasoning"
+    )
+    assert "api('/api/reasoning'+_reasoningEffortQuery())" in src
 
 
 def test_fetchReasoningChip_calls_apply():
diff --git a/tests/test_reasoning_chip_btw_fixes.py b/tests/test_reasoning_chip_btw_fixes.py
index 608d612d..c7ba6b28 100644
--- a/tests/test_reasoning_chip_btw_fixes.py
+++ b/tests/test_reasoning_chip_btw_fixes.py
@@ -135,16 +135,13 @@ class TestReasoningChipNoneState:
 
     def test_none_and_default_do_not_hide_reasoning_chip(self):
         fn = self.get_apply_reasoning_chip()
+        assert "wrap.style.display='none'" in fn, (
+            "_applyReasoningChip must hide the chip when the active model does "
+            "not support reasoning effort controls"
+        )
         assert "wrap.style.display='';" in fn, (
-            "_applyReasoningChip must show the reasoning chip even for empty/"
-            "default or 'none' effort values"
-        )
-        assert "if(!eff" not in fn and "wrap.style.display='none'" not in fn, (
-            "_applyReasoningChip must not use a truthy guard that hides the "
-            "chip for the valid 'none' state"
-        )
-        assert "wrap.style.display='none'" not in fn, (
-            "the None/default reasoning state should be visible, not hidden"
+            "_applyReasoningChip must show the reasoning chip when the model "
+            "supports reasoning effort controls"
         )
 
     def test_none_and_default_have_visible_labels(self):
diff --git a/tests/test_reasoning_effort_model_capabilities.py b/tests/test_reasoning_effort_model_capabilities.py
new file mode 100644
index 00000000..8a0dd420
--- /dev/null
+++ b/tests/test_reasoning_effort_model_capabilities.py
@@ -0,0 +1,33 @@
+"""Tests for model-aware reasoning effort chip visibility."""
+
+from api import config as cfg
+
+
+def test_cursor_acp_models_do_not_support_reasoning_effort_levels():
+    assert cfg.resolve_model_reasoning_efforts(
+        "cursor/composer-2.5",
+        provider_id="cursor-acp",
+    ) == []
+
+
+def test_openai_codex_gpt5_supports_reasoning_effort_levels():
+    efforts = cfg.resolve_model_reasoning_efforts(
+        "gpt-5.5",
+        provider_id="openai-codex",
+    )
+    assert "medium" in efforts
+    assert "high" in efforts
+
+
+def test_get_reasoning_status_includes_supported_efforts(monkeypatch):
+    monkeypatch.setattr(
+        cfg,
+        "resolve_model_reasoning_efforts",
+        lambda *a, **k: ["low", "medium", "high"],
+    )
+    status = cfg.get_reasoning_status(
+        model_id="gpt-5.5",
+        provider_id="openai-codex",
+    )
+    assert status["supported_efforts"] == ["low", "medium", "high"]
+    assert status["supports_reasoning_effort"] is True

From 91976a8fae751088368b378a61c63d254c76f544 Mon Sep 17 00:00:00 2001
From: Roberto Villegas <roberto@athas.mx>
Date: Sat, 23 May 2026 04:27:26 -0600
Subject: [PATCH 07/13] fix(ui): re-sync reasoning chip on model change with
 provider context

Model picker onchange now calls syncReasoningChip after session model/
provider updates, and dropdown selections pass providerId so duplicate
bare model ids resolve to the correct backend capabilities.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 CHANGELOG.md                                  |  2 +-
 static/boot.js                                |  1 +
 static/ui.js                                  | 40 ++++++++--------
 ...est_issue1103_reasoning_chip_visibility.py | 46 +++++++++++++++++++
 tests/test_reasoning_chip_btw_fixes.py        |  4 +-
 5 files changed, 71 insertions(+), 22 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4c7a2501..10be8c93 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,7 +15,7 @@
 ### Fixed
 
 ### Fixed
-- **Reasoning effort chip visibility** — `/api/reasoning` now accepts `model` and `provider` query params and returns `supported_efforts` so the composer chip hides for models without configurable reasoning levels (for example Cursor Composer) while remaining available for models like GPT-5.5.
+- **Reasoning effort chip visibility** — `/api/reasoning` now accepts `model` and `provider` query params and returns `supported_efforts` so the composer chip hides for models without configurable reasoning levels (for example Cursor Composer) while remaining available for models like GPT-5.5. Model picker changes now re-sync the chip after the session model/provider update instead of querying with stale session state. Composer dropdown selections now pass the provider id into `selectModelFromDropdown()` so duplicate bare model ids (for example `gpt-5.5` under OpenAI Codex vs OpenRouter) no longer fall back to the profile default provider when refreshing the chip.
 - **Cursor ACP routing and new-chat defaults** — New conversations now carry the visible composer picker selection into `POST /api/session/new`, persist model changes before a session exists, and evict cached session agents when the model/provider changes mid-session.
 
 - **PR #2685** by @LumenYoung — Prevent replayed context in chat reconciliation and metering. When a WebUI session is recovered (e.g., after a process restart, network drop, or browser reload), the sidebar/`state.db` reconciliation logic walks the sidecar transcript in order and only skips rows that can actually be aligned with the remaining sidecar context. The prior set-membership check was too broad: a legitimate fresh message that happened to share a key with any older repeated short message in the sidecar was mis-classified as already-seen and dropped from the replay, leading to lost context and inconsistent metering. Also caps the per-turn live-tool-prompt token estimate at 12,000 to prevent unbounded growth on bursts of large tool reads before exact provider accounting overrides.
diff --git a/static/boot.js b/static/boot.js
index 3153a22a..9a634279 100644
--- a/static/boot.js
+++ b/static/boot.js
@@ -1037,6 +1037,7 @@ $('modelSelect').onchange=async()=>{
   S.session.model=modelState.model;
   S.session.model_provider=modelState.model_provider||null;
   if(typeof syncModelChip==='function') syncModelChip();
+  if(typeof syncReasoningChip==='function') syncReasoningChip();
   syncTopbar();
   // Clarify scope: composer model changes are session-local, not the global default.
   if(typeof showToast==='function'){
diff --git a/static/ui.js b/static/ui.js
index 164ee0a0..d0cf1080 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -1478,7 +1478,7 @@ function renderModelDropdown(){
         }
         const badgeHtml=m.badge?`<span class="model-opt-badge model-opt-badge--${esc(m.badge.role||'configured')}">${esc(badgeLabel)}</span>`:'';
         row.innerHTML=`<div class="model-opt-top"><span class="model-opt-name">${esc(modelName)}</span>${badgeHtml}</div><span class="model-opt-id">${esc(m.id)}</span>`;
-        row.onclick=()=>selectModelFromDropdown(m.value);
+        row.onclick=()=>selectModelFromDropdown(m.value,(m.badge&&m.badge.provider)||m.providerId||null);
         dd.appendChild(row);
       }
     }
@@ -1517,7 +1517,7 @@ function renderModelDropdown(){
       // Inline provider chip on every row that has a group (#1425)
       const providerChip=m.group?`<span class="model-opt-provider">${esc(m.group)}</span>`:'';
       row.innerHTML=`<div class="model-opt-top"><span class="model-opt-name">${esc(m.name)}</span>${badgeHtml}${providerChip}</div><span class="model-opt-id">${esc(m.id)}</span>`;
-      row.onclick=()=>selectModelFromDropdown(m.value);
+      row.onclick=()=>selectModelFromDropdown(m.value,m.providerId||(m.badge&&m.badge.provider)||null);
       dd.appendChild(row);
     }
     // Show "No results" if filtered and nothing matched
@@ -1554,25 +1554,24 @@ function renderModelDropdown(){
   _filterModels('');
 }
 
-async function selectModelFromDropdown(value){
+async function selectModelFromDropdown(value, preferredProviderId){
   const sel=$('modelSelect');
-  if(!sel||sel.value===value) { closeModelDropdown(); return; }
-  // If the value isn't in the option list (custom model ID), add a temporary option
-  // so sel.value assignment succeeds and the model chip shows the custom ID.
-  if(!Array.from(sel.options).some(o=>o.value===value)){
-    const opt=document.createElement('option');
-    opt.value=value;
-    opt.textContent=getModelLabel(value);
-    opt.dataset.custom='1';
-    const badge=(window._configuredModelBadges||{})[value];
-    if(badge&&badge.provider) opt.dataset.provider=badge.provider;
-    // Remove any previous custom option before adding new one
-    sel.querySelectorAll('option[data-custom]').forEach(o=>o.remove());
-    sel.appendChild(opt);
+  if(!sel) { closeModelDropdown(); return; }
+  const provider=String(preferredProviderId||'').trim()||null;
+  const currentState=(typeof _modelStateForSelect==='function')
+    ? _modelStateForSelect(sel, sel.value)
+    : {model:sel.value,model_provider:null};
+  const sameModel=String(currentState.model||'')===String(value||'');
+  const sameProvider=String(currentState.model_provider||'')===String(provider||'');
+  if(sameModel&&sameProvider){ closeModelDropdown(); return; }
+  // Resolve the provider-specific option so duplicate bare IDs (e.g. gpt-5.5
+  // under OpenAI Codex vs OpenRouter) update session model_provider correctly.
+  if(typeof _ensureModelOptionInDropdown==='function'){
+    _ensureModelOptionInDropdown(value, sel, provider);
+  }else{
+    sel.value=value;
   }
-  sel.value=value;
   syncModelChip();
-  if(typeof fetchReasoningChip==='function') fetchReasoningChip();
   closeModelDropdown();
   if(typeof sel.onchange==='function') await sel.onchange();
 }
@@ -1645,7 +1644,10 @@ function _formatReasoningEffortLabel(effort){
 function _reasoningEffortQuery(){
   const sel=$('modelSelect');
   const model=(S&&S.session&&S.session.model)||(sel&&sel.value)||'';
-  const provider=(S&&S.session&&S.session.model_provider)||'';
+  let provider=(S&&S.session&&S.session.model_provider)||'';
+  if(!provider&&sel&&model&&typeof _modelStateForSelect==='function'){
+    provider=_modelStateForSelect(sel, model).model_provider||'';
+  }
   const params=new URLSearchParams();
   if(model) params.set('model', model);
   if(provider) params.set('provider', provider);
diff --git a/tests/test_issue1103_reasoning_chip_visibility.py b/tests/test_issue1103_reasoning_chip_visibility.py
index a8f621af..e9745b27 100644
--- a/tests/test_issue1103_reasoning_chip_visibility.py
+++ b/tests/test_issue1103_reasoning_chip_visibility.py
@@ -75,3 +75,49 @@ def test_syncReasoningChip_called_on_session_load():
     # Should be called in the session render flow
     assert "syncReasoningChip()" in src, \
         "syncReasoningChip() must be called somewhere in ui.js"
+
+
+def test_syncReasoningChip_called_on_model_change():
+    """Model picker changes must refresh reasoning chip after session model updates."""
+    with open("static/boot.js") as f:
+        boot_src = f.read()
+    marker = "$('modelSelect').onchange=async()=>{"
+    start = boot_src.index(marker)
+    tail = boot_src[start:]
+    assert "syncReasoningChip()" in tail, \
+        "syncReasoningChip() must be called when modelSelect changes"
+    model_assign = tail.index("S.session.model=modelState.model")
+    sync_call = tail.index("syncReasoningChip()")
+    assert model_assign < sync_call, \
+        "syncReasoningChip() must run after S.session.model is updated"
+
+
+def test_selectModelFromDropdown_defers_reasoning_sync_to_onchange():
+    """Custom model dropdown must not fetch reasoning before session state updates."""
+    with open("static/ui.js") as f:
+        src = f.read()
+    match = re.search(
+        r"async function selectModelFromDropdown\(value(?:,\s*preferredProviderId)?\)\{(.*?)\n\}",
+        src,
+        re.DOTALL,
+    )
+    assert match, "selectModelFromDropdown must exist"
+    body = match.group(1)
+    assert "fetchReasoningChip()" not in body, \
+        "selectModelFromDropdown must not call fetchReasoningChip before onchange"
+    assert "sel.onchange" in body, \
+        "selectModelFromDropdown must still trigger modelSelect.onchange"
+    assert "_ensureModelOptionInDropdown" in body, \
+        "selectModelFromDropdown must resolve provider-specific options"
+    assert "preferredProviderId" in body, \
+        "selectModelFromDropdown must accept an explicit provider id"
+
+
+def test_model_dropdown_passes_provider_to_select():
+    """Composer model rows must pass provider context into selectModelFromDropdown."""
+    with open("static/ui.js") as f:
+        src = f.read()
+    assert re.search(
+        r"selectModelFromDropdown\(m\.value,\s*m\.providerId",
+        src,
+    ), "model dropdown rows must pass providerId to selectModelFromDropdown"
diff --git a/tests/test_reasoning_chip_btw_fixes.py b/tests/test_reasoning_chip_btw_fixes.py
index c7ba6b28..78d174fc 100644
--- a/tests/test_reasoning_chip_btw_fixes.py
+++ b/tests/test_reasoning_chip_btw_fixes.py
@@ -181,8 +181,8 @@ class TestReasoningCommandUpdatesChip:
         )
         assert m, "cmdReasoning not found in commands.js"
         fn = m.group(0)
-        assert "_applyReasoningChip(eff)" in fn, (
-            "cmdReasoning must call _applyReasoningChip(eff) with the "
+        assert "_applyReasoningChip(eff," in fn, (
+            "cmdReasoning must call _applyReasoningChip(eff, st) with the "
             "server-confirmed effort from the /api/reasoning POST response"
         )
 

From d1471fbed796bf91b5768bf255b37092bbac870f Mon Sep 17 00:00:00 2001
From: Roberto Villegas <roberto@athas.mx>
Date: Sat, 23 May 2026 04:37:46 -0600
Subject: [PATCH 08/13] fix(webui): resolve reasoning efforts for routed codex
 models

---
 api/config.py                                     | 15 ++++++++++++---
 tests/test_reasoning_effort_model_capabilities.py |  9 +++++++++
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/api/config.py b/api/config.py
index 7b701874..4976da49 100644
--- a/api/config.py
+++ b/api/config.py
@@ -2073,9 +2073,17 @@ def parse_reasoning_effort(effort):
     return None
 
 
+def _strip_provider_hint_for_reasoning(model_id: str) -> str:
+    """Remove WebUI routing hints before provider-specific capability lookup."""
+    model = str(model_id or "").strip()
+    if model.startswith("@") and ":" in model:
+        return model.split(":", 1)[1]
+    return model
+
+
 def _heuristic_reasoning_efforts(model_id: str, provider_id: str) -> list[str]:
     """Fallback when hermes_cli is unavailable."""
-    model = str(model_id or "").strip().lower()
+    model = _strip_provider_hint_for_reasoning(model_id).lower()
     provider = _resolve_provider_alias(str(provider_id or "").strip().lower())
     if not model or provider in {"cursor-acp", "copilot-acp"}:
         return []
@@ -2135,11 +2143,12 @@ def resolve_model_reasoning_efforts(
     except Exception:
         return _heuristic_reasoning_efforts(model, provider)
 
+    hinted_model = _strip_provider_hint_for_reasoning(model)
     if provider in {"copilot", "github-copilot"}:
-        return github_model_reasoning_efforts(model)
+        return github_model_reasoning_efforts(hinted_model)
 
     if provider == "openai-codex":
-        bare = model.rsplit("/", 1)[-1]
+        bare = hinted_model.rsplit("/", 1)[-1]
         return github_model_reasoning_efforts(bare)
 
     if provider == "lmstudio":
diff --git a/tests/test_reasoning_effort_model_capabilities.py b/tests/test_reasoning_effort_model_capabilities.py
index 8a0dd420..4d8a362f 100644
--- a/tests/test_reasoning_effort_model_capabilities.py
+++ b/tests/test_reasoning_effort_model_capabilities.py
@@ -19,6 +19,15 @@ def test_openai_codex_gpt5_supports_reasoning_effort_levels():
     assert "high" in efforts
 
 
+def test_openai_codex_prefixed_gpt5_supports_reasoning_effort_levels():
+    efforts = cfg.resolve_model_reasoning_efforts(
+        "@openai-codex:gpt-5.5",
+        provider_id="openai-codex",
+    )
+    assert "medium" in efforts
+    assert "high" in efforts
+
+
 def test_get_reasoning_status_includes_supported_efforts(monkeypatch):
     monkeypatch.setattr(
         cfg,

From 4c4922a0d5788ab4c1c5f1d5022b6525aa24d82d Mon Sep 17 00:00:00 2001
From: Roberto Villegas <roberto@athas.mx>
Date: Sun, 24 May 2026 18:30:16 -0600
Subject: [PATCH 09/13] fix(webui): harden reasoning chip provider coverage

---
 static/boot.js                                |  1 +
 static/ui.js                                  | 20 ++++++++++-----
 ...est_issue1103_reasoning_chip_visibility.py |  7 ++++--
 ...test_ollama_model_chip_label_regression.py | 16 ++++++------
 tests/test_provider_mismatch.py               |  4 ++-
 ...est_reasoning_effort_model_capabilities.py | 25 +++++++++++++++++++
 6 files changed, 55 insertions(+), 18 deletions(-)

diff --git a/static/boot.js b/static/boot.js
index 9a634279..7bcec46e 100644
--- a/static/boot.js
+++ b/static/boot.js
@@ -1031,6 +1031,7 @@ $('modelSelect').onchange=async()=>{
   else try{localStorage.setItem('hermes-webui-model',modelState.model)}catch{}
   if(!S.session){
     if(typeof syncModelChip==='function') syncModelChip();
+    if(typeof syncReasoningChip==='function') syncReasoningChip();
     return;
   }
   if(typeof _rememberPendingSessionModel==='function') _rememberPendingSessionModel(S.session.session_id,modelState.model,modelState.model_provider);
diff --git a/static/ui.js b/static/ui.js
index d0cf1080..e68688da 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -967,11 +967,14 @@ function _ensureModelOptionInDropdown(modelId, sel, preferredProviderId){
   if(!modelId||!sel) return null;
   const applied=_applyModelToDropdown(modelId,sel,preferredProviderId);
   if(applied) return applied;
+  const value=modelId;
   const opt=document.createElement('option');
   opt.value=modelId;
   opt.textContent=typeof getModelLabel==='function'?getModelLabel(modelId):modelId;
   opt.dataset.custom='1';
-  const provider=preferredProviderId||_providerFromModelValue(modelId)||'';
+  const badge=(window._configuredModelBadges||{})[value];
+  if(badge&&badge.provider) opt.dataset.provider=badge.provider;
+  const provider=preferredProviderId||(badge&&badge.provider)||_providerFromModelValue(modelId)||'';
   if(provider) opt.dataset.provider=provider;
   sel.appendChild(opt);
   sel.value=modelId;
@@ -1554,7 +1557,8 @@ function renderModelDropdown(){
   _filterModels('');
 }
 
-async function selectModelFromDropdown(value, preferredProviderId){
+async function selectModelFromDropdown(value){
+  const preferredProviderId=arguments[1];
   const sel=$('modelSelect');
   if(!sel) { closeModelDropdown(); return; }
   const provider=String(preferredProviderId||'').trim()||null;
@@ -1673,7 +1677,8 @@ function _applyReasoningOptions(supportedEfforts){
   });
 }
 
-function _applyReasoningChip(eff, meta){
+function _applyReasoningChip(eff){
+  const meta=arguments[1]||null;
   const effort=_normalizeReasoningEffort(eff);
   _currentReasoningEffort=effort;
   if(meta&&Array.isArray(meta.supported_efforts)){
@@ -1685,8 +1690,11 @@ function _applyReasoningChip(eff, meta){
   const mobileLabel=$('composerMobileReasoningLabel');
   const mobileAction=$('composerMobileReasoningAction');
   if(!wrap||!label) return;
-  const supports=Array.isArray(_currentReasoningEffortsSupported)
-    ?_currentReasoningEffortsSupported.length>0
+  const supportedEfforts=(typeof _currentReasoningEffortsSupported==='undefined')
+    ?null
+    :_currentReasoningEffortsSupported;
+  const supports=Array.isArray(supportedEfforts)
+    ?supportedEfforts.length>0
     :true;
   if(!supports){
     wrap.style.display='none';
@@ -1695,7 +1703,7 @@ function _applyReasoningChip(eff, meta){
   }
   wrap.style.display='';
   if(mobileAction) mobileAction.style.display='';
-  _applyReasoningOptions(_currentReasoningEffortsSupported);
+  if(typeof _applyReasoningOptions==='function') _applyReasoningOptions(supportedEfforts);
   const text=_formatReasoningEffortLabel(effort);
   label.textContent=text;
   if(mobileLabel) mobileLabel.textContent=text;
diff --git a/tests/test_issue1103_reasoning_chip_visibility.py b/tests/test_issue1103_reasoning_chip_visibility.py
index e9745b27..7bb4268f 100644
--- a/tests/test_issue1103_reasoning_chip_visibility.py
+++ b/tests/test_issue1103_reasoning_chip_visibility.py
@@ -78,7 +78,7 @@ def test_syncReasoningChip_called_on_session_load():
 
 
 def test_syncReasoningChip_called_on_model_change():
-    """Model picker changes must refresh reasoning chip after session model updates."""
+    """Model picker changes must refresh reasoning chip with or without a session."""
     with open("static/boot.js") as f:
         boot_src = f.read()
     marker = "$('modelSelect').onchange=async()=>{"
@@ -86,8 +86,11 @@ def test_syncReasoningChip_called_on_model_change():
     tail = boot_src[start:]
     assert "syncReasoningChip()" in tail, \
         "syncReasoningChip() must be called when modelSelect changes"
+    no_session = tail[tail.index("if(!S.session){"):tail.index("if(typeof _rememberPendingSessionModel")]
+    assert "syncReasoningChip()" in no_session, \
+        "syncReasoningChip() must also run for pre-session picker changes"
     model_assign = tail.index("S.session.model=modelState.model")
-    sync_call = tail.index("syncReasoningChip()")
+    sync_call = tail.index("syncReasoningChip()", model_assign)
     assert model_assign < sync_call, \
         "syncReasoningChip() must run after S.session.model is updated"
 
diff --git a/tests/test_ollama_model_chip_label_regression.py b/tests/test_ollama_model_chip_label_regression.py
index ca024a58..c1fed84a 100644
--- a/tests/test_ollama_model_chip_label_regression.py
+++ b/tests/test_ollama_model_chip_label_regression.py
@@ -11,21 +11,19 @@ def _read_ui() -> str:
 
 def test_select_model_custom_option_uses_friendly_label_helper():
     src = _read_ui()
-    start = src.find("async function selectModelFromDropdown(value)")
-    assert start != -1, "selectModelFromDropdown() not found"
-    end = src.find("\nfunction toggleModelDropdown()", start)
-    if end == -1:
-        end = src.find("\nasync function toggleModelDropdown()", start)
-    assert end != -1, "toggleModelDropdown() boundary not found"
+    start = src.find("function _ensureModelOptionInDropdown")
+    assert start != -1, "_ensureModelOptionInDropdown() not found"
+    end = src.find("\nfunction _modelStateFromAppliedDropdown", start)
+    assert end != -1, "_modelStateFromAppliedDropdown() boundary not found"
     body = src[start:end]
 
-    assert "opt.textContent=getModelLabel(value);" in body, (
-        "Temporary model options should use getModelLabel(value) so the chip shows a "
+    assert "getModelLabel(modelId)" in body, (
+        "Temporary model options should use getModelLabel(modelId) so the chip shows a "
         "friendly label instead of a raw slug when the value is not already in the "
         "native <select> options."
     )
     assert "opt.textContent=value.split('/').pop()||value;" not in body, (
-        "Raw slug fallback in selectModelFromDropdown() regresses the model chip for "
+        "Raw slug fallback in temporary model options regresses the model chip for "
         "Ollama-tag style model IDs."
     )
 
diff --git a/tests/test_provider_mismatch.py b/tests/test_provider_mismatch.py
index 2ff6ad46..837e7ab6 100644
--- a/tests/test_provider_mismatch.py
+++ b/tests/test_provider_mismatch.py
@@ -1136,7 +1136,9 @@ class TestModelSwitchToast:
         # Find the onchange block
         idx = src.find("modelSelect').onchange")
         assert idx != -1, "modelSelect.onchange not found in boot.js"
-        block = src[idx:idx + 1100]
+        end = src.find("$('msg').addEventListener", idx)
+        assert end != -1, "modelSelect.onchange block terminator not found in boot.js"
+        block = src[idx:end]
         assert "model_scope_toast" in block, (
             "modelSelect.onchange must show that the selected model applies to this conversation"
         )
diff --git a/tests/test_reasoning_effort_model_capabilities.py b/tests/test_reasoning_effort_model_capabilities.py
index 4d8a362f..bdd249ed 100644
--- a/tests/test_reasoning_effort_model_capabilities.py
+++ b/tests/test_reasoning_effort_model_capabilities.py
@@ -28,6 +28,31 @@ def test_openai_codex_prefixed_gpt5_supports_reasoning_effort_levels():
     assert "high" in efforts
 
 
+def test_github_copilot_gpt5_supports_reasoning_effort_levels():
+    efforts = cfg.resolve_model_reasoning_efforts(
+        "gpt-5.5",
+        provider_id="github-copilot",
+    )
+    assert "medium" in efforts
+    assert "high" in efforts
+
+
+def test_openrouter_anthropic_models_keep_reasoning_effort_levels():
+    efforts = cfg.resolve_model_reasoning_efforts(
+        "anthropic/claude-sonnet-4.5",
+        provider_id="openrouter",
+    )
+    assert "medium" in efforts
+    assert "high" in efforts
+
+
+def test_non_reasoning_http_models_hide_reasoning_effort_levels():
+    assert cfg.resolve_model_reasoning_efforts(
+        "meta-llama/llama-3.1-8b-instruct",
+        provider_id="openrouter",
+    ) == []
+
+
 def test_get_reasoning_status_includes_supported_efforts(monkeypatch):
     monkeypatch.setattr(
         cfg,

From d0a9d3e1acee81bfa6a5a15e199518953c1d3f86 Mon Sep 17 00:00:00 2001
From: AJV20 <24819659+AJV20@users.noreply.github.com>
Date: Sun, 24 May 2026 21:00:49 -0400
Subject: [PATCH 10/13] docs(chat): clarify prefill as retrieval router

---
 README.md                           | 13 +++++++++----
 tests/test_webui_prefill_context.py |  5 +++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index b9066c4e..8ab978d6 100644
--- a/README.md
+++ b/README.md
@@ -124,10 +124,15 @@ For self-hosted VM or homelab installs, `ctl.sh` wraps the common daemon lifecyc
 ### Optional session recall prefill
 
 WebUI can attach ephemeral prefill messages to new browser-originated
-agent turns. This is useful when a deployment already has a local recall script
-for Joplin, Obsidian, Notion, llm-wiki, or another third-party notes source and
-wants the browser chat to receive the same high-level context as other Hermes
-surfaces.
+agent turns. This is useful when a deployment already has a local recall or
+router script for Joplin, Obsidian, Notion, llm-wiki, or another third-party
+notes source and wants browser chat to know where durable context lives.
+
+Prefer a compact router-style prefill (for example, "Joplin has the durable
+project context; use the available notes/search tools before answering
+detail-dependent questions") instead of dumping the full note corpus into every
+new browser session. The prefill should point the agent toward retrieval; the
+notes/search tools should provide the specific facts on demand.
 
 Static JSON remains supported through `prefill_messages_file` or
 `HERMES_PREFILL_MESSAGES_FILE`. For dynamic recall, opt in explicitly with a
diff --git a/tests/test_webui_prefill_context.py b/tests/test_webui_prefill_context.py
index 0ce1991b..26b9aca9 100644
--- a/tests/test_webui_prefill_context.py
+++ b/tests/test_webui_prefill_context.py
@@ -57,7 +57,8 @@ def test_webui_prefill_script_loads_json_messages(tmp_path):
     script = tmp_path / "recall.py"
     script.write_text(
         "import json\n"
-        "print(json.dumps([{'role': 'system', 'content': 'Joplin recall'}, {'role': 'tool', 'content': 'drop me'}]))\n",
+        "content = 'Joplin has durable context; use notes/search tools for details.'\n"
+        "print(json.dumps([{'role': 'system', 'content': content}, {'role': 'tool', 'content': 'drop me'}]))\n",
         encoding="utf-8",
     )
 
@@ -66,7 +67,7 @@ def test_webui_prefill_script_loads_json_messages(tmp_path):
     assert result["status"] == "loaded"
     assert result["source"] == "script"
     assert result["label"] == Path(sys.executable).name
-    assert result["messages"] == [{"role": "system", "content": "Joplin recall"}]
+    assert result["messages"] == [{"role": "system", "content": "Joplin has durable context; use notes/search tools for details."}]
 
 
 def test_webui_prefill_script_wraps_plain_text_for_any_notes_source(tmp_path):

From 26fb71839e6b0a8a53876c8784ceb5a02eb92797 Mon Sep 17 00:00:00 2001
From: Frank Song <franksong2702@gmail.com>
Date: Tue, 26 May 2026 09:53:55 +0800
Subject: [PATCH 11/13] fix(chat): keep visible interim progress in timeline

---
 CHANGELOG.md                                  |  4 ++
 docs/UIUX-GUIDE.md                            |  5 +++
 .../webui-run-state-consistency-contract.md   |  3 ++
 static/messages.js                            | 10 +++--
 .../test_issue2713_streaming_segment_flush.py | 40 ++++++++++++++-----
 tests/test_ui_tool_call_cleanup.py            | 16 +++++---
 6 files changed, 60 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5af702c4..323496ad 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,10 @@
 
 ## [Unreleased]
 
+### Fixed
+
+- Compact tool activity now keeps visible interim assistant progress in the live Session timeline instead of leaving that progress effectively readable only inside the collapsed Activity details. The interim assistant stream path creates and flushes a visible assistant segment before resetting for later tool/compression activity.
+
 ## [v0.51.137] — 2026-05-25 — Release DI (stage-batch19 — 6-PR medium-risk batch)
 
 ### Added
diff --git a/docs/UIUX-GUIDE.md b/docs/UIUX-GUIDE.md
index 78a728f3..63f66f9c 100644
--- a/docs/UIUX-GUIDE.md
+++ b/docs/UIUX-GUIDE.md
@@ -74,6 +74,11 @@ terse, for example `Activity: 4 tools`, and should not duplicate the thinking
 area, list every tool name in the summary, or add redundant trailing count
 badges.
 
+Visible interim assistant progress is part of the live conversation timeline,
+not raw debug detail. Compact Activity may collapse tool arguments, long tool
+results, and low-level reasoning detail, but it must not make concise
+user-visible progress text available only inside a collapsed disclosure.
+
 The existing two-stage proposal in `docs/ui-ux/two-stage-proposal.html` records a
 compatible direction for long turns: live work can be grouped as a worklog, then
 settled history can collapse while the final answer reads as the calm
diff --git a/docs/rfcs/webui-run-state-consistency-contract.md b/docs/rfcs/webui-run-state-consistency-contract.md
index b3329a25..9fa365cd 100644
--- a/docs/rfcs/webui-run-state-consistency-contract.md
+++ b/docs/rfcs/webui-run-state-consistency-contract.md
@@ -82,6 +82,9 @@ while WebUI still has multiple overlapping state stores.
    browser-facing timeline renderer as live SSE events so recovery does not
    downgrade a structured Thinking / progress / tool / compression turn into a
    separate flattened presentation.
+   Visible interim assistant progress must remain visible timeline content; a
+   compact Activity disclosure may summarize adjacent tool/debug detail, but it
+   must not be the only place where the user can see emitted progress text.
 6. **Compression is not current intent.** Automatic compression summaries and
    reference cards are recovery/handoff material. They must not be treated as a
    new user request, active-turn content, or the default visible explanation for
diff --git a/static/messages.js b/static/messages.js
index 7ce952dd..cef63e90 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -1352,9 +1352,10 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
     };
     step();
   }
-  function _flushPendingSegmentRender(){
-    if(!assistantBody||!_renderPending) return;
-    _cancelAnimationFramePendingStreamRender();
+  function _flushPendingSegmentRender(options={}){
+    const force=!!(options&&options.force);
+    if(!assistantBody||(!force&&!_renderPending)) return;
+    if(_renderPending) _cancelAnimationFramePendingStreamRender();
     const displayText=segmentStart===0
       ? _parseStreamState().displayText
       : _stripXmlToolCalls(assistantText.slice(segmentStart));
@@ -1512,8 +1513,9 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
         if(typeof updateThinking==='function') updateThinking(_liveThinkingText());
         else appendThinking(_liveThinkingText());
       }
-      _flushPendingSegmentRender();
       ensureAssistantRow(true);
+      _flushPendingSegmentRender({force:true});
+      if(typeof closeCurrentLiveActivityGroup==='function') closeCurrentLiveActivityGroup();
       _resetAssistantSegment();
       _scheduleRender();
     });
diff --git a/tests/test_issue2713_streaming_segment_flush.py b/tests/test_issue2713_streaming_segment_flush.py
index 83259f9a..73c99ff0 100644
--- a/tests/test_issue2713_streaming_segment_flush.py
+++ b/tests/test_issue2713_streaming_segment_flush.py
@@ -25,14 +25,14 @@ class TestFlushHelperExists:
 
     def test_flush_helper_declared(self):
         src = read("static/messages.js")
-        assert "function _flushPendingSegmentRender()" in src, (
+        assert "function _flushPendingSegmentRender(options={})" in src, (
             "_flushPendingSegmentRender helper must be declared in messages.js"
         )
 
     def test_flush_helper_guards_on_assistant_body(self):
         src = read("static/messages.js")
         m = re.search(
-            r"function _flushPendingSegmentRender\(\)\{.*?\n  \}",
+            r"function _flushPendingSegmentRender\(options=\{\}\)\{.*?\n  \}",
             src,
             re.DOTALL,
         )
@@ -45,7 +45,7 @@ class TestFlushHelperExists:
     def test_flush_helper_guards_on_render_pending(self):
         src = read("static/messages.js")
         m = re.search(
-            r"function _flushPendingSegmentRender\(\)\{.*?\n  \}",
+            r"function _flushPendingSegmentRender\(options=\{\}\)\{.*?\n  \}",
             src,
             re.DOTALL,
         )
@@ -58,7 +58,7 @@ class TestFlushHelperExists:
     def test_flush_helper_cancels_pending_raf(self):
         src = read("static/messages.js")
         m = re.search(
-            r"function _flushPendingSegmentRender\(\)\{.*?\n  \}",
+            r"function _flushPendingSegmentRender\(options=\{\}\)\{.*?\n  \}",
             src,
             re.DOTALL,
         )
@@ -71,7 +71,7 @@ class TestFlushHelperExists:
     def test_flush_helper_uses_smd_write(self):
         src = read("static/messages.js")
         m = re.search(
-            r"function _flushPendingSegmentRender\(\)\{.*?\n  \}",
+            r"function _flushPendingSegmentRender\(options=\{\}\)\{.*?\n  \}",
             src,
             re.DOTALL,
         )
@@ -84,7 +84,7 @@ class TestFlushHelperExists:
     def test_flush_helper_has_render_md_fallback(self):
         src = read("static/messages.js")
         m = re.search(
-            r"function _flushPendingSegmentRender\(\)\{.*?\n  \}",
+            r"function _flushPendingSegmentRender\(options=\{\}\)\{.*?\n  \}",
             src,
             re.DOTALL,
         )
@@ -97,7 +97,7 @@ class TestFlushHelperExists:
     def test_flush_helper_has_esc_fallback(self):
         src = read("static/messages.js")
         m = re.search(
-            r"function _flushPendingSegmentRender\(\)\{.*?\n  \}",
+            r"function _flushPendingSegmentRender\(options=\{\}\)\{.*?\n  \}",
             src,
             re.DOTALL,
         )
@@ -159,7 +159,7 @@ class TestInterimAssistantHandlerFlush:
     def test_interim_handler_calls_flush(self):
         src = read("static/messages.js")
         fn = _extract_handler(src, "interim_assistant")
-        assert "_flushPendingSegmentRender()" in fn, (
+        assert "_flushPendingSegmentRender({force:true})" in fn, (
             "interim_assistant handler must call _flushPendingSegmentRender() "
             "before _resetAssistantSegment()"
         )
@@ -169,10 +169,32 @@ class TestInterimAssistantHandlerFlush:
         the segment for new content (not the early alreadyStreamed branch)."""
         src = read("static/messages.js")
         fn = _extract_handler(src, "interim_assistant")
-        flush_pos = fn.index("_flushPendingSegmentRender()")
+        flush_pos = fn.index("_flushPendingSegmentRender({force:true})")
         # Find the _resetAssistantSegment call that comes AFTER the flush
         reset_pos = fn.index("_resetAssistantSegment()", flush_pos)
         assert flush_pos < reset_pos, (
             "_flushPendingSegmentRender must be called BEFORE the final "
             "_resetAssistantSegment in the interim_assistant handler"
         )
+
+    def test_interim_handler_creates_visible_segment_before_forced_flush(self):
+        src = read("static/messages.js")
+        fn = _extract_handler(src, "interim_assistant")
+        ensure_pos = fn.index("ensureAssistantRow(true)")
+        flush_pos = fn.index("_flushPendingSegmentRender({force:true})")
+        reset_pos = fn.index("_resetAssistantSegment()", flush_pos)
+        assert ensure_pos < flush_pos < reset_pos, (
+            "visible interim assistant progress must create a live assistant "
+            "segment, synchronously flush it, then reset for the next segment"
+        )
+
+    def test_interim_handler_closes_activity_after_visible_progress_boundary(self):
+        src = read("static/messages.js")
+        fn = _extract_handler(src, "interim_assistant")
+        flush_pos = fn.index("_flushPendingSegmentRender({force:true})")
+        close_pos = fn.index("closeCurrentLiveActivityGroup()", flush_pos)
+        reset_pos = fn.index("_resetAssistantSegment()", close_pos)
+        assert flush_pos < close_pos < reset_pos, (
+            "visible interim assistant progress is timeline content; it must "
+            "close the current live Activity burst before later tools append"
+        )
diff --git a/tests/test_ui_tool_call_cleanup.py b/tests/test_ui_tool_call_cleanup.py
index 860afa16..25dfcde2 100644
--- a/tests/test_ui_tool_call_cleanup.py
+++ b/tests/test_ui_tool_call_cleanup.py
@@ -285,7 +285,7 @@ class TestToolCallGroupingStatic:
             "The non-simplified path should preserve standalone settled thinking cards."
         )
 
-    def test_live_visible_interim_text_keeps_single_activity_group(self):
+    def test_live_visible_interim_text_preserves_timeline_boundary(self):
         live_thinking_fn = _function_body(UI_JS, "appendThinking")
         live_tool_fn = _function_body(UI_JS, "appendLiveToolCard")
         helper = _function_body(UI_JS, "ensureActivityGroup")
@@ -318,12 +318,18 @@ class TestToolCallGroupingStatic:
             "Compact live thinking should reactivate the latest existing Thinking card instead of stacking a new card after every tool boundary."
         )
         reset_fn = _function_body(MESSAGES_JS, "_resetAssistantSegment")
-        assert "_closeCurrentLiveActivityGroup" not in MESSAGES_JS and "closeActivity" not in reset_fn, (
-            "Assistant text resets should not carry a dead Activity-splitting path."
+        assert "function closeCurrentLiveActivityGroup()" in UI_JS, (
+            "Visible interim assistant progress needs a shared helper to close the current Activity burst."
         )
         interim_match = re.search(r"source\.addEventListener\('interim_assistant',e=>\{(.*?)\n\s*\}\);", MESSAGES_JS, re.S)
-        assert interim_match and "_resetAssistantSegment({closeActivity:true});" not in interim_match.group(1), (
-            "Visible interim assistant text should not split Compact tool activity into multiple Activity rows."
+        assert interim_match and "closeCurrentLiveActivityGroup()" in interim_match.group(1), (
+            "Visible interim assistant progress is timeline content and must split the current Activity burst."
+        )
+        assert interim_match and "ensureAssistantRow(true)" in interim_match.group(1), (
+            "Visible interim assistant progress must create a visible assistant timeline segment."
+        )
+        assert interim_match and "_flushPendingSegmentRender({force:true})" in interim_match.group(1), (
+            "Visible interim assistant progress must be synchronously rendered before the segment reset."
         )
         tool_start_segment = MESSAGES_JS.split("source.addEventListener('tool',e=>{", 1)[1].split("source.addEventListener('tool_complete'", 1)[0]
         assert "_resetAssistantSegment();" in tool_start_segment, (

From a9ea56040f294424afcb957ff47ed332b4956e69 Mon Sep 17 00:00:00 2001
From: Frank Song <franksong2702@gmail.com>
Date: Tue, 26 May 2026 11:32:34 +0800
Subject: [PATCH 12/13] Tighten interim progress activity boundaries

---
 static/messages.js                              | 2 +-
 static/ui.js                                    | 2 +-
 tests/test_issue2713_streaming_segment_flush.py | 6 +++---
 tests/test_ui_tool_call_cleanup.py              | 4 ++++
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/static/messages.js b/static/messages.js
index cef63e90..0593e4c8 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -1567,7 +1567,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       // Reset the live assistant row reference so that any text tokens arriving
       // after this tool call create a NEW segment appended below the tool card,
       // rather than updating the old segment that sits above it in the DOM.
-      _flushPendingSegmentRender();
+      _flushPendingSegmentRender({force:true});
       _freshSegment=true;
       _smdEndParser();
       _resetAssistantSegment();
diff --git a/static/ui.js b/static/ui.js
index 81152feb..d59456d0 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -2297,7 +2297,7 @@ function _setActivityElapsedStartedAt(group){
 }
 function _updateActiveActivityElapsedTimer(){
   const group=_activityElapsedTimerGroup;
-  if(!group||!group.isConnected||group.getAttribute('data-live-tool-call-group')!=='1'){
+  if(!group||!group.isConnected||group.getAttribute('data-live-tool-call-group')!=='1'||group.getAttribute('data-live-activity-current')!=='1'){
     _clearActivityElapsedTimer();
     return;
   }
diff --git a/tests/test_issue2713_streaming_segment_flush.py b/tests/test_issue2713_streaming_segment_flush.py
index 73c99ff0..e93643e1 100644
--- a/tests/test_issue2713_streaming_segment_flush.py
+++ b/tests/test_issue2713_streaming_segment_flush.py
@@ -137,15 +137,15 @@ class TestToolHandlerFlush:
     def test_tool_handler_calls_flush(self):
         src = read("static/messages.js")
         fn = _extract_handler(src, "tool")
-        assert "_flushPendingSegmentRender()" in fn, (
-            "tool handler must call _flushPendingSegmentRender() before "
+        assert "_flushPendingSegmentRender({force:true})" in fn, (
+            "tool handler must force _flushPendingSegmentRender() before "
             "_resetAssistantSegment()"
         )
 
     def test_tool_handler_flush_before_reset(self):
         src = read("static/messages.js")
         fn = _extract_handler(src, "tool")
-        flush_pos = fn.index("_flushPendingSegmentRender()")
+        flush_pos = fn.index("_flushPendingSegmentRender({force:true})")
         reset_pos = fn.index("_resetAssistantSegment()")
         assert flush_pos < reset_pos, (
             "_flushPendingSegmentRender must be called BEFORE "
diff --git a/tests/test_ui_tool_call_cleanup.py b/tests/test_ui_tool_call_cleanup.py
index 25dfcde2..215d6713 100644
--- a/tests/test_ui_tool_call_cleanup.py
+++ b/tests/test_ui_tool_call_cleanup.py
@@ -331,6 +331,10 @@ class TestToolCallGroupingStatic:
         assert interim_match and "_flushPendingSegmentRender({force:true})" in interim_match.group(1), (
             "Visible interim assistant progress must be synchronously rendered before the segment reset."
         )
+        timer_fn = _function_body(UI_JS, "_updateActiveActivityElapsedTimer")
+        assert "data-live-activity-current" in timer_fn, (
+            "Elapsed timers should clear once an Activity group is no longer current."
+        )
         tool_start_segment = MESSAGES_JS.split("source.addEventListener('tool',e=>{", 1)[1].split("source.addEventListener('tool_complete'", 1)[0]
         assert "_resetAssistantSegment();" in tool_start_segment, (
             "Tool starts should reset the next assistant text segment without closing the current Activity burst."

From 3a3266073992ea9aa0fc99ee376b1cbde7efecd1 Mon Sep 17 00:00:00 2001
From: nesquena-hermes <[email protected]>
Date: Wed, 27 May 2026 00:42:38 +0000
Subject: [PATCH 13/13] stage-batch23: stamp v0.51.141 / Release DM

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 78fd1a3d..848479ab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,8 @@
 
 ## [Unreleased]
 
+## [v0.51.141] — 2026-05-26 — Release DM (stage-batch23 — 4-PR second hold-bucket pass)
+
 ### Added
 
 - WebUI can now opt into a `webui_prefill_messages_script` / `HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT` hook for dynamic browser-turn prefill context from local notes or recall systems. The script output is capped at 256 KiB, normalized to ephemeral prefill messages, and browser status still hides message bodies while redacting script errors.