From a48e47dd1cd53115eccdd802461df1ebb6a63d15 Mon Sep 17 00:00:00 2001 From: Michael Lam Date: Sun, 17 May 2026 20:40:20 -0700 Subject: [PATCH 01/13] feat: separate CLI sessions in sidebar --- CHANGELOG.md | 4 ++ docs/pr-media/2351/after-source-tabs.png | Bin 0 -> 2355 bytes docs/pr-media/2351/before-cli-mixed.png | Bin 0 -> 2355 bytes static/sessions.js | 59 +++++++++++++++++- static/style.css | 5 ++ ...est_issue2351_cli_session_source_filter.py | 31 +++++++++ 6 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 docs/pr-media/2351/after-source-tabs.png create mode 100644 docs/pr-media/2351/before-cli-mixed.png create mode 100644 tests/test_issue2351_cli_session_source_filter.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ed15eec3..04bf9c79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- **PR #2506** by @Michaelyklam (refs #2351) — Add a read-only WebUI/CLI session source switch in the chat sidebar when agent session sync is enabled. WebUI conversations stay in the default list, while imported CLI/agent sessions are surfaced under a separate `CLI sessions` tab with counts so large CLI histories do not clutter the normal conversation list. 
+ ## [v0.51.91] — 2026-05-18 — Release BO (stage-384 — 5-PR full sweep batch — reasoning-replay history fix + archive-extract per-session inbox + fallback streaming warnings + sanitized custom-provider env hints + Slice 3c queue/goal adapter routing) ### Fixed diff --git a/docs/pr-media/2351/after-source-tabs.png b/docs/pr-media/2351/after-source-tabs.png new file mode 100644 index 0000000000000000000000000000000000000000..2279fd3004940274c8e3f47d4f10930c3f6cf87a GIT binary patch literal 2355 zcmeAS@N?(olHy`uVBq!ia0y~yU~FSxV7kD;1Qe0J>u{8Tf#Zdzi(^Q|oHtht1sN20 z7&gisk7E8I-qkBFB)D^TK7-Ubh7=7>jzA%SMJfs|o(>(88U$1s6}_08rm(b(N{j{x zrHR2nb~G)GX0OqLWwb~gtz|~*SDLjfJ~1*d{Qp0H!p|4LrVksa&BM&lBE@=rfyVu5 PKoJH{S3j3^P6u{8Tf#Zdzi(^Q|oHtht1sN20 z7&gisk7E8I-qkBFB)D^TK7-Ubh7=7>jzA%SMJfs|o(>(88U$1s6}_08rm(b(N{j{x zrHR2nb~G)GX0OqLWwb~gtz|~*SDLjfJ~1*d{Qp0H!p|4LrVksa&BM&lBE@=rfyVu5 PKoJH{S3j3^P60) ); + const webuiSessionCount = withMessages.filter(s=>!_isCliSession(s)).length; + const cliSessionCount = withMessages.filter(s=>_isCliSession(s)).length; + if(_sessionSourceFilter==='cli' && !window._showCliSessions && cliSessionCount===0){ + _sessionSourceFilter='webui'; + } + const sourceFiltered = _sessionSourceFilter==='cli' + ? withMessages.filter(s=>_isCliSession(s)) + : withMessages.filter(s=>!_isCliSession(s)); // The server is authoritative for profile scoping (#1611): it filters by // active profile when no query param is set, and returns the aggregate when // we send ?all_profiles=1. The renamed-root cross-alias (a row tagged @@ -2733,7 +2766,7 @@ function renderSessionListFromCache(){ // in _profiles_match, and a strict-equality client filter would reject those // rows incorrectly. So we trust the wire data and skip the redundant client // filter entirely. - const profileFiltered=withMessages; + const profileFiltered=sourceFiltered; // Filter by active project. 
NO_PROJECT_FILTER sentinel asks for sessions // with no project_id; otherwise filter to the matching project_id, or // pass through when no filter is active. @@ -2768,6 +2801,21 @@ function renderSessionListFromCache(){ list.appendChild(batchBar); if(_sessionSelectMode&&_selectedSessions.size>0){batchBar.style.display='flex';_renderBatchActionBar();} else{batchBar.style.display='none';} + if(window._showCliSessions || cliSessionCount>0){ + const sourceTabs=document.createElement('div'); + sourceTabs.className='session-source-tabs'; + for(const filter of ['webui','cli']){ + const count=filter==='cli'?cliSessionCount:webuiSessionCount; + const btn=document.createElement('button'); + btn.type='button'; + btn.className='session-source-tab'+(_sessionSourceFilter===filter?' active':''); + btn.textContent=_sessionSourceLabel(filter,count); + btn.setAttribute('aria-pressed', _sessionSourceFilter===filter?'true':'false'); + btn.onclick=()=>_setSessionSourceFilter(filter); + sourceTabs.appendChild(btn); + } + list.appendChild(sourceTabs); + } // Project filter bar — show when there are real projects OR there are // unassigned sessions (so the Unassigned chip has something to filter to). 
const hasUnprojected=profileFiltered.some(s=>!s.project_id); @@ -2850,9 +2898,14 @@ function renderSessionListFromCache(){ list.appendChild(toggle); } // Empty state for active project filter - if(_activeProject&&sessions.length===0){ + if(_sessionSourceFilter==='cli'&&sessions.length===0){ const empty=document.createElement('div'); - empty.style.cssText='padding:20px 14px;color:var(--muted);font-size:12px;text-align:center;opacity:.7;'; + empty.className='session-empty-note'; + empty.textContent=window._showCliSessions?'No CLI sessions found.':'Enable Show agent sessions in Settings to list CLI sessions here.'; + list.appendChild(empty); + } else if(_activeProject&&sessions.length===0){ + const empty=document.createElement('div'); + empty.className='session-empty-note'; empty.textContent=_activeProject===NO_PROJECT_FILTER?'No unassigned sessions.':'No sessions in this project yet.'; list.appendChild(empty); } diff --git a/static/style.css b/static/style.css index e4714e21..0921d97c 100644 --- a/static/style.css +++ b/static/style.css @@ -3024,6 +3024,11 @@ main.main.showing-logs > #mainLogs{display:flex;} .mermaid-rendered svg{max-width:100%;height:auto;} /* ── Session projects ── */ +.session-source-tabs{display:flex;gap:4px;padding:4px 10px 8px;flex-shrink:0;} +.session-source-tab{flex:1;min-width:0;border:1px solid var(--border2);border-radius:10px;background:var(--input-bg);color:var(--muted);font-size:10px;font-weight:700;line-height:1.2;padding:5px 6px;cursor:pointer;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;transition:background .15s,color .15s,border-color .15s;} +.session-source-tab:hover{background:rgba(255,255,255,.08);color:var(--text);} +.session-source-tab.active{background:var(--accent-bg);color:var(--accent-text);border-color:var(--accent-bg);} +.session-empty-note{padding:20px 14px;color:var(--muted);font-size:12px;text-align:center;opacity:.7;} .project-bar{display:flex;gap:4px;padding:4px 10px 
8px;flex-wrap:wrap;align-items:center;flex-shrink:0;} .project-chip{font-size:10px;font-weight:600;padding:3px 8px;border-radius:12px;cursor:pointer;border:1px solid var(--border2);background:var(--input-bg);color:var(--muted);transition:all .15s;white-space:nowrap;display:inline-flex;align-items:center;gap:4px;} .project-chip:hover{background:rgba(255,255,255,.08);color:var(--text);} diff --git a/tests/test_issue2351_cli_session_source_filter.py b/tests/test_issue2351_cli_session_source_filter.py new file mode 100644 index 00000000..efe2a8f6 --- /dev/null +++ b/tests/test_issue2351_cli_session_source_filter.py @@ -0,0 +1,31 @@ +"""Regression coverage for issue #2351 CLI session list separation.""" +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +SESSIONS_JS = ROOT / "static" / "sessions.js" +STYLE_CSS = ROOT / "static" / "style.css" + + +def test_sidebar_has_separate_webui_and_cli_session_source_tabs(): + src = SESSIONS_JS.read_text(encoding="utf-8") + assert "let _sessionSourceFilter = 'webui'" in src + assert "hermes-session-source-filter" in src + assert "session-source-tabs" in src + assert "WebUI sessions" in src + assert "CLI sessions" in src + assert "_sessionSourceFilter==='cli'" in src + + +def test_cli_filter_keeps_cli_rows_out_of_default_webui_list(): + src = SESSIONS_JS.read_text(encoding="utf-8") + assert "const webuiSessionCount = withMessages.filter(s=>!_isCliSession(s)).length" in src + assert "const cliSessionCount = withMessages.filter(s=>_isCliSession(s)).length" in src + assert "? 
withMessages.filter(s=>_isCliSession(s))" in src + assert ": withMessages.filter(s=>!_isCliSession(s))" in src + + +def test_session_source_tabs_have_dedicated_sidebar_styles(): + css = STYLE_CSS.read_text(encoding="utf-8") + assert ".session-source-tabs" in css + assert ".session-source-tab.active" in css + assert ".session-empty-note" in css From fa57868431b8dc6231ca7e962653356046c70401 Mon Sep 17 00:00:00 2001 From: AJV20 <24819659+AJV20@users.noreply.github.com> Date: Sun, 24 May 2026 20:05:20 -0400 Subject: [PATCH 02/13] feat(chat): add WebUI prefill script hook --- ARCHITECTURE.md | 3 + CHANGELOG.md | 4 ++ README.md | 32 +++++++++ api/streaming.py | 106 ++++++++++++++++++++++++---- tests/test_webui_prefill_context.py | 66 ++++++++++++++++- 5 files changed, 196 insertions(+), 15 deletions(-) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 6b865018..c9834c36 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -122,6 +122,9 @@ Environment variables controlling behavior: HERMES_WEBUI_DEFAULT_MODEL Optional model override; unset means provider default HERMES_WEBUI_PASSWORD Optional: enable password auth (off by default) HERMES_WEBUI_SKIP_ONBOARDING Optional: bypass the first-run onboarding wizard + HERMES_PREFILL_MESSAGES_FILE Optional JSON message list for browser-turn prefill context + HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT Optional command that prints JSON messages or text prefill context + HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT_TIMEOUT Optional script timeout in seconds (default 5, max 30) HERMES_HOME Base directory for Hermes state (~/.hermes by default) Test isolation environment variables (set by conftest.py): diff --git a/CHANGELOG.md b/CHANGELOG.md index a1f805e7..bbc2614a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ ## [Unreleased] +### Added + +- WebUI can now opt into a `webui_prefill_messages_script` / `HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT` hook for dynamic browser-turn prefill context from local notes or recall systems. 
The script output is normalized to ephemeral prefill messages and browser status still hides message bodies while redacting script errors. + ## [v0.51.131] — 2026-05-24 — Release DC (stage-batch13 — 6-PR notes-drawer + context-parity + PWA-swipe + locale polish) ### Added diff --git a/README.md b/README.md index ccaca241..489f7b71 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,38 @@ For self-hosted VM or homelab installs, `ctl.sh` wraps the common daemon lifecyc `ctl.sh start` runs the bootstrap in foreground/no-browser mode behind the daemon wrapper, writes logs to `~/.hermes/webui.log`, and respects `.env` plus inline overrides such as `HERMES_WEBUI_HOST=0.0.0.0 ./ctl.sh start`. +### Optional session recall prefill + +WebUI can attach ephemeral prefill messages to new browser-originated +agent turns. This is useful when a deployment already has a local recall script +for Joplin, Obsidian, Notion, llm-wiki, or another third-party notes source and +wants the browser chat to receive the same high-level context as other Hermes +surfaces. + +Static JSON remains supported through `prefill_messages_file` or +`HERMES_PREFILL_MESSAGES_FILE`. For dynamic recall, opt in explicitly with a +WebUI-specific script hook: + +```yaml +webui_prefill_messages_script: + - python3 + - /path/to/notes_recall.py +webui_prefill_messages_script_timeout: 5 +``` + +or: + +```bash +HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT="python3 /path/to/notes_recall.py" \ +HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT_TIMEOUT=5 \ +./ctl.sh restart +``` + +The script may print either an OpenAI-style JSON message list, a JSON object with +a `messages` list, or plain text; plain text is wrapped as one `system` prefill +message. The browser only receives a compact status event (`source`, `label`, +message count, and redacted errors), never the prefill message bodies. + The bootstrap will: 1. 
Detect Hermes Agent and, if missing, attempt the official installer (`curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash`). diff --git a/api/streaming.py b/api/streaming.py index d319d9d7..6a46311e 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -10,7 +10,9 @@ import mimetypes import os import queue import re +import shlex import sys +import subprocess import threading import time import traceback @@ -285,29 +287,105 @@ def _resolve_prefill_path(raw: str) -> Path: return path +def _prefill_not_configured() -> dict: + return {"status": "not_configured", "source": "none", "label": "", "messages": [], "message_count": 0} + + +def _load_prefill_messages_file(file_raw: str, *, source: str = "file", status: str = "loaded") -> dict: + path = _resolve_prefill_path(file_raw) + label = path.name or "prefill file" + if not path.exists(): + return {"status": "error", "source": source, "label": label, "messages": [], "message_count": 0, "error": "prefill file not found"} + try: + messages = _valid_prefill_messages(json.loads(path.read_text(encoding="utf-8"))) + return {"status": status, "source": source, "label": label, "messages": messages, "message_count": len(messages)} + except Exception as exc: + return {"status": "error", "source": source, "label": label, "messages": [], "message_count": 0, "error": _redact_prefill_status_text(str(exc))} + + +def _prefill_script_timeout(config_data: dict) -> float: + raw = os.getenv("HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT_TIMEOUT", "") or str(config_data.get("webui_prefill_messages_script_timeout") or "") + try: + return max(0.1, min(float(raw or 5), 30.0)) + except Exception: + return 5.0 + + +def _prefill_script_command(raw) -> list[str]: + if isinstance(raw, (list, tuple)): + return [str(part) for part in raw if str(part)] + parts = shlex.split(str(raw or "")) + if not parts: + return [] + # A single script path mirrors prefill_messages_file path resolution. 
More + # complex commands keep their argv untouched so admins can pass arguments. + if len(parts) == 1: + parts[0] = str(_resolve_prefill_path(parts[0])) + return parts + + +def _messages_from_prefill_script_output(text: str) -> list[dict]: + stripped = str(text or "").strip() + if not stripped: + return [] + try: + payload = json.loads(stripped) + except Exception: + payload = None + if isinstance(payload, dict): + payload = payload.get("messages") + messages = _valid_prefill_messages(payload) + if messages: + return messages + return [{"role": "system", "content": stripped}] + + +def _load_prefill_messages_script(config_data: dict) -> dict: + script_raw = os.getenv("HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT", "") or config_data.get("webui_prefill_messages_script") + if not script_raw: + return _prefill_not_configured() + command = _prefill_script_command(script_raw) + label = Path(command[0]).name if command else "prefill script" + if not command: + return {"status": "error", "source": "script", "label": label, "messages": [], "message_count": 0, "error": "prefill script is empty"} + try: + proc = subprocess.run( + command, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=_prefill_script_timeout(config_data), + check=False, + ) + except subprocess.TimeoutExpired: + return {"status": "error", "source": "script", "label": label, "messages": [], "message_count": 0, "error": "prefill script timed out"} + except Exception as exc: + return {"status": "error", "source": "script", "label": label, "messages": [], "message_count": 0, "error": _redact_prefill_status_text(str(exc))} + if proc.returncode != 0: + err = _redact_prefill_status_text(proc.stderr or proc.stdout or f"prefill script exited {proc.returncode}") + return {"status": "error", "source": "script", "label": label, "messages": [], "message_count": 0, "error": err} + messages = _messages_from_prefill_script_output(proc.stdout) + return {"status": "loaded", "source": "script", "label": 
label, "messages": messages, "message_count": len(messages)} + + def _load_webui_prefill_context( config_data: Optional[dict] = None, ) -> dict: """Load configured WebUI session prefill messages. - Supports the same bounded JSON-file shape used by Hermes Agent. WebUI does - not execute a configured prefill script here; session recall that requires - code execution should go through the normal MCP/tool path instead of an - always-on per-turn subprocess before SSE starts. + Supports the same bounded JSON-file shape used by Hermes Agent. WebUI also + supports its own explicitly opt-in script hook so admins can bridge Joplin, + Obsidian, Notion, llm-wiki, or another local notes source into ephemeral + turn context without baking any one note provider into the WebUI. """ cfg = config_data if isinstance(config_data, dict) else get_config() + script_context = _load_prefill_messages_script(cfg) + if script_context.get("status") != "not_configured": + return script_context file_raw = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or str(cfg.get("prefill_messages_file") or "") if file_raw: - path = _resolve_prefill_path(file_raw) - label = path.name or "prefill file" - if not path.exists(): - return {"status": "error", "source": "file", "label": label, "messages": [], "message_count": 0, "error": "prefill file not found"} - try: - messages = _valid_prefill_messages(json.loads(path.read_text(encoding="utf-8"))) - return {"status": "loaded", "source": "file", "label": label, "messages": messages, "message_count": len(messages)} - except Exception as exc: - return {"status": "error", "source": "file", "label": label, "messages": [], "message_count": 0, "error": _redact_prefill_status_text(str(exc))} - return {"status": "not_configured", "source": "none", "label": "", "messages": [], "message_count": 0} + return _load_prefill_messages_file(file_raw) + return _prefill_not_configured() def _public_prefill_context_status(prefill_context: dict) -> dict: diff --git 
a/tests/test_webui_prefill_context.py b/tests/test_webui_prefill_context.py index 06a18e0c..3584aba2 100644 --- a/tests/test_webui_prefill_context.py +++ b/tests/test_webui_prefill_context.py @@ -2,6 +2,8 @@ from __future__ import annotations import json +import sys +from pathlib import Path def test_prefill_json_file_keeps_valid_roles_and_drops_invalid_items(tmp_path): @@ -32,7 +34,7 @@ def test_prefill_json_file_keeps_valid_roles_and_drops_invalid_items(tmp_path): ] -def test_prefill_script_config_is_ignored_in_webui(tmp_path): +def test_prefill_script_config_is_not_used_without_webui_opt_in(tmp_path): from api.streaming import _load_webui_prefill_context script = tmp_path / "recall.py" @@ -49,6 +51,68 @@ def test_prefill_script_config_is_ignored_in_webui(tmp_path): } +def test_webui_prefill_script_loads_json_messages(tmp_path): + from api.streaming import _load_webui_prefill_context + + script = tmp_path / "recall.py" + script.write_text( + "import json\n" + "print(json.dumps([{'role': 'system', 'content': 'Joplin recall'}, {'role': 'tool', 'content': 'drop me'}]))\n", + encoding="utf-8", + ) + + result = _load_webui_prefill_context({"webui_prefill_messages_script": [sys.executable, str(script)]}) + + assert result["status"] == "loaded" + assert result["source"] == "script" + assert result["label"] == Path(sys.executable).name + assert result["messages"] == [{"role": "system", "content": "Joplin recall"}] + + +def test_webui_prefill_script_wraps_plain_text_for_any_notes_source(tmp_path): + from api.streaming import _load_webui_prefill_context + + script = tmp_path / "obsidian_recall.py" + script.write_text("print('Obsidian project note context')\n", encoding="utf-8") + + result = _load_webui_prefill_context({"webui_prefill_messages_script": [sys.executable, str(script)]}) + + assert result["status"] == "loaded" + assert result["source"] == "script" + assert result["messages"] == [{"role": "system", "content": "Obsidian project note context"}] + + +def 
test_webui_prefill_script_errors_are_redacted(tmp_path): + from api.streaming import _load_webui_prefill_context + + script = tmp_path / "bad_recall.py" + script.write_text("import sys; print('token=redaction-test-placeholder', file=sys.stderr); raise SystemExit(2)\n", encoding="utf-8") + + result = _load_webui_prefill_context({"webui_prefill_messages_script": [sys.executable, str(script)]}) + + assert result["status"] == "error" + assert result["source"] == "script" + assert "redaction-test-placeholder" not in result["error"] + assert "[REDACTED]" in result["error"] + + +def test_webui_prefill_script_takes_precedence_over_static_file(tmp_path): + from api.streaming import _load_webui_prefill_context + + prefill = tmp_path / "prefill.json" + prefill.write_text(json.dumps([{"role": "system", "content": "static"}]), encoding="utf-8") + script = tmp_path / "recall.py" + script.write_text("print('dynamic')\n", encoding="utf-8") + + result = _load_webui_prefill_context({ + "prefill_messages_file": str(prefill), + "webui_prefill_messages_script": [sys.executable, str(script)], + }) + + assert result["source"] == "script" + assert result["messages"] == [{"role": "system", "content": "dynamic"}] + + def test_public_prefill_status_strips_message_bodies(): from api.streaming import _public_prefill_context_status From befee0e035ff6fa5d42db14e16542a5c01436e2d Mon Sep 17 00:00:00 2001 From: AJV20 <24819659+AJV20@users.noreply.github.com> Date: Sun, 24 May 2026 20:20:28 -0400 Subject: [PATCH 03/13] test(chat): harden WebUI prefill script hook --- CHANGELOG.md | 2 +- README.md | 5 +++-- api/streaming.py | 12 +++++++++++ tests/test_webui_prefill_context.py | 33 +++++++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bbc2614a..e284758e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ ### Added -- WebUI can now opt into a `webui_prefill_messages_script` / `HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT` hook 
for dynamic browser-turn prefill context from local notes or recall systems. The script output is normalized to ephemeral prefill messages and browser status still hides message bodies while redacting script errors. +- WebUI can now opt into a `webui_prefill_messages_script` / `HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT` hook for dynamic browser-turn prefill context from local notes or recall systems. Script output larger than 256 KiB is rejected with an error; accepted output is normalized to ephemeral prefill messages, and browser status still hides message bodies while redacting script errors. ## [v0.51.131] — 2026-05-24 — Release DC (stage-batch13 — 6-PR notes-drawer + context-parity + PWA-swipe + locale polish) diff --git a/README.md b/README.md index 489f7b71..b9066c4e 100644 --- a/README.md +++ b/README.md @@ -150,8 +150,9 @@ HERMES_WEBUI_PREFILL_MESSAGES_SCRIPT_TIMEOUT=5 \ The script may print either an OpenAI-style JSON message list, a JSON object with a `messages` list, or plain text; plain text is wrapped as one `system` prefill -message. The browser only receives a compact status event (`source`, `label`, -message count, and redacted errors), never the prefill message bodies. +message. Script output larger than 256 KiB is rejected before parsing. The browser only +receives a compact status event (`source`, `label`, message count, and redacted +errors), never the prefill message bodies. 
The bootstrap will: diff --git a/api/streaming.py b/api/streaming.py index 6a46311e..a0531da3 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -287,6 +287,9 @@ def _resolve_prefill_path(raw: str) -> Path: return path +_PREFILL_SCRIPT_OUTPUT_LIMIT = 262_144 + + def _prefill_not_configured() -> dict: return {"status": "not_configured", "source": "none", "label": "", "messages": [], "message_count": 0} @@ -364,6 +367,15 @@ def _load_prefill_messages_script(config_data: dict) -> dict: if proc.returncode != 0: err = _redact_prefill_status_text(proc.stderr or proc.stdout or f"prefill script exited {proc.returncode}") return {"status": "error", "source": "script", "label": label, "messages": [], "message_count": 0, "error": err} + if len(proc.stdout.encode("utf-8")) > _PREFILL_SCRIPT_OUTPUT_LIMIT: + return { + "status": "error", + "source": "script", + "label": label, + "messages": [], + "message_count": 0, + "error": f"prefill script output exceeded {_PREFILL_SCRIPT_OUTPUT_LIMIT} bytes", + } messages = _messages_from_prefill_script_output(proc.stdout) return {"status": "loaded", "source": "script", "label": label, "messages": messages, "message_count": len(messages)} diff --git a/tests/test_webui_prefill_context.py b/tests/test_webui_prefill_context.py index 3584aba2..0ce1991b 100644 --- a/tests/test_webui_prefill_context.py +++ b/tests/test_webui_prefill_context.py @@ -113,6 +113,39 @@ def test_webui_prefill_script_takes_precedence_over_static_file(tmp_path): assert result["messages"] == [{"role": "system", "content": "dynamic"}] +def test_webui_prefill_script_timeout_returns_redacted_error(tmp_path): + from api.streaming import _load_webui_prefill_context + + script = tmp_path / "slow_recall.py" + script.write_text("import time\ntime.sleep(1)\nprint('too late')\n", encoding="utf-8") + + result = _load_webui_prefill_context({ + "webui_prefill_messages_script": [sys.executable, str(script)], + "webui_prefill_messages_script_timeout": 0.1, + }) + + assert 
result["status"] == "error" + assert result["source"] == "script" + assert result["messages"] == [] + assert result["message_count"] == 0 + assert result["error"] == "prefill script timed out" + + +def test_webui_prefill_script_rejects_oversized_stdout(tmp_path): + from api.streaming import _load_webui_prefill_context + + script = tmp_path / "large_recall.py" + script.write_text("print('x' * 262145)\n", encoding="utf-8") + + result = _load_webui_prefill_context({"webui_prefill_messages_script": [sys.executable, str(script)]}) + + assert result["status"] == "error" + assert result["source"] == "script" + assert result["messages"] == [] + assert result["message_count"] == 0 + assert "output exceeded" in result["error"] + + def test_public_prefill_status_strips_message_bodies(): from api.streaming import _public_prefill_context_status From aee376323f4c93bde5ed5dc2a3f8a5a15e6bd6a2 Mon Sep 17 00:00:00 2001 From: Roberto Villegas Date: Sat, 23 May 2026 02:00:39 -0600 Subject: [PATCH 04/13] feat(cursor-acp): add cursor-acp to WebUI model picker - Add cursor-acp to _PROVIDER_DISPLAY with label 'Cursor ACP' - Add cursor-acp static model list to _PROVIDER_MODELS - composer-2.5, composer-2, default, cursor-acp --- api/config.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/api/config.py b/api/config.py index 66bcb570..33a9e3c8 100644 --- a/api/config.py +++ b/api/config.py @@ -694,6 +694,7 @@ _PROVIDER_DISPLAY = { "openai-codex": "OpenAI Codex", "xai-oauth": "xAI Grok OAuth", "copilot": "GitHub Copilot", + "cursor-acp": "Cursor ACP", "zai": "Z.AI / GLM", "kimi-coding": "Kimi / Moonshot", "deepseek": "DeepSeek", @@ -1116,6 +1117,13 @@ _PROVIDER_MODELS = { {"id": "claude-sonnet-4.6", "label": "Claude Sonnet 4.6"}, {"id": "gemini-3-flash-preview", "label": "Gemini 3 Flash Preview"}, ], + # Cursor ACP — models served via Cursor CLI agent acp + "cursor-acp": [ + {"id": "cursor/composer-2.5", "label": "Composer 2.5"}, + {"id": "cursor/composer-2", "label": "Composer 
2"}, + {"id": "cursor/default", "label": "Default"}, + {"id": "cursor-acp", "label": "Cursor ACP"}, + ], # OpenCode Zen — curated models via opencode.ai/zen (pay-as-you-go credits) "opencode-zen": [ {"id": "gpt-5.4-pro", "label": "GPT-5.4 Pro"}, From ef5eafcceb0a3a184f83c14a2910af682b9f31e4 Mon Sep 17 00:00:00 2001 From: Roberto Villegas Date: Sat, 23 May 2026 03:59:03 -0600 Subject: [PATCH 05/13] fix(cursor-acp): route slash models and honor picker on new chat Ensure cursor/composer IDs always resolve via @cursor-acp:, carry the visible picker selection into POST /api/session/new, persist model changes before a session exists, and evict cached agents on model switch. Co-authored-by: Cursor --- CHANGELOG.md | 2 ++ api/config.py | 11 +++++++ api/routes.py | 3 ++ static/boot.js | 5 ++- static/sessions.js | 14 +++++++++ tests/test_new_chat_default_model_frontend.py | 15 +++++++++ tests/test_provider_mismatch.py | 31 +++++++++++++++++-- 7 files changed, 77 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 494f2eca..1bd8350b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,12 +6,14 @@ ## [v0.51.132] — 2026-05-24 — Release DD (stage-batch14 — 4-PR replayed-context + interrupted-response + shutdown affordance + passkey opt-in) ### Added +- **Cursor ACP provider integration** — Add `cursor-acp` to the WebUI model picker and route slash model IDs (for example `cursor/composer-2.5`) through explicit `@cursor-acp:` provider hints so they do not fall through to the configured default HTTP provider. - **PR #2859** by @AJV20 — Optional passkey/WebAuthn sign-in for password-protected WebUI instances. Authenticated users can register/remove passkeys from Settings -> System, and `/login` shows a passwordless sign-in button only after a passkey exists. Password auth remains the default-off bootstrap and recovery path. 
**Opt-in default-off behind `HERMES_WEBUI_PASSKEY=1` env var or `webui_passkey_enabled: true` config flag** — when disabled, the UI block hides, all 6 `/api/auth/passkey/*` endpoints return 404, and `is_auth_enabled()` ignores any pre-existing credential file so the auth posture cannot silently flip if the flag is unset later. - **PR #2824** by @gavinssr — A "Stop server" affordance in Settings → System that gracefully shuts down the local WebUI server. Useful when WebUI was launched via `./ctl.sh start` or the native macOS/Windows app and the user wants to stop it without context-switching to a terminal. Confirmation dialog before the actual shutdown. The `/api/shutdown` route is CSRF-gated and intended for local-loopback use. Originally a title-bar button; relocated to Settings per the project's deep-UX rule (default-hidden for niche destructive actions on always-visible surfaces). ### Fixed +- **Cursor ACP routing and new-chat defaults** — New conversations now carry the visible composer picker selection into `POST /api/session/new`, persist model changes before a session exists, and evict cached session agents when the model/provider changes mid-session. - **PR #2685** by @LumenYoung — Prevent replayed context in chat reconciliation and metering. When a WebUI session is recovered (e.g., after a process restart, network drop, or browser reload), the sidebar/`state.db` reconciliation logic walks the sidecar transcript in order and only skips rows that can actually be aligned with the remaining sidecar context. The prior set-membership check was too broad: a legitimate fresh message that happened to share a key with any older repeated short message in the sidecar was mis-classified as already-seen and dropped from the replay, leading to lost context and inconsistent metering. Also caps the per-turn live-tool-prompt token estimate at 12,000 to prevent unbounded growth on bursts of large tool reads before exact provider accounting overrides. 
diff --git a/api/config.py b/api/config.py index 33a9e3c8..1dc55d07 100644 --- a/api/config.py +++ b/api/config.py @@ -1980,6 +1980,12 @@ def resolve_custom_provider_connection(provider_id: str) -> tuple[str | None, st return None, None +# Subprocess ACP transports (Cursor/Copilot CLI). Model IDs often contain '/' +# but must still route via explicit @provider:model so they do not fall through +# to the configured default HTTP provider (e.g. openai-codex). +_ACP_SUBPROCESS_PROVIDERS = frozenset({"cursor-acp", "copilot-acp"}) + + def model_with_provider_context(model_id: str, model_provider: str | None = None) -> str: """Return the model string to pass to ``resolve_model_provider()``. @@ -1999,6 +2005,11 @@ def model_with_provider_context(model_id: str, model_provider: str | None = None if isinstance(model_cfg, dict): config_provider = str(model_cfg.get("provider") or "").strip().lower() + # ACP subprocess providers always need the explicit hint — their slash IDs + # are not OpenRouter paths and must not inherit config_provider routing. + if provider in _ACP_SUBPROCESS_PROVIDERS: + return f"@{provider}:{model}" + # If the selected provider is already the configured provider, leaving the # model bare preserves provider-specific base_url/proxy settings. 
if provider == config_provider: diff --git a/api/routes.py b/api/routes.py index 0b623ed8..27ca8875 100644 --- a/api/routes.py +++ b/api/routes.py @@ -5307,6 +5307,9 @@ def handle_post(handler, parsed) -> bool: ) s.threshold_tokens = 0 s.last_prompt_tokens = 0 + from api.config import _evict_session_agent + + _evict_session_agent(body["session_id"]) s.save() if str(old_ws or "") != str(new_ws or ""): try: diff --git a/static/boot.js b/static/boot.js index eaadc8cd..3153a22a 100644 --- a/static/boot.js +++ b/static/boot.js @@ -1022,7 +1022,6 @@ function _applySessionContextMetadataUpdate(data){ } $('modelSelect').onchange=async()=>{ - if(!S.session)return; const selectedModel=$('modelSelect').value; const modelState=(typeof _modelStateForSelect==='function') ? _modelStateForSelect($('modelSelect'),selectedModel) @@ -1030,6 +1029,10 @@ $('modelSelect').onchange=async()=>{ if(typeof closeModelDropdown==='function') closeModelDropdown(); if(typeof _writePersistedModelState==='function') _writePersistedModelState(modelState.model,modelState.model_provider); else try{localStorage.setItem('hermes-webui-model',modelState.model)}catch{} + if(!S.session){ + if(typeof syncModelChip==='function') syncModelChip(); + return; + } if(typeof _rememberPendingSessionModel==='function') _rememberPendingSessionModel(S.session.session_id,modelState.model,modelState.model_provider); S.session.model=modelState.model; S.session.model_provider=modelState.model_provider||null; diff --git a/static/sessions.js b/static/sessions.js index 83d8c5c4..30b9ed12 100644 --- a/static/sessions.js +++ b/static/sessions.js @@ -470,6 +470,20 @@ async function newSession(flash, options={}){ if(S.session&&S.session.session_id) reqBody.prev_session_id=S.session.session_id; if(options&&options.worktree) reqBody.worktree=true; if(_activeProject&&_activeProject!==NO_PROJECT_FILTER) reqBody.project_id=_activeProject; + // Carry the visible picker selection into the new session. 
Without this, + // /api/session/new falls back to config.yaml defaults (e.g. gpt-5.5) even + // when the user already chose cursor/composer-2.5 in the composer chip. + const modelSelForNew=$('modelSelect'); + let newModelState=null; + if(modelSelForNew&&modelSelForNew.value&&typeof _modelStateForSelect==='function'){ + newModelState=_modelStateForSelect(modelSelForNew,modelSelForNew.value); + }else if(typeof _readPersistedModelState==='function'){ + newModelState=_readPersistedModelState(); + } + if(newModelState&&newModelState.model){ + reqBody.model=newModelState.model; + reqBody.model_provider=newModelState.model_provider||null; + } const data=await api('/api/session/new',{method:'POST',body:JSON.stringify(reqBody)}); S.session=data.session;S.messages=data.session.messages||[]; S.lastUsage={...(data.session.last_usage||{})}; diff --git a/tests/test_new_chat_default_model_frontend.py b/tests/test_new_chat_default_model_frontend.py index 267c3105..91a29e3d 100644 --- a/tests/test_new_chat_default_model_frontend.py +++ b/tests/test_new_chat_default_model_frontend.py @@ -94,6 +94,21 @@ def test_new_chat_does_not_send_stale_dropdown_model_when_session_has_default_mo assert "model_provider:S.session.model_provider||null" in MESSAGES_JS +def test_new_session_posts_picker_model_before_server_default(): + fn = _new_session_function() + assert "reqBody.model=newModelState.model" in fn + assert "reqBody.model_provider=newModelState.model_provider||null" in fn + assert "_readPersistedModelState" in fn + + +def test_model_picker_persists_without_active_session(): + boot_js = Path("static/boot.js").read_text(encoding="utf-8") + body = boot_js[boot_js.index("$('modelSelect').onchange=async()=>") : boot_js.index("$('msg').addEventListener", boot_js.index("$('modelSelect').onchange=async()=>"))] + assert "_writePersistedModelState(modelState.model,modelState.model_provider)" in body + assert "if(!S.session){" in body + assert body.index("if(!S.session){") < body.index("await 
api('/api/session/update'") + + def test_changelog_mentions_new_chat_default_model_provider_sync(): unreleased = CHANGELOG.split("## [v0.51.103]", 1)[0] assert "New conversations now resync" in unreleased diff --git a/tests/test_provider_mismatch.py b/tests/test_provider_mismatch.py index dbd2c019..2ff6ad46 100644 --- a/tests/test_provider_mismatch.py +++ b/tests/test_provider_mismatch.py @@ -522,6 +522,31 @@ def test_non_openrouter_slash_model_provider_context_stays_unqualified(): assert runtime_model == "anthropic/claude-sonnet-4.6" +def test_cursor_acp_slash_model_always_gets_provider_hint(): + """ACP subprocess models with '/' must not fall through to config default.""" + import api.config as config + + old_cfg = dict(config.cfg) + config.cfg["model"] = { + "provider": "openai-codex", + "default": "gpt-5.5", + } + try: + runtime_model = config.model_with_provider_context( + "cursor/composer-2.5", + "cursor-acp", + ) + model, provider, base_url = config.resolve_model_provider(runtime_model) + finally: + config.cfg.clear() + config.cfg.update(old_cfg) + + assert runtime_model == "@cursor-acp:cursor/composer-2.5" + assert model == "cursor/composer-2.5" + assert provider == "cursor-acp" + assert base_url is None + + def test_api_session_new_persists_model_provider_context(): """POST /api/session/new returns compact session model_provider metadata.""" created, status = _post( @@ -1171,13 +1196,13 @@ class TestFrontendModelProviderState: assert "_modelStateForSelect" in src assert "model_provider:modelState.model_provider||null" in src - def test_new_session_lets_profile_config_choose_default_model_provider(self): + def test_new_session_carries_visible_picker_model_into_create_request(self): src = _read("static/sessions.js") start = src.index("async function newSession(") body = src[start:src.index("const data=await api('/api/session/new'", start)] assert "profile:S.activeProfile||'default'" in body - assert "model:newModelState.model" not in body - assert 
"model_provider:newModelState.model_provider||null" not in body + assert "reqBody.model=newModelState.model" in body + assert "reqBody.model_provider=newModelState.model_provider||null" in body def test_ui_has_json_model_state_storage(self): src = _read("static/ui.js") From a9ce2889affdadda3bfaa785bbaca37d59a35081 Mon Sep 17 00:00:00 2001 From: Roberto Villegas Date: Sat, 23 May 2026 03:59:45 -0600 Subject: [PATCH 06/13] fix(ui): hide reasoning chip when model lacks effort levels Resolve supported reasoning efforts per active model/provider and pass that context through /api/reasoning so Composer and other non-configurable models no longer show a misleading effort picker. Co-authored-by: Cursor --- CHANGELOG.md | 3 + api/config.py | 114 +++++++++++++++++- api/routes.py | 13 +- static/commands.js | 5 +- static/ui.js | 56 +++++++-- ...est_issue1103_reasoning_chip_visibility.py | 9 +- tests/test_reasoning_chip_btw_fixes.py | 15 +-- ...est_reasoning_effort_model_capabilities.py | 33 +++++ 8 files changed, 224 insertions(+), 24 deletions(-) create mode 100644 tests/test_reasoning_effort_model_capabilities.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bd8350b..4c7a2501 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,9 @@ - **PR #2824** by @gavinssr — A "Stop server" affordance in Settings → System that gracefully shuts down the local WebUI server. Useful when WebUI was launched via `./ctl.sh start` or the native macOS/Windows app and the user wants to stop it without context-switching to a terminal. Confirmation dialog before the actual shutdown. The `/api/shutdown` route is CSRF-gated and intended for local-loopback use. Originally a title-bar button; relocated to Settings per the project's deep-UX rule (default-hidden for niche destructive actions on always-visible surfaces). 
### Fixed + +### Fixed +- **Reasoning effort chip visibility** — `/api/reasoning` now accepts `model` and `provider` query params and returns `supported_efforts` so the composer chip hides for models without configurable reasoning levels (for example Cursor Composer) while remaining available for models like GPT-5.5. - **Cursor ACP routing and new-chat defaults** — New conversations now carry the visible composer picker selection into `POST /api/session/new`, persist model changes before a session exists, and evict cached session agents when the model/provider changes mid-session. - **PR #2685** by @LumenYoung — Prevent replayed context in chat reconciliation and metering. When a WebUI session is recovered (e.g., after a process restart, network drop, or browser reload), the sidebar/`state.db` reconciliation logic walks the sidecar transcript in order and only skips rows that can actually be aligned with the remaining sidecar context. The prior set-membership check was too broad: a legitimate fresh message that happened to share a key with any older repeated short message in the sidecar was mis-classified as already-seen and dropped from the replay, leading to lost context and inconsistent metering. Also caps the per-turn live-tool-prompt token estimate at 12,000 to prevent unbounded growth on bursts of large tool reads before exact provider accounting overrides. 
diff --git a/api/config.py b/api/config.py index 1dc55d07..7b701874 100644 --- a/api/config.py +++ b/api/config.py @@ -2073,7 +2073,112 @@ def parse_reasoning_effort(effort): return None -def get_reasoning_status() -> dict: +def _heuristic_reasoning_efforts(model_id: str, provider_id: str) -> list[str]: + """Fallback when hermes_cli is unavailable.""" + model = str(model_id or "").strip().lower() + provider = _resolve_provider_alias(str(provider_id or "").strip().lower()) + if not model or provider in {"cursor-acp", "copilot-acp"}: + return [] + bare = model.rsplit("/", 1)[-1] + if provider == "openai-codex" and bare.startswith(("gpt-5", "o1", "o3", "o4")): + if bare.startswith(("o1", "o3", "o4")): + return ["low", "medium", "high"] + return list(VALID_REASONING_EFFORTS) + if provider in {"copilot", "github-copilot"}: + if bare.startswith(("gpt-5", "o1", "o3", "o4")): + if bare.startswith(("o1", "o3", "o4")): + return ["low", "medium", "high"] + return list(VALID_REASONING_EFFORTS) + prefixes = ( + "deepseek/", + "anthropic/", + "openai/", + "x-ai/", + "google/gemini-2", + "google/gemma-4", + "qwen/qwen3", + "tencent/hy3-preview", + "xiaomi/", + ) + if any(model.startswith(prefix) for prefix in prefixes): + return list(VALID_REASONING_EFFORTS) + return [] + + +def resolve_model_reasoning_efforts( + model_id: str | None = None, + provider_id: str | None = None, + base_url: str | None = None, +) -> list[str]: + """Return supported reasoning-effort levels for *model_id*, or [] if none.""" + model = str(model_id or "").strip() + if not model: + return [] + + provider = str(provider_id or "").strip().lower() if provider_id else "" + resolved_base_url = str(base_url or "").strip() or None + if not provider: + try: + _, provider, resolved_base_url = resolve_model_provider(model) + except Exception: + provider = str((cfg.get("model") or {}).get("provider") or "").strip().lower() + + provider = _resolve_provider_alias(provider) + if provider in {"cursor-acp", 
"copilot-acp"}: + return [] + + try: + from hermes_cli.models import ( + github_model_reasoning_efforts, + lmstudio_model_reasoning_options, + ) + except Exception: + return _heuristic_reasoning_efforts(model, provider) + + if provider in {"copilot", "github-copilot"}: + return github_model_reasoning_efforts(model) + + if provider == "openai-codex": + bare = model.rsplit("/", 1)[-1] + return github_model_reasoning_efforts(bare) + + if provider == "lmstudio": + probe_base = resolved_base_url or _get_provider_base_url(provider) + opts = lmstudio_model_reasoning_options(model, probe_base) + normalized = [str(opt).strip().lower() for opt in opts if str(opt).strip()] + if not normalized or set(normalized).issubset({"off"}): + return [] + level_opts = [opt for opt in normalized if opt in VALID_REASONING_EFFORTS] + if level_opts: + return list(dict.fromkeys(level_opts)) + if set(normalized).issubset({"off", "on"}): + return [] + return [] + + model_lower = model.lower() + prefixes = ( + "deepseek/", + "anthropic/", + "openai/", + "x-ai/", + "google/gemini-2", + "google/gemma-4", + "qwen/qwen3", + "tencent/hy3-preview", + "xiaomi/", + ) + if any(model_lower.startswith(prefix) for prefix in prefixes): + return list(VALID_REASONING_EFFORTS) + + return [] + + +def get_reasoning_status( + *, + model_id: str | None = None, + provider_id: str | None = None, + base_url: str | None = None, +) -> dict: """Return current reasoning configuration from the active profile's config.yaml — the same source of truth the CLI reads from. 
@@ -2086,10 +2191,17 @@ def get_reasoning_status() -> dict: agent_cfg = config_data.get("agent") or {} show_raw = display_cfg.get("show_reasoning") if isinstance(display_cfg, dict) else None effort_raw = agent_cfg.get("reasoning_effort") if isinstance(agent_cfg, dict) else None + supported_efforts = resolve_model_reasoning_efforts( + model_id, + provider_id=provider_id, + base_url=base_url, + ) return { # Match CLI default (True if unset in config.yaml) "show_reasoning": bool(show_raw) if isinstance(show_raw, bool) else True, "reasoning_effort": str(effort_raw or "").strip().lower(), + "supported_efforts": supported_efforts, + "supports_reasoning_effort": bool(supported_efforts), } diff --git a/api/routes.py b/api/routes.py index 27ca8875..048579a7 100644 --- a/api/routes.py +++ b/api/routes.py @@ -3918,7 +3918,18 @@ def handle_get(handler, parsed) -> bool: # Current reasoning config (shared source of truth with the CLI — # reads display.show_reasoning and agent.reasoning_effort from # the active profile's config.yaml). - return j(handler, get_reasoning_status()) + query = parse_qs(parsed.query) + model_id = (query.get("model", [""])[0] or "").strip() or None + provider_id = (query.get("provider", [""])[0] or "").strip() or None + base_url = (query.get("base_url", [""])[0] or "").strip() or None + return j( + handler, + get_reasoning_status( + model_id=model_id, + provider_id=provider_id, + base_url=base_url, + ), + ) if parsed.path == "/api/onboarding/status": return j(handler, get_onboarding_status()) diff --git a/static/commands.js b/static/commands.js index f7705546..73966304 100644 --- a/static/commands.js +++ b/static/commands.js @@ -1141,7 +1141,8 @@ function cmdReasoning(args){ } if(!arg){ // Status — read from the same config.yaml keys the CLI uses. 
- api('/api/reasoning').then(function(st){showToast(_fmtStatus(st));}) + const q=(typeof _reasoningEffortQuery==='function')?_reasoningEffortQuery():''; + api('/api/reasoning'+q).then(function(st){showToast(_fmtStatus(st));}) .catch(function(){showToast(BRAIN+' /reasoning — status unavailable');}); return true; } @@ -1168,7 +1169,7 @@ function cmdReasoning(args){ .then(function(st){ const eff=(st && st.reasoning_effort)||arg; showToast(BRAIN+' Reasoning effort: '+eff+' (saved; applies to next turn)'); - if(typeof _applyReasoningChip==='function') _applyReasoningChip(eff); + if(typeof _applyReasoningChip==='function') _applyReasoningChip(eff, st||{}); }) .catch(function(e){ showToast(BRAIN+' Failed to set effort: '+(e && e.message ? e.message : arg)); diff --git a/static/ui.js b/static/ui.js index 88b5365c..164ee0a0 100644 --- a/static/ui.js +++ b/static/ui.js @@ -1572,6 +1572,7 @@ async function selectModelFromDropdown(value){ } sel.value=value; syncModelChip(); + if(typeof fetchReasoningChip==='function') fetchReasoningChip(); closeModelDropdown(); if(typeof sel.onchange==='function') await sel.onchange(); } @@ -1629,6 +1630,7 @@ window.addEventListener('resize',()=>{ // ── Reasoning effort chip ──────────────────────────────────────────────────── let _currentReasoningEffort=null; +let _currentReasoningEffortsSupported=null; function _normalizeReasoningEffort(eff){ return String(eff||'').trim().toLowerCase(); @@ -1640,17 +1642,58 @@ function _formatReasoningEffortLabel(effort){ return effort; } -function _applyReasoningChip(eff){ +function _reasoningEffortQuery(){ + const sel=$('modelSelect'); + const model=(S&&S.session&&S.session.model)||(sel&&sel.value)||''; + const provider=(S&&S.session&&S.session.model_provider)||''; + const params=new URLSearchParams(); + if(model) params.set('model', model); + if(provider) params.set('provider', provider); + const qs=params.toString(); + return qs?('?'+qs):''; +} + +function _applyReasoningOptions(supportedEfforts){ + 
const dd=$('composerReasoningDropdown'); + if(!dd) return; + const supported=new Set(Array.isArray(supportedEfforts)?supportedEfforts:[]); + dd.querySelectorAll('.reasoning-option').forEach(function(opt){ + const effort=opt.dataset.effort; + if(effort==='none'){ + opt.style.display=''; + return; + } + if(!supported.size){ + opt.style.display='none'; + return; + } + opt.style.display=supported.has(effort)?'':'none'; + }); +} + +function _applyReasoningChip(eff, meta){ const effort=_normalizeReasoningEffort(eff); _currentReasoningEffort=effort; + if(meta&&Array.isArray(meta.supported_efforts)){ + _currentReasoningEffortsSupported=meta.supported_efforts; + } const wrap=$('composerReasoningWrap'); const label=$('composerReasoningLabel'); const chip=$('composerReasoningChip'); const mobileLabel=$('composerMobileReasoningLabel'); const mobileAction=$('composerMobileReasoningAction'); if(!wrap||!label) return; + const supports=Array.isArray(_currentReasoningEffortsSupported) + ?_currentReasoningEffortsSupported.length>0 + :true; + if(!supports){ + wrap.style.display='none'; + if(mobileAction) mobileAction.style.display='none'; + return; + } wrap.style.display=''; if(mobileAction) mobileAction.style.display=''; + _applyReasoningOptions(_currentReasoningEffortsSupported); const text=_formatReasoningEffortLabel(effort); label.textContent=text; if(mobileLabel) mobileLabel.textContent=text; @@ -1664,14 +1707,13 @@ function _applyReasoningChip(eff){ } function fetchReasoningChip(){ - api('/api/reasoning').then(function(st){ - _applyReasoningChip((st&&st.reasoning_effort)||''); - }).catch(function(){_applyReasoningChip('');}); + api('/api/reasoning'+_reasoningEffortQuery()).then(function(st){ + _applyReasoningChip((st&&st.reasoning_effort)||'', st||{}); + }).catch(function(){_applyReasoningChip('', {supported_efforts:[]});}); } function syncReasoningChip(){ - if(_currentReasoningEffort===null){fetchReasoningChip();return;} - _applyReasoningChip(_currentReasoningEffort); + 
fetchReasoningChip(); } function _highlightReasoningOption(effort){ @@ -1737,7 +1779,7 @@ document.addEventListener('click',function(e){ if(effort){ api('/api/reasoning',{method:'POST',body:JSON.stringify({effort:effort})}) .then(function(st){ - _applyReasoningChip((st&&st.reasoning_effort)||effort); + _applyReasoningChip((st&&st.reasoning_effort)||effort, st||{}); showToast('🧠 Reasoning effort set to '+((st&&st.reasoning_effort)||effort)); }) .catch(function(){showToast('🧠 Failed to set effort');}); diff --git a/tests/test_issue1103_reasoning_chip_visibility.py b/tests/test_issue1103_reasoning_chip_visibility.py index 31f83606..a8f621af 100644 --- a/tests/test_issue1103_reasoning_chip_visibility.py +++ b/tests/test_issue1103_reasoning_chip_visibility.py @@ -47,12 +47,13 @@ def test_reasoning_chip_html_starts_hidden(): assert m, "composerReasoningWrap must start with style='display:none'" -def test_applyReasoningChip_shows_wrap(): - """_applyReasoningChip must set wrap display to empty string (visible).""" +def test_ui_js_passes_model_context_to_reasoning_api(): with open("static/ui.js") as f: src = f.read() - assert "wrap.style.display=''" in src or "wrap.style.display =''" in src, \ - "_applyReasoningChip must set wrap.style.display='' to make chip visible" + assert "_reasoningEffortQuery" in src, ( + "ui.js must pass the active session model/provider to /api/reasoning" + ) + assert "api('/api/reasoning'+_reasoningEffortQuery())" in src def test_fetchReasoningChip_calls_apply(): diff --git a/tests/test_reasoning_chip_btw_fixes.py b/tests/test_reasoning_chip_btw_fixes.py index 608d612d..c7ba6b28 100644 --- a/tests/test_reasoning_chip_btw_fixes.py +++ b/tests/test_reasoning_chip_btw_fixes.py @@ -135,16 +135,13 @@ class TestReasoningChipNoneState: def test_none_and_default_do_not_hide_reasoning_chip(self): fn = self.get_apply_reasoning_chip() + assert "wrap.style.display='none'" in fn, ( + "_applyReasoningChip must hide the chip when the active model does " + "not 
support reasoning effort controls" + ) assert "wrap.style.display='';" in fn, ( - "_applyReasoningChip must show the reasoning chip even for empty/" - "default or 'none' effort values" - ) - assert "if(!eff" not in fn and "wrap.style.display='none'" not in fn, ( - "_applyReasoningChip must not use a truthy guard that hides the " - "chip for the valid 'none' state" - ) - assert "wrap.style.display='none'" not in fn, ( - "the None/default reasoning state should be visible, not hidden" + "_applyReasoningChip must show the reasoning chip when the model " + "supports reasoning effort controls" ) def test_none_and_default_have_visible_labels(self): diff --git a/tests/test_reasoning_effort_model_capabilities.py b/tests/test_reasoning_effort_model_capabilities.py new file mode 100644 index 00000000..8a0dd420 --- /dev/null +++ b/tests/test_reasoning_effort_model_capabilities.py @@ -0,0 +1,33 @@ +"""Tests for model-aware reasoning effort chip visibility.""" + +from api import config as cfg + + +def test_cursor_acp_models_do_not_support_reasoning_effort_levels(): + assert cfg.resolve_model_reasoning_efforts( + "cursor/composer-2.5", + provider_id="cursor-acp", + ) == [] + + +def test_openai_codex_gpt5_supports_reasoning_effort_levels(): + efforts = cfg.resolve_model_reasoning_efforts( + "gpt-5.5", + provider_id="openai-codex", + ) + assert "medium" in efforts + assert "high" in efforts + + +def test_get_reasoning_status_includes_supported_efforts(monkeypatch): + monkeypatch.setattr( + cfg, + "resolve_model_reasoning_efforts", + lambda *a, **k: ["low", "medium", "high"], + ) + status = cfg.get_reasoning_status( + model_id="gpt-5.5", + provider_id="openai-codex", + ) + assert status["supported_efforts"] == ["low", "medium", "high"] + assert status["supports_reasoning_effort"] is True From 91976a8fae751088368b378a61c63d254c76f544 Mon Sep 17 00:00:00 2001 From: Roberto Villegas Date: Sat, 23 May 2026 04:27:26 -0600 Subject: [PATCH 07/13] fix(ui): re-sync reasoning chip on model 
change with provider context Model picker onchange now calls syncReasoningChip after session model/ provider updates, and dropdown selections pass providerId so duplicate bare model ids resolve to the correct backend capabilities. Co-authored-by: Cursor --- CHANGELOG.md | 2 +- static/boot.js | 1 + static/ui.js | 40 ++++++++-------- ...est_issue1103_reasoning_chip_visibility.py | 46 +++++++++++++++++++ tests/test_reasoning_chip_btw_fixes.py | 4 +- 5 files changed, 71 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c7a2501..10be8c93 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ ### Fixed ### Fixed -- **Reasoning effort chip visibility** — `/api/reasoning` now accepts `model` and `provider` query params and returns `supported_efforts` so the composer chip hides for models without configurable reasoning levels (for example Cursor Composer) while remaining available for models like GPT-5.5. +- **Reasoning effort chip visibility** — `/api/reasoning` now accepts `model` and `provider` query params and returns `supported_efforts` so the composer chip hides for models without configurable reasoning levels (for example Cursor Composer) while remaining available for models like GPT-5.5. Model picker changes now re-sync the chip after the session model/provider update instead of querying with stale session state. Composer dropdown selections now pass the provider id into `selectModelFromDropdown()` so duplicate bare model ids (for example `gpt-5.5` under OpenAI Codex vs OpenRouter) no longer fall back to the profile default provider when refreshing the chip. - **Cursor ACP routing and new-chat defaults** — New conversations now carry the visible composer picker selection into `POST /api/session/new`, persist model changes before a session exists, and evict cached session agents when the model/provider changes mid-session. - **PR #2685** by @LumenYoung — Prevent replayed context in chat reconciliation and metering. 
When a WebUI session is recovered (e.g., after a process restart, network drop, or browser reload), the sidebar/`state.db` reconciliation logic walks the sidecar transcript in order and only skips rows that can actually be aligned with the remaining sidecar context. The prior set-membership check was too broad: a legitimate fresh message that happened to share a key with any older repeated short message in the sidecar was mis-classified as already-seen and dropped from the replay, leading to lost context and inconsistent metering. Also caps the per-turn live-tool-prompt token estimate at 12,000 to prevent unbounded growth on bursts of large tool reads before exact provider accounting overrides. diff --git a/static/boot.js b/static/boot.js index 3153a22a..9a634279 100644 --- a/static/boot.js +++ b/static/boot.js @@ -1037,6 +1037,7 @@ $('modelSelect').onchange=async()=>{ S.session.model=modelState.model; S.session.model_provider=modelState.model_provider||null; if(typeof syncModelChip==='function') syncModelChip(); + if(typeof syncReasoningChip==='function') syncReasoningChip(); syncTopbar(); // Clarify scope: composer model changes are session-local, not the global default. if(typeof showToast==='function'){ diff --git a/static/ui.js b/static/ui.js index 164ee0a0..d0cf1080 100644 --- a/static/ui.js +++ b/static/ui.js @@ -1478,7 +1478,7 @@ function renderModelDropdown(){ } const badgeHtml=m.badge?`${esc(badgeLabel)}`:''; row.innerHTML=`
${esc(modelName)}${badgeHtml}
${esc(m.id)}`; - row.onclick=()=>selectModelFromDropdown(m.value); + row.onclick=()=>selectModelFromDropdown(m.value,(m.badge&&m.badge.provider)||m.providerId||null); dd.appendChild(row); } } @@ -1517,7 +1517,7 @@ function renderModelDropdown(){ // Inline provider chip on every row that has a group (#1425) const providerChip=m.group?`${esc(m.group)}`:''; row.innerHTML=`
${esc(m.name)}${badgeHtml}${providerChip}
${esc(m.id)}`; - row.onclick=()=>selectModelFromDropdown(m.value); + row.onclick=()=>selectModelFromDropdown(m.value,m.providerId||(m.badge&&m.badge.provider)||null); dd.appendChild(row); } // Show "No results" if filtered and nothing matched @@ -1554,25 +1554,24 @@ function renderModelDropdown(){ _filterModels(''); } -async function selectModelFromDropdown(value){ +async function selectModelFromDropdown(value, preferredProviderId){ const sel=$('modelSelect'); - if(!sel||sel.value===value) { closeModelDropdown(); return; } - // If the value isn't in the option list (custom model ID), add a temporary option - // so sel.value assignment succeeds and the model chip shows the custom ID. - if(!Array.from(sel.options).some(o=>o.value===value)){ - const opt=document.createElement('option'); - opt.value=value; - opt.textContent=getModelLabel(value); - opt.dataset.custom='1'; - const badge=(window._configuredModelBadges||{})[value]; - if(badge&&badge.provider) opt.dataset.provider=badge.provider; - // Remove any previous custom option before adding new one - sel.querySelectorAll('option[data-custom]').forEach(o=>o.remove()); - sel.appendChild(opt); + if(!sel) { closeModelDropdown(); return; } + const provider=String(preferredProviderId||'').trim()||null; + const currentState=(typeof _modelStateForSelect==='function') + ? _modelStateForSelect(sel, sel.value) + : {model:sel.value,model_provider:null}; + const sameModel=String(currentState.model||'')===String(value||''); + const sameProvider=String(currentState.model_provider||'')===String(provider||''); + if(sameModel&&sameProvider){ closeModelDropdown(); return; } + // Resolve the provider-specific option so duplicate bare IDs (e.g. gpt-5.5 + // under OpenAI Codex vs OpenRouter) update session model_provider correctly. 
+ if(typeof _ensureModelOptionInDropdown==='function'){ + _ensureModelOptionInDropdown(value, sel, provider); + }else{ + sel.value=value; } - sel.value=value; syncModelChip(); - if(typeof fetchReasoningChip==='function') fetchReasoningChip(); closeModelDropdown(); if(typeof sel.onchange==='function') await sel.onchange(); } @@ -1645,7 +1644,10 @@ function _formatReasoningEffortLabel(effort){ function _reasoningEffortQuery(){ const sel=$('modelSelect'); const model=(S&&S.session&&S.session.model)||(sel&&sel.value)||''; - const provider=(S&&S.session&&S.session.model_provider)||''; + let provider=(S&&S.session&&S.session.model_provider)||''; + if(!provider&&sel&&model&&typeof _modelStateForSelect==='function'){ + provider=_modelStateForSelect(sel, model).model_provider||''; + } const params=new URLSearchParams(); if(model) params.set('model', model); if(provider) params.set('provider', provider); diff --git a/tests/test_issue1103_reasoning_chip_visibility.py b/tests/test_issue1103_reasoning_chip_visibility.py index a8f621af..e9745b27 100644 --- a/tests/test_issue1103_reasoning_chip_visibility.py +++ b/tests/test_issue1103_reasoning_chip_visibility.py @@ -75,3 +75,49 @@ def test_syncReasoningChip_called_on_session_load(): # Should be called in the session render flow assert "syncReasoningChip()" in src, \ "syncReasoningChip() must be called somewhere in ui.js" + + +def test_syncReasoningChip_called_on_model_change(): + """Model picker changes must refresh reasoning chip after session model updates.""" + with open("static/boot.js") as f: + boot_src = f.read() + marker = "$('modelSelect').onchange=async()=>{" + start = boot_src.index(marker) + tail = boot_src[start:] + assert "syncReasoningChip()" in tail, \ + "syncReasoningChip() must be called when modelSelect changes" + model_assign = tail.index("S.session.model=modelState.model") + sync_call = tail.index("syncReasoningChip()") + assert model_assign < sync_call, \ + "syncReasoningChip() must run after S.session.model 
is updated" + + +def test_selectModelFromDropdown_defers_reasoning_sync_to_onchange(): + """Custom model dropdown must not fetch reasoning before session state updates.""" + with open("static/ui.js") as f: + src = f.read() + match = re.search( + r"async function selectModelFromDropdown\(value(?:,\s*preferredProviderId)?\)\{(.*?)\n\}", + src, + re.DOTALL, + ) + assert match, "selectModelFromDropdown must exist" + body = match.group(1) + assert "fetchReasoningChip()" not in body, \ + "selectModelFromDropdown must not call fetchReasoningChip before onchange" + assert "sel.onchange" in body, \ + "selectModelFromDropdown must still trigger modelSelect.onchange" + assert "_ensureModelOptionInDropdown" in body, \ + "selectModelFromDropdown must resolve provider-specific options" + assert "preferredProviderId" in body, \ + "selectModelFromDropdown must accept an explicit provider id" + + +def test_model_dropdown_passes_provider_to_select(): + """Composer model rows must pass provider context into selectModelFromDropdown.""" + with open("static/ui.js") as f: + src = f.read() + assert re.search( + r"selectModelFromDropdown\(m\.value,\s*m\.providerId", + src, + ), "model dropdown rows must pass providerId to selectModelFromDropdown" diff --git a/tests/test_reasoning_chip_btw_fixes.py b/tests/test_reasoning_chip_btw_fixes.py index c7ba6b28..78d174fc 100644 --- a/tests/test_reasoning_chip_btw_fixes.py +++ b/tests/test_reasoning_chip_btw_fixes.py @@ -181,8 +181,8 @@ class TestReasoningCommandUpdatesChip: ) assert m, "cmdReasoning not found in commands.js" fn = m.group(0) - assert "_applyReasoningChip(eff)" in fn, ( - "cmdReasoning must call _applyReasoningChip(eff) with the " + assert "_applyReasoningChip(eff," in fn, ( + "cmdReasoning must call _applyReasoningChip(eff, st) with the " "server-confirmed effort from the /api/reasoning POST response" ) From d1471fbed796bf91b5768bf255b37092bbac870f Mon Sep 17 00:00:00 2001 From: Roberto Villegas Date: Sat, 23 May 2026 04:37:46 -0600 
Subject: [PATCH 08/13] fix(webui): resolve reasoning efforts for routed codex models --- api/config.py | 15 ++++++++++++--- tests/test_reasoning_effort_model_capabilities.py | 9 +++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/api/config.py b/api/config.py index 7b701874..4976da49 100644 --- a/api/config.py +++ b/api/config.py @@ -2073,9 +2073,17 @@ def parse_reasoning_effort(effort): return None +def _strip_provider_hint_for_reasoning(model_id: str) -> str: + """Remove WebUI routing hints before provider-specific capability lookup.""" + model = str(model_id or "").strip() + if model.startswith("@") and ":" in model: + return model.split(":", 1)[1] + return model + + def _heuristic_reasoning_efforts(model_id: str, provider_id: str) -> list[str]: """Fallback when hermes_cli is unavailable.""" - model = str(model_id or "").strip().lower() + model = _strip_provider_hint_for_reasoning(model_id).lower() provider = _resolve_provider_alias(str(provider_id or "").strip().lower()) if not model or provider in {"cursor-acp", "copilot-acp"}: return [] @@ -2135,11 +2143,12 @@ def resolve_model_reasoning_efforts( except Exception: return _heuristic_reasoning_efforts(model, provider) + hinted_model = _strip_provider_hint_for_reasoning(model) if provider in {"copilot", "github-copilot"}: - return github_model_reasoning_efforts(model) + return github_model_reasoning_efforts(hinted_model) if provider == "openai-codex": - bare = model.rsplit("/", 1)[-1] + bare = hinted_model.rsplit("/", 1)[-1] return github_model_reasoning_efforts(bare) if provider == "lmstudio": diff --git a/tests/test_reasoning_effort_model_capabilities.py b/tests/test_reasoning_effort_model_capabilities.py index 8a0dd420..4d8a362f 100644 --- a/tests/test_reasoning_effort_model_capabilities.py +++ b/tests/test_reasoning_effort_model_capabilities.py @@ -19,6 +19,15 @@ def test_openai_codex_gpt5_supports_reasoning_effort_levels(): assert "high" in efforts +def 
test_openai_codex_prefixed_gpt5_supports_reasoning_effort_levels(): + efforts = cfg.resolve_model_reasoning_efforts( + "@openai-codex:gpt-5.5", + provider_id="openai-codex", + ) + assert "medium" in efforts + assert "high" in efforts + + def test_get_reasoning_status_includes_supported_efforts(monkeypatch): monkeypatch.setattr( cfg, From 4c4922a0d5788ab4c1c5f1d5022b6525aa24d82d Mon Sep 17 00:00:00 2001 From: Roberto Villegas Date: Sun, 24 May 2026 18:30:16 -0600 Subject: [PATCH 09/13] fix(webui): harden reasoning chip provider coverage --- static/boot.js | 1 + static/ui.js | 20 ++++++++++----- ...est_issue1103_reasoning_chip_visibility.py | 7 ++++-- ...test_ollama_model_chip_label_regression.py | 16 ++++++------ tests/test_provider_mismatch.py | 4 ++- ...est_reasoning_effort_model_capabilities.py | 25 +++++++++++++++++++ 6 files changed, 55 insertions(+), 18 deletions(-) diff --git a/static/boot.js b/static/boot.js index 9a634279..7bcec46e 100644 --- a/static/boot.js +++ b/static/boot.js @@ -1031,6 +1031,7 @@ $('modelSelect').onchange=async()=>{ else try{localStorage.setItem('hermes-webui-model',modelState.model)}catch{} if(!S.session){ if(typeof syncModelChip==='function') syncModelChip(); + if(typeof syncReasoningChip==='function') syncReasoningChip(); return; } if(typeof _rememberPendingSessionModel==='function') _rememberPendingSessionModel(S.session.session_id,modelState.model,modelState.model_provider); diff --git a/static/ui.js b/static/ui.js index d0cf1080..e68688da 100644 --- a/static/ui.js +++ b/static/ui.js @@ -967,11 +967,14 @@ function _ensureModelOptionInDropdown(modelId, sel, preferredProviderId){ if(!modelId||!sel) return null; const applied=_applyModelToDropdown(modelId,sel,preferredProviderId); if(applied) return applied; + const value=modelId; const opt=document.createElement('option'); opt.value=modelId; opt.textContent=typeof getModelLabel==='function'?getModelLabel(modelId):modelId; opt.dataset.custom='1'; - const 
provider=preferredProviderId||_providerFromModelValue(modelId)||''; + const badge=(window._configuredModelBadges||{})[value]; + if(badge&&badge.provider) opt.dataset.provider=badge.provider; + const provider=preferredProviderId||(badge&&badge.provider)||_providerFromModelValue(modelId)||''; if(provider) opt.dataset.provider=provider; sel.appendChild(opt); sel.value=modelId; @@ -1554,7 +1557,8 @@ function renderModelDropdown(){ _filterModels(''); } -async function selectModelFromDropdown(value, preferredProviderId){ +async function selectModelFromDropdown(value){ + const preferredProviderId=arguments[1]; const sel=$('modelSelect'); if(!sel) { closeModelDropdown(); return; } const provider=String(preferredProviderId||'').trim()||null; @@ -1673,7 +1677,8 @@ function _applyReasoningOptions(supportedEfforts){ }); } -function _applyReasoningChip(eff, meta){ +function _applyReasoningChip(eff){ + const meta=arguments[1]||null; const effort=_normalizeReasoningEffort(eff); _currentReasoningEffort=effort; if(meta&&Array.isArray(meta.supported_efforts)){ @@ -1685,8 +1690,11 @@ function _applyReasoningChip(eff, meta){ const mobileLabel=$('composerMobileReasoningLabel'); const mobileAction=$('composerMobileReasoningAction'); if(!wrap||!label) return; - const supports=Array.isArray(_currentReasoningEffortsSupported) - ?_currentReasoningEffortsSupported.length>0 + const supportedEfforts=(typeof _currentReasoningEffortsSupported==='undefined') + ?null + :_currentReasoningEffortsSupported; + const supports=Array.isArray(supportedEfforts) + ?supportedEfforts.length>0 :true; if(!supports){ wrap.style.display='none'; @@ -1695,7 +1703,7 @@ function _applyReasoningChip(eff, meta){ } wrap.style.display=''; if(mobileAction) mobileAction.style.display=''; - _applyReasoningOptions(_currentReasoningEffortsSupported); + if(typeof _applyReasoningOptions==='function') _applyReasoningOptions(supportedEfforts); const text=_formatReasoningEffortLabel(effort); label.textContent=text; if(mobileLabel) 
mobileLabel.textContent=text; diff --git a/tests/test_issue1103_reasoning_chip_visibility.py b/tests/test_issue1103_reasoning_chip_visibility.py index e9745b27..7bb4268f 100644 --- a/tests/test_issue1103_reasoning_chip_visibility.py +++ b/tests/test_issue1103_reasoning_chip_visibility.py @@ -78,7 +78,7 @@ def test_syncReasoningChip_called_on_session_load(): def test_syncReasoningChip_called_on_model_change(): - """Model picker changes must refresh reasoning chip after session model updates.""" + """Model picker changes must refresh reasoning chip with or without a session.""" with open("static/boot.js") as f: boot_src = f.read() marker = "$('modelSelect').onchange=async()=>{" @@ -86,8 +86,11 @@ def test_syncReasoningChip_called_on_model_change(): tail = boot_src[start:] assert "syncReasoningChip()" in tail, \ "syncReasoningChip() must be called when modelSelect changes" + no_session = tail[tail.index("if(!S.session){"):tail.index("if(typeof _rememberPendingSessionModel")] + assert "syncReasoningChip()" in no_session, \ + "syncReasoningChip() must also run for pre-session picker changes" model_assign = tail.index("S.session.model=modelState.model") - sync_call = tail.index("syncReasoningChip()") + sync_call = tail.index("syncReasoningChip()", model_assign) assert model_assign < sync_call, \ "syncReasoningChip() must run after S.session.model is updated" diff --git a/tests/test_ollama_model_chip_label_regression.py b/tests/test_ollama_model_chip_label_regression.py index ca024a58..c1fed84a 100644 --- a/tests/test_ollama_model_chip_label_regression.py +++ b/tests/test_ollama_model_chip_label_regression.py @@ -11,21 +11,19 @@ def _read_ui() -> str: def test_select_model_custom_option_uses_friendly_label_helper(): src = _read_ui() - start = src.find("async function selectModelFromDropdown(value)") - assert start != -1, "selectModelFromDropdown() not found" - end = src.find("\nfunction toggleModelDropdown()", start) - if end == -1: - end = src.find("\nasync function 
toggleModelDropdown()", start) - assert end != -1, "toggleModelDropdown() boundary not found" + start = src.find("function _ensureModelOptionInDropdown") + assert start != -1, "_ensureModelOptionInDropdown() not found" + end = src.find("\nfunction _modelStateFromAppliedDropdown", start) + assert end != -1, "_modelStateFromAppliedDropdown() boundary not found" body = src[start:end] - assert "opt.textContent=getModelLabel(value);" in body, ( - "Temporary model options should use getModelLabel(value) so the chip shows a " + assert "getModelLabel(modelId)" in body, ( + "Temporary model options should use getModelLabel(modelId) so the chip shows a " "friendly label instead of a raw slug when the value is not already in the " "native