From 9db0d6869ad0102f0c8c3841ba1d7cb73362577a Mon Sep 17 00:00:00 2001
From: Frank Song <franksong2702@gmail.com>
Date: Tue, 26 May 2026 16:40:35 +0800
Subject: [PATCH 1/2] fix: keep session switch metadata non-blocking

---
 CHANGELOG.md                                  |  6 ++
 api/models.py                                 | 90 +++++++++++++++++--
 api/routes.py                                 | 52 ++++++++---
 static/sessions.js                            | 12 ++-
 .../test_session_model_resolution_on_load.py  | 34 ++++---
 tests/test_webui_state_db_reconciliation.py   | 73 ++++++++++++++-
 6 files changed, 222 insertions(+), 45 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5af702c4..bcc8f37c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,12 @@
 
 ## [Unreleased]
 
+### Fixed
+
+- Session switching now keeps the initial metadata fetch on `/api/session?messages=0&resolve_model=0` and defers stale model/provider repair until after the new session is assigned, so first paint does not block on cold model catalog hydration.
+- Metadata-only session loads now use a cheap state summary instead of full transcript reconciliation, while still detecting real external state.db growth and ignoring restamped replay rows that would otherwise retrigger refresh polling.
+- Session transcript reconciliation now precomputes visible-duplicate lookup state instead of recomputing loose-content normalization for every state.db row, reducing long-session tail-load latency without changing the append-only merge contract.
+
 ## [v0.51.137] — 2026-05-25 — Release DI (stage-batch19 — 6-PR medium-risk batch)
 
 ### Added
diff --git a/api/models.py b/api/models.py
index d121c7a4..394de6e4 100644
--- a/api/models.py
+++ b/api/models.py
@@ -576,9 +576,10 @@ class Session:
             'enabled_toolsets', 'composer_draft',
         ]
         meta = {k: getattr(self, k, None) for k in METADATA_FIELDS}
+        meta['message_count'] = len(self.messages or [])
         meta['messages'] = self.messages
         meta['tool_calls'] = self.tool_calls
-        # Fields not in METADATA_FIELDS (e.g. last_usage, message_count) go at the end
+        # Fields not in METADATA_FIELDS (e.g. last_usage) go at the end
         extra = {k: v for k, v in self.__dict__.items()
                  if k not in METADATA_FIELDS and k not in ('messages', 'tool_calls')
                  and not k.startswith('_')}
@@ -3187,6 +3188,53 @@ def get_state_db_session_messages(sid, *, stitch_continuations: bool = False, pr
     return msgs
 
 
+def get_state_db_session_summary(sid, *, profile=None) -> dict:
+    """Return a cheap message count/timestamp summary for one state.db session."""
+    try:
+        import sqlite3
+    except ImportError:
+        return {"message_count": 0, "last_message_at": 0.0}
+
+    if isinstance(profile, str) and profile:
+        db_path = _get_profile_home(profile) / 'state.db'
+        if not db_path.exists():
+            db_path = _active_state_db_path()
+    else:
+        db_path = _active_state_db_path()
+    if not sid or not db_path.exists():
+        return {"message_count": 0, "last_message_at": 0.0}
+
+    try:
+        with closing(sqlite3.connect(str(db_path))) as conn:
+            conn.row_factory = sqlite3.Row
+            cur = conn.cursor()
+            cur.execute("PRAGMA table_info(messages)")
+            available = {str(row['name']) for row in cur.fetchall()}
+            if 'session_id' not in available:
+                return {"message_count": 0, "last_message_at": 0.0}
+            if 'timestamp' in available:
+                cur.execute(
+                    "SELECT COUNT(*) AS message_count, MAX(timestamp) AS last_message_at "
+                    "FROM messages WHERE session_id = ?",
+                    (str(sid),),
+                )
+                row = cur.fetchone()
+                if not row:
+                    return {"message_count": 0, "last_message_at": 0.0}
+                return {
+                    "message_count": max(0, int(row["message_count"] or 0)),
+                    "last_message_at": float(row["last_message_at"] or 0) if row["last_message_at"] is not None else 0.0,
+                }
+            cur.execute("SELECT COUNT(*) AS message_count FROM messages WHERE session_id = ?", (str(sid),))
+            row = cur.fetchone()
+            return {
+                "message_count": max(0, int(row["message_count"] or 0)) if row else 0,
+                "last_message_at": 0.0,
+            }
+    except Exception:
+        return {"message_count": 0, "last_message_at": 0.0}
+
+
 def _normalized_message_timestamp_for_key(value):
     if value is None or value == "":
         return ""
@@ -3258,19 +3306,38 @@ def _session_message_visible_key(msg: dict):
     )
 
 
-def _matching_visible_duplicate(visible_key: tuple, visible_keys: set[tuple]):
+def _build_visible_duplicate_lookup(visible_keys: set[tuple]) -> dict:
+    by_role = {}
+    loose_by_key = {}
+    for key in visible_keys:
+        try:
+            role, content = key
+        except (TypeError, ValueError):
+            continue
+        if not content:
+            continue
+        by_role.setdefault(role, []).append(key)
+        loose_by_key[key] = _loose_session_message_content(content)
+    return {"keys": visible_keys, "by_role": by_role, "loose_by_key": loose_by_key}
+
+
+def _matching_visible_duplicate(visible_key: tuple, visible_keys: set[tuple], lookup: dict | None = None):
     if visible_key in visible_keys:
         return visible_key
     role, content = visible_key
     if not content:
         return None
-    for existing_role, existing_content in visible_keys:
+    if lookup is None:
+        lookup = _build_visible_duplicate_lookup(visible_keys)
+    loose_content = None
+    for existing_role, existing_content in lookup.get("by_role", {}).get(role, []):
         if role != existing_role or not existing_content:
             continue
         if content in existing_content or existing_content in content:
             return (existing_role, existing_content)
-        loose_content = _loose_session_message_content(content)
-        loose_existing = _loose_session_message_content(existing_content)
+        if loose_content is None:
+            loose_content = _loose_session_message_content(content)
+        loose_existing = lookup.get("loose_by_key", {}).get((existing_role, existing_content), "")
         if loose_content and loose_existing and (
             loose_content in loose_existing or loose_existing in loose_content
         ):
@@ -3376,6 +3443,7 @@ def merge_session_messages_append_only(
         sidecar_visible_counts[visible_key] = sidecar_visible_counts.get(visible_key, 0) + 1
         sidecar_visible_sequence.append(visible_key)
         merged_messages.append(msg)
+    sidecar_visible_lookup = _build_visible_duplicate_lookup(sidecar_visible_keys)
     state_replay_idx = 0
     skipped_state_visible_counts = {}
     for msg in state_messages:
@@ -3391,7 +3459,11 @@ def merge_session_messages_append_only(
                 replays_sidecar_prefix = True
                 state_replay_idx += 1
         if replays_sidecar_prefix:
-            matched_visible_key = _matching_visible_duplicate(visible_key, sidecar_visible_keys)
+            matched_visible_key = _matching_visible_duplicate(
+                visible_key,
+                sidecar_visible_keys,
+                sidecar_visible_lookup,
+            )
             if matched_visible_key is not None:
                 skipped_state_visible_counts[matched_visible_key] = (
                     skipped_state_visible_counts.get(matched_visible_key, 0) + 1
@@ -3411,7 +3483,11 @@ def merge_session_messages_append_only(
                 continue
         if key in seen_message_keys:
             continue
-        matched_visible_key = _matching_visible_duplicate(visible_key, sidecar_visible_keys)
+        matched_visible_key = _matching_visible_duplicate(
+            visible_key,
+            sidecar_visible_keys,
+            sidecar_visible_lookup,
+        )
         if matched_visible_key is not None:
             skipped_count = skipped_state_visible_counts.get(matched_visible_key, 0)
             sidecar_count = sidecar_visible_counts.get(matched_visible_key, 0)
diff --git a/api/routes.py b/api/routes.py
index 420dffe6..029de183 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -2206,25 +2206,50 @@ def _message_summary(messages) -> dict:
 
 
 def _metadata_only_message_summary(sid: str, profile: str | None = None) -> dict:
-    """Return the reconciled message summary used by metadata-only session loads.
+    """Return the cheap message summary used by metadata-only session loads.
 
-    Threads ``profile=`` through to ``get_state_db_session_messages`` so
+    Threads ``profile=`` through to ``get_state_db_session_summary`` so
     background-thread reads land on the correct profile's state.db (per the
     cookie-bound profile selector — fixes the same TLS-vs-thread race the
     #2762 fix addressed for write paths).
+
+    This intentionally does not full-read or merge transcripts.  If state.db has
+    grown beyond the sidecar count, report that growth so active-session polling
+    can refresh.  If state.db only contains restamped replay rows at or below the
+    sidecar count, keep the sidecar metadata so polling does not loop forever on
+    a false "newer transcript" signal.
     """
-    sidecar_session = Session.load(sid)
-    sidecar_messages = []
+    sidecar_session = Session.load_metadata_only(sid)
+    sidecar_count = 0
+    sidecar_last_message_at = 0.0
     if sidecar_session:
-        sidecar_messages = getattr(sidecar_session, "messages", []) or []
-    state_db_messages = get_state_db_session_messages(sid, profile=profile)
-    return _message_summary(
-        merge_session_messages_append_only(
-            sidecar_messages,
-            state_db_messages,
-            truncation_watermark=getattr(sidecar_session, "truncation_watermark", None),
-        )
-    )
+        sidecar_count = _numeric_count(getattr(sidecar_session, "_metadata_message_count", None))
+        if sidecar_count <= 0:
+            sidecar_count = _numeric_count(sidecar_session.compact().get("message_count"))
+        try:
+            sidecar_last_message_at = float(getattr(sidecar_session, "updated_at", 0) or 0)
+        except (TypeError, ValueError):
+            sidecar_last_message_at = 0.0
+        if getattr(sidecar_session, "truncation_watermark", None) is not None:
+            return {
+                "message_count": sidecar_count,
+                "last_message_at": sidecar_last_message_at,
+            }
+    state_summary = get_state_db_session_summary(sid, profile=profile)
+    state_count = _numeric_count(state_summary.get("message_count"))
+    try:
+        state_last_message_at = float(state_summary.get("last_message_at") or 0)
+    except (TypeError, ValueError):
+        state_last_message_at = 0.0
+    if state_count > sidecar_count and state_last_message_at > sidecar_last_message_at:
+        return {
+            "message_count": state_count,
+            "last_message_at": state_last_message_at,
+        }
+    return {
+        "message_count": sidecar_count,
+        "last_message_at": sidecar_last_message_at,
+    }
 
 
 def _session_requires_cli_metadata_lookup(session) -> bool:
@@ -2457,6 +2482,7 @@ from api.models import (
     get_cli_sessions,
     get_cli_session_messages,
     get_state_db_session_messages,
+    get_state_db_session_summary,
     merge_session_messages_append_only,
     _session_message_merge_key,
     ensure_cron_project,
diff --git a/static/sessions.js b/static/sessions.js
index ac689194..3d7a82b6 100644
--- a/static/sessions.js
+++ b/static/sessions.js
@@ -579,15 +579,13 @@ async function loadSession(sid){
     const _msgInner = $('msgInner');
     if (_msgInner && currentSid !== sid) _msgInner.innerHTML = '<div style="display:flex;align-items:center;justify-content:center;height:100%;color:var(--text-muted);font-size:14px;padding:40px;text-align:center;">Loading conversation...</div>';
   }
-  // Phase 1: Load metadata only (~1KB) for fast session switching.
-  // Resolve model immediately: old sessions can persist stale provider-shaped
-  // IDs (e.g. openai/gpt-5.4-mini) and assigning those to S.session creates a
-  // short race where the composer can display/send the wrong model before the
-  // deferred resolver catches up.
+  // Phase 1: Load metadata only (~1KB) for fast session switching. Keep model
+  // resolution out of the first-paint path; old provider-shaped model IDs are
+  // repaired by the deferred resolver after S.session is assigned.
   // Guard against network/server failures to prevent a permanently stuck loading state.
   let data;
   try {
-    data = await api(`/api/session?session_id=${encodeURIComponent(sid)}&messages=0&resolve_model=1`);
+    data = await api(`/api/session?session_id=${encodeURIComponent(sid)}&messages=0&resolve_model=0`);
   } catch(e) {
     const _msgInner = $('msgInner');
     if(_msgInner){
@@ -628,6 +626,7 @@ async function loadSession(sid){
     try { window._resetScrollDirectionTracker(); } catch (_) {}
   }
   if(typeof _applyPendingSessionModelForSession==='function') _applyPendingSessionModelForSession(sid);
+  _resolveSessionModelForDisplaySoon(sid);
   // Sync workspace display immediately so the chip label reflects the new session's workspace
   // before any async message-loading begins (mirrors how model is handled).
   if(typeof syncTopbar==='function') syncTopbar();
@@ -828,7 +827,6 @@ async function loadSession(sid){
     _restoreComposerDraft(_draft, sid, {preserveActiveInput:currentSid===sid&&forceReload});
   }
 
-  _resolveSessionModelForDisplaySoon(sid);
   // Clear the in-flight session marker now that this load has completed (#1060).
   if (_loadingSessionId === sid) _loadingSessionId = null;
 
diff --git a/tests/test_session_model_resolution_on_load.py b/tests/test_session_model_resolution_on_load.py
index fcb29157..6ea41181 100644
--- a/tests/test_session_model_resolution_on_load.py
+++ b/tests/test_session_model_resolution_on_load.py
@@ -1,10 +1,9 @@
-"""Regression tests for stale session model hydration in the WebUI.
+"""Regression tests for session switch model hydration in the WebUI.
 
 Old sessions can persist provider-shaped model IDs such as ``openai/gpt-5.4-mini``
-after the active runtime moved to OpenAI Codex ``gpt-5.5``.  The first
-``loadSession()`` metadata request must ask the backend for the resolved model so
-that the composer state cannot briefly use the stale raw value for display or the
-next chat-start payload.
+after the active runtime moved to OpenAI Codex ``gpt-5.5``.  The UI still needs
+to repair those stale values, but session switching first paint must not pay the
+model catalog cost synchronously.
 """
 
 from pathlib import Path
@@ -30,17 +29,24 @@ def _extract_function(src: str, signature: str) -> str:
     raise AssertionError(f"unterminated function body for: {signature}")
 
 
-def test_load_session_initial_metadata_request_resolves_model_before_state_assignment():
+def test_load_session_initial_metadata_request_defers_model_resolution_until_after_state_assignment():
     body = _extract_function(SESSIONS_JS, "async function loadSession(sid")
-    metadata_fetch = "messages=0&resolve_model=1"
-    stale_metadata_fetch = "messages=0&resolve_model=0"
+    fast_metadata_fetch = "messages=0&resolve_model=0"
+    deferred_metadata_fetch = "messages=0&resolve_model=1"
     assignment = "S.session=data.session"
 
-    assert metadata_fetch in body, (
-        "loadSession() must resolve model metadata on the initial fetch so stale "
-        "persisted models like openai/gpt-5.4-mini cannot become active composer state"
+    assert fast_metadata_fetch in body[: body.index(assignment)], (
+        "loadSession() first paint must use the metadata fast path so session "
+        "switching cannot block on cold model catalog hydration"
     )
-    assert stale_metadata_fetch not in body[: body.index(assignment)], (
-        "loadSession() must not assign S.session from unresolved metadata before the "
-        "backend has normalized stale model/provider combinations"
+    assert deferred_metadata_fetch not in body[: body.index(assignment)], (
+        "loadSession() must not resolve model metadata before assigning S.session; "
+        "stale model/provider correction belongs to the deferred path"
+    )
+    assert "_resolveSessionModelForDisplaySoon(sid)" in body[body.index(assignment):], (
+        "stale persisted model/provider correction must still happen after first paint"
+    )
+    assert body.count("_resolveSessionModelForDisplaySoon(sid)") == 1, (
+        "deferred model repair should run once per session switch, not once after "
+        "metadata and again after message hydration"
     )
diff --git a/tests/test_webui_state_db_reconciliation.py b/tests/test_webui_state_db_reconciliation.py
index ad6bd83b..44e811ea 100644
--- a/tests/test_webui_state_db_reconciliation.py
+++ b/tests/test_webui_state_db_reconciliation.py
@@ -512,10 +512,12 @@ def test_api_session_reload_drops_stale_cached_user_tail_after_saved_assistant(m
     assert handler.response_json["session"]["message_count"] == 2
 
 
-def test_metadata_fast_path_matches_reconciliation_for_restamped_replays(monkeypatch, tmp_path):
-    """#2716 invariant: metadata-only /api/session uses merge_session_messages_append_only
-    (not a raw state.db COUNT) so restamped replay rows don't make sidebar polling think
-    the transcript is always newer than the loaded conversation."""
+def test_metadata_fast_path_uses_summary_without_full_merge_for_restamped_replays(monkeypatch, tmp_path):
+    """Metadata-only /api/session must not full-read and merge transcripts.
+
+    It still must not let a restamped replay row make sidebar polling think the
+    transcript is newer than the loaded sidecar conversation.
+    """
     import api.routes as routes
 
     sid = "webui_reconcile_metadata_replay"
@@ -535,6 +537,20 @@ def test_metadata_fast_path_matches_reconciliation_for_restamped_replays(monkeyp
             {"role": "user", "content": "old user", "timestamp": 1002.0},
         ],
     )
+    monkeypatch.setattr(
+        routes,
+        "get_state_db_session_messages",
+        lambda *_args, **_kwargs: (_ for _ in ()).throw(
+            AssertionError("metadata-only loads must not full-read state.db messages")
+        ),
+    )
+    monkeypatch.setattr(
+        routes,
+        "merge_session_messages_append_only",
+        lambda *_args, **_kwargs: (_ for _ in ()).throw(
+            AssertionError("metadata-only loads must not merge full transcripts")
+        ),
+    )
 
     handler = _GetHandler(f"/api/session?session_id={sid}&messages=0&resolve_model=0")
     routes.handle_get(handler, urlparse(handler.path))
@@ -546,6 +562,55 @@ def test_metadata_fast_path_matches_reconciliation_for_restamped_replays(monkeyp
     assert session["last_message_at"] == 1001.0
 
 
+def test_metadata_fast_path_uses_state_db_summary_for_external_growth(monkeypatch, tmp_path):
+    """Metadata-only polling can detect real external growth without a full merge."""
+    import api.routes as routes
+
+    sid = "webui_reconcile_metadata_summary_growth"
+    _install_test_session(
+        monkeypatch,
+        tmp_path,
+        sid,
+        [
+            {"role": "user", "content": "old user", "timestamp": 1000.0},
+            {"role": "assistant", "content": "old assistant", "timestamp": 1001.0},
+        ],
+    )
+    _make_state_db(
+        tmp_path / "state.db",
+        sid,
+        [
+            {"role": "user", "content": "old user", "timestamp": 1000.0},
+            {"role": "assistant", "content": "old assistant", "timestamp": 1001.0},
+            {"role": "user", "content": "external user", "timestamp": 1002.0},
+            {"role": "assistant", "content": "external assistant", "timestamp": 1003.0},
+        ],
+    )
+    monkeypatch.setattr(
+        routes,
+        "get_state_db_session_messages",
+        lambda *_args, **_kwargs: (_ for _ in ()).throw(
+            AssertionError("metadata-only loads must not full-read state.db messages")
+        ),
+    )
+    monkeypatch.setattr(
+        routes,
+        "merge_session_messages_append_only",
+        lambda *_args, **_kwargs: (_ for _ in ()).throw(
+            AssertionError("metadata-only loads must not merge full transcripts")
+        ),
+    )
+
+    handler = _GetHandler(f"/api/session?session_id={sid}&messages=0&resolve_model=0")
+    routes.handle_get(handler, urlparse(handler.path))
+
+    assert handler.status == 200
+    session = handler.response_json["session"]
+    assert session["messages"] == []
+    assert session["message_count"] == 4
+    assert session["last_message_at"] == 1003.0
+
+
 def test_state_db_reconciliation_preserves_tool_metadata(monkeypatch, tmp_path):
     import api.routes as routes
 

From 2ff73c99543d9f3397e04ec0aa3f0d0f5e5ba8b0 Mon Sep 17 00:00:00 2001
From: Frank Song <franksong2702@gmail.com>
Date: Tue, 26 May 2026 20:13:01 +0800
Subject: [PATCH 2/2] Document truncation watermark metadata intent

---
 api/routes.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/api/routes.py b/api/routes.py
index 029de183..c7d45d6b 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -2231,6 +2231,11 @@ def _metadata_only_message_summary(sid: str, profile: str | None = None) -> dict
         except (TypeError, ValueError):
             sidecar_last_message_at = 0.0
         if getattr(sidecar_session, "truncation_watermark", None) is not None:
+            # Intentional: once the user has truncated this sidecar, metadata
+            # polling must keep the sidecar as authoritative.  A full message
+            # load can still apply the watermark-aware merge, but the cheap
+            # metadata path should not treat later state.db rows as external
+            # growth and resurrect turns the user deliberately cut away.
             return {
                 "message_count": sidecar_count,
                 "last_message_at": sidecar_last_message_at,