fix: fast conversation switching with metadata-first load (#959)

- Backend: save session JSON with metadata fields before messages array
  so load_metadata_only() reads only ~1KB without parsing the full session
- Backend: add GET /api/session?messages=0 for metadata-only responses
  (~1KB vs ~400KB), enabling instant sidebar switching
- Backend: add POST /api/admin/reload to hot-reload models without restart
- Backend: gzip compress JSON API responses (>1KB) for 70-80% bandwidth reduction
- Frontend: show Loading indicator immediately on session switch, replacing
  old DOM before API call to prevent stale content flash
- Frontend: clear S.messages before API call so _ensureMessagesLoaded
  always fetches fresh data for the target session
- Frontend: wrap both Phase 1 (messages=0) and Phase 2 (_ensureMessagesLoaded)
  in try/catch to prevent permanently stuck loading state on network/server errors
This commit is contained in:
Josh Jameson
2026-04-24 19:35:14 +01:00
committed by GitHub
parent 2d5c4b71cc
commit 7e17ec497c
5 changed files with 218 additions and 46 deletions
+15
View File
@@ -55,6 +55,12 @@ def _security_headers(handler):
)
def _accepts_gzip(handler) -> bool:
"""Check if the client accepts gzip encoding."""
ae = handler.headers.get('Accept-Encoding', '')
return 'gzip' in ae
def j(handler, payload, status: int=200, extra_headers: dict=None) -> None:
"""Send a JSON response.
@@ -64,6 +70,15 @@ def j(handler, payload, status: int=200, extra_headers: dict=None) -> None:
body = _json.dumps(payload, ensure_ascii=False, indent=2).encode('utf-8')
handler.send_response(status)
handler.send_header('Content-Type', 'application/json; charset=utf-8')
# Gzip-compress responses over 1KB when the client accepts it.
# Typical JSON API responses compress 70-80%, giving a big speedup
# for large payloads (session history, message lists).
if _accepts_gzip(handler) and len(body) > 1024:
import gzip
body = gzip.compress(body, compresslevel=4)
handler.send_header('Content-Encoding', 'gzip')
handler.send_header('Content-Length', str(len(body)))
handler.send_header('Cache-Control', 'no-store')
_security_headers(handler)
+69 -3
View File
@@ -239,7 +239,24 @@ class Session:
def save(self, touch_updated_at: bool = True, skip_index: bool = False) -> None:
if touch_updated_at:
self.updated_at = time.time()
payload = json.dumps(self.__dict__, ensure_ascii=False, indent=2)
# Write metadata fields first so load_metadata_only() can read them
# without parsing the full messages array (which may be 400KB+).
# Fields are listed in the order they should appear in the JSON file.
METADATA_FIELDS = [
'session_id', 'title', 'workspace', 'model', 'created_at', 'updated_at',
'pinned', 'archived', 'project_id', 'profile',
'input_tokens', 'output_tokens', 'estimated_cost',
'personality', 'active_stream_id',
'pending_user_message', 'pending_attachments', 'pending_started_at',
'compression_anchor_visible_idx', 'compression_anchor_message_key',
]
meta = {k: getattr(self, k, None) for k in METADATA_FIELDS}
meta['messages'] = self.messages
meta['tool_calls'] = self.tool_calls
# Fields not in METADATA_FIELDS (e.g. last_usage, message_count) go at the end
extra = {k: v for k, v in self.__dict__.items()
if k not in METADATA_FIELDS and k not in ('messages', 'tool_calls')}
payload = json.dumps({**meta, **extra}, ensure_ascii=False, indent=2)
tmp = self.path.with_suffix(f'.tmp.{os.getpid()}.{threading.current_thread().ident}')
try:
with open(tmp, 'w', encoding='utf-8') as f:
@@ -266,6 +283,46 @@ class Session:
return None
return cls(**json.loads(p.read_text(encoding='utf-8')))
@classmethod
def load_metadata_only(cls, sid):
    """Try to build a session from the first 1024 characters of its file.

    Returns None when ``sid`` is empty, contains a character outside
    ``[0-9a-z_]``, or no ``<sid>.json`` exists in SESSION_DIR.  Otherwise the
    leading 1024 characters are read and parsed as JSON.  If that parse
    yields the required keys plus a list-valued ``messages``, the session is
    built from it; in every other case (empty read, missing keys, parse or
    decode error) the result of ``cls.load(sid)`` is returned instead.

    NOTE(review): a prefix that stops part-way through the document is not
    valid JSON, so files longer than the read window appear to always take
    the ``cls.load`` fallback -- confirm whether that is intended.
    """
    allowed_chars = '0123456789abcdefghijklmnopqrstuvwxyz_'
    if not sid:
        return None
    if any(ch not in allowed_chars for ch in sid):
        return None

    path = SESSION_DIR / f'{sid}.json'
    if not path.exists():
        return None

    required_keys = {'session_id', 'title', 'created_at', 'updated_at'}
    try:
        # Text-mode read: the limit counts decoded characters.
        with open(path, 'r', encoding='utf-8') as fh:
            head = fh.read(1024)
        if head:
            record = json.loads(head)
            has_required = required_keys <= record.keys()
            # Only trust the prefix when it carries a real messages list.
            if has_required and isinstance(record.get('messages'), list):
                return cls(**record)
        return cls.load(sid)
    except Exception:
        # Unparseable or undecodable prefix -- use the full loader.
        return cls.load(sid)
def compact(self, include_runtime=False, active_stream_ids=None) -> dict:
active_stream_ids = active_stream_ids if active_stream_ids is not None else set()
return {
@@ -292,12 +349,21 @@ class Session:
) if include_runtime else False,
}
def get_session(sid):
def get_session(sid, metadata_only=False):
"""Load a session, optionally with metadata only (skipping the messages array).
When metadata_only=True the session is still cached so the full load on the
next access is fast. Use this when you only need compact() metadata and not
the actual message history (e.g., for fast sidebar switching).
"""
with LOCK:
if sid in SESSIONS:
SESSIONS.move_to_end(sid) # LRU: mark as recently used
return SESSIONS[sid]
s = Session.load(sid)
if metadata_only:
s = Session.load_metadata_only(sid)
else:
s = Session.load(sid)
if s:
with LOCK:
SESSIONS[sid] = s
+42 -5
View File
@@ -676,23 +676,47 @@ def handle_get(handler, parsed) -> bool:
return _serve_static(handler, parsed)
if parsed.path == "/api/session":
import time as _time
_t0 = _time.monotonic()
_debug_slow = os.environ.get("HERMES_DEBUG_SLOW", "")
sid = parse_qs(parsed.query).get("session_id", [""])[0]
if not sid:
return j(handler, {"error": "session_id is required"}, status=400)
# ?messages=0 skips the message payload for fast session switching.
# The frontend uses this when switching conversations in the sidebar
# (only needs metadata). The full message array is loaded lazily
# via ?messages=1 when the message panel opens.
load_messages = parse_qs(parsed.query).get("messages", ["1"])[0] != "0"
try:
s = get_session(sid)
_t1 = _time.monotonic()
s = get_session(sid, metadata_only=(not load_messages))
_t2 = _time.monotonic()
effective_model = _resolve_effective_session_model_for_display(s)
_t3 = _time.monotonic()
raw = s.compact() | {
"messages": s.messages,
"tool_calls": getattr(s, "tool_calls", []),
"messages": s.messages if load_messages else [],
"tool_calls": getattr(s, "tool_calls", []) if load_messages else [],
"active_stream_id": getattr(s, "active_stream_id", None),
"pending_user_message": getattr(s, "pending_user_message", None),
"pending_attachments": getattr(s, "pending_attachments", []),
"pending_attachments": getattr(s, "pending_attachments", []) if load_messages else [],
"pending_started_at": getattr(s, "pending_started_at", None),
}
_t4 = _time.monotonic()
if effective_model:
raw["model"] = effective_model
return j(handler, {"session": redact_session_data(raw)})
redact = redact_session_data(raw)
_t5 = _time.monotonic()
resp = j(handler, {"session": redact})
_t6 = _time.monotonic()
if _debug_slow:
logger.warning(
"[SLOW] session_id=%s get_session=%.1fms model_resolve=%.1fms "
"compact=%.1fms redact=%.1fms json_write=%.1fms total=%.1fms",
sid,
(_t2-_t1)*1000, (_t3-_t2)*1000, (_t4-_t3)*1000,
(_t5-_t4)*1000, (_t6-_t5)*1000, (_t6-_t0)*1000,
)
return resp
except KeyError:
# Not a WebUI session -- try CLI store
msgs = get_cli_session_messages(sid)
@@ -1079,6 +1103,19 @@ def handle_post(handler, parsed) -> bool:
except RuntimeError as e:
return bad(handler, str(e), 500)
if parsed.path == "/api/admin/reload":
# Hot-reload api.models module to pick up code changes without restart.
import importlib
from api import models as _models
importlib.reload(_models)
# Also re-expose get_session from the reloaded module so routes.py
# continues to work (routes.py imported it at module level).
import api.routes as _routes
_routes.get_session = _models.get_session
_routes.Session = _models.Session
_routes.compact = _models.compact
return j(handler, {"status": "ok", "reloaded": "api.models"})
if parsed.path == "/api/sessions/cleanup":
return _handle_sessions_cleanup(handler, body, zero_only=False)
+5
View File
@@ -1350,6 +1350,11 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
logger.debug("Periodic checkpoint save failed: %s", e)
_checkpoint_stop = threading.Event()
# Persist the user message BEFORE streaming starts so it's durable even if
# the server crashes before the first checkpoint fires (every 15s).
with _agent_lock:
s.save(touch_updated_at=True, skip_index=False)
_ckpt_thread = threading.Thread(
target=_periodic_checkpoint, daemon=True,
name=f"ckpt-{session_id[:8]}",
+87 -38
View File
@@ -96,31 +96,51 @@ async function loadSession(sid){
stopApprovalPolling();hideApprovalCard();
if(typeof stopClarifyPolling==='function') stopClarifyPolling();
if(typeof hideClarifyCard==='function') hideClarifyCard();
const data=await api(`/api/session?session_id=${encodeURIComponent(sid)}`);
// Show loading indicator immediately for responsiveness.
// Cleared by renderMessages() once full session data arrives.
const currentSid = S.session ? S.session.session_id : null;
if (currentSid !== sid) {
S.messages = [];
S.toolCalls = [];
const _msgInner = $('msgInner');
if (_msgInner) _msgInner.innerHTML = '<div style="display:flex;align-items:center;justify-content:center;height:100%;color:var(--text-muted);font-size:14px;padding:40px;text-align:center;">Loading conversation...</div>';
}
// Phase 1: Load metadata only (~1KB) for fast session switching.
// Guard against network/server failures to prevent a permanently stuck loading state.
let data;
try {
data = await api(`/api/session?session_id=${encodeURIComponent(sid)}&messages=0`);
} catch(e) {
const _msgInner = $('msgInner');
if (_msgInner) {
_msgInner.innerHTML = '<div style="display:flex;align-items:center;justify-content:center;height:100%;color:var(--text-muted);font-size:14px;padding:40px;text-align:center;">Failed to load session. Try switching sessions or refreshing.</div>';
}
if (typeof showToast === 'function') showToast('Failed to load session', 3000, 'error');
return;
}
S.session=data.session;
S.lastUsage={...(data.session.last_usage||{})};
_setSessionViewedCount(S.session.session_id, Number(data.session.message_count || 0));
localStorage.setItem('hermes-webui-session',S.session.session_id);
data.session.messages = (data.session.messages || []).filter(m => m && m.role);
const hasMessageToolMetadata = (data.session.messages || []).some(m => {
if (!m || m.role !== 'assistant') return false;
const hasTc = Array.isArray(m.tool_calls) && m.tool_calls.length > 0;
const hasTu = Array.isArray(m.content) && m.content.some(p => p && p.type === 'tool_use');
return hasTc || hasTu;
});
const activeStreamId=data.session.active_stream_id||null;
const activeStreamId=S.session.active_stream_id||null;
// Phase 2a: If session is streaming, restore from INFLIGHT cache before
// loading full messages (INFLIGHT state is self-contained and sufficient).
if(!INFLIGHT[sid]&&activeStreamId&&typeof loadInflightState==='function'){
const stored=loadInflightState(sid, activeStreamId);
if(stored){
INFLIGHT[sid]={
messages:Array.isArray(stored.messages)&&stored.messages.length?stored.messages:[...(data.session.messages||[])],
uploaded:Array.isArray(stored.uploaded)?stored.uploaded:[...(data.session.pending_attachments||[])],
messages:Array.isArray(stored.messages)&&stored.messages.length?stored.messages:[],
uploaded:Array.isArray(stored.uploaded)?stored.uploaded:[],
toolCalls:Array.isArray(stored.toolCalls)?stored.toolCalls:[],
reattach:true,
};
}
}
if(INFLIGHT[sid]){
// Streaming session: use cached INFLIGHT messages (already has pending assistant output).
S.messages=INFLIGHT[sid].messages;
S.toolCalls=(INFLIGHT[sid].toolCalls||[]);
S.busy=true;
@@ -137,29 +157,38 @@ async function loadSession(sid){
const _cb=$('btnCancel');if(_cb&&activeStreamId)_cb.style.display='inline-flex';
if(INFLIGHT[sid].reattach&&activeStreamId&&typeof attachLiveStream==='function'){
INFLIGHT[sid].reattach=false;
attachLiveStream(sid, activeStreamId, data.session.pending_attachments||[], {reconnecting:true});
attachLiveStream(sid, activeStreamId, S.session.pending_attachments||[], {reconnecting:true});
}
}else{
// Phase 2b: Idle session — load full messages lazily for rendering.
// _ensureMessagesLoaded is idempotent; it skips if S.messages already populated.
try {
await _ensureMessagesLoaded(sid);
} catch (e) {
// Network errors, server failures, or SSE drops (Chrome error codes 4/5)
// can cause _ensureMessagesLoaded to throw. Without a try/catch here the
// "Loading conversation..." div injected at the top of loadSession would
// persist forever with no recovery path.
const _msgInner = $('msgInner');
if (_msgInner) {
_msgInner.innerHTML = '<div style="display:flex;align-items:center;justify-content:center;height:100%;color:var(--text-muted);font-size:14px;padding:40px;text-align:center;">Failed to load messages. Try switching sessions or refreshing.</div>';
}
if (typeof showToast === 'function') showToast('Failed to load conversation messages', 3000, 'error');
return;
}
// Restore any queued message that survived page refresh via sessionStorage.
// Only restore when the agent is idle — if active, the done handler drains it.
if(typeof queueSessionMessage==='function'){
try{
const _storedQ=sessionStorage.getItem('hermes-queue-'+sid);
if(_storedQ){
const _entries=JSON.parse(_storedQ);
if(Array.isArray(_entries)&&_entries.length){
// Timestamp guard: drop entries older than the last assistant response
// (means the agent already ran and the queue was already dispatched)
const _lastMsg=(data.session.messages||[]).slice().reverse()
const _lastMsg=S.messages.slice().reverse()
.find(m=>m&&m.role==='assistant');
const _lastAsst=_lastMsg?(_lastMsg.timestamp||_lastMsg._ts||0)*1000:0;
const _fresh=_entries.filter(e=>!e._queued_at||e._queued_at>_lastAsst);
if(_fresh.length){
// Idle path: restore the first entry as a composer draft only. Do NOT
// re-enqueue into SESSION_QUEUES — if we did, send() would dispatch the
// draft directly (S.busy=false) and then setBusy(false) would drain the
// same entry from the queue, causing a duplicate send. Any follow-up
// entries (2..N) are discarded by design; the toast tells the user so.
const _first=_fresh[0];
const _msg=$&&$('msg');
if(_msg&&_first.text&&!_msg.value){
@@ -167,7 +196,6 @@ async function loadSession(sid){
if(typeof autoResize==='function') autoResize();
if(typeof showToast==='function') showToast((_fresh.length>1?`${_fresh.length} queued messages restored (showing first)`:'Queued message restored')+' — review and send when ready');
}
// Clear persisted queue now that the draft is in the composer
sessionStorage.removeItem('hermes-queue-'+sid);
} else {
sessionStorage.removeItem('hermes-queue-'+sid);
@@ -178,19 +206,15 @@ async function loadSession(sid){
}
}catch(_){sessionStorage.removeItem('hermes-queue-'+sid);}
}
// Reconstruct tool calls from message metadata, or fall back to session-level summary.
// (hasMessageToolMetadata already computed inside _ensureMessagesLoaded; S.toolCalls set there.)
updateQueueBadge(sid);
S.messages=data.session.messages||[];
const pendingMsg=typeof getPendingSessionMessage==='function'?getPendingSessionMessage(data.session):null;
// Attach pending user message if one is queued.
const pendingMsg=typeof getPendingSessionMessage==='function'?getPendingSessionMessage(S.session):null;
if(pendingMsg) S.messages.push(pendingMsg);
// Prefer reconstructing cards from per-message tool metadata when available.
// Fall back to persisted session summaries for older sessions that only
// saved session.tool_calls and bare role=tool results.
if(!hasMessageToolMetadata&&data.session.tool_calls&&data.session.tool_calls.length){
S.toolCalls=(data.session.tool_calls||[]).map(tc=>({...tc,done:true}));
}else{
S.toolCalls=[];
}
clearLiveToolCards();
if(activeStreamId){
S.busy=true;
S.activeStreamId=activeStreamId;
@@ -202,13 +226,9 @@ async function loadSession(sid){
updateQueueBadge(sid);
startApprovalPolling(sid);
if(typeof startClarifyPolling==='function') startClarifyPolling(sid);
if(typeof attachLiveStream==='function') attachLiveStream(sid, activeStreamId, data.session.pending_attachments||[], {reconnecting:true});
if(typeof attachLiveStream==='function') attachLiveStream(sid, activeStreamId, S.session.pending_attachments||[], {reconnecting:true});
else if(typeof watchInflightSession==='function') watchInflightSession(sid, activeStreamId);
}else{
// Reset per-session visual state: the viewed session is idle even if another
// session's stream is still running in the background.
// We directly update the DOM instead of calling setBusy(false), because
// setBusy(false) drains the viewed session's queued follow-up turns.
S.busy=false;
S.activeStreamId=null;
updateSendBtn();
@@ -219,6 +239,7 @@ async function loadSession(sid){
syncTopbar();renderMessages();highlightCode();loadDir('.');
}
}
// Sync context usage indicator from session data
const _s=S.session;
if(_s&&typeof _syncCtxIndicator==='function'){
@@ -235,6 +256,34 @@ async function loadSession(sid){
}
}
// Lazily fetch the full message history for a session.
// loadSession() calls this after its metadata-only request (messages=0).
// No-op when S.messages already starts with an entry that has a role.
// Otherwise it requests the session with messages=1, sets S.toolCalls,
// clears live tool cards, and stores the role-bearing messages in S.messages.
async function _ensureMessagesLoaded(sid) {
  const firstExisting = S.messages && S.messages.length > 0 ? S.messages[0] : null;
  if (firstExisting && firstExisting.role) return;

  // Nothing usable cached: request the session including its messages.
  const response = await api(`/api/session?session_id=${encodeURIComponent(sid)}&messages=1`);
  const session = response.session;
  const hasRole = (entry) => entry && entry.role;
  const loaded = (session.messages || []).filter(hasRole);

  // An assistant message carries tool metadata when it has a non-empty
  // tool_calls array or a content array containing a tool_use part.
  const carriesToolMetadata = (entry) => {
    if (!entry || entry.role !== 'assistant') return false;
    if (Array.isArray(entry.tool_calls) && entry.tool_calls.length > 0) return true;
    return Array.isArray(entry.content)
      && entry.content.some((part) => part && part.type === 'tool_use');
  };
  const perMessageMetadata = loaded.some(carriesToolMetadata);

  // Use the session-level tool_calls summary only when no message carries
  // its own tool metadata; each summary entry is marked as done.
  let toolCalls = [];
  if (!perMessageMetadata && session.tool_calls && session.tool_calls.length) {
    toolCalls = session.tool_calls.map((call) => ({...call, done: true}));
  }
  S.toolCalls = toolCalls;
  clearLiveToolCards();
  S.messages = loaded;
}
let _allSessions = []; // cached for search filter
let _renamingSid = null; // session_id currently being renamed (blocks list re-renders)
let _showArchived = false; // toggle to show archived sessions