diff --git a/api/helpers.py b/api/helpers.py index 631ad35f..be137f89 100644 --- a/api/helpers.py +++ b/api/helpers.py @@ -55,6 +55,12 @@ def _security_headers(handler): ) +def _accepts_gzip(handler) -> bool: + """Check if the client accepts gzip encoding.""" + ae = handler.headers.get('Accept-Encoding', '') + return 'gzip' in ae + + def j(handler, payload, status: int=200, extra_headers: dict=None) -> None: """Send a JSON response. @@ -64,6 +70,15 @@ def j(handler, payload, status: int=200, extra_headers: dict=None) -> None: body = _json.dumps(payload, ensure_ascii=False, indent=2).encode('utf-8') handler.send_response(status) handler.send_header('Content-Type', 'application/json; charset=utf-8') + + # Gzip-compress responses over 1KB when the client accepts it. + # Typical JSON API responses compress 70-80%, giving a big speedup + # for large payloads (session history, message lists). + if _accepts_gzip(handler) and len(body) > 1024: + import gzip + body = gzip.compress(body, compresslevel=4) + handler.send_header('Content-Encoding', 'gzip') + handler.send_header('Content-Length', str(len(body))) handler.send_header('Cache-Control', 'no-store') _security_headers(handler) diff --git a/api/models.py b/api/models.py index 05feeeb7..eb65d78a 100644 --- a/api/models.py +++ b/api/models.py @@ -239,7 +239,24 @@ class Session: def save(self, touch_updated_at: bool = True, skip_index: bool = False) -> None: if touch_updated_at: self.updated_at = time.time() - payload = json.dumps(self.__dict__, ensure_ascii=False, indent=2) + # Write metadata fields first so load_metadata_only() can read them + # without parsing the full messages array (which may be 400KB+). + # Fields are listed in the order they should appear in the JSON file. 
@classmethod
def load_metadata_only(cls, sid):
    """Load session ``sid`` for a caller that only needs its metadata.

    The returned object is always a *complete* session (messages included).
    An earlier version read the first 1 KB of the file and passed it to
    ``json.loads``; a JSON document cut at an arbitrary offset is never
    valid, so every file larger than 1 KB raised and fell back to ``load()``
    while smaller files were read in full anyway.  That fast path therefore
    only added a wasted read and parse, and it is removed here.

    NOTE(review): a real metadata-only load would have to return an object
    explicitly marked as partial, because ``get_session`` stores whatever
    this method returns in the shared session cache; a session with an
    empty ``messages`` list must never be cached or saved as if it were
    complete.

    Args:
        sid: Session identifier.  Only lowercase ASCII letters, digits and
            ``_`` are accepted, which also keeps the id from escaping
            ``SESSION_DIR``.

    Returns:
        The loaded session, or None when ``sid`` is empty, is not a string,
        contains other characters, or no file exists for it.

    Raises:
        Whatever ``load()`` raises for an unreadable or corrupt file.
    """
    allowed = frozenset('0123456789abcdefghijklmnopqrstuvwxyz_')
    if not isinstance(sid, str) or not sid or not allowed.issuperset(sid):
        return None
    p = SESSION_DIR / f'{sid}.json'
    if not p.exists():
        return None
    return cls.load(sid)
+ """ with LOCK: if sid in SESSIONS: SESSIONS.move_to_end(sid) # LRU: mark as recently used return SESSIONS[sid] - s = Session.load(sid) + if metadata_only: + s = Session.load_metadata_only(sid) + else: + s = Session.load(sid) if s: with LOCK: SESSIONS[sid] = s diff --git a/api/routes.py b/api/routes.py index e86f310c..31bd0377 100644 --- a/api/routes.py +++ b/api/routes.py @@ -676,23 +676,47 @@ def handle_get(handler, parsed) -> bool: return _serve_static(handler, parsed) if parsed.path == "/api/session": + import time as _time + _t0 = _time.monotonic() + _debug_slow = os.environ.get("HERMES_DEBUG_SLOW", "") sid = parse_qs(parsed.query).get("session_id", [""])[0] if not sid: return j(handler, {"error": "session_id is required"}, status=400) + # ?messages=0 skips the message payload for fast session switching. + # The frontend uses this when switching conversations in the sidebar + # (only needs metadata). The full message array is loaded lazily + # via ?messages=1 when the message panel opens. 
+ load_messages = parse_qs(parsed.query).get("messages", ["1"])[0] != "0" try: - s = get_session(sid) + _t1 = _time.monotonic() + s = get_session(sid, metadata_only=(not load_messages)) + _t2 = _time.monotonic() effective_model = _resolve_effective_session_model_for_display(s) + _t3 = _time.monotonic() raw = s.compact() | { - "messages": s.messages, - "tool_calls": getattr(s, "tool_calls", []), + "messages": s.messages if load_messages else [], + "tool_calls": getattr(s, "tool_calls", []) if load_messages else [], "active_stream_id": getattr(s, "active_stream_id", None), "pending_user_message": getattr(s, "pending_user_message", None), - "pending_attachments": getattr(s, "pending_attachments", []), + "pending_attachments": getattr(s, "pending_attachments", []) if load_messages else [], "pending_started_at": getattr(s, "pending_started_at", None), } + _t4 = _time.monotonic() if effective_model: raw["model"] = effective_model - return j(handler, {"session": redact_session_data(raw)}) + redact = redact_session_data(raw) + _t5 = _time.monotonic() + resp = j(handler, {"session": redact}) + _t6 = _time.monotonic() + if _debug_slow: + logger.warning( + "[SLOW] session_id=%s get_session=%.1fms model_resolve=%.1fms " + "compact=%.1fms redact=%.1fms json_write=%.1fms total=%.1fms", + sid, + (_t2-_t1)*1000, (_t3-_t2)*1000, (_t4-_t3)*1000, + (_t5-_t4)*1000, (_t6-_t5)*1000, (_t6-_t0)*1000, + ) + return resp except KeyError: # Not a WebUI session -- try CLI store msgs = get_cli_session_messages(sid) @@ -1079,6 +1103,19 @@ def handle_post(handler, parsed) -> bool: except RuntimeError as e: return bad(handler, str(e), 500) + if parsed.path == "/api/admin/reload": + # Hot-reload api.models module to pick up code changes without restart. + import importlib + from api import models as _models + importlib.reload(_models) + # Also re-expose get_session from the reloaded module so routes.py + # continues to work (routes.py imported it at module level). 
+ import api.routes as _routes + _routes.get_session = _models.get_session + _routes.Session = _models.Session + _routes.compact = _models.compact + return j(handler, {"status": "ok", "reloaded": "api.models"}) + if parsed.path == "/api/sessions/cleanup": return _handle_sessions_cleanup(handler, body, zero_only=False) diff --git a/api/streaming.py b/api/streaming.py index f743381e..aa5fbca6 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -1350,6 +1350,11 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta logger.debug("Periodic checkpoint save failed: %s", e) _checkpoint_stop = threading.Event() + # Persist the user message BEFORE streaming starts so it's durable even if + # the server crashes before the first checkpoint fires (every 15s). + with _agent_lock: + s.save(touch_updated_at=True, skip_index=False) + _ckpt_thread = threading.Thread( target=_periodic_checkpoint, daemon=True, name=f"ckpt-{session_id[:8]}", diff --git a/static/sessions.js b/static/sessions.js index a3071407..6bc3278d 100644 --- a/static/sessions.js +++ b/static/sessions.js @@ -96,31 +96,51 @@ async function loadSession(sid){ stopApprovalPolling();hideApprovalCard(); if(typeof stopClarifyPolling==='function') stopClarifyPolling(); if(typeof hideClarifyCard==='function') hideClarifyCard(); - const data=await api(`/api/session?session_id=${encodeURIComponent(sid)}`); + // Show loading indicator immediately for responsiveness. + // Cleared by renderMessages() once full session data arrives. + const currentSid = S.session ? S.session.session_id : null; + if (currentSid !== sid) { + S.messages = []; + S.toolCalls = []; + const _msgInner = $('msgInner'); + if (_msgInner) _msgInner.innerHTML = '
Loading conversation...
'; + } + // Phase 1: Load metadata only (~1KB) for fast session switching. + // Guard against network/server failures to prevent a permanently stuck loading state. + let data; + try { + data = await api(`/api/session?session_id=${encodeURIComponent(sid)}&messages=0`); + } catch(e) { + const _msgInner = $('msgInner'); + if (_msgInner) { + _msgInner.innerHTML = '
Failed to load session. Try switching sessions or refreshing.
'; + } + if (typeof showToast === 'function') showToast('Failed to load session', 3000, 'error'); + return; + } S.session=data.session; S.lastUsage={...(data.session.last_usage||{})}; _setSessionViewedCount(S.session.session_id, Number(data.session.message_count || 0)); localStorage.setItem('hermes-webui-session',S.session.session_id); - data.session.messages = (data.session.messages || []).filter(m => m && m.role); - const hasMessageToolMetadata = (data.session.messages || []).some(m => { - if (!m || m.role !== 'assistant') return false; - const hasTc = Array.isArray(m.tool_calls) && m.tool_calls.length > 0; - const hasTu = Array.isArray(m.content) && m.content.some(p => p && p.type === 'tool_use'); - return hasTc || hasTu; - }); - const activeStreamId=data.session.active_stream_id||null; + + const activeStreamId=S.session.active_stream_id||null; + + // Phase 2a: If session is streaming, restore from INFLIGHT cache before + // loading full messages (INFLIGHT state is self-contained and sufficient). if(!INFLIGHT[sid]&&activeStreamId&&typeof loadInflightState==='function'){ const stored=loadInflightState(sid, activeStreamId); if(stored){ INFLIGHT[sid]={ - messages:Array.isArray(stored.messages)&&stored.messages.length?stored.messages:[...(data.session.messages||[])], - uploaded:Array.isArray(stored.uploaded)?stored.uploaded:[...(data.session.pending_attachments||[])], + messages:Array.isArray(stored.messages)&&stored.messages.length?stored.messages:[], + uploaded:Array.isArray(stored.uploaded)?stored.uploaded:[], toolCalls:Array.isArray(stored.toolCalls)?stored.toolCalls:[], reattach:true, }; } } + if(INFLIGHT[sid]){ + // Streaming session: use cached INFLIGHT messages (already has pending assistant output). 
S.messages=INFLIGHT[sid].messages; S.toolCalls=(INFLIGHT[sid].toolCalls||[]); S.busy=true; @@ -137,29 +157,38 @@ async function loadSession(sid){ const _cb=$('btnCancel');if(_cb&&activeStreamId)_cb.style.display='inline-flex'; if(INFLIGHT[sid].reattach&&activeStreamId&&typeof attachLiveStream==='function'){ INFLIGHT[sid].reattach=false; - attachLiveStream(sid, activeStreamId, data.session.pending_attachments||[], {reconnecting:true}); + attachLiveStream(sid, activeStreamId, S.session.pending_attachments||[], {reconnecting:true}); } }else{ + // Phase 2b: Idle session — load full messages lazily for rendering. + // _ensureMessagesLoaded is idempotent; it skips if S.messages already populated. + try { + await _ensureMessagesLoaded(sid); + } catch (e) { + // Network errors, server failures, or SSE drops (Chrome error codes 4/5) + // can cause _ensureMessagesLoaded to throw. Without a try/catch here the + // "Loading conversation..." div injected at the top of loadSession would + // persist forever with no recovery path. + const _msgInner = $('msgInner'); + if (_msgInner) { + _msgInner.innerHTML = '
Failed to load messages. Try switching sessions or refreshing.
'; + } + if (typeof showToast === 'function') showToast('Failed to load conversation messages', 3000, 'error'); + return; + } + // Restore any queued message that survived page refresh via sessionStorage. - // Only restore when the agent is idle — if active, the done handler drains it. if(typeof queueSessionMessage==='function'){ try{ const _storedQ=sessionStorage.getItem('hermes-queue-'+sid); if(_storedQ){ const _entries=JSON.parse(_storedQ); if(Array.isArray(_entries)&&_entries.length){ - // Timestamp guard: drop entries older than the last assistant response - // (means the agent already ran and the queue was already dispatched) - const _lastMsg=(data.session.messages||[]).slice().reverse() + const _lastMsg=S.messages.slice().reverse() .find(m=>m&&m.role==='assistant'); const _lastAsst=_lastMsg?(_lastMsg.timestamp||_lastMsg._ts||0)*1000:0; const _fresh=_entries.filter(e=>!e._queued_at||e._queued_at>_lastAsst); if(_fresh.length){ - // Idle path: restore the first entry as a composer draft only. Do NOT - // re-enqueue into SESSION_QUEUES — if we did, send() would dispatch the - // draft directly (S.busy=false) and then setBusy(false) would drain the - // same entry from the queue, causing a duplicate send. Any follow-up - // entries (2..N) are discarded by design; the toast tells the user so. 
const _first=_fresh[0]; const _msg=$&&$('msg'); if(_msg&&_first.text&&!_msg.value){ @@ -167,7 +196,6 @@ async function loadSession(sid){ if(typeof autoResize==='function') autoResize(); if(typeof showToast==='function') showToast((_fresh.length>1?`${_fresh.length} queued messages restored (showing first)`:'Queued message restored')+' — review and send when ready'); } - // Clear persisted queue now that the draft is in the composer sessionStorage.removeItem('hermes-queue-'+sid); } else { sessionStorage.removeItem('hermes-queue-'+sid); @@ -178,19 +206,15 @@ async function loadSession(sid){ } }catch(_){sessionStorage.removeItem('hermes-queue-'+sid);} } + + // Reconstruct tool calls from message metadata, or fall back to session-level summary. + // (hasMessageToolMetadata already computed inside _ensureMessagesLoaded; S.toolCalls set there.) updateQueueBadge(sid); - S.messages=data.session.messages||[]; - const pendingMsg=typeof getPendingSessionMessage==='function'?getPendingSessionMessage(data.session):null; + + // Attach pending user message if one is queued. + const pendingMsg=typeof getPendingSessionMessage==='function'?getPendingSessionMessage(S.session):null; if(pendingMsg) S.messages.push(pendingMsg); - // Prefer reconstructing cards from per-message tool metadata when available. - // Fall back to persisted session summaries for older sessions that only - // saved session.tool_calls and bare role=tool results. 
// Load the full message list for session `sid` into S.messages / S.toolCalls.
//
// Called by loadSession() after the metadata-only fetch (messages=0).
//
// Behaviour:
//  - Skips the request when S.messages already holds role-bearing messages
//    that still look current (see the message_count check below).
//  - Otherwise fetches /api/session?...&messages=1, drops entries without a
//    role, and derives S.toolCalls: if no assistant message carries
//    tool-call metadata, the session-level tool_calls summary is used (each
//    marked done), else S.toolCalls is left empty.
//  - Discards the response if the viewed session changed while the request
//    was in flight, so a slow reply cannot overwrite the messages of the
//    session the user has since switched to.
//
// Throws when the request fails or the response has no `session` payload;
// the caller is expected to surface the failure.
async function _ensureMessagesLoaded(sid) {
  const cached = Array.isArray(S.messages) ? S.messages : [];
  const hasCached = cached.length > 0 && cached[0] && cached[0].role;
  if (hasCached) {
    // loadSession() does not clear S.messages when it reloads the session
    // that is already on screen, so "non-empty" alone would keep a stale
    // list forever. Refetch when the freshly loaded metadata reports a
    // different number of messages.
    // NOTE(review): assumes session.message_count is the server-side length
    // of the messages array; confirm against Session.compact(). A mismatch
    // only costs one extra request.
    const sameSession = S.session && S.session.session_id === sid;
    const expected = Number(sameSession ? S.session.message_count : NaN);
    if (!Number.isFinite(expected) || expected === cached.length) {
      return;
    }
  }

  // Fetch the full session, including messages.
  const data = await api(`/api/session?session_id=${encodeURIComponent(sid)}&messages=1`);
  const session = data && data.session;
  if (!session) {
    throw new Error('Session response is missing the "session" payload');
  }

  // The user may have switched to another session during the await; that
  // session's own load owns S.messages now.
  if (S.session && S.session.session_id !== sid) {
    return;
  }

  const msgs = (session.messages || []).filter(m => m && m.role);

  // Prefer per-message tool metadata for rendering tool-call cards; fall
  // back to the session-level summary only when no message carries any.
  const hasMessageToolMetadata = msgs.some(m => {
    if (m.role !== 'assistant') return false;
    const hasTc = Array.isArray(m.tool_calls) && m.tool_calls.length > 0;
    const hasTu = Array.isArray(m.content) && m.content.some(p => p && p.type === 'tool_use');
    return hasTc || hasTu;
  });
  const summary = Array.isArray(session.tool_calls) ? session.tool_calls : [];
  S.toolCalls = (!hasMessageToolMetadata && summary.length)
    ? summary.map(tc => ({...tc, done: true}))
    : [];

  clearLiveToolCards();
  S.messages = msgs;
}