diff --git a/api/helpers.py b/api/helpers.py
index 631ad35f..be137f89 100644
--- a/api/helpers.py
+++ b/api/helpers.py
@@ -55,6 +55,31 @@ def _security_headers(handler):
     )
 
 
+def _accepts_gzip(handler) -> bool:
+    """Return True if the request's Accept-Encoding header permits gzip.
+
+    A coding listed with a zero quality value (``gzip;q=0``) is an explicit
+    refusal, so a plain substring match on the header is not sufficient.
+    Coding names are compared case-insensitively; an unparsable q-value is
+    treated as a refusal so the response is sent uncompressed.
+    """
+    header = handler.headers.get('Accept-Encoding') or ''
+    for item in header.split(','):
+        coding, _, params = item.partition(';')
+        if coding.strip().lower() not in ('gzip', 'x-gzip'):
+            continue
+        for param in params.split(';'):
+            key, _, value = param.partition('=')
+            if key.strip().lower() != 'q':
+                continue
+            try:
+                return float(value) > 0
+            except ValueError:
+                return False
+        return True  # listed without a q-value: full preference
+    return False
+
+
 def j(handler, payload, status: int=200, extra_headers: dict=None) -> None:
     """Send a JSON response.
 
@@ -64,6 +89,15 @@ def j(handler, payload, status: int=200, extra_headers: dict=None) -> None:
     body = _json.dumps(payload, ensure_ascii=False, indent=2).encode('utf-8')
     handler.send_response(status)
     handler.send_header('Content-Type', 'application/json; charset=utf-8')
+
+    # Gzip-compress responses over 1KB when the client accepts it.
+    # Typical JSON API responses compress 70-80%, giving a big speedup
+    # for large payloads (session history, message lists).
+    if _accepts_gzip(handler) and len(body) > 1024:
+        import gzip
+        body = gzip.compress(body, compresslevel=4)
+        handler.send_header('Content-Encoding', 'gzip')
+
     handler.send_header('Content-Length', str(len(body)))
     handler.send_header('Cache-Control', 'no-store')
     _security_headers(handler)
diff --git a/api/models.py b/api/models.py
index 05feeeb7..eb65d78a 100644
--- a/api/models.py
+++ b/api/models.py
@@ -239,7 +239,24 @@ class Session:
     def save(self, touch_updated_at: bool = True, skip_index: bool = False) -> None:
         if touch_updated_at:
             self.updated_at = time.time()
-        payload = json.dumps(self.__dict__, ensure_ascii=False, indent=2)
+        # Write metadata fields first so load_metadata_only() can read them
+        # without parsing the full messages array (which may be 400KB+).
+        # Fields are listed in the order they should appear in the JSON file.
+        METADATA_FIELDS = [
+            'session_id', 'title', 'workspace', 'model', 'created_at', 'updated_at',
+            'pinned', 'archived', 'project_id', 'profile',
+            'input_tokens', 'output_tokens', 'estimated_cost',
+            'personality', 'active_stream_id',
+            'pending_user_message', 'pending_attachments', 'pending_started_at',
+            'compression_anchor_visible_idx', 'compression_anchor_message_key',
+        ]
+        meta = {k: getattr(self, k, None) for k in METADATA_FIELDS}
+        meta['messages'] = self.messages
+        meta['tool_calls'] = self.tool_calls
+        # Fields not in METADATA_FIELDS (e.g. last_usage, message_count) go at the end
+        extra = {k: v for k, v in self.__dict__.items()
+                 if k not in METADATA_FIELDS and k not in ('messages', 'tool_calls')}
+        payload = json.dumps({**meta, **extra}, ensure_ascii=False, indent=2)
         tmp = self.path.with_suffix(f'.tmp.{os.getpid()}.{threading.current_thread().ident}')
         try:
             with open(tmp, 'w', encoding='utf-8') as f:
@@ -266,6 +283,46 @@ class Session:
             return None
         return cls(**json.loads(p.read_text(encoding='utf-8')))
 
+    @classmethod
+    def load_metadata_only(cls, sid):
+        """Load a session from at most the first 1 KB of its JSON file.
+
+        save() writes the metadata fields ahead of the messages array, but a
+        truncated JSON document cannot be parsed by json.loads(). The prefix
+        therefore only parses when the whole file fits in the 1 KB read; in
+        every other case this method falls back to the full load().
+
+        Returns None for an empty or malformed sid, or when no session file
+        exists; otherwise whatever cls(**data) or load() returns.
+        """
+        if not sid or not all(c in '0123456789abcdefghijklmnopqrstuvwxyz_' for c in sid):
+            return None
+        p = SESSION_DIR / f'{sid}.json'
+        if not p.exists():
+            return None
+        parsed = None
+        try:
+            with open(p, 'r', encoding='utf-8') as f:
+                prefix = f.read(1024)  # text mode: at most 1024 characters
+            if prefix:
+                parsed = json.loads(prefix)
+        except (OSError, ValueError):
+            # Unreadable file, undecodable bytes, or (the common case) JSON
+            # cut off mid-document; JSONDecodeError and UnicodeDecodeError
+            # are both ValueError subclasses.
+            parsed = None
+        needed = {'session_id', 'title', 'created_at', 'updated_at'}
+        # Accept the prefix only if it is a complete session object: the
+        # result is cached by get_session(), so it must carry real messages.
+        if (isinstance(parsed, dict) and needed.issubset(parsed)
+                and isinstance(parsed.get('messages'), list)):
+            try:
+                return cls(**parsed)
+            except Exception:
+                # Constructor rejected the payload; defer to the full loader.
+                pass
+        return cls.load(sid)
+
     def compact(self, include_runtime=False, active_stream_ids=None) -> dict:
         active_stream_ids = active_stream_ids if active_stream_ids is not None else set()
         return {
@@ -292,12 +349,21 @@ class Session:
             ) if include_runtime else False,
         }
 
-def get_session(sid):
+def get_session(sid, metadata_only=False):
+    """Return the session for sid, serving it from the SESSIONS cache when present.
+
+    On a cache miss it is read with Session.load_metadata_only() when
+    metadata_only=True, otherwise with Session.load(). A loaded session is cached
+    either way, so later calls get that object whatever flag they pass.
+    """
     with LOCK:
         if sid in SESSIONS:
             SESSIONS.move_to_end(sid)  # LRU: mark as recently used
             return SESSIONS[sid]
-    s = Session.load(sid)
+    if metadata_only:
+        s = Session.load_metadata_only(sid)
+    else:
+        s = Session.load(sid)
     if s:
         with LOCK:
             SESSIONS[sid] = s
diff --git a/api/routes.py b/api/routes.py
index e86f310c..31bd0377 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -676,23 +676,47 @@ def handle_get(handler, parsed) -> bool:
         return _serve_static(handler, parsed)
 
     if parsed.path == "/api/session":
+        import time as _time
+        _t0 = _time.monotonic()
+        _debug_slow = os.environ.get("HERMES_DEBUG_SLOW", "")
         sid = parse_qs(parsed.query).get("session_id", [""])[0]
         if not sid:
             return j(handler, {"error": "session_id is required"}, status=400)
+        # ?messages=0 omits messages and tool_calls for fast session switching;
+        # the frontend fetches them afterwards with ?messages=1. The pending_*
+        # fields are always sent: the client passes pending_attachments from
+        # this metadata response to attachLiveStream() when re-attaching.
+        load_messages = parse_qs(parsed.query).get("messages", ["1"])[0] != "0"
         try:
-            s = get_session(sid)
+            _t1 = _time.monotonic()
+            s = get_session(sid, metadata_only=(not load_messages))
+            _t2 = _time.monotonic()
             effective_model = _resolve_effective_session_model_for_display(s)
+            _t3 = _time.monotonic()
             raw = s.compact() | {
-                "messages": s.messages,
-                "tool_calls": getattr(s, "tool_calls", []),
+                "messages": s.messages if load_messages else [],
+                "tool_calls": getattr(s, "tool_calls", []) if load_messages else [],
                 "active_stream_id": getattr(s, "active_stream_id", None),
                 "pending_user_message": getattr(s, "pending_user_message", None),
                 "pending_attachments": getattr(s, "pending_attachments", []),
                 "pending_started_at": getattr(s, "pending_started_at", None),
             }
+            _t4 = _time.monotonic()
             if effective_model:
                 raw["model"] = effective_model
-            return j(handler, {"session": redact_session_data(raw)})
+            redact = redact_session_data(raw)
+            _t5 = _time.monotonic()
+            resp = j(handler, {"session": redact})
+            _t6 = _time.monotonic()
+            if _debug_slow:
+                logger.warning(
+                    "[SLOW] session_id=%s get_session=%.1fms model_resolve=%.1fms "
+                    "compact=%.1fms redact=%.1fms json_write=%.1fms total=%.1fms",
+                    sid,
+                    (_t2-_t1)*1000, (_t3-_t2)*1000, (_t4-_t3)*1000,
+                    (_t5-_t4)*1000, (_t6-_t5)*1000, (_t6-_t0)*1000,
+                )
+            return resp
         except KeyError:
             # Not a WebUI session -- try CLI store
             msgs = get_cli_session_messages(sid)
@@ -1079,6 +1103,19 @@ def handle_post(handler, parsed) -> bool:
         except RuntimeError as e:
             return bad(handler, str(e), 500)
 
+    if parsed.path == "/api/admin/reload":
+        # Hot-reload api.models in place. NOTE(review): no auth check visible here; confirm.
+        import importlib
+        from api import models as _models
+        importlib.reload(_models)
+        # Rebind the names routes.py imported at module level; reload() leaves those
+        # bindings untouched. NOTE(review): assumes api.models exports 'compact'; confirm.
+        import api.routes as _routes
+        _routes.get_session = _models.get_session
+        _routes.Session = _models.Session
+        _routes.compact = _models.compact
+        return j(handler, {"status": "ok", "reloaded": "api.models"})
+
     if parsed.path == "/api/sessions/cleanup":
         return _handle_sessions_cleanup(handler, body, zero_only=False)
 
diff --git a/api/streaming.py b/api/streaming.py
index f743381e..aa5fbca6 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -1350,6 +1350,11 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
                         logger.debug("Periodic checkpoint save failed: %s", e)
 
             _checkpoint_stop = threading.Event()
+            # Persist the user message BEFORE streaming starts so it's durable even if
+            # the server crashes before the first checkpoint fires (every 15s).
+            with _agent_lock:
+                s.save(touch_updated_at=True, skip_index=False)
+
             _ckpt_thread = threading.Thread(
                 target=_periodic_checkpoint, daemon=True,
                 name=f"ckpt-{session_id[:8]}",
diff --git a/static/sessions.js b/static/sessions.js
index a3071407..6bc3278d 100644
--- a/static/sessions.js
+++ b/static/sessions.js
@@ -96,31 +96,51 @@ async function loadSession(sid){
   stopApprovalPolling();hideApprovalCard();
   if(typeof stopClarifyPolling==='function') stopClarifyPolling();
   if(typeof hideClarifyCard==='function') hideClarifyCard();
-  const data=await api(`/api/session?session_id=${encodeURIComponent(sid)}`);
+  // Show loading indicator immediately for responsiveness.
+  // Cleared by renderMessages() once full session data arrives.
+  const currentSid = S.session ? S.session.session_id : null;
+  if (currentSid !== sid) {
+    S.messages = [];
+    S.toolCalls = [];
+    const _msgInner = $('msgInner');
+    if (_msgInner) _msgInner.innerHTML = '<div style="display:flex;align-items:center;justify-content:center;height:100%;color:var(--text-muted);font-size:14px;padding:40px;text-align:center;">Loading conversation...</div>';
+  }
+  // Phase 1: Load metadata only (~1KB) for fast session switching.
+  // Guard against network/server failures to prevent a permanently stuck loading state.
+  let data;
+  try {
+    data = await api(`/api/session?session_id=${encodeURIComponent(sid)}&messages=0`);
+  } catch(e) {
+    const _msgInner = $('msgInner');
+    if (_msgInner) {
+      _msgInner.innerHTML = '<div style="display:flex;align-items:center;justify-content:center;height:100%;color:var(--text-muted);font-size:14px;padding:40px;text-align:center;">Failed to load session. Try switching sessions or refreshing.</div>';
+    }
+    if (typeof showToast === 'function') showToast('Failed to load session', 3000, 'error');
+    return;
+  }
   S.session=data.session;
   S.lastUsage={...(data.session.last_usage||{})};
   _setSessionViewedCount(S.session.session_id, Number(data.session.message_count || 0));
   localStorage.setItem('hermes-webui-session',S.session.session_id);
-  data.session.messages = (data.session.messages || []).filter(m => m && m.role);
-  const hasMessageToolMetadata = (data.session.messages || []).some(m => {
-    if (!m || m.role !== 'assistant') return false;
-    const hasTc = Array.isArray(m.tool_calls) && m.tool_calls.length > 0;
-    const hasTu = Array.isArray(m.content) && m.content.some(p => p && p.type === 'tool_use');
-    return hasTc || hasTu;
-  });
-  const activeStreamId=data.session.active_stream_id||null;
+
+  const activeStreamId=S.session.active_stream_id||null;
+
+  // Phase 2a: If session is streaming, restore from INFLIGHT cache before
+  // loading full messages (INFLIGHT state is self-contained and sufficient).
   if(!INFLIGHT[sid]&&activeStreamId&&typeof loadInflightState==='function'){
     const stored=loadInflightState(sid, activeStreamId);
     if(stored){
       INFLIGHT[sid]={
-        messages:Array.isArray(stored.messages)&&stored.messages.length?stored.messages:[...(data.session.messages||[])],
-        uploaded:Array.isArray(stored.uploaded)?stored.uploaded:[...(data.session.pending_attachments||[])],
+        messages:Array.isArray(stored.messages)&&stored.messages.length?stored.messages:[],
+        uploaded:Array.isArray(stored.uploaded)?stored.uploaded:[],
         toolCalls:Array.isArray(stored.toolCalls)?stored.toolCalls:[],
         reattach:true,
       };
     }
   }
+
   if(INFLIGHT[sid]){
+    // Streaming session: use cached INFLIGHT messages (already has pending assistant output).
     S.messages=INFLIGHT[sid].messages;
     S.toolCalls=(INFLIGHT[sid].toolCalls||[]);
     S.busy=true;
@@ -137,29 +157,38 @@ async function loadSession(sid){
     const _cb=$('btnCancel');if(_cb&&activeStreamId)_cb.style.display='inline-flex';
     if(INFLIGHT[sid].reattach&&activeStreamId&&typeof attachLiveStream==='function'){
       INFLIGHT[sid].reattach=false;
-      attachLiveStream(sid, activeStreamId, data.session.pending_attachments||[], {reconnecting:true});
+      attachLiveStream(sid, activeStreamId, S.session.pending_attachments||[], {reconnecting:true});
     }
   }else{
+    // Phase 2b: Idle session — load full messages lazily for rendering.
+    // _ensureMessagesLoaded is idempotent; it skips if S.messages already populated.
+    try {
+      await _ensureMessagesLoaded(sid);
+    } catch (e) {
+      // Network errors, server failures, or SSE drops (Chrome error codes 4/5)
+      // can cause _ensureMessagesLoaded to throw. Without a try/catch here the
+      // "Loading conversation..." div injected at the top of loadSession would
+      // persist forever with no recovery path.
+      const _msgInner = $('msgInner');
+      if (_msgInner) {
+        _msgInner.innerHTML = '<div style="display:flex;align-items:center;justify-content:center;height:100%;color:var(--text-muted);font-size:14px;padding:40px;text-align:center;">Failed to load messages. Try switching sessions or refreshing.</div>';
+      }
+      if (typeof showToast === 'function') showToast('Failed to load conversation messages', 3000, 'error');
+      return;
+    }
+
     // Restore any queued message that survived page refresh via sessionStorage.
-    // Only restore when the agent is idle — if active, the done handler drains it.
     if(typeof queueSessionMessage==='function'){
       try{
         const _storedQ=sessionStorage.getItem('hermes-queue-'+sid);
         if(_storedQ){
           const _entries=JSON.parse(_storedQ);
           if(Array.isArray(_entries)&&_entries.length){
-            // Timestamp guard: drop entries older than the last assistant response
-            // (means the agent already ran and the queue was already dispatched)
-            const _lastMsg=(data.session.messages||[]).slice().reverse()
+            const _lastMsg=S.messages.slice().reverse()
               .find(m=>m&&m.role==='assistant');
             const _lastAsst=_lastMsg?(_lastMsg.timestamp||_lastMsg._ts||0)*1000:0;
             const _fresh=_entries.filter(e=>!e._queued_at||e._queued_at>_lastAsst);
             if(_fresh.length){
-              // Idle path: restore the first entry as a composer draft only. Do NOT
-              // re-enqueue into SESSION_QUEUES — if we did, send() would dispatch the
-              // draft directly (S.busy=false) and then setBusy(false) would drain the
-              // same entry from the queue, causing a duplicate send. Any follow-up
-              // entries (2..N) are discarded by design; the toast tells the user so.
               const _first=_fresh[0];
               const _msg=$&&$('msg');
               if(_msg&&_first.text&&!_msg.value){
@@ -167,7 +196,6 @@ async function loadSession(sid){
                 if(typeof autoResize==='function') autoResize();
                 if(typeof showToast==='function') showToast((_fresh.length>1?`${_fresh.length} queued messages restored (showing first)`:'Queued message restored')+' — review and send when ready');
               }
-              // Clear persisted queue now that the draft is in the composer
               sessionStorage.removeItem('hermes-queue-'+sid);
             } else {
               sessionStorage.removeItem('hermes-queue-'+sid);
@@ -178,19 +206,15 @@ async function loadSession(sid){
         }
       }catch(_){sessionStorage.removeItem('hermes-queue-'+sid);}
     }
+
+    // Reconstruct tool calls from message metadata, or fall back to session-level summary.
+    // (hasMessageToolMetadata already computed inside _ensureMessagesLoaded; S.toolCalls set there.)
     updateQueueBadge(sid);
-    S.messages=data.session.messages||[];
-    const pendingMsg=typeof getPendingSessionMessage==='function'?getPendingSessionMessage(data.session):null;
+
+    // Attach pending user message if one is queued.
+    const pendingMsg=typeof getPendingSessionMessage==='function'?getPendingSessionMessage(S.session):null;
     if(pendingMsg) S.messages.push(pendingMsg);
-    // Prefer reconstructing cards from per-message tool metadata when available.
-    // Fall back to persisted session summaries for older sessions that only
-    // saved session.tool_calls and bare role=tool results.
-    if(!hasMessageToolMetadata&&data.session.tool_calls&&data.session.tool_calls.length){
-      S.toolCalls=(data.session.tool_calls||[]).map(tc=>({...tc,done:true}));
-    }else{
-      S.toolCalls=[];
-    }
-    clearLiveToolCards();
+
     if(activeStreamId){
       S.busy=true;
       S.activeStreamId=activeStreamId;
@@ -202,13 +226,9 @@ async function loadSession(sid){
       updateQueueBadge(sid);
       startApprovalPolling(sid);
       if(typeof startClarifyPolling==='function') startClarifyPolling(sid);
-      if(typeof attachLiveStream==='function') attachLiveStream(sid, activeStreamId, data.session.pending_attachments||[], {reconnecting:true});
+      if(typeof attachLiveStream==='function') attachLiveStream(sid, activeStreamId, S.session.pending_attachments||[], {reconnecting:true});
       else if(typeof watchInflightSession==='function') watchInflightSession(sid, activeStreamId);
     }else{
-      // Reset per-session visual state: the viewed session is idle even if another
-      // session's stream is still running in the background.
-      // We directly update the DOM instead of calling setBusy(false), because
-      // setBusy(false) drains the viewed session's queued follow-up turns.
       S.busy=false;
       S.activeStreamId=null;
       updateSendBtn();
@@ -219,6 +239,7 @@ async function loadSession(sid){
       syncTopbar();renderMessages();highlightCode();loadDir('.');
     }
   }
+
   // Sync context usage indicator from session data
   const _s=S.session;
   if(_s&&typeof _syncCtxIndicator==='function'){
@@ -235,6 +256,34 @@ async function loadSession(sid){
   }
 }
 
+// Fetch the full message history for `sid` and install it into S.
+// Called by loadSession() on its idle path, after the metadata-only request
+// (messages=0). Always refetches: loadSession() clears S.messages only when
+// switching to a different session, so skipping whenever S.messages is
+// non-empty would leave a same-session reload showing stale history.
+// Sets S.messages and S.toolCalls, then calls clearLiveToolCards().
+// Errors from api() propagate to the caller.
+async function _ensureMessagesLoaded(sid) {
+  const data = await api(`/api/session?session_id=${encodeURIComponent(sid)}&messages=1`);
+  // Keep only entries that are objects with a role.
+  const msgs = (data.session.messages || []).filter(m => m && m.role);
+  // Use the session-level tool_calls summary only when no assistant message
+  // carries its own tool metadata (tool_calls or tool_use content parts).
+  const hasMessageToolMetadata = msgs.some(m => {
+    if (m.role !== 'assistant') return false;
+    const hasTc = Array.isArray(m.tool_calls) && m.tool_calls.length > 0;
+    const hasTu = Array.isArray(m.content) && m.content.some(p => p && p.type === 'tool_use');
+    return hasTc || hasTu;
+  });
+  if (!hasMessageToolMetadata && data.session.tool_calls && data.session.tool_calls.length) {
+    S.toolCalls = data.session.tool_calls.map(tc => ({...tc, done: true}));
+  } else {
+    S.toolCalls = [];
+  }
+  clearLiveToolCards();
+  S.messages = msgs;
+}
+
 let _allSessions = [];  // cached for search filter
 let _renamingSid = null;  // session_id currently being renamed (blocks list re-renders)
 let _showArchived = false;  // toggle to show archived sessions