def _accepts_gzip(handler) -> bool:
    """Return True when the request's Accept-Encoding header permits gzip.

    The header is parsed as a comma-separated list of content-codings, each
    optionally followed by ``;q=<weight>``.  Coding names are compared
    case-insensitively, and a coding listed with ``q=0`` is treated as
    explicitly refused, so ``gzip;q=0`` no longer triggers compression the
    way a plain substring test would.

    Args:
        handler: Request handler exposing a mapping-like ``headers``
            attribute with a ``get(name, default)`` method.

    Returns:
        True if ``gzip`` (or its legacy alias ``x-gzip``) is listed with a
        positive weight; False when the header is absent, empty, refuses
        gzip, or carries an unparsable weight.
    """
    header = handler.headers.get('Accept-Encoding', '') or ''
    for item in header.split(','):
        coding, _, params = item.partition(';')
        if coding.strip().lower() not in ('gzip', 'x-gzip'):
            continue
        weight = 1.0  # a coding without an explicit q-value defaults to 1
        for param in params.split(';'):
            name, _, value = param.partition('=')
            if name.strip().lower() != 'q':
                continue
            try:
                weight = float(value.strip())
            except ValueError:
                # Malformed weight: sending the body uncompressed is always
                # valid, so err on the side of not compressing.
                weight = 0.0
        if weight > 0:
            return True
    return False
@classmethod
def load_metadata_only(cls, sid):
    """Load the session identified by *sid* for metadata-only use.

    The returned object is a complete ``Session`` (messages included); this
    method delegates to ``load()`` after validating the id.

    An earlier version read the first 1024 characters of the file and passed
    them to ``json.loads``.  A truncated JSON document never parses, so every
    file larger than the prefix raised, was caught by a broad ``except``, and
    was then read a second time in full by ``load()``.  Files small enough to
    fit in the prefix were parsed in full anyway.  The result was therefore
    always the same as ``load()``, at the cost of an extra open/read and a
    swallowed exception.  Delegating directly keeps that result and removes
    the wasted work.

    NOTE(review): a real prefix-only parse would return a Session whose
    ``messages`` list is empty.  ``get_session()`` appears to store whatever
    this method returns in the ``SESSIONS`` cache, so such an object could
    later be served as the full session or written back by ``save()``.
    Make the cache distinguish partial sessions before adding a true fast
    path here.

    Args:
        sid: Session id; only characters ``0-9``, ``a-z`` and ``_`` are
            accepted.

    Returns:
        ``None`` when *sid* is empty, contains other characters, or no
        ``<sid>.json`` file exists under ``SESSION_DIR``; otherwise whatever
        ``cls.load(sid)`` returns.  Errors raised by ``load()`` (for example
        on a corrupt file) propagate to the caller, as they did through the
        previous fallback path.
    """
    allowed_chars = '0123456789abcdefghijklmnopqrstuvwxyz_'
    if not sid or not all(ch in allowed_chars for ch in sid):
        return None
    if not (SESSION_DIR / f'{sid}.json').exists():
        return None
    return cls.load(sid)
+ """ with LOCK: if sid in SESSIONS: SESSIONS.move_to_end(sid) # LRU: mark as recently used return SESSIONS[sid] - s = Session.load(sid) + if metadata_only: + s = Session.load_metadata_only(sid) + else: + s = Session.load(sid) if s: with LOCK: SESSIONS[sid] = s diff --git a/api/routes.py b/api/routes.py index e86f310c..31bd0377 100644 --- a/api/routes.py +++ b/api/routes.py @@ -676,23 +676,47 @@ def handle_get(handler, parsed) -> bool: return _serve_static(handler, parsed) if parsed.path == "/api/session": + import time as _time + _t0 = _time.monotonic() + _debug_slow = os.environ.get("HERMES_DEBUG_SLOW", "") sid = parse_qs(parsed.query).get("session_id", [""])[0] if not sid: return j(handler, {"error": "session_id is required"}, status=400) + # ?messages=0 skips the message payload for fast session switching. + # The frontend uses this when switching conversations in the sidebar + # (only needs metadata). The full message array is loaded lazily + # via ?messages=1 when the message panel opens. 
+ load_messages = parse_qs(parsed.query).get("messages", ["1"])[0] != "0" try: - s = get_session(sid) + _t1 = _time.monotonic() + s = get_session(sid, metadata_only=(not load_messages)) + _t2 = _time.monotonic() effective_model = _resolve_effective_session_model_for_display(s) + _t3 = _time.monotonic() raw = s.compact() | { - "messages": s.messages, - "tool_calls": getattr(s, "tool_calls", []), + "messages": s.messages if load_messages else [], + "tool_calls": getattr(s, "tool_calls", []) if load_messages else [], "active_stream_id": getattr(s, "active_stream_id", None), "pending_user_message": getattr(s, "pending_user_message", None), - "pending_attachments": getattr(s, "pending_attachments", []), + "pending_attachments": getattr(s, "pending_attachments", []) if load_messages else [], "pending_started_at": getattr(s, "pending_started_at", None), } + _t4 = _time.monotonic() if effective_model: raw["model"] = effective_model - return j(handler, {"session": redact_session_data(raw)}) + redact = redact_session_data(raw) + _t5 = _time.monotonic() + resp = j(handler, {"session": redact}) + _t6 = _time.monotonic() + if _debug_slow: + logger.warning( + "[SLOW] session_id=%s get_session=%.1fms model_resolve=%.1fms " + "compact=%.1fms redact=%.1fms json_write=%.1fms total=%.1fms", + sid, + (_t2-_t1)*1000, (_t3-_t2)*1000, (_t4-_t3)*1000, + (_t5-_t4)*1000, (_t6-_t5)*1000, (_t6-_t0)*1000, + ) + return resp except KeyError: # Not a WebUI session -- try CLI store msgs = get_cli_session_messages(sid) @@ -1079,6 +1103,19 @@ def handle_post(handler, parsed) -> bool: except RuntimeError as e: return bad(handler, str(e), 500) + if parsed.path == "/api/admin/reload": + # Hot-reload api.models module to pick up code changes without restart. + import importlib + from api import models as _models + importlib.reload(_models) + # Also re-expose get_session from the reloaded module so routes.py + # continues to work (routes.py imported it at module level). 
+ import api.routes as _routes + _routes.get_session = _models.get_session + _routes.Session = _models.Session + _routes.compact = _models.compact + return j(handler, {"status": "ok", "reloaded": "api.models"}) + if parsed.path == "/api/sessions/cleanup": return _handle_sessions_cleanup(handler, body, zero_only=False) diff --git a/api/streaming.py b/api/streaming.py index f743381e..aa5fbca6 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -1350,6 +1350,11 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta logger.debug("Periodic checkpoint save failed: %s", e) _checkpoint_stop = threading.Event() + # Persist the user message BEFORE streaming starts so it's durable even if + # the server crashes before the first checkpoint fires (every 15s). + with _agent_lock: + s.save(touch_updated_at=True, skip_index=False) + _ckpt_thread = threading.Thread( target=_periodic_checkpoint, daemon=True, name=f"ckpt-{session_id[:8]}", diff --git a/static/sessions.js b/static/sessions.js index a3071407..6bc3278d 100644 --- a/static/sessions.js +++ b/static/sessions.js @@ -96,31 +96,51 @@ async function loadSession(sid){ stopApprovalPolling();hideApprovalCard(); if(typeof stopClarifyPolling==='function') stopClarifyPolling(); if(typeof hideClarifyCard==='function') hideClarifyCard(); - const data=await api(`/api/session?session_id=${encodeURIComponent(sid)}`); + // Show loading indicator immediately for responsiveness. + // Cleared by renderMessages() once full session data arrives. + const currentSid = S.session ? S.session.session_id : null; + if (currentSid !== sid) { + S.messages = []; + S.toolCalls = []; + const _msgInner = $('msgInner'); + if (_msgInner) _msgInner.innerHTML = '