diff --git a/api/models.py b/api/models.py index 6e6b5115..1e33ad30 100644 --- a/api/models.py +++ b/api/models.py @@ -366,7 +366,7 @@ class Session: return SESSION_DIR / f'{self.session_id}.json' def save(self, touch_updated_at: bool = True, skip_index: bool = False) -> None: - # ── #1557 P0 guard ────────────────────────────────────────────── + # ── #1558 P0 guard ────────────────────────────────────────────── # Refuse to save a session that was loaded with metadata_only=True. # Such sessions have messages=[] (it's the whole point of the partial # load), and save() unconditionally writes self.messages to disk via @@ -381,7 +381,7 @@ class Session: f"Refusing to save metadata-only session {self.session_id!r}: " f"would atomically overwrite on-disk messages with []. " f"Reload with metadata_only=False before mutating state. " - f"See #1557." + f"See #1558." ) if touch_updated_at: self.updated_at = time.time() @@ -409,14 +409,14 @@ class Session: and not k.startswith('_')} payload = json.dumps({**meta, **extra}, ensure_ascii=False, indent=2) - # ── #1557 backup safeguard ────────────────────────────────────── + # ── #1558 backup safeguard ────────────────────────────────────── # Before overwriting the session file, copy the previous version to # ``.json.bak`` IFF the previous file has more messages than the # incoming payload. The asymmetric guard means: # * Normal grow-the-conversation saves never produce a backup # (incoming messages >= existing) — keeps disk overhead near zero. # * Any save that would shrink the messages array (the failure mode - # of #1557, plus anything similar in the future) leaves a recoverable + # of #1558, plus anything similar in the future) leaves a recoverable # snapshot of the pre-shrink state on disk. # The recovery path is api/session_recovery.py — at server startup and # via /api/session/recover, sessions whose JSON has fewer messages than @@ -497,7 +497,7 @@ class Session: # on-disk JSON with messages=[], wiping the conversation. Any # caller that needs to mutate persisted state on a metadata-only # session must reload it with metadata_only=False first. - # See #1557 — v0.50.279 _clear_stale_stream_state() data-loss bug. + # See #1558 — v0.50.279 _clear_stale_stream_state() data-loss bug. session._loaded_metadata_only = True return session except Exception: diff --git a/api/routes.py b/api/routes.py index 68045c1e..577881ea 100644 --- a/api/routes.py +++ b/api/routes.py @@ -328,7 +328,7 @@ def _clear_stale_stream_state(session) -> bool: session JSON while STREAMS is empty. The frontend then keeps reconnecting to a dead stream and shows a permanent running/thinking state. - SAFETY (#1557): If ``session`` was loaded with ``metadata_only=True``, its + SAFETY (#1558): If ``session`` was loaded with ``metadata_only=True``, its ``messages`` array is empty by design and calling ``save()`` would atomically overwrite the on-disk JSON, wiping the conversation. In that case we re-load the full session before mutating, so the persisted @@ -356,7 +356,7 @@ def _clear_stale_stream_state(session) -> bool: logger.warning( "_clear_stale_stream_state: refused to clear stale stream %s " "for session %s — full reload failed and we will not save a " - "metadata-only stub. See #1557.", + "metadata-only stub. See #1558.", stream_id, getattr(session, "session_id", "?"), ) return False diff --git a/api/session_recovery.py b/api/session_recovery.py index ce520cfe..565297ba 100644 --- a/api/session_recovery.py +++ b/api/session_recovery.py @@ -1,6 +1,6 @@ """ Session recovery from .bak snapshots — last line of defense against -data-loss bugs like #1557. +data-loss bugs like #1558. ``Session.save()`` writes a ``.json.bak`` snapshot of the previous state whenever an incoming save would shrink the messages array. This @@ -100,7 +100,7 @@ def recover_session(session_path: Path) -> dict: return {**status, "restored": False, "error": str(exc)} logger.warning( "recover_session: restored %s from .bak (live=%d → bak=%d messages). " - "See #1557 for the data-loss class this guards against.", + "See #1558 for the data-loss class this guards against.", session_path.name, status["live_messages"], status["bak_messages"], ) return {**status, "restored": True} @@ -126,6 +126,6 @@ def recover_all_sessions_on_startup(session_dir: Path) -> dict: logger.warning( "recover_all_sessions_on_startup: restored %d/%d sessions from .bak. " "If you weren't expecting this, check the session list for missing " - "messages — see #1557.", restored, scanned, + "messages — see #1558.", restored, scanned, ) return {"scanned": scanned, "restored": restored, "details": details} diff --git a/server.py b/server.py index 6bb5bdf1..2aee95cb 100644 --- a/server.py +++ b/server.py @@ -110,15 +110,15 @@ def main() -> None: # Fix sensitive file permissions before doing anything else fix_credential_permissions() - # ── #1557 startup self-heal ───────────────────────────────────────── + # ── #1558 startup self-heal ───────────────────────────────────────── # If a previous process wrote a session JSON with fewer messages than - # its .bak (the data-loss shape #1557 produced), restore from the .bak. + # its .bak (the data-loss shape #1558 produced), restore from the .bak. # Safe to run unconditionally — a clean install is a no-op. try: from api.session_recovery import recover_all_sessions_on_startup result = recover_all_sessions_on_startup(SESSION_DIR) if result.get("restored"): - print(f"[recovery] Restored {result['restored']}/{result['scanned']} sessions from .bak (see #1557).", flush=True) + print(f"[recovery] Restored {result['restored']}/{result['scanned']} sessions from .bak (see #1558).", flush=True) except Exception as exc: # Recovery is best-effort; never block server startup. print(f"[recovery] startup recovery failed: {exc}", flush=True) diff --git a/tests/test_metadata_save_wipe_1557.py b/tests/test_metadata_save_wipe_1558.py similarity index 95% rename from tests/test_metadata_save_wipe_1557.py rename to tests/test_metadata_save_wipe_1558.py index 176b8ca6..b624efc5 100644 --- a/tests/test_metadata_save_wipe_1557.py +++ b/tests/test_metadata_save_wipe_1558.py @@ -1,5 +1,5 @@ """ -P0 regression test for the metadata-only save-wipe (#1557). +P0 regression test for the metadata-only save-wipe (#1558). Before this fix, `_clear_stale_stream_state()` could be called on a session loaded with `metadata_only=True` (which means messages=[]). That handler called @@ -63,7 +63,7 @@ def _make_session_on_disk(session_dir, sid="s_test_1557", n_msgs=1000, with_acti def test_metadata_only_save_raises_to_prevent_wipe(temp_session_dir): - """Direct test of the #1557 guard: save() must refuse to wipe on-disk messages.""" + """Direct test of the #1558 guard: save() must refuse to wipe on-disk messages.""" from api.models import get_session sid = _make_session_on_disk(temp_session_dir, n_msgs=1000) @@ -76,7 +76,7 @@ def test_metadata_only_save_raises_to_prevent_wipe(temp_session_dir): assert len(s.messages) == 0, "metadata-only load synthesizes empty messages — that's its job" assert getattr(s, "_loaded_metadata_only", False) is True, ( "load_metadata_only() must set the _loaded_metadata_only flag so save() " - "knows to refuse this save and prevent #1557 data-loss." + "knows to refuse this save and prevent #1558 data-loss." ) # Mutate as the buggy code path did, then attempt to save. @@ -94,7 +94,7 @@ def test_metadata_only_save_raises_to_prevent_wipe(temp_session_dir): def test_clear_stale_stream_state_preserves_messages(temp_session_dir): - """High-level: the production trigger from #1557 must NOT wipe messages.""" + """High-level: the production trigger from #1558 must NOT wipe messages.""" from api.models import get_session sid = _make_session_on_disk(temp_session_dir, n_msgs=1000, with_active_stream=True) @@ -120,7 +120,7 @@ def test_clear_stale_stream_state_preserves_messages(temp_session_dir): raw = json.loads((temp_session_dir / f"{sid}.json").read_text(encoding="utf-8")) assert len(raw["messages"]) >= 1000, ( f"_clear_stale_stream_state() shrank messages to {len(raw['messages'])} — " - "see #1557. It must clear the stream flags WITHOUT losing existing messages." + "see #1558. It must clear the stream flags WITHOUT losing existing messages." ) # And the stream flag must actually be cleared (whether by _repair_stale_pending # during the reload or by the explicit clear afterwards). @@ -136,7 +136,7 @@ def test_save_writes_bak_when_messages_shrink(temp_session_dir): sid = _make_session_on_disk(temp_session_dir, n_msgs=1000, with_active_stream=False) # Build a fresh in-memory Session with a smaller messages array, then save — - # this models the precise failure shape of #1557 (a caller mutates messages + # this models the precise failure shape of #1558 (a caller mutates messages # downward and saves). We construct the Session directly rather than going # through get_session() so we don't trigger _repair_stale_pending side-effects. s = Session( @@ -150,7 +150,7 @@ def test_save_writes_bak_when_messages_shrink(temp_session_dir): bak_path = temp_session_dir / f"{sid}.json.bak" assert bak_path.exists(), ( - "save() that shrinks messages must leave a .bak — #1557 backup safeguard." + "save() that shrinks messages must leave a .bak — #1558 backup safeguard." ) bak_data = json.loads(bak_path.read_text(encoding="utf-8")) assert len(bak_data["messages"]) == 1000, (