fix: tolerate unreadable gateway JSONL transcripts

2026-05-21 03:39:54 +00:00 · 2026-05-19 00:11:06 -07:00
parent 663ee14865
commit b8a9cbd18c
2 changed files with 44 additions and 11 deletions
@@ -1326,17 +1326,23 @@ class SessionStore:
        transcript_path = self.get_transcript_path(session_id)
        jsonl_messages = []
        if transcript_path.exists():
-            with open(transcript_path, "r", encoding="utf-8") as f:
-                for line in f:
-                    line = line.strip()
-                    if line:
-                        try:
-                            jsonl_messages.append(json.loads(line))
-                        except json.JSONDecodeError:
-                            logger.warning(
-                                "Skipping corrupt line in transcript %s: %s",
-                                session_id, line[:120],
-                            )
+            try:
+                with open(transcript_path, "r", encoding="utf-8") as f:
+                    for line in f:
+                        line = line.strip()
+                        if line:
+                            try:
+                                jsonl_messages.append(json.loads(line))
+                            except json.JSONDecodeError:
+                                logger.warning(
+                                    "Skipping corrupt line in transcript %s: %s",
+                                    session_id, line[:120],
+                                )
+            except OSError as e:
+                # JSONL is the legacy compatibility store. If it becomes
+                # unreadable, keep gateway recovery working by falling back to
+                # SQLite rows loaded above (or [] when no DB exists).
+                logger.debug("Failed to read JSONL transcript for %s: %s", session_id, e)

        # Prefer whichever source has more messages.
        #
@@ -1,5 +1,6 @@
 """Tests for gateway session management."""

+import builtins
 import json
 import pytest
 from pathlib import Path
@@ -688,6 +689,32 @@ class TestLoadTranscriptPreferLongerSource:
        # Should be the SQLite version (equal count → prefers SQLite)
        assert result[0]["content"] == "db-q"

+    def test_unreadable_jsonl_returns_sqlite(self, store_with_db, monkeypatch):
+        """Unreadable legacy JSONL must not hide valid SQLite history."""
+        sid = "unreadable_jsonl"
+        store_with_db._db.create_session(session_id=sid, source="gateway", model="m")
+        store_with_db._db.append_message(session_id=sid, role="user", content="db-q")  # DB-backed history: message 1 of 2
+        store_with_db._db.append_message(session_id=sid, role="assistant", content="db-a")  # DB-backed history: message 2 of 2
+
+        transcript_path = store_with_db.get_transcript_path(sid)
+        transcript_path.parent.mkdir(parents=True, exist_ok=True)
+        transcript_path.write_text('{"role": "user", "content": "jsonl-q"}\n', encoding="utf-8")  # written before open() is patched, so the file really exists on disk
+
+        real_open = builtins.open  # keep the genuine open() so unrelated calls can be passed through
+
+        def raise_for_transcript(path, *args, **kwargs):
+            mode = args[0] if args else kwargs.get("mode", "r")  # mode is open()'s 2nd positional arg or the `mode` keyword; "r" when omitted
+            if Path(path) == transcript_path and "r" in mode:  # fail only read-mode opens of this one transcript
+                raise OSError("simulated unreadable transcript")
+            return real_open(path, *args, **kwargs)  # every other open() call behaves normally
+
+        monkeypatch.setattr(builtins, "open", raise_for_transcript)  # the monkeypatch fixture undoes this at test teardown
+
+        result = store_with_db.load_transcript(sid)
+        assert len(result) == 2  # exactly the two DB messages; the single JSONL message must not appear
+        assert result[0]["content"] == "db-q"
+        assert result[1]["content"] == "db-a"
+

 class TestSessionStoreSwitchSession:
    """Regression coverage for gateway /resume session switching semantics."""