From b8a9cbd18cc2511b6c3ba89157c1c2d2aaa2ef76 Mon Sep 17 00:00:00 2001
From: MoonJuhan <35164907+MoonJuhan@users.noreply.github.com>
Date: Tue, 19 May 2026 00:11:06 -0700
Subject: [PATCH] fix: tolerate unreadable gateway JSONL transcripts

---
 gateway/session.py            | 28 +++++++++++++++++-----------
 tests/gateway/test_session.py | 27 +++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 11 deletions(-)

diff --git a/gateway/session.py b/gateway/session.py
index dfa2ca9651..ee90726a8b 100644
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -1326,17 +1326,23 @@ class SessionStore:
         transcript_path = self.get_transcript_path(session_id)
         jsonl_messages = []
         if transcript_path.exists():
-            with open(transcript_path, "r", encoding="utf-8") as f:
-                for line in f:
-                    line = line.strip()
-                    if line:
-                        try:
-                            jsonl_messages.append(json.loads(line))
-                        except json.JSONDecodeError:
-                            logger.warning(
-                                "Skipping corrupt line in transcript %s: %s",
-                                session_id, line[:120],
-                            )
+            try:
+                with open(transcript_path, "r", encoding="utf-8") as f:
+                    for line in f:
+                        line = line.strip()
+                        if line:
+                            try:
+                                jsonl_messages.append(json.loads(line))
+                            except json.JSONDecodeError:
+                                logger.warning(
+                                    "Skipping corrupt line in transcript %s: %s",
+                                    session_id, line[:120],
+                                )
+            except OSError as e:
+                # JSONL is the legacy compatibility store. If it becomes
+                # unreadable, keep gateway recovery working by falling back to
+                # SQLite rows loaded above (or [] when no DB exists).
+                logger.debug("Failed to read JSONL transcript for %s: %s", session_id, e)
 
         # Prefer whichever source has more messages.
         #
diff --git a/tests/gateway/test_session.py b/tests/gateway/test_session.py
index b8fd45558c..dcd6ef9020 100644
--- a/tests/gateway/test_session.py
+++ b/tests/gateway/test_session.py
@@ -1,5 +1,6 @@
 """Tests for gateway session management."""
 
+import builtins
 import json
 import pytest
 from pathlib import Path
@@ -688,6 +689,32 @@ class TestLoadTranscriptPreferLongerSource:
         # Should be the SQLite version (equal count → prefers SQLite)
         assert result[0]["content"] == "db-q"
 
+    def test_unreadable_jsonl_returns_sqlite(self, store_with_db, monkeypatch):
+        """Unreadable legacy JSONL must not hide valid SQLite history."""
+        sid = "unreadable_jsonl"
+        store_with_db._db.create_session(session_id=sid, source="gateway", model="m")
+        store_with_db._db.append_message(session_id=sid, role="user", content="db-q")
+        store_with_db._db.append_message(session_id=sid, role="assistant", content="db-a")
+
+        transcript_path = store_with_db.get_transcript_path(sid)
+        transcript_path.parent.mkdir(parents=True, exist_ok=True)
+        transcript_path.write_text('{"role": "user", "content": "jsonl-q"}\n', encoding="utf-8")
+
+        real_open = builtins.open
+
+        def raise_for_transcript(path, *args, **kwargs):
+            mode = args[0] if args else kwargs.get("mode", "r")
+            if Path(path) == transcript_path and "r" in mode:
+                raise OSError("simulated unreadable transcript")
+            return real_open(path, *args, **kwargs)
+
+        monkeypatch.setattr(builtins, "open", raise_for_transcript)
+
+        result = store_with_db.load_transcript(sid)
+        assert len(result) == 2
+        assert result[0]["content"] == "db-q"
+        assert result[1]["content"] == "db-a"
+
 
 class TestSessionStoreSwitchSession:
     """Regression coverage for gateway /resume session switching semantics."""