fix: tolerate unreadable gateway JSONL transcripts

This commit is contained in:
MoonJuhan
2026-05-19 00:11:06 -07:00
committed by Teknium
parent 663ee14865
commit b8a9cbd18c
2 changed files with 44 additions and 11 deletions
+17 -11
View File
@@ -1326,17 +1326,23 @@ class SessionStore:
transcript_path = self.get_transcript_path(session_id)
jsonl_messages = []
if transcript_path.exists():
with open(transcript_path, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if line:
try:
jsonl_messages.append(json.loads(line))
except json.JSONDecodeError:
logger.warning(
"Skipping corrupt line in transcript %s: %s",
session_id, line[:120],
)
try:
with open(transcript_path, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if line:
try:
jsonl_messages.append(json.loads(line))
except json.JSONDecodeError:
logger.warning(
"Skipping corrupt line in transcript %s: %s",
session_id, line[:120],
)
except OSError as e:
# JSONL is the legacy compatibility store. If it becomes
# unreadable, keep gateway recovery working by falling back to
# SQLite rows loaded above (or [] when no DB exists).
logger.debug("Failed to read JSONL transcript for %s: %s", session_id, e)
# Prefer whichever source has more messages.
#
+27
View File
@@ -1,5 +1,6 @@
"""Tests for gateway session management."""
import builtins
import json
import pytest
from pathlib import Path
@@ -688,6 +689,32 @@ class TestLoadTranscriptPreferLongerSource:
# Should be the SQLite version (equal count → prefers SQLite)
assert result[0]["content"] == "db-q"
def test_unreadable_jsonl_returns_sqlite(self, store_with_db, monkeypatch):
    """Unreadable legacy JSONL must not hide valid SQLite history.

    Seeds two messages in the SQLite store and one line in the JSONL
    transcript, then makes every read-mode ``open`` of that transcript
    raise ``OSError``. ``load_transcript`` is expected to return the two
    SQLite messages instead of propagating the error.
    """
    sid = "unreadable_jsonl"
    store_with_db._db.create_session(session_id=sid, source="gateway", model="m")
    store_with_db._db.append_message(session_id=sid, role="user", content="db-q")
    store_with_db._db.append_message(session_id=sid, role="assistant", content="db-a")

    # Write the transcript before patching ``open`` so the file really
    # exists on disk and the ``exists()`` check in the store passes.
    transcript_path = store_with_db.get_transcript_path(sid)
    transcript_path.parent.mkdir(parents=True, exist_ok=True)
    transcript_path.write_text('{"role": "user", "content": "jsonl-q"}\n', encoding="utf-8")

    real_open = builtins.open

    def raise_for_transcript(file, *args, **kwargs):
        """Fail read-mode opens of the transcript; delegate everything else."""
        mode = args[0] if args else kwargs.get("mode", "r")
        # The patch is process-wide, and ``open`` also accepts integer file
        # descriptors and bytes paths, which ``Path()`` rejects with
        # TypeError. Only str/Path arguments can name the transcript.
        names_transcript = isinstance(file, (str, Path)) and Path(file) == transcript_path
        if names_transcript and "r" in mode:
            raise OSError("simulated unreadable transcript")
        return real_open(file, *args, **kwargs)

    monkeypatch.setattr(builtins, "open", raise_for_transcript)

    result = store_with_db.load_transcript(sid)

    # Only the SQLite rows should come back, in insertion order.
    assert len(result) == 2
    assert result[0]["content"] == "db-q"
    assert result[1]["content"] == "db-a"
class TestSessionStoreSwitchSession:
"""Regression coverage for gateway /resume session switching semantics."""