fix(streaming): prevent dropped characters in incremental smd path (#960)

Detect prefix desync between current display text and already-streamed text, then rebuild the streaming-markdown parser from full content to avoid character loss during live rendering. Add regression assertions for the new desync guard.

Made-with: Cursor

Co-authored-by: bsgdigital <bsg@bsgdigital.com>
This commit is contained in:
bsgdigital
2026-04-24 20:04:32 +02:00
committed by GitHub
parent c06ec43f17
commit a2d7f311be
2 changed files with 29 additions and 1 deletions
+17 -1
View File
@@ -192,6 +192,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
// streaming-markdown state: incremental DOM-building parser per segment
let _smdParser=null; // current smd parser instance (null until first content)
let _smdWrittenLen=0; // how many chars of displayText have been fed to smd parser
let _smdWrittenText=''; // exact displayText snapshot used for prefix-alignment checks
// On reconnect, the assistantBody already has partial smd-rendered content.
// We clear it on first new token and restart the parser from the reconnect point.
let _smdReconnect=reconnecting;
@@ -384,6 +385,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
// Called when assistantBody is first created and after each tool-call segment reset.
function _smdNewParser(el){
_smdWrittenLen=0;
_smdWrittenText='';
if(!window.smd){_smdParser=null;return;}
const renderer=window.smd.default_renderer(el);
_smdParser=window.smd.parser(renderer);
@@ -398,15 +400,29 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
}
_smdParser=null;
_smdWrittenLen=0;
_smdWrittenText='';
}
// Helper: feed new displayText delta to the smd parser.
// Only feeds chars beyond what has already been written (_smdWrittenLen).
function _smdWrite(displayText){
if(!_smdParser||!window.smd) return;
const delta=displayText.slice(_smdWrittenLen);
displayText=String(displayText||'');
// Self-heal desyncs: if displayText no longer starts with what we've already
// written (e.g. due to stream sanitization/tag stripping), incremental slicing
// can skip characters. Rebuild parser from the full current displayText.
if(_smdWrittenText && !displayText.startsWith(_smdWrittenText)){
_smdParser=null;
_smdWrittenLen=0;
_smdWrittenText='';
if(assistantBody) assistantBody.innerHTML='';
_smdNewParser(assistantBody);
if(!_smdParser) return;
}
const delta=displayText.slice(_smdWrittenText.length);
if(!delta) return;
try{window.smd.parser_write(_smdParser,delta);}catch(_){}
_smdWrittenLen=displayText.length;
_smdWrittenText=displayText;
// streaming-markdown does NOT sanitize URL schemes — `[click](javascript:...)`
// and `![alt](javascript:...)` survive as href/src. Strip any unsafe schemes
// from anchors/images that were just added to the live DOM. The existing
+12
View File
@@ -135,6 +135,12 @@ class TestClosureVariables:
"_smdReconnect must be declared in the attachLiveStream closure scope"
)
def test_smd_written_text_declared(self):
prelude = self.get_prelude()
assert prelude and "_smdWrittenText" in prelude, (
"_smdWrittenText must be declared in the attachLiveStream closure scope"
)
def test_smd_parser_initialised_null(self):
prelude = self.get_prelude()
assert prelude and (
@@ -225,6 +231,12 @@ class TestSmdHelpers:
"_smdWrite must advance _smdWrittenLen to displayText.length after writing"
)
def test_smd_write_has_prefix_desync_guard(self):
fn = extract_fn(MESSAGES_JS, "_smdWrite")
assert fn and "startsWith(_smdWrittenText)" in fn, (
"_smdWrite must detect prefix desyncs and rebuild parser to avoid dropped chars"
)
def test_smd_write_guards_on_parser(self):
fn = extract_fn(MESSAGES_JS, "_smdWrite")
assert fn and "_smdParser" in fn, (