fix(streaming): strip malformed DSML function_calls tags (#958)

Handle DeepSeek DSML variants including truncated and spaced tag forms, and sanitize thinking-card text so leaked XML fragments never render. Add regression tests for DSML edge cases and thinking-card sanitization.

Made-with: Cursor

Co-authored-by: bsgdigital <bsg@bsgdigital.com>
This commit is contained in:
bsgdigital
2026-04-24 20:04:16 +02:00
committed by GitHub
parent 70de09290c
commit e5cf9c5910
4 changed files with 85 additions and 14 deletions
+26 -5
View File
@@ -93,14 +93,35 @@ def _strip_xml_tool_calls(text: str) -> str:
Handles both complete blocks (<function_calls>…</function_calls>) and
partial/orphaned opening tags that may appear at the tail of a stream.
Also handles variants like <DSMLfunction_calls> from DeepSeek on Bedrock.
"""
if not text or '<function_calls>' not in text.lower():
if not text:
return text
s = str(text)
# Strip complete blocks (possibly multiple)
s = re.sub(r'<function_calls>.*?</function_calls>', '', s, flags=re.IGNORECASE | re.DOTALL)
# Strip orphaned opening tags (stream cut off before closing tag)
s = re.sub(r'<function_calls>.*$', '', s, flags=re.IGNORECASE | re.DOTALL)
# Check if contains any function_calls/DSML marker (case-insensitive)
_lo = s.lower()
if 'function_calls' not in _lo and 'dsml' not in _lo:
return text
_dsml_prefix = r'(?:\s*\s*DSML\s*[|]\s*)?'
open_tag = rf'<{_dsml_prefix}function_calls'
close_tag = rf'</{_dsml_prefix}function_calls>'
# Strip complete blocks for both <function_calls> and <DSMLfunction_calls>.
s = re.sub(
rf'{open_tag}>.*?{close_tag}',
'',
s,
flags=re.IGNORECASE | re.DOTALL
)
# Strip orphaned/truncated opening tags, including missing ">" at stream tail.
s = re.sub(
rf'{open_tag}(?:>|$).*$',
'',
s,
flags=re.IGNORECASE | re.DOTALL
)
# Remove malformed DSML fragments like "<DSML |" that can leak in tokens.
s = re.sub(r'<\s*\s*DSML\s*[|]\s*', '', s, flags=re.IGNORECASE)
return s.strip()
+11 -3
View File
@@ -300,9 +300,17 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
// Strip <function_calls>...</function_calls> blocks (DeepSeek XML tool syntax).
// These are processed as tool calls server-side; showing them raw in the bubble
// looks broken. Also handles orphaned opening tags mid-stream. (#702)
if(!s||s.toLowerCase().indexOf('<function_calls>')===-1) return s;
s=s.replace(/<function_calls>[\s\S]*?<\/function_calls>/gi,'');
s=s.replace(/<function_calls>[\s\S]*$/i,'');
// Also handles DSML-prefixed variants from DeepSeek/Bedrock, including
// spacing variants like "<DSML |function_calls" and truncated prefixes.
if(!s) return s;
const lo=String(s).toLowerCase();
if(lo.indexOf('function_calls')===-1 && lo.indexOf('dsml')===-1) return s;
// Support both plain <function_calls> and DSML-prefixed variants.
s=s.replace(/<(?:\s*\s*DSML\s*[|]\s*)?function_calls>[\s\S]*?<\/(?:\s*\s*DSML\s*[|]\s*)?function_calls>/gi,'');
// Also remove truncated opening tags (missing closing ">" at stream tail).
s=s.replace(/<(?:\s*\s*DSML\s*[|]\s*)?function_calls(?:>|$)[\s\S]*$/i,'');
// Remove malformed DSML tag fragments like "<DSML |" that can leak in tokens.
s=s.replace(/<\s*\s*DSML\s*[|]\s*/gi,'');
return s.trim();
}
function _streamDisplay(){
+21 -6
View File
@@ -629,12 +629,25 @@ function _stripXmlToolCallsDisplay(s){
// similar models in their raw response text. These are processed separately
// as tool calls; leaving them in the content causes them to render visibly
// in the settled chat bubble. (#702)
if(!s||s.toLowerCase().indexOf('<function_calls>')===-1) return s;
s=s.replace(/<function_calls>[\s\S]*?<\/function_calls>/gi,'');
s=s.replace(/<function_calls>[\s\S]*$/i,'');
// Also handles DSML-prefixed variants from DeepSeek/Bedrock, including
// spacing variants like "<DSML |function_calls" and truncated prefixes.
if(!s) return s;
const lo=String(s).toLowerCase();
if(lo.indexOf('function_calls')===-1 && lo.indexOf('dsml')===-1) return s;
// Support both plain <function_calls> and DSML-prefixed variants.
s=s.replace(/<(?:\s*\s*DSML\s*[|]\s*)?function_calls>[\s\S]*?<\/(?:\s*\s*DSML\s*[|]\s*)?function_calls>/gi,'');
// Also remove truncated opening tags (missing closing ">" at stream tail).
s=s.replace(/<(?:\s*\s*DSML\s*[|]\s*)?function_calls(?:>|$)[\s\S]*$/i,'');
// Remove malformed DSML tag fragments like "<DSML |" that can leak in tokens.
s=s.replace(/<\s*\s*DSML\s*[|]\s*/gi,'');
return s.trim();
}
function _sanitizeThinkingDisplayText(text){
  // Falsy input (null, undefined, '') is coerced to an empty string so the
  // stripper always receives a string. The stripper's result is trimmed here
  // as well, so the returned text never has leading or trailing whitespace.
  const source=String(text||'');
  return _stripXmlToolCallsDisplay(source).trim();
}
function renderMd(raw){
let s=raw||'';
// ── MEDIA: token stash (must run first, before any other processing) ───────
@@ -1476,7 +1489,8 @@ function _assistantTurnBlocks(turn){
return turn?turn.querySelector('.assistant-turn-blocks'):null;
}
function _thinkingCardHtml(text){
return `<div class="thinking-card"><div class="thinking-card-header" onclick="this.parentElement.classList.toggle('open')"><span class="thinking-card-icon">${li('lightbulb',14)}</span><span class="thinking-card-label">${t('thinking')}</span><span class="thinking-card-toggle">${li('chevron-right',12)}</span></div><div class="thinking-card-body"><pre>${esc(text)}</pre></div></div>`;
const clean=_sanitizeThinkingDisplayText(text);
return `<div class="thinking-card"><div class="thinking-card-header" onclick="this.parentElement.classList.toggle('open')"><span class="thinking-card-icon">${li('lightbulb',14)}</span><span class="thinking-card-label">${t('thinking')}</span><span class="thinking-card-toggle">${li('chevron-right',12)}</span></div><div class="thinking-card-body"><pre>${esc(clean)}</pre></div></div>`;
}
function _compressionStateForCurrentSession(){
const state=window._compressionUi;
@@ -2383,8 +2397,9 @@ function renderKatexBlocks(){
}
function _thinkingMarkup(text=''){
return (text&&String(text).trim())
? `<div class="thinking-card open"><div class="thinking-card-header" onclick="this.parentElement.classList.toggle('open')"><span class="thinking-card-icon">${li('lightbulb',14)}</span><span class="thinking-card-label">${t('thinking')}</span><span class="thinking-card-toggle">${li('chevron-right',12)}</span></div><div class="thinking-card-body"><pre>${esc(String(text).trim())}</pre></div></div>`
const clean=_sanitizeThinkingDisplayText(text);
return (clean&&String(clean).trim())
? `<div class="thinking-card open"><div class="thinking-card-header" onclick="this.parentElement.classList.toggle('open')"><span class="thinking-card-icon">${li('lightbulb',14)}</span><span class="thinking-card-label">${t('thinking')}</span><span class="thinking-card-toggle">${li('chevron-right',12)}</span></div><div class="thinking-card-body"><pre>${esc(String(clean).trim())}</pre></div></div>`
: `<div class="thinking"><div class="dot"></div><div class="dot"></div><div class="dot"></div></div>`;
}
function finalizeThinkingCard(){
+27
View File
@@ -88,6 +88,21 @@ class TestXmlToolCallStrip:
assert 'middle' in result
assert 'end' in result
# Regression test for #958: a DSML-prefixed opening tag that is cut off before
# its closing ">" must be removed, while the answer text in front of it is kept.
def test_dsml_prefixed_truncated_opening_tag_removed(self):
# NOTE(review): _load_fn is defined elsewhere in this class; presumably it
# returns the Python strip function under test -- confirm.
fn = self._load_fn()
# The input ends in an opening tag with no ">" (stream stopped mid-tag).
text = "Answer before tool tag <DSMLfunction_calls"
result = fn(text)
# No trace of the tag name may remain, in any letter case.
assert 'function_calls' not in result.lower()
# The prose that preceded the tag must survive the strip.
assert 'Answer before tool tag' in result
# Regression test for #958: a stray "<DSML |" fragment in the middle of the text
# must be removed without discarding the text on either side of it.
def test_malformed_dsml_fragment_removed(self):
# NOTE(review): _load_fn is defined elsewhere in this class; presumably it
# returns the Python strip function under test -- confirm.
fn = self._load_fn()
# The fragment has no function_calls tag after it, only ordinary text.
text = "Answer <DSML | still streaming"
result = fn(text)
# The fragment itself must be gone.
assert '<DSML |' not in result
# Text before and after the fragment must both be preserved.
assert 'Answer' in result
assert 'still streaming' in result
def test_function_defined_in_streaming_py(self):
src = read('api/streaming.py')
assert 'def _strip_xml_tool_calls(' in src, (
@@ -121,6 +136,18 @@ class TestXmlToolCallStrip:
"_stripXmlToolCallsDisplay must exist in static/ui.js"
)
# Static source check on static/ui.js: the thinking-card sanitizer must exist and
# must be referenced often enough to cover both thinking-card render helpers.
# This inspects the file text only; it does not execute any JavaScript.
def test_thinking_card_text_is_sanitized(self):
# NOTE(review): read is defined elsewhere in this test module; presumably it
# returns the file's contents as a string -- confirm.
src = read('static/ui.js')
assert '_sanitizeThinkingDisplayText' in src, (
"Thinking card text sanitizer must exist in static/ui.js"
)
assert '_thinkingCardHtml' in src and '_thinkingMarkup' in src, (
"Thinking card render helpers must exist in static/ui.js"
)
# Counting "name(" matches the function definition as well as calls, so the
# threshold of 3 corresponds to one definition plus at least two call sites.
assert src.count('_sanitizeThinkingDisplayText(') >= 3, (
"Thinking card helpers must call _sanitizeThinkingDisplayText"
)
# ── Bug #703 — Workspace file panel empty state ───────────────────────────────