diff --git a/api/streaming.py b/api/streaming.py index f5abd978..f743381e 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -93,14 +93,35 @@ def _strip_xml_tool_calls(text: str) -> str: Handles both complete blocks () and partial/orphaned opening tags that may appear at the tail of a stream. + Also handles variants like <|DSML|function_calls> from DeepSeek on Bedrock. """ - if not text or '' not in text.lower(): + if not text: return text s = str(text) - # Strip complete blocks (possibly multiple) - s = re.sub(r'.*?', '', s, flags=re.IGNORECASE | re.DOTALL) - # Strip orphaned opening tags (stream cut off before closing tag) - s = re.sub(r'.*$', '', s, flags=re.IGNORECASE | re.DOTALL) + # Check if contains any function_calls/DSML marker (case-insensitive) + _lo = s.lower() + if 'function_calls' not in _lo and 'dsml' not in _lo: + return text + + _dsml_prefix = r'(?:\s*|\s*DSML\s*[||]\s*)?' + open_tag = rf'<{_dsml_prefix}function_calls' + close_tag = rf'' + # Strip complete blocks for both and <|DSML|function_calls>. + s = re.sub( + rf'{open_tag}>.*?{close_tag}', + '', + s, + flags=re.IGNORECASE | re.DOTALL + ) + # Strip orphaned/truncated opening tags, including missing ">" at stream tail. + s = re.sub( + rf'{open_tag}(?:>|$).*$', + '', + s, + flags=re.IGNORECASE | re.DOTALL + ) + # Remove malformed DSML fragments like "<|DSML |" that can leak in tokens. + s = re.sub(r'<\s*|\s*DSML\s*[||]\s*', '', s, flags=re.IGNORECASE) return s.strip() diff --git a/static/messages.js b/static/messages.js index ecd202dd..c1623e75 100644 --- a/static/messages.js +++ b/static/messages.js @@ -300,9 +300,17 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ // Strip ... blocks (DeepSeek XML tool syntax). // These are processed as tool calls server-side; showing them raw in the bubble // looks broken. Also handles orphaned opening tags mid-stream. (#702) - if(!s||s.toLowerCase().indexOf('')===-1) return s; - s=s.replace(/[\s\S]*?<\/function_calls>/gi,''); - s=s.replace(/[\s\S]*$/i,''); + // Also handles DSML-prefixed variants from DeepSeek/Bedrock, including + // spacing variants like "<|DSML |function_calls" and truncated prefixes. + if(!s) return s; + const lo=String(s).toLowerCase(); + if(lo.indexOf('function_calls')===-1 && lo.indexOf('dsml')===-1) return s; + // Support both plain and DSML-prefixed variants. + s=s.replace(/<(?:\s*|\s*DSML\s*[||]\s*)?function_calls>[\s\S]*?<\/(?:\s*|\s*DSML\s*[||]\s*)?function_calls>/gi,''); + // Also remove truncated opening tags (missing closing ">" at stream tail). + s=s.replace(/<(?:\s*|\s*DSML\s*[||]\s*)?function_calls(?:>|$)[\s\S]*$/i,''); + // Remove malformed DSML tag fragments like "<|DSML |" that can leak in tokens. + s=s.replace(/<\s*|\s*DSML\s*[||]\s*/gi,''); return s.trim(); } function _streamDisplay(){ diff --git a/static/ui.js b/static/ui.js index 51ce6257..1fa044e8 100644 --- a/static/ui.js +++ b/static/ui.js @@ -629,12 +629,25 @@ function _stripXmlToolCallsDisplay(s){ // similar models in their raw response text. These are processed separately // as tool calls; leaving them in the content causes them to render visibly // in the settled chat bubble. (#702) - if(!s||s.toLowerCase().indexOf('')===-1) return s; - s=s.replace(/[\s\S]*?<\/function_calls>/gi,''); - s=s.replace(/[\s\S]*$/i,''); + // Also handles DSML-prefixed variants from DeepSeek/Bedrock, including + // spacing variants like "<|DSML |function_calls" and truncated prefixes. + if(!s) return s; + const lo=String(s).toLowerCase(); + if(lo.indexOf('function_calls')===-1 && lo.indexOf('dsml')===-1) return s; + // Support both plain and DSML-prefixed variants. + s=s.replace(/<(?:\s*|\s*DSML\s*[||]\s*)?function_calls>[\s\S]*?<\/(?:\s*|\s*DSML\s*[||]\s*)?function_calls>/gi,''); + // Also remove truncated opening tags (missing closing ">" at stream tail). + s=s.replace(/<(?:\s*|\s*DSML\s*[||]\s*)?function_calls(?:>|$)[\s\S]*$/i,''); + // Remove malformed DSML tag fragments like "<|DSML |" that can leak in tokens. + s=s.replace(/<\s*|\s*DSML\s*[||]\s*/gi,''); return s.trim(); } +function _sanitizeThinkingDisplayText(text){ + const stripped=_stripXmlToolCallsDisplay(String(text||'')); + return stripped.trim(); +} + function renderMd(raw){ let s=raw||''; // ── MEDIA: token stash (must run first, before any other processing) ─────── @@ -1476,7 +1489,8 @@ function _assistantTurnBlocks(turn){ return turn?turn.querySelector('.assistant-turn-blocks'):null; } function _thinkingCardHtml(text){ - return `
${li('lightbulb',14)}${t('thinking')}${li('chevron-right',12)}
${esc(text)}
`; + const clean=_sanitizeThinkingDisplayText(text); + return `
${li('lightbulb',14)}${t('thinking')}${li('chevron-right',12)}
${esc(clean)}
`; } function _compressionStateForCurrentSession(){ const state=window._compressionUi; @@ -2383,8 +2397,9 @@ function renderKatexBlocks(){ } function _thinkingMarkup(text=''){ - return (text&&String(text).trim()) - ? `
${li('lightbulb',14)}${t('thinking')}${li('chevron-right',12)}
${esc(String(text).trim())}
` + const clean=_sanitizeThinkingDisplayText(text); + return (clean&&String(clean).trim()) + ? `
${li('lightbulb',14)}${t('thinking')}${li('chevron-right',12)}
${esc(String(clean).trim())}
` : `
`; } function finalizeThinkingCard(){ diff --git a/tests/test_sprint48.py b/tests/test_sprint48.py index c015a080..2de27e23 100644 --- a/tests/test_sprint48.py +++ b/tests/test_sprint48.py @@ -88,6 +88,21 @@ class TestXmlToolCallStrip: assert 'middle' in result assert 'end' in result + def test_dsml_prefixed_truncated_opening_tag_removed(self): + fn = self._load_fn() + text = "Answer before tool tag <|DSML|function_calls" + result = fn(text) + assert 'function_calls' not in result.lower() + assert 'Answer before tool tag' in result + + def test_malformed_dsml_fragment_removed(self): + fn = self._load_fn() + text = "Answer <|DSML | still streaming" + result = fn(text) + assert '<|DSML |' not in result + assert 'Answer' in result + assert 'still streaming' in result + def test_function_defined_in_streaming_py(self): src = read('api/streaming.py') assert 'def _strip_xml_tool_calls(' in src, ( @@ -121,6 +136,18 @@ class TestXmlToolCallStrip: "_stripXmlToolCallsDisplay must exist in static/ui.js" ) + def test_thinking_card_text_is_sanitized(self): + src = read('static/ui.js') + assert '_sanitizeThinkingDisplayText' in src, ( + "Thinking card text sanitizer must exist in static/ui.js" + ) + assert '_thinkingCardHtml' in src and '_thinkingMarkup' in src, ( + "Thinking card render helpers must exist in static/ui.js" + ) + assert src.count('_sanitizeThinkingDisplayText(') >= 3, ( + "Thinking card helpers must call _sanitizeThinkingDisplayText" + ) + # ── Bug #703 — Workspace file panel empty state ───────────────────────────────