From 81ac62c0e95dd4de80daabc691fe2889b990055d Mon Sep 17 00:00:00 2001
From: bravohenry
Date: Sun, 12 Apr 2026 00:37:47 -0700
Subject: [PATCH] fix(weixin): split chatty short replies into separate
 bubbles, keep structured content together

Add content-aware splitting to compact mode: short chat-like exchanges
(2-6 short lines without headings/lists/quotes) get separate message
bubbles for a natural chat feel, while structured content (tables,
headings with body, numbered lists) stays in a single message.

Cherry-picked from PR #7587 by bravohenry, adapted to the compact/legacy
split_per_line architecture from #7903.
---
 gateway/platforms/weixin.py  | 46 ++++++++++++++++++++++++++++++++++--
 tests/gateway/test_weixin.py | 35 +++++++++++++++++++++++++--
 2 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/gateway/platforms/weixin.py b/gateway/platforms/weixin.py
index 5821d922f8..dc4e7cf969 100644
--- a/gateway/platforms/weixin.py
+++ b/gateway/platforms/weixin.py
@@ -734,6 +734,42 @@ def _split_delivery_units_for_weixin(content: str) -> List[str]:
     return [unit for unit in units if unit]
 
 
+def _looks_like_chatty_line_for_weixin(line: str) -> bool:
+    """Return True when a line looks like a standalone chat utterance."""
+    stripped = line.strip()
+    if not stripped:
+        return False
+    if len(stripped) > 48:  # only short lines count as chat utterances
+        return False
+    if line.startswith((" ", "\t")):  # indented lines belong to structured content
+        return False
+    if stripped.startswith((">", "-", "*", "【")):  # quote, bullet, emphasis, bracket title
+        return False
+    if re.match(r"^\*\*[^*]+\*\*$", stripped):  # whole-line bold, i.e. a rendered heading
+        return False
+    if re.match(r"^\d+\.\s", stripped):  # numbered list item
+        return False
+    return True
+
+
+def _looks_like_heading_line_for_weixin(line: str) -> bool:
+    """Return True when a short line ends with an ASCII or full-width colon."""
+    stripped = line.strip()
+    if not stripped:
+        return False
+    return len(stripped) <= 24 and stripped.endswith((":", "\uff1a"))
+
+
+def _should_split_short_chat_block_for_weixin(block: str) -> bool:
+    """Split only chat-like multiline blocks (2-6 non-blank lines) into bubbles."""
+    lines = [line for line in block.splitlines() if line.strip()]
+    if not 2 <= len(lines) <= 6:
+        return False
+    if _looks_like_heading_line_for_weixin(lines[0]):  # heading + body stays together
+        return False
+    return all(_looks_like_chatty_line_for_weixin(line) for line in lines)
+
+
 def _pack_markdown_blocks_for_weixin(content: str, max_length: int) -> List[str]:
     if len(content) <= max_length:
         return [content]
@@ -787,9 +823,15 @@ def _split_text_for_weixin_delivery(
             chunks.extend(_pack_markdown_blocks_for_weixin(unit, max_length))
         return chunks or [content]
 
-    # Compact (default): single message when under the limit.
+    # Compact (default): single message when under the limit — unless the
+    # content looks like a short chatty exchange, in which case split into
+    # separate bubbles for a more natural chat feel.
     if len(content) <= max_length:
-        return [content]
+        return (
+            _split_delivery_units_for_weixin(content)
+            if _should_split_short_chat_block_for_weixin(content)
+            else [content]
+        )
     return _pack_markdown_blocks_for_weixin(content, max_length) or [content]
 
 
diff --git a/tests/gateway/test_weixin.py b/tests/gateway/test_weixin.py
index bb439fa9a6..f2afe1049a 100644
--- a/tests/gateway/test_weixin.py
+++ b/tests/gateway/test_weixin.py
@@ -64,13 +64,44 @@ class TestWeixinFormatting:
 
 
 class TestWeixinChunking:
-    def test_split_text_keeps_short_multiline_message_in_single_chunk(self):
+    def test_split_text_splits_short_chatty_replies_into_separate_bubbles(self):
         adapter = _make_adapter()
 
         content = adapter.format_message("第一行\n第二行\n第三行")
         chunks = adapter._split_text(content)
 
-        assert chunks == ["第一行\n第二行\n第三行"]
+        assert chunks == ["第一行", "第二行", "第三行"]
+
+    def test_split_text_keeps_structured_table_block_together(self):
+        adapter = _make_adapter()
+
+        content = adapter.format_message(
+            "- Setting: Timeout\n Value: 30s\n- Setting: Retries\n Value: 3"
+        )
+        chunks = adapter._split_text(content)
+
+        assert chunks == ["- Setting: Timeout\n Value: 30s\n- Setting: Retries\n Value: 3"]
+
+    def test_split_text_keeps_four_line_structured_blocks_together(self):
+        adapter = _make_adapter()
+
+        content = adapter.format_message(
+            "今天结论:\n"
+            "- 留存下降 3%\n"
+            "- 转化上涨 8%\n"
+            "- 主要问题在首日激活"
+        )
+        chunks = adapter._split_text(content)
+
+        assert chunks == ["今天结论:\n- 留存下降 3%\n- 转化上涨 8%\n- 主要问题在首日激活"]
+
+    def test_split_text_keeps_heading_with_body_together(self):
+        adapter = _make_adapter()
+
+        content = adapter.format_message("## 结论\n这是正文")
+        chunks = adapter._split_text(content)
+
+        assert chunks == ["**结论**\n这是正文"]
 
     def test_split_text_keeps_short_reformatted_table_in_single_chunk(self):
         adapter = _make_adapter()