mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-21 03:39:54 +00:00
fix(weixin): split chatty short replies into separate bubbles, keep structured content together
Add content-aware splitting to compact mode: short chat-like exchanges (2-6 short lines without headings/lists/quotes) get separate message bubbles for a natural chat feel, while structured content (tables, headings with body, numbered lists) stays in a single message. Cherry-picked from PR #7587 by bravohenry, adapted to the compact/legacy split_per_line architecture from #7903.
This commit is contained in:
@@ -734,6 +734,42 @@ def _split_delivery_units_for_weixin(content: str) -> List[str]:
|
||||
return [unit for unit in units if unit]
|
||||
|
||||
|
||||
def _looks_like_chatty_line_for_weixin(line: str) -> bool:
|
||||
"""Return True when a line looks like a standalone chat utterance."""
|
||||
stripped = line.strip()
|
||||
if not stripped:
|
||||
return False
|
||||
if len(stripped) > 48:
|
||||
return False
|
||||
if line.startswith((" ", "\t")):
|
||||
return False
|
||||
if stripped.startswith((">", "-", "*", "【")):
|
||||
return False
|
||||
if re.match(r"^\*\*[^*]+\*\*$", stripped):
|
||||
return False
|
||||
if re.match(r"^\d+\.\s", stripped):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _looks_like_heading_line_for_weixin(line: str) -> bool:
|
||||
"""Return True when a short line behaves like a plain-text heading."""
|
||||
stripped = line.strip()
|
||||
if not stripped:
|
||||
return False
|
||||
return len(stripped) <= 24 and stripped.endswith((":", ":"))
|
||||
|
||||
|
||||
def _should_split_short_chat_block_for_weixin(block: str) -> bool:
    """Decide whether a multiline block should go out as separate bubbles.

    Blank lines are ignored. The block is split only when it has between two
    and six remaining lines, its first line is not heading-like, and every
    line passes the chatty-line check.

    Args:
        block: The message text to inspect.

    Returns:
        True if the block should be split, False if it should stay together.
    """
    rows = [row for row in block.splitlines() if row.strip()]
    row_count = len(rows)
    if row_count < 2 or row_count > 6:
        return False
    # A heading on the first line means the lines below belong to it.
    if _looks_like_heading_line_for_weixin(rows[0]):
        return False
    for row in rows:
        if not _looks_like_chatty_line_for_weixin(row):
            return False
    return True
|
||||
|
||||
|
||||
def _pack_markdown_blocks_for_weixin(content: str, max_length: int) -> List[str]:
|
||||
if len(content) <= max_length:
|
||||
return [content]
|
||||
@@ -787,9 +823,15 @@ def _split_text_for_weixin_delivery(
|
||||
chunks.extend(_pack_markdown_blocks_for_weixin(unit, max_length))
|
||||
return chunks or [content]
|
||||
|
||||
# Compact (default): single message when under the limit.
|
||||
# Compact (default): single message when under the limit — unless the
|
||||
# content looks like a short chatty exchange, in which case split into
|
||||
# separate bubbles for a more natural chat feel.
|
||||
if len(content) <= max_length:
|
||||
return [content]
|
||||
return (
|
||||
_split_delivery_units_for_weixin(content)
|
||||
if _should_split_short_chat_block_for_weixin(content)
|
||||
else [content]
|
||||
)
|
||||
return _pack_markdown_blocks_for_weixin(content, max_length) or [content]
|
||||
|
||||
|
||||
|
||||
@@ -64,13 +64,44 @@ class TestWeixinFormatting:
|
||||
|
||||
|
||||
class TestWeixinChunking:
|
||||
def test_split_text_keeps_short_multiline_message_in_single_chunk(self):
|
||||
def test_split_text_splits_short_chatty_replies_into_separate_bubbles(self):
|
||||
adapter = _make_adapter()
|
||||
|
||||
content = adapter.format_message("第一行\n第二行\n第三行")
|
||||
chunks = adapter._split_text(content)
|
||||
|
||||
assert chunks == ["第一行\n第二行\n第三行"]
|
||||
assert chunks == ["第一行", "第二行", "第三行"]
|
||||
|
||||
def test_split_text_keeps_structured_table_block_together(self):
    """Bullet rows with indented value lines come back as one chunk."""
    table_text = "- Setting: Timeout\n Value: 30s\n- Setting: Retries\n Value: 3"
    weixin = _make_adapter()

    formatted = weixin.format_message(table_text)
    pieces = weixin._split_text(formatted)

    # The text must survive formatting and chunking unchanged and unsplit.
    assert pieces == [table_text]
|
||||
|
||||
def test_split_text_keeps_four_line_structured_blocks_together(self):
    """A colon-terminated first line plus three bullets stays in one chunk."""
    report = "\n".join(
        [
            "今天结论:",
            "- 留存下降 3%",
            "- 转化上涨 8%",
            "- 主要问题在首日激活",
        ]
    )
    weixin = _make_adapter()

    formatted = weixin.format_message(report)
    pieces = weixin._split_text(formatted)

    # Four short lines fall inside the 2-6 line window, so only the
    # structure of the lines can keep them together.
    assert pieces == [report]
|
||||
|
||||
def test_split_text_keeps_heading_with_body_together(self):
    """A markdown heading and its body line are delivered as one chunk."""
    weixin = _make_adapter()

    formatted = weixin.format_message("## 结论\n这是正文")
    pieces = weixin._split_text(formatted)

    # The expected chunk carries the heading in bold form, on top of its body.
    expected = ["**结论**\n这是正文"]
    assert pieces == expected
|
||||
|
||||
def test_split_text_keeps_short_reformatted_table_in_single_chunk(self):
|
||||
adapter = _make_adapter()
|
||||
|
||||
Reference in New Issue
Block a user