fix(yuanbao): persist message_id on @bot user transcript writes

Yuanbao's QuoteContextMiddleware has a transcript-lookup fallback for
when quote.desc is empty: it scans the session transcript for the quoted
message_id and pulls ybres anchors out of its content. That fallback
works for observed (silent) group messages because the platform writer
attaches message_id (yuanbao.py:2091).

It silently failed for @bot agent-processed messages because gateway/run.py
wrote them as {role:user, content, timestamp} with no message_id, so
quoting an earlier @bot turn that contained an image/file couldn't be
resolved.

Fix: attach event.message_id to the user transcript entry at all three
write sites in gateway/run.py — the agent_failed_early branch, the
no-new-messages edge case, and the normal agent path (first user-role
entry in new_messages).

Addresses the gap reported in #27425 (loongfay) using the existing fallback
already on main; no new caches are needed.

Co-authored-by: loongfay <loongfay@users.noreply.github.com>
This commit is contained in:
teknium1
2026-05-18 01:19:16 -07:00
committed by Teknium
parent 41f1eddee3
commit 0fa46c613b
+21 -2
View File
@@ -8072,9 +8072,12 @@ class GatewayRunner:
# message so the next message can load a transcript that
# reflects what was said. Skip the assistant error text since
# it's a gateway-generated hint, not model output. (#7100)
_user_entry = {"role": "user", "content": message_text, "timestamp": ts}
if event.message_id:
_user_entry["message_id"] = str(event.message_id)
self.session_store.append_to_transcript(
session_entry.session_id,
{"role": "user", "content": message_text, "timestamp": ts},
_user_entry,
)
else:
history_len = agent_result.get("history_offset", len(history))
@@ -8082,9 +8085,12 @@ class GatewayRunner:
# If no new messages found (edge case), fall back to simple user/assistant
if not new_messages:
_user_entry = {"role": "user", "content": message_text, "timestamp": ts}
if event.message_id:
_user_entry["message_id"] = str(event.message_id)
self.session_store.append_to_transcript(
session_entry.session_id,
{"role": "user", "content": message_text, "timestamp": ts}
_user_entry,
)
if response:
self.session_store.append_to_transcript(
@@ -8097,12 +8103,25 @@ class GatewayRunner:
# to prevent the duplicate-write bug (#860). We still write
# to JSONL for backward compatibility and as a backup.
agent_persisted = self._session_db is not None
# Attach the inbound platform message_id to the first user
# entry written this turn so platform-level quote-resolution
# (e.g. Yuanbao QuoteContextMiddleware's transcript fallback)
# can find earlier @bot messages by their original message_id.
_user_msg_id_attached = False
for msg in new_messages:
# Skip system messages (they're rebuilt each run)
if msg.get("role") == "system":
continue
# Add timestamp to each message for debugging
entry = {**msg, "timestamp": ts}
if (
not _user_msg_id_attached
and msg.get("role") == "user"
and event.message_id
and "message_id" not in entry
):
entry["message_id"] = str(event.message_id)
_user_msg_id_attached = True
self.session_store.append_to_transcript(
session_entry.session_id, entry,
skip_db=agent_persisted,