From 0fa46c613b364e435ea8a8ea6c3cb31c1a01ab50 Mon Sep 17 00:00:00 2001
From: teknium1 <127238744+teknium1@users.noreply.github.com>
Date: Mon, 18 May 2026 01:19:16 -0700
Subject: [PATCH] fix(yuanbao): persist message_id on @bot user transcript
 writes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Yuanbao's QuoteContextMiddleware has a transcript-lookup fallback for
when quote.desc is empty: it scans the session transcript for the quoted
message_id and pulls ybres anchors out of its content. That fallback
works for observed (silent) group messages because the platform writer
attaches message_id (yuanbao.py:2091).

It silently fails for @bot agent-processed messages because gateway/run.py
wrote them as {role:user, content, timestamp} with no message_id, so
quoting an earlier @bot turn that contained an image/file couldn't be
resolved.

Fix: attach event.message_id to the user transcript entry at all three
write sites in gateway/run.py — the agent_failed_early branch, the
no-new-messages edge case, and the normal agent path (first user-role
entry in new_messages).

Surfaces gap reported in #27425 (loongfay) using the existing fallback
already on main; no new caches needed.

Co-authored-by: loongfay <loongfay@users.noreply.github.com>
---
 gateway/run.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/gateway/run.py b/gateway/run.py
index 818bd282dd..623d238af3 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -8072,9 +8072,12 @@ class GatewayRunner:
                 # message so the next message can load a transcript that
                 # reflects what was said.  Skip the assistant error text since
                 # it's a gateway-generated hint, not model output. (#7100)
+                _user_entry = {"role": "user", "content": message_text, "timestamp": ts}
+                if event.message_id:
+                    _user_entry["message_id"] = str(event.message_id)
                 self.session_store.append_to_transcript(
                     session_entry.session_id,
-                    {"role": "user", "content": message_text, "timestamp": ts},
+                    _user_entry,
                 )
             else:
                 history_len = agent_result.get("history_offset", len(history))
@@ -8082,9 +8085,12 @@ class GatewayRunner:
 
                 # If no new messages found (edge case), fall back to simple user/assistant
                 if not new_messages:
+                    _user_entry = {"role": "user", "content": message_text, "timestamp": ts}
+                    if event.message_id:
+                        _user_entry["message_id"] = str(event.message_id)
                     self.session_store.append_to_transcript(
                         session_entry.session_id,
-                        {"role": "user", "content": message_text, "timestamp": ts}
+                        _user_entry,
                     )
                     if response:
                         self.session_store.append_to_transcript(
@@ -8097,12 +8103,25 @@ class GatewayRunner:
                     # to prevent the duplicate-write bug (#860).  We still write
                     # to JSONL for backward compatibility and as a backup.
                     agent_persisted = self._session_db is not None
+                    # Attach the inbound platform message_id to the first user
+                    # entry written this turn so platform-level quote-resolution
+                    # (e.g. Yuanbao QuoteContextMiddleware's transcript fallback)
+                    # can find earlier @bot messages by their original message_id.
+                    _user_msg_id_attached = False
                     for msg in new_messages:
                         # Skip system messages (they're rebuilt each run)
                         if msg.get("role") == "system":
                             continue
                         # Add timestamp to each message for debugging
                         entry = {**msg, "timestamp": ts}
+                        if (
+                            not _user_msg_id_attached
+                            and msg.get("role") == "user"
+                            and event.message_id
+                            and "message_id" not in entry
+                        ):
+                            entry["message_id"] = str(event.message_id)
+                            _user_msg_id_attached = True
                         self.session_store.append_to_transcript(
                             session_entry.session_id, entry,
                             skip_db=agent_persisted,