From df22d29522ced894ab79ff66e4496c2c93be65c4 Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Sat, 16 May 2026 23:38:45 -0700 Subject: [PATCH] =?UTF-8?q?fix(copilot):=20GitHub=20Models=20413=20hint=20?= =?UTF-8?q?=E2=80=94=20port=20to=20extracted=20conversation=5Floop?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original commits 4ded3ede3 (@konsisumer) + 374dc81c2 (Teknium) added a 413 hint to run_agent.py's agent loop. The final-state version (the sharpened 374dc81c2 wording) was ported to agent/conversation_loop.py, where the payload_too_large branch now lives. The deprecation detection + _URL_TO_PROVIDER changes from both commits landed in agent/copilot_acp_client.py and agent/model_metadata.py via the prior merge. Closes #10648 Co-authored-by: konsisumer Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com> --- agent/conversation_loop.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index e121c4b2a7..8096b75429 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -2333,6 +2333,39 @@ def run_conversation( classified.reason == FailoverReason.payload_too_large ) + # Actionable hint for GitHub Models (Azure) 413 errors. + # The free tier enforces a hard 8K token cap per request, + # which Hermes' system prompt + tool schemas alone exceed. + # Compression can't help — the floor is the system prompt + # itself, not the conversation — so surface a clear "not + # compatible" message. NOTE(review): this only prints the hint; it + # does not skip the compression retries below — confirm intended. 
+ if ( + status_code == 413 + and isinstance(agent.base_url, str) + and "models.inference.ai.azure.com" in agent.base_url + ): + agent._vprint( + f"{agent.log_prefix} 💡 GitHub Models free tier (models.inference.ai.azure.com) caps every", + force=True, + ) + agent._vprint( + f"{agent.log_prefix} request at ~8K tokens. Hermes' system prompt + tool schemas baseline", + force=True, + ) + agent._vprint( + f"{agent.log_prefix} exceeds that floor, so this endpoint cannot run an agentic loop.", + force=True, + ) + agent._vprint( + f"{agent.log_prefix} Use the `copilot` provider with a Copilot subscription token (`hermes", + force=True, + ) + agent._vprint( + f"{agent.log_prefix} setup` → GitHub Copilot), or pick any other provider.", + force=True, + ) + if is_payload_too_large: compression_attempts += 1 if compression_attempts > max_compression_attempts: