fix(copilot): GitHub Models 413 hint — port to extracted conversation_loop

Commits 4ded3ede3 (@konsisumer) and 374dc81c2 (Teknium) originally added a 413 hint to the agent loop in run_agent.py. This commit ports the final-state version (the sharpened wording from 374dc81c2) to agent/conversation_loop.py, where the payload_too_large branch now lives. The deprecation detection and _URL_TO_PROVIDER changes from both commits already landed in agent/copilot_acp_client.py and agent/model_metadata.py via the prior merge.

Closes #10648
Co-authored-by: konsisumer <der@konsi.org>
Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com>
2026-05-21 03:39:54 +00:00 · 2026-05-16 23:38:45 -07:00
parent 3fbedd732e
commit df22d29522
1 changed files with 33 additions and 0 deletions
@@ -2333,6 +2333,39 @@ def run_conversation(
                    classified.reason == FailoverReason.payload_too_large
                )

+                # Actionable hint for GitHub Models (Azure) 413 errors.
+                # The free tier caps every request at roughly 8K tokens,
+                # which Hermes' system prompt + tool schemas alone exceed.
+                # Compression can't help — the floor is the system prompt
+                # itself, not the conversation — so surface a clear "not
+                # compatible" message. NOTE(review): this only prints the
+                # hint; the compression retries below still run — confirm.
+                if (
+                    status_code == 413
+                    and isinstance(agent.base_url, str)
+                    and "models.inference.ai.azure.com" in agent.base_url
+                ):
+                    agent._vprint(
+                        f"{agent.log_prefix}   💡 GitHub Models free tier (models.inference.ai.azure.com) caps every",
+                        force=True,
+                    )
+                    agent._vprint(
+                        f"{agent.log_prefix}      request at ~8K tokens. Hermes' system prompt + tool schemas baseline",
+                        force=True,
+                    )
+                    agent._vprint(
+                        f"{agent.log_prefix}      exceeds that floor, so this endpoint cannot run an agentic loop.",
+                        force=True,
+                    )
+                    agent._vprint(
+                        f"{agent.log_prefix}      Use the `copilot` provider with a Copilot subscription token (`hermes",
+                        force=True,
+                    )
+                    agent._vprint(
+                        f"{agent.log_prefix}      setup` → GitHub Copilot), or pick any other provider.",
+                        force=True,
+                    )
+
                if is_payload_too_large:
                    compression_attempts += 1
                    if compression_attempts > max_compression_attempts: