diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py
index 0db33e1cb3..603b44ff9b 100644
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -268,7 +268,7 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
 
 # Model name substrings that trigger tool-use enforcement guidance.
 # Add new patterns here when a model family needs explicit steering.
-TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm")
+TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm", "qwen", "deepseek")
 
 # OpenAI GPT/Codex-specific execution guidance. Addresses known failure modes
 # where GPT models abandon work on partial results, skip prerequisite lookups,
diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py
index 936aff16bf..76d13f5d22 100644
--- a/tests/agent/test_prompt_builder.py
+++ b/tests/agent/test_prompt_builder.py
@@ -1144,6 +1144,12 @@ class TestToolUseEnforcementGuidance:
     def test_enforcement_models_includes_grok(self):
         assert "grok" in TOOL_USE_ENFORCEMENT_MODELS
 
+    def test_enforcement_models_includes_qwen(self):
+        assert "qwen" in TOOL_USE_ENFORCEMENT_MODELS
+
+    def test_enforcement_models_includes_deepseek(self):
+        assert "deepseek" in TOOL_USE_ENFORCEMENT_MODELS
+
     def test_enforcement_models_is_tuple(self):
         assert isinstance(TOOL_USE_ENFORCEMENT_MODELS, tuple)
 
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index 9ff7ab2861..93513ab0ef 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -1103,6 +1103,20 @@ class TestToolUseEnforcementConfig:
         prompt = agent._build_system_prompt()
         assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
 
+    def test_auto_injects_for_qwen(self):
+        """Qwen models default to chatty/hallucinatory tool use without enforcement."""
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="qwen/qwen3.6-plus", tool_use_enforcement="auto")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_auto_injects_for_deepseek(self):
+        """DeepSeek models default to chatty/hallucinatory tool use without enforcement."""
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="deepseek/deepseek-r1", tool_use_enforcement="auto")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
     def test_auto_injects_execution_guidance_for_grok(self):
         """Grok also gets OPENAI_MODEL_EXECUTION_GUIDANCE (verification,
         mandatory_tool_use, act_dont_ask). Same failure modes as GPT in