mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-21 03:39:54 +00:00
fix(agent): add qwen and deepseek to TOOL_USE_ENFORCEMENT_MODELS
When `agent.tool_use_enforcement` is `"auto"` (the default), the runtime checks the active model name against `TOOL_USE_ENFORCEMENT_MODELS` in `agent/prompt_builder.py` and only injects `TOOL_USE_ENFORCEMENT_GUIDANCE` if a substring matches. Qwen and DeepSeek hit the same chatty/hallucinatory failure mode as GPT, Codex, Grok, and GLM (describing intended actions instead of calling tools, ignoring memory, silently stopping mid-execution), but neither substring was in the tuple — so the enforcement prompt was never injected for users on those families, even with `auto` left at its default.

Add `"qwen"` and `"deepseek"` to the tuple, matching the established additive pattern (#5595 added grok, #24715 added glm, #27797 widened grok to xai-oauth).

Add four regression-guard tests that fail before the production change and pass after: two unit assertions in `test_prompt_builder.py` mirroring the existing grok/gpt checks, and two integration tests in `test_run_agent.py` confirming that a qwen/deepseek model under `tool_use_enforcement="auto"` now gets the guidance string in its system prompt.

The "robust" alternative from the issue (default-true for all models) is intentionally not taken: it would silently flip behavior for users who currently rely on `auto` leaving Claude / non-listed families unsteered, and the maintainer's prior merged work in this area is uniformly additive.

Fixes #28079
This commit is contained in:
@@ -268,7 +268,7 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
 
 # Model name substrings that trigger tool-use enforcement guidance.
 # Add new patterns here when a model family needs explicit steering.
-TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm")
+TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm", "qwen", "deepseek")
 
 # OpenAI GPT/Codex-specific execution guidance. Addresses known failure modes
 # where GPT models abandon work on partial results, skip prerequisite lookups,
@@ -1144,6 +1144,12 @@ class TestToolUseEnforcementGuidance:
     def test_enforcement_models_includes_grok(self):
         assert "grok" in TOOL_USE_ENFORCEMENT_MODELS
 
+    def test_enforcement_models_includes_qwen(self):
+        assert "qwen" in TOOL_USE_ENFORCEMENT_MODELS
+
+    def test_enforcement_models_includes_deepseek(self):
+        assert "deepseek" in TOOL_USE_ENFORCEMENT_MODELS
+
     def test_enforcement_models_is_tuple(self):
         assert isinstance(TOOL_USE_ENFORCEMENT_MODELS, tuple)
 
@@ -1103,6 +1103,20 @@ class TestToolUseEnforcementConfig:
         prompt = agent._build_system_prompt()
         assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
 
+    def test_auto_injects_for_qwen(self):
+        """Qwen models default to chatty/hallucinatory tool use without enforcement."""
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="qwen/qwen3.6-plus", tool_use_enforcement="auto")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
+    def test_auto_injects_for_deepseek(self):
+        """DeepSeek models default to chatty/hallucinatory tool use without enforcement."""
+        from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE
+        agent = self._make_agent(model="deepseek/deepseek-r1", tool_use_enforcement="auto")
+        prompt = agent._build_system_prompt()
+        assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt
+
     def test_auto_injects_execution_guidance_for_grok(self):
         """Grok also gets OPENAI_MODEL_EXECUTION_GUIDANCE (verification,
         mandatory_tool_use, act_dont_ask). Same failure modes as GPT in
Reference in New Issue
Block a user