refactor(run_agent): extract system-prompt builder to agent/system_prompt.py

Four AIAgent methods move into a dedicated module: * build_system_prompt_parts — three-tier stable/context/volatile dict * build_system_prompt — joiner used at session start * invalidate_system_prompt — drop cache + reload memory * format_tools_for_system_message — trajectory-format tool dump The extracted helpers look up patch-target names (load_soul_md, build_skills_system_prompt, get_toolset_for_tool, build_environment_hints, build_context_files_prompt, build_nous_subscription_prompt) through the run_agent module via _ra() instead of importing them directly. That preserves the patch surface tests rely on (patch('run_agent.load_soul_md', ...) and friends). AIAgent keeps thin forwarder methods. tests/run_agent/ + tests/agent/: 4313 passed (same pre-existing test_auxiliary_client failure as before). run_agent.py: 14555 -> 14292 lines (-263).
2026-05-21 03:39:54 +00:00 · 2026-05-16 18:16:20 -07:00
parent 5311d9959e
commit 2d2cd5e904
2 changed files with 345 additions and 258 deletions
@@ -0,0 +1,333 @@
+"""System-prompt assembly for :class:`AIAgent`.
+
+The agent's system prompt is built once per session and reused across all
+turns — only context compression triggers a rebuild.  This keeps the
+upstream prefix cache warm.  See ``hermes-agent-dev``'s
+``references/system-prompt-invariant.md`` for the invariants and
+``references/self-improvement-loop.md`` for how the background-review
+fork inherits the cached prompt verbatim.
+
+Three tiers are joined with ``\\n\\n``:
+
+* ``stable``   — identity (SOUL.md or DEFAULT_AGENT_IDENTITY), tool
+  guidance, computer-use guidance, nous subscription block, tool-use
+  enforcement guidance + per-model operational guidance, skills prompt,
+  alibaba model-name workaround, environment hints, platform hints.
+* ``context``  — caller-supplied ``system_message`` plus context files
+  (AGENTS.md / .cursorrules / etc.) discovered under ``TERMINAL_CWD``.
+* ``volatile`` — memory snapshot, USER.md profile, external memory
+  provider block, timestamp/session/model/provider line.
+
+Pure helpers that read the agent's state.  AIAgent keeps thin forwarders.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from typing import Any, Dict, List, Optional
+
+from agent.prompt_builder import (
+    DEFAULT_AGENT_IDENTITY,
+    GOOGLE_MODEL_OPERATIONAL_GUIDANCE,
+    HERMES_AGENT_HELP_GUIDANCE,
+    KANBAN_GUIDANCE,
+    MEMORY_GUIDANCE,
+    OPENAI_MODEL_EXECUTION_GUIDANCE,
+    PLATFORM_HINTS,
+    SESSION_SEARCH_GUIDANCE,
+    SKILLS_GUIDANCE,
+    TOOL_USE_ENFORCEMENT_GUIDANCE,
+    TOOL_USE_ENFORCEMENT_MODELS,
+)
+
+
+def _ra():
+    """Lazy reference to the ``run_agent`` module.
+
+    Helpers like ``load_soul_md``, ``build_environment_hints``,
+    ``build_context_files_prompt``, ``build_nous_subscription_prompt``,
+    ``build_skills_system_prompt`` and ``get_toolset_for_tool`` are
+    imported into ``run_agent``'s namespace.  Many tests
+    ``patch("run_agent.load_soul_md", ...)``; if we imported them
+    directly here those patches would not reach us.  Looking them up
+    through ``run_agent`` on every call preserves the patch contract.
+    """
+    import run_agent
+    return run_agent
+
+
+def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None) -> Dict[str, str]:
+    """Assemble the system prompt as three ordered parts.
+
+    Returns a dict with three keys:
+      * ``stable``   — identity, tool guidance, skills prompt,
+        environment hints, platform hints, model-family operational
+        guidance.
+      * ``context``  — context files (AGENTS.md, .cursorrules, etc.)
+        and caller-supplied system_message.
+      * ``volatile`` — memory snapshot, user profile, external
+        memory provider block, timestamp line.
+
+    Joined into a single string by :func:`build_system_prompt` and
+    cached on ``agent._cached_system_prompt`` for the lifetime of the
+    AIAgent.  Hermes never re-renders parts of this string mid-
+    session — that's the only way to keep upstream prompt caches
+    warm across turns.
+    """
+    # Local import to avoid pulling model_tools at module load.  Tests
+    # patch ``run_agent.get_toolset_for_tool`` and similar helpers, so
+    # we resolve through ``_ra()`` to honor those patches.
+    _r = _ra()
+
+    # ── Stable tier ────────────────────────────────────────────────
+    stable_parts: List[str] = []
+
+    # Try SOUL.md as primary identity unless the caller explicitly skipped it.
+    # Some execution modes (cron) still want HERMES_HOME persona while keeping
+    # cwd project instructions disabled.
+    _soul_loaded = False
+    if agent.load_soul_identity or not agent.skip_context_files:
+        _soul_content = _r.load_soul_md()
+        if _soul_content:
+            stable_parts.append(_soul_content)
+            _soul_loaded = True
+
+    if not _soul_loaded:
+        # Fallback to hardcoded identity
+        stable_parts.append(DEFAULT_AGENT_IDENTITY)
+
+    # Pointer to the hermes-agent skill + docs for user questions about Hermes itself.
+    stable_parts.append(HERMES_AGENT_HELP_GUIDANCE)
+
+    # Tool-aware behavioral guidance: only inject when the tools are loaded
+    tool_guidance = []
+    if "memory" in agent.valid_tool_names:
+        tool_guidance.append(MEMORY_GUIDANCE)
+    if "session_search" in agent.valid_tool_names:
+        tool_guidance.append(SESSION_SEARCH_GUIDANCE)
+    if "skill_manage" in agent.valid_tool_names:
+        tool_guidance.append(SKILLS_GUIDANCE)
+    # Kanban worker/orchestrator lifecycle — only present when the
+    # dispatcher spawned this process (kanban_show check_fn gates on
+    # HERMES_KANBAN_TASK env var). Normal chat sessions never see
+    # this block.
+    if "kanban_show" in agent.valid_tool_names:
+        tool_guidance.append(KANBAN_GUIDANCE)
+    if tool_guidance:
+        stable_parts.append(" ".join(tool_guidance))
+
+    # Computer-use (macOS) — goes in as its own block rather than being
+    # merged into tool_guidance because the content is multi-paragraph.
+    if "computer_use" in agent.valid_tool_names:
+        from agent.prompt_builder import COMPUTER_USE_GUIDANCE
+        stable_parts.append(COMPUTER_USE_GUIDANCE)
+
+    nous_subscription_prompt = _r.build_nous_subscription_prompt(agent.valid_tool_names)
+    if nous_subscription_prompt:
+        stable_parts.append(nous_subscription_prompt)
+    # Tool-use enforcement: tells the model to actually call tools instead
+    # of describing intended actions.  Controlled by config.yaml
+    # agent.tool_use_enforcement:
+    #   "auto" (default) — matches TOOL_USE_ENFORCEMENT_MODELS
+    #   true  — always inject (all models)
+    #   false — never inject
+    #   list  — custom model-name substrings to match
+    if agent.valid_tool_names:
+        _enforce = agent._tool_use_enforcement
+        _inject = False
+        if _enforce is True or (isinstance(_enforce, str) and _enforce.lower() in {"true", "always", "yes", "on"}):
+            _inject = True
+        elif _enforce is False or (isinstance(_enforce, str) and _enforce.lower() in {"false", "never", "no", "off"}):
+            _inject = False
+        elif isinstance(_enforce, list):
+            model_lower = (agent.model or "").lower()
+            _inject = any(p.lower() in model_lower for p in _enforce if isinstance(p, str))
+        else:
+            # "auto" or any unrecognised value — use hardcoded defaults
+            model_lower = (agent.model or "").lower()
+            _inject = any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS)
+        if _inject:
+            stable_parts.append(TOOL_USE_ENFORCEMENT_GUIDANCE)
+            _model_lower = (agent.model or "").lower()
+            # Google model operational guidance (conciseness, absolute
+            # paths, parallel tool calls, verify-before-edit, etc.)
+            if "gemini" in _model_lower or "gemma" in _model_lower:
+                stable_parts.append(GOOGLE_MODEL_OPERATIONAL_GUIDANCE)
+            # OpenAI GPT/Codex execution discipline (tool persistence,
+            # prerequisite checks, verification, anti-hallucination).
+            if "gpt" in _model_lower or "codex" in _model_lower:
+                stable_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE)
+
+    has_skills_tools = any(name in agent.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage'])
+    if has_skills_tools:
+        avail_toolsets = {
+            toolset
+            for toolset in (
+                _r.get_toolset_for_tool(tool_name) for tool_name in agent.valid_tool_names
+            )
+            if toolset
+        }
+        skills_prompt = _r.build_skills_system_prompt(
+            available_tools=agent.valid_tool_names,
+            available_toolsets=avail_toolsets,
+        )
+    else:
+        skills_prompt = ""
+    if skills_prompt:
+        stable_parts.append(skills_prompt)
+
+    # Alibaba Coding Plan API always returns "glm-4.7" as model name regardless
+    # of the requested model. Inject explicit model identity into the system prompt
+    # so the agent can correctly report which model it is (workaround for API bug).
+    # Stable for the lifetime of an agent instance — model and provider are fixed
+    # at construction time.
+    if agent.provider == "alibaba":
+        _model_short = agent.model.split("/")[-1] if "/" in agent.model else agent.model
+        stable_parts.append(
+            f"You are powered by the model named {_model_short}. "
+            f"The exact model ID is {agent.model}. "
+            f"When asked what model you are, always answer based on this information, "
+            f"not on any model name returned by the API."
+        )
+
+    # Environment hints (WSL, Termux, etc.) — tell the agent about the
+    # execution environment so it can translate paths and adapt behavior.
+    # Stable for the lifetime of the process.
+    _env_hints = _r.build_environment_hints()
+    if _env_hints:
+        stable_parts.append(_env_hints)
+
+    platform_key = (agent.platform or "").lower().strip()
+    if platform_key in PLATFORM_HINTS:
+        stable_parts.append(PLATFORM_HINTS[platform_key])
+    elif platform_key:
+        # Check plugin registry for platform-specific LLM guidance
+        try:
+            from gateway.platform_registry import platform_registry
+            _entry = platform_registry.get(platform_key)
+            if _entry and _entry.platform_hint:
+                stable_parts.append(_entry.platform_hint)
+        except Exception:
+            pass
+
+    # ── Context tier (cwd-dependent, may change between sessions) ─
+    context_parts: List[str] = []
+
+    # Note: ephemeral_system_prompt is NOT included here. It's injected at
+    # API-call time only so it stays out of the cached/stored system prompt.
+    if system_message is not None:
+        context_parts.append(system_message)
+
+    if not agent.skip_context_files:
+        # Use TERMINAL_CWD for context file discovery when set (gateway
+        # mode).  The gateway process runs from the hermes-agent install
+        # dir, so os.getcwd() would pick up the repo's AGENTS.md and
+        # other dev files — inflating token usage by ~10k for no benefit.
+        _context_cwd = os.getenv("TERMINAL_CWD") or None
+        context_files_prompt = _r.build_context_files_prompt(
+            cwd=_context_cwd, skip_soul=_soul_loaded)
+        if context_files_prompt:
+            context_parts.append(context_files_prompt)
+
+    # ── Volatile tier (changes per session/turn — never cached) ───
+    volatile_parts: List[str] = []
+
+    if agent._memory_store:
+        if agent._memory_enabled:
+            mem_block = agent._memory_store.format_for_system_prompt("memory")
+            if mem_block:
+                volatile_parts.append(mem_block)
+        # USER.md is always included when enabled.
+        if agent._user_profile_enabled:
+            user_block = agent._memory_store.format_for_system_prompt("user")
+            if user_block:
+                volatile_parts.append(user_block)
+
+    # External memory provider system prompt block (additive to built-in)
+    if agent._memory_manager:
+        try:
+            _ext_mem_block = agent._memory_manager.build_system_prompt()
+            if _ext_mem_block:
+                volatile_parts.append(_ext_mem_block)
+        except Exception:
+            pass
+
+    from hermes_time import now as _hermes_now
+    now = _hermes_now()
+    timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
+    if agent.pass_session_id and agent.session_id:
+        timestamp_line += f"\nSession ID: {agent.session_id}"
+    if agent.model:
+        timestamp_line += f"\nModel: {agent.model}"
+    if agent.provider:
+        timestamp_line += f"\nProvider: {agent.provider}"
+    volatile_parts.append(timestamp_line)
+
+    return {
+        "stable":   "\n\n".join(p.strip() for p in stable_parts   if p and p.strip()),
+        "context":  "\n\n".join(p.strip() for p in context_parts  if p and p.strip()),
+        "volatile": "\n\n".join(p.strip() for p in volatile_parts if p and p.strip()),
+    }
+
+
+def build_system_prompt(agent: Any, system_message: Optional[str] = None) -> str:
+    """Assemble the full system prompt from all layers.
+
+    Called once per session (cached on ``agent._cached_system_prompt``) and
+    only rebuilt after context compression events. This ensures the system
+    prompt is stable across all turns in a session, maximizing prefix cache
+    hits.
+
+    Layers are ordered cache-friendly: stable identity/guidance first,
+    then session-stable context files, then per-call volatile content
+    (memory, USER profile, timestamp).  The whole string is treated as
+    one cached block — Hermes never rebuilds or reinjects parts of it
+    mid-session, which is the only way to keep upstream prompt caches
+    warm across turns.
+    """
+    parts = build_system_prompt_parts(agent, system_message=system_message)
+    return "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
+
+
+def invalidate_system_prompt(agent: Any) -> None:
+    """Invalidate the cached system prompt, forcing a rebuild on the next turn.
+
+    Called after context compression events. Also reloads memory from disk
+    so the rebuilt prompt captures any writes from this session.
+    """
+    agent._cached_system_prompt = None
+    if agent._memory_store:
+        agent._memory_store.load_from_disk()
+
+
+def format_tools_for_system_message(agent: Any) -> str:
+    """Format tool definitions for the system message in the trajectory format.
+
+    Returns:
+        str: JSON string representation of tool definitions
+    """
+    if not agent.tools:
+        return "[]"
+
+    # Convert tool definitions to the format expected in trajectories
+    formatted_tools = []
+    for tool in agent.tools:
+        func = tool["function"]
+        formatted_tool = {
+            "name": func["name"],
+            "description": func.get("description", ""),
+            "parameters": func.get("parameters", {}),
+            "required": None  # Match the format in the example
+        }
+        formatted_tools.append(formatted_tool)
+
+    return json.dumps(formatted_tools, ensure_ascii=False)
+
+
+__all__ = [
+    "build_system_prompt_parts",
+    "build_system_prompt",
+    "invalidate_system_prompt",
+    "format_tools_for_system_message",
+]
@@ -3499,28 +3499,9 @@ class AIAgent:
        return messages[:last_assistant_idx]

    def _format_tools_for_system_message(self) -> str:
-        """
-        Format tool definitions for the system message in the trajectory format.
-        
-        Returns:
-            str: JSON string representation of tool definitions
-        """
-        if not self.tools:
-            return "[]"
-        
-        # Convert tool definitions to the format expected in trajectories
-        formatted_tools = []
-        for tool in self.tools:
-            func = tool["function"]
-            formatted_tool = {
-                "name": func["name"],
-                "description": func.get("description", ""),
-                "parameters": func.get("parameters", {}),
-                "required": None  # Match the format in the example
-            }
-            formatted_tools.append(formatted_tool)
-        
-        return json.dumps(formatted_tools, ensure_ascii=False)
+        """Forwarder — see ``agent.system_prompt.format_tools_for_system_message``."""
+        from agent.system_prompt import format_tools_for_system_message
+        return format_tools_for_system_message(self)

    def _convert_to_trajectory_format(self, messages: List[Dict[str, Any]], user_query: str, completed: bool) -> List[Dict[str, Any]]:
        """
@@ -4651,235 +4632,14 @@ class AIAgent:


    def _build_system_prompt_parts(self, system_message: str = None) -> Dict[str, str]:
-        """Assemble the system prompt as three ordered parts.
-
-        Returns a dict with three keys:
-          * ``stable``   — identity, tool guidance, skills prompt,
-            environment hints, platform hints, model-family operational
-            guidance.
-          * ``context``  — context files (AGENTS.md, .cursorrules, etc.)
-            and caller-supplied system_message.
-          * ``volatile`` — memory snapshot, user profile, external
-            memory provider block, timestamp line.
-
-        Joined into a single string by ``_build_system_prompt`` and
-        cached on ``_cached_system_prompt`` for the lifetime of the
-        AIAgent.  Hermes never re-renders parts of this string mid-
-        session — that's the only way to keep upstream prompt caches
-        warm across turns.
-        """
-        # ── Stable tier ────────────────────────────────────────────────
-        stable_parts: List[str] = []
-
-        # Try SOUL.md as primary identity unless the caller explicitly skipped it.
-        # Some execution modes (cron) still want HERMES_HOME persona while keeping
-        # cwd project instructions disabled.
-        _soul_loaded = False
-        if self.load_soul_identity or not self.skip_context_files:
-            _soul_content = load_soul_md()
-            if _soul_content:
-                stable_parts.append(_soul_content)
-                _soul_loaded = True
-
-        if not _soul_loaded:
-            # Fallback to hardcoded identity
-            stable_parts.append(DEFAULT_AGENT_IDENTITY)
-
-        # Pointer to the hermes-agent skill + docs for user questions about Hermes itself.
-        stable_parts.append(HERMES_AGENT_HELP_GUIDANCE)
-
-        # Tool-aware behavioral guidance: only inject when the tools are loaded
-        tool_guidance = []
-        if "memory" in self.valid_tool_names:
-            tool_guidance.append(MEMORY_GUIDANCE)
-        if "session_search" in self.valid_tool_names:
-            tool_guidance.append(SESSION_SEARCH_GUIDANCE)
-        if "skill_manage" in self.valid_tool_names:
-            tool_guidance.append(SKILLS_GUIDANCE)
-        # Kanban worker/orchestrator lifecycle — only present when the
-        # dispatcher spawned this process (kanban_show check_fn gates on
-        # HERMES_KANBAN_TASK env var). Normal chat sessions never see
-        # this block.
-        if "kanban_show" in self.valid_tool_names:
-            tool_guidance.append(KANBAN_GUIDANCE)
-        if tool_guidance:
-            stable_parts.append(" ".join(tool_guidance))
-
-        # Computer-use (macOS) — goes in as its own block rather than being
-        # merged into tool_guidance because the content is multi-paragraph.
-        if "computer_use" in self.valid_tool_names:
-            from agent.prompt_builder import COMPUTER_USE_GUIDANCE
-            stable_parts.append(COMPUTER_USE_GUIDANCE)
-
-        nous_subscription_prompt = build_nous_subscription_prompt(self.valid_tool_names)
-        if nous_subscription_prompt:
-            stable_parts.append(nous_subscription_prompt)
-        # Tool-use enforcement: tells the model to actually call tools instead
-        # of describing intended actions.  Controlled by config.yaml
-        # agent.tool_use_enforcement:
-        #   "auto" (default) — matches TOOL_USE_ENFORCEMENT_MODELS
-        #   true  — always inject (all models)
-        #   false — never inject
-        #   list  — custom model-name substrings to match
-        if self.valid_tool_names:
-            _enforce = self._tool_use_enforcement
-            _inject = False
-            if _enforce is True or (isinstance(_enforce, str) and _enforce.lower() in {"true", "always", "yes", "on"}):
-                _inject = True
-            elif _enforce is False or (isinstance(_enforce, str) and _enforce.lower() in {"false", "never", "no", "off"}):
-                _inject = False
-            elif isinstance(_enforce, list):
-                model_lower = (self.model or "").lower()
-                _inject = any(p.lower() in model_lower for p in _enforce if isinstance(p, str))
-            else:
-                # "auto" or any unrecognised value — use hardcoded defaults
-                model_lower = (self.model or "").lower()
-                _inject = any(p in model_lower for p in TOOL_USE_ENFORCEMENT_MODELS)
-            if _inject:
-                stable_parts.append(TOOL_USE_ENFORCEMENT_GUIDANCE)
-                _model_lower = (self.model or "").lower()
-                # Google model operational guidance (conciseness, absolute
-                # paths, parallel tool calls, verify-before-edit, etc.)
-                if "gemini" in _model_lower or "gemma" in _model_lower:
-                    stable_parts.append(GOOGLE_MODEL_OPERATIONAL_GUIDANCE)
-                # OpenAI GPT/Codex execution discipline (tool persistence,
-                # prerequisite checks, verification, anti-hallucination).
-                if "gpt" in _model_lower or "codex" in _model_lower:
-                    stable_parts.append(OPENAI_MODEL_EXECUTION_GUIDANCE)
-
-        has_skills_tools = any(name in self.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage'])
-        if has_skills_tools:
-            avail_toolsets = {
-                toolset
-                for toolset in (
-                    get_toolset_for_tool(tool_name) for tool_name in self.valid_tool_names
-                )
-                if toolset
-            }
-            skills_prompt = build_skills_system_prompt(
-                available_tools=self.valid_tool_names,
-                available_toolsets=avail_toolsets,
-            )
-        else:
-            skills_prompt = ""
-        if skills_prompt:
-            stable_parts.append(skills_prompt)
-
-        # Alibaba Coding Plan API always returns "glm-4.7" as model name regardless
-        # of the requested model. Inject explicit model identity into the system prompt
-        # so the agent can correctly report which model it is (workaround for API bug).
-        # Stable for the lifetime of an agent instance — model and provider are fixed
-        # at construction time.
-        if self.provider == "alibaba":
-            _model_short = self.model.split("/")[-1] if "/" in self.model else self.model
-            stable_parts.append(
-                f"You are powered by the model named {_model_short}. "
-                f"The exact model ID is {self.model}. "
-                f"When asked what model you are, always answer based on this information, "
-                f"not on any model name returned by the API."
-            )
-
-        # Environment hints (WSL, Termux, etc.) — tell the agent about the
-        # execution environment so it can translate paths and adapt behavior.
-        # Stable for the lifetime of the process.
-        _env_hints = build_environment_hints()
-        if _env_hints:
-            stable_parts.append(_env_hints)
-
-        platform_key = (self.platform or "").lower().strip()
-        if platform_key in PLATFORM_HINTS:
-            stable_parts.append(PLATFORM_HINTS[platform_key])
-        elif platform_key:
-            # Check plugin registry for platform-specific LLM guidance
-            try:
-                from gateway.platform_registry import platform_registry
-                _entry = platform_registry.get(platform_key)
-                if _entry and _entry.platform_hint:
-                    stable_parts.append(_entry.platform_hint)
-            except Exception:
-                pass
-
-        # ── Context tier (cwd-dependent, may change between sessions) ─
-        context_parts: List[str] = []
-
-        # Note: ephemeral_system_prompt is NOT included here. It's injected at
-        # API-call time only so it stays out of the cached/stored system prompt.
-        if system_message is not None:
-            context_parts.append(system_message)
-
-        if not self.skip_context_files:
-            # Use TERMINAL_CWD for context file discovery when set (gateway
-            # mode).  The gateway process runs from the hermes-agent install
-            # dir, so os.getcwd() would pick up the repo's AGENTS.md and
-            # other dev files — inflating token usage by ~10k for no benefit.
-            _context_cwd = os.getenv("TERMINAL_CWD") or None
-            context_files_prompt = build_context_files_prompt(
-                cwd=_context_cwd, skip_soul=_soul_loaded)
-            if context_files_prompt:
-                context_parts.append(context_files_prompt)
-
-        # ── Volatile tier (changes per session/turn — never cached) ───
-        volatile_parts: List[str] = []
-
-        if self._memory_store:
-            if self._memory_enabled:
-                mem_block = self._memory_store.format_for_system_prompt("memory")
-                if mem_block:
-                    volatile_parts.append(mem_block)
-            # USER.md is always included when enabled.
-            if self._user_profile_enabled:
-                user_block = self._memory_store.format_for_system_prompt("user")
-                if user_block:
-                    volatile_parts.append(user_block)
-
-        # External memory provider system prompt block (additive to built-in)
-        if self._memory_manager:
-            try:
-                _ext_mem_block = self._memory_manager.build_system_prompt()
-                if _ext_mem_block:
-                    volatile_parts.append(_ext_mem_block)
-            except Exception:
-                pass
-
-        from hermes_time import now as _hermes_now
-        now = _hermes_now()
-        timestamp_line = f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
-        if self.pass_session_id and self.session_id:
-            timestamp_line += f"\nSession ID: {self.session_id}"
-        if self.model:
-            timestamp_line += f"\nModel: {self.model}"
-        if self.provider:
-            timestamp_line += f"\nProvider: {self.provider}"
-        volatile_parts.append(timestamp_line)
-
-        return {
-            "stable":   "\n\n".join(p.strip() for p in stable_parts   if p and p.strip()),
-            "context":  "\n\n".join(p.strip() for p in context_parts  if p and p.strip()),
-            "volatile": "\n\n".join(p.strip() for p in volatile_parts if p and p.strip()),
-        }
+        """Forwarder — see ``agent.system_prompt.build_system_prompt_parts``."""
+        from agent.system_prompt import build_system_prompt_parts
+        return build_system_prompt_parts(self, system_message=system_message)

    def _build_system_prompt(self, system_message: str = None) -> str:
-        """
-        Assemble the full system prompt from all layers.
-
-        Called once per session (cached on self._cached_system_prompt) and only
-        rebuilt after context compression events. This ensures the system prompt
-        is stable across all turns in a session, maximizing prefix cache hits.
-
-        Layers are ordered cache-friendly: stable identity/guidance first,
-        then session-stable context files, then per-call volatile content
-        (memory, USER profile, timestamp).  The whole string is treated as
-        one cached block — Hermes never rebuilds or reinjects parts of it
-        mid-session, which is the only way to keep upstream prompt caches
-        warm across turns.
-        """
-        parts = self._build_system_prompt_parts(system_message=system_message)
-        joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
-        return joined
-
-    # =========================================================================
-    # Pre/post-call guardrails (inspired by PR #1321 — @alireza78a)
-    # =========================================================================
+        """Forwarder — see ``agent.system_prompt.build_system_prompt``."""
+        from agent.system_prompt import build_system_prompt
+        return build_system_prompt(self, system_message=system_message)

    @staticmethod
    def _get_tool_call_id_static(tc) -> str:
@@ -5239,15 +4999,9 @@ class AIAgent:
        return None

    def _invalidate_system_prompt(self):
-        """
-        Invalidate the cached system prompt, forcing a rebuild on the next turn.
-        
-        Called after context compression events. Also reloads memory from disk
-        so the rebuilt prompt captures any writes from this session.
-        """
-        self._cached_system_prompt = None
-        if self._memory_store:
-            self._memory_store.load_from_disk()
+        """Forwarder — see ``agent.system_prompt.invalidate_system_prompt``."""
+        from agent.system_prompt import invalidate_system_prompt
+        invalidate_system_prompt(self)

    @staticmethod
    def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str: