diff --git a/Dockerfile b/Dockerfile
index ee2c491c06..8655c51f34 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -94,9 +94,13 @@ RUN cd web && npm run build && \
 # hermes_cli/main.py succeeds (see #18800). /opt/hermes/web is build-time
 # only (HERMES_WEB_DIST points at hermes_cli/web_dist) and is intentionally
 # not chowned here.
+# The .venv MUST be hermes-writable so lazy_deps.py can install platform
+# packages (discord.py, telegram, slack, etc.) at first gateway boot.
+# Without this, `uv pip install` fails with EACCES and all messaging
+# adapters silently fail to load.  See tools/lazy_deps.py.
 USER root
 RUN chmod -R a+rX /opt/hermes && \
-    chown -R hermes:hermes /opt/hermes/ui-tui /opt/hermes/node_modules
+    chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/node_modules
 # Start as root so the entrypoint can usermod/groupmod + gosu.
 # If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
 
diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index 3919c8565b..4b1134a4c0 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -1305,9 +1305,8 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
             ),
         }
         # Forward cache_control marker when present on the OpenAI-format
-        # tool dict (set by ``mark_tools_for_long_lived_cache``). Anthropic's
-        # tools array supports cache_control on the last tool to cache the
-        # entire schema cross-session.
+        # tool dict. Anthropic's tools array supports cache_control on the
+        # last tool to cache the entire schema cross-session.
         cache_control = t.get("cache_control")
         if isinstance(cache_control, dict):
             anthropic_tool["cache_control"] = dict(cache_control)
diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 377e4ba22e..de7b6db2b1 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -382,7 +382,28 @@ _AI_GATEWAY_HEADERS = {
 # Nous Portal extra_body for product attribution.
 # Callers should pass this as extra_body in chat.completions.create()
 # when the auxiliary client is backed by Nous Portal.
-NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent", "client=aux"]}
+#
+# The tags are computed from agent.portal_tags so the client= marker stays
+# in lockstep with hermes_cli.__version__ across every Portal call site
+# (main loop, aux, compression, web_extract). Do not inline a literal here;
+# see agent/portal_tags.py for the rationale.
+from agent.portal_tags import nous_portal_tags as _nous_portal_tags
+
+
+def _nous_extra_body() -> dict:
+    """Return a fresh ``{"tags": [...]}`` ``extra_body`` dict for Nous Portal.
+
+    Built on every call from ``nous_portal_tags()``, so callers get their own
+    dict and tag list and a hot-reloaded ``hermes_cli.__version__`` is seen.
+    """
+    return {"tags": _nous_portal_tags()}
+
+
+# Backwards-compatible module attribute. Some callers (tests, third-party
+# plugins) read ``NOUS_EXTRA_BODY`` directly; it is a snapshot of the tags
+# taken once, when this module is imported. Callers that need the freshest
+# value should call ``_nous_extra_body()`` or import ``nous_portal_tags``.
+NOUS_EXTRA_BODY = _nous_extra_body()
 
 # Set at resolve time — True if the auxiliary client points to Nous Portal
 auxiliary_is_nous: bool = False
@@ -3437,7 +3458,7 @@ def get_auxiliary_extra_body() -> dict:
     Includes Nous Portal product tags when the auxiliary client is backed
     by Nous Portal. Returns empty dict otherwise.
     """
-    return dict(NOUS_EXTRA_BODY) if auxiliary_is_nous else {}
+    return _nous_extra_body() if auxiliary_is_nous else {}
 
 
 def auxiliary_max_tokens_param(value: int) -> dict:
@@ -4026,7 +4047,7 @@ def _build_call_kwargs(
     # Provider-specific extra_body
     merged_extra = dict(extra_body or {})
     if provider == "nous" or auxiliary_is_nous:
-        merged_extra.setdefault("tags", []).extend(NOUS_EXTRA_BODY["tags"])
+        merged_extra.setdefault("tags", []).extend(_nous_portal_tags())
     if merged_extra:
         kwargs["extra_body"] = merged_extra
 
diff --git a/agent/portal_tags.py b/agent/portal_tags.py
new file mode 100644
index 0000000000..647c52a076
--- /dev/null
+++ b/agent/portal_tags.py
@@ -0,0 +1,64 @@
+"""Centralized Nous Portal request tags.
+
+Every Hermes request that hits the Nous Portal — main agent loop, auxiliary
+client (compression / titles / vision / web_extract / session_search / etc.),
+and any future code path — must carry the same product-attribution tags so
+Nous can attribute usage to Hermes Agent and bucket it by client release.
+
+Tag shape (sent in OpenAI-compatible ``extra_body['tags']``):
+
+    [
+        "product=hermes-agent",
+        "client=hermes-client-v<__version__>",
+    ]
+
+The version is read from ``hermes_cli.__version__`` on every call, so it
+auto-aligns to whatever release is installed; the release script
+(``scripts/release.py``) regex-bumps that single string, and Portal requests
+carry the new tag once the updated ``hermes_cli`` is imported (or reloaded).
+
+Why one helper instead of inlining the literal at each site:
+* Four call sites (main loop profile, aux client, run_agent compression
+  fallback, web_tools fallback) used to drift apart — see PR #24194 which
+  only got the aux site, leaving the main loop sending a different tag set.
+* Tests should assert the same tag list everywhere; centralizing makes that
+  assertion a one-liner against this module.
+
+Do NOT pre-compute these as module-level constants in the consumers. The
+version can change at runtime (editable installs, hot-reload tooling), and
+``hermes_cli.__version__`` is the canonical source of truth.
+"""
+
+from __future__ import annotations
+
+from typing import List
+
+
+def _hermes_version() -> str:
+    """Return the current Hermes release version, e.g. ``"0.13.0"``.
+
+    Looked up via a function-local import on every call. Falls back to
+    ``"unknown"`` on any exception (not expected in a real install).
+    """
+    try:
+        from hermes_cli import __version__
+        return __version__
+    except Exception:
+        return "unknown"
+
+
+def hermes_client_tag() -> str:
+    """Return the ``client=...`` tag for Nous Portal requests.
+
+    Format: ``client=hermes-client-v<X.Y.Z>``, or ``...-vunknown`` on fallback.
+    """
+    return f"client=hermes-client-v{_hermes_version()}"
+
+
+def nous_portal_tags() -> List[str]:
+    """Return the canonical Nous Portal tags: product tag, then client tag.
+
+    A new list is built on every call, so callers can mutate it freely
+    (e.g. ``merged_extra.setdefault("tags", []).extend(nous_portal_tags())``).
+    """
+    return ["product=hermes-agent", hermes_client_tag()]
diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py
index 025ea8ab65..6bd3638783 100644
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -268,7 +268,7 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
 
 # Model name substrings that trigger tool-use enforcement guidance.
 # Add new patterns here when a model family needs explicit steering.
-TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok")
+TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm")
 
 # OpenAI GPT/Codex-specific execution guidance.  Addresses known failure modes
 # where GPT models abandon work on partial results, skip prerequisite lookups,
diff --git a/agent/prompt_caching.py b/agent/prompt_caching.py
index 4829c96b33..a73d6e113d 100644
--- a/agent/prompt_caching.py
+++ b/agent/prompt_caching.py
@@ -1,25 +1,15 @@
-"""Anthropic prompt caching strategies.
+"""Anthropic prompt caching strategy.
 
-Two layouts:
-
-* ``system_and_3`` (default, used everywhere except the long-lived path):
-  4 cache_control breakpoints — system prompt + last 3 non-system messages.
-  All at the same TTL (5m or 1h). Reduces input token costs by ~75% on
-  multi-turn conversations within a single session.
-
-* ``prefix_and_2`` (Claude on Anthropic / OpenRouter / Nous Portal):
-  4 breakpoints split across two TTL tiers — tools[-1] (1h) +
-  stable system prefix (1h) + last 2 non-system messages (5m). The
-  long-lived prefix is byte-stable across sessions for a given user
-  config, so every fresh session reads the cached system+tools instead
-  of re-paying for them. Within-session rolling window shrinks from 3
-  messages to 2 to free the breakpoint budget.
+Single layout: ``system_and_3``. 4 cache_control breakpoints — system
+prompt + last 3 non-system messages, all at the same TTL (5m or 1h).
+Reduces input token costs by ~75% on multi-turn conversations within a
+single session.
 
 Pure functions -- no class state, no AIAgent dependency.
 """
 
 import copy
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List
 
 
 def _apply_cache_marker(msg: dict, cache_marker: dict, native_anthropic: bool = False) -> None:
@@ -87,115 +77,3 @@ def apply_anthropic_cache_control(
         _apply_cache_marker(messages[idx], marker, native_anthropic=native_anthropic)
 
     return messages
-
-
-def _mark_system_stable_block(
-    messages: List[Dict[str, Any]],
-    long_lived_marker: Dict[str, str],
-) -> bool:
-    """Mark the *first* content block of the system message with the 1h marker.
-
-    The system message is expected to have been split into multiple content
-    blocks beforehand by the caller — block[0] is the cross-session-stable
-    prefix, subsequent blocks carry context files + volatile suffix.
-    Falls back to marking the whole system message as a single block when
-    the message hasn't been split (preserves correctness on the fallback path).
-
-    Returns True when a marker was placed.
-    """
-    if not messages or messages[0].get("role") != "system":
-        return False
-
-    sys_msg = messages[0]
-    content = sys_msg.get("content")
-
-    # Already a list of blocks → mark the first block.
-    if isinstance(content, list) and content:
-        first = content[0]
-        if isinstance(first, dict):
-            first["cache_control"] = long_lived_marker
-            return True
-        return False
-
-    # String content (no split) → cannot place a stable-prefix breakpoint
-    # without changing the byte content.  Caller is responsible for
-    # splitting; if they didn't, fall through to envelope marker so we still
-    # cache *something* for this turn.
-    if isinstance(content, str) and content:
-        sys_msg["content"] = [
-            {"type": "text", "text": content, "cache_control": long_lived_marker}
-        ]
-        return True
-
-    return False
-
-
-def apply_anthropic_cache_control_long_lived(
-    api_messages: List[Dict[str, Any]],
-    long_lived_ttl: str = "1h",
-    rolling_ttl: str = "5m",
-    native_anthropic: bool = False,
-) -> List[Dict[str, Any]]:
-    """Apply prefix_and_2 caching: long-lived stable prefix + rolling window.
-
-    Layout (4 breakpoints total):
-      * Stable system prefix (block[0]) → ``long_lived_ttl`` TTL
-      * Last 2 non-system messages → ``rolling_ttl`` TTL each
-
-    NOTE: this function does NOT mark the tools array. Tools cache_control
-    is attached separately (see ``mark_tools_for_long_lived_cache``) because
-    tools live outside the messages list in the API payload.
-
-    The caller MUST have split the system message into ordered content
-    blocks where block[0] is the cross-session-stable portion. If the system
-    message is still a single string, it is wrapped into a single block and
-    marked — this is correct, just less effective (the volatile suffix is
-    not isolated, so the prefix invalidates per-session).
-
-    Returns:
-        Deep copy of messages with cache_control breakpoints injected.
-    """
-    messages = copy.deepcopy(api_messages)
-    if not messages:
-        return messages
-
-    long_marker = _build_marker(long_lived_ttl)
-    rolling_marker = _build_marker(rolling_ttl)
-
-    placed_prefix = _mark_system_stable_block(messages, long_marker)
-
-    # Reserve 1 breakpoint for the system prefix (when placed); spend the
-    # remaining 3 on the rolling tail.  Anthropic max is 4 total —
-    # tools[-1] (when marked) consumes the 4th, so we cap rolling at 2 here.
-    rolling_budget = 2 if placed_prefix else 3
-    non_sys = [i for i in range(len(messages)) if messages[i].get("role") != "system"]
-    for idx in non_sys[-rolling_budget:]:
-        _apply_cache_marker(messages[idx], rolling_marker, native_anthropic=native_anthropic)
-
-    return messages
-
-
-def mark_tools_for_long_lived_cache(
-    tools: Optional[List[Dict[str, Any]]],
-    long_lived_ttl: str = "1h",
-) -> Optional[List[Dict[str, Any]]]:
-    """Attach cache_control to the last tool in the OpenAI-format tools list.
-
-    Anthropic prefix-cache order is ``tools → system → messages``.  Marking
-    the last tool dict caches the entire tools array (Anthropic's docs:
-    "the marker is placed on the last block you want included in the cached
-    prefix").  Marker is preserved across the OpenAI-wire boundary on
-    OpenRouter and Nous Portal (which proxies to OpenRouter); on native
-    Anthropic the marker is forwarded by ``convert_tools_to_anthropic``.
-
-    Returns a deep copy of the tools list with the marker attached, or the
-    input unchanged when tools is empty/None.  Pure function — does not
-    mutate the input.
-    """
-    if not tools:
-        return tools
-    out = copy.deepcopy(tools)
-    last = out[-1]
-    if isinstance(last, dict):
-        last["cache_control"] = _build_marker(long_lived_ttl)
-    return out
diff --git a/apps/desktop/src/app/chat/composer/controls.tsx b/apps/desktop/src/app/chat/composer/controls.tsx
index 010c6d67fc..7fa9255a9e 100644
--- a/apps/desktop/src/app/chat/composer/controls.tsx
+++ b/apps/desktop/src/app/chat/composer/controls.tsx
@@ -1,6 +1,6 @@
 import { Button } from '@/components/ui/button'
 import { triggerHaptic } from '@/lib/haptics'
-import { ArrowUp, AudioLines, Loader2, Mic, MicOff, Square } from '@/lib/icons'
+import { ArrowUp, AudioLines, Layers3, Loader2, Mic, MicOff, Square } from '@/lib/icons'
 import { cn } from '@/lib/utils'
 
 import type { ConversationStatus } from './hooks/use-voice-conversation'
@@ -31,6 +31,7 @@ interface ConversationProps {
 
 export function ComposerControls({
   busy,
+  busyAction,
   canSubmit,
   conversation,
   disabled,
@@ -40,6 +41,7 @@ export function ComposerControls({
   onDictate
 }: {
   busy: boolean
+  busyAction: 'queue' | 'stop'
   canSubmit: boolean
   conversation: ConversationProps
   disabled: boolean
@@ -74,12 +76,21 @@ export function ComposerControls({
         </Button>
       ) : (
         <Button
-          aria-label={busy ? 'Stop' : 'Send'}
+          aria-label={busy ? (busyAction === 'queue' ? 'Queue message' : 'Stop') : 'Send'}
           className={PRIMARY_ICON_BTN}
           disabled={disabled || !canSubmit}
+          title={busy ? (busyAction === 'queue' ? 'Queue message' : 'Stop') : 'Send'}
           type="submit"
         >
-          {busy ? <span className="block size-3 rounded-[0.1875rem] bg-current" /> : <ArrowUp size={18} />}
+          {busy ? (
+            busyAction === 'queue' ? (
+              <Layers3 size={16} />
+            ) : (
+              <span className="block size-3 rounded-[0.1875rem] bg-current" />
+            )
+          ) : (
+            <ArrowUp size={18} />
+          )}
         </Button>
       )}
     </div>
diff --git a/apps/desktop/src/app/chat/composer/index.tsx b/apps/desktop/src/app/chat/composer/index.tsx
index ace13c58cb..db9935d389 100644
--- a/apps/desktop/src/app/chat/composer/index.tsx
+++ b/apps/desktop/src/app/chat/composer/index.tsx
@@ -13,6 +13,7 @@ import {
 } from 'react'
 
 import { formatRefValue, hermesDirectiveFormatter } from '@/components/assistant-ui/directive-text'
+import { Button } from '@/components/ui/button'
 import { useMediaQuery } from '@/hooks/use-media-query'
 import { useResizeObserver } from '@/hooks/use-resize-observer'
 import { chatMessageText } from '@/lib/chat-messages'
@@ -20,7 +21,19 @@ import { contextPath } from '@/lib/chat-runtime'
 import { DATA_IMAGE_URL_RE } from '@/lib/embedded-images'
 import { triggerHaptic } from '@/lib/haptics'
 import { cn } from '@/lib/utils'
-import { $composerAttachments, $composerDraft } from '@/store/composer'
+import {
+  $composerAttachments,
+  $composerDraft,
+  clearComposerAttachments,
+  type ComposerAttachment
+} from '@/store/composer'
+import {
+  $queuedPromptsBySession,
+  enqueueQueuedPrompt,
+  removeQueuedPrompt,
+  type QueuedPromptEntry,
+  updateQueuedPrompt
+} from '@/store/composer-queue'
 import { $messages } from '@/store/session'
 import { $threadScrolledUp } from '@/store/thread-scroll'
 
@@ -41,6 +54,7 @@ import {
   renderComposerContents,
   RICH_INPUT_SLOT
 } from './rich-editor'
+import { QueuePanel } from './queue-panel'
 import { SkinSlashPopover } from './skin-slash-popover'
 import { detectTrigger, extractClipboardImageBlobs, textBeforeCaret, type TriggerState } from './text-utils'
 import { ComposerTriggerPopover } from './trigger-popover'
@@ -53,6 +67,15 @@ const COMPOSER_STACK_BREAKPOINT_PX = 320
 const COMPOSER_FADE_BACKGROUND =
   'linear-gradient(to bottom, transparent, color-mix(in srgb, var(--dt-background) 10%, transparent))'
 
+interface QueueEditState {
+  attachments: ComposerAttachment[]
+  draft: string
+  entryId: string
+  sessionKey: string
+}
+
+const cloneAttachments = (attachments: ComposerAttachment[]) => attachments.map(a => ({ ...a }))
+
 export function ChatBar({
   busy,
   cwd,
@@ -60,6 +83,7 @@ export function ChatBar({
   focusKey,
   gateway,
   maxRecordingSeconds = 120,
+  queueSessionKey,
   sessionId,
   state,
   onCancel,
@@ -77,12 +101,17 @@ export function ChatBar({
   const aui = useAui()
   const draft = useAuiState(s => s.composer.text)
   const attachments = useStore($composerAttachments)
+  const queuedPromptsBySession = useStore($queuedPromptsBySession)
   const scrolledUp = useStore($threadScrolledUp)
+  const activeQueueSessionKey = queueSessionKey || sessionId || null
+  const queuedPrompts = activeQueueSessionKey ? (queuedPromptsBySession[activeQueueSessionKey] ?? []) : []
 
   const composerRef = useRef<HTMLFormElement | null>(null)
   const composerSurfaceRef = useRef<HTMLDivElement | null>(null)
   const editorRef = useRef<HTMLDivElement | null>(null)
   const draftRef = useRef(draft)
+  const previousBusyRef = useRef(busy)
+  const drainingQueueRef = useRef(false)
   const urlInputRef = useRef<HTMLInputElement | null>(null)
 
   const [urlOpen, setUrlOpen] = useState(false)
@@ -91,6 +120,7 @@ export function ChatBar({
   const [voiceConversationActive, setVoiceConversationActive] = useState(false)
   const [tight, setTight] = useState(false)
   const [dragActive, setDragActive] = useState(false)
+  const [queueEdit, setQueueEdit] = useState<QueueEditState | null>(null)
   const dragDepthRef = useRef(0)
   const lastSpokenIdRef = useRef<string | null>(null)
 
@@ -102,6 +132,8 @@ export function ChatBar({
   const stacked = expanded || narrow || tight
   const hasComposerPayload = draft.trim().length > 0 || attachments.length > 0
   const canSubmit = busy || hasComposerPayload
+  const editingQueuedPrompt = queueEdit ? queuedPrompts.find(entry => entry.id === queueEdit.entryId) ?? null : null
+  const busyAction = busy && hasComposerPayload ? 'queue' : 'stop'
   const showHelpHint = draft === '?'
 
   const placeholder = disabled ? 'Starting Hermes…' : 'Ask anything'
@@ -463,6 +495,14 @@ export function ChatBar({
   }
 
   const handleEditorKeyDown = (event: KeyboardEvent<HTMLDivElement>) => {
+    if ((event.metaKey || event.ctrlKey) && !event.altKey && !event.shiftKey && event.key.toLowerCase() === 'k') {
+      event.preventDefault()
+
+      if (!busy) void drainNextQueued()
+
+      return
+    }
+
     if (trigger && triggerItems.length > 0) {
       if (event.key === 'ArrowDown') {
         event.preventDefault()
@@ -499,6 +539,13 @@ export function ChatBar({
 
     if (event.key === 'Enter' && !event.shiftKey) {
       event.preventDefault()
+
+      if (!busy && !hasComposerPayload && queuedPrompts.length > 0) {
+        void drainNextQueued()
+
+        return
+      }
+
       submitDraft()
     }
   }
@@ -635,10 +682,147 @@ export function ChatBar({
     }
   }
 
-  const submitDraft = () => {
-    if (busy) {
+  const loadIntoComposer = (text: string, attachments: ComposerAttachment[]) => {
+    draftRef.current = text
+    aui.composer().setText(text)
+    $composerAttachments.set(cloneAttachments(attachments))
+
+    const editor = editorRef.current
+
+    if (editor) {
+      renderComposerContents(editor, text)
+      placeCaretEnd(editor)
+    }
+  }
+
+  const beginQueuedEdit = (entry: QueuedPromptEntry) => {
+    if (!activeQueueSessionKey || queueEdit) return
+
+    setQueueEdit({
+      attachments: cloneAttachments($composerAttachments.get()),
+      draft: draftRef.current,
+      entryId: entry.id,
+      sessionKey: activeQueueSessionKey
+    })
+    loadIntoComposer(entry.text, entry.attachments)
+    triggerHaptic('selection')
+    focusInput()
+  }
+
+  const exitQueuedEdit = (action: 'cancel' | 'save'): boolean => {
+    if (!queueEdit) return false
+
+    if (action === 'save') {
+      const text = draftRef.current
+      const next = cloneAttachments($composerAttachments.get())
+
+      if (!text.trim() && next.length === 0) return false
+
+      const saved = updateQueuedPrompt(queueEdit.sessionKey, queueEdit.entryId, { attachments: next, text })
+      triggerHaptic(saved ? 'success' : 'selection')
+    } else {
       triggerHaptic('cancel')
-      onCancel()
+    }
+
+    loadIntoComposer(queueEdit.draft, queueEdit.attachments)
+    setQueueEdit(null)
+    focusInput()
+
+    return true
+  }
+
+  const queueCurrentDraft = useCallback(() => {
+    if (!activeQueueSessionKey || (!draft.trim() && attachments.length === 0)) return false
+    if (!enqueueQueuedPrompt(activeQueueSessionKey, { text: draft, attachments })) return false
+
+    clearDraft()
+    clearComposerAttachments()
+    triggerHaptic('selection')
+
+    return true
+  }, [activeQueueSessionKey, attachments, draft])
+
+  // All queue drain paths share one lock + send-then-remove sequence.
+  // `pickEntry` lets each caller choose head, by-id, or skip-edited.
+  const runDrain = useCallback(
+    async (pickEntry: (entries: QueuedPromptEntry[]) => QueuedPromptEntry | undefined): Promise<boolean> => {
+      if (drainingQueueRef.current || !activeQueueSessionKey) return false
+
+      const entry = pickEntry(queuedPrompts)
+
+      if (!entry) return false
+
+      drainingQueueRef.current = true
+
+      try {
+        const accepted = await Promise.resolve(onSubmit(entry.text, { attachments: entry.attachments, fromQueue: true }))
+
+        if (accepted === false) return false
+
+        removeQueuedPrompt(activeQueueSessionKey, entry.id)
+
+        return true
+      } finally {
+        drainingQueueRef.current = false
+      }
+    },
+    [activeQueueSessionKey, onSubmit, queuedPrompts]
+  )
+
+  const drainNextQueued = useCallback(
+    () =>
+      runDrain(entries => {
+        const skip = queueEdit?.entryId
+
+        return skip ? entries.find(e => e.id !== skip) : entries[0]
+      }),
+    [queueEdit, runDrain]
+  )
+
+  const sendQueuedNow = useCallback(
+    (id: string) => runDrain(entries => entries.find(e => e.id === id && id !== queueEdit?.entryId)),
+    [queueEdit, runDrain]
+  )
+
+  const interruptAndSendNextQueued = useCallback(async () => {
+    if (queuedPrompts.length === 0) return false
+
+    await Promise.resolve(onCancel())
+
+    return drainNextQueued()
+  }, [drainNextQueued, onCancel, queuedPrompts.length])
+
+  // Auto-drain on busy → false (turn settled).
+  useEffect(() => {
+    const wasBusy = previousBusyRef.current
+    previousBusyRef.current = busy
+
+    if (busy || !wasBusy || queuedPrompts.length === 0) return
+
+    void drainNextQueued()
+  }, [busy, drainNextQueued, queuedPrompts.length])
+
+  // Clean up queue edit when its target disappears (session swap or external delete).
+  useEffect(() => {
+    if (!queueEdit) return
+    if (queueEdit.sessionKey === activeQueueSessionKey && editingQueuedPrompt) return
+
+    loadIntoComposer(queueEdit.draft, queueEdit.attachments)
+    setQueueEdit(null)
+  }, [activeQueueSessionKey, editingQueuedPrompt, queueEdit]) // eslint-disable-line react-hooks/exhaustive-deps
+
+  const submitDraft = () => {
+    if (queueEdit) {
+      exitQueuedEdit('save')
+    } else if (busy) {
+      if (hasComposerPayload) queueCurrentDraft()
+      else if (queuedPrompts.length > 0) void interruptAndSendNextQueued()
+      else {
+        triggerHaptic('cancel')
+        void Promise.resolve(onCancel())
+      }
+    } else if (!hasComposerPayload && queuedPrompts.length > 0) {
+      void drainNextQueued()
     } else if (draft.trim() || attachments.length > 0) {
       const submitted = draft
       triggerHaptic('submit')
@@ -742,6 +926,7 @@ export function ChatBar({
   const controls = (
     <ComposerControls
       busy={busy}
+      busyAction={busyAction}
       canSubmit={canSubmit}
       conversation={{
         active: voiceConversationActive,
@@ -824,6 +1009,22 @@ export function ChatBar({
             />
           )}
           <SkinSlashPopover draft={draft} onSelect={selectSkinSlashCommand} />
+          {activeQueueSessionKey && queuedPrompts.length > 0 && (
+            <div className="relative z-6 mb-1 px-0.5">
+              <QueuePanel
+                busy={busy}
+                editingId={queueEdit?.entryId ?? null}
+                entries={queuedPrompts}
+                onDelete={id => {
+                  if (removeQueuedPrompt(activeQueueSessionKey, id) && queueEdit?.entryId === id) {
+                    exitQueuedEdit('cancel')
+                  }
+                }}
+                onEdit={beginQueuedEdit}
+                onSendNow={id => void sendQueuedNow(id)}
+              />
+            </div>
+          )}
           <div
             className="pointer-events-none absolute inset-0 rounded-[inherit]"
             style={{ background: COMPOSER_FADE_BACKGROUND }}
@@ -871,6 +1072,28 @@ export function ChatBar({
               >
                 <VoiceActivity state={voiceActivityState} />
                 <VoicePlaybackActivity />
+                {queueEdit && editingQueuedPrompt && (
+                  <div className="flex items-center justify-between gap-2 rounded-lg border border-[color-mix(in_srgb,var(--dt-composer-ring)_32%,transparent)] bg-accent/18 px-2 py-1">
+                    <div className="min-w-0 text-[0.7rem] text-muted-foreground/88">Editing queued turn in composer</div>
+                    <div className="flex shrink-0 items-center gap-1">
+                      <Button
+                        className="h-6 rounded-md px-2 text-[0.68rem]"
+                        onClick={() => exitQueuedEdit('cancel')}
+                        type="button"
+                        variant="ghost"
+                      >
+                        Cancel
+                      </Button>
+                      <Button
+                        className="h-6 rounded-md px-2 text-[0.68rem]"
+                        onClick={() => exitQueuedEdit('save')}
+                        type="button"
+                      >
+                        Save
+                      </Button>
+                    </div>
+                  </div>
+                )}
                 {attachments.length > 0 && <AttachmentList attachments={attachments} onRemove={onRemoveAttachment} />}
                 <div
                   className={cn(
diff --git a/apps/desktop/src/app/chat/composer/queue-panel.tsx b/apps/desktop/src/app/chat/composer/queue-panel.tsx
new file mode 100644
index 0000000000..e0c8dc88e2
--- /dev/null
+++ b/apps/desktop/src/app/chat/composer/queue-panel.tsx
@@ -0,0 +1,123 @@
+import { useState } from 'react'
+
+import { Button } from '@/components/ui/button'
+import { ArrowUp, ChevronDown, Pencil, Trash2 } from '@/lib/icons'
+import { cn } from '@/lib/utils'
+import type { QueuedPromptEntry } from '@/store/composer-queue'
+
+interface QueuePanelProps {
+  busy: boolean // while true, every row's "send now" button is disabled
+  editingId: null | string // id of the entry currently being edited in the composer, if any
+  entries: QueuedPromptEntry[] // queued turns, rendered in array order
+  onDelete: (id: string) => void // called with the entry id when its delete button is clicked
+  onEdit: (entry: QueuedPromptEntry) => void // called with the entry when its edit button is clicked
+  onSendNow: (id: string) => void // called with the entry id when its send-now button is clicked
+}
+
+const entryPreview = (entry: QueuedPromptEntry) => // row label: trimmed text, else a placeholder
+  entry.text.trim() || (entry.attachments.length > 0 ? 'Attachment-only turn' : 'Empty turn')
+
+export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendNow }: QueuePanelProps) {
+  const [collapsed, setCollapsed] = useState(false) // local UI state; the list starts expanded
+
+  if (entries.length === 0) return null // nothing to render for an empty queue
+
+  return (
+    <div className="rounded-2xl border border-border/65 bg-[color-mix(in_srgb,var(--dt-card)_70%,transparent)] py-0.5 shadow-[0_0_0_1px_color-mix(in_srgb,var(--dt-card)_30%,transparent)_inset]">
+      <button
+        className="flex w-full items-center gap-1.5 px-2.5 py-1 text-left text-[0.72rem] font-medium text-muted-foreground/92 transition-colors hover:text-foreground/90"
+        onClick={() => setCollapsed(open => !open)}
+        type="button"
+      >
+        <ChevronDown className={cn('shrink-0 transition-transform', collapsed && '-rotate-90')} size={14} />
+        <span className="truncate">{entries.length} Queued</span>
+      </button>
+
+      {!collapsed && (
+        <div className="space-y-0.5 px-1.5 pb-0.5">
+          {entries.map(entry => {
+            const isEditing = editingId === entry.id // this row is the one loaded into the composer
+            const attachmentsCount = entry.attachments.length // drives the "N attachment(s)" sub-label
+
+            return (
+              <div
+                className={cn(
+                  'group/queue-row flex items-center gap-1.5 rounded-lg border border-transparent px-1.5 py-1',
+                  'transition-colors duration-300 ease-out hover:bg-(--chrome-action-hover) hover:transition-none',
+                  isEditing && 'border-[color-mix(in_srgb,var(--dt-composer-ring)_40%,transparent)] bg-accent/25'
+                )}
+                key={entry.id}
+              >
+                <span
+                  aria-hidden
+                  className="h-3.5 w-3.5 shrink-0 rounded-full border border-foreground/35 bg-transparent"
+                />
+                <div className="min-w-0 flex-1">
+                  <p className="truncate text-[0.73rem] leading-4 text-foreground/92">{entryPreview(entry)}</p>
+                  {(attachmentsCount > 0 || isEditing) && (
+                    <div className="mt-0.5 flex items-center gap-1.5 text-[0.64rem] text-muted-foreground/75">
+                      {attachmentsCount > 0 && (
+                        <span>
+                          {attachmentsCount} attachment{attachmentsCount === 1 ? '' : 's'}
+                        </span>
+                      )}
+                      {isEditing && (
+                        <span className="text-[color-mix(in_srgb,var(--dt-composer-ring)_78%,var(--muted-foreground))]">
+                          Editing in composer
+                        </span>
+                      )}
+                    </div>
+                  )}
+                </div>
+                <div
+                  className={cn(
+                    'flex shrink-0 items-center gap-0 transition-opacity',
+                    isEditing
+                      ? 'opacity-100'
+                      : 'opacity-0 group-hover/queue-row:opacity-100 group-focus-within/queue-row:opacity-100'
+                  )}
+                >
+                  <Button
+                    aria-label="Edit queued turn"
+                    className="h-5 w-5 rounded-md"
+                    disabled={Boolean(editingId) && !isEditing}
+                    onClick={() => onEdit(entry)}
+                    size="icon-xs"
+                    title="Edit queued turn"
+                    type="button"
+                    variant="ghost"
+                  >
+                    <Pencil size={11} />
+                  </Button>
+                  <Button
+                    aria-label="Send queued turn now"
+                    className="h-5 w-5 rounded-md"
+                    disabled={busy || isEditing}
+                    onClick={() => onSendNow(entry.id)}
+                    size="icon-xs"
+                    title="Send queued turn now"
+                    type="button"
+                    variant="ghost"
+                  >
+                    <ArrowUp size={11} />
+                  </Button>
+                  <Button
+                    aria-label="Delete queued turn"
+                    className="h-5 w-5 rounded-md"
+                    onClick={() => onDelete(entry.id)}
+                    size="icon-xs"
+                    title="Delete queued turn"
+                    type="button"
+                    variant="ghost"
+                  >
+                    <Trash2 size={11} />
+                  </Button>
+                </div>
+              </div>
+            )
+          })}
+        </div>
+      )}
+    </div>
+  )
+}
diff --git a/apps/desktop/src/app/chat/composer/types.ts b/apps/desktop/src/app/chat/composer/types.ts
index 71c601e396..524667e95f 100644
--- a/apps/desktop/src/app/chat/composer/types.ts
+++ b/apps/desktop/src/app/chat/composer/types.ts
@@ -1,4 +1,5 @@
 import type { HermesGateway } from '@/hermes'
+import type { ComposerAttachment } from '@/store/composer'
 
 import type { DroppedFile } from '../hooks/use-composer-actions'
 
@@ -33,9 +34,10 @@ export interface ChatBarProps {
   maxRecordingSeconds?: number
   state: ChatBarState
   gateway?: HermesGateway | null
+  queueSessionKey?: string | null
   sessionId?: string | null
   cwd?: string | null
-  onCancel: () => void
+  onCancel: () => Promise<void> | void
   onAddContextRef?: (refText: string, label?: string, detail?: string) => void
   onAddUrl?: (url: string) => void
   onAttachImageBlob?: (blob: Blob) => Promise<boolean | void> | boolean | void
@@ -45,7 +47,10 @@ export interface ChatBarProps {
   onPickFolders?: () => void
   onPickImages?: () => void
   onRemoveAttachment?: (id: string) => void
-  onSubmit: (value: string) => Promise<void> | void
+  onSubmit: (
+    value: string,
+    options?: { attachments?: ComposerAttachment[]; fromQueue?: boolean }
+  ) => Promise<boolean> | boolean
   onTranscribeAudio?: (audio: Blob) => Promise<string>
 }
 
diff --git a/apps/desktop/src/app/chat/index.tsx b/apps/desktop/src/app/chat/index.tsx
index 0afed13a1a..8786b7bb2a 100644
--- a/apps/desktop/src/app/chat/index.tsx
+++ b/apps/desktop/src/app/chat/index.tsx
@@ -20,6 +20,7 @@ import { ChevronDown } from '@/lib/icons'
 import { useIncrementalExternalStoreRuntime } from '@/lib/incremental-external-store-runtime'
 import { cn } from '@/lib/utils'
 import { $pinnedSessionIds } from '@/store/layout'
+import type { ComposerAttachment } from '@/store/composer'
 import {
   $activeSessionId,
   $awaitingResponse,
@@ -51,7 +52,7 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
   gateway: HermesGateway | null
   onToggleSelectedPin: () => void
   onDeleteSelectedSession: () => void
-  onCancel: () => void
+  onCancel: () => Promise<void> | void
   onAddContextRef: (refText: string, label?: string, detail?: string) => void
   onAddUrl: (url: string) => void
   onBranchInNewChat: (messageId: string) => void
@@ -63,7 +64,10 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
   onPickFolders: () => void
   onPickImages: () => void
   onRemoveAttachment: (id: string) => void
-  onSubmit: (text: string) => Promise<void> | void
+  onSubmit: (
+    text: string,
+    options?: { attachments?: ComposerAttachment[]; fromQueue?: boolean }
+  ) => Promise<boolean> | boolean
   onThreadMessagesChange: (messages: readonly ThreadMessage[]) => void
   onEdit: (message: AppendMessage) => Promise<void>
   onReload: (parentId: string | null) => Promise<void>
@@ -311,6 +315,7 @@ export function ChatView({
                 onRemoveAttachment={onRemoveAttachment}
                 onSubmit={onSubmit}
                 onTranscribeAudio={onTranscribeAudio}
+                queueSessionKey={selectedSessionId || activeSessionId}
                 sessionId={activeSessionId}
                 state={chatBarState}
               />
diff --git a/apps/desktop/src/app/desktop-controller.tsx b/apps/desktop/src/app/desktop-controller.tsx
index dcc2a76b2d..d4097260a7 100644
--- a/apps/desktop/src/app/desktop-controller.tsx
+++ b/apps/desktop/src/app/desktop-controller.tsx
@@ -472,7 +472,7 @@ export function DesktopController() {
       onAttachDroppedItems={composer.attachDroppedItems}
       onAttachImageBlob={composer.attachImageBlob}
       onBranchInNewChat={messageId => void branchInNewChat(messageId)}
-      onCancel={() => void cancelRun()}
+      onCancel={cancelRun}
       onDeleteSelectedSession={() => {
         if (selectedStoredSessionId) {
           void removeSession(selectedStoredSessionId)
diff --git a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
index bee5f78f09..ebb1e7dd6e 100644
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
@@ -71,6 +71,11 @@ interface PromptActionsOptions {
   ) => ClientSessionState
 }
 
+interface SubmitTextOptions {
+  attachments?: ComposerAttachment[] // when provided, used instead of the live composer attachments
+  fromQueue?: boolean // NOTE(review): not read in the visible hunks; presumably marks queue-drained submits
+}
+
 function renderCommandsCatalog(catalog: CommandsCatalogLike): string {
   const desktopCatalog = filterDesktopCommandsCatalog(catalog)
 
@@ -153,7 +158,12 @@ export function usePromptActions({
   )
 
   const syncImageAttachmentsForSubmit = useCallback(
-    async (sessionId: string, attachments: ComposerAttachment[]) => {
+    async (
+      sessionId: string,
+      attachments: ComposerAttachment[],
+      options: { updateComposerAttachments?: boolean } = {}
+    ) => {
+      const updateComposerAttachments = options.updateComposerAttachments ?? true
       const images = attachments.filter(attachment => attachment.kind === 'image' && attachment.path)
 
       for (const attachment of images) {
@@ -173,22 +183,25 @@ export function usePromptActions({
 
         const attachedPath = result.path || attachment.path
 
-        addComposerAttachment({
-          ...attachment,
-          id: attachment.id,
-          label: attachedPath ? pathLabel(attachedPath) : attachment.label,
-          path: attachedPath,
-          attachedSessionId: sessionId
-        })
+        if (updateComposerAttachments) {
+          addComposerAttachment({
+            ...attachment,
+            id: attachment.id,
+            label: attachedPath ? pathLabel(attachedPath) : attachment.label,
+            path: attachedPath,
+            attachedSessionId: sessionId
+          })
+        }
       }
     },
     [requestGateway]
   )
 
   const submitPromptText = useCallback(
-    async (rawText: string) => {
+    async (rawText: string, options?: SubmitTextOptions) => {
       const visibleText = rawText.trim()
-      const attachments = $composerAttachments.get()
+      const usingComposerAttachments = !options?.attachments
+      const attachments = options?.attachments ?? $composerAttachments.get()
       const contextRefs = attachments
         .map(a => a.refText)
         .filter(Boolean)
@@ -200,7 +213,7 @@ export function usePromptActions({
         [contextRefs, visibleText].filter(Boolean).join('\n\n') || (hasImage ? 'What do you see in this image?' : '')
 
       if (!text || busyRef.current) {
-        return
+        return false
       }
 
       const optimisticId = `user-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
@@ -232,7 +245,7 @@ export function usePromptActions({
             awaitingResponse: true,
             pendingBranchGroup: null,
             sawAssistantPayload: false,
-            interrupted: false
+            interrupted: state.interrupted
           }),
           selectedStoredSessionIdRef.current
         )
@@ -278,7 +291,7 @@ export function usePromptActions({
           releaseBusy()
           notifyError(err, 'Session unavailable')
 
-          return
+          return false
         }
 
         if (!sessionId) {
@@ -286,16 +299,21 @@ export function usePromptActions({
           releaseBusy()
           notify({ kind: 'error', title: 'Session unavailable', message: 'Could not create a new session' })
 
-          return
+          return false
         }
 
         seedOptimistic(sessionId)
       }
 
       try {
-        await syncImageAttachmentsForSubmit(sessionId, attachments)
+        await syncImageAttachmentsForSubmit(sessionId, attachments, {
+          updateComposerAttachments: usingComposerAttachments
+        })
         await requestGateway('prompt.submit', { session_id: sessionId, text })
-        clearComposerAttachments()
+
+        if (usingComposerAttachments) clearComposerAttachments()
+
+        return true
       } catch (err) {
         releaseBusy()
         updateSessionState(sessionId, state => ({ ...state, busy: false, awaitingResponse: false }))
@@ -303,10 +321,11 @@ export function usePromptActions({
         if (isProviderSetupError(err)) {
           requestDesktopOnboarding('Add a provider credential before sending your first message.')
 
-          return
+          return false
         }
 
         notifyError(err, 'Prompt failed')
+        return false
       }
     },
     [
@@ -477,18 +496,18 @@ export function usePromptActions({
   )
 
   const submitText = useCallback(
-    async (rawText: string) => {
+    async (rawText: string, options?: SubmitTextOptions) => {
       const visibleText = rawText.trim()
-      const attachments = $composerAttachments.get()
+      const attachments = options?.attachments ?? $composerAttachments.get()
 
       if (!attachments.length && SLASH_COMMAND_RE.test(visibleText)) {
         triggerHaptic('selection')
         await executeSlashCommand(visibleText)
 
-        return
+        return true
       }
 
-      await submitPromptText(rawText)
+      return await submitPromptText(rawText, options)
     },
     [executeSlashCommand, submitPromptText]
   )
diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts
index 926f934e69..f1685de244 100644
--- a/apps/desktop/src/app/session/hooks/use-session-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts
@@ -7,6 +7,7 @@ import { type ChatMessage, chatMessageText, toChatMessages } from '@/lib/chat-me
 import { normalizePersonalityValue } from '@/lib/chat-runtime'
 import { embeddedImageUrls, textWithoutEmbeddedImages } from '@/lib/embedded-images'
 import { clearComposerAttachments, clearComposerDraft } from '@/store/composer'
+import { clearQueuedPrompts } from '@/store/composer-queue'
 import { $pinnedSessionIds } from '@/store/layout'
 import { clearNotifications, notify, notifyError } from '@/store/notifications'
 import { requestDesktopOnboarding } from '@/store/onboarding'
@@ -649,6 +650,11 @@ export function useSessionActions({
         }
 
         await deleteSession(storedSessionId)
+        clearQueuedPrompts(storedSessionId)
+
+        if (closingRuntimeId) {
+          clearQueuedPrompts(closingRuntimeId)
+        }
       } catch (err) {
         if (removed) {
           setSessions(prev => [removed, ...prev])
diff --git a/apps/desktop/src/components/assistant-ui/thread.tsx b/apps/desktop/src/components/assistant-ui/thread.tsx
index 6dfae16e7c..d0a039f0f1 100644
--- a/apps/desktop/src/components/assistant-ui/thread.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread.tsx
@@ -95,6 +95,10 @@ function messageContentText(content: unknown): string {
   return Array.isArray(content) ? content.map(partText).join('').trim() : ''
 }
 
+const INTERRUPTED_ONLY_RE = /^_?\[interrupted\]_?$/i
+
+const isInterruptedOnlyMessage = (text: string) => INTERRUPTED_ONLY_RE.test(text.trim())
+
 function resetStickyState(state: StickyStateFlags) {
   state.escapedFromLock = false
   state.isAtBottom = true
@@ -368,6 +372,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
 
   const messageStatus = useAuiState(s => s.message.status?.type)
   const isPlaceholder = messageStatus === 'running' && content.length === 0
+  const interruptedOnly = useMemo(() => isInterruptedOnlyMessage(messageText), [messageText])
 
   if (isPlaceholder) {
     return null
@@ -380,7 +385,10 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
       data-slot="aui_assistant-message-root"
     >
       <div
-        className="wrap-anywhere min-w-0 max-w-full overflow-hidden text-pretty text-base leading-(--dt-line-height) text-foreground"
+        className={cn(
+          'wrap-anywhere min-w-0 max-w-full overflow-hidden text-pretty text-base leading-(--dt-line-height) text-foreground',
+          interruptedOnly && 'text-[0.8rem] leading-5 text-muted-foreground/82'
+        )}
         data-slot="aui_assistant-message-content"
       >
         {hoistedTodos.length > 0 && <HoistedTodoPanel todos={hoistedTodos} />}
@@ -401,7 +409,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
           </ErrorPrimitive.Root>
         </MessagePrimitive.Error>
       </div>
-      {messageText.trim().length > 0 && (
+      {messageText.trim().length > 0 && !interruptedOnly && (
         <AssistantFooter messageId={messageId} messageText={messageText} onBranchInNewChat={onBranchInNewChat} />
       )}
     </MessagePrimitive.Root>
diff --git a/apps/desktop/src/store/composer-queue.test.ts b/apps/desktop/src/store/composer-queue.test.ts
new file mode 100644
index 0000000000..9f15232aec
--- /dev/null
+++ b/apps/desktop/src/store/composer-queue.test.ts
@@ -0,0 +1,102 @@
+import { beforeEach, describe, expect, it } from 'vitest'
+
+import type { ComposerAttachment } from './composer'
+import {
+  $queuedPromptsBySession,
+  clearQueuedPrompts,
+  dequeueQueuedPrompt,
+  enqueueQueuedPrompt,
+  getQueuedPrompts,
+  removeQueuedPrompt,
+  updateQueuedPrompt,
+  updateQueuedPromptText
+} from './composer-queue'
+
+const SESSION_KEY = 'session-abc'
+const QUEUE_STORAGE_KEY = 'hermes.desktop.composerQueue.v1'
+
+/**
+ * Build a minimal attachment fixture: the label mirrors the id and refText is always `@file:<id>`.
+ */
+function attachment(id: string, kind: ComposerAttachment['kind'] = 'file'): ComposerAttachment {
+  const refText = `@file:${id}`
+
+  return { id, kind, label: id, refText }
+}
+
+describe('composer queue store', () => {
+  beforeEach(() => {
+    window.localStorage.removeItem(QUEUE_STORAGE_KEY)
+    $queuedPromptsBySession.set({})
+  })
+
+  it('queues prompts in FIFO order', () => {
+    enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'first' })
+    enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'second' })
+
+    expect(dequeueQueuedPrompt(SESSION_KEY)?.text).toBe('first')
+    expect(dequeueQueuedPrompt(SESSION_KEY)?.text).toBe('second')
+    expect(dequeueQueuedPrompt(SESSION_KEY)).toBeNull()
+  })
+
+  it('clones attachments when queueing', () => {
+    const source = [attachment('a-1')]
+    const queued = enqueueQueuedPrompt(SESSION_KEY, { attachments: source, text: 'check clones' })
+
+    expect(queued).not.toBeNull()
+    expect(getQueuedPrompts(SESSION_KEY)[0]?.attachments[0]).toEqual(source[0])
+    expect(getQueuedPrompts(SESSION_KEY)[0]?.attachments[0]).not.toBe(source[0])
+  })
+
+  it('updates and removes queued entries by id', () => {
+    const first = enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'draft one' })
+    const second = enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'draft two' })
+
+    expect(first).not.toBeNull()
+    expect(second).not.toBeNull()
+
+    expect(updateQueuedPromptText(SESSION_KEY, first!.id, 'draft one edited')).toBe(true)
+    expect(getQueuedPrompts(SESSION_KEY).map(entry => entry.text)).toEqual(['draft one edited', 'draft two'])
+
+    expect(removeQueuedPrompt(SESSION_KEY, first!.id)).toBe(true)
+    expect(getQueuedPrompts(SESSION_KEY).map(entry => entry.text)).toEqual(['draft two'])
+  })
+
+  it('updates queued text and attachment snapshot', () => {
+    const first = enqueueQueuedPrompt(SESSION_KEY, { attachments: [attachment('f-1')], text: 'draft one' })
+    const editedAttachments = [attachment('f-2'), attachment('f-3', 'image')]
+
+    expect(first).not.toBeNull()
+    expect(
+      updateQueuedPrompt(SESSION_KEY, first!.id, {
+        attachments: editedAttachments,
+        text: 'edited text'
+      })
+    ).toBe(true)
+
+    const queue = getQueuedPrompts(SESSION_KEY)
+    expect(queue[0]?.text).toBe('edited text')
+    expect(queue[0]?.attachments).toEqual(editedAttachments)
+    expect(queue[0]?.attachments[0]).not.toBe(editedAttachments[0])
+  })
+
+  it('clears queue state for a session', () => {
+    enqueueQueuedPrompt(SESSION_KEY, { attachments: [attachment('img-1', 'image')], text: 'queued' })
+
+    clearQueuedPrompts(SESSION_KEY)
+
+    expect(getQueuedPrompts(SESSION_KEY)).toEqual([])
+    expect($queuedPromptsBySession.get()[SESSION_KEY]).toBeUndefined()
+    expect(window.localStorage.getItem(QUEUE_STORAGE_KEY)).toBeNull()
+  })
+
+  it('persists queue entries into local storage', () => {
+    enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'persist me' })
+
+    const raw = window.localStorage.getItem(QUEUE_STORAGE_KEY)
+    expect(raw).toBeTruthy()
+
+    const parsed = JSON.parse(String(raw)) as Record<string, { text: string }[]>
+    expect(parsed[SESSION_KEY]?.[0]?.text).toBe('persist me')
+  })
+})
diff --git a/apps/desktop/src/store/composer-queue.ts b/apps/desktop/src/store/composer-queue.ts
new file mode 100644
index 0000000000..d2a3f228ff
--- /dev/null
+++ b/apps/desktop/src/store/composer-queue.ts
@@ -0,0 +1,158 @@
+import { atom } from 'nanostores'
+
+import type { ComposerAttachment } from './composer'
+
+export interface QueuedPromptEntry {
+  id: string // generated per entry as `queued-<Date.now()>-<random base36>`
+  text: string // prompt text exactly as supplied by the caller
+  attachments: ComposerAttachment[] // shallow copies taken when the entry is queued or its attachments are updated
+  queuedAt: number // Date.now() captured when the entry was enqueued
+}
+
+type QueueState = Record<string, QueuedPromptEntry[]>
+
+const STORAGE_KEY = 'hermes.desktop.composerQueue.v1'
+
+const load = (): QueueState => {
+  if (typeof window === 'undefined') return {}
+  try {
+    const parsed: unknown = JSON.parse(window.localStorage.getItem(STORAGE_KEY) || 'null')
+    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return {}
+    // Keep only array-valued sessions: a corrupt entry would otherwise crash the spread/destructuring readers.
+    return Object.fromEntries(Object.entries(parsed).filter(([, queue]) => Array.isArray(queue))) as QueueState
+  } catch {
+    return {}
+  }
+}
+
+const save = (state: QueueState) => {
+  if (typeof window === 'undefined') return
+  try {
+    if (Object.keys(state).length === 0) window.localStorage.removeItem(STORAGE_KEY)
+    else window.localStorage.setItem(STORAGE_KEY, JSON.stringify(state))
+  } catch {
+    // best-effort: storage may be unavailable, queue still works in-memory
+  }
+}
+
+export const $queuedPromptsBySession = atom<QueueState>(load())
+
+// Replace one session's queue in the store and persist the result; an empty queue removes the session key.
+const writeSession = (sid: string, queue: QueuedPromptEntry[]) => {
+  const snapshot: QueueState = { ...$queuedPromptsBySession.get() }
+
+  if (queue.length > 0) snapshot[sid] = queue
+  else delete snapshot[sid]
+
+  $queuedPromptsBySession.set(snapshot)
+  save(snapshot)
+}
+
+/**
+ * Normalize a session key by trimming surrounding whitespace.
+ * Blank, null, or undefined keys collapse to null.
+ */
+const sidOf = (key: string | null | undefined): null | string => key?.trim() || null
+
+const queueFor = (sid: string) => $queuedPromptsBySession.get()[sid] ?? []
+
+const nextId = () => `queued-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
+
+const cloneAttachments = (attachments: ComposerAttachment[]) => attachments.map(a => ({ ...a }))
+
+// Return the prompts queued for a session key; blank or unknown keys yield an empty list.
+export const getQueuedPrompts = (key: string | null | undefined): QueuedPromptEntry[] => {
+  const sid = sidOf(key)
+  return sid === null ? [] : queueFor(sid)
+}
+
+// Append a prompt to a session's queue with a shallow-copied attachment snapshot; null when the key is blank.
+export const enqueueQueuedPrompt = (
+  key: string | null | undefined,
+  payload: { text: string; attachments: ComposerAttachment[] }
+): null | QueuedPromptEntry => {
+  const sid = sidOf(key)
+  if (sid === null) return null
+
+  const queued: QueuedPromptEntry = {
+    id: nextId(),
+    text: payload.text,
+    attachments: cloneAttachments(payload.attachments),
+    queuedAt: Date.now()
+  }
+
+  writeSession(sid, queueFor(sid).concat(queued))
+
+  return queued
+}
+
+// Remove and return the oldest queued prompt for a session.
+// Yields null, leaving the store untouched, when the key is blank or nothing is queued.
+export const dequeueQueuedPrompt = (key: string | null | undefined): null | QueuedPromptEntry => {
+  const sid = sidOf(key)
+
+  if (sid === null) return null
+
+  const [oldest, ...remaining] = queueFor(sid)
+
+  if (oldest) writeSession(sid, remaining)
+
+  return oldest || null
+}
+
+// Delete the queued prompt with the given id; returns false when the key is blank or no entry matches.
+export const removeQueuedPrompt = (key: string | null | undefined, id: string): boolean => {
+  const sid = sidOf(key)
+
+  if (sid === null) return false
+
+  const before = queueFor(sid)
+  const kept = before.filter(entry => entry.id !== id)
+  const removed = kept.length !== before.length
+
+  if (removed) writeSession(sid, kept)
+
+  return removed
+}
+
+/**
+ * Rewrite one queued prompt's text and, when attachments are supplied, its shallow-copied attachment snapshot.
+ * Returns false if the key is blank, no entry has the id, or a text-only update leaves the text unchanged.
+ */
+export const updateQueuedPrompt = (
+  key: string | null | undefined,
+  id: string,
+  update: { text: string; attachments?: ComposerAttachment[] }
+): boolean => {
+  const sid = sidOf(key)
+
+  if (sid === null) return false
+
+  let touched = false
+
+  const rewritten = queueFor(sid).map(entry => {
+    if (entry.id !== id) return entry
+    if (entry.text === update.text && !update.attachments) return entry
+
+    const attachments = update.attachments ? cloneAttachments(update.attachments) : entry.attachments
+
+    touched = true
+
+    return { ...entry, text: update.text, attachments }
+  })
+
+  if (touched) writeSession(sid, rewritten)
+
+  return touched
+}
+
+export const updateQueuedPromptText = (key: string | null | undefined, id: string, text: string): boolean =>
+  updateQueuedPrompt(key, id, { text })
+
+// Drop every queued prompt for a session; a blank key or a session with no stored queue is a no-op.
+export const clearQueuedPrompts = (key: string | null | undefined) => {
+  const sid = sidOf(key)
+  const stored = $queuedPromptsBySession.get()
+
+  if (sid !== null && sid in stored) writeSession(sid, [])
+}
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
index 288ae2614b..09e870543a 100755
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -39,6 +39,10 @@ if [ "$(id -u)" = "0" ]; then
         # by the mapped user on the host side.
         chown -R hermes:hermes "$HERMES_HOME" 2>/dev/null || \
             echo "Warning: chown failed (rootless container?) — continuing anyway"
+        # The .venv must also be re-chowned when UID is remapped, otherwise
+        # lazy_deps.py cannot install platform packages (discord.py, etc.).
+        chown -R hermes:hermes "$INSTALL_DIR/.venv" 2>/dev/null || \
+            echo "Warning: chown .venv failed (rootless container?) — continuing anyway"
     fi
 
     # Ensure config.yaml is readable by the hermes runtime user even if it was
diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py
index 118eb688cc..bd731a7ab5 100644
--- a/gateway/platforms/signal.py
+++ b/gateway/platforms/signal.py
@@ -446,7 +446,9 @@ class SignalAdapter(BasePlatformAdapter):
                 if sent_msg and isinstance(sent_msg, dict):
                     dest = sent_msg.get("destinationNumber") or sent_msg.get("destination")
                     sent_ts = sent_msg.get("timestamp")
-                    if dest == self._account_normalized:
+                    sent_msg_group_info = sent_msg.get("groupInfo") or {}
+                    sent_msg_group_id = sent_msg_group_info.get("groupId") if sent_msg_group_info else None
+                    if dest == self._account_normalized or sent_msg_group_id:
                         # Check if this is an echo of our own outbound reply
                         if sent_ts and sent_ts in self._recent_sent_timestamps:
                             self._recent_sent_timestamps.discard(sent_ts)
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 415ddb5608..db25b87497 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -2772,7 +2772,7 @@ class TelegramAdapter(BasePlatformAdapter):
                                     {"thread_id": str(thread_id)},
                                 )
                             )
-                        await self._bot.send_message(**send_kwargs)
+                        await self._send_message_with_thread_fallback(**send_kwargs)
                 except Exception as exc:
                     logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True)
             return
diff --git a/gateway/platforms/wecom.py b/gateway/platforms/wecom.py
index d7a5c1d9a4..96769ea59b 100644
--- a/gateway/platforms/wecom.py
+++ b/gateway/platforms/wecom.py
@@ -345,6 +345,7 @@ class WeComAdapter(BasePlatformAdapter):
                 try:
                     await self._open_connection()
                     backoff_idx = 0
+                    self._mark_connected()
                     logger.info("[%s] Reconnected", self.name)
                 except Exception as reconnect_exc:
                     logger.warning("[%s] Reconnect failed: %s", self.name, reconnect_exc)
diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py
index 2fb6fc1332..29b78d75d0 100644
--- a/gateway/platforms/whatsapp.py
+++ b/gateway/platforms/whatsapp.py
@@ -494,12 +494,15 @@ class WhatsAppAdapter(BasePlatformAdapter):
                 # plain executable path.
                 _npm_bin = shutil.which("npm") or "npm"
                 try:
+                    # Read timeout from environment variable, default to 300 seconds (5 minutes)
+                    # to accommodate slower systems like Unraid NAS
+                    npm_install_timeout = int(os.environ.get("WHATSAPP_NPM_INSTALL_TIMEOUT", "300"))
                     install_result = subprocess.run(
                         [_npm_bin, "install", "--silent"],
                         cwd=str(bridge_dir),
                         capture_output=True,
                         text=True,
-                        timeout=60,
+                        timeout=npm_install_timeout,
                     )
                     if install_result.returncode != 0:
                         print(f"[{self.name}] npm install failed: {install_result.stderr}")
diff --git a/gateway/run.py b/gateway/run.py
index bda0cbf983..46c508e4bd 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -7543,6 +7543,7 @@ class GatewayRunner:
             hook_ctx = {
                 "platform": source.platform.value if source.platform else "",
                 "user_id": source.user_id,
+                "chat_id": source.chat_id or "",
                 "session_id": session_entry.session_id,
                 "message": message_text[:500],
             }
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 90d6a63935..88acd1cd43 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -284,7 +284,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
     ),
     "alibaba": ProviderConfig(
         id="alibaba",
-        name="Alibaba Cloud (DashScope)",
+        name="Qwen Cloud",
         auth_type="api_key",
         inference_base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
         api_key_env_vars=("DASHSCOPE_API_KEY",),
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 89f0f487cf..52e08cb2cb 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -735,15 +735,8 @@ DEFAULT_CONFIG = {
 
     # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
     # cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
-    # long_lived_prefix: when true (default), Claude on Anthropic / OpenRouter / Nous
-    #   Portal uses a split layout: tools[-1] + stable system prefix at long_lived_ttl
-    #   (cross-session cache), last 2 messages at cache_ttl (within-session rolling).
-    #   Set false to keep the legacy "system + last 3 messages" single-tier layout.
-    # long_lived_ttl: TTL for the cross-session prefix tier ("5m" or "1h"; default "1h").
     "prompt_caching": {
         "cache_ttl": "5m",
-        "long_lived_prefix": True,
-        "long_lived_ttl": "1h",
     },
 
     # OpenRouter-specific settings.
diff --git a/hermes_cli/goals.py b/hermes_cli/goals.py
index 9e8742e08a..6a8a2ae971 100644
--- a/hermes_cli/goals.py
+++ b/hermes_cli/goals.py
@@ -307,7 +307,7 @@ def judge_goal(
         return "continue", "empty response (nothing to evaluate)", False
 
     try:
-        from agent.auxiliary_client import get_text_auxiliary_client
+        from agent.auxiliary_client import get_auxiliary_extra_body, get_text_auxiliary_client
     except Exception as exc:
         logger.debug("goal judge: auxiliary client import failed: %s", exc)
         return "continue", "auxiliary client unavailable", False
@@ -336,6 +336,7 @@ def judge_goal(
             temperature=0,
             max_tokens=200,
             timeout=timeout,
+            extra_body=get_auxiliary_extra_body() or None,
         )
     except Exception as exc:
         logger.info("goal judge: API call failed (%s) — falling through to continue", exc)
diff --git a/hermes_cli/kanban_specify.py b/hermes_cli/kanban_specify.py
index d069e5ee1a..0d57fbb250 100644
--- a/hermes_cli/kanban_specify.py
+++ b/hermes_cli/kanban_specify.py
@@ -155,7 +155,7 @@ def specify_task(
         )
 
     try:
-        from agent.auxiliary_client import get_text_auxiliary_client
+        from agent.auxiliary_client import get_auxiliary_extra_body, get_text_auxiliary_client
     except Exception as exc:  # pragma: no cover — import smoke test
         logger.debug("specify: auxiliary client import failed: %s", exc)
         return SpecifyOutcome(task_id, False, "auxiliary client unavailable")
@@ -187,6 +187,7 @@ def specify_task(
             temperature=0.3,
             max_tokens=1500,
             timeout=timeout or 120,
+            extra_body=get_auxiliary_extra_body() or None,
         )
     except Exception as exc:
         logger.info(
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 5f355d03b9..eb55b59ee5 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -908,10 +908,10 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
     ProviderEntry("lmstudio",       "LM Studio",                "LM Studio (local desktop app with built-in model server)"),
     ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
     ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
+    ProviderEntry("alibaba",        "Qwen Cloud",               "Qwen Cloud / DashScope Coding (Qwen + multi-provider)"),
     ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
     ProviderEntry("tencent-tokenhub", "Tencent TokenHub",       "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"),
     ProviderEntry("nvidia",         "NVIDIA NIM",               "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
-    ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (reuses local Qwen CLI login)"),
     ProviderEntry("copilot",        "GitHub Copilot",           "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
     ProviderEntry("copilot-acp",    "GitHub Copilot ACP",       "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
     ProviderEntry("huggingface",    "Hugging Face",             "Hugging Face Inference Providers (20+ open models)"),
@@ -926,7 +926,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
     ProviderEntry("minimax",        "MiniMax",                  "MiniMax (global direct API)"),
     ProviderEntry("minimax-oauth",  "MiniMax (OAuth)",          "MiniMax via OAuth browser login (Coding Plan, minimax.io)"),
     ProviderEntry("minimax-cn",     "MiniMax (China)",          "MiniMax China (domestic direct API)"),
-    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
     ProviderEntry("ollama-cloud",   "Ollama Cloud",             "Ollama Cloud (cloud-hosted open models — ollama.com)"),
     ProviderEntry("arcee",          "Arcee AI",                 "Arcee AI (Trinity models — direct API)"),
     ProviderEntry("gmi",            "GMI Cloud",                "GMI Cloud (multi-model direct API)"),
@@ -936,6 +935,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
     ProviderEntry("bedrock",        "AWS Bedrock",              "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
     ProviderEntry("azure-foundry",  "Azure Foundry",            "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
     ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway"),
+    ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (reuses local Qwen CLI login)"),
 ]
 
 # Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/
diff --git a/plugins/model-providers/nous/__init__.py b/plugins/model-providers/nous/__init__.py
index f89e56c23a..5a61952d74 100644
--- a/plugins/model-providers/nous/__init__.py
+++ b/plugins/model-providers/nous/__init__.py
@@ -2,6 +2,7 @@
 
 from typing import Any
 
+from agent.portal_tags import nous_portal_tags
 from providers import register_provider
 from providers.base import ProviderProfile
 
@@ -12,7 +13,7 @@ class NousProfile(ProviderProfile):
     def build_extra_body(
         self, *, session_id: str | None = None, **context
     ) -> dict[str, Any]:
-        return {"tags": ["product=hermes-agent"]}
+        return {"tags": nous_portal_tags()}
 
     def build_api_kwargs_extras(
         self,
diff --git a/plugins/platforms/line/adapter.py b/plugins/platforms/line/adapter.py
index 67582ffae8..db5d3564d3 100644
--- a/plugins/platforms/line/adapter.py
+++ b/plugins/platforms/line/adapter.py
@@ -959,7 +959,7 @@ class LineAdapter(BasePlatformAdapter):
         if chat_type == "dm" and self._client:
             asyncio.create_task(self._client.loading(chat_id))
 
-        source_obj = self.create_source(
+        source_obj = self.build_source(
             chat_id=chat_id,
             chat_type=chat_type,
             user_id=user_id,
diff --git a/run_agent.py b/run_agent.py
index a8b071c872..f0597c9088 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1454,15 +1454,6 @@ class AIAgent:
         # 1h tier costs 2x on write vs 1.25x for 5m, but amortizes across long
         # sessions with >5-minute pauses between turns (#14971).
         self._cache_ttl = "5m"
-        # Long-lived prefix caching: when enabled and supported by the
-        # current provider, splits the system prompt into a stable prefix
-        # (cached cross-session at 1h TTL) and a volatile suffix
-        # (memory/timestamp — never cached), and attaches a 1h cache_control
-        # marker to the last tool in the schema array.  Restricted to
-        # Claude on Anthropic / OpenRouter / Nous Portal; see
-        # ``_supports_long_lived_anthropic_cache``.
-        self._use_long_lived_prefix_cache = False
-        self._long_lived_cache_ttl = "1h"
         try:
             from hermes_cli.config import load_config as _load_pc_cfg
 
@@ -1470,12 +1461,6 @@ class AIAgent:
             _ttl = _pc_cfg.get("cache_ttl", "5m")
             if _ttl in {"5m", "1h"}:
                 self._cache_ttl = _ttl
-            _ll_enabled = _pc_cfg.get("long_lived_prefix", True)
-            _ll_ttl = _pc_cfg.get("long_lived_ttl", "1h")
-            if _ll_ttl in ("5m", "1h"):
-                self._long_lived_cache_ttl = _ll_ttl
-            if _ll_enabled and self._use_prompt_caching and self._supports_long_lived_anthropic_cache():
-                self._use_long_lived_prefix_cache = True
         except Exception:
             pass
 
@@ -2480,7 +2465,6 @@ class AIAgent:
             "client_kwargs": dict(self._client_kwargs),
             "use_prompt_caching": self._use_prompt_caching,
             "use_native_cache_layout": self._use_native_cache_layout,
-            "use_long_lived_prefix_cache": self._use_long_lived_prefix_cache,
             # Context engine state that _try_activate_fallback() overwrites.
             # Use getattr for model/base_url/api_key/provider since plugin
             # engines may not have these (they're ContextCompressor-specific).
@@ -2647,6 +2631,11 @@ class AIAgent:
         old_model = self.model
         old_provider = self.provider
 
+        # Clear the per-config context_length override so the new model's
+        # actual context window is resolved via get_model_context_length()
+        # instead of inheriting the stale value from the previous model.
+        self._config_context_length = None
+
         # ── Swap core runtime fields ──
         self.model = new_model
         self.provider = new_provider
@@ -2711,15 +2700,6 @@ class AIAgent:
                 model=new_model,
             )
         )
-        self._use_long_lived_prefix_cache = bool(
-            self._use_prompt_caching
-            and self._supports_long_lived_anthropic_cache(
-                provider=new_provider,
-                base_url=self.base_url,
-                api_mode=api_mode,
-                model=new_model,
-            )
-        )
 
         # ── LM Studio: preload before probing context length ──
         self._ensure_lmstudio_runtime_loaded()
@@ -2768,7 +2748,6 @@ class AIAgent:
             "client_kwargs": dict(self._client_kwargs),
             "use_prompt_caching": self._use_prompt_caching,
             "use_native_cache_layout": self._use_native_cache_layout,
-            "use_long_lived_prefix_cache": self._use_long_lived_prefix_cache,
             "compressor_model": getattr(_cc, "model", self.model) if _cc else self.model,
             "compressor_base_url": getattr(_cc, "base_url", self.base_url) if _cc else self.base_url,
             "compressor_api_key": getattr(_cc, "api_key", "") if _cc else "",
@@ -3579,73 +3558,6 @@ class AIAgent:
 
         return False, False
 
-    def _supports_long_lived_anthropic_cache(
-        self,
-        *,
-        provider: Optional[str] = None,
-        base_url: Optional[str] = None,
-        api_mode: Optional[str] = None,
-        model: Optional[str] = None,
-    ) -> bool:
-        """Decide whether the long-lived (1h cross-session) cache layout applies.
-
-        Narrower than ``_anthropic_prompt_cache_policy`` — only enabled
-        for Claude models on the four endpoints whose cross-session
-        cache_control behavior we have explicitly validated:
-
-          * Native Anthropic API (``api_mode == 'anthropic_messages'`` +
-            host ``api.anthropic.com``)
-          * Anthropic OAuth subscription (same transport as native API)
-          * OpenRouter (``base_url`` contains ``openrouter.ai``)
-          * Nous Portal (``base_url`` contains ``nousresearch`` — proxies
-            to OpenRouter, so identical wire-format)
-
-        All four honour ``cache_control`` on both the tools array and the
-        first system content block, and bill cross-session cache reads at
-        the documented 0.1× rate.
-
-        Other endpoints covered by the standard ``system_and_3`` policy
-        (third-party Anthropic gateways, MiniMax, opencode-go Qwen, etc.)
-        keep that layout — they support cache_control but their behavior
-        with mixed-TTL multi-block system content has not been validated
-        against this codebase.
-        """
-        eff_provider = (provider if provider is not None else self.provider) or ""
-        eff_base_url = base_url if base_url is not None else (self.base_url or "")
-        eff_api_mode = api_mode if api_mode is not None else (self.api_mode or "")
-        eff_model = (model if model is not None else self.model) or ""
-
-        model_lower = eff_model.lower()
-        is_claude = "claude" in model_lower
-        is_nous_portal = "nousresearch" in eff_base_url.lower()
-
-        # Nous Portal: Claude AND Qwen both get long-lived caching.
-        # Portal proxies to OpenRouter with identical cache_control
-        # semantics; any model on Portal that accepts envelope-layout
-        # markers via _anthropic_prompt_cache_policy also benefits from
-        # the documented 1h cross-session TTL.
-        if is_nous_portal and (is_claude or "qwen" in model_lower):
-            return True
-
-        if not is_claude:
-            return False
-
-        # Native Anthropic + Anthropic OAuth subscription
-        if eff_api_mode == "anthropic_messages":
-            if eff_provider == "anthropic" or base_url_hostname(eff_base_url) == "api.anthropic.com":
-                return True
-
-        # OpenRouter
-        if base_url_host_matches(eff_base_url, "openrouter.ai"):
-            return True
-
-        # Nous Portal — front-ends OpenRouter behind the scenes; identical
-        # wire format and cache_control semantics.
-        if is_nous_portal:
-            return True
-
-        return False
-
     @staticmethod
     def _model_requires_responses_api(model: str) -> bool:
         """Return True for models that require the Responses API path.
@@ -5894,26 +5806,19 @@ class AIAgent:
         """Assemble the system prompt as three ordered parts.
 
         Returns a dict with three keys:
-          * ``stable``  — content that is byte-stable across sessions for a
-            given user config: identity, tool guidance, skills prompt,
+          * ``stable``   — identity, tool guidance, skills prompt,
             environment hints, platform hints, model-family operational
-            guidance.  Eligible for cross-session 1h prompt caching when
-            placed as a separate Anthropic content block (see
-            ``apply_anthropic_cache_control_long_lived``).
-          * ``context`` — context files (AGENTS.md, .cursorrules, etc.) and
-            caller-supplied system_message.  Stable within a session but may
-            change between sessions when files are edited or the cwd
-            differs.  Cached within-session via the rolling messages
-            breakpoint (5m TTL); not promoted to the long-lived tier so
-            edits don't poison the cross-session cache.
-          * ``volatile`` — content that changes on most turns/sessions:
-            memory snapshot, user profile, external memory provider block,
-            timestamp line.  Never marked for caching.
+            guidance.
+          * ``context``  — context files (AGENTS.md, .cursorrules, etc.)
+            and caller-supplied system_message.
+          * ``volatile`` — memory snapshot, user profile, external
+            memory provider block, timestamp line.
 
-        Joined ``stable\\n\\ncontext\\n\\nvolatile`` produces the same
-        logical content the old single-string builder produced, with the
-        guarantee that volatile content is at the end (cache-friendly
-        ordering for any provider that does prefix caching).
+        Joined into a single string by ``_build_system_prompt`` and
+        cached on ``_cached_system_prompt`` for the lifetime of the
+        AIAgent.  Hermes never re-renders parts of this string
+        mid-session — that's the only way to keep upstream prompt
+        caches warm across turns.
         """
         # ── Stable tier ────────────────────────────────────────────────
         stable_parts: List[str] = []
@@ -6115,9 +6020,10 @@ class AIAgent:
 
         Layers are ordered cache-friendly: stable identity/guidance first,
         then session-stable context files, then per-call volatile content
-        (memory, USER profile, timestamp). The split is exposed via
-        ``_build_system_prompt_parts`` for the long-lived prompt-caching
-        path (Claude on Anthropic / OpenRouter / Nous Portal).
+        (memory, USER profile, timestamp).  The whole string is treated as
+        one cached block — Hermes never rebuilds or reinjects parts of it
+        mid-session, which is the only way to keep upstream prompt caches
+        warm across turns.
         """
         parts = self._build_system_prompt_parts(system_message=system_message)
         joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
@@ -8817,6 +8723,11 @@ class AIAgent:
                 fb_api_mode = "bedrock_converse"
 
             old_model = self.model
+
+            # Clear the per-config context_length override so the fallback
+            # model's actual context window is resolved instead of inheriting
+            # the stale value from the previous model.  See #22387.
+            self._config_context_length = None
             self.model = fb_model
             self.provider = fb_provider
             self.base_url = fb_base_url
@@ -8879,15 +8790,6 @@ class AIAgent:
                     model=fb_model,
                 )
             )
-            self._use_long_lived_prefix_cache = bool(
-                self._use_prompt_caching
-                and self._supports_long_lived_anthropic_cache(
-                    provider=fb_provider,
-                    base_url=fb_base_url,
-                    api_mode=fb_api_mode,
-                    model=fb_model,
-                )
-            )
 
             # LM Studio: preload before probing the fallback's context length.
             self._ensure_lmstudio_runtime_loaded()
@@ -8964,16 +8866,6 @@ class AIAgent:
                 "use_native_cache_layout",
                 self.api_mode == "anthropic_messages" and self.provider == "anthropic",
             )
-            # Long-lived prefix flag was added later — restore False on
-            # snapshots predating the new field, then re-evaluate against
-            # the restored provider/model in case the user had it enabled.
-            self._use_long_lived_prefix_cache = rt.get(
-                "use_long_lived_prefix_cache",
-                bool(
-                    self._use_prompt_caching
-                    and self._supports_long_lived_anthropic_cache()
-                ),
-            )
 
             # ── Rebuild client for the primary provider ──
             if self.api_mode == "anthropic_messages":
@@ -9551,19 +9443,7 @@ class AIAgent:
 
     def _build_api_kwargs(self, api_messages: list) -> dict:
         """Build the keyword arguments dict for the active API mode."""
-        # Resolve the tools array exactly once. When the long-lived
-        # prefix-cache layout is active (Claude on Anthropic / OpenRouter
-        # / Nous Portal), attach a 1h cache_control marker to the last
-        # tool — this caches the entire tools array cross-session via
-        # Anthropic's tools→system→messages prefix order. The function
-        # returns a deep copy, so self.tools is never mutated.
-        if self._use_long_lived_prefix_cache and self.tools:
-            from agent.prompt_caching import mark_tools_for_long_lived_cache
-            tools_for_api = mark_tools_for_long_lived_cache(
-                self.tools, long_lived_ttl=self._long_lived_cache_ttl,
-            )
-        else:
-            tools_for_api = self.tools
+        tools_for_api = self.tools
 
         if self.api_mode == "anthropic_messages":
             _transport = self._get_transport()
@@ -11662,7 +11542,8 @@ class AIAgent:
                         "effort": "medium"
                     }
             if _is_nous:
-                summary_extra_body["tags"] = ["product=hermes-agent"]
+                from agent.portal_tags import nous_portal_tags as _portal_tags
+                summary_extra_body["tags"] = _portal_tags()
 
             if self.api_mode == "codex_responses":
                 codex_kwargs = self._build_api_kwargs(api_messages)
@@ -12423,36 +12304,21 @@ class AIAgent:
             # External recall context is injected into the user message, not the system
             # prompt, so the stable cache prefix remains unchanged.
             #
-            # When the long-lived prefix-cache layout is active (Claude on
-            # Anthropic / OpenRouter / Nous Portal), we build the system
-            # message as a *list of content blocks*: [stable, context,
-            # volatile, ephemeral?].  Block 0 (stable) gets the 1h
-            # cache_control marker further down via
-            # apply_anthropic_cache_control_long_lived; blocks 1-3 are
-            # cached only via the rolling messages window at 5m.
             # NOTE: Plugin context from pre_llm_call hooks is injected into the
             # user message (see injection block above), NOT the system prompt.
             # This is intentional — system prompt modifications break the prompt
             # cache prefix.  The system prompt is reserved for Hermes internals.
-            if self._use_long_lived_prefix_cache:
-                _sys_parts = self._build_system_prompt_parts(system_message=system_message)
-                _sys_blocks: list = []
-                if _sys_parts.get("stable"):
-                    _sys_blocks.append({"type": "text", "text": _sys_parts["stable"]})
-                if _sys_parts.get("context"):
-                    _sys_blocks.append({"type": "text", "text": _sys_parts["context"]})
-                if _sys_parts.get("volatile"):
-                    _sys_blocks.append({"type": "text", "text": _sys_parts["volatile"]})
-                if self.ephemeral_system_prompt:
-                    _sys_blocks.append({"type": "text", "text": self.ephemeral_system_prompt})
-                if _sys_blocks:
-                    api_messages = [{"role": "system", "content": _sys_blocks}] + api_messages
-            else:
-                effective_system = active_system_prompt or ""
-                if self.ephemeral_system_prompt:
-                    effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
-                if effective_system:
-                    api_messages = [{"role": "system", "content": effective_system}] + api_messages
+            #
+            # Hermes invariant: the system prompt is built ONCE per session
+            # (cached on ``_cached_system_prompt``) and replayed verbatim on
+            # every turn.  We send it as a single content string so the
+            # payload is byte-stable across turns and upstream prompt caches
+            # stay warm.
+            effective_system = active_system_prompt or ""
+            if self.ephemeral_system_prompt:
+                effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
+            if effective_system:
+                api_messages = [{"role": "system", "content": effective_system}] + api_messages
 
             # Inject ephemeral prefill messages right after the system prompt
             # but before conversation history. Same API-call-time-only pattern.
@@ -12466,29 +12332,13 @@ class AIAgent:
             # gateways. Auto-detected: if ``_use_prompt_caching`` is set,
             # inject cache_control breakpoints (system + last 3 messages)
             # to reduce input token costs by ~75% on multi-turn
-            # conversations. Layout is chosen per endpoint by
-            # ``_anthropic_prompt_cache_policy``.
-            #
-            # Long-lived prefix layout (prefix_and_2): stable system block
-            # gets 1h marker + last 2 messages get 5m markers. Tools
-            # array's last entry is marked separately at API-call kwargs
-            # build time (see ``_build_api_kwargs`` and
-            # ``mark_tools_for_long_lived_cache``).
+            # conversations.
             if self._use_prompt_caching:
-                if self._use_long_lived_prefix_cache:
-                    from agent.prompt_caching import apply_anthropic_cache_control_long_lived
-                    api_messages = apply_anthropic_cache_control_long_lived(
-                        api_messages,
-                        long_lived_ttl=self._long_lived_cache_ttl,
-                        rolling_ttl=self._cache_ttl,
-                        native_anthropic=self._use_native_cache_layout,
-                    )
-                else:
-                    api_messages = apply_anthropic_cache_control(
-                        api_messages,
-                        cache_ttl=self._cache_ttl,
-                        native_anthropic=self._use_native_cache_layout,
-                    )
+                api_messages = apply_anthropic_cache_control(
+                    api_messages,
+                    cache_ttl=self._cache_ttl,
+                    native_anthropic=self._use_native_cache_layout,
+                )
 
             # Safety net: strip orphaned tool results / add stubs for missing
             # results before sending to the API.  Runs unconditionally — not
@@ -14442,7 +14292,7 @@ class AIAgent:
                             _ra_raw = _resp_headers.get("retry-after") or _resp_headers.get("Retry-After")
                             if _ra_raw:
                                 try:
-                                    _retry_after = min(int(_ra_raw), 120)  # Cap at 2 minutes
+                                    _retry_after = min(float(_ra_raw), 120)  # Cap at 2 minutes
                                 except (TypeError, ValueError):
                                     pass
                     wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0)
diff --git a/scripts/install.sh b/scripts/install.sh
index aaa810f3c8..72cc81637d 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -890,7 +890,7 @@ clone_repo() {
                 stash_name="hermes-install-autostash-$(date -u +%Y%m%d-%H%M%S)"
                 log_info "Local changes detected, stashing before update..."
                 git stash push --include-untracked -m "$stash_name"
-                autostash_ref="$(git rev-parse --verify refs/stash)"
+                autostash_ref="stash@{0}"
             fi
 
             git fetch origin
diff --git a/tests/agent/test_portal_tags.py b/tests/agent/test_portal_tags.py
new file mode 100644
index 0000000000..7c873ef0f6
--- /dev/null
+++ b/tests/agent/test_portal_tags.py
@@ -0,0 +1,61 @@
+"""Tests for agent.portal_tags — Nous Portal request tag contract."""
+
+from __future__ import annotations
+
+
+def test_hermes_client_tag_includes_current_version():
+    """The client tag must reflect hermes_cli.__version__ verbatim."""
+    from hermes_cli import __version__
+    from agent.portal_tags import hermes_client_tag
+
+    assert hermes_client_tag() == f"client=hermes-client-v{__version__}"
+
+
+def test_hermes_client_tag_format():
+    """The client tag has the exact shape Nous Portal expects."""
+    from agent.portal_tags import hermes_client_tag
+
+    tag = hermes_client_tag()
+    assert tag.startswith("client=hermes-client-v")
+    # No spaces, no commas — single tag value
+    assert " " not in tag
+    assert "," not in tag
+
+
+def test_nous_portal_tags_contains_product_and_client():
+    """Every Nous Portal request gets BOTH the product tag and the version tag."""
+    from agent.portal_tags import hermes_client_tag, nous_portal_tags
+
+    tags = nous_portal_tags()
+    assert "product=hermes-agent" in tags
+    assert hermes_client_tag() in tags
+    assert len(tags) == 2
+
+
+def test_nous_portal_tags_returns_fresh_list():
+    """Callers mutate the returned list; we must not share state across calls."""
+    from agent.portal_tags import nous_portal_tags
+
+    a = nous_portal_tags()
+    a.append("client=test-mutation")
+    b = nous_portal_tags()
+    assert "client=test-mutation" not in b
+
+
+def test_auxiliary_client_nous_extra_body_uses_helper():
+    """auxiliary_client.NOUS_EXTRA_BODY must match the canonical helper output."""
+    from agent.auxiliary_client import NOUS_EXTRA_BODY
+    from agent.portal_tags import nous_portal_tags
+
+    assert NOUS_EXTRA_BODY == {"tags": nous_portal_tags()}
+
+
+def test_nous_provider_profile_uses_helper():
+    """The Nous provider profile (main agent loop) must use the canonical tags."""
+    from agent.portal_tags import nous_portal_tags
+    from providers import get_provider_profile
+
+    profile = get_provider_profile("nous")
+    assert profile is not None
+    body = profile.build_extra_body()
+    assert body["tags"] == nous_portal_tags()
diff --git a/tests/agent/test_prompt_caching.py b/tests/agent/test_prompt_caching.py
index 9d989571b5..f6f3e9f0a3 100644
--- a/tests/agent/test_prompt_caching.py
+++ b/tests/agent/test_prompt_caching.py
@@ -6,8 +6,6 @@ import pytest
 from agent.prompt_caching import (
     _apply_cache_marker,
     apply_anthropic_cache_control,
-    apply_anthropic_cache_control_long_lived,
-    mark_tools_for_long_lived_cache,
 )
 
 
@@ -143,132 +141,3 @@ class TestApplyAnthropicCacheControl:
             elif "cache_control" in msg:
                 count += 1
         assert count <= 4
-
-
-class TestMarkToolsForLongLivedCache:
-    def test_returns_unchanged_for_empty_tools(self):
-        assert mark_tools_for_long_lived_cache(None) is None
-        assert mark_tools_for_long_lived_cache([]) == []
-
-    def test_marks_only_last_tool(self):
-        tools = [
-            {"type": "function", "function": {"name": "a"}},
-            {"type": "function", "function": {"name": "b"}},
-            {"type": "function", "function": {"name": "c"}},
-        ]
-        out = mark_tools_for_long_lived_cache(tools)
-        assert "cache_control" not in out[0]
-        assert "cache_control" not in out[1]
-        assert out[2]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
-
-    def test_does_not_mutate_input(self):
-        tools = [{"type": "function", "function": {"name": "a"}}]
-        mark_tools_for_long_lived_cache(tools)
-        assert "cache_control" not in tools[0]
-
-    def test_5m_ttl_drops_ttl_field(self):
-        tools = [{"type": "function", "function": {"name": "a"}}]
-        out = mark_tools_for_long_lived_cache(tools, long_lived_ttl="5m")
-        assert out[0]["cache_control"] == {"type": "ephemeral"}
-
-
-class TestApplyAnthropicCacheControlLongLived:
-    def test_empty_messages(self):
-        assert apply_anthropic_cache_control_long_lived([]) == []
-
-    def test_marks_first_block_of_split_system(self):
-        msgs = [
-            {"role": "system", "content": [
-                {"type": "text", "text": "STABLE"},
-                {"type": "text", "text": "CONTEXT"},
-                {"type": "text", "text": "VOLATILE"},
-            ]},
-            {"role": "user", "content": "msg1"},
-            {"role": "assistant", "content": "msg2"},
-        ]
-        out = apply_anthropic_cache_control_long_lived(msgs)
-        sys_blocks = out[0]["content"]
-        assert sys_blocks[0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
-        assert "cache_control" not in sys_blocks[1]
-        assert "cache_control" not in sys_blocks[2]
-
-    def test_rolling_marker_on_last_2_messages(self):
-        msgs = [
-            {"role": "system", "content": [{"type": "text", "text": "S"}]},
-            {"role": "user", "content": "u1"},
-            {"role": "assistant", "content": "a1"},
-            {"role": "user", "content": "u2"},
-            {"role": "assistant", "content": "a2"},
-        ]
-        out = apply_anthropic_cache_control_long_lived(msgs)
-
-        def has_marker(m):
-            c = m.get("content")
-            if isinstance(c, list) and c and isinstance(c[-1], dict):
-                return "cache_control" in c[-1]
-            return "cache_control" in m
-
-        # u1 and a1 (older messages) should NOT be marked
-        assert not has_marker(out[1])
-        assert not has_marker(out[2])
-        # u2 and a2 (last 2) SHOULD be marked
-        assert has_marker(out[3])
-        assert has_marker(out[4])
-
-    def test_rolling_marker_uses_5m_ttl(self):
-        msgs = [
-            {"role": "system", "content": [{"type": "text", "text": "S"}]},
-            {"role": "user", "content": "u1"},
-            {"role": "assistant", "content": "a1"},
-        ]
-        out = apply_anthropic_cache_control_long_lived(
-            msgs, long_lived_ttl="1h", rolling_ttl="5m",
-        )
-        # Last user message: cache_control on the wrapped text part should be 5m
-        last = out[-1]
-        c = last["content"]
-        assert isinstance(c, list)
-        assert c[-1]["cache_control"] == {"type": "ephemeral"}  # 5m has no ttl key
-
-    def test_string_system_falls_back_to_envelope_marker(self):
-        """When the caller didn't split the system message, we still place a marker."""
-        msgs = [
-            {"role": "system", "content": "Single string system"},
-            {"role": "user", "content": "u1"},
-        ]
-        out = apply_anthropic_cache_control_long_lived(msgs)
-        sys_content = out[0]["content"]
-        # Wrapped into a list and the (now sole) block gets the 1h marker
-        assert isinstance(sys_content, list)
-        assert sys_content[0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
-
-    def test_does_not_mutate_input(self):
-        msgs = [
-            {"role": "system", "content": [{"type": "text", "text": "S"}]},
-            {"role": "user", "content": "u1"},
-        ]
-        before = copy.deepcopy(msgs)
-        apply_anthropic_cache_control_long_lived(msgs)
-        assert msgs == before
-
-    def test_max_4_breakpoints_with_split_system(self):
-        msgs = [
-            {"role": "system", "content": [{"type": "text", "text": "S"}, {"type": "text", "text": "V"}]},
-        ] + [
-            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg{i}"}
-            for i in range(10)
-        ]
-        out = apply_anthropic_cache_control_long_lived(msgs)
-        count = 0
-        for m in out:
-            c = m.get("content")
-            if isinstance(c, list):
-                for item in c:
-                    if isinstance(item, dict) and "cache_control" in item:
-                        count += 1
-            elif "cache_control" in m:
-                count += 1
-        # 1 system block + last 2 messages = 3 breakpoints from this function.
-        # tools[-1] is marked separately (not via this function), so a 4th
-        # breakpoint can be added at API-call time.
-        assert count == 3
diff --git a/tests/agent/test_prompt_caching_live.py b/tests/agent/test_prompt_caching_live.py
deleted file mode 100644
index f72b6b9d90..0000000000
--- a/tests/agent/test_prompt_caching_live.py
+++ /dev/null
@@ -1,112 +0,0 @@
-"""Live E2E: long-lived prefix caching on Claude via OpenRouter.
-
-Run only when LIVE_OR_KEY env var is set. Skipped under the normal hermetic
-test suite (which unsets credentials).
-"""
-import os, sys, tempfile, time, shutil, pytest
-
-
-# Probe for the key BEFORE conftest unsets it
-_LIVE_KEY = os.environ.get("OPENROUTER_API_KEY") or os.environ.get("LIVE_OR_KEY")
-if not _LIVE_KEY:
-    # Try to read directly from .env
-    env_path = os.path.expanduser("~/.hermes/.env")
-    if os.path.exists(env_path):
-        with open(env_path) as f:
-            for line in f:
-                if line.startswith("OPENROUTER_API_KEY="):
-                    _LIVE_KEY = line.strip().split("=", 1)[1].strip().strip('"').strip("'")
-                    break
-
-
-pytestmark = pytest.mark.skipif(
-    not _LIVE_KEY,
-    reason="set OPENROUTER_API_KEY (or LIVE_OR_KEY) to run live cache test",
-)
-
-
-def test_long_lived_prefix_cache_e2e_openrouter(tmp_path, monkeypatch):
-    """Two AIAgent runs in fresh sessions: call 1 writes cache, call 2 reads it."""
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
-    # The hermetic conftest unsets OPENROUTER_API_KEY — restore for this test
-    monkeypatch.setenv("OPENROUTER_API_KEY", _LIVE_KEY)
-
-    # Minimal config — but with enough toolset/guidance to exceed Anthropic's
-    # ~1024-token minimum-cacheable-prefix threshold. Anthropic silently
-    # ignores cache_control markers on small blocks.
-    import yaml
-    cfg_path = tmp_path / "config.yaml"
-    cfg_path.write_text(yaml.safe_dump({
-        "model": {"provider": "openrouter", "default": "anthropic/claude-haiku-4.5"},
-        "prompt_caching": {"long_lived_prefix": True, "long_lived_ttl": "1h", "cache_ttl": "5m"},
-        "agent": {"tool_use_enforcement": True},   # adds substantial guidance text
-        "memory": {"provider": ""},
-        "compression": {"enabled": False},
-    }))
-
-    from run_agent import AIAgent
-
-    def make_agent():
-        return AIAgent(
-            api_key=_LIVE_KEY,
-            base_url="https://openrouter.ai/api/v1",
-            provider="openrouter",
-            model="anthropic/claude-haiku-4.5",
-            api_mode="chat_completions",
-            # Use the default toolset roster — the tools array (~13k tokens
-            # for ~35 tools) is what carries the bulk of the cross-session
-            # cache value. With a tiny toolset the cached prefix can fall
-            # below Anthropic Haiku's 2048-token minimum cacheable size and
-            # the marker is silently ignored.
-            enabled_toolsets=None,
-            quiet_mode=True,
-            skip_context_files=True,
-            skip_memory=True,
-            save_trajectories=False,
-        )
-
-    a1 = make_agent()
-    assert a1._use_prompt_caching is True, "policy should enable caching for Claude on OR"
-    assert a1._use_long_lived_prefix_cache is True, "long-lived path should activate"
-    parts = a1._build_system_prompt_parts()
-    print(f"\nstable={len(parts['stable']):,} ctx={len(parts['context']):,} volatile={len(parts['volatile']):,} chars")
-    print(f"tool count: {len(a1.tools or [])}")
-
-    # Use distinct user messages each call so OpenRouter's response cache
-    # doesn't short-circuit the upstream Anthropic call (we need real
-    # Anthropic billing visibility to verify cache_creation/cache_read).
-    USER_1 = "Reply with the single word ALPHA."
-    USER_2 = "Reply with the single word BRAVO."
-
-    print("\n--- Call 1 (cold) ---")
-    r1 = a1.run_conversation(USER_1, conversation_history=[])
-    print(f"final_response[:80]: {(r1.get('final_response') or '')[:80]!r}")
-    cr1 = a1.session_cache_read_tokens
-    cw1 = a1.session_cache_write_tokens
-    print(f"call1: cache_read={cr1} cache_write={cw1}")
-
-    # Wait so cache settles, then fresh agent (NEW SESSION) for cross-session read
-    time.sleep(2)
-    a2 = make_agent()
-    assert a2.session_id != a1.session_id, "second agent must have a new session"
-
-    print("\n--- Call 2 (warm, NEW session, different user msg) ---")
-    r2 = a2.run_conversation(USER_2, conversation_history=[])
-    print(f"final_response[:80]: {(r2.get('final_response') or '')[:80]!r}")
-    cr2 = a2.session_cache_read_tokens
-    cw2 = a2.session_cache_write_tokens
-    print(f"call2: cache_read={cr2} cache_write={cw2}")
-
-    print(f"\n=== VERDICT ===")
-    print(f"  call1 wrote {cw1:,} cache tokens, read {cr1:,}")
-    print(f"  call2 wrote {cw2:,} cache tokens, read {cr2:,}")
-    if cw1:
-        print(f"  cross-session read fraction: cr2/cw1 = {cr2/cw1:.2%}")
-
-    # Assertions
-    assert cw1 > 0, f"call 1 must write cache (got {cw1}); long-lived layout not reaching wire"
-    assert cr2 > 0, (
-        f"call 2 must read cache cross-session (got {cr2}); "
-        f"stable prefix is not byte-stable across sessions"
-    )
-    assert cr2 >= 1000, f"cache_read on call 2 ({cr2}) too small to indicate real reuse"
diff --git a/tests/agent/transports/test_chat_completions.py b/tests/agent/transports/test_chat_completions.py
index 47d402a215..7ed0d4da63 100644
--- a/tests/agent/transports/test_chat_completions.py
+++ b/tests/agent/transports/test_chat_completions.py
@@ -147,11 +147,12 @@ class TestChatCompletionsBuildKwargs:
         ]
 
     def test_nous_tags(self, transport):
+        from agent.portal_tags import nous_portal_tags
         from providers import get_provider_profile
         profile = get_provider_profile("nous")
         msgs = [{"role": "user", "content": "Hi"}]
         kw = transport.build_kwargs(model="gpt-4o", messages=msgs, provider_profile=profile)
-        assert kw["extra_body"]["tags"] == ["product=hermes-agent"]
+        assert kw["extra_body"]["tags"] == nous_portal_tags()
 
     def test_reasoning_default(self, transport):
         msgs = [{"role": "user", "content": "Hi"}]
diff --git a/tests/gateway/test_restart_drain.py b/tests/gateway/test_restart_drain.py
index 55de5a4554..844af42730 100644
--- a/tests/gateway/test_restart_drain.py
+++ b/tests/gateway/test_restart_drain.py
@@ -7,6 +7,7 @@ from unittest.mock import AsyncMock, MagicMock
 import pytest
 
 import gateway.run as gateway_run
+from agent.i18n import t
 from gateway.platforms.base import MessageEvent, MessageType
 from gateway.restart import DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
 from gateway.session import SessionEntry, build_session_key
@@ -32,7 +33,7 @@ async def test_restart_command_while_busy_requests_drain_without_interrupt(monke
 
     result = await runner._handle_message(event)
 
-    assert result == "⏳ Draining 1 active agent(s) before restart..."
+    assert result == t("gateway.draining", count=1)
     running_agent.interrupt.assert_not_called()
     runner.request_restart.assert_called_once_with(detached=True, via_service=False)
 
diff --git a/tests/providers/test_profile_wiring.py b/tests/providers/test_profile_wiring.py
index 9096c82b6a..258ff53180 100644
--- a/tests/providers/test_profile_wiring.py
+++ b/tests/providers/test_profile_wiring.py
@@ -273,12 +273,13 @@ class TestRequestOverridesParity:
 
     def test_extra_body_override_merges_with_provider_body(self, transport):
         """Override extra_body merges WITH provider extra_body, not replaces."""
+        from agent.portal_tags import nous_portal_tags
         kw = transport.build_kwargs(
             model="hermes-3", messages=_msgs(), tools=None,
             provider_profile=get_provider_profile("nous"),
             request_overrides={"extra_body": {"custom": True}},
         )
-        assert kw["extra_body"]["tags"] == ["product=hermes-agent"]  # from profile
+        assert kw["extra_body"]["tags"] == nous_portal_tags()  # from profile
         assert kw["extra_body"]["custom"] is True  # from override
 
     def test_top_level_override(self, transport):
diff --git a/tests/providers/test_provider_profiles.py b/tests/providers/test_provider_profiles.py
index 68f7b5f497..c79ed2aea9 100644
--- a/tests/providers/test_provider_profiles.py
+++ b/tests/providers/test_provider_profiles.py
@@ -210,9 +210,10 @@ class TestOpenRouterProfile:
 
 class TestNousProfile:
     def test_tags(self):
+        from agent.portal_tags import nous_portal_tags
         p = get_provider_profile("nous")
         body = p.build_extra_body()
-        assert body["tags"] == ["product=hermes-agent"]
+        assert body["tags"] == nous_portal_tags()
 
     def test_auth_type(self):
         p = get_provider_profile("nous")
diff --git a/tests/providers/test_transport_parity.py b/tests/providers/test_transport_parity.py
index be88bc580a..8c1fb6eb4f 100644
--- a/tests/providers/test_transport_parity.py
+++ b/tests/providers/test_transport_parity.py
@@ -165,13 +165,14 @@ class TestNousParity:
     """Nous: product tags, reasoning, omit when disabled."""
 
     def test_tags(self, transport):
+        from agent.portal_tags import nous_portal_tags
         kw = transport.build_kwargs(
             model="hermes-3-llama-3.1-405b",
             messages=_simple_messages(),
             tools=None,
             provider_profile=get_provider_profile("nous"),
         )
-        assert kw["extra_body"]["tags"] == ["product=hermes-agent"]
+        assert kw["extra_body"]["tags"] == nous_portal_tags()
 
     def test_reasoning_omitted_when_disabled(self, transport):
         """Nous special case: reasoning omitted entirely when disabled."""
diff --git a/tests/run_agent/test_anthropic_prompt_cache_policy.py b/tests/run_agent/test_anthropic_prompt_cache_policy.py
index 15d1cb4e87..ba6e54f037 100644
--- a/tests/run_agent/test_anthropic_prompt_cache_policy.py
+++ b/tests/run_agent/test_anthropic_prompt_cache_policy.py
@@ -330,127 +330,3 @@ class TestExplicitOverrides:
 # Long-lived prefix cache policy (cross-session 1h tier)
 # ─────────────────────────────────────────────────────────────────────
 
-class TestSupportsLongLivedAnthropicCache:
-    """Narrower than _anthropic_prompt_cache_policy — only Claude on the 4
-    explicitly-validated endpoints get the long-lived layout."""
-
-    def test_native_anthropic_claude_supported(self):
-        agent = _make_agent(
-            provider="anthropic",
-            base_url="https://api.anthropic.com",
-            api_mode="anthropic_messages",
-            model="claude-sonnet-4.6",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is True
-
-    def test_anthropic_oauth_supported(self):
-        # OAuth uses the same transport as native Anthropic
-        agent = _make_agent(
-            provider="anthropic",
-            base_url="https://api.anthropic.com",
-            api_mode="anthropic_messages",
-            model="claude-opus-4.6",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is True
-
-    def test_openrouter_claude_supported(self):
-        agent = _make_agent(
-            provider="openrouter",
-            base_url="https://openrouter.ai/api/v1",
-            api_mode="chat_completions",
-            model="anthropic/claude-sonnet-4.6",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is True
-
-    def test_nous_portal_claude_supported(self):
-        # Nous Portal proxies to OpenRouter — same wire format
-        agent = _make_agent(
-            provider="nous",
-            base_url="https://inference-api.nousresearch.com/v1",
-            api_mode="chat_completions",
-            model="anthropic/claude-opus-4.7",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is True
-
-    def test_nous_portal_qwen_supported(self):
-        # Portal Qwen rides the same OpenRouter-equivalent transport as
-        # Portal Claude; long-lived (1h cross-session) cache_control
-        # markers apply identically.
-        agent = _make_agent(
-            provider="nous",
-            base_url="https://inference-api.nousresearch.com/v1",
-            api_mode="chat_completions",
-            model="qwen3.6-plus",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is True
-
-    def test_nous_portal_qwen_vendored_slug_supported(self):
-        agent = _make_agent(
-            provider="nous",
-            base_url="https://inference-api.nousresearch.com/v1",
-            api_mode="chat_completions",
-            model="qwen/qwen3.6-plus",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is True
-
-    def test_nous_portal_non_claude_non_qwen_rejected(self):
-        # Portal long-lived cache scope mirrors policy: Claude or Qwen only.
-        agent = _make_agent(
-            provider="nous",
-            base_url="https://inference-api.nousresearch.com/v1",
-            api_mode="chat_completions",
-            model="openai/gpt-5.4",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is False
-
-    def test_openrouter_non_claude_rejected(self):
-        agent = _make_agent(
-            provider="openrouter",
-            base_url="https://openrouter.ai/api/v1",
-            api_mode="chat_completions",
-            model="openai/gpt-5.4",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is False
-
-    def test_third_party_anthropic_gateway_rejected(self):
-        # MiniMax / Kimi / etc. — anthropic-wire but not in our validated list
-        agent = _make_agent(
-            provider="minimax",
-            base_url="https://api.minimax.io/anthropic",
-            api_mode="anthropic_messages",
-            model="minimax-m2.7",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is False
-
-    def test_alibaba_dashscope_rejected(self):
-        agent = _make_agent(
-            provider="alibaba",
-            base_url="https://dashscope.aliyuncs.com/api/v1/anthropic",
-            api_mode="anthropic_messages",
-            model="qwen3.5-plus",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is False
-
-    def test_opencode_qwen_rejected(self):
-        agent = _make_agent(
-            provider="opencode-go",
-            base_url="https://api.opencode-go.example/v1",
-            api_mode="chat_completions",
-            model="qwen3.6-plus",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is False
-
-    def test_fallback_target_evaluated_independently(self):
-        # Starting on a non-supported provider, falling back to OpenRouter Claude
-        agent = _make_agent(
-            provider="minimax",
-            base_url="https://api.minimax.io/anthropic",
-            api_mode="anthropic_messages",
-            model="minimax-m2.7",
-        )
-        assert agent._supports_long_lived_anthropic_cache(
-            provider="openrouter",
-            base_url="https://openrouter.ai/api/v1",
-            api_mode="chat_completions",
-            model="anthropic/claude-sonnet-4.6",
-        ) is True
diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py
index f97885a038..d3a5a1b37f 100644
--- a/tests/run_agent/test_provider_parity.py
+++ b/tests/run_agent/test_provider_parity.py
@@ -343,11 +343,12 @@ class TestBuildApiKwargsAIGateway:
 
 class TestBuildApiKwargsNousPortal:
     def test_includes_nous_product_tags(self, monkeypatch):
+        from agent.portal_tags import nous_portal_tags
         agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
         messages = [{"role": "user", "content": "hi"}]
         kwargs = agent._build_api_kwargs(messages)
         extra = kwargs.get("extra_body", {})
-        assert extra.get("tags") == ["product=hermes-agent"]
+        assert extra.get("tags") == nous_portal_tags()
 
     def test_uses_chat_completions_format(self, monkeypatch):
         agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
diff --git a/tests/test_ctx_halving_fix.py b/tests/test_ctx_halving_fix.py
index afeee84878..0dd3ca4e7e 100644
--- a/tests/test_ctx_halving_fix.py
+++ b/tests/test_ctx_halving_fix.py
@@ -169,7 +169,6 @@ class TestEphemeralMaxOutputTokens:
         agent.reasoning_config = None
         agent._is_anthropic_oauth = False
         agent._ephemeral_max_output_tokens = None
-        agent._use_long_lived_prefix_cache = False
 
         compressor = MagicMock()
         compressor.context_length = 200_000
diff --git a/tools/approval.py b/tools/approval.py
index d6db5a05a0..dbb3810886 100644
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -314,7 +314,9 @@ DANGEROUS_PATTERNS = [
     (r'\bdd\s+.*if=', "disk copy"),
     (r'>\s*/dev/sd', "write to block device"),
     (r'\bDROP\s+(TABLE|DATABASE)\b', "SQL DROP"),
-    (r'\bDELETE\s+FROM\b(?!.*\bWHERE\b)', "SQL DELETE without WHERE"),
+    # Use [^\n]* instead of .* so that, in DOTALL mode, a WHERE clause on a
+    # *later* line cannot satisfy the negative lookahead and silently allow a DELETE without WHERE.
+    (r'\bDELETE\s+FROM\b(?![^\n]*\bWHERE\b)', "SQL DELETE without WHERE"),
     (r'\bTRUNCATE\s+(TABLE)?\s*\w', "SQL TRUNCATE"),
     (r'>\s*/etc/', "overwrite system config"),
     (r'\bsystemctl\s+(-[^\s]+\s+)*(stop|restart|disable|mask)\b', "stop/restart system service"),
diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py
index 664c8736a1..d5b2c0c782 100644
--- a/tools/send_message_tool.py
+++ b/tools/send_message_tool.py
@@ -461,7 +461,8 @@ async def _send_via_adapter(
             adapter = None
         if adapter is not None:
             try:
-                result = await adapter.send(chat_id=chat_id, content=chunk)
+                metadata = {"thread_id": thread_id} if thread_id else None
+                result = await adapter.send(chat_id=chat_id, content=chunk, metadata=metadata)
             except asyncio.CancelledError:
                 raise
             except Exception as e:
diff --git a/tools/voice_mode.py b/tools/voice_mode.py
index 238fed4b28..cc691afad7 100644
--- a/tools/voice_mode.py
+++ b/tools/voice_mode.py
@@ -130,7 +130,9 @@ def detect_audio_environment() -> dict:
         try:
             devices = sd.query_devices()
             if not devices:
-                if termux_capture:
+                if os.environ.get('PULSE_SERVER'):
+                    notices.append("No PortAudio devices detected but PULSE_SERVER is set -- continuing")
+                elif termux_capture:
                     notices.append("No PortAudio devices detected, but Termux:API microphone capture is available")
                 else:
                     warnings.append("No audio input/output devices detected")
diff --git a/tools/web_tools.py b/tools/web_tools.py
index b9df0cd3be..79ddc8d27f 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -593,7 +593,8 @@ def _resolve_web_extract_auxiliary(model: Optional[str] = None) -> tuple[Optiona
     extra_body: Dict[str, Any] = {}
     if client is not None and _is_nous_auxiliary_client(client):
         from agent.auxiliary_client import get_auxiliary_extra_body
-        extra_body = get_auxiliary_extra_body() or {"tags": ["product=hermes-agent"]}
+        from agent.portal_tags import nous_portal_tags
+        extra_body = get_auxiliary_extra_body() or {"tags": nous_portal_tags()}
 
     return client, effective_model, extra_body
 
diff --git a/website/docs/user-guide/features/lsp.md b/website/docs/user-guide/features/lsp.md
index ef0f403d20..bb54003b11 100644
--- a/website/docs/user-guide/features/lsp.md
+++ b/website/docs/user-guide/features/lsp.md
@@ -92,6 +92,13 @@ manager makes sense for that language (rustup, ghcup, opam, brew,
 …). Hermes auto-detects the binary on PATH or in
 `<HERMES_HOME>/lsp/bin/`.
 
+A few servers are installed alongside a peer dependency that npm
+won't auto-pull. The current case is `typescript-language-server`,
+which requires the `typescript` SDK to be importable from the same
+`node_modules` tree — Hermes installs both packages together when you
+run `hermes lsp install typescript` or when auto-install fires on
+first use.
+
 ## CLI
 
 ```
@@ -207,6 +214,24 @@ The binary isn't on PATH and isn't in `<HERMES_HOME>/lsp/bin/`. Run
 `hermes lsp install <server_id>` to attempt an auto-install, or
 install the binary manually through the language's normal toolchain.
 
+**`Backend warnings` section in `hermes lsp status`**
+
+Some servers ship as thin wrappers around an external CLI for actual
+diagnostics — they spawn cleanly and accept requests but never emit
+any diagnostics when the sidecar binary is missing. The most common case is
+`bash-language-server`, which delegates diagnostics to `shellcheck`.
+When `hermes lsp status` shows a `Backend warnings` section, install
+the named tool through your OS package manager:
+
+```
+apt install shellcheck      # Debian / Ubuntu
+brew install shellcheck     # macOS
+scoop install shellcheck    # Windows
+```
+
+The same warning is logged once at server spawn time in
+`~/.hermes/logs/agent.log`.
+
 **Server starts but never returns diagnostics**
 
 Check `~/.hermes/logs/agent.log` for `[agent.lsp.client]` entries —