From 2a18b6283b528817e87354b1c524501b570a7d62 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 18:34:43 -0700
Subject: [PATCH 01/22] =?UTF-8?q?fix(cache):=20drop=20ttl=3D1h=20on=20Port?=
 =?UTF-8?q?al=20Qwen=20=E2=80=94=20Alibaba=20upstream=20is=205m-only=20(#2?=
 =?UTF-8?q?4702)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR #24151 routed Portal Qwen (qwen3.6-plus) through the prefix_and_2
long-lived cache layout, attaching {"type":"ephemeral","ttl":"1h"}
markers to the tools[-1] entry and the stable system-prefix block.
That layout works for Portal Claude because Anthropic / OpenRouter on
Anthropic routes honour 1h TTL — but Portal Qwen ultimately proxies to
Alibaba DashScope, which documents a single "ephemeral" TTL of 5
minutes on its Context Cache. The ttl="1h" qualifier is silently
dropped upstream, so the two highest-value breakpoints (tools array +
system prefix) never land. Only the rolling-window 5m markers on the
last 2 messages cache, which matches the observed ~25% read rate.

Fix: keep Portal Qwen on cache_control via _anthropic_prompt_cache_policy
returning (True, False), but drop it from _supports_long_lived_anthropic_cache
so it rides the standard system_and_3 5m layout (system + last 3 messages,
all at 5m). Same 4 breakpoints, all in a TTL the upstream actually honours.

Refs: https://www.alibabacloud.com/help/en/model-studio/context-cache
      https://openrouter.ai/docs/features/prompt-caching (Alibaba Qwen
      section: "TTL: 5 minutes")

- _supports_long_lived_anthropic_cache: Portal scope narrowed back to Claude
- tests: flip the two qwen long-lived expectations to False, retitle
  non_claude_non_qwen_rejected -> non_claude_rejected
---
 run_agent.py                                  | 19 +++++++++-----
 .../test_anthropic_prompt_cache_policy.py     | 25 ++++++++++++-------
 2 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index a8b071c872..6b5c199a41 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3619,12 +3619,19 @@ class AIAgent:
         is_claude = "claude" in model_lower
         is_nous_portal = "nousresearch" in eff_base_url.lower()
 
-        # Nous Portal: Claude AND Qwen both get long-lived caching.
-        # Portal proxies to OpenRouter with identical cache_control
-        # semantics; any model on Portal that accepts envelope-layout
-        # markers via _anthropic_prompt_cache_policy also benefits from
-        # the documented 1h cross-session TTL.
-        if is_nous_portal and (is_claude or "qwen" in model_lower):
+        # Nous Portal Claude rides the 1h prefix_and_2 layout (Portal
+        # proxies to OpenRouter, which honours ttl=1h on Anthropic
+        # routes).  Qwen does NOT — Alibaba DashScope (the upstream for
+        # all Qwen routes, including Portal -> OpenRouter -> Alibaba)
+        # documents a single ``ephemeral`` TTL of 5 minutes; ttl="1h"
+        # on Qwen markers is silently ignored upstream, so the
+        # high-value tools[-1] + system-prefix breakpoints never land
+        # and only the 5m rolling-window markers on the last 2 messages
+        # get cached.  Portal Qwen still gets cache_control via
+        # _anthropic_prompt_cache_policy returning (True, False) — it
+        # just rides the standard system_and_3 5m layout instead of the
+        # mismatched prefix_and_2 1h layout.
+        if is_nous_portal and is_claude:
             return True
 
         if not is_claude:
diff --git a/tests/run_agent/test_anthropic_prompt_cache_policy.py b/tests/run_agent/test_anthropic_prompt_cache_policy.py
index 15d1cb4e87..3d7358e670 100644
--- a/tests/run_agent/test_anthropic_prompt_cache_policy.py
+++ b/tests/run_agent/test_anthropic_prompt_cache_policy.py
@@ -372,29 +372,36 @@ class TestSupportsLongLivedAnthropicCache:
         )
         assert agent._supports_long_lived_anthropic_cache() is True
 
-    def test_nous_portal_qwen_supported(self):
-        # Portal Qwen rides the same OpenRouter-equivalent transport as
-        # Portal Claude; long-lived (1h cross-session) cache_control
-        # markers apply identically.
+    def test_nous_portal_qwen_NOT_long_lived(self):
+        # Portal Qwen still gets cache_control markers via the standard
+        # system_and_3 5m layout (see _anthropic_prompt_cache_policy
+        # tests above), but it must NOT ride the prefix_and_2 1h layout.
+        # Alibaba DashScope (the upstream for every Qwen route, incl.
+        # Portal -> OpenRouter -> Alibaba) only supports a single
+        # ``ephemeral`` TTL of 5 minutes; ttl="1h" markers are silently
+        # ignored, so the high-value tools[-1] + system-prefix
+        # breakpoints don't land. Stay on system_and_3 instead.
         agent = _make_agent(
             provider="nous",
             base_url="https://inference-api.nousresearch.com/v1",
             api_mode="chat_completions",
             model="qwen3.6-plus",
         )
-        assert agent._supports_long_lived_anthropic_cache() is True
+        assert agent._supports_long_lived_anthropic_cache() is False
 
-    def test_nous_portal_qwen_vendored_slug_supported(self):
+    def test_nous_portal_qwen_vendored_slug_NOT_long_lived(self):
         agent = _make_agent(
             provider="nous",
             base_url="https://inference-api.nousresearch.com/v1",
             api_mode="chat_completions",
             model="qwen/qwen3.6-plus",
         )
-        assert agent._supports_long_lived_anthropic_cache() is True
+        assert agent._supports_long_lived_anthropic_cache() is False
 
-    def test_nous_portal_non_claude_non_qwen_rejected(self):
-        # Portal long-lived cache scope mirrors policy: Claude or Qwen only.
+    def test_nous_portal_non_claude_rejected(self):
+        # Portal long-lived cache scope is now Claude-only. Qwen
+        # rejection is covered by the dedicated tests above; this
+        # covers everything else (gpt, etc.).
         agent = _make_agent(
             provider="nous",
             base_url="https://inference-api.nousresearch.com/v1",

From 4c825554c185ddb8961e68a7b146c75636c7acfe Mon Sep 17 00:00:00 2001
From: ryptotalent <112634774+ryptotalent@users.noreply.github.com>
Date: Tue, 12 May 2026 18:42:13 -0700
Subject: [PATCH 02/22] fix(retry): use float() for Retry-After header to
 handle sub-second values

---
 run_agent.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/run_agent.py b/run_agent.py
index 6b5c199a41..eb9cc94f3a 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -14449,7 +14449,7 @@ class AIAgent:
                             _ra_raw = _resp_headers.get("retry-after") or _resp_headers.get("Retry-After")
                             if _ra_raw:
                                 try:
-                                    _retry_after = min(int(_ra_raw), 120)  # Cap at 2 minutes
+                                    _retry_after = min(float(_ra_raw), 120)  # Cap at 2 minutes
                                 except (TypeError, ValueError):
                                     pass
                     wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0)

From e71393237efd41af688569c3100baf3a89226b47 Mon Sep 17 00:00:00 2001
From: ms-alan <1472110+ms-alan@users.noreply.github.com>
Date: Tue, 12 May 2026 18:43:20 -0700
Subject: [PATCH 03/22] fix(signal): handle group messages from linked devices
 in syncMessage path

Closes #23064

When Hermes connects to Signal via signal-cli in daemon mode (linked
device setup), group messages sent from the user's phone were silently
dropped. The syncMessage handler only processed events where
destinationNumber equals the bot's own number (Note to Self).

Group messages from linked devices carry a groupInfo.groupId instead of a
destinationNumber. Extend the condition to also pass through sync messages
that have a groupId, so group messages are promoted to dataMessage and
reach the agent.
---
 gateway/platforms/signal.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py
index 118eb688cc..bd731a7ab5 100644
--- a/gateway/platforms/signal.py
+++ b/gateway/platforms/signal.py
@@ -446,7 +446,9 @@ class SignalAdapter(BasePlatformAdapter):
                 if sent_msg and isinstance(sent_msg, dict):
                     dest = sent_msg.get("destinationNumber") or sent_msg.get("destination")
                     sent_ts = sent_msg.get("timestamp")
-                    if dest == self._account_normalized:
+                    sent_msg_group_info = sent_msg.get("groupInfo") or {}
+                    sent_msg_group_id = sent_msg_group_info.get("groupId") if sent_msg_group_info else None
+                    if dest == self._account_normalized or sent_msg_group_id:
                         # Check if this is an echo of our own outbound reply
                         if sent_ts and sent_ts in self._recent_sent_timestamps:
                             self._recent_sent_timestamps.discard(sent_ts)

From 081f9368bcf341dced07bc515ce26a3b25f2eaa2 Mon Sep 17 00:00:00 2001
From: RhombusMaximus <31224721+RhombusMaximus@users.noreply.github.com>
Date: Tue, 12 May 2026 18:43:44 -0700
Subject: [PATCH 04/22] fix(voice_mode): detect audio in WSL when
 sd.query_devices() returns empty list but PULSE_SERVER is set

In WSL2, sounddevice.query_devices() returns [] even when the
PulseAudio bridge is functional. The existing code already handled
the case where the query itself raises an exception, but it missed
the empty-list case.

This change treats an empty device list as non-fatal in WSL when
PULSE_SERVER is configured, matching the existing exception-handler
behavior.

Fixes: WSL users seeing 'No audio input/output devices detected'
even though paplay/arecord work fine.
---
 tools/voice_mode.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/voice_mode.py b/tools/voice_mode.py
index 238fed4b28..cc691afad7 100644
--- a/tools/voice_mode.py
+++ b/tools/voice_mode.py
@@ -130,7 +130,9 @@ def detect_audio_environment() -> dict:
         try:
             devices = sd.query_devices()
             if not devices:
-                if termux_capture:
+                if os.environ.get('PULSE_SERVER'):
+                    notices.append("No PortAudio devices detected but PULSE_SERVER is set -- continuing")
+                elif termux_capture:
                     notices.append("No PortAudio devices detected, but Termux:API microphone capture is available")
                 else:
                     warnings.append("No audio input/output devices detected")

From 557deece6f0f6081c7fb8bcf30e8abf952165170 Mon Sep 17 00:00:00 2001
From: Dangooy <151443764+Dangooy@users.noreply.github.com>
Date: Tue, 12 May 2026 18:44:12 -0700
Subject: [PATCH 05/22] fix(tui): use TERMINAL_CWD in _session_info for
 accurate status line path

_session_info() used os.getcwd() which reflects the gateway process
working directory, not the user's actual working directory. This caused
the TUI status line to display incorrect paths (e.g. D:\HermesWork
instead of D:\Hermes\HermesWork) after agent turns that changed the
process cwd.

Align with session.create, which already reads the TERMINAL_CWD env var
set by the CLI launcher.
---
 tui_gateway/server.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index d105250701..41cbdd05e3 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1378,7 +1378,7 @@ def _session_info(agent) -> dict:
         "fast": service_tier == "priority",
         "tools": {},
         "skills": {},
-        "cwd": os.getcwd(),
+        "cwd": os.getenv("TERMINAL_CWD", os.getcwd()),
         "version": "",
         "release_date": "",
         "update_behind": None,

From 80c4b27437122a605ffc187123a4375b300280f6 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 18:44:33 -0700
Subject: [PATCH 06/22] docs(lsp): document follow-up fixes from #24630
 (#24709)

- Note that Hermes installs the typescript SDK together with
  typescript-language-server (the peer-dep relationship was previously
  implicit and caused initialize failures when the SDK was absent).
- Add a Troubleshooting entry for the new Backend warnings section
  in hermes lsp status, with the shellcheck install commands across
  apt / brew / scoop.

Reflects what shipped in PR #24630.
---
 website/docs/user-guide/features/lsp.md | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/website/docs/user-guide/features/lsp.md b/website/docs/user-guide/features/lsp.md
index ef0f403d20..bb54003b11 100644
--- a/website/docs/user-guide/features/lsp.md
+++ b/website/docs/user-guide/features/lsp.md
@@ -92,6 +92,13 @@ manager makes sense for that language (rustup, ghcup, opam, brew,
 …). Hermes auto-detects the binary on PATH or in
 `<HERMES_HOME>/lsp/bin/`.
 
+A few servers are installed alongside a peer dependency that npm
+won't auto-pull. The current case is `typescript-language-server`,
+which requires the `typescript` SDK importable from the same
+`node_modules` tree — Hermes installs both packages together when you
+run `hermes lsp install typescript` or auto-install fires on first
+use.
+
 ## CLI
 
 ```
@@ -207,6 +214,24 @@ The binary isn't on PATH and isn't in `<HERMES_HOME>/lsp/bin/`. Run
 `hermes lsp install <server_id>` to attempt an auto-install, or
 install the binary manually through the language's normal toolchain.
 
+**`Backend warnings` section in `hermes lsp status`**
+
+Some servers ship as thin wrappers around an external CLI for actual
+diagnostics — they spawn cleanly and accept requests but never emit
+errors when the sidecar binary is missing. The most common case is
+`bash-language-server`, which delegates diagnostics to `shellcheck`.
+When `hermes lsp status` shows a `Backend warnings` section, install
+the named tool through your OS package manager:
+
+```
+apt install shellcheck      # Debian / Ubuntu
+brew install shellcheck     # macOS
+scoop install shellcheck    # Windows
+```
+
+The same warning is logged once at server spawn time in
+`~/.hermes/logs/agent.log`.
+
 **Server starts but never returns diagnostics**
 
 Check `~/.hermes/logs/agent.log` for `[agent.lsp.client]` entries —

From dd1d4e9c5d8284ae5bc1250260493bada1e8d685 Mon Sep 17 00:00:00 2001
From: diablozzc <24596+diablozzc@users.noreply.github.com>
Date: Tue, 12 May 2026 18:44:51 -0700
Subject: [PATCH 07/22] fix(gateway): add chat_id to hook_ctx for message
 source tracking

---
 gateway/run.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gateway/run.py b/gateway/run.py
index bda0cbf983..46c508e4bd 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -7543,6 +7543,7 @@ class GatewayRunner:
             hook_ctx = {
                 "platform": source.platform.value if source.platform else "",
                 "user_id": source.user_id,
+                "chat_id": source.chat_id or "",
                 "session_id": session_entry.session_id,
                 "message": message_text[:500],
             }

From 327b8cee9eaeb17724c7b5daa686e736f7d3b5e4 Mon Sep 17 00:00:00 2001
From: Jwd-gity <280797544+Jwd-gity@users.noreply.github.com>
Date: Tue, 12 May 2026 18:45:20 -0700
Subject: [PATCH 08/22] fix(install): use stash@{0} instead of git rev-parse
 refs/stash for autostash recovery

Autostash creates refs/stash as a pointer to the latest stash commit, but
git stash apply/drop expect the symbolic ref format like stash@{0}, not
the raw commit SHA. Using the commit SHA causes:

    error: 'X' is not a stash reference

---
 scripts/install.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/install.sh b/scripts/install.sh
index aaa810f3c8..72cc81637d 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -890,7 +890,7 @@ clone_repo() {
                 stash_name="hermes-install-autostash-$(date -u +%Y%m%d-%H%M%S)"
                 log_info "Local changes detected, stashing before update..."
                 git stash push --include-untracked -m "$stash_name"
-                autostash_ref="$(git rev-parse --verify refs/stash)"
+                autostash_ref="stash@{0}"
             fi
 
             git fetch origin

From e474130c487c5e4c3d58f309ec2fdb19474cc4dc Mon Sep 17 00:00:00 2001
From: AhmetArif0 <147827411+AhmetArif0@users.noreply.github.com>
Date: Tue, 12 May 2026 18:45:57 -0700
Subject: [PATCH 09/22] fix(telegram): use thread fallback helper in
 slash-confirm result send

PR #23458 introduced _send_message_with_thread_fallback() and applied it
to all control-style sends (send_update_prompt, send_approval_request,
send_model_picker_prompt), but the slash-confirm result message in
handle_callback_query still called self._bot.send_message directly.

In supergroups with stale message_thread_id on the callback's parent
message, this raises "Message thread not found" and silently swallows
the result text. Replace with the helper so the same retry-without-
thread-id logic applies.
---
 gateway/platforms/telegram.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 415ddb5608..db25b87497 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -2772,7 +2772,7 @@ class TelegramAdapter(BasePlatformAdapter):
                                     {"thread_id": str(thread_id)},
                                 )
                             )
-                        await self._bot.send_message(**send_kwargs)
+                        await self._send_message_with_thread_fallback(**send_kwargs)
                 except Exception as exc:
                     logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True)
             return

From afa5b81918617126a489f21874cb39b1af7a7e93 Mon Sep 17 00:00:00 2001
From: ALIYILD <62653182+ALIYILD@users.noreply.github.com>
Date: Tue, 12 May 2026 18:46:22 -0700
Subject: [PATCH 10/22] fix(prompt_builder): inject tool-use enforcement for
 GLM models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GLM-family models (z-ai/glm-4.5-air, z-ai/glm-4.5-flash, etc.) exhibit
the same "describe-instead-of-call" failure mode for which the gpt/codex/
gemini/gemma/grok families already receive enforcement. Without the injection,
free-tier GLM workers spawned by the kanban dispatcher routinely exit
cleanly (rc=0) without invoking kanban_complete or kanban_block,
producing the "protocol violation" error and triggering the dispatcher's
gave_up path.

Observed in real workloads: seven consecutive kanban tasks across three
GLM-tier profiles (shipbackend, frontend-engineer, backend-engineer) all
failed with the identical message:

    worker exited cleanly (rc=0) without calling kanban_complete or
    kanban_block — protocol violation

Re-running the same tasks on Claude Haiku immediately resolved them.
Adding "glm" to TOOL_USE_ENFORCEMENT_MODELS closes the gap so future
GLM-routed work receives the explicit "every response must contain a
tool call or final result" steering that already protects the other
enforcement-gated model families.

One-line change; no behavior change for non-GLM models.
---
 agent/prompt_builder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py
index 025ea8ab65..6bd3638783 100644
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -268,7 +268,7 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
 
 # Model name substrings that trigger tool-use enforcement guidance.
 # Add new patterns here when a model family needs explicit steering.
-TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok")
+TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm")
 
 # OpenAI GPT/Codex-specific execution guidance.  Addresses known failure modes
 # where GPT models abandon work on partial results, skip prerequisite lookups,

From 7c67097325f5fe4b4b703fed16ebafca7ca686dd Mon Sep 17 00:00:00 2001
From: mizgyo <10610247+mizgyo@users.noreply.github.com>
Date: Tue, 12 May 2026 18:46:49 -0700
Subject: [PATCH 11/22] fix(line): use build_source instead of nonexistent
 create_source
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The LINE adapter calls self.create_source(...) which raises
AttributeError on every inbound message — no such method exists.
The base PlatformAdapter exposes this factory as build_source(),
consistent with the IRC and Teams adapters.

Fixes #23728
---
 plugins/platforms/line/adapter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plugins/platforms/line/adapter.py b/plugins/platforms/line/adapter.py
index 67582ffae8..db5d3564d3 100644
--- a/plugins/platforms/line/adapter.py
+++ b/plugins/platforms/line/adapter.py
@@ -959,7 +959,7 @@ class LineAdapter(BasePlatformAdapter):
         if chat_type == "dm" and self._client:
             asyncio.create_task(self._client.loading(chat_id))
 
-        source_obj = self.create_source(
+        source_obj = self.build_source(
             chat_id=chat_id,
             chat_type=chat_type,
             user_id=user_id,

From e77fd75c442cc3ec6cfbc91964a6dfe2dc3f777d Mon Sep 17 00:00:00 2001
From: 02356abc <198679067+02356abc@users.noreply.github.com>
Date: Tue, 12 May 2026 18:48:10 -0700
Subject: [PATCH 12/22] fix(wecom): update connection status after WebSocket
 reconnection

The WeCom adapter's _listen_loop() automatically reconnects when the
WebSocket drops, but it never called _mark_connected() after a successful
reconnection. This left the runtime status file (gateway_state.json) stuck
in "disconnected" even though the adapter was fully operational again.

Add self._mark_connected() right after _open_connection() succeeds so
that the dashboard and health probes report the correct state.

Tested by forcing a WebSocket close via the heartbeat loop and verifying
that the status file updated from "disconnected" back to "connected".
---
 gateway/platforms/wecom.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gateway/platforms/wecom.py b/gateway/platforms/wecom.py
index d7a5c1d9a4..96769ea59b 100644
--- a/gateway/platforms/wecom.py
+++ b/gateway/platforms/wecom.py
@@ -345,6 +345,7 @@ class WeComAdapter(BasePlatformAdapter):
                 try:
                     await self._open_connection()
                     backoff_idx = 0
+                    self._mark_connected()
                     logger.info("[%s] Reconnected", self.name)
                 except Exception as reconnect_exc:
                     logger.warning("[%s] Reconnect failed: %s", self.name, reconnect_exc)

From 420762f867460bc603d7aab0f6e9684f63fad5a2 Mon Sep 17 00:00:00 2001
From: AhmetArif0 <147827411+AhmetArif0@users.noreply.github.com>
Date: Tue, 12 May 2026 18:48:38 -0700
Subject: [PATCH 13/22] fix(tools): forward thread_id via metadata in
 _send_via_adapter live path

The live adapter path in _send_via_adapter called adapter.send() without
passing thread_id, while the standalone fallback path correctly forwarded
it. For plugin platforms (google_chat, teams, irc, line) running with the
gateway in-process, this caused every threaded reply to land as a new
top-level message instead of continuing the thread.

Matches the pattern already used by _send_matrix_via_adapter and
_send_feishu: build metadata={"thread_id": thread_id} and pass it through.
---
 tools/send_message_tool.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py
index 664c8736a1..d5b2c0c782 100644
--- a/tools/send_message_tool.py
+++ b/tools/send_message_tool.py
@@ -461,7 +461,8 @@ async def _send_via_adapter(
             adapter = None
         if adapter is not None:
             try:
-                result = await adapter.send(chat_id=chat_id, content=chunk)
+                metadata = {"thread_id": thread_id} if thread_id else None
+                result = await adapter.send(chat_id=chat_id, content=chunk, metadata=metadata)
             except asyncio.CancelledError:
                 raise
             except Exception as e:

From 1a4e8f70415e073db257c4a31908c21e9921dd5b Mon Sep 17 00:00:00 2001
From: liuhao1024 <11816344+liuhao1024@users.noreply.github.com>
Date: Tue, 12 May 2026 18:49:00 -0700
Subject: [PATCH 14/22] fix(gateway): make WhatsApp npm install timeout
 configurable

Default timeout raised from 60s to 300s (5 minutes) to accommodate
slower systems like Unraid NAS. Configurable via WHATSAPP_NPM_INSTALL_TIMEOUT
environment variable.
---
 gateway/platforms/whatsapp.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py
index 2fb6fc1332..29b78d75d0 100644
--- a/gateway/platforms/whatsapp.py
+++ b/gateway/platforms/whatsapp.py
@@ -494,12 +494,15 @@ class WhatsAppAdapter(BasePlatformAdapter):
                 # plain executable path.
                 _npm_bin = shutil.which("npm") or "npm"
                 try:
+                    # Read timeout from environment variable, default to 300 seconds (5 minutes)
+                    # to accommodate slower systems like Unraid NAS
+                    npm_install_timeout = int(os.environ.get("WHATSAPP_NPM_INSTALL_TIMEOUT", "300"))
                     install_result = subprocess.run(
                         [_npm_bin, "install", "--silent"],
                         cwd=str(bridge_dir),
                         capture_output=True,
                         text=True,
-                        timeout=60,
+                        timeout=npm_install_timeout,
                     )
                     if install_result.returncode != 0:
                         print(f"[{self.name}] npm install failed: {install_result.stderr}")

From a4289d74ac99694350497fb01b15aba63ce9ffde Mon Sep 17 00:00:00 2001
From: alblez <914199+alblez@users.noreply.github.com>
Date: Tue, 12 May 2026 18:49:27 -0700
Subject: [PATCH 15/22] fix(test): use i18n t() for restart drain assertion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The test_restart_command_while_busy_requests_drain_without_interrupt test
was asserting against a hardcoded emoji string that was valid before the
i18n migration. After gateway/run.py switched to t("gateway.draining",
count=N), the test sees the translated output (or the raw key when the
locale catalog isn't resolved in xdist workers).

Fix by asserting against t("gateway.draining", count=1) — this produces
the correct expected value regardless of whether the locale file is
available in the test environment.
---
 tests/gateway/test_restart_drain.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/gateway/test_restart_drain.py b/tests/gateway/test_restart_drain.py
index 55de5a4554..844af42730 100644
--- a/tests/gateway/test_restart_drain.py
+++ b/tests/gateway/test_restart_drain.py
@@ -7,6 +7,7 @@ from unittest.mock import AsyncMock, MagicMock
 import pytest
 
 import gateway.run as gateway_run
+from agent.i18n import t
 from gateway.platforms.base import MessageEvent, MessageType
 from gateway.restart import DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
 from gateway.session import SessionEntry, build_session_key
@@ -32,7 +33,7 @@ async def test_restart_command_while_busy_requests_drain_without_interrupt(monke
 
     result = await runner._handle_message(event)
 
-    assert result == "⏳ Draining 1 active agent(s) before restart..."
+    assert result == t("gateway.draining", count=1)
     running_agent.interrupt.assert_not_called()
     runner.request_restart.assert_called_once_with(detached=True, via_service=False)
 

From 8ac351407ef8c00b3ab8f0be3a944ba921052a39 Mon Sep 17 00:00:00 2001
From: AgentArcLab <19233945+AgentArcLab@users.noreply.github.com>
Date: Tue, 12 May 2026 18:49:58 -0700
Subject: [PATCH 16/22] fix(agent): clear stale config context_length on model
 switch

When switching models via /model, AIAgent._config_context_length was
never cleared, so the new model inherited the previous model's context
window instead of auto-detecting the correct one via
get_model_context_length().

Clear _config_context_length to None before the runtime field swap so
the full resolution chain (custom_providers per-model, endpoint probe,
models.dev, etc.) is re-evaluated for the newly selected model.

Closes #21509
---
 run_agent.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/run_agent.py b/run_agent.py
index eb9cc94f3a..7c6c62cc9a 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2647,6 +2647,11 @@ class AIAgent:
         old_model = self.model
         old_provider = self.provider
 
+        # Clear the per-config context_length override so the new model's
+        # actual context window is resolved via get_model_context_length()
+        # instead of inheriting the stale value from the previous model.
+        self._config_context_length = None
+
         # ── Swap core runtime fields ──
         self.model = new_model
         self.provider = new_provider
@@ -8824,6 +8829,11 @@ class AIAgent:
                 fb_api_mode = "bedrock_converse"
 
             old_model = self.model
+
+            # Clear the per-config context_length override so the fallback
+            # model's actual context window is resolved instead of inheriting
+            # the stale value from the previous model.  See #22387.
+            self._config_context_length = None
             self.model = fb_model
             self.provider = fb_provider
             self.base_url = fb_base_url

From 80374d4dd97368d00f55c551bdbfc0fab0f011a8 Mon Sep 17 00:00:00 2001
From: amathxbt <116212274+amathxbt@users.noreply.github.com>
Date: Tue, 12 May 2026 18:50:31 -0700
Subject: [PATCH 17/22] fix: approval DELETE pattern DOTALL flag allows newline
 bypass

---
 tools/approval.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/approval.py b/tools/approval.py
index d6db5a05a0..dbb3810886 100644
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -314,7 +314,9 @@ DANGEROUS_PATTERNS = [
     (r'\bdd\s+.*if=', "disk copy"),
     (r'>\s*/dev/sd', "write to block device"),
     (r'\bDROP\s+(TABLE|DATABASE)\b', "SQL DROP"),
-    (r'\bDELETE\s+FROM\b(?!.*\bWHERE\b)', "SQL DELETE without WHERE"),
+    # Use [^\n]* instead of .* so that, under DOTALL, a WHERE on a *later* line cannot
+    # defeat the negative lookahead and silently let a DELETE without WHERE through.
+    (r'\bDELETE\s+FROM\b(?![^\n]*\bWHERE\b)', "SQL DELETE without WHERE"),
     (r'\bTRUNCATE\s+(TABLE)?\s*\w', "SQL TRUNCATE"),
     (r'>\s*/etc/', "overwrite system config"),
     (r'\bsystemctl\s+(-[^\s]+\s+)*(stop|restart|disable|mask)\b', "stop/restart system service"),

From b06e9993021a8eebd891fc60d52372446315b2f0 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 20:46:04 -0700
Subject: [PATCH 18/22] =?UTF-8?q?fix(cache):=20kill=20long-lived=20prefix?=
 =?UTF-8?q?=20layout=20=E2=80=94=20system=20prompt=20is=20now=20byte-stati?=
 =?UTF-8?q?c=20within=20a=20session=20(#24778)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The long-lived prefix-cache layout split the system prompt into stable/
context/volatile blocks and re-derived them on every API call. The
volatile tier (timestamp + memory snapshot + USER profile) ticks per
turn, so the system message bytes mutated mid-conversation and broke
upstream prompt caches (OpenRouter, Nous Portal, Anthropic).

Diagnosed via live wire-format diffing: an 8-turn conversation showed
OLD layout flipping system block[1] sha mid-session at the minute
boundary, dropping cached_tokens to 0 on that turn (cumulative
66.6% vs 83.3% for the single-block layout). Hermes invariant:
history (system + all but the last 1-2 messages) must be static.

Fix: drop the long-lived layout entirely. Single layout everywhere —
system_and_3 with one cached system string built once on first turn,
replayed verbatim on every subsequent turn. Loses cross-session 1h
prefix caching for Claude (the feature that motivated the split), but
within-session caching now actually works on every provider.

Removed:
- run_agent.py: _use_long_lived_prefix_cache flag, _long_lived_cache_ttl,
  _supports_long_lived_anthropic_cache method, the long-lived branch in
  run_conversation, mark_tools_for_long_lived_cache call site
- agent/prompt_caching.py: apply_anthropic_cache_control_long_lived,
  mark_tools_for_long_lived_cache, _mark_system_stable_block helper
- hermes_cli/config.py: prompt_caching.long_lived_prefix and
  prompt_caching.long_lived_ttl config keys
- tests/agent/test_prompt_caching_live.py (entire file)
- tests/agent/test_prompt_caching.py: TestMarkToolsForLongLivedCache,
  TestApplyAnthropicCacheControlLongLived
- tests/run_agent/test_anthropic_prompt_cache_policy.py:
  TestSupportsLongLivedAnthropicCache

Targeted tests: 62/62 pass.
---
 agent/anthropic_adapter.py                    |   5 +-
 agent/prompt_caching.py                       | 134 +---------
 hermes_cli/config.py                          |   7 -
 run_agent.py                                  | 234 +++---------------
 tests/agent/test_prompt_caching.py            | 131 ----------
 tests/agent/test_prompt_caching_live.py       | 112 ---------
 .../test_anthropic_prompt_cache_policy.py     | 131 ----------
 tests/test_ctx_halving_fix.py                 |   1 -
 8 files changed, 41 insertions(+), 714 deletions(-)
 delete mode 100644 tests/agent/test_prompt_caching_live.py

diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index 3919c8565b..4b1134a4c0 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -1305,9 +1305,8 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
             ),
         }
         # Forward cache_control marker when present on the OpenAI-format
-        # tool dict (set by ``mark_tools_for_long_lived_cache``). Anthropic's
-        # tools array supports cache_control on the last tool to cache the
-        # entire schema cross-session.
+        # tool dict. Anthropic's tools array supports cache_control on the
+        # last tool to cache the entire schema cross-session.
         cache_control = t.get("cache_control")
         if isinstance(cache_control, dict):
             anthropic_tool["cache_control"] = dict(cache_control)
diff --git a/agent/prompt_caching.py b/agent/prompt_caching.py
index 4829c96b33..a73d6e113d 100644
--- a/agent/prompt_caching.py
+++ b/agent/prompt_caching.py
@@ -1,25 +1,15 @@
-"""Anthropic prompt caching strategies.
+"""Anthropic prompt caching strategy.
 
-Two layouts:
-
-* ``system_and_3`` (default, used everywhere except the long-lived path):
-  4 cache_control breakpoints — system prompt + last 3 non-system messages.
-  All at the same TTL (5m or 1h). Reduces input token costs by ~75% on
-  multi-turn conversations within a single session.
-
-* ``prefix_and_2`` (Claude on Anthropic / OpenRouter / Nous Portal):
-  4 breakpoints split across two TTL tiers — tools[-1] (1h) +
-  stable system prefix (1h) + last 2 non-system messages (5m). The
-  long-lived prefix is byte-stable across sessions for a given user
-  config, so every fresh session reads the cached system+tools instead
-  of re-paying for them. Within-session rolling window shrinks from 3
-  messages to 2 to free the breakpoint budget.
+Single layout: ``system_and_3``. 4 cache_control breakpoints — system
+prompt + last 3 non-system messages, all at the same TTL (5m or 1h).
+Reduces input token costs by ~75% on multi-turn conversations within a
+single session.
 
 Pure functions -- no class state, no AIAgent dependency.
 """
 
 import copy
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List
 
 
 def _apply_cache_marker(msg: dict, cache_marker: dict, native_anthropic: bool = False) -> None:
@@ -87,115 +77,3 @@ def apply_anthropic_cache_control(
         _apply_cache_marker(messages[idx], marker, native_anthropic=native_anthropic)
 
     return messages
-
-
-def _mark_system_stable_block(
-    messages: List[Dict[str, Any]],
-    long_lived_marker: Dict[str, str],
-) -> bool:
-    """Mark the *first* content block of the system message with the 1h marker.
-
-    The system message is expected to have been split into multiple content
-    blocks beforehand by the caller — block[0] is the cross-session-stable
-    prefix, subsequent blocks carry context files + volatile suffix.
-    Falls back to marking the whole system message as a single block when
-    the message hasn't been split (preserves correctness on the fallback path).
-
-    Returns True when a marker was placed.
-    """
-    if not messages or messages[0].get("role") != "system":
-        return False
-
-    sys_msg = messages[0]
-    content = sys_msg.get("content")
-
-    # Already a list of blocks → mark the first block.
-    if isinstance(content, list) and content:
-        first = content[0]
-        if isinstance(first, dict):
-            first["cache_control"] = long_lived_marker
-            return True
-        return False
-
-    # String content (no split) → cannot place a stable-prefix breakpoint
-    # without changing the byte content.  Caller is responsible for
-    # splitting; if they didn't, fall through to envelope marker so we still
-    # cache *something* for this turn.
-    if isinstance(content, str) and content:
-        sys_msg["content"] = [
-            {"type": "text", "text": content, "cache_control": long_lived_marker}
-        ]
-        return True
-
-    return False
-
-
-def apply_anthropic_cache_control_long_lived(
-    api_messages: List[Dict[str, Any]],
-    long_lived_ttl: str = "1h",
-    rolling_ttl: str = "5m",
-    native_anthropic: bool = False,
-) -> List[Dict[str, Any]]:
-    """Apply prefix_and_2 caching: long-lived stable prefix + rolling window.
-
-    Layout (4 breakpoints total):
-      * Stable system prefix (block[0]) → ``long_lived_ttl`` TTL
-      * Last 2 non-system messages → ``rolling_ttl`` TTL each
-
-    NOTE: this function does NOT mark the tools array. Tools cache_control
-    is attached separately (see ``mark_tools_for_long_lived_cache``) because
-    tools live outside the messages list in the API payload.
-
-    The caller MUST have split the system message into ordered content
-    blocks where block[0] is the cross-session-stable portion. If the system
-    message is still a single string, it is wrapped into a single block and
-    marked — this is correct, just less effective (the volatile suffix is
-    not isolated, so the prefix invalidates per-session).
-
-    Returns:
-        Deep copy of messages with cache_control breakpoints injected.
-    """
-    messages = copy.deepcopy(api_messages)
-    if not messages:
-        return messages
-
-    long_marker = _build_marker(long_lived_ttl)
-    rolling_marker = _build_marker(rolling_ttl)
-
-    placed_prefix = _mark_system_stable_block(messages, long_marker)
-
-    # Reserve 1 breakpoint for the system prefix (when placed); spend the
-    # remaining 3 on the rolling tail.  Anthropic max is 4 total —
-    # tools[-1] (when marked) consumes the 4th, so we cap rolling at 2 here.
-    rolling_budget = 2 if placed_prefix else 3
-    non_sys = [i for i in range(len(messages)) if messages[i].get("role") != "system"]
-    for idx in non_sys[-rolling_budget:]:
-        _apply_cache_marker(messages[idx], rolling_marker, native_anthropic=native_anthropic)
-
-    return messages
-
-
-def mark_tools_for_long_lived_cache(
-    tools: Optional[List[Dict[str, Any]]],
-    long_lived_ttl: str = "1h",
-) -> Optional[List[Dict[str, Any]]]:
-    """Attach cache_control to the last tool in the OpenAI-format tools list.
-
-    Anthropic prefix-cache order is ``tools → system → messages``.  Marking
-    the last tool dict caches the entire tools array (Anthropic's docs:
-    "the marker is placed on the last block you want included in the cached
-    prefix").  Marker is preserved across the OpenAI-wire boundary on
-    OpenRouter and Nous Portal (which proxies to OpenRouter); on native
-    Anthropic the marker is forwarded by ``convert_tools_to_anthropic``.
-
-    Returns a deep copy of the tools list with the marker attached, or the
-    input unchanged when tools is empty/None.  Pure function — does not
-    mutate the input.
-    """
-    if not tools:
-        return tools
-    out = copy.deepcopy(tools)
-    last = out[-1]
-    if isinstance(last, dict):
-        last["cache_control"] = _build_marker(long_lived_ttl)
-    return out
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index dc3e414948..4c2596594e 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -735,15 +735,8 @@ DEFAULT_CONFIG = {
 
     # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
     # cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
-    # long_lived_prefix: when true (default), Claude on Anthropic / OpenRouter / Nous
-    #   Portal uses a split layout: tools[-1] + stable system prefix at long_lived_ttl
-    #   (cross-session cache), last 2 messages at cache_ttl (within-session rolling).
-    #   Set false to keep the legacy "system + last 3 messages" single-tier layout.
-    # long_lived_ttl: TTL for the cross-session prefix tier ("5m" or "1h"; default "1h").
     "prompt_caching": {
         "cache_ttl": "5m",
-        "long_lived_prefix": True,
-        "long_lived_ttl": "1h",
     },
 
     # OpenRouter-specific settings.
diff --git a/run_agent.py b/run_agent.py
index 7c6c62cc9a..1c4c35c96e 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1454,15 +1454,6 @@ class AIAgent:
         # 1h tier costs 2x on write vs 1.25x for 5m, but amortizes across long
         # sessions with >5-minute pauses between turns (#14971).
         self._cache_ttl = "5m"
-        # Long-lived prefix caching: when enabled and supported by the
-        # current provider, splits the system prompt into a stable prefix
-        # (cached cross-session at 1h TTL) and a volatile suffix
-        # (memory/timestamp — never cached), and attaches a 1h cache_control
-        # marker to the last tool in the schema array.  Restricted to
-        # Claude on Anthropic / OpenRouter / Nous Portal; see
-        # ``_supports_long_lived_anthropic_cache``.
-        self._use_long_lived_prefix_cache = False
-        self._long_lived_cache_ttl = "1h"
         try:
             from hermes_cli.config import load_config as _load_pc_cfg
 
@@ -1470,12 +1461,6 @@ class AIAgent:
             _ttl = _pc_cfg.get("cache_ttl", "5m")
             if _ttl in {"5m", "1h"}:
                 self._cache_ttl = _ttl
-            _ll_enabled = _pc_cfg.get("long_lived_prefix", True)
-            _ll_ttl = _pc_cfg.get("long_lived_ttl", "1h")
-            if _ll_ttl in ("5m", "1h"):
-                self._long_lived_cache_ttl = _ll_ttl
-            if _ll_enabled and self._use_prompt_caching and self._supports_long_lived_anthropic_cache():
-                self._use_long_lived_prefix_cache = True
         except Exception:
             pass
 
@@ -2480,7 +2465,6 @@ class AIAgent:
             "client_kwargs": dict(self._client_kwargs),
             "use_prompt_caching": self._use_prompt_caching,
             "use_native_cache_layout": self._use_native_cache_layout,
-            "use_long_lived_prefix_cache": self._use_long_lived_prefix_cache,
             # Context engine state that _try_activate_fallback() overwrites.
             # Use getattr for model/base_url/api_key/provider since plugin
             # engines may not have these (they're ContextCompressor-specific).
@@ -2716,15 +2700,6 @@ class AIAgent:
                 model=new_model,
             )
         )
-        self._use_long_lived_prefix_cache = bool(
-            self._use_prompt_caching
-            and self._supports_long_lived_anthropic_cache(
-                provider=new_provider,
-                base_url=self.base_url,
-                api_mode=api_mode,
-                model=new_model,
-            )
-        )
 
         # ── LM Studio: preload before probing context length ──
         self._ensure_lmstudio_runtime_loaded()
@@ -2773,7 +2748,6 @@ class AIAgent:
             "client_kwargs": dict(self._client_kwargs),
             "use_prompt_caching": self._use_prompt_caching,
             "use_native_cache_layout": self._use_native_cache_layout,
-            "use_long_lived_prefix_cache": self._use_long_lived_prefix_cache,
             "compressor_model": getattr(_cc, "model", self.model) if _cc else self.model,
             "compressor_base_url": getattr(_cc, "base_url", self.base_url) if _cc else self.base_url,
             "compressor_api_key": getattr(_cc, "api_key", "") if _cc else "",
@@ -3584,80 +3558,6 @@ class AIAgent:
 
         return False, False
 
-    def _supports_long_lived_anthropic_cache(
-        self,
-        *,
-        provider: Optional[str] = None,
-        base_url: Optional[str] = None,
-        api_mode: Optional[str] = None,
-        model: Optional[str] = None,
-    ) -> bool:
-        """Decide whether the long-lived (1h cross-session) cache layout applies.
-
-        Narrower than ``_anthropic_prompt_cache_policy`` — only enabled
-        for Claude models on the four endpoints whose cross-session
-        cache_control behavior we have explicitly validated:
-
-          * Native Anthropic API (``api_mode == 'anthropic_messages'`` +
-            host ``api.anthropic.com``)
-          * Anthropic OAuth subscription (same transport as native API)
-          * OpenRouter (``base_url`` contains ``openrouter.ai``)
-          * Nous Portal (``base_url`` contains ``nousresearch`` — proxies
-            to OpenRouter, so identical wire-format)
-
-        All four honour ``cache_control`` on both the tools array and the
-        first system content block, and bill cross-session cache reads at
-        the documented 0.1× rate.
-
-        Other endpoints covered by the standard ``system_and_3`` policy
-        (third-party Anthropic gateways, MiniMax, opencode-go Qwen, etc.)
-        keep that layout — they support cache_control but their behavior
-        with mixed-TTL multi-block system content has not been validated
-        against this codebase.
-        """
-        eff_provider = (provider if provider is not None else self.provider) or ""
-        eff_base_url = base_url if base_url is not None else (self.base_url or "")
-        eff_api_mode = api_mode if api_mode is not None else (self.api_mode or "")
-        eff_model = (model if model is not None else self.model) or ""
-
-        model_lower = eff_model.lower()
-        is_claude = "claude" in model_lower
-        is_nous_portal = "nousresearch" in eff_base_url.lower()
-
-        # Nous Portal Claude rides the 1h prefix_and_2 layout (Portal
-        # proxies to OpenRouter, which honours ttl=1h on Anthropic
-        # routes).  Qwen does NOT — Alibaba DashScope (the upstream for
-        # all Qwen routes, including Portal -> OpenRouter -> Alibaba)
-        # documents a single ``ephemeral`` TTL of 5 minutes; ttl="1h"
-        # on Qwen markers is silently ignored upstream, so the
-        # high-value tools[-1] + system-prefix breakpoints never land
-        # and only the 5m rolling-window markers on the last 2 messages
-        # get cached.  Portal Qwen still gets cache_control via
-        # _anthropic_prompt_cache_policy returning (True, False) — it
-        # just rides the standard system_and_3 5m layout instead of the
-        # mismatched prefix_and_2 1h layout.
-        if is_nous_portal and is_claude:
-            return True
-
-        if not is_claude:
-            return False
-
-        # Native Anthropic + Anthropic OAuth subscription
-        if eff_api_mode == "anthropic_messages":
-            if eff_provider == "anthropic" or base_url_hostname(eff_base_url) == "api.anthropic.com":
-                return True
-
-        # OpenRouter
-        if base_url_host_matches(eff_base_url, "openrouter.ai"):
-            return True
-
-        # Nous Portal — front-ends OpenRouter behind the scenes; identical
-        # wire format and cache_control semantics.
-        if is_nous_portal:
-            return True
-
-        return False
-
     @staticmethod
     def _model_requires_responses_api(model: str) -> bool:
         """Return True for models that require the Responses API path.
@@ -5906,26 +5806,19 @@ class AIAgent:
         """Assemble the system prompt as three ordered parts.
 
         Returns a dict with three keys:
-          * ``stable``  — content that is byte-stable across sessions for a
-            given user config: identity, tool guidance, skills prompt,
+          * ``stable``   — identity, tool guidance, skills prompt,
             environment hints, platform hints, model-family operational
-            guidance.  Eligible for cross-session 1h prompt caching when
-            placed as a separate Anthropic content block (see
-            ``apply_anthropic_cache_control_long_lived``).
-          * ``context`` — context files (AGENTS.md, .cursorrules, etc.) and
-            caller-supplied system_message.  Stable within a session but may
-            change between sessions when files are edited or the cwd
-            differs.  Cached within-session via the rolling messages
-            breakpoint (5m TTL); not promoted to the long-lived tier so
-            edits don't poison the cross-session cache.
-          * ``volatile`` — content that changes on most turns/sessions:
-            memory snapshot, user profile, external memory provider block,
-            timestamp line.  Never marked for caching.
+            guidance.
+          * ``context``  — context files (AGENTS.md, .cursorrules, etc.)
+            and caller-supplied system_message.
+          * ``volatile`` — memory snapshot, user profile, external
+            memory provider block, timestamp line.
 
-        Joined ``stable\\n\\ncontext\\n\\nvolatile`` produces the same
-        logical content the old single-string builder produced, with the
-        guarantee that volatile content is at the end (cache-friendly
-        ordering for any provider that does prefix caching).
+        Joined into a single string by ``_build_system_prompt`` and
+        cached on ``_cached_system_prompt`` for the lifetime of the
+        AIAgent.  Hermes never re-renders parts of this string
+        mid-session — that's the only way to keep upstream prompt
+        caches warm across turns.
         """
         # ── Stable tier ────────────────────────────────────────────────
         stable_parts: List[str] = []
@@ -6127,9 +6020,10 @@ class AIAgent:
 
         Layers are ordered cache-friendly: stable identity/guidance first,
         then session-stable context files, then per-call volatile content
-        (memory, USER profile, timestamp). The split is exposed via
-        ``_build_system_prompt_parts`` for the long-lived prompt-caching
-        path (Claude on Anthropic / OpenRouter / Nous Portal).
+        (memory, USER profile, timestamp).  The whole string is treated as
+        one cached block — Hermes never rebuilds or reinjects parts of it
+        mid-session, which is the only way to keep upstream prompt caches
+        warm across turns.
         """
         parts = self._build_system_prompt_parts(system_message=system_message)
         joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
@@ -8896,15 +8790,6 @@ class AIAgent:
                     model=fb_model,
                 )
             )
-            self._use_long_lived_prefix_cache = bool(
-                self._use_prompt_caching
-                and self._supports_long_lived_anthropic_cache(
-                    provider=fb_provider,
-                    base_url=fb_base_url,
-                    api_mode=fb_api_mode,
-                    model=fb_model,
-                )
-            )
 
             # LM Studio: preload before probing the fallback's context length.
             self._ensure_lmstudio_runtime_loaded()
@@ -8981,16 +8866,6 @@ class AIAgent:
                 "use_native_cache_layout",
                 self.api_mode == "anthropic_messages" and self.provider == "anthropic",
             )
-            # Long-lived prefix flag was added later — restore False on
-            # snapshots predating the new field, then re-evaluate against
-            # the restored provider/model in case the user had it enabled.
-            self._use_long_lived_prefix_cache = rt.get(
-                "use_long_lived_prefix_cache",
-                bool(
-                    self._use_prompt_caching
-                    and self._supports_long_lived_anthropic_cache()
-                ),
-            )
 
             # ── Rebuild client for the primary provider ──
             if self.api_mode == "anthropic_messages":
@@ -9568,19 +9443,7 @@ class AIAgent:
 
     def _build_api_kwargs(self, api_messages: list) -> dict:
         """Build the keyword arguments dict for the active API mode."""
-        # Resolve the tools array exactly once. When the long-lived
-        # prefix-cache layout is active (Claude on Anthropic / OpenRouter
-        # / Nous Portal), attach a 1h cache_control marker to the last
-        # tool — this caches the entire tools array cross-session via
-        # Anthropic's tools→system→messages prefix order. The function
-        # returns a deep copy, so self.tools is never mutated.
-        if self._use_long_lived_prefix_cache and self.tools:
-            from agent.prompt_caching import mark_tools_for_long_lived_cache
-            tools_for_api = mark_tools_for_long_lived_cache(
-                self.tools, long_lived_ttl=self._long_lived_cache_ttl,
-            )
-        else:
-            tools_for_api = self.tools
+        tools_for_api = self.tools
 
         if self.api_mode == "anthropic_messages":
             _transport = self._get_transport()
@@ -12440,36 +12303,21 @@ class AIAgent:
             # External recall context is injected into the user message, not the system
             # prompt, so the stable cache prefix remains unchanged.
             #
-            # When the long-lived prefix-cache layout is active (Claude on
-            # Anthropic / OpenRouter / Nous Portal), we build the system
-            # message as a *list of content blocks*: [stable, context,
-            # volatile, ephemeral?].  Block 0 (stable) gets the 1h
-            # cache_control marker further down via
-            # apply_anthropic_cache_control_long_lived; blocks 1-3 are
-            # cached only via the rolling messages window at 5m.
             # NOTE: Plugin context from pre_llm_call hooks is injected into the
             # user message (see injection block above), NOT the system prompt.
             # This is intentional — system prompt modifications break the prompt
             # cache prefix.  The system prompt is reserved for Hermes internals.
-            if self._use_long_lived_prefix_cache:
-                _sys_parts = self._build_system_prompt_parts(system_message=system_message)
-                _sys_blocks: list = []
-                if _sys_parts.get("stable"):
-                    _sys_blocks.append({"type": "text", "text": _sys_parts["stable"]})
-                if _sys_parts.get("context"):
-                    _sys_blocks.append({"type": "text", "text": _sys_parts["context"]})
-                if _sys_parts.get("volatile"):
-                    _sys_blocks.append({"type": "text", "text": _sys_parts["volatile"]})
-                if self.ephemeral_system_prompt:
-                    _sys_blocks.append({"type": "text", "text": self.ephemeral_system_prompt})
-                if _sys_blocks:
-                    api_messages = [{"role": "system", "content": _sys_blocks}] + api_messages
-            else:
-                effective_system = active_system_prompt or ""
-                if self.ephemeral_system_prompt:
-                    effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
-                if effective_system:
-                    api_messages = [{"role": "system", "content": effective_system}] + api_messages
+            #
+            # Hermes invariant: the system prompt is built ONCE per session
+            # (cached on ``_cached_system_prompt``) and replayed verbatim on
+            # every turn.  We send it as a single content string so the
+            # system message is byte-identical across turns and upstream
+            # prompt caches stay warm.
+            effective_system = active_system_prompt or ""
+            if self.ephemeral_system_prompt:
+                effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
+            if effective_system:
+                api_messages = [{"role": "system", "content": effective_system}] + api_messages
 
             # Inject ephemeral prefill messages right after the system prompt
             # but before conversation history. Same API-call-time-only pattern.
@@ -12483,29 +12331,13 @@ class AIAgent:
             # gateways. Auto-detected: if ``_use_prompt_caching`` is set,
             # inject cache_control breakpoints (system + last 3 messages)
             # to reduce input token costs by ~75% on multi-turn
-            # conversations. Layout is chosen per endpoint by
-            # ``_anthropic_prompt_cache_policy``.
-            #
-            # Long-lived prefix layout (prefix_and_2): stable system block
-            # gets 1h marker + last 2 messages get 5m markers. Tools
-            # array's last entry is marked separately at API-call kwargs
-            # build time (see ``_build_api_kwargs`` and
-            # ``mark_tools_for_long_lived_cache``).
+            # conversations.
             if self._use_prompt_caching:
-                if self._use_long_lived_prefix_cache:
-                    from agent.prompt_caching import apply_anthropic_cache_control_long_lived
-                    api_messages = apply_anthropic_cache_control_long_lived(
-                        api_messages,
-                        long_lived_ttl=self._long_lived_cache_ttl,
-                        rolling_ttl=self._cache_ttl,
-                        native_anthropic=self._use_native_cache_layout,
-                    )
-                else:
-                    api_messages = apply_anthropic_cache_control(
-                        api_messages,
-                        cache_ttl=self._cache_ttl,
-                        native_anthropic=self._use_native_cache_layout,
-                    )
+                api_messages = apply_anthropic_cache_control(
+                    api_messages,
+                    cache_ttl=self._cache_ttl,
+                    native_anthropic=self._use_native_cache_layout,
+                )
 
             # Safety net: strip orphaned tool results / add stubs for missing
             # results before sending to the API.  Runs unconditionally — not
diff --git a/tests/agent/test_prompt_caching.py b/tests/agent/test_prompt_caching.py
index 9d989571b5..f6f3e9f0a3 100644
--- a/tests/agent/test_prompt_caching.py
+++ b/tests/agent/test_prompt_caching.py
@@ -6,8 +6,6 @@ import pytest
 from agent.prompt_caching import (
     _apply_cache_marker,
     apply_anthropic_cache_control,
-    apply_anthropic_cache_control_long_lived,
-    mark_tools_for_long_lived_cache,
 )
 
 
@@ -143,132 +141,3 @@ class TestApplyAnthropicCacheControl:
             elif "cache_control" in msg:
                 count += 1
         assert count <= 4
-
-
-class TestMarkToolsForLongLivedCache:
-    def test_returns_unchanged_for_empty_tools(self):
-        assert mark_tools_for_long_lived_cache(None) is None
-        assert mark_tools_for_long_lived_cache([]) == []
-
-    def test_marks_only_last_tool(self):
-        tools = [
-            {"type": "function", "function": {"name": "a"}},
-            {"type": "function", "function": {"name": "b"}},
-            {"type": "function", "function": {"name": "c"}},
-        ]
-        out = mark_tools_for_long_lived_cache(tools)
-        assert "cache_control" not in out[0]
-        assert "cache_control" not in out[1]
-        assert out[2]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
-
-    def test_does_not_mutate_input(self):
-        tools = [{"type": "function", "function": {"name": "a"}}]
-        mark_tools_for_long_lived_cache(tools)
-        assert "cache_control" not in tools[0]
-
-    def test_5m_ttl_drops_ttl_field(self):
-        tools = [{"type": "function", "function": {"name": "a"}}]
-        out = mark_tools_for_long_lived_cache(tools, long_lived_ttl="5m")
-        assert out[0]["cache_control"] == {"type": "ephemeral"}
-
-
-class TestApplyAnthropicCacheControlLongLived:
-    def test_empty_messages(self):
-        assert apply_anthropic_cache_control_long_lived([]) == []
-
-    def test_marks_first_block_of_split_system(self):
-        msgs = [
-            {"role": "system", "content": [
-                {"type": "text", "text": "STABLE"},
-                {"type": "text", "text": "CONTEXT"},
-                {"type": "text", "text": "VOLATILE"},
-            ]},
-            {"role": "user", "content": "msg1"},
-            {"role": "assistant", "content": "msg2"},
-        ]
-        out = apply_anthropic_cache_control_long_lived(msgs)
-        sys_blocks = out[0]["content"]
-        assert sys_blocks[0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
-        assert "cache_control" not in sys_blocks[1]
-        assert "cache_control" not in sys_blocks[2]
-
-    def test_rolling_marker_on_last_2_messages(self):
-        msgs = [
-            {"role": "system", "content": [{"type": "text", "text": "S"}]},
-            {"role": "user", "content": "u1"},
-            {"role": "assistant", "content": "a1"},
-            {"role": "user", "content": "u2"},
-            {"role": "assistant", "content": "a2"},
-        ]
-        out = apply_anthropic_cache_control_long_lived(msgs)
-
-        def has_marker(m):
-            c = m.get("content")
-            if isinstance(c, list) and c and isinstance(c[-1], dict):
-                return "cache_control" in c[-1]
-            return "cache_control" in m
-
-        # u1 and a1 (older messages) should NOT be marked
-        assert not has_marker(out[1])
-        assert not has_marker(out[2])
-        # u2 and a2 (last 2) SHOULD be marked
-        assert has_marker(out[3])
-        assert has_marker(out[4])
-
-    def test_rolling_marker_uses_5m_ttl(self):
-        msgs = [
-            {"role": "system", "content": [{"type": "text", "text": "S"}]},
-            {"role": "user", "content": "u1"},
-            {"role": "assistant", "content": "a1"},
-        ]
-        out = apply_anthropic_cache_control_long_lived(
-            msgs, long_lived_ttl="1h", rolling_ttl="5m",
-        )
-        # Last user message: cache_control on the wrapped text part should be 5m
-        last = out[-1]
-        c = last["content"]
-        assert isinstance(c, list)
-        assert c[-1]["cache_control"] == {"type": "ephemeral"}  # 5m has no ttl key
-
-    def test_string_system_falls_back_to_envelope_marker(self):
-        """When the caller didn't split the system message, we still place a marker."""
-        msgs = [
-            {"role": "system", "content": "Single string system"},
-            {"role": "user", "content": "u1"},
-        ]
-        out = apply_anthropic_cache_control_long_lived(msgs)
-        sys_content = out[0]["content"]
-        # Wrapped into a list and the (now sole) block gets the 1h marker
-        assert isinstance(sys_content, list)
-        assert sys_content[0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
-
-    def test_does_not_mutate_input(self):
-        msgs = [
-            {"role": "system", "content": [{"type": "text", "text": "S"}]},
-            {"role": "user", "content": "u1"},
-        ]
-        before = copy.deepcopy(msgs)
-        apply_anthropic_cache_control_long_lived(msgs)
-        assert msgs == before
-
-    def test_max_4_breakpoints_with_split_system(self):
-        msgs = [
-            {"role": "system", "content": [{"type": "text", "text": "S"}, {"type": "text", "text": "V"}]},
-        ] + [
-            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg{i}"}
-            for i in range(10)
-        ]
-        out = apply_anthropic_cache_control_long_lived(msgs)
-        count = 0
-        for m in out:
-            c = m.get("content")
-            if isinstance(c, list):
-                for item in c:
-                    if isinstance(item, dict) and "cache_control" in item:
-                        count += 1
-            elif "cache_control" in m:
-                count += 1
-        # 1 system block + last 2 messages = 3 breakpoints from this function.
-        # tools[-1] is marked separately (not via this function), so a 4th
-        # breakpoint can be added at API-call time.
-        assert count == 3
diff --git a/tests/agent/test_prompt_caching_live.py b/tests/agent/test_prompt_caching_live.py
deleted file mode 100644
index f72b6b9d90..0000000000
--- a/tests/agent/test_prompt_caching_live.py
+++ /dev/null
@@ -1,112 +0,0 @@
-"""Live E2E: long-lived prefix caching on Claude via OpenRouter.
-
-Run only when LIVE_OR_KEY env var is set. Skipped under the normal hermetic
-test suite (which unsets credentials).
-"""
-import os, sys, tempfile, time, shutil, pytest
-
-
-# Probe for the key BEFORE conftest unsets it
-_LIVE_KEY = os.environ.get("OPENROUTER_API_KEY") or os.environ.get("LIVE_OR_KEY")
-if not _LIVE_KEY:
-    # Try to read directly from .env
-    env_path = os.path.expanduser("~/.hermes/.env")
-    if os.path.exists(env_path):
-        with open(env_path) as f:
-            for line in f:
-                if line.startswith("OPENROUTER_API_KEY="):
-                    _LIVE_KEY = line.strip().split("=", 1)[1].strip().strip('"').strip("'")
-                    break
-
-
-pytestmark = pytest.mark.skipif(
-    not _LIVE_KEY,
-    reason="set OPENROUTER_API_KEY (or LIVE_OR_KEY) to run live cache test",
-)
-
-
-def test_long_lived_prefix_cache_e2e_openrouter(tmp_path, monkeypatch):
-    """Two AIAgent runs in fresh sessions: call 1 writes cache, call 2 reads it."""
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
-    # The hermetic conftest unsets OPENROUTER_API_KEY — restore for this test
-    monkeypatch.setenv("OPENROUTER_API_KEY", _LIVE_KEY)
-
-    # Minimal config — but with enough toolset/guidance to exceed Anthropic's
-    # ~1024-token minimum-cacheable-prefix threshold. Anthropic silently
-    # ignores cache_control markers on small blocks.
-    import yaml
-    cfg_path = tmp_path / "config.yaml"
-    cfg_path.write_text(yaml.safe_dump({
-        "model": {"provider": "openrouter", "default": "anthropic/claude-haiku-4.5"},
-        "prompt_caching": {"long_lived_prefix": True, "long_lived_ttl": "1h", "cache_ttl": "5m"},
-        "agent": {"tool_use_enforcement": True},   # adds substantial guidance text
-        "memory": {"provider": ""},
-        "compression": {"enabled": False},
-    }))
-
-    from run_agent import AIAgent
-
-    def make_agent():
-        return AIAgent(
-            api_key=_LIVE_KEY,
-            base_url="https://openrouter.ai/api/v1",
-            provider="openrouter",
-            model="anthropic/claude-haiku-4.5",
-            api_mode="chat_completions",
-            # Use the default toolset roster — the tools array (~13k tokens
-            # for ~35 tools) is what carries the bulk of the cross-session
-            # cache value. With a tiny toolset the cached prefix can fall
-            # below Anthropic Haiku's 2048-token minimum cacheable size and
-            # the marker is silently ignored.
-            enabled_toolsets=None,
-            quiet_mode=True,
-            skip_context_files=True,
-            skip_memory=True,
-            save_trajectories=False,
-        )
-
-    a1 = make_agent()
-    assert a1._use_prompt_caching is True, "policy should enable caching for Claude on OR"
-    assert a1._use_long_lived_prefix_cache is True, "long-lived path should activate"
-    parts = a1._build_system_prompt_parts()
-    print(f"\nstable={len(parts['stable']):,} ctx={len(parts['context']):,} volatile={len(parts['volatile']):,} chars")
-    print(f"tool count: {len(a1.tools or [])}")
-
-    # Use distinct user messages each call so OpenRouter's response cache
-    # doesn't short-circuit the upstream Anthropic call (we need real
-    # Anthropic billing visibility to verify cache_creation/cache_read).
-    USER_1 = "Reply with the single word ALPHA."
-    USER_2 = "Reply with the single word BRAVO."
-
-    print("\n--- Call 1 (cold) ---")
-    r1 = a1.run_conversation(USER_1, conversation_history=[])
-    print(f"final_response[:80]: {(r1.get('final_response') or '')[:80]!r}")
-    cr1 = a1.session_cache_read_tokens
-    cw1 = a1.session_cache_write_tokens
-    print(f"call1: cache_read={cr1} cache_write={cw1}")
-
-    # Wait so cache settles, then fresh agent (NEW SESSION) for cross-session read
-    time.sleep(2)
-    a2 = make_agent()
-    assert a2.session_id != a1.session_id, "second agent must have a new session"
-
-    print("\n--- Call 2 (warm, NEW session, different user msg) ---")
-    r2 = a2.run_conversation(USER_2, conversation_history=[])
-    print(f"final_response[:80]: {(r2.get('final_response') or '')[:80]!r}")
-    cr2 = a2.session_cache_read_tokens
-    cw2 = a2.session_cache_write_tokens
-    print(f"call2: cache_read={cr2} cache_write={cw2}")
-
-    print(f"\n=== VERDICT ===")
-    print(f"  call1 wrote {cw1:,} cache tokens, read {cr1:,}")
-    print(f"  call2 wrote {cw2:,} cache tokens, read {cr2:,}")
-    if cw1:
-        print(f"  cross-session read fraction: cr2/cw1 = {cr2/cw1:.2%}")
-
-    # Assertions
-    assert cw1 > 0, f"call 1 must write cache (got {cw1}); long-lived layout not reaching wire"
-    assert cr2 > 0, (
-        f"call 2 must read cache cross-session (got {cr2}); "
-        f"stable prefix is not byte-stable across sessions"
-    )
-    assert cr2 >= 1000, f"cache_read on call 2 ({cr2}) too small to indicate real reuse"
diff --git a/tests/run_agent/test_anthropic_prompt_cache_policy.py b/tests/run_agent/test_anthropic_prompt_cache_policy.py
index 3d7358e670..ba6e54f037 100644
--- a/tests/run_agent/test_anthropic_prompt_cache_policy.py
+++ b/tests/run_agent/test_anthropic_prompt_cache_policy.py
@@ -330,134 +330,3 @@ class TestExplicitOverrides:
 # Long-lived prefix cache policy (cross-session 1h tier)
 # ─────────────────────────────────────────────────────────────────────
 
-class TestSupportsLongLivedAnthropicCache:
-    """Narrower than _anthropic_prompt_cache_policy — only Claude on the 4
-    explicitly-validated endpoints get the long-lived layout."""
-
-    def test_native_anthropic_claude_supported(self):
-        agent = _make_agent(
-            provider="anthropic",
-            base_url="https://api.anthropic.com",
-            api_mode="anthropic_messages",
-            model="claude-sonnet-4.6",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is True
-
-    def test_anthropic_oauth_supported(self):
-        # OAuth uses the same transport as native Anthropic
-        agent = _make_agent(
-            provider="anthropic",
-            base_url="https://api.anthropic.com",
-            api_mode="anthropic_messages",
-            model="claude-opus-4.6",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is True
-
-    def test_openrouter_claude_supported(self):
-        agent = _make_agent(
-            provider="openrouter",
-            base_url="https://openrouter.ai/api/v1",
-            api_mode="chat_completions",
-            model="anthropic/claude-sonnet-4.6",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is True
-
-    def test_nous_portal_claude_supported(self):
-        # Nous Portal proxies to OpenRouter — same wire format
-        agent = _make_agent(
-            provider="nous",
-            base_url="https://inference-api.nousresearch.com/v1",
-            api_mode="chat_completions",
-            model="anthropic/claude-opus-4.7",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is True
-
-    def test_nous_portal_qwen_NOT_long_lived(self):
-        # Portal Qwen still gets cache_control markers via the standard
-        # system_and_3 5m layout (see _anthropic_prompt_cache_policy
-        # tests above), but it must NOT ride the prefix_and_2 1h layout.
-        # Alibaba DashScope (the upstream for every Qwen route, incl.
-        # Portal -> OpenRouter -> Alibaba) only supports a single
-        # ``ephemeral`` TTL of 5 minutes; ttl="1h" markers are silently
-        # ignored, so the high-value tools[-1] + system-prefix
-        # breakpoints don't land. Stay on system_and_3 instead.
-        agent = _make_agent(
-            provider="nous",
-            base_url="https://inference-api.nousresearch.com/v1",
-            api_mode="chat_completions",
-            model="qwen3.6-plus",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is False
-
-    def test_nous_portal_qwen_vendored_slug_NOT_long_lived(self):
-        agent = _make_agent(
-            provider="nous",
-            base_url="https://inference-api.nousresearch.com/v1",
-            api_mode="chat_completions",
-            model="qwen/qwen3.6-plus",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is False
-
-    def test_nous_portal_non_claude_rejected(self):
-        # Portal long-lived cache scope is now Claude-only. Qwen
-        # rejection is covered by the dedicated tests above; this
-        # covers everything else (gpt, etc.).
-        agent = _make_agent(
-            provider="nous",
-            base_url="https://inference-api.nousresearch.com/v1",
-            api_mode="chat_completions",
-            model="openai/gpt-5.4",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is False
-
-    def test_openrouter_non_claude_rejected(self):
-        agent = _make_agent(
-            provider="openrouter",
-            base_url="https://openrouter.ai/api/v1",
-            api_mode="chat_completions",
-            model="openai/gpt-5.4",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is False
-
-    def test_third_party_anthropic_gateway_rejected(self):
-        # MiniMax / Kimi / etc. — anthropic-wire but not in our validated list
-        agent = _make_agent(
-            provider="minimax",
-            base_url="https://api.minimax.io/anthropic",
-            api_mode="anthropic_messages",
-            model="minimax-m2.7",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is False
-
-    def test_alibaba_dashscope_rejected(self):
-        agent = _make_agent(
-            provider="alibaba",
-            base_url="https://dashscope.aliyuncs.com/api/v1/anthropic",
-            api_mode="anthropic_messages",
-            model="qwen3.5-plus",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is False
-
-    def test_opencode_qwen_rejected(self):
-        agent = _make_agent(
-            provider="opencode-go",
-            base_url="https://api.opencode-go.example/v1",
-            api_mode="chat_completions",
-            model="qwen3.6-plus",
-        )
-        assert agent._supports_long_lived_anthropic_cache() is False
-
-    def test_fallback_target_evaluated_independently(self):
-        # Starting on a non-supported provider, falling back to OpenRouter Claude
-        agent = _make_agent(
-            provider="minimax",
-            base_url="https://api.minimax.io/anthropic",
-            api_mode="anthropic_messages",
-            model="minimax-m2.7",
-        )
-        assert agent._supports_long_lived_anthropic_cache(
-            provider="openrouter",
-            base_url="https://openrouter.ai/api/v1",
-            api_mode="chat_completions",
-            model="anthropic/claude-sonnet-4.6",
-        ) is True
diff --git a/tests/test_ctx_halving_fix.py b/tests/test_ctx_halving_fix.py
index afeee84878..0dd3ca4e7e 100644
--- a/tests/test_ctx_halving_fix.py
+++ b/tests/test_ctx_halving_fix.py
@@ -169,7 +169,6 @@ class TestEphemeralMaxOutputTokens:
         agent.reasoning_config = None
         agent._is_anthropic_oauth = False
         agent._ephemeral_max_output_tokens = None
-        agent._use_long_lived_prefix_cache = False
 
         compressor = MagicMock()
         compressor.context_length = 200_000

From 486b692ddd801f8f665d3fff023149fb1cb6509e Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 20:49:20 -0700
Subject: [PATCH 19/22] feat(nous): unified client=hermes-client-v<version> tag
 on every Portal request (#24779)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(nous): unified client=hermes-client-v<version> tag on every Portal request

Every Hermes request to Nous Portal now carries the same
client=hermes-client-v<__version__> tag (e.g. client=hermes-client-v0.13.0
on this release), sourced live from hermes_cli.__version__. The release
script's regex bump auto-aligns it on every release.

Centralized in agent/portal_tags.py and wired into all four call sites:
- NousProfile.build_extra_body (main agent loop, every chat completion)
- auxiliary_client.NOUS_EXTRA_BODY + _build_call_kwargs (aux client)
- run_agent.py compression-summary fallback path
- tools/web_tools.py web_extract fallback

Replaces the client=aux marker added in #24194 with the unified version
tag. Tests assert against the helper output (invariant) rather than the
literal string, so they don't need updating on every release.

* feat(nous): cover /goal judge and kanban specify aux paths

Two aux-using surfaces bypassed call_llm by invoking
client.chat.completions.create() directly without extra_body, so they
were missing the unified Portal client tag:

- hermes_cli/goals.py — /goal standing-goal judge
- hermes_cli/kanban_specify.py — kanban triage specifier

Both now pass extra_body=get_auxiliary_extra_body() or None so they
inherit the version tag when the aux client points at Nous Portal, and
emit nothing otherwise (no tag leak to OpenRouter/Anthropic auxes).
---
 agent/auxiliary_client.py                     | 27 +++++++-
 agent/portal_tags.py                          | 64 +++++++++++++++++++
 hermes_cli/goals.py                           |  3 +-
 hermes_cli/kanban_specify.py                  |  3 +-
 plugins/model-providers/nous/__init__.py      |  3 +-
 run_agent.py                                  |  3 +-
 tests/agent/test_portal_tags.py               | 61 ++++++++++++++++++
 .../agent/transports/test_chat_completions.py |  3 +-
 tests/providers/test_profile_wiring.py        |  3 +-
 tests/providers/test_provider_profiles.py     |  3 +-
 tests/providers/test_transport_parity.py      |  3 +-
 tests/run_agent/test_provider_parity.py       |  3 +-
 tools/web_tools.py                            |  3 +-
 13 files changed, 169 insertions(+), 13 deletions(-)
 create mode 100644 agent/portal_tags.py
 create mode 100644 tests/agent/test_portal_tags.py

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 377e4ba22e..de7b6db2b1 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -382,7 +382,28 @@ _AI_GATEWAY_HEADERS = {
 # Nous Portal extra_body for product attribution.
 # Callers should pass this as extra_body in chat.completions.create()
 # when the auxiliary client is backed by Nous Portal.
-NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent", "client=aux"]}
+#
+# The tags are computed from agent.portal_tags so the client= marker stays
+# in lockstep with hermes_cli.__version__ across every Portal call site
+# (main loop, aux, compression, web_extract). Do not inline a literal here;
+# see agent/portal_tags.py for the rationale.
+from agent.portal_tags import nous_portal_tags as _nous_portal_tags
+
+
+def _nous_extra_body() -> dict:
+    """Return a fresh Nous Portal ``extra_body`` dict.
+
+    Computed at call time so a hot-reloaded ``hermes_cli.__version__`` is
+    reflected without restarting long-running processes.
+    """
+    return {"tags": _nous_portal_tags()}
+
+
+# Backwards-compatible module attribute. Some callers (tests, third-party
+# plugins) read ``NOUS_EXTRA_BODY`` directly; keep it as a snapshot of the
+# tags taken at import time. Callers that need the freshest value should call
+# ``_nous_extra_body()`` or import ``nous_portal_tags`` directly.
+NOUS_EXTRA_BODY = _nous_extra_body()
 
 # Set at resolve time — True if the auxiliary client points to Nous Portal
 auxiliary_is_nous: bool = False
@@ -3437,7 +3458,7 @@ def get_auxiliary_extra_body() -> dict:
     Includes Nous Portal product tags when the auxiliary client is backed
     by Nous Portal. Returns empty dict otherwise.
     """
-    return dict(NOUS_EXTRA_BODY) if auxiliary_is_nous else {}
+    return _nous_extra_body() if auxiliary_is_nous else {}
 
 
 def auxiliary_max_tokens_param(value: int) -> dict:
@@ -4026,7 +4047,7 @@ def _build_call_kwargs(
     # Provider-specific extra_body
     merged_extra = dict(extra_body or {})
     if provider == "nous" or auxiliary_is_nous:
-        merged_extra.setdefault("tags", []).extend(NOUS_EXTRA_BODY["tags"])
+        merged_extra.setdefault("tags", []).extend(_nous_portal_tags())
     if merged_extra:
         kwargs["extra_body"] = merged_extra
 
diff --git a/agent/portal_tags.py b/agent/portal_tags.py
new file mode 100644
index 0000000000..647c52a076
--- /dev/null
+++ b/agent/portal_tags.py
@@ -0,0 +1,64 @@
+"""Centralized Nous Portal request tags.
+
+Every Hermes request that hits the Nous Portal — main agent loop, auxiliary
+client (compression / titles / vision / web_extract / session_search / etc.),
+and any future code path — must carry the same product-attribution tags so
+Nous can attribute usage to Hermes Agent and bucket it by client release.
+
+Tag shape (sent in OpenAI-compatible ``extra_body['tags']``):
+
+    [
+        "product=hermes-agent",
+        "client=hermes-client-v<__version__>",
+    ]
+
+The version is sourced live from ``hermes_cli.__version__`` so it auto-aligns
+to whatever release is installed; the release script
+(``scripts/release.py``) regex-bumps that single string, and every Portal
+request picks up the new tag on the next process start.
+
+Why one helper instead of inlining the literal at each site:
+* Four call sites (main loop profile, aux client, run_agent compression
+  fallback, web_tools fallback) used to drift apart — see PR #24194 which
+  only got the aux site, leaving the main loop sending a different tag set.
+* Tests should assert the same tag list everywhere; centralizing makes that
+  assertion a one-liner against this module.
+
+Do NOT pre-compute these as module-level constants in the consumers. The
+version can change at runtime (editable installs, hot-reload tooling), and
+``hermes_cli.__version__`` is the canonical source of truth.
+"""
+
+from __future__ import annotations
+
+from typing import List
+
+
+def _hermes_version() -> str:
+    """Return the current Hermes release version, e.g. ``"0.13.0"``.
+
+    Falls back to ``"unknown"`` if ``hermes_cli`` cannot be imported (should
+    never happen in a real install — guarded for defensive testing).
+    """
+    try:
+        from hermes_cli import __version__
+        return __version__
+    except Exception:
+        return "unknown"
+
+
+def hermes_client_tag() -> str:
+    """Return the ``client=...`` tag for Nous Portal requests.
+
+    Format: ``client=hermes-client-v<version>``; ``<version>`` may be ``unknown``.
+    """
+    return f"client=hermes-client-v{_hermes_version()}"
+
+
+def nous_portal_tags() -> List[str]:
+    """Return the canonical list of Nous Portal product tags.
+
+    Always returns a fresh list so callers can mutate it freely
+    (e.g. ``merged_extra.setdefault("tags", []).extend(nous_portal_tags())``).
+    """
+    return ["product=hermes-agent", hermes_client_tag()]
diff --git a/hermes_cli/goals.py b/hermes_cli/goals.py
index 9e8742e08a..6a8a2ae971 100644
--- a/hermes_cli/goals.py
+++ b/hermes_cli/goals.py
@@ -307,7 +307,7 @@ def judge_goal(
         return "continue", "empty response (nothing to evaluate)", False
 
     try:
-        from agent.auxiliary_client import get_text_auxiliary_client
+        from agent.auxiliary_client import get_auxiliary_extra_body, get_text_auxiliary_client
     except Exception as exc:
         logger.debug("goal judge: auxiliary client import failed: %s", exc)
         return "continue", "auxiliary client unavailable", False
@@ -336,6 +336,7 @@ def judge_goal(
             temperature=0,
             max_tokens=200,
             timeout=timeout,
+            extra_body=get_auxiliary_extra_body() or None,
         )
     except Exception as exc:
         logger.info("goal judge: API call failed (%s) — falling through to continue", exc)
diff --git a/hermes_cli/kanban_specify.py b/hermes_cli/kanban_specify.py
index d069e5ee1a..0d57fbb250 100644
--- a/hermes_cli/kanban_specify.py
+++ b/hermes_cli/kanban_specify.py
@@ -155,7 +155,7 @@ def specify_task(
         )
 
     try:
-        from agent.auxiliary_client import get_text_auxiliary_client
+        from agent.auxiliary_client import get_auxiliary_extra_body, get_text_auxiliary_client
     except Exception as exc:  # pragma: no cover — import smoke test
         logger.debug("specify: auxiliary client import failed: %s", exc)
         return SpecifyOutcome(task_id, False, "auxiliary client unavailable")
@@ -187,6 +187,7 @@ def specify_task(
             temperature=0.3,
             max_tokens=1500,
             timeout=timeout or 120,
+            extra_body=get_auxiliary_extra_body() or None,
         )
     except Exception as exc:
         logger.info(
diff --git a/plugins/model-providers/nous/__init__.py b/plugins/model-providers/nous/__init__.py
index f89e56c23a..5a61952d74 100644
--- a/plugins/model-providers/nous/__init__.py
+++ b/plugins/model-providers/nous/__init__.py
@@ -2,6 +2,7 @@
 
 from typing import Any
 
+from agent.portal_tags import nous_portal_tags
 from providers import register_provider
 from providers.base import ProviderProfile
 
@@ -12,7 +13,7 @@ class NousProfile(ProviderProfile):
     def build_extra_body(
         self, *, session_id: str | None = None, **context
     ) -> dict[str, Any]:
-        return {"tags": ["product=hermes-agent"]}
+        return {"tags": nous_portal_tags()}
 
     def build_api_kwargs_extras(
         self,
diff --git a/run_agent.py b/run_agent.py
index 1c4c35c96e..f0597c9088 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -11542,7 +11542,8 @@ class AIAgent:
                         "effort": "medium"
                     }
             if _is_nous:
-                summary_extra_body["tags"] = ["product=hermes-agent"]
+                from agent.portal_tags import nous_portal_tags as _portal_tags
+                summary_extra_body["tags"] = _portal_tags()
 
             if self.api_mode == "codex_responses":
                 codex_kwargs = self._build_api_kwargs(api_messages)
diff --git a/tests/agent/test_portal_tags.py b/tests/agent/test_portal_tags.py
new file mode 100644
index 0000000000..7c873ef0f6
--- /dev/null
+++ b/tests/agent/test_portal_tags.py
@@ -0,0 +1,61 @@
+"""Tests for agent.portal_tags — Nous Portal request tag contract."""
+
+from __future__ import annotations
+
+
+def test_hermes_client_tag_includes_current_version():
+    """The client tag must reflect hermes_cli.__version__ verbatim."""
+    from hermes_cli import __version__
+    from agent.portal_tags import hermes_client_tag
+
+    assert hermes_client_tag() == f"client=hermes-client-v{__version__}"
+
+
+def test_hermes_client_tag_format():
+    """The client tag has the exact shape Nous Portal expects."""
+    from agent.portal_tags import hermes_client_tag
+
+    tag = hermes_client_tag()
+    assert tag.startswith("client=hermes-client-v")
+    # No spaces, no commas — single tag value
+    assert " " not in tag
+    assert "," not in tag
+
+
+def test_nous_portal_tags_contains_product_and_client():
+    """Every Nous Portal request gets BOTH the product tag and the version tag."""
+    from agent.portal_tags import hermes_client_tag, nous_portal_tags
+
+    tags = nous_portal_tags()
+    assert "product=hermes-agent" in tags
+    assert hermes_client_tag() in tags
+    assert len(tags) == 2
+
+
+def test_nous_portal_tags_returns_fresh_list():
+    """Callers mutate the returned list; we must not share state across calls."""
+    from agent.portal_tags import nous_portal_tags
+
+    a = nous_portal_tags()
+    a.append("client=test-mutation")
+    b = nous_portal_tags()
+    assert "client=test-mutation" not in b
+
+
+def test_auxiliary_client_nous_extra_body_uses_helper():
+    """auxiliary_client.NOUS_EXTRA_BODY must match the canonical helper output."""
+    from agent.auxiliary_client import NOUS_EXTRA_BODY
+    from agent.portal_tags import nous_portal_tags
+
+    assert NOUS_EXTRA_BODY == {"tags": nous_portal_tags()}
+
+
+def test_nous_provider_profile_uses_helper():
+    """The Nous provider profile (main agent loop) must use the canonical tags."""
+    from agent.portal_tags import nous_portal_tags
+    from providers import get_provider_profile
+
+    profile = get_provider_profile("nous")
+    assert profile is not None
+    body = profile.build_extra_body()
+    assert body["tags"] == nous_portal_tags()
diff --git a/tests/agent/transports/test_chat_completions.py b/tests/agent/transports/test_chat_completions.py
index 47d402a215..7ed0d4da63 100644
--- a/tests/agent/transports/test_chat_completions.py
+++ b/tests/agent/transports/test_chat_completions.py
@@ -147,11 +147,12 @@ class TestChatCompletionsBuildKwargs:
         ]
 
     def test_nous_tags(self, transport):
+        from agent.portal_tags import nous_portal_tags
         from providers import get_provider_profile
         profile = get_provider_profile("nous")
         msgs = [{"role": "user", "content": "Hi"}]
         kw = transport.build_kwargs(model="gpt-4o", messages=msgs, provider_profile=profile)
-        assert kw["extra_body"]["tags"] == ["product=hermes-agent"]
+        assert kw["extra_body"]["tags"] == nous_portal_tags()
 
     def test_reasoning_default(self, transport):
         msgs = [{"role": "user", "content": "Hi"}]
diff --git a/tests/providers/test_profile_wiring.py b/tests/providers/test_profile_wiring.py
index 9096c82b6a..258ff53180 100644
--- a/tests/providers/test_profile_wiring.py
+++ b/tests/providers/test_profile_wiring.py
@@ -273,12 +273,13 @@ class TestRequestOverridesParity:
 
     def test_extra_body_override_merges_with_provider_body(self, transport):
         """Override extra_body merges WITH provider extra_body, not replaces."""
+        from agent.portal_tags import nous_portal_tags
         kw = transport.build_kwargs(
             model="hermes-3", messages=_msgs(), tools=None,
             provider_profile=get_provider_profile("nous"),
             request_overrides={"extra_body": {"custom": True}},
         )
-        assert kw["extra_body"]["tags"] == ["product=hermes-agent"]  # from profile
+        assert kw["extra_body"]["tags"] == nous_portal_tags()  # from profile
         assert kw["extra_body"]["custom"] is True  # from override
 
     def test_top_level_override(self, transport):
diff --git a/tests/providers/test_provider_profiles.py b/tests/providers/test_provider_profiles.py
index 68f7b5f497..c79ed2aea9 100644
--- a/tests/providers/test_provider_profiles.py
+++ b/tests/providers/test_provider_profiles.py
@@ -210,9 +210,10 @@ class TestOpenRouterProfile:
 
 class TestNousProfile:
     def test_tags(self):
+        from agent.portal_tags import nous_portal_tags
         p = get_provider_profile("nous")
         body = p.build_extra_body()
-        assert body["tags"] == ["product=hermes-agent"]
+        assert body["tags"] == nous_portal_tags()
 
     def test_auth_type(self):
         p = get_provider_profile("nous")
diff --git a/tests/providers/test_transport_parity.py b/tests/providers/test_transport_parity.py
index be88bc580a..8c1fb6eb4f 100644
--- a/tests/providers/test_transport_parity.py
+++ b/tests/providers/test_transport_parity.py
@@ -165,13 +165,14 @@ class TestNousParity:
     """Nous: product tags, reasoning, omit when disabled."""
 
     def test_tags(self, transport):
+        from agent.portal_tags import nous_portal_tags
         kw = transport.build_kwargs(
             model="hermes-3-llama-3.1-405b",
             messages=_simple_messages(),
             tools=None,
             provider_profile=get_provider_profile("nous"),
         )
-        assert kw["extra_body"]["tags"] == ["product=hermes-agent"]
+        assert kw["extra_body"]["tags"] == nous_portal_tags()
 
     def test_reasoning_omitted_when_disabled(self, transport):
         """Nous special case: reasoning omitted entirely when disabled."""
diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py
index f97885a038..d3a5a1b37f 100644
--- a/tests/run_agent/test_provider_parity.py
+++ b/tests/run_agent/test_provider_parity.py
@@ -343,11 +343,12 @@ class TestBuildApiKwargsAIGateway:
 
 class TestBuildApiKwargsNousPortal:
     def test_includes_nous_product_tags(self, monkeypatch):
+        from agent.portal_tags import nous_portal_tags
         agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
         messages = [{"role": "user", "content": "hi"}]
         kwargs = agent._build_api_kwargs(messages)
         extra = kwargs.get("extra_body", {})
-        assert extra.get("tags") == ["product=hermes-agent"]
+        assert extra.get("tags") == nous_portal_tags()
 
     def test_uses_chat_completions_format(self, monkeypatch):
         agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
diff --git a/tools/web_tools.py b/tools/web_tools.py
index b9df0cd3be..79ddc8d27f 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -593,7 +593,8 @@ def _resolve_web_extract_auxiliary(model: Optional[str] = None) -> tuple[Optiona
     extra_body: Dict[str, Any] = {}
     if client is not None and _is_nous_auxiliary_client(client):
         from agent.auxiliary_client import get_auxiliary_extra_body
-        extra_body = get_auxiliary_extra_body() or {"tags": ["product=hermes-agent"]}
+        from agent.portal_tags import nous_portal_tags
+        extra_body = get_auxiliary_extra_body() or {"tags": nous_portal_tags()}
 
     return client, effective_model, extra_body
 

From 1e01b25e76a9258095930c7428c169835fd03059 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Tue, 12 May 2026 22:43:41 -0700
Subject: [PATCH 20/22] feat(providers): rename Alibaba Cloud to Qwen Cloud,
 reorder picker (#24835)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Rename 'Alibaba Cloud (DashScope)' display label to 'Qwen Cloud'
  in CANONICAL_PROVIDERS (model picker, /model, hermes model TUI) and
  PROVIDER_REGISTRY (setup wizard prompts, status output).
- Move Qwen Cloud (alibaba) up to position 6 — directly below
  OpenAI Codex and above Xiaomi MiMo.
- Move Qwen OAuth (Portal) (qwen-oauth) to the bottom of the
  canonical provider list.

Provider slug 'alibaba' is unchanged — only the display label
moved. DashScope env var (DASHSCOPE_API_KEY) and base URL are
unchanged. The separate 'alibaba-coding-plan' plugin provider is
not affected.
---
 hermes_cli/auth.py   | 2 +-
 hermes_cli/models.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 90d6a63935..88acd1cd43 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -284,7 +284,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
     ),
     "alibaba": ProviderConfig(
         id="alibaba",
-        name="Alibaba Cloud (DashScope)",
+        name="Qwen Cloud",
         auth_type="api_key",
         inference_base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
         api_key_env_vars=("DASHSCOPE_API_KEY",),
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 5f355d03b9..eb55b59ee5 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -908,10 +908,10 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
     ProviderEntry("lmstudio",       "LM Studio",                "LM Studio (local desktop app with built-in model server)"),
     ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
     ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
+    ProviderEntry("alibaba",        "Qwen Cloud",               "Qwen Cloud / DashScope Coding (Qwen + multi-provider)"),
     ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
     ProviderEntry("tencent-tokenhub", "Tencent TokenHub",       "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"),
     ProviderEntry("nvidia",         "NVIDIA NIM",               "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
-    ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (reuses local Qwen CLI login)"),
     ProviderEntry("copilot",        "GitHub Copilot",           "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
     ProviderEntry("copilot-acp",    "GitHub Copilot ACP",       "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
     ProviderEntry("huggingface",    "Hugging Face",             "Hugging Face Inference Providers (20+ open models)"),
@@ -926,7 +926,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
     ProviderEntry("minimax",        "MiniMax",                  "MiniMax (global direct API)"),
     ProviderEntry("minimax-oauth",  "MiniMax (OAuth)",          "MiniMax via OAuth browser login (Coding Plan, minimax.io)"),
     ProviderEntry("minimax-cn",     "MiniMax (China)",          "MiniMax China (domestic direct API)"),
-    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
     ProviderEntry("ollama-cloud",   "Ollama Cloud",             "Ollama Cloud (cloud-hosted open models — ollama.com)"),
     ProviderEntry("arcee",          "Arcee AI",                 "Arcee AI (Trinity models — direct API)"),
     ProviderEntry("gmi",            "GMI Cloud",                "GMI Cloud (multi-model direct API)"),
@@ -936,6 +935,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
     ProviderEntry("bedrock",        "AWS Bedrock",              "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
     ProviderEntry("azure-foundry",  "Azure Foundry",            "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
     ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway"),
+    ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (reuses local Qwen CLI login)"),
 ]
 
 # Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/

From 942adf617910f50a39f41bd200d8083bf4cb2bed Mon Sep 17 00:00:00 2001
From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com>
Date: Wed, 13 May 2026 11:55:07 +0530
Subject: [PATCH 21/22] fix(docker): chown .venv to hermes so lazy_deps can
 install platform packages (#24841)

The Dockerfile permissions section made /opt/hermes/.venv readable but not
writable by the hermes runtime user.  Since the 2026-05-12 policy change
moved messaging packages (discord.py, telegram, slack, etc.) out of [all]
and into lazy_deps.py, the Docker image no longer ships with them
pre-installed.  At first gateway boot, lazy_deps.ensure() tries to
`uv pip install` them into the venv but fails with EACCES because
site-packages is root-owned.

The result: every messaging platform adapter silently fails to load inside
Docker containers, producing only a cryptic "discord.py not installed"
warning despite the gateway being correctly configured.

Two-part fix:

1. Dockerfile: add /opt/hermes/.venv to the existing chown -R hermes:hermes
   line so the default (UID 10000) case works out of the box.

2. docker/entrypoint.sh: extend the needs_chown block to also re-chown the
   .venv when HERMES_UID is remapped. Without this, the build-time chown
   becomes stale when someone uses the documented HERMES_UID override in
   docker-compose.yml.

Fixes #21536
Related: #17674, #21543, #21755
---
 Dockerfile           | 6 +++++-
 docker/entrypoint.sh | 4 ++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index ee2c491c06..8655c51f34 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -94,9 +94,13 @@ RUN cd web && npm run build && \
 # hermes_cli/main.py succeeds (see #18800). /opt/hermes/web is build-time
 # only (HERMES_WEB_DIST points at hermes_cli/web_dist) and is intentionally
 # not chowned here.
+# The .venv MUST be hermes-writable so lazy_deps.py can install platform
+# packages (discord.py, telegram, slack, etc.) at first gateway boot.
+# Without this, `uv pip install` fails with EACCES and all messaging
+# adapters silently fail to load.  See tools/lazy_deps.py.
 USER root
 RUN chmod -R a+rX /opt/hermes && \
-    chown -R hermes:hermes /opt/hermes/ui-tui /opt/hermes/node_modules
+    chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/node_modules
 # Start as root so the entrypoint can usermod/groupmod + gosu.
 # If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
 
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
index 288ae2614b..09e870543a 100755
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -39,6 +39,10 @@ if [ "$(id -u)" = "0" ]; then
         # by the mapped user on the host side.
         chown -R hermes:hermes "$HERMES_HOME" 2>/dev/null || \
             echo "Warning: chown failed (rootless container?) — continuing anyway"
+        # The .venv must also be re-chowned when UID is remapped, otherwise
+        # lazy_deps.py cannot install platform packages (discord.py, etc.).
+        chown -R hermes:hermes "$INSTALL_DIR/.venv" 2>/dev/null || \
+            echo "Warning: chown .venv failed (rootless container?) — continuing anyway"
     fi
 
     # Ensure config.yaml is readable by the hermes runtime user even if it was

From ca2c3d4ab4ab3e5334386db3a79fa7ed449bda48 Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Wed, 13 May 2026 09:19:04 -0400
Subject: [PATCH 22/22] =?UTF-8?q?feat(desktop):=20composer=20queue=20?=
 =?UTF-8?q?=E2=80=94=20queue=20many,=20edit/delete/cancel-edit,=20Cursor-s?=
 =?UTF-8?q?tyle?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Press Enter while busy with a draft to queue it; press Enter with no draft
to interrupt and send the next queued turn. Auto-drains one queued turn
each time the session settles, same as Cursor. The queue persists across
reloads so an interrupted-and-queued turn isn't lost on refresh.

Each queued row supports edit-in-composer (with explicit Save/Cancel),
send-now (↑), and delete. Drain skips only the entry currently being
edited so the rest of the queue keeps flowing.

Queue dequeue is transactional — an entry only leaves the queue after
`prompt.submit` is accepted, so a rejected submit doesn't drop the turn.

Also shrinks the `[interrupted]` marker to a muted one-liner and drops
its assistant footer so it stops looking like a real reply.
---
 .../src/app/chat/composer/controls.tsx        |  17 +-
 apps/desktop/src/app/chat/composer/index.tsx  | 231 +++++++++++++++++-
 .../src/app/chat/composer/queue-panel.tsx     | 123 ++++++++++
 apps/desktop/src/app/chat/composer/types.ts   |   9 +-
 apps/desktop/src/app/chat/index.tsx           |   9 +-
 apps/desktop/src/app/desktop-controller.tsx   |   2 +-
 .../app/session/hooks/use-prompt-actions.ts   |  61 +++--
 .../app/session/hooks/use-session-actions.ts  |   6 +
 .../src/components/assistant-ui/thread.tsx    |  12 +-
 apps/desktop/src/store/composer-queue.test.ts | 102 ++++++++
 apps/desktop/src/store/composer-queue.ts      | 158 ++++++++++++
 11 files changed, 695 insertions(+), 35 deletions(-)
 create mode 100644 apps/desktop/src/app/chat/composer/queue-panel.tsx
 create mode 100644 apps/desktop/src/store/composer-queue.test.ts
 create mode 100644 apps/desktop/src/store/composer-queue.ts

diff --git a/apps/desktop/src/app/chat/composer/controls.tsx b/apps/desktop/src/app/chat/composer/controls.tsx
index 010c6d67fc..7fa9255a9e 100644
--- a/apps/desktop/src/app/chat/composer/controls.tsx
+++ b/apps/desktop/src/app/chat/composer/controls.tsx
@@ -1,6 +1,6 @@
 import { Button } from '@/components/ui/button'
 import { triggerHaptic } from '@/lib/haptics'
-import { ArrowUp, AudioLines, Loader2, Mic, MicOff, Square } from '@/lib/icons'
+import { ArrowUp, AudioLines, Layers3, Loader2, Mic, MicOff, Square } from '@/lib/icons'
 import { cn } from '@/lib/utils'
 
 import type { ConversationStatus } from './hooks/use-voice-conversation'
@@ -31,6 +31,7 @@ interface ConversationProps {
 
 export function ComposerControls({
   busy,
+  busyAction,
   canSubmit,
   conversation,
   disabled,
@@ -40,6 +41,7 @@ export function ComposerControls({
   onDictate
 }: {
   busy: boolean
+  busyAction: 'queue' | 'stop'
   canSubmit: boolean
   conversation: ConversationProps
   disabled: boolean
@@ -74,12 +76,21 @@ export function ComposerControls({
         </Button>
       ) : (
         <Button
-          aria-label={busy ? 'Stop' : 'Send'}
+          aria-label={busy ? (busyAction === 'queue' ? 'Queue message' : 'Stop') : 'Send'}
           className={PRIMARY_ICON_BTN}
           disabled={disabled || !canSubmit}
+          title={busy ? (busyAction === 'queue' ? 'Queue message' : 'Stop') : 'Send'}
           type="submit"
         >
-          {busy ? <span className="block size-3 rounded-[0.1875rem] bg-current" /> : <ArrowUp size={18} />}
+          {busy ? (
+            busyAction === 'queue' ? (
+              <Layers3 size={16} />
+            ) : (
+              <span className="block size-3 rounded-[0.1875rem] bg-current" />
+            )
+          ) : (
+            <ArrowUp size={18} />
+          )}
         </Button>
       )}
     </div>
diff --git a/apps/desktop/src/app/chat/composer/index.tsx b/apps/desktop/src/app/chat/composer/index.tsx
index ace13c58cb..db9935d389 100644
--- a/apps/desktop/src/app/chat/composer/index.tsx
+++ b/apps/desktop/src/app/chat/composer/index.tsx
@@ -13,6 +13,7 @@ import {
 } from 'react'
 
 import { formatRefValue, hermesDirectiveFormatter } from '@/components/assistant-ui/directive-text'
+import { Button } from '@/components/ui/button'
 import { useMediaQuery } from '@/hooks/use-media-query'
 import { useResizeObserver } from '@/hooks/use-resize-observer'
 import { chatMessageText } from '@/lib/chat-messages'
@@ -20,7 +21,19 @@ import { contextPath } from '@/lib/chat-runtime'
 import { DATA_IMAGE_URL_RE } from '@/lib/embedded-images'
 import { triggerHaptic } from '@/lib/haptics'
 import { cn } from '@/lib/utils'
-import { $composerAttachments, $composerDraft } from '@/store/composer'
+import {
+  $composerAttachments,
+  $composerDraft,
+  clearComposerAttachments,
+  type ComposerAttachment
+} from '@/store/composer'
+import {
+  $queuedPromptsBySession,
+  enqueueQueuedPrompt,
+  removeQueuedPrompt,
+  type QueuedPromptEntry,
+  updateQueuedPrompt
+} from '@/store/composer-queue'
 import { $messages } from '@/store/session'
 import { $threadScrolledUp } from '@/store/thread-scroll'
 
@@ -41,6 +54,7 @@ import {
   renderComposerContents,
   RICH_INPUT_SLOT
 } from './rich-editor'
+import { QueuePanel } from './queue-panel'
 import { SkinSlashPopover } from './skin-slash-popover'
 import { detectTrigger, extractClipboardImageBlobs, textBeforeCaret, type TriggerState } from './text-utils'
 import { ComposerTriggerPopover } from './trigger-popover'
@@ -53,6 +67,15 @@ const COMPOSER_STACK_BREAKPOINT_PX = 320
 const COMPOSER_FADE_BACKGROUND =
   'linear-gradient(to bottom, transparent, color-mix(in srgb, var(--dt-background) 10%, transparent))'
 
+interface QueueEditState {
+  attachments: ComposerAttachment[]
+  draft: string
+  entryId: string
+  sessionKey: string
+}
+
+const cloneAttachments = (attachments: ComposerAttachment[]) => attachments.map(a => ({ ...a }))
+
 export function ChatBar({
   busy,
   cwd,
@@ -60,6 +83,7 @@ export function ChatBar({
   focusKey,
   gateway,
   maxRecordingSeconds = 120,
+  queueSessionKey,
   sessionId,
   state,
   onCancel,
@@ -77,12 +101,17 @@ export function ChatBar({
   const aui = useAui()
   const draft = useAuiState(s => s.composer.text)
   const attachments = useStore($composerAttachments)
+  const queuedPromptsBySession = useStore($queuedPromptsBySession)
   const scrolledUp = useStore($threadScrolledUp)
+  const activeQueueSessionKey = queueSessionKey || sessionId || null
+  const queuedPrompts = activeQueueSessionKey ? (queuedPromptsBySession[activeQueueSessionKey] ?? []) : []
 
   const composerRef = useRef<HTMLFormElement | null>(null)
   const composerSurfaceRef = useRef<HTMLDivElement | null>(null)
   const editorRef = useRef<HTMLDivElement | null>(null)
   const draftRef = useRef(draft)
+  const previousBusyRef = useRef(busy)
+  const drainingQueueRef = useRef(false)
   const urlInputRef = useRef<HTMLInputElement | null>(null)
 
   const [urlOpen, setUrlOpen] = useState(false)
@@ -91,6 +120,7 @@ export function ChatBar({
   const [voiceConversationActive, setVoiceConversationActive] = useState(false)
   const [tight, setTight] = useState(false)
   const [dragActive, setDragActive] = useState(false)
+  const [queueEdit, setQueueEdit] = useState<QueueEditState | null>(null)
   const dragDepthRef = useRef(0)
   const lastSpokenIdRef = useRef<string | null>(null)
 
@@ -102,6 +132,8 @@ export function ChatBar({
   const stacked = expanded || narrow || tight
   const hasComposerPayload = draft.trim().length > 0 || attachments.length > 0
   const canSubmit = busy || hasComposerPayload
+  const editingQueuedPrompt = queueEdit ? queuedPrompts.find(entry => entry.id === queueEdit.entryId) ?? null : null
+  const busyAction = busy && hasComposerPayload ? 'queue' : 'stop'
   const showHelpHint = draft === '?'
 
   const placeholder = disabled ? 'Starting Hermes…' : 'Ask anything'
@@ -463,6 +495,14 @@ export function ChatBar({
   }
 
   const handleEditorKeyDown = (event: KeyboardEvent<HTMLDivElement>) => {
+    if ((event.metaKey || event.ctrlKey) && !event.altKey && !event.shiftKey && event.key.toLowerCase() === 'k') {
+      event.preventDefault()
+
+      if (!busy) void drainNextQueued()
+
+      return
+    }
+
     if (trigger && triggerItems.length > 0) {
       if (event.key === 'ArrowDown') {
         event.preventDefault()
@@ -499,6 +539,13 @@ export function ChatBar({
 
     if (event.key === 'Enter' && !event.shiftKey) {
       event.preventDefault()
+
+      if (!busy && !hasComposerPayload && queuedPrompts.length > 0) {
+        void drainNextQueued()
+
+        return
+      }
+
       submitDraft()
     }
   }
@@ -635,10 +682,147 @@ export function ChatBar({
     }
   }
 
-  const submitDraft = () => {
-    if (busy) {
+  const loadIntoComposer = (text: string, attachments: ComposerAttachment[]) => {
+    draftRef.current = text
+    aui.composer().setText(text)
+    $composerAttachments.set(cloneAttachments(attachments))
+
+    const editor = editorRef.current
+
+    if (editor) {
+      renderComposerContents(editor, text)
+      placeCaretEnd(editor)
+    }
+  }
+
+  const beginQueuedEdit = (entry: QueuedPromptEntry) => {
+    if (!activeQueueSessionKey || queueEdit) return
+
+    setQueueEdit({
+      attachments: cloneAttachments($composerAttachments.get()),
+      draft: draftRef.current,
+      entryId: entry.id,
+      sessionKey: activeQueueSessionKey
+    })
+    loadIntoComposer(entry.text, entry.attachments)
+    triggerHaptic('selection')
+    focusInput()
+  }
+
+  const exitQueuedEdit = (action: 'cancel' | 'save'): boolean => {
+    if (!queueEdit) return false
+
+    if (action === 'save') {
+      const text = draftRef.current
+      const next = cloneAttachments($composerAttachments.get())
+
+      if (!text.trim() && next.length === 0) return false
+
+      const saved = updateQueuedPrompt(queueEdit.sessionKey, queueEdit.entryId, { attachments: next, text })
+      triggerHaptic(saved ? 'success' : 'selection')
+    } else {
       triggerHaptic('cancel')
-      onCancel()
+    }
+
+    loadIntoComposer(queueEdit.draft, queueEdit.attachments)
+    setQueueEdit(null)
+    focusInput()
+
+    return true
+  }
+
+  const queueCurrentDraft = useCallback(() => {
+    if (!activeQueueSessionKey || (!draft.trim() && attachments.length === 0)) return false
+    if (!enqueueQueuedPrompt(activeQueueSessionKey, { text: draft, attachments })) return false
+
+    clearDraft()
+    clearComposerAttachments()
+    triggerHaptic('selection')
+
+    return true
+  }, [activeQueueSessionKey, attachments, draft])
+
+  // All queue drain paths share one lock + send-then-remove sequence.
+  // `pickEntry` lets each caller choose head, by-id, or skip-edited.
+  const runDrain = useCallback(
+    async (pickEntry: (entries: QueuedPromptEntry[]) => QueuedPromptEntry | undefined): Promise<boolean> => {
+      if (drainingQueueRef.current || !activeQueueSessionKey) return false
+
+      const entry = pickEntry(queuedPrompts)
+
+      if (!entry) return false
+
+      drainingQueueRef.current = true
+
+      try {
+        const accepted = await Promise.resolve(onSubmit(entry.text, { attachments: entry.attachments, fromQueue: true }))
+
+        if (accepted === false) return false
+
+        removeQueuedPrompt(activeQueueSessionKey, entry.id)
+
+        return true
+      } finally {
+        drainingQueueRef.current = false
+      }
+    },
+    [activeQueueSessionKey, onSubmit, queuedPrompts]
+  )
+
+  const drainNextQueued = useCallback(
+    () =>
+      runDrain(entries => {
+        const skip = queueEdit?.entryId
+
+        return skip ? entries.find(e => e.id !== skip) : entries[0]
+      }),
+    [queueEdit, runDrain]
+  )
+
+  const sendQueuedNow = useCallback(
+    (id: string) => runDrain(entries => entries.find(e => e.id === id && id !== queueEdit?.entryId)),
+    [queueEdit, runDrain]
+  )
+
+  const interruptAndSendNextQueued = useCallback(async () => {
+    if (queuedPrompts.length === 0) return false
+
+    await Promise.resolve(onCancel())
+
+    return drainNextQueued()
+  }, [drainNextQueued, onCancel, queuedPrompts.length])
+
+  // Auto-drain on busy → false (turn settled).
+  useEffect(() => {
+    const wasBusy = previousBusyRef.current
+    previousBusyRef.current = busy
+
+    if (busy || !wasBusy || queuedPrompts.length === 0) return
+
+    void drainNextQueued()
+  }, [busy, drainNextQueued, queuedPrompts.length])
+
+  // Clean up queue edit when its target disappears (session swap or external delete).
+  useEffect(() => {
+    if (!queueEdit) return
+    if (queueEdit.sessionKey === activeQueueSessionKey && editingQueuedPrompt) return
+
+    loadIntoComposer(queueEdit.draft, queueEdit.attachments)
+    setQueueEdit(null)
+  }, [activeQueueSessionKey, editingQueuedPrompt, queueEdit]) // eslint-disable-line react-hooks/exhaustive-deps
+
+  const submitDraft = () => {
+    if (queueEdit) {
+      exitQueuedEdit('save')
+    } else if (busy) {
+      if (hasComposerPayload) queueCurrentDraft()
+      else if (queuedPrompts.length > 0) void interruptAndSendNextQueued()
+      else {
+        triggerHaptic('cancel')
+        void Promise.resolve(onCancel())
+      }
+    } else if (!hasComposerPayload && queuedPrompts.length > 0) {
+      void drainNextQueued()
     } else if (draft.trim() || attachments.length > 0) {
       const submitted = draft
       triggerHaptic('submit')
@@ -742,6 +926,7 @@ export function ChatBar({
   const controls = (
     <ComposerControls
       busy={busy}
+      busyAction={busyAction}
       canSubmit={canSubmit}
       conversation={{
         active: voiceConversationActive,
@@ -824,6 +1009,22 @@ export function ChatBar({
             />
           )}
           <SkinSlashPopover draft={draft} onSelect={selectSkinSlashCommand} />
+          {activeQueueSessionKey && queuedPrompts.length > 0 && (
+            <div className="relative z-6 mb-1 px-0.5">
+              <QueuePanel
+                busy={busy}
+                editingId={queueEdit?.entryId ?? null}
+                entries={queuedPrompts}
+                onDelete={id => {
+                  if (removeQueuedPrompt(activeQueueSessionKey, id) && queueEdit?.entryId === id) {
+                    exitQueuedEdit('cancel')
+                  }
+                }}
+                onEdit={beginQueuedEdit}
+                onSendNow={id => void sendQueuedNow(id)}
+              />
+            </div>
+          )}
           <div
             className="pointer-events-none absolute inset-0 rounded-[inherit]"
             style={{ background: COMPOSER_FADE_BACKGROUND }}
@@ -871,6 +1072,28 @@ export function ChatBar({
               >
                 <VoiceActivity state={voiceActivityState} />
                 <VoicePlaybackActivity />
+                {queueEdit && editingQueuedPrompt && (
+                  <div className="flex items-center justify-between gap-2 rounded-lg border border-[color-mix(in_srgb,var(--dt-composer-ring)_32%,transparent)] bg-accent/18 px-2 py-1">
+                    <div className="min-w-0 text-[0.7rem] text-muted-foreground/88">Editing queued turn in composer</div>
+                    <div className="flex shrink-0 items-center gap-1">
+                      <Button
+                        className="h-6 rounded-md px-2 text-[0.68rem]"
+                        onClick={() => exitQueuedEdit('cancel')}
+                        type="button"
+                        variant="ghost"
+                      >
+                        Cancel
+                      </Button>
+                      <Button
+                        className="h-6 rounded-md px-2 text-[0.68rem]"
+                        onClick={() => exitQueuedEdit('save')}
+                        type="button"
+                      >
+                        Save
+                      </Button>
+                    </div>
+                  </div>
+                )}
                 {attachments.length > 0 && <AttachmentList attachments={attachments} onRemove={onRemoveAttachment} />}
                 <div
                   className={cn(
diff --git a/apps/desktop/src/app/chat/composer/queue-panel.tsx b/apps/desktop/src/app/chat/composer/queue-panel.tsx
new file mode 100644
index 0000000000..e0c8dc88e2
--- /dev/null
+++ b/apps/desktop/src/app/chat/composer/queue-panel.tsx
@@ -0,0 +1,123 @@
+import { useState } from 'react'
+
+import { Button } from '@/components/ui/button'
+import { ArrowUp, ChevronDown, Pencil, Trash2 } from '@/lib/icons'
+import { cn } from '@/lib/utils'
+import type { QueuedPromptEntry } from '@/store/composer-queue'
+
+interface QueuePanelProps {
+  busy: boolean
+  editingId: null | string
+  entries: QueuedPromptEntry[]
+  onDelete: (id: string) => void
+  onEdit: (entry: QueuedPromptEntry) => void
+  onSendNow: (id: string) => void
+}
+
+const entryPreview = (entry: QueuedPromptEntry) =>
+  entry.text.trim() || (entry.attachments.length > 0 ? 'Attachment-only turn' : 'Empty turn')
+
+export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendNow }: QueuePanelProps) {
+  const [collapsed, setCollapsed] = useState(false)
+
+  if (entries.length === 0) return null
+
+  return (
+    <div className="rounded-2xl border border-border/65 bg-[color-mix(in_srgb,var(--dt-card)_70%,transparent)] py-0.5 shadow-[0_0_0_1px_color-mix(in_srgb,var(--dt-card)_30%,transparent)_inset]">
+      <button
+        className="flex w-full items-center gap-1.5 px-2.5 py-1 text-left text-[0.72rem] font-medium text-muted-foreground/92 transition-colors hover:text-foreground/90"
+        onClick={() => setCollapsed(open => !open)}
+        type="button"
+      >
+        <ChevronDown className={cn('shrink-0 transition-transform', collapsed && '-rotate-90')} size={14} />
+        <span className="truncate">{entries.length} Queued</span>
+      </button>
+
+      {!collapsed && (
+        <div className="space-y-0.5 px-1.5 pb-0.5">
+          {entries.map(entry => {
+            const isEditing = editingId === entry.id
+            const attachmentsCount = entry.attachments.length
+
+            return (
+              <div
+                className={cn(
+                  'group/queue-row flex items-center gap-1.5 rounded-lg border border-transparent px-1.5 py-1',
+                  'transition-colors duration-300 ease-out hover:bg-(--chrome-action-hover) hover:transition-none',
+                  isEditing && 'border-[color-mix(in_srgb,var(--dt-composer-ring)_40%,transparent)] bg-accent/25'
+                )}
+                key={entry.id}
+              >
+                <span
+                  aria-hidden
+                  className="h-3.5 w-3.5 shrink-0 rounded-full border border-foreground/35 bg-transparent"
+                />
+                <div className="min-w-0 flex-1">
+                  <p className="truncate text-[0.73rem] leading-4 text-foreground/92">{entryPreview(entry)}</p>
+                  {(attachmentsCount > 0 || isEditing) && (
+                    <div className="mt-0.5 flex items-center gap-1.5 text-[0.64rem] text-muted-foreground/75">
+                      {attachmentsCount > 0 && (
+                        <span>
+                          {attachmentsCount} attachment{attachmentsCount === 1 ? '' : 's'}
+                        </span>
+                      )}
+                      {isEditing && (
+                        <span className="text-[color-mix(in_srgb,var(--dt-composer-ring)_78%,var(--muted-foreground))]">
+                          Editing in composer
+                        </span>
+                      )}
+                    </div>
+                  )}
+                </div>
+                <div
+                  className={cn(
+                    'flex shrink-0 items-center gap-0 transition-opacity',
+                    isEditing
+                      ? 'opacity-100'
+                      : 'opacity-0 group-hover/queue-row:opacity-100 group-focus-within/queue-row:opacity-100'
+                  )}
+                >
+                  <Button
+                    aria-label="Edit queued turn"
+                    className="h-5 w-5 rounded-md"
+                    disabled={Boolean(editingId) && !isEditing}
+                    onClick={() => onEdit(entry)}
+                    size="icon-xs"
+                    title="Edit queued turn"
+                    type="button"
+                    variant="ghost"
+                  >
+                    <Pencil size={11} />
+                  </Button>
+                  <Button
+                    aria-label="Send queued turn now"
+                    className="h-5 w-5 rounded-md"
+                    disabled={busy || isEditing}
+                    onClick={() => onSendNow(entry.id)}
+                    size="icon-xs"
+                    title="Send queued turn now"
+                    type="button"
+                    variant="ghost"
+                  >
+                    <ArrowUp size={11} />
+                  </Button>
+                  <Button
+                    aria-label="Delete queued turn"
+                    className="h-5 w-5 rounded-md"
+                    onClick={() => onDelete(entry.id)}
+                    size="icon-xs"
+                    title="Delete queued turn"
+                    type="button"
+                    variant="ghost"
+                  >
+                    <Trash2 size={11} />
+                  </Button>
+                </div>
+              </div>
+            )
+          })}
+        </div>
+      )}
+    </div>
+  )
+}
diff --git a/apps/desktop/src/app/chat/composer/types.ts b/apps/desktop/src/app/chat/composer/types.ts
index 71c601e396..524667e95f 100644
--- a/apps/desktop/src/app/chat/composer/types.ts
+++ b/apps/desktop/src/app/chat/composer/types.ts
@@ -1,4 +1,5 @@
 import type { HermesGateway } from '@/hermes'
+import type { ComposerAttachment } from '@/store/composer'
 
 import type { DroppedFile } from '../hooks/use-composer-actions'
 
@@ -33,9 +34,10 @@ export interface ChatBarProps {
   maxRecordingSeconds?: number
   state: ChatBarState
   gateway?: HermesGateway | null
+  queueSessionKey?: string | null
   sessionId?: string | null
   cwd?: string | null
-  onCancel: () => void
+  onCancel: () => Promise<void> | void
   onAddContextRef?: (refText: string, label?: string, detail?: string) => void
   onAddUrl?: (url: string) => void
   onAttachImageBlob?: (blob: Blob) => Promise<boolean | void> | boolean | void
@@ -45,7 +47,10 @@ export interface ChatBarProps {
   onPickFolders?: () => void
   onPickImages?: () => void
   onRemoveAttachment?: (id: string) => void
-  onSubmit: (value: string) => Promise<void> | void
+  onSubmit: (
+    value: string,
+    options?: { attachments?: ComposerAttachment[]; fromQueue?: boolean }
+  ) => Promise<boolean> | boolean
   onTranscribeAudio?: (audio: Blob) => Promise<string>
 }
 
diff --git a/apps/desktop/src/app/chat/index.tsx b/apps/desktop/src/app/chat/index.tsx
index 0afed13a1a..8786b7bb2a 100644
--- a/apps/desktop/src/app/chat/index.tsx
+++ b/apps/desktop/src/app/chat/index.tsx
@@ -20,6 +20,7 @@ import { ChevronDown } from '@/lib/icons'
 import { useIncrementalExternalStoreRuntime } from '@/lib/incremental-external-store-runtime'
 import { cn } from '@/lib/utils'
 import { $pinnedSessionIds } from '@/store/layout'
+import type { ComposerAttachment } from '@/store/composer'
 import {
   $activeSessionId,
   $awaitingResponse,
@@ -51,7 +52,7 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
   gateway: HermesGateway | null
   onToggleSelectedPin: () => void
   onDeleteSelectedSession: () => void
-  onCancel: () => void
+  onCancel: () => Promise<void> | void
   onAddContextRef: (refText: string, label?: string, detail?: string) => void
   onAddUrl: (url: string) => void
   onBranchInNewChat: (messageId: string) => void
@@ -63,7 +64,10 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
   onPickFolders: () => void
   onPickImages: () => void
   onRemoveAttachment: (id: string) => void
-  onSubmit: (text: string) => Promise<void> | void
+  onSubmit: (
+    text: string,
+    options?: { attachments?: ComposerAttachment[]; fromQueue?: boolean }
+  ) => Promise<boolean> | boolean
   onThreadMessagesChange: (messages: readonly ThreadMessage[]) => void
   onEdit: (message: AppendMessage) => Promise<void>
   onReload: (parentId: string | null) => Promise<void>
@@ -311,6 +315,7 @@ export function ChatView({
                 onRemoveAttachment={onRemoveAttachment}
                 onSubmit={onSubmit}
                 onTranscribeAudio={onTranscribeAudio}
+                queueSessionKey={selectedSessionId || activeSessionId}
                 sessionId={activeSessionId}
                 state={chatBarState}
               />
diff --git a/apps/desktop/src/app/desktop-controller.tsx b/apps/desktop/src/app/desktop-controller.tsx
index 3ee05772cb..f1794d844a 100644
--- a/apps/desktop/src/app/desktop-controller.tsx
+++ b/apps/desktop/src/app/desktop-controller.tsx
@@ -470,7 +470,7 @@ export function DesktopController() {
       onAttachDroppedItems={composer.attachDroppedItems}
       onAttachImageBlob={composer.attachImageBlob}
       onBranchInNewChat={messageId => void branchInNewChat(messageId)}
-      onCancel={() => void cancelRun()}
+      onCancel={cancelRun}
       onDeleteSelectedSession={() => {
         if (selectedStoredSessionId) {
           void removeSession(selectedStoredSessionId)
diff --git a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
index bee5f78f09..ebb1e7dd6e 100644
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
@@ -71,6 +71,11 @@ interface PromptActionsOptions {
   ) => ClientSessionState
 }
 
+interface SubmitTextOptions {
+  attachments?: ComposerAttachment[] // when provided, submitted instead of the composer store's attachments
+  fromQueue?: boolean // NOTE(review): not read in the visible hunks — confirm where it is consumed
+}
+
 function renderCommandsCatalog(catalog: CommandsCatalogLike): string {
   const desktopCatalog = filterDesktopCommandsCatalog(catalog)
 
@@ -153,7 +158,12 @@ export function usePromptActions({
   )
 
   const syncImageAttachmentsForSubmit = useCallback(
-    async (sessionId: string, attachments: ComposerAttachment[]) => {
+    async (
+      sessionId: string,
+      attachments: ComposerAttachment[],
+      options: { updateComposerAttachments?: boolean } = {}
+    ) => {
+      const updateComposerAttachments = options.updateComposerAttachments ?? true
       const images = attachments.filter(attachment => attachment.kind === 'image' && attachment.path)
 
       for (const attachment of images) {
@@ -173,22 +183,25 @@ export function usePromptActions({
 
         const attachedPath = result.path || attachment.path
 
-        addComposerAttachment({
-          ...attachment,
-          id: attachment.id,
-          label: attachedPath ? pathLabel(attachedPath) : attachment.label,
-          path: attachedPath,
-          attachedSessionId: sessionId
-        })
+        if (updateComposerAttachments) {
+          addComposerAttachment({
+            ...attachment,
+            id: attachment.id,
+            label: attachedPath ? pathLabel(attachedPath) : attachment.label,
+            path: attachedPath,
+            attachedSessionId: sessionId
+          })
+        }
       }
     },
     [requestGateway]
   )
 
   const submitPromptText = useCallback(
-    async (rawText: string) => {
+    async (rawText: string, options?: SubmitTextOptions) => {
       const visibleText = rawText.trim()
-      const attachments = $composerAttachments.get()
+      const usingComposerAttachments = !options?.attachments
+      const attachments = options?.attachments ?? $composerAttachments.get()
       const contextRefs = attachments
         .map(a => a.refText)
         .filter(Boolean)
@@ -200,7 +213,7 @@ export function usePromptActions({
         [contextRefs, visibleText].filter(Boolean).join('\n\n') || (hasImage ? 'What do you see in this image?' : '')
 
       if (!text || busyRef.current) {
-        return
+        return false
       }
 
       const optimisticId = `user-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
@@ -232,7 +245,7 @@ export function usePromptActions({
             awaitingResponse: true,
             pendingBranchGroup: null,
             sawAssistantPayload: false,
-            interrupted: false
+            interrupted: state.interrupted
           }),
           selectedStoredSessionIdRef.current
         )
@@ -278,7 +291,7 @@ export function usePromptActions({
           releaseBusy()
           notifyError(err, 'Session unavailable')
 
-          return
+          return false
         }
 
         if (!sessionId) {
@@ -286,16 +299,21 @@ export function usePromptActions({
           releaseBusy()
           notify({ kind: 'error', title: 'Session unavailable', message: 'Could not create a new session' })
 
-          return
+          return false
         }
 
         seedOptimistic(sessionId)
       }
 
       try {
-        await syncImageAttachmentsForSubmit(sessionId, attachments)
+        await syncImageAttachmentsForSubmit(sessionId, attachments, {
+          updateComposerAttachments: usingComposerAttachments
+        })
         await requestGateway('prompt.submit', { session_id: sessionId, text })
-        clearComposerAttachments()
+
+        if (usingComposerAttachments) clearComposerAttachments()
+
+        return true
       } catch (err) {
         releaseBusy()
         updateSessionState(sessionId, state => ({ ...state, busy: false, awaitingResponse: false }))
@@ -303,10 +321,11 @@ export function usePromptActions({
         if (isProviderSetupError(err)) {
           requestDesktopOnboarding('Add a provider credential before sending your first message.')
 
-          return
+          return false
         }
 
         notifyError(err, 'Prompt failed')
+        return false
       }
     },
     [
@@ -477,18 +496,18 @@ export function usePromptActions({
   )
 
   const submitText = useCallback(
-    async (rawText: string) => {
+    async (rawText: string, options?: SubmitTextOptions) => {
       const visibleText = rawText.trim()
-      const attachments = $composerAttachments.get()
+      const attachments = options?.attachments ?? $composerAttachments.get()
 
       if (!attachments.length && SLASH_COMMAND_RE.test(visibleText)) {
         triggerHaptic('selection')
         await executeSlashCommand(visibleText)
 
-        return
+        return true
       }
 
-      await submitPromptText(rawText)
+      return await submitPromptText(rawText, options)
     },
     [executeSlashCommand, submitPromptText]
   )
diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts
index 926f934e69..f1685de244 100644
--- a/apps/desktop/src/app/session/hooks/use-session-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts
@@ -7,6 +7,7 @@ import { type ChatMessage, chatMessageText, toChatMessages } from '@/lib/chat-me
 import { normalizePersonalityValue } from '@/lib/chat-runtime'
 import { embeddedImageUrls, textWithoutEmbeddedImages } from '@/lib/embedded-images'
 import { clearComposerAttachments, clearComposerDraft } from '@/store/composer'
+import { clearQueuedPrompts } from '@/store/composer-queue'
 import { $pinnedSessionIds } from '@/store/layout'
 import { clearNotifications, notify, notifyError } from '@/store/notifications'
 import { requestDesktopOnboarding } from '@/store/onboarding'
@@ -649,6 +650,11 @@ export function useSessionActions({
         }
 
         await deleteSession(storedSessionId)
+        clearQueuedPrompts(storedSessionId)
+
+        if (closingRuntimeId) {
+          clearQueuedPrompts(closingRuntimeId)
+        }
       } catch (err) {
         if (removed) {
           setSessions(prev => [removed, ...prev])
diff --git a/apps/desktop/src/components/assistant-ui/thread.tsx b/apps/desktop/src/components/assistant-ui/thread.tsx
index 6dfae16e7c..d0a039f0f1 100644
--- a/apps/desktop/src/components/assistant-ui/thread.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread.tsx
@@ -95,6 +95,10 @@ function messageContentText(content: unknown): string {
   return Array.isArray(content) ? content.map(partText).join('').trim() : ''
 }
 
+const INTERRUPTED_ONLY_RE = /^_?\[interrupted\]_?$/i
+// True when the trimmed text is nothing but `[interrupted]` (any case), with an optional `_` on either side.
+const isInterruptedOnlyMessage = (text: string) => INTERRUPTED_ONLY_RE.test(text.trim())
+
 function resetStickyState(state: StickyStateFlags) {
   state.escapedFromLock = false
   state.isAtBottom = true
@@ -368,6 +372,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
 
   const messageStatus = useAuiState(s => s.message.status?.type)
   const isPlaceholder = messageStatus === 'running' && content.length === 0
+  const interruptedOnly = useMemo(() => isInterruptedOnlyMessage(messageText), [messageText])
 
   if (isPlaceholder) {
     return null
@@ -380,7 +385,10 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
       data-slot="aui_assistant-message-root"
     >
       <div
-        className="wrap-anywhere min-w-0 max-w-full overflow-hidden text-pretty text-base leading-(--dt-line-height) text-foreground"
+        className={cn(
+          'wrap-anywhere min-w-0 max-w-full overflow-hidden text-pretty text-base leading-(--dt-line-height) text-foreground',
+          interruptedOnly && 'text-[0.8rem] leading-5 text-muted-foreground/82'
+        )}
         data-slot="aui_assistant-message-content"
       >
         {hoistedTodos.length > 0 && <HoistedTodoPanel todos={hoistedTodos} />}
@@ -401,7 +409,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
           </ErrorPrimitive.Root>
         </MessagePrimitive.Error>
       </div>
-      {messageText.trim().length > 0 && (
+      {messageText.trim().length > 0 && !interruptedOnly && (
         <AssistantFooter messageId={messageId} messageText={messageText} onBranchInNewChat={onBranchInNewChat} />
       )}
     </MessagePrimitive.Root>
diff --git a/apps/desktop/src/store/composer-queue.test.ts b/apps/desktop/src/store/composer-queue.test.ts
new file mode 100644
index 0000000000..9f15232aec
--- /dev/null
+++ b/apps/desktop/src/store/composer-queue.test.ts
@@ -0,0 +1,102 @@
+import { beforeEach, describe, expect, it } from 'vitest'
+
+import type { ComposerAttachment } from './composer'
+import {
+  $queuedPromptsBySession,
+  clearQueuedPrompts,
+  dequeueQueuedPrompt,
+  enqueueQueuedPrompt,
+  getQueuedPrompts,
+  removeQueuedPrompt,
+  updateQueuedPrompt,
+  updateQueuedPromptText
+} from './composer-queue'
+
+const SESSION_KEY = 'session-abc'
+const QUEUE_STORAGE_KEY = 'hermes.desktop.composerQueue.v1'
+
+// Minimal attachment fixture for the queue tests: the id doubles as the label
+// and is embedded in an `@file:` reference string.
+function attachment(id: string, kind: ComposerAttachment['kind'] = 'file'): ComposerAttachment {
+  const label = id
+  const refText = `@file:${id}`
+
+  return { id, kind, label, refText }
+}
+
+describe('composer queue store', () => {
+  beforeEach(() => {
+    window.localStorage.removeItem(QUEUE_STORAGE_KEY)
+    $queuedPromptsBySession.set({})
+  })
+
+  it('queues prompts in FIFO order', () => {
+    enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'first' })
+    enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'second' })
+    // Entries must come back in insertion order, then null once the queue is drained.
+    expect(dequeueQueuedPrompt(SESSION_KEY)?.text).toBe('first')
+    expect(dequeueQueuedPrompt(SESSION_KEY)?.text).toBe('second')
+    expect(dequeueQueuedPrompt(SESSION_KEY)).toBeNull()
+  })
+
+  it('clones attachments when queueing', () => {
+    const source = [attachment('a-1')]
+    const queued = enqueueQueuedPrompt(SESSION_KEY, { attachments: source, text: 'check clones' })
+    // Equal by value but a different object: the store must not alias caller-owned attachments.
+    expect(queued).not.toBeNull()
+    expect(getQueuedPrompts(SESSION_KEY)[0]?.attachments[0]).toEqual(source[0])
+    expect(getQueuedPrompts(SESSION_KEY)[0]?.attachments[0]).not.toBe(source[0])
+  })
+
+  it('updates and removes queued entries by id', () => {
+    const first = enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'draft one' })
+    const second = enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'draft two' })
+
+    expect(first).not.toBeNull()
+    expect(second).not.toBeNull()
+
+    expect(updateQueuedPromptText(SESSION_KEY, first!.id, 'draft one edited')).toBe(true)
+    expect(getQueuedPrompts(SESSION_KEY).map(entry => entry.text)).toEqual(['draft one edited', 'draft two'])
+
+    expect(removeQueuedPrompt(SESSION_KEY, first!.id)).toBe(true)
+    expect(getQueuedPrompts(SESSION_KEY).map(entry => entry.text)).toEqual(['draft two'])
+  })
+
+  it('updates queued text and attachment snapshot', () => {
+    const first = enqueueQueuedPrompt(SESSION_KEY, { attachments: [attachment('f-1')], text: 'draft one' })
+    const editedAttachments = [attachment('f-2'), attachment('f-3', 'image')]
+
+    expect(first).not.toBeNull()
+    expect(
+      updateQueuedPrompt(SESSION_KEY, first!.id, {
+        attachments: editedAttachments,
+        text: 'edited text'
+      })
+    ).toBe(true)
+
+    const queue = getQueuedPrompts(SESSION_KEY)
+    expect(queue[0]?.text).toBe('edited text')
+    expect(queue[0]?.attachments).toEqual(editedAttachments)
+    expect(queue[0]?.attachments[0]).not.toBe(editedAttachments[0])
+  })
+
+  it('clears queue state for a session', () => {
+    enqueueQueuedPrompt(SESSION_KEY, { attachments: [attachment('img-1', 'image')], text: 'queued' })
+
+    clearQueuedPrompts(SESSION_KEY)
+    // Clearing the only session empties the map, so the storage key itself is expected to be removed.
+    expect(getQueuedPrompts(SESSION_KEY)).toEqual([])
+    expect($queuedPromptsBySession.get()[SESSION_KEY]).toBeUndefined()
+    expect(window.localStorage.getItem(QUEUE_STORAGE_KEY)).toBeNull()
+  })
+
+  it('persists queue entries into local storage', () => {
+    enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'persist me' })
+
+    const raw = window.localStorage.getItem(QUEUE_STORAGE_KEY)
+    expect(raw).toBeTruthy()
+
+    const parsed = JSON.parse(String(raw)) as Record<string, { text: string }[]>
+    expect(parsed[SESSION_KEY]?.[0]?.text).toBe('persist me')
+  })
+})
diff --git a/apps/desktop/src/store/composer-queue.ts b/apps/desktop/src/store/composer-queue.ts
new file mode 100644
index 0000000000..d2a3f228ff
--- /dev/null
+++ b/apps/desktop/src/store/composer-queue.ts
@@ -0,0 +1,158 @@
+import { atom } from 'nanostores'
+
+import type { ComposerAttachment } from './composer'
+
+export interface QueuedPromptEntry {
+  id: string // generated on enqueue as `queued-<timestamp>-<random suffix>`
+  text: string
+  attachments: ComposerAttachment[] // shallow-copied snapshot taken when the entry is queued or updated
+  queuedAt: number // Date.now() captured at enqueue time
+}
+
+type QueueState = Record<string, QueuedPromptEntry[]> // session key -> queued prompts, oldest first
+
+const STORAGE_KEY = 'hermes.desktop.composerQueue.v1' // localStorage key holding the JSON-serialized QueueState
+
+// Restore persisted queues; bad JSON, a non-object root, or non-array queue values are discarded.
+const load = (): QueueState => {
+  if (typeof window === 'undefined') return {}
+  try {
+    const parsed: unknown = JSON.parse(window.localStorage.getItem(STORAGE_KEY) ?? 'null')
+    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return {}
+    return Object.fromEntries(Object.entries(parsed).filter(([, queue]) => Array.isArray(queue))) as QueueState
+  } catch {
+    return {}
+  }
+}
+
+const save = (state: QueueState) => {
+  if (typeof window === 'undefined') return
+  try {
+    if (Object.keys(state).length > 0) window.localStorage.setItem(STORAGE_KEY, JSON.stringify(state))
+    else window.localStorage.removeItem(STORAGE_KEY)
+  } catch {
+    // Persistence is best-effort: if storage is unavailable the error is swallowed and nothing is written.
+  }
+}
+
+export const $queuedPromptsBySession = atom<QueueState>(load()) // initial value comes from load() at module evaluation
+
+// Swap in `queue` for `sid` (dropping the key when the queue is empty), then publish and persist the map.
+const writeSession = (sid: string, queue: QueuedPromptEntry[]) => {
+  const snapshot: QueueState = { ...$queuedPromptsBySession.get() }
+
+  if (queue.length > 0) snapshot[sid] = queue
+  else delete snapshot[sid]
+
+  $queuedPromptsBySession.set(snapshot)
+  save(snapshot)
+}
+
+// Normalize a caller-supplied session key: the trimmed text, or null when it is missing or blank.
+const sidOf = (key: string | null | undefined): null | string => {
+  const normalized = (key ?? '').trim()
+  return normalized === '' ? null : normalized
+}
+
+const queueFor = (sid: string): QueuedPromptEntry[] => $queuedPromptsBySession.get()[sid] ?? []
+// Entry ids look like `queued-<epoch ms>-<up to 6 base36 chars>`.
+const nextId = () => ['queued', Date.now(), Math.random().toString(36).slice(2, 8)].join('-')
+// Shallow-copies each attachment so a queued snapshot never aliases the caller's objects.
+const cloneAttachments = (attachments: ComposerAttachment[]) => attachments.map(item => ({ ...item }))
+
+// Return the queue stored for `key`; a missing, blank, or unknown key yields an empty array.
+export const getQueuedPrompts = (key: string | null | undefined): QueuedPromptEntry[] => {
+  const sid = sidOf(key)
+  return sid === null ? [] : queueFor(sid)
+}
+
+// Append a prompt to the tail of the session's queue and return the stored entry.
+// Returns null (and queues nothing) when `key` is missing or blank.
+export const enqueueQueuedPrompt = (
+  key: string | null | undefined,
+  payload: { text: string; attachments: ComposerAttachment[] }
+): null | QueuedPromptEntry => {
+  const sid = sidOf(key)
+
+  if (sid === null) return null
+
+  const id = nextId()
+  // Copy the attachments so later changes to the caller's objects do not reach the queued entry.
+  const attachments = cloneAttachments(payload.attachments)
+  const entry: QueuedPromptEntry = { id, text: payload.text, attachments, queuedAt: Date.now() }
+
+  writeSession(sid, queueFor(sid).concat(entry))
+
+  return entry
+}
+
+// Pop the oldest queued prompt for the session; null when the key is blank or the queue is empty.
+export const dequeueQueuedPrompt = (key: string | null | undefined): null | QueuedPromptEntry => {
+  const sid = sidOf(key)
+  if (sid === null) return null
+
+  const pending = queueFor(sid)
+  const oldest = pending[0]
+  if (!oldest) return null
+
+  writeSession(sid, pending.slice(1))
+
+  return oldest
+}
+
+// Delete the entry with the given id from the session's queue.
+// Returns true only when an entry was actually removed; nothing is written otherwise.
+export const removeQueuedPrompt = (key: string | null | undefined, id: string): boolean => {
+  const sid = sidOf(key)
+  if (sid === null) return false
+
+  const before = queueFor(sid)
+  const after = before.filter(entry => entry.id !== id)
+  const removed = after.length !== before.length
+
+  if (removed) writeSession(sid, after)
+
+  return removed
+}
+
+// Overwrite the text (and, when provided, the attachment snapshot) of one queued entry.
+// Returns false when the key is blank, the id is not queued, or nothing would change
+// (same text and no `attachments` supplied); supplying `attachments` always counts as a change.
+export const updateQueuedPrompt = (
+  key: string | null | undefined,
+  id: string,
+  update: { text: string; attachments?: ComposerAttachment[] }
+): boolean => {
+  const sid = sidOf(key)
+
+  if (sid === null) return false
+
+  let touched = false
+
+  const rewritten = queueFor(sid).map(entry => {
+    const isNoop = entry.text === update.text && !update.attachments
+
+    if (entry.id !== id || isNoop) return entry
+
+    touched = true
+    // New attachments are copied; without them the entry keeps its existing snapshot.
+    const attachments = update.attachments ? cloneAttachments(update.attachments) : entry.attachments
+
+    return { ...entry, text: update.text, attachments }
+  })
+
+  if (touched) writeSession(sid, rewritten)
+
+  return touched
+}
+
+export const updateQueuedPromptText = (key: string | null | undefined, id: string, text: string): boolean =>
+  updateQueuedPrompt(key, id, { text }) // text-only edit: no `attachments` passed, so the entry keeps its own
+
+// Drop every queued prompt for the session; a blank key or a session without a queue is a no-op.
+export const clearQueuedPrompts = (key: string | null | undefined) => {
+  const sid = sidOf(key)
+  if (sid === null) return
+
+  if (sid in $queuedPromptsBySession.get()) writeSession(sid, [])
+}