diff --git a/Dockerfile b/Dockerfile index ee2c491c06..8655c51f34 100644 --- a/Dockerfile +++ b/Dockerfile @@ -94,9 +94,13 @@ RUN cd web && npm run build && \ # hermes_cli/main.py succeeds (see #18800). /opt/hermes/web is build-time # only (HERMES_WEB_DIST points at hermes_cli/web_dist) and is intentionally # not chowned here. +# The .venv MUST be hermes-writable so lazy_deps.py can install platform +# packages (discord.py, telegram, slack, etc.) at first gateway boot. +# Without this, `uv pip install` fails with EACCES and all messaging +# adapters silently fail to load. See tools/lazy_deps.py. USER root RUN chmod -R a+rX /opt/hermes && \ - chown -R hermes:hermes /opt/hermes/ui-tui /opt/hermes/node_modules + chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/node_modules # Start as root so the entrypoint can usermod/groupmod + gosu. # If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000). diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 3919c8565b..4b1134a4c0 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -1305,9 +1305,8 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]: ), } # Forward cache_control marker when present on the OpenAI-format - # tool dict (set by ``mark_tools_for_long_lived_cache``). Anthropic's - # tools array supports cache_control on the last tool to cache the - # entire schema cross-session. + # tool dict. Anthropic's tools array supports cache_control on the + # last tool to cache the entire schema cross-session. cache_control = t.get("cache_control") if isinstance(cache_control, dict): anthropic_tool["cache_control"] = dict(cache_control) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 377e4ba22e..de7b6db2b1 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -382,7 +382,28 @@ _AI_GATEWAY_HEADERS = { # Nous Portal extra_body for product attribution. 
# Callers should pass this as extra_body in chat.completions.create() # when the auxiliary client is backed by Nous Portal. -NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent", "client=aux"]} +# +# The tags are computed from agent.portal_tags so the client= marker stays +# in lockstep with hermes_cli.__version__ across every Portal call site +# (main loop, aux, compression, web_extract). Do not inline a literal here; +# see agent/portal_tags.py for the rationale. +from agent.portal_tags import nous_portal_tags as _nous_portal_tags + + +def _nous_extra_body() -> dict: + """Return a fresh Nous Portal ``extra_body`` dict. + + Computed at call time so a hot-reloaded ``hermes_cli.__version__`` is + reflected without restarting long-running processes. + """ + return {"tags": _nous_portal_tags()} + + +# Backwards-compatible module attribute. Some callers (tests, third-party +# plugins) read ``NOUS_EXTRA_BODY`` directly; keep it as a snapshot of the +# current tags. Callers that need the freshest value should call +# ``_nous_extra_body()`` or import ``nous_portal_tags`` directly. +NOUS_EXTRA_BODY = _nous_extra_body() # Set at resolve time — True if the auxiliary client points to Nous Portal auxiliary_is_nous: bool = False @@ -3437,7 +3458,7 @@ def get_auxiliary_extra_body() -> dict: Includes Nous Portal product tags when the auxiliary client is backed by Nous Portal. Returns empty dict otherwise. 
""" - return dict(NOUS_EXTRA_BODY) if auxiliary_is_nous else {} + return _nous_extra_body() if auxiliary_is_nous else {} def auxiliary_max_tokens_param(value: int) -> dict: @@ -4026,7 +4047,7 @@ def _build_call_kwargs( # Provider-specific extra_body merged_extra = dict(extra_body or {}) if provider == "nous" or auxiliary_is_nous: - merged_extra.setdefault("tags", []).extend(NOUS_EXTRA_BODY["tags"]) + merged_extra.setdefault("tags", []).extend(_nous_portal_tags()) if merged_extra: kwargs["extra_body"] = merged_extra diff --git a/agent/portal_tags.py b/agent/portal_tags.py new file mode 100644 index 0000000000..647c52a076 --- /dev/null +++ b/agent/portal_tags.py @@ -0,0 +1,64 @@ +"""Centralized Nous Portal request tags. + +Every Hermes request that hits the Nous Portal — main agent loop, auxiliary +client (compression / titles / vision / web_extract / session_search / etc.), +and any future code path — must carry the same product-attribution tags so +Nous can attribute usage to Hermes Agent and bucket it by client release. + +Tag shape (sent in OpenAI-compatible ``extra_body['tags']``): + + [ + "product=hermes-agent", + "client=hermes-client-v<__version__>", + ] + +The version is sourced live from ``hermes_cli.__version__`` so it auto-aligns +to whatever release is installed; the release script +(``scripts/release.py``) regex-bumps that single string, and every Portal +request picks up the new tag on the next process start. + +Why one helper instead of inlining the literal at each site: +* Four call sites (main loop profile, aux client, run_agent compression + fallback, web_tools fallback) used to drift apart — see PR #24194 which + only got the aux site, leaving the main loop sending a different tag set. +* Tests should assert the same tag list everywhere; centralizing makes that + assertion a one-liner against this module. + +Do NOT pre-compute these as module-level constants in the consumers. 
The +version can change at runtime (editable installs, hot-reload tooling), and +``hermes_cli.__version__`` is the canonical source of truth. +""" + +from __future__ import annotations + +from typing import List + + +def _hermes_version() -> str: + """Return the current Hermes release version, e.g. ``"0.13.0"``. + + Falls back to ``"unknown"`` if ``hermes_cli`` cannot be imported (should + never happen in a real install — guarded for defensive testing). + """ + try: + from hermes_cli import __version__ + return __version__ + except Exception: + return "unknown" + + +def hermes_client_tag() -> str: + """Return the ``client=...`` tag for Nous Portal requests. + + Format: ``client=hermes-client-v<major>.<minor>.<patch>``. + """ + return f"client=hermes-client-v{_hermes_version()}" + + +def nous_portal_tags() -> List[str]: + """Return the canonical list of Nous Portal product tags. + + Always returns a fresh list so callers can mutate it freely + (e.g. ``merged_extra.setdefault("tags", []).extend(nous_portal_tags())``). + """ + return ["product=hermes-agent", hermes_client_tag()] diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index 025ea8ab65..6bd3638783 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -268,7 +268,7 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = ( # Model name substrings that trigger tool-use enforcement guidance. # Add new patterns here when a model family needs explicit steering. -TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok") +TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm") # OpenAI GPT/Codex-specific execution guidance. Addresses known failure modes # where GPT models abandon work on partial results, skip prerequisite lookups, diff --git a/agent/prompt_caching.py b/agent/prompt_caching.py index 4829c96b33..a73d6e113d 100644 --- a/agent/prompt_caching.py +++ b/agent/prompt_caching.py @@ -1,25 +1,15 @@ -"""Anthropic prompt caching strategies. +"""Anthropic prompt caching strategy. 
-Two layouts: - -* ``system_and_3`` (default, used everywhere except the long-lived path): - 4 cache_control breakpoints — system prompt + last 3 non-system messages. - All at the same TTL (5m or 1h). Reduces input token costs by ~75% on - multi-turn conversations within a single session. - -* ``prefix_and_2`` (Claude on Anthropic / OpenRouter / Nous Portal): - 4 breakpoints split across two TTL tiers — tools[-1] (1h) + - stable system prefix (1h) + last 2 non-system messages (5m). The - long-lived prefix is byte-stable across sessions for a given user - config, so every fresh session reads the cached system+tools instead - of re-paying for them. Within-session rolling window shrinks from 3 - messages to 2 to free the breakpoint budget. +Single layout: ``system_and_3``. 4 cache_control breakpoints — system +prompt + last 3 non-system messages, all at the same TTL (5m or 1h). +Reduces input token costs by ~75% on multi-turn conversations within a +single session. Pure functions -- no class state, no AIAgent dependency. """ import copy -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List def _apply_cache_marker(msg: dict, cache_marker: dict, native_anthropic: bool = False) -> None: @@ -87,115 +77,3 @@ def apply_anthropic_cache_control( _apply_cache_marker(messages[idx], marker, native_anthropic=native_anthropic) return messages - - -def _mark_system_stable_block( - messages: List[Dict[str, Any]], - long_lived_marker: Dict[str, str], -) -> bool: - """Mark the *first* content block of the system message with the 1h marker. - - The system message is expected to have been split into multiple content - blocks beforehand by the caller — block[0] is the cross-session-stable - prefix, subsequent blocks carry context files + volatile suffix. - Falls back to marking the whole system message as a single block when - the message hasn't been split (preserves correctness on the fallback path). - - Returns True when a marker was placed. 
- """ - if not messages or messages[0].get("role") != "system": - return False - - sys_msg = messages[0] - content = sys_msg.get("content") - - # Already a list of blocks → mark the first block. - if isinstance(content, list) and content: - first = content[0] - if isinstance(first, dict): - first["cache_control"] = long_lived_marker - return True - return False - - # String content (no split) → cannot place a stable-prefix breakpoint - # without changing the byte content. Caller is responsible for - # splitting; if they didn't, fall through to envelope marker so we still - # cache *something* for this turn. - if isinstance(content, str) and content: - sys_msg["content"] = [ - {"type": "text", "text": content, "cache_control": long_lived_marker} - ] - return True - - return False - - -def apply_anthropic_cache_control_long_lived( - api_messages: List[Dict[str, Any]], - long_lived_ttl: str = "1h", - rolling_ttl: str = "5m", - native_anthropic: bool = False, -) -> List[Dict[str, Any]]: - """Apply prefix_and_2 caching: long-lived stable prefix + rolling window. - - Layout (4 breakpoints total): - * Stable system prefix (block[0]) → ``long_lived_ttl`` TTL - * Last 2 non-system messages → ``rolling_ttl`` TTL each - - NOTE: this function does NOT mark the tools array. Tools cache_control - is attached separately (see ``mark_tools_for_long_lived_cache``) because - tools live outside the messages list in the API payload. - - The caller MUST have split the system message into ordered content - blocks where block[0] is the cross-session-stable portion. If the system - message is still a single string, it is wrapped into a single block and - marked — this is correct, just less effective (the volatile suffix is - not isolated, so the prefix invalidates per-session). - - Returns: - Deep copy of messages with cache_control breakpoints injected. 
- """ - messages = copy.deepcopy(api_messages) - if not messages: - return messages - - long_marker = _build_marker(long_lived_ttl) - rolling_marker = _build_marker(rolling_ttl) - - placed_prefix = _mark_system_stable_block(messages, long_marker) - - # Reserve 1 breakpoint for the system prefix (when placed); spend the - # remaining 3 on the rolling tail. Anthropic max is 4 total — - # tools[-1] (when marked) consumes the 4th, so we cap rolling at 2 here. - rolling_budget = 2 if placed_prefix else 3 - non_sys = [i for i in range(len(messages)) if messages[i].get("role") != "system"] - for idx in non_sys[-rolling_budget:]: - _apply_cache_marker(messages[idx], rolling_marker, native_anthropic=native_anthropic) - - return messages - - -def mark_tools_for_long_lived_cache( - tools: Optional[List[Dict[str, Any]]], - long_lived_ttl: str = "1h", -) -> Optional[List[Dict[str, Any]]]: - """Attach cache_control to the last tool in the OpenAI-format tools list. - - Anthropic prefix-cache order is ``tools → system → messages``. Marking - the last tool dict caches the entire tools array (Anthropic's docs: - "the marker is placed on the last block you want included in the cached - prefix"). Marker is preserved across the OpenAI-wire boundary on - OpenRouter and Nous Portal (which proxies to OpenRouter); on native - Anthropic the marker is forwarded by ``convert_tools_to_anthropic``. - - Returns a deep copy of the tools list with the marker attached, or the - input unchanged when tools is empty/None. Pure function — does not - mutate the input. 
- """ - if not tools: - return tools - out = copy.deepcopy(tools) - last = out[-1] - if isinstance(last, dict): - last["cache_control"] = _build_marker(long_lived_ttl) - return out diff --git a/apps/desktop/src/app/chat/composer/controls.tsx b/apps/desktop/src/app/chat/composer/controls.tsx index 010c6d67fc..7fa9255a9e 100644 --- a/apps/desktop/src/app/chat/composer/controls.tsx +++ b/apps/desktop/src/app/chat/composer/controls.tsx @@ -1,6 +1,6 @@ import { Button } from '@/components/ui/button' import { triggerHaptic } from '@/lib/haptics' -import { ArrowUp, AudioLines, Loader2, Mic, MicOff, Square } from '@/lib/icons' +import { ArrowUp, AudioLines, Layers3, Loader2, Mic, MicOff, Square } from '@/lib/icons' import { cn } from '@/lib/utils' import type { ConversationStatus } from './hooks/use-voice-conversation' @@ -31,6 +31,7 @@ interface ConversationProps { export function ComposerControls({ busy, + busyAction, canSubmit, conversation, disabled, @@ -40,6 +41,7 @@ export function ComposerControls({ onDictate }: { busy: boolean + busyAction: 'queue' | 'stop' canSubmit: boolean conversation: ConversationProps disabled: boolean @@ -74,12 +76,21 @@ export function ComposerControls({ ) : ( )} diff --git a/apps/desktop/src/app/chat/composer/index.tsx b/apps/desktop/src/app/chat/composer/index.tsx index ace13c58cb..db9935d389 100644 --- a/apps/desktop/src/app/chat/composer/index.tsx +++ b/apps/desktop/src/app/chat/composer/index.tsx @@ -13,6 +13,7 @@ import { } from 'react' import { formatRefValue, hermesDirectiveFormatter } from '@/components/assistant-ui/directive-text' +import { Button } from '@/components/ui/button' import { useMediaQuery } from '@/hooks/use-media-query' import { useResizeObserver } from '@/hooks/use-resize-observer' import { chatMessageText } from '@/lib/chat-messages' @@ -20,7 +21,19 @@ import { contextPath } from '@/lib/chat-runtime' import { DATA_IMAGE_URL_RE } from '@/lib/embedded-images' import { triggerHaptic } from '@/lib/haptics' import { cn 
} from '@/lib/utils' -import { $composerAttachments, $composerDraft } from '@/store/composer' +import { + $composerAttachments, + $composerDraft, + clearComposerAttachments, + type ComposerAttachment +} from '@/store/composer' +import { + $queuedPromptsBySession, + enqueueQueuedPrompt, + removeQueuedPrompt, + type QueuedPromptEntry, + updateQueuedPrompt +} from '@/store/composer-queue' import { $messages } from '@/store/session' import { $threadScrolledUp } from '@/store/thread-scroll' @@ -41,6 +54,7 @@ import { renderComposerContents, RICH_INPUT_SLOT } from './rich-editor' +import { QueuePanel } from './queue-panel' import { SkinSlashPopover } from './skin-slash-popover' import { detectTrigger, extractClipboardImageBlobs, textBeforeCaret, type TriggerState } from './text-utils' import { ComposerTriggerPopover } from './trigger-popover' @@ -53,6 +67,15 @@ const COMPOSER_STACK_BREAKPOINT_PX = 320 const COMPOSER_FADE_BACKGROUND = 'linear-gradient(to bottom, transparent, color-mix(in srgb, var(--dt-background) 10%, transparent))' +interface QueueEditState { + attachments: ComposerAttachment[] + draft: string + entryId: string + sessionKey: string +} + +const cloneAttachments = (attachments: ComposerAttachment[]) => attachments.map(a => ({ ...a })) + export function ChatBar({ busy, cwd, @@ -60,6 +83,7 @@ export function ChatBar({ focusKey, gateway, maxRecordingSeconds = 120, + queueSessionKey, sessionId, state, onCancel, @@ -77,12 +101,17 @@ export function ChatBar({ const aui = useAui() const draft = useAuiState(s => s.composer.text) const attachments = useStore($composerAttachments) + const queuedPromptsBySession = useStore($queuedPromptsBySession) const scrolledUp = useStore($threadScrolledUp) + const activeQueueSessionKey = queueSessionKey || sessionId || null + const queuedPrompts = activeQueueSessionKey ? (queuedPromptsBySession[activeQueueSessionKey] ?? 
[]) : [] const composerRef = useRef(null) const composerSurfaceRef = useRef(null) const editorRef = useRef(null) const draftRef = useRef(draft) + const previousBusyRef = useRef(busy) + const drainingQueueRef = useRef(false) const urlInputRef = useRef(null) const [urlOpen, setUrlOpen] = useState(false) @@ -91,6 +120,7 @@ export function ChatBar({ const [voiceConversationActive, setVoiceConversationActive] = useState(false) const [tight, setTight] = useState(false) const [dragActive, setDragActive] = useState(false) + const [queueEdit, setQueueEdit] = useState(null) const dragDepthRef = useRef(0) const lastSpokenIdRef = useRef(null) @@ -102,6 +132,8 @@ export function ChatBar({ const stacked = expanded || narrow || tight const hasComposerPayload = draft.trim().length > 0 || attachments.length > 0 const canSubmit = busy || hasComposerPayload + const editingQueuedPrompt = queueEdit ? queuedPrompts.find(entry => entry.id === queueEdit.entryId) ?? null : null + const busyAction = busy && hasComposerPayload ? 'queue' : 'stop' const showHelpHint = draft === '?' const placeholder = disabled ? 
'Starting Hermes…' : 'Ask anything' @@ -463,6 +495,14 @@ export function ChatBar({ } const handleEditorKeyDown = (event: KeyboardEvent) => { + if ((event.metaKey || event.ctrlKey) && !event.altKey && !event.shiftKey && event.key.toLowerCase() === 'k') { + event.preventDefault() + + if (!busy) void drainNextQueued() + + return + } + if (trigger && triggerItems.length > 0) { if (event.key === 'ArrowDown') { event.preventDefault() @@ -499,6 +539,13 @@ export function ChatBar({ if (event.key === 'Enter' && !event.shiftKey) { event.preventDefault() + + if (!busy && !hasComposerPayload && queuedPrompts.length > 0) { + void drainNextQueued() + + return + } + submitDraft() } } @@ -635,10 +682,147 @@ export function ChatBar({ } } - const submitDraft = () => { - if (busy) { + const loadIntoComposer = (text: string, attachments: ComposerAttachment[]) => { + draftRef.current = text + aui.composer().setText(text) + $composerAttachments.set(cloneAttachments(attachments)) + + const editor = editorRef.current + + if (editor) { + renderComposerContents(editor, text) + placeCaretEnd(editor) + } + } + + const beginQueuedEdit = (entry: QueuedPromptEntry) => { + if (!activeQueueSessionKey || queueEdit) return + + setQueueEdit({ + attachments: cloneAttachments($composerAttachments.get()), + draft: draftRef.current, + entryId: entry.id, + sessionKey: activeQueueSessionKey + }) + loadIntoComposer(entry.text, entry.attachments) + triggerHaptic('selection') + focusInput() + } + + const exitQueuedEdit = (action: 'cancel' | 'save'): boolean => { + if (!queueEdit) return false + + if (action === 'save') { + const text = draftRef.current + const next = cloneAttachments($composerAttachments.get()) + + if (!text.trim() && next.length === 0) return false + + const saved = updateQueuedPrompt(queueEdit.sessionKey, queueEdit.entryId, { attachments: next, text }) + triggerHaptic(saved ? 
'success' : 'selection') + } else { triggerHaptic('cancel') - onCancel() + } + + loadIntoComposer(queueEdit.draft, queueEdit.attachments) + setQueueEdit(null) + focusInput() + + return true + } + + const queueCurrentDraft = useCallback(() => { + if (!activeQueueSessionKey || (!draft.trim() && attachments.length === 0)) return false + if (!enqueueQueuedPrompt(activeQueueSessionKey, { text: draft, attachments })) return false + + clearDraft() + clearComposerAttachments() + triggerHaptic('selection') + + return true + }, [activeQueueSessionKey, attachments, draft]) + + // All queue drain paths share one lock + send-then-remove sequence. + // `pickEntry` lets each caller choose head, by-id, or skip-edited. + const runDrain = useCallback( + async (pickEntry: (entries: QueuedPromptEntry[]) => QueuedPromptEntry | undefined): Promise => { + if (drainingQueueRef.current || !activeQueueSessionKey) return false + + const entry = pickEntry(queuedPrompts) + + if (!entry) return false + + drainingQueueRef.current = true + + try { + const accepted = await Promise.resolve(onSubmit(entry.text, { attachments: entry.attachments, fromQueue: true })) + + if (accepted === false) return false + + removeQueuedPrompt(activeQueueSessionKey, entry.id) + + return true + } finally { + drainingQueueRef.current = false + } + }, + [activeQueueSessionKey, onSubmit, queuedPrompts] + ) + + const drainNextQueued = useCallback( + () => + runDrain(entries => { + const skip = queueEdit?.entryId + + return skip ? 
entries.find(e => e.id !== skip) : entries[0] + }), + [queueEdit, runDrain] + ) + + const sendQueuedNow = useCallback( + (id: string) => runDrain(entries => entries.find(e => e.id === id && id !== queueEdit?.entryId)), + [queueEdit, runDrain] + ) + + const interruptAndSendNextQueued = useCallback(async () => { + if (queuedPrompts.length === 0) return false + + await Promise.resolve(onCancel()) + + return drainNextQueued() + }, [drainNextQueued, onCancel, queuedPrompts.length]) + + // Auto-drain on busy → false (turn settled). + useEffect(() => { + const wasBusy = previousBusyRef.current + previousBusyRef.current = busy + + if (busy || !wasBusy || queuedPrompts.length === 0) return + + void drainNextQueued() + }, [busy, drainNextQueued, queuedPrompts.length]) + + // Clean up queue edit when its target disappears (session swap or external delete). + useEffect(() => { + if (!queueEdit) return + if (queueEdit.sessionKey === activeQueueSessionKey && editingQueuedPrompt) return + + loadIntoComposer(queueEdit.draft, queueEdit.attachments) + setQueueEdit(null) + }, [activeQueueSessionKey, editingQueuedPrompt, queueEdit]) // eslint-disable-line react-hooks/exhaustive-deps + + const submitDraft = () => { + if (queueEdit) { + exitQueuedEdit('save') + } else if (busy) { + if (hasComposerPayload) queueCurrentDraft() + else if (queuedPrompts.length > 0) void interruptAndSendNextQueued() + else { + triggerHaptic('cancel') + void Promise.resolve(onCancel()) + } + } else if (!hasComposerPayload && queuedPrompts.length > 0) { + void drainNextQueued() } else if (draft.trim() || attachments.length > 0) { const submitted = draft triggerHaptic('submit') @@ -742,6 +926,7 @@ export function ChatBar({ const controls = ( )} + {activeQueueSessionKey && queuedPrompts.length > 0 && ( +
+ { + if (removeQueuedPrompt(activeQueueSessionKey, id) && queueEdit?.entryId === id) { + exitQueuedEdit('cancel') + } + }} + onEdit={beginQueuedEdit} + onSendNow={id => void sendQueuedNow(id)} + /> +
+ )}
+ {queueEdit && editingQueuedPrompt && ( +
+
Editing queued turn in composer
+
+ + +
+
+ )} {attachments.length > 0 && }
void + onEdit: (entry: QueuedPromptEntry) => void + onSendNow: (id: string) => void +} + +const entryPreview = (entry: QueuedPromptEntry) => + entry.text.trim() || (entry.attachments.length > 0 ? 'Attachment-only turn' : 'Empty turn') + +export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendNow }: QueuePanelProps) { + const [collapsed, setCollapsed] = useState(false) + + if (entries.length === 0) return null + + return ( +
+ + + {!collapsed && ( +
+ {entries.map(entry => { + const isEditing = editingId === entry.id + const attachmentsCount = entry.attachments.length + + return ( +
+ +
+

{entryPreview(entry)}

+ {(attachmentsCount > 0 || isEditing) && ( +
+ {attachmentsCount > 0 && ( + + {attachmentsCount} attachment{attachmentsCount === 1 ? '' : 's'} + + )} + {isEditing && ( + + Editing in composer + + )} +
+ )} +
+
+ + + +
+
+ ) + })} +
+ )} +
+ ) +} diff --git a/apps/desktop/src/app/chat/composer/types.ts b/apps/desktop/src/app/chat/composer/types.ts index 71c601e396..524667e95f 100644 --- a/apps/desktop/src/app/chat/composer/types.ts +++ b/apps/desktop/src/app/chat/composer/types.ts @@ -1,4 +1,5 @@ import type { HermesGateway } from '@/hermes' +import type { ComposerAttachment } from '@/store/composer' import type { DroppedFile } from '../hooks/use-composer-actions' @@ -33,9 +34,10 @@ export interface ChatBarProps { maxRecordingSeconds?: number state: ChatBarState gateway?: HermesGateway | null + queueSessionKey?: string | null sessionId?: string | null cwd?: string | null - onCancel: () => void + onCancel: () => Promise | void onAddContextRef?: (refText: string, label?: string, detail?: string) => void onAddUrl?: (url: string) => void onAttachImageBlob?: (blob: Blob) => Promise | boolean | void @@ -45,7 +47,10 @@ export interface ChatBarProps { onPickFolders?: () => void onPickImages?: () => void onRemoveAttachment?: (id: string) => void - onSubmit: (value: string) => Promise | void + onSubmit: ( + value: string, + options?: { attachments?: ComposerAttachment[]; fromQueue?: boolean } + ) => Promise | boolean onTranscribeAudio?: (audio: Blob) => Promise } diff --git a/apps/desktop/src/app/chat/index.tsx b/apps/desktop/src/app/chat/index.tsx index 0afed13a1a..8786b7bb2a 100644 --- a/apps/desktop/src/app/chat/index.tsx +++ b/apps/desktop/src/app/chat/index.tsx @@ -20,6 +20,7 @@ import { ChevronDown } from '@/lib/icons' import { useIncrementalExternalStoreRuntime } from '@/lib/incremental-external-store-runtime' import { cn } from '@/lib/utils' import { $pinnedSessionIds } from '@/store/layout' +import type { ComposerAttachment } from '@/store/composer' import { $activeSessionId, $awaitingResponse, @@ -51,7 +52,7 @@ interface ChatViewProps extends Omit, 'onSubmit'> { gateway: HermesGateway | null onToggleSelectedPin: () => void onDeleteSelectedSession: () => void - onCancel: () => void + onCancel: () => 
Promise | void onAddContextRef: (refText: string, label?: string, detail?: string) => void onAddUrl: (url: string) => void onBranchInNewChat: (messageId: string) => void @@ -63,7 +64,10 @@ interface ChatViewProps extends Omit, 'onSubmit'> { onPickFolders: () => void onPickImages: () => void onRemoveAttachment: (id: string) => void - onSubmit: (text: string) => Promise | void + onSubmit: ( + text: string, + options?: { attachments?: ComposerAttachment[]; fromQueue?: boolean } + ) => Promise | boolean onThreadMessagesChange: (messages: readonly ThreadMessage[]) => void onEdit: (message: AppendMessage) => Promise onReload: (parentId: string | null) => Promise @@ -311,6 +315,7 @@ export function ChatView({ onRemoveAttachment={onRemoveAttachment} onSubmit={onSubmit} onTranscribeAudio={onTranscribeAudio} + queueSessionKey={selectedSessionId || activeSessionId} sessionId={activeSessionId} state={chatBarState} /> diff --git a/apps/desktop/src/app/desktop-controller.tsx b/apps/desktop/src/app/desktop-controller.tsx index dcc2a76b2d..d4097260a7 100644 --- a/apps/desktop/src/app/desktop-controller.tsx +++ b/apps/desktop/src/app/desktop-controller.tsx @@ -472,7 +472,7 @@ export function DesktopController() { onAttachDroppedItems={composer.attachDroppedItems} onAttachImageBlob={composer.attachImageBlob} onBranchInNewChat={messageId => void branchInNewChat(messageId)} - onCancel={() => void cancelRun()} + onCancel={cancelRun} onDeleteSelectedSession={() => { if (selectedStoredSessionId) { void removeSession(selectedStoredSessionId) diff --git a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts index bee5f78f09..ebb1e7dd6e 100644 --- a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts +++ b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts @@ -71,6 +71,11 @@ interface PromptActionsOptions { ) => ClientSessionState } +interface SubmitTextOptions { + attachments?: ComposerAttachment[] + fromQueue?: 
boolean +} + function renderCommandsCatalog(catalog: CommandsCatalogLike): string { const desktopCatalog = filterDesktopCommandsCatalog(catalog) @@ -153,7 +158,12 @@ export function usePromptActions({ ) const syncImageAttachmentsForSubmit = useCallback( - async (sessionId: string, attachments: ComposerAttachment[]) => { + async ( + sessionId: string, + attachments: ComposerAttachment[], + options: { updateComposerAttachments?: boolean } = {} + ) => { + const updateComposerAttachments = options.updateComposerAttachments ?? true const images = attachments.filter(attachment => attachment.kind === 'image' && attachment.path) for (const attachment of images) { @@ -173,22 +183,25 @@ export function usePromptActions({ const attachedPath = result.path || attachment.path - addComposerAttachment({ - ...attachment, - id: attachment.id, - label: attachedPath ? pathLabel(attachedPath) : attachment.label, - path: attachedPath, - attachedSessionId: sessionId - }) + if (updateComposerAttachments) { + addComposerAttachment({ + ...attachment, + id: attachment.id, + label: attachedPath ? pathLabel(attachedPath) : attachment.label, + path: attachedPath, + attachedSessionId: sessionId + }) + } } }, [requestGateway] ) const submitPromptText = useCallback( - async (rawText: string) => { + async (rawText: string, options?: SubmitTextOptions) => { const visibleText = rawText.trim() - const attachments = $composerAttachments.get() + const usingComposerAttachments = !options?.attachments + const attachments = options?.attachments ?? $composerAttachments.get() const contextRefs = attachments .map(a => a.refText) .filter(Boolean) @@ -200,7 +213,7 @@ export function usePromptActions({ [contextRefs, visibleText].filter(Boolean).join('\n\n') || (hasImage ? 'What do you see in this image?' 
: '') if (!text || busyRef.current) { - return + return false } const optimisticId = `user-${Date.now()}-${Math.random().toString(36).slice(2, 8)}` @@ -232,7 +245,7 @@ export function usePromptActions({ awaitingResponse: true, pendingBranchGroup: null, sawAssistantPayload: false, - interrupted: false + interrupted: state.interrupted }), selectedStoredSessionIdRef.current ) @@ -278,7 +291,7 @@ export function usePromptActions({ releaseBusy() notifyError(err, 'Session unavailable') - return + return false } if (!sessionId) { @@ -286,16 +299,21 @@ export function usePromptActions({ releaseBusy() notify({ kind: 'error', title: 'Session unavailable', message: 'Could not create a new session' }) - return + return false } seedOptimistic(sessionId) } try { - await syncImageAttachmentsForSubmit(sessionId, attachments) + await syncImageAttachmentsForSubmit(sessionId, attachments, { + updateComposerAttachments: usingComposerAttachments + }) await requestGateway('prompt.submit', { session_id: sessionId, text }) - clearComposerAttachments() + + if (usingComposerAttachments) clearComposerAttachments() + + return true } catch (err) { releaseBusy() updateSessionState(sessionId, state => ({ ...state, busy: false, awaitingResponse: false })) @@ -303,10 +321,11 @@ export function usePromptActions({ if (isProviderSetupError(err)) { requestDesktopOnboarding('Add a provider credential before sending your first message.') - return + return false } notifyError(err, 'Prompt failed') + return false } }, [ @@ -477,18 +496,18 @@ export function usePromptActions({ ) const submitText = useCallback( - async (rawText: string) => { + async (rawText: string, options?: SubmitTextOptions) => { const visibleText = rawText.trim() - const attachments = $composerAttachments.get() + const attachments = options?.attachments ?? 
$composerAttachments.get() if (!attachments.length && SLASH_COMMAND_RE.test(visibleText)) { triggerHaptic('selection') await executeSlashCommand(visibleText) - return + return true } - await submitPromptText(rawText) + return await submitPromptText(rawText, options) }, [executeSlashCommand, submitPromptText] ) diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts index 926f934e69..f1685de244 100644 --- a/apps/desktop/src/app/session/hooks/use-session-actions.ts +++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts @@ -7,6 +7,7 @@ import { type ChatMessage, chatMessageText, toChatMessages } from '@/lib/chat-me import { normalizePersonalityValue } from '@/lib/chat-runtime' import { embeddedImageUrls, textWithoutEmbeddedImages } from '@/lib/embedded-images' import { clearComposerAttachments, clearComposerDraft } from '@/store/composer' +import { clearQueuedPrompts } from '@/store/composer-queue' import { $pinnedSessionIds } from '@/store/layout' import { clearNotifications, notify, notifyError } from '@/store/notifications' import { requestDesktopOnboarding } from '@/store/onboarding' @@ -649,6 +650,11 @@ export function useSessionActions({ } await deleteSession(storedSessionId) + clearQueuedPrompts(storedSessionId) + + if (closingRuntimeId) { + clearQueuedPrompts(closingRuntimeId) + } } catch (err) { if (removed) { setSessions(prev => [removed, ...prev]) diff --git a/apps/desktop/src/components/assistant-ui/thread.tsx b/apps/desktop/src/components/assistant-ui/thread.tsx index 6dfae16e7c..d0a039f0f1 100644 --- a/apps/desktop/src/components/assistant-ui/thread.tsx +++ b/apps/desktop/src/components/assistant-ui/thread.tsx @@ -95,6 +95,10 @@ function messageContentText(content: unknown): string { return Array.isArray(content) ? 
content.map(partText).join('').trim() : '' } +const INTERRUPTED_ONLY_RE = /^_?\[interrupted\]_?$/i + +const isInterruptedOnlyMessage = (text: string) => INTERRUPTED_ONLY_RE.test(text.trim()) + function resetStickyState(state: StickyStateFlags) { state.escapedFromLock = false state.isAtBottom = true @@ -368,6 +372,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }> const messageStatus = useAuiState(s => s.message.status?.type) const isPlaceholder = messageStatus === 'running' && content.length === 0 + const interruptedOnly = useMemo(() => isInterruptedOnlyMessage(messageText), [messageText]) if (isPlaceholder) { return null @@ -380,7 +385,10 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }> data-slot="aui_assistant-message-root" >
{hoistedTodos.length > 0 && } @@ -401,7 +409,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
- {messageText.trim().length > 0 && ( + {messageText.trim().length > 0 && !interruptedOnly && ( )} diff --git a/apps/desktop/src/store/composer-queue.test.ts b/apps/desktop/src/store/composer-queue.test.ts new file mode 100644 index 0000000000..9f15232aec --- /dev/null +++ b/apps/desktop/src/store/composer-queue.test.ts @@ -0,0 +1,102 @@ +import { beforeEach, describe, expect, it } from 'vitest' + +import type { ComposerAttachment } from './composer' +import { + $queuedPromptsBySession, + clearQueuedPrompts, + dequeueQueuedPrompt, + enqueueQueuedPrompt, + getQueuedPrompts, + removeQueuedPrompt, + updateQueuedPrompt, + updateQueuedPromptText +} from './composer-queue' + +const SESSION_KEY = 'session-abc' +const QUEUE_STORAGE_KEY = 'hermes.desktop.composerQueue.v1' + +function attachment(id: string, kind: ComposerAttachment['kind'] = 'file'): ComposerAttachment { + return { + id, + kind, + label: id, + refText: `@file:${id}` + } +} + +describe('composer queue store', () => { + beforeEach(() => { + window.localStorage.removeItem(QUEUE_STORAGE_KEY) + $queuedPromptsBySession.set({}) + }) + + it('queues prompts in FIFO order', () => { + enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'first' }) + enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'second' }) + + expect(dequeueQueuedPrompt(SESSION_KEY)?.text).toBe('first') + expect(dequeueQueuedPrompt(SESSION_KEY)?.text).toBe('second') + expect(dequeueQueuedPrompt(SESSION_KEY)).toBeNull() + }) + + it('clones attachments when queueing', () => { + const source = [attachment('a-1')] + const queued = enqueueQueuedPrompt(SESSION_KEY, { attachments: source, text: 'check clones' }) + + expect(queued).not.toBeNull() + expect(getQueuedPrompts(SESSION_KEY)[0]?.attachments[0]).toEqual(source[0]) + expect(getQueuedPrompts(SESSION_KEY)[0]?.attachments[0]).not.toBe(source[0]) + }) + + it('updates and removes queued entries by id', () => { + const first = enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'draft 
one' }) + const second = enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'draft two' }) + + expect(first).not.toBeNull() + expect(second).not.toBeNull() + + expect(updateQueuedPromptText(SESSION_KEY, first!.id, 'draft one edited')).toBe(true) + expect(getQueuedPrompts(SESSION_KEY).map(entry => entry.text)).toEqual(['draft one edited', 'draft two']) + + expect(removeQueuedPrompt(SESSION_KEY, first!.id)).toBe(true) + expect(getQueuedPrompts(SESSION_KEY).map(entry => entry.text)).toEqual(['draft two']) + }) + + it('updates queued text and attachment snapshot', () => { + const first = enqueueQueuedPrompt(SESSION_KEY, { attachments: [attachment('f-1')], text: 'draft one' }) + const editedAttachments = [attachment('f-2'), attachment('f-3', 'image')] + + expect(first).not.toBeNull() + expect( + updateQueuedPrompt(SESSION_KEY, first!.id, { + attachments: editedAttachments, + text: 'edited text' + }) + ).toBe(true) + + const queue = getQueuedPrompts(SESSION_KEY) + expect(queue[0]?.text).toBe('edited text') + expect(queue[0]?.attachments).toEqual(editedAttachments) + expect(queue[0]?.attachments[0]).not.toBe(editedAttachments[0]) + }) + + it('clears queue state for a session', () => { + enqueueQueuedPrompt(SESSION_KEY, { attachments: [attachment('img-1', 'image')], text: 'queued' }) + + clearQueuedPrompts(SESSION_KEY) + + expect(getQueuedPrompts(SESSION_KEY)).toEqual([]) + expect($queuedPromptsBySession.get()[SESSION_KEY]).toBeUndefined() + expect(window.localStorage.getItem(QUEUE_STORAGE_KEY)).toBeNull() + }) + + it('persists queue entries into local storage', () => { + enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'persist me' }) + + const raw = window.localStorage.getItem(QUEUE_STORAGE_KEY) + expect(raw).toBeTruthy() + + const parsed = JSON.parse(String(raw)) as Record + expect(parsed[SESSION_KEY]?.[0]?.text).toBe('persist me') + }) +}) diff --git a/apps/desktop/src/store/composer-queue.ts b/apps/desktop/src/store/composer-queue.ts new file mode 
100644 index 0000000000..d2a3f228ff --- /dev/null +++ b/apps/desktop/src/store/composer-queue.ts @@ -0,0 +1,158 @@ +import { atom } from 'nanostores' + +import type { ComposerAttachment } from './composer' + +export interface QueuedPromptEntry { + id: string + text: string + attachments: ComposerAttachment[] + queuedAt: number +} + +type QueueState = Record + +const STORAGE_KEY = 'hermes.desktop.composerQueue.v1' + +const load = (): QueueState => { + if (typeof window === 'undefined') return {} + try { + const raw = window.localStorage.getItem(STORAGE_KEY) + const parsed = raw ? JSON.parse(raw) : null + + return parsed && typeof parsed === 'object' && !Array.isArray(parsed) ? (parsed as QueueState) : {} + } catch { + return {} + } +} + +const save = (state: QueueState) => { + if (typeof window === 'undefined') return + try { + if (Object.keys(state).length === 0) window.localStorage.removeItem(STORAGE_KEY) + else window.localStorage.setItem(STORAGE_KEY, JSON.stringify(state)) + } catch { + // best-effort: storage may be unavailable, queue still works in-memory + } +} + +export const $queuedPromptsBySession = atom(load()) + +const writeSession = (sid: string, queue: QueuedPromptEntry[]) => { + const current = $queuedPromptsBySession.get() + const next = { ...current } + + if (queue.length === 0) delete next[sid] + else next[sid] = queue + + $queuedPromptsBySession.set(next) + save(next) +} + +const sidOf = (key: string | null | undefined): null | string => { + const trimmed = key?.trim() + + return trimmed ? trimmed : null +} + +const queueFor = (sid: string) => $queuedPromptsBySession.get()[sid] ?? [] + +const nextId = () => `queued-${Date.now()}-${Math.random().toString(36).slice(2, 8)}` + +const cloneAttachments = (attachments: ComposerAttachment[]) => attachments.map(a => ({ ...a })) + +export const getQueuedPrompts = (key: string | null | undefined): QueuedPromptEntry[] => { + const sid = sidOf(key) + + return sid ? 
queueFor(sid) : [] +} + +export const enqueueQueuedPrompt = ( + key: string | null | undefined, + payload: { text: string; attachments: ComposerAttachment[] } +): null | QueuedPromptEntry => { + const sid = sidOf(key) + + if (!sid) return null + + const entry: QueuedPromptEntry = { + id: nextId(), + text: payload.text, + attachments: cloneAttachments(payload.attachments), + queuedAt: Date.now() + } + + writeSession(sid, [...queueFor(sid), entry]) + + return entry +} + +export const dequeueQueuedPrompt = (key: string | null | undefined): null | QueuedPromptEntry => { + const sid = sidOf(key) + + if (!sid) return null + + const [head, ...rest] = queueFor(sid) + + if (!head) return null + + writeSession(sid, rest) + + return head +} + +export const removeQueuedPrompt = (key: string | null | undefined, id: string): boolean => { + const sid = sidOf(key) + + if (!sid) return false + + const queue = queueFor(sid) + const next = queue.filter(e => e.id !== id) + + if (next.length === queue.length) return false + + writeSession(sid, next) + + return true +} + +export const updateQueuedPrompt = ( + key: string | null | undefined, + id: string, + update: { text: string; attachments?: ComposerAttachment[] } +): boolean => { + const sid = sidOf(key) + + if (!sid) return false + + const queue = queueFor(sid) + let changed = false + + const next = queue.map(entry => { + if (entry.id !== id) return entry + + const attachments = update.attachments ? 
cloneAttachments(update.attachments) : entry.attachments + + if (entry.text === update.text && !update.attachments) return entry + + changed = true + + return { ...entry, text: update.text, attachments } + }) + + if (!changed) return false + + writeSession(sid, next) + + return true +} + +export const updateQueuedPromptText = (key: string | null | undefined, id: string, text: string): boolean => + updateQueuedPrompt(key, id, { text }) + +export const clearQueuedPrompts = (key: string | null | undefined) => { + const sid = sidOf(key) + + if (!sid || !(sid in $queuedPromptsBySession.get())) return + + writeSession(sid, []) +} diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 288ae2614b..09e870543a 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -39,6 +39,10 @@ if [ "$(id -u)" = "0" ]; then # by the mapped user on the host side. chown -R hermes:hermes "$HERMES_HOME" 2>/dev/null || \ echo "Warning: chown failed (rootless container?) — continuing anyway" + # The .venv must also be re-chowned when UID is remapped, otherwise + # lazy_deps.py cannot install platform packages (discord.py, etc.). + chown -R hermes:hermes "$INSTALL_DIR/.venv" 2>/dev/null || \ + echo "Warning: chown .venv failed (rootless container?) 
— continuing anyway" fi # Ensure config.yaml is readable by the hermes runtime user even if it was diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py index 118eb688cc..bd731a7ab5 100644 --- a/gateway/platforms/signal.py +++ b/gateway/platforms/signal.py @@ -446,7 +446,9 @@ class SignalAdapter(BasePlatformAdapter): if sent_msg and isinstance(sent_msg, dict): dest = sent_msg.get("destinationNumber") or sent_msg.get("destination") sent_ts = sent_msg.get("timestamp") - if dest == self._account_normalized: + sent_msg_group_info = sent_msg.get("groupInfo") or {} + sent_msg_group_id = sent_msg_group_info.get("groupId") if sent_msg_group_info else None + if dest == self._account_normalized or sent_msg_group_id: # Check if this is an echo of our own outbound reply if sent_ts and sent_ts in self._recent_sent_timestamps: self._recent_sent_timestamps.discard(sent_ts) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 415ddb5608..db25b87497 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -2772,7 +2772,7 @@ class TelegramAdapter(BasePlatformAdapter): {"thread_id": str(thread_id)}, ) ) - await self._bot.send_message(**send_kwargs) + await self._send_message_with_thread_fallback(**send_kwargs) except Exception as exc: logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True) return diff --git a/gateway/platforms/wecom.py b/gateway/platforms/wecom.py index d7a5c1d9a4..96769ea59b 100644 --- a/gateway/platforms/wecom.py +++ b/gateway/platforms/wecom.py @@ -345,6 +345,7 @@ class WeComAdapter(BasePlatformAdapter): try: await self._open_connection() backoff_idx = 0 + self._mark_connected() logger.info("[%s] Reconnected", self.name) except Exception as reconnect_exc: logger.warning("[%s] Reconnect failed: %s", self.name, reconnect_exc) diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py index 2fb6fc1332..29b78d75d0 100644 --- 
a/gateway/platforms/whatsapp.py +++ b/gateway/platforms/whatsapp.py @@ -494,12 +494,15 @@ class WhatsAppAdapter(BasePlatformAdapter): # plain executable path. _npm_bin = shutil.which("npm") or "npm" try: + # Read timeout from environment variable, default to 300 seconds (5 minutes) + # to accommodate slower systems like Unraid NAS + npm_install_timeout = int(os.environ.get("WHATSAPP_NPM_INSTALL_TIMEOUT", "300")) install_result = subprocess.run( [_npm_bin, "install", "--silent"], cwd=str(bridge_dir), capture_output=True, text=True, - timeout=60, + timeout=npm_install_timeout, ) if install_result.returncode != 0: print(f"[{self.name}] npm install failed: {install_result.stderr}") diff --git a/gateway/run.py b/gateway/run.py index bda0cbf983..46c508e4bd 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -7543,6 +7543,7 @@ class GatewayRunner: hook_ctx = { "platform": source.platform.value if source.platform else "", "user_id": source.user_id, + "chat_id": source.chat_id or "", "session_id": session_entry.session_id, "message": message_text[:500], } diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 90d6a63935..88acd1cd43 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -284,7 +284,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { ), "alibaba": ProviderConfig( id="alibaba", - name="Alibaba Cloud (DashScope)", + name="Qwen Cloud", auth_type="api_key", inference_base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1", api_key_env_vars=("DASHSCOPE_API_KEY",), diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 89f0f487cf..52e08cb2cb 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -735,15 +735,8 @@ DEFAULT_CONFIG = { # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API). # cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored. 
- # long_lived_prefix: when true (default), Claude on Anthropic / OpenRouter / Nous - # Portal uses a split layout: tools[-1] + stable system prefix at long_lived_ttl - # (cross-session cache), last 2 messages at cache_ttl (within-session rolling). - # Set false to keep the legacy "system + last 3 messages" single-tier layout. - # long_lived_ttl: TTL for the cross-session prefix tier ("5m" or "1h"; default "1h"). "prompt_caching": { "cache_ttl": "5m", - "long_lived_prefix": True, - "long_lived_ttl": "1h", }, # OpenRouter-specific settings. diff --git a/hermes_cli/goals.py b/hermes_cli/goals.py index 9e8742e08a..6a8a2ae971 100644 --- a/hermes_cli/goals.py +++ b/hermes_cli/goals.py @@ -307,7 +307,7 @@ def judge_goal( return "continue", "empty response (nothing to evaluate)", False try: - from agent.auxiliary_client import get_text_auxiliary_client + from agent.auxiliary_client import get_auxiliary_extra_body, get_text_auxiliary_client except Exception as exc: logger.debug("goal judge: auxiliary client import failed: %s", exc) return "continue", "auxiliary client unavailable", False @@ -336,6 +336,7 @@ def judge_goal( temperature=0, max_tokens=200, timeout=timeout, + extra_body=get_auxiliary_extra_body() or None, ) except Exception as exc: logger.info("goal judge: API call failed (%s) — falling through to continue", exc) diff --git a/hermes_cli/kanban_specify.py b/hermes_cli/kanban_specify.py index d069e5ee1a..0d57fbb250 100644 --- a/hermes_cli/kanban_specify.py +++ b/hermes_cli/kanban_specify.py @@ -155,7 +155,7 @@ def specify_task( ) try: - from agent.auxiliary_client import get_text_auxiliary_client + from agent.auxiliary_client import get_auxiliary_extra_body, get_text_auxiliary_client except Exception as exc: # pragma: no cover — import smoke test logger.debug("specify: auxiliary client import failed: %s", exc) return SpecifyOutcome(task_id, False, "auxiliary client unavailable") @@ -187,6 +187,7 @@ def specify_task( temperature=0.3, max_tokens=1500, 
timeout=timeout or 120, + extra_body=get_auxiliary_extra_body() or None, ) except Exception as exc: logger.info( diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 5f355d03b9..eb55b59ee5 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -908,10 +908,10 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("lmstudio", "LM Studio", "LM Studio (local desktop app with built-in model server)"), ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"), ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"), + ProviderEntry("alibaba", "Qwen Cloud", "Qwen Cloud / DashScope Coding (Qwen + multi-provider)"), ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"), ProviderEntry("tencent-tokenhub", "Tencent TokenHub", "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"), ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"), - ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"), ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"), ProviderEntry("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"), ProviderEntry("huggingface", "Hugging Face", "Hugging Face Inference Providers (20+ open models)"), @@ -926,7 +926,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"), ProviderEntry("minimax-oauth", "MiniMax (OAuth)", "MiniMax via OAuth browser login (Coding Plan, minimax.io)"), ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"), - ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"), ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"), ProviderEntry("arcee", 
"Arcee AI", "Arcee AI (Trinity models — direct API)"), ProviderEntry("gmi", "GMI Cloud", "GMI Cloud (multi-model direct API)"), @@ -936,6 +935,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [ ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"), ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"), ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway"), + ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"), ] # Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/ diff --git a/plugins/model-providers/nous/__init__.py b/plugins/model-providers/nous/__init__.py index f89e56c23a..5a61952d74 100644 --- a/plugins/model-providers/nous/__init__.py +++ b/plugins/model-providers/nous/__init__.py @@ -2,6 +2,7 @@ from typing import Any +from agent.portal_tags import nous_portal_tags from providers import register_provider from providers.base import ProviderProfile @@ -12,7 +13,7 @@ class NousProfile(ProviderProfile): def build_extra_body( self, *, session_id: str | None = None, **context ) -> dict[str, Any]: - return {"tags": ["product=hermes-agent"]} + return {"tags": nous_portal_tags()} def build_api_kwargs_extras( self, diff --git a/plugins/platforms/line/adapter.py b/plugins/platforms/line/adapter.py index 67582ffae8..db5d3564d3 100644 --- a/plugins/platforms/line/adapter.py +++ b/plugins/platforms/line/adapter.py @@ -959,7 +959,7 @@ class LineAdapter(BasePlatformAdapter): if chat_type == "dm" and self._client: asyncio.create_task(self._client.loading(chat_id)) - source_obj = self.create_source( + source_obj = self.build_source( chat_id=chat_id, chat_type=chat_type, user_id=user_id, diff --git a/run_agent.py b/run_agent.py index a8b071c872..f0597c9088 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1454,15 +1454,6 @@ class AIAgent: # 1h tier costs 2x on write vs 
1.25x for 5m, but amortizes across long # sessions with >5-minute pauses between turns (#14971). self._cache_ttl = "5m" - # Long-lived prefix caching: when enabled and supported by the - # current provider, splits the system prompt into a stable prefix - # (cached cross-session at 1h TTL) and a volatile suffix - # (memory/timestamp — never cached), and attaches a 1h cache_control - # marker to the last tool in the schema array. Restricted to - # Claude on Anthropic / OpenRouter / Nous Portal; see - # ``_supports_long_lived_anthropic_cache``. - self._use_long_lived_prefix_cache = False - self._long_lived_cache_ttl = "1h" try: from hermes_cli.config import load_config as _load_pc_cfg @@ -1470,12 +1461,6 @@ class AIAgent: _ttl = _pc_cfg.get("cache_ttl", "5m") if _ttl in {"5m", "1h"}: self._cache_ttl = _ttl - _ll_enabled = _pc_cfg.get("long_lived_prefix", True) - _ll_ttl = _pc_cfg.get("long_lived_ttl", "1h") - if _ll_ttl in ("5m", "1h"): - self._long_lived_cache_ttl = _ll_ttl - if _ll_enabled and self._use_prompt_caching and self._supports_long_lived_anthropic_cache(): - self._use_long_lived_prefix_cache = True except Exception: pass @@ -2480,7 +2465,6 @@ class AIAgent: "client_kwargs": dict(self._client_kwargs), "use_prompt_caching": self._use_prompt_caching, "use_native_cache_layout": self._use_native_cache_layout, - "use_long_lived_prefix_cache": self._use_long_lived_prefix_cache, # Context engine state that _try_activate_fallback() overwrites. # Use getattr for model/base_url/api_key/provider since plugin # engines may not have these (they're ContextCompressor-specific). @@ -2647,6 +2631,11 @@ class AIAgent: old_model = self.model old_provider = self.provider + # Clear the per-config context_length override so the new model's + # actual context window is resolved via get_model_context_length() + # instead of inheriting the stale value from the previous model. 
+ self._config_context_length = None + # ── Swap core runtime fields ── self.model = new_model self.provider = new_provider @@ -2711,15 +2700,6 @@ class AIAgent: model=new_model, ) ) - self._use_long_lived_prefix_cache = bool( - self._use_prompt_caching - and self._supports_long_lived_anthropic_cache( - provider=new_provider, - base_url=self.base_url, - api_mode=api_mode, - model=new_model, - ) - ) # ── LM Studio: preload before probing context length ── self._ensure_lmstudio_runtime_loaded() @@ -2768,7 +2748,6 @@ class AIAgent: "client_kwargs": dict(self._client_kwargs), "use_prompt_caching": self._use_prompt_caching, "use_native_cache_layout": self._use_native_cache_layout, - "use_long_lived_prefix_cache": self._use_long_lived_prefix_cache, "compressor_model": getattr(_cc, "model", self.model) if _cc else self.model, "compressor_base_url": getattr(_cc, "base_url", self.base_url) if _cc else self.base_url, "compressor_api_key": getattr(_cc, "api_key", "") if _cc else "", @@ -3579,73 +3558,6 @@ class AIAgent: return False, False - def _supports_long_lived_anthropic_cache( - self, - *, - provider: Optional[str] = None, - base_url: Optional[str] = None, - api_mode: Optional[str] = None, - model: Optional[str] = None, - ) -> bool: - """Decide whether the long-lived (1h cross-session) cache layout applies. 
- - Narrower than ``_anthropic_prompt_cache_policy`` — only enabled - for Claude models on the four endpoints whose cross-session - cache_control behavior we have explicitly validated: - - * Native Anthropic API (``api_mode == 'anthropic_messages'`` + - host ``api.anthropic.com``) - * Anthropic OAuth subscription (same transport as native API) - * OpenRouter (``base_url`` contains ``openrouter.ai``) - * Nous Portal (``base_url`` contains ``nousresearch`` — proxies - to OpenRouter, so identical wire-format) - - All four honour ``cache_control`` on both the tools array and the - first system content block, and bill cross-session cache reads at - the documented 0.1× rate. - - Other endpoints covered by the standard ``system_and_3`` policy - (third-party Anthropic gateways, MiniMax, opencode-go Qwen, etc.) - keep that layout — they support cache_control but their behavior - with mixed-TTL multi-block system content has not been validated - against this codebase. - """ - eff_provider = (provider if provider is not None else self.provider) or "" - eff_base_url = base_url if base_url is not None else (self.base_url or "") - eff_api_mode = api_mode if api_mode is not None else (self.api_mode or "") - eff_model = (model if model is not None else self.model) or "" - - model_lower = eff_model.lower() - is_claude = "claude" in model_lower - is_nous_portal = "nousresearch" in eff_base_url.lower() - - # Nous Portal: Claude AND Qwen both get long-lived caching. - # Portal proxies to OpenRouter with identical cache_control - # semantics; any model on Portal that accepts envelope-layout - # markers via _anthropic_prompt_cache_policy also benefits from - # the documented 1h cross-session TTL. 
- if is_nous_portal and (is_claude or "qwen" in model_lower): - return True - - if not is_claude: - return False - - # Native Anthropic + Anthropic OAuth subscription - if eff_api_mode == "anthropic_messages": - if eff_provider == "anthropic" or base_url_hostname(eff_base_url) == "api.anthropic.com": - return True - - # OpenRouter - if base_url_host_matches(eff_base_url, "openrouter.ai"): - return True - - # Nous Portal — front-ends OpenRouter behind the scenes; identical - # wire format and cache_control semantics. - if is_nous_portal: - return True - - return False - @staticmethod def _model_requires_responses_api(model: str) -> bool: """Return True for models that require the Responses API path. @@ -5894,26 +5806,19 @@ class AIAgent: """Assemble the system prompt as three ordered parts. Returns a dict with three keys: - * ``stable`` — content that is byte-stable across sessions for a - given user config: identity, tool guidance, skills prompt, + * ``stable`` — identity, tool guidance, skills prompt, environment hints, platform hints, model-family operational - guidance. Eligible for cross-session 1h prompt caching when - placed as a separate Anthropic content block (see - ``apply_anthropic_cache_control_long_lived``). - * ``context`` — context files (AGENTS.md, .cursorrules, etc.) and - caller-supplied system_message. Stable within a session but may - change between sessions when files are edited or the cwd - differs. Cached within-session via the rolling messages - breakpoint (5m TTL); not promoted to the long-lived tier so - edits don't poison the cross-session cache. - * ``volatile`` — content that changes on most turns/sessions: - memory snapshot, user profile, external memory provider block, - timestamp line. Never marked for caching. + guidance. + * ``context`` — context files (AGENTS.md, .cursorrules, etc.) + and caller-supplied system_message. + * ``volatile`` — memory snapshot, user profile, external + memory provider block, timestamp line. 
- Joined ``stable\\n\\ncontext\\n\\nvolatile`` produces the same - logical content the old single-string builder produced, with the - guarantee that volatile content is at the end (cache-friendly - ordering for any provider that does prefix caching). + Joined into a single string by ``_build_system_prompt`` and + cached on ``_cached_system_prompt`` for the lifetime of the + AIAgent. Hermes never re-renders parts of this string mid- + session — that's the only way to keep upstream prompt caches + warm across turns. """ # ── Stable tier ──────────────────────────────────────────────── stable_parts: List[str] = [] @@ -6115,9 +6020,10 @@ class AIAgent: Layers are ordered cache-friendly: stable identity/guidance first, then session-stable context files, then per-call volatile content - (memory, USER profile, timestamp). The split is exposed via - ``_build_system_prompt_parts`` for the long-lived prompt-caching - path (Claude on Anthropic / OpenRouter / Nous Portal). + (memory, USER profile, timestamp). The whole string is treated as + one cached block — Hermes never rebuilds or reinjects parts of it + mid-session, which is the only way to keep upstream prompt caches + warm across turns. """ parts = self._build_system_prompt_parts(system_message=system_message) joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p) @@ -8817,6 +8723,11 @@ class AIAgent: fb_api_mode = "bedrock_converse" old_model = self.model + + # Clear the per-config context_length override so the fallback + # model's actual context window is resolved instead of inheriting + # the stale value from the previous model. See #22387. 
+ self._config_context_length = None self.model = fb_model self.provider = fb_provider self.base_url = fb_base_url @@ -8879,15 +8790,6 @@ class AIAgent: model=fb_model, ) ) - self._use_long_lived_prefix_cache = bool( - self._use_prompt_caching - and self._supports_long_lived_anthropic_cache( - provider=fb_provider, - base_url=fb_base_url, - api_mode=fb_api_mode, - model=fb_model, - ) - ) # LM Studio: preload before probing the fallback's context length. self._ensure_lmstudio_runtime_loaded() @@ -8964,16 +8866,6 @@ class AIAgent: "use_native_cache_layout", self.api_mode == "anthropic_messages" and self.provider == "anthropic", ) - # Long-lived prefix flag was added later — restore False on - # snapshots predating the new field, then re-evaluate against - # the restored provider/model in case the user had it enabled. - self._use_long_lived_prefix_cache = rt.get( - "use_long_lived_prefix_cache", - bool( - self._use_prompt_caching - and self._supports_long_lived_anthropic_cache() - ), - ) # ── Rebuild client for the primary provider ── if self.api_mode == "anthropic_messages": @@ -9551,19 +9443,7 @@ class AIAgent: def _build_api_kwargs(self, api_messages: list) -> dict: """Build the keyword arguments dict for the active API mode.""" - # Resolve the tools array exactly once. When the long-lived - # prefix-cache layout is active (Claude on Anthropic / OpenRouter - # / Nous Portal), attach a 1h cache_control marker to the last - # tool — this caches the entire tools array cross-session via - # Anthropic's tools→system→messages prefix order. The function - # returns a deep copy, so self.tools is never mutated. 
- if self._use_long_lived_prefix_cache and self.tools: - from agent.prompt_caching import mark_tools_for_long_lived_cache - tools_for_api = mark_tools_for_long_lived_cache( - self.tools, long_lived_ttl=self._long_lived_cache_ttl, - ) - else: - tools_for_api = self.tools + tools_for_api = self.tools if self.api_mode == "anthropic_messages": _transport = self._get_transport() @@ -11662,7 +11542,8 @@ class AIAgent: "effort": "medium" } if _is_nous: - summary_extra_body["tags"] = ["product=hermes-agent"] + from agent.portal_tags import nous_portal_tags as _portal_tags + summary_extra_body["tags"] = _portal_tags() if self.api_mode == "codex_responses": codex_kwargs = self._build_api_kwargs(api_messages) @@ -12423,36 +12304,21 @@ class AIAgent: # External recall context is injected into the user message, not the system # prompt, so the stable cache prefix remains unchanged. # - # When the long-lived prefix-cache layout is active (Claude on - # Anthropic / OpenRouter / Nous Portal), we build the system - # message as a *list of content blocks*: [stable, context, - # volatile, ephemeral?]. Block 0 (stable) gets the 1h - # cache_control marker further down via - # apply_anthropic_cache_control_long_lived; blocks 1-3 are - # cached only via the rolling messages window at 5m. # NOTE: Plugin context from pre_llm_call hooks is injected into the # user message (see injection block above), NOT the system prompt. # This is intentional — system prompt modifications break the prompt # cache prefix. The system prompt is reserved for Hermes internals. 
- if self._use_long_lived_prefix_cache: - _sys_parts = self._build_system_prompt_parts(system_message=system_message) - _sys_blocks: list = [] - if _sys_parts.get("stable"): - _sys_blocks.append({"type": "text", "text": _sys_parts["stable"]}) - if _sys_parts.get("context"): - _sys_blocks.append({"type": "text", "text": _sys_parts["context"]}) - if _sys_parts.get("volatile"): - _sys_blocks.append({"type": "text", "text": _sys_parts["volatile"]}) - if self.ephemeral_system_prompt: - _sys_blocks.append({"type": "text", "text": self.ephemeral_system_prompt}) - if _sys_blocks: - api_messages = [{"role": "system", "content": _sys_blocks}] + api_messages - else: - effective_system = active_system_prompt or "" - if self.ephemeral_system_prompt: - effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip() - if effective_system: - api_messages = [{"role": "system", "content": effective_system}] + api_messages + # + # Hermes invariant: the system prompt is built ONCE per session + # (cached on ``_cached_system_prompt``) and replayed verbatim on + # every turn. We send it as a single content string so the + # bytes are byte-stable across turns and upstream prompt caches + # stay warm. + effective_system = active_system_prompt or "" + if self.ephemeral_system_prompt: + effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip() + if effective_system: + api_messages = [{"role": "system", "content": effective_system}] + api_messages # Inject ephemeral prefill messages right after the system prompt # but before conversation history. Same API-call-time-only pattern. @@ -12466,29 +12332,13 @@ class AIAgent: # gateways. Auto-detected: if ``_use_prompt_caching`` is set, # inject cache_control breakpoints (system + last 3 messages) # to reduce input token costs by ~75% on multi-turn - # conversations. Layout is chosen per endpoint by - # ``_anthropic_prompt_cache_policy``. 
- # - # Long-lived prefix layout (prefix_and_2): stable system block - # gets 1h marker + last 2 messages get 5m markers. Tools - # array's last entry is marked separately at API-call kwargs - # build time (see ``_build_api_kwargs`` and - # ``mark_tools_for_long_lived_cache``). + # conversations. if self._use_prompt_caching: - if self._use_long_lived_prefix_cache: - from agent.prompt_caching import apply_anthropic_cache_control_long_lived - api_messages = apply_anthropic_cache_control_long_lived( - api_messages, - long_lived_ttl=self._long_lived_cache_ttl, - rolling_ttl=self._cache_ttl, - native_anthropic=self._use_native_cache_layout, - ) - else: - api_messages = apply_anthropic_cache_control( - api_messages, - cache_ttl=self._cache_ttl, - native_anthropic=self._use_native_cache_layout, - ) + api_messages = apply_anthropic_cache_control( + api_messages, + cache_ttl=self._cache_ttl, + native_anthropic=self._use_native_cache_layout, + ) # Safety net: strip orphaned tool results / add stubs for missing # results before sending to the API. Runs unconditionally — not @@ -14442,7 +14292,7 @@ class AIAgent: _ra_raw = _resp_headers.get("retry-after") or _resp_headers.get("Retry-After") if _ra_raw: try: - _retry_after = min(int(_ra_raw), 120) # Cap at 2 minutes + _retry_after = min(float(_ra_raw), 120) # Cap at 2 minutes except (TypeError, ValueError): pass wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0) diff --git a/scripts/install.sh b/scripts/install.sh index aaa810f3c8..72cc81637d 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -890,7 +890,7 @@ clone_repo() { stash_name="hermes-install-autostash-$(date -u +%Y%m%d-%H%M%S)" log_info "Local changes detected, stashing before update..." 
git stash push --include-untracked -m "$stash_name" - autostash_ref="$(git rev-parse --verify refs/stash)" + autostash_ref="stash@{0}" fi git fetch origin diff --git a/tests/agent/test_portal_tags.py b/tests/agent/test_portal_tags.py new file mode 100644 index 0000000000..7c873ef0f6 --- /dev/null +++ b/tests/agent/test_portal_tags.py @@ -0,0 +1,61 @@ +"""Tests for agent.portal_tags — Nous Portal request tag contract.""" + +from __future__ import annotations + + +def test_hermes_client_tag_includes_current_version(): + """The client tag must reflect hermes_cli.__version__ verbatim.""" + from hermes_cli import __version__ + from agent.portal_tags import hermes_client_tag + + assert hermes_client_tag() == f"client=hermes-client-v{__version__}" + + +def test_hermes_client_tag_format(): + """The client tag has the exact shape Nous Portal expects.""" + from agent.portal_tags import hermes_client_tag + + tag = hermes_client_tag() + assert tag.startswith("client=hermes-client-v") + # No spaces, no commas — single tag value + assert " " not in tag + assert "," not in tag + + +def test_nous_portal_tags_contains_product_and_client(): + """Every Nous Portal request gets BOTH the product tag and the version tag.""" + from agent.portal_tags import hermes_client_tag, nous_portal_tags + + tags = nous_portal_tags() + assert "product=hermes-agent" in tags + assert hermes_client_tag() in tags + assert len(tags) == 2 + + +def test_nous_portal_tags_returns_fresh_list(): + """Callers mutate the returned list; we must not share state across calls.""" + from agent.portal_tags import nous_portal_tags + + a = nous_portal_tags() + a.append("client=test-mutation") + b = nous_portal_tags() + assert "client=test-mutation" not in b + + +def test_auxiliary_client_nous_extra_body_uses_helper(): + """auxiliary_client.NOUS_EXTRA_BODY must match the canonical helper output.""" + from agent.auxiliary_client import NOUS_EXTRA_BODY + from agent.portal_tags import nous_portal_tags + + assert 
NOUS_EXTRA_BODY == {"tags": nous_portal_tags()} + + +def test_nous_provider_profile_uses_helper(): + """The Nous provider profile (main agent loop) must use the canonical tags.""" + from agent.portal_tags import nous_portal_tags + from providers import get_provider_profile + + profile = get_provider_profile("nous") + assert profile is not None + body = profile.build_extra_body() + assert body["tags"] == nous_portal_tags() diff --git a/tests/agent/test_prompt_caching.py b/tests/agent/test_prompt_caching.py index 9d989571b5..f6f3e9f0a3 100644 --- a/tests/agent/test_prompt_caching.py +++ b/tests/agent/test_prompt_caching.py @@ -6,8 +6,6 @@ import pytest from agent.prompt_caching import ( _apply_cache_marker, apply_anthropic_cache_control, - apply_anthropic_cache_control_long_lived, - mark_tools_for_long_lived_cache, ) @@ -143,132 +141,3 @@ class TestApplyAnthropicCacheControl: elif "cache_control" in msg: count += 1 assert count <= 4 - - -class TestMarkToolsForLongLivedCache: - def test_returns_unchanged_for_empty_tools(self): - assert mark_tools_for_long_lived_cache(None) is None - assert mark_tools_for_long_lived_cache([]) == [] - - def test_marks_only_last_tool(self): - tools = [ - {"type": "function", "function": {"name": "a"}}, - {"type": "function", "function": {"name": "b"}}, - {"type": "function", "function": {"name": "c"}}, - ] - out = mark_tools_for_long_lived_cache(tools) - assert "cache_control" not in out[0] - assert "cache_control" not in out[1] - assert out[2]["cache_control"] == {"type": "ephemeral", "ttl": "1h"} - - def test_does_not_mutate_input(self): - tools = [{"type": "function", "function": {"name": "a"}}] - mark_tools_for_long_lived_cache(tools) - assert "cache_control" not in tools[0] - - def test_5m_ttl_drops_ttl_field(self): - tools = [{"type": "function", "function": {"name": "a"}}] - out = mark_tools_for_long_lived_cache(tools, long_lived_ttl="5m") - assert out[0]["cache_control"] == {"type": "ephemeral"} - - -class 
TestApplyAnthropicCacheControlLongLived: - def test_empty_messages(self): - assert apply_anthropic_cache_control_long_lived([]) == [] - - def test_marks_first_block_of_split_system(self): - msgs = [ - {"role": "system", "content": [ - {"type": "text", "text": "STABLE"}, - {"type": "text", "text": "CONTEXT"}, - {"type": "text", "text": "VOLATILE"}, - ]}, - {"role": "user", "content": "msg1"}, - {"role": "assistant", "content": "msg2"}, - ] - out = apply_anthropic_cache_control_long_lived(msgs) - sys_blocks = out[0]["content"] - assert sys_blocks[0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"} - assert "cache_control" not in sys_blocks[1] - assert "cache_control" not in sys_blocks[2] - - def test_rolling_marker_on_last_2_messages(self): - msgs = [ - {"role": "system", "content": [{"type": "text", "text": "S"}]}, - {"role": "user", "content": "u1"}, - {"role": "assistant", "content": "a1"}, - {"role": "user", "content": "u2"}, - {"role": "assistant", "content": "a2"}, - ] - out = apply_anthropic_cache_control_long_lived(msgs) - - def has_marker(m): - c = m.get("content") - if isinstance(c, list) and c and isinstance(c[-1], dict): - return "cache_control" in c[-1] - return "cache_control" in m - - # u1 and a1 (older messages) should NOT be marked - assert not has_marker(out[1]) - assert not has_marker(out[2]) - # u2 and a2 (last 2) SHOULD be marked - assert has_marker(out[3]) - assert has_marker(out[4]) - - def test_rolling_marker_uses_5m_ttl(self): - msgs = [ - {"role": "system", "content": [{"type": "text", "text": "S"}]}, - {"role": "user", "content": "u1"}, - {"role": "assistant", "content": "a1"}, - ] - out = apply_anthropic_cache_control_long_lived( - msgs, long_lived_ttl="1h", rolling_ttl="5m", - ) - # Last user message: cache_control on the wrapped text part should be 5m - last = out[-1] - c = last["content"] - assert isinstance(c, list) - assert c[-1]["cache_control"] == {"type": "ephemeral"} # 5m has no ttl key - - def 
test_string_system_falls_back_to_envelope_marker(self): - """When the caller didn't split the system message, we still place a marker.""" - msgs = [ - {"role": "system", "content": "Single string system"}, - {"role": "user", "content": "u1"}, - ] - out = apply_anthropic_cache_control_long_lived(msgs) - sys_content = out[0]["content"] - # Wrapped into a list and the (now sole) block gets the 1h marker - assert isinstance(sys_content, list) - assert sys_content[0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"} - - def test_does_not_mutate_input(self): - msgs = [ - {"role": "system", "content": [{"type": "text", "text": "S"}]}, - {"role": "user", "content": "u1"}, - ] - before = copy.deepcopy(msgs) - apply_anthropic_cache_control_long_lived(msgs) - assert msgs == before - - def test_max_4_breakpoints_with_split_system(self): - msgs = [ - {"role": "system", "content": [{"type": "text", "text": "S"}, {"type": "text", "text": "V"}]}, - ] + [ - {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg{i}"} - for i in range(10) - ] - out = apply_anthropic_cache_control_long_lived(msgs) - count = 0 - for m in out: - c = m.get("content") - if isinstance(c, list): - for item in c: - if isinstance(item, dict) and "cache_control" in item: - count += 1 - elif "cache_control" in m: - count += 1 - # 1 system block + last 2 messages = 3 breakpoints from this function. - # tools[-1] is marked separately (not via this function), so a 4th - # breakpoint can be added at API-call time. - assert count == 3 diff --git a/tests/agent/test_prompt_caching_live.py b/tests/agent/test_prompt_caching_live.py deleted file mode 100644 index f72b6b9d90..0000000000 --- a/tests/agent/test_prompt_caching_live.py +++ /dev/null @@ -1,112 +0,0 @@ -"""Live E2E: long-lived prefix caching on Claude via OpenRouter. - -Run only when LIVE_OR_KEY env var is set. Skipped under the normal hermetic -test suite (which unsets credentials). 
-""" -import os, sys, tempfile, time, shutil, pytest - - -# Probe for the key BEFORE conftest unsets it -_LIVE_KEY = os.environ.get("OPENROUTER_API_KEY") or os.environ.get("LIVE_OR_KEY") -if not _LIVE_KEY: - # Try to read directly from .env - env_path = os.path.expanduser("~/.hermes/.env") - if os.path.exists(env_path): - with open(env_path) as f: - for line in f: - if line.startswith("OPENROUTER_API_KEY="): - _LIVE_KEY = line.strip().split("=", 1)[1].strip().strip('"').strip("'") - break - - -pytestmark = pytest.mark.skipif( - not _LIVE_KEY, - reason="set OPENROUTER_API_KEY (or LIVE_OR_KEY) to run live cache test", -) - - -def test_long_lived_prefix_cache_e2e_openrouter(tmp_path, monkeypatch): - """Two AIAgent runs in fresh sessions: call 1 writes cache, call 2 reads it.""" - monkeypatch.setenv("HERMES_HOME", str(tmp_path)) - # The hermetic conftest unsets OPENROUTER_API_KEY — restore for this test - monkeypatch.setenv("OPENROUTER_API_KEY", _LIVE_KEY) - - # Minimal config — but with enough toolset/guidance to exceed Anthropic's - # ~1024-token minimum-cacheable-prefix threshold. Anthropic silently - # ignores cache_control markers on small blocks. - import yaml - cfg_path = tmp_path / "config.yaml" - cfg_path.write_text(yaml.safe_dump({ - "model": {"provider": "openrouter", "default": "anthropic/claude-haiku-4.5"}, - "prompt_caching": {"long_lived_prefix": True, "long_lived_ttl": "1h", "cache_ttl": "5m"}, - "agent": {"tool_use_enforcement": True}, # adds substantial guidance text - "memory": {"provider": ""}, - "compression": {"enabled": False}, - })) - - from run_agent import AIAgent - - def make_agent(): - return AIAgent( - api_key=_LIVE_KEY, - base_url="https://openrouter.ai/api/v1", - provider="openrouter", - model="anthropic/claude-haiku-4.5", - api_mode="chat_completions", - # Use the default toolset roster — the tools array (~13k tokens - # for ~35 tools) is what carries the bulk of the cross-session - # cache value. 
With a tiny toolset the cached prefix can fall - # below Anthropic Haiku's 2048-token minimum cacheable size and - # the marker is silently ignored. - enabled_toolsets=None, - quiet_mode=True, - skip_context_files=True, - skip_memory=True, - save_trajectories=False, - ) - - a1 = make_agent() - assert a1._use_prompt_caching is True, "policy should enable caching for Claude on OR" - assert a1._use_long_lived_prefix_cache is True, "long-lived path should activate" - parts = a1._build_system_prompt_parts() - print(f"\nstable={len(parts['stable']):,} ctx={len(parts['context']):,} volatile={len(parts['volatile']):,} chars") - print(f"tool count: {len(a1.tools or [])}") - - # Use distinct user messages each call so OpenRouter's response cache - # doesn't short-circuit the upstream Anthropic call (we need real - # Anthropic billing visibility to verify cache_creation/cache_read). - USER_1 = "Reply with the single word ALPHA." - USER_2 = "Reply with the single word BRAVO." - - print("\n--- Call 1 (cold) ---") - r1 = a1.run_conversation(USER_1, conversation_history=[]) - print(f"final_response[:80]: {(r1.get('final_response') or '')[:80]!r}") - cr1 = a1.session_cache_read_tokens - cw1 = a1.session_cache_write_tokens - print(f"call1: cache_read={cr1} cache_write={cw1}") - - # Wait so cache settles, then fresh agent (NEW SESSION) for cross-session read - time.sleep(2) - a2 = make_agent() - assert a2.session_id != a1.session_id, "second agent must have a new session" - - print("\n--- Call 2 (warm, NEW session, different user msg) ---") - r2 = a2.run_conversation(USER_2, conversation_history=[]) - print(f"final_response[:80]: {(r2.get('final_response') or '')[:80]!r}") - cr2 = a2.session_cache_read_tokens - cw2 = a2.session_cache_write_tokens - print(f"call2: cache_read={cr2} cache_write={cw2}") - - print(f"\n=== VERDICT ===") - print(f" call1 wrote {cw1:,} cache tokens, read {cr1:,}") - print(f" call2 wrote {cw2:,} cache tokens, read {cr2:,}") - if cw1: - print(f" cross-session 
read fraction: cr2/cw1 = {cr2/cw1:.2%}") - - # Assertions - assert cw1 > 0, f"call 1 must write cache (got {cw1}); long-lived layout not reaching wire" - assert cr2 > 0, ( - f"call 2 must read cache cross-session (got {cr2}); " - f"stable prefix is not byte-stable across sessions" - ) - assert cr2 >= 1000, f"cache_read on call 2 ({cr2}) too small to indicate real reuse" diff --git a/tests/agent/transports/test_chat_completions.py b/tests/agent/transports/test_chat_completions.py index 47d402a215..7ed0d4da63 100644 --- a/tests/agent/transports/test_chat_completions.py +++ b/tests/agent/transports/test_chat_completions.py @@ -147,11 +147,12 @@ class TestChatCompletionsBuildKwargs: ] def test_nous_tags(self, transport): + from agent.portal_tags import nous_portal_tags from providers import get_provider_profile profile = get_provider_profile("nous") msgs = [{"role": "user", "content": "Hi"}] kw = transport.build_kwargs(model="gpt-4o", messages=msgs, provider_profile=profile) - assert kw["extra_body"]["tags"] == ["product=hermes-agent"] + assert kw["extra_body"]["tags"] == nous_portal_tags() def test_reasoning_default(self, transport): msgs = [{"role": "user", "content": "Hi"}] diff --git a/tests/gateway/test_restart_drain.py b/tests/gateway/test_restart_drain.py index 55de5a4554..844af42730 100644 --- a/tests/gateway/test_restart_drain.py +++ b/tests/gateway/test_restart_drain.py @@ -7,6 +7,7 @@ from unittest.mock import AsyncMock, MagicMock import pytest import gateway.run as gateway_run +from agent.i18n import t from gateway.platforms.base import MessageEvent, MessageType from gateway.restart import DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT from gateway.session import SessionEntry, build_session_key @@ -32,7 +33,7 @@ async def test_restart_command_while_busy_requests_drain_without_interrupt(monke result = await runner._handle_message(event) - assert result == "⏳ Draining 1 active agent(s) before restart..." 
+ assert result == t("gateway.draining", count=1) running_agent.interrupt.assert_not_called() runner.request_restart.assert_called_once_with(detached=True, via_service=False) diff --git a/tests/providers/test_profile_wiring.py b/tests/providers/test_profile_wiring.py index 9096c82b6a..258ff53180 100644 --- a/tests/providers/test_profile_wiring.py +++ b/tests/providers/test_profile_wiring.py @@ -273,12 +273,13 @@ class TestRequestOverridesParity: def test_extra_body_override_merges_with_provider_body(self, transport): """Override extra_body merges WITH provider extra_body, not replaces.""" + from agent.portal_tags import nous_portal_tags kw = transport.build_kwargs( model="hermes-3", messages=_msgs(), tools=None, provider_profile=get_provider_profile("nous"), request_overrides={"extra_body": {"custom": True}}, ) - assert kw["extra_body"]["tags"] == ["product=hermes-agent"] # from profile + assert kw["extra_body"]["tags"] == nous_portal_tags() # from profile assert kw["extra_body"]["custom"] is True # from override def test_top_level_override(self, transport): diff --git a/tests/providers/test_provider_profiles.py b/tests/providers/test_provider_profiles.py index 68f7b5f497..c79ed2aea9 100644 --- a/tests/providers/test_provider_profiles.py +++ b/tests/providers/test_provider_profiles.py @@ -210,9 +210,10 @@ class TestOpenRouterProfile: class TestNousProfile: def test_tags(self): + from agent.portal_tags import nous_portal_tags p = get_provider_profile("nous") body = p.build_extra_body() - assert body["tags"] == ["product=hermes-agent"] + assert body["tags"] == nous_portal_tags() def test_auth_type(self): p = get_provider_profile("nous") diff --git a/tests/providers/test_transport_parity.py b/tests/providers/test_transport_parity.py index be88bc580a..8c1fb6eb4f 100644 --- a/tests/providers/test_transport_parity.py +++ b/tests/providers/test_transport_parity.py @@ -165,13 +165,14 @@ class TestNousParity: """Nous: product tags, reasoning, omit when disabled.""" def 
test_tags(self, transport): + from agent.portal_tags import nous_portal_tags kw = transport.build_kwargs( model="hermes-3-llama-3.1-405b", messages=_simple_messages(), tools=None, provider_profile=get_provider_profile("nous"), ) - assert kw["extra_body"]["tags"] == ["product=hermes-agent"] + assert kw["extra_body"]["tags"] == nous_portal_tags() def test_reasoning_omitted_when_disabled(self, transport): """Nous special case: reasoning omitted entirely when disabled.""" diff --git a/tests/run_agent/test_anthropic_prompt_cache_policy.py b/tests/run_agent/test_anthropic_prompt_cache_policy.py index 15d1cb4e87..ba6e54f037 100644 --- a/tests/run_agent/test_anthropic_prompt_cache_policy.py +++ b/tests/run_agent/test_anthropic_prompt_cache_policy.py @@ -330,127 +330,3 @@ class TestExplicitOverrides: # Long-lived prefix cache policy (cross-session 1h tier) # ───────────────────────────────────────────────────────────────────── -class TestSupportsLongLivedAnthropicCache: - """Narrower than _anthropic_prompt_cache_policy — only Claude on the 4 - explicitly-validated endpoints get the long-lived layout.""" - - def test_native_anthropic_claude_supported(self): - agent = _make_agent( - provider="anthropic", - base_url="https://api.anthropic.com", - api_mode="anthropic_messages", - model="claude-sonnet-4.6", - ) - assert agent._supports_long_lived_anthropic_cache() is True - - def test_anthropic_oauth_supported(self): - # OAuth uses the same transport as native Anthropic - agent = _make_agent( - provider="anthropic", - base_url="https://api.anthropic.com", - api_mode="anthropic_messages", - model="claude-opus-4.6", - ) - assert agent._supports_long_lived_anthropic_cache() is True - - def test_openrouter_claude_supported(self): - agent = _make_agent( - provider="openrouter", - base_url="https://openrouter.ai/api/v1", - api_mode="chat_completions", - model="anthropic/claude-sonnet-4.6", - ) - assert agent._supports_long_lived_anthropic_cache() is True - - def 
test_nous_portal_claude_supported(self): - # Nous Portal proxies to OpenRouter — same wire format - agent = _make_agent( - provider="nous", - base_url="https://inference-api.nousresearch.com/v1", - api_mode="chat_completions", - model="anthropic/claude-opus-4.7", - ) - assert agent._supports_long_lived_anthropic_cache() is True - - def test_nous_portal_qwen_supported(self): - # Portal Qwen rides the same OpenRouter-equivalent transport as - # Portal Claude; long-lived (1h cross-session) cache_control - # markers apply identically. - agent = _make_agent( - provider="nous", - base_url="https://inference-api.nousresearch.com/v1", - api_mode="chat_completions", - model="qwen3.6-plus", - ) - assert agent._supports_long_lived_anthropic_cache() is True - - def test_nous_portal_qwen_vendored_slug_supported(self): - agent = _make_agent( - provider="nous", - base_url="https://inference-api.nousresearch.com/v1", - api_mode="chat_completions", - model="qwen/qwen3.6-plus", - ) - assert agent._supports_long_lived_anthropic_cache() is True - - def test_nous_portal_non_claude_non_qwen_rejected(self): - # Portal long-lived cache scope mirrors policy: Claude or Qwen only. - agent = _make_agent( - provider="nous", - base_url="https://inference-api.nousresearch.com/v1", - api_mode="chat_completions", - model="openai/gpt-5.4", - ) - assert agent._supports_long_lived_anthropic_cache() is False - - def test_openrouter_non_claude_rejected(self): - agent = _make_agent( - provider="openrouter", - base_url="https://openrouter.ai/api/v1", - api_mode="chat_completions", - model="openai/gpt-5.4", - ) - assert agent._supports_long_lived_anthropic_cache() is False - - def test_third_party_anthropic_gateway_rejected(self): - # MiniMax / Kimi / etc. 
— anthropic-wire but not in our validated list - agent = _make_agent( - provider="minimax", - base_url="https://api.minimax.io/anthropic", - api_mode="anthropic_messages", - model="minimax-m2.7", - ) - assert agent._supports_long_lived_anthropic_cache() is False - - def test_alibaba_dashscope_rejected(self): - agent = _make_agent( - provider="alibaba", - base_url="https://dashscope.aliyuncs.com/api/v1/anthropic", - api_mode="anthropic_messages", - model="qwen3.5-plus", - ) - assert agent._supports_long_lived_anthropic_cache() is False - - def test_opencode_qwen_rejected(self): - agent = _make_agent( - provider="opencode-go", - base_url="https://api.opencode-go.example/v1", - api_mode="chat_completions", - model="qwen3.6-plus", - ) - assert agent._supports_long_lived_anthropic_cache() is False - - def test_fallback_target_evaluated_independently(self): - # Starting on a non-supported provider, falling back to OpenRouter Claude - agent = _make_agent( - provider="minimax", - base_url="https://api.minimax.io/anthropic", - api_mode="anthropic_messages", - model="minimax-m2.7", - ) - assert agent._supports_long_lived_anthropic_cache( - provider="openrouter", - base_url="https://openrouter.ai/api/v1", - api_mode="chat_completions", - model="anthropic/claude-sonnet-4.6", - ) is True diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py index f97885a038..d3a5a1b37f 100644 --- a/tests/run_agent/test_provider_parity.py +++ b/tests/run_agent/test_provider_parity.py @@ -343,11 +343,12 @@ class TestBuildApiKwargsAIGateway: class TestBuildApiKwargsNousPortal: def test_includes_nous_product_tags(self, monkeypatch): + from agent.portal_tags import nous_portal_tags agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1") messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) extra = kwargs.get("extra_body", {}) - assert extra.get("tags") == ["product=hermes-agent"] + 
assert extra.get("tags") == nous_portal_tags() def test_uses_chat_completions_format(self, monkeypatch): agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1") diff --git a/tests/test_ctx_halving_fix.py b/tests/test_ctx_halving_fix.py index afeee84878..0dd3ca4e7e 100644 --- a/tests/test_ctx_halving_fix.py +++ b/tests/test_ctx_halving_fix.py @@ -169,7 +169,6 @@ class TestEphemeralMaxOutputTokens: agent.reasoning_config = None agent._is_anthropic_oauth = False agent._ephemeral_max_output_tokens = None - agent._use_long_lived_prefix_cache = False compressor = MagicMock() compressor.context_length = 200_000 diff --git a/tools/approval.py b/tools/approval.py index d6db5a05a0..dbb3810886 100644 --- a/tools/approval.py +++ b/tools/approval.py @@ -314,7 +314,9 @@ DANGEROUS_PATTERNS = [ (r'\bdd\s+.*if=', "disk copy"), (r'>\s*/dev/sd', "write to block device"), (r'\bDROP\s+(TABLE|DATABASE)\b', "SQL DROP"), - (r'\bDELETE\s+FROM\b(?!.*\bWHERE\b)', "SQL DELETE without WHERE"), + # Use [^\n]* instead of .* so DOTALL mode does not cause a WHERE clause on the + # *next* line to satisfy the negative lookahead, silently allowing DELETE without WHERE. 
+ (r'\bDELETE\s+FROM\b(?![^\n]*\bWHERE\b)', "SQL DELETE without WHERE"), (r'\bTRUNCATE\s+(TABLE)?\s*\w', "SQL TRUNCATE"), (r'>\s*/etc/', "overwrite system config"), (r'\bsystemctl\s+(-[^\s]+\s+)*(stop|restart|disable|mask)\b', "stop/restart system service"), diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py index 664c8736a1..d5b2c0c782 100644 --- a/tools/send_message_tool.py +++ b/tools/send_message_tool.py @@ -461,7 +461,8 @@ async def _send_via_adapter( adapter = None if adapter is not None: try: - result = await adapter.send(chat_id=chat_id, content=chunk) + metadata = {"thread_id": thread_id} if thread_id else None + result = await adapter.send(chat_id=chat_id, content=chunk, metadata=metadata) except asyncio.CancelledError: raise except Exception as e: diff --git a/tools/voice_mode.py b/tools/voice_mode.py index 238fed4b28..cc691afad7 100644 --- a/tools/voice_mode.py +++ b/tools/voice_mode.py @@ -130,7 +130,9 @@ def detect_audio_environment() -> dict: try: devices = sd.query_devices() if not devices: - if termux_capture: + if os.environ.get('PULSE_SERVER'): + notices.append("No PortAudio devices detected but PULSE_SERVER is set -- continuing") + elif termux_capture: notices.append("No PortAudio devices detected, but Termux:API microphone capture is available") else: warnings.append("No audio input/output devices detected") diff --git a/tools/web_tools.py b/tools/web_tools.py index b9df0cd3be..79ddc8d27f 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -593,7 +593,8 @@ def _resolve_web_extract_auxiliary(model: Optional[str] = None) -> tuple[Optiona extra_body: Dict[str, Any] = {} if client is not None and _is_nous_auxiliary_client(client): from agent.auxiliary_client import get_auxiliary_extra_body - extra_body = get_auxiliary_extra_body() or {"tags": ["product=hermes-agent"]} + from agent.portal_tags import nous_portal_tags + extra_body = get_auxiliary_extra_body() or {"tags": nous_portal_tags()} return client, effective_model, 
extra_body diff --git a/website/docs/user-guide/features/lsp.md b/website/docs/user-guide/features/lsp.md index ef0f403d20..bb54003b11 100644 --- a/website/docs/user-guide/features/lsp.md +++ b/website/docs/user-guide/features/lsp.md @@ -92,6 +92,13 @@ manager makes sense for that language (rustup, ghcup, opam, brew, …). Hermes auto-detects the binary on PATH or in `/lsp/bin/`. +A few servers are installed alongside a peer dependency that npm +won't auto-pull. The current case is `typescript-language-server`, +which requires the `typescript` SDK importable from the same +`node_modules` tree — Hermes installs both packages together when you +run `hermes lsp install typescript` or auto-install fires on first +use. + ## CLI ``` @@ -207,6 +214,24 @@ The binary isn't on PATH and isn't in `/lsp/bin/`. Run `hermes lsp install ` to attempt an auto-install, or install the binary manually through the language's normal toolchain. +**`Backend warnings` section in `hermes lsp status`** + +Some servers ship as thin wrappers around an external CLI for actual +diagnostics — they spawn cleanly and accept requests but never emit +errors when the sidecar binary is missing. The most common case is +`bash-language-server`, which delegates diagnostics to `shellcheck`. +When `hermes lsp status` shows a `Backend warnings` section, install +the named tool through your OS package manager: + +``` +apt install shellcheck # Debian / Ubuntu +brew install shellcheck # macOS +scoop install shellcheck # Windows +``` + +The same warning is logged once at server spawn time in +`~/.hermes/logs/agent.log`. + **Server starts but never returns diagnostics** Check `~/.hermes/logs/agent.log` for `[agent.lsp.client]` entries —