Merge remote-tracking branch 'origin/bb/gui' into austin/bb/gui

This commit is contained in:
Austin Pickett
2026-05-13 10:18:22 -04:00
48 changed files with 976 additions and 763 deletions
+5 -1
View File
@@ -94,9 +94,13 @@ RUN cd web && npm run build && \
# hermes_cli/main.py succeeds (see #18800). /opt/hermes/web is build-time
# only (HERMES_WEB_DIST points at hermes_cli/web_dist) and is intentionally
# not chowned here.
# The .venv MUST be hermes-writable so lazy_deps.py can install platform
# packages (discord.py, telegram, slack, etc.) at first gateway boot.
# Without this, `uv pip install` fails with EACCES and all messaging
# adapters silently fail to load. See tools/lazy_deps.py.
USER root
RUN chmod -R a+rX /opt/hermes && \
chown -R hermes:hermes /opt/hermes/ui-tui /opt/hermes/node_modules
chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/node_modules
# Start as root so the entrypoint can usermod/groupmod + gosu.
# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
+2 -3
View File
@@ -1305,9 +1305,8 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
),
}
# Forward cache_control marker when present on the OpenAI-format
# tool dict (set by ``mark_tools_for_long_lived_cache``). Anthropic's
# tools array supports cache_control on the last tool to cache the
# entire schema cross-session.
# tool dict. Anthropic's tools array supports cache_control on the
# last tool to cache the entire schema cross-session.
cache_control = t.get("cache_control")
if isinstance(cache_control, dict):
anthropic_tool["cache_control"] = dict(cache_control)
+24 -3
View File
@@ -382,7 +382,28 @@ _AI_GATEWAY_HEADERS = {
# Nous Portal extra_body for product attribution.
# Callers should pass this as extra_body in chat.completions.create()
# when the auxiliary client is backed by Nous Portal.
NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent", "client=aux"]}
#
# The tags are computed from agent.portal_tags so the client= marker stays
# in lockstep with hermes_cli.__version__ across every Portal call site
# (main loop, aux, compression, web_extract). Do not inline a literal here;
# see agent/portal_tags.py for the rationale.
from agent.portal_tags import nous_portal_tags as _nous_portal_tags
def _nous_extra_body() -> dict:
    """Build the ``extra_body`` payload for a Nous Portal request.

    The tag list is fetched on every call rather than cached, so a
    hot-reloaded ``hermes_cli.__version__`` shows up without restarting
    long-running processes. Each call returns a new dict.
    """
    portal_tags = _nous_portal_tags()
    return {"tags": portal_tags}
# Backwards-compatible module attribute. Some callers (tests, third-party
# plugins) read ``NOUS_EXTRA_BODY`` directly; keep it as a snapshot of the
# current tags. Callers that need the freshest value should call
# ``_nous_extra_body()`` or import ``nous_portal_tags`` directly.
NOUS_EXTRA_BODY = _nous_extra_body()
# Set at resolve time — True if the auxiliary client points to Nous Portal
auxiliary_is_nous: bool = False
@@ -3437,7 +3458,7 @@ def get_auxiliary_extra_body() -> dict:
Includes Nous Portal product tags when the auxiliary client is backed
by Nous Portal. Returns empty dict otherwise.
"""
return dict(NOUS_EXTRA_BODY) if auxiliary_is_nous else {}
return _nous_extra_body() if auxiliary_is_nous else {}
def auxiliary_max_tokens_param(value: int) -> dict:
@@ -4026,7 +4047,7 @@ def _build_call_kwargs(
# Provider-specific extra_body
merged_extra = dict(extra_body or {})
if provider == "nous" or auxiliary_is_nous:
merged_extra.setdefault("tags", []).extend(NOUS_EXTRA_BODY["tags"])
merged_extra.setdefault("tags", []).extend(_nous_portal_tags())
if merged_extra:
kwargs["extra_body"] = merged_extra
+64
View File
@@ -0,0 +1,64 @@
"""Centralized Nous Portal request tags.
Every Hermes request that hits the Nous Portal — main agent loop, auxiliary
client (compression / titles / vision / web_extract / session_search / etc.),
and any future code path — must carry the same product-attribution tags so
Nous can attribute usage to Hermes Agent and bucket it by client release.
Tag shape (sent in OpenAI-compatible ``extra_body['tags']``):
[
"product=hermes-agent",
"client=hermes-client-v<__version__>",
]
The version is sourced live from ``hermes_cli.__version__`` so it auto-aligns
to whatever release is installed; the release script
(``scripts/release.py``) regex-bumps that single string, and every Portal
request picks up the new tag on the next process start.
Why one helper instead of inlining the literal at each site:
* Four call sites (main loop profile, aux client, run_agent compression
fallback, web_tools fallback) used to drift apart — see PR #24194, which
only updated the aux site, leaving the main loop sending a different tag set.
* Tests should assert the same tag list everywhere; centralizing makes that
assertion a one-liner against this module.
Do NOT pre-compute these as module-level constants in the consumers. The
version can change at runtime (editable installs, hot-reload tooling), and
``hermes_cli.__version__`` is the canonical source of truth.
"""
from __future__ import annotations
from typing import List
def _hermes_version() -> str:
"""Return the current Hermes release version, e.g. ``"0.13.0"``.
Falls back to ``"unknown"`` if ``hermes_cli`` cannot be imported (should
never happen in a real install guarded for defensive testing).
"""
try:
from hermes_cli import __version__
return __version__
except Exception:
return "unknown"
def hermes_client_tag() -> str:
    """Build the ``client=...`` tag sent with Nous Portal requests.

    Shape: ``client=hermes-client-v<version>``, where ``<version>`` is
    whatever ``_hermes_version()`` returns at call time (normally
    ``MAJOR.MINOR.PATCH``).
    """
    version = _hermes_version()
    return f"client=hermes-client-v{version}"
def nous_portal_tags() -> List[str]:
    """Assemble the canonical Nous Portal product tags.

    A new list is built on every call, so callers may mutate the result
    (e.g. ``merged_extra.setdefault("tags", []).extend(nous_portal_tags())``)
    without affecting later calls.
    """
    tags: List[str] = ["product=hermes-agent"]
    tags.append(hermes_client_tag())
    return tags
+1 -1
View File
@@ -268,7 +268,7 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
# Model name substrings that trigger tool-use enforcement guidance.
# Add new patterns here when a model family needs explicit steering.
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok")
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm")
# OpenAI GPT/Codex-specific execution guidance. Addresses known failure modes
# where GPT models abandon work on partial results, skip prerequisite lookups,
+6 -128
View File
@@ -1,25 +1,15 @@
"""Anthropic prompt caching strategies.
"""Anthropic prompt caching strategy.
Two layouts:
* ``system_and_3`` (default, used everywhere except the long-lived path):
4 cache_control breakpoints system prompt + last 3 non-system messages.
All at the same TTL (5m or 1h). Reduces input token costs by ~75% on
multi-turn conversations within a single session.
* ``prefix_and_2`` (Claude on Anthropic / OpenRouter / Nous Portal):
4 breakpoints split across two TTL tiers tools[-1] (1h) +
stable system prefix (1h) + last 2 non-system messages (5m). The
long-lived prefix is byte-stable across sessions for a given user
config, so every fresh session reads the cached system+tools instead
of re-paying for them. Within-session rolling window shrinks from 3
messages to 2 to free the breakpoint budget.
Single layout: ``system_and_3``. 4 cache_control breakpoints — system
prompt + last 3 non-system messages, all at the same TTL (5m or 1h).
Reduces input token costs by ~75% on multi-turn conversations within a
single session.
Pure functions -- no class state, no AIAgent dependency.
"""
import copy
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List
def _apply_cache_marker(msg: dict, cache_marker: dict, native_anthropic: bool = False) -> None:
@@ -87,115 +77,3 @@ def apply_anthropic_cache_control(
_apply_cache_marker(messages[idx], marker, native_anthropic=native_anthropic)
return messages
def _mark_system_stable_block(
messages: List[Dict[str, Any]],
long_lived_marker: Dict[str, str],
) -> bool:
"""Mark the *first* content block of the system message with the 1h marker.
The system message is expected to have been split into multiple content
blocks beforehand by the caller block[0] is the cross-session-stable
prefix, subsequent blocks carry context files + volatile suffix.
Falls back to marking the whole system message as a single block when
the message hasn't been split (preserves correctness on the fallback path).
Returns True when a marker was placed.
"""
if not messages or messages[0].get("role") != "system":
return False
sys_msg = messages[0]
content = sys_msg.get("content")
# Already a list of blocks → mark the first block.
if isinstance(content, list) and content:
first = content[0]
if isinstance(first, dict):
first["cache_control"] = long_lived_marker
return True
return False
# String content (no split) → cannot place a stable-prefix breakpoint
# without changing the byte content. Caller is responsible for
# splitting; if they didn't, fall through to envelope marker so we still
# cache *something* for this turn.
if isinstance(content, str) and content:
sys_msg["content"] = [
{"type": "text", "text": content, "cache_control": long_lived_marker}
]
return True
return False
def apply_anthropic_cache_control_long_lived(
api_messages: List[Dict[str, Any]],
long_lived_ttl: str = "1h",
rolling_ttl: str = "5m",
native_anthropic: bool = False,
) -> List[Dict[str, Any]]:
"""Apply prefix_and_2 caching: long-lived stable prefix + rolling window.
Layout (4 breakpoints total):
* Stable system prefix (block[0]) ``long_lived_ttl`` TTL
* Last 2 non-system messages ``rolling_ttl`` TTL each
NOTE: this function does NOT mark the tools array. Tools cache_control
is attached separately (see ``mark_tools_for_long_lived_cache``) because
tools live outside the messages list in the API payload.
The caller MUST have split the system message into ordered content
blocks where block[0] is the cross-session-stable portion. If the system
message is still a single string, it is wrapped into a single block and
marked this is correct, just less effective (the volatile suffix is
not isolated, so the prefix invalidates per-session).
Returns:
Deep copy of messages with cache_control breakpoints injected.
"""
messages = copy.deepcopy(api_messages)
if not messages:
return messages
long_marker = _build_marker(long_lived_ttl)
rolling_marker = _build_marker(rolling_ttl)
placed_prefix = _mark_system_stable_block(messages, long_marker)
# Reserve 1 breakpoint for the system prefix (when placed); spend the
# remaining 3 on the rolling tail. Anthropic max is 4 total —
# tools[-1] (when marked) consumes the 4th, so we cap rolling at 2 here.
rolling_budget = 2 if placed_prefix else 3
non_sys = [i for i in range(len(messages)) if messages[i].get("role") != "system"]
for idx in non_sys[-rolling_budget:]:
_apply_cache_marker(messages[idx], rolling_marker, native_anthropic=native_anthropic)
return messages
def mark_tools_for_long_lived_cache(
tools: Optional[List[Dict[str, Any]]],
long_lived_ttl: str = "1h",
) -> Optional[List[Dict[str, Any]]]:
"""Attach cache_control to the last tool in the OpenAI-format tools list.
Anthropic prefix-cache order is ``tools system messages``. Marking
the last tool dict caches the entire tools array (Anthropic's docs:
"the marker is placed on the last block you want included in the cached
prefix"). Marker is preserved across the OpenAI-wire boundary on
OpenRouter and Nous Portal (which proxies to OpenRouter); on native
Anthropic the marker is forwarded by ``convert_tools_to_anthropic``.
Returns a deep copy of the tools list with the marker attached, or the
input unchanged when tools is empty/None. Pure function does not
mutate the input.
"""
if not tools:
return tools
out = copy.deepcopy(tools)
last = out[-1]
if isinstance(last, dict):
last["cache_control"] = _build_marker(long_lived_ttl)
return out
@@ -1,6 +1,6 @@
import { Button } from '@/components/ui/button'
import { triggerHaptic } from '@/lib/haptics'
import { ArrowUp, AudioLines, Loader2, Mic, MicOff, Square } from '@/lib/icons'
import { ArrowUp, AudioLines, Layers3, Loader2, Mic, MicOff, Square } from '@/lib/icons'
import { cn } from '@/lib/utils'
import type { ConversationStatus } from './hooks/use-voice-conversation'
@@ -31,6 +31,7 @@ interface ConversationProps {
export function ComposerControls({
busy,
busyAction,
canSubmit,
conversation,
disabled,
@@ -40,6 +41,7 @@ export function ComposerControls({
onDictate
}: {
busy: boolean
busyAction: 'queue' | 'stop'
canSubmit: boolean
conversation: ConversationProps
disabled: boolean
@@ -74,12 +76,21 @@ export function ComposerControls({
</Button>
) : (
<Button
aria-label={busy ? 'Stop' : 'Send'}
aria-label={busy ? (busyAction === 'queue' ? 'Queue message' : 'Stop') : 'Send'}
className={PRIMARY_ICON_BTN}
disabled={disabled || !canSubmit}
title={busy ? (busyAction === 'queue' ? 'Queue message' : 'Stop') : 'Send'}
type="submit"
>
{busy ? <span className="block size-3 rounded-[0.1875rem] bg-current" /> : <ArrowUp size={18} />}
{busy ? (
busyAction === 'queue' ? (
<Layers3 size={16} />
) : (
<span className="block size-3 rounded-[0.1875rem] bg-current" />
)
) : (
<ArrowUp size={18} />
)}
</Button>
)}
</div>
+227 -4
View File
@@ -13,6 +13,7 @@ import {
} from 'react'
import { formatRefValue, hermesDirectiveFormatter } from '@/components/assistant-ui/directive-text'
import { Button } from '@/components/ui/button'
import { useMediaQuery } from '@/hooks/use-media-query'
import { useResizeObserver } from '@/hooks/use-resize-observer'
import { chatMessageText } from '@/lib/chat-messages'
@@ -20,7 +21,19 @@ import { contextPath } from '@/lib/chat-runtime'
import { DATA_IMAGE_URL_RE } from '@/lib/embedded-images'
import { triggerHaptic } from '@/lib/haptics'
import { cn } from '@/lib/utils'
import { $composerAttachments, $composerDraft } from '@/store/composer'
import {
$composerAttachments,
$composerDraft,
clearComposerAttachments,
type ComposerAttachment
} from '@/store/composer'
import {
$queuedPromptsBySession,
enqueueQueuedPrompt,
removeQueuedPrompt,
type QueuedPromptEntry,
updateQueuedPrompt
} from '@/store/composer-queue'
import { $messages } from '@/store/session'
import { $threadScrolledUp } from '@/store/thread-scroll'
@@ -41,6 +54,7 @@ import {
renderComposerContents,
RICH_INPUT_SLOT
} from './rich-editor'
import { QueuePanel } from './queue-panel'
import { SkinSlashPopover } from './skin-slash-popover'
import { detectTrigger, extractClipboardImageBlobs, textBeforeCaret, type TriggerState } from './text-utils'
import { ComposerTriggerPopover } from './trigger-popover'
@@ -53,6 +67,15 @@ const COMPOSER_STACK_BREAKPOINT_PX = 320
const COMPOSER_FADE_BACKGROUND =
'linear-gradient(to bottom, transparent, color-mix(in srgb, var(--dt-background) 10%, transparent))'
interface QueueEditState {
attachments: ComposerAttachment[]
draft: string
entryId: string
sessionKey: string
}
const cloneAttachments = (attachments: ComposerAttachment[]) => attachments.map(a => ({ ...a }))
export function ChatBar({
busy,
cwd,
@@ -60,6 +83,7 @@ export function ChatBar({
focusKey,
gateway,
maxRecordingSeconds = 120,
queueSessionKey,
sessionId,
state,
onCancel,
@@ -77,12 +101,17 @@ export function ChatBar({
const aui = useAui()
const draft = useAuiState(s => s.composer.text)
const attachments = useStore($composerAttachments)
const queuedPromptsBySession = useStore($queuedPromptsBySession)
const scrolledUp = useStore($threadScrolledUp)
const activeQueueSessionKey = queueSessionKey || sessionId || null
const queuedPrompts = activeQueueSessionKey ? (queuedPromptsBySession[activeQueueSessionKey] ?? []) : []
const composerRef = useRef<HTMLFormElement | null>(null)
const composerSurfaceRef = useRef<HTMLDivElement | null>(null)
const editorRef = useRef<HTMLDivElement | null>(null)
const draftRef = useRef(draft)
const previousBusyRef = useRef(busy)
const drainingQueueRef = useRef(false)
const urlInputRef = useRef<HTMLInputElement | null>(null)
const [urlOpen, setUrlOpen] = useState(false)
@@ -91,6 +120,7 @@ export function ChatBar({
const [voiceConversationActive, setVoiceConversationActive] = useState(false)
const [tight, setTight] = useState(false)
const [dragActive, setDragActive] = useState(false)
const [queueEdit, setQueueEdit] = useState<QueueEditState | null>(null)
const dragDepthRef = useRef(0)
const lastSpokenIdRef = useRef<string | null>(null)
@@ -102,6 +132,8 @@ export function ChatBar({
const stacked = expanded || narrow || tight
const hasComposerPayload = draft.trim().length > 0 || attachments.length > 0
const canSubmit = busy || hasComposerPayload
const editingQueuedPrompt = queueEdit ? queuedPrompts.find(entry => entry.id === queueEdit.entryId) ?? null : null
const busyAction = busy && hasComposerPayload ? 'queue' : 'stop'
const showHelpHint = draft === '?'
const placeholder = disabled ? 'Starting Hermes…' : 'Ask anything'
@@ -463,6 +495,14 @@ export function ChatBar({
}
const handleEditorKeyDown = (event: KeyboardEvent<HTMLDivElement>) => {
if ((event.metaKey || event.ctrlKey) && !event.altKey && !event.shiftKey && event.key.toLowerCase() === 'k') {
event.preventDefault()
if (!busy) void drainNextQueued()
return
}
if (trigger && triggerItems.length > 0) {
if (event.key === 'ArrowDown') {
event.preventDefault()
@@ -499,6 +539,13 @@ export function ChatBar({
if (event.key === 'Enter' && !event.shiftKey) {
event.preventDefault()
if (!busy && !hasComposerPayload && queuedPrompts.length > 0) {
void drainNextQueued()
return
}
submitDraft()
}
}
@@ -635,10 +682,147 @@ export function ChatBar({
}
}
const submitDraft = () => {
if (busy) {
const loadIntoComposer = (text: string, attachments: ComposerAttachment[]) => {
draftRef.current = text
aui.composer().setText(text)
$composerAttachments.set(cloneAttachments(attachments))
const editor = editorRef.current
if (editor) {
renderComposerContents(editor, text)
placeCaretEnd(editor)
}
}
const beginQueuedEdit = (entry: QueuedPromptEntry) => {
if (!activeQueueSessionKey || queueEdit) return
setQueueEdit({
attachments: cloneAttachments($composerAttachments.get()),
draft: draftRef.current,
entryId: entry.id,
sessionKey: activeQueueSessionKey
})
loadIntoComposer(entry.text, entry.attachments)
triggerHaptic('selection')
focusInput()
}
// Leave queue-edit mode. Returns false when no edit is active, or when a
// 'save' is attempted with an empty composer; returns true once the composer
// has been restored to the draft that was stashed when editing began.
const exitQueuedEdit = (action: 'cancel' | 'save'): boolean => {
if (!queueEdit) return false
if (action === 'save') {
// Persist what is currently in the composer back onto the queued entry.
const text = draftRef.current
const next = cloneAttachments($composerAttachments.get())
// Refuse to overwrite a queued entry with nothing; stay in edit mode.
if (!text.trim() && next.length === 0) return false
const saved = updateQueuedPrompt(queueEdit.sessionKey, queueEdit.entryId, { attachments: next, text })
triggerHaptic(saved ? 'success' : 'selection')
} else {
triggerHaptic('cancel')
// NOTE(review): `onCancel` is the ChatBar cancel prop, which the controller
// wires to `cancelRun`. Calling it here presumably interrupts an in-flight
// turn whenever a queue edit is cancelled (or the edited entry is deleted).
// Confirm this is intended rather than a leftover from the busy/stop path.
onCancel()
}
// Put back the draft and attachments the user had before the edit started.
loadIntoComposer(queueEdit.draft, queueEdit.attachments)
setQueueEdit(null)
focusInput()
return true
}
const queueCurrentDraft = useCallback(() => {
if (!activeQueueSessionKey || (!draft.trim() && attachments.length === 0)) return false
if (!enqueueQueuedPrompt(activeQueueSessionKey, { text: draft, attachments })) return false
clearDraft()
clearComposerAttachments()
triggerHaptic('selection')
return true
}, [activeQueueSessionKey, attachments, draft])
// All queue drain paths share one lock + send-then-remove sequence.
// `pickEntry` lets each caller choose head, by-id, or skip-edited.
// `pickEntry` receives the current queue for the active session and returns
// the entry to send (or undefined to do nothing). Resolves true only when an
// entry was submitted and then removed from the queue.
const runDrain = useCallback(
async (pickEntry: (entries: QueuedPromptEntry[]) => QueuedPromptEntry | undefined): Promise<boolean> => {
// Bail out while another drain is in flight or when no session key is active.
if (drainingQueueRef.current || !activeQueueSessionKey) return false
const entry = pickEntry(queuedPrompts)
if (!entry) return false
drainingQueueRef.current = true
try {
// onSubmit may be sync or async; Promise.resolve normalizes both.
const accepted = await Promise.resolve(onSubmit(entry.text, { attachments: entry.attachments, fromQueue: true }))
// An explicit `false` means the submit was rejected, so the entry stays queued.
if (accepted === false) return false
removeQueuedPrompt(activeQueueSessionKey, entry.id)
return true
} finally {
// Always release the lock, including when onSubmit throws.
drainingQueueRef.current = false
}
},
[activeQueueSessionKey, onSubmit, queuedPrompts]
)
const drainNextQueued = useCallback(
() =>
runDrain(entries => {
const skip = queueEdit?.entryId
return skip ? entries.find(e => e.id !== skip) : entries[0]
}),
[queueEdit, runDrain]
)
const sendQueuedNow = useCallback(
(id: string) => runDrain(entries => entries.find(e => e.id === id && id !== queueEdit?.entryId)),
[queueEdit, runDrain]
)
const interruptAndSendNextQueued = useCallback(async () => {
if (queuedPrompts.length === 0) return false
await Promise.resolve(onCancel())
return drainNextQueued()
}, [drainNextQueued, onCancel, queuedPrompts.length])
// Auto-drain on busy → false (turn settled).
useEffect(() => {
const wasBusy = previousBusyRef.current
previousBusyRef.current = busy
if (busy || !wasBusy || queuedPrompts.length === 0) return
void drainNextQueued()
}, [busy, drainNextQueued, queuedPrompts.length])
// Clean up queue edit when its target disappears (session swap or external delete).
useEffect(() => {
if (!queueEdit) return
if (queueEdit.sessionKey === activeQueueSessionKey && editingQueuedPrompt) return
loadIntoComposer(queueEdit.draft, queueEdit.attachments)
setQueueEdit(null)
}, [activeQueueSessionKey, editingQueuedPrompt, queueEdit]) // eslint-disable-line react-hooks/exhaustive-deps
const submitDraft = () => {
if (queueEdit) {
exitQueuedEdit('save')
} else if (busy) {
if (hasComposerPayload) queueCurrentDraft()
else if (queuedPrompts.length > 0) void interruptAndSendNextQueued()
else {
triggerHaptic('cancel')
void Promise.resolve(onCancel())
}
} else if (!hasComposerPayload && queuedPrompts.length > 0) {
void drainNextQueued()
} else if (draft.trim() || attachments.length > 0) {
const submitted = draft
triggerHaptic('submit')
@@ -742,6 +926,7 @@ export function ChatBar({
const controls = (
<ComposerControls
busy={busy}
busyAction={busyAction}
canSubmit={canSubmit}
conversation={{
active: voiceConversationActive,
@@ -824,6 +1009,22 @@ export function ChatBar({
/>
)}
<SkinSlashPopover draft={draft} onSelect={selectSkinSlashCommand} />
{activeQueueSessionKey && queuedPrompts.length > 0 && (
<div className="relative z-6 mb-1 px-0.5">
<QueuePanel
busy={busy}
editingId={queueEdit?.entryId ?? null}
entries={queuedPrompts}
onDelete={id => {
if (removeQueuedPrompt(activeQueueSessionKey, id) && queueEdit?.entryId === id) {
exitQueuedEdit('cancel')
}
}}
onEdit={beginQueuedEdit}
onSendNow={id => void sendQueuedNow(id)}
/>
</div>
)}
<div
className="pointer-events-none absolute inset-0 rounded-[inherit]"
style={{ background: COMPOSER_FADE_BACKGROUND }}
@@ -871,6 +1072,28 @@ export function ChatBar({
>
<VoiceActivity state={voiceActivityState} />
<VoicePlaybackActivity />
{queueEdit && editingQueuedPrompt && (
<div className="flex items-center justify-between gap-2 rounded-lg border border-[color-mix(in_srgb,var(--dt-composer-ring)_32%,transparent)] bg-accent/18 px-2 py-1">
<div className="min-w-0 text-[0.7rem] text-muted-foreground/88">Editing queued turn in composer</div>
<div className="flex shrink-0 items-center gap-1">
<Button
className="h-6 rounded-md px-2 text-[0.68rem]"
onClick={() => exitQueuedEdit('cancel')}
type="button"
variant="ghost"
>
Cancel
</Button>
<Button
className="h-6 rounded-md px-2 text-[0.68rem]"
onClick={() => exitQueuedEdit('save')}
type="button"
>
Save
</Button>
</div>
</div>
)}
{attachments.length > 0 && <AttachmentList attachments={attachments} onRemove={onRemoveAttachment} />}
<div
className={cn(
@@ -0,0 +1,123 @@
import { useState } from 'react'
import { Button } from '@/components/ui/button'
import { ArrowUp, ChevronDown, Pencil, Trash2 } from '@/lib/icons'
import { cn } from '@/lib/utils'
import type { QueuedPromptEntry } from '@/store/composer-queue'
interface QueuePanelProps {
busy: boolean
editingId: null | string
entries: QueuedPromptEntry[]
onDelete: (id: string) => void
onEdit: (entry: QueuedPromptEntry) => void
onSendNow: (id: string) => void
}
// One-line label for a queue row: the trimmed prompt text when there is any,
// otherwise a placeholder that says whether the turn carries attachments.
const entryPreview = (entry: QueuedPromptEntry) => {
  const trimmed = entry.text.trim()

  if (trimmed) return trimmed

  return entry.attachments.length > 0 ? 'Attachment-only turn' : 'Empty turn'
}
/**
 * Collapsible list of prompts queued for the current session.
 *
 * Renders nothing when `entries` is empty. Each row shows a preview of the
 * queued turn plus edit / send-now / delete actions:
 *  - Edit is disabled on every other row while some row is being edited.
 *  - Send-now is disabled while `busy` is true or while that row is being edited.
 *  - Delete is always enabled.
 *
 * The header button toggles the list and reports its state via `aria-expanded`
 * so assistive technology can tell whether the queue is open.
 */
export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendNow }: QueuePanelProps) {
  const [collapsed, setCollapsed] = useState(false)

  if (entries.length === 0) return null

  return (
    <div className="rounded-2xl border border-border/65 bg-[color-mix(in_srgb,var(--dt-card)_70%,transparent)] py-0.5 shadow-[0_0_0_1px_color-mix(in_srgb,var(--dt-card)_30%,transparent)_inset]">
      <button
        aria-expanded={!collapsed}
        className="flex w-full items-center gap-1.5 px-2.5 py-1 text-left text-[0.72rem] font-medium text-muted-foreground/92 transition-colors hover:text-foreground/90"
        onClick={() => setCollapsed(prev => !prev)}
        type="button"
      >
        <ChevronDown className={cn('shrink-0 transition-transform', collapsed && '-rotate-90')} size={14} />
        <span className="truncate">{entries.length} Queued</span>
      </button>
      {!collapsed && (
        <div className="space-y-0.5 px-1.5 pb-0.5">
          {entries.map(entry => {
            const isEditing = editingId === entry.id
            const attachmentsCount = entry.attachments.length

            return (
              <div
                className={cn(
                  'group/queue-row flex items-center gap-1.5 rounded-lg border border-transparent px-1.5 py-1',
                  'transition-colors duration-300 ease-out hover:bg-(--chrome-action-hover) hover:transition-none',
                  isEditing && 'border-[color-mix(in_srgb,var(--dt-composer-ring)_40%,transparent)] bg-accent/25'
                )}
                key={entry.id}
              >
                <span
                  aria-hidden
                  className="h-3.5 w-3.5 shrink-0 rounded-full border border-foreground/35 bg-transparent"
                />
                <div className="min-w-0 flex-1">
                  <p className="truncate text-[0.73rem] leading-4 text-foreground/92">{entryPreview(entry)}</p>
                  {(attachmentsCount > 0 || isEditing) && (
                    <div className="mt-0.5 flex items-center gap-1.5 text-[0.64rem] text-muted-foreground/75">
                      {attachmentsCount > 0 && (
                        <span>
                          {attachmentsCount} attachment{attachmentsCount === 1 ? '' : 's'}
                        </span>
                      )}
                      {isEditing && (
                        <span className="text-[color-mix(in_srgb,var(--dt-composer-ring)_78%,var(--muted-foreground))]">
                          Editing in composer
                        </span>
                      )}
                    </div>
                  )}
                </div>
                {/* Row actions stay visible while editing; otherwise they appear on hover or keyboard focus. */}
                <div
                  className={cn(
                    'flex shrink-0 items-center gap-0 transition-opacity',
                    isEditing
                      ? 'opacity-100'
                      : 'opacity-0 group-hover/queue-row:opacity-100 group-focus-within/queue-row:opacity-100'
                  )}
                >
                  <Button
                    aria-label="Edit queued turn"
                    className="h-5 w-5 rounded-md"
                    disabled={Boolean(editingId) && !isEditing}
                    onClick={() => onEdit(entry)}
                    size="icon-xs"
                    title="Edit queued turn"
                    type="button"
                    variant="ghost"
                  >
                    <Pencil size={11} />
                  </Button>
                  <Button
                    aria-label="Send queued turn now"
                    className="h-5 w-5 rounded-md"
                    disabled={busy || isEditing}
                    onClick={() => onSendNow(entry.id)}
                    size="icon-xs"
                    title="Send queued turn now"
                    type="button"
                    variant="ghost"
                  >
                    <ArrowUp size={11} />
                  </Button>
                  <Button
                    aria-label="Delete queued turn"
                    className="h-5 w-5 rounded-md"
                    onClick={() => onDelete(entry.id)}
                    size="icon-xs"
                    title="Delete queued turn"
                    type="button"
                    variant="ghost"
                  >
                    <Trash2 size={11} />
                  </Button>
                </div>
              </div>
            )
          })}
        </div>
      )}
    </div>
  )
}
+7 -2
View File
@@ -1,4 +1,5 @@
import type { HermesGateway } from '@/hermes'
import type { ComposerAttachment } from '@/store/composer'
import type { DroppedFile } from '../hooks/use-composer-actions'
@@ -33,9 +34,10 @@ export interface ChatBarProps {
maxRecordingSeconds?: number
state: ChatBarState
gateway?: HermesGateway | null
queueSessionKey?: string | null
sessionId?: string | null
cwd?: string | null
onCancel: () => void
onCancel: () => Promise<void> | void
onAddContextRef?: (refText: string, label?: string, detail?: string) => void
onAddUrl?: (url: string) => void
onAttachImageBlob?: (blob: Blob) => Promise<boolean | void> | boolean | void
@@ -45,7 +47,10 @@ export interface ChatBarProps {
onPickFolders?: () => void
onPickImages?: () => void
onRemoveAttachment?: (id: string) => void
onSubmit: (value: string) => Promise<void> | void
onSubmit: (
value: string,
options?: { attachments?: ComposerAttachment[]; fromQueue?: boolean }
) => Promise<boolean> | boolean
onTranscribeAudio?: (audio: Blob) => Promise<string>
}
+7 -2
View File
@@ -20,6 +20,7 @@ import { ChevronDown } from '@/lib/icons'
import { useIncrementalExternalStoreRuntime } from '@/lib/incremental-external-store-runtime'
import { cn } from '@/lib/utils'
import { $pinnedSessionIds } from '@/store/layout'
import type { ComposerAttachment } from '@/store/composer'
import {
$activeSessionId,
$awaitingResponse,
@@ -51,7 +52,7 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
gateway: HermesGateway | null
onToggleSelectedPin: () => void
onDeleteSelectedSession: () => void
onCancel: () => void
onCancel: () => Promise<void> | void
onAddContextRef: (refText: string, label?: string, detail?: string) => void
onAddUrl: (url: string) => void
onBranchInNewChat: (messageId: string) => void
@@ -63,7 +64,10 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
onPickFolders: () => void
onPickImages: () => void
onRemoveAttachment: (id: string) => void
onSubmit: (text: string) => Promise<void> | void
onSubmit: (
text: string,
options?: { attachments?: ComposerAttachment[]; fromQueue?: boolean }
) => Promise<boolean> | boolean
onThreadMessagesChange: (messages: readonly ThreadMessage[]) => void
onEdit: (message: AppendMessage) => Promise<void>
onReload: (parentId: string | null) => Promise<void>
@@ -311,6 +315,7 @@ export function ChatView({
onRemoveAttachment={onRemoveAttachment}
onSubmit={onSubmit}
onTranscribeAudio={onTranscribeAudio}
queueSessionKey={selectedSessionId || activeSessionId}
sessionId={activeSessionId}
state={chatBarState}
/>
+1 -1
View File
@@ -472,7 +472,7 @@ export function DesktopController() {
onAttachDroppedItems={composer.attachDroppedItems}
onAttachImageBlob={composer.attachImageBlob}
onBranchInNewChat={messageId => void branchInNewChat(messageId)}
onCancel={() => void cancelRun()}
onCancel={cancelRun}
onDeleteSelectedSession={() => {
if (selectedStoredSessionId) {
void removeSession(selectedStoredSessionId)
@@ -71,6 +71,11 @@ interface PromptActionsOptions {
) => ClientSessionState
}
interface SubmitTextOptions {
attachments?: ComposerAttachment[]
fromQueue?: boolean
}
function renderCommandsCatalog(catalog: CommandsCatalogLike): string {
const desktopCatalog = filterDesktopCommandsCatalog(catalog)
@@ -153,7 +158,12 @@ export function usePromptActions({
)
const syncImageAttachmentsForSubmit = useCallback(
async (sessionId: string, attachments: ComposerAttachment[]) => {
async (
sessionId: string,
attachments: ComposerAttachment[],
options: { updateComposerAttachments?: boolean } = {}
) => {
const updateComposerAttachments = options.updateComposerAttachments ?? true
const images = attachments.filter(attachment => attachment.kind === 'image' && attachment.path)
for (const attachment of images) {
@@ -173,22 +183,25 @@ export function usePromptActions({
const attachedPath = result.path || attachment.path
addComposerAttachment({
...attachment,
id: attachment.id,
label: attachedPath ? pathLabel(attachedPath) : attachment.label,
path: attachedPath,
attachedSessionId: sessionId
})
if (updateComposerAttachments) {
addComposerAttachment({
...attachment,
id: attachment.id,
label: attachedPath ? pathLabel(attachedPath) : attachment.label,
path: attachedPath,
attachedSessionId: sessionId
})
}
}
},
[requestGateway]
)
const submitPromptText = useCallback(
async (rawText: string) => {
async (rawText: string, options?: SubmitTextOptions) => {
const visibleText = rawText.trim()
const attachments = $composerAttachments.get()
const usingComposerAttachments = !options?.attachments
const attachments = options?.attachments ?? $composerAttachments.get()
const contextRefs = attachments
.map(a => a.refText)
.filter(Boolean)
@@ -200,7 +213,7 @@ export function usePromptActions({
[contextRefs, visibleText].filter(Boolean).join('\n\n') || (hasImage ? 'What do you see in this image?' : '')
if (!text || busyRef.current) {
return
return false
}
const optimisticId = `user-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
@@ -232,7 +245,7 @@ export function usePromptActions({
awaitingResponse: true,
pendingBranchGroup: null,
sawAssistantPayload: false,
interrupted: false
interrupted: state.interrupted
}),
selectedStoredSessionIdRef.current
)
@@ -278,7 +291,7 @@ export function usePromptActions({
releaseBusy()
notifyError(err, 'Session unavailable')
return
return false
}
if (!sessionId) {
@@ -286,16 +299,21 @@ export function usePromptActions({
releaseBusy()
notify({ kind: 'error', title: 'Session unavailable', message: 'Could not create a new session' })
return
return false
}
seedOptimistic(sessionId)
}
try {
await syncImageAttachmentsForSubmit(sessionId, attachments)
await syncImageAttachmentsForSubmit(sessionId, attachments, {
updateComposerAttachments: usingComposerAttachments
})
await requestGateway('prompt.submit', { session_id: sessionId, text })
clearComposerAttachments()
if (usingComposerAttachments) clearComposerAttachments()
return true
} catch (err) {
releaseBusy()
updateSessionState(sessionId, state => ({ ...state, busy: false, awaitingResponse: false }))
@@ -303,10 +321,11 @@ export function usePromptActions({
if (isProviderSetupError(err)) {
requestDesktopOnboarding('Add a provider credential before sending your first message.')
return
return false
}
notifyError(err, 'Prompt failed')
return false
}
},
[
@@ -477,18 +496,18 @@ export function usePromptActions({
)
const submitText = useCallback(
async (rawText: string) => {
async (rawText: string, options?: SubmitTextOptions) => {
const visibleText = rawText.trim()
const attachments = $composerAttachments.get()
const attachments = options?.attachments ?? $composerAttachments.get()
if (!attachments.length && SLASH_COMMAND_RE.test(visibleText)) {
triggerHaptic('selection')
await executeSlashCommand(visibleText)
return
return true
}
await submitPromptText(rawText)
return await submitPromptText(rawText, options)
},
[executeSlashCommand, submitPromptText]
)
@@ -7,6 +7,7 @@ import { type ChatMessage, chatMessageText, toChatMessages } from '@/lib/chat-me
import { normalizePersonalityValue } from '@/lib/chat-runtime'
import { embeddedImageUrls, textWithoutEmbeddedImages } from '@/lib/embedded-images'
import { clearComposerAttachments, clearComposerDraft } from '@/store/composer'
import { clearQueuedPrompts } from '@/store/composer-queue'
import { $pinnedSessionIds } from '@/store/layout'
import { clearNotifications, notify, notifyError } from '@/store/notifications'
import { requestDesktopOnboarding } from '@/store/onboarding'
@@ -649,6 +650,11 @@ export function useSessionActions({
}
await deleteSession(storedSessionId)
clearQueuedPrompts(storedSessionId)
if (closingRuntimeId) {
clearQueuedPrompts(closingRuntimeId)
}
} catch (err) {
if (removed) {
setSessions(prev => [removed, ...prev])
@@ -95,6 +95,10 @@ function messageContentText(content: unknown): string {
return Array.isArray(content) ? content.map(partText).join('').trim() : ''
}
// Matches text that is nothing but the "[interrupted]" marker, optionally
// wrapped in a leading and/or trailing underscore; case-insensitive.
const INTERRUPTED_ONLY_RE = /^_?\[interrupted\]_?$/i

// True when the message text, ignoring surrounding whitespace, is only the
// interrupted marker.
function isInterruptedOnlyMessage(text: string): boolean {
  const candidate = text.trim()

  return INTERRUPTED_ONLY_RE.test(candidate)
}
function resetStickyState(state: StickyStateFlags) {
state.escapedFromLock = false
state.isAtBottom = true
@@ -368,6 +372,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
const messageStatus = useAuiState(s => s.message.status?.type)
const isPlaceholder = messageStatus === 'running' && content.length === 0
const interruptedOnly = useMemo(() => isInterruptedOnlyMessage(messageText), [messageText])
if (isPlaceholder) {
return null
@@ -380,7 +385,10 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
data-slot="aui_assistant-message-root"
>
<div
className="wrap-anywhere min-w-0 max-w-full overflow-hidden text-pretty text-base leading-(--dt-line-height) text-foreground"
className={cn(
'wrap-anywhere min-w-0 max-w-full overflow-hidden text-pretty text-base leading-(--dt-line-height) text-foreground',
interruptedOnly && 'text-[0.8rem] leading-5 text-muted-foreground/82'
)}
data-slot="aui_assistant-message-content"
>
{hoistedTodos.length > 0 && <HoistedTodoPanel todos={hoistedTodos} />}
@@ -401,7 +409,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
</ErrorPrimitive.Root>
</MessagePrimitive.Error>
</div>
{messageText.trim().length > 0 && (
{messageText.trim().length > 0 && !interruptedOnly && (
<AssistantFooter messageId={messageId} messageText={messageText} onBranchInNewChat={onBranchInNewChat} />
)}
</MessagePrimitive.Root>
@@ -0,0 +1,102 @@
import { beforeEach, describe, expect, it } from 'vitest'
import type { ComposerAttachment } from './composer'
import {
$queuedPromptsBySession,
clearQueuedPrompts,
dequeueQueuedPrompt,
enqueueQueuedPrompt,
getQueuedPrompts,
removeQueuedPrompt,
updateQueuedPrompt,
updateQueuedPromptText
} from './composer-queue'
// Session key shared by every test case in this file.
const SESSION_KEY = 'session-abc'
// localStorage key the tests clear and inspect to check persistence.
const QUEUE_STORAGE_KEY = 'hermes.desktop.composerQueue.v1'
// Test fixture: builds a minimal attachment whose label mirrors its id and
// whose refText is `@file:<id>`. `kind` defaults to 'file'.
function attachment(id: string, kind: ComposerAttachment['kind'] = 'file'): ComposerAttachment {
  const refText = `@file:${id}`

  return { id, kind, label: id, refText }
}
// Behavioural tests for the composer queue store. Every case runs against the
// shared module-level atom and localStorage, so state is reset before each one.
describe('composer queue store', () => {
beforeEach(() => {
// Start each case with no persisted queue and an empty in-memory store.
window.localStorage.removeItem(QUEUE_STORAGE_KEY)
$queuedPromptsBySession.set({})
})
// Entries come back in the order they were enqueued; an empty queue yields null.
it('queues prompts in FIFO order', () => {
enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'first' })
enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'second' })
expect(dequeueQueuedPrompt(SESSION_KEY)?.text).toBe('first')
expect(dequeueQueuedPrompt(SESSION_KEY)?.text).toBe('second')
expect(dequeueQueuedPrompt(SESSION_KEY)).toBeNull()
})
// The stored attachment is equal to, but not the same object as, the caller's.
it('clones attachments when queueing', () => {
const source = [attachment('a-1')]
const queued = enqueueQueuedPrompt(SESSION_KEY, { attachments: source, text: 'check clones' })
expect(queued).not.toBeNull()
expect(getQueuedPrompts(SESSION_KEY)[0]?.attachments[0]).toEqual(source[0])
expect(getQueuedPrompts(SESSION_KEY)[0]?.attachments[0]).not.toBe(source[0])
})
// Text edits and removals target a single entry by id and leave the rest intact.
it('updates and removes queued entries by id', () => {
const first = enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'draft one' })
const second = enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'draft two' })
expect(first).not.toBeNull()
expect(second).not.toBeNull()
expect(updateQueuedPromptText(SESSION_KEY, first!.id, 'draft one edited')).toBe(true)
expect(getQueuedPrompts(SESSION_KEY).map(entry => entry.text)).toEqual(['draft one edited', 'draft two'])
expect(removeQueuedPrompt(SESSION_KEY, first!.id)).toBe(true)
expect(getQueuedPrompts(SESSION_KEY).map(entry => entry.text)).toEqual(['draft two'])
})
// A full update replaces both text and attachments, again storing copies.
it('updates queued text and attachment snapshot', () => {
const first = enqueueQueuedPrompt(SESSION_KEY, { attachments: [attachment('f-1')], text: 'draft one' })
const editedAttachments = [attachment('f-2'), attachment('f-3', 'image')]
expect(first).not.toBeNull()
expect(
updateQueuedPrompt(SESSION_KEY, first!.id, {
attachments: editedAttachments,
text: 'edited text'
})
).toBe(true)
const queue = getQueuedPrompts(SESSION_KEY)
expect(queue[0]?.text).toBe('edited text')
expect(queue[0]?.attachments).toEqual(editedAttachments)
expect(queue[0]?.attachments[0]).not.toBe(editedAttachments[0])
})
// Clearing the only session removes its key from the store and, because the
// state is then empty, removes the localStorage item as well.
it('clears queue state for a session', () => {
enqueueQueuedPrompt(SESSION_KEY, { attachments: [attachment('img-1', 'image')], text: 'queued' })
clearQueuedPrompts(SESSION_KEY)
expect(getQueuedPrompts(SESSION_KEY)).toEqual([])
expect($queuedPromptsBySession.get()[SESSION_KEY]).toBeUndefined()
expect(window.localStorage.getItem(QUEUE_STORAGE_KEY)).toBeNull()
})
// Enqueueing writes the state to localStorage as JSON keyed by session.
it('persists queue entries into local storage', () => {
enqueueQueuedPrompt(SESSION_KEY, { attachments: [], text: 'persist me' })
const raw = window.localStorage.getItem(QUEUE_STORAGE_KEY)
expect(raw).toBeTruthy()
const parsed = JSON.parse(String(raw)) as Record<string, { text: string }[]>
expect(parsed[SESSION_KEY]?.[0]?.text).toBe('persist me')
})
})
+158
View File
@@ -0,0 +1,158 @@
import { atom } from 'nanostores'
import type { ComposerAttachment } from './composer'
// One prompt waiting in a session's queue.
export interface QueuedPromptEntry {
// Identifier generated when the entry is enqueued; used to update or remove it.
id: string
// Prompt text as supplied by the caller.
text: string
// Copies of the attachments supplied with the prompt.
attachments: ComposerAttachment[]
// Date.now() value captured when the entry was enqueued.
queuedAt: number
}
// Per-session queues, keyed by the trimmed session key.
type QueueState = Record<string, QueuedPromptEntry[]>
// localStorage key under which the whole QueueState is stored as JSON.
const STORAGE_KEY = 'hermes.desktop.composerQueue.v1'
// Reads the persisted queue state from localStorage.
//
// Returns an empty state when there is no `window`, when nothing is stored, or
// when the stored value cannot be parsed. The parsed JSON is treated as
// untrusted: session keys whose value is not an array are dropped, as are
// entries lacking a string `id`, a string `text`, or an `attachments` array.
// Without this check a malformed value would make the queue helpers throw when
// they spread or destructure the per-session array.
const load = (): QueueState => {
  if (typeof window === 'undefined') return {}

  try {
    const raw = window.localStorage.getItem(STORAGE_KEY)
    const parsed: unknown = raw ? JSON.parse(raw) : null

    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return {}

    const state: QueueState = {}

    for (const [sid, value] of Object.entries(parsed as Record<string, unknown>)) {
      if (!Array.isArray(value)) continue

      const queue = value.filter(
        (entry): entry is QueuedPromptEntry =>
          !!entry &&
          typeof entry === 'object' &&
          typeof entry.id === 'string' &&
          typeof entry.text === 'string' &&
          Array.isArray(entry.attachments)
      )

      // Empty queues are never written (writeSession deletes the key), so keep
      // the same invariant for state restored from storage.
      if (queue.length > 0) state[sid] = queue
    }

    return state
  } catch {
    // Unreadable storage or invalid JSON: start with an empty queue state.
    return {}
  }
}
// Writes the whole queue state to localStorage as JSON. An empty state removes
// the storage key instead of storing `{}`. Does nothing when `window` is absent.
const save = (state: QueueState) => {
  if (typeof window === 'undefined') return

  try {
    const hasQueues = Object.keys(state).length !== 0

    if (hasQueues) {
      window.localStorage.setItem(STORAGE_KEY, JSON.stringify(state))
    } else {
      window.localStorage.removeItem(STORAGE_KEY)
    }
  } catch {
    // best-effort: storage may be unavailable, queue still works in-memory
  }
}
// Store holding every session's queue; its initial value is whatever load()
// returns when this module is first evaluated.
export const $queuedPromptsBySession = atom<QueueState>(load())
// Replaces one session's queue in the store and persists the result.
// An empty queue deletes the session key rather than storing `[]`.
const writeSession = (sid: string, queue: QueuedPromptEntry[]) => {
  const snapshot: QueueState = { ...$queuedPromptsBySession.get() }

  if (queue.length > 0) {
    snapshot[sid] = queue
  } else {
    delete snapshot[sid]
  }

  $queuedPromptsBySession.set(snapshot)
  save(snapshot)
}
// Normalises a caller-supplied session key: trims it and returns null when the
// key is null, undefined, or blank after trimming.
const sidOf = (key: string | null | undefined): null | string => {
  const normalized = key?.trim()

  return normalized || null
}
// Current queue for an already-normalised session id; a fresh empty array when
// the store has no entry for that session.
const queueFor = (sid: string) => {
  const stored = $queuedPromptsBySession.get()[sid]

  return stored ?? []
}
// Builds an entry id of the form `queued-<Date.now()>-<random base-36 suffix>`.
const nextId = () => {
  const stamp = Date.now()
  const suffix = Math.random().toString(36).slice(2, 8)

  return `queued-${stamp}-${suffix}`
}
// Shallow-copies each attachment so the queue does not share object identity
// with the caller's array elements (nested values are not copied).
const cloneAttachments = (attachments: ComposerAttachment[]) =>
  attachments.map(original => Object.assign({}, original))
// Returns the queued prompts for a session, oldest first. A missing or blank
// key yields an empty array.
export const getQueuedPrompts = (key: string | null | undefined): QueuedPromptEntry[] => {
  const sid = sidOf(key)

  if (!sid) return []

  return queueFor(sid)
}
// Appends a prompt to the end of a session's queue and persists it.
// Returns the stored entry, or null when the key is missing or blank.
// Attachments are copied, so later changes to the caller's objects do not
// affect the queued entry.
export const enqueueQueuedPrompt = (
  key: string | null | undefined,
  payload: { text: string; attachments: ComposerAttachment[] }
): null | QueuedPromptEntry => {
  const sid = sidOf(key)

  if (!sid) return null

  const { text, attachments } = payload
  const entry: QueuedPromptEntry = {
    id: nextId(),
    text,
    attachments: cloneAttachments(attachments),
    queuedAt: Date.now()
  }

  writeSession(sid, queueFor(sid).concat(entry))

  return entry
}
// Removes and returns the oldest queued prompt for a session.
// Returns null when the key is missing or blank, or the queue is empty.
export const dequeueQueuedPrompt = (key: string | null | undefined): null | QueuedPromptEntry => {
  const sid = sidOf(key)

  if (!sid) return null

  const queue = queueFor(sid)
  const head = queue[0]

  if (!head) return null

  writeSession(sid, queue.slice(1))

  return head
}
// Removes the entry with the given id from a session's queue.
// Returns true when something was removed; false when the key is missing or
// blank, or no entry has that id (in which case nothing is written).
export const removeQueuedPrompt = (key: string | null | undefined, id: string): boolean => {
  const sid = sidOf(key)

  if (!sid) return false

  const queue = queueFor(sid)
  const remaining = queue.filter(entry => entry.id !== id)
  const removedAny = remaining.length !== queue.length

  if (removedAny) writeSession(sid, remaining)

  return removedAny
}
// Updates the text, and optionally the attachments, of the entry with the
// given id.
//
// When `update.attachments` is provided it replaces the entry's attachments
// with copies and always counts as a change. When it is omitted the existing
// attachments are kept and the entry only changes if the text differs.
// Returns true when an entry was changed and written; false otherwise.
export const updateQueuedPrompt = (
  key: string | null | undefined,
  id: string,
  update: { text: string; attachments?: ComposerAttachment[] }
): boolean => {
  const sid = sidOf(key)

  if (!sid) return false

  const { text, attachments } = update
  const next: QueuedPromptEntry[] = []
  let changed = false

  for (const entry of queueFor(sid)) {
    const untouched = entry.id !== id || (!attachments && entry.text === text)

    if (untouched) {
      next.push(entry)
      continue
    }

    changed = true
    next.push({
      ...entry,
      text,
      attachments: attachments ? cloneAttachments(attachments) : entry.attachments
    })
  }

  if (changed) writeSession(sid, next)

  return changed
}
// Text-only variant of updateQueuedPrompt: the entry's attachments are kept.
export const updateQueuedPromptText = (key: string | null | undefined, id: string, text: string): boolean => {
  return updateQueuedPrompt(key, id, { text })
}
// Drops every queued prompt for a session. Does nothing, and writes nothing,
// when the key is missing or blank or the session has no stored queue.
export const clearQueuedPrompts = (key: string | null | undefined) => {
  const sid = sidOf(key)

  if (!sid) return

  const hasQueue = sid in $queuedPromptsBySession.get()

  if (hasQueue) writeSession(sid, [])
}
+4
View File
@@ -39,6 +39,10 @@ if [ "$(id -u)" = "0" ]; then
# by the mapped user on the host side.
chown -R hermes:hermes "$HERMES_HOME" 2>/dev/null || \
echo "Warning: chown failed (rootless container?) — continuing anyway"
# The .venv must also be re-chowned when UID is remapped, otherwise
# lazy_deps.py cannot install platform packages (discord.py, etc.).
chown -R hermes:hermes "$INSTALL_DIR/.venv" 2>/dev/null || \
echo "Warning: chown .venv failed (rootless container?) — continuing anyway"
fi
# Ensure config.yaml is readable by the hermes runtime user even if it was
+3 -1
View File
@@ -446,7 +446,9 @@ class SignalAdapter(BasePlatformAdapter):
if sent_msg and isinstance(sent_msg, dict):
dest = sent_msg.get("destinationNumber") or sent_msg.get("destination")
sent_ts = sent_msg.get("timestamp")
if dest == self._account_normalized:
sent_msg_group_info = sent_msg.get("groupInfo") or {}
sent_msg_group_id = sent_msg_group_info.get("groupId") if sent_msg_group_info else None
if dest == self._account_normalized or sent_msg_group_id:
# Check if this is an echo of our own outbound reply
if sent_ts and sent_ts in self._recent_sent_timestamps:
self._recent_sent_timestamps.discard(sent_ts)
+1 -1
View File
@@ -2772,7 +2772,7 @@ class TelegramAdapter(BasePlatformAdapter):
{"thread_id": str(thread_id)},
)
)
await self._bot.send_message(**send_kwargs)
await self._send_message_with_thread_fallback(**send_kwargs)
except Exception as exc:
logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True)
return
+1
View File
@@ -345,6 +345,7 @@ class WeComAdapter(BasePlatformAdapter):
try:
await self._open_connection()
backoff_idx = 0
self._mark_connected()
logger.info("[%s] Reconnected", self.name)
except Exception as reconnect_exc:
logger.warning("[%s] Reconnect failed: %s", self.name, reconnect_exc)
+4 -1
View File
@@ -494,12 +494,15 @@ class WhatsAppAdapter(BasePlatformAdapter):
# plain executable path.
_npm_bin = shutil.which("npm") or "npm"
try:
# Read timeout from environment variable, default to 300 seconds (5 minutes)
# to accommodate slower systems like Unraid NAS
npm_install_timeout = int(os.environ.get("WHATSAPP_NPM_INSTALL_TIMEOUT", "300"))
install_result = subprocess.run(
[_npm_bin, "install", "--silent"],
cwd=str(bridge_dir),
capture_output=True,
text=True,
timeout=60,
timeout=npm_install_timeout,
)
if install_result.returncode != 0:
print(f"[{self.name}] npm install failed: {install_result.stderr}")
+1
View File
@@ -7543,6 +7543,7 @@ class GatewayRunner:
hook_ctx = {
"platform": source.platform.value if source.platform else "",
"user_id": source.user_id,
"chat_id": source.chat_id or "",
"session_id": session_entry.session_id,
"message": message_text[:500],
}
+1 -1
View File
@@ -284,7 +284,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
),
"alibaba": ProviderConfig(
id="alibaba",
name="Alibaba Cloud (DashScope)",
name="Qwen Cloud",
auth_type="api_key",
inference_base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
api_key_env_vars=("DASHSCOPE_API_KEY",),
-7
View File
@@ -735,15 +735,8 @@ DEFAULT_CONFIG = {
# Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
# cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
# long_lived_prefix: when true (default), Claude on Anthropic / OpenRouter / Nous
# Portal uses a split layout: tools[-1] + stable system prefix at long_lived_ttl
# (cross-session cache), last 2 messages at cache_ttl (within-session rolling).
# Set false to keep the legacy "system + last 3 messages" single-tier layout.
# long_lived_ttl: TTL for the cross-session prefix tier ("5m" or "1h"; default "1h").
"prompt_caching": {
"cache_ttl": "5m",
"long_lived_prefix": True,
"long_lived_ttl": "1h",
},
# OpenRouter-specific settings.
+2 -1
View File
@@ -307,7 +307,7 @@ def judge_goal(
return "continue", "empty response (nothing to evaluate)", False
try:
from agent.auxiliary_client import get_text_auxiliary_client
from agent.auxiliary_client import get_auxiliary_extra_body, get_text_auxiliary_client
except Exception as exc:
logger.debug("goal judge: auxiliary client import failed: %s", exc)
return "continue", "auxiliary client unavailable", False
@@ -336,6 +336,7 @@ def judge_goal(
temperature=0,
max_tokens=200,
timeout=timeout,
extra_body=get_auxiliary_extra_body() or None,
)
except Exception as exc:
logger.info("goal judge: API call failed (%s) — falling through to continue", exc)
+2 -1
View File
@@ -155,7 +155,7 @@ def specify_task(
)
try:
from agent.auxiliary_client import get_text_auxiliary_client
from agent.auxiliary_client import get_auxiliary_extra_body, get_text_auxiliary_client
except Exception as exc: # pragma: no cover — import smoke test
logger.debug("specify: auxiliary client import failed: %s", exc)
return SpecifyOutcome(task_id, False, "auxiliary client unavailable")
@@ -187,6 +187,7 @@ def specify_task(
temperature=0.3,
max_tokens=1500,
timeout=timeout or 120,
extra_body=get_auxiliary_extra_body() or None,
)
except Exception as exc:
logger.info(
+2 -2
View File
@@ -908,10 +908,10 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("lmstudio", "LM Studio", "LM Studio (local desktop app with built-in model server)"),
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
ProviderEntry("alibaba", "Qwen Cloud", "Qwen Cloud / DashScope Coding (Qwen + multi-provider)"),
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
ProviderEntry("tencent-tokenhub", "Tencent TokenHub", "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"),
ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
ProviderEntry("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
ProviderEntry("huggingface", "Hugging Face", "Hugging Face Inference Providers (20+ open models)"),
@@ -926,7 +926,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"),
ProviderEntry("minimax-oauth", "MiniMax (OAuth)", "MiniMax via OAuth browser login (Coding Plan, minimax.io)"),
ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"),
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
ProviderEntry("gmi", "GMI Cloud", "GMI Cloud (multi-model direct API)"),
@@ -936,6 +935,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway"),
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
]
# Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/
+2 -1
View File
@@ -2,6 +2,7 @@
from typing import Any
from agent.portal_tags import nous_portal_tags
from providers import register_provider
from providers.base import ProviderProfile
@@ -12,7 +13,7 @@ class NousProfile(ProviderProfile):
def build_extra_body(
self, *, session_id: str | None = None, **context
) -> dict[str, Any]:
return {"tags": ["product=hermes-agent"]}
return {"tags": nous_portal_tags()}
def build_api_kwargs_extras(
self,
+1 -1
View File
@@ -959,7 +959,7 @@ class LineAdapter(BasePlatformAdapter):
if chat_type == "dm" and self._client:
asyncio.create_task(self._client.loading(chat_id))
source_obj = self.create_source(
source_obj = self.build_source(
chat_id=chat_id,
chat_type=chat_type,
user_id=user_id,
+46 -196
View File
@@ -1454,15 +1454,6 @@ class AIAgent:
# 1h tier costs 2x on write vs 1.25x for 5m, but amortizes across long
# sessions with >5-minute pauses between turns (#14971).
self._cache_ttl = "5m"
# Long-lived prefix caching: when enabled and supported by the
# current provider, splits the system prompt into a stable prefix
# (cached cross-session at 1h TTL) and a volatile suffix
# (memory/timestamp — never cached), and attaches a 1h cache_control
# marker to the last tool in the schema array. Restricted to
# Claude on Anthropic / OpenRouter / Nous Portal; see
# ``_supports_long_lived_anthropic_cache``.
self._use_long_lived_prefix_cache = False
self._long_lived_cache_ttl = "1h"
try:
from hermes_cli.config import load_config as _load_pc_cfg
@@ -1470,12 +1461,6 @@ class AIAgent:
_ttl = _pc_cfg.get("cache_ttl", "5m")
if _ttl in {"5m", "1h"}:
self._cache_ttl = _ttl
_ll_enabled = _pc_cfg.get("long_lived_prefix", True)
_ll_ttl = _pc_cfg.get("long_lived_ttl", "1h")
if _ll_ttl in ("5m", "1h"):
self._long_lived_cache_ttl = _ll_ttl
if _ll_enabled and self._use_prompt_caching and self._supports_long_lived_anthropic_cache():
self._use_long_lived_prefix_cache = True
except Exception:
pass
@@ -2480,7 +2465,6 @@ class AIAgent:
"client_kwargs": dict(self._client_kwargs),
"use_prompt_caching": self._use_prompt_caching,
"use_native_cache_layout": self._use_native_cache_layout,
"use_long_lived_prefix_cache": self._use_long_lived_prefix_cache,
# Context engine state that _try_activate_fallback() overwrites.
# Use getattr for model/base_url/api_key/provider since plugin
# engines may not have these (they're ContextCompressor-specific).
@@ -2647,6 +2631,11 @@ class AIAgent:
old_model = self.model
old_provider = self.provider
# Clear the per-config context_length override so the new model's
# actual context window is resolved via get_model_context_length()
# instead of inheriting the stale value from the previous model.
self._config_context_length = None
# ── Swap core runtime fields ──
self.model = new_model
self.provider = new_provider
@@ -2711,15 +2700,6 @@ class AIAgent:
model=new_model,
)
)
self._use_long_lived_prefix_cache = bool(
self._use_prompt_caching
and self._supports_long_lived_anthropic_cache(
provider=new_provider,
base_url=self.base_url,
api_mode=api_mode,
model=new_model,
)
)
# ── LM Studio: preload before probing context length ──
self._ensure_lmstudio_runtime_loaded()
@@ -2768,7 +2748,6 @@ class AIAgent:
"client_kwargs": dict(self._client_kwargs),
"use_prompt_caching": self._use_prompt_caching,
"use_native_cache_layout": self._use_native_cache_layout,
"use_long_lived_prefix_cache": self._use_long_lived_prefix_cache,
"compressor_model": getattr(_cc, "model", self.model) if _cc else self.model,
"compressor_base_url": getattr(_cc, "base_url", self.base_url) if _cc else self.base_url,
"compressor_api_key": getattr(_cc, "api_key", "") if _cc else "",
@@ -3579,73 +3558,6 @@ class AIAgent:
return False, False
def _supports_long_lived_anthropic_cache(
    self,
    *,
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
    api_mode: Optional[str] = None,
    model: Optional[str] = None,
) -> bool:
    """Decide whether the long-lived (1h cross-session) cache layout applies.

    Narrower than ``_anthropic_prompt_cache_policy`` — only enabled on
    the four endpoints whose cross-session cache_control behavior we
    have explicitly validated:

    * Native Anthropic API (``api_mode == 'anthropic_messages'`` with
      provider ``anthropic`` or host ``api.anthropic.com``) — Claude only
    * Anthropic OAuth subscription (same transport as native API)
    * OpenRouter (``base_url`` matches ``openrouter.ai``) — Claude only
    * Nous Portal (``base_url`` contains ``nousresearch``) — Claude and
      Qwen; proxies to OpenRouter, so identical wire format

    All four honour ``cache_control`` on both the tools array and the
    first system content block, and bill cross-session cache reads at
    the documented 0.1× rate.

    Other endpoints covered by the standard ``system_and_3`` policy
    (third-party Anthropic gateways, MiniMax, opencode-go Qwen, etc.)
    keep that layout — they support cache_control but their behavior
    with mixed-TTL multi-block system content has not been validated
    against this codebase.

    Each keyword argument, when not ``None``, is used in place of the
    matching attribute on ``self``. This lets callers evaluate a
    prospective provider/model before the instance fields are swapped.

    Returns:
        ``True`` when the long-lived layout may be used, else ``False``.
    """
    eff_provider = (provider if provider is not None else self.provider) or ""
    eff_base_url = base_url if base_url is not None else (self.base_url or "")
    eff_api_mode = api_mode if api_mode is not None else (self.api_mode or "")
    eff_model = (model if model is not None else self.model) or ""

    model_lower = eff_model.lower()
    is_claude = "claude" in model_lower
    is_nous_portal = "nousresearch" in eff_base_url.lower()

    # Nous Portal: Claude AND Qwen both get long-lived caching.
    # Portal proxies to OpenRouter with identical cache_control
    # semantics. This is the only Portal check needed: a Claude model
    # on Portal returns here, so no later Portal branch can be reached.
    if is_nous_portal and (is_claude or "qwen" in model_lower):
        return True

    # Every remaining endpoint is validated for Claude models only.
    if not is_claude:
        return False

    # Native Anthropic + Anthropic OAuth subscription
    if eff_api_mode == "anthropic_messages":
        if eff_provider == "anthropic" or base_url_hostname(eff_base_url) == "api.anthropic.com":
            return True

    # OpenRouter
    if base_url_host_matches(eff_base_url, "openrouter.ai"):
        return True

    return False
@staticmethod
def _model_requires_responses_api(model: str) -> bool:
"""Return True for models that require the Responses API path.
@@ -5894,26 +5806,19 @@ class AIAgent:
"""Assemble the system prompt as three ordered parts.
Returns a dict with three keys:
* ``stable`` content that is byte-stable across sessions for a
given user config: identity, tool guidance, skills prompt,
* ``stable`` — identity, tool guidance, skills prompt,
environment hints, platform hints, model-family operational
guidance. Eligible for cross-session 1h prompt caching when
placed as a separate Anthropic content block (see
``apply_anthropic_cache_control_long_lived``).
* ``context`` context files (AGENTS.md, .cursorrules, etc.) and
caller-supplied system_message. Stable within a session but may
change between sessions when files are edited or the cwd
differs. Cached within-session via the rolling messages
breakpoint (5m TTL); not promoted to the long-lived tier so
edits don't poison the cross-session cache.
* ``volatile`` content that changes on most turns/sessions:
memory snapshot, user profile, external memory provider block,
timestamp line. Never marked for caching.
guidance.
* ``context`` — context files (AGENTS.md, .cursorrules, etc.)
and caller-supplied system_message.
* ``volatile`` — memory snapshot, user profile, external
memory provider block, timestamp line.
Joined ``stable\\n\\ncontext\\n\\nvolatile`` produces the same
logical content the old single-string builder produced, with the
guarantee that volatile content is at the end (cache-friendly
ordering for any provider that does prefix caching).
Joined into a single string by ``_build_system_prompt`` and
cached on ``_cached_system_prompt`` for the lifetime of the
AIAgent. Hermes never re-renders parts of this string
mid-session — that's the only way to keep upstream prompt caches
warm across turns.
"""
# ── Stable tier ────────────────────────────────────────────────
stable_parts: List[str] = []
@@ -6115,9 +6020,10 @@ class AIAgent:
Layers are ordered cache-friendly: stable identity/guidance first,
then session-stable context files, then per-call volatile content
(memory, USER profile, timestamp). The split is exposed via
``_build_system_prompt_parts`` for the long-lived prompt-caching
path (Claude on Anthropic / OpenRouter / Nous Portal).
(memory, USER profile, timestamp). The whole string is treated as
one cached block — Hermes never rebuilds or reinjects parts of it
mid-session, which is the only way to keep upstream prompt caches
warm across turns.
"""
parts = self._build_system_prompt_parts(system_message=system_message)
joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
@@ -8817,6 +8723,11 @@ class AIAgent:
fb_api_mode = "bedrock_converse"
old_model = self.model
# Clear the per-config context_length override so the fallback
# model's actual context window is resolved instead of inheriting
# the stale value from the previous model. See #22387.
self._config_context_length = None
self.model = fb_model
self.provider = fb_provider
self.base_url = fb_base_url
@@ -8879,15 +8790,6 @@ class AIAgent:
model=fb_model,
)
)
self._use_long_lived_prefix_cache = bool(
self._use_prompt_caching
and self._supports_long_lived_anthropic_cache(
provider=fb_provider,
base_url=fb_base_url,
api_mode=fb_api_mode,
model=fb_model,
)
)
# LM Studio: preload before probing the fallback's context length.
self._ensure_lmstudio_runtime_loaded()
@@ -8964,16 +8866,6 @@ class AIAgent:
"use_native_cache_layout",
self.api_mode == "anthropic_messages" and self.provider == "anthropic",
)
# Long-lived prefix flag was added later — restore False on
# snapshots predating the new field, then re-evaluate against
# the restored provider/model in case the user had it enabled.
self._use_long_lived_prefix_cache = rt.get(
"use_long_lived_prefix_cache",
bool(
self._use_prompt_caching
and self._supports_long_lived_anthropic_cache()
),
)
# ── Rebuild client for the primary provider ──
if self.api_mode == "anthropic_messages":
@@ -9551,19 +9443,7 @@ class AIAgent:
def _build_api_kwargs(self, api_messages: list) -> dict:
"""Build the keyword arguments dict for the active API mode."""
# Resolve the tools array exactly once. When the long-lived
# prefix-cache layout is active (Claude on Anthropic / OpenRouter
# / Nous Portal), attach a 1h cache_control marker to the last
# tool — this caches the entire tools array cross-session via
# Anthropic's tools→system→messages prefix order. The function
# returns a deep copy, so self.tools is never mutated.
if self._use_long_lived_prefix_cache and self.tools:
from agent.prompt_caching import mark_tools_for_long_lived_cache
tools_for_api = mark_tools_for_long_lived_cache(
self.tools, long_lived_ttl=self._long_lived_cache_ttl,
)
else:
tools_for_api = self.tools
tools_for_api = self.tools
if self.api_mode == "anthropic_messages":
_transport = self._get_transport()
@@ -11662,7 +11542,8 @@ class AIAgent:
"effort": "medium"
}
if _is_nous:
summary_extra_body["tags"] = ["product=hermes-agent"]
from agent.portal_tags import nous_portal_tags as _portal_tags
summary_extra_body["tags"] = _portal_tags()
if self.api_mode == "codex_responses":
codex_kwargs = self._build_api_kwargs(api_messages)
@@ -12423,36 +12304,21 @@ class AIAgent:
# External recall context is injected into the user message, not the system
# prompt, so the stable cache prefix remains unchanged.
#
# When the long-lived prefix-cache layout is active (Claude on
# Anthropic / OpenRouter / Nous Portal), we build the system
# message as a *list of content blocks*: [stable, context,
# volatile, ephemeral?]. Block 0 (stable) gets the 1h
# cache_control marker further down via
# apply_anthropic_cache_control_long_lived; blocks 1-3 are
# cached only via the rolling messages window at 5m.
# NOTE: Plugin context from pre_llm_call hooks is injected into the
# user message (see injection block above), NOT the system prompt.
# This is intentional — system prompt modifications break the prompt
# cache prefix. The system prompt is reserved for Hermes internals.
if self._use_long_lived_prefix_cache:
_sys_parts = self._build_system_prompt_parts(system_message=system_message)
_sys_blocks: list = []
if _sys_parts.get("stable"):
_sys_blocks.append({"type": "text", "text": _sys_parts["stable"]})
if _sys_parts.get("context"):
_sys_blocks.append({"type": "text", "text": _sys_parts["context"]})
if _sys_parts.get("volatile"):
_sys_blocks.append({"type": "text", "text": _sys_parts["volatile"]})
if self.ephemeral_system_prompt:
_sys_blocks.append({"type": "text", "text": self.ephemeral_system_prompt})
if _sys_blocks:
api_messages = [{"role": "system", "content": _sys_blocks}] + api_messages
else:
effective_system = active_system_prompt or ""
if self.ephemeral_system_prompt:
effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
if effective_system:
api_messages = [{"role": "system", "content": effective_system}] + api_messages
#
# Hermes invariant: the system prompt is built ONCE per session
# (cached on ``_cached_system_prompt``) and replayed verbatim on
# every turn. We send it as a single content string so the
# bytes are byte-stable across turns and upstream prompt caches
# stay warm.
effective_system = active_system_prompt or ""
if self.ephemeral_system_prompt:
effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
if effective_system:
api_messages = [{"role": "system", "content": effective_system}] + api_messages
# Inject ephemeral prefill messages right after the system prompt
# but before conversation history. Same API-call-time-only pattern.
@@ -12466,29 +12332,13 @@ class AIAgent:
# gateways. Auto-detected: if ``_use_prompt_caching`` is set,
# inject cache_control breakpoints (system + last 3 messages)
# to reduce input token costs by ~75% on multi-turn
# conversations. Layout is chosen per endpoint by
# ``_anthropic_prompt_cache_policy``.
#
# Long-lived prefix layout (prefix_and_2): stable system block
# gets 1h marker + last 2 messages get 5m markers. Tools
# array's last entry is marked separately at API-call kwargs
# build time (see ``_build_api_kwargs`` and
# ``mark_tools_for_long_lived_cache``).
# conversations.
if self._use_prompt_caching:
if self._use_long_lived_prefix_cache:
from agent.prompt_caching import apply_anthropic_cache_control_long_lived
api_messages = apply_anthropic_cache_control_long_lived(
api_messages,
long_lived_ttl=self._long_lived_cache_ttl,
rolling_ttl=self._cache_ttl,
native_anthropic=self._use_native_cache_layout,
)
else:
api_messages = apply_anthropic_cache_control(
api_messages,
cache_ttl=self._cache_ttl,
native_anthropic=self._use_native_cache_layout,
)
api_messages = apply_anthropic_cache_control(
api_messages,
cache_ttl=self._cache_ttl,
native_anthropic=self._use_native_cache_layout,
)
# Safety net: strip orphaned tool results / add stubs for missing
# results before sending to the API. Runs unconditionally — not
@@ -14442,7 +14292,7 @@ class AIAgent:
_ra_raw = _resp_headers.get("retry-after") or _resp_headers.get("Retry-After")
if _ra_raw:
try:
_retry_after = min(int(_ra_raw), 120) # Cap at 2 minutes
_retry_after = min(float(_ra_raw), 120) # Cap at 2 minutes
except (TypeError, ValueError):
pass
wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0)
+1 -1
View File
@@ -890,7 +890,7 @@ clone_repo() {
stash_name="hermes-install-autostash-$(date -u +%Y%m%d-%H%M%S)"
log_info "Local changes detected, stashing before update..."
git stash push --include-untracked -m "$stash_name"
autostash_ref="$(git rev-parse --verify refs/stash)"
autostash_ref="stash@{0}"
fi
git fetch origin
+61
View File
@@ -0,0 +1,61 @@
"""Tests for agent.portal_tags — Nous Portal request tag contract."""
from __future__ import annotations
def test_hermes_client_tag_includes_current_version():
    """The client tag embeds ``hermes_cli.__version__`` exactly as published."""
    from hermes_cli import __version__
    from agent.portal_tags import hermes_client_tag

    expected_tag = f"client=hermes-client-v{__version__}"
    actual_tag = hermes_client_tag()
    assert actual_tag == expected_tag
def test_hermes_client_tag_format():
    """The client tag keeps the ``client=hermes-client-v…`` shape as one value."""
    from agent.portal_tags import hermes_client_tag

    client_tag = hermes_client_tag()
    assert client_tag.startswith("client=hermes-client-v")
    # Must remain a single tag value: neither spaces nor commas are allowed.
    for separator in (" ", ","):
        assert separator not in client_tag
def test_nous_portal_tags_contains_product_and_client():
    """The portal tag list is exactly the product tag plus the client tag."""
    from agent.portal_tags import hermes_client_tag, nous_portal_tags

    portal_tags = nous_portal_tags()
    for required_tag in ("product=hermes-agent", hermes_client_tag()):
        assert required_tag in portal_tags
    # Nothing beyond those two entries is expected.
    assert len(portal_tags) == 2
def test_nous_portal_tags_returns_fresh_list():
    """Mutating one returned list must not leak into the next call's result."""
    from agent.portal_tags import nous_portal_tags

    injected = "client=test-mutation"
    first_call = nous_portal_tags()
    first_call.append(injected)

    second_call = nous_portal_tags()
    assert injected not in second_call
def test_auxiliary_client_nous_extra_body_uses_helper():
    """``auxiliary_client.NOUS_EXTRA_BODY`` equals the helper-built tag payload."""
    from agent.auxiliary_client import NOUS_EXTRA_BODY
    from agent.portal_tags import nous_portal_tags

    expected_body = {"tags": nous_portal_tags()}
    assert NOUS_EXTRA_BODY == expected_body
def test_nous_provider_profile_uses_helper():
    """The ``nous`` provider profile emits the same tags as the helper."""
    from agent.portal_tags import nous_portal_tags
    from providers import get_provider_profile

    nous_profile = get_provider_profile("nous")
    assert nous_profile is not None

    extra_body = nous_profile.build_extra_body()
    assert extra_body["tags"] == nous_portal_tags()
-131
View File
@@ -6,8 +6,6 @@ import pytest
from agent.prompt_caching import (
_apply_cache_marker,
apply_anthropic_cache_control,
apply_anthropic_cache_control_long_lived,
mark_tools_for_long_lived_cache,
)
@@ -143,132 +141,3 @@ class TestApplyAnthropicCacheControl:
elif "cache_control" in msg:
count += 1
assert count <= 4
class TestMarkToolsForLongLivedCache:
    """Tests for ``mark_tools_for_long_lived_cache``."""

    @staticmethod
    def _tool(name):
        """Build a minimal OpenAI-format function tool entry."""
        return {"type": "function", "function": {"name": name}}

    def test_returns_unchanged_for_empty_tools(self):
        """``None`` stays ``None`` and an empty list stays empty."""
        assert mark_tools_for_long_lived_cache(None) is None
        assert mark_tools_for_long_lived_cache([]) == []

    def test_marks_only_last_tool(self):
        """Only the third of three tools receives the 1h marker."""
        roster = [self._tool(name) for name in ("a", "b", "c")]
        marked = mark_tools_for_long_lived_cache(roster)
        for index in (0, 1):
            assert "cache_control" not in marked[index]
        assert marked[2]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}

    def test_does_not_mutate_input(self):
        """The caller's tool dicts are left without a marker."""
        roster = [self._tool("a")]
        mark_tools_for_long_lived_cache(roster)
        assert "cache_control" not in roster[0]

    def test_5m_ttl_drops_ttl_field(self):
        """A 5m TTL is expressed by omitting the ``ttl`` key entirely."""
        roster = [self._tool("a")]
        marked = mark_tools_for_long_lived_cache(roster, long_lived_ttl="5m")
        assert marked[0]["cache_control"] == {"type": "ephemeral"}
class TestApplyAnthropicCacheControlLongLived:
    """Tests for ``apply_anthropic_cache_control_long_lived``."""

    @staticmethod
    def _system(*texts):
        """Build a system message whose content is one text block per argument."""
        return {
            "role": "system",
            "content": [{"type": "text", "text": text} for text in texts],
        }

    @staticmethod
    def _has_marker(message):
        """Report whether a message carries a marker on its last part or envelope."""
        parts = message.get("content")
        if isinstance(parts, list) and parts and isinstance(parts[-1], dict):
            return "cache_control" in parts[-1]
        return "cache_control" in message

    @staticmethod
    def _breakpoints_in(message):
        """Count cache_control markers in one message (parts, else envelope)."""
        parts = message.get("content")
        if isinstance(parts, list):
            return sum(
                1 for part in parts
                if isinstance(part, dict) and "cache_control" in part
            )
        return 1 if "cache_control" in message else 0

    def test_empty_messages(self):
        """An empty conversation comes back empty."""
        assert apply_anthropic_cache_control_long_lived([]) == []

    def test_marks_first_block_of_split_system(self):
        """Only block 0 of a split system message receives the 1h marker."""
        conversation = [
            self._system("STABLE", "CONTEXT", "VOLATILE"),
            {"role": "user", "content": "msg1"},
            {"role": "assistant", "content": "msg2"},
        ]
        marked_blocks = apply_anthropic_cache_control_long_lived(conversation)[0]["content"]
        assert marked_blocks[0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
        for untouched in (marked_blocks[1], marked_blocks[2]):
            assert "cache_control" not in untouched

    def test_rolling_marker_on_last_2_messages(self):
        """The rolling marker lands on the final two messages and nowhere earlier."""
        conversation = [
            self._system("S"),
            {"role": "user", "content": "u1"},
            {"role": "assistant", "content": "a1"},
            {"role": "user", "content": "u2"},
            {"role": "assistant", "content": "a2"},
        ]
        marked = apply_anthropic_cache_control_long_lived(conversation)
        # Older turns (u1, a1) must stay unmarked.
        assert not self._has_marker(marked[1])
        assert not self._has_marker(marked[2])
        # The two most recent turns (u2, a2) must be marked.
        assert self._has_marker(marked[3])
        assert self._has_marker(marked[4])

    def test_rolling_marker_uses_5m_ttl(self):
        """The rolling marker uses the 5m form even when the prefix uses 1h."""
        conversation = [
            self._system("S"),
            {"role": "user", "content": "u1"},
            {"role": "assistant", "content": "a1"},
        ]
        marked = apply_anthropic_cache_control_long_lived(
            conversation, long_lived_ttl="1h", rolling_ttl="5m",
        )
        # The final message is wrapped into a content list; its last part
        # carries the rolling marker, and 5m is encoded by having no ttl key.
        final_parts = marked[-1]["content"]
        assert isinstance(final_parts, list)
        assert final_parts[-1]["cache_control"] == {"type": "ephemeral"}

    def test_string_system_falls_back_to_envelope_marker(self):
        """A plain-string system message still ends up with a 1h marker."""
        conversation = [
            {"role": "system", "content": "Single string system"},
            {"role": "user", "content": "u1"},
        ]
        system_content = apply_anthropic_cache_control_long_lived(conversation)[0]["content"]
        # The string is wrapped into a list and its only block gets the marker.
        assert isinstance(system_content, list)
        assert system_content[0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}

    def test_does_not_mutate_input(self):
        """The caller's message list compares equal before and after the call."""
        conversation = [self._system("S"), {"role": "user", "content": "u1"}]
        snapshot = copy.deepcopy(conversation)
        apply_anthropic_cache_control_long_lived(conversation)
        assert conversation == snapshot

    def test_max_4_breakpoints_with_split_system(self):
        """A ten-turn conversation yields exactly three markers from this function."""
        turns = [
            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg{i}"}
            for i in range(10)
        ]
        marked = apply_anthropic_cache_control_long_lived([self._system("S", "V")] + turns)
        total = sum(self._breakpoints_in(message) for message in marked)
        # One system block plus the last two messages. tools[-1] is marked
        # elsewhere, which leaves room for a fourth breakpoint at call time.
        assert total == 3
-112
View File
@@ -1,112 +0,0 @@
"""Live E2E: long-lived prefix caching on Claude via OpenRouter.
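Runs only when an OpenRouter key is found: OPENROUTER_API_KEY or LIVE_OR_KEY in
the environment, or an OPENROUTER_API_KEY= line in ~/.hermes/.env. Skipped when
no key is found, as in the normal hermetic test suite (which unsets credentials).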
"""
import os, sys, tempfile, time, shutil, pytest
# Resolve the live key at import time, before conftest unsets it.
_LIVE_KEY = os.getenv("OPENROUTER_API_KEY") or os.getenv("LIVE_OR_KEY")
if not _LIVE_KEY:
    # Fall back to the OPENROUTER_API_KEY= entry in the user's Hermes .env file.
    env_path = os.path.expanduser("~/.hermes/.env")
    if os.path.exists(env_path):
        with open(env_path) as f:
            for line in f:
                if not line.startswith("OPENROUTER_API_KEY="):
                    continue
                # Keep everything after the first "=", minus surrounding
                # whitespace and one layer of either quote style.
                raw_value = line.strip().partition("=")[2]
                _LIVE_KEY = raw_value.strip().strip('"').strip("'")
                break

# Skip the whole module when no key could be resolved.
pytestmark = pytest.mark.skipif(
    not _LIVE_KEY,
    reason="set OPENROUTER_API_KEY (or LIVE_OR_KEY) to run live cache test",
)
def test_long_lived_prefix_cache_e2e_openrouter(tmp_path, monkeypatch):
    """Run two fresh-session agents live: the first writes cache, the second reads it.

    Both agents are built identically against OpenRouter. The test passes when
    the cold call reports cache-write tokens and the warm call, made from a new
    session with a different user message, reports at least 1000 cache-read
    tokens.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # The hermetic conftest unsets OPENROUTER_API_KEY, so restore it here.
    monkeypatch.setenv("OPENROUTER_API_KEY", _LIVE_KEY)

    import yaml

    # Minimal config, but with enough toolset/guidance text to clear the
    # minimum cacheable prefix size; markers on smaller prefixes are silently
    # ignored upstream. NOTE(review): the original notes cite ~1024 tokens in
    # general and 2048 for Haiku; confirm against current Anthropic docs.
    config = {
        "model": {"provider": "openrouter", "default": "anthropic/claude-haiku-4.5"},
        "prompt_caching": {"long_lived_prefix": True, "long_lived_ttl": "1h", "cache_ttl": "5m"},
        "agent": {"tool_use_enforcement": True},  # adds substantial guidance text
        "memory": {"provider": ""},
        "compression": {"enabled": False},
    }
    config_file = tmp_path / "config.yaml"
    config_file.write_text(yaml.safe_dump(config))

    from run_agent import AIAgent

    agent_kwargs = dict(
        api_key=_LIVE_KEY,
        base_url="https://openrouter.ai/api/v1",
        provider="openrouter",
        model="anthropic/claude-haiku-4.5",
        api_mode="chat_completions",
        # Default toolset roster: the tools array carries most of the
        # cross-session cache value, and a tiny toolset can leave the prefix
        # too small for the marker to take effect.
        enabled_toolsets=None,
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
        save_trajectories=False,
    )

    def make_agent():
        return AIAgent(**agent_kwargs)

    def preview(result):
        # First 80 characters of the final response, tolerating a missing value.
        return (result.get('final_response') or '')[:80]

    cold_agent = make_agent()
    assert cold_agent._use_prompt_caching is True, "policy should enable caching for Claude on OR"
    assert cold_agent._use_long_lived_prefix_cache is True, "long-lived path should activate"

    parts = cold_agent._build_system_prompt_parts()
    print(f"\nstable={len(parts['stable']):,} ctx={len(parts['context']):,} volatile={len(parts['volatile']):,} chars")
    print(f"tool count: {len(cold_agent.tools or [])}")

    # Distinct user messages per call, so a response cache at OpenRouter cannot
    # short-circuit the upstream call whose cache accounting is being checked.
    USER_1 = "Reply with the single word ALPHA."
    USER_2 = "Reply with the single word BRAVO."

    print("\n--- Call 1 (cold) ---")
    cold_result = cold_agent.run_conversation(USER_1, conversation_history=[])
    print(f"final_response[:80]: {preview(cold_result)!r}")
    cold_read = cold_agent.session_cache_read_tokens
    cold_write = cold_agent.session_cache_write_tokens
    print(f"call1: cache_read={cold_read} cache_write={cold_write}")

    # Let the cache settle, then use a brand-new agent (new session) for the read.
    time.sleep(2)
    warm_agent = make_agent()
    assert warm_agent.session_id != cold_agent.session_id, "second agent must have a new session"

    print("\n--- Call 2 (warm, NEW session, different user msg) ---")
    warm_result = warm_agent.run_conversation(USER_2, conversation_history=[])
    print(f"final_response[:80]: {preview(warm_result)!r}")
    warm_read = warm_agent.session_cache_read_tokens
    warm_write = warm_agent.session_cache_write_tokens
    print(f"call2: cache_read={warm_read} cache_write={warm_write}")

    print(f"\n=== VERDICT ===")
    print(f" call1 wrote {cold_write:,} cache tokens, read {cold_read:,}")
    print(f" call2 wrote {warm_write:,} cache tokens, read {warm_read:,}")
    if cold_write:
        print(f" cross-session read fraction: cr2/cw1 = {warm_read/cold_write:.2%}")

    # Final verdict.
    assert cold_write > 0, f"call 1 must write cache (got {cold_write}); long-lived layout not reaching wire"
    assert warm_read > 0, (
        f"call 2 must read cache cross-session (got {warm_read}); "
        f"stable prefix is not byte-stable across sessions"
    )
    assert warm_read >= 1000, f"cache_read on call 2 ({warm_read}) too small to indicate real reuse"
@@ -147,11 +147,12 @@ class TestChatCompletionsBuildKwargs:
]
def test_nous_tags(self, transport):
from agent.portal_tags import nous_portal_tags
from providers import get_provider_profile
profile = get_provider_profile("nous")
msgs = [{"role": "user", "content": "Hi"}]
kw = transport.build_kwargs(model="gpt-4o", messages=msgs, provider_profile=profile)
assert kw["extra_body"]["tags"] == ["product=hermes-agent"]
assert kw["extra_body"]["tags"] == nous_portal_tags()
def test_reasoning_default(self, transport):
msgs = [{"role": "user", "content": "Hi"}]
+2 -1
View File
@@ -7,6 +7,7 @@ from unittest.mock import AsyncMock, MagicMock
import pytest
import gateway.run as gateway_run
from agent.i18n import t
from gateway.platforms.base import MessageEvent, MessageType
from gateway.restart import DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
from gateway.session import SessionEntry, build_session_key
@@ -32,7 +33,7 @@ async def test_restart_command_while_busy_requests_drain_without_interrupt(monke
result = await runner._handle_message(event)
assert result == "⏳ Draining 1 active agent(s) before restart..."
assert result == t("gateway.draining", count=1)
running_agent.interrupt.assert_not_called()
runner.request_restart.assert_called_once_with(detached=True, via_service=False)
+2 -1
View File
@@ -273,12 +273,13 @@ class TestRequestOverridesParity:
def test_extra_body_override_merges_with_provider_body(self, transport):
"""Override extra_body merges WITH provider extra_body, not replaces."""
from agent.portal_tags import nous_portal_tags
kw = transport.build_kwargs(
model="hermes-3", messages=_msgs(), tools=None,
provider_profile=get_provider_profile("nous"),
request_overrides={"extra_body": {"custom": True}},
)
assert kw["extra_body"]["tags"] == ["product=hermes-agent"] # from profile
assert kw["extra_body"]["tags"] == nous_portal_tags() # from profile
assert kw["extra_body"]["custom"] is True # from override
def test_top_level_override(self, transport):
+2 -1
View File
@@ -210,9 +210,10 @@ class TestOpenRouterProfile:
class TestNousProfile:
def test_tags(self):
from agent.portal_tags import nous_portal_tags
p = get_provider_profile("nous")
body = p.build_extra_body()
assert body["tags"] == ["product=hermes-agent"]
assert body["tags"] == nous_portal_tags()
def test_auth_type(self):
p = get_provider_profile("nous")
+2 -1
View File
@@ -165,13 +165,14 @@ class TestNousParity:
"""Nous: product tags, reasoning, omit when disabled."""
def test_tags(self, transport):
from agent.portal_tags import nous_portal_tags
kw = transport.build_kwargs(
model="hermes-3-llama-3.1-405b",
messages=_simple_messages(),
tools=None,
provider_profile=get_provider_profile("nous"),
)
assert kw["extra_body"]["tags"] == ["product=hermes-agent"]
assert kw["extra_body"]["tags"] == nous_portal_tags()
def test_reasoning_omitted_when_disabled(self, transport):
"""Nous special case: reasoning omitted entirely when disabled."""
@@ -330,127 +330,3 @@ class TestExplicitOverrides:
# Long-lived prefix cache policy (cross-session 1h tier)
# ─────────────────────────────────────────────────────────────────────
class TestSupportsLongLivedAnthropicCache:
    """Tests for ``_supports_long_lived_anthropic_cache``.

    The cases below accept Claude on native Anthropic, OpenRouter and Nous
    Portal, plus Qwen on Nous Portal. Every other provider/model pair
    exercised here is rejected.
    """

    _ANTHROPIC_URL = "https://api.anthropic.com"
    _OPENROUTER_URL = "https://openrouter.ai/api/v1"
    _NOUS_URL = "https://inference-api.nousresearch.com/v1"

    @staticmethod
    def _verdict(**endpoint):
        """Build an agent for the given endpoint and return the policy answer."""
        return _make_agent(**endpoint)._supports_long_lived_anthropic_cache()

    def test_native_anthropic_claude_supported(self):
        assert self._verdict(
            provider="anthropic",
            base_url=self._ANTHROPIC_URL,
            api_mode="anthropic_messages",
            model="claude-sonnet-4.6",
        ) is True

    def test_anthropic_oauth_supported(self):
        # OAuth goes over the same transport as native Anthropic.
        assert self._verdict(
            provider="anthropic",
            base_url=self._ANTHROPIC_URL,
            api_mode="anthropic_messages",
            model="claude-opus-4.6",
        ) is True

    def test_openrouter_claude_supported(self):
        assert self._verdict(
            provider="openrouter",
            base_url=self._OPENROUTER_URL,
            api_mode="chat_completions",
            model="anthropic/claude-sonnet-4.6",
        ) is True

    def test_nous_portal_claude_supported(self):
        # Nous Portal proxies to OpenRouter, so the wire format is the same.
        assert self._verdict(
            provider="nous",
            base_url=self._NOUS_URL,
            api_mode="chat_completions",
            model="anthropic/claude-opus-4.7",
        ) is True

    def test_nous_portal_qwen_supported(self):
        # Portal Qwen uses the same OpenRouter-equivalent transport as Portal
        # Claude, so the long-lived (1h cross-session) markers apply as well.
        assert self._verdict(
            provider="nous",
            base_url=self._NOUS_URL,
            api_mode="chat_completions",
            model="qwen3.6-plus",
        ) is True

    def test_nous_portal_qwen_vendored_slug_supported(self):
        assert self._verdict(
            provider="nous",
            base_url=self._NOUS_URL,
            api_mode="chat_completions",
            model="qwen/qwen3.6-plus",
        ) is True

    def test_nous_portal_non_claude_non_qwen_rejected(self):
        # On Portal the long-lived scope is limited to Claude and Qwen.
        assert self._verdict(
            provider="nous",
            base_url=self._NOUS_URL,
            api_mode="chat_completions",
            model="openai/gpt-5.4",
        ) is False

    def test_openrouter_non_claude_rejected(self):
        assert self._verdict(
            provider="openrouter",
            base_url=self._OPENROUTER_URL,
            api_mode="chat_completions",
            model="openai/gpt-5.4",
        ) is False

    def test_third_party_anthropic_gateway_rejected(self):
        # Anthropic-wire gateways such as MiniMax are not on the validated list.
        assert self._verdict(
            provider="minimax",
            base_url="https://api.minimax.io/anthropic",
            api_mode="anthropic_messages",
            model="minimax-m2.7",
        ) is False

    def test_alibaba_dashscope_rejected(self):
        assert self._verdict(
            provider="alibaba",
            base_url="https://dashscope.aliyuncs.com/api/v1/anthropic",
            api_mode="anthropic_messages",
            model="qwen3.5-plus",
        ) is False

    def test_opencode_qwen_rejected(self):
        assert self._verdict(
            provider="opencode-go",
            base_url="https://api.opencode-go.example/v1",
            api_mode="chat_completions",
            model="qwen3.6-plus",
        ) is False

    def test_fallback_target_evaluated_independently(self):
        # The agent starts on an unsupported provider; the check is then asked
        # about an OpenRouter Claude fallback target passed in explicitly.
        starting_agent = _make_agent(
            provider="minimax",
            base_url="https://api.minimax.io/anthropic",
            api_mode="anthropic_messages",
            model="minimax-m2.7",
        )
        fallback_target = {
            "provider": "openrouter",
            "base_url": self._OPENROUTER_URL,
            "api_mode": "chat_completions",
            "model": "anthropic/claude-sonnet-4.6",
        }
        assert starting_agent._supports_long_lived_anthropic_cache(**fallback_target) is True
+2 -1
View File
@@ -343,11 +343,12 @@ class TestBuildApiKwargsAIGateway:
class TestBuildApiKwargsNousPortal:
def test_includes_nous_product_tags(self, monkeypatch):
from agent.portal_tags import nous_portal_tags
agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
extra = kwargs.get("extra_body", {})
assert extra.get("tags") == ["product=hermes-agent"]
assert extra.get("tags") == nous_portal_tags()
def test_uses_chat_completions_format(self, monkeypatch):
agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
-1
View File
@@ -169,7 +169,6 @@ class TestEphemeralMaxOutputTokens:
agent.reasoning_config = None
agent._is_anthropic_oauth = False
agent._ephemeral_max_output_tokens = None
agent._use_long_lived_prefix_cache = False
compressor = MagicMock()
compressor.context_length = 200_000
+3 -1
View File
@@ -314,7 +314,9 @@ DANGEROUS_PATTERNS = [
(r'\bdd\s+.*if=', "disk copy"),
(r'>\s*/dev/sd', "write to block device"),
(r'\bDROP\s+(TABLE|DATABASE)\b', "SQL DROP"),
(r'\bDELETE\s+FROM\b(?!.*\bWHERE\b)', "SQL DELETE without WHERE"),
# Use [^\n]* instead of .* so DOTALL mode does not cause a WHERE clause on the
# *next* line to satisfy the negative lookahead, silently allowing DELETE without WHERE.
(r'\bDELETE\s+FROM\b(?![^\n]*\bWHERE\b)', "SQL DELETE without WHERE"),
(r'\bTRUNCATE\s+(TABLE)?\s*\w', "SQL TRUNCATE"),
(r'>\s*/etc/', "overwrite system config"),
(r'\bsystemctl\s+(-[^\s]+\s+)*(stop|restart|disable|mask)\b', "stop/restart system service"),
+2 -1
View File
@@ -461,7 +461,8 @@ async def _send_via_adapter(
adapter = None
if adapter is not None:
try:
result = await adapter.send(chat_id=chat_id, content=chunk)
metadata = {"thread_id": thread_id} if thread_id else None
result = await adapter.send(chat_id=chat_id, content=chunk, metadata=metadata)
except asyncio.CancelledError:
raise
except Exception as e:
+3 -1
View File
@@ -130,7 +130,9 @@ def detect_audio_environment() -> dict:
try:
devices = sd.query_devices()
if not devices:
if termux_capture:
if os.environ.get('PULSE_SERVER'):
notices.append("No PortAudio devices detected but PULSE_SERVER is set -- continuing")
elif termux_capture:
notices.append("No PortAudio devices detected, but Termux:API microphone capture is available")
else:
warnings.append("No audio input/output devices detected")
+2 -1
View File
@@ -593,7 +593,8 @@ def _resolve_web_extract_auxiliary(model: Optional[str] = None) -> tuple[Optiona
extra_body: Dict[str, Any] = {}
if client is not None and _is_nous_auxiliary_client(client):
from agent.auxiliary_client import get_auxiliary_extra_body
extra_body = get_auxiliary_extra_body() or {"tags": ["product=hermes-agent"]}
from agent.portal_tags import nous_portal_tags
extra_body = get_auxiliary_extra_body() or {"tags": nous_portal_tags()}
return client, effective_model, extra_body
+25
View File
@@ -92,6 +92,13 @@ manager makes sense for that language (rustup, ghcup, opam, brew,
…). Hermes auto-detects the binary on PATH or in
`<HERMES_HOME>/lsp/bin/`.
A few servers are installed alongside a peer dependency that npm
won't auto-pull. The current case is `typescript-language-server`,
which requires the `typescript` SDK importable from the same
`node_modules` tree — Hermes installs both packages together when you
run `hermes lsp install typescript` or auto-install fires on first
use.
## CLI
```
@@ -207,6 +214,24 @@ The binary isn't on PATH and isn't in `<HERMES_HOME>/lsp/bin/`. Run
`hermes lsp install <server_id>` to attempt an auto-install, or
install the binary manually through the language's normal toolchain.
**`Backend warnings` section in `hermes lsp status`**
Some servers are thin wrappers that delegate the actual diagnostics to an
external CLI. They spawn cleanly and accept requests, but when that sidecar
binary is missing they silently report no diagnostics instead of failing.
The most common case is `bash-language-server`, which delegates diagnostics
to `shellcheck`.
When `hermes lsp status` shows a `Backend warnings` section, install
the named tool through your OS package manager:
```
apt install shellcheck # Debian / Ubuntu
brew install shellcheck # macOS
scoop install shellcheck # Windows
```
The same warning is logged once at server spawn time in
`~/.hermes/logs/agent.log`.
**Server starts but never returns diagnostics**
Check `~/.hermes/logs/agent.log` for `[agent.lsp.client]` entries —