mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-21 03:39:54 +00:00
Merge remote-tracking branch 'origin/bb/gui' into austin/bb/gui
This commit is contained in:
+5
-1
@@ -94,9 +94,13 @@ RUN cd web && npm run build && \
|
||||
# hermes_cli/main.py succeeds (see #18800). /opt/hermes/web is build-time
|
||||
# only (HERMES_WEB_DIST points at hermes_cli/web_dist) and is intentionally
|
||||
# not chowned here.
|
||||
# The .venv MUST be hermes-writable so lazy_deps.py can install platform
|
||||
# packages (discord.py, telegram, slack, etc.) at first gateway boot.
|
||||
# Without this, `uv pip install` fails with EACCES and all messaging
|
||||
# adapters silently fail to load. See tools/lazy_deps.py.
|
||||
USER root
|
||||
RUN chmod -R a+rX /opt/hermes && \
|
||||
chown -R hermes:hermes /opt/hermes/ui-tui /opt/hermes/node_modules
|
||||
chown -R hermes:hermes /opt/hermes/.venv /opt/hermes/ui-tui /opt/hermes/node_modules
|
||||
# Start as root so the entrypoint can usermod/groupmod + gosu.
|
||||
# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
|
||||
|
||||
|
||||
@@ -1305,9 +1305,8 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
|
||||
),
|
||||
}
|
||||
# Forward cache_control marker when present on the OpenAI-format
|
||||
# tool dict (set by ``mark_tools_for_long_lived_cache``). Anthropic's
|
||||
# tools array supports cache_control on the last tool to cache the
|
||||
# entire schema cross-session.
|
||||
# tool dict. Anthropic's tools array supports cache_control on the
|
||||
# last tool to cache the entire schema cross-session.
|
||||
cache_control = t.get("cache_control")
|
||||
if isinstance(cache_control, dict):
|
||||
anthropic_tool["cache_control"] = dict(cache_control)
|
||||
|
||||
@@ -382,7 +382,28 @@ _AI_GATEWAY_HEADERS = {
|
||||
# Nous Portal extra_body for product attribution.
|
||||
# Callers should pass this as extra_body in chat.completions.create()
|
||||
# when the auxiliary client is backed by Nous Portal.
|
||||
NOUS_EXTRA_BODY = {"tags": ["product=hermes-agent", "client=aux"]}
|
||||
#
|
||||
# The tags are computed from agent.portal_tags so the client= marker stays
|
||||
# in lockstep with hermes_cli.__version__ across every Portal call site
|
||||
# (main loop, aux, compression, web_extract). Do not inline a literal here;
|
||||
# see agent/portal_tags.py for the rationale.
|
||||
from agent.portal_tags import nous_portal_tags as _nous_portal_tags
|
||||
|
||||
|
||||
def _nous_extra_body() -> dict:
    """Build the ``extra_body`` payload for a Nous Portal request.

    The dict is assembled from ``_nous_portal_tags()`` on every call, so
    each caller receives its own object and the tags reflect whatever
    that helper returns at call time rather than an import-time snapshot.

    Returns:
        A new ``{"tags": [...]}`` dict.
    """
    portal_tags = _nous_portal_tags()
    return dict(tags=portal_tags)
|
||||
|
||||
|
||||
# Backwards-compatible module attribute. Some callers (tests, third-party
|
||||
# plugins) read ``NOUS_EXTRA_BODY`` directly; keep it as a snapshot of the
|
||||
# current tags. Callers that need the freshest value should call
|
||||
# ``_nous_extra_body()`` or import ``nous_portal_tags`` directly.
|
||||
NOUS_EXTRA_BODY = _nous_extra_body()
|
||||
|
||||
# Set at resolve time — True if the auxiliary client points to Nous Portal
|
||||
auxiliary_is_nous: bool = False
|
||||
@@ -3437,7 +3458,7 @@ def get_auxiliary_extra_body() -> dict:
|
||||
Includes Nous Portal product tags when the auxiliary client is backed
|
||||
by Nous Portal. Returns empty dict otherwise.
|
||||
"""
|
||||
return dict(NOUS_EXTRA_BODY) if auxiliary_is_nous else {}
|
||||
return _nous_extra_body() if auxiliary_is_nous else {}
|
||||
|
||||
|
||||
def auxiliary_max_tokens_param(value: int) -> dict:
|
||||
@@ -4026,7 +4047,7 @@ def _build_call_kwargs(
|
||||
# Provider-specific extra_body
|
||||
merged_extra = dict(extra_body or {})
|
||||
if provider == "nous" or auxiliary_is_nous:
|
||||
merged_extra.setdefault("tags", []).extend(NOUS_EXTRA_BODY["tags"])
|
||||
merged_extra.setdefault("tags", []).extend(_nous_portal_tags())
|
||||
if merged_extra:
|
||||
kwargs["extra_body"] = merged_extra
|
||||
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
"""Centralized Nous Portal request tags.
|
||||
|
||||
Every Hermes request that hits the Nous Portal — main agent loop, auxiliary
|
||||
client (compression / titles / vision / web_extract / session_search / etc.),
|
||||
and any future code path — must carry the same product-attribution tags so
|
||||
Nous can attribute usage to Hermes Agent and bucket it by client release.
|
||||
|
||||
Tag shape (sent in OpenAI-compatible ``extra_body['tags']``):
|
||||
|
||||
[
|
||||
"product=hermes-agent",
|
||||
"client=hermes-client-v<__version__>",
|
||||
]
|
||||
|
||||
The version is sourced live from ``hermes_cli.__version__`` so it auto-aligns
|
||||
to whatever release is installed; the release script
|
||||
(``scripts/release.py``) regex-bumps that single string, and every Portal
|
||||
request picks up the new tag on the next process start.
|
||||
|
||||
Why one helper instead of inlining the literal at each site:
|
||||
* Four call sites (main loop profile, aux client, run_agent compression
|
||||
fallback, web_tools fallback) used to drift apart — see PR #24194 which
|
||||
only got the aux site, leaving the main loop sending a different tag set.
|
||||
* Tests should assert the same tag list everywhere; centralizing makes that
|
||||
assertion a one-liner against this module.
|
||||
|
||||
Do NOT pre-compute these as module-level constants in the consumers. The
|
||||
version can change at runtime (editable installs, hot-reload tooling), and
|
||||
``hermes_cli.__version__`` is the canonical source of truth.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List
|
||||
|
||||
|
||||
def _hermes_version() -> str:
|
||||
"""Return the current Hermes release version, e.g. ``"0.13.0"``.
|
||||
|
||||
Falls back to ``"unknown"`` if ``hermes_cli`` cannot be imported (should
|
||||
never happen in a real install — guarded for defensive testing).
|
||||
"""
|
||||
try:
|
||||
from hermes_cli import __version__
|
||||
return __version__
|
||||
except Exception:
|
||||
return "unknown"
|
||||
|
||||
|
||||
def hermes_client_tag() -> str:
    """Build the ``client=...`` tag sent with Nous Portal requests.

    Returns:
        ``"client=hermes-client-v"`` followed by the value of
        ``_hermes_version()`` (a ``MAJOR.MINOR.PATCH`` string in a normal
        install, ``"unknown"`` when the version cannot be determined).
    """
    version = _hermes_version()
    return "client=hermes-client-v{}".format(version)
|
||||
|
||||
|
||||
def nous_portal_tags() -> List[str]:
    """Assemble the canonical Nous Portal product tags.

    A new list is created on each call, so callers may mutate or extend
    the result (for example
    ``merged_extra.setdefault("tags", []).extend(nous_portal_tags())``)
    without affecting anyone else.

    Returns:
        ``["product=hermes-agent", <client tag>]`` where the client tag
        comes from ``hermes_client_tag()``.
    """
    tags = ["product=hermes-agent"]
    tags.append(hermes_client_tag())
    return tags
|
||||
@@ -268,7 +268,7 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
|
||||
|
||||
# Model name substrings that trigger tool-use enforcement guidance.
|
||||
# Add new patterns here when a model family needs explicit steering.
|
||||
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok")
|
||||
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm")
|
||||
|
||||
# OpenAI GPT/Codex-specific execution guidance. Addresses known failure modes
|
||||
# where GPT models abandon work on partial results, skip prerequisite lookups,
|
||||
|
||||
+6
-128
@@ -1,25 +1,15 @@
|
||||
"""Anthropic prompt caching strategies.
|
||||
"""Anthropic prompt caching strategy.
|
||||
|
||||
Two layouts:
|
||||
|
||||
* ``system_and_3`` (default, used everywhere except the long-lived path):
|
||||
4 cache_control breakpoints — system prompt + last 3 non-system messages.
|
||||
All at the same TTL (5m or 1h). Reduces input token costs by ~75% on
|
||||
multi-turn conversations within a single session.
|
||||
|
||||
* ``prefix_and_2`` (Claude on Anthropic / OpenRouter / Nous Portal):
|
||||
4 breakpoints split across two TTL tiers — tools[-1] (1h) +
|
||||
stable system prefix (1h) + last 2 non-system messages (5m). The
|
||||
long-lived prefix is byte-stable across sessions for a given user
|
||||
config, so every fresh session reads the cached system+tools instead
|
||||
of re-paying for them. Within-session rolling window shrinks from 3
|
||||
messages to 2 to free the breakpoint budget.
|
||||
Single layout: ``system_and_3``. 4 cache_control breakpoints — system
|
||||
prompt + last 3 non-system messages, all at the same TTL (5m or 1h).
|
||||
Reduces input token costs by ~75% on multi-turn conversations within a
|
||||
single session.
|
||||
|
||||
Pure functions -- no class state, no AIAgent dependency.
|
||||
"""
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
def _apply_cache_marker(msg: dict, cache_marker: dict, native_anthropic: bool = False) -> None:
|
||||
@@ -87,115 +77,3 @@ def apply_anthropic_cache_control(
|
||||
_apply_cache_marker(messages[idx], marker, native_anthropic=native_anthropic)
|
||||
|
||||
return messages
|
||||
|
||||
|
||||
def _mark_system_stable_block(
|
||||
messages: List[Dict[str, Any]],
|
||||
long_lived_marker: Dict[str, str],
|
||||
) -> bool:
|
||||
"""Mark the *first* content block of the system message with the 1h marker.
|
||||
|
||||
The system message is expected to have been split into multiple content
|
||||
blocks beforehand by the caller — block[0] is the cross-session-stable
|
||||
prefix, subsequent blocks carry context files + volatile suffix.
|
||||
Falls back to marking the whole system message as a single block when
|
||||
the message hasn't been split (preserves correctness on the fallback path).
|
||||
|
||||
Returns True when a marker was placed.
|
||||
"""
|
||||
if not messages or messages[0].get("role") != "system":
|
||||
return False
|
||||
|
||||
sys_msg = messages[0]
|
||||
content = sys_msg.get("content")
|
||||
|
||||
# Already a list of blocks → mark the first block.
|
||||
if isinstance(content, list) and content:
|
||||
first = content[0]
|
||||
if isinstance(first, dict):
|
||||
first["cache_control"] = long_lived_marker
|
||||
return True
|
||||
return False
|
||||
|
||||
# String content (no split) → cannot place a stable-prefix breakpoint
|
||||
# without changing the byte content. Caller is responsible for
|
||||
# splitting; if they didn't, fall through to envelope marker so we still
|
||||
# cache *something* for this turn.
|
||||
if isinstance(content, str) and content:
|
||||
sys_msg["content"] = [
|
||||
{"type": "text", "text": content, "cache_control": long_lived_marker}
|
||||
]
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def apply_anthropic_cache_control_long_lived(
    api_messages: List[Dict[str, Any]],
    long_lived_ttl: str = "1h",
    rolling_ttl: str = "5m",
    native_anthropic: bool = False,
) -> List[Dict[str, Any]]:
    """Apply the ``prefix_and_2`` layout: stable prefix plus rolling tail.

    Breakpoints placed by this function:

    * the first system content block, marked with ``long_lived_ttl``
      (via ``_mark_system_stable_block``);
    * the trailing non-system messages, marked with ``rolling_ttl``:
      the last 2 when the system prefix was marked, otherwise the last 3.

    The tools array is not touched here; see
    ``mark_tools_for_long_lived_cache`` for that marker.

    Args:
        api_messages: Messages to annotate; the input is not mutated.
        long_lived_ttl: TTL passed to ``_build_marker`` for the prefix.
        rolling_ttl: TTL passed to ``_build_marker`` for the tail.
        native_anthropic: Forwarded unchanged to ``_apply_cache_marker``.

    Returns:
        A deep copy of ``api_messages`` with cache markers injected (an
        empty copy is returned as-is).
    """
    messages = copy.deepcopy(api_messages)
    if not messages:
        return messages

    long_marker = _build_marker(long_lived_ttl)
    rolling_marker = _build_marker(rolling_ttl)

    # One rolling slot is given up when the system prefix took a marker.
    if _mark_system_stable_block(messages, long_marker):
        rolling_budget = 2
    else:
        rolling_budget = 3

    tail_positions = [
        position
        for position, message in enumerate(messages)
        if message.get("role") != "system"
    ][-rolling_budget:]

    for position in tail_positions:
        _apply_cache_marker(
            messages[position],
            rolling_marker,
            native_anthropic=native_anthropic,
        )

    return messages
|
||||
|
||||
|
||||
def mark_tools_for_long_lived_cache(
    tools: Optional[List[Dict[str, Any]]],
    long_lived_ttl: str = "1h",
) -> Optional[List[Dict[str, Any]]]:
    """Return a copy of ``tools`` whose last entry carries ``cache_control``.

    Only the final tool dict is marked; the marker is built by
    ``_build_marker(long_lived_ttl)``. The input list is never mutated.

    Args:
        tools: OpenAI-format tool dicts, or ``None``.
        long_lived_ttl: TTL handed to ``_build_marker``.

    Returns:
        ``tools`` itself when it is ``None`` or empty; otherwise a deep
        copy with ``cache_control`` set on the last element (left
        unmarked if that element is not a dict).
    """
    if tools:
        marked = copy.deepcopy(tools)
        final_tool = marked[-1]
        if isinstance(final_tool, dict):
            final_tool["cache_control"] = _build_marker(long_lived_ttl)
        return marked

    # None / empty list: hand the input back untouched.
    return tools
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { triggerHaptic } from '@/lib/haptics'
|
||||
import { ArrowUp, AudioLines, Loader2, Mic, MicOff, Square } from '@/lib/icons'
|
||||
import { ArrowUp, AudioLines, Layers3, Loader2, Mic, MicOff, Square } from '@/lib/icons'
|
||||
import { cn } from '@/lib/utils'
|
||||
|
||||
import type { ConversationStatus } from './hooks/use-voice-conversation'
|
||||
@@ -31,6 +31,7 @@ interface ConversationProps {
|
||||
|
||||
export function ComposerControls({
|
||||
busy,
|
||||
busyAction,
|
||||
canSubmit,
|
||||
conversation,
|
||||
disabled,
|
||||
@@ -40,6 +41,7 @@ export function ComposerControls({
|
||||
onDictate
|
||||
}: {
|
||||
busy: boolean
|
||||
busyAction: 'queue' | 'stop'
|
||||
canSubmit: boolean
|
||||
conversation: ConversationProps
|
||||
disabled: boolean
|
||||
@@ -74,12 +76,21 @@ export function ComposerControls({
|
||||
</Button>
|
||||
) : (
|
||||
<Button
|
||||
aria-label={busy ? 'Stop' : 'Send'}
|
||||
aria-label={busy ? (busyAction === 'queue' ? 'Queue message' : 'Stop') : 'Send'}
|
||||
className={PRIMARY_ICON_BTN}
|
||||
disabled={disabled || !canSubmit}
|
||||
title={busy ? (busyAction === 'queue' ? 'Queue message' : 'Stop') : 'Send'}
|
||||
type="submit"
|
||||
>
|
||||
{busy ? <span className="block size-3 rounded-[0.1875rem] bg-current" /> : <ArrowUp size={18} />}
|
||||
{busy ? (
|
||||
busyAction === 'queue' ? (
|
||||
<Layers3 size={16} />
|
||||
) : (
|
||||
<span className="block size-3 rounded-[0.1875rem] bg-current" />
|
||||
)
|
||||
) : (
|
||||
<ArrowUp size={18} />
|
||||
)}
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
} from 'react'
|
||||
|
||||
import { formatRefValue, hermesDirectiveFormatter } from '@/components/assistant-ui/directive-text'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { useMediaQuery } from '@/hooks/use-media-query'
|
||||
import { useResizeObserver } from '@/hooks/use-resize-observer'
|
||||
import { chatMessageText } from '@/lib/chat-messages'
|
||||
@@ -20,7 +21,19 @@ import { contextPath } from '@/lib/chat-runtime'
|
||||
import { DATA_IMAGE_URL_RE } from '@/lib/embedded-images'
|
||||
import { triggerHaptic } from '@/lib/haptics'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { $composerAttachments, $composerDraft } from '@/store/composer'
|
||||
import {
|
||||
$composerAttachments,
|
||||
$composerDraft,
|
||||
clearComposerAttachments,
|
||||
type ComposerAttachment
|
||||
} from '@/store/composer'
|
||||
import {
|
||||
$queuedPromptsBySession,
|
||||
enqueueQueuedPrompt,
|
||||
removeQueuedPrompt,
|
||||
type QueuedPromptEntry,
|
||||
updateQueuedPrompt
|
||||
} from '@/store/composer-queue'
|
||||
import { $messages } from '@/store/session'
|
||||
import { $threadScrolledUp } from '@/store/thread-scroll'
|
||||
|
||||
@@ -41,6 +54,7 @@ import {
|
||||
renderComposerContents,
|
||||
RICH_INPUT_SLOT
|
||||
} from './rich-editor'
|
||||
import { QueuePanel } from './queue-panel'
|
||||
import { SkinSlashPopover } from './skin-slash-popover'
|
||||
import { detectTrigger, extractClipboardImageBlobs, textBeforeCaret, type TriggerState } from './text-utils'
|
||||
import { ComposerTriggerPopover } from './trigger-popover'
|
||||
@@ -53,6 +67,15 @@ const COMPOSER_STACK_BREAKPOINT_PX = 320
|
||||
const COMPOSER_FADE_BACKGROUND =
|
||||
'linear-gradient(to bottom, transparent, color-mix(in srgb, var(--dt-background) 10%, transparent))'
|
||||
|
||||
/**
 * Snapshot taken when a queued prompt is opened for editing in the composer.
 * `draft` and `attachments` hold what the composer contained beforehand so it
 * can be restored once the edit ends.
 */
interface QueueEditState {
  /** Id of the queued entry being edited. */
  entryId: string
  /** Queue session key that was active when the edit began. */
  sessionKey: string
  /** Composer text saved before the queued entry was loaded. */
  draft: string
  /** Composer attachments saved before the queued entry was loaded. */
  attachments: ComposerAttachment[]
}
|
||||
|
||||
// Shallow-copies each attachment so the returned array shares no element objects with the input.
const cloneAttachments = (attachments: ComposerAttachment[]) =>
  attachments.map(attachment => {
    return { ...attachment }
  })
|
||||
|
||||
export function ChatBar({
|
||||
busy,
|
||||
cwd,
|
||||
@@ -60,6 +83,7 @@ export function ChatBar({
|
||||
focusKey,
|
||||
gateway,
|
||||
maxRecordingSeconds = 120,
|
||||
queueSessionKey,
|
||||
sessionId,
|
||||
state,
|
||||
onCancel,
|
||||
@@ -77,12 +101,17 @@ export function ChatBar({
|
||||
const aui = useAui()
|
||||
const draft = useAuiState(s => s.composer.text)
|
||||
const attachments = useStore($composerAttachments)
|
||||
const queuedPromptsBySession = useStore($queuedPromptsBySession)
|
||||
const scrolledUp = useStore($threadScrolledUp)
|
||||
const activeQueueSessionKey = queueSessionKey || sessionId || null
|
||||
const queuedPrompts = activeQueueSessionKey ? (queuedPromptsBySession[activeQueueSessionKey] ?? []) : []
|
||||
|
||||
const composerRef = useRef<HTMLFormElement | null>(null)
|
||||
const composerSurfaceRef = useRef<HTMLDivElement | null>(null)
|
||||
const editorRef = useRef<HTMLDivElement | null>(null)
|
||||
const draftRef = useRef(draft)
|
||||
const previousBusyRef = useRef(busy)
|
||||
const drainingQueueRef = useRef(false)
|
||||
const urlInputRef = useRef<HTMLInputElement | null>(null)
|
||||
|
||||
const [urlOpen, setUrlOpen] = useState(false)
|
||||
@@ -91,6 +120,7 @@ export function ChatBar({
|
||||
const [voiceConversationActive, setVoiceConversationActive] = useState(false)
|
||||
const [tight, setTight] = useState(false)
|
||||
const [dragActive, setDragActive] = useState(false)
|
||||
const [queueEdit, setQueueEdit] = useState<QueueEditState | null>(null)
|
||||
const dragDepthRef = useRef(0)
|
||||
const lastSpokenIdRef = useRef<string | null>(null)
|
||||
|
||||
@@ -102,6 +132,8 @@ export function ChatBar({
|
||||
const stacked = expanded || narrow || tight
|
||||
const hasComposerPayload = draft.trim().length > 0 || attachments.length > 0
|
||||
const canSubmit = busy || hasComposerPayload
|
||||
const editingQueuedPrompt = queueEdit ? queuedPrompts.find(entry => entry.id === queueEdit.entryId) ?? null : null
|
||||
const busyAction = busy && hasComposerPayload ? 'queue' : 'stop'
|
||||
const showHelpHint = draft === '?'
|
||||
|
||||
const placeholder = disabled ? 'Starting Hermes…' : 'Ask anything'
|
||||
@@ -463,6 +495,14 @@ export function ChatBar({
|
||||
}
|
||||
|
||||
const handleEditorKeyDown = (event: KeyboardEvent<HTMLDivElement>) => {
|
||||
if ((event.metaKey || event.ctrlKey) && !event.altKey && !event.shiftKey && event.key.toLowerCase() === 'k') {
|
||||
event.preventDefault()
|
||||
|
||||
if (!busy) void drainNextQueued()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
if (trigger && triggerItems.length > 0) {
|
||||
if (event.key === 'ArrowDown') {
|
||||
event.preventDefault()
|
||||
@@ -499,6 +539,13 @@ export function ChatBar({
|
||||
|
||||
if (event.key === 'Enter' && !event.shiftKey) {
|
||||
event.preventDefault()
|
||||
|
||||
if (!busy && !hasComposerPayload && queuedPrompts.length > 0) {
|
||||
void drainNextQueued()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
submitDraft()
|
||||
}
|
||||
}
|
||||
@@ -635,10 +682,147 @@ export function ChatBar({
|
||||
}
|
||||
}
|
||||
|
||||
const submitDraft = () => {
|
||||
if (busy) {
|
||||
// Replaces the composer's text and attachments, then re-renders the rich
// editor (when mounted) and moves the caret to the end.
const loadIntoComposer = (text: string, nextAttachments: ComposerAttachment[]) => {
  draftRef.current = text
  aui.composer().setText(text)
  $composerAttachments.set(cloneAttachments(nextAttachments))

  const editorEl = editorRef.current

  if (!editorEl) return

  renderComposerContents(editorEl, text)
  placeCaretEnd(editorEl)
}
|
||||
|
||||
// Starts editing a queued prompt: stashes the current composer contents in
// `queueEdit`, then loads the entry into the composer. No-op when there is no
// active queue session or another edit is already in progress.
const beginQueuedEdit = (entry: QueuedPromptEntry) => {
  if (queueEdit || !activeQueueSessionKey) return

  const snapshot: QueueEditState = {
    attachments: cloneAttachments($composerAttachments.get()),
    draft: draftRef.current,
    entryId: entry.id,
    sessionKey: activeQueueSessionKey
  }

  setQueueEdit(snapshot)
  loadIntoComposer(entry.text, entry.attachments)
  triggerHaptic('selection')
  focusInput()
}
|
||||
|
||||
// Leaves queue-edit mode and restores the composer contents that were stashed
// when the edit began.
//
// - 'save'   writes the composer text/attachments back to the queued entry.
//            Refused (returns false, edit stays open) when both are empty.
// - 'cancel' discards the composer changes.
//
// Returns false when no edit is active or a save was refused, true otherwise.
const exitQueuedEdit = (action: 'cancel' | 'save'): boolean => {
  if (!queueEdit) return false

  if (action === 'save') {
    const text = draftRef.current
    const next = cloneAttachments($composerAttachments.get())

    if (!text.trim() && next.length === 0) return false

    const saved = updateQueuedPrompt(queueEdit.sessionKey, queueEdit.entryId, { attachments: next, text })
    triggerHaptic(saved ? 'success' : 'selection')
  } else {
    // Abandoning an edit is a composer-local action. `onCancel` is the prop
    // used elsewhere to stop the running turn, so it must not be invoked
    // here: doing so would interrupt the agent whenever an edit is cancelled
    // or the edited entry is deleted.
    triggerHaptic('cancel')
  }

  loadIntoComposer(queueEdit.draft, queueEdit.attachments)
  setQueueEdit(null)
  focusInput()

  return true
}
|
||||
|
||||
// Moves the current composer payload onto the active session's queue.
// Returns true when the payload was enqueued and the composer cleared.
const queueCurrentDraft = useCallback(() => {
  const hasPayload = Boolean(draft.trim()) || attachments.length !== 0

  if (!activeQueueSessionKey || !hasPayload) return false

  const queued = enqueueQueuedPrompt(activeQueueSessionKey, { text: draft, attachments })

  if (!queued) return false

  clearDraft()
  clearComposerAttachments()
  triggerHaptic('selection')

  return true
}, [activeQueueSessionKey, attachments, draft])
|
||||
|
||||
// Shared drain routine for every queue-send path. A ref-based lock keeps two
// drains from overlapping; the entry is removed from the queue only after
// `onSubmit` did not return `false`. `pickEntry` selects which entry to send
// (head, by id, skipping the edited one, ...).
const runDrain = useCallback(
  async (pickEntry: (entries: QueuedPromptEntry[]) => QueuedPromptEntry | undefined): Promise<boolean> => {
    if (!activeQueueSessionKey || drainingQueueRef.current) return false

    const chosen = pickEntry(queuedPrompts)

    if (!chosen) return false

    drainingQueueRef.current = true

    try {
      const result = await Promise.resolve(
        onSubmit(chosen.text, { attachments: chosen.attachments, fromQueue: true })
      )
      const wasAccepted = result !== false

      if (wasAccepted) removeQueuedPrompt(activeQueueSessionKey, chosen.id)

      return wasAccepted
    } finally {
      // Always release the lock, including when onSubmit rejects.
      drainingQueueRef.current = false
    }
  },
  [activeQueueSessionKey, onSubmit, queuedPrompts]
)
|
||||
|
||||
// Sends the first queued entry, skipping the one currently open for editing.
const drainNextQueued = useCallback(() => {
  const editedId = queueEdit?.entryId

  return runDrain(entries => (editedId ? entries.find(e => e.id !== editedId) : entries[0]))
}, [queueEdit, runDrain])
|
||||
|
||||
// Sends one specific queued entry by id; never sends the entry being edited.
const sendQueuedNow = useCallback(
  (id: string) => {
    const isBeingEdited = id === queueEdit?.entryId

    return runDrain(entries => (isBeingEdited ? undefined : entries.find(e => e.id === id)))
  },
  [queueEdit, runDrain]
)
|
||||
|
||||
// Awaits `onCancel()` and then drains the next queued entry.
// Resolves to false without cancelling when the queue is empty.
const interruptAndSendNextQueued = useCallback(async () => {
  const hasQueued = queuedPrompts.length > 0

  if (!hasQueued) return false

  await Promise.resolve(onCancel())

  return drainNextQueued()
}, [drainNextQueued, onCancel, queuedPrompts.length])
|
||||
|
||||
// Auto-drain: when `busy` flips from true to false and the queue is not
// empty, send the next queued entry.
useEffect(() => {
  const justSettled = previousBusyRef.current && !busy

  previousBusyRef.current = busy

  if (justSettled && queuedPrompts.length > 0) void drainNextQueued()
}, [busy, drainNextQueued, queuedPrompts.length])
|
||||
|
||||
// Abort the queue edit when its target is gone: either the active queue
// session changed or the edited entry is no longer in the queue. The stashed
// composer contents are restored.
useEffect(() => {
  if (!queueEdit) return

  const targetStillPresent = queueEdit.sessionKey === activeQueueSessionKey && Boolean(editingQueuedPrompt)

  if (targetStillPresent) return

  loadIntoComposer(queueEdit.draft, queueEdit.attachments)
  setQueueEdit(null)
}, [activeQueueSessionKey, editingQueuedPrompt, queueEdit]) // eslint-disable-line react-hooks/exhaustive-deps
|
||||
|
||||
const submitDraft = () => {
|
||||
if (queueEdit) {
|
||||
exitQueuedEdit('save')
|
||||
} else if (busy) {
|
||||
if (hasComposerPayload) queueCurrentDraft()
|
||||
else if (queuedPrompts.length > 0) void interruptAndSendNextQueued()
|
||||
else {
|
||||
triggerHaptic('cancel')
|
||||
void Promise.resolve(onCancel())
|
||||
}
|
||||
} else if (!hasComposerPayload && queuedPrompts.length > 0) {
|
||||
void drainNextQueued()
|
||||
} else if (draft.trim() || attachments.length > 0) {
|
||||
const submitted = draft
|
||||
triggerHaptic('submit')
|
||||
@@ -742,6 +926,7 @@ export function ChatBar({
|
||||
const controls = (
|
||||
<ComposerControls
|
||||
busy={busy}
|
||||
busyAction={busyAction}
|
||||
canSubmit={canSubmit}
|
||||
conversation={{
|
||||
active: voiceConversationActive,
|
||||
@@ -824,6 +1009,22 @@ export function ChatBar({
|
||||
/>
|
||||
)}
|
||||
<SkinSlashPopover draft={draft} onSelect={selectSkinSlashCommand} />
|
||||
{activeQueueSessionKey && queuedPrompts.length > 0 && (
|
||||
<div className="relative z-6 mb-1 px-0.5">
|
||||
<QueuePanel
|
||||
busy={busy}
|
||||
editingId={queueEdit?.entryId ?? null}
|
||||
entries={queuedPrompts}
|
||||
onDelete={id => {
|
||||
if (removeQueuedPrompt(activeQueueSessionKey, id) && queueEdit?.entryId === id) {
|
||||
exitQueuedEdit('cancel')
|
||||
}
|
||||
}}
|
||||
onEdit={beginQueuedEdit}
|
||||
onSendNow={id => void sendQueuedNow(id)}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
<div
|
||||
className="pointer-events-none absolute inset-0 rounded-[inherit]"
|
||||
style={{ background: COMPOSER_FADE_BACKGROUND }}
|
||||
@@ -871,6 +1072,28 @@ export function ChatBar({
|
||||
>
|
||||
<VoiceActivity state={voiceActivityState} />
|
||||
<VoicePlaybackActivity />
|
||||
{queueEdit && editingQueuedPrompt && (
|
||||
<div className="flex items-center justify-between gap-2 rounded-lg border border-[color-mix(in_srgb,var(--dt-composer-ring)_32%,transparent)] bg-accent/18 px-2 py-1">
|
||||
<div className="min-w-0 text-[0.7rem] text-muted-foreground/88">Editing queued turn in composer</div>
|
||||
<div className="flex shrink-0 items-center gap-1">
|
||||
<Button
|
||||
className="h-6 rounded-md px-2 text-[0.68rem]"
|
||||
onClick={() => exitQueuedEdit('cancel')}
|
||||
type="button"
|
||||
variant="ghost"
|
||||
>
|
||||
Cancel
|
||||
</Button>
|
||||
<Button
|
||||
className="h-6 rounded-md px-2 text-[0.68rem]"
|
||||
onClick={() => exitQueuedEdit('save')}
|
||||
type="button"
|
||||
>
|
||||
Save
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{attachments.length > 0 && <AttachmentList attachments={attachments} onRemove={onRemoveAttachment} />}
|
||||
<div
|
||||
className={cn(
|
||||
|
||||
@@ -0,0 +1,123 @@
|
||||
import { useState } from 'react'
|
||||
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { ArrowUp, ChevronDown, Pencil, Trash2 } from '@/lib/icons'
|
||||
import { cn } from '@/lib/utils'
|
||||
import type { QueuedPromptEntry } from '@/store/composer-queue'
|
||||
|
||||
/** Props accepted by `QueuePanel`. */
interface QueuePanelProps {
  /** When true, the per-row "send now" button is disabled. */
  busy: boolean
  /** Id of the entry currently being edited in the composer, if any. */
  editingId: string | null
  /** Queued prompts to list, in display order. */
  entries: QueuedPromptEntry[]
  /** Called with the entry id when its delete button is clicked. */
  onDelete: (id: string) => void
  /** Called with the entry when its edit button is clicked. */
  onEdit: (entry: QueuedPromptEntry) => void
  /** Called with the entry id when its "send now" button is clicked. */
  onSendNow: (id: string) => void
}
|
||||
|
||||
// One-line label for a queue row: the trimmed text, or a placeholder when the
// entry has no text.
const entryPreview = (entry: QueuedPromptEntry) => {
  const trimmed = entry.text.trim()

  if (trimmed) return trimmed

  return entry.attachments.length > 0 ? 'Attachment-only turn' : 'Empty turn'
}
|
||||
|
||||
/**
 * Collapsible list of queued prompts with per-row edit / send-now / delete
 * actions. Renders nothing when `entries` is empty.
 *
 * The header button toggles the list and exposes its state to assistive
 * technology through `aria-expanded`.
 */
export function QueuePanel({ busy, editingId, entries, onDelete, onEdit, onSendNow }: QueuePanelProps) {
  const [collapsed, setCollapsed] = useState(false)

  if (entries.length === 0) return null

  return (
    <div className="rounded-2xl border border-border/65 bg-[color-mix(in_srgb,var(--dt-card)_70%,transparent)] py-0.5 shadow-[0_0_0_1px_color-mix(in_srgb,var(--dt-card)_30%,transparent)_inset]">
      <button
        aria-expanded={!collapsed}
        className="flex w-full items-center gap-1.5 px-2.5 py-1 text-left text-[0.72rem] font-medium text-muted-foreground/92 transition-colors hover:text-foreground/90"
        onClick={() => setCollapsed(wasCollapsed => !wasCollapsed)}
        type="button"
      >
        <ChevronDown className={cn('shrink-0 transition-transform', collapsed && '-rotate-90')} size={14} />
        <span className="truncate">{entries.length} Queued</span>
      </button>

      {!collapsed && (
        <div className="space-y-0.5 px-1.5 pb-0.5">
          {entries.map(entry => {
            const isEditing = editingId === entry.id
            const attachmentsCount = entry.attachments.length

            return (
              <div
                className={cn(
                  'group/queue-row flex items-center gap-1.5 rounded-lg border border-transparent px-1.5 py-1',
                  'transition-colors duration-300 ease-out hover:bg-(--chrome-action-hover) hover:transition-none',
                  isEditing && 'border-[color-mix(in_srgb,var(--dt-composer-ring)_40%,transparent)] bg-accent/25'
                )}
                key={entry.id}
              >
                <span
                  aria-hidden
                  className="h-3.5 w-3.5 shrink-0 rounded-full border border-foreground/35 bg-transparent"
                />
                <div className="min-w-0 flex-1">
                  <p className="truncate text-[0.73rem] leading-4 text-foreground/92">{entryPreview(entry)}</p>
                  {(attachmentsCount > 0 || isEditing) && (
                    <div className="mt-0.5 flex items-center gap-1.5 text-[0.64rem] text-muted-foreground/75">
                      {attachmentsCount > 0 && (
                        <span>
                          {attachmentsCount} attachment{attachmentsCount === 1 ? '' : 's'}
                        </span>
                      )}
                      {isEditing && (
                        <span className="text-[color-mix(in_srgb,var(--dt-composer-ring)_78%,var(--muted-foreground))]">
                          Editing in composer
                        </span>
                      )}
                    </div>
                  )}
                </div>
                {/* Row actions stay visible while editing; otherwise they appear on hover or keyboard focus. */}
                <div
                  className={cn(
                    'flex shrink-0 items-center gap-0 transition-opacity',
                    isEditing
                      ? 'opacity-100'
                      : 'opacity-0 group-hover/queue-row:opacity-100 group-focus-within/queue-row:opacity-100'
                  )}
                >
                  {/* Only one entry can be edited at a time. */}
                  <Button
                    aria-label="Edit queued turn"
                    className="h-5 w-5 rounded-md"
                    disabled={Boolean(editingId) && !isEditing}
                    onClick={() => onEdit(entry)}
                    size="icon-xs"
                    title="Edit queued turn"
                    type="button"
                    variant="ghost"
                  >
                    <Pencil size={11} />
                  </Button>
                  <Button
                    aria-label="Send queued turn now"
                    className="h-5 w-5 rounded-md"
                    disabled={busy || isEditing}
                    onClick={() => onSendNow(entry.id)}
                    size="icon-xs"
                    title="Send queued turn now"
                    type="button"
                    variant="ghost"
                  >
                    <ArrowUp size={11} />
                  </Button>
                  <Button
                    aria-label="Delete queued turn"
                    className="h-5 w-5 rounded-md"
                    onClick={() => onDelete(entry.id)}
                    size="icon-xs"
                    title="Delete queued turn"
                    type="button"
                    variant="ghost"
                  >
                    <Trash2 size={11} />
                  </Button>
                </div>
              </div>
            )
          })}
        </div>
      )}
    </div>
  )
}
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { HermesGateway } from '@/hermes'
|
||||
import type { ComposerAttachment } from '@/store/composer'
|
||||
|
||||
import type { DroppedFile } from '../hooks/use-composer-actions'
|
||||
|
||||
@@ -33,9 +34,10 @@ export interface ChatBarProps {
|
||||
maxRecordingSeconds?: number
|
||||
state: ChatBarState
|
||||
gateway?: HermesGateway | null
|
||||
queueSessionKey?: string | null
|
||||
sessionId?: string | null
|
||||
cwd?: string | null
|
||||
onCancel: () => void
|
||||
onCancel: () => Promise<void> | void
|
||||
onAddContextRef?: (refText: string, label?: string, detail?: string) => void
|
||||
onAddUrl?: (url: string) => void
|
||||
onAttachImageBlob?: (blob: Blob) => Promise<boolean | void> | boolean | void
|
||||
@@ -45,7 +47,10 @@ export interface ChatBarProps {
|
||||
onPickFolders?: () => void
|
||||
onPickImages?: () => void
|
||||
onRemoveAttachment?: (id: string) => void
|
||||
onSubmit: (value: string) => Promise<void> | void
|
||||
onSubmit: (
|
||||
value: string,
|
||||
options?: { attachments?: ComposerAttachment[]; fromQueue?: boolean }
|
||||
) => Promise<boolean> | boolean
|
||||
onTranscribeAudio?: (audio: Blob) => Promise<string>
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ import { ChevronDown } from '@/lib/icons'
|
||||
import { useIncrementalExternalStoreRuntime } from '@/lib/incremental-external-store-runtime'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { $pinnedSessionIds } from '@/store/layout'
|
||||
import type { ComposerAttachment } from '@/store/composer'
|
||||
import {
|
||||
$activeSessionId,
|
||||
$awaitingResponse,
|
||||
@@ -51,7 +52,7 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
|
||||
gateway: HermesGateway | null
|
||||
onToggleSelectedPin: () => void
|
||||
onDeleteSelectedSession: () => void
|
||||
onCancel: () => void
|
||||
onCancel: () => Promise<void> | void
|
||||
onAddContextRef: (refText: string, label?: string, detail?: string) => void
|
||||
onAddUrl: (url: string) => void
|
||||
onBranchInNewChat: (messageId: string) => void
|
||||
@@ -63,7 +64,10 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
|
||||
onPickFolders: () => void
|
||||
onPickImages: () => void
|
||||
onRemoveAttachment: (id: string) => void
|
||||
onSubmit: (text: string) => Promise<void> | void
|
||||
onSubmit: (
|
||||
text: string,
|
||||
options?: { attachments?: ComposerAttachment[]; fromQueue?: boolean }
|
||||
) => Promise<boolean> | boolean
|
||||
onThreadMessagesChange: (messages: readonly ThreadMessage[]) => void
|
||||
onEdit: (message: AppendMessage) => Promise<void>
|
||||
onReload: (parentId: string | null) => Promise<void>
|
||||
@@ -311,6 +315,7 @@ export function ChatView({
|
||||
onRemoveAttachment={onRemoveAttachment}
|
||||
onSubmit={onSubmit}
|
||||
onTranscribeAudio={onTranscribeAudio}
|
||||
queueSessionKey={selectedSessionId || activeSessionId}
|
||||
sessionId={activeSessionId}
|
||||
state={chatBarState}
|
||||
/>
|
||||
|
||||
@@ -472,7 +472,7 @@ export function DesktopController() {
|
||||
onAttachDroppedItems={composer.attachDroppedItems}
|
||||
onAttachImageBlob={composer.attachImageBlob}
|
||||
onBranchInNewChat={messageId => void branchInNewChat(messageId)}
|
||||
onCancel={() => void cancelRun()}
|
||||
onCancel={cancelRun}
|
||||
onDeleteSelectedSession={() => {
|
||||
if (selectedStoredSessionId) {
|
||||
void removeSession(selectedStoredSessionId)
|
||||
|
||||
@@ -71,6 +71,11 @@ interface PromptActionsOptions {
|
||||
) => ClientSessionState
|
||||
}
|
||||
|
||||
// Optional per-call overrides accepted by the submit helpers in this hook.
interface SubmitTextOptions {
|
||||
// Explicit attachment list for this submit; the visible callers fall back to
// $composerAttachments.get() when this is omitted.
attachments?: ComposerAttachment[]
|
||||
// NOTE(review): presumably marks a prompt replayed from the composer queue —
// no reader of this flag is visible in this hunk; confirm against callers.
fromQueue?: boolean
|
||||
}
|
||||
|
||||
function renderCommandsCatalog(catalog: CommandsCatalogLike): string {
|
||||
const desktopCatalog = filterDesktopCommandsCatalog(catalog)
|
||||
|
||||
@@ -153,7 +158,12 @@ export function usePromptActions({
|
||||
)
|
||||
|
||||
const syncImageAttachmentsForSubmit = useCallback(
|
||||
async (sessionId: string, attachments: ComposerAttachment[]) => {
|
||||
async (
|
||||
sessionId: string,
|
||||
attachments: ComposerAttachment[],
|
||||
options: { updateComposerAttachments?: boolean } = {}
|
||||
) => {
|
||||
const updateComposerAttachments = options.updateComposerAttachments ?? true
|
||||
const images = attachments.filter(attachment => attachment.kind === 'image' && attachment.path)
|
||||
|
||||
for (const attachment of images) {
|
||||
@@ -173,22 +183,25 @@ export function usePromptActions({
|
||||
|
||||
const attachedPath = result.path || attachment.path
|
||||
|
||||
addComposerAttachment({
|
||||
...attachment,
|
||||
id: attachment.id,
|
||||
label: attachedPath ? pathLabel(attachedPath) : attachment.label,
|
||||
path: attachedPath,
|
||||
attachedSessionId: sessionId
|
||||
})
|
||||
if (updateComposerAttachments) {
|
||||
addComposerAttachment({
|
||||
...attachment,
|
||||
id: attachment.id,
|
||||
label: attachedPath ? pathLabel(attachedPath) : attachment.label,
|
||||
path: attachedPath,
|
||||
attachedSessionId: sessionId
|
||||
})
|
||||
}
|
||||
}
|
||||
},
|
||||
[requestGateway]
|
||||
)
|
||||
|
||||
const submitPromptText = useCallback(
|
||||
async (rawText: string) => {
|
||||
async (rawText: string, options?: SubmitTextOptions) => {
|
||||
const visibleText = rawText.trim()
|
||||
const attachments = $composerAttachments.get()
|
||||
const usingComposerAttachments = !options?.attachments
|
||||
const attachments = options?.attachments ?? $composerAttachments.get()
|
||||
const contextRefs = attachments
|
||||
.map(a => a.refText)
|
||||
.filter(Boolean)
|
||||
@@ -200,7 +213,7 @@ export function usePromptActions({
|
||||
[contextRefs, visibleText].filter(Boolean).join('\n\n') || (hasImage ? 'What do you see in this image?' : '')
|
||||
|
||||
if (!text || busyRef.current) {
|
||||
return
|
||||
return false
|
||||
}
|
||||
|
||||
const optimisticId = `user-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`
|
||||
@@ -232,7 +245,7 @@ export function usePromptActions({
|
||||
awaitingResponse: true,
|
||||
pendingBranchGroup: null,
|
||||
sawAssistantPayload: false,
|
||||
interrupted: false
|
||||
interrupted: state.interrupted
|
||||
}),
|
||||
selectedStoredSessionIdRef.current
|
||||
)
|
||||
@@ -278,7 +291,7 @@ export function usePromptActions({
|
||||
releaseBusy()
|
||||
notifyError(err, 'Session unavailable')
|
||||
|
||||
return
|
||||
return false
|
||||
}
|
||||
|
||||
if (!sessionId) {
|
||||
@@ -286,16 +299,21 @@ export function usePromptActions({
|
||||
releaseBusy()
|
||||
notify({ kind: 'error', title: 'Session unavailable', message: 'Could not create a new session' })
|
||||
|
||||
return
|
||||
return false
|
||||
}
|
||||
|
||||
seedOptimistic(sessionId)
|
||||
}
|
||||
|
||||
try {
|
||||
await syncImageAttachmentsForSubmit(sessionId, attachments)
|
||||
await syncImageAttachmentsForSubmit(sessionId, attachments, {
|
||||
updateComposerAttachments: usingComposerAttachments
|
||||
})
|
||||
await requestGateway('prompt.submit', { session_id: sessionId, text })
|
||||
clearComposerAttachments()
|
||||
|
||||
if (usingComposerAttachments) clearComposerAttachments()
|
||||
|
||||
return true
|
||||
} catch (err) {
|
||||
releaseBusy()
|
||||
updateSessionState(sessionId, state => ({ ...state, busy: false, awaitingResponse: false }))
|
||||
@@ -303,10 +321,11 @@ export function usePromptActions({
|
||||
if (isProviderSetupError(err)) {
|
||||
requestDesktopOnboarding('Add a provider credential before sending your first message.')
|
||||
|
||||
return
|
||||
return false
|
||||
}
|
||||
|
||||
notifyError(err, 'Prompt failed')
|
||||
return false
|
||||
}
|
||||
},
|
||||
[
|
||||
@@ -477,18 +496,18 @@ export function usePromptActions({
|
||||
)
|
||||
|
||||
const submitText = useCallback(
|
||||
async (rawText: string) => {
|
||||
async (rawText: string, options?: SubmitTextOptions) => {
|
||||
const visibleText = rawText.trim()
|
||||
const attachments = $composerAttachments.get()
|
||||
const attachments = options?.attachments ?? $composerAttachments.get()
|
||||
|
||||
if (!attachments.length && SLASH_COMMAND_RE.test(visibleText)) {
|
||||
triggerHaptic('selection')
|
||||
await executeSlashCommand(visibleText)
|
||||
|
||||
return
|
||||
return true
|
||||
}
|
||||
|
||||
await submitPromptText(rawText)
|
||||
return await submitPromptText(rawText, options)
|
||||
},
|
||||
[executeSlashCommand, submitPromptText]
|
||||
)
|
||||
|
||||
@@ -7,6 +7,7 @@ import { type ChatMessage, chatMessageText, toChatMessages } from '@/lib/chat-me
|
||||
import { normalizePersonalityValue } from '@/lib/chat-runtime'
|
||||
import { embeddedImageUrls, textWithoutEmbeddedImages } from '@/lib/embedded-images'
|
||||
import { clearComposerAttachments, clearComposerDraft } from '@/store/composer'
|
||||
import { clearQueuedPrompts } from '@/store/composer-queue'
|
||||
import { $pinnedSessionIds } from '@/store/layout'
|
||||
import { clearNotifications, notify, notifyError } from '@/store/notifications'
|
||||
import { requestDesktopOnboarding } from '@/store/onboarding'
|
||||
@@ -649,6 +650,11 @@ export function useSessionActions({
|
||||
}
|
||||
|
||||
await deleteSession(storedSessionId)
|
||||
clearQueuedPrompts(storedSessionId)
|
||||
|
||||
if (closingRuntimeId) {
|
||||
clearQueuedPrompts(closingRuntimeId)
|
||||
}
|
||||
} catch (err) {
|
||||
if (removed) {
|
||||
setSessions(prev => [removed, ...prev])
|
||||
|
||||
@@ -95,6 +95,10 @@ function messageContentText(content: unknown): string {
|
||||
return Array.isArray(content) ? content.map(partText).join('').trim() : ''
|
||||
}
|
||||
|
||||
// Matches text that is exactly "[interrupted]", optionally preceded and/or
// followed by a single underscore, ignoring letter case.
const INTERRUPTED_ONLY_RE = /^_?\[interrupted\]_?$/i

/** Reports whether `text`, once surrounding whitespace is trimmed, is nothing but the interrupted marker. */
const isInterruptedOnlyMessage = (text: string) => {
  const candidate = text.trim()

  return INTERRUPTED_ONLY_RE.test(candidate)
}
|
||||
|
||||
function resetStickyState(state: StickyStateFlags) {
|
||||
state.escapedFromLock = false
|
||||
state.isAtBottom = true
|
||||
@@ -368,6 +372,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
|
||||
|
||||
const messageStatus = useAuiState(s => s.message.status?.type)
|
||||
const isPlaceholder = messageStatus === 'running' && content.length === 0
|
||||
const interruptedOnly = useMemo(() => isInterruptedOnlyMessage(messageText), [messageText])
|
||||
|
||||
if (isPlaceholder) {
|
||||
return null
|
||||
@@ -380,7 +385,10 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
|
||||
data-slot="aui_assistant-message-root"
|
||||
>
|
||||
<div
|
||||
className="wrap-anywhere min-w-0 max-w-full overflow-hidden text-pretty text-base leading-(--dt-line-height) text-foreground"
|
||||
className={cn(
|
||||
'wrap-anywhere min-w-0 max-w-full overflow-hidden text-pretty text-base leading-(--dt-line-height) text-foreground',
|
||||
interruptedOnly && 'text-[0.8rem] leading-5 text-muted-foreground/82'
|
||||
)}
|
||||
data-slot="aui_assistant-message-content"
|
||||
>
|
||||
{hoistedTodos.length > 0 && <HoistedTodoPanel todos={hoistedTodos} />}
|
||||
@@ -401,7 +409,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
|
||||
</ErrorPrimitive.Root>
|
||||
</MessagePrimitive.Error>
|
||||
</div>
|
||||
{messageText.trim().length > 0 && (
|
||||
{messageText.trim().length > 0 && !interruptedOnly && (
|
||||
<AssistantFooter messageId={messageId} messageText={messageText} onBranchInNewChat={onBranchInNewChat} />
|
||||
)}
|
||||
</MessagePrimitive.Root>
|
||||
|
||||
@@ -0,0 +1,102 @@
|
||||
import { beforeEach, describe, expect, it } from 'vitest'
|
||||
|
||||
import type { ComposerAttachment } from './composer'
|
||||
import {
|
||||
$queuedPromptsBySession,
|
||||
clearQueuedPrompts,
|
||||
dequeueQueuedPrompt,
|
||||
enqueueQueuedPrompt,
|
||||
getQueuedPrompts,
|
||||
removeQueuedPrompt,
|
||||
updateQueuedPrompt,
|
||||
updateQueuedPromptText
|
||||
} from './composer-queue'
|
||||
|
||||
const SESSION_KEY = 'session-abc'
|
||||
const QUEUE_STORAGE_KEY = 'hermes.desktop.composerQueue.v1'
|
||||
|
||||
/**
 * Builds a minimal ComposerAttachment fixture for the tests below.
 * The label mirrors `id` and the ref text is always `@file:<id>`, whatever `kind` is.
 */
function attachment(id: string, kind: ComposerAttachment['kind'] = 'file'): ComposerAttachment {
  const fixture: ComposerAttachment = { id, kind, label: id, refText: `@file:${id}` }

  return fixture
}
|
||||
|
||||
// Store-level behaviour of the composer queue: ordering, attachment snapshotting,
// edits, removal, clearing, and localStorage persistence.
describe('composer queue store', () => {
  // Every case starts with nothing persisted and an empty in-memory store.
  beforeEach(() => {
    window.localStorage.removeItem(QUEUE_STORAGE_KEY)
    $queuedPromptsBySession.set({})
  })

  it('queues prompts in FIFO order', () => {
    enqueueQueuedPrompt(SESSION_KEY, { text: 'first', attachments: [] })
    enqueueQueuedPrompt(SESSION_KEY, { text: 'second', attachments: [] })

    // Drain one more time than was enqueued to observe the empty-queue result.
    const firstOut = dequeueQueuedPrompt(SESSION_KEY)
    const secondOut = dequeueQueuedPrompt(SESSION_KEY)
    const thirdOut = dequeueQueuedPrompt(SESSION_KEY)

    expect(firstOut?.text).toBe('first')
    expect(secondOut?.text).toBe('second')
    expect(thirdOut).toBeNull()
  })

  it('clones attachments when queueing', () => {
    const originals = [attachment('a-1')]
    const entry = enqueueQueuedPrompt(SESSION_KEY, { text: 'check clones', attachments: originals })

    expect(entry).not.toBeNull()

    // Equal by value, but a distinct object from the one handed in.
    const storedAttachment = getQueuedPrompts(SESSION_KEY)[0]?.attachments[0]

    expect(storedAttachment).toEqual(originals[0])
    expect(storedAttachment).not.toBe(originals[0])
  })

  it('updates and removes queued entries by id', () => {
    const queuedTexts = () => getQueuedPrompts(SESSION_KEY).map(entry => entry.text)

    const one = enqueueQueuedPrompt(SESSION_KEY, { text: 'draft one', attachments: [] })
    const two = enqueueQueuedPrompt(SESSION_KEY, { text: 'draft two', attachments: [] })

    expect(one).not.toBeNull()
    expect(two).not.toBeNull()

    const edited = updateQueuedPromptText(SESSION_KEY, one!.id, 'draft one edited')

    expect(edited).toBe(true)
    expect(queuedTexts()).toEqual(['draft one edited', 'draft two'])

    const removed = removeQueuedPrompt(SESSION_KEY, one!.id)

    expect(removed).toBe(true)
    expect(queuedTexts()).toEqual(['draft two'])
  })

  it('updates queued text and attachment snapshot', () => {
    const queued = enqueueQueuedPrompt(SESSION_KEY, { text: 'draft one', attachments: [attachment('f-1')] })
    const replacement = [attachment('f-2'), attachment('f-3', 'image')]

    expect(queued).not.toBeNull()

    const updated = updateQueuedPrompt(SESSION_KEY, queued!.id, {
      text: 'edited text',
      attachments: replacement
    })

    expect(updated).toBe(true)

    const head = getQueuedPrompts(SESSION_KEY)[0]

    expect(head?.text).toBe('edited text')
    expect(head?.attachments).toEqual(replacement)
    // The store keeps its own copies rather than the caller's objects.
    expect(head?.attachments[0]).not.toBe(replacement[0])
  })

  it('clears queue state for a session', () => {
    enqueueQueuedPrompt(SESSION_KEY, { text: 'queued', attachments: [attachment('img-1', 'image')] })

    clearQueuedPrompts(SESSION_KEY)

    // In-memory state, the session key itself, and the persisted blob are all gone.
    expect(getQueuedPrompts(SESSION_KEY)).toEqual([])
    expect($queuedPromptsBySession.get()[SESSION_KEY]).toBeUndefined()
    expect(window.localStorage.getItem(QUEUE_STORAGE_KEY)).toBeNull()
  })

  it('persists queue entries into local storage', () => {
    enqueueQueuedPrompt(SESSION_KEY, { text: 'persist me', attachments: [] })

    const serialized = window.localStorage.getItem(QUEUE_STORAGE_KEY)

    expect(serialized).toBeTruthy()

    const persisted = JSON.parse(String(serialized)) as Record<string, { text: string }[]>

    expect(persisted[SESSION_KEY]?.[0]?.text).toBe('persist me')
  })
})
|
||||
@@ -0,0 +1,158 @@
|
||||
import { atom } from 'nanostores'
|
||||
|
||||
import type { ComposerAttachment } from './composer'
|
||||
|
||||
// One prompt waiting in a session's queue, as stored in memory and in localStorage.
export interface QueuedPromptEntry {
|
||||
// Identifier assigned at enqueue time, of the form `queued-<Date.now()>-<random base36>`.
id: string
|
||||
// Prompt text as supplied by the caller; replaced by updateQueuedPrompt.
text: string
|
||||
// Shallow per-attachment copies taken when the entry was enqueued or last updated.
attachments: ComposerAttachment[]
|
||||
// Date.now() timestamp captured when the entry was enqueued.
queuedAt: number
|
||||
}
|
||||
|
||||
// Map from trimmed session key to that session's queued prompts, in insertion order.
type QueueState = Record<string, QueuedPromptEntry[]>
|
||||
|
||||
// localStorage key under which the whole QueueState is persisted as JSON.
const STORAGE_KEY = 'hermes.desktop.composerQueue.v1'
|
||||
|
||||
/**
 * Reads the persisted queue map from localStorage.
 *
 * Returns an empty map when there is no `window`, nothing is stored, the stored
 * value is not a JSON object, or reading/parsing throws.
 *
 * The stored JSON is validated rather than cast: only array-valued sessions are
 * kept, and within them only entries carrying the fields the store dereferences
 * (`id`, `text`, `attachments`). Sessions left with no entries are dropped, which
 * matches writeSession never keeping an empty queue.
 */
const load = (): QueueState => {
  if (typeof window === 'undefined') return {}

  try {
    const raw = window.localStorage.getItem(STORAGE_KEY)
    const parsed: unknown = raw ? JSON.parse(raw) : null

    if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return {}

    const state: QueueState = {}

    for (const [sid, value] of Object.entries(parsed as Record<string, unknown>)) {
      // A non-array value would break every later filter/map/spread on this session's queue.
      if (!Array.isArray(value)) continue

      const entries = value.filter(
        (entry): entry is QueuedPromptEntry =>
          Boolean(entry) &&
          typeof entry === 'object' &&
          typeof (entry as QueuedPromptEntry).id === 'string' &&
          typeof (entry as QueuedPromptEntry).text === 'string' &&
          Array.isArray((entry as QueuedPromptEntry).attachments)
      )

      if (entries.length > 0) state[sid] = entries
    }

    return state
  } catch {
    // Storage access or JSON.parse failed; start with an empty queue.
    return {}
  }
}
|
||||
|
||||
/**
 * Mirrors `state` into localStorage: an empty map removes the stored key,
 * anything else is written as JSON. Does nothing without a `window`, and any
 * storage error is ignored.
 */
const save = (state: QueueState) => {
  if (typeof window === 'undefined') return

  try {
    const hasSessions = Object.keys(state).length !== 0

    if (hasSessions) {
      window.localStorage.setItem(STORAGE_KEY, JSON.stringify(state))
    } else {
      window.localStorage.removeItem(STORAGE_KEY)
    }
  } catch {
    // best-effort: storage may be unavailable, queue still works in-memory
  }
}
|
||||
|
||||
// Store atom holding every session's queue; its initial value is whatever load() returns at module evaluation.
export const $queuedPromptsBySession = atom<QueueState>(load())
|
||||
|
||||
/**
 * Replaces the queue stored for `sid` and publishes the result to both the atom
 * and localStorage. An empty `queue` removes the session's key entirely.
 */
const writeSession = (sid: string, queue: QueuedPromptEntry[]) => {
  // Work on a copy so the previous atom value is never mutated.
  const snapshot = { ...$queuedPromptsBySession.get() }

  if (queue.length > 0) {
    snapshot[sid] = queue
  } else {
    delete snapshot[sid]
  }

  $queuedPromptsBySession.set(snapshot)
  save(snapshot)
}
|
||||
|
||||
/** Normalises a session key: returns the trimmed key, or null when it is missing or blank. */
const sidOf = (key: string | null | undefined): null | string => {
  if (key == null) return null

  const trimmed = key.trim()

  return trimmed.length > 0 ? trimmed : null
}
|
||||
|
||||
/** Returns the queue currently stored for `sid`, or a fresh empty array when the session has none. */
const queueFor = (sid: string) => {
  const stored = $queuedPromptsBySession.get()[sid]

  return stored ?? []
}
|
||||
|
||||
/** Builds an entry id from the current time plus a short random base-36 suffix. */
const nextId = () => {
  const stamp = Date.now()
  const suffix = Math.random().toString(36).slice(2, 8)

  return `queued-${stamp}-${suffix}`
}
|
||||
|
||||
/** Returns a new array holding a shallow copy of each attachment, so queued entries do not share objects with the caller. */
const cloneAttachments = (attachments: ComposerAttachment[]) =>
  attachments.map(original => {
    const copy = { ...original }

    return copy
  })
|
||||
|
||||
/** Returns the queued prompts for the session identified by `key`; a missing or blank key yields an empty array. */
export const getQueuedPrompts = (key: string | null | undefined): QueuedPromptEntry[] => {
  const sid = sidOf(key)

  if (!sid) return []

  return queueFor(sid)
}
|
||||
|
||||
/**
 * Appends a prompt to the end of a session's queue.
 *
 * The attachments are copied, so later changes to the caller's objects do not
 * affect the queued entry. Returns the stored entry, or null when `key` is
 * missing or blank.
 */
export const enqueueQueuedPrompt = (
  key: string | null | undefined,
  payload: { text: string; attachments: ComposerAttachment[] }
): null | QueuedPromptEntry => {
  const sid = sidOf(key)

  if (!sid) return null

  const { text, attachments } = payload
  const id = nextId()
  const snapshot = cloneAttachments(attachments)
  const entry: QueuedPromptEntry = { id, text, attachments: snapshot, queuedAt: Date.now() }

  const existing = queueFor(sid)

  writeSession(sid, [...existing, entry])

  return entry
}
|
||||
|
||||
/**
 * Removes and returns the oldest queued prompt for a session.
 * Returns null when `key` is missing or blank, or the queue is empty.
 */
export const dequeueQueuedPrompt = (key: string | null | undefined): null | QueuedPromptEntry => {
  const sid = sidOf(key)

  if (!sid) return null

  const queue = queueFor(sid)
  const head = queue[0]

  if (!head) return null

  // Persist everything after the head; an empty remainder clears the session key.
  writeSession(sid, queue.slice(1))

  return head
}
|
||||
|
||||
/**
 * Deletes the queued entry with the given `id` from a session's queue.
 * Returns true when something was removed, false for a blank key or unknown id.
 */
export const removeQueuedPrompt = (key: string | null | undefined, id: string): boolean => {
  const sid = sidOf(key)

  if (!sid) return false

  const queue = queueFor(sid)
  const isTarget = (entry: QueuedPromptEntry) => entry.id === id

  // Nothing matches: leave the store and storage untouched.
  if (!queue.some(isTarget)) return false

  writeSession(
    sid,
    queue.filter(entry => !isTarget(entry))
  )

  return true
}
|
||||
|
||||
/**
 * Rewrites the text, and optionally the attachments, of the queued entry with `id`.
 *
 * Supplied attachments are copied before being stored; when `update.attachments`
 * is omitted the entry keeps its current ones. Returns true when the queue was
 * written. Returns false for a blank key, an unknown id, or a text-only update
 * whose text already matches.
 */
export const updateQueuedPrompt = (
  key: string | null | undefined,
  id: string,
  update: { text: string; attachments?: ComposerAttachment[] }
): boolean => {
  const sid = sidOf(key)

  if (!sid) return false

  const queue = queueFor(sid)

  // An entry needs rewriting when it is the target and either attachments were
  // supplied or the text actually differs.
  const needsRewrite = (entry: QueuedPromptEntry) =>
    entry.id === id && (Boolean(update.attachments) || entry.text !== update.text)

  if (!queue.some(needsRewrite)) return false

  const rewritten = queue.map(entry => {
    if (!needsRewrite(entry)) return entry

    const attachments = update.attachments ? cloneAttachments(update.attachments) : entry.attachments

    return { ...entry, text: update.text, attachments }
  })

  writeSession(sid, rewritten)

  return true
}
|
||||
|
||||
/** Text-only form of updateQueuedPrompt: changes the entry's text and leaves its attachments as they are. */
export const updateQueuedPromptText = (key: string | null | undefined, id: string, text: string): boolean => {
  const textOnly = { text }

  return updateQueuedPrompt(key, id, textOnly)
}
|
||||
|
||||
/** Drops every queued prompt for a session; does nothing for a blank key or a session with no queue. */
export const clearQueuedPrompts = (key: string | null | undefined) => {
  const sid = sidOf(key)

  if (!sid) return

  const hasQueue = sid in $queuedPromptsBySession.get()

  // Writing an empty queue removes the session key from the store and from storage.
  if (hasQueue) writeSession(sid, [])
}
|
||||
@@ -39,6 +39,10 @@ if [ "$(id -u)" = "0" ]; then
|
||||
# by the mapped user on the host side.
|
||||
chown -R hermes:hermes "$HERMES_HOME" 2>/dev/null || \
|
||||
echo "Warning: chown failed (rootless container?) — continuing anyway"
|
||||
# The .venv must also be re-chowned when UID is remapped, otherwise
|
||||
# lazy_deps.py cannot install platform packages (discord.py, etc.).
|
||||
chown -R hermes:hermes "$INSTALL_DIR/.venv" 2>/dev/null || \
|
||||
echo "Warning: chown .venv failed (rootless container?) — continuing anyway"
|
||||
fi
|
||||
|
||||
# Ensure config.yaml is readable by the hermes runtime user even if it was
|
||||
|
||||
@@ -446,7 +446,9 @@ class SignalAdapter(BasePlatformAdapter):
|
||||
if sent_msg and isinstance(sent_msg, dict):
|
||||
dest = sent_msg.get("destinationNumber") or sent_msg.get("destination")
|
||||
sent_ts = sent_msg.get("timestamp")
|
||||
if dest == self._account_normalized:
|
||||
sent_msg_group_info = sent_msg.get("groupInfo") or {}
|
||||
sent_msg_group_id = sent_msg_group_info.get("groupId") if sent_msg_group_info else None
|
||||
if dest == self._account_normalized or sent_msg_group_id:
|
||||
# Check if this is an echo of our own outbound reply
|
||||
if sent_ts and sent_ts in self._recent_sent_timestamps:
|
||||
self._recent_sent_timestamps.discard(sent_ts)
|
||||
|
||||
@@ -2772,7 +2772,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
{"thread_id": str(thread_id)},
|
||||
)
|
||||
)
|
||||
await self._bot.send_message(**send_kwargs)
|
||||
await self._send_message_with_thread_fallback(**send_kwargs)
|
||||
except Exception as exc:
|
||||
logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True)
|
||||
return
|
||||
|
||||
@@ -345,6 +345,7 @@ class WeComAdapter(BasePlatformAdapter):
|
||||
try:
|
||||
await self._open_connection()
|
||||
backoff_idx = 0
|
||||
self._mark_connected()
|
||||
logger.info("[%s] Reconnected", self.name)
|
||||
except Exception as reconnect_exc:
|
||||
logger.warning("[%s] Reconnect failed: %s", self.name, reconnect_exc)
|
||||
|
||||
@@ -494,12 +494,15 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
# plain executable path.
|
||||
_npm_bin = shutil.which("npm") or "npm"
|
||||
try:
|
||||
# Read timeout from environment variable, default to 300 seconds (5 minutes)
|
||||
# to accommodate slower systems like Unraid NAS
|
||||
npm_install_timeout = int(os.environ.get("WHATSAPP_NPM_INSTALL_TIMEOUT", "300"))
|
||||
install_result = subprocess.run(
|
||||
[_npm_bin, "install", "--silent"],
|
||||
cwd=str(bridge_dir),
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=60,
|
||||
timeout=npm_install_timeout,
|
||||
)
|
||||
if install_result.returncode != 0:
|
||||
print(f"[{self.name}] npm install failed: {install_result.stderr}")
|
||||
|
||||
@@ -7543,6 +7543,7 @@ class GatewayRunner:
|
||||
hook_ctx = {
|
||||
"platform": source.platform.value if source.platform else "",
|
||||
"user_id": source.user_id,
|
||||
"chat_id": source.chat_id or "",
|
||||
"session_id": session_entry.session_id,
|
||||
"message": message_text[:500],
|
||||
}
|
||||
|
||||
+1
-1
@@ -284,7 +284,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
),
|
||||
"alibaba": ProviderConfig(
|
||||
id="alibaba",
|
||||
name="Alibaba Cloud (DashScope)",
|
||||
name="Qwen Cloud",
|
||||
auth_type="api_key",
|
||||
inference_base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
|
||||
api_key_env_vars=("DASHSCOPE_API_KEY",),
|
||||
|
||||
@@ -735,15 +735,8 @@ DEFAULT_CONFIG = {
|
||||
|
||||
# Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
|
||||
# cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
|
||||
# long_lived_prefix: when true (default), Claude on Anthropic / OpenRouter / Nous
|
||||
# Portal uses a split layout: tools[-1] + stable system prefix at long_lived_ttl
|
||||
# (cross-session cache), last 2 messages at cache_ttl (within-session rolling).
|
||||
# Set false to keep the legacy "system + last 3 messages" single-tier layout.
|
||||
# long_lived_ttl: TTL for the cross-session prefix tier ("5m" or "1h"; default "1h").
|
||||
"prompt_caching": {
|
||||
"cache_ttl": "5m",
|
||||
"long_lived_prefix": True,
|
||||
"long_lived_ttl": "1h",
|
||||
},
|
||||
|
||||
# OpenRouter-specific settings.
|
||||
|
||||
+2
-1
@@ -307,7 +307,7 @@ def judge_goal(
|
||||
return "continue", "empty response (nothing to evaluate)", False
|
||||
|
||||
try:
|
||||
from agent.auxiliary_client import get_text_auxiliary_client
|
||||
from agent.auxiliary_client import get_auxiliary_extra_body, get_text_auxiliary_client
|
||||
except Exception as exc:
|
||||
logger.debug("goal judge: auxiliary client import failed: %s", exc)
|
||||
return "continue", "auxiliary client unavailable", False
|
||||
@@ -336,6 +336,7 @@ def judge_goal(
|
||||
temperature=0,
|
||||
max_tokens=200,
|
||||
timeout=timeout,
|
||||
extra_body=get_auxiliary_extra_body() or None,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.info("goal judge: API call failed (%s) — falling through to continue", exc)
|
||||
|
||||
@@ -155,7 +155,7 @@ def specify_task(
|
||||
)
|
||||
|
||||
try:
|
||||
from agent.auxiliary_client import get_text_auxiliary_client
|
||||
from agent.auxiliary_client import get_auxiliary_extra_body, get_text_auxiliary_client
|
||||
except Exception as exc: # pragma: no cover — import smoke test
|
||||
logger.debug("specify: auxiliary client import failed: %s", exc)
|
||||
return SpecifyOutcome(task_id, False, "auxiliary client unavailable")
|
||||
@@ -187,6 +187,7 @@ def specify_task(
|
||||
temperature=0.3,
|
||||
max_tokens=1500,
|
||||
timeout=timeout or 120,
|
||||
extra_body=get_auxiliary_extra_body() or None,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.info(
|
||||
|
||||
@@ -908,10 +908,10 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||
ProviderEntry("lmstudio", "LM Studio", "LM Studio (local desktop app with built-in model server)"),
|
||||
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
|
||||
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
|
||||
ProviderEntry("alibaba", "Qwen Cloud", "Qwen Cloud / DashScope Coding (Qwen + multi-provider)"),
|
||||
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
|
||||
ProviderEntry("tencent-tokenhub", "Tencent TokenHub", "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"),
|
||||
ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
|
||||
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
|
||||
ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
|
||||
ProviderEntry("copilot-acp", "GitHub Copilot ACP", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
|
||||
ProviderEntry("huggingface", "Hugging Face", "Hugging Face Inference Providers (20+ open models)"),
|
||||
@@ -926,7 +926,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||
ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"),
|
||||
ProviderEntry("minimax-oauth", "MiniMax (OAuth)", "MiniMax via OAuth browser login (Coding Plan, minimax.io)"),
|
||||
ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"),
|
||||
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
|
||||
ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
|
||||
ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
|
||||
ProviderEntry("gmi", "GMI Cloud", "GMI Cloud (multi-model direct API)"),
|
||||
@@ -936,6 +935,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
|
||||
ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
|
||||
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway"),
|
||||
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
|
||||
]
|
||||
|
||||
# Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
from typing import Any
|
||||
|
||||
from agent.portal_tags import nous_portal_tags
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
@@ -12,7 +13,7 @@ class NousProfile(ProviderProfile):
|
||||
def build_extra_body(
|
||||
self, *, session_id: str | None = None, **context
|
||||
) -> dict[str, Any]:
|
||||
return {"tags": ["product=hermes-agent"]}
|
||||
return {"tags": nous_portal_tags()}
|
||||
|
||||
def build_api_kwargs_extras(
|
||||
self,
|
||||
|
||||
@@ -959,7 +959,7 @@ class LineAdapter(BasePlatformAdapter):
|
||||
if chat_type == "dm" and self._client:
|
||||
asyncio.create_task(self._client.loading(chat_id))
|
||||
|
||||
source_obj = self.create_source(
|
||||
source_obj = self.build_source(
|
||||
chat_id=chat_id,
|
||||
chat_type=chat_type,
|
||||
user_id=user_id,
|
||||
|
||||
+46
-196
@@ -1454,15 +1454,6 @@ class AIAgent:
|
||||
# 1h tier costs 2x on write vs 1.25x for 5m, but amortizes across long
|
||||
# sessions with >5-minute pauses between turns (#14971).
|
||||
self._cache_ttl = "5m"
|
||||
# Long-lived prefix caching: when enabled and supported by the
|
||||
# current provider, splits the system prompt into a stable prefix
|
||||
# (cached cross-session at 1h TTL) and a volatile suffix
|
||||
# (memory/timestamp — never cached), and attaches a 1h cache_control
|
||||
# marker to the last tool in the schema array. Restricted to
|
||||
# Claude on Anthropic / OpenRouter / Nous Portal; see
|
||||
# ``_supports_long_lived_anthropic_cache``.
|
||||
self._use_long_lived_prefix_cache = False
|
||||
self._long_lived_cache_ttl = "1h"
|
||||
try:
|
||||
from hermes_cli.config import load_config as _load_pc_cfg
|
||||
|
||||
@@ -1470,12 +1461,6 @@ class AIAgent:
|
||||
_ttl = _pc_cfg.get("cache_ttl", "5m")
|
||||
if _ttl in {"5m", "1h"}:
|
||||
self._cache_ttl = _ttl
|
||||
_ll_enabled = _pc_cfg.get("long_lived_prefix", True)
|
||||
_ll_ttl = _pc_cfg.get("long_lived_ttl", "1h")
|
||||
if _ll_ttl in ("5m", "1h"):
|
||||
self._long_lived_cache_ttl = _ll_ttl
|
||||
if _ll_enabled and self._use_prompt_caching and self._supports_long_lived_anthropic_cache():
|
||||
self._use_long_lived_prefix_cache = True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -2480,7 +2465,6 @@ class AIAgent:
|
||||
"client_kwargs": dict(self._client_kwargs),
|
||||
"use_prompt_caching": self._use_prompt_caching,
|
||||
"use_native_cache_layout": self._use_native_cache_layout,
|
||||
"use_long_lived_prefix_cache": self._use_long_lived_prefix_cache,
|
||||
# Context engine state that _try_activate_fallback() overwrites.
|
||||
# Use getattr for model/base_url/api_key/provider since plugin
|
||||
# engines may not have these (they're ContextCompressor-specific).
|
||||
@@ -2647,6 +2631,11 @@ class AIAgent:
|
||||
old_model = self.model
|
||||
old_provider = self.provider
|
||||
|
||||
# Clear the per-config context_length override so the new model's
|
||||
# actual context window is resolved via get_model_context_length()
|
||||
# instead of inheriting the stale value from the previous model.
|
||||
self._config_context_length = None
|
||||
|
||||
# ── Swap core runtime fields ──
|
||||
self.model = new_model
|
||||
self.provider = new_provider
|
||||
@@ -2711,15 +2700,6 @@ class AIAgent:
|
||||
model=new_model,
|
||||
)
|
||||
)
|
||||
self._use_long_lived_prefix_cache = bool(
|
||||
self._use_prompt_caching
|
||||
and self._supports_long_lived_anthropic_cache(
|
||||
provider=new_provider,
|
||||
base_url=self.base_url,
|
||||
api_mode=api_mode,
|
||||
model=new_model,
|
||||
)
|
||||
)
|
||||
|
||||
# ── LM Studio: preload before probing context length ──
|
||||
self._ensure_lmstudio_runtime_loaded()
|
||||
@@ -2768,7 +2748,6 @@ class AIAgent:
|
||||
"client_kwargs": dict(self._client_kwargs),
|
||||
"use_prompt_caching": self._use_prompt_caching,
|
||||
"use_native_cache_layout": self._use_native_cache_layout,
|
||||
"use_long_lived_prefix_cache": self._use_long_lived_prefix_cache,
|
||||
"compressor_model": getattr(_cc, "model", self.model) if _cc else self.model,
|
||||
"compressor_base_url": getattr(_cc, "base_url", self.base_url) if _cc else self.base_url,
|
||||
"compressor_api_key": getattr(_cc, "api_key", "") if _cc else "",
|
||||
@@ -3579,73 +3558,6 @@ class AIAgent:
|
||||
|
||||
return False, False
|
||||
|
||||
def _supports_long_lived_anthropic_cache(
|
||||
self,
|
||||
*,
|
||||
provider: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
api_mode: Optional[str] = None,
|
||||
model: Optional[str] = None,
|
||||
) -> bool:
|
||||
"""Decide whether the long-lived (1h cross-session) cache layout applies.
|
||||
|
||||
Narrower than ``_anthropic_prompt_cache_policy`` — only enabled
|
||||
for Claude models on the four endpoints whose cross-session
|
||||
cache_control behavior we have explicitly validated:
|
||||
|
||||
* Native Anthropic API (``api_mode == 'anthropic_messages'`` +
|
||||
host ``api.anthropic.com``)
|
||||
* Anthropic OAuth subscription (same transport as native API)
|
||||
* OpenRouter (``base_url`` contains ``openrouter.ai``)
|
||||
* Nous Portal (``base_url`` contains ``nousresearch`` — proxies
|
||||
to OpenRouter, so identical wire-format)
|
||||
|
||||
All four honour ``cache_control`` on both the tools array and the
|
||||
first system content block, and bill cross-session cache reads at
|
||||
the documented 0.1× rate.
|
||||
|
||||
Other endpoints covered by the standard ``system_and_3`` policy
|
||||
(third-party Anthropic gateways, MiniMax, opencode-go Qwen, etc.)
|
||||
keep that layout — they support cache_control but their behavior
|
||||
with mixed-TTL multi-block system content has not been validated
|
||||
against this codebase.
|
||||
"""
|
||||
eff_provider = (provider if provider is not None else self.provider) or ""
|
||||
eff_base_url = base_url if base_url is not None else (self.base_url or "")
|
||||
eff_api_mode = api_mode if api_mode is not None else (self.api_mode or "")
|
||||
eff_model = (model if model is not None else self.model) or ""
|
||||
|
||||
model_lower = eff_model.lower()
|
||||
is_claude = "claude" in model_lower
|
||||
is_nous_portal = "nousresearch" in eff_base_url.lower()
|
||||
|
||||
# Nous Portal: Claude AND Qwen both get long-lived caching.
|
||||
# Portal proxies to OpenRouter with identical cache_control
|
||||
# semantics; any model on Portal that accepts envelope-layout
|
||||
# markers via _anthropic_prompt_cache_policy also benefits from
|
||||
# the documented 1h cross-session TTL.
|
||||
if is_nous_portal and (is_claude or "qwen" in model_lower):
|
||||
return True
|
||||
|
||||
if not is_claude:
|
||||
return False
|
||||
|
||||
# Native Anthropic + Anthropic OAuth subscription
|
||||
if eff_api_mode == "anthropic_messages":
|
||||
if eff_provider == "anthropic" or base_url_hostname(eff_base_url) == "api.anthropic.com":
|
||||
return True
|
||||
|
||||
# OpenRouter
|
||||
if base_url_host_matches(eff_base_url, "openrouter.ai"):
|
||||
return True
|
||||
|
||||
# Nous Portal — front-ends OpenRouter behind the scenes; identical
|
||||
# wire format and cache_control semantics.
|
||||
if is_nous_portal:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _model_requires_responses_api(model: str) -> bool:
|
||||
"""Return True for models that require the Responses API path.
|
||||
@@ -5894,26 +5806,19 @@ class AIAgent:
|
||||
"""Assemble the system prompt as three ordered parts.
|
||||
|
||||
Returns a dict with three keys:
|
||||
* ``stable`` — content that is byte-stable across sessions for a
|
||||
given user config: identity, tool guidance, skills prompt,
|
||||
* ``stable`` — identity, tool guidance, skills prompt,
|
||||
environment hints, platform hints, model-family operational
|
||||
guidance. Eligible for cross-session 1h prompt caching when
|
||||
placed as a separate Anthropic content block (see
|
||||
``apply_anthropic_cache_control_long_lived``).
|
||||
* ``context`` — context files (AGENTS.md, .cursorrules, etc.) and
|
||||
caller-supplied system_message. Stable within a session but may
|
||||
change between sessions when files are edited or the cwd
|
||||
differs. Cached within-session via the rolling messages
|
||||
breakpoint (5m TTL); not promoted to the long-lived tier so
|
||||
edits don't poison the cross-session cache.
|
||||
* ``volatile`` — content that changes on most turns/sessions:
|
||||
memory snapshot, user profile, external memory provider block,
|
||||
timestamp line. Never marked for caching.
|
||||
guidance.
|
||||
* ``context`` — context files (AGENTS.md, .cursorrules, etc.)
|
||||
and caller-supplied system_message.
|
||||
* ``volatile`` — memory snapshot, user profile, external
|
||||
memory provider block, timestamp line.
|
||||
|
||||
Joined ``stable\\n\\ncontext\\n\\nvolatile`` produces the same
|
||||
logical content the old single-string builder produced, with the
|
||||
guarantee that volatile content is at the end (cache-friendly
|
||||
ordering for any provider that does prefix caching).
|
||||
Joined into a single string by ``_build_system_prompt`` and
|
||||
cached on ``_cached_system_prompt`` for the lifetime of the
|
||||
AIAgent. Hermes never re-renders parts of this string mid-
|
||||
session — that's the only way to keep upstream prompt caches
|
||||
warm across turns.
|
||||
"""
|
||||
# ── Stable tier ────────────────────────────────────────────────
|
||||
stable_parts: List[str] = []
|
||||
@@ -6115,9 +6020,10 @@ class AIAgent:
|
||||
|
||||
Layers are ordered cache-friendly: stable identity/guidance first,
|
||||
then session-stable context files, then per-call volatile content
|
||||
(memory, USER profile, timestamp). The split is exposed via
|
||||
``_build_system_prompt_parts`` for the long-lived prompt-caching
|
||||
path (Claude on Anthropic / OpenRouter / Nous Portal).
|
||||
(memory, USER profile, timestamp). The whole string is treated as
|
||||
one cached block — Hermes never rebuilds or reinjects parts of it
|
||||
mid-session, which is the only way to keep upstream prompt caches
|
||||
warm across turns.
|
||||
"""
|
||||
parts = self._build_system_prompt_parts(system_message=system_message)
|
||||
joined = "\n\n".join(p for p in (parts["stable"], parts["context"], parts["volatile"]) if p)
|
||||
@@ -8817,6 +8723,11 @@ class AIAgent:
|
||||
fb_api_mode = "bedrock_converse"
|
||||
|
||||
old_model = self.model
|
||||
|
||||
# Clear the per-config context_length override so the fallback
|
||||
# model's actual context window is resolved instead of inheriting
|
||||
# the stale value from the previous model. See #22387.
|
||||
self._config_context_length = None
|
||||
self.model = fb_model
|
||||
self.provider = fb_provider
|
||||
self.base_url = fb_base_url
|
||||
@@ -8879,15 +8790,6 @@ class AIAgent:
|
||||
model=fb_model,
|
||||
)
|
||||
)
|
||||
self._use_long_lived_prefix_cache = bool(
|
||||
self._use_prompt_caching
|
||||
and self._supports_long_lived_anthropic_cache(
|
||||
provider=fb_provider,
|
||||
base_url=fb_base_url,
|
||||
api_mode=fb_api_mode,
|
||||
model=fb_model,
|
||||
)
|
||||
)
|
||||
|
||||
# LM Studio: preload before probing the fallback's context length.
|
||||
self._ensure_lmstudio_runtime_loaded()
|
||||
@@ -8964,16 +8866,6 @@ class AIAgent:
|
||||
"use_native_cache_layout",
|
||||
self.api_mode == "anthropic_messages" and self.provider == "anthropic",
|
||||
)
|
||||
# Long-lived prefix flag was added later — restore False on
|
||||
# snapshots predating the new field, then re-evaluate against
|
||||
# the restored provider/model in case the user had it enabled.
|
||||
self._use_long_lived_prefix_cache = rt.get(
|
||||
"use_long_lived_prefix_cache",
|
||||
bool(
|
||||
self._use_prompt_caching
|
||||
and self._supports_long_lived_anthropic_cache()
|
||||
),
|
||||
)
|
||||
|
||||
# ── Rebuild client for the primary provider ──
|
||||
if self.api_mode == "anthropic_messages":
|
||||
@@ -9551,19 +9443,7 @@ class AIAgent:
|
||||
|
||||
def _build_api_kwargs(self, api_messages: list) -> dict:
|
||||
"""Build the keyword arguments dict for the active API mode."""
|
||||
# Resolve the tools array exactly once. When the long-lived
|
||||
# prefix-cache layout is active (Claude on Anthropic / OpenRouter
|
||||
# / Nous Portal), attach a 1h cache_control marker to the last
|
||||
# tool — this caches the entire tools array cross-session via
|
||||
# Anthropic's tools→system→messages prefix order. The function
|
||||
# returns a deep copy, so self.tools is never mutated.
|
||||
if self._use_long_lived_prefix_cache and self.tools:
|
||||
from agent.prompt_caching import mark_tools_for_long_lived_cache
|
||||
tools_for_api = mark_tools_for_long_lived_cache(
|
||||
self.tools, long_lived_ttl=self._long_lived_cache_ttl,
|
||||
)
|
||||
else:
|
||||
tools_for_api = self.tools
|
||||
tools_for_api = self.tools
|
||||
|
||||
if self.api_mode == "anthropic_messages":
|
||||
_transport = self._get_transport()
|
||||
@@ -11662,7 +11542,8 @@ class AIAgent:
|
||||
"effort": "medium"
|
||||
}
|
||||
if _is_nous:
|
||||
summary_extra_body["tags"] = ["product=hermes-agent"]
|
||||
from agent.portal_tags import nous_portal_tags as _portal_tags
|
||||
summary_extra_body["tags"] = _portal_tags()
|
||||
|
||||
if self.api_mode == "codex_responses":
|
||||
codex_kwargs = self._build_api_kwargs(api_messages)
|
||||
@@ -12423,36 +12304,21 @@ class AIAgent:
|
||||
# External recall context is injected into the user message, not the system
|
||||
# prompt, so the stable cache prefix remains unchanged.
|
||||
#
|
||||
# When the long-lived prefix-cache layout is active (Claude on
|
||||
# Anthropic / OpenRouter / Nous Portal), we build the system
|
||||
# message as a *list of content blocks*: [stable, context,
|
||||
# volatile, ephemeral?]. Block 0 (stable) gets the 1h
|
||||
# cache_control marker further down via
|
||||
# apply_anthropic_cache_control_long_lived; blocks 1-3 are
|
||||
# cached only via the rolling messages window at 5m.
|
||||
# NOTE: Plugin context from pre_llm_call hooks is injected into the
|
||||
# user message (see injection block above), NOT the system prompt.
|
||||
# This is intentional — system prompt modifications break the prompt
|
||||
# cache prefix. The system prompt is reserved for Hermes internals.
|
||||
if self._use_long_lived_prefix_cache:
|
||||
_sys_parts = self._build_system_prompt_parts(system_message=system_message)
|
||||
_sys_blocks: list = []
|
||||
if _sys_parts.get("stable"):
|
||||
_sys_blocks.append({"type": "text", "text": _sys_parts["stable"]})
|
||||
if _sys_parts.get("context"):
|
||||
_sys_blocks.append({"type": "text", "text": _sys_parts["context"]})
|
||||
if _sys_parts.get("volatile"):
|
||||
_sys_blocks.append({"type": "text", "text": _sys_parts["volatile"]})
|
||||
if self.ephemeral_system_prompt:
|
||||
_sys_blocks.append({"type": "text", "text": self.ephemeral_system_prompt})
|
||||
if _sys_blocks:
|
||||
api_messages = [{"role": "system", "content": _sys_blocks}] + api_messages
|
||||
else:
|
||||
effective_system = active_system_prompt or ""
|
||||
if self.ephemeral_system_prompt:
|
||||
effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
|
||||
if effective_system:
|
||||
api_messages = [{"role": "system", "content": effective_system}] + api_messages
|
||||
#
|
||||
# Hermes invariant: the system prompt is built ONCE per session
|
||||
# (cached on ``_cached_system_prompt``) and replayed verbatim on
|
||||
# every turn. We send it as a single content string so the
|
||||
# bytes are byte-stable across turns and upstream prompt caches
|
||||
# stay warm.
|
||||
effective_system = active_system_prompt or ""
|
||||
if self.ephemeral_system_prompt:
|
||||
effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
|
||||
if effective_system:
|
||||
api_messages = [{"role": "system", "content": effective_system}] + api_messages
|
||||
|
||||
# Inject ephemeral prefill messages right after the system prompt
|
||||
# but before conversation history. Same API-call-time-only pattern.
|
||||
@@ -12466,29 +12332,13 @@ class AIAgent:
|
||||
# gateways. Auto-detected: if ``_use_prompt_caching`` is set,
|
||||
# inject cache_control breakpoints (system + last 3 messages)
|
||||
# to reduce input token costs by ~75% on multi-turn
|
||||
# conversations. Layout is chosen per endpoint by
|
||||
# ``_anthropic_prompt_cache_policy``.
|
||||
#
|
||||
# Long-lived prefix layout (prefix_and_2): stable system block
|
||||
# gets 1h marker + last 2 messages get 5m markers. Tools
|
||||
# array's last entry is marked separately at API-call kwargs
|
||||
# build time (see ``_build_api_kwargs`` and
|
||||
# ``mark_tools_for_long_lived_cache``).
|
||||
# conversations.
|
||||
if self._use_prompt_caching:
|
||||
if self._use_long_lived_prefix_cache:
|
||||
from agent.prompt_caching import apply_anthropic_cache_control_long_lived
|
||||
api_messages = apply_anthropic_cache_control_long_lived(
|
||||
api_messages,
|
||||
long_lived_ttl=self._long_lived_cache_ttl,
|
||||
rolling_ttl=self._cache_ttl,
|
||||
native_anthropic=self._use_native_cache_layout,
|
||||
)
|
||||
else:
|
||||
api_messages = apply_anthropic_cache_control(
|
||||
api_messages,
|
||||
cache_ttl=self._cache_ttl,
|
||||
native_anthropic=self._use_native_cache_layout,
|
||||
)
|
||||
api_messages = apply_anthropic_cache_control(
|
||||
api_messages,
|
||||
cache_ttl=self._cache_ttl,
|
||||
native_anthropic=self._use_native_cache_layout,
|
||||
)
|
||||
|
||||
# Safety net: strip orphaned tool results / add stubs for missing
|
||||
# results before sending to the API. Runs unconditionally — not
|
||||
@@ -14442,7 +14292,7 @@ class AIAgent:
|
||||
_ra_raw = _resp_headers.get("retry-after") or _resp_headers.get("Retry-After")
|
||||
if _ra_raw:
|
||||
try:
|
||||
_retry_after = min(int(_ra_raw), 120) # Cap at 2 minutes
|
||||
_retry_after = min(float(_ra_raw), 120) # Cap at 2 minutes
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0)
|
||||
|
||||
+1
-1
@@ -890,7 +890,7 @@ clone_repo() {
|
||||
stash_name="hermes-install-autostash-$(date -u +%Y%m%d-%H%M%S)"
|
||||
log_info "Local changes detected, stashing before update..."
|
||||
git stash push --include-untracked -m "$stash_name"
|
||||
autostash_ref="$(git rev-parse --verify refs/stash)"
|
||||
autostash_ref="stash@{0}"
|
||||
fi
|
||||
|
||||
git fetch origin
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
"""Tests for agent.portal_tags — Nous Portal request tag contract."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
def test_hermes_client_tag_includes_current_version():
|
||||
"""The client tag must reflect hermes_cli.__version__ verbatim."""
|
||||
from hermes_cli import __version__
|
||||
from agent.portal_tags import hermes_client_tag
|
||||
|
||||
assert hermes_client_tag() == f"client=hermes-client-v{__version__}"
|
||||
|
||||
|
||||
def test_hermes_client_tag_format():
|
||||
"""The client tag has the exact shape Nous Portal expects."""
|
||||
from agent.portal_tags import hermes_client_tag
|
||||
|
||||
tag = hermes_client_tag()
|
||||
assert tag.startswith("client=hermes-client-v")
|
||||
# No spaces, no commas — single tag value
|
||||
assert " " not in tag
|
||||
assert "," not in tag
|
||||
|
||||
|
||||
def test_nous_portal_tags_contains_product_and_client():
|
||||
"""Every Nous Portal request gets BOTH the product tag and the version tag."""
|
||||
from agent.portal_tags import hermes_client_tag, nous_portal_tags
|
||||
|
||||
tags = nous_portal_tags()
|
||||
assert "product=hermes-agent" in tags
|
||||
assert hermes_client_tag() in tags
|
||||
assert len(tags) == 2
|
||||
|
||||
|
||||
def test_nous_portal_tags_returns_fresh_list():
|
||||
"""Callers mutate the returned list; we must not share state across calls."""
|
||||
from agent.portal_tags import nous_portal_tags
|
||||
|
||||
a = nous_portal_tags()
|
||||
a.append("client=test-mutation")
|
||||
b = nous_portal_tags()
|
||||
assert "client=test-mutation" not in b
|
||||
|
||||
|
||||
def test_auxiliary_client_nous_extra_body_uses_helper():
|
||||
"""auxiliary_client.NOUS_EXTRA_BODY must match the canonical helper output."""
|
||||
from agent.auxiliary_client import NOUS_EXTRA_BODY
|
||||
from agent.portal_tags import nous_portal_tags
|
||||
|
||||
assert NOUS_EXTRA_BODY == {"tags": nous_portal_tags()}
|
||||
|
||||
|
||||
def test_nous_provider_profile_uses_helper():
|
||||
"""The Nous provider profile (main agent loop) must use the canonical tags."""
|
||||
from agent.portal_tags import nous_portal_tags
|
||||
from providers import get_provider_profile
|
||||
|
||||
profile = get_provider_profile("nous")
|
||||
assert profile is not None
|
||||
body = profile.build_extra_body()
|
||||
assert body["tags"] == nous_portal_tags()
|
||||
@@ -6,8 +6,6 @@ import pytest
|
||||
from agent.prompt_caching import (
|
||||
_apply_cache_marker,
|
||||
apply_anthropic_cache_control,
|
||||
apply_anthropic_cache_control_long_lived,
|
||||
mark_tools_for_long_lived_cache,
|
||||
)
|
||||
|
||||
|
||||
@@ -143,132 +141,3 @@ class TestApplyAnthropicCacheControl:
|
||||
elif "cache_control" in msg:
|
||||
count += 1
|
||||
assert count <= 4
|
||||
|
||||
|
||||
class TestMarkToolsForLongLivedCache:
|
||||
def test_returns_unchanged_for_empty_tools(self):
|
||||
assert mark_tools_for_long_lived_cache(None) is None
|
||||
assert mark_tools_for_long_lived_cache([]) == []
|
||||
|
||||
def test_marks_only_last_tool(self):
|
||||
tools = [
|
||||
{"type": "function", "function": {"name": "a"}},
|
||||
{"type": "function", "function": {"name": "b"}},
|
||||
{"type": "function", "function": {"name": "c"}},
|
||||
]
|
||||
out = mark_tools_for_long_lived_cache(tools)
|
||||
assert "cache_control" not in out[0]
|
||||
assert "cache_control" not in out[1]
|
||||
assert out[2]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
|
||||
|
||||
def test_does_not_mutate_input(self):
|
||||
tools = [{"type": "function", "function": {"name": "a"}}]
|
||||
mark_tools_for_long_lived_cache(tools)
|
||||
assert "cache_control" not in tools[0]
|
||||
|
||||
def test_5m_ttl_drops_ttl_field(self):
|
||||
tools = [{"type": "function", "function": {"name": "a"}}]
|
||||
out = mark_tools_for_long_lived_cache(tools, long_lived_ttl="5m")
|
||||
assert out[0]["cache_control"] == {"type": "ephemeral"}
|
||||
|
||||
|
||||
class TestApplyAnthropicCacheControlLongLived:
|
||||
def test_empty_messages(self):
|
||||
assert apply_anthropic_cache_control_long_lived([]) == []
|
||||
|
||||
def test_marks_first_block_of_split_system(self):
|
||||
msgs = [
|
||||
{"role": "system", "content": [
|
||||
{"type": "text", "text": "STABLE"},
|
||||
{"type": "text", "text": "CONTEXT"},
|
||||
{"type": "text", "text": "VOLATILE"},
|
||||
]},
|
||||
{"role": "user", "content": "msg1"},
|
||||
{"role": "assistant", "content": "msg2"},
|
||||
]
|
||||
out = apply_anthropic_cache_control_long_lived(msgs)
|
||||
sys_blocks = out[0]["content"]
|
||||
assert sys_blocks[0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
|
||||
assert "cache_control" not in sys_blocks[1]
|
||||
assert "cache_control" not in sys_blocks[2]
|
||||
|
||||
def test_rolling_marker_on_last_2_messages(self):
|
||||
msgs = [
|
||||
{"role": "system", "content": [{"type": "text", "text": "S"}]},
|
||||
{"role": "user", "content": "u1"},
|
||||
{"role": "assistant", "content": "a1"},
|
||||
{"role": "user", "content": "u2"},
|
||||
{"role": "assistant", "content": "a2"},
|
||||
]
|
||||
out = apply_anthropic_cache_control_long_lived(msgs)
|
||||
|
||||
def has_marker(m):
|
||||
c = m.get("content")
|
||||
if isinstance(c, list) and c and isinstance(c[-1], dict):
|
||||
return "cache_control" in c[-1]
|
||||
return "cache_control" in m
|
||||
|
||||
# u1 and a1 (older messages) should NOT be marked
|
||||
assert not has_marker(out[1])
|
||||
assert not has_marker(out[2])
|
||||
# u2 and a2 (last 2) SHOULD be marked
|
||||
assert has_marker(out[3])
|
||||
assert has_marker(out[4])
|
||||
|
||||
def test_rolling_marker_uses_5m_ttl(self):
|
||||
msgs = [
|
||||
{"role": "system", "content": [{"type": "text", "text": "S"}]},
|
||||
{"role": "user", "content": "u1"},
|
||||
{"role": "assistant", "content": "a1"},
|
||||
]
|
||||
out = apply_anthropic_cache_control_long_lived(
|
||||
msgs, long_lived_ttl="1h", rolling_ttl="5m",
|
||||
)
|
||||
# Last user message: cache_control on the wrapped text part should be 5m
|
||||
last = out[-1]
|
||||
c = last["content"]
|
||||
assert isinstance(c, list)
|
||||
assert c[-1]["cache_control"] == {"type": "ephemeral"} # 5m has no ttl key
|
||||
|
||||
def test_string_system_falls_back_to_envelope_marker(self):
|
||||
"""When the caller didn't split the system message, we still place a marker."""
|
||||
msgs = [
|
||||
{"role": "system", "content": "Single string system"},
|
||||
{"role": "user", "content": "u1"},
|
||||
]
|
||||
out = apply_anthropic_cache_control_long_lived(msgs)
|
||||
sys_content = out[0]["content"]
|
||||
# Wrapped into a list and the (now sole) block gets the 1h marker
|
||||
assert isinstance(sys_content, list)
|
||||
assert sys_content[0]["cache_control"] == {"type": "ephemeral", "ttl": "1h"}
|
||||
|
||||
def test_does_not_mutate_input(self):
|
||||
msgs = [
|
||||
{"role": "system", "content": [{"type": "text", "text": "S"}]},
|
||||
{"role": "user", "content": "u1"},
|
||||
]
|
||||
before = copy.deepcopy(msgs)
|
||||
apply_anthropic_cache_control_long_lived(msgs)
|
||||
assert msgs == before
|
||||
|
||||
def test_max_4_breakpoints_with_split_system(self):
|
||||
msgs = [
|
||||
{"role": "system", "content": [{"type": "text", "text": "S"}, {"type": "text", "text": "V"}]},
|
||||
] + [
|
||||
{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg{i}"}
|
||||
for i in range(10)
|
||||
]
|
||||
out = apply_anthropic_cache_control_long_lived(msgs)
|
||||
count = 0
|
||||
for m in out:
|
||||
c = m.get("content")
|
||||
if isinstance(c, list):
|
||||
for item in c:
|
||||
if isinstance(item, dict) and "cache_control" in item:
|
||||
count += 1
|
||||
elif "cache_control" in m:
|
||||
count += 1
|
||||
# 1 system block + last 2 messages = 3 breakpoints from this function.
|
||||
# tools[-1] is marked separately (not via this function), so a 4th
|
||||
# breakpoint can be added at API-call time.
|
||||
assert count == 3
|
||||
|
||||
@@ -1,112 +0,0 @@
|
||||
"""Live E2E: long-lived prefix caching on Claude via OpenRouter.
|
||||
|
||||
Run only when LIVE_OR_KEY env var is set. Skipped under the normal hermetic
|
||||
test suite (which unsets credentials).
|
||||
"""
|
||||
import os, sys, tempfile, time, shutil, pytest
|
||||
|
||||
|
||||
# Probe for the key BEFORE conftest unsets it
|
||||
_LIVE_KEY = os.environ.get("OPENROUTER_API_KEY") or os.environ.get("LIVE_OR_KEY")
|
||||
if not _LIVE_KEY:
|
||||
# Try to read directly from .env
|
||||
env_path = os.path.expanduser("~/.hermes/.env")
|
||||
if os.path.exists(env_path):
|
||||
with open(env_path) as f:
|
||||
for line in f:
|
||||
if line.startswith("OPENROUTER_API_KEY="):
|
||||
_LIVE_KEY = line.strip().split("=", 1)[1].strip().strip('"').strip("'")
|
||||
break
|
||||
|
||||
|
||||
pytestmark = pytest.mark.skipif(
|
||||
not _LIVE_KEY,
|
||||
reason="set OPENROUTER_API_KEY (or LIVE_OR_KEY) to run live cache test",
|
||||
)
|
||||
|
||||
|
||||
def test_long_lived_prefix_cache_e2e_openrouter(tmp_path, monkeypatch):
|
||||
"""Two AIAgent runs in fresh sessions: call 1 writes cache, call 2 reads it."""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
# The hermetic conftest unsets OPENROUTER_API_KEY — restore for this test
|
||||
monkeypatch.setenv("OPENROUTER_API_KEY", _LIVE_KEY)
|
||||
|
||||
# Minimal config — but with enough toolset/guidance to exceed Anthropic's
|
||||
# ~1024-token minimum-cacheable-prefix threshold. Anthropic silently
|
||||
# ignores cache_control markers on small blocks.
|
||||
import yaml
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
cfg_path.write_text(yaml.safe_dump({
|
||||
"model": {"provider": "openrouter", "default": "anthropic/claude-haiku-4.5"},
|
||||
"prompt_caching": {"long_lived_prefix": True, "long_lived_ttl": "1h", "cache_ttl": "5m"},
|
||||
"agent": {"tool_use_enforcement": True}, # adds substantial guidance text
|
||||
"memory": {"provider": ""},
|
||||
"compression": {"enabled": False},
|
||||
}))
|
||||
|
||||
from run_agent import AIAgent
|
||||
|
||||
def make_agent():
|
||||
return AIAgent(
|
||||
api_key=_LIVE_KEY,
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
provider="openrouter",
|
||||
model="anthropic/claude-haiku-4.5",
|
||||
api_mode="chat_completions",
|
||||
# Use the default toolset roster — the tools array (~13k tokens
|
||||
# for ~35 tools) is what carries the bulk of the cross-session
|
||||
# cache value. With a tiny toolset the cached prefix can fall
|
||||
# below Anthropic Haiku's 2048-token minimum cacheable size and
|
||||
# the marker is silently ignored.
|
||||
enabled_toolsets=None,
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
save_trajectories=False,
|
||||
)
|
||||
|
||||
a1 = make_agent()
|
||||
assert a1._use_prompt_caching is True, "policy should enable caching for Claude on OR"
|
||||
assert a1._use_long_lived_prefix_cache is True, "long-lived path should activate"
|
||||
parts = a1._build_system_prompt_parts()
|
||||
print(f"\nstable={len(parts['stable']):,} ctx={len(parts['context']):,} volatile={len(parts['volatile']):,} chars")
|
||||
print(f"tool count: {len(a1.tools or [])}")
|
||||
|
||||
# Use distinct user messages each call so OpenRouter's response cache
|
||||
# doesn't short-circuit the upstream Anthropic call (we need real
|
||||
# Anthropic billing visibility to verify cache_creation/cache_read).
|
||||
USER_1 = "Reply with the single word ALPHA."
|
||||
USER_2 = "Reply with the single word BRAVO."
|
||||
|
||||
print("\n--- Call 1 (cold) ---")
|
||||
r1 = a1.run_conversation(USER_1, conversation_history=[])
|
||||
print(f"final_response[:80]: {(r1.get('final_response') or '')[:80]!r}")
|
||||
cr1 = a1.session_cache_read_tokens
|
||||
cw1 = a1.session_cache_write_tokens
|
||||
print(f"call1: cache_read={cr1} cache_write={cw1}")
|
||||
|
||||
# Wait so cache settles, then fresh agent (NEW SESSION) for cross-session read
|
||||
time.sleep(2)
|
||||
a2 = make_agent()
|
||||
assert a2.session_id != a1.session_id, "second agent must have a new session"
|
||||
|
||||
print("\n--- Call 2 (warm, NEW session, different user msg) ---")
|
||||
r2 = a2.run_conversation(USER_2, conversation_history=[])
|
||||
print(f"final_response[:80]: {(r2.get('final_response') or '')[:80]!r}")
|
||||
cr2 = a2.session_cache_read_tokens
|
||||
cw2 = a2.session_cache_write_tokens
|
||||
print(f"call2: cache_read={cr2} cache_write={cw2}")
|
||||
|
||||
print(f"\n=== VERDICT ===")
|
||||
print(f" call1 wrote {cw1:,} cache tokens, read {cr1:,}")
|
||||
print(f" call2 wrote {cw2:,} cache tokens, read {cr2:,}")
|
||||
if cw1:
|
||||
print(f" cross-session read fraction: cr2/cw1 = {cr2/cw1:.2%}")
|
||||
|
||||
# Assertions
|
||||
assert cw1 > 0, f"call 1 must write cache (got {cw1}); long-lived layout not reaching wire"
|
||||
assert cr2 > 0, (
|
||||
f"call 2 must read cache cross-session (got {cr2}); "
|
||||
f"stable prefix is not byte-stable across sessions"
|
||||
)
|
||||
assert cr2 >= 1000, f"cache_read on call 2 ({cr2}) too small to indicate real reuse"
|
||||
@@ -147,11 +147,12 @@ class TestChatCompletionsBuildKwargs:
|
||||
]
|
||||
|
||||
def test_nous_tags(self, transport):
|
||||
from agent.portal_tags import nous_portal_tags
|
||||
from providers import get_provider_profile
|
||||
profile = get_provider_profile("nous")
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(model="gpt-4o", messages=msgs, provider_profile=profile)
|
||||
assert kw["extra_body"]["tags"] == ["product=hermes-agent"]
|
||||
assert kw["extra_body"]["tags"] == nous_portal_tags()
|
||||
|
||||
def test_reasoning_default(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
|
||||
@@ -7,6 +7,7 @@ from unittest.mock import AsyncMock, MagicMock
|
||||
import pytest
|
||||
|
||||
import gateway.run as gateway_run
|
||||
from agent.i18n import t
|
||||
from gateway.platforms.base import MessageEvent, MessageType
|
||||
from gateway.restart import DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
|
||||
from gateway.session import SessionEntry, build_session_key
|
||||
@@ -32,7 +33,7 @@ async def test_restart_command_while_busy_requests_drain_without_interrupt(monke
|
||||
|
||||
result = await runner._handle_message(event)
|
||||
|
||||
assert result == "⏳ Draining 1 active agent(s) before restart..."
|
||||
assert result == t("gateway.draining", count=1)
|
||||
running_agent.interrupt.assert_not_called()
|
||||
runner.request_restart.assert_called_once_with(detached=True, via_service=False)
|
||||
|
||||
|
||||
@@ -273,12 +273,13 @@ class TestRequestOverridesParity:
|
||||
|
||||
def test_extra_body_override_merges_with_provider_body(self, transport):
|
||||
"""Override extra_body merges WITH provider extra_body, not replaces."""
|
||||
from agent.portal_tags import nous_portal_tags
|
||||
kw = transport.build_kwargs(
|
||||
model="hermes-3", messages=_msgs(), tools=None,
|
||||
provider_profile=get_provider_profile("nous"),
|
||||
request_overrides={"extra_body": {"custom": True}},
|
||||
)
|
||||
assert kw["extra_body"]["tags"] == ["product=hermes-agent"] # from profile
|
||||
assert kw["extra_body"]["tags"] == nous_portal_tags() # from profile
|
||||
assert kw["extra_body"]["custom"] is True # from override
|
||||
|
||||
def test_top_level_override(self, transport):
|
||||
|
||||
@@ -210,9 +210,10 @@ class TestOpenRouterProfile:
|
||||
|
||||
class TestNousProfile:
|
||||
def test_tags(self):
|
||||
from agent.portal_tags import nous_portal_tags
|
||||
p = get_provider_profile("nous")
|
||||
body = p.build_extra_body()
|
||||
assert body["tags"] == ["product=hermes-agent"]
|
||||
assert body["tags"] == nous_portal_tags()
|
||||
|
||||
def test_auth_type(self):
|
||||
p = get_provider_profile("nous")
|
||||
|
||||
@@ -165,13 +165,14 @@ class TestNousParity:
|
||||
"""Nous: product tags, reasoning, omit when disabled."""
|
||||
|
||||
def test_tags(self, transport):
|
||||
from agent.portal_tags import nous_portal_tags
|
||||
kw = transport.build_kwargs(
|
||||
model="hermes-3-llama-3.1-405b",
|
||||
messages=_simple_messages(),
|
||||
tools=None,
|
||||
provider_profile=get_provider_profile("nous"),
|
||||
)
|
||||
assert kw["extra_body"]["tags"] == ["product=hermes-agent"]
|
||||
assert kw["extra_body"]["tags"] == nous_portal_tags()
|
||||
|
||||
def test_reasoning_omitted_when_disabled(self, transport):
|
||||
"""Nous special case: reasoning omitted entirely when disabled."""
|
||||
|
||||
@@ -330,127 +330,3 @@ class TestExplicitOverrides:
|
||||
# Long-lived prefix cache policy (cross-session 1h tier)
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestSupportsLongLivedAnthropicCache:
|
||||
"""Narrower than _anthropic_prompt_cache_policy — only Claude on the 4
|
||||
explicitly-validated endpoints get the long-lived layout."""
|
||||
|
||||
def test_native_anthropic_claude_supported(self):
|
||||
agent = _make_agent(
|
||||
provider="anthropic",
|
||||
base_url="https://api.anthropic.com",
|
||||
api_mode="anthropic_messages",
|
||||
model="claude-sonnet-4.6",
|
||||
)
|
||||
assert agent._supports_long_lived_anthropic_cache() is True
|
||||
|
||||
def test_anthropic_oauth_supported(self):
|
||||
# OAuth uses the same transport as native Anthropic
|
||||
agent = _make_agent(
|
||||
provider="anthropic",
|
||||
base_url="https://api.anthropic.com",
|
||||
api_mode="anthropic_messages",
|
||||
model="claude-opus-4.6",
|
||||
)
|
||||
assert agent._supports_long_lived_anthropic_cache() is True
|
||||
|
||||
def test_openrouter_claude_supported(self):
|
||||
agent = _make_agent(
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
api_mode="chat_completions",
|
||||
model="anthropic/claude-sonnet-4.6",
|
||||
)
|
||||
assert agent._supports_long_lived_anthropic_cache() is True
|
||||
|
||||
def test_nous_portal_claude_supported(self):
|
||||
# Nous Portal proxies to OpenRouter — same wire format
|
||||
agent = _make_agent(
|
||||
provider="nous",
|
||||
base_url="https://inference-api.nousresearch.com/v1",
|
||||
api_mode="chat_completions",
|
||||
model="anthropic/claude-opus-4.7",
|
||||
)
|
||||
assert agent._supports_long_lived_anthropic_cache() is True
|
||||
|
||||
def test_nous_portal_qwen_supported(self):
|
||||
# Portal Qwen rides the same OpenRouter-equivalent transport as
|
||||
# Portal Claude; long-lived (1h cross-session) cache_control
|
||||
# markers apply identically.
|
||||
agent = _make_agent(
|
||||
provider="nous",
|
||||
base_url="https://inference-api.nousresearch.com/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen3.6-plus",
|
||||
)
|
||||
assert agent._supports_long_lived_anthropic_cache() is True
|
||||
|
||||
def test_nous_portal_qwen_vendored_slug_supported(self):
|
||||
agent = _make_agent(
|
||||
provider="nous",
|
||||
base_url="https://inference-api.nousresearch.com/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen/qwen3.6-plus",
|
||||
)
|
||||
assert agent._supports_long_lived_anthropic_cache() is True
|
||||
|
||||
def test_nous_portal_non_claude_non_qwen_rejected(self):
|
||||
# Portal long-lived cache scope mirrors policy: Claude or Qwen only.
|
||||
agent = _make_agent(
|
||||
provider="nous",
|
||||
base_url="https://inference-api.nousresearch.com/v1",
|
||||
api_mode="chat_completions",
|
||||
model="openai/gpt-5.4",
|
||||
)
|
||||
assert agent._supports_long_lived_anthropic_cache() is False
|
||||
|
||||
def test_openrouter_non_claude_rejected(self):
|
||||
agent = _make_agent(
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
api_mode="chat_completions",
|
||||
model="openai/gpt-5.4",
|
||||
)
|
||||
assert agent._supports_long_lived_anthropic_cache() is False
|
||||
|
||||
def test_third_party_anthropic_gateway_rejected(self):
|
||||
# MiniMax / Kimi / etc. — anthropic-wire but not in our validated list
|
||||
agent = _make_agent(
|
||||
provider="minimax",
|
||||
base_url="https://api.minimax.io/anthropic",
|
||||
api_mode="anthropic_messages",
|
||||
model="minimax-m2.7",
|
||||
)
|
||||
assert agent._supports_long_lived_anthropic_cache() is False
|
||||
|
||||
def test_alibaba_dashscope_rejected(self):
|
||||
agent = _make_agent(
|
||||
provider="alibaba",
|
||||
base_url="https://dashscope.aliyuncs.com/api/v1/anthropic",
|
||||
api_mode="anthropic_messages",
|
||||
model="qwen3.5-plus",
|
||||
)
|
||||
assert agent._supports_long_lived_anthropic_cache() is False
|
||||
|
||||
def test_opencode_qwen_rejected(self):
|
||||
agent = _make_agent(
|
||||
provider="opencode-go",
|
||||
base_url="https://api.opencode-go.example/v1",
|
||||
api_mode="chat_completions",
|
||||
model="qwen3.6-plus",
|
||||
)
|
||||
assert agent._supports_long_lived_anthropic_cache() is False
|
||||
|
||||
def test_fallback_target_evaluated_independently(self):
|
||||
# Starting on a non-supported provider, falling back to OpenRouter Claude
|
||||
agent = _make_agent(
|
||||
provider="minimax",
|
||||
base_url="https://api.minimax.io/anthropic",
|
||||
api_mode="anthropic_messages",
|
||||
model="minimax-m2.7",
|
||||
)
|
||||
assert agent._supports_long_lived_anthropic_cache(
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
api_mode="chat_completions",
|
||||
model="anthropic/claude-sonnet-4.6",
|
||||
) is True
|
||||
|
||||
@@ -343,11 +343,12 @@ class TestBuildApiKwargsAIGateway:
|
||||
|
||||
class TestBuildApiKwargsNousPortal:
|
||||
def test_includes_nous_product_tags(self, monkeypatch):
|
||||
from agent.portal_tags import nous_portal_tags
|
||||
agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
extra = kwargs.get("extra_body", {})
|
||||
assert extra.get("tags") == ["product=hermes-agent"]
|
||||
assert extra.get("tags") == nous_portal_tags()
|
||||
|
||||
def test_uses_chat_completions_format(self, monkeypatch):
|
||||
agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
|
||||
|
||||
@@ -169,7 +169,6 @@ class TestEphemeralMaxOutputTokens:
|
||||
agent.reasoning_config = None
|
||||
agent._is_anthropic_oauth = False
|
||||
agent._ephemeral_max_output_tokens = None
|
||||
agent._use_long_lived_prefix_cache = False
|
||||
|
||||
compressor = MagicMock()
|
||||
compressor.context_length = 200_000
|
||||
|
||||
+3
-1
@@ -314,7 +314,9 @@ DANGEROUS_PATTERNS = [
|
||||
(r'\bdd\s+.*if=', "disk copy"),
|
||||
(r'>\s*/dev/sd', "write to block device"),
|
||||
(r'\bDROP\s+(TABLE|DATABASE)\b', "SQL DROP"),
|
||||
(r'\bDELETE\s+FROM\b(?!.*\bWHERE\b)', "SQL DELETE without WHERE"),
|
||||
# Use [^\n]* instead of .* so DOTALL mode does not cause a WHERE clause on the
|
||||
# *next* line to satisfy the negative lookahead, silently allowing DELETE without WHERE.
|
||||
(r'\bDELETE\s+FROM\b(?![^\n]*\bWHERE\b)', "SQL DELETE without WHERE"),
|
||||
(r'\bTRUNCATE\s+(TABLE)?\s*\w', "SQL TRUNCATE"),
|
||||
(r'>\s*/etc/', "overwrite system config"),
|
||||
(r'\bsystemctl\s+(-[^\s]+\s+)*(stop|restart|disable|mask)\b', "stop/restart system service"),
|
||||
|
||||
@@ -461,7 +461,8 @@ async def _send_via_adapter(
|
||||
adapter = None
|
||||
if adapter is not None:
|
||||
try:
|
||||
result = await adapter.send(chat_id=chat_id, content=chunk)
|
||||
metadata = {"thread_id": thread_id} if thread_id else None
|
||||
result = await adapter.send(chat_id=chat_id, content=chunk, metadata=metadata)
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
except Exception as e:
|
||||
|
||||
+3
-1
@@ -130,7 +130,9 @@ def detect_audio_environment() -> dict:
|
||||
try:
|
||||
devices = sd.query_devices()
|
||||
if not devices:
|
||||
if termux_capture:
|
||||
if os.environ.get('PULSE_SERVER'):
|
||||
notices.append("No PortAudio devices detected but PULSE_SERVER is set -- continuing")
|
||||
elif termux_capture:
|
||||
notices.append("No PortAudio devices detected, but Termux:API microphone capture is available")
|
||||
else:
|
||||
warnings.append("No audio input/output devices detected")
|
||||
|
||||
+2
-1
@@ -593,7 +593,8 @@ def _resolve_web_extract_auxiliary(model: Optional[str] = None) -> tuple[Optiona
|
||||
extra_body: Dict[str, Any] = {}
|
||||
if client is not None and _is_nous_auxiliary_client(client):
|
||||
from agent.auxiliary_client import get_auxiliary_extra_body
|
||||
extra_body = get_auxiliary_extra_body() or {"tags": ["product=hermes-agent"]}
|
||||
from agent.portal_tags import nous_portal_tags
|
||||
extra_body = get_auxiliary_extra_body() or {"tags": nous_portal_tags()}
|
||||
|
||||
return client, effective_model, extra_body
|
||||
|
||||
|
||||
@@ -92,6 +92,13 @@ manager makes sense for that language (rustup, ghcup, opam, brew,
|
||||
…). Hermes auto-detects the binary on PATH or in
|
||||
`<HERMES_HOME>/lsp/bin/`.
|
||||
|
||||
A few servers are installed alongside a peer dependency that npm
|
||||
won't auto-pull. The current case is `typescript-language-server`,
|
||||
which requires the `typescript` SDK importable from the same
|
||||
`node_modules` tree — Hermes installs both packages together when you
|
||||
run `hermes lsp install typescript` or when auto-install fires on first
|
||||
use.
|
||||
|
||||
## CLI
|
||||
|
||||
```
|
||||
@@ -207,6 +214,24 @@ The binary isn't on PATH and isn't in `<HERMES_HOME>/lsp/bin/`. Run
|
||||
`hermes lsp install <server_id>` to attempt an auto-install, or
|
||||
install the binary manually through the language's normal toolchain.
|
||||
|
||||
**`Backend warnings` section in `hermes lsp status`**
|
||||
|
||||
Some servers ship as thin wrappers around an external CLI for actual
|
||||
diagnostics — they spawn cleanly and accept requests but never emit
|
||||
errors when the sidecar binary is missing. The most common case is
|
||||
`bash-language-server`, which delegates diagnostics to `shellcheck`.
|
||||
When `hermes lsp status` shows a `Backend warnings` section, install
|
||||
the named tool through your OS package manager:
|
||||
|
||||
```
|
||||
apt install shellcheck # Debian / Ubuntu
|
||||
brew install shellcheck # macOS
|
||||
scoop install shellcheck # Windows
|
||||
```
|
||||
|
||||
The same warning is logged once at server spawn time in
|
||||
`~/.hermes/logs/agent.log`.
|
||||
|
||||
**Server starts but never returns diagnostics**
|
||||
|
||||
Check `~/.hermes/logs/agent.log` for `[agent.lsp.client]` entries —
|
||||
|
||||
Reference in New Issue
Block a user