diff --git a/CHANGELOG.md b/CHANGELOG.md index 686cccdb..ad50b6df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,24 @@ ## [Unreleased] ### Fixed -- **Reasoning chip now appears after the model chip** in the composer toolbar — model is a more fundamental choice and should be stable in position regardless of whether reasoning is active. Order: Profile → Workspace → Model → Reasoning. (`static/index.html`) + +## v0.50.207 — 2026-04-25 + +### Added +- **Live TPS stat in header** — a monospace chip in the titlebar shows tokens per second during streaming, with HIGH watermark from the past hour. Emitted via SSE at 1 Hz during active streams; hidden when idle. (`api/metering.py`, `api/streaming.py`, `static/messages.js`, `static/style.css`) [#1005 @JKJameson] + +### Fixed +- **Stale SSE events no longer pollute the new session's DOM on session switch** — `appendThinking()` and `appendLiveToolCard()` now guard against events from a prior session's stream arriving after the user has switched sessions. Thinking card also auto-scrolls to top on completion so the response is immediately visible. (`static/ui.js`) [#1006 @JKJameson] +- **Show agent sessions no longer shows empty/unimportable rows** — `state.db` can contain agent session rows before any messages are written. The sidebar now filters those out consistently across both the regular `/api/sessions` path and the gateway SSE watcher. (`api/agent_sessions.py`, `api/gateway_watcher.py`, `api/models.py`) [#1009 @franksong2702] +- **Three orphaned i18n keys removed from language dropdown** — `cmd_status`, `memory_saved`, and `profile_delete_title` were placed outside any locale block in `static/i18n.js`, causing them to appear as invalid language options. 
(`static/i18n.js`) [#1010 @bergeouss] +- **Cron panel UX polish** — Resume button SVG now uses a ▶| icon to distinguish it from Run; toast overlap fixed with `z-index` on the header; running-state badge with spinner shows during active jobs; `_cronRunningPoll` clears correctly on panel close. (`static/panels.js`, `static/index.html`, `static/style.css`, `static/i18n.js`) [#1011 @bergeouss] +- **Create Folder and Add as Space from the browser** — users can now create directories and immediately register them as workspace spaces without SSH access; server validates paths against blocked roots before `mkdir`. (`api/routes.py`, `static/ui.js`, `static/panels.js`, `static/i18n.js`) [#1018 @bergeouss] +- **Model-not-found errors now show a helpful message** — when a provider returns a 404 (e.g. Qwen model not available), the error is classified and a user-friendly hint appears instead of a raw HTML page. All 6 locales covered. (`api/streaming.py`, `static/messages.js`, `static/i18n.js`) [#1022 @bergeouss] +- **Session attention indicators moved to right-side actions slot** — streaming spinners and unread dots no longer sit before the session title, avoiding title shifts. Running/unread rows hide the timestamp; idle/read rows keep right-aligned timestamps. Date group carets now point down/right correctly. Pinned group no longer repeats the star icon per row. (`static/sessions.js`, `static/style.css`) [#1024 @franksong2702] +- **Session sidebar dates now use the last real message time** — sorting, grouping, and relative timestamps prefer `last_message_at` derived from the last non-tool message instead of metadata-only `updated_at`, so changing session settings doesn't move old conversations to Today. (`api/models.py`, `api/routes.py`) [#1024 @franksong2702] +- **Running indicators appear immediately after send** — the sidebar now treats the active local busy session and local in-flight sessions as streaming while `/api/sessions` catches up. 
(`static/messages.js`, `static/sessions.js`) [#1024 @franksong2702] +- **Large session switching and reload no longer block on cold model-catalog resolution** — `GET /api/session?messages=0` now parses only the JSON metadata prefix; metadata-only loads skip the full-session LRU cache; the frontend lazy fetch passes `resolve_model=0`; hard reload no longer waits for `populateModelDropdown()`. (`api/models.py`, `api/routes.py`, `static/boot.js`, `static/sessions.js`, `static/ui.js`) [#1025 @franksong2702] +- **Auto title generation hardened for reasoning models** — title generation now uses a 512-token reasoning-safe budget, retries once with 1024 tokens on empty content or `finish_reason: length`, and preserves the underlying failure reason in `title_status` when falling back to a local summary. (`api/streaming.py`) [#1026 @franksong2702] ## v0.50.206 — 2026-04-25 diff --git a/TESTING.md b/TESTING.md index ac0bba10..88de9d4d 100644 --- a/TESTING.md +++ b/TESTING.md @@ -8,7 +8,7 @@ > Prerequisites: SSH tunnel is active on port 8787. Open http://localhost:8787 in browser. > Server health check: curl http://127.0.0.1:8787/health should return {"status":"ok"}. > -> Automated coverage: 2107 tests collected via `pytest tests/ --collect-only -q`. Includes onboarding coverage for bootstrap/static wizard presence, real provider config persistence (`config.yaml` + `.env`), the `/api/onboarding/*` backend, the onboarding skip/existing-config guard, and CSS regression coverage for smooth thinking/tool card disclosure animation. +> Automated coverage: 2169 tests collected via `pytest tests/ --collect-only -q`. Includes onboarding coverage for bootstrap/static wizard presence, real provider config persistence (`config.yaml` + `.env`), the `/api/onboarding/*` backend, the onboarding skip/existing-config guard, and CSS regression coverage for smooth thinking/tool card disclosure animation. 
> Run: `pytest tests/ -v --timeout=60` > > Local regression focus: verify that a previously closed workspace panel stays visually closed from first paint through boot completion on desktop refresh; there should be no brief open-then-close flash. diff --git a/api/agent_sessions.py b/api/agent_sessions.py new file mode 100644 index 00000000..00ab8b93 --- /dev/null +++ b/api/agent_sessions.py @@ -0,0 +1,55 @@ +"""Shared helpers for reading Hermes Agent sessions from state.db.""" +import logging +import sqlite3 +from pathlib import Path + +logger = logging.getLogger(__name__) + + +def read_importable_agent_session_rows(db_path: Path, limit: int = 200, log=None) -> list[dict]: + """Return non-WebUI agent sessions that have readable message rows. + + Hermes Agent can create rows in ``state.db.sessions`` before a session has + any messages. WebUI cannot import those rows, so both the regular + ``/api/sessions`` path and the gateway SSE watcher must filter them the + same way. + """ + db_path = Path(db_path) + if not db_path.exists(): + return [] + + log = log or logger + with sqlite3.connect(str(db_path)) as conn: + conn.row_factory = sqlite3.Row + cur = conn.cursor() + + # Older Hermes Agent versions may not have source tracking. Without a + # source column we cannot safely distinguish WebUI rows from agent rows. + cur.execute("PRAGMA table_info(sessions)") + session_cols = {row[1] for row in cur.fetchall()} + if 'source' not in session_cols: + log.warning( + "agent session listing skipped: state.db at %s has no 'source' column " + "(older hermes-agent?). Agent sessions unavailable. 
" + "Upgrade hermes-agent to fix this.", + db_path, + ) + return [] + + cur.execute( + """ + SELECT s.id, s.title, s.model, s.message_count, + s.started_at, s.source, + COUNT(m.id) AS actual_message_count, + MAX(m.timestamp) AS last_activity + FROM sessions s + LEFT JOIN messages m ON m.session_id = s.id + WHERE s.source IS NOT NULL AND s.source != 'webui' + GROUP BY s.id + HAVING COUNT(m.id) > 0 + ORDER BY COALESCE(MAX(m.timestamp), s.started_at) DESC + LIMIT ? + """, + (int(limit),), + ) + return [dict(row) for row in cur.fetchall()] diff --git a/api/gateway_watcher.py b/api/gateway_watcher.py index 594d18f6..b41ebc83 100644 --- a/api/gateway_watcher.py +++ b/api/gateway_watcher.py @@ -13,12 +13,12 @@ import json import logging import os import queue -import sqlite3 import threading import time from pathlib import Path from api.config import HOME +from api.agent_sessions import read_importable_agent_session_rows logger = logging.getLogger(__name__) @@ -55,33 +55,18 @@ def _get_agent_sessions_from_db() -> list: return [] try: - with sqlite3.connect(str(db_path)) as conn: - conn.row_factory = sqlite3.Row - cur = conn.cursor() - cur.execute(""" - SELECT s.id, s.title, s.model, s.message_count, - s.started_at, s.source, - MAX(m.timestamp) AS last_activity - FROM sessions s - LEFT JOIN messages m ON m.session_id = s.id - WHERE s.source IS NOT NULL AND s.source != 'webui' - GROUP BY s.id - HAVING COUNT(m.id) > 0 - ORDER BY COALESCE(MAX(m.timestamp), s.started_at) DESC - LIMIT 200 - """) - sessions = [] - for row in cur.fetchall(): - sessions.append({ - 'session_id': row['id'], - 'title': row['title'] or 'Agent Session', - 'model': row['model'] or None, - 'message_count': row['message_count'] or 0, - 'created_at': row['started_at'], - 'updated_at': row['last_activity'] or row['started_at'], - 'source': row['source'] or 'cli', - }) - return sessions + sessions = [] + for row in read_importable_agent_session_rows(db_path, limit=200, log=logger): + sessions.append({ + 
'session_id': row['id'], + 'title': row['title'] or 'Agent Session', + 'model': row['model'] or None, + 'message_count': row['message_count'] or row['actual_message_count'] or 0, + 'created_at': row['started_at'], + 'updated_at': row['last_activity'] or row['started_at'], + 'source': row['source'] or 'cli', + }) + return sessions except Exception: return [] diff --git a/api/metering.py b/api/metering.py new file mode 100644 index 00000000..6edf2961 --- /dev/null +++ b/api/metering.py @@ -0,0 +1,187 @@ +""" +Hermes Web UI -- Streaming performance metering. + +Tracks Tokens Per Second (TPS) across all active WebUI sessions, and the +HIGH/LOW TPS values observed over the past 60 minutes. Metering data is +emitted via SSE events so the header label can update live during a stream. + +Architecture +──────────── +Each streaming session is tracked independently. TPS per session is: + + session_tps = total_tokens / (last_token_ts - first_token_ts) + +The global tps is the average of all currently active sessions' TPS values. +This correctly represents the system's real-time capacity regardless of how +many sessions are running or how long each has been streaming. + +For HIGH/LOW tracking, every stats snapshot records the current global tps +(only when > 0 — idle periods are skipped) into a rolling 60-minute history. +The max/min of that history gives the peak throughput observed over the past hour. + +The ticker in streaming.py calls get_interval() — it returns 1.0 when sessions +are actively receiving tokens so the header updates at 1 Hz, and 10.0 when idle +so the ticker exits and no idle readings are emitted. 
+ +Usage from api/streaming.py +───────────────────────────── + from api.metering import meter + + meter().begin_session(stream_id) # stream starts + meter().record_token(stream_id, running_output) # per output token + meter().record_reasoning(stream_id, running_reasoning_len) # per reasoning token + +The SSE `metering` event payload: + { + "tps": 47.3, # average TPS across active sessions (real-time) + "high": 52.1, # highest average TPS observed in the past 60 minutes + "low": 31.4, # lowest average TPS (excl. readings < 1 tps, to ignore idle) + "active": 1, # sessions currently streaming + } +""" + +from __future__ import annotations + +import threading +import time +from dataclasses import dataclass + +_HOUR_SECS = 3600.0 # rolling window for HIGH/LOW tracking +_STALE_SECS = 60.0 # consider a session inactive after this + + +@dataclass +class _SessionMeter: + output_tokens: int = 0 + reasoning_tokens: int = 0 + first_token_ts: float = 0.0 # time.monotonic() of first token received + last_token_ts: float = 0.0 # time.monotonic() of last token received + + def total_tokens(self) -> int: + return self.output_tokens + self.reasoning_tokens + + def tps(self) -> float: + if self.first_token_ts == 0.0 or self.last_token_ts <= self.first_token_ts: + return 0.0 + return self.total_tokens() / (self.last_token_ts - self.first_token_ts) + + +class GlobalMeter: + """Thread-safe global streaming meter. + + Tracks per-session TPS, averages them for a global tps, and maintains a + 60-minute rolling history of global tps snapshots for HIGH/LOW reporting. + """ + + __slots__ = ( + '_lock', + '_sessions', # stream_id -> _SessionMeter + '_readings', # [(monotonic_ts, tps), ...] 
rolling 60-minute history + '_window_start', # monotonic ts of current window + ) + + def __init__(self) -> None: + self._lock = threading.Lock() + self._sessions: dict[str, _SessionMeter] = {} + self._readings: list[tuple[float, float]] = [] + self._window_start: float = time.monotonic() + + # ── Public API ──────────────────────────────────────────────────────────── + + def begin_session(self, stream_id: str) -> None: + with self._lock: + self._sessions[stream_id] = _SessionMeter() + + def get_interval(self) -> float: + """Return 1.0 when sessions are actively receiving tokens, 10.0 when idle. + + Used by the streaming ticker to run at 1 Hz during work and exit when + there is nothing to measure. + """ + now = time.monotonic() + with self._lock: + # Only count sessions that have received at least one token recently. + active_sids = { + sid for sid, s in self._sessions.items() + if s.first_token_ts > 0 and (now - s.last_token_ts) <= _STALE_SECS + } + return 1.0 if active_sids else 10.0 + + def record_token(self, stream_id: str, running_output_tokens: int) -> None: + now = time.monotonic() + with self._lock: + s = self._sessions.get(stream_id) + if s is None: + return + if s.first_token_ts == 0.0: + s.first_token_ts = now + s.last_token_ts = now + s.output_tokens = running_output_tokens + + def record_reasoning(self, stream_id: str, running_reasoning_tokens: int) -> None: + now = time.monotonic() + with self._lock: + s = self._sessions.get(stream_id) + if s is None: + return + if s.first_token_ts == 0.0: + s.first_token_ts = now + s.last_token_ts = now + s.reasoning_tokens = running_reasoning_tokens + + def end_session(self, stream_id: str, final_output_tokens: int, input_tokens: int = 0) -> None: + with self._lock: + self._sessions.pop(stream_id, None) + + def get_stats(self) -> dict: + now = time.monotonic() + with self._lock: + # Prune stale sessions + stale = [ + sid for sid, s in self._sessions.items() + if s.first_token_ts > 0 and (now - s.last_token_ts) > 
_STALE_SECS + ] + for sid in stale: + self._sessions.pop(sid, None) + + # Reset window if everything went stale + if not self._sessions: + self._window_start = now + + # Compute global tps: average of per-session TPS values + active = [s for s in self._sessions.values() if s.first_token_ts > 0] + if active: + global_tps = sum(s.tps() for s in active) / len(active) + else: + global_tps = 0.0 + + # Prune readings older than 1 hour + cutoff = now - _HOUR_SECS + self._readings = [(ts, v) for ts, v in self._readings if ts > cutoff] + + # Only record this snapshot for HIGH/LOW if there is active work. + # This prevents idle periods from flooding the history and keeps + # HIGH/LOW meaningful for the past hour of actual throughput. + if global_tps > 0: + self._readings.append((now, global_tps)) + + # HIGH/LOW from the past hour (skip near-zero idle readings) + active_readings = [v for _, v in self._readings if v >= 1.0] + high = max(active_readings) if active_readings else 0.0 + low = min(active_readings) if active_readings else 0.0 + + return { + 'tps': round(global_tps, 1), + 'high': round(high, 1), + 'low': round(low, 1), + 'active': len(self._sessions), + } + + +# ── Module-level singleton ───────────────────────────────────────────────────── + +_meter = GlobalMeter() + + +def meter() -> GlobalMeter: + return _meter diff --git a/api/models.py b/api/models.py index eb65d78a..f99a66c2 100644 --- a/api/models.py +++ b/api/models.py @@ -15,6 +15,7 @@ from api.config import ( get_effective_default_model, ) from api.workspace import get_last_workspace +from api.agent_sessions import read_importable_agent_session_rows logger = logging.getLogger(__name__) @@ -193,6 +194,114 @@ def _active_stream_ids(): def _is_streaming_session(active_stream_id, active_stream_ids): return bool(active_stream_id and active_stream_id in active_stream_ids) +def _session_sort_timestamp(session): + if isinstance(session, dict): + return session.get('last_message_at') or session.get('updated_at') or 
0 + return _last_message_timestamp(getattr(session, 'messages', None)) or getattr(session, 'updated_at', 0) or 0 + + +def _message_timestamp(message): + if not isinstance(message, dict): + return None + raw = message.get('_ts') or message.get('timestamp') + try: + return float(raw) if raw is not None else None + except (TypeError, ValueError): + return None + + +def _last_message_timestamp(messages): + if not isinstance(messages, list): + return None + for message in reversed(messages): + if isinstance(message, dict) and message.get('role') == 'tool': + continue + ts = _message_timestamp(message) + if ts: + return ts + return None + + +def _find_top_level_json_key(text, key): + """Return the character offset of a top-level JSON object key, if present.""" + depth = 0 + i = 0 + n = len(text) + while i < n: + ch = text[i] + if ch == '"': + start = i + i += 1 + escaped = False + chars = [] + while i < n: + c = text[i] + if escaped: + chars.append(c) + escaped = False + elif c == '\\': + escaped = True + elif c == '"': + break + else: + chars.append(c) + i += 1 + if i >= n: + return None + if depth == 1 and ''.join(chars) == key: + j = i + 1 + while j < n and text[j] in ' \t\r\n': + j += 1 + if j < n and text[j] == ':': + return start + elif ch in '{[': + depth += 1 + elif ch in '}]': + depth -= 1 + i += 1 + return None + + +def _read_metadata_json_prefix(path, max_prefix_bytes=65536): + """Read only the metadata portion before the top-level messages array.""" + buf = '' + with open(path, 'r', encoding='utf-8') as f: + while len(buf.encode('utf-8')) < max_prefix_bytes: + chunk = f.read(4096) + if not chunk: + return None + buf += chunk + messages_pos = _find_top_level_json_key(buf, 'messages') + if messages_pos is None: + continue + prefix = buf[:messages_pos].rstrip() + if prefix.endswith(','): + prefix = prefix[:-1].rstrip() + return f'{prefix}\n}}' + return None + + +def _lookup_index_message_count(session_id): + """Return the indexed message count without loading the 
full session file.""" + try: + entries = json.loads(SESSION_INDEX_FILE.read_text(encoding='utf-8')) + except Exception: + return None + if not isinstance(entries, list): + return None + for entry in entries: + if entry.get('session_id') != session_id: + continue + count = entry.get('message_count') + if isinstance(count, int) and count >= 0: + return count + try: + count = int(count) + except (TypeError, ValueError): + return None + return count if count >= 0 else None + return None + class Session: def __init__(self, session_id: str=None, title: str='Untitled', @@ -231,6 +340,7 @@ class Session: self.pending_started_at = pending_started_at self.compression_anchor_visible_idx = compression_anchor_visible_idx self.compression_anchor_message_key = compression_anchor_message_key + self._metadata_message_count = None @property def path(self): @@ -255,7 +365,8 @@ class Session: meta['tool_calls'] = self.tool_calls # Fields not in METADATA_FIELDS (e.g. last_usage, message_count) go at the end extra = {k: v for k, v in self.__dict__.items() - if k not in METADATA_FIELDS and k not in ('messages', 'tool_calls')} + if k not in METADATA_FIELDS and k not in ('messages', 'tool_calls') + and not k.startswith('_')} payload = json.dumps({**meta, **extra}, ensure_ascii=False, indent=2) tmp = self.path.with_suffix(f'.tmp.{os.getpid()}.{threading.current_thread().ident}') try: @@ -288,10 +399,9 @@ class Session: """Load only the compact metadata fields, skipping the messages array. Session JSON files have metadata fields (session_id, title, model, etc.) - at the top level, before the large messages array. We read only the - first ~1KB — enough to capture all compact() fields — then parse just - that prefix. Falls back to load() if the prefix doesn't contain enough - fields or if the file is unexpectedly small. + at the top level, before the large messages array. Read only up to the + top-level "messages" field and synthesize a small metadata-only object. 
+ Falls back to load() for legacy or unexpected file layouts. """ if not sid or not all(c in '0123456789abcdefghijklmnopqrstuvwxyz_' for c in sid): return None @@ -299,26 +409,18 @@ class Session: if not p.exists(): return None try: - # Read just the first 1 KB — metadata comes before messages array - with open(p, 'r', encoding='utf-8') as f: - prefix = f.read(1024) + prefix = _read_metadata_json_prefix(p) if not prefix: return cls.load(sid) parsed = json.loads(prefix) - # Verify we got the essential fields. - # With metadata-first save() ordering, messages appears at byte ~567. - # For sessions <= ~512 bytes total the entire messages array fits in the - # first 1 KB and we get a valid list. For larger sessions json.loads - # fails on the truncated buffer (unterminated string), so we fall back - # to full load. The one exception is a truncation inside a string value - # that happens to produce valid JSON with a truncated string — guard - # against that by requiring messages to be a list. 
needed = {'session_id', 'title', 'created_at', 'updated_at'} if not needed.issubset(parsed.keys()): return cls.load(sid) - if not isinstance(parsed.get('messages'), list): - return cls.load(sid) - return cls(**parsed) + parsed['messages'] = [] + parsed['tool_calls'] = [] + session = cls(**parsed) + session._metadata_message_count = _lookup_index_message_count(sid) + return session except Exception: # Corrupt prefix or decode error — fall back to full load return cls.load(sid) @@ -330,9 +432,14 @@ class Session: 'title': self.title, 'workspace': self.workspace, 'model': self.model, - 'message_count': len(self.messages), + 'message_count': ( + self._metadata_message_count + if self._metadata_message_count is not None + else len(self.messages) + ), 'created_at': self.created_at, 'updated_at': self.updated_at, + 'last_message_at': _last_message_timestamp(self.messages) or self.updated_at, 'pinned': self.pinned, 'archived': self.archived, 'project_id': self.project_id, @@ -352,9 +459,10 @@ class Session: def get_session(sid, metadata_only=False): """Load a session, optionally with metadata only (skipping the messages array). - When metadata_only=True the session is still cached so the full load on the - next access is fast. Use this when you only need compact() metadata and not - the actual message history (e.g., for fast sidebar switching). + Metadata-only loads intentionally do not populate the full-session cache. + Otherwise a later full load could return a compact object with an empty + messages list. Use this when you only need compact() metadata and not the + actual message history (e.g., for fast sidebar switching). 
""" with LOCK: if sid in SESSIONS: @@ -362,6 +470,8 @@ def get_session(sid, metadata_only=False): return SESSIONS[sid] if metadata_only: s = Session.load_metadata_only(sid) + if s: + return s else: s = Session.load(sid) if s: @@ -413,6 +523,18 @@ def all_sessions(): s for s in index if _index_entry_exists(s.get('session_id')) ] + backfilled = [] + for i, s in enumerate(index): + if 'last_message_at' not in s: + full = Session.load(s.get('session_id')) + if full: + index[i] = full.compact() + backfilled.append(full) + if backfilled: + try: + _write_session_index(updates=backfilled) + except Exception: + logger.debug("Failed to persist last_message_at backfill") for s in index: s['is_streaming'] = _is_streaming_session( s.get('active_stream_id'), @@ -426,7 +548,7 @@ def all_sessions(): include_runtime=True, active_stream_ids=active_stream_ids, ) - result = sorted(index_map.values(), key=lambda s: (s.get('pinned', False), s['updated_at']), reverse=True) + result = sorted(index_map.values(), key=lambda s: (s.get('pinned', False), _session_sort_timestamp(s)), reverse=True) # Hide empty Untitled sessions from the UI (created by tests, page refreshes, etc.) # Exempt sessions younger than 60 s so a brand-new session stays visible (#789) _now = time.time() @@ -454,7 +576,7 @@ def all_sessions(): logger.debug("Failed to load session from %s", p) for s in SESSIONS.values(): if all(s.session_id != x.session_id for x in out): out.append(s) - out.sort(key=lambda s: (getattr(s, 'pinned', False), s.updated_at), reverse=True) + out.sort(key=lambda s: (getattr(s, 'pinned', False), _session_sort_timestamp(s)), reverse=True) _now = time.time() result = [s.compact(include_runtime=True, active_stream_ids=active_stream_ids) for s in out if not ( s.title == 'Untitled' @@ -528,16 +650,11 @@ def get_cli_sessions() -> list: """Read CLI sessions from the agent's SQLite store and return them as dicts in a format the WebUI sidebar can render alongside local sessions. 
- Returns empty list if the SQLite DB is missing, the sqlite3 module is - unavailable, or any error occurs -- the bridge is purely additive and never - crashes the WebUI. + Returns empty list if the SQLite DB is missing or any error occurs -- the + bridge is purely additive and never crashes the WebUI. """ import os cli_sessions = [] - try: - import sqlite3 - except ImportError: - return cli_sessions # Use the active WebUI profile's HERMES_HOME to find state.db. # The active profile is determined by what the user has selected in the UI @@ -566,59 +683,30 @@ def get_cli_sessions() -> list: _cli_profile = None # older agent -- fall back to no profile try: - with sqlite3.connect(str(db_path)) as conn: - conn.row_factory = sqlite3.Row - cur = conn.cursor() - # Introspect schema to handle older hermes-agent versions that - # may not have a 'source' column. Without this check the query raises - # OperationalError which is silently swallowed, causing the empty-list bug. - cur.execute("PRAGMA table_info(sessions)") - _session_cols = {row[1] for row in cur.fetchall()} - if 'source' not in _session_cols: - import logging as _logging - _logging.getLogger(__name__).warning( - "get_cli_sessions(): state.db at %s has no 'source' column " - "(older hermes-agent?). CLI sessions unavailable. " - "Upgrade hermes-agent to fix this.", - db_path, - ) - return cli_sessions + for row in read_importable_agent_session_rows(db_path, limit=200, log=logger): + sid = row['id'] + raw_ts = row['last_activity'] or row['started_at'] + # Prefer the CLI session's own profile from the DB; fall back to + # the active CLI profile so sidebar filtering works either way. 
+ profile = _cli_profile # CLI DB has no profile column; use active profile - cur.execute(""" - SELECT s.id, s.title, s.model, s.message_count, - s.started_at, s.source, - MAX(m.timestamp) AS last_activity - FROM sessions s - LEFT JOIN messages m ON m.session_id = s.id - WHERE s.source IS NOT NULL AND s.source != 'webui' - GROUP BY s.id - ORDER BY COALESCE(MAX(m.timestamp), s.started_at) DESC - LIMIT 200 - """) - for row in cur.fetchall(): - sid = row['id'] - raw_ts = row['last_activity'] or row['started_at'] - # Prefer the CLI session's own profile from the DB; fall back to - # the active CLI profile so sidebar filtering works either way. - profile = _cli_profile # CLI DB has no profile column; use active profile - - _source = row['source'] or 'cli' - _display_title = row['title'] or f'{_source.title()} Session' - cli_sessions.append({ - 'session_id': sid, - 'title': _display_title, - 'workspace': str(get_last_workspace()), - 'model': row['model'] or None, - 'message_count': row['message_count'] or 0, - 'created_at': row['started_at'], - 'updated_at': raw_ts, - 'pinned': False, - 'archived': False, - 'project_id': None, - 'profile': profile, - 'source_tag': _source, - 'is_cli_session': True, - }) + _source = row['source'] or 'cli' + _display_title = row['title'] or f'{_source.title()} Session' + cli_sessions.append({ + 'session_id': sid, + 'title': _display_title, + 'workspace': str(get_last_workspace()), + 'model': row['model'] or None, + 'message_count': row['message_count'] or row['actual_message_count'] or 0, + 'created_at': row['started_at'], + 'updated_at': raw_ts, + 'pinned': False, + 'archived': False, + 'project_id': None, + 'profile': profile, + 'source_tag': _source, + 'is_cli_session': True, + }) except Exception as _cli_err: # DB schema changed, locked, or corrupted -- log warning so admins can diagnose. # Still degrade gracefully (don't crash the WebUI). 
diff --git a/api/routes.py b/api/routes.py index ce8d0e50..b6105dfd 100644 --- a/api/routes.py +++ b/api/routes.py @@ -329,6 +329,7 @@ from api.workspace import ( safe_resolve_ws, resolve_trusted_workspace, validate_workspace_to_add, + _workspace_blocked_roots, ) from api.upload import handle_upload, handle_transcribe from api.streaming import _sse, _run_agent_streaming, cancel_stream @@ -680,19 +681,26 @@ def handle_get(handler, parsed) -> bool: import time as _time _t0 = _time.monotonic() _debug_slow = os.environ.get("HERMES_DEBUG_SLOW", "") - sid = parse_qs(parsed.query).get("session_id", [""])[0] + query = parse_qs(parsed.query) + sid = query.get("session_id", [""])[0] if not sid: return j(handler, {"error": "session_id is required"}, status=400) # ?messages=0 skips the message payload for fast session switching. # The frontend uses this when switching conversations in the sidebar # (only needs metadata). The full message array is loaded lazily # via ?messages=1 when the message panel opens. 
- load_messages = parse_qs(parsed.query).get("messages", ["1"])[0] != "0" + load_messages = query.get("messages", ["1"])[0] != "0" + resolve_model_default = "1" if load_messages else "0" + resolve_model = query.get("resolve_model", [resolve_model_default])[0] != "0" try: _t1 = _time.monotonic() s = get_session(sid, metadata_only=(not load_messages)) _t2 = _time.monotonic() - effective_model = _resolve_effective_session_model_for_display(s) + effective_model = ( + _resolve_effective_session_model_for_display(s) + if resolve_model + else None + ) _t3 = _time.monotonic() raw = s.compact() | { "messages": s.messages if load_messages else [], @@ -735,6 +743,8 @@ def handle_get(handler, parsed) -> bool: "message_count": len(msgs), "created_at": (cli_meta or {}).get("created_at", 0), "updated_at": (cli_meta or {}).get("updated_at", 0), + "last_message_at": (cli_meta or {}).get("last_message_at") + or (cli_meta or {}).get("updated_at", 0), "pinned": False, "archived": False, "project_id": None, @@ -783,7 +793,10 @@ def handle_get(handler, parsed) -> bool: else: deduped_cli = [] merged = webui_sessions + deduped_cli - merged.sort(key=lambda s: s.get("updated_at", 0) or 0, reverse=True) + merged.sort( + key=lambda s: s.get("last_message_at") or s.get("updated_at", 0) or 0, + reverse=True, + ) safe_merged = [] for s in merged: item = dict(s) @@ -3027,8 +3040,25 @@ def _handle_create_dir(handler, body): def _handle_workspace_add(handler, body): path_str = body.get("path", "").strip() name = body.get("name", "").strip() + auto_create = body.get("create", False) if not path_str: return bad(handler, "path is required") + # Validate the path is NOT a blocked system root BEFORE any filesystem mutation. + # This prevents creating orphan directories on rejected paths (#782 review). 
+ candidate = Path(path_str).expanduser().resolve() + for blocked in _workspace_blocked_roots(): + try: + candidate.relative_to(blocked) + return bad(handler, f"Path points to a system directory: {candidate}") + except ValueError: + pass + # Now safe to create the directory if requested + if auto_create: + try: + candidate.mkdir(parents=True, exist_ok=True) + except (OSError, PermissionError) as e: + return bad(handler, f"Could not create directory: {_sanitize_error(e)}") + # Full validation (exists, is_dir) — should pass now that dir exists try: p = validate_workspace_to_add(path_str) except ValueError as e: diff --git a/api/streaming.py b/api/streaming.py index aa5fbca6..a999e4c6 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -25,6 +25,7 @@ from api.config import ( resolve_model_provider, ) from api.helpers import redact_session_data +from api.metering import meter # Global lock for os.environ writes. Per-session locks (_agent_lock) prevent # concurrent runs of the SAME session, but two DIFFERENT sessions can still @@ -292,9 +293,71 @@ def _aux_title_timeout(default: float = 15.0) -> float: return default def _title_completion_budget(provider: str = '', model: str = '', base_url: str = '') -> int: - if _is_minimax_route(provider, model, base_url): - return 384 - return 160 + # Title generation is a small auxiliary task, but reasoning models may + # spend a surprising amount of the completion budget before emitting final + # content. Keep the budget high enough for MiniMax/Kimi-style reasoning + # responses without making title generation depend on provider-specific + # one-off branches. 
+ return 512 + + +def _title_retry_completion_budget(provider: str = '', model: str = '', base_url: str = '') -> int: + return max(1024, _title_completion_budget(provider, model, base_url) * 2) + + +def _title_retry_status(status: str) -> bool: + return status in { + 'llm_length', + 'llm_length_aux', + 'llm_empty_reasoning', + 'llm_empty_reasoning_aux', + } + + +def _safe_obj_value(obj, key: str): + if obj is None: + return None + if isinstance(obj, dict): + return obj.get(key) + value = getattr(obj, key, None) + # Missing MagicMock attrs stringify as mock reprs and look truthy. Treat + # them as absent so tests model real provider objects accurately. + if value.__class__.__module__.startswith('unittest.mock'): + return None + return value + + +def _safe_text_value(value) -> str: + if value is None: + return '' + if value.__class__.__module__.startswith('unittest.mock'): + return '' + return str(value or '').strip() + + +def _extract_title_response(resp, *, aux: bool = False) -> tuple[str, str]: + """Return (content, empty_status) from an OpenAI-compatible response.""" + suffix = '_aux' if aux else '' + try: + choices = _safe_obj_value(resp, 'choices') or [] + choice = choices[0] if choices else None + message = _safe_obj_value(choice, 'message') + content = _safe_text_value(_safe_obj_value(message, 'content')) + if content: + return content, '' + finish_reason = _safe_text_value(_safe_obj_value(choice, 'finish_reason')).lower() + reasoning = ( + _safe_text_value(_safe_obj_value(message, 'reasoning')) + or _safe_text_value(_safe_obj_value(message, 'reasoning_content')) + or _safe_text_value(_safe_obj_value(message, 'thinking')) + ) + if finish_reason == 'length': + return '', f'llm_length{suffix}' + if reasoning: + return '', f'llm_empty_reasoning{suffix}' + return '', f'llm_empty{suffix}' + except Exception: + return '', f'llm_empty{suffix}' def generate_title_raw_via_aux( @@ -308,41 +371,43 @@ def generate_title_raw_via_aux( if not user_text or not 
assistant_text: return None, 'missing_exchange' qa, prompts = _title_prompts(user_text, assistant_text) - max_tokens = _title_completion_budget(provider, model, base_url) + base_max_tokens = _title_completion_budget(provider, model, base_url) reasoning_extra = {"reasoning": {"enabled": False}} if _is_minimax_route(provider, model, base_url): reasoning_extra["reasoning_split"] = True try: _timeout = _aux_title_timeout() from agent.auxiliary_client import call_llm + last_status = 'llm_error_aux' for idx, prompt in enumerate(prompts): messages = [ {"role": "system", "content": prompt}, {"role": "user", "content": qa}, ] + budgets = [base_max_tokens] try: - resp = call_llm( - task='title_generation', - provider=provider or None, - model=model or None, - base_url=base_url or None, - messages=messages, - max_tokens=max_tokens, - temperature=0.2, - timeout=_timeout, - extra_body=reasoning_extra, - ) - raw = '' - try: - raw = resp.choices[0].message.content or '' - except Exception: - raw = '' - raw = str(raw or '').strip() - if raw: - return raw, ('llm_aux' if idx == 0 else 'llm_aux_retry') + for budget_idx, max_tokens in enumerate(budgets): + resp = call_llm( + task='title_generation', + provider=provider or None, + model=model or None, + base_url=base_url or None, + messages=messages, + max_tokens=max_tokens, + temperature=0.2, + timeout=_timeout, + extra_body=reasoning_extra, + ) + raw, empty_status = _extract_title_response(resp, aux=True) + if raw: + return raw, ('llm_aux' if idx == 0 and budget_idx == 0 else 'llm_aux_retry') + last_status = empty_status or 'llm_empty_aux' + if budget_idx == 0 and _title_retry_status(last_status): + budgets.append(_title_retry_completion_budget(provider, model, base_url)) except Exception as e: + last_status = 'llm_error_aux' logger.debug("Aux title generation attempt %s failed: %s", idx + 1, e) - return None, 'llm_error_aux' + return None, last_status except Exception as e: logger.debug("Aux title generation failed: %s", e) return 
None, 'llm_error_aux' @@ -356,7 +421,7 @@ def generate_title_raw_via_agent(agent, user_text: str, assistant_text: str) -> return None, 'missing_agent' qa, prompts = _title_prompts(user_text, assistant_text) - max_tokens = _title_completion_budget( + base_max_tokens = _title_completion_budget( getattr(agent, 'provider', ''), getattr(agent, 'model', ''), getattr(agent, 'base_url', ''), @@ -370,57 +435,70 @@ def generate_title_raw_via_agent(agent, user_text: str, assistant_text: str) -> {"role": "system", "content": prompt}, {"role": "user", "content": qa}, ] + budgets = [base_max_tokens] try: - raw = "" - if getattr(agent, 'api_mode', '') == 'codex_responses': - codex_kwargs = agent._build_api_kwargs(api_messages) - codex_kwargs.pop('tools', None) - if 'max_output_tokens' in codex_kwargs: - codex_kwargs['max_output_tokens'] = max_tokens - resp = agent._run_codex_stream(codex_kwargs) - assistant_message, _ = agent._normalize_codex_response(resp) - raw = (assistant_message.content or '') if assistant_message else '' - elif getattr(agent, 'api_mode', '') == 'anthropic_messages': - from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response - ant_kwargs = build_anthropic_kwargs( - model=agent.model, - messages=api_messages, - tools=None, - max_tokens=max_tokens, - reasoning_config=disabled_reasoning, - is_oauth=getattr(agent, '_is_anthropic_oauth', False), - preserve_dots=agent._anthropic_preserve_dots(), - base_url=getattr(agent, '_anthropic_base_url', None), - ) - resp = agent._anthropic_messages_create(ant_kwargs) - assistant_message, _ = normalize_anthropic_response( - resp, strip_tool_prefix=getattr(agent, '_is_anthropic_oauth', False) - ) - raw = (assistant_message.content or '') if assistant_message else '' - else: - api_kwargs = agent._build_api_kwargs(api_messages) - api_kwargs.pop('tools', None) - api_kwargs['temperature'] = 0.1 - api_kwargs['timeout'] = 15.0 - if _is_minimax_route(getattr(agent, 'provider', ''), getattr(agent, 
'model', ''), getattr(agent, 'base_url', '')): - extra_body = dict(api_kwargs.get('extra_body') or {}) - extra_body['reasoning_split'] = True - api_kwargs['extra_body'] = extra_body - if 'max_completion_tokens' in api_kwargs: - api_kwargs['max_completion_tokens'] = max_tokens + last_status = 'llm_empty' + for budget_idx, max_tokens in enumerate(budgets): + raw = "" + empty_status = '' + if getattr(agent, 'api_mode', '') == 'codex_responses': + codex_kwargs = agent._build_api_kwargs(api_messages) + codex_kwargs.pop('tools', None) + if 'max_output_tokens' in codex_kwargs: + codex_kwargs['max_output_tokens'] = max_tokens + resp = agent._run_codex_stream(codex_kwargs) + assistant_message, _ = agent._normalize_codex_response(resp) + raw = (assistant_message.content or '') if assistant_message else '' + if not raw: + empty_status = 'llm_empty' + elif getattr(agent, 'api_mode', '') == 'anthropic_messages': + from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response + ant_kwargs = build_anthropic_kwargs( + model=agent.model, + messages=api_messages, + tools=None, + max_tokens=max_tokens, + reasoning_config=disabled_reasoning, + is_oauth=getattr(agent, '_is_anthropic_oauth', False), + preserve_dots=agent._anthropic_preserve_dots(), + base_url=getattr(agent, '_anthropic_base_url', None), + ) + resp = agent._anthropic_messages_create(ant_kwargs) + assistant_message, _ = normalize_anthropic_response( + resp, strip_tool_prefix=getattr(agent, '_is_anthropic_oauth', False) + ) + raw = (assistant_message.content or '') if assistant_message else '' + if not raw: + empty_status = 'llm_empty' else: - api_kwargs['max_tokens'] = max_tokens - resp = agent._ensure_primary_openai_client(reason='title_generation').chat.completions.create( - **api_kwargs, - ) - try: - raw = resp.choices[0].message.content or "" - except Exception: - raw = "" - raw = str(raw or '').strip() - if raw: - return raw, ('llm' if idx == 0 else 'llm_retry') + api_kwargs = 
agent._build_api_kwargs(api_messages) + api_kwargs.pop('tools', None) + api_kwargs['temperature'] = 0.1 + api_kwargs['timeout'] = 15.0 + if _is_minimax_route(getattr(agent, 'provider', ''), getattr(agent, 'model', ''), getattr(agent, 'base_url', '')): + extra_body = dict(api_kwargs.get('extra_body') or {}) + extra_body['reasoning_split'] = True + api_kwargs['extra_body'] = extra_body + if 'max_completion_tokens' in api_kwargs: + api_kwargs['max_completion_tokens'] = max_tokens + else: + api_kwargs['max_tokens'] = max_tokens + resp = agent._ensure_primary_openai_client(reason='title_generation').chat.completions.create( + **api_kwargs, + ) + raw, empty_status = _extract_title_response(resp) + raw = str(raw or '').strip() + if raw: + return raw, ('llm' if idx == 0 and budget_idx == 0 else 'llm_retry') + last_status = empty_status or 'llm_empty' + if budget_idx == 0 and _title_retry_status(last_status): + budgets.append(_title_retry_completion_budget( + getattr(agent, 'provider', ''), + getattr(agent, 'model', ''), + getattr(agent, 'base_url', ''), + )) except Exception as e: + last_status = 'llm_error' logger.debug( "Agent title generation attempt %s failed: provider=%s model=%s error=%s", idx + 1, @@ -428,7 +506,7 @@ def generate_title_raw_via_agent(agent, user_text: str, assistant_text: str) -> getattr(agent, 'model', None), e, ) - return None, 'llm_error' + return None, last_status except Exception as e: logger.debug("Agent title generation failed: %s", e) return None, 'llm_error' @@ -611,6 +689,11 @@ def _run_background_title_update(session_id: str, user_text: str, assistant_text if next_title: logger.debug("Using local fallback for session title generation") source = 'fallback' + fallback_reason = ( + f'local_summary:{llm_status}' + if source == 'fallback' and llm_status + else 'local_summary' + ) wrote_title = False effective_title = current if next_title: @@ -638,7 +721,7 @@ def _run_background_title_update(session_id: str, user_text: str, assistant_text if 
wrote_title: if source == 'fallback': - _put_title_status(put_event, session_id, source, 'local_summary', effective_title, raw_preview) + _put_title_status(put_event, session_id, source, fallback_reason, effective_title, raw_preview) else: _put_title_status(put_event, session_id, source, llm_status, effective_title, raw_preview) put_event('title', {'session_id': session_id, 'title': effective_title}) @@ -919,6 +1002,28 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta CANCEL_FLAGS[stream_id] = cancel_event STREAM_PARTIAL_TEXT[stream_id] = '' # start accumulating partial text (#893) + # Register this stream with the global streaming meter + meter().begin_session(stream_id) + + # Metering ticker — emits a metering event at 1 Hz while sessions are active. + # When get_interval() returns >= 10.0 (no active sessions), the ticker exits + # so no idle readings are emitted and the SSE consumer sees nothing. + _metering_stop = threading.Event() + + def _metering_ticker(): + while True: + interval = meter().get_interval() + if interval >= 10.0: + break # nothing active — stop the ticker + if _metering_stop.wait(interval): + break # stream was cancelled or ended — exit + stats = meter().get_stats() + stats['session_id'] = stream_id + put('metering', stats) + + _metering_thread = threading.Thread(target=_metering_ticker, daemon=True) + _metering_thread.start() + def put(event, data): # If cancelled, drop all further events except the cancel event itself if cancel_event.is_set() and event not in ('cancel', 'error'): @@ -1061,6 +1166,19 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta _reasoning_text = '' # accumulates reasoning/thinking trace for persistence _live_tool_calls = [] # tool progress fallback when final messages omit tool IDs + # Throttle: emit metering events at most every 100 ms so the TPS label + # feels live during fast token streams without flooding the SSE channel. 
+ _metering_last_emit = [time.monotonic() - 1] # fire immediately on first token + + def _emit_metering(): + now = time.monotonic() + if now - _metering_last_emit[0] < 0.1: + return + _metering_last_emit[0] = now + stats = meter().get_stats() + stats['session_id'] = stream_id + put('metering', stats) + def on_token(text): nonlocal _token_sent if text is None: @@ -1070,6 +1188,9 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta if stream_id in STREAM_PARTIAL_TEXT: STREAM_PARTIAL_TEXT[stream_id] += str(text) put('token', {'text': text}) + # Update global throughput meter + meter().record_token(stream_id, len(STREAM_PARTIAL_TEXT[stream_id])) + _emit_metering() def on_reasoning(text): nonlocal _reasoning_text @@ -1077,6 +1198,9 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta return _reasoning_text += str(text) put('reasoning', {'text': str(text)}) + # Track reasoning tokens in the meter so TPS reflects all AI output + meter().record_reasoning(stream_id, len(_reasoning_text)) + _emit_metering() # Pre-initialise the activity counter here so on_tool (which # closes over it) never captures an unbound name even if this @@ -1084,6 +1208,7 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta _checkpoint_activity = [0] def on_tool(*cb_args, **cb_kwargs): + nonlocal _reasoning_text event_type = None name = None preview = None @@ -1103,7 +1228,10 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta if event_type in ('reasoning.available', '_thinking'): reason_text = preview if event_type == 'reasoning.available' else name if reason_text: + _reasoning_text += str(reason_text) put('reasoning', {'text': str(reason_text)}) + meter().record_reasoning(stream_id, len(_reasoning_text)) + _emit_metering() return args_snap = {} @@ -1623,6 +1751,10 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta # (reasoning trace already 
attached + saved above, before s.save()) raw_session = s.compact() | {'messages': s.messages, 'tool_calls': tool_calls} put('done', {'session': redact_session_data(raw_session), 'usage': usage}) + # Emit metering stats for the header TPS label + meter_stats = meter().get_stats() + meter_stats['session_id'] = session_id + put('metering', meter_stats) if _should_bg_title and _u0 and _a0: threading.Thread( target=_run_background_title_update, @@ -1635,6 +1767,8 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta # activeSid = original session_id so they must match for stream_end to close. put('stream_end', {'session_id': session_id}) finally: + # Stop the live metering ticker + _metering_stop.set() # Unregister the gateway approval callback and unblock any threads # still waiting on approval (e.g. stream cancelled mid-approval). if _approval_registered and _unreg_notify is not None: @@ -1660,6 +1794,13 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta except Exception as e: print('[webui] stream error:\n' + traceback.format_exc(), flush=True) err_str = str(e) + # Sanitize HTML from provider error responses — some providers return + # full HTML pages (e.g. nginx "404 page not found") instead of JSON errors. + # Strip HTML tags to avoid rendering raw markup in the chat message. + _stripped = re.sub(r'<[^>]+>', ' ', err_str) + _stripped = re.sub(r'\s+', ' ', _stripped).strip() + if _stripped != err_str: + err_str = _stripped _exc_lower = err_str.lower() # Classify before saving so the error message can be persisted to the session. 
# Check quota exhaustion first — OpenAI billing 429s use insufficient_quota which @@ -1683,6 +1824,16 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta or 'invalid api key' in _exc_lower or 'no cookie auth credentials' in _exc_lower ) + _exc_is_not_found = ( + '404' in err_str + or 'not found' in _exc_lower + or 'does not exist' in _exc_lower + or 'model not found' in _exc_lower + or 'model_not_found' in _exc_lower + or 'invalid model' in _exc_lower + or 'does not match any known model' in _exc_lower + or 'unknown model' in _exc_lower + ) if _exc_is_quota: _exc_label, _exc_type, _exc_hint = ( 'Out of credits', 'quota_exhausted', @@ -1699,6 +1850,12 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta 'The selected model may not be supported by your configured provider. ' 'Run `hermes model` in your terminal to switch providers, then restart the WebUI.', ) + elif _exc_is_not_found: + _exc_label, _exc_type, _exc_hint = ( + 'Model not found', 'model_not_found', + 'The selected model was not found by the provider. ' + 'Check the model ID in Settings or run `hermes model` to verify it exists for your provider.', + ) else: _exc_label, _exc_type, _exc_hint = 'Error', 'error', '' if s is not None: diff --git a/static/boot.js b/static/boot.js index 29239895..789d0fae 100644 --- a/static/boot.js +++ b/static/boot.js @@ -793,6 +793,7 @@ function applyBotName(){ window._showThinking=s.show_thinking!==false; window._sidebarDensity=(s.sidebar_density==='detailed'?'detailed':'compact'); window._botName=s.bot_name||'Hermes'; + if(s.default_model) window._defaultModel=s.default_model; // Persist default workspace so the blank new-chat page can show it // and workspace actions (New file/folder) work before the first session (#804). 
if(s.default_workspace) S._profileDefaultWorkspace=s.default_workspace; @@ -840,16 +841,20 @@ function applyBotName(){ // Update profile chip label immediately const profileLabel=$('profileChipLabel'); if(profileLabel) profileLabel.textContent=S.activeProfile||'default'; - // Fetch available models from server and populate dropdown dynamically - await populateModelDropdown(); - // Restore last-used model preference - const savedModel=localStorage.getItem('hermes-webui-model'); - if(savedModel && $('modelSelect')){ - $('modelSelect').value=savedModel; - // If the value didn't take (model not in list), clear the bad pref - if($('modelSelect').value!==savedModel) localStorage.removeItem('hermes-webui-model'); - else if(typeof syncModelChip==='function') syncModelChip(); - } + // Fetch available models without blocking session restore. The static HTML + // options are enough for first paint; the dynamic provider list can settle + // after the saved session is visible. + const _modelDropdownReady=populateModelDropdown().then(()=>{ + const savedModel=localStorage.getItem('hermes-webui-model'); + if(savedModel && $('modelSelect')){ + $('modelSelect').value=savedModel; + // If the value didn't take (model not in list), clear the bad pref + if($('modelSelect').value!==savedModel) localStorage.removeItem('hermes-webui-model'); + else if(typeof syncModelChip==='function') syncModelChip(); + } + if(S.session) syncTopbar(); + }).catch(()=>{}); + window._modelDropdownReady=_modelDropdownReady; // Pre-load workspace list so sidebar name is correct from first render await loadWorkspaceList(); await loadOnboardingWizard(); diff --git a/static/i18n.js b/static/i18n.js index 671afbf6..16f279c8 100644 --- a/static/i18n.js +++ b/static/i18n.js @@ -56,6 +56,7 @@ const LOCALES = { model_unavailable_title: 'This model is no longer in your current provider list', provider_mismatch_warning: (m,p)=>`"${m}" may not work with your configured provider (${p}). 
Send anyway, or run \`hermes model\` in your terminal to switch.`, provider_mismatch_label: 'Provider mismatch', + model_not_found_label: 'Model not found', model_custom_label: 'Custom model ID', model_custom_placeholder: 'e.g. openai/gpt-5.4', model_search_placeholder: 'Search models…', @@ -196,6 +197,10 @@ const LOCALES = { new_folder_prompt: 'New folder name:', folder_created: 'Created folder ', folder_create_failed: 'Create folder failed: ', + workspace_auto_create_folder: 'Create folder if it doesn\'t exist', + folder_add_as_space_btn: 'Add as Space', + folder_add_as_space_msg: 'Add this folder as a new space in your workspace list?', + folder_add_as_space_title: 'Add as Space?', remove_title: 'Remove', empty_dir: '(empty)', upload_failed: 'Upload failed: ', @@ -458,6 +463,7 @@ const LOCALES = { cron_status_paused: 'paused', cron_status_error: 'error', cron_status_active: 'active', + cron_status_running: 'running\u2026', cron_next: 'Next', cron_last: 'Last', cron_run_now: 'Run now', @@ -605,9 +611,6 @@ const LOCALES = { profile_api_key_label: 'API key', }, - cmd_status: '\u986f\u793a\u6703\u8a71\u8cc7\u8a0a', - memory_saved: '\u8a18\u61b6\u5df2\u5132\u5b58', - profile_delete_title: '\u522a\u9664\u6b64\u8a2d\u5b9a\u6a94', ru: { _lang: 'ru', _label: 'Русский', @@ -652,6 +655,7 @@ const LOCALES = { provider_mismatch_warning: (m, p) => `"${m}" может не работать с вашим настроенным провайдером (${p}). 
Всё равно отправить или запустите \`hermes model\` в терминале, чтобы переключиться.`, provider_mismatch_label: 'Несовпадение провайдера', + model_not_found_label: 'Модель не найдена', model_custom_label: 'Пользовательский ID модели', model_custom_placeholder: 'например, openai/gpt-5.4', cmd_help: 'Показать доступные команды', @@ -745,6 +749,10 @@ const LOCALES = { new_folder_prompt: 'Имя новой папки:', folder_created: 'Папка создана ', folder_create_failed: 'Не удалось создать папку: ', + workspace_auto_create_folder: 'Создать папку, если она не существует', + folder_add_as_space_btn: 'Добавить', + folder_add_as_space_msg: 'Добавить эту папку как новое пространство?', + folder_add_as_space_title: 'Добавить как пространство?', remove_title: 'Удаление', empty_dir: '(пусто)', upload_failed: 'Не удалось загрузить: ', @@ -975,6 +983,7 @@ const LOCALES = { cron_status_paused: 'на паузе', cron_status_error: 'ошибка', cron_status_active: 'активно', + cron_status_running: 'выполняется\u2026', cron_next: 'Следующий', cron_last: 'Последний', cron_run_now: 'Запустить сейчас', @@ -1233,6 +1242,7 @@ const LOCALES = { model_unavailable_title: 'Este modelo ya no está en tu lista actual de proveedores', provider_mismatch_warning: (m,p)=>`"${m}" puede no funcionar con tu proveedor configurado (${p}). Envía de todas formas, o ejecuta \`hermes model\` en la terminal para cambiar.`, provider_mismatch_label: 'Proveedor incompatible', + model_not_found_label: 'Modelo no encontrado', model_custom_label: 'ID de modelo personalizado', model_custom_placeholder: 'p. ej. 
openai/gpt-5.4', model_search_placeholder: 'Buscar modelos…', @@ -1309,6 +1319,10 @@ const LOCALES = { new_folder_prompt: 'Nombre de la carpeta nueva:', folder_created: 'Carpeta creada ', folder_create_failed: 'Error al crear la carpeta: ', + workspace_auto_create_folder: 'Crear carpeta si no existe', + folder_add_as_space_btn: 'Añadir como espacio', + folder_add_as_space_msg: '¿Añadir esta carpeta como un nuevo espacio?', + folder_add_as_space_title: '¿Añadir como espacio?', remove_title: 'Quitar', empty_dir: '(vacío)', upload_failed: 'Error al subir: ', @@ -1532,6 +1546,7 @@ const LOCALES = { cron_status_paused: 'paused', cron_status_error: 'error', cron_status_active: 'active', + cron_status_running: 'running\u2026', cron_next: 'Next', cron_last: 'Last', cron_run_now: 'Run now', @@ -1773,6 +1788,7 @@ const LOCALES = { model_unavailable_title: 'Dieses Modell ist nicht mehr in Ihrer aktuellen Provider-Liste', provider_mismatch_warning: (m,p)=>`"${m}" funktioniert möglicherweise nicht mit Ihrem konfigurierten Provider (${p}). 
Trotzdem senden, oder \`hermes model\` im Terminal ausführen.`, provider_mismatch_label: 'Provider-Konflikt', + model_not_found_label: 'Modell nicht gefunden', // commands.js cmd_help: 'Verfügbare Befehle auflisten', cmd_clear: 'Konversationsverlauf löschen', @@ -1852,6 +1868,10 @@ const LOCALES = { new_folder_prompt: 'Neuer Ordnername:', folder_created: 'Ordner erstellt ', folder_create_failed: 'Ordner erstellen fehlgeschlagen: ', + workspace_auto_create_folder: 'Ordner erstellen, falls nicht vorhanden', + folder_add_as_space_btn: 'Als Bereich hinzufügen', + folder_add_as_space_msg: 'Diesen Ordner als neuen Bereich zur Liste hinzufügen?', + folder_add_as_space_title: 'Als Bereich hinzufügen?', remove_title: 'Entfernen', empty_dir: '(leer)', upload_failed: 'Upload fehlgeschlagen: ', @@ -2097,6 +2117,7 @@ const LOCALES = { model_unavailable_title: '\u8fd9\u4e2a\u6a21\u578b\u5df2\u7ecf\u4e0d\u5728\u5f53\u524d provider \u5217\u8868\u4e2d', provider_mismatch_warning: (m,p)=>`\"${m}\" \u53ef\u80fd\u65e0\u6cd5\u5728\u5f53\u524d\u914d\u7f6e\u7684\u63d0\u4f9b\u5546 (${p}) \u4e0b\u5de5\u4f5c\u3002\u76f4\u63a5\u53d1\u9001\uff0c\u6216\u5728\u7ec8\u7aef\u8fd0\u884c \`hermes model\` \u5207\u6362\u3002`, provider_mismatch_label: '\u63d0\u4f9b\u5546\u4e0d\u5339\u914d', + model_not_found_label: '\u672a\u627e\u5230\u6a21\u578b', model_custom_label: '\u81ea\u5b9a\u4e49\u6a21\u578b ID', model_custom_placeholder: '\u4f8b\u5982 openai/gpt-5.4', model_search_placeholder: '\u641c\u7d22\u6a21\u578b\u2026', @@ -2181,6 +2202,10 @@ const LOCALES = { new_folder_prompt: '\u65b0\u6587\u4ef6\u5939\u540d\u79f0\uff1a', folder_created: '\u5df2\u521b\u5efa\u6587\u4ef6\u5939 ', folder_create_failed: '\u521b\u5efa\u6587\u4ef6\u5939\u5931\u8d25\uff1a', + workspace_auto_create_folder: '\u5982\u679c\u6587\u4ef6\u5939\u4e0d\u5b58\u5728\u5219\u521b\u5efa', + folder_add_as_space_btn: '\u6dfb\u52a0\u4e3a\u5de5\u4f5c\u533a', + folder_add_as_space_msg: 
'\u662f\u5426\u5c06\u6b64\u6587\u4ef6\u5939\u6dfb\u52a0\u4e3a\u65b0\u7684\u5de5\u4f5c\u533a\uff1f', + folder_add_as_space_title: '\u6dfb\u52a0\u4e3a\u5de5\u4f5c\u533a\uff1f', remove_title: '\u79fb\u9664', empty_dir: '(\u7a7a)', upload_failed: '\u4e0a\u4f20\u5931\u8d25\uff1a', @@ -2394,6 +2419,7 @@ const LOCALES = { cron_status_paused: '暂停', cron_status_error: '错误', cron_status_active: '运行中', + cron_status_running: '执行中\u2026', cron_next: '下次', cron_last: '上次', cron_run_now: '立即运行', @@ -2635,6 +2661,7 @@ const LOCALES = { model_unavailable_title: '\u6b64\u6a21\u578b\u5df2\u7d93\u4e0d\u5728\u7576\u524d provider \u5217\u8868\u4e2d', provider_mismatch_warning: (m,p)=>`\"${m}\" \u53ef\u80fd\u7121\u6cd5\u5728\u7576\u524d\u914d\u7f6e\u7684\u63d0\u4f9b\u8005 (${p}) \u4e0b\u904b\u4f5c\u3002\u5c1a\u9001\uff0c\u6216\u5728\u7d42\u7aef\u57f7\u884c \`hermes model\` \u5207\u63db\u3002`, provider_mismatch_label: '\u63d0\u4f9b\u8005\u4e0d\u76f8\u7b26', + model_not_found_label: '\u672a\u627e\u5230\u6a21\u578b', // commands.js cmd_help: '\u67e5\u770b\u53ef\u7528\u547d\u4ee4', cmd_clear: '\u6e05\u7a7a\u7576\u524d\u5c0d\u8a71\u8a0a\u606f', @@ -2707,6 +2734,10 @@ const LOCALES = { new_folder_prompt: '\u65b0\u6587\u4ef6\u593e\u540d\u7a31\uff1a', folder_created: '\u5df2\u5275\u5efa\u6587\u4ef6\u593e ', folder_create_failed: '\u5275\u5efa\u6587\u4ef6\u593e\u5931\u6557\uff1a', + workspace_auto_create_folder: '\u8cc7\u6599\u593e\u4e0d\u5b58\u5728\u6642\u5247\u5efa\u7acb', + folder_add_as_space_btn: '\u65b0\u589e\u70ba\u5de5\u4f5c\u5340', + folder_add_as_space_msg: '\u662f\u5426\u5c07\u6b64\u8cc7\u6599\u593e\u65b0\u589e\u70ba\u5de5\u4f5c\u5340\uff1f', + folder_add_as_space_title: '\u65b0\u589e\u70ba\u5de5\u4f5c\u5340\uff1f', remove_title: '\u79fb\u9664', empty_dir: '(空)', upload_failed: '上傳失敗:', @@ -3125,6 +3156,7 @@ const LOCALES = { cron_schedule_required: '\u9700\u8981\u6392\u7a0b', cron_schedule_required_example: '\u9700\u8981\u6392\u7a0b\uff08\u4f8b\u5982 "0 9 * * *" \u6216 "every 
1h"\uff09', cron_status_active: '\u6d3b\u8e8d\u4e2d', + cron_status_running: '\u57f7\u884c\u4e2d\u2026', cron_status_error: '\u932f\u8aa4', cron_status_off: '\u672a\u555f\u7528', cron_status_paused: '\u5df2\u66ab\u505c', diff --git a/static/index.html b/static/index.html index 5cd43e22..cd59b5e5 100644 --- a/static/index.html +++ b/static/index.html @@ -69,6 +69,7 @@ +