mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-21 03:39:54 +00:00
d0c20708ce
Capture dashboard and PTY websocket lifecycle failures in gui.log and expose it via hermes logs.
414 lines
14 KiB
Python
414 lines
14 KiB
Python
"""Centralized logging setup for Hermes Agent.
|
|
|
|
Provides a single ``setup_logging()`` entry point that both the CLI and
|
|
gateway call early in their startup path. All log files live under
|
|
``~/.hermes/logs/`` (profile-aware via ``get_hermes_home()``).
|
|
|
|
Log files produced:
|
|
agent.log — INFO+, all agent/tool/session activity (the main log)
|
|
errors.log — WARNING+, errors and warnings only (quick triage)
|
|
gateway.log — INFO+, gateway-only events (created when mode="gateway")
|
|
gui.log — INFO+, dashboard/websocket/TUI-gateway events
|
|
(created when mode="gui")
|
|
|
|
All files use ``RotatingFileHandler`` with ``RedactingFormatter`` so
|
|
secrets are never written to disk.
|
|
|
|
Component separation:
|
|
gateway.log only receives records from ``gateway.*`` loggers —
|
|
platform adapters, session management, slash commands, delivery.
|
|
gui.log receives dashboard-side records from ``hermes_cli.web_server``,
|
|
``hermes_cli.pty_bridge``, ``tui_gateway.*``, and ``uvicorn.*``.
|
|
agent.log remains the catch-all (everything goes there).
|
|
|
|
Session context:
|
|
Call ``set_session_context(session_id)`` at the start of a conversation
|
|
and ``clear_session_context()`` when done. All log lines emitted on
|
|
that thread will include ``[session_id]`` for filtering/correlation.
|
|
"""
|
|
|
|
import logging
|
|
import os
|
|
import threading
|
|
from logging.handlers import RotatingFileHandler
|
|
from pathlib import Path
|
|
from typing import Optional, Sequence
|
|
|
|
from hermes_constants import get_config_path, get_hermes_home
|
|
|
|
# Sentinel to track whether setup_logging() has already run. The function
|
|
# is idempotent — calling it twice is safe but the second call is a no-op
|
|
# unless ``force=True``.
|
|
_logging_initialized = False
|
|
|
|
# Thread-local storage for per-conversation session context.
|
|
_session_context = threading.local()
|
|
|
|
# Default log format — includes timestamp, level, optional session tag,
|
|
# logger name, and message. The ``%(session_tag)s`` field is guaranteed to
|
|
# exist on every LogRecord via _install_session_record_factory() below.
|
|
_LOG_FORMAT = "%(asctime)s %(levelname)s%(session_tag)s %(name)s: %(message)s"
|
|
_LOG_FORMAT_VERBOSE = "%(asctime)s - %(name)s - %(levelname)s%(session_tag)s - %(message)s"
|
|
|
|
# Third-party loggers that are noisy at DEBUG/INFO level.
|
|
_NOISY_LOGGERS = (
|
|
"openai",
|
|
"openai._base_client",
|
|
"httpx",
|
|
"httpcore",
|
|
"asyncio",
|
|
"hpack",
|
|
"hpack.hpack",
|
|
"grpc",
|
|
"modal",
|
|
"urllib3",
|
|
"urllib3.connectionpool",
|
|
"websockets",
|
|
"charset_normalizer",
|
|
"markdown_it",
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Public session context API
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def set_session_context(session_id: str) -> None:
|
|
"""Set the session ID for the current thread.
|
|
|
|
All subsequent log records on this thread will include ``[session_id]``
|
|
in the formatted output. Call at the start of ``run_conversation()``.
|
|
"""
|
|
_session_context.session_id = session_id
|
|
|
|
|
|
def clear_session_context() -> None:
|
|
"""Clear the session ID for the current thread."""
|
|
_session_context.session_id = None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Record factory — injects session_tag into every LogRecord at creation
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _install_session_record_factory() -> None:
|
|
"""Replace the global LogRecord factory with one that adds ``session_tag``.
|
|
|
|
Unlike a ``logging.Filter`` on a handler or logger, the record factory
|
|
runs for EVERY record in the process — including records that propagate
|
|
from child loggers and records handled by third-party handlers. This
|
|
guarantees ``%(session_tag)s`` is always available in format strings,
|
|
eliminating the KeyError that would occur if a handler used our format
|
|
without having a ``_SessionFilter`` attached.
|
|
|
|
Idempotent — checks for a marker attribute to avoid double-wrapping if
|
|
the module is reloaded.
|
|
"""
|
|
current_factory = logging.getLogRecordFactory()
|
|
if getattr(current_factory, "_hermes_session_injector", False):
|
|
return # already installed
|
|
|
|
def _session_record_factory(*args, **kwargs):
|
|
record = current_factory(*args, **kwargs)
|
|
sid = getattr(_session_context, "session_id", None)
|
|
record.session_tag = f" [{sid}]" if sid else "" # type: ignore[attr-defined]
|
|
return record
|
|
|
|
_session_record_factory._hermes_session_injector = True # type: ignore[attr-defined]
|
|
logging.setLogRecordFactory(_session_record_factory)
|
|
|
|
|
|
# Install immediately on import — session_tag is available on all records
|
|
# from this point forward, even before setup_logging() is called.
|
|
_install_session_record_factory()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Filters
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class _ComponentFilter(logging.Filter):
|
|
"""Only pass records whose logger name starts with one of *prefixes*.
|
|
|
|
Used to route gateway-specific records to ``gateway.log`` while
|
|
keeping ``agent.log`` as the catch-all.
|
|
"""
|
|
|
|
def __init__(self, prefixes: Sequence[str]) -> None:
|
|
super().__init__()
|
|
self._prefixes = tuple(prefixes)
|
|
|
|
def filter(self, record: logging.LogRecord) -> bool:
|
|
return record.name.startswith(self._prefixes)
|
|
|
|
|
|
# Logger name prefixes that belong to each component.
|
|
# Used by _ComponentFilter and exposed for ``hermes logs --component``.
|
|
COMPONENT_PREFIXES = {
|
|
"gateway": ("gateway",),
|
|
"agent": ("agent", "run_agent", "model_tools", "batch_runner"),
|
|
"tools": ("tools",),
|
|
"cli": ("hermes_cli", "cli"),
|
|
"cron": ("cron",),
|
|
"gui": (
|
|
"hermes_cli.web_server",
|
|
"hermes_cli.pty_bridge",
|
|
"tui_gateway",
|
|
"uvicorn",
|
|
),
|
|
}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main setup
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def setup_logging(
|
|
*,
|
|
hermes_home: Optional[Path] = None,
|
|
log_level: Optional[str] = None,
|
|
max_size_mb: Optional[int] = None,
|
|
backup_count: Optional[int] = None,
|
|
mode: Optional[str] = None,
|
|
force: bool = False,
|
|
) -> Path:
|
|
"""Configure the Hermes logging subsystem.
|
|
|
|
Safe to call multiple times — the second call is a no-op unless
|
|
*force* is ``True``.
|
|
|
|
Parameters
|
|
----------
|
|
hermes_home
|
|
Override for the Hermes home directory. Falls back to
|
|
``get_hermes_home()`` (profile-aware).
|
|
log_level
|
|
Minimum level for the ``agent.log`` file handler. Accepts any
|
|
standard Python level name (``"DEBUG"``, ``"INFO"``, ``"WARNING"``).
|
|
Defaults to ``"INFO"`` or the value from config.yaml ``logging.level``.
|
|
max_size_mb
|
|
Maximum size of each log file in megabytes before rotation.
|
|
Defaults to 5 or the value from config.yaml ``logging.max_size_mb``.
|
|
backup_count
|
|
Number of rotated backup files to keep.
|
|
Defaults to 3 or the value from config.yaml ``logging.backup_count``.
|
|
mode
|
|
Caller context: ``"cli"``, ``"gateway"``, ``"gui"``, ``"cron"``.
|
|
When ``"gateway"``, an additional ``gateway.log`` file is created
|
|
that receives only gateway-component records.
|
|
When ``"gui"``, an additional ``gui.log`` file is created that
|
|
receives dashboard and TUI-gateway component records.
|
|
force
|
|
Re-run setup even if it has already been called.
|
|
|
|
Returns
|
|
-------
|
|
Path
|
|
The ``logs/`` directory where files are written.
|
|
"""
|
|
global _logging_initialized
|
|
home = hermes_home or get_hermes_home()
|
|
log_dir = home / "logs"
|
|
log_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Read config defaults (best-effort — config may not be loaded yet).
|
|
cfg_level, cfg_max_size, cfg_backup = _read_logging_config()
|
|
|
|
level_name = (log_level or cfg_level or "INFO").upper()
|
|
level = getattr(logging, level_name, logging.INFO)
|
|
max_bytes = (max_size_mb or cfg_max_size or 5) * 1024 * 1024
|
|
backups = backup_count or cfg_backup or 3
|
|
|
|
# Lazy import to avoid circular dependency at module load time.
|
|
from agent.redact import RedactingFormatter
|
|
|
|
root = logging.getLogger()
|
|
|
|
# --- agent.log (INFO+) — the main activity log -------------------------
|
|
_add_rotating_handler(
|
|
root,
|
|
log_dir / "agent.log",
|
|
level=level,
|
|
max_bytes=max_bytes,
|
|
backup_count=backups,
|
|
formatter=RedactingFormatter(_LOG_FORMAT),
|
|
)
|
|
|
|
# --- errors.log (WARNING+) — quick triage log --------------------------
|
|
_add_rotating_handler(
|
|
root,
|
|
log_dir / "errors.log",
|
|
level=logging.WARNING,
|
|
max_bytes=2 * 1024 * 1024,
|
|
backup_count=2,
|
|
formatter=RedactingFormatter(_LOG_FORMAT),
|
|
)
|
|
|
|
# --- gateway.log (INFO+, gateway component only) ------------------------
|
|
if mode == "gateway":
|
|
_add_rotating_handler(
|
|
root,
|
|
log_dir / "gateway.log",
|
|
level=logging.INFO,
|
|
max_bytes=5 * 1024 * 1024,
|
|
backup_count=3,
|
|
formatter=RedactingFormatter(_LOG_FORMAT),
|
|
log_filter=_ComponentFilter(COMPONENT_PREFIXES["gateway"]),
|
|
)
|
|
|
|
# --- gui.log (INFO+, dashboard/tui-gateway components) -----------------
|
|
if mode == "gui":
|
|
_add_rotating_handler(
|
|
root,
|
|
log_dir / "gui.log",
|
|
level=logging.INFO,
|
|
max_bytes=10 * 1024 * 1024,
|
|
backup_count=5,
|
|
formatter=RedactingFormatter(_LOG_FORMAT),
|
|
log_filter=_ComponentFilter(COMPONENT_PREFIXES["gui"]),
|
|
)
|
|
|
|
if _logging_initialized and not force:
|
|
return log_dir
|
|
|
|
# Ensure root logger level is low enough for the handlers to fire.
|
|
if root.level == logging.NOTSET or root.level > level:
|
|
root.setLevel(level)
|
|
|
|
# Suppress noisy third-party loggers.
|
|
for name in _NOISY_LOGGERS:
|
|
logging.getLogger(name).setLevel(logging.WARNING)
|
|
|
|
_logging_initialized = True
|
|
return log_dir
|
|
|
|
|
|
def setup_verbose_logging() -> None:
|
|
"""Enable DEBUG-level console logging for ``--verbose`` / ``-v`` mode.
|
|
|
|
Called by ``AIAgent.__init__()`` when ``verbose_logging=True``.
|
|
"""
|
|
from agent.redact import RedactingFormatter
|
|
|
|
root = logging.getLogger()
|
|
|
|
# Avoid adding duplicate stream handlers.
|
|
for h in root.handlers:
|
|
if isinstance(h, logging.StreamHandler) and not isinstance(h, RotatingFileHandler):
|
|
if getattr(h, "_hermes_verbose", False):
|
|
return
|
|
|
|
handler = logging.StreamHandler()
|
|
handler.setLevel(logging.DEBUG)
|
|
handler.setFormatter(RedactingFormatter(_LOG_FORMAT_VERBOSE, datefmt="%H:%M:%S"))
|
|
handler._hermes_verbose = True # type: ignore[attr-defined]
|
|
root.addHandler(handler)
|
|
|
|
# Lower root logger level so DEBUG records reach all handlers.
|
|
if root.level > logging.DEBUG:
|
|
root.setLevel(logging.DEBUG)
|
|
|
|
# Keep third-party libraries at WARNING to reduce noise.
|
|
for name in _NOISY_LOGGERS:
|
|
logging.getLogger(name).setLevel(logging.WARNING)
|
|
# rex-deploy at INFO for sandbox status.
|
|
logging.getLogger("rex-deploy").setLevel(logging.INFO)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Internal helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class _ManagedRotatingFileHandler(RotatingFileHandler):
|
|
"""RotatingFileHandler that ensures group-writable perms in managed mode.
|
|
|
|
In managed mode (NixOS), the stateDir uses setgid (2770) so new files
|
|
inherit the hermes group. However, both _open() (initial creation) and
|
|
doRollover() create files via open(), which uses the process umask —
|
|
typically 0022, producing 0644. This subclass applies chmod 0660 after
|
|
both operations so the gateway and interactive users can share log files.
|
|
"""
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
from hermes_cli.config import is_managed
|
|
self._managed = is_managed()
|
|
super().__init__(*args, **kwargs)
|
|
|
|
def _chmod_if_managed(self):
|
|
if self._managed:
|
|
try:
|
|
os.chmod(self.baseFilename, 0o660)
|
|
except OSError:
|
|
pass
|
|
|
|
def _open(self):
|
|
stream = super()._open()
|
|
self._chmod_if_managed()
|
|
return stream
|
|
|
|
def doRollover(self):
|
|
super().doRollover()
|
|
self._chmod_if_managed()
|
|
|
|
|
|
def _add_rotating_handler(
|
|
logger: logging.Logger,
|
|
path: Path,
|
|
*,
|
|
level: int,
|
|
max_bytes: int,
|
|
backup_count: int,
|
|
formatter: logging.Formatter,
|
|
log_filter: Optional[logging.Filter] = None,
|
|
) -> None:
|
|
"""Add a ``RotatingFileHandler`` to *logger*, skipping if one already
|
|
exists for the same resolved file path (idempotent).
|
|
|
|
Parameters
|
|
----------
|
|
log_filter
|
|
Optional filter to attach to the handler (e.g. ``_ComponentFilter``
|
|
for gateway.log).
|
|
"""
|
|
resolved = path.resolve()
|
|
for existing in logger.handlers:
|
|
if (
|
|
isinstance(existing, RotatingFileHandler)
|
|
and Path(getattr(existing, "baseFilename", "")).resolve() == resolved
|
|
):
|
|
return # already attached
|
|
|
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
handler = _ManagedRotatingFileHandler(
|
|
str(path), maxBytes=max_bytes, backupCount=backup_count,
|
|
encoding="utf-8",
|
|
)
|
|
handler.setLevel(level)
|
|
handler.setFormatter(formatter)
|
|
if log_filter is not None:
|
|
handler.addFilter(log_filter)
|
|
logger.addHandler(handler)
|
|
|
|
|
|
def _read_logging_config():
|
|
"""Best-effort read of ``logging.*`` from config.yaml.
|
|
|
|
Returns ``(level, max_size_mb, backup_count)`` — any may be ``None``.
|
|
"""
|
|
try:
|
|
import yaml
|
|
config_path = get_config_path()
|
|
if config_path.exists():
|
|
with open(config_path, "r", encoding="utf-8") as f:
|
|
cfg = yaml.safe_load(f) or {}
|
|
log_cfg = cfg.get("logging", {})
|
|
if isinstance(log_cfg, dict):
|
|
return (
|
|
log_cfg.get("level"),
|
|
log_cfg.get("max_size_mb"),
|
|
log_cfg.get("backup_count"),
|
|
)
|
|
except Exception:
|
|
pass
|
|
return (None, None, None)
|