""" Hermes Web UI -- Main server entry point. Thin routing shell: imports Handler, delegates to api/routes.py, runs server. All business logic lives in api/*. """ import logging import os import socket import sys import time import traceback from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer # ── Test-mode network isolation ───────────────────────────────────────────── # When `HERMES_WEBUI_TEST_NETWORK_BLOCK=1` is set in the environment, refuse # outbound socket connections to anything that is not loopback / RFC1918 / # link-local / reserved-TLD. This catches accidental real outbound (forgotten # mocks, leaked credentials triggering SDK init, new code paths bypassing an # existing mock) so the test suite stays hermetic and fast. # # tests/conftest.py sets this env var on every test_server subprocess so the # server.py-side network isolation matches the pytest-process-side isolation # already installed there. # # A test that legitimately needs real outbound spawns the server with the env # var unset (no current callers — every test_server-using test should be # mockable). if os.environ.get("HERMES_WEBUI_TEST_NETWORK_BLOCK", "").strip() in ("1", "true", "yes"): _REAL_CREATE_CONN = socket.create_connection _REAL_SOCK_CONNECT = socket.socket.connect import re as _re def _re_match_unique_local_ipv6(h): """Match IPv6 fc00::/7 (canonical syntax). Tighter than startswith('fc') so we don't mistakenly classify hostnames like 'food.example.com' as local.""" return bool(_re.match(r"^f[cd][0-9a-f]{0,2}:", h)) def _addr_is_local(host): if not isinstance(host, str): return False h = host.strip().lower() if not h: return False # IPv6 unique-local fc00::/7: require hex pair + colon to avoid # matching hostnames like "food.example.com" or "fdsa.test". if h in ("::1", "0:0:0:0:0:0:0:1") or h.startswith("fe80:") or _re_match_unique_local_ipv6(h): return True if h == "localhost" or h.endswith(".localhost"): return True if h.endswith(".local") or h.endswith(".test") or h.endswith(".invalid"): return True if h == "example.com" or h.endswith(".example.com"): return True if h == "example.net" or h.endswith(".example.net"): return True if h == "example.org" or h.endswith(".example.org"): return True if h.endswith(".example"): return True if h and h[0].isdigit() and h.count(".") == 3: try: o1, o2, o3, o4 = [int(p) for p in h.split(".")] except ValueError: return False if o1 == 127: return True if o1 == 10: return True if o1 == 192 and o2 == 168: return True if o1 == 172 and 16 <= o2 <= 31: return True if o1 == 169 and o2 == 254: return True if o1 == 203 and o2 == 0 and o3 == 113: return True return False def _blocked_create_connection(address, *a, **kw): try: host = address[0] except (TypeError, IndexError): host = "" if _addr_is_local(host): return _REAL_CREATE_CONN(address, *a, **kw) raise OSError( f"hermes test network isolation (server.py): outbound to {address!r} blocked" ) def _blocked_socket_connect(self, address): try: host = address[0] except (TypeError, IndexError): host = "" if _addr_is_local(host): return _REAL_SOCK_CONNECT(self, address) raise OSError( f"hermes test network isolation (server.py): socket.connect to {address!r} blocked" ) socket.create_connection = _blocked_create_connection socket.socket.connect = _blocked_socket_connect try: import resource except ImportError: # pragma: no cover - resource is Unix-only resource = None from urllib.parse import urlparse logger = logging.getLogger(__name__) from api.auth import check_auth from api.config import HOST, PORT, STATE_DIR, SESSION_DIR, DEFAULT_WORKSPACE from api.helpers import j, get_profile_cookie from api.profiles import set_request_profile, clear_request_profile from api.routes import handle_delete, handle_get, handle_patch, handle_post from api.startup import auto_install_agent_deps, fix_credential_permissions from api.updates import WEBUI_VERSION class QuietHTTPServer(ThreadingHTTPServer): """Custom HTTP server that silently handles common network errors.""" daemon_threads = True request_queue_size = 64 def __init__(self, *args, **kwargs): server_address = args[0] if args else kwargs.get('server_address', None) if server_address and ':' in server_address[0]: self.address_family = socket.AF_INET6 super().__init__(*args, **kwargs) self.accept_loop_requests_total = 0 self.accept_loop_last_request_at = 0.0 def _handle_request_noblock(self): """Record accept-loop progress before dispatching a request handler. A process can be alive and still stop accepting/dispatching requests. Exposing this heartbeat on /health gives supervisors and watchdogs a cheap signal that the accept loop is still moving. Note: this method is called only from the single ``serve_forever()`` thread in CPython socketserver, so the un-locked ``+=`` increment is safe — there is no other thread mutating these counters. The /health readers may see a stale value momentarily but never an inconsistent one (Python int reads are atomic). Per Opus advisor on stage-297. """ self.accept_loop_requests_total += 1 self.accept_loop_last_request_at = time.time() return super()._handle_request_noblock() def handle_error(self, request, client_address): """Override to suppress logging for common client disconnect errors.""" exc_type, exc_value, _ = sys.exc_info() # Silently ignore common connection errors caused by client disconnects if exc_type in (ConnectionResetError, BrokenPipeError, ConnectionAbortedError, TimeoutError): return # Also handle socket errors that indicate client disconnect if issubclass(exc_type, OSError): # errno 54 is Connection reset by peer on macOS/BSD # errno 104 is Connection reset by peer on Linux if getattr(exc_value, 'errno', None) in (32, 54, 104, 110): # EPIPE, ECONNRESET, ETIMEDOUT return # For other errors, use default logging super().handle_error(request, client_address) class Handler(BaseHTTPRequestHandler): timeout = 30 # seconds — kills idle/incomplete connections to prevent thread exhaustion def setup(self): """Set socket options for each accepted connection.""" super().setup() # TCP_NODELAY — universal, disables Nagle for HTTP latency try: self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) except OSError: pass # SO_KEEPALIVE — universal master switch (must be set before timing params) try: self.connection.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) except OSError: pass # Per-platform timing parameters if hasattr(socket, 'TCP_KEEPIDLE'): # Linux try: self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 10) self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 5) self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 3) except OSError: pass elif hasattr(socket, 'TCP_KEEPALIVE'): # macOS try: self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPALIVE, 10) except OSError: pass _ver_suffix = WEBUI_VERSION.removeprefix('v') server_version = ('HermesWebUI/' + _ver_suffix) if _ver_suffix != 'unknown' else 'HermesWebUI' _CSP_REPORT_ONLY = ( "default-src 'self'; " "base-uri 'self'; " "object-src 'none'; " "frame-ancestors 'self'; " "script-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; " "style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; " "img-src 'self' data: blob:; " "font-src 'self' data:; " "media-src 'self' data: blob:; " "connect-src 'self' http://127.0.0.1:* http://localhost:* ws://127.0.0.1:* ws://localhost:*" ) @classmethod def csp_report_only_policy(cls) -> str: return cls._CSP_REPORT_ONLY def end_headers(self) -> None: self.send_header("Content-Security-Policy-Report-Only", self.csp_report_only_policy()) super().end_headers() def log_message(self, fmt, *args): pass # suppress default Apache-style log def log_request(self, code: str='-', size: str='-') -> None: """Structured JSON logs for each request.""" import json as _json duration_ms = round((time.time() - getattr(self, '_req_t0', time.time())) * 1000, 1) record = _json.dumps({ 'ts': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()), 'method': self.command or '-', 'path': self.path or '-', 'status': int(code) if str(code).isdigit() else code, 'ms': duration_ms, }) print(f'[webui] {record}', flush=True) def do_GET(self) -> None: self._req_t0 = time.time() # Per-request profile context from cookie (issue #798) cookie_profile = get_profile_cookie(self) if cookie_profile: set_request_profile(cookie_profile) try: parsed = urlparse(self.path) if not check_auth(self, parsed): return result = handle_get(self, parsed) if result is False: return j(self, {'error': 'not found'}, status=404) except Exception as e: print(f'[webui] ERROR {self.command} {self.path}\n' + traceback.format_exc(), flush=True) return j(self, {'error': 'Internal server error'}, status=500) finally: clear_request_profile() def _handle_write(self, route_func) -> None: self._req_t0 = time.time() # Per-request profile context from cookie (issue #798) cookie_profile = get_profile_cookie(self) if cookie_profile: set_request_profile(cookie_profile) try: parsed = urlparse(self.path) if not check_auth(self, parsed): return result = route_func(self, parsed) if result is False: return j(self, {'error': 'not found'}, status=404) except Exception as e: print(f'[webui] ERROR {self.command} {self.path}\n' + traceback.format_exc(), flush=True) return j(self, {'error': 'Internal server error'}, status=500) finally: clear_request_profile() def do_POST(self) -> None: self._handle_write(handle_post) def do_PATCH(self) -> None: self._handle_write(handle_patch) def do_DELETE(self) -> None: self._handle_write(handle_delete) def _raise_fd_soft_limit(target: int = 4096) -> dict: """Best-effort raise of RLIMIT_NOFILE for persistent WebUI hosts. macOS launchd jobs often start with a 256 soft limit. If a future FD leak regresses, that low ceiling turns a leak into a hard HTTP wedge quickly. Raising the soft limit does not hide leaks; it buys enough headroom for diagnostics and watchdog recovery. """ if resource is None: return {"status": "unsupported"} try: soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) except Exception as exc: return {"status": "error", "error": str(exc)} # On Unix, RLIM_INFINITY is commonly a large int; keep the logic explicit # so tests can use ordinary integers without depending on platform values. desired = int(target) if hard not in (-1, getattr(resource, "RLIM_INFINITY", object())): desired = min(desired, int(hard)) if soft >= desired: return {"status": "unchanged", "soft": soft, "hard": hard} try: resource.setrlimit(resource.RLIMIT_NOFILE, (desired, hard)) except Exception as exc: return {"status": "error", "soft": soft, "hard": hard, "error": str(exc)} return {"status": "raised", "soft": desired, "hard": hard, "previous_soft": soft} def main() -> None: from api.config import print_startup_config, verify_hermes_imports, _HERMES_FOUND print_startup_config() fd_limit = _raise_fd_soft_limit() if fd_limit.get("status") == "raised": print( f"[ok] Raised file descriptor soft limit " f"{fd_limit.get('previous_soft')} -> {fd_limit.get('soft')}", flush=True, ) elif fd_limit.get("status") == "error": print(f"[!!] WARNING: Could not raise file descriptor limit: {fd_limit.get('error')}", flush=True) # Fix sensitive file permissions before doing anything else fix_credential_permissions() # ── #1558 startup self-heal ───────────────────────────────────────── # If a previous process wrote a session JSON with fewer messages than # its .bak (the data-loss shape #1558 produced), restore from the .bak. # Safe to run unconditionally — a clean install is a no-op. try: from api.models import _active_state_db_path from api.session_recovery import recover_all_sessions_on_startup result = recover_all_sessions_on_startup( SESSION_DIR, rebuild_index=True, state_db_path=_active_state_db_path(), ) if result.get("restored"): print(f"[recovery] Restored {result['restored']}/{result['scanned']} sessions from .bak (see #1558).", flush=True) except Exception as exc: # Recovery is best-effort; never block server startup. print(f"[recovery] startup recovery failed: {exc}", flush=True) within_container = False # Check for the "/.within_container" file to determine if we're running inside a container; this file is created in the Dockerfile try: with open('/.within_container', 'r') as f: within_container = True except FileNotFoundError: pass if within_container: print('[ok] Running within container.', flush=True) # Security: warn if binding non-loopback without authentication from api.auth import is_auth_enabled if HOST not in ('127.0.0.1', '::1', 'localhost') and not is_auth_enabled(): print(f'[!!] WARNING: Binding to {HOST} with NO PASSWORD SET.', flush=True) print(f' Anyone on the network can access your filesystem and agent.', flush=True) print(f' Set a password via Settings or HERMES_WEBUI_PASSWORD env var.', flush=True) print(f' To suppress: bind to 127.0.0.1 or set a password.', flush=True) if within_container: print(f' Note: You are running within a container, must bind to 0.0.0.0 (IPv4) or :: (IPv6) to publish the port.', flush=True) elif not is_auth_enabled(): print(f' [tip] No password set. Any process on this machine can read sessions', flush=True) print(f' and memory via the local API. Set HERMES_WEBUI_PASSWORD to', flush=True) print(f' enable authentication.', flush=True) ok, missing, errors = verify_hermes_imports() if not ok and _HERMES_FOUND: print(f'[!!] Warning: Hermes agent found but missing modules: {missing}', flush=True) for mod, err in errors.items(): print(f' {mod}: {err}', flush=True) print(' Attempting to install missing dependencies from agent requirements.txt...', flush=True) auto_install_agent_deps() ok, missing, errors = verify_hermes_imports() if not ok: print(f'[!!] Still missing after install attempt: {missing}', flush=True) for mod, err in errors.items(): print(f' {mod}: {err}', flush=True) print(' Agent features may not work correctly.', flush=True) else: print('[ok] Agent dependencies installed successfully.', flush=True) STATE_DIR.mkdir(parents=True, exist_ok=True) SESSION_DIR.mkdir(parents=True, exist_ok=True) DEFAULT_WORKSPACE.mkdir(parents=True, exist_ok=True) # Start the gateway session watcher for real-time SSE updates try: from api.gateway_watcher import start_watcher start_watcher() except Exception as e: print(f'[!!] WARNING: Gateway watcher failed to start: {e}', flush=True) httpd = QuietHTTPServer((HOST, PORT), Handler) # ── TLS/HTTPS setup (optional) ───────────────────────────────────────── from api.config import TLS_ENABLED, TLS_CERT, TLS_KEY scheme = 'https' if TLS_ENABLED else 'http' if TLS_ENABLED: try: import ssl ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) ctx.minimum_version = ssl.TLSVersion.TLSv1_2 ctx.load_cert_chain(TLS_CERT, TLS_KEY) httpd.socket = ctx.wrap_socket(httpd.socket, server_side=True) print(f' TLS enabled: cert={TLS_CERT}, key={TLS_KEY}', flush=True) except Exception as e: print(f'[!!] WARNING: TLS setup failed ({e}), falling back to HTTP', flush=True) scheme = 'http' print(f' Hermes Web UI listening on {scheme}://{HOST}:{PORT}', flush=True) if HOST in ('127.0.0.1', '::1') or within_container: print(f' Remote access: ssh -N -L {PORT}:127.0.0.1:{PORT} @', flush=True) print(f' Then open: {scheme}://localhost:{PORT}', flush=True) print('', flush=True) try: httpd.serve_forever() finally: # Stop the gateway watcher on shutdown try: from api.gateway_watcher import stop_watcher stop_watcher() except Exception: logger.debug("Failed to stop gateway watcher during shutdown") if __name__ == '__main__': main()