From 79e8cd1233c719ec7f270363d7a1bdecb310d932 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Sat, 11 Apr 2026 09:50:59 +0530 Subject: [PATCH] =?UTF-8?q?feat(nix):=20container-aware=20CLI=20=E2=80=94?= =?UTF-8?q?=20auto-route=20all=20subcommands=20into=20managed=20container?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When container.enable = true, the host `hermes` CLI transparently execs every subcommand into the managed Docker/Podman container. A symlink bridge (~/.hermes -> /var/lib/hermes/.hermes) unifies state between host and container so sessions, config, and memories are shared. CLI changes: - Global routing before subcommand dispatch (all commands forwarded) - docker exec with -u exec_user, env passthrough (TERM, COLORTERM, LANG, LC_ALL), TTY-aware flags - Retry with spinner on failure (TTY: 5s, non-TTY: 10s silent) - Hard fail instead of silent fallback - HERMES_DEV=1 env var bypasses routing for development - No routing messages (invisible to user) NixOS module changes: - container.hostUsers option: lists users who get ~/.hermes symlink and automatic hermes group membership - Activation script creates symlink bridge (with backup of existing ~/.hermes dirs), writes exec_user to .container-mode - Cleanup on disable: removes symlinks + .container-mode + stops service - Warning when hostUsers set without addToSystemPackages --- hermes_cli/config.py | 66 ++++ hermes_cli/main.py | 143 +++++++- nix/nixosModules.nix | 91 +++++ tests/hermes_cli/test_container_aware_cli.py | 354 +++++++++++++++++++ website/docs/getting-started/nix-setup.md | 42 +++ 5 files changed, 695 insertions(+), 1 deletion(-) create mode 100644 tests/hermes_cli/test_container_aware_cli.py diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 2cb6a8d62a..0e8624570f 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -141,6 +141,72 @@ def managed_error(action: str = "modify configuration"): print(format_managed_message(action), 
file=sys.stderr) +# ============================================================================= +# Container-aware CLI (NixOS container mode) +# ============================================================================= + +def _is_inside_container() -> bool: + """Detect if we're already running inside a Docker/Podman container.""" + # Standard Docker/Podman indicators + if os.path.exists("/.dockerenv"): + return True + # Podman uses /run/.containerenv + if os.path.exists("/run/.containerenv"): + return True + # Check cgroup for container runtime evidence (works for both Docker & Podman) + try: + with open("/proc/1/cgroup", "r") as f: + cgroup = f.read() + if "docker" in cgroup or "podman" in cgroup or "/lxc/" in cgroup: + return True + except (OSError, IOError): + pass + return False + + +def get_container_exec_info() -> Optional[dict]: + """Read container mode metadata from HERMES_HOME/.container-mode. + + Returns a dict with keys: backend, container_name, exec_user, hermes_bin + or None if container mode is not active, we're already inside the + container, or HERMES_DEV=1 is set. + + The .container-mode file is written by the NixOS activation script when + container.enable = true. It tells the host CLI to exec into the container + instead of running locally. 
+ """ + if os.environ.get("HERMES_DEV") == "1": + return None + + if _is_inside_container(): + return None + + container_mode_file = get_hermes_home() / ".container-mode" + + try: + info = {} + with open(container_mode_file, "r") as f: + for line in f: + line = line.strip() + if "=" in line and not line.startswith("#"): + key, _, value = line.partition("=") + info[key.strip()] = value.strip() + + backend = info.get("backend", "docker") + container_name = info.get("container_name", "hermes-agent") + exec_user = info.get("exec_user", "hermes") + hermes_bin = info.get("hermes_bin", "/data/current-package/bin/hermes") + + return { + "backend": backend, + "container_name": container_name, + "exec_user": exec_user, + "hermes_bin": hermes_bin, + } + except (OSError, IOError): + return None + + # ============================================================================= # Config paths # ============================================================================= diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 81850fdfe4..7229bea467 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -528,6 +528,133 @@ def _resolve_last_cli_session() -> Optional[str]: return None +def _exec_in_container(container_info: dict, cli_args: list): + """Route a CLI invocation into the managed container. + + Uses subprocess.run so we can detect docker-level failures (container + not running, user not found, etc.) and retry. On the happy path the + exit code from the containerised hermes is propagated directly. 
+ + Failure behaviour: + - TTY: spinner for up to 5s, then hard fail (exit 1) + - Non-TTY: silent retry for 10s, then exit 126 + + Args: + container_info: dict with backend, container_name, exec_user, hermes_bin + cli_args: the original CLI arguments (everything after 'hermes') + """ + import shutil + import subprocess + import time + + backend = container_info["backend"] + container_name = container_info["container_name"] + exec_user = container_info["exec_user"] + hermes_bin = container_info["hermes_bin"] + + runtime = shutil.which(backend) + if not runtime: + print(f"Error: {backend} not found on PATH. Cannot route to container.", + file=sys.stderr) + sys.exit(1) + + # The NixOS systemd service runs containers as root. Docker users + # typically have group-based socket access, but Podman rootful + # containers require sudo. Probe whether the runtime can see the + # container; if not, retry via sudo. + needs_sudo = False + probe = subprocess.run( + [runtime, "inspect", "--format", "ok", container_name], + capture_output=True, text=True, timeout=5, + ) + if probe.returncode != 0: + sudo = shutil.which("sudo") + if sudo: + probe2 = subprocess.run( + [sudo, "-n", runtime, "inspect", "--format", "ok", container_name], + capture_output=True, text=True, timeout=5, + ) + if probe2.returncode == 0: + needs_sudo = True + else: + print( + f"Error: container '{container_name}' not found via {backend}.\n" + f"\n" + f"The NixOS service runs the container as root. 
Your user cannot\n" + f"see it because {backend} uses per-user namespaces.\n" + f"\n" + f"Fix: grant passwordless sudo for {backend}:\n" + f"\n" + f' security.sudo.extraRules = [{{\n' + f' users = [ "{os.getenv("USER", "your-user")}" ];\n' + f' commands = [{{ command = "{runtime}"; options = [ "NOPASSWD" ]; }}];\n' + f' }}];\n' + f"\n" + f"Or run: sudo hermes {' '.join(cli_args)}", + file=sys.stderr, + ) + sys.exit(1) + else: + print( + f"Error: container '{container_name}' not found via {backend}.\n" + f"The container may be running under root. Try: sudo hermes {' '.join(cli_args)}", + file=sys.stderr, + ) + sys.exit(1) + + is_tty = sys.stdin.isatty() + tty_flags = ["-it"] if is_tty else ["-i"] + + # Forward terminal environment variables + env_flags = [] + for var in ("TERM", "COLORTERM", "LANG", "LC_ALL"): + val = os.environ.get(var) + if val: + env_flags.extend(["-e", f"{var}={val}"]) + + cmd_prefix = [shutil.which("sudo"), "-n", runtime] if needs_sudo else [runtime] + exec_cmd = ( + cmd_prefix + ["exec"] + + tty_flags + + ["-u", exec_user] + + env_flags + + [container_name, hermes_bin] + + cli_args + ) + + max_retries = 5 if is_tty else 10 + for attempt in range(max_retries): + result = subprocess.run(exec_cmd) + if result.returncode == 0: + sys.exit(0) + + # Exit code 125/126/127 from docker exec = container-level failure + # (not running, user not found, command not found). Retry these. + if result.returncode not in (125, 126, 127): + # Hermes itself exited non-zero — propagate as-is + sys.exit(result.returncode) + + # Container-level failure — retry + if attempt < max_retries - 1: + if is_tty and attempt == 0: + print("Waiting for container...", end="", flush=True, + file=sys.stderr) + elif is_tty: + print(".", end="", flush=True, file=sys.stderr) + time.sleep(1) + else: + if is_tty: + print(file=sys.stderr) # newline after dots + print( + f"Error: container '{container_name}' is not reachable " + f"via {backend}. 
Is the hermes-agent service running?", + file=sys.stderr, + ) + sys.exit(1) + else: + sys.exit(126) + + def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]: """Resolve a session name (title) or ID to a session ID. @@ -5633,7 +5760,21 @@ Examples: # e.g. ``hermes -c Pokemon Agent Dev`` → ``hermes -c 'Pokemon Agent Dev'`` _processed_argv = _coalesce_session_name_args(sys.argv[1:]) args = parser.parse_args(_processed_argv) - + + # ── Container-aware routing ──────────────────────────────────────── + # When NixOS container mode is active, route ALL subcommands into + # the managed container. This runs before any subcommand dispatch. + try: + from hermes_cli.config import get_container_exec_info + container_info = get_container_exec_info() + if container_info: + _exec_in_container(container_info, sys.argv[1:]) + sys.exit(1) # exec failed if we reach here + except SystemExit: + raise # Re-raise sys.exit from _exec_in_container + except Exception: + pass # Container routing unavailable, proceed locally + # Handle --version flag if args.version: cmd_version(args) diff --git a/nix/nixosModules.nix b/nix/nixosModules.nix index b1be031df2..8bc592fafe 100644 --- a/nix/nixosModules.nix +++ b/nix/nixosModules.nix @@ -499,6 +499,16 @@ default = "ubuntu:24.04"; description = "OCI container image. The container pulls this at runtime via Docker/Podman."; }; + + hostUsers = mkOption { + type = types.listOf types.str; + default = [ ]; + description = '' + Interactive users who get a ~/.hermes symlink to the service + stateDir. These users are automatically added to the hermes group. 
+ ''; + example = [ "sidbin" ]; + }; }; }; @@ -557,6 +567,25 @@ environment.variables.HERMES_HOME = "${cfg.stateDir}/.hermes"; }) + # ── Host user group membership ───────────────────────────────────── + (lib.mkIf (cfg.container.hostUsers != []) { + users.users = lib.genAttrs cfg.container.hostUsers (user: { + extraGroups = [ cfg.group ]; + }); + }) + + # ── Warnings ────────────────────────────────────────────────────── + (lib.mkIf (cfg.container.enable && !cfg.addToSystemPackages && cfg.container.hostUsers != []) { + warnings = [ + '' + services.hermes-agent: container.enable is true and container.hostUsers + is set, but addToSystemPackages is false. Without a host-installed hermes + binary, container routing will not work for interactive users. + Set addToSystemPackages = true or ensure hermes is on PATH. + '' + ]; + }) + # ── Directories ─────────────────────────────────────────────────── { systemd.tmpfiles.rules = [ @@ -611,6 +640,68 @@ chown ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/.managed chmod 0644 ${cfg.stateDir}/.hermes/.managed + # Container mode metadata — tells the host CLI to exec into the + # container instead of running locally. Removed when container mode + # is disabled so the host CLI falls back to native execution. + ${if cfg.container.enable then '' + cat > ${cfg.stateDir}/.hermes/.container-mode <<'HERMES_CONTAINER_MODE_EOF' +# Written by NixOS activation script. Do not edit manually. 
+backend=${cfg.container.backend} +container_name=${containerName} +exec_user=${cfg.user} +hermes_bin=${containerDataDir}/current-package/bin/hermes +HERMES_CONTAINER_MODE_EOF + chown ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/.container-mode + chmod 0644 ${cfg.stateDir}/.hermes/.container-mode + '' else '' + rm -f ${cfg.stateDir}/.hermes/.container-mode + + # Remove symlink bridge for hostUsers + ${lib.concatStringsSep "\n" (map (user: + let + symlinkPath = "/home/${user}/.hermes"; + in '' + if [ -L "${symlinkPath}" ] && [ "$(readlink "${symlinkPath}")" = "${cfg.stateDir}/.hermes" ]; then + rm -f "${symlinkPath}" + echo "hermes-agent: removed symlink ${symlinkPath}" + fi + '') cfg.container.hostUsers)} + ''} + + # ── Symlink bridge for interactive users ─────────────────────── + # Create ~/.hermes -> stateDir/.hermes for each hostUser so the + # host CLI shares state with the container service. + # Only runs when container mode is enabled. + ${lib.optionalString cfg.container.enable + (lib.concatStringsSep "\n" (map (user: + let + userHome = "/home/${user}"; + symlinkPath = "${userHome}/.hermes"; + target = "${cfg.stateDir}/.hermes"; + in '' + if [ -L "${symlinkPath}" ]; then + # Already a symlink — update target if needed + current_target=$(readlink "${symlinkPath}") + if [ "$current_target" != "${target}" ]; then + ln -sfn "${target}" "${symlinkPath}" + fi + elif [ -d "${symlinkPath}" ]; then + # Existing real directory — backup and replace + _backup="${symlinkPath}.bak.$(date +%s)" + echo "hermes-agent: backing up existing ${symlinkPath} to $_backup" + mv "${symlinkPath}" "$_backup" + ln -sfn "${target}" "${symlinkPath}" + chown -h ${user}:${cfg.group} "${symlinkPath}" + elif [ -e "${symlinkPath}" ]; then + # Some other file type — skip with warning + echo "hermes-agent: WARNING: ${symlinkPath} exists but is not a directory or symlink, skipping" + else + # Does not exist — create symlink + ln -sfn "${target}" "${symlinkPath}" + chown -h 
"""Tests for container-aware CLI routing (NixOS container mode).

When container.enable = true in the NixOS module, the activation script
writes a .container-mode metadata file. The host CLI detects this and
execs into the container instead of running locally.
"""
import os
from unittest.mock import MagicMock, mock_open, patch

import pytest

from hermes_cli.config import (
    _is_inside_container,
    get_container_exec_info,
)


# =============================================================================
# _is_inside_container
# =============================================================================


def test_is_inside_container_dockerenv():
    """Detects /.dockerenv marker file."""
    with patch("os.path.exists") as mock_exists:
        mock_exists.side_effect = lambda p: p == "/.dockerenv"
        assert _is_inside_container() is True


def test_is_inside_container_containerenv():
    """Detects Podman's /run/.containerenv marker."""
    with patch("os.path.exists") as mock_exists:
        mock_exists.side_effect = lambda p: p == "/run/.containerenv"
        assert _is_inside_container() is True


def test_is_inside_container_cgroup_docker():
    """Detects 'docker' in /proc/1/cgroup."""
    fake_cgroup = mock_open(read_data="12:memory:/docker/abc123\n")
    with patch("os.path.exists", return_value=False), \
         patch("builtins.open", fake_cgroup):
        assert _is_inside_container() is True


def test_is_inside_container_false_on_host():
    """Returns False when none of the container indicators are present."""
    with patch("os.path.exists", return_value=False), \
         patch("builtins.open", side_effect=OSError("no such file")):
        assert _is_inside_container() is False


# =============================================================================
# get_container_exec_info
# =============================================================================


@pytest.fixture
def container_env(tmp_path, monkeypatch):
    """Set up a fake HERMES_HOME with .container-mode file."""
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.delenv("HERMES_DEV", raising=False)

    container_mode = hermes_home / ".container-mode"
    container_mode.write_text(
        "# Written by NixOS activation script. Do not edit manually.\n"
        "backend=podman\n"
        "container_name=hermes-agent\n"
        "exec_user=hermes\n"
        "hermes_bin=/data/current-package/bin/hermes\n"
    )
    return hermes_home


def test_get_container_exec_info_returns_metadata(container_env):
    """Reads .container-mode and returns all fields including exec_user."""
    with patch("hermes_cli.config._is_inside_container", return_value=False):
        info = get_container_exec_info()

    assert info is not None
    assert info["backend"] == "podman"
    assert info["container_name"] == "hermes-agent"
    assert info["exec_user"] == "hermes"
    assert info["hermes_bin"] == "/data/current-package/bin/hermes"


def test_get_container_exec_info_none_inside_container(container_env):
    """Returns None when we're already inside a container."""
    with patch("hermes_cli.config._is_inside_container", return_value=True):
        info = get_container_exec_info()

    assert info is None


def test_get_container_exec_info_none_without_file(tmp_path, monkeypatch):
    """Returns None when .container-mode doesn't exist (native mode)."""
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.delenv("HERMES_DEV", raising=False)

    with patch("hermes_cli.config._is_inside_container", return_value=False):
        info = get_container_exec_info()

    assert info is None


def test_get_container_exec_info_skipped_when_hermes_dev(container_env, monkeypatch):
    """Returns None when HERMES_DEV=1 is set (dev mode bypass)."""
    monkeypatch.setenv("HERMES_DEV", "1")

    with patch("hermes_cli.config._is_inside_container", return_value=False):
        info = get_container_exec_info()

    assert info is None


def test_get_container_exec_info_not_skipped_when_hermes_dev_zero(container_env, monkeypatch):
    """HERMES_DEV=0 does NOT trigger bypass — only '1' does."""
    monkeypatch.setenv("HERMES_DEV", "0")

    with patch("hermes_cli.config._is_inside_container", return_value=False):
        info = get_container_exec_info()

    assert info is not None


def test_get_container_exec_info_defaults(container_env):
    """Falls back to defaults for missing keys."""
    # Same HERMES_HOME fixture as the sibling tests; only the file content
    # differs, so the environment is restored by monkeypatch automatically.
    (container_env / ".container-mode").write_text(
        "# minimal file with no keys\n"
    )

    with patch("hermes_cli.config._is_inside_container", return_value=False):
        info = get_container_exec_info()

    assert info == {
        "backend": "docker",
        "container_name": "hermes-agent",
        "exec_user": "hermes",
        "hermes_bin": "/data/current-package/bin/hermes",
    }


def test_get_container_exec_info_docker_backend(container_env):
    """Correctly reads docker backend with custom exec_user."""
    (container_env / ".container-mode").write_text(
        "backend=docker\n"
        "container_name=hermes-custom\n"
        "exec_user=myuser\n"
        "hermes_bin=/opt/hermes/bin/hermes\n"
    )

    with patch("hermes_cli.config._is_inside_container", return_value=False):
        info = get_container_exec_info()

    assert info["backend"] == "docker"
    assert info["container_name"] == "hermes-custom"
    assert info["exec_user"] == "myuser"
    assert info["hermes_bin"] == "/opt/hermes/bin/hermes"


# =============================================================================
# _exec_in_container
# =============================================================================


@pytest.fixture
def container_info():
    """Routing metadata shared by the _exec_in_container tests."""
    return {
        "backend": "docker",
        "container_name": "hermes-agent",
        "exec_user": "hermes",
        "hermes_bin": "/data/current-package/bin/hermes",
    }


def test_exec_in_container_constructs_correct_command(container_info):
    """Exec command includes -u exec_user, -e env vars, TTY flags."""
    from hermes_cli.main import _exec_in_container

    with patch("shutil.which", return_value="/usr/bin/docker"), \
         patch("subprocess.run") as mock_run, \
         patch("sys.stdin") as mock_stdin, \
         patch.dict(os.environ, {"TERM": "xterm-256color", "LANG": "en_US.UTF-8"},
                    clear=False), \
         pytest.raises(SystemExit) as exc_info:
        mock_stdin.isatty.return_value = True
        # First call = probe (inspect), second = exec
        mock_run.return_value = MagicMock(returncode=0)

        _exec_in_container(container_info, ["chat", "-m", "opus"])

    assert exc_info.value.code == 0
    assert mock_run.call_count == 2  # probe + exec
    cmd = mock_run.call_args_list[1][0][0]  # second call = exec
    # Runtime and exec
    assert cmd[0] == "/usr/bin/docker"
    assert cmd[1] == "exec"
    # TTY flags
    assert "-it" in cmd
    # User flag
    idx_u = cmd.index("-u")
    assert cmd[idx_u + 1] == "hermes"
    # Env passthrough
    e_indices = [i for i, v in enumerate(cmd) if v == "-e"]
    e_values = [cmd[i + 1] for i in e_indices]
    assert "TERM=xterm-256color" in e_values
    assert "LANG=en_US.UTF-8" in e_values
    # Container + binary + args must close the command, in that order
    assert cmd[-5:] == [
        "hermes-agent",
        "/data/current-package/bin/hermes",
        "chat", "-m", "opus",
    ]


def test_exec_in_container_non_tty_uses_i_only(container_info):
    """Non-TTY mode uses -i instead of -it."""
    from hermes_cli.main import _exec_in_container

    with patch("shutil.which", return_value="/usr/bin/docker"), \
         patch("subprocess.run") as mock_run, \
         patch("sys.stdin") as mock_stdin, \
         pytest.raises(SystemExit):
        mock_stdin.isatty.return_value = False
        mock_run.return_value = MagicMock(returncode=0)

        _exec_in_container(container_info, ["sessions", "list"])

    cmd = mock_run.call_args[0][0]
    # Should have -i but NOT -it
    assert "-i" in cmd
    assert "-it" not in cmd


def test_exec_in_container_no_runtime_hard_fails(container_info):
    """Hard fails when runtime not found (no fallback)."""
    from hermes_cli.main import _exec_in_container

    podman_info = {**container_info, "backend": "podman"}

    with patch("shutil.which", return_value=None), \
         patch("subprocess.run") as mock_run, \
         patch("sys.stdin") as mock_stdin, \
         pytest.raises(SystemExit) as exc_info:
        mock_stdin.isatty.return_value = True
        _exec_in_container(podman_info, ["chat"])

    mock_run.assert_not_called()
    assert exc_info.value.code != 0


def test_exec_in_container_tty_retries_on_container_failure(container_info):
    """TTY mode retries on docker exit codes 125-127, then hard fails."""
    from hermes_cli.main import _exec_in_container

    with patch("shutil.which", return_value="/usr/bin/docker"), \
         patch("subprocess.run") as mock_run, \
         patch("sys.stdin") as mock_stdin, \
         patch("sys.stderr"), \
         patch("time.sleep") as mock_sleep, \
         pytest.raises(SystemExit) as exc_info:
        mock_stdin.isatty.return_value = True
        # Probe succeeds (container visible), exec returns 125 (container stopped mid-run)
        mock_run.side_effect = [MagicMock(returncode=0)] + \
                               [MagicMock(returncode=125)] * 5
        _exec_in_container(container_info, ["chat"])

    assert mock_sleep.call_count == 4  # 5 exec attempts, 4 sleeps
    assert exc_info.value.code == 1


def test_exec_in_container_non_tty_retries_silently_exits_126(container_info):
    """Non-TTY mode retries on container failures then exits 126."""
    from hermes_cli.main import _exec_in_container

    with patch("shutil.which", return_value="/usr/bin/docker"), \
         patch("subprocess.run") as mock_run, \
         patch("sys.stdin") as mock_stdin, \
         patch("sys.stderr"), \
         patch("time.sleep") as mock_sleep, \
         pytest.raises(SystemExit) as exc_info:
        mock_stdin.isatty.return_value = False
        # Probe succeeds, exec returns 126 repeatedly
        mock_run.side_effect = [MagicMock(returncode=0)] + \
                               [MagicMock(returncode=126)] * 10
        _exec_in_container(container_info, ["sessions", "list"])

    assert mock_sleep.call_count == 9  # 10 exec attempts, 9 sleeps
    assert exc_info.value.code == 126


def test_exec_in_container_propagates_hermes_exit_code(container_info):
    """Non-zero exit from hermes inside container is propagated, not retried."""
    from hermes_cli.main import _exec_in_container

    with patch("shutil.which", return_value="/usr/bin/docker"), \
         patch("subprocess.run") as mock_run, \
         patch("sys.stdin") as mock_stdin, \
         patch("time.sleep") as mock_sleep, \
         pytest.raises(SystemExit) as exc_info:
        mock_stdin.isatty.return_value = True
        # Probe succeeds (returncode=0), exec returns 1 (hermes error)
        mock_run.side_effect = [
            MagicMock(returncode=0),  # probe
            MagicMock(returncode=1),  # exec — hermes error, not docker failure
        ]
        _exec_in_container(container_info, ["chat"])

    mock_sleep.assert_not_called()  # No retries
    assert mock_run.call_count == 2  # probe + one exec attempt
    assert exc_info.value.code == 1
all exec into the container under the hood +- All CLI flags are forwarded as-is +- If the container isn't running, the CLI retries briefly (about 5s with a `Waiting for container...` progress line for interactive use, about 10s silently for scripts) and then fails instead of falling back to a local run: interactive sessions get an error message and exit code 1, scripts get exit code 126 with no message +- For developers working on the hermes codebase, set `HERMES_DEV=1` to bypass container routing and run the local checkout directly + +Set `container.hostUsers` to create a `~/.hermes` symlink to the service state directory, so the host CLI and the container share sessions, config, and memories: + +```nix +services.hermes-agent = { + container.enable = true; + container.hostUsers = [ "your-username" ]; + addToSystemPackages = true; +}; +``` + +Users listed in `hostUsers` are automatically added to the `hermes` group for file permission access. + +**Podman users:** The NixOS service runs the container as root. Docker users get access via the `docker` group socket, but Podman's rootful containers require sudo. Grant passwordless sudo for your container runtime: + +```nix +security.sudo.extraRules = [{ + users = [ "your-username" ]; + commands = [{ + command = "/run/current-system/sw/bin/podman"; + options = [ "NOPASSWD" ]; + }]; +}]; +``` + +The CLI auto-detects when sudo is needed and uses it transparently. Without this, you'll need to run `sudo hermes chat` manually. 
+::: + ### Verify It Works After `nixos-rebuild switch`, check that the service is running: @@ -246,6 +281,7 @@ Run `nix build .#configKeys && cat result` to see every leaf config key extracte container = { image = "ubuntu:24.04"; backend = "docker"; + hostUsers = [ "your-username" ]; extraVolumes = [ "/home/user/projects:/projects:rw" ]; extraOptions = [ "--gpus" "all" ]; }; @@ -285,6 +321,7 @@ Quick reference for the most common things Nix users want to customize: | Mount host directories into container | `container.extraVolumes` | `[ "/data:/data:rw" ]` | | Pass GPU access to container | `container.extraOptions` | `[ "--gpus" "all" ]` | | Use Podman instead of Docker | `container.backend` | `"podman"` | +| Share state between host CLI and container | `container.hostUsers` | `[ "sidbin" ]` | | Add tools to the service PATH (native only) | `extraPackages` | `[ pkgs.pandoc pkgs.imagemagick ]` | | Use a custom base image | `container.image` | `"ubuntu:24.04"` | | Override the hermes package | `package` | `inputs.hermes-agent.packages.${system}.default.override { ... }` | @@ -518,6 +555,7 @@ When container mode is enabled, hermes runs inside a persistent Ubuntu container Host Container ──── ───────── /nix/store/...-hermes-agent-0.1.0 ──► /nix/store/... (ro) +~/.hermes -> /var/lib/hermes/.hermes (symlink bridge, per hostUsers) /var/lib/hermes/ ──► /data/ (rw) ├── current-package -> /nix/store/... (symlink, updated each rebuild) ├── .gc-root -> /nix/store/... (prevents nix-collect-garbage) @@ -526,6 +564,7 @@ Host Container │ ├── .env (merged from environment + environmentFiles) │ ├── config.yaml (Nix-generated, deep-merged by activation) │ ├── .managed (marker file) + │ ├── .container-mode (routing metadata: backend, exec_user, etc.) 
│ ├── state.db, sessions/, memories/ (runtime state) │ └── mcp-tokens/ (OAuth tokens for MCP servers) ├── home/ ──► /home/hermes (rw) @@ -698,6 +737,7 @@ nix build .#checks.x86_64-linux.config-roundtrip # merge script preserves use | `container.image` | `str` | `"ubuntu:24.04"` | Base image (pulled at runtime) | | `container.extraVolumes` | `listOf str` | `[]` | Extra volume mounts (`host:container:mode`) | | `container.extraOptions` | `listOf str` | `[]` | Extra args passed to `docker create` | +| `container.hostUsers` | `listOf str` | `[]` | Interactive users who get a `~/.hermes` symlink to the service stateDir and are auto-added to the `hermes` group | --- @@ -818,3 +858,5 @@ nix-store --query --roots $(docker exec hermes-agent readlink /data/current-pack | `hermes version` shows old version | Container not restarted | `systemctl restart hermes-agent` | | Permission denied on `/var/lib/hermes` | State dir is `0750 hermes:hermes` | Use `docker exec` or `sudo -u hermes` | | `nix-collect-garbage` removed hermes | GC root missing | Restart the service (preStart recreates the GC root) | +| `no container with name or ID "hermes-agent"` (Podman) | Podman rootful container not visible to regular user | Add passwordless sudo for podman (see [Container-aware CLI](#container-aware-cli) section) | +| `unable to find user hermes` | Container still starting (entrypoint hasn't created user yet) | Wait a few seconds and retry — the CLI retries automatically |