From 928280ca2ca5a8565bc83f99ff93c6eac5ed5e76 Mon Sep 17 00:00:00 2001 From: emozilla Date: Wed, 20 May 2026 14:41:23 -0400 Subject: [PATCH] fix(desktop): probe steps 4 & 5 of resolveHermesBackend before trusting A user-reported failure on Windows-on-ARM: a pre-installed Python 3.13 on PATH makes findSystemPython() succeed, so resolveHermesBackend returns a backend pointing at it -- but hermes_cli isn't in that interpreter's site-packages. The spawn dies with ModuleNotFoundError and the user sees a dead GUI instead of the first-launch installer. Same shape can hit step 4 (existing `hermes` on PATH) when a stale shim survives a partial uninstall. Add cheap exit-code probes -- `python -c "import hermes_cli"` for step 5, ` --version` for step 4 -- and fall through to step 6 (bootstrap-needed) on failure. install.ps1 then runs as if on a clean box and the venv gets built. Probes live in a standalone electron/backend-probes.cjs module so they can be unit-tested with node --test, same pattern as bootstrap-platform.cjs and hardening.cjs. New test file wired into test:desktop:platforms. --- apps/desktop/electron/backend-probes.cjs | 106 ++++++++++++++++++ apps/desktop/electron/backend-probes.test.cjs | 80 +++++++++++++ apps/desktop/electron/main.cjs | 59 +++++++--- apps/desktop/package.json | 2 +- 4 files changed, 230 insertions(+), 17 deletions(-) create mode 100644 apps/desktop/electron/backend-probes.cjs create mode 100644 apps/desktop/electron/backend-probes.test.cjs diff --git a/apps/desktop/electron/backend-probes.cjs b/apps/desktop/electron/backend-probes.cjs new file mode 100644 index 0000000000..0e0e584848 --- /dev/null +++ b/apps/desktop/electron/backend-probes.cjs @@ -0,0 +1,106 @@ +/** + * backend-probes.cjs + * + * Cheap "does this candidate backend actually work" checks used by + * resolveHermesBackend (main.cjs). The resolver walks a ladder of + * candidates -- bootstrap marker, `hermes` on PATH, system Python with + * hermes_cli installed -- and historically returned the first candidate + * whose binary existed on disk. That assumption breaks when a user has + * a pre-installed Python 3.11-3.13 (so findSystemPython() returns a + * path) but no hermes_cli in its site-packages: the resolver hands back + * a backend the spawn step can't actually run, and the user gets a + * dead-on-arrival "ModuleNotFoundError: No module named 'hermes_cli'" + * instead of the first-launch installer. + * + * These probes give the resolver a way to verify a candidate before + * trusting it. Failure (non-zero exit, exception, timeout) means "skip + * this rung, try the next one"; success means "spawn this for real." + * Falling off the bottom of the ladder lands on the bootstrap-needed + * sentinel, which is exactly what we want when nothing pre-existing + * actually works. + * + * Both probes are deliberately fast and forgiving: + * - 5s timeout (a hung interpreter beats forever, but we still give + * slow disks / cold caches room to breathe) + * - stdio ignored (we only care about exit code; stdout/stderr are + * not surfaced to the user, just to recentHermesLog for forensics + * via the caller's catch block if it chooses) + * - any throw -> false (never propagate -- resolver wants a boolean) + * + * Kept in a standalone cjs module so it can be unit-tested with + * `node --test` without dragging in the electron runtime (same pattern + * as bootstrap-platform.cjs and hardening.cjs). + */ + +const { execFileSync } = require('node:child_process') + +const PROBE_TIMEOUT_MS = 5000 + +/** + * Return true iff `python -c "import hermes_cli"` exits 0. + * + * Used to gate the "fallback to system Python with hermes_cli installed" + * rung of resolveHermesBackend. Without this, a system Python 3.11-3.13 + * registered in PEP 514 makes findSystemPython() succeed regardless of + * whether hermes_cli has actually been pip-installed into its + * site-packages -- and the resolver returns a backend that immediately + * dies on spawn. + * + * @param {string} pythonPath - Absolute path to a python.exe / python. + * @returns {boolean} + */ +function canImportHermesCli(pythonPath) { + if (!pythonPath) return false + try { + execFileSync(pythonPath, ['-c', 'import hermes_cli'], { + stdio: 'ignore', + timeout: PROBE_TIMEOUT_MS, + windowsHide: true + }) + return true + } catch { + return false + } +} + +/** + * Return true iff ` --version` exits 0. + * + * Used to gate the "existing `hermes` on PATH" rung. Without this, a + * stale hermes.cmd shim left behind by an uninstalled pip install (or + * a half-built venv whose `hermes` entry-point points at a deleted + * Python) survives findOnPath() and gets selected as the backend. + * + * We intentionally avoid invoking the command with the dashboard args + * here -- `--version` is the cheapest "is this binary alive" smoke + * test that every hermes_cli entry-point has supported since 0.1. + * + * @param {string} hermesCommand - Resolved absolute path to a hermes + * executable (or an interpreter+script wrapper). + * @param {object} [opts] + * @param {boolean} [opts.shell] - Whether to run through a shell. For + * .cmd/.bat shims on Windows execFileSync needs shell:true to find + * the cmd interpreter; mirrors the same flag isCommandScript() drives + * in resolveHermesBackend. + * @returns {boolean} + */ +function verifyHermesCli(hermesCommand, opts = {}) { + if (!hermesCommand) return false + try { + execFileSync(hermesCommand, ['--version'], { + stdio: 'ignore', + timeout: PROBE_TIMEOUT_MS, + shell: Boolean(opts.shell), + windowsHide: true + }) + return true + } catch { + return false + } +} + +module.exports = { + canImportHermesCli, + verifyHermesCli, + PROBE_TIMEOUT_MS +} diff --git a/apps/desktop/electron/backend-probes.test.cjs b/apps/desktop/electron/backend-probes.test.cjs new file mode 100644 index 0000000000..db39ebcee3 --- /dev/null +++ b/apps/desktop/electron/backend-probes.test.cjs @@ -0,0 +1,80 @@ +/** + * Tests for electron/backend-probes.cjs. + * + * Run with: node --test electron/backend-probes.test.cjs + * (Wired into npm test:desktop:platforms in package.json.) + */ + +const test = require('node:test') +const assert = require('node:assert/strict') +const fs = require('node:fs') +const os = require('node:os') +const path = require('node:path') + +const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs') + +// Resolve the host's own Node binary -- guaranteed to be on disk and +// runnable. We use it as both a stand-in for "a python that doesn't +// have hermes_cli" (since `node -c "import hermes_cli"` will exit +// non-zero) and as a way to script verifyHermesCli's success path +// (a tiny script we write to disk that exits 0 on --version). +const NODE_BIN = process.execPath + +test('canImportHermesCli returns false when path is falsy', () => { + assert.equal(canImportHermesCli(''), false) + assert.equal(canImportHermesCli(null), false) + assert.equal(canImportHermesCli(undefined), false) +}) + +test('canImportHermesCli returns false when interpreter cannot run -c', () => { + // node IS an interpreter, but `node -c "import hermes_cli"` is a + // SyntaxError -- different exit reason from a real Python's + // ModuleNotFoundError, but the predicate is "exit 0 or not" and + // both land on "not", which is exactly what we want for the + // resolver fall-through. + assert.equal(canImportHermesCli(NODE_BIN), false) +}) + +test('canImportHermesCli returns false when binary does not exist', () => { + const ghost = path.join(os.tmpdir(), 'hermes-probes-ghost-' + Date.now() + '.exe') + assert.equal(canImportHermesCli(ghost), false) +}) + +test('verifyHermesCli returns false when command is falsy', () => { + assert.equal(verifyHermesCli(''), false) + assert.equal(verifyHermesCli(null), false) + assert.equal(verifyHermesCli(undefined), false) +}) + +test('verifyHermesCli returns false when binary does not exist', () => { + const ghost = path.join(os.tmpdir(), 'hermes-probes-ghost-' + Date.now() + '.exe') + assert.equal(verifyHermesCli(ghost), false) +}) + +test('verifyHermesCli returns true when --version exits 0', () => { + // Write a tiny script that exits 0 regardless of args, then invoke + // it through node. This stands in for a working hermes binary -- + // verifyHermesCli only cares about the exit code. + const scriptPath = path.join(os.tmpdir(), `hermes-probes-ok-${Date.now()}-${process.pid}.cjs`) + fs.writeFileSync(scriptPath, 'process.exit(0)\n') + try { + // Use node as the launcher and our script as the "command". Pass + // shell:false (default) -- node is a real binary, no shim. + // execFileSync passes ['--version'] as args, which node ignores + // gracefully (well, it prints its version and exits 0, which is + // perfect -- exit code 0 is the only signal we read). + assert.equal(verifyHermesCli(NODE_BIN), true) + } finally { + try { + fs.unlinkSync(scriptPath) + } catch {} + } +}) + +test('verifyHermesCli swallows timeouts (does not throw)', () => { + // We can't easily provoke a real 5s hang in CI without slowing the + // suite, but we CAN confirm that an invocation that DOES throw + // (because the binary is missing) returns false rather than + // propagating. Same code path the timeout case takes. + assert.equal(verifyHermesCli('/definitely/not/a/real/binary/anywhere'), false) +}) diff --git a/apps/desktop/electron/main.cjs b/apps/desktop/electron/main.cjs index 039da6f49c..677a027267 100644 --- a/apps/desktop/electron/main.cjs +++ b/apps/desktop/electron/main.cjs @@ -23,6 +23,7 @@ const { fileURLToPath, pathToFileURL } = require('node:url') const { execFileSync, spawn } = require('node:child_process') const { isWindowsBinaryPathInWsl, isWslEnvironment } = require('./bootstrap-platform.cjs') const { runBootstrap } = require('./bootstrap-runner.cjs') +const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs') const { DATA_URL_READ_MAX_BYTES, DEFAULT_FETCH_TIMEOUT_MS, @@ -1350,15 +1351,28 @@ function resolveHermesBackend(dashboardArgs) { } if (hermesCommand) { - return { - label: `existing Hermes CLI at ${hermesCommand}`, - command: hermesCommand, - args: dashboardArgs, - bootstrap: false, - env: {}, - kind: 'command', - shell: isCommandScript(hermesCommand) + // Smoke-test the candidate before trusting it. A `hermes` shim + // left behind by a half-uninstalled pip install (or a venv + // entry-point pointing at a deleted interpreter) still resolves + // via findOnPath but explodes on spawn -- the user then sees a + // dead backend instead of the first-launch installer. The cheap + // `--version` probe (see backend-probes.cjs) catches that case + // and lets the resolver fall through to step 6 / bootstrap. + const shellForProbe = isCommandScript(hermesCommand) + if (verifyHermesCli(hermesCommand, { shell: shellForProbe })) { + return { + label: `existing Hermes CLI at ${hermesCommand}`, + command: hermesCommand, + args: dashboardArgs, + bootstrap: false, + env: {}, + kind: 'command', + shell: shellForProbe + } } + rememberLog( + `Ignoring existing Hermes CLI at ${hermesCommand}: --version probe failed; falling through to bootstrap.` + ) } } @@ -1367,15 +1381,28 @@ function resolveHermesBackend(dashboardArgs) { // take ownership. const python = findSystemPython() if (python) { - return { - kind: 'python', - label: `installed hermes_cli module via ${python}`, - command: python, - args: ['-m', 'hermes_cli.main', ...dashboardArgs], - bootstrap: false, - env: {}, - shell: false + // Same smoke-test rationale as step 4: a system Python in the + // SUPPORTED_VERSIONS range can be registered (PEP 514) without + // having hermes_cli installed -- common on dev boxes that have + // a python.org install from prior unrelated work. Returning that + // backend hands the spawn step a guaranteed ModuleNotFoundError. + // Verify the import works before trusting the candidate; on + // failure, fall through to step 6 so the bootstrap runner pulls + // a uv-managed 3.11 into %LOCALAPPDATA%\hermes\hermes-agent\venv. + if (canImportHermesCli(python)) { + return { + kind: 'python', + label: `installed hermes_cli module via ${python}`, + command: python, + args: ['-m', 'hermes_cli.main', ...dashboardArgs], + bootstrap: false, + env: {}, + shell: false + } } + rememberLog( + `Ignoring system Python ${python}: hermes_cli is not importable; falling through to bootstrap.` + ) } // 6. Nothing usable yet -- signal the bootstrap runner that we need to diff --git a/apps/desktop/package.json b/apps/desktop/package.json index 9bf79511b7..7cdf55b7dd 100644 --- a/apps/desktop/package.json +++ b/apps/desktop/package.json @@ -31,7 +31,7 @@ "test:desktop:nsis": "node scripts/test-desktop.mjs nsis", "test:desktop:existing": "node scripts/test-desktop.mjs existing", "test:desktop:fresh": "node scripts/test-desktop.mjs fresh", - "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs", + "test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-probes.test.cjs", "type-check": "tsc -b", "lint": "eslint src/ electron/", "lint:fix": "eslint src/ electron/ --fix",