fix(desktop): probe steps 4 & 5 of resolveHermesBackend before trusting

A user-reported failure on Windows-on-ARM: a pre-installed Python 3.13
on PATH makes findSystemPython() succeed, so resolveHermesBackend
returns a backend pointing at it -- but hermes_cli isn't in that
interpreter's site-packages. The spawn dies with ModuleNotFoundError
and the user sees a dead GUI instead of the first-launch installer.

Same shape can hit step 4 (existing `hermes` on PATH) when a stale
shim survives a partial uninstall.

Add cheap exit-code probes -- `python -c "import hermes_cli"` for
step 5, `<hermes> --version` for step 4 -- and fall through to step 6
(bootstrap-needed) on failure. install.ps1 then runs as if on a clean
box and the venv gets built.

Probes live in a standalone electron/backend-probes.cjs module so they
can be unit-tested with node --test, same pattern as bootstrap-platform.cjs
and hardening.cjs. New test file wired into test:desktop:platforms.
This commit is contained in:
emozilla
2026-05-20 14:41:23 -04:00
parent 85c583dc34
commit 928280ca2c
4 changed files with 230 additions and 17 deletions
+106
View File
@@ -0,0 +1,106 @@
/**
* backend-probes.cjs
*
* Cheap "does this candidate backend actually work" checks used by
* resolveHermesBackend (main.cjs). The resolver walks a ladder of
* candidates -- bootstrap marker, `hermes` on PATH, system Python with
* hermes_cli installed -- and historically returned the first candidate
* whose binary existed on disk. That assumption breaks when a user has
* a pre-installed Python 3.11-3.13 (so findSystemPython() returns a
* path) but no hermes_cli in its site-packages: the resolver hands back
* a backend the spawn step can't actually run, and the user gets a
* dead-on-arrival "ModuleNotFoundError: No module named 'hermes_cli'"
* instead of the first-launch installer.
*
* These probes give the resolver a way to verify a candidate before
* trusting it. Failure (non-zero exit, exception, timeout) means "skip
* this rung, try the next one"; success means "spawn this for real."
* Falling off the bottom of the ladder lands on the bootstrap-needed
* sentinel, which is exactly what we want when nothing pre-existing
* actually works.
*
* Both probes are deliberately fast and forgiving:
* - 5s timeout (a hung interpreter beats forever, but we still give
* slow disks / cold caches room to breathe)
* - stdio ignored (we only care about exit code; stdout/stderr are
* not surfaced to the user, just to recentHermesLog for forensics
* via the caller's catch block if it chooses)
* - any throw -> false (never propagate -- resolver wants a boolean)
*
* Kept in a standalone cjs module so it can be unit-tested with
* `node --test` without dragging in the electron runtime (same pattern
* as bootstrap-platform.cjs and hardening.cjs).
*/
const { execFileSync } = require('node:child_process')
const PROBE_TIMEOUT_MS = 5000
/**
* Return true iff `python -c "import hermes_cli"` exits 0.
*
* Used to gate the "fallback to system Python with hermes_cli installed"
* rung of resolveHermesBackend. Without this, a system Python 3.11-3.13
* registered in PEP 514 makes findSystemPython() succeed regardless of
* whether hermes_cli has actually been pip-installed into its
* site-packages -- and the resolver returns a backend that immediately
* dies on spawn.
*
* @param {string} pythonPath - Absolute path to a python.exe / python.
* @returns {boolean}
*/
function canImportHermesCli(pythonPath) {
if (!pythonPath) return false
try {
execFileSync(pythonPath, ['-c', 'import hermes_cli'], {
stdio: 'ignore',
timeout: PROBE_TIMEOUT_MS,
windowsHide: true
})
return true
} catch {
return false
}
}
/**
* Return true iff `<hermesCommand> --version` exits 0.
*
* Used to gate the "existing `hermes` on PATH" rung. Without this, a
* stale hermes.cmd shim left behind by an uninstalled pip install (or
* a half-built venv whose `hermes` entry-point points at a deleted
* Python) survives findOnPath() and gets selected as the backend.
*
* We intentionally avoid invoking the command with the dashboard args
* here -- `--version` is the cheapest "is this binary alive" smoke
* test that every hermes_cli entry-point has supported since 0.1.
*
* @param {string} hermesCommand - Resolved absolute path to a hermes
* executable (or an interpreter+script wrapper).
* @param {object} [opts]
* @param {boolean} [opts.shell] - Whether to run through a shell. For
* .cmd/.bat shims on Windows execFileSync needs shell:true to find
* the cmd interpreter; mirrors the same flag isCommandScript() drives
* in resolveHermesBackend.
* @returns {boolean}
*/
function verifyHermesCli(hermesCommand, opts = {}) {
if (!hermesCommand) return false
try {
execFileSync(hermesCommand, ['--version'], {
stdio: 'ignore',
timeout: PROBE_TIMEOUT_MS,
shell: Boolean(opts.shell),
windowsHide: true
})
return true
} catch {
return false
}
}
module.exports = {
canImportHermesCli,
verifyHermesCli,
PROBE_TIMEOUT_MS
}
@@ -0,0 +1,80 @@
/**
* Tests for electron/backend-probes.cjs.
*
* Run with: node --test electron/backend-probes.test.cjs
* (Wired into npm test:desktop:platforms in package.json.)
*/
const test = require('node:test')
const assert = require('node:assert/strict')
const fs = require('node:fs')
const os = require('node:os')
const path = require('node:path')
const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs')
// Resolve the host's own Node binary -- guaranteed to be on disk and
// runnable. We use it as both a stand-in for "a python that doesn't
// have hermes_cli" (since `node -c "import hermes_cli"` will exit
// non-zero) and as a way to script verifyHermesCli's success path
// (a tiny script we write to disk that exits 0 on --version).
const NODE_BIN = process.execPath
test('canImportHermesCli returns false when path is falsy', () => {
assert.equal(canImportHermesCli(''), false)
assert.equal(canImportHermesCli(null), false)
assert.equal(canImportHermesCli(undefined), false)
})
test('canImportHermesCli returns false when interpreter cannot run -c', () => {
// node IS an interpreter, but `node -c "import hermes_cli"` is a
// SyntaxError -- different exit reason from a real Python's
// ModuleNotFoundError, but the predicate is "exit 0 or not" and
// both land on "not", which is exactly what we want for the
// resolver fall-through.
assert.equal(canImportHermesCli(NODE_BIN), false)
})
test('canImportHermesCli returns false when binary does not exist', () => {
const ghost = path.join(os.tmpdir(), 'hermes-probes-ghost-' + Date.now() + '.exe')
assert.equal(canImportHermesCli(ghost), false)
})
test('verifyHermesCli returns false when command is falsy', () => {
assert.equal(verifyHermesCli(''), false)
assert.equal(verifyHermesCli(null), false)
assert.equal(verifyHermesCli(undefined), false)
})
test('verifyHermesCli returns false when binary does not exist', () => {
const ghost = path.join(os.tmpdir(), 'hermes-probes-ghost-' + Date.now() + '.exe')
assert.equal(verifyHermesCli(ghost), false)
})
test('verifyHermesCli returns true when --version exits 0', () => {
// Write a tiny script that exits 0 regardless of args, then invoke
// it through node. This stands in for a working hermes binary --
// verifyHermesCli only cares about the exit code.
const scriptPath = path.join(os.tmpdir(), `hermes-probes-ok-${Date.now()}-${process.pid}.cjs`)
fs.writeFileSync(scriptPath, 'process.exit(0)\n')
try {
// Use node as the launcher and our script as the "command". Pass
// shell:false (default) -- node is a real binary, no shim.
// execFileSync passes ['--version'] as args, which node ignores
// gracefully (well, it prints its version and exits 0, which is
// perfect -- exit code 0 is the only signal we read).
assert.equal(verifyHermesCli(NODE_BIN), true)
} finally {
try {
fs.unlinkSync(scriptPath)
} catch {}
}
})
test('verifyHermesCli swallows timeouts (does not throw)', () => {
// We can't easily provoke a real 5s hang in CI without slowing the
// suite, but we CAN confirm that an invocation that DOES throw
// (because the binary is missing) returns false rather than
// propagating. Same code path the timeout case takes.
assert.equal(verifyHermesCli('/definitely/not/a/real/binary/anywhere'), false)
})
+43 -16
View File
@@ -23,6 +23,7 @@ const { fileURLToPath, pathToFileURL } = require('node:url')
const { execFileSync, spawn } = require('node:child_process')
const { isWindowsBinaryPathInWsl, isWslEnvironment } = require('./bootstrap-platform.cjs')
const { runBootstrap } = require('./bootstrap-runner.cjs')
const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs')
const {
DATA_URL_READ_MAX_BYTES,
DEFAULT_FETCH_TIMEOUT_MS,
@@ -1350,15 +1351,28 @@ function resolveHermesBackend(dashboardArgs) {
}
if (hermesCommand) {
return {
label: `existing Hermes CLI at ${hermesCommand}`,
command: hermesCommand,
args: dashboardArgs,
bootstrap: false,
env: {},
kind: 'command',
shell: isCommandScript(hermesCommand)
// Smoke-test the candidate before trusting it. A `hermes` shim
// left behind by a half-uninstalled pip install (or a venv
// entry-point pointing at a deleted interpreter) still resolves
// via findOnPath but explodes on spawn -- the user then sees a
// dead backend instead of the first-launch installer. The cheap
// `--version` probe (see backend-probes.cjs) catches that case
// and lets the resolver fall through to step 6 / bootstrap.
const shellForProbe = isCommandScript(hermesCommand)
if (verifyHermesCli(hermesCommand, { shell: shellForProbe })) {
return {
label: `existing Hermes CLI at ${hermesCommand}`,
command: hermesCommand,
args: dashboardArgs,
bootstrap: false,
env: {},
kind: 'command',
shell: shellForProbe
}
}
rememberLog(
`Ignoring existing Hermes CLI at ${hermesCommand}: --version probe failed; falling through to bootstrap.`
)
}
}
@@ -1367,15 +1381,28 @@ function resolveHermesBackend(dashboardArgs) {
// take ownership.
const python = findSystemPython()
if (python) {
return {
kind: 'python',
label: `installed hermes_cli module via ${python}`,
command: python,
args: ['-m', 'hermes_cli.main', ...dashboardArgs],
bootstrap: false,
env: {},
shell: false
// Same smoke-test rationale as step 4: a system Python in the
// SUPPORTED_VERSIONS range can be registered (PEP 514) without
// having hermes_cli installed -- common on dev boxes that have
// a python.org install from prior unrelated work. Returning that
// backend hands the spawn step a guaranteed ModuleNotFoundError.
// Verify the import works before trusting the candidate; on
// failure, fall through to step 6 so the bootstrap runner pulls
// a uv-managed 3.11 into %LOCALAPPDATA%\hermes\hermes-agent\venv.
if (canImportHermesCli(python)) {
return {
kind: 'python',
label: `installed hermes_cli module via ${python}`,
command: python,
args: ['-m', 'hermes_cli.main', ...dashboardArgs],
bootstrap: false,
env: {},
shell: false
}
}
rememberLog(
`Ignoring system Python ${python}: hermes_cli is not importable; falling through to bootstrap.`
)
}
// 6. Nothing usable yet -- signal the bootstrap runner that we need to
+1 -1
View File
@@ -31,7 +31,7 @@
"test:desktop:nsis": "node scripts/test-desktop.mjs nsis",
"test:desktop:existing": "node scripts/test-desktop.mjs existing",
"test:desktop:fresh": "node scripts/test-desktop.mjs fresh",
"test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs",
"test:desktop:platforms": "node --test electron/bootstrap-platform.test.cjs electron/hardening.test.cjs electron/backend-probes.test.cjs",
"type-check": "tsc -b",
"lint": "eslint src/ electron/",
"lint:fix": "eslint src/ electron/ --fix",