fix(sessions): widen #3023 to all 5 session-id validators via shared is_safe_session_id helper

PR #3023 only updated Session.load() and Session.load_metadata_only(), leaving
three sibling validators (Session-internal _repair_stale_pending and the
/api/session/worktree/remove + /api/session/delete route handlers) still
gated on the old lowercase-only character set.  That would have shipped a
confusing UX where api-* and reachy-voice-* sessions could be loaded into
the sidebar but rejected with HTTP 400 on delete or worktree removal.

This commit factors the validation into a single is_safe_session_id helper
in api.models and updates all five call sites to use it.  Adds regression
coverage in tests/test_issue3023_safe_session_id_validators.py for both
the helper itself and a repo-wide guarantee that no narrow lowercase-only
magic string survives.

Closes the follow-up flagged by the parallel reviewer agent on #3023.
This commit is contained in:
nesquena-hermes
2026-05-28 02:09:05 +00:00
parent d76e23a9f2
commit c1942a1cd8
3 changed files with 120 additions and 5 deletions
+26 -3
View File
@@ -54,6 +54,29 @@ _INDEX_WRITE_LOCK = threading.RLock()
_SESSION_INDEX_REBUILD_LOCK = threading.Lock()
_SESSION_INDEX_REBUILD_THREAD = None
# Path-safety contract for session IDs. Accept alphanumerics, underscore, and
# hyphen so API/gateway-issued ids (``api-*``, ``reachy-voice-*``) round-trip
# through filesystem load/save/delete/worktree paths without traversal risk.
# Dots and slashes are rejected so the id can never name a parent directory
# or hide an unexpected extension.
_SAFE_SID_CHARS = frozenset(
'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-'
)
def is_safe_session_id(sid) -> bool:
"""Return True iff ``sid`` is a non-empty path-safe session id.
Centralizes the validation previously duplicated across
``Session.load``, ``Session.load_metadata_only``,
``_repair_stale_pending``, ``/api/session/worktree/remove``, and
``/api/session/delete`` so every call site agrees on what characters
are allowed. See #3023.
"""
if not sid or not isinstance(sid, str):
return False
return all(c in _SAFE_SID_CHARS for c in sid)
def _cleanup_stale_tmp_files() -> None:
"""Best-effort removal of stale ``*.tmp.*`` files from SESSION_DIR.
@@ -693,7 +716,7 @@ class Session:
# Validate session ID format to prevent path traversal. API/gateway
# session ids may contain hyphens (for example ``api-*`` and
# ``reachy-voice-*``); allow those but still reject dots/slashes.
if not sid or not all(c in '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-' for c in sid):
if not is_safe_session_id(sid):
return None
p = SESSION_DIR / f'{sid}.json'
if not p.exists():
@@ -722,7 +745,7 @@ class Session:
"""
# Same path-safety contract as load(): hyphens are valid session ids,
# path separators and traversal dots are not.
if not sid or not all(c in '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-' for c in sid):
if not is_safe_session_id(sid):
return None
p = SESSION_DIR / f'{sid}.json'
if not p.exists():
@@ -1871,7 +1894,7 @@ def _repair_stale_pending(session) -> bool:
_age = float('inf')
sid = session.session_id
if not sid or not all(c in '0123456789abcdefghijklmnopqrstuvwxyz_' for c in sid):
if not is_safe_session_id(sid):
return False
try:
+3 -2
View File
@@ -2554,6 +2554,7 @@ from api.models import (
prune_session_from_index,
ensure_cron_project,
is_cron_session,
is_safe_session_id,
)
from api.workspace import (
load_workspaces,
@@ -5500,7 +5501,7 @@ def handle_post(handler, parsed) -> bool:
if not sid or not isinstance(sid, str) or not sid.strip():
return bad(handler, "session_id must be a non-empty string", status=400)
sid = sid.strip()
if not all(c in '0123456789abcdefghijklmnopqrstuvwxyz_' for c in sid):
if not is_safe_session_id(sid):
return bad(handler, "Invalid session_id", 400)
try:
s = get_session(sid, metadata_only=True)
@@ -5522,7 +5523,7 @@ def handle_post(handler, parsed) -> bool:
sid = body.get("session_id", "")
if not sid:
return bad(handler, "session_id is required")
if not all(c in '0123456789abcdefghijklmnopqrstuvwxyz_' for c in sid):
if not is_safe_session_id(sid):
return bad(handler, "Invalid session_id", 400)
cli_meta_for_delete = _lookup_cli_session_metadata(sid)
if cli_meta_for_delete.get("read_only"):
@@ -0,0 +1,91 @@
"""Regression coverage for #3023: hyphenated session ids must be safe at every
filesystem-touching call site, not just Session.load() / load_metadata_only().
Before this fix, ``api-*`` and ``reachy-voice-*`` sessions could be loaded
into the sidebar but rejected with HTTP 400 by ``/api/session/delete`` and
``/api/session/worktree/remove``, producing a confusing "visible but
undeletable" UX. The fix factored validation into ``is_safe_session_id``
and applied it consistently across the five known call sites.
"""
from api.models import is_safe_session_id
def test_is_safe_session_id_accepts_hyphenated_gateway_ids():
assert is_safe_session_id("api-182894de593468b6") is True
assert is_safe_session_id("reachy-voice-20260513-1131-d5542adf") is True
def test_is_safe_session_id_accepts_classic_lowercase_ids():
assert is_safe_session_id("sess_a") is True
assert is_safe_session_id("20260528_010551_b8e14a") is True
def test_is_safe_session_id_accepts_uppercase_and_mixed_case():
# Some API/gateway producers emit mixed case; the helper allows it.
# Filesystem case-folding on macOS/Windows is the caller's contract.
assert is_safe_session_id("API-CAFEBABE") is True
assert is_safe_session_id("Foo") is True
def test_is_safe_session_id_rejects_path_traversal():
assert is_safe_session_id("bad/../id") is False
assert is_safe_session_id("../etc/passwd") is False
assert is_safe_session_id("a/b") is False
def test_is_safe_session_id_rejects_dots_and_extensions():
assert is_safe_session_id("bad.id") is False
assert is_safe_session_id("session.json") is False
def test_is_safe_session_id_rejects_empty_and_non_string():
assert is_safe_session_id("") is False
assert is_safe_session_id(None) is False
assert is_safe_session_id(123) is False
assert is_safe_session_id([]) is False
def test_is_safe_session_id_rejects_whitespace_and_control_chars():
assert is_safe_session_id("api 1234") is False
assert is_safe_session_id("api\n1234") is False
assert is_safe_session_id("api\t1234") is False
def test_session_delete_validator_accepts_hyphenated_ids():
"""``/api/session/delete`` validator path must accept hyphens (#3023)."""
import inspect
import api.routes as routes
src = inspect.getsource(routes.handle_post if hasattr(routes, "handle_post") else routes)
# Should call the shared helper, not the old magic-string check
assert "is_safe_session_id" in src
assert "'0123456789abcdefghijklmnopqrstuvwxyz_'" not in src
def test_session_worktree_remove_validator_accepts_hyphenated_ids():
"""``/api/session/worktree/remove`` validator path must accept hyphens (#3023)."""
routes_src = open("api/routes.py", encoding="utf-8").read()
# The worktree-remove block must use the shared helper
block_start = routes_src.find('/api/session/worktree/remove')
block_end = routes_src.find('/api/session/delete', block_start)
assert block_start != -1 and block_end != -1
block = routes_src[block_start:block_end]
assert "is_safe_session_id" in block
assert "'0123456789abcdefghijklmnopqrstuvwxyz_'" not in block
def test_repair_stale_pending_validator_accepts_hyphenated_ids():
"""``_repair_stale_pending`` in models.py must accept hyphens (#3023)."""
models_src = open("api/models.py", encoding="utf-8").read()
assert "is_safe_session_id" in models_src
# No magic-string validator should survive anywhere in models.py
assert "'0123456789abcdefghijklmnopqrstuvwxyz_'" not in models_src
def test_no_lowercase_only_magic_string_remains_in_session_validators():
"""Repo-wide guarantee: no validator falls back to the old lowercase-only set."""
for path in ("api/models.py", "api/routes.py"):
src = open(path, encoding="utf-8").read()
# The old narrow class is gone everywhere; only the helper's frozenset remains.
narrow = "'0123456789abcdefghijklmnopqrstuvwxyz_'"
assert narrow not in src, f"{path} still uses the narrow lowercase-only validator"