mirror of
https://github.com/nesquena/hermes-webui.git
synced 2026-05-25 11:10:18 +00:00
Stage 406: PR #2827 — fix(state-sync): pass profile explicitly so background-thread DB writes hit the right state.db (#2762) by @Koraji95-coder
This commit is contained in:
+83
-14
@@ -20,22 +20,78 @@ from pathlib import Path
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _get_state_db():
|
||||
"""Get a SessionDB instance for the active profile's state.db.
|
||||
Returns None if hermes_state is not importable or DB is unavailable.
|
||||
Each caller is responsible for calling db.close() when done.
|
||||
def _get_state_db(profile: str=None):
|
||||
"""Get a SessionDB instance for a profile's state.db.
|
||||
|
||||
When ``profile`` is provided the function resolves *that* profile's
|
||||
home directory directly (via ``_resolve_profile_home_for_name``).
|
||||
If resolution fails (unknown profile name, IO error, etc.) the
|
||||
function returns ``None`` rather than silently falling back to
|
||||
``HERMES_HOME`` — silently routing the write to the wrong DB
|
||||
would defeat the point of the explicit-profile path (#2762).
|
||||
|
||||
When ``profile`` is None it falls back to the TLS-based
|
||||
``get_active_hermes_home()`` lookup for backward compatibility,
|
||||
with a final ``HERMES_HOME`` fallback only on that path. TLS may be
|
||||
unset in background/worker threads, in which case the lookup falls
|
||||
through to the process-global active profile and can write to the
|
||||
wrong DB. Callers that know the session's profile (e.g.
|
||||
``sync_session_usage`` after a stream completes on a background
|
||||
thread) should pass it explicitly to avoid that race.
|
||||
|
||||
Returns None if hermes_state is not importable, the explicit
|
||||
profile cannot be resolved, or the DB is unavailable. Each caller
|
||||
is responsible for calling db.close() when done.
|
||||
"""
|
||||
try:
|
||||
from hermes_state import SessionDB
|
||||
except ImportError:
|
||||
return None
|
||||
|
||||
try:
|
||||
from api.profiles import get_active_hermes_home
|
||||
hermes_home = Path(get_active_hermes_home()).expanduser().resolve()
|
||||
except Exception:
|
||||
logger.debug("Failed to resolve hermes home, using default")
|
||||
hermes_home = Path(os.getenv('HERMES_HOME', str(Path.home() / '.hermes')))
|
||||
if profile is not None:
|
||||
# Explicit-profile path — a resolution failure here MUST NOT
|
||||
# silently fall back to HERMES_HOME or the caller's "write to
|
||||
# the named profile" contract is broken (the original #2762
|
||||
# symptom: writes leaking into the wrong profile's state.db).
|
||||
#
|
||||
# Defense-in-depth (per #2827 maintainer review): validate the
|
||||
# name shape BEFORE handing it to ``_resolve_profile_home_for_name``.
|
||||
# The resolver itself rarely raises — for an invalid-but-non-
|
||||
# malicious name (e.g. one that fails ``_PROFILE_ID_RE``) it
|
||||
# quietly returns ``_DEFAULT_HERMES_HOME``, which is the exact
|
||||
# leak we're trying to prevent on the explicit-profile path.
|
||||
# Validating up-front turns that quiet leak into an explicit
|
||||
# "refuse + log + return None" so the contract is "write to
|
||||
# the EXACT named profile, or write nowhere."
|
||||
try:
|
||||
from api.profiles import (
|
||||
_resolve_profile_home_for_name,
|
||||
_PROFILE_ID_RE,
|
||||
_is_root_profile,
|
||||
)
|
||||
if not (_is_root_profile(profile) or _PROFILE_ID_RE.fullmatch(profile)):
|
||||
logger.warning(
|
||||
"state_sync: refusing invalid profile name %r — skipping "
|
||||
"write rather than leaking to the default state.db (#2762).",
|
||||
profile,
|
||||
)
|
||||
return None
|
||||
hermes_home = Path(_resolve_profile_home_for_name(profile)).expanduser().resolve()
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"state_sync: could not resolve profile %r — skipping write rather "
|
||||
"than leaking to the active profile (#2762).", profile,
|
||||
)
|
||||
return None
|
||||
else:
|
||||
# Implicit / TLS-fallback path — preserves pre-#2762 behavior
|
||||
# for any caller that doesn't pass profile= explicitly.
|
||||
try:
|
||||
from api.profiles import get_active_hermes_home
|
||||
hermes_home = Path(get_active_hermes_home()).expanduser().resolve()
|
||||
except Exception:
|
||||
logger.debug("Failed to resolve hermes home, using default")
|
||||
hermes_home = Path(os.getenv('HERMES_HOME', str(Path.home() / '.hermes')))
|
||||
|
||||
db_path = hermes_home / 'state.db'
|
||||
if not db_path.exists():
|
||||
@@ -48,11 +104,16 @@ def _get_state_db():
|
||||
return None
|
||||
|
||||
|
||||
def sync_session_start(session_id: str, model=None) -> None:
|
||||
def sync_session_start(session_id: str, model=None, profile: str=None) -> None:
|
||||
"""Register a WebUI session in state.db (idempotent).
|
||||
Called when a session's first message is sent.
|
||||
|
||||
``profile`` lets the caller name the target state.db explicitly,
|
||||
avoiding the TLS-vs-background-thread mismatch in #2762. When
|
||||
omitted, the active profile is resolved from TLS (then process
|
||||
globals) as before.
|
||||
"""
|
||||
db = _get_state_db()
|
||||
db = _get_state_db(profile=profile)
|
||||
if not db:
|
||||
return
|
||||
try:
|
||||
@@ -72,12 +133,20 @@ def sync_session_start(session_id: str, model=None) -> None:
|
||||
|
||||
def sync_session_usage(session_id: str, input_tokens: int=0, output_tokens: int=0,
|
||||
estimated_cost=None, model=None, title: str=None,
|
||||
message_count: int=None) -> None:
|
||||
message_count: int=None, profile: str=None) -> None:
|
||||
"""Update token usage and title for a WebUI session in state.db.
|
||||
Called after each turn completes. Uses absolute=True to set totals
|
||||
(the WebUI Session already accumulates across turns).
|
||||
|
||||
``profile`` lets the caller name the target state.db explicitly,
|
||||
which is what fixes #2762: this function is invoked from the
|
||||
agent streaming worker thread, where the request-thread's TLS
|
||||
profile context has not been propagated. Without an explicit
|
||||
profile, the TLS lookup falls back to the process-global active
|
||||
profile and writes the session's usage to the wrong state.db
|
||||
(e.g. ``hiyuki``'s instead of the cookie-switched ``maiko``'s).
|
||||
"""
|
||||
db = _get_state_db()
|
||||
db = _get_state_db(profile=profile)
|
||||
if not db:
|
||||
return
|
||||
try:
|
||||
|
||||
@@ -5070,6 +5070,12 @@ def _run_agent_streaming(
|
||||
model=model,
|
||||
title=s.title,
|
||||
message_count=len(s.messages),
|
||||
# #2762: pass the session's profile explicitly so the
|
||||
# background-thread state.db lookup doesn't fall
|
||||
# through to the process-global active profile and
|
||||
# write to the wrong DB (TLS profile is set on the
|
||||
# HTTP thread but not propagated to this worker).
|
||||
profile=getattr(s, 'profile', None),
|
||||
)
|
||||
except Exception:
|
||||
logger.debug("Failed to sync session to insights")
|
||||
|
||||
@@ -0,0 +1,284 @@
|
||||
"""
|
||||
Regression test for #2762 — state_sync writes to wrong profile's state.db
|
||||
when profile is switched via WebUI cookie.
|
||||
|
||||
Root cause: ``_get_state_db()`` relied on TLS-based
|
||||
``get_active_hermes_home()`` to pick the DB path. TLS gets set on the HTTP
|
||||
thread by the cookie middleware, but the agent streaming worker thread that
|
||||
calls ``sync_session_usage`` does NOT inherit that TLS, so the lookup falls
|
||||
through to the process-global active profile and writes to the wrong DB.
|
||||
|
||||
Fix: ``_get_state_db(profile=...)`` accepts an explicit profile name and
|
||||
resolves *that* profile's home directly via
|
||||
``_resolve_profile_home_for_name``. Callers that know the session's profile
|
||||
(e.g. ``sync_session_usage`` after streaming completes) pass it explicitly,
|
||||
avoiding the TLS race.
|
||||
|
||||
These tests pin:
|
||||
1. ``_get_state_db(profile='X')`` resolves X's home, not the active profile's.
|
||||
2. ``sync_session_usage(..., profile='X')`` writes to X's state.db only,
|
||||
even when the global active profile is set to Y.
|
||||
3. ``sync_session_usage`` with no profile kwarg falls back to the old
|
||||
TLS behavior (so existing callers don't regress).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# Make sure we can import the api package the same way the server does.
|
||||
_REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
if str(_REPO_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(_REPO_ROOT))
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def two_profile_homes(tmp_path, monkeypatch):
|
||||
"""Stand up two minimal profile homes with state.db initialized via
|
||||
``hermes_state.SessionDB`` itself (so the schema matches what the
|
||||
production code expects — `sync_session_usage` does a raw-SQL
|
||||
UPDATE of `message_count`, which hand-rolled schemas could miss).
|
||||
Per Copilot review on PR #2827.
|
||||
"""
|
||||
# Skip the fixture cleanly if the production package isn't importable
|
||||
# in this env — same gate the tests below use.
|
||||
pytest.importorskip("hermes_state")
|
||||
from hermes_state import SessionDB
|
||||
|
||||
hiyuki_home = tmp_path / 'hiyuki'
|
||||
maiko_home = tmp_path / 'maiko'
|
||||
for home in (hiyuki_home, maiko_home):
|
||||
home.mkdir(parents=True)
|
||||
# Touch state.db then open via SessionDB so its own constructor
|
||||
# runs whatever schema init / migration the production code
|
||||
# would see at runtime. Then close immediately — each test
|
||||
# opens its own handle through the production code path.
|
||||
(home / 'state.db').touch()
|
||||
SessionDB(home / 'state.db').close()
|
||||
|
||||
# Stub api.profiles to return our temp paths
|
||||
import api.profiles as profiles_mod
|
||||
|
||||
def fake_resolve(name):
|
||||
if name == 'hiyuki':
|
||||
return hiyuki_home
|
||||
if name == 'maiko':
|
||||
return maiko_home
|
||||
raise LookupError(name)
|
||||
|
||||
monkeypatch.setattr(profiles_mod, '_resolve_profile_home_for_name', fake_resolve)
|
||||
# Active profile is hiyuki — the WRONG one for tests that pass profile='maiko'
|
||||
monkeypatch.setattr(profiles_mod, 'get_active_hermes_home', lambda: hiyuki_home)
|
||||
|
||||
return {'hiyuki': hiyuki_home, 'maiko': maiko_home}
|
||||
|
||||
|
||||
def _read_session(db_path: Path, session_id: str):
|
||||
if not db_path.exists():
|
||||
return None
|
||||
conn = sqlite3.connect(db_path)
|
||||
try:
|
||||
cur = conn.execute(
|
||||
"SELECT session_id, title, input_tokens, output_tokens "
|
||||
"FROM sessions WHERE session_id = ?",
|
||||
(session_id,),
|
||||
)
|
||||
row = cur.fetchone()
|
||||
return row
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def test_get_state_db_honors_explicit_profile_kwarg(two_profile_homes):
|
||||
"""_get_state_db(profile='maiko') resolves to maiko's home, NOT
|
||||
the active profile (hiyuki)."""
|
||||
from api.state_sync import _get_state_db
|
||||
|
||||
# Some installs ship without the hermes_state package; the function
|
||||
# returns None gracefully and there's nothing to assert.
|
||||
try:
|
||||
import hermes_state # noqa: F401
|
||||
except ImportError:
|
||||
pytest.skip("hermes_state package not available in this test env")
|
||||
|
||||
db = _get_state_db(profile='maiko')
|
||||
if db is None:
|
||||
pytest.skip("SessionDB could not open the test db (env issue)")
|
||||
# We don't have a public accessor for the underlying path on SessionDB,
|
||||
# but writing through it and reading the raw file should work.
|
||||
db.ensure_session(session_id='probe-2762', source='webui', model='test')
|
||||
try:
|
||||
db.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# maiko's state.db should have the row; hiyuki's should not.
|
||||
assert _read_session(two_profile_homes['maiko'] / 'state.db', 'probe-2762') is not None, \
|
||||
"session was not written to maiko's state.db"
|
||||
assert _read_session(two_profile_homes['hiyuki'] / 'state.db', 'probe-2762') is None, \
|
||||
"session leaked into hiyuki's state.db — TLS-fallback regressed"
|
||||
|
||||
|
||||
def test_sync_session_usage_writes_only_to_named_profile(two_profile_homes):
|
||||
"""sync_session_usage(..., profile='maiko') is the actual scenario from
|
||||
the streaming worker thread post-#2762. The write must land in maiko's
|
||||
state.db only, regardless of what the global active profile is."""
|
||||
try:
|
||||
import hermes_state # noqa: F401
|
||||
except ImportError:
|
||||
pytest.skip("hermes_state package not available in this test env")
|
||||
|
||||
from api.state_sync import sync_session_usage
|
||||
|
||||
sync_session_usage(
|
||||
session_id='2762-regression',
|
||||
input_tokens=42,
|
||||
output_tokens=17,
|
||||
estimated_cost=0.001,
|
||||
model='test-model',
|
||||
title='2762 regression test',
|
||||
message_count=3,
|
||||
profile='maiko',
|
||||
)
|
||||
|
||||
maiko_row = _read_session(two_profile_homes['maiko'] / 'state.db', '2762-regression')
|
||||
hiyuki_row = _read_session(two_profile_homes['hiyuki'] / 'state.db', '2762-regression')
|
||||
|
||||
assert maiko_row is not None, \
|
||||
"sync_session_usage(profile='maiko') did not write to maiko's state.db"
|
||||
assert hiyuki_row is None, \
|
||||
"sync_session_usage(profile='maiko') leaked into hiyuki's state.db — #2762 regression"
|
||||
|
||||
|
||||
def test_sync_session_usage_without_profile_kwarg_uses_active(two_profile_homes):
|
||||
"""Backward compatibility: when called without a profile kwarg (the
|
||||
pre-#2762 call shape), the function falls back to the active profile
|
||||
(here: hiyuki). Existing callers should not regress."""
|
||||
try:
|
||||
import hermes_state # noqa: F401
|
||||
except ImportError:
|
||||
pytest.skip("hermes_state package not available in this test env")
|
||||
|
||||
from api.state_sync import sync_session_usage
|
||||
|
||||
sync_session_usage(
|
||||
session_id='legacy-call-shape',
|
||||
input_tokens=1,
|
||||
output_tokens=2,
|
||||
model='legacy',
|
||||
title='legacy',
|
||||
message_count=1,
|
||||
)
|
||||
|
||||
hiyuki_row = _read_session(two_profile_homes['hiyuki'] / 'state.db', 'legacy-call-shape')
|
||||
assert hiyuki_row is not None, \
|
||||
"sync_session_usage() without profile= regressed: did not write to active profile's state.db"
|
||||
|
||||
|
||||
def test_unknown_explicit_profile_returns_none_not_falls_back(two_profile_homes):
|
||||
"""Copilot review of PR #2827: when ``profile`` is explicit and
|
||||
resolution fails (e.g. typoed profile name, IO error), the
|
||||
function MUST return None rather than silently fall back to
|
||||
HERMES_HOME and write to the wrong DB. That fallback would
|
||||
re-introduce the exact #2762 symptom (writes leaking into the
|
||||
active profile).
|
||||
|
||||
The fixture's `fake_resolve` raises LookupError for any name
|
||||
that isn't 'hiyuki' or 'maiko', so passing 'does-not-exist'
|
||||
here exercises the failure path.
|
||||
"""
|
||||
try:
|
||||
import hermes_state # noqa: F401
|
||||
except ImportError:
|
||||
pytest.skip("hermes_state package not available in this test env")
|
||||
|
||||
from api.state_sync import sync_session_usage
|
||||
|
||||
# Passing an unknown profile name MUST NOT cause a write to land in
|
||||
# hiyuki (the active profile's home). If we leaked there, that's
|
||||
# the exact bug we're guarding against.
|
||||
sync_session_usage(
|
||||
session_id='unknown-profile-probe',
|
||||
input_tokens=99,
|
||||
output_tokens=99,
|
||||
model='probe',
|
||||
title='probe',
|
||||
message_count=1,
|
||||
profile='does-not-exist',
|
||||
)
|
||||
|
||||
hiyuki_row = _read_session(two_profile_homes['hiyuki'] / 'state.db', 'unknown-profile-probe')
|
||||
maiko_row = _read_session(two_profile_homes['maiko'] / 'state.db', 'unknown-profile-probe')
|
||||
|
||||
assert hiyuki_row is None, (
|
||||
"unknown explicit profile leaked write into hiyuki's state.db — #2762 regression"
|
||||
)
|
||||
assert maiko_row is None, (
|
||||
"unknown explicit profile somehow ended up in maiko's state.db"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("bad_name", [
|
||||
"../etc", # path traversal attempt
|
||||
"Foo Bar", # space — invalid chars
|
||||
"FOO", # uppercase — invalid per _PROFILE_ID_RE
|
||||
"-leading-dash", # leading dash — invalid per regex (must start [a-z0-9])
|
||||
"_underscore", # leading underscore — invalid per regex
|
||||
"a" * 100, # too long (> 64 chars)
|
||||
"", # empty string is handled by _is_root_profile, separate case
|
||||
])
|
||||
def test_invalid_profile_name_refused_not_falls_back(two_profile_homes, bad_name):
|
||||
"""Per PR #2827 maintainer review: an invalid-but-non-malicious
|
||||
profile name on the explicit-profile path must be REFUSED, not
|
||||
quietly routed to the default state.db.
|
||||
|
||||
Before this defense, ``_resolve_profile_home_for_name`` would return
|
||||
``_DEFAULT_HERMES_HOME`` for any name failing ``_PROFILE_ID_RE``
|
||||
without raising — which is the exact #2762 leak symptom with a
|
||||
different trigger. The new regex check up-front turns that quiet
|
||||
leak into an explicit "refuse + log + return None" so the
|
||||
explicit-path contract is "write to the EXACT named profile, or
|
||||
write nowhere."
|
||||
|
||||
The empty string is intentionally in the parametrize set because
|
||||
we want to confirm it's refused — ``_is_root_profile('')`` returns
|
||||
False (per ``api/profiles.py:216-217`` it short-circuits on falsy
|
||||
input), so an empty explicit profile fails both the
|
||||
``_is_root_profile`` check and the regex, and the contract refuses
|
||||
the write. That's the expected behavior — an empty explicit name
|
||||
is itself a bug at the caller, not "I want the default."
|
||||
"""
|
||||
try:
|
||||
import hermes_state # noqa: F401
|
||||
except ImportError:
|
||||
pytest.skip("hermes_state package not available in this test env")
|
||||
|
||||
from api.state_sync import sync_session_usage
|
||||
|
||||
sync_session_usage(
|
||||
session_id=f'invalid-name-probe-{abs(hash(bad_name))}',
|
||||
input_tokens=99,
|
||||
output_tokens=99,
|
||||
model='probe',
|
||||
title='probe',
|
||||
message_count=1,
|
||||
profile=bad_name,
|
||||
)
|
||||
|
||||
sid = f'invalid-name-probe-{abs(hash(bad_name))}'
|
||||
hiyuki_row = _read_session(two_profile_homes['hiyuki'] / 'state.db', sid)
|
||||
maiko_row = _read_session(two_profile_homes['maiko'] / 'state.db', sid)
|
||||
|
||||
# All invalid names (including empty string) MUST be refused —
|
||||
# no row should appear in either profile's state.db.
|
||||
assert hiyuki_row is None, (
|
||||
f"invalid profile name {bad_name!r} leaked write into hiyuki's "
|
||||
"state.db — defense missed; #2762 regression"
|
||||
)
|
||||
assert maiko_row is None, (
|
||||
f"invalid profile name {bad_name!r} somehow ended up in maiko's state.db"
|
||||
)
|
||||
Reference in New Issue
Block a user