Merge pull request #2918

# Conflicts:
#	CHANGELOG.md
This commit is contained in:
nesquena-hermes
2026-05-25 17:17:31 +00:00
3 changed files with 57 additions and 7 deletions
+1
View File
@@ -6,6 +6,7 @@
### Fixed
- When the session index is missing, WebUI now starts a background rebuild while preserving the first sidebar full-scan result, so the index is primed for later requests without temporarily hiding existing sessions.
- Live token-usage hints now cap the cumulative in-flight tool-result prompt estimate per assistant turn, preventing many large tool callbacks from temporarily inflating the context ring before exact provider accounting arrives.
## [v0.51.135] — 2026-05-25 — Release DG (stage-batch17 — 9-PR small-fix batch)
+28 -7
View File
@@ -2851,6 +2851,7 @@ _TOOL_RESULT_SNIPPET_MAX = 4000
_LIVE_TOOL_PROMPT_DELTA_MAX = 12_000
_LIVE_TOOL_PROMPT_TURN_MAX = 24_000
def _bounded_live_tool_prompt_delta(messages, *, cap: int = _LIVE_TOOL_PROMPT_DELTA_MAX) -> int:
@@ -2878,19 +2879,32 @@ def live_usage_prompt_estimate_after_tool_delta(
exact_prompt_tokens: int = 0,
messages=None,
cap: int = _LIVE_TOOL_PROMPT_DELTA_MAX,
turn_tool_prompt_tokens: int = 0,
turn_cap: int = _LIVE_TOOL_PROMPT_TURN_MAX,
) -> dict:
"""Compute the live `last_prompt_tokens` estimate after a tool update.
Exact compressor/provider prompt accounting wins. When no newer exact prompt
is available, add only a bounded live tool delta to the persisted base.
is available, add only bounded live tool deltas to the persisted base.
"""
base = int(base_prompt_tokens or 0)
exact = int(exact_prompt_tokens or 0)
if exact and exact != base:
return {'last_prompt_tokens': exact, 'estimated': False}
return {
'last_prompt_tokens': exact,
'estimated': False,
'turn_tool_prompt_tokens': 0,
}
prior_turn_delta = max(0, int(turn_tool_prompt_tokens or 0))
turn_ceiling = max(0, int(turn_cap or 0))
next_turn_delta = min(
prior_turn_delta + _bounded_live_tool_prompt_delta(messages, cap=cap),
turn_ceiling,
)
return {
'last_prompt_tokens': base + _bounded_live_tool_prompt_delta(messages, cap=cap),
'last_prompt_tokens': base + next_turn_delta,
'estimated': True,
'turn_tool_prompt_tokens': next_turn_delta,
}
@@ -3396,6 +3410,7 @@ def _run_agent_streaming(
agent = None
_live_prompt_estimate_tokens = [0]
_live_prompt_exact_tokens = [0]
_live_prompt_estimate_tool_delta_tokens = [0]
_live_prompt_estimate_seen_ids = set()
def _seed_live_prompt_estimate() -> int:
@@ -3425,10 +3440,15 @@ def _run_agent_streaming(
"""Increment a rough next-prompt estimate from live tool activity."""
if not messages:
return _live_prompt_estimate_tokens[0]
_delta = _bounded_live_tool_prompt_delta(messages)
if _delta > 0:
_seed_live_prompt_estimate()
_live_prompt_estimate_tokens[0] += _delta
_seed_live_prompt_estimate()
_usage = live_usage_prompt_estimate_after_tool_delta(
base_prompt_tokens=_live_prompt_exact_tokens[0],
exact_prompt_tokens=_live_prompt_exact_tokens[0],
messages=messages,
turn_tool_prompt_tokens=_live_prompt_estimate_tool_delta_tokens[0],
)
_live_prompt_estimate_tokens[0] = _usage['last_prompt_tokens']
_live_prompt_estimate_tool_delta_tokens[0] = _usage['turn_tool_prompt_tokens']
return _live_prompt_estimate_tokens[0]
def _live_usage_snapshot():
@@ -3490,6 +3510,7 @@ def _run_agent_streaming(
if _real_prompt_tokens and _real_prompt_tokens != _live_prompt_exact_tokens[0]:
_live_prompt_exact_tokens[0] = _real_prompt_tokens
_live_prompt_estimate_tokens[0] = _real_prompt_tokens
_live_prompt_estimate_tool_delta_tokens[0] = 0
elif _live_prompt_estimate_tokens[0] > _real_prompt_tokens:
_usage['last_prompt_tokens'] = _live_prompt_estimate_tokens[0]
@@ -20,3 +20,31 @@ def test_live_usage_estimate_preserves_real_prompt_when_exact_prompt_advances():
)
assert usage["last_prompt_tokens"] == 136_000
assert usage["turn_tool_prompt_tokens"] == 0
def test_live_usage_estimate_caps_cumulative_tool_delta_per_turn():
    """Repeated tool updates within one turn stop growing at the per-turn cap.

    The per-call delta helper is replaced with a stub that always reports the
    full per-call cap, and each call's ``turn_tool_prompt_tokens`` is fed back
    into the next call. After twenty calls the running delta must be 24_000
    rather than 20 * 12_000.
    """
    from api import streaming

    def _always_full_delta(messages, cap=12_000):
        # Stand-in for the real helper: report the whole per-call cap
        # regardless of what the messages contain.
        return int(cap or 0)

    base_prompt_tokens = 86_723
    carried_turn_delta = 0
    usage = None

    # Swap the helper on the module itself; this presumes the function under
    # test resolves it through ``api.streaming`` globals at call time.
    saved_helper = streaming._bounded_live_tool_prompt_delta
    streaming._bounded_live_tool_prompt_delta = _always_full_delta
    try:
        for _ in range(20):
            usage = live_usage_prompt_estimate_after_tool_delta(
                base_prompt_tokens=base_prompt_tokens,
                exact_prompt_tokens=base_prompt_tokens,
                messages=[{"role": "tool", "content": "x" * 80_000}],
                turn_tool_prompt_tokens=carried_turn_delta,
            )
            # Carry the running per-turn delta into the next tool update.
            carried_turn_delta = usage["turn_tool_prompt_tokens"]
    finally:
        # Always restore the real helper so later tests are unaffected.
        streaming._bounded_live_tool_prompt_delta = saved_helper

    assert usage is not None
    assert usage["turn_tool_prompt_tokens"] == 24_000
    assert usage["last_prompt_tokens"] == base_prompt_tokens + 24_000
    assert usage["last_prompt_tokens"] < base_prompt_tokens + (20 * 12_000)