Stage 386: PR #2579

2026-05-25 11:10:18 +00:00 · 2026-05-19 18:20:06 +00:00
parent 4b72539b3c 2e9ca283dc
commit 42c2eda0fc
8 changed files with 117 additions and 14 deletions
@@ -19,6 +19,7 @@ from api.config import (
    get_effective_default_model, _get_session_agent_lock,
 )
 from api.workspace import get_last_workspace
+from api.usage import prompt_cache_hit_percent
 from api.agent_sessions import (
    _is_continuation_session,
    read_importable_agent_session_rows,
@@ -634,6 +635,7 @@ class Session:
            'estimated_cost': self.estimated_cost,
            'cache_read_tokens': self.cache_read_tokens,
            'cache_write_tokens': self.cache_write_tokens,
+            'cache_hit_percent': prompt_cache_hit_percent(self.cache_read_tokens, self.input_tokens),
            'personality': self.personality,
            'compression_anchor_visible_idx': self.compression_anchor_visible_idx,
            'compression_anchor_message_key': self.compression_anchor_message_key,
@@ -39,6 +39,7 @@ from api.compression_anchor import visible_messages_for_anchor
 from api.metering import meter
 from api.run_journal import RunJournalWriter
 from api.turn_journal import append_turn_journal_event_for_stream
+from api.usage import prompt_cache_hit_percent

 # Global lock for os.environ writes. Per-session locks (_agent_lock) prevent
 # concurrent runs of the SAME session, but two DIFFERENT sessions can still
@@ -2988,6 +2989,7 @@ def _run_agent_streaming(
            'estimated_cost': 0,
            'cache_read_tokens': 0,
            'cache_write_tokens': 0,
+            'cache_hit_percent': None,
            'context_length': 0,
            'threshold_tokens': 0,
            'last_prompt_tokens': 0,
@@ -3025,6 +3027,10 @@ def _run_agent_streaming(
                        pass

        _real_prompt_tokens = int(_usage.get('last_prompt_tokens') or 0)
+        _usage['cache_hit_percent'] = prompt_cache_hit_percent(
+            _usage.get('cache_read_tokens') or 0,
+            _usage.get('input_tokens') or 0,
+        )
        if _real_prompt_tokens and _real_prompt_tokens != _live_prompt_exact_tokens[0]:
            _live_prompt_exact_tokens[0] = _real_prompt_tokens
            _live_prompt_estimate_tokens[0] = _real_prompt_tokens
@@ -4474,6 +4480,15 @@ def _run_agent_streaming(
                estimated_cost = getattr(agent, 'session_estimated_cost_usd', None)
                cache_read_tokens = getattr(agent, 'session_cache_read_tokens', 0) or 0
                cache_write_tokens = getattr(agent, 'session_cache_write_tokens', 0) or 0
+                prev_input_tokens = getattr(s, 'input_tokens', 0) or 0
+                prev_cache_read_tokens = getattr(s, 'cache_read_tokens', 0) or 0
+                turn_input_tokens = max(0, input_tokens - prev_input_tokens)
+                turn_cache_read_tokens = max(0, cache_read_tokens - prev_cache_read_tokens)
+                # Per-turn percent is computed server-side from persisted session
+                # counters so the message label uses the same denominator as the
+                # final usage payload even if the browser missed an intermediate event.
+                cache_hit_percent = prompt_cache_hit_percent(cache_read_tokens, input_tokens)
+                turn_cache_hit_percent = prompt_cache_hit_percent(turn_cache_read_tokens, turn_input_tokens)
                if input_tokens > 0:
                    s.input_tokens = input_tokens
                if output_tokens > 0:
@@ -4730,6 +4745,8 @@ def _run_agent_streaming(
                'estimated_cost': estimated_cost,
                'cache_read_tokens': cache_read_tokens,
                'cache_write_tokens': cache_write_tokens,
+                'cache_hit_percent': cache_hit_percent,
+                'turn_cache_hit_percent': turn_cache_hit_percent,
                'duration_seconds': round(_turn_duration_seconds, 3),
            }
            if _turn_tps is not None:
@@ -0,0 +1,26 @@
+"""Usage metric helpers for WebUI display payloads.
+
+Prompt-cache hit percentage is cached prompt reads over the full prompt total
+(input + cache reads + cache writes). Keep this calculation in the backend so
+the browser's context indicator and per-turn labels cannot drift apart.
+"""
+
+
+def _to_int(value) -> int:
+    """Coerce *value* to ``int``, falling back to 0 when that is not possible.
+
+    Falsy inputs (``None``, ``0``, ``""``) become 0 via ``value or 0``. Inputs
+    that ``int()`` rejects -- unsupported types, non-numeric strings, NaN, and
+    infinities -- also become 0, so callers get a number instead of an
+    exception.
+    """
+    try:
+        return int(value or 0)
+    except (TypeError, ValueError, OverflowError):
+        # int(float("inf")) raises OverflowError, not ValueError, so it has to
+        # be listed explicitly for the fallback to cover non-finite floats.
+        return 0
+
+
+def prompt_cache_hit_percent(cache_read_tokens, prompt_tokens):
+    """Compute the prompt-cache hit rate as a whole-number percentage.
+
+    Both arguments are coerced with ``_to_int`` before use. ``prompt_tokens``
+    must be the full prompt total -- ordinary input plus cache reads plus
+    cache writes (matching Agent's ``session_prompt_tokens`` value).
+
+    Returns ``None`` when either coerced count is zero or negative; otherwise
+    the rounded percentage of cache reads over the prompt total, capped at 100.
+    """
+    reads = _to_int(cache_read_tokens)
+    total = _to_int(prompt_tokens)
+    if reads > 0 and total > 0:
+        percent = round((reads / total) * 100)
+        # Reads can exceed the reported total; never show more than 100%.
+        return percent if percent < 100 else 100
+    return None
@@ -215,6 +215,8 @@ const LOCALES = {
    focus_label: 'Focus',
    token_usage_on: 'Token usage on',
    token_usage_off: 'Token usage off',
+    usage_cache_hit_detail: 'Cache: {0}% hit ({1} read / {2} write)',
+    usage_cached_percent: '{0}% cached',
    theme_usage: 'Usage: /theme ',
    theme_set: 'Theme: ',
    no_active_session: 'No active session',
@@ -1434,6 +1436,8 @@ const LOCALES = {
    focus_label: 'Focus',
    token_usage_on: 'Uso token attivo',
    token_usage_off: 'Uso token disattivo',
+    usage_cache_hit_detail: 'Cache: {0}% in cache ({1} letti / {2} scritti)',
+    usage_cached_percent: '{0}% in cache',
    theme_usage: 'Uso: /theme ',
    theme_set: 'Tema: ',
    no_active_session: 'Nessuna sessione attiva',
@@ -2645,6 +2649,8 @@ const LOCALES = {
    focus_label: 'フォーカス',
    token_usage_on: 'トークン使用量: ON',
    token_usage_off: 'トークン使用量: OFF',
+    usage_cache_hit_detail: 'キャッシュ: {0}% ヒット（読み取り {1} / 書き込み {2}）',
+    usage_cached_percent: '{0}% キャッシュ済み',
    theme_usage: '使い方: /theme ',
    theme_set: 'テーマ: ',
    no_active_session: 'アクティブなセッションがありません',
@@ -3817,6 +3823,8 @@ const LOCALES = {
    token_usage_on: 'Отображение токенов включено',
    usage_personality_none: 'none', // TODO: translate
    token_usage_off: 'Отображение токенов выключено',
+    usage_cache_hit_detail: 'Кэш: {0}% попаданий ({1} чтение / {2} запись)',
+    usage_cached_percent: '{0}% из кэша',
    theme_usage: 'Использование: /theme ',
    theme_set: 'Тема: ',
    no_active_session: 'Нет активной сессии',
@@ -5004,6 +5012,8 @@ const LOCALES = {
    token_usage_on: 'Uso de tokens activado',
    usage_personality_none: 'none', // TODO: translate
    token_usage_off: 'Uso de tokens desactivado',
+    usage_cache_hit_detail: 'Caché: {0}% de acierto ({1} lectura / {2} escritura)',
+    usage_cached_percent: '{0}% en caché',
    theme_usage: 'Uso: /theme ',
    theme_set: 'Tema: ',
    no_active_session: 'No hay ninguna sesión activa',
@@ -6128,6 +6138,8 @@ const LOCALES = {
    token_usage_on: 'Token-Verbrauch an',
    usage_personality_none: 'none', // TODO: translate
    token_usage_off: 'Token-Verbrauch aus',
+    usage_cache_hit_detail: 'Cache: {0}% Treffer ({1} gelesen / {2} geschrieben)',
+    usage_cached_percent: '{0}% im Cache',
    theme_usage: 'Nutzung: /theme ',
    theme_set: 'Theme: ',
    no_active_session: 'Keine aktive Sitzung',
@@ -7303,6 +7315,8 @@ const LOCALES = {
    token_usage_on: 'Token 用量显示已开启',
    usage_personality_none: '无',
    token_usage_off: 'Token 用量显示已关闭',
+    usage_cache_hit_detail: '缓存：{0}% 命中（读取 {1} / 写入 {2}）',
+    usage_cached_percent: '{0}% 已缓存',
    theme_usage: '用法：/theme ',
    theme_set: '主题：',
    no_active_session: '当前没有活动会话',
@@ -8414,6 +8428,8 @@ const LOCALES = {
    focus_label: '\u4e3b\u984c',
    token_usage_on: 'Token \u7528\u91cf\u986f\u793a\u5df2\u958b\u555f',
    token_usage_off: 'Token \u7528\u91cf\u986f\u793a\u5df2\u95dc\u9589',
+    usage_cache_hit_detail: '快取：{0}% 命中（讀取 {1} / 寫入 {2}）',
+    usage_cached_percent: '{0}% 已快取',
    theme_usage: '\u7528\u6cd5\uff1a/theme ',
    theme_set: '\u4e3b\u984c\uff1a',
    no_active_session: '\u7576\u524d\u6c92\u6709\u6d3b\u52d5\u6703\u8a71',
@@ -9617,6 +9633,8 @@ const LOCALES = {
    focus_label: 'Foco',
    token_usage_on: 'Uso de tokens ligado',
    token_usage_off: 'Uso de tokens desligado',
+    usage_cache_hit_detail: 'Cache: {0}% de acerto ({1} leitura / {2} escrita)',
+    usage_cached_percent: '{0}% em cache',
    theme_usage: 'Uso: /theme ',
    theme_set: 'Tema: ',
    no_active_session: 'Nenhuma sessão ativa',
@@ -10716,6 +10734,8 @@ const LOCALES = {
    focus_label: 'Focus',
    token_usage_on: 'Token usage on',
    token_usage_off: 'Token usage off',
+    usage_cache_hit_detail: '캐시: {0}% 적중({1} 읽기 / {2} 쓰기)',
+    usage_cached_percent: '{0}% 캐시됨',
    theme_usage: 'Usage: /theme ',
    theme_set: 'Theme: ',
    no_active_session: '활성 세션 없음',
@@ -11919,6 +11939,8 @@ const LOCALES = {
    focus_label: 'Se concentrer',
    token_usage_on: 'Utilisation du jeton sur',
    token_usage_off: 'Utilisation des jetons désactivée',
+    usage_cache_hit_detail: 'Cache : {0}% de réussite ({1} lecture / {2} écriture)',
+    usage_cached_percent: '{0}% en cache',
    theme_usage: 'Utilisation : /theme ',
    theme_set: 'Thème:',
    no_active_session: 'Aucune session active',
@@ -1681,6 +1681,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
                  estimated_cost:Math.max(0,curCost-prevCost),
                  cache_read_tokens:Math.max(0,curCacheRead-_prevCacheRead),
                  cache_write_tokens:Math.max(0,curCacheWrite-_prevCacheWrite),
+                  cache_hit_percent:d.usage.turn_cache_hit_percent,
                };
              }
              if(typeof d.usage.duration_seconds==='number'){
@@ -500,6 +500,9 @@ async function newSession(flash, options={}){
        input_tokens:data.session.input_tokens||0,
        output_tokens:data.session.output_tokens||0,
        estimated_cost:data.session.estimated_cost||0,
+        cache_read_tokens:data.session.cache_read_tokens||0,
+        cache_write_tokens:data.session.cache_write_tokens||0,
+        cache_hit_percent:data.session.cache_hit_percent,
        context_length:data.session.context_length||0,
        last_prompt_tokens:data.session.last_prompt_tokens||0,
        threshold_tokens:data.session.threshold_tokens||0,
@@ -768,6 +771,9 @@ async function loadSession(sid){
      input_tokens:      _pick(u.input_tokens,      _s.input_tokens),
      output_tokens:     _pick(u.output_tokens,     _s.output_tokens),
      estimated_cost:    _pick(u.estimated_cost,    _s.estimated_cost),
+      cache_read_tokens: _pick(u.cache_read_tokens, _s.cache_read_tokens),
+      cache_write_tokens:_pick(u.cache_write_tokens,_s.cache_write_tokens),
+      cache_hit_percent: _pick(u.cache_hit_percent, _s.cache_hit_percent, null),
      context_length:    _pick(_s.context_length,    u.context_length),
      last_prompt_tokens:_pick(u.last_prompt_tokens,_s.last_prompt_tokens),
      threshold_tokens:  _pick(_s.threshold_tokens,  u.threshold_tokens),
@@ -1176,6 +1182,9 @@ function _resolveSessionModelForDisplaySoon(sid){
          input_tokens:_pick(u.input_tokens,S.session.input_tokens),
          output_tokens:_pick(u.output_tokens,S.session.output_tokens),
          estimated_cost:_pick(u.estimated_cost,S.session.estimated_cost),
+          cache_read_tokens:_pick(u.cache_read_tokens,S.session.cache_read_tokens),
+          cache_write_tokens:_pick(u.cache_write_tokens,S.session.cache_write_tokens),
+          cache_hit_percent:_pick(u.cache_hit_percent,S.session.cache_hit_percent,null),
          context_length:data.session.context_length||0,
          last_prompt_tokens:_pick(u.last_prompt_tokens,S.session.last_prompt_tokens),
          threshold_tokens:data.session.threshold_tokens||0,
@@ -2262,9 +2262,8 @@ function _syncCtxIndicator(usage){
  const compressText=pct>=75?t('ctx_compress_action'):(pct>=50?t('ctx_compress_hint'):'');
  if(compressWrap) compressWrap.style.display=compressText?'':'none';
  _setCtxCompressButton(compressBtn,compressText);
-  const cacheTotalTok=cacheReadTok+cacheWriteTok;
-  const cacheHitPct=cacheTotalTok?Math.round((cacheReadTok/cacheTotalTok)*100):null;
-  const cacheText=cacheTotalTok?`cache: ${cacheHitPct}% hit (${_fmtTokens(cacheReadTok)} read / ${_fmtTokens(cacheWriteTok)} write)`:'';
+  const cacheHitPct=usage.cache_hit_percent;
+  const cacheText=cacheHitPct!=null?t('usage_cache_hit_detail',cacheHitPct,_fmtTokens(cacheReadTok),_fmtTokens(cacheWriteTok)):'';
  let label=hasPromptTok?`Context window ${pct}% used`:`${_fmtTokens(totalTok)} tokens used`;
  if(!hasExplicitCtx&&hasPromptTok) label+=' (est. 128K)';
  if(cost) label+=` \u00b7 $${cost<0.01?cost.toFixed(4):cost.toFixed(2)}`;
@@ -6210,12 +6209,10 @@ function renderMessages(options){
        const inTok=msg._turnUsage.input_tokens||0;
        const outTok=msg._turnUsage.output_tokens||0;
        const cost=msg._turnUsage.estimated_cost;
-        const cacheRead=msg._turnUsage.cache_read_tokens||0;
-        const cacheWrite=msg._turnUsage.cache_write_tokens||0;
        let text=`${_fmtTokens(inTok)} in · ${_fmtTokens(outTok)} out`;
        if(cost) text+=` · ~$${cost<0.01?cost.toFixed(4):cost.toFixed(2)}`;
-        const cacheTotal=cacheRead+cacheWrite;
-        if(cacheTotal) text+=` · cache ${Math.round((cacheRead/cacheTotal)*100)}% hit`;
+        const cacheHitPct=msg._turnUsage.cache_hit_percent;
+        if(cacheHitPct!=null) text+=` · ${t('usage_cached_percent',cacheHitPct)}`;
        usage.textContent=text;
        fragments.push(usage);
      }
@@ -3,23 +3,34 @@ from pathlib import Path
 ROOT = Path(__file__).resolve().parents[1]


+def test_webui_backend_prompt_cache_hit_percent_uses_prompt_total_denominator():
+    """Pin the helper's contract: reads over the prompt total, capped, or None."""
+    from api.usage import prompt_cache_hit_percent
+
+    # Cache reads divided by the full prompt total, as a whole percent.
+    assert prompt_cache_hit_percent(100_000, 125_000) == 80
+    # More reads than prompt tokens is capped at 100.
+    assert prompt_cache_hit_percent(200, 100) == 100
+    # A missing or zero numerator/denominator yields no percentage at all.
+    for cache_read, prompt in ((0, 125_000), (100, 0), (None, None)):
+        assert prompt_cache_hit_percent(cache_read, prompt) is None
+
+
 def test_session_compact_exposes_prompt_cache_counters():
    from api.models import Session

    session = Session(
        session_id="issue2419_cache_usage",
        workspace="/tmp",
-        input_tokens=120_000,
+        input_tokens=125_000,
        output_tokens=5_000,
        estimated_cost=0.44,
        cache_read_tokens=100_000,
-        cache_write_tokens=20_000,
+        cache_write_tokens=5_000,
    )

    compact = session.compact()

    assert compact["cache_read_tokens"] == 100_000
-    assert compact["cache_write_tokens"] == 20_000
+    assert compact["cache_write_tokens"] == 5_000
+    assert compact["cache_hit_percent"] == 80


 def test_streaming_usage_payload_includes_prompt_cache_counters():
@@ -27,8 +38,9 @@ def test_streaming_usage_payload_includes_prompt_cache_counters():

    assert "session_cache_read_tokens" in src
    assert "session_cache_write_tokens" in src
-    assert "'cache_read_tokens': cache_read_tokens" in src
-    assert "'cache_write_tokens': cache_write_tokens" in src
+    assert "prompt_cache_hit_percent(" in src
+    assert "'cache_hit_percent':" in src
+    assert "'turn_cache_hit_percent':" in src


 def test_context_indicator_surfaces_cache_hit_rate():
@@ -36,9 +48,25 @@ def test_context_indicator_surfaces_cache_hit_rate():

    assert "cacheReadTok=usage.cache_read_tokens||0" in src
    assert "cacheWriteTok=usage.cache_write_tokens||0" in src
-    assert "cache: ${cacheHitPct}% hit" in src
+    assert "cacheHitPct=usage.cache_hit_percent" in src
+    assert "t('usage_cache_hit_detail',cacheHitPct" in src
    assert "Estimated cost: $${cost<0.01?cost.toFixed(4):cost.toFixed(2)}" in src
-    assert "cache ${Math.round((cacheRead/cacheTotal)*100)}% hit" in src
+    assert "cacheHitPct=msg._turnUsage.cache_hit_percent" in src
+    assert "t('usage_cached_percent',cacheHitPct)" in src
+    assert "cacheHitPct!=null" in src
+    assert "cacheReadTok/cacheTotalTok" not in src
+    assert "cacheRead/cacheTotal" not in src
+    assert "cacheReadTok/promptTok" not in src
+    assert "cacheRead/cacheDenom" not in src
+
+
+def test_cache_usage_labels_are_localized():
+    """Check that i18n.js defines both cache-usage strings the expected number of times."""
+    i18n_source = (ROOT / "static" / "i18n.js").read_text()
+
+    # Each key is expected to be defined exactly 11 times in the file.
+    for key in ("usage_cache_hit_detail:", "usage_cached_percent:"):
+        assert i18n_source.count(key) == 11
+    # The English wording is pinned verbatim.
+    english_entries = (
+        "usage_cache_hit_detail: 'Cache: {0}% hit ({1} read / {2} write)'",
+        "usage_cached_percent: '{0}% cached'",
+    )
+    for entry in english_entries:
+        assert entry in i18n_source


 def test_done_handler_preserves_per_turn_cache_deltas():
@@ -48,3 +76,4 @@ def test_done_handler_preserves_per_turn_cache_deltas():
    assert "curCacheRead=d.usage.cache_read_tokens||0" in src
    assert "cache_read_tokens:Math.max(0,curCacheRead-_prevCacheRead)" in src
    assert "cache_write_tokens:Math.max(0,curCacheWrite-_prevCacheWrite)" in src
+    assert "cache_hit_percent:d.usage.turn_cache_hit_percent" in src