From bbdacdca5c6c946a818a8e1a34cbdcf91ef6280d Mon Sep 17 00:00:00 2001 From: nesquena-hermes Date: Thu, 30 Apr 2026 21:32:45 +0000 Subject: [PATCH] fix: context window indicator overflow (#1356) - api/streaming.py SSE payload now falls back to agent.model_metadata.get_model_context_length when compressor doesn't supply context_length (mirrors the session-save fallback shipped in v0.50.247). - api/streaming.py also falls back to s.last_prompt_tokens to avoid using the cumulative input_tokens counter. - static/ui.js tracks rawPct separately from pct and shows '(context exceeded)' tooltip when rawPct > 100 instead of misleading '100% used (0% left)'. - static/messages.js clears 'Uploading...' composer status after upload completes. Co-authored-by: nesquena-hermes --- api/streaming.py | 22 ++++++++++++++++++++++ static/messages.js | 4 ++++ static/ui.js | 6 ++++-- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/api/streaming.py b/api/streaming.py index c30f1b05..62a034a8 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -2241,6 +2241,28 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta usage['context_length'] = getattr(_cc, 'context_length', 0) or 0 usage['threshold_tokens'] = getattr(_cc, 'threshold_tokens', 0) or 0 usage['last_prompt_tokens'] = getattr(_cc, 'last_prompt_tokens', 0) or 0 + # Fallback: when the compressor is absent or reports context_length=0, + # resolve the model's context window from metadata so the UI indicator + # shows the correct percentage rather than overflowing against the 128K + # JS default. Mirrors the session-save fallback above (lines ~2205-2217). + if not usage.get('context_length'): + try: + from agent.model_metadata import get_model_context_length as _get_cl + _fb_cl = _get_cl( + getattr(agent, 'model', resolved_model or '') or '', + getattr(agent, 'base_url', '') or '', + ) + if _fb_cl: + usage['context_length'] = _fb_cl + except Exception: + pass + # Fallback: when last_prompt_tokens is missing (no compressor), use the + # session-persisted value rather than letting the frontend fall back to + # the cumulative input_tokens counter, which overflows for long sessions. + if not usage.get('last_prompt_tokens'): + _sess_lpt = getattr(s, 'last_prompt_tokens', 0) or 0 + if _sess_lpt: + usage['last_prompt_tokens'] = _sess_lpt # (reasoning trace already attached + saved above, before s.save()) # Leftover-steer delivery: if a /steer was queued (via # api/chat/steer) but the agent finished its turn before diff --git a/static/messages.js b/static/messages.js index c4f92d9f..913419cb 100644 --- a/static/messages.js +++ b/static/messages.js @@ -141,6 +141,10 @@ async function send(){ let uploaded=[]; try{uploaded=await uploadPendingFiles();} catch(e){if(!text){setComposerStatus(`Upload error: ${e.message}`);return;}} + // Clear the uploading status now that upload is done — if we don't clear here + // it stays visible for the entire duration of the agent stream, since + // setComposerStatus('') is only called in setBusy(false), not setBusy(true). + setComposerStatus(''); const uploadedNames=uploaded.map(u=>u.name||u); const uploadedPaths=uploaded.map(u=>u&&u.is_image?(u.name||u.filename||u):(u.path||u.name||u)); diff --git a/static/ui.js b/static/ui.js index 6d01dec3..823a1c31 100644 --- a/static/ui.js +++ b/static/ui.js @@ -864,7 +864,9 @@ function _syncCtxIndicator(usage){ } if(wrap) wrap.style.display=''; const hasPromptTok=!!promptTok; - const pct=hasPromptTok?Math.min(100,Math.round((promptTok/ctxWindow)*100)):0; + const rawPct=hasPromptTok?Math.round((promptTok/ctxWindow)*100):0; + const pct=Math.min(100,rawPct); + const overflowed=rawPct>100; const ring=$('ctxRingValue'); const center=$('ctxPercent'); const usageLine=$('ctxTooltipUsage'); @@ -908,7 +910,7 @@ function _syncCtxIndicator(usage){ if(!hasExplicitCtx&&hasPromptTok) label+=' (est. 128K)'; if(cost) label+=` \u00b7 $${cost<0.01?cost.toFixed(4):cost.toFixed(2)}`; el.setAttribute('aria-label',label); - if(usageLine) usageLine.textContent=hasPromptTok?`${pct}% used (${Math.max(0,100-pct)}% left)`:`${_fmtTokens(totalTok)} tokens used`; + if(usageLine) usageLine.textContent=hasPromptTok?(overflowed?`${rawPct}% used (context exceeded)`:`${pct}% used (${100-pct}% left)`):`${_fmtTokens(totalTok)} tokens used`; if(tokensLine) tokensLine.textContent=hasPromptTok?`${_fmtTokens(promptTok)} / ${_fmtTokens(ctxWindow)} tokens used`:`In: ${_fmtTokens(usage.input_tokens||0)} \u00b7 Out: ${_fmtTokens(usage.output_tokens||0)}`; const threshold=usage.threshold_tokens||0; if(thresholdLine){