Merge pull request #3038 from nesquena/release/stage-batch33

stage-batch33: v0.51.151 / Release DW — SSE reattach + title-lang + composer cap
2026-06-07 17:30:21 +00:00 · 2026-05-27 20:10:49 -07:00
parent 5bc3cdb3dd 7cbc5c1d89
commit 4d6269eae6
12 changed files with 304 additions and 18 deletions
@@ -3,6 +3,17 @@

 ## [Unreleased]

+## [v0.51.151] — 2026-05-28 — Release DW (stage-batch33 — 3-PR mid-risk batch: SSE reattach + title-lang + composer cap)
+
+### Fixed
+
+- Live SSE stream now reattaches when returning to a session that lost its connection during a session switch, closing the connection-leak window where stale `EventSource`s could accumulate. Also fixes `_dirty_suffix` so a dirty working tree gets its `-dirty` version suffix again (restoring dev-build cache busting), and yields the GIL after every SSE put so the HTTP server stays responsive under burst load. (#2924, #2925)
+- Generated session titles now stay in the conversation language: the title-generation prompts carry an explicit language-matching instruction, and obviously English titles for German conversations are rejected. This keeps titles from drifting into English for non-English conversations. (#2984)
+
+### Changed
+
+- Composer box max-width is now capped at 1600px on ultrawide viewports (≥1600px) so chips stay anchored against a content-sized boundary instead of stretching across 3440px+ displays. Maintainer-confirmed cap from the #2856 thread. (#2946)
+
 ## [v0.51.150] — 2026-05-28 — Release DV (stage-batch32 — single-PR reasoning-effort agent metadata)

 ### Fixed
@@ -1378,12 +1378,60 @@ def _is_provisional_title(current_title: str, messages) -> bool:
    return current == candidate


+def _detect_title_language(text: str) -> str:
+    """Return a best-effort language hint for title generation/validation.
+
+    Args:
+        text: Conversation text to inspect. ``None`` and other falsy values
+            are treated as empty text.
+
+    Returns:
+        ``'de'`` when the text looks German, otherwise ``''`` (unknown).
+        German is the only language this helper detects.
+    """
+    s = re.sub(r'\s+', ' ', str(text or '')).strip().lower()
+    if not s:
+        return ''
+    # An umlaut or eszett is treated as sufficient evidence on its own.
+    if re.search(r'[äöüß]', s):
+        return 'de'
+    german_markers = {
+        'warum', 'werden', 'wird', 'wurde', 'hier', 'nicht', 'mehr', 'alte', 'alten',
+        'bilder', 'angezeigt', 'prüfe', 'ich', 'der', 'das',
+        'und', 'oder', 'für', 'von', 'zu', 'ist', 'sind', 'bitte', 'kannst',
+    }
+    # These tokens are also ordinary English words/acronyms once lowercased
+    # ("session", "die", "den", "MIT"). They only count as supporting
+    # evidence; otherwise English text such as "Why did my session die?"
+    # would be classified as German.
+    ambiguous_markers = {'session', 'die', 'den', 'mit'}
+    tokens = re.findall(r'[A-Za-zÀ-ÖØ-öø-ÿ]+', s)
+    strong_hits = sum(1 for tok in tokens if tok in german_markers)
+    weak_hits = sum(1 for tok in tokens if tok in ambiguous_markers)
+    # Two marker hits are still required, but at least one must be unambiguous.
+    if strong_hits >= 1 and strong_hits + weak_hits >= 2:
+        return 'de'
+    return ''
+
+
+def _title_prompt_language_rule(user_text: str) -> str:
+    """Build the language instruction inserted into the title prompts.
+
+    Every prompt gets the generic "match the language" line. When the user
+    text is detected as German, an explicit German instruction and a few
+    example titles are appended. The result always ends with a newline so it
+    can be spliced between other prompt lines.
+    """
+    base_rule = "Match the language of the user question.\n"
+    if _detect_title_language(user_text) != 'de':
+        return base_rule
+    german_extras = (
+        "If the user writes German, output a German title.\n"
+        "German good: Alte Session Bilder, WebUI Attachment-Pfade, Kontextkompression Status.\n"
+    )
+    return base_rule + german_extras
+
+
+def _title_language_mismatch(user_text: str, title: str) -> bool:
+    """Report whether a generated title is obviously English for a German chat.
+
+    Returns True only when ``user_text`` is detected as German, ``title`` is
+    non-blank and not itself detected as German, and the title contains at
+    least two words from a small English vocabulary. Every other case
+    returns False (no mismatch).
+    """
+    if _detect_title_language(user_text) != 'de':
+        return False
+    normalized = str(title or '').strip().lower()
+    # A blank title, or one that already looks German, is never a mismatch.
+    if not normalized or _detect_title_language(normalized) == 'de':
+        return False
+    english_vocab = {
+        'old', 'image', 'display', 'issue', 'problem', 'discussion', 'conversation',
+        'session', 'title', 'fix', 'bug', 'attachment', 'attachments', 'context',
+    }
+    english_words = [
+        word for word in re.findall(r'[a-z]+', normalized) if word in english_vocab
+    ]
+    return len(english_words) >= 2
+
+
 def _title_prompts(user_text: str, assistant_text: str) -> tuple[str, list[str]]:
    qa = f"User question:\n{user_text[:500]}\n\nAssistant answer:\n{assistant_text[:500]}"
+    language_rule = _title_prompt_language_rule(user_text)
    prompts = [
        (
            "Generate a short session title from this conversation start.\n"
            "Use BOTH the user's question and the assistant's visible answer.\n"
+            f"{language_rule}"
            "Return only the title text, 3-8 words, as a topic label.\n"
            "Do not use markdown, bullets, labels, or prefixes like Session Title:.\n"
            "Do not output a full sentence.\n"
@@ -1395,6 +1443,7 @@ def _title_prompts(user_text: str, assistant_text: str) -> tuple[str, list[str]]
        (
            "Rewrite this conversation start as a concise noun-phrase title.\n"
            "Use the actual topic, not the task outcome.\n"
+            f"{language_rule}"
            "Return title text only.\n"
            "Do not use markdown, bullets, labels, or prefixes like Session Title:.\n"
            "Never output acknowledgements, completion status, or meta commentary."
@@ -1750,6 +1799,8 @@ def _generate_llm_session_title_for_agent(agent, user_text: str, assistant_text:
        return None, status, ''
    title = _sanitize_generated_title(raw)
    if title:
+        if _title_language_mismatch(user_text, title):
+            return None, 'llm_language_mismatch', str(raw)[:120]
        return title, status, ''
    return None, 'llm_invalid', str(raw)[:120]

@@ -1782,6 +1833,8 @@ def _generate_llm_session_title_via_aux(user_text: str, assistant_text: str, age
        return None, status, ''
    title = _sanitize_generated_title(raw)
    if title:
+        if _title_language_mismatch(user_text, title):
+            return None, 'llm_language_mismatch_aux', str(raw)[:120]
        return title, status, ''
    return None, 'llm_invalid_aux', str(raw)[:120]

@@ -1816,6 +1869,12 @@ def _fallback_title_from_exchange(user_text: str, assistant_text: str) -> Option
    assistant_text = re.sub(r'\s+', ' ', assistant_text).strip()
    combined = f"{user_text} {assistant_text}".strip().lower()
    combined_raw = f"{user_text} {assistant_text}".strip()
+    source_lang = _detect_title_language(user_text)
+
+    if source_lang == 'de' and 'bilder' in combined and 'session' in combined:
+        if 'alt' in combined or 'alte' in combined or 'alten' in combined:
+            return 'Alte Session Bilder'
+        return 'Session Bilder'

    def _contains_latin(text: str) -> bool:
        return bool(re.search(r'[A-Za-z]', text or ''))
@@ -116,9 +116,17 @@ def _dirty_suffix(path: Path, timeout=1) -> str:
    out, ok = _run_git(['diff-index', '--quiet', 'HEAD', '--'], path, timeout=timeout)
    if ok:
        return ""
-    # diff-index exits 1 with no output for a dirty tree. Timeouts and real git
-    # failures include a diagnostic; skip the suffix so the base version remains.
-    return "-dirty" if not out else ""
+    # diff-index --quiet exits 1 with no stdout/stderr to *signal* a dirty tree
+    # (not an error). _run_git() substitutes a synthetic "git exited with
+    # status N" diagnostic when both streams are empty, which makes the naive
+    # `if not out` guard always false on dirty trees — silently dropping the
+    # suffix and defeating dev-build cache busting (static/foo.js?v=… stays
+    # identical to the last-committed version). Treat the synthetic shape as
+    # the dirty signal; real errors (timeouts, missing git) carry a different
+    # diagnostic and correctly suppress the suffix.
+    if not out or out.startswith('git exited with status '):
+        return "-dirty"
+    return ""


 def _describe_git_version(path: Path, *, timeout=5, dirty_timeout=1) -> str | None:
@@ -616,6 +616,16 @@ function closeLiveStream(sessionId, streamId, source){
  if(source&&live.source!==source) return;
  try{live.source.close();}catch(_){ }
  delete LIVE_STREAMS[sessionId];
+  // closeLiveStream() is called during session-switch teardown for any session
+  // the user is no longer viewing. The stream is still active on the server,
+  // so mark the in-memory INFLIGHT entry for reattach — otherwise
+  // loadSession() returning to this session skips the reattach branch
+  // (`INFLIGHT[sessionId].reattach` was only set by the storage-load path) and the SSE
+  // is never reopened. The user then sees no streamed tokens until the LLM
+  // finishes and a metadata refresh swaps in the final reply.
+  // If the stream is terminating cleanly, _clearOwnerInflightState() has
+  // already deleted INFLIGHT[sessionId], so this is a safe no-op.
+  if(INFLIGHT[sessionId]) INFLIGHT[sessionId].reattach=true;
 }

 function closeOtherLiveStreams(activeSid){
@@ -648,9 +658,16 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
  closeOtherLiveStreams(activeSid);
  closeLiveStream(activeSid);

-  let assistantText='';
-  let reasoningText='';
-  let liveReasoningText='';
+  // On reconnect, restore accumulated text from INFLIGHT so we don't lose
+  // progress made before the session switch. Without this the closure starts
+  // empty and tokens arriving on the new SSE connection append to nothing —
+  // the already-rendered content vanishes.
+  const _lastLiveAssistant = reconnecting
+    ? INFLIGHT[activeSid]?.messages?.findLast?.(m => m.role === 'assistant' && m._live)
+    : null;
+  let assistantText = _lastLiveAssistant ? (_lastLiveAssistant.content || '') : '';
+  let reasoningText = _lastLiveAssistant ? (_lastLiveAssistant.reasoning || '') : '';
+  let liveReasoningText = reasoningText;
  let visibleInterimSnippets=[];
  let _latestGoalStatus=null;
  let _pendingGoalContinuation=null;
@@ -2135,6 +2152,13 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
      if(_deferStreamErrorIfOffline()) return;
      if(_deferStreamErrorIfPageHidden(source)) return;
      _closeSource(source);
+      // If the user has switched to a different session, don't attempt to
+      // reconnect — the old stream's EventSource was closed intentionally
+      // during session switch and reconnecting would leak a background stream.
+      if(!_isSessionActivelyViewed(activeSid)) return;
+      if(_terminalStateReached || _streamFinalized){
+        return;
+      }
      // Attempt one reconnect if the stream is still active server-side
      if(!_reconnectAttempted && streamId){
        _reconnectAttempted=true;
@@ -589,6 +589,13 @@ async function loadSession(sid){
    S.toolCalls = [];
    _messagesTruncated = false;
    _oldestIdx = 0;
+    // Close live SSE streams from the session we're leaving. The error
+    // handler checks _isSessionActivelyViewed() and won't auto-reconnect
+    // for a backgrounded session, preventing leaked connections that would
+    // pump token events into an orphaned closure, freezing the main thread.
+    if (currentSid && currentSid !== sid && typeof closeOtherLiveStreams === 'function') {
+      closeOtherLiveStreams(sid);
+    }
    _loadingOlder = false;
    const _msgInner = $('msgInner');
    if (_msgInner && currentSid !== sid) _msgInner.innerHTML = '<div style="display:flex;align-items:center;justify-content:center;height:100%;color:var(--text-muted);font-size:14px;padding:40px;text-align:center;">Loading conversation...</div>';
@@ -4535,3 +4535,8 @@ main.main.showing-logs > #mainLogs{display:flex;}
  text-align:left;
  unicode-bidi:isolate;
 }
+
+/* Cap composer width on very wide displays so the chip-cluster gap stays bounded.
+   From a viewport width of 1600px upward, .composer-box stops growing at 1600px;
+   `margin:0 auto` zeroes its top/bottom margins and lets the auto left/right
+   margins centre it when its container is wider than the cap. */
+@media (min-width:1600px) {
+  .composer-box{max-width:1600px;margin:0 auto;}
+}
@@ -1,4 +1,5 @@
 """Regression tests for preserving live streams across session switches."""
+import re
 from pathlib import Path

 REPO_ROOT = Path(__file__).parent.parent
@@ -99,3 +100,74 @@ def test_load_session_reattach_path_uses_attach_live_stream_for_running_sessions
    assert reattach_pos != -1
    assert active_pos < reattach_pos
    assert "{reconnecting:true}" in body[reattach_pos : reattach_pos + 200]
+
+
+def test_close_live_stream_marks_inflight_for_reattach_on_return():
+    """closeLiveStream() must flag the INFLIGHT entry for reattach.
+
+    Tearing down a still-active SSE transport (for example because the user
+    switched to another session) has to leave `reattach = true` on the
+    in-memory INFLIGHT entry. The flag is otherwise only set on the
+    storage-load path, so without it loadSession()'s reattach branch is
+    skipped on return: the SSE is never reopened and no streamed tokens show
+    up until the LLM finishes and a metadata refresh swaps in the final reply.
+    """
+    close_src = _function_body(MESSAGES_JS, "closeLiveStream")
+    touches_inflight = "INFLIGHT" in close_src
+    assert touches_inflight, (
+        "closeLiveStream() must touch INFLIGHT so loadSession() reattaches the "
+        "SSE when the user switches back to a still-streaming session"
+    )
+    # Two guarded spellings are accepted: the `INFLIGHT[x] && (...)` form and
+    # the `if (INFLIGHT[x]) ...` form.
+    guarded_assignment_forms = (
+        r"INFLIGHT\[\w+\]\s*&&\s*\(?INFLIGHT\[\w+\]\.reattach\s*=\s*true",
+        r"if\s*\(\s*INFLIGHT\[\w+\]\s*\)\s*INFLIGHT\[\w+\]\.reattach\s*=\s*true",
+    )
+    assert any(re.search(form, close_src) for form in guarded_assignment_forms), (
+        "closeLiveStream() must set INFLIGHT[sessionId].reattach = true "
+        "(guarded by an existence check) so loadSession()'s reattach branch fires"
+    )
+
+
+def test_close_other_live_streams_triggers_reattach_for_backgrounded_sessions():
+    """Every session closed by closeOtherLiveStreams() must be marked for reattach.
+
+    If the mark is missing, switching back to a session whose stream was
+    closed during the switch leaves its SSE permanently disconnected.
+    """
+    others_src = _function_body(MESSAGES_JS, "closeOtherLiveStreams")
+    single_src = _function_body(MESSAGES_JS, "closeLiveStream")
+    # The per-session teardown is expected to go through closeLiveStream(),
+    # which means the flag has to be set there. Both halves of that chain are
+    # checked: the delegation first, then the flag itself.
+    others_compact = "".join(others_src.split(" "))
+    delegates = "closeLiveStream(sid)" in others_compact
+    assert delegates, (
+        "closeOtherLiveStreams() must delegate teardown to closeLiveStream()"
+    )
+    sets_flag = "reattach" in single_src
+    assert sets_flag, (
+        "closeLiveStream() must set the reattach flag so closeOtherLiveStreams() "
+        "propagates the reattach intent to every backgrounded session"
+    )
+
+
+def test_load_session_reattaches_when_inflight_is_in_memory_and_marked_for_reattach():
+    """loadSession() must reach attachLiveStream() for an in-memory INFLIGHT entry.
+
+    Returning to a session whose INFLIGHT[sid] is already in memory (not
+    loaded from storage) relies on the `INFLIGHT[sid].reattach &&
+    activeStreamId` gate. Previously only the storage-load path set
+    `reattach:true`, so that return path skipped the reattach branch. Now
+    that closeLiveStream() sets the flag as well, the existing gate is
+    sufficient; this test pins the gate's shape so a refactor cannot drop the
+    flag check unnoticed.
+    """
+    load_src = _function_body(SESSIONS_JS, "loadSession")
+    branch_start = load_src.find("if(INFLIGHT[sid]){")
+    assert branch_start != -1, "INFLIGHT branch not found in loadSession"
+    # Only a fixed-size window after the branch opener is inspected.
+    branch_window = load_src[branch_start:][:2400]
+    assert "INFLIGHT[sid].reattach" in branch_window, (
+        "loadSession()'s INFLIGHT branch must gate the SSE reattach on the "
+        "reattach flag so closeLiveStream()'s marking flows through"
+    )
+    gate_pattern = re.compile(
+        r"if\(INFLIGHT\[sid\]\.reattach\s*&&\s*activeStreamId.*?attachLiveStream\(sid, activeStreamId",
+        re.DOTALL,
+    )
+    assert gate_pattern.search(branch_window), (
+        "loadSession() must reattach via attachLiveStream() when "
+        "INFLIGHT[sid].reattach && activeStreamId"
+    )
@@ -116,7 +116,14 @@ def test_named_custom_provider_models_endpoint_network_error_uses_short_timeout(
    observed_timeouts = []

    def fake_urlopen(req, timeout=10):
-        observed_timeouts.append(timeout)
+        # Only record timeouts for the broken-proxy custom endpoint — unrelated
+        # background probes (Copilot token fetch, OpenRouter free-tier discovery, etc.)
+        # also call urlopen during get_available_models() and would otherwise pollute
+        # the assertion. The contract we're pinning: the broken-proxy /v1/models call
+        # uses CUSTOM_MODELS_ENDPOINT_TIMEOUT_SECONDS, not the urllib default 10.
+        full_url = getattr(req, "full_url", "")
+        if "broken.example" in str(full_url):
+            observed_timeouts.append(timeout)
        raise urllib.error.URLError("timed out")

    monkeypatch.setattr(urllib.request, "urlopen", fake_urlopen)
@@ -9,6 +9,7 @@ Four optimizations to reduce session-switch latency:
 """

 import pathlib
+import re
 import threading
 import time
 from unittest.mock import patch, MagicMock
@@ -487,10 +488,20 @@ class TestSessionSwitchCancellation:

    def test_loading_older_reset_on_session_switch(self):
        """loadSession must reset _loadingOlder when switching sessions."""
-        # Find the reset block in loadSession
-        marker = "_messagesTruncated = false;\n    _oldestIdx = 0;\n    _loadingOlder = false;"
-        idx = SESSIONS_JS.find(marker)
-        assert idx >= 0, (
+        # Locate the on-switch reset block — it lives in the `if (currentSid !== sid || forceReload)`
+        # arm of loadSession. Match by the surrounding state-resets rather than by a fragile
+        # multi-line substring, so unrelated code (like the closeOtherLiveStreams teardown
+        # that was inserted between _oldestIdx and _loadingOlder) doesn't break the test.
+        switch_arm = re.search(
+            r"if \(currentSid !== sid \|\| forceReload\) \{(.*?)\n  \}",
+            SESSIONS_JS,
+            re.DOTALL,
+        )
+        assert switch_arm, "loadSession's session-switch reset arm not found"
+        block = switch_arm.group(1)
+        assert "_messagesTruncated = false;" in block
+        assert "_oldestIdx = 0;" in block
+        assert "_loadingOlder = false;" in block, (
            "loadSession must reset _loadingOlder=false on session switch "
            "to prevent a stale _loadOlderMessages lock from blocking the "
            "new session's scroll-to-top loading."
@@ -517,13 +528,20 @@ class TestSessionSwitchCancellation:

    def test_messages_truncated_reset_on_switch(self):
        """loadSession must reset _messagesTruncated on session switch."""
-        marker = "_messagesTruncated = false;\n    _oldestIdx = 0;\n    _loadingOlder = false;"
-        idx = SESSIONS_JS.find(marker)
-        assert idx >= 0, (
+        switch_arm = re.search(
+            r"if \(currentSid !== sid \|\| forceReload\) \{(.*?)\n  \}",
+            SESSIONS_JS,
+            re.DOTALL,
+        )
+        assert switch_arm, "loadSession's session-switch reset arm not found"
+        block = switch_arm.group(1)
+        assert "_messagesTruncated = false;" in block, (
            "_messagesTruncated must be reset to false on session switch "
            "to prevent the scroll-to-top handler from trying to load "
            "older messages from the previous session."
        )
+        assert "_oldestIdx = 0;" in block
+        assert "_loadingOlder = false;" in block

    def test_oldest_idx_reset_prevents_wrong_cursor(self):
        """_oldestIdx=0 after switch prevents passing stale cursor to API."""
@@ -761,9 +761,13 @@ def test_messages_js_supports_live_reasoning_and_tool_completion(cleanup_test_se
    until the final done snapshot redraws the whole turn.
    """
    src = (REPO_ROOT / "static/messages.js").read_text()
-    assert "let reasoningText=''" in src, \
+    # reasoningText is initialised at closure scope in attachLiveStream.
+    # On initial connect it defaults to ''; on reconnect it restores from
+    # INFLIGHT so the already-rendered content survives the session switch.
+    assert ("let reasoningText=''" in src
+            or "let reasoningText = _lastLiveAssistant" in src), \
        "messages.js must track streamed reasoning text separately from assistant text"
-    assert "let liveReasoningText=''" in src or 'let liveReasoningText = ""' in src, \
+    assert ("let liveReasoningText=''" in src or "let liveReasoningText = reasoningText" in src), \
        "messages.js must track the currently active reasoning segment separately from cumulative reasoning"
    assert "source.addEventListener('reasoning'" in src or 'source.addEventListener("reasoning"' in src, \
        "messages.js must listen for live reasoning SSE events"
@@ -154,8 +154,14 @@ class TestReconnectAccumulatorPreservation:
        )
        assert m, "attachLiveStream prelude not found"
        prelude = m.group(0)
-        assert "let assistantText=''" in prelude or 'let assistantText = ""' in prelude, (
-            "assistantText must be initialised to '' at closure scope — "
+        # On initial connect, assistantText and reasoningText are initialised to ''
+        # at closure scope (the ternary defaults to '' when reconnecting is false
+        # or INFLIGHT has no _live assistant message). On reconnect, they restore
+        # from INFLIGHT so the already-rendered content survives the session switch.
+        assert ("let assistantText=''" in prelude
+                or 'let assistantText = _lastLiveAssistant' in prelude
+                or 'let assistantText = ""' in prelude), (
+            "assistantText must be initialised at closure scope — "
            "this is the only legitimate reset; _wireSSE must not re-reset"
        )

@@ -198,6 +198,71 @@ class TestGenerateTitleRawViaAuxTimeout(unittest.TestCase):
        self.assertEqual(captured.get('base_url'), 'http://openrouter:4000/v1')
        self.assertEqual(captured.get('api_key'), 'test-title-api-key')

+    def test_title_prompt_requires_matching_user_language(self):
+        """A German conversation start must yield a prompt asking for a German title."""
+        from api.streaming import generate_title_raw_via_aux
+
+        expected_title = 'Alte Session Bilder'
+        choice = types.SimpleNamespace(
+            message=types.SimpleNamespace(content=expected_title),
+            finish_reason='stop',
+        )
+        llm_reply = types.SimpleNamespace(choices=[choice])
+        seen_kwargs = {}
+
+        def recording_call_llm(**kwargs):
+            # Keep the keyword arguments so the prompt text can be inspected below.
+            seen_kwargs.update(kwargs)
+            return llm_reply
+
+        tg_config = {'provider': '', 'model': 'title-model', 'base_url': ''}
+        with _patch_tg_config(tg_config), patch(
+            'agent.auxiliary_client.call_llm', side_effect=recording_call_llm, create=True
+        ):
+            result, status = generate_title_raw_via_aux(
+                user_text='Warum werden hier die Bilder der alten Session nicht mehr angezeigt?',
+                assistant_text='Ich prüfe die Attachment-Pfade im WebUI.',
+            )
+
+        self.assertEqual(result, expected_title)
+        self.assertEqual(status, 'llm_aux')
+        first_prompt = (seen_kwargs.get('messages') or [])[0]['content']
+        for required_line in (
+            'Match the language of the user question',
+            'If the user writes German, output a German title',
+        ):
+            self.assertIn(required_line, first_prompt)
+
+    def test_german_source_rejects_english_aux_title(self):
+        """Regression: an English aux title must be rejected for a German conversation.
+
+        The rejection is reported as no title, the
+        ``llm_language_mismatch_aux`` status, and the raw text as preview.
+        """
+        from api.streaming import _generate_llm_session_title_via_aux
+
+        english_title = 'Old Session Image Display Issue'
+        choice = types.SimpleNamespace(
+            message=types.SimpleNamespace(content=english_title),
+            finish_reason='stop',
+        )
+        llm_reply = types.SimpleNamespace(choices=[choice])
+
+        tg_config = {'provider': '', 'model': 'title-model', 'base_url': ''}
+        with _patch_tg_config(tg_config), patch(
+            'agent.auxiliary_client.call_llm', return_value=llm_reply, create=True
+        ):
+            outcome = _generate_llm_session_title_via_aux(
+                'Warum werden hier die Bilder der alten Session nicht mehr angezeigt?',
+                'Ich prüfe die Attachment-Pfade im WebUI.',
+            )
+        title, status, raw_preview = outcome
+
+        self.assertIsNone(title)
+        self.assertEqual(status, 'llm_language_mismatch_aux')
+        self.assertEqual(raw_preview, english_title)
+
+    def test_german_fallback_keeps_german_topic_words(self):
+        """The fallback title for a German exchange about old session images stays German."""
+        from api.streaming import _fallback_title_from_exchange
+
+        german_question = 'Warum werden hier die Bilder der alten Session nicht mehr angezeigt?'
+        german_answer = 'Ich prüfe die Rendering- und Attachment-Pfade im WebUI.'
+
+        fallback = _fallback_title_from_exchange(german_question, german_answer)
+
+        self.assertEqual(fallback, 'Alte Session Bilder')
+
    def test_configured_api_key_is_not_sent_to_caller_supplied_route(self):
        """Regression: title task keys must not leak to explicit fallback routes.