# Reviewer notes for this patch. They sit above the first "diff --git" header,
# so no hunk content or hunk line count is altered by them.
#
# NOTE(review): this patch arrived with its line breaks and indentation
# collapsed. The layout below is reconstructed so that every hunk matches the
# counts in its @@ header, assuming 4-space Python indentation. Confirm the
# context lines against the target files before applying.
#
# What the patch changes:
#   1. CHANGELOG.md: adds a "### Fixed" entry under [Unreleased] (Refs #3040).
#   2. api/streaming.py, _detect_title_language:
#        - removes 'session', 'die', 'der', 'das' and 'den' from german_markers;
#        - raises the required number of marker hits from 2 to 3;
#        - keeps the [äöüß] check as is, so text matching it still returns 'de'
#          whatever the hit count is.
#      Consequence: text with no ä/ö/ü/ß match and fewer than three marker
#      tokens now returns ''.
#      NOTE(review): `tok in german_markers` is a case-sensitive set lookup.
#      How `s` is derived from `text` lies above this hunk and is not visible
#      here; confirm it is lowercased, otherwise capitalised words such as
#      "Warum" or "Bilder" never count as hits.
#      NOTE(review): the markers 'prüfe' and 'für' contain 'ü'. Presumably any
#      input containing them already satisfies the [äöüß] branch, which would
#      make them redundant in the set; verify before relying on them.
#   3. tests/test_title_aux_routing.py: adds two tests. One asserts '' for three
#      English sentences that use "session", "die", "DAS" or "DER". The other
#      asserts 'de' for a German sentence written without umlauts.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a990d204..29b79585 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,10 @@
 
 ## [Unreleased]
 
+### Fixed
+
+- Title-language detection no longer treats common English tech/jargon text such as "session die" or DAS/DER references as German just because of shared tokens. (Refs #3040)
+
 ## [v0.51.152] — 2026-05-28 — Release DX (stage-batch34 — single-PR optional gateway-backed browser chat)
 
 ### Added
diff --git a/api/streaming.py b/api/streaming.py
index 6c70234d..55e4ae91 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -1385,12 +1385,12 @@ def _detect_title_language(text: str) -> str:
         return ''
     german_markers = {
         'warum', 'werden', 'wird', 'wurde', 'hier', 'nicht', 'mehr', 'alte', 'alten',
-        'bilder', 'angezeigt', 'session', 'prüfe', 'ich', 'die', 'der', 'das', 'den',
-        'und', 'oder', 'mit', 'für', 'von', 'zu', 'ist', 'sind', 'bitte', 'kannst',
+        'bilder', 'angezeigt', 'prüfe', 'ich', 'und', 'oder', 'mit', 'für', 'von',
+        'zu', 'ist', 'sind', 'bitte', 'kannst',
     }
     tokens = re.findall(r'[A-Za-zÀ-ÖØ-öø-ÿ]+', s)
     german_hits = sum(1 for tok in tokens if tok in german_markers)
-    if re.search(r'[äöüß]', s) or german_hits >= 2:
+    if re.search(r'[äöüß]', s) or german_hits >= 3:
         return 'de'
     return ''
 
diff --git a/tests/test_title_aux_routing.py b/tests/test_title_aux_routing.py
index f31418eb..be448fdf 100644
--- a/tests/test_title_aux_routing.py
+++ b/tests/test_title_aux_routing.py
@@ -229,6 +229,28 @@ class TestGenerateTitleRawViaAuxTimeout(unittest.TestCase):
         self.assertIn('Match the language of the user question', messages[0]['content'])
         self.assertIn('If the user writes German, output a German title', messages[0]['content'])
 
+    def test_title_language_detection_avoids_english_tech_false_positives(self):
+        """English tech/jargon text must not be classified as German by shared tokens."""
+        from api.streaming import _detect_title_language
+
+        examples = [
+            'Why did the session die after the DAS storage failover?',
+            'The session can die when DAS storage disconnects.',
+            'Debug the session and DER certificate import failure.',
+        ]
+        for text in examples:
+            with self.subTest(text=text):
+                self.assertEqual(_detect_title_language(text), '')
+
+    def test_title_language_detection_keeps_german_without_umlaut(self):
+        """German without umlauts still needs a language hint when evidence is specific."""
+        from api.streaming import _detect_title_language
+
+        self.assertEqual(
+            _detect_title_language('Warum werden hier die Bilder der alten Session nicht angezeigt?'),
+            'de',
+        )
+
     def test_german_source_rejects_english_aux_title(self):
         """Regression: an English aux title must not overwrite a German conversation."""
         from api.streaming import _generate_llm_session_title_via_aux