mirror of
https://github.com/nesquena/hermes-webui.git
synced 2026-06-07 17:30:21 +00:00
fix: tighten title language detection
This commit is contained in:
@@ -3,6 +3,10 @@
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Fixed
|
||||
|
||||
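- Title-language detection no longer classifies common English tech/jargon text such as "session die" or DAS/DER references as German merely because it shares tokens with German: the ambiguous markers `session`, `die`, `der`, `das`, and `den` were removed, and text without umlauts/ß now needs three marker hits instead of two. (Refs #3040)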
|
||||
|
||||
## [v0.51.152] — 2026-05-28 — Release DX (stage-batch34 — single-PR optional gateway-backed browser chat)
|
||||
|
||||
### Added
|
||||
|
||||
+3
-3
@@ -1385,12 +1385,12 @@ def _detect_title_language(text: str) -> str:
|
||||
return ''
|
||||
german_markers = {
|
||||
'warum', 'werden', 'wird', 'wurde', 'hier', 'nicht', 'mehr', 'alte', 'alten',
|
||||
'bilder', 'angezeigt', 'session', 'prüfe', 'ich', 'die', 'der', 'das', 'den',
|
||||
'und', 'oder', 'mit', 'für', 'von', 'zu', 'ist', 'sind', 'bitte', 'kannst',
|
||||
'bilder', 'angezeigt', 'prüfe', 'ich', 'und', 'oder', 'mit', 'für', 'von',
|
||||
'zu', 'ist', 'sind', 'bitte', 'kannst',
|
||||
}
|
||||
tokens = re.findall(r'[A-Za-zÀ-ÖØ-öø-ÿ]+', s)
|
||||
german_hits = sum(1 for tok in tokens if tok in german_markers)
|
||||
if re.search(r'[äöüß]', s) or german_hits >= 2:
|
||||
if re.search(r'[äöüß]', s) or german_hits >= 3:
|
||||
return 'de'
|
||||
return ''
|
||||
|
||||
|
||||
@@ -229,6 +229,28 @@ class TestGenerateTitleRawViaAuxTimeout(unittest.TestCase):
|
||||
self.assertIn('Match the language of the user question', messages[0]['content'])
|
||||
self.assertIn('If the user writes German, output a German title', messages[0]['content'])
|
||||
|
||||
def test_title_language_detection_avoids_english_tech_false_positives(self):
|
||||
"""English tech/jargon text must not be classified as German by shared tokens."""
|
||||
from api.streaming import _detect_title_language
|
||||
|
||||
examples = [
|
||||
'Why did the session die after the DAS storage failover?',
|
||||
'The session can die when DAS storage disconnects.',
|
||||
'Debug the session and DER certificate import failure.',
|
||||
]
|
||||
for text in examples:
|
||||
with self.subTest(text=text):
|
||||
self.assertEqual(_detect_title_language(text), '')
|
||||
|
||||
def test_title_language_detection_keeps_german_without_umlaut(self):
|
||||
"""German without umlauts still needs a language hint when evidence is specific."""
|
||||
from api.streaming import _detect_title_language
|
||||
|
||||
self.assertEqual(
|
||||
_detect_title_language('Warum werden hier die Bilder der alten Session nicht angezeigt?'),
|
||||
'de',
|
||||
)
|
||||
|
||||
def test_german_source_rejects_english_aux_title(self):
|
||||
"""Regression: an English aux title must not overwrite a German conversation."""
|
||||
from api.streaming import _generate_llm_session_title_via_aux
|
||||
|
||||
Reference in New Issue
Block a user