🐛 fix(memory): keep inline memory-context mentions visible

This commit is contained in:
墨綠BG
2026-05-01 20:29:22 +08:00
committed by kshitij
parent 956dd44625
commit 341c8d3030
2 changed files with 66 additions and 13 deletions
+40 -4
View File
@@ -91,10 +91,12 @@ class StreamingContextScrubber:
def __init__(self) -> None:
self._in_span: bool = False
self._buf: str = ""
self._at_block_boundary: bool = True
def reset(self) -> None:
self._in_span = False
self._buf = ""
self._at_block_boundary = True
def feed(self, text: str) -> str:
"""Return the visible portion of ``text`` after scrubbing.
@@ -121,19 +123,19 @@ class StreamingContextScrubber:
buf = buf[idx + len(self._CLOSE_TAG):]
self._in_span = False
else:
idx = buf.lower().find(self._OPEN_TAG)
idx = self._find_boundary_open_tag(buf)
if idx == -1:
# No open tag — hold back a potential partial open tag
held = self._max_partial_suffix(buf, self._OPEN_TAG)
if held:
out.append(buf[:-held])
self._append_visible(out, buf[:-held])
self._buf = buf[-held:]
else:
out.append(buf)
self._append_visible(out, buf)
return "".join(out)
# Emit text before the tag, enter span
if idx > 0:
out.append(buf[:idx])
self._append_visible(out, buf[:idx])
buf = buf[idx + len(self._OPEN_TAG):]
self._in_span = True
@@ -169,6 +171,40 @@ class StreamingContextScrubber:
return i
return 0
def _find_boundary_open_tag(self, buf: str) -> int:
"""Find an opening fence only when it starts a block-like span."""
buf_lower = buf.lower()
search_start = 0
while True:
idx = buf_lower.find(self._OPEN_TAG, search_start)
if idx == -1:
return -1
if self._is_block_boundary(buf, idx):
return idx
search_start = idx + 1
def _is_block_boundary(self, buf: str, idx: int) -> bool:
if idx == 0:
return self._at_block_boundary
preceding = buf[:idx]
last_newline = preceding.rfind("\n")
if last_newline == -1:
return self._at_block_boundary and preceding.strip() == ""
return preceding[last_newline + 1:].strip() == ""
def _append_visible(self, out: list[str], text: str) -> None:
if not text:
return
out.append(text)
self._update_block_boundary(text)
def _update_block_boundary(self, text: str) -> None:
last_newline = text.rfind("\n")
if last_newline != -1:
self._at_block_boundary = text[last_newline + 1:].strip() == ""
else:
self._at_block_boundary = self._at_block_boundary and text.strip() == ""
def build_memory_context_block(raw_context: str) -> str:
"""Wrap prefetched memory in a fenced block with system note."""
+26 -9
View File
@@ -37,13 +37,13 @@ class TestStreamingContextScrubberBasics:
"""The real streaming case: tag pair split across deltas."""
s = StreamingContextScrubber()
deltas = [
"Hello ",
"Hello\n",
"<memory-context>\npayload ",
"more payload\n",
"</memory-context> world",
]
out = "".join(s.feed(d) for d in deltas) + s.flush()
assert out == "Hello world"
assert out == "Hello\n world"
assert "payload" not in out
def test_realistic_fragmented_chunks_strip_memory_payload(self):
@@ -72,22 +72,22 @@ class TestStreamingContextScrubberBasics:
"""The open tag itself arriving in two fragments."""
s = StreamingContextScrubber()
out = (
s.feed("pre <memory")
s.feed("pre \n<memory")
+ s.feed("-context>leak</memory-context> post")
+ s.flush()
)
assert out == "pre post"
assert out == "pre \n post"
assert "leak" not in out
def test_close_tag_split_across_two_deltas(self):
"""The close tag arriving in two fragments."""
s = StreamingContextScrubber()
out = (
s.feed("pre <memory-context>leak</memory")
s.feed("pre \n<memory-context>leak</memory")
+ s.feed("-context> post")
+ s.flush()
)
assert out == "pre post"
assert out == "pre \n post"
assert "leak" not in out
@@ -105,13 +105,30 @@ class TestStreamingContextScrubberPartialTagFalsePositives:
out = s.feed("price < ") + s.feed("10 dollars") + s.flush()
assert out == "price < 10 dollars"
def test_inline_memory_context_tag_mention_is_not_scrubbed(self):
"""A prose mention of the fence tag must not swallow the answer."""
s = StreamingContextScrubber()
out = (
s.feed("In that previous `<memory")
+ s.feed("-context>` block, ")
+ s.feed("there was no matching fact.")
+ s.flush()
)
assert out == "In that previous `<memory-context>` block, there was no matching fact."
def test_mid_sentence_memory_context_pair_is_not_scrubbed(self):
"""Only block-like memory-context spans are treated as leaked context."""
s = StreamingContextScrubber()
out = s.feed("The <memory-context> tag name is documented here.") + s.flush()
assert out == "The <memory-context> tag name is documented here."
class TestStreamingContextScrubberUnterminatedSpan:
def test_unterminated_span_drops_payload(self):
"""Provider drops close tag — better to lose output than to leak."""
s = StreamingContextScrubber()
out = s.feed("pre <memory-context>secret never closed") + s.flush()
assert out == "pre "
out = s.feed("pre \n<memory-context>secret never closed") + s.flush()
assert out == "pre \n"
assert "secret" not in out
def test_reset_clears_hung_span(self):
@@ -171,7 +188,7 @@ class TestStreamingContextScrubberCrossTurn:
def test_reset_clears_in_span_state(self):
s = StreamingContextScrubber()
s.feed("text<memory-context>secret-tail")
s.feed("text\n<memory-context>secret-tail")
# Mid-span state held — without reset, subsequent text would be
# discarded until we see </memory-context>.
s.reset()