mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-21 03:39:54 +00:00
🐛 fix(memory): keep inline memory-context mentions visible
This commit is contained in:
+40
-4
@@ -91,10 +91,12 @@ class StreamingContextScrubber:
|
||||
def __init__(self) -> None:
|
||||
self._in_span: bool = False
|
||||
self._buf: str = ""
|
||||
self._at_block_boundary: bool = True
|
||||
|
||||
def reset(self) -> None:
|
||||
self._in_span = False
|
||||
self._buf = ""
|
||||
self._at_block_boundary = True
|
||||
|
||||
def feed(self, text: str) -> str:
|
||||
"""Return the visible portion of ``text`` after scrubbing.
|
||||
@@ -121,19 +123,19 @@ class StreamingContextScrubber:
|
||||
buf = buf[idx + len(self._CLOSE_TAG):]
|
||||
self._in_span = False
|
||||
else:
|
||||
idx = buf.lower().find(self._OPEN_TAG)
|
||||
idx = self._find_boundary_open_tag(buf)
|
||||
if idx == -1:
|
||||
# No open tag — hold back a potential partial open tag
|
||||
held = self._max_partial_suffix(buf, self._OPEN_TAG)
|
||||
if held:
|
||||
out.append(buf[:-held])
|
||||
self._append_visible(out, buf[:-held])
|
||||
self._buf = buf[-held:]
|
||||
else:
|
||||
out.append(buf)
|
||||
self._append_visible(out, buf)
|
||||
return "".join(out)
|
||||
# Emit text before the tag, enter span
|
||||
if idx > 0:
|
||||
out.append(buf[:idx])
|
||||
self._append_visible(out, buf[:idx])
|
||||
buf = buf[idx + len(self._OPEN_TAG):]
|
||||
self._in_span = True
|
||||
|
||||
@@ -169,6 +171,40 @@ class StreamingContextScrubber:
|
||||
return i
|
||||
return 0
|
||||
|
||||
def _find_boundary_open_tag(self, buf: str) -> int:
|
||||
"""Find an opening fence only when it starts a block-like span."""
|
||||
buf_lower = buf.lower()
|
||||
search_start = 0
|
||||
while True:
|
||||
idx = buf_lower.find(self._OPEN_TAG, search_start)
|
||||
if idx == -1:
|
||||
return -1
|
||||
if self._is_block_boundary(buf, idx):
|
||||
return idx
|
||||
search_start = idx + 1
|
||||
|
||||
def _is_block_boundary(self, buf: str, idx: int) -> bool:
|
||||
if idx == 0:
|
||||
return self._at_block_boundary
|
||||
preceding = buf[:idx]
|
||||
last_newline = preceding.rfind("\n")
|
||||
if last_newline == -1:
|
||||
return self._at_block_boundary and preceding.strip() == ""
|
||||
return preceding[last_newline + 1:].strip() == ""
|
||||
|
||||
def _append_visible(self, out: list[str], text: str) -> None:
|
||||
if not text:
|
||||
return
|
||||
out.append(text)
|
||||
self._update_block_boundary(text)
|
||||
|
||||
def _update_block_boundary(self, text: str) -> None:
|
||||
last_newline = text.rfind("\n")
|
||||
if last_newline != -1:
|
||||
self._at_block_boundary = text[last_newline + 1:].strip() == ""
|
||||
else:
|
||||
self._at_block_boundary = self._at_block_boundary and text.strip() == ""
|
||||
|
||||
|
||||
def build_memory_context_block(raw_context: str) -> str:
|
||||
"""Wrap prefetched memory in a fenced block with system note."""
|
||||
|
||||
@@ -37,13 +37,13 @@ class TestStreamingContextScrubberBasics:
|
||||
"""The real streaming case: tag pair split across deltas."""
|
||||
s = StreamingContextScrubber()
|
||||
deltas = [
|
||||
"Hello ",
|
||||
"Hello\n",
|
||||
"<memory-context>\npayload ",
|
||||
"more payload\n",
|
||||
"</memory-context> world",
|
||||
]
|
||||
out = "".join(s.feed(d) for d in deltas) + s.flush()
|
||||
assert out == "Hello world"
|
||||
assert out == "Hello\n world"
|
||||
assert "payload" not in out
|
||||
|
||||
def test_realistic_fragmented_chunks_strip_memory_payload(self):
|
||||
@@ -72,22 +72,22 @@ class TestStreamingContextScrubberBasics:
|
||||
"""The open tag itself arriving in two fragments."""
|
||||
s = StreamingContextScrubber()
|
||||
out = (
|
||||
s.feed("pre <memory")
|
||||
s.feed("pre \n<memory")
|
||||
+ s.feed("-context>leak</memory-context> post")
|
||||
+ s.flush()
|
||||
)
|
||||
assert out == "pre post"
|
||||
assert out == "pre \n post"
|
||||
assert "leak" not in out
|
||||
|
||||
def test_close_tag_split_across_two_deltas(self):
|
||||
"""The close tag arriving in two fragments."""
|
||||
s = StreamingContextScrubber()
|
||||
out = (
|
||||
s.feed("pre <memory-context>leak</memory")
|
||||
s.feed("pre \n<memory-context>leak</memory")
|
||||
+ s.feed("-context> post")
|
||||
+ s.flush()
|
||||
)
|
||||
assert out == "pre post"
|
||||
assert out == "pre \n post"
|
||||
assert "leak" not in out
|
||||
|
||||
|
||||
@@ -105,13 +105,30 @@ class TestStreamingContextScrubberPartialTagFalsePositives:
|
||||
out = s.feed("price < ") + s.feed("10 dollars") + s.flush()
|
||||
assert out == "price < 10 dollars"
|
||||
|
||||
def test_inline_memory_context_tag_mention_is_not_scrubbed(self):
|
||||
"""A prose mention of the fence tag must not swallow the answer."""
|
||||
s = StreamingContextScrubber()
|
||||
out = (
|
||||
s.feed("In that previous `<memory")
|
||||
+ s.feed("-context>` block, ")
|
||||
+ s.feed("there was no matching fact.")
|
||||
+ s.flush()
|
||||
)
|
||||
assert out == "In that previous `<memory-context>` block, there was no matching fact."
|
||||
|
||||
def test_mid_sentence_memory_context_pair_is_not_scrubbed(self):
|
||||
"""Only block-like memory-context spans are treated as leaked context."""
|
||||
s = StreamingContextScrubber()
|
||||
out = s.feed("The <memory-context> tag name is documented here.") + s.flush()
|
||||
assert out == "The <memory-context> tag name is documented here."
|
||||
|
||||
|
||||
class TestStreamingContextScrubberUnterminatedSpan:
|
||||
def test_unterminated_span_drops_payload(self):
|
||||
"""Provider drops close tag — better to lose output than to leak."""
|
||||
s = StreamingContextScrubber()
|
||||
out = s.feed("pre <memory-context>secret never closed") + s.flush()
|
||||
assert out == "pre "
|
||||
out = s.feed("pre \n<memory-context>secret never closed") + s.flush()
|
||||
assert out == "pre \n"
|
||||
assert "secret" not in out
|
||||
|
||||
def test_reset_clears_hung_span(self):
|
||||
@@ -171,7 +188,7 @@ class TestStreamingContextScrubberCrossTurn:
|
||||
|
||||
def test_reset_clears_in_span_state(self):
|
||||
s = StreamingContextScrubber()
|
||||
s.feed("text<memory-context>secret-tail")
|
||||
s.feed("text\n<memory-context>secret-tail")
|
||||
# Mid-span state held — without reset, subsequent text would be
|
||||
# discarded until we see </memory-context>.
|
||||
s.reset()
|
||||
|
||||
Reference in New Issue
Block a user