diff --git a/run_agent.py b/run_agent.py index 8908562b38..ccd851f766 100644 --- a/run_agent.py +++ b/run_agent.py @@ -10295,6 +10295,7 @@ class AIAgent: limit=function_args.get("limit", 3), db=session_db, current_session_id=self.session_id, + mode=function_args.get("mode", "fast"), ) elif function_name == "memory": target = function_args.get("target", "memory") @@ -10921,6 +10922,7 @@ class AIAgent: limit=function_args.get("limit", 3), db=session_db, current_session_id=self.session_id, + mode=function_args.get("mode", "fast"), ) tool_duration = time.time() - tool_start_time if self._should_emit_quiet_tool_messages(): diff --git a/tests/tools/test_session_search.py b/tests/tools/test_session_search.py index 8e67f23034..dec1457933 100644 --- a/tests/tools/test_session_search.py +++ b/tests/tools/test_session_search.py @@ -234,9 +234,10 @@ class TestSessionSearchConcurrency: {"role": "assistant", "content": "response"}, ] - result = json.loads(session_search(query="message", db=mock_db, limit=3)) + result = json.loads(session_search(query="message", db=mock_db, limit=3, mode="summary")) assert result["success"] is True + assert result["mode"] == "summary" assert result["count"] == 3 assert max_seen["value"] == 1 @@ -400,6 +401,165 @@ class TestSessionSearch: assert result["sessions_searched"] == 1 assert current_sid not in [r.get("session_id") for r in result.get("results", [])] + def test_default_search_returns_fast_hits_without_llm_or_full_session_load(self, monkeypatch): + """Default keyword search should stay on the DB/snippet path and avoid LLM latency.""" + from unittest.mock import MagicMock + from tools.session_search_tool import session_search + + async def fail_summarize(*_args, **_kwargs): + raise AssertionError("default session_search must not call the summarizer") + + monkeypatch.setattr("tools.session_search_tool._summarize_session", fail_summarize) + + mock_db = MagicMock() + mock_db.search_messages.return_value = [ + { + "id": 123, + "session_id": "other_sid", + "role": "user", + "snippet": "we discussed >>>session_search<<< latency", + "context": [ + {"role": "user", "content": "session_search is slow"}, + {"role": "assistant", "content": "the LLM summary is the bottleneck"}, + ], + "source": "cli", + "session_started": 1709400000, + "model": "test-model", + }, + ] + mock_db.get_session.return_value = {"parent_session_id": None, "title": "Latency debug"} + + result = json.loads(session_search(query="session_search", db=mock_db)) + + assert result["success"] is True + assert result["mode"] == "fast" + assert result["count"] == 1 + entry = result["results"][0] + assert entry["summary"] == "[Search hit — summary not generated in fast mode] Use snippet/context fields, or set mode='summary' for LLM-generated recall." + assert "we discussed" not in entry["summary"] + assert entry["model"] == "test-model" + assert entry["snippet"] == "we discussed >>>session_search<<< latency" + assert entry["context"][1]["content"] == "the LLM summary is the bottleneck" + mock_db.get_messages_as_conversation.assert_not_called() + + @pytest.mark.parametrize("mode", ["summarized", "summarise", "summarize", "deep"]) + def test_summary_mode_aliases_use_llm_summarization_path(self, monkeypatch, mode): + """Common natural-language mode aliases should map to summary mode.""" + from unittest.mock import MagicMock + from tools.session_search_tool import session_search + + async def fake_summarize(_text, _query, _meta): + return "alias summary" + + monkeypatch.setattr("tools.session_search_tool._summarize_session", fake_summarize) + monkeypatch.setattr("model_tools._run_async", lambda coro: asyncio.run(coro)) + + mock_db = MagicMock() + mock_db.search_messages.return_value = [{"session_id": "sid", "source": "cli"}] + mock_db.get_session.return_value = {"parent_session_id": None, "source": "cli"} + mock_db.get_messages_as_conversation.return_value = [ + {"role": "user", "content": "full transcript"}, + ] + + result = json.loads(session_search(query="session_search", db=mock_db, mode=mode)) + + assert result["success"] is True + assert result["mode"] == "summary" + assert result["results"][0]["summary"] == "alias summary" + + @pytest.mark.parametrize("mode", ["", "unknown", 42, True, None]) + def test_invalid_or_empty_mode_falls_back_to_fast_without_llm(self, monkeypatch, mode): + """Loose tool-call args should degrade to fast mode rather than crashing.""" + from unittest.mock import MagicMock + from tools.session_search_tool import session_search + + async def fail_summarize(*_args, **_kwargs): + raise AssertionError("invalid modes should fall back to fast mode") + + monkeypatch.setattr("tools.session_search_tool._summarize_session", fail_summarize) + + mock_db = MagicMock() + mock_db.search_messages.return_value = [ + {"session_id": "sid", "snippet": "hit", "context": "not-a-list", "source": "cli"}, + ] + mock_db.get_session.return_value = {"parent_session_id": None} + + result = json.loads(session_search(query="session_search", db=mock_db, mode=mode)) + + assert result["success"] is True + assert result["mode"] == "fast" + assert result["results"][0]["context"] == [] + assert result["results"][0]["model"] == "unknown" + mock_db.get_messages_as_conversation.assert_not_called() + + def test_fast_mode_tolerates_session_metadata_lookup_failure(self): + """Fast mode should still return the FTS hit when parent metadata is unavailable.""" + from unittest.mock import MagicMock + from tools.session_search_tool import session_search + + mock_db = MagicMock() + mock_db.search_messages.return_value = [ + {"session_id": "sid", "snippet": "hit", "source": "cli", "model": None}, + ] + mock_db.get_session.side_effect = RuntimeError("metadata unavailable") + + result = json.loads(session_search(query="session_search", db=mock_db)) + + assert result["success"] is True + assert result["results"][0]["source"] == "cli" + assert result["results"][0]["model"] == "unknown" + + def test_summary_mode_preserves_llm_summarization_path(self, monkeypatch): + """Explicit summary mode keeps the previous behavior for deeper recall.""" + from unittest.mock import MagicMock + from tools.session_search_tool import session_search + + async def fake_summarize(text, query, meta): + assert "full transcript" in text + assert query == "session_search" + assert meta["source"] == "cli" + return "focused session summary" + + monkeypatch.setattr("tools.session_search_tool._summarize_session", fake_summarize) + monkeypatch.setattr("model_tools._run_async", lambda coro: asyncio.run(coro)) + + mock_db = MagicMock() + mock_db.search_messages.return_value = [ + {"session_id": "other_sid", "source": "cli", "session_started": 1709400000, "model": "test-model"}, + ] + mock_db.get_session.return_value = {"parent_session_id": None, "source": "cli", "started_at": 1709400000} + mock_db.get_messages_as_conversation.return_value = [ + {"role": "user", "content": "full transcript about session_search"}, + ] + + result = json.loads(session_search(query="session_search", db=mock_db, mode="summary")) + + assert result["success"] is True + assert result["mode"] == "summary" + assert result["results"][0]["summary"] == "focused session summary" + mock_db.get_messages_as_conversation.assert_called_once_with("other_sid") + + def test_positional_db_argument_remains_backwards_compatible(self): + """Keep the historical positional order: query, role_filter, limit, db, current_session_id.""" + from unittest.mock import MagicMock + from tools.session_search_tool import session_search + + mock_db = MagicMock() + mock_db.search_messages.return_value = [] + + result = json.loads(session_search("session_search", None, 3, mock_db, None)) + + assert result["success"] is True + assert result["mode"] == "fast" + mock_db.search_messages.assert_called_once() + + def test_run_agent_special_session_search_paths_forward_mode(self): + """run_agent has two direct session_search call sites outside registry dispatch.""" + from pathlib import Path + + source = (Path(__file__).parent.parent.parent / "run_agent.py").read_text() + assert source.count('mode=function_args.get("mode", "fast")') == 2 + def test_current_child_session_excludes_parent_lineage(self): """Compression/delegation parents should be excluded for the active child session.""" from unittest.mock import MagicMock diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index e73cce6bbd..35b7191fb4 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -2,19 +2,16 @@ """ Session Search Tool - Long-Term Conversation Recall -Searches past session transcripts in SQLite via FTS5, then summarizes the top -matching sessions using the configured auxiliary session_search model (same -pattern as web_extract). By default, auxiliary "auto" routing uses the main -chat provider/model unless the user overrides auxiliary.session_search. -Returns focused summaries of past conversations rather than raw transcripts, -keeping the main model's context window clean. +Searches past session transcripts in SQLite via FTS5. Keyword search defaults +to fast snippet/context hits without any LLM call; callers can opt into focused +LLM summaries with mode="summary" when deeper recall is worth the latency. Flow: 1. FTS5 search finds matching messages ranked by relevance 2. Groups by session, takes the top N unique sessions (default 3) - 3. Loads each session's conversation, truncates to ~100k chars centered on matches - 4. Sends to the configured auxiliary model with a focused summarization prompt - 5. Returns per-session summaries with metadata + 3. Fast mode returns snippets and nearby context immediately + 4. Summary mode loads each session, truncates around matches, and calls an LLM + 5. Returns per-session hits/summaries with metadata """ import asyncio @@ -328,13 +325,11 @@ def session_search( limit: int = 3, db=None, current_session_id: str = None, + mode: str = "fast", ) -> str: """ - Search past sessions and return focused summaries of matching conversations. - - Uses FTS5 to find matches, then summarizes the top sessions with the - configured auxiliary session_search model. - The current session is excluded from results since the agent already has that context. + Search past sessions. Fast mode returns FTS snippets without LLM calls; + summary mode preserves the previous focused summarization behavior. """ if db is None: try: @@ -346,6 +341,12 @@ def session_search( from hermes_state import format_session_db_unavailable return tool_error(format_session_db_unavailable(), success=False) + mode = (mode or "fast").strip().lower() if isinstance(mode, str) else "fast" + if mode in ("summarized", "summarise", "summarize", "deep"): + mode = "summary" + if mode not in ("fast", "summary"): + mode = "fast" + # Defensive: models (especially open-source) may send non-int limit values # (None when JSON null, string "int", or even a type object). Coerce to a # safe integer before any arithmetic/comparison to prevent TypeError. @@ -381,6 +382,7 @@ def session_search( if not raw_results: return json.dumps({ "success": True, + "mode": mode, "query": query, "results": [], "count": 0, @@ -438,6 +440,41 @@ def session_search( if len(seen_sessions) >= limit: break + if mode == "fast": + results = [] + for session_id, match_info in seen_sessions.items(): + try: + session_meta = db.get_session(session_id) or {} + except Exception: + session_meta = {} + snippet = match_info.get("snippet") or "" + context = match_info.get("context") or [] + if not isinstance(context, list): + context = [] + results.append({ + "session_id": session_id, + "when": _format_timestamp( + session_meta.get("started_at") or match_info.get("session_started") + ), + "source": session_meta.get("source") or match_info.get("source", "unknown"), + "model": session_meta.get("model") or match_info.get("model") or "unknown", + "matched_role": match_info.get("role"), + "title": session_meta.get("title") or None, + "snippet": snippet, + "context": context, + "summary": "[Search hit — summary not generated in fast mode] Use snippet/context fields, or set mode='summary' for LLM-generated recall.", + }) + + return json.dumps({ + "success": True, + "mode": "fast", + "query": query, + "results": results, + "count": len(results), + "sessions_searched": len(seen_sessions), + "message": "Fast search returned FTS snippets without LLM summarization. Use mode='summary' for focused summaries when needed.", + }, ensure_ascii=False) + # Prepare all sessions for parallel summarization tasks = [] for session_id, match_info in seen_sessions.items(): @@ -527,6 +564,7 @@ def session_search( return json.dumps({ "success": True, + "mode": "summary", "query": query, "results": summaries, "count": len(summaries), @@ -539,7 +577,7 @@ def session_search( def check_session_search_requirements() -> bool: - """Requires SQLite state database and an auxiliary text model.""" + """Requires SQLite state database; summary mode also needs an auxiliary model.""" try: from hermes_state import DEFAULT_DB_PATH return DEFAULT_DB_PATH.parent.exists() @@ -551,13 +589,14 @@ SESSION_SEARCH_SCHEMA = { "name": "session_search", "description": ( "Search your long-term memory of past conversations, or browse recent sessions. This is your recall -- " - "every past session is searchable, and this tool summarizes what happened.\n\n" + "every past session is searchable. Keyword search defaults to fast FTS snippets with no LLM call.\n\n" "TWO MODES:\n" "1. Recent sessions (no query): Call with no arguments to see what was worked on recently. " "Returns titles, previews, and timestamps. Zero LLM cost, instant. " "Start here when the user asks what were we working on or what did we do recently.\n" "2. Keyword search (with query): Search for specific topics across all past sessions. " - "Returns LLM-generated summaries of matching sessions.\n\n" + "Defaults to mode='fast', returning snippets and nearby context instantly without LLM summarization. " + "Use mode='summary' only when a focused LLM-generated recap is worth the latency.\n\n" "USE THIS PROACTIVELY when:\n" "- The user says 'we did this before', 'remember when', 'last time', 'as I mentioned'\n" "- The user asks about a topic you worked on before but don't have in current context\n" @@ -570,7 +609,7 @@ SESSION_SEARCH_SCHEMA = { "phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). " "IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses " "sessions that only mention some terms. If a broad OR query returns nothing, try individual " - "keyword searches in parallel. Returns summaries of the top matching sessions." + "keyword searches in parallel. Returns fast search hits by default." ), "parameters": { "type": "object", @@ -585,9 +624,15 @@ SESSION_SEARCH_SCHEMA = { }, "limit": { "type": "integer", - "description": "Max sessions to summarize (default: 3, max: 5).", + "description": "Max sessions to return (default: 3, max: 5).", "default": 3, }, + "mode": { + "type": "string", + "enum": ["fast", "summary"], + "description": "fast (default) returns FTS snippets + surrounding context without LLM calls (~0.02s). Start here for most recall needs. summary loads the full session transcript and runs the LLM summarizer (~10-30s). Use summary only when the fast results do not give enough context to answer the user's question, or when the user explicitly asks for a 'summary' or 'recap' of past conversations. You can call twice: first fast, then summary if more detail is needed.", + "default": "fast", + }, }, "required": [], }, @@ -605,6 +650,7 @@ registry.register( query=args.get("query") or "", role_filter=args.get("role_filter"), limit=args.get("limit", 3), + mode=args.get("mode", "fast"), db=kw.get("db"), current_session_id=kw.get("current_session_id")), check_fn=check_session_search_requirements,