Filter low-value CLI agent sessions

This commit is contained in:
Frank Song
2026-05-04 09:44:11 +08:00
committed by test
parent e23ba59df2
commit 79d0762d8c
6 changed files with 228 additions and 10 deletions
-2
View File
@@ -182,7 +182,6 @@ This was a large stack of work. Massive thanks to **@ai-ag2026** for the full Ka
### Note on closed-as-superseded
PR #1656 (also @Michaelyklam) was closed as superseded by #1657. Both target #1458 Bug #3, both add accept-loop heartbeat + `/health?deep=1` + 503-on-degraded. #1657 adds beyond #1656: state.db connectivity check, projects state check, FD soft-limit raise, and `docs/supervisor.md` watchdog recipe. Same author iterated; the second PR was the keeper.
## [v0.50.296] — 2026-05-04
### Fixed (3 PRs — closes #1406, #1617; refs #1362)
@@ -448,7 +447,6 @@ Two stale source-string assertions were broken by #1591's compact() and messages
- **Auto-fix on #1464:** ternary inversion + regression test, with `Co-authored-by: Josh Jameson` preserved.
- **Auto-fix on stage:** widened source-string anchors in two pre-existing brittle tests broken by #1591's structural changes.
## [v0.50.289] — 2026-05-03
### Fixed (1 PR — TCP keepalive on accepted connections — closes #1580)
+114
View File
@@ -14,6 +14,9 @@ MESSAGING_SOURCES = {
'weixin',
}
CLI_MIN_UNTITLED_MESSAGE_COUNT = 6
CLI_MIN_UNTITLED_USER_MESSAGE_COUNT = 2
SOURCE_LABELS = {
'api_server': 'API',
'cli': 'CLI',
@@ -71,6 +74,115 @@ def _optional_col(name: str, columns: set[str], fallback: str = "NULL") -> str:
return f"s.{name}" if name in columns else f"{fallback} AS {name}"
def _safe_lower(value) -> str:
return str(value or "").strip().lower()
def _normalize_source_name(value: object) -> str:
    """Return the lower-cased source name with a trailing " session" removed.

    Falsy input normalizes to ``""``.
    """
    suffix = " session"
    name = _safe_lower(value)
    if name.endswith(suffix):
        # Strip again: removing the suffix can expose inner whitespace.
        return name[: -len(suffix)].strip()
    return name
def _looks_like_default_cli_title(row: dict) -> bool:
    """Report whether the row's title is a placeholder rather than a real title.

    A title counts as a placeholder when it is empty, "untitled", "cli",
    "cli session", or "<source> session" for any of the row's source fields.
    """
    title = _safe_lower(row.get("title"))
    # "cli session" is covered here, so "cli" needs no separate entry in the
    # candidate set built below.
    if title in {"", "untitled", "cli", "cli session"}:
        return True

    source_keys = ("source", "session_source", "source_tag", "raw_source", "source_label")
    candidates = {_normalize_source_name(row.get(key)) for key in source_keys}
    candidates.discard("")
    return title in {f"{candidate} session" for candidate in candidates}
def _as_positive_int(value) -> int:
try:
return max(0, int(float(value)))
except (TypeError, ValueError):
return 0
def _count_user_turns(row: dict) -> int:
    """Return how many user-authored messages the row reports.

    Precomputed counters are preferred, in order: ``actual_user_message_count``
    then ``user_message_count``. Only when both are ``None`` is the ``messages``
    list scanned for entries whose role is "user".
    """
    for key in ("actual_user_message_count", "user_message_count"):
        recorded = row.get(key)
        if recorded is not None:
            return _as_positive_int(recorded)

    transcript = row.get("messages") or []
    if not isinstance(transcript, list):
        return 0

    user_turns = 0
    for entry in transcript:
        # Non-dict entries are compared by their own string value.
        role = entry.get("role") if isinstance(entry, dict) else entry
        if _safe_lower(role) == "user":
            user_turns += 1
    return user_turns
def _has_cli_lineage(row: dict) -> bool:
    """Return True when the row has more than one compression segment or a lineage root id."""
    if _as_positive_int(row.get("_compression_segment_count")) > 1:
        return True
    return bool(row.get("_lineage_root_id"))
def is_cli_session_row(row: dict) -> bool:
    """Decide whether ``row`` should be treated as a CLI-imported session.

    Non-dict input and rows whose ``session_source`` is "messaging" are never
    CLI. A row is CLI when ``session_source`` or any of ``source_tag``,
    ``raw_source``, ``source``, ``source_label`` equals "cli". Otherwise the
    ``is_cli_session`` flag is honored only for rows with no messaging source
    and a default-looking title.
    """
    if not isinstance(row, dict):
        return False

    session_source = _safe_lower(row.get("session_source"))
    if session_source == "messaging":
        return False
    if session_source == "cli":
        return True

    source_tag = _safe_lower(row.get("source_tag"))
    raw_source = _safe_lower(row.get("raw_source"))
    source_name = _safe_lower(row.get("source"))
    source_label = _safe_lower(row.get("source_label"))
    if "cli" in (source_tag, raw_source, source_name, source_label):
        return True

    # Legacy imported CLI rows may only be marked as CLI in sidebar metadata.
    # Keep this conservative to avoid treating messaging sessions as CLI.
    if not row.get("is_cli_session"):
        return False
    # source_label is not part of the messaging-source check.
    for candidate in (session_source, source_tag, raw_source, source_name):
        if candidate in MESSAGING_SOURCES:
            return False
    return _looks_like_default_cli_title(row)
def is_cli_session_row_visible(row: dict) -> bool:
    """Decide whether ``row`` should stay in the sidebar.

    Non-dict input is hidden and non-CLI rows are always shown. A CLI row is
    hidden when it has no messages. Otherwise it is shown if it has lineage,
    carries a non-default title, or has at least
    ``CLI_MIN_UNTITLED_USER_MESSAGE_COUNT`` user messages.
    """
    if not isinstance(row, dict):
        return False
    if not is_cli_session_row(row):
        return True

    # A missing or zero ``actual_message_count`` falls back to ``message_count``.
    raw_total = row.get("actual_message_count") or row.get("message_count")
    if _as_positive_int(raw_total) <= 0:
        return False

    if _has_cli_lineage(row) or not _looks_like_default_cli_title(row):
        return True
    return _count_user_turns(row) >= CLI_MIN_UNTITLED_USER_MESSAGE_COUNT
def _is_continuation_session(parent: dict | None, child: dict | None) -> bool:
"""Return True when ``child`` is the next segment of the same conversation.
@@ -301,6 +413,7 @@ def read_importable_agent_session_rows(
{ended_expr},
{end_reason_expr},
COUNT(m.id) AS actual_message_count,
COUNT(CASE WHEN LOWER(m.role) = 'user' THEN 1 END) AS actual_user_message_count,
MAX(m.timestamp) AS last_activity
FROM sessions s
LEFT JOIN messages m ON m.session_id = s.id
@@ -312,6 +425,7 @@ def read_importable_agent_session_rows(
)
projected = _project_agent_session_rows([dict(row) for row in cur.fetchall()])
projected = [_with_normalized_source(row) for row in projected]
projected = [row for row in projected if is_cli_session_row_visible(row)]
if limit is None:
return projected
return projected[:max(0, int(limit))]
+16 -1
View File
@@ -21,6 +21,7 @@ from api.workspace import get_last_workspace
from api.agent_sessions import read_importable_agent_session_rows, read_session_lineage_metadata
logger = logging.getLogger(__name__)
CLI_VISIBLE_SESSION_LIMIT = 20
# ---------------------------------------------------------------------------
# Stale temp-file cleanup
@@ -537,6 +538,11 @@ class Session:
last_message_at = _last_message_timestamp(self.messages) or self.updated_at
if has_pending_user_message and self.pending_started_at:
last_message_at = self.pending_started_at
def _role(message):
if not isinstance(message, dict):
return ""
return str(message.get('role', '')).strip().lower()
return {
'session_id': self.session_id,
'title': self.title,
@@ -554,6 +560,9 @@ class Session:
'input_tokens': self.input_tokens,
'output_tokens': self.output_tokens,
'estimated_cost': self.estimated_cost,
'user_message_count': sum(
1 for message in self.messages if _role(message) == 'user'
) if isinstance(self.messages, list) else 0,
'personality': self.personality,
'compression_anchor_visible_idx': self.compression_anchor_visible_idx,
'compression_anchor_message_key': self.compression_anchor_message_key,
@@ -1507,7 +1516,12 @@ def get_cli_sessions() -> list:
return _cron_pid_cache[0]
try:
for row in read_importable_agent_session_rows(db_path, limit=200, log=logger, exclude_sources=None):
for row in read_importable_agent_session_rows(
db_path,
limit=CLI_VISIBLE_SESSION_LIMIT,
log=logger,
exclude_sources=None,
):
sid = row['id']
raw_ts = row['last_activity'] or row['started_at']
# Prefer the CLI session's own profile from the DB; fall back to
@@ -1573,6 +1587,7 @@ def get_cli_sessions() -> list:
'_parent_lineage_root_id': row.get('_parent_lineage_root_id'),
'end_reason': row.get('end_reason'),
'actual_message_count': row.get('actual_message_count'),
'user_message_count': row.get('actual_user_message_count'),
'_lineage_root_id': row.get('_lineage_root_id'),
'_lineage_tip_id': row.get('_lineage_tip_id'),
'_compression_segment_count': row.get('_compression_segment_count'),
+52 -5
View File
@@ -22,7 +22,11 @@ import re
from pathlib import Path
from contextlib import closing
from urllib.parse import parse_qs
from api.agent_sessions import MESSAGING_SOURCES
from api.agent_sessions import (
MESSAGING_SOURCES,
is_cli_session_row,
is_cli_session_row_visible,
)
logger = logging.getLogger(__name__)
@@ -1185,6 +1189,44 @@ def _session_sort_timestamp(session: dict) -> float:
) or 0.0
def _is_cli_session_for_settings(session: dict) -> bool:
    """Classify ``session`` as a CLI session for the show/hide-CLI setting.

    Rows accepted by ``is_cli_session_row`` are CLI. Beyond that, a row with
    ``is_cli_session`` set and a non-messaging ``source`` counts as CLI when its
    title is a bare placeholder, or when the title ends in " session" and the
    source is empty or "cli".
    """
    if not isinstance(session, dict):
        return False
    if is_cli_session_row(session):
        return True

    # Fallback for legacy local copies that had weak/empty metadata:
    # keep this conservative so messaging sessions do not collapse incorrectly.
    if not session.get("is_cli_session"):
        return False
    source = str(session.get("source") or "").strip().lower()
    if source in MESSAGING_SOURCES:
        return False

    title = str(session.get("title") or "").strip().lower()
    # Placeholder titles qualify regardless of source; only the generic
    # "... session" suffix additionally requires an empty or "cli" source.
    if title in ("", "untitled", "cli", "cli session"):
        return True
    return title.endswith(" session") and source in ("", "cli")
CLI_VISIBLE_SESSION_CAP = 20
def _cap_recent_cli_sessions(sessions: list[dict], cli_cap: int = CLI_VISIBLE_SESSION_CAP) -> list[dict]:
    """Return ``sessions`` with only the first ``cli_cap`` CLI sessions retained.

    Non-CLI sessions always pass through and relative order is preserved. A
    ``cli_cap`` of zero or less disables the cap and returns the input list
    unchanged. "Most recent" assumes the caller passes sessions ordered
    newest-first; confirm at the call site.
    """
    if cli_cap <= 0:
        return sessions

    remaining = cli_cap
    result = []
    for entry in sessions:
        if not _is_cli_session_for_settings(entry):
            result.append(entry)
        elif remaining > 0:
            remaining -= 1
            result.append(entry)
    return result
def _merge_cli_sidebar_metadata(ui_session: dict, cli_meta: dict) -> dict:
"""Merge source-of-truth CLI metadata into a sidebar session row.
@@ -2431,7 +2473,8 @@ def handle_get(handler, parsed) -> bool:
if parsed.path == "/api/sessions":
webui_sessions = all_sessions()
settings = load_settings()
if settings.get("show_cli_sessions"):
show_cli_sessions = bool(settings.get("show_cli_sessions"))
if show_cli_sessions:
cli = get_cli_sessions()
cli_by_id = {s["session_id"]: s for s in cli}
for s in webui_sessions:
@@ -2446,12 +2489,14 @@ def handle_get(handler, parsed) -> bool:
for key in ("source_tag", "raw_source", "session_source", "source_label"):
if not s.get(key) and meta.get(key):
s[key] = meta[key]
# Apply the same CLI visibility semantics to imported local copies so
# low-value imported artifacts do not leak into the sidebar.
webui_sessions = [s for s in webui_sessions if is_cli_session_row_visible(s)]
webui_ids = {s["session_id"] for s in webui_sessions}
from api.models import _hide_from_default_sidebar as _cron_hide
deduped_cli = [s for s in cli
if s["session_id"] not in webui_ids
and not _cron_hide(s)]
deduped_cli = [s for s in cli if s["session_id"] not in webui_ids and is_cli_session_row_visible(s) and not _cron_hide(s)]
else:
webui_sessions = [s for s in webui_sessions if not _is_cli_session_for_settings(s)]
deduped_cli = []
merged = webui_sessions + deduped_cli
merged.sort(
@@ -2483,6 +2528,8 @@ def handle_get(handler, parsed) -> bool:
if _profiles_match(s.get("profile"), active_profile)]
other_profile_count = len(merged) - len(scoped)
scoped = _keep_latest_messaging_session_per_source(scoped)
if show_cli_sessions:
scoped = _cap_recent_cli_sessions(scoped, cli_cap=CLI_VISIBLE_SESSION_CAP)
safe_merged = []
for s in scoped:
item = dict(s)
+22 -2
View File
@@ -584,6 +584,24 @@ function _sourceKeyForSession(session) {
return (session && (session.raw_source || session.source_tag || session.source || '') || '').toLowerCase();
}
function _isCliSession(session) {
  if (!session) return false;
  // Normalized payloads flag CLI origin directly via session_source.
  if (session.session_source === 'cli') return true;
  // Legacy payloads carry the origin in a raw/source tag; first non-empty wins.
  const originTag = session.raw_source
    || session.source_tag
    || session.source
    || session.source_label
    || '';
  if (originTag.toLowerCase() === 'cli') return true;
  // Messaging sessions are never CLI, even when is_cli_session is true.
  if (_isMessagingSession(session)) return false;
  return session.is_cli_session === true;
}
function _normalizeMessageForCliImportComparison(message) {
if (!message || typeof message !== 'object') return message;
const clone = { ...message };
@@ -1281,6 +1299,8 @@ function _openSessionActionMenu(session, anchorEl){
}
closeSessionActionMenu();
const isMessagingSession = _isMessagingSession(session);
const isCliSession = _isCliSession(session);
const isExternalSession = isMessagingSession || isCliSession;
const menu=document.createElement('div');
menu.className='session-action-menu open';
menu.appendChild(_buildSessionAction(
@@ -1323,7 +1343,7 @@ function _openSessionActionMenu(session, anchorEl){
}catch(err){showToast(t('session_archive_failed')+err.message);}
}
));
if(!isMessagingSession){
if(!isExternalSession){
_appendSessionDuplicateAction(menu, session);
}
if(session.active_stream_id){
@@ -1338,7 +1358,7 @@ function _openSessionActionMenu(session, anchorEl){
}
));
}
if(!isMessagingSession){
if(!isExternalSession){
menu.appendChild(_buildSessionAction(
t('session_delete'),
t('session_delete_desc'),
+24
View File
@@ -52,3 +52,27 @@ class TestSidebarCancelAction:
)
assert "hideClarifyCard(true" in body
assert "hideApprovalCard(true" in body
def test_cli_session_helper_identifies_cli_origin(self):
    """The JS ``_isCliSession`` helper must consult every CLI-origin signal."""
    body = _function_body(SESSIONS_JS, "_isCliSession", 900)
    expected_snippets = (
        "function _isCliSession(session) {",
        "session.session_source === 'cli'",
        "session.raw_source",
        "session.source_tag",
        "session.source",
        "session.source_label",
        "if (_isMessagingSession(session)) return false;",
        "return session.is_cli_session === true;",
    )
    for snippet in expected_snippets:
        assert snippet in body
def test_cli_sessions_hide_duplicate_and_delete_in_action_menu(self):
    """Session action menu should hide duplicate/delete for CLI-origin sessions."""
    body = _function_body(SESSIONS_JS, "_openSessionActionMenu", 3600)
    gate = "if(!isExternalSession)"
    assert "const isCliSession = _isCliSession(session);" in body
    assert "const isExternalSession = isMessagingSession || isCliSession;" in body
    assert gate in body

    # str.find returns -1 when the needle is absent, so compare against that
    # sentinel rather than relying on "> 0".
    first = body.find("_appendSessionDuplicateAction")
    second = body.find("t('session_delete')")
    assert first != -1 and second != -1, "menu actions should still include duplicate/delete nodes"
    assert first < second, "duplicate action should render before delete action"

    # duplicate/delete should both be gated by the same external-session check:
    # one gate must open before the duplicate action, and another must sit
    # between the duplicate action and the delete action.
    duplicate_gate = body.find(gate)
    assert duplicate_gate < first, "duplicate action should sit behind the external-session gate"
    delete_gate = body.find(gate, first)
    assert delete_gate != -1 and delete_gate < second, (
        "delete action should sit behind its own external-session gate"
    )