Files
alt-glitch 23344a9a3c feat(lsp): plugin-based LSP diagnostics with zero core changes
Ship LSP semantic diagnostics as a bundled plugin (plugins/lsp/) using
existing hook system.  Zero lines of core code modified.

Plugin wiring:
- pre_tool_call: capture LSP baseline before write_file/patch
- transform_tool_result: inject diagnostics into tool result JSON
- on_session_start/on_session_end + atexit: lifecycle management

Key design:
- Baselines keyed by (session_id, abs_path) for concurrent safety
- Diagnostics added as 'lsp_diagnostics' JSON field (preserves shape)
- Per-file workspace detection (no static session-start gate)
- V4A multi-file patch skipped for MVP
- Short timeout (3s) — cold start degrades gracefully
- os.path.exists heuristic for Docker/SSH backend skip
- First relevant write with no server → INFO log with install hint

Tests: 77/77 pass including:
- Protocol framing, reporter formatting, workspace resolution
- Client E2E against mock LSP server (live_system_guard_bypass)
- Eventlog steady-state silence contract
- Backend-gate heuristic (local vs non-local paths)
- Full hook flow integration (pre→write→transform with diagnostics)

Source: PR #24168 by @teknium1, PR #24155 by @OutThisLife
Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com>
2026-05-12 13:01:13 +00:00

224 lines
7.6 KiB
Python

"""Workspace and project-root resolution for LSP.
Two concerns live here:
1. **Workspace gate** — the upper-level "is this directory a project?"
check. Hermes only runs LSP when the cwd (or the file being edited)
sits inside a git worktree. Files outside any git root never
trigger LSP, even if a server is configured. This keeps Telegram
gateway users on user-home cwd's from spawning daemons.
2. **NearestRoot** — the per-server project-root walk. Each language
server cares about a different marker (``pyproject.toml`` for
Python, ``Cargo.toml`` for Rust, ``go.mod`` for Go, etc.) and
wants the directory containing that marker. ``nearest_root()``
walks up from a starting path looking for any of a list of marker
files, optionally bailing if an exclude marker shows up first.
"""
from __future__ import annotations
import logging
import os
from pathlib import Path
from typing import Iterable, Optional, Tuple
logger = logging.getLogger("agent.lsp.workspace")
# Cache: cwd → (worktree_root, is_git) so repeated calls don't re-stat.
# Cleared on shutdown. Keyed by absolute resolved path so symlink
# folds collapse to one entry.
_workspace_cache: dict = {}
def normalize_path(path: str) -> str:
"""Normalize a path for use as a stable map key.
Resolves ``~``, makes absolute, and collapses ``.``/``..``. We do
NOT resolve symlinks here — symlink stability matters for some
LSP servers (rust-analyzer cares about Cargo workspace identity)
and we want the canonical path the user typed when possible.
"""
return os.path.abspath(os.path.expanduser(path))
def find_git_worktree(start: str) -> Optional[str]:
"""Walk up from ``start`` looking for a ``.git`` entry (file or dir).
Returns the directory containing ``.git``, or ``None`` if no git
root is found before hitting the filesystem root.
A ``.git`` *file* (not directory) means we're inside a git
worktree set up via ``git worktree add`` — both forms count.
"""
try:
start_path = Path(normalize_path(start))
if start_path.is_file():
start_path = start_path.parent
except (OSError, RuntimeError, ValueError):
# Pathological input (loop in symlinks, encoding error, etc.) —
# bail out rather than crash the lint hook.
return None
# Cache check
cached = _workspace_cache.get(str(start_path))
if cached is not None:
root, _is_git = cached
return root
cur = start_path
# Defensive cap: the deepest reasonable monorepo is well under 64
# levels. Caps the walk so a pathological cwd or a symlink cycle
# we somehow traverse can't keep us looping.
for _ in range(64):
git_marker = cur / ".git"
try:
if git_marker.exists():
resolved = str(cur)
_workspace_cache[str(start_path)] = (resolved, True)
return resolved
except OSError:
# Permission error on a parent dir — bail out cleanly.
break
parent = cur.parent
if parent == cur:
break
cur = parent
_workspace_cache[str(start_path)] = (None, False)
return None
def is_inside_workspace(path: str, workspace_root: str) -> bool:
"""Return True iff ``path`` is inside (or equal to) ``workspace_root``.
Uses absolute paths but does not resolve symlinks — a file accessed
via a symlink that points outside the workspace still counts as
outside. This is the conservative interpretation; matches LSP
behaviour where servers reject didOpen for unrelated files.
"""
p = normalize_path(path)
root = normalize_path(workspace_root)
if p == root:
return True
# Use os.path.commonpath to handle case-insensitive filesystems
# correctly on macOS/Windows.
try:
common = os.path.commonpath([p, root])
except ValueError:
# Different drives on Windows.
return False
return common == root
def nearest_root(
start: str,
markers: Iterable[str],
*,
excludes: Optional[Iterable[str]] = None,
ceiling: Optional[str] = None,
) -> Optional[str]:
"""Walk up from ``start`` looking for any of the given marker files.
Returns the **directory containing** the first matched marker, or
``None`` if no marker is found before hitting ``ceiling`` (or the
filesystem root if no ceiling).
If ``excludes`` is provided and an exclude marker matches *first*
in the upward walk, returns ``None`` — the server is gated off
for that file. Mirrors OpenCode's NearestRoot exclude semantics
(e.g. typescript skips deno projects when ``deno.json`` is found
before ``package.json``).
"""
start_path = Path(normalize_path(start))
try:
if start_path.is_file():
start_path = start_path.parent
except (OSError, RuntimeError, ValueError):
return None
ceiling_path = Path(normalize_path(ceiling)) if ceiling else None
markers_list = list(markers)
excludes_list = list(excludes) if excludes else []
cur = start_path
# Defensive cap matching ``find_git_worktree``. Bounded walk
# protects against pathological inputs even though the
# parent-equality stop normally terminates within ~10 steps.
for _ in range(64):
# Check excludes first — if an exclude is found at this level,
# the server is gated off for this file.
for exc in excludes_list:
try:
if (cur / exc).exists():
return None
except OSError:
continue
# Then check markers.
for marker in markers_list:
try:
if (cur / marker).exists():
return str(cur)
except OSError:
continue
# Stop conditions.
if ceiling_path is not None and cur == ceiling_path:
return None
parent = cur.parent
if parent == cur:
return None
cur = parent
return None
def resolve_workspace_for_file(
file_path: str,
*,
cwd: Optional[str] = None,
) -> Tuple[Optional[str], bool]:
"""Resolve the workspace root for a file.
Returns ``(workspace_root, gated_in)`` where ``gated_in`` is True
iff LSP should run for this file at all. Currently the gate is
"file is inside a git worktree found by walking up from cwd OR
from the file itself".
The cwd path takes precedence — if the agent was launched in a
git project, that worktree is the workspace, and any edit inside
it (regardless of where the file lives) is in-scope. If the cwd
isn't in a git worktree, we try the file's own location as a
fallback.
Returns ``(None, False)`` when neither path is in a git worktree.
"""
cwd = cwd or os.getcwd()
cwd_root = find_git_worktree(cwd)
if cwd_root is not None:
if is_inside_workspace(file_path, cwd_root):
return cwd_root, True
# File is outside the cwd's worktree — try the file's own
# location as a secondary anchor. Useful for monorepos where
# the user opens an unrelated checkout.
file_root = find_git_worktree(file_path)
if file_root is not None:
return file_root, True
return None, False
def clear_cache() -> None:
"""Clear the workspace-resolution cache.
Called on service shutdown so a subsequent re-init doesn't pick
up stale results from a previous session.
"""
_workspace_cache.clear()
__all__ = [
"find_git_worktree",
"is_inside_workspace",
"nearest_root",
"normalize_path",
"resolve_workspace_for_file",
"clear_cache",
]