mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-21 03:39:54 +00:00
23344a9a3c
Ship LSP semantic diagnostics as a bundled plugin (plugins/lsp/) using existing hook system. Zero lines of core code modified. Plugin wiring: - pre_tool_call: capture LSP baseline before write_file/patch - transform_tool_result: inject diagnostics into tool result JSON - on_session_start/on_session_end + atexit: lifecycle management Key design: - Baselines keyed by (session_id, abs_path) for concurrent safety - Diagnostics added as 'lsp_diagnostics' JSON field (preserves shape) - Per-file workspace detection (no static session-start gate) - V4A multi-file patch skipped for MVP - Short timeout (3s) — cold start degrades gracefully - os.path.exists heuristic for Docker/SSH backend skip - First relevant write with no server → INFO log with install hint Tests: 77/77 pass including: - Protocol framing, reporter formatting, workspace resolution - Client E2E against mock LSP server (live_system_guard_bypass) - Eventlog steady-state silence contract - Backend-gate heuristic (local vs non-local paths) - Full hook flow integration (pre→write→transform with diagnostics) Source: PR #24168 by @teknium1, PR #24155 by @OutThisLife Co-authored-by: Teknium <127238744+teknium1@users.noreply.github.com>
224 lines
7.6 KiB
Python
224 lines
7.6 KiB
Python
"""Workspace and project-root resolution for LSP.
|
|
|
|
Two concerns live here:

1. **Workspace gate** — the upper-level "is this directory a project?"
   check. Hermes only runs LSP when the cwd (or the file being edited)
   sits inside a git worktree. Files outside any git root never
   trigger LSP, even if a server is configured. This keeps Telegram
   gateway users whose cwd is their home directory from spawning
   daemons.

2. **NearestRoot** — the per-server project-root walk. Each language
   server cares about a different marker (``pyproject.toml`` for
   Python, ``Cargo.toml`` for Rust, ``go.mod`` for Go, etc.) and
   wants the directory containing that marker. ``nearest_root()``
   walks up from a starting path looking for any of a list of marker
   files, optionally bailing if an exclude marker shows up first.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Iterable, Optional, Tuple
|
|
|
|
logger = logging.getLogger("agent.lsp.workspace")

# Cache: start directory → (worktree_root, is_git) so repeated lookups
# from the same directory don't re-stat the whole ancestor chain.
# ``worktree_root`` is ``None`` (and ``is_git`` False) for a cached miss.
# Keys are the string form of the *normalized* start path (absolute,
# ``~`` expanded, ``.``/``..`` collapsed). Symlinks are NOT resolved, so
# two different symlinked spellings of one directory get separate entries.
# Emptied by ``clear_cache()``.
_workspace_cache: dict[str, Tuple[Optional[str], bool]] = {}
|
|
|
|
|
|
def normalize_path(path: str) -> str:
    """Return an absolute, lexically cleaned form of ``path`` for use as a map key.

    A leading ``~`` is expanded to the user's home directory, relative
    paths are anchored at the process working directory, and ``.`` /
    ``..`` segments are collapsed.

    Symlinks are deliberately left unresolved: some LSP servers are
    sensitive to the path spelling (rust-analyzer cares about Cargo
    workspace identity), and we prefer to keep the path the user typed.
    """
    expanded = os.path.expanduser(path)
    return os.path.abspath(expanded)
|
|
|
|
|
|
def find_git_worktree(start: str) -> Optional[str]:
    """Locate the git checkout that contains ``start``.

    The search begins at ``start`` (or its parent directory when
    ``start`` is an existing file) and climbs one directory at a time
    until a ``.git`` entry is seen or the filesystem root is reached.

    Both a ``.git`` directory (regular clone) and a ``.git`` file (as
    created by ``git worktree add``) count as a hit.

    Returns:
        The directory holding the ``.git`` entry as a string, or
        ``None`` when nothing was found, the input could not be
        normalized, or a directory probe raised ``OSError``.

    Results — including misses — are memoized in ``_workspace_cache``
    under the normalized start directory.
    """
    try:
        origin = Path(normalize_path(start))
        if origin.is_file():
            # Begin the climb in the directory that holds the file.
            origin = origin.parent
    except (OSError, RuntimeError, ValueError):
        # Pathological input (symlink loop, encoding error, ...): give
        # up quietly instead of crashing the lint hook.
        return None

    cache_key = str(origin)
    hit = _workspace_cache.get(cache_key)
    if hit is not None:
        return hit[0]

    found: Optional[str] = None
    # The start directory followed by every ancestor up to the root.
    # Capped at 64 levels as a defensive bound — far deeper than any
    # reasonable monorepo — so a pathological path can't keep us busy.
    for directory in (origin, *origin.parents)[:64]:
        try:
            has_git = (directory / ".git").exists()
        except OSError:
            # Permission problem on an ancestor — stop and report a miss.
            break
        if has_git:
            found = str(directory)
            break

    _workspace_cache[cache_key] = (found, found is not None)
    return found
|
|
|
|
|
|
def is_inside_workspace(path: str, workspace_root: str) -> bool:
    """Return True iff ``path`` is inside (or equal to) ``workspace_root``.

    Both arguments are normalized to absolute paths first. Symlinks are
    not resolved — a file accessed via a symlink that points outside
    the workspace still counts by its lexical location. This is the
    conservative interpretation and matches LSP behaviour where servers
    reject didOpen for unrelated files.

    Comparison is done on ``os.path.normcase``-folded paths. On Windows
    this makes the check case- and separator-insensitive (``C:\\Proj``
    vs ``c:/proj``); on POSIX platforms ``normcase`` is the identity, so
    the comparison stays case-sensitive there.

    Args:
        path: File or directory to test.
        workspace_root: Directory that defines the workspace.

    Returns:
        True when ``path`` equals ``workspace_root`` or lies beneath it.
    """
    # Fold case/separators up front: ``os.path.commonpath`` on Windows
    # matches case-insensitively but returns the casing of its first
    # argument, so comparing its raw result to ``root`` would fail for
    # paths that differ from the root only by case.
    candidate = os.path.normcase(normalize_path(path))
    root = os.path.normcase(normalize_path(workspace_root))
    if candidate == root:
        return True
    try:
        common = os.path.commonpath([candidate, root])
    except ValueError:
        # Different drives on Windows — cannot share a root.
        return False
    return common == root
|
|
|
|
|
|
def _marker_present(directory: Path, name: str) -> bool:
    """Return True if ``directory / name`` exists; treat probe errors as absent."""
    try:
        return (directory / name).exists()
    except OSError:
        return False


def nearest_root(
    start: str,
    markers: Iterable[str],
    *,
    excludes: Optional[Iterable[str]] = None,
    ceiling: Optional[str] = None,
) -> Optional[str]:
    """Walk up from ``start`` looking for any of the given marker files.

    Returns the **directory containing** the first matched marker, or
    ``None`` if no marker is found before hitting ``ceiling`` (or the
    filesystem root if no ceiling).

    If ``excludes`` is provided and an exclude marker matches *first*
    in the upward walk, returns ``None`` — the server is gated off
    for that file. Mirrors OpenCode's NearestRoot exclude semantics
    (e.g. typescript skips deno projects when ``deno.json`` is found
    before ``package.json``). Within a single directory, excludes are
    checked before markers, so an exclude wins a tie.

    Args:
        start: File or directory to start from. An existing file is
            replaced by its parent directory.
        markers: Marker file/directory names. A single ``str`` is
            accepted and treated as one marker (not as an iterable of
            characters).
        excludes: Optional names that gate the server off when seen.
            A single ``str`` is likewise treated as one name.
        ceiling: Optional directory at which the walk stops. The
            ceiling directory itself is still checked. If it is not an
            ancestor of ``start`` the walk continues to the filesystem
            root.

    Returns:
        The matching directory as a string, or ``None``. ``None`` is
        also returned when ``start`` cannot be normalized or inspected.
    """
    try:
        start_path = Path(normalize_path(start))
        if start_path.is_file():
            start_path = start_path.parent
    except (OSError, RuntimeError, ValueError):
        # Same defensive stance as ``find_git_worktree``: unusable
        # input means "no root", never a crash in the lint hook.
        return None
    ceiling_path = Path(normalize_path(ceiling)) if ceiling else None

    # A bare string is an Iterable[str] of its characters; without this
    # guard "pyproject.toml" would be probed as "p", "y", ..., and "."
    # would match every directory.
    if isinstance(markers, str):
        markers_list = [markers]
    else:
        markers_list = list(markers)
    if isinstance(excludes, str):
        excludes_list = [excludes]
    else:
        excludes_list = list(excludes) if excludes else []

    cur = start_path
    # Defensive cap matching ``find_git_worktree``. Bounded walk
    # protects against pathological inputs even though the
    # parent-equality stop normally terminates within ~10 steps.
    for _ in range(64):
        # Excludes first — one at this level gates the server off.
        if any(_marker_present(cur, exc) for exc in excludes_list):
            return None
        if any(_marker_present(cur, marker) for marker in markers_list):
            return str(cur)
        # Stop conditions: reached the ceiling or the filesystem root.
        if ceiling_path is not None and cur == ceiling_path:
            return None
        parent = cur.parent
        if parent == cur:
            return None
        cur = parent
    return None
|
|
|
|
|
|
def resolve_workspace_for_file(
    file_path: str,
    *,
    cwd: Optional[str] = None,
) -> Tuple[Optional[str], bool]:
    """Resolve the workspace root for a file.

    Returns ``(workspace_root, gated_in)`` where ``gated_in`` is True
    iff LSP should run for this file at all. Currently the gate is
    "file is inside a git worktree found by walking up from cwd OR
    from the file itself".

    The cwd path takes precedence — if the agent was launched in a
    git project and the file lives inside that worktree, that worktree
    is the workspace. Otherwise the file's own location is tried as a
    fallback anchor.

    Args:
        file_path: Path of the file being edited. A relative path is
            interpreted relative to ``cwd`` (the effective working
            directory), not necessarily the process working directory.
        cwd: Working directory to anchor the lookup at. Defaults to
            ``os.getcwd()``.

    Returns:
        ``(root, True)`` when a git worktree was found, otherwise
        ``(None, False)``.
    """
    cwd = cwd or os.getcwd()

    # Anchor relative file paths at the effective cwd. Without this a
    # caller passing an explicit ``cwd`` that differs from the process
    # cwd would have its relative path resolved against the wrong
    # directory by the helpers below. ``~`` is expanded first so that
    # home-relative paths are recognised as absolute.
    expanded = os.path.expanduser(file_path)
    if not os.path.isabs(expanded):
        file_path = os.path.join(normalize_path(cwd), expanded)

    cwd_root = find_git_worktree(cwd)
    if cwd_root is not None and is_inside_workspace(file_path, cwd_root):
        return cwd_root, True

    # File is outside the cwd's worktree (or cwd has none) — try the
    # file's own location as a secondary anchor. Useful for monorepos
    # where the user opens an unrelated checkout.
    file_root = find_git_worktree(file_path)
    if file_root is not None:
        return file_root, True
    return None, False
|
|
|
|
|
|
def clear_cache() -> None:
    """Forget every memoized worktree lookup.

    Intended to run at service shutdown, so that a later re-init
    starts from an empty cache rather than reusing answers computed
    during an earlier session. The cache dict is emptied in place.
    """
    _workspace_cache.clear()
|
|
|
|
|
|
# Public API of this module; ``_workspace_cache`` and ``logger`` are
# intentionally left out.
__all__ = [
    "find_git_worktree",
    "is_inside_workspace",
    "nearest_root",
    "normalize_path",
    "resolve_workspace_for_file",
    "clear_cache",
]
|