mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-21 03:39:54 +00:00
a0c031299b
Adds a new bundled web search provider plugin backed by xAI's agentic Web Search tool (server-side `web_search` on the Responses API). Slots in alongside the existing Firecrawl / Tavily / Exa / Brave / SearXNG / DDGS providers; opt in via `web.backend: xai` (or auto-selected by the registry's single-provider shortcut when it's the only available web provider, matching every other backend's behavior). Reuses the existing xAI HTTP credential plumbing (`tools/xai_http.py`) so it works with both `hermes auth login xai-oauth` (SuperGrok OAuth) and `XAI_API_KEY` — no new credential paths, no new env vars, no new setup-wizard prompts. The existing `xai_grok` post_setup hook handles credential collection. Reference: https://docs.x.ai/developers/tools/web-search Provider behavior ----------------- - Sends a structured prompt to Grok with `tools=[{"type": "web_search"}]` enabled and `include=["no_inline_citations"]`, then parses results from a `{"results": [...]}` JSON block (primary), falling back to `url_citation` annotations (secondary) and the top-level `citations` list (last-ditch). Annotation fallback falls through to citations when no rows are extractable, so future annotation types xAI may add don't silently mask real data. - HTTP 200 + `{"error": {...}}` envelopes (model-overload, refusal) are surfaced as failures rather than masked as success-with-empty- results. - HTTP 401 on the OAuth path triggers a single `force_refresh=True` retry — closes two gaps the resolver's proactive JWT-exp shortcut doesn't cover: opaque (non-JWT) access tokens and mid-window revocation. Env-var (`XAI_API_KEY`) credentials never retry; they can't be refreshed and an immediate retry would just burn quota. - `is_available()` is a cheap probe (env var OR auth.json read), never invokes the OAuth resolver — required by the ABC contract because it runs on every `hermes tools` repaint and at tool-registration time. - Class docstring documents the LLM-in-a-trench-coat trust model so callers piping untrusted input into `web_search` know returned URLs are model-generated and should be validated before fetching. Config (`config.yaml`): web: backend: xai xai: model: grok-4.3 # optional, defaults to grok-4.3 allowed_domains: # optional, max 5 — mutex with excluded_domains - arxiv.org excluded_domains: # optional, max 5 - example-spam.com timeout: 90 # optional, seconds Files ----- - plugins/web/xai/plugin.yaml (new) plugin manifest - plugins/web/xai/__init__.py (new) register(ctx) hook - plugins/web/xai/provider.py (new) XAIWebSearchProvider impl - tools/xai_http.py (+47) has_xai_credentials() cheap-probe helper + keyword-only force_refresh arg on resolve_xai_http_ credentials() (backwards compatible; all 9 other call sites unaffected) - tools/web_tools.py (+11) "xai" added to configured- backend set + branch in _is_backend_available() - tests/tools/test_web_providers_xai.py (new, 39 tests) covers identity, cheap-probe semantics, JSON / annotation / citations parse paths, request payload shape, error envelopes, OAuth force-refresh-on-401 retry, env-var-no-retry guard, 500-not- retried guard, refresh-returns- same-token guard, OAuth runtime resolution, and backend wiring. Tests ----- - 39 xai-suite passes - 79 sibling web-provider tests (brave-free, ddgs, searxng, base) pass - 119 cross-suite tests for other xai_http callers (transcription, x_search, tts) pass — verifies the new keyword-only arg is BC - scripts/check-windows-footguns.py: clean on all 5 modified files No edits to run_agent.py, cli.py, gateway/, toolsets, config schema, plugin core, or auth core.
561 lines
22 KiB
Python
561 lines
22 KiB
Python
"""xAI Web Search — plugin form.
|
|
|
|
Routes ``web_search`` tool calls through xAI's agentic Web Search tool
|
|
(server-side ``web_search`` on the Responses API). Grok runs the actual
|
|
searching and page-browsing server-side; we ask it to return the top
|
|
results as structured JSON so we can hand back the same
|
|
``{title, url, description, position}`` rows every other Hermes web
|
|
provider produces.
|
|
|
|
Reference: https://docs.x.ai/developers/tools/web-search
|
|
|
|
Config keys this provider responds to::
|
|
|
|
web:
|
|
search_backend: "xai" # explicit per-capability
|
|
backend: "xai" # shared fallback
|
|
|
|
Optional knobs (under ``web.xai`` in ``config.yaml``)::
|
|
|
|
web:
|
|
xai:
|
|
model: "grok-4.3" # reasoning model required by web_search
|
|
allowed_domains: ["x.ai"] # max 5 — mutually exclusive with excluded_domains
|
|
excluded_domains: ["bad.com"] # max 5 — mutually exclusive with allowed_domains
|
|
timeout: 90 # seconds (default 90)
|
|
|
|
Auth: reuses :func:`tools.xai_http.resolve_xai_http_credentials`, which
|
|
prefers Hermes-managed xAI Grok OAuth (via ``hermes auth``) and falls back
|
|
to ``XAI_API_KEY`` (resolved through ``~/.hermes/.env``, then
|
|
``os.environ``).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import re
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from agent.web_search_provider import WebSearchProvider
|
|
from tools.xai_http import (
|
|
has_xai_credentials,
|
|
hermes_xai_user_agent,
|
|
resolve_xai_http_credentials,
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
DEFAULT_MODEL = "grok-4.3"
|
|
DEFAULT_TIMEOUT = 90
|
|
_MAX_DOMAIN_FILTERS = 5 # xAI hard cap on allowed_domains / excluded_domains
|
|
|
|
# Match the JSON object Grok is asked to emit. Tolerates leading/trailing
|
|
# prose since reasoning models occasionally narrate before the JSON block
|
|
# even when explicitly asked not to.
|
|
_JSON_BLOCK_RE = re.compile(r"\{[\s\S]*\}", re.MULTILINE)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Config
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _load_xai_web_config() -> Dict[str, Any]:
|
|
"""Read ``web.xai`` from config.yaml (returns {} on miss)."""
|
|
try:
|
|
from hermes_cli.config import load_config
|
|
|
|
cfg = load_config()
|
|
web_section = cfg.get("web") if isinstance(cfg, dict) else None
|
|
xai_section = web_section.get("xai") if isinstance(web_section, dict) else None
|
|
return xai_section if isinstance(xai_section, dict) else {}
|
|
except Exception as exc: # noqa: BLE001
|
|
logger.debug("Could not load web.xai config: %s", exc)
|
|
return {}
|
|
|
|
|
|
def _coerce_domain_list(value: Any) -> List[str]:
|
|
"""Coerce a config value to a clean list of <=5 domain strings."""
|
|
if not isinstance(value, list):
|
|
return []
|
|
cleaned: List[str] = []
|
|
for item in value:
|
|
if isinstance(item, str) and item.strip():
|
|
cleaned.append(item.strip())
|
|
if len(cleaned) >= _MAX_DOMAIN_FILTERS:
|
|
break
|
|
return cleaned
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Provider
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class XAIWebSearchProvider(WebSearchProvider):
|
|
"""Search-only provider backed by xAI's agentic Web Search tool.
|
|
|
|
Sends a structured prompt to Grok with ``tools=[{"type": "web_search"}]``
|
|
enabled and asks it to return the top *limit* results as JSON. Falls
|
|
back to the Responses API ``citations`` list if Grok ignores the JSON
|
|
schema instruction (rare for grok-4.3 but cheap insurance).
|
|
|
|
No extract capability — pair with Firecrawl / Tavily / Exa for
|
|
``web_extract`` if you need page content.
|
|
|
|
Trust model
|
|
-----------
|
|
Unlike index-backed providers (Brave / Tavily / Exa) which return
|
|
verbatim search-engine results, this backend is an LLM in a trench
|
|
coat: Grok decides which URLs to surface, generates the titles and
|
|
descriptions itself, and is influenced by the *content of the query*.
|
|
A maliciously crafted query (e.g. injected via untrusted upstream
|
|
input the agent picked up) can in principle steer Grok into emitting
|
|
attacker-chosen URLs. Callers that pipe untrusted text directly into
|
|
``web_search`` should treat returned URLs the same way they would
|
|
treat any model-generated link — validate before fetching.
|
|
"""
|
|
|
|
@property
|
|
def name(self) -> str:
|
|
return "xai"
|
|
|
|
@property
|
|
def display_name(self) -> str:
|
|
return "xAI Web Search (Grok)"
|
|
|
|
def is_available(self) -> bool:
|
|
"""Cheap availability probe — env var OR auth-store has OAuth tokens.
|
|
|
|
Delegates to :func:`tools.xai_http.has_xai_credentials`, which is
|
|
deliberately *not* the same as :func:`resolve_xai_http_credentials`:
|
|
it never triggers OAuth token refresh or acquires the auth-store
|
|
lock. The ABC contract requires this method to be safe to call on
|
|
every ``hermes tools`` repaint and at tool-registration time.
|
|
Token freshness / refresh is handled inside :meth:`search`.
|
|
"""
|
|
return has_xai_credentials()
|
|
|
|
def supports_search(self) -> bool:
|
|
return True
|
|
|
|
def supports_extract(self) -> bool:
|
|
return False
|
|
|
|
def supports_crawl(self) -> bool:
|
|
return False
|
|
|
|
# -- Search -----------------------------------------------------------
|
|
|
|
def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
|
|
"""Execute a Grok-backed web search.
|
|
|
|
Returns ``{"success": True, "data": {"web": [{title, url, description, position}, ...]}}``
|
|
on success, ``{"success": False, "error": str}`` on failure.
|
|
"""
|
|
try:
|
|
from tools.interrupt import is_interrupted
|
|
|
|
if is_interrupted():
|
|
return {"success": False, "error": "Interrupted"}
|
|
except Exception: # noqa: BLE001 — interrupt module is best-effort
|
|
pass
|
|
|
|
creds = resolve_xai_http_credentials()
|
|
api_key = str(creds.get("api_key") or "").strip()
|
|
base_url = str(creds.get("base_url") or "https://api.x.ai/v1").strip().rstrip("/")
|
|
if not api_key:
|
|
return {
|
|
"success": False,
|
|
"error": (
|
|
"No xAI credentials found. Run `hermes auth` to sign in with "
|
|
"xAI Grok OAuth, or set XAI_API_KEY."
|
|
),
|
|
}
|
|
|
|
# Clamp limit to the same range the caller (web_search_tool) accepts,
|
|
# so we don't silently downgrade explicit limits. Grok happily
|
|
# produces longer lists; cost scales linearly with the requested
|
|
# count via reasoning tokens, but that's the caller's call to make.
|
|
try:
|
|
limit = int(limit)
|
|
except (TypeError, ValueError):
|
|
limit = 5
|
|
limit = max(1, min(limit, 100))
|
|
|
|
cfg = _load_xai_web_config()
|
|
model = cfg.get("model") if isinstance(cfg.get("model"), str) else DEFAULT_MODEL
|
|
model = model.strip() or DEFAULT_MODEL
|
|
|
|
try:
|
|
timeout = float(cfg.get("timeout", DEFAULT_TIMEOUT))
|
|
except (TypeError, ValueError):
|
|
timeout = DEFAULT_TIMEOUT
|
|
|
|
allowed = _coerce_domain_list(cfg.get("allowed_domains"))
|
|
excluded = _coerce_domain_list(cfg.get("excluded_domains"))
|
|
if allowed and excluded:
|
|
# xAI explicitly rejects this combo — surface a clear error
|
|
# rather than a 400 from the API.
|
|
return {
|
|
"success": False,
|
|
"error": (
|
|
"web.xai.allowed_domains and web.xai.excluded_domains "
|
|
"cannot both be set (xAI restriction)."
|
|
),
|
|
}
|
|
|
|
web_search_tool: Dict[str, Any] = {"type": "web_search"}
|
|
if allowed:
|
|
web_search_tool["filters"] = {"allowed_domains": allowed}
|
|
elif excluded:
|
|
web_search_tool["filters"] = {"excluded_domains": excluded}
|
|
|
|
prompt = self._build_prompt(query, limit)
|
|
|
|
payload: Dict[str, Any] = {
|
|
"model": model,
|
|
"input": [{"role": "user", "content": prompt}],
|
|
"tools": [web_search_tool],
|
|
# Drop inline citation markdown — we want the JSON block clean,
|
|
# and we read URLs from annotations / citations separately.
|
|
"include": ["no_inline_citations"],
|
|
}
|
|
|
|
headers = {
|
|
"Authorization": f"Bearer {api_key}",
|
|
"Content-Type": "application/json",
|
|
"User-Agent": hermes_xai_user_agent(),
|
|
}
|
|
|
|
try:
|
|
import httpx
|
|
except ImportError:
|
|
return {
|
|
"success": False,
|
|
"error": "httpx is not installed (required for xAI web search)",
|
|
}
|
|
|
|
logger.info(
|
|
"xAI web search via %s: '%s' (limit=%d, model=%s)",
|
|
base_url, query, limit, model,
|
|
)
|
|
|
|
# Two-attempt loop: if the first call returns 401 and our creds came
|
|
# from the OAuth path, force-refresh the token once and retry. This
|
|
# closes two gaps the proactive resolver check doesn't cover:
|
|
# (1) opaque (non-JWT) access tokens — `_xai_access_token_is_expiring`
|
|
# can't decode them and returns False, so refresh never fires
|
|
# until the server hands us a 401.
|
|
# (2) mid-window revocation — admin revoke, refresh-token rotation,
|
|
# or clock skew can produce 401s on a token whose JWT `exp` claim
|
|
# is still in the future.
|
|
# Env-var (`XAI_API_KEY`) credentials skip the retry entirely — we
|
|
# can't refresh those and an immediate retry would just burn quota.
|
|
is_oauth_path = (creds.get("provider") == "xai-oauth")
|
|
resp = None
|
|
for attempt in range(2):
|
|
try:
|
|
resp = httpx.post(
|
|
f"{base_url}/responses",
|
|
headers=headers,
|
|
json=payload,
|
|
timeout=timeout,
|
|
)
|
|
resp.raise_for_status()
|
|
break
|
|
except httpx.HTTPStatusError as exc:
|
|
status = exc.response.status_code if exc.response is not None else 0
|
|
if status == 401 and attempt == 0 and is_oauth_path:
|
|
logger.info(
|
|
"xAI web search got 401 on first attempt; forcing OAuth "
|
|
"refresh and retrying once.",
|
|
)
|
|
try:
|
|
refreshed = resolve_xai_http_credentials(force_refresh=True)
|
|
refreshed_key = str(refreshed.get("api_key") or "").strip()
|
|
if refreshed_key and refreshed_key != api_key:
|
|
api_key = refreshed_key
|
|
headers["Authorization"] = f"Bearer {api_key}"
|
|
continue
|
|
# Refresh returned the same (or empty) token — no point
|
|
# in retrying. Fall through to the error return below.
|
|
except Exception as refresh_exc: # noqa: BLE001
|
|
logger.warning(
|
|
"xAI web search OAuth refresh after 401 failed: %s",
|
|
refresh_exc,
|
|
)
|
|
body = ""
|
|
try:
|
|
body = exc.response.text[:300] if exc.response is not None else ""
|
|
except Exception:
|
|
body = ""
|
|
logger.warning("xAI web search HTTP %d: %s", status, body)
|
|
return {
|
|
"success": False,
|
|
"error": f"xAI web search returned HTTP {status}: {body}".rstrip(),
|
|
}
|
|
except httpx.RequestError as exc:
|
|
logger.warning("xAI web search request error: %s", exc)
|
|
return {"success": False, "error": f"Could not reach xAI: {exc}"}
|
|
|
|
if resp is None:
|
|
# Defensive — both attempts somehow exited the loop without resp.
|
|
return {"success": False, "error": "xAI web search produced no response"}
|
|
|
|
try:
|
|
data = resp.json()
|
|
except Exception as exc: # noqa: BLE001
|
|
logger.warning("xAI web search bad JSON: %s", exc)
|
|
return {
|
|
"success": False,
|
|
"error": "Could not parse xAI Responses API reply as JSON",
|
|
}
|
|
|
|
# xAI's Responses surface sometimes returns HTTP 200 with an error
|
|
# envelope (model overloaded, content-policy refusal, etc.). Without
|
|
# this check, ``_extract_results`` would silently produce an empty
|
|
# list and we'd report success-with-no-rows — masking a real failure
|
|
# the agent should see and decide whether to retry.
|
|
api_error = data.get("error") if isinstance(data, dict) else None
|
|
if isinstance(api_error, dict):
|
|
err_msg = (
|
|
api_error.get("message")
|
|
or api_error.get("code")
|
|
or "unknown error"
|
|
)
|
|
logger.warning("xAI web search returned error envelope: %s", err_msg)
|
|
return {"success": False, "error": f"xAI returned an error: {err_msg}"}
|
|
|
|
web_results = self._extract_results(data, limit=limit)
|
|
if not web_results:
|
|
# Successful call, just no usable rows — return success with an
|
|
# empty list so the model can decide whether to retry. Matches
|
|
# what brave-free / exa do when the upstream API returns 0 hits.
|
|
return {"success": True, "data": {"web": []}}
|
|
|
|
return {"success": True, "data": {"web": web_results}}
|
|
|
|
# -- Prompt + parsing -------------------------------------------------
|
|
|
|
@staticmethod
|
|
def _build_prompt(query: str, limit: int) -> str:
|
|
"""Compose the prompt that asks Grok to act as a search engine.
|
|
|
|
We deliberately ask for a JSON object (not bare array) so we can
|
|
match it cheaply with ``_JSON_BLOCK_RE``; we explicitly forbid
|
|
prose, markdown fences, and inline-citation links to keep the
|
|
payload parseable.
|
|
"""
|
|
return (
|
|
"Use the web_search tool to find current information for the query below, "
|
|
"then respond with ONLY a single JSON object — no prose, no markdown "
|
|
"fences, no inline citation links — matching this exact schema:\n\n"
|
|
'{"results": [{"title": "string", "url": "string", '
|
|
'"description": "1-2 sentence summary"}]}\n\n'
|
|
f'Return at most {limit} results, ordered by relevance, with absolute '
|
|
"https:// URLs. If no usable results exist, return "
|
|
'{"results": []}.\n\n'
|
|
f"Query: {query}"
|
|
)
|
|
|
|
@classmethod
|
|
def _extract_results(
|
|
cls,
|
|
response_data: Dict[str, Any],
|
|
*,
|
|
limit: int,
|
|
) -> List[Dict[str, Any]]:
|
|
"""Pull a ``[{title, url, description, position}, ...]`` list out of a
|
|
Responses-API reply.
|
|
|
|
Strategy:
|
|
|
|
1. Walk ``output[*].content[*].text`` for ``output_text`` blocks and
|
|
try to parse the first JSON object that has a ``results`` list.
|
|
2. If the JSON path fails, fall back to the message annotations
|
|
(``url_citation`` entries) — every annotation carries a URL and
|
|
a ``title`` (citation number); we pair those URLs with surrounding
|
|
text from the message body as a best-effort description.
|
|
"""
|
|
text_blocks, annotations = cls._collect_output_text(response_data)
|
|
|
|
# Primary path: parse the JSON object Grok was asked for.
|
|
for block in text_blocks:
|
|
parsed = cls._try_parse_json_results(block, limit=limit)
|
|
if parsed:
|
|
return parsed
|
|
|
|
# Secondary path: derive results from message annotations + raw text.
|
|
# Only short-circuit when annotations actually yielded usable rows;
|
|
# otherwise fall through to the citations list. (xAI currently only
|
|
# emits ``url_citation`` annotations, but future annotation types
|
|
# would silently produce an empty result set if we returned here
|
|
# unconditionally — masking real data in ``citations``.)
|
|
if annotations:
|
|
joined_text = "\n".join(text_blocks)
|
|
annotation_results = cls._results_from_annotations(
|
|
annotations, joined_text, limit=limit,
|
|
)
|
|
if annotation_results:
|
|
return annotation_results
|
|
|
|
# Last-ditch: raw citations list (no titles or descriptions).
|
|
citations = response_data.get("citations") or []
|
|
if isinstance(citations, list):
|
|
return [
|
|
{
|
|
"title": "",
|
|
"url": str(u),
|
|
"description": "",
|
|
"position": i + 1,
|
|
}
|
|
for i, u in enumerate(citations[:limit])
|
|
if isinstance(u, str) and u.strip()
|
|
]
|
|
|
|
return []
|
|
|
|
@staticmethod
|
|
def _collect_output_text(
|
|
response_data: Dict[str, Any],
|
|
) -> tuple[List[str], List[Dict[str, Any]]]:
|
|
"""Return (text_blocks, annotations) extracted from ``response.output``."""
|
|
text_blocks: List[str] = []
|
|
annotations: List[Dict[str, Any]] = []
|
|
output = response_data.get("output")
|
|
if not isinstance(output, list):
|
|
return text_blocks, annotations
|
|
|
|
for item in output:
|
|
if not isinstance(item, dict) or item.get("type") != "message":
|
|
continue
|
|
content = item.get("content")
|
|
if not isinstance(content, list):
|
|
continue
|
|
for chunk in content:
|
|
if not isinstance(chunk, dict) or chunk.get("type") != "output_text":
|
|
continue
|
|
text = chunk.get("text")
|
|
if isinstance(text, str) and text.strip():
|
|
text_blocks.append(text)
|
|
chunk_annotations = chunk.get("annotations")
|
|
if isinstance(chunk_annotations, list):
|
|
for ann in chunk_annotations:
|
|
if isinstance(ann, dict):
|
|
annotations.append(ann)
|
|
return text_blocks, annotations
|
|
|
|
@staticmethod
|
|
def _try_parse_json_results(
|
|
text: str,
|
|
*,
|
|
limit: int,
|
|
) -> Optional[List[Dict[str, Any]]]:
|
|
"""Parse a JSON object with a ``results`` array out of ``text``.
|
|
|
|
Returns the normalized result list on success, ``None`` when the
|
|
block has no valid JSON object or no ``results`` key. Tolerates
|
|
leading/trailing prose because reasoning models sometimes prefix a
|
|
short narration even when told not to.
|
|
"""
|
|
# Try the whole string first — cheapest path when Grok obeys.
|
|
candidates = [text]
|
|
match = _JSON_BLOCK_RE.search(text)
|
|
if match and match.group(0) != text:
|
|
candidates.append(match.group(0))
|
|
|
|
for candidate in candidates:
|
|
try:
|
|
parsed = json.loads(candidate)
|
|
except (json.JSONDecodeError, ValueError):
|
|
continue
|
|
if not isinstance(parsed, dict):
|
|
continue
|
|
results = parsed.get("results")
|
|
if not isinstance(results, list):
|
|
continue
|
|
normalized: List[Dict[str, Any]] = []
|
|
for row in results[:limit]:
|
|
if not isinstance(row, dict):
|
|
continue
|
|
url = str(row.get("url", "")).strip()
|
|
if not url:
|
|
continue
|
|
normalized.append(
|
|
{
|
|
"title": str(row.get("title", "")).strip(),
|
|
"url": url,
|
|
"description": str(row.get("description", "")).strip(),
|
|
# Renumber from the kept results, not the raw input
|
|
# index, so a dropped malformed row doesn't leave a
|
|
# gap in the positions handed back to the agent.
|
|
"position": len(normalized) + 1,
|
|
}
|
|
)
|
|
if normalized:
|
|
return normalized
|
|
return None
|
|
|
|
@staticmethod
|
|
def _results_from_annotations(
|
|
annotations: List[Dict[str, Any]],
|
|
joined_text: str,
|
|
*,
|
|
limit: int,
|
|
) -> List[Dict[str, Any]]:
|
|
"""Best-effort fallback when JSON parsing fails.
|
|
|
|
Uses each ``url_citation`` annotation's ``url`` (the citation
|
|
title is just the integer label, so we don't surface it) and
|
|
slices ~200 characters of surrounding text as the description.
|
|
"""
|
|
seen: set[str] = set()
|
|
results: List[Dict[str, Any]] = []
|
|
for ann in annotations:
|
|
if ann.get("type") != "url_citation":
|
|
continue
|
|
url = str(ann.get("url", "")).strip()
|
|
if not url or url in seen:
|
|
continue
|
|
seen.add(url)
|
|
|
|
description = ""
|
|
start = ann.get("start_index")
|
|
end = ann.get("end_index")
|
|
if isinstance(start, int) and isinstance(end, int) and 0 <= start < end <= len(joined_text):
|
|
window_start = max(0, start - 200)
|
|
description = joined_text[window_start:start].strip()
|
|
if len(description) > 200:
|
|
description = description[-200:].strip()
|
|
|
|
results.append(
|
|
{
|
|
"title": "",
|
|
"url": url,
|
|
"description": description,
|
|
"position": len(results) + 1,
|
|
}
|
|
)
|
|
if len(results) >= limit:
|
|
break
|
|
return results
|
|
|
|
# -- Setup picker -----------------------------------------------------
|
|
|
|
def get_setup_schema(self) -> Dict[str, Any]:
|
|
# Auth resolution is delegated to the shared ``xai_grok`` post_setup
|
|
# hook (same one image_gen.xai and tts.xai use) so users see the
|
|
# familiar OAuth-or-API-key prompt for every xAI service.
|
|
return {
|
|
"name": "xAI Web Search (Grok)",
|
|
"badge": "paid",
|
|
"tag": (
|
|
"Agentic web search via Grok's web_search tool — uses xAI "
|
|
"Grok OAuth or XAI_API_KEY."
|
|
),
|
|
"env_vars": [],
|
|
"post_setup": "xai_grok",
|
|
}
|