feat(browser): browser-use + firecrawl plugins; drop single-eligible shortcut

Migrates the remaining two cloud browser providers to plugins:

  plugins/browser/browser_use/    — dual auth (direct BROWSER_USE_API_KEY
                                    or managed Nous gateway), idempotency-
                                    key handling for retried managed-mode
                                    creates, x-external-call-id capture.
  plugins/browser/firecrawl/      — direct FIRECRAWL_API_KEY only;
                                    distinct from plugins/web/firecrawl/
                                    (same key, different endpoint).

Also drops the 'single-eligible shortcut' rule from
agent.browser_registry._resolve(). Was a copy-paste from
web_search_registry that would have introduced a real behavior change:
a user with only FIRECRAWL_API_KEY set (for web-extract) would silently
get routed to a paid Firecrawl cloud browser on a fresh install — not
matching origin/main, which only auto-detected between Browser Use and
Browserbase. Third-party browser plugins are subject to the same gate:
they require explicit `browser.cloud_provider` to take effect.

Verified end-to-end via plugin discovery:
  - 3 plugins register (browser-use, browserbase, firecrawl)
  - _resolve(None) with no creds: None (local mode)
  - _resolve(None) with only FIRECRAWL_API_KEY: None (matches main)
  - _resolve('firecrawl'): firecrawl (explicit wins)
  - _resolve(None) with BU+firecrawl: browser-use (legacy walk first hit)
  - _resolve(None) with all three: browser-use (legacy walk order)
This commit is contained in:
kshitijk4poor
2026-05-14 14:11:48 +05:30
committed by Teknium
parent b8138ac405
commit a15cdfb050
7 changed files with 530 additions and 12 deletions
+19 -12
View File
@@ -12,8 +12,7 @@ Active selection
The active provider is chosen by configuration with this precedence:
1. ``browser.cloud_provider`` in ``config.yaml`` (explicit override).
2. If exactly one registered provider is available, use it.
3. Legacy preference order — ``browser-use`` → ``browserbase`` — filtered by
2. Legacy preference order — ``browser-use`` → ``browserbase`` — filtered by
availability. Matches the historic auto-detect order in
:func:`tools.browser_tool._get_cloud_provider` (Browser Use checked first
because it covers both the managed Nous gateway and direct API key path;
@@ -22,7 +21,7 @@ The active provider is chosen by configuration with this precedence:
cloud browser when they explicitly set ``browser.cloud_provider:
firecrawl``, matching pre-migration behaviour where Firecrawl was never
auto-selected.
4. Otherwise ``None`` — the dispatcher falls back to local browser mode.
3. Otherwise ``None`` — the dispatcher falls back to local browser mode.
The explicit-config branch (rule 1) intentionally ignores ``is_available()``
so the dispatcher surfaces a typed "X_API_KEY is not set" error to the user
@@ -132,12 +131,22 @@ def _resolve(configured: Optional[str]) -> Optional[BrowserProvider]:
:meth:`is_available` returns False — the dispatcher will surface a
precise "X_API_KEY is not set" error instead of silently routing
somewhere else.
3. **Single-provider shortcut.** When only one registered provider
reports ``is_available() == True``, return it.
4. **Legacy preference walk, filtered by availability.** Walk
3. **Legacy preference walk, filtered by availability.** Walk
:data:`_LEGACY_PREFERENCE` (``browser-use`` → ``browserbase``) looking
for a provider whose ``is_available()`` is True.
There is intentionally NO "single-eligible shortcut" rule here (unlike
:func:`agent.web_search_registry._resolve`). Pre-migration, the
auto-detect branch in ``tools.browser_tool._get_cloud_provider`` only
considered Browser Use and Browserbase; Firecrawl was reachable only
via an explicit ``browser.cloud_provider: firecrawl`` config key.
Preserving that gate matters because Firecrawl shares its API key with
the *web* extract plugin (``plugins/web/firecrawl/``), so users who set
``FIRECRAWL_API_KEY`` for web extract must NOT get silently routed to a
paid cloud browser on a fresh install. Third-party browser-provider
plugins added under ``~/.hermes/plugins/browser/<vendor>/`` are subject
to the same gate — they must be explicitly configured to take effect.
Returns None when no provider is configured AND no available provider
matches the legacy preference; the dispatcher then falls back to local
browser mode.
@@ -170,12 +179,10 @@ def _resolve(configured: Optional[str]) -> Optional[BrowserProvider]:
configured,
)
# 3. + 4. Auto-detect path — filter by availability so we don't surface
# a provider the user has no credentials for.
eligible = [p for p in snapshot.values() if _is_available_safe(p)]
if len(eligible) == 1:
return eligible[0]
# 3. Legacy preference walk — only providers in _LEGACY_PREFERENCE are
# auto-eligible. Filtered by availability so we don't surface a
# provider the user has no credentials for. See docstring for why
# we do NOT fall back to "any single-eligible registered provider".
for legacy in _LEGACY_PREFERENCE:
provider = snapshot.get(legacy)
if provider is not None and _is_available_safe(provider):
+14
View File
@@ -0,0 +1,14 @@
"""Browser Use cloud browser plugin — bundled, auto-loaded.
Mirrors the ``plugins/web/<vendor>/`` layout: ``provider.py`` holds the
provider class; ``__init__.py::register`` instantiates and registers it.
"""
from __future__ import annotations
from plugins.browser.browser_use.provider import BrowserUseBrowserProvider
def register(ctx) -> None:
"""Register the Browser Use provider with the plugin context."""
ctx.register_browser_provider(BrowserUseBrowserProvider())
+7
View File
@@ -0,0 +1,7 @@
name: browser-browser-use
version: 1.0.0
description: "Browser Use (https://browser-use.com) cloud browser backend. Supports both direct BROWSER_USE_API_KEY and the managed Nous tool gateway. Also powers the 'Nous Subscription' UX flow that bills usage to a Nous subscription."
author: NousResearch
kind: backend
provides_browser_providers:
- browser-use
+305
View File
@@ -0,0 +1,305 @@
"""Browser Use cloud browser provider — plugin form.
Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing
ABC introduced in PR #25214). The legacy in-tree module
``tools.browser_providers.browser_use`` was removed in the same PR; this file
is now the canonical implementation.
Browser Use is the only browser backend with dual auth: a direct
``BROWSER_USE_API_KEY`` for self-billed users, or the managed Nous tool
gateway (which Hermes uses to bill Browser Use sessions to a Nous
subscription). The dispatch order — direct API key first, managed gateway
second — preserves the pre-migration behaviour in
``tools.browser_providers.browser_use.BrowserUseProvider._get_config_or_none``.
Config keys this provider responds to::
browser:
cloud_provider: "browser-use" # explicit selection
tool_gateway:
browser: "gateway" # optional: prefer managed gateway
# even when BROWSER_USE_API_KEY is set
Auth env vars (one of)::
BROWSER_USE_API_KEY=... # https://browser-use.com
# OR a managed Nous gateway entry (configured via 'hermes setup')
"""
from __future__ import annotations
import logging
import os
import threading
import uuid
from typing import Any, Dict, Optional
import requests
from agent.browser_provider import BrowserProvider
logger = logging.getLogger(__name__)
# Idempotency tracking for managed-mode session creation. The managed Nous
# gateway returns 409 "already in progress" on retried POSTs; we forward the
# original idempotency key so the gateway can deduplicate. Cleared on
# success or terminal failure.
_pending_create_keys: Dict[str, str] = {}
_pending_create_keys_lock = threading.Lock()
_BASE_URL = "https://api.browser-use.com/api/v3"
_DEFAULT_MANAGED_TIMEOUT_MINUTES = 5
_DEFAULT_MANAGED_PROXY_COUNTRY_CODE = "us"
def _get_or_create_pending_create_key(task_id: str) -> str:
with _pending_create_keys_lock:
existing = _pending_create_keys.get(task_id)
if existing:
return existing
created = f"browser-use-session-create:{uuid.uuid4().hex}"
_pending_create_keys[task_id] = created
return created
def _clear_pending_create_key(task_id: str) -> None:
with _pending_create_keys_lock:
_pending_create_keys.pop(task_id, None)
def _should_preserve_pending_create_key(response: requests.Response) -> bool:
"""Decide whether to keep the idempotency key after a failed create.
Preserve the key when the failure looks retryable (5xx) OR when the
gateway reports the original request is still in flight (409 "already
in progress") — in either case, retrying with the same key lets the
gateway deduplicate.
Drop the key on any other 4xx (auth failure, bad request, etc.) — those
won't succeed by being retried.
"""
if response.status_code >= 500:
return True
if response.status_code != 409:
return False
try:
payload = response.json()
except Exception:
return False
if not isinstance(payload, dict):
return False
error = payload.get("error")
if not isinstance(error, dict):
return False
message = str(error.get("message") or "").lower()
return "already in progress" in message
class BrowserUseBrowserProvider(BrowserProvider):
"""Browser Use (https://browser-use.com) cloud browser backend.
Dual auth: prefers a direct BROWSER_USE_API_KEY when set, falling back
to the managed Nous tool gateway when ``tool_gateway.browser`` config
routes through it. Setting ``tool_gateway.browser: gateway`` flips the
order so managed billing wins even when BROWSER_USE_API_KEY is present.
"""
@property
def name(self) -> str:
return "browser-use"
@property
def display_name(self) -> str:
return "Browser Use"
def is_available(self) -> bool:
return self._get_config_or_none() is not None
# ------------------------------------------------------------------
# Config resolution (direct API key OR managed Nous gateway)
# ------------------------------------------------------------------
def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
# Import here to avoid a hard dependency at module-import time —
# managed_tool_gateway pulls in the Nous auth stack which can be
# heavy and is not needed for direct-API-key users.
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway
# 1. Direct API key path (unless user explicitly prefers gateway).
api_key = os.environ.get("BROWSER_USE_API_KEY")
if api_key and not prefers_gateway("browser"):
return {
"api_key": api_key,
"base_url": _BASE_URL,
"managed_mode": False,
}
# 2. Managed Nous gateway path.
managed = resolve_managed_tool_gateway("browser-use")
if managed is None:
return None
# Hold reference to managed_nous_tools_enabled so static analysis
# doesn't flag the import as unused — the helper is consulted by
# _get_config() below to compose a more accurate error message.
_ = managed_nous_tools_enabled
return {
"api_key": managed.nous_user_token,
"base_url": managed.gateway_origin.rstrip("/"),
"managed_mode": True,
}
def _get_config(self) -> Dict[str, Any]:
from tools.tool_backend_helpers import managed_nous_tools_enabled
config = self._get_config_or_none()
if config is None:
message = (
"Browser Use requires a direct BROWSER_USE_API_KEY credential."
)
if managed_nous_tools_enabled():
message = (
"Browser Use requires either a direct BROWSER_USE_API_KEY "
"credential or a managed Browser Use gateway configuration."
)
raise ValueError(message)
return config
# ------------------------------------------------------------------
# Session lifecycle
# ------------------------------------------------------------------
def _headers(self, config: Dict[str, Any]) -> Dict[str, str]:
return {
"Content-Type": "application/json",
"X-Browser-Use-API-Key": config["api_key"],
}
def create_session(self, task_id: str) -> Dict[str, object]:
config = self._get_config()
managed_mode = bool(config.get("managed_mode"))
headers = self._headers(config)
if managed_mode:
headers["X-Idempotency-Key"] = _get_or_create_pending_create_key(task_id)
# Keep gateway-backed sessions short so billing authorization does not
# default to a long Browser-Use timeout when Hermes only needs a task-
# scoped ephemeral browser.
payload = (
{
"timeout": _DEFAULT_MANAGED_TIMEOUT_MINUTES,
"proxyCountryCode": _DEFAULT_MANAGED_PROXY_COUNTRY_CODE,
}
if managed_mode
else {}
)
response = requests.post(
f"{config['base_url']}/browsers",
headers=headers,
json=payload,
timeout=30,
)
if not response.ok:
if managed_mode and not _should_preserve_pending_create_key(response):
_clear_pending_create_key(task_id)
raise RuntimeError(
f"Failed to create Browser Use session: "
f"{response.status_code} {response.text}"
)
session_data = response.json()
if managed_mode:
_clear_pending_create_key(task_id)
session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
external_call_id = (
response.headers.get("x-external-call-id") if managed_mode else None
)
logger.info("Created Browser Use session %s", session_name)
cdp_url = session_data.get("cdpUrl") or session_data.get("connectUrl") or ""
return {
"session_name": session_name,
"bb_session_id": session_data["id"],
"cdp_url": cdp_url,
"features": {"browser_use": True},
"external_call_id": external_call_id,
}
def close_session(self, session_id: str) -> bool:
try:
config = self._get_config()
except ValueError:
logger.warning(
"Cannot close Browser Use session %s — missing credentials", session_id
)
return False
try:
response = requests.patch(
f"{config['base_url']}/browsers/{session_id}",
headers=self._headers(config),
json={"action": "stop"},
timeout=10,
)
if response.status_code in {200, 201, 204}:
logger.debug("Successfully closed Browser Use session %s", session_id)
return True
else:
logger.warning(
"Failed to close Browser Use session %s: HTTP %s - %s",
session_id,
response.status_code,
response.text[:200],
)
return False
except Exception as e:
logger.error("Exception closing Browser Use session %s: %s", session_id, e)
return False
def emergency_cleanup(self, session_id: str) -> None:
config = self._get_config_or_none()
if config is None:
logger.warning(
"Cannot emergency-cleanup Browser Use session %s — missing credentials",
session_id,
)
return
try:
requests.patch(
f"{config['base_url']}/browsers/{session_id}",
headers=self._headers(config),
json={"action": "stop"},
timeout=5,
)
except Exception as e:
logger.debug(
"Emergency cleanup failed for Browser Use session %s: %s", session_id, e
)
def get_setup_schema(self) -> Dict[str, Any]:
return {
"name": "Browser Use",
"badge": "paid",
"tag": "Cloud browser with remote execution",
"env_vars": [
{
"key": "BROWSER_USE_API_KEY",
"prompt": "Browser Use API key",
"url": "https://browser-use.com",
},
],
"post_setup": "agent_browser",
}
+16
View File
@@ -0,0 +1,16 @@
"""Firecrawl cloud browser plugin — bundled, auto-loaded.
Distinct from ``plugins/web/firecrawl/`` (the web search/extract/crawl
plugin); both share the FIRECRAWL_API_KEY but speak to different endpoints
(``/v2/browser`` here vs ``/v2/search`` / ``/v2/scrape`` / ``/v2/crawl``
over there).
"""
from __future__ import annotations
from plugins.browser.firecrawl.provider import FirecrawlBrowserProvider
def register(ctx) -> None:
"""Register the Firecrawl cloud-browser provider with the plugin context."""
ctx.register_browser_provider(FirecrawlBrowserProvider())
+7
View File
@@ -0,0 +1,7 @@
name: browser-firecrawl
version: 1.0.0
description: "Firecrawl (https://firecrawl.dev) cloud browser backend. Requires FIRECRAWL_API_KEY. Distinct from the firecrawl WEB search/extract plugin — the two share an API key but operate on different endpoints."
author: NousResearch
kind: backend
provides_browser_providers:
- firecrawl
+162
View File
@@ -0,0 +1,162 @@
"""Firecrawl cloud browser provider — plugin form.
Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing
ABC introduced in PR #25214). The legacy in-tree module
``tools.browser_providers.firecrawl`` was removed in the same PR; this file
is now the canonical implementation.
This is the cloud-browser path — distinct from the firecrawl WEB plugin at
``plugins/web/firecrawl/`` which handles search/extract/crawl on
``/v2/search`` / ``/v2/scrape`` / ``/v2/crawl``. The two plugins share the
``FIRECRAWL_API_KEY`` env var but talk to different endpoints (this one
hits ``/v2/browser``).
Config keys this provider responds to::
browser:
cloud_provider: "firecrawl" # explicit selection only — not in the
# legacy auto-detect walk
Auth env vars::
FIRECRAWL_API_KEY=... # https://firecrawl.dev
FIRECRAWL_API_URL=... # optional override (default https://api.firecrawl.dev)
FIRECRAWL_BROWSER_TTL=... # optional, default 300 seconds
"""
from __future__ import annotations
import logging
import os
import uuid
from typing import Any, Dict
import requests
from agent.browser_provider import BrowserProvider
logger = logging.getLogger(__name__)
_BASE_URL = "https://api.firecrawl.dev"
class FirecrawlBrowserProvider(BrowserProvider):
"""Firecrawl (https://firecrawl.dev) cloud browser backend.
Cloud-browser path only — search/extract/crawl live in the separate
``plugins/web/firecrawl/`` plugin.
"""
@property
def name(self) -> str:
return "firecrawl"
@property
def display_name(self) -> str:
return "Firecrawl"
def is_available(self) -> bool:
return bool(os.environ.get("FIRECRAWL_API_KEY"))
# ------------------------------------------------------------------
# Session lifecycle
# ------------------------------------------------------------------
def _api_url(self) -> str:
return os.environ.get("FIRECRAWL_API_URL", _BASE_URL)
def _headers(self) -> Dict[str, str]:
api_key = os.environ.get("FIRECRAWL_API_KEY")
if not api_key:
raise ValueError(
"FIRECRAWL_API_KEY environment variable is required. "
"Get your key at https://firecrawl.dev"
)
return {
"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}",
}
def create_session(self, task_id: str) -> Dict[str, object]:
ttl = int(os.environ.get("FIRECRAWL_BROWSER_TTL", "300"))
body: Dict[str, object] = {"ttl": ttl}
response = requests.post(
f"{self._api_url()}/v2/browser",
headers=self._headers(),
json=body,
timeout=30,
)
if not response.ok:
raise RuntimeError(
f"Failed to create Firecrawl browser session: "
f"{response.status_code} {response.text}"
)
data = response.json()
session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
logger.info("Created Firecrawl browser session %s", session_name)
return {
"session_name": session_name,
"bb_session_id": data["id"],
"cdp_url": data["cdpUrl"],
"features": {"firecrawl": True},
}
def close_session(self, session_id: str) -> bool:
try:
response = requests.delete(
f"{self._api_url()}/v2/browser/{session_id}",
headers=self._headers(),
timeout=10,
)
if response.status_code in {200, 201, 204}:
logger.debug("Successfully closed Firecrawl session %s", session_id)
return True
else:
logger.warning(
"Failed to close Firecrawl session %s: HTTP %s - %s",
session_id,
response.status_code,
response.text[:200],
)
return False
except Exception as e:
logger.error("Exception closing Firecrawl session %s: %s", session_id, e)
return False
def emergency_cleanup(self, session_id: str) -> None:
try:
requests.delete(
f"{self._api_url()}/v2/browser/{session_id}",
headers=self._headers(),
timeout=5,
)
except ValueError:
logger.warning(
"Cannot emergency-cleanup Firecrawl session %s — missing credentials",
session_id,
)
except Exception as e:
logger.debug(
"Emergency cleanup failed for Firecrawl session %s: %s", session_id, e
)
def get_setup_schema(self) -> Dict[str, Any]:
return {
"name": "Firecrawl",
"badge": "paid",
"tag": "Cloud browser with remote execution",
"env_vars": [
{
"key": "FIRECRAWL_API_KEY",
"prompt": "Firecrawl API key",
"url": "https://firecrawl.dev",
},
],
"post_setup": "agent_browser",
}