mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-21 03:39:54 +00:00
f36c89cd57
PR #25580 was authored before #2746 landed on main, so its plugin
versions of browser_use/browserbase/firecrawl ship without the
requests.RequestException → RuntimeError wrapping that 13c72fb4 added
to the legacy tools/browser_providers/ files for #2746. Cherry-picking
the PR + git rm'ing the legacy files (the migration's intent) would
silently revert that network-error fix.
Port the same try/except pattern into the three plugin create_session()
methods. Browser Use managed-mode keeps its raw-exception propagation
(idempotency-key retry semantics).
Co-authored-by: nidhi-singh02 <nidhi2894@gmail.com>
311 lines
11 KiB
Python
311 lines
11 KiB
Python
"""Browser Use cloud browser provider — plugin form.
|
|
|
|
Subclasses :class:`agent.browser_provider.BrowserProvider` (the plugin-facing
|
|
ABC introduced in PR #25214). The legacy in-tree module
|
|
``tools.browser_providers.browser_use`` was removed in the same PR; this file
|
|
is now the canonical implementation.
|
|
|
|
Browser Use is the only browser backend with dual auth: a direct
|
|
``BROWSER_USE_API_KEY`` for self-billed users, or the managed Nous tool
|
|
gateway (which Hermes uses to bill Browser Use sessions to a Nous
|
|
subscription). The dispatch order — direct API key first, managed gateway
|
|
second — preserves the pre-migration behaviour in
|
|
``tools.browser_providers.browser_use.BrowserUseProvider._get_config_or_none``.
|
|
|
|
Config keys this provider responds to::
|
|
|
|
browser:
|
|
cloud_provider: "browser-use" # explicit selection
|
|
tool_gateway:
|
|
browser: "gateway" # optional: prefer managed gateway
|
|
# even when BROWSER_USE_API_KEY is set
|
|
|
|
Auth env vars (one of)::
|
|
|
|
BROWSER_USE_API_KEY=... # https://browser-use.com
|
|
# OR a managed Nous gateway entry (configured via 'hermes setup')
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
import threading
|
|
import uuid
|
|
from typing import Any, Dict, Optional
|
|
|
|
import requests
|
|
|
|
from agent.browser_provider import BrowserProvider
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Idempotency tracking for managed-mode session creation. The managed Nous
|
|
# gateway returns 409 "already in progress" on retried POSTs; we forward the
|
|
# original idempotency key so the gateway can deduplicate. Cleared on
|
|
# success or terminal failure.
|
|
_pending_create_keys: Dict[str, str] = {}
|
|
_pending_create_keys_lock = threading.Lock()
|
|
|
|
_BASE_URL = "https://api.browser-use.com/api/v3"
|
|
_DEFAULT_MANAGED_TIMEOUT_MINUTES = 5
|
|
_DEFAULT_MANAGED_PROXY_COUNTRY_CODE = "us"
|
|
|
|
|
|
def _get_or_create_pending_create_key(task_id: str) -> str:
|
|
with _pending_create_keys_lock:
|
|
existing = _pending_create_keys.get(task_id)
|
|
if existing:
|
|
return existing
|
|
|
|
created = f"browser-use-session-create:{uuid.uuid4().hex}"
|
|
_pending_create_keys[task_id] = created
|
|
return created
|
|
|
|
|
|
def _clear_pending_create_key(task_id: str) -> None:
|
|
with _pending_create_keys_lock:
|
|
_pending_create_keys.pop(task_id, None)
|
|
|
|
|
|
def _should_preserve_pending_create_key(response: requests.Response) -> bool:
|
|
"""Decide whether to keep the idempotency key after a failed create.
|
|
|
|
Preserve the key when the failure looks retryable (5xx) OR when the
|
|
gateway reports the original request is still in flight (409 "already
|
|
in progress") — in either case, retrying with the same key lets the
|
|
gateway deduplicate.
|
|
|
|
Drop the key on any other 4xx (auth failure, bad request, etc.) — those
|
|
won't succeed by being retried.
|
|
"""
|
|
if response.status_code >= 500:
|
|
return True
|
|
|
|
if response.status_code != 409:
|
|
return False
|
|
|
|
try:
|
|
payload = response.json()
|
|
except Exception:
|
|
return False
|
|
|
|
if not isinstance(payload, dict):
|
|
return False
|
|
|
|
error = payload.get("error")
|
|
if not isinstance(error, dict):
|
|
return False
|
|
|
|
message = str(error.get("message") or "").lower()
|
|
return "already in progress" in message
|
|
|
|
|
|
class BrowserUseBrowserProvider(BrowserProvider):
|
|
"""Browser Use (https://browser-use.com) cloud browser backend.
|
|
|
|
Dual auth: prefers a direct BROWSER_USE_API_KEY when set, falling back
|
|
to the managed Nous tool gateway when ``tool_gateway.browser`` config
|
|
routes through it. Setting ``tool_gateway.browser: gateway`` flips the
|
|
order so managed billing wins even when BROWSER_USE_API_KEY is present.
|
|
"""
|
|
|
|
@property
|
|
def name(self) -> str:
|
|
return "browser-use"
|
|
|
|
@property
|
|
def display_name(self) -> str:
|
|
return "Browser Use"
|
|
|
|
def is_available(self) -> bool:
|
|
return self._get_config_or_none() is not None
|
|
|
|
# ------------------------------------------------------------------
|
|
# Config resolution (direct API key OR managed Nous gateway)
|
|
# ------------------------------------------------------------------
|
|
|
|
def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
|
|
# Import here to avoid a hard dependency at module-import time —
|
|
# managed_tool_gateway pulls in the Nous auth stack which can be
|
|
# heavy and is not needed for direct-API-key users.
|
|
from tools.managed_tool_gateway import resolve_managed_tool_gateway
|
|
from tools.tool_backend_helpers import prefers_gateway
|
|
|
|
# Direct API key wins unless the user has explicitly opted into the
|
|
# managed Nous gateway via ``tool_gateway.browser: gateway``.
|
|
api_key = os.environ.get("BROWSER_USE_API_KEY")
|
|
if api_key and not prefers_gateway("browser"):
|
|
return {
|
|
"api_key": api_key,
|
|
"base_url": _BASE_URL,
|
|
"managed_mode": False,
|
|
}
|
|
|
|
managed = resolve_managed_tool_gateway("browser-use")
|
|
if managed is None:
|
|
return None
|
|
|
|
return {
|
|
"api_key": managed.nous_user_token,
|
|
"base_url": managed.gateway_origin.rstrip("/"),
|
|
"managed_mode": True,
|
|
}
|
|
|
|
def _get_config(self) -> Dict[str, Any]:
|
|
from tools.tool_backend_helpers import managed_nous_tools_enabled
|
|
|
|
config = self._get_config_or_none()
|
|
if config is None:
|
|
message = (
|
|
"Browser Use requires a direct BROWSER_USE_API_KEY credential."
|
|
)
|
|
if managed_nous_tools_enabled():
|
|
message = (
|
|
"Browser Use requires either a direct BROWSER_USE_API_KEY "
|
|
"credential or a managed Browser Use gateway configuration."
|
|
)
|
|
raise ValueError(message)
|
|
return config
|
|
|
|
# ------------------------------------------------------------------
|
|
# Session lifecycle
|
|
# ------------------------------------------------------------------
|
|
|
|
def _headers(self, config: Dict[str, Any]) -> Dict[str, str]:
|
|
return {
|
|
"Content-Type": "application/json",
|
|
"X-Browser-Use-API-Key": config["api_key"],
|
|
}
|
|
|
|
def create_session(self, task_id: str) -> Dict[str, object]:
|
|
config = self._get_config()
|
|
managed_mode = bool(config.get("managed_mode"))
|
|
|
|
headers = self._headers(config)
|
|
if managed_mode:
|
|
headers["X-Idempotency-Key"] = _get_or_create_pending_create_key(task_id)
|
|
|
|
# Keep gateway-backed sessions short so billing authorization does not
|
|
# default to a long Browser-Use timeout when Hermes only needs a task-
|
|
# scoped ephemeral browser.
|
|
payload = (
|
|
{
|
|
"timeout": _DEFAULT_MANAGED_TIMEOUT_MINUTES,
|
|
"proxyCountryCode": _DEFAULT_MANAGED_PROXY_COUNTRY_CODE,
|
|
}
|
|
if managed_mode
|
|
else {}
|
|
)
|
|
|
|
try:
|
|
response = requests.post(
|
|
f"{config['base_url']}/browsers",
|
|
headers=headers,
|
|
json=payload,
|
|
timeout=30,
|
|
)
|
|
except requests.RequestException as exc:
|
|
# Managed mode: propagate raw so callers can retry with the
|
|
# preserved idempotency key. Direct mode: wrap network failures
|
|
# into a clean RuntimeError for end users.
|
|
if managed_mode:
|
|
raise
|
|
raise RuntimeError(
|
|
f"Browser Use API connection failed: {exc}"
|
|
) from exc
|
|
|
|
if not response.ok:
|
|
if managed_mode and not _should_preserve_pending_create_key(response):
|
|
_clear_pending_create_key(task_id)
|
|
raise RuntimeError(
|
|
f"Failed to create Browser Use session: "
|
|
f"{response.status_code} {response.text}"
|
|
)
|
|
|
|
session_data = response.json()
|
|
if managed_mode:
|
|
_clear_pending_create_key(task_id)
|
|
session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
|
|
external_call_id = (
|
|
response.headers.get("x-external-call-id") if managed_mode else None
|
|
)
|
|
|
|
logger.info("Created Browser Use session %s", session_name)
|
|
|
|
cdp_url = session_data.get("cdpUrl") or session_data.get("connectUrl") or ""
|
|
|
|
return {
|
|
"session_name": session_name,
|
|
"bb_session_id": session_data["id"],
|
|
"cdp_url": cdp_url,
|
|
"features": {"browser_use": True},
|
|
"external_call_id": external_call_id,
|
|
}
|
|
|
|
def close_session(self, session_id: str) -> bool:
|
|
try:
|
|
config = self._get_config()
|
|
except ValueError:
|
|
logger.warning(
|
|
"Cannot close Browser Use session %s — missing credentials", session_id
|
|
)
|
|
return False
|
|
|
|
try:
|
|
response = requests.patch(
|
|
f"{config['base_url']}/browsers/{session_id}",
|
|
headers=self._headers(config),
|
|
json={"action": "stop"},
|
|
timeout=10,
|
|
)
|
|
if response.status_code in {200, 201, 204}:
|
|
logger.debug("Successfully closed Browser Use session %s", session_id)
|
|
return True
|
|
else:
|
|
logger.warning(
|
|
"Failed to close Browser Use session %s: HTTP %s - %s",
|
|
session_id,
|
|
response.status_code,
|
|
response.text[:200],
|
|
)
|
|
return False
|
|
except Exception as e:
|
|
logger.error("Exception closing Browser Use session %s: %s", session_id, e)
|
|
return False
|
|
|
|
def emergency_cleanup(self, session_id: str) -> None:
|
|
config = self._get_config_or_none()
|
|
if config is None:
|
|
logger.warning(
|
|
"Cannot emergency-cleanup Browser Use session %s — missing credentials",
|
|
session_id,
|
|
)
|
|
return
|
|
try:
|
|
requests.patch(
|
|
f"{config['base_url']}/browsers/{session_id}",
|
|
headers=self._headers(config),
|
|
json={"action": "stop"},
|
|
timeout=5,
|
|
)
|
|
except Exception as e:
|
|
logger.debug(
|
|
"Emergency cleanup failed for Browser Use session %s: %s", session_id, e
|
|
)
|
|
|
|
def get_setup_schema(self) -> Dict[str, Any]:
|
|
return {
|
|
"name": "Browser Use",
|
|
"badge": "paid",
|
|
"tag": "Cloud browser with remote execution",
|
|
"env_vars": [
|
|
{
|
|
"key": "BROWSER_USE_API_KEY",
|
|
"prompt": "Browser Use API key",
|
|
"url": "https://browser-use.com",
|
|
},
|
|
],
|
|
"post_setup": "agent_browser",
|
|
}
|