feat: add n8n serve node and harden remote access

- Add the n8n community node package with credentials, command mapping, direct serve TCP client, and browser-cli protocol crypto helpers.

- Cover Ed25519 signing, canonical JSON, PQ transport encryption, request mapping, and security behavior with unit tests.

- Harden serve-http with per-address rate limiting, an 8 MB request body cap, and clear warnings when binding plain HTTP beyond loopback.

- Stop one-shot --key overrides from being persisted automatically; document explicit remote trust and keep key-management behind the keys policy tier.

- Make HTML-to-Markdown conversion safer by bounding tree depth and dropping unsafe link/image URL schemes.

- Bump package and extension release metadata to 0.16.3.
This commit is contained in:
2026-06-19 10:00:23 +02:00
parent 7fe0e27fec
commit cea8a7e994
28 changed files with 3687 additions and 164 deletions
-4
View File
@@ -35,8 +35,6 @@ def add_remote_auth_fields(msg: dict, command: str, requested_profile: str | Non
msg["accept_encoding"] = transport.client_accept_encoding()
key_spec = key if key is not None else remote_registry.key_for_remote(remote_endpoint)
private_key = load_private_key(key_spec)
if key is not None:
remote_registry.save_remote_key(remote_endpoint, str(key))
route_profile = requested_profile
if not route_profile and command not in NO_ROUTE_COMMANDS:
@@ -52,8 +50,6 @@ async def add_remote_auth_fields_async(msg: dict, command: str, requested_profil
msg["accept_encoding"] = transport.client_accept_encoding()
key_spec = key if key is not None else await asyncio.to_thread(remote_registry.key_for_remote, remote_endpoint)
private_key = await asyncio.to_thread(load_private_key, key_spec)
if key is not None:
await asyncio.to_thread(remote_registry.save_remote_key, remote_endpoint, str(key))
route_profile = requested_profile
if not route_profile and command not in NO_ROUTE_COMMANDS:
+43 -13
View File
@@ -11,9 +11,13 @@ from rich.console import Console
from browser_cli import BrowserCLI
from browser_cli.command_security import CommandPolicy, assert_command_allowed
from browser_cli.commands import command_policy_from_options, command_policy_options
from browser_cli.serve.security import RateLimiter
console = Console()
# Hard cap on request body size so a bogus Content-Length can't exhaust memory.
MAX_BODY_BYTES = 8 * 1024 * 1024
def _is_loopback(host: str) -> bool:
return host in {"127.0.0.1", "localhost", "::1"}
@@ -21,6 +25,7 @@ class _Handler(BaseHTTPRequestHandler):
client: BrowserCLI
token: str | None = None
policy: CommandPolicy = CommandPolicy()
rate_limiter: RateLimiter | None = None
def _authorized(self) -> bool:
if self.token is None:
@@ -37,6 +42,12 @@ class _Handler(BaseHTTPRequestHandler):
self._send(401, {"error": "missing or invalid token"})
return False
def _within_rate_limit(self) -> bool:
if self.rate_limiter is None or self.rate_limiter.allow(self.client_address[0]):
return True
self._send(429, {"error": "rate limit exceeded; slow down and retry"})
return False
def _send(self, status: int, payload):
raw = json.dumps(payload, default=str).encode("utf-8")
self.send_response(status)
@@ -48,8 +59,11 @@ class _Handler(BaseHTTPRequestHandler):
def do_GET(self):
path = urlparse(self.path).path
try:
if path != "/health" and not self._require_auth():
return
if path != "/health":
if not self._require_auth():
return
if not self._within_rate_limit():
return
if path == "/tabs":
self._send(200, [t.__dict__ for t in self.client.tabs.list()])
elif path == "/clients":
@@ -64,16 +78,21 @@ class _Handler(BaseHTTPRequestHandler):
def do_POST(self):
path = urlparse(self.path).path
try:
length = int(self.headers.get("Content-Length", "0"))
body = json.loads(self.rfile.read(length) or b"{}")
if path == "/command":
if not self._require_auth():
return
command = body.get("command")
assert_command_allowed(command, self.policy)
self._send(200, {"result": self.client.command(command, body.get("args") or {})})
else:
if path != "/command":
self._send(404, {"error": "not found"})
return
if not self._require_auth():
return
if not self._within_rate_limit():
return
length = int(self.headers.get("Content-Length", "0"))
if length > MAX_BODY_BYTES:
self._send(413, {"error": f"request body too large (max {MAX_BODY_BYTES} bytes)"})
return
body = json.loads(self.rfile.read(length) or b"{}")
command = body.get("command")
assert_command_allowed(command, self.policy)
self._send(200, {"result": self.client.command(command, body.get("args") or {})})
except PermissionError as exc:
self._send(403, {"error": str(exc)})
except Exception as exc:
@@ -90,21 +109,32 @@ class _Handler(BaseHTTPRequestHandler):
@click.option("--key", default=None, help="Remote auth key spec")
@click.option("--token", default=None, help="Bearer token required for HTTP access (generated by default)")
@click.option("--no-auth", is_flag=True, help="Disable HTTP auth (only allowed on loopback hosts)")
@click.option("--rate-limit", default=100.0, show_default=True, type=float, help="Max requests/sec per client address (0 disables)")
@command_policy_options
def cmd_serve_http(host, port, browser, remote, key, token, no_auth, allow_read_page, allow_control, allow_dangerous, allow_keys, allow_all):
def cmd_serve_http(host, port, browser, remote, key, token, no_auth, rate_limit, allow_read_page, allow_control, allow_dangerous, allow_keys, allow_all):
"""Expose a tiny local HTTP JSON gateway (/tabs, /clients, /command).
Auth is enabled by default. Pass the printed token as either
``Authorization: Bearer <token>`` or ``X-Browser-CLI-Token: <token>``.
This gateway speaks plain HTTP — the token is sent in clear text. Keep it on
loopback, or put a TLS-terminating reverse proxy in front before exposing it.
"""
if no_auth and not _is_loopback(host):
raise click.ClickException("--no-auth is only allowed on loopback hosts")
if not _is_loopback(host):
console.print(
"[yellow]Warning:[/yellow] binding beyond loopback — this gateway is plain HTTP and the "
"token travels in clear text. Put a TLS-terminating reverse proxy in front, or use "
"[bold]browser-cli serve[/bold] (encrypted) instead."
)
auth_token = None if no_auth else (token or secrets.token_urlsafe(32))
policy = command_policy_from_options(allow_read_page=allow_read_page, allow_control=allow_control, allow_dangerous=allow_dangerous, allow_keys=allow_keys, allow_all=allow_all)
rate_limiter = RateLimiter(rate_limit) if rate_limit and rate_limit > 0 else None
handler = type(
"BrowserCLIHTTPHandler",
(_Handler,),
{"client": BrowserCLI(browser=browser, remote=remote, key=key), "token": auth_token, "policy": policy},
{"client": BrowserCLI(browser=browser, remote=remote, key=key), "token": auth_token, "policy": policy, "rate_limiter": rate_limiter},
)
server = ThreadingHTTPServer((host, port), handler)
console.print(f"[green]HTTP gateway listening on http://{host}:{port}[/green]")
+26 -4
View File
@@ -11,6 +11,13 @@ class _HtmlNode:
self.text = text
self.children = []
# Cap how deep the parsed tree may nest. Hostile page content (thousands of
# nested elements) would otherwise blow Python's recursion limit in the
# depth-first render walkers below. Bounding here protects every walker at once.
# 200 levels is far beyond any real document; deeper content is flattened, not
# dropped (its text still reaches the output).
_MAX_TREE_DEPTH = 200
class _HtmlTreeBuilder(HTMLParser):
_VOID_TAGS = {"br", "hr", "img"}
@@ -22,7 +29,9 @@ class _HtmlTreeBuilder(HTMLParser):
def handle_starttag(self, tag, attrs):
    """Attach a node for an opening tag and descend into it when permitted.

    The node is always appended to the current parent's children. It only
    becomes the new parent when it is not a void tag and the open-element
    stack is still below ``_MAX_TREE_DEPTH``.
    """
    node = _HtmlNode(tag=tag.lower(), attrs=dict(attrs))
    self._stack[-1].children.append(node)
    # Only descend while under the depth cap; beyond it, children of this node
    # attach to the current (capped) parent — flattened but preserved.
    is_container = node.tag not in self._VOID_TAGS
    if is_container and len(self._stack) < _MAX_TREE_DEPTH:
        self._stack.append(node)
def handle_startendtag(self, tag, attrs):
@@ -57,6 +66,14 @@ def _collapse_blank_lines(value):
def _escape_markdown(text):
return re.sub(r"([\\`[\]])", r"\\\1", text)
# Schemes that are dangerous if the produced markdown is later rendered as HTML
# by a downstream consumer. The output is plain text here, but neutralising them
# keeps the converter from laundering an XSS payload through to such a consumer.
_UNSAFE_URL_SCHEME = re.compile(r"^\s*(?:javascript|vbscript|data)\s*:", re.IGNORECASE)
def _safe_url(url):
return "" if _UNSAFE_URL_SCHEME.match(url or "") else url
def _escape_table_cell(text):
return text.replace("|", r"\|").replace("\n", " ").strip()
@@ -86,14 +103,14 @@ def _inline_text(node):
if tag == "br":
return "\n"
if tag == "img":
src = node.attrs.get("src") or ""
src = _safe_url(node.attrs.get("src") or "")
alt = _normalize_text(node.attrs.get("alt") or "")
if not src:
return ""
return f"![{_escape_markdown(alt)}]({src})" if alt else f"![]({src})"
if tag == "a":
text = _normalize_inline("".join(_inline_text(child) for child in node.children))
href = node.attrs.get("href") or ""
href = _safe_url(node.attrs.get("href") or "")
return f"[{text or href}]({href})" if href else text
if tag == "code":
text = _normalize_inline("".join(_inline_text(child) for child in node.children))
@@ -235,5 +252,10 @@ def _block_to_markdown(node):
def convert_html_to_markdown(html, clean_markdown_output):
    """Convert an HTML string to Markdown and post-process the result.

    Args:
        html: HTML source text; ``None`` or ``""`` is treated as an empty
            document.
        clean_markdown_output: Callable applied to the raw Markdown. Its
            return value is returned unchanged.

    Returns:
        Whatever ``clean_markdown_output`` returns for the rendered Markdown.
    """
    source = html or ""
    parser = _HtmlTreeBuilder()
    parser.feed(source)
    # feed() can hold back trailing text (for example a tail containing an
    # unterminated "&") while waiting for more input; close() flushes it so
    # the end of the document is not silently lost.
    parser.close()
    try:
        markdown = _block_to_markdown(parser.root)
    except RecursionError:
        # The depth cap should prevent this, but never let hostile page content
        # crash the caller: fall back to a flat, tag-stripped text extraction.
        markdown = _normalize_inline(re.sub(r"<[^>]*>", " ", source))
    return clean_markdown_output(markdown)