feat: add n8n serve node and harden remote access

- Add the n8n community node package with credentials, command mapping, direct serve TCP client, and browser-cli protocol crypto helpers.
- Cover Ed25519 signing, canonical JSON, PQ transport encryption, request mapping, and security behavior with unit tests.
- Harden serve-http with per-address rate limiting, an 8 MB request body cap, and clear warnings when binding plain HTTP beyond loopback.
- Stop one-shot --key overrides from being persisted automatically; document explicit remote trust and keep key-management behind the keys policy tier.
- Make HTML-to-Markdown conversion safer by bounding tree depth and dropping unsafe link/image URL schemes.
- Bump package and extension release metadata to 0.16.3.
2026-06-19 10:00:23 +02:00
parent 7fe0e27fec
commit cea8a7e994
28 changed files with 3687 additions and 164 deletions
@@ -35,8 +35,6 @@ def add_remote_auth_fields(msg: dict, command: str, requested_profile: str | Non
  msg["accept_encoding"] = transport.client_accept_encoding()
  key_spec = key if key is not None else remote_registry.key_for_remote(remote_endpoint)
  private_key = load_private_key(key_spec)
-  if key is not None:
-    remote_registry.save_remote_key(remote_endpoint, str(key))

  route_profile = requested_profile
  if not route_profile and command not in NO_ROUTE_COMMANDS:
@@ -52,8 +50,6 @@ async def add_remote_auth_fields_async(msg: dict, command: str, requested_profil
  msg["accept_encoding"] = transport.client_accept_encoding()
  key_spec = key if key is not None else await asyncio.to_thread(remote_registry.key_for_remote, remote_endpoint)
  private_key = await asyncio.to_thread(load_private_key, key_spec)
-  if key is not None:
-    await asyncio.to_thread(remote_registry.save_remote_key, remote_endpoint, str(key))

  route_profile = requested_profile
  if not route_profile and command not in NO_ROUTE_COMMANDS:
@@ -11,9 +11,13 @@ from rich.console import Console
 from browser_cli import BrowserCLI
 from browser_cli.command_security import CommandPolicy, assert_command_allowed
 from browser_cli.commands import command_policy_from_options, command_policy_options
+from browser_cli.serve.security import RateLimiter

 console = Console()

+# Hard cap on request body size so a bogus Content-Length can't exhaust memory.
+MAX_BODY_BYTES = 8 * 1024 * 1024
+
 def _is_loopback(host: str) -> bool:
  return host in {"127.0.0.1", "localhost", "::1"}

@@ -21,6 +25,7 @@ class _Handler(BaseHTTPRequestHandler):
  client: BrowserCLI
  token: str | None = None
  policy: CommandPolicy = CommandPolicy()
+  rate_limiter: RateLimiter | None = None

  def _authorized(self) -> bool:
    if self.token is None:
@@ -37,6 +42,12 @@ class _Handler(BaseHTTPRequestHandler):
    self._send(401, {"error": "missing or invalid token"})
    return False

+  def _within_rate_limit(self) -> bool:
+    if self.rate_limiter is None or self.rate_limiter.allow(self.client_address[0]):
+      return True
+    self._send(429, {"error": "rate limit exceeded; slow down and retry"})
+    return False
+
  def _send(self, status: int, payload):
    raw = json.dumps(payload, default=str).encode("utf-8")
    self.send_response(status)
@@ -48,8 +59,11 @@ class _Handler(BaseHTTPRequestHandler):
  def do_GET(self):
    path = urlparse(self.path).path
    try:
-      if path != "/health" and not self._require_auth():
-        return
+      if path != "/health":
+        if not self._require_auth():
+          return
+        if not self._within_rate_limit():
+          return
      if path == "/tabs":
        self._send(200, [t.__dict__ for t in self.client.tabs.list()])
      elif path == "/clients":
@@ -64,16 +78,21 @@ class _Handler(BaseHTTPRequestHandler):
  def do_POST(self):
    path = urlparse(self.path).path
    try:
-      length = int(self.headers.get("Content-Length", "0"))
-      body = json.loads(self.rfile.read(length) or b"{}")
-      if path == "/command":
-        if not self._require_auth():
-          return
-        command = body.get("command")
-        assert_command_allowed(command, self.policy)
-        self._send(200, {"result": self.client.command(command, body.get("args") or {})})
-      else:
+      if path != "/command":
        self._send(404, {"error": "not found"})
+        return
+      if not self._require_auth():
+        return
+      if not self._within_rate_limit():
+        return
+      length = int(self.headers.get("Content-Length", "0"))
+      if length > MAX_BODY_BYTES:
+        self._send(413, {"error": f"request body too large (max {MAX_BODY_BYTES} bytes)"})
+        return
+      body = json.loads(self.rfile.read(length) or b"{}")
+      command = body.get("command")
+      assert_command_allowed(command, self.policy)
+      self._send(200, {"result": self.client.command(command, body.get("args") or {})})
    except PermissionError as exc:
      self._send(403, {"error": str(exc)})
    except Exception as exc:
@@ -90,21 +109,32 @@ class _Handler(BaseHTTPRequestHandler):
@click.option("--key", default=None, help="Remote auth key spec")
@click.option("--token", default=None, help="Bearer token required for HTTP access (generated by default)")
@click.option("--no-auth", is_flag=True, help="Disable HTTP auth (only allowed on loopback hosts)")
+@click.option("--rate-limit", default=100.0, show_default=True, type=float, help="Max requests/sec per client address (0 disables)")
@command_policy_options
-def cmd_serve_http(host, port, browser, remote, key, token, no_auth, allow_read_page, allow_control, allow_dangerous, allow_keys, allow_all):
+def cmd_serve_http(host, port, browser, remote, key, token, no_auth, rate_limit, allow_read_page, allow_control, allow_dangerous, allow_keys, allow_all):
  """Expose a tiny local HTTP JSON gateway (/tabs, /clients, /command).

  Auth is enabled by default. Pass the printed token as either
  ``Authorization: Bearer <token>`` or ``X-Browser-CLI-Token: <token>``.
+
+  This gateway speaks plain HTTP — the token is sent in clear text. Keep it on
+  loopback, or put a TLS-terminating reverse proxy in front before exposing it.
  """
  if no_auth and not _is_loopback(host):
    raise click.ClickException("--no-auth is only allowed on loopback hosts")
+  if not _is_loopback(host):
+    console.print(
+      "[yellow]Warning:[/yellow] binding beyond loopback — this gateway is plain HTTP and the "
+      "token travels in clear text. Put a TLS-terminating reverse proxy in front, or use "
+      "[bold]browser-cli serve[/bold] (encrypted) instead."
+    )
  auth_token = None if no_auth else (token or secrets.token_urlsafe(32))
  policy = command_policy_from_options(allow_read_page=allow_read_page, allow_control=allow_control, allow_dangerous=allow_dangerous, allow_keys=allow_keys, allow_all=allow_all)
+  rate_limiter = RateLimiter(rate_limit) if rate_limit and rate_limit > 0 else None
  handler = type(
    "BrowserCLIHTTPHandler",
    (_Handler,),
-    {"client": BrowserCLI(browser=browser, remote=remote, key=key), "token": auth_token, "policy": policy},
+    {"client": BrowserCLI(browser=browser, remote=remote, key=key), "token": auth_token, "policy": policy, "rate_limiter": rate_limiter},
  )
  server = ThreadingHTTPServer((host, port), handler)
  console.print(f"[green]HTTP gateway listening on http://{host}:{port}[/green]")
@@ -11,6 +11,13 @@ class _HtmlNode:
    self.text = text
    self.children = []

+# Cap how deep the parsed tree may nest. Hostile page content (thousands of
+# nested elements) would otherwise blow Python's recursion limit in the
+# depth-first render walkers below. Bounding here protects every walker at once.
+# 200 levels is far beyond any real document; deeper content is flattened, not
+# dropped (its text still reaches the output).
+_MAX_TREE_DEPTH = 200
+
 class _HtmlTreeBuilder(HTMLParser):
  _VOID_TAGS = {"br", "hr", "img"}

@@ -22,7 +29,9 @@ class _HtmlTreeBuilder(HTMLParser):
  def handle_starttag(self, tag, attrs):
    node = _HtmlNode(tag=tag.lower(), attrs=dict(attrs))
    self._stack[-1].children.append(node)
-    if node.tag not in self._VOID_TAGS:
+    # Only descend while under the depth cap; beyond it, children of this node
+    # attach to the current (capped) parent — flattened but preserved.
+    if node.tag not in self._VOID_TAGS and len(self._stack) < _MAX_TREE_DEPTH:
      self._stack.append(node)

  def handle_startendtag(self, tag, attrs):
@@ -57,6 +66,14 @@ def _collapse_blank_lines(value):
 def _escape_markdown(text):
  return re.sub(r"([\\`[\]])", r"\\\1", text)

+# Schemes that are dangerous if the produced markdown is later rendered as HTML
+# by a downstream consumer. The output is plain text here, but neutralising them
+# keeps the converter from laundering an XSS payload through to such a consumer.
+_UNSAFE_URL_SCHEME = re.compile(r"^\s*(?:javascript|vbscript|data)\s*:", re.IGNORECASE)
+
+def _safe_url(url):
+  return "" if _UNSAFE_URL_SCHEME.match(url or "") else url
+
 def _escape_table_cell(text):
  return text.replace("|", r"\|").replace("\n", " ").strip()

@@ -86,14 +103,14 @@ def _inline_text(node):
  if tag == "br":
    return "\n"
  if tag == "img":
-    src = node.attrs.get("src") or ""
+    src = _safe_url(node.attrs.get("src") or "")
    alt = _normalize_text(node.attrs.get("alt") or "")
    if not src:
      return ""
    return f"![{_escape_markdown(alt)}]({src})" if alt else f"![]({src})"
  if tag == "a":
    text = _normalize_inline("".join(_inline_text(child) for child in node.children))
-    href = node.attrs.get("href") or ""
+    href = _safe_url(node.attrs.get("href") or "")
    return f"[{text or href}]({href})" if href else text
  if tag == "code":
    text = _normalize_inline("".join(_inline_text(child) for child in node.children))
@@ -235,5 +252,10 @@ def _block_to_markdown(node):
 def convert_html_to_markdown(html, clean_markdown_output):
  parser = _HtmlTreeBuilder()
  parser.feed(html or "")
-  markdown = _block_to_markdown(parser.root)
+  try:
+    markdown = _block_to_markdown(parser.root)
+  except RecursionError:
+    # The depth cap should prevent this, but never let hostile page content
+    # crash the caller: fall back to a flat, tag-stripped text extraction.
+    markdown = _normalize_inline(re.sub(r"<[^>]*>", " ", html or ""))
  return clean_markdown_output(markdown)