feat: add n8n serve node and harden remote access

- Add the n8n community node package with credentials, command mapping, direct serve TCP client, and browser-cli protocol crypto helpers.
- Cover Ed25519 signing, canonical JSON, PQ transport encryption, request mapping, and security behavior with unit tests.
- Harden serve-http with per-address rate limiting, an 8 MB request body cap, and clear warnings when binding plain HTTP beyond loopback.
- Stop one-shot --key overrides from being persisted automatically; document explicit remote trust and keep key-management behind the keys policy tier.
- Make HTML-to-Markdown conversion safer by bounding tree depth and dropping unsafe link/image URL schemes.
- Bump package and extension release metadata to 0.16.3.
2026-06-19 10:00:23 +02:00
parent 7fe0e27fec
commit cea8a7e994
28 changed files with 3687 additions and 164 deletions
@@ -11,6 +11,13 @@ class _HtmlNode:
    self.text = text
    self.children = []

+# Cap how deep the parsed tree may nest. Hostile page content (thousands of
+# nested elements) would otherwise blow Python's recursion limit in the
+# depth-first render walkers below. Bounding here protects every walker at once.
+# 200 levels is far beyond any real document; deeper content is flattened, not
+# dropped (its text still reaches the output).
+_MAX_TREE_DEPTH = 200
+
 class _HtmlTreeBuilder(HTMLParser):
  _VOID_TAGS = {"br", "hr", "img"}

@@ -22,7 +29,9 @@ class _HtmlTreeBuilder(HTMLParser):
  def handle_starttag(self, tag, attrs):
    node = _HtmlNode(tag=tag.lower(), attrs=dict(attrs))
    self._stack[-1].children.append(node)
-    if node.tag not in self._VOID_TAGS:
+    # Only descend while under the depth cap; beyond it, children of this node
+    # attach to the current (capped) parent — flattened but preserved.
+    if node.tag not in self._VOID_TAGS and len(self._stack) < _MAX_TREE_DEPTH:
      self._stack.append(node)

  def handle_startendtag(self, tag, attrs):
@@ -57,6 +66,14 @@ def _collapse_blank_lines(value):
 def _escape_markdown(text):
  return re.sub(r"([\\`[\]])", r"\\\1", text)

+# Schemes that are dangerous if the produced markdown is later rendered as HTML
+# by a downstream consumer. The output is plain text here, but neutralising them
+# keeps the converter from laundering an XSS payload through to such a consumer.
+_UNSAFE_URL_SCHEME = re.compile(r"^\s*(?:javascript|vbscript|data)\s*:", re.IGNORECASE)
+
+def _safe_url(url):
+  return "" if _UNSAFE_URL_SCHEME.match(url or "") else url
+
 def _escape_table_cell(text):
  return text.replace("|", r"\|").replace("\n", " ").strip()

@@ -86,14 +103,14 @@ def _inline_text(node):
  if tag == "br":
    return "\n"
  if tag == "img":
-    src = node.attrs.get("src") or ""
+    src = _safe_url(node.attrs.get("src") or "")
    alt = _normalize_text(node.attrs.get("alt") or "")
    if not src:
      return ""
    return f"![{_escape_markdown(alt)}]({src})" if alt else f"![]({src})"
  if tag == "a":
    text = _normalize_inline("".join(_inline_text(child) for child in node.children))
-    href = node.attrs.get("href") or ""
+    href = _safe_url(node.attrs.get("href") or "")
    return f"[{text or href}]({href})" if href else text
  if tag == "code":
    text = _normalize_inline("".join(_inline_text(child) for child in node.children))
@@ -235,5 +252,10 @@ def _block_to_markdown(node):
 def convert_html_to_markdown(html, clean_markdown_output):
  parser = _HtmlTreeBuilder()
  parser.feed(html or "")
-  markdown = _block_to_markdown(parser.root)
+  try:
+    markdown = _block_to_markdown(parser.root)
+  except RecursionError:
+    # The depth cap should prevent this, but never let hostile page content
+    # crash the caller: fall back to a flat, tag-stripped text extraction.
+    markdown = _normalize_inline(re.sub(r"<[^>]*>", " ", html or ""))
  return clean_markdown_output(markdown)