feat: add n8n serve node and harden remote access

- Add the n8n community node package with credentials, command mapping, direct serve TCP client, and browser-cli protocol crypto helpers.

- Cover Ed25519 signing, canonical JSON, PQ transport encryption, request mapping, and security behavior with unit tests.

- Harden serve-http with per-address rate limiting, an 8 MB request body cap, and clear warnings when binding plain HTTP beyond loopback.

- Stop one-shot --key overrides from being persisted automatically; document explicit remote trust and keep key-management behind the keys policy tier.

- Make HTML-to-Markdown conversion safer by bounding tree depth and dropping unsafe link/image URL schemes.

- Bump package and extension release metadata to 0.16.3.
This commit is contained in:
2026-06-19 10:00:23 +02:00
parent 7fe0e27fec
commit cea8a7e994
28 changed files with 3687 additions and 164 deletions
+26 -4
View File
@@ -11,6 +11,13 @@ class _HtmlNode:
self.text = text
self.children = []
# Cap how deep the parsed tree may nest. Hostile page content (thousands of
# nested elements) would otherwise blow Python's recursion limit in the
# depth-first render walkers below. Bounding here protects every walker at once.
# 200 levels is far beyond any real document; deeper content is flattened, not
# dropped (its text still reaches the output).
# NOTE(review): a walker may spend more than one interpreter frame per tree
# level (for example a recursive call made from inside a generator expression),
# so this value should stay comfortably below sys.getrecursionlimit() divided
# by that per-level cost — TODO confirm against the actual walkers.
_MAX_TREE_DEPTH = 200
class _HtmlTreeBuilder(HTMLParser):
_VOID_TAGS = {"br", "hr", "img"}
@@ -22,7 +29,9 @@ class _HtmlTreeBuilder(HTMLParser):
# Start-tag callback: build a node for the tag, attach it under the node that is
# currently on top of the stack, and (for non-void tags within the depth cap)
# push it so that the content which follows nests beneath it.
def handle_starttag(self, tag, attrs):
# The tag name is lower-cased; attrs is turned into a dict, so a repeated
# attribute name keeps only its last value.
node = _HtmlNode(tag=tag.lower(), attrs=dict(attrs))
self._stack[-1].children.append(node)
# Pre-change condition as shown in this diff view; the depth-capped check
# further down supersedes it.
if node.tag not in self._VOID_TAGS:
# Only descend while under the depth cap; beyond it, children of this node
# attach to the current (capped) parent — flattened but preserved.
# NOTE(review): a node that is not pushed still produces an end tag later; the
# end-tag handler is not visible here — verify it does not pop a same-named
# ancestor when that happens.
if node.tag not in self._VOID_TAGS and len(self._stack) < _MAX_TREE_DEPTH:
self._stack.append(node)
def handle_startendtag(self, tag, attrs):
@@ -57,6 +66,14 @@ def _collapse_blank_lines(value):
def _escape_markdown(text):
return re.sub(r"([\\`[\]])", r"\\\1", text)
# Schemes that are dangerous if the produced markdown is later rendered as HTML
# by a downstream consumer. The output is plain text here, but neutralising them
# keeps the converter from laundering an XSS payload through to such a consumer.
_UNSAFE_URL_SCHEME = re.compile(r"^\s*(?:javascript|vbscript|data)\s*:", re.IGNORECASE)
def _safe_url(url):
return "" if _UNSAFE_URL_SCHEME.match(url or "") else url
def _escape_table_cell(text):
return text.replace("|", r"\|").replace("\n", " ").strip()
@@ -86,14 +103,14 @@ def _inline_text(node):
if tag == "br":
return "\n"
if tag == "img":
src = node.attrs.get("src") or ""
src = _safe_url(node.attrs.get("src") or "")
alt = _normalize_text(node.attrs.get("alt") or "")
if not src:
return ""
return f"![{_escape_markdown(alt)}]({src})" if alt else f"![]({src})"
if tag == "a":
text = _normalize_inline("".join(_inline_text(child) for child in node.children))
href = node.attrs.get("href") or ""
href = _safe_url(node.attrs.get("href") or "")
return f"[{text or href}]({href})" if href else text
if tag == "code":
text = _normalize_inline("".join(_inline_text(child) for child in node.children))
@@ -235,5 +252,10 @@ def _block_to_markdown(node):
# Entry point: parse *html* into a node tree, render the tree to Markdown, and
# hand the result to the caller-supplied clean_markdown_output callable, whose
# return value is returned. A None/empty *html* is parsed as an empty string.
def convert_html_to_markdown(html, clean_markdown_output):
parser = _HtmlTreeBuilder()
parser.feed(html or "")
# Pre-change call as shown in this diff view; the guarded call below supersedes it.
markdown = _block_to_markdown(parser.root)
try:
markdown = _block_to_markdown(parser.root)
except RecursionError:
# The depth cap should prevent this, but never let hostile page content
# crash the caller: fall back to a flat, tag-stripped text extraction.
# NOTE(review): the fallback only replaces "<...>" spans with a space; text
# inside script/style elements and character references pass through as-is —
# confirm that is acceptable for callers.
markdown = _normalize_inline(re.sub(r"<[^>]*>", " ", html or ""))
return clean_markdown_output(markdown)