feat: add n8n serve node and harden remote access
- Add the n8n community node package with credentials, command mapping, direct serve TCP client, and browser-cli protocol crypto helpers.
- Cover Ed25519 signing, canonical JSON, PQ transport encryption, request mapping, and security behavior with unit tests.
- Harden serve-http with per-address rate limiting, an 8 MB request body cap, and clear warnings when binding plain HTTP beyond loopback.
- Stop one-shot --key overrides from being persisted automatically; document explicit remote trust and keep key-management behind the keys policy tier.
- Make HTML-to-Markdown conversion safer by bounding tree depth and dropping unsafe link/image URL schemes.
- Bump package and extension release metadata to 0.16.3.
This commit is contained in:
@@ -11,6 +11,13 @@ class _HtmlNode:
|
||||
self.text = text
|
||||
self.children = []
|
||||
|
||||
# Cap how deep the parsed tree may nest. Hostile page content (thousands of
|
||||
# nested elements) would otherwise blow Python's recursion limit in the
|
||||
# depth-first render walkers below. Bounding here protects every walker at once.
|
||||
# 200 levels is far beyond any real document; deeper content is flattened, not
|
||||
# dropped (its text still reaches the output).
|
||||
_MAX_TREE_DEPTH = 200
|
||||
|
||||
class _HtmlTreeBuilder(HTMLParser):
|
||||
_VOID_TAGS = {"br", "hr", "img"}
|
||||
|
||||
@@ -22,7 +29,9 @@ class _HtmlTreeBuilder(HTMLParser):
|
||||
def handle_starttag(self, tag, attrs):
    """Attach a new element node to the current parent and maybe descend.

    The node is always appended to the children of the element on top of
    ``self._stack``, so an opening tag is never dropped from the tree.

    Args:
        tag: Tag name reported by the parser; stored lower-cased.
        attrs: Iterable of ``(name, value)`` pairs; stored as a dict, so a
            repeated attribute name keeps its last value.
    """
    node = _HtmlNode(tag=tag.lower(), attrs=dict(attrs))
    self._stack[-1].children.append(node)

    # Void elements cannot contain children, so they never become a parent.
    if node.tag in self._VOID_TAGS:
        return

    # Only descend while under the depth cap; beyond it, children of this node
    # attach to the current (capped) parent: flattened but preserved.
    # NOTE(review): the end-tag handler is not visible from here; confirm it
    # tolerates closing tags for elements that were never pushed.
    if len(self._stack) < _MAX_TREE_DEPTH:
        self._stack.append(node)
|
||||
|
||||
def handle_startendtag(self, tag, attrs):
|
||||
@@ -57,6 +66,14 @@ def _collapse_blank_lines(value):
|
||||
def _escape_markdown(text):
|
||||
return re.sub(r"([\\`[\]])", r"\\\1", text)
|
||||
|
||||
# Schemes that are dangerous if the produced markdown is later rendered as HTML
|
||||
# by a downstream consumer. The output is plain text here, but neutralising them
|
||||
# keeps the converter from laundering an XSS payload through to such a consumer.
|
||||
_UNSAFE_URL_SCHEME = re.compile(r"^\s*(?:javascript|vbscript|data)\s*:", re.IGNORECASE)
|
||||
|
||||
def _safe_url(url):
|
||||
return "" if _UNSAFE_URL_SCHEME.match(url or "") else url
|
||||
|
||||
def _escape_table_cell(text):
|
||||
return text.replace("|", r"\|").replace("\n", " ").strip()
|
||||
|
||||
@@ -86,14 +103,14 @@ def _inline_text(node):
|
||||
if tag == "br":
|
||||
return "\n"
|
||||
if tag == "img":
|
||||
src = node.attrs.get("src") or ""
|
||||
src = _safe_url(node.attrs.get("src") or "")
|
||||
alt = _normalize_text(node.attrs.get("alt") or "")
|
||||
if not src:
|
||||
return ""
|
||||
return f"" if alt else f""
|
||||
if tag == "a":
|
||||
text = _normalize_inline("".join(_inline_text(child) for child in node.children))
|
||||
href = node.attrs.get("href") or ""
|
||||
href = _safe_url(node.attrs.get("href") or "")
|
||||
return f"[{text or href}]({href})" if href else text
|
||||
if tag == "code":
|
||||
text = _normalize_inline("".join(_inline_text(child) for child in node.children))
|
||||
@@ -235,5 +252,10 @@ def _block_to_markdown(node):
|
||||
def convert_html_to_markdown(html, clean_markdown_output):
    """Convert an HTML string to Markdown and post-process the result.

    Args:
        html: HTML source; ``None`` or an empty string is handled as an
            empty document.
        clean_markdown_output: Callable applied to the rendered Markdown
            string; its return value is returned as-is.

    Returns:
        Whatever ``clean_markdown_output`` returns for the rendered Markdown.
    """
    source = html or ""
    parser = _HtmlTreeBuilder()
    parser.feed(source)
    # feed() holds back input that might continue in a later chunk (for
    # example trailing text with an unterminated "&", or a half-open tag).
    # close() tells the parser the document is complete so it is flushed.
    parser.close()

    # Render exactly once, inside the guard: an unguarded call ahead of the
    # try block would raise before the fallback could ever apply.
    try:
        markdown = _block_to_markdown(parser.root)
    except RecursionError:
        # The depth cap should prevent this, but never let hostile page content
        # crash the caller: fall back to a flat, tag-stripped text extraction.
        markdown = _normalize_inline(re.sub(r"<[^>]*>", " ", source))
    return clean_markdown_output(markdown)
|
||||
|
||||
Reference in New Issue
Block a user