refactor: modularize auth transport and markdown
Testing / remote-protocol-compat (0.9.5) (push) Successful in 1m4s
Testing / test (push) Successful in 1m22s
Testing / remote-protocol-compat (0.9.3) (push) Successful in 1m7s
Package Extension / package-extension (push) Successful in 1m1s
Build & Publish Package / publish (push) Successful in 1m5s

- Split auth into focused package modules for agent keys, file keys,
  signing, and post-quantum transport helpers while keeping the public
  browser_cli.auth import surface intact.
- Move transport encoding internals into a package with separate codec and
  binary-hoisting helpers, preserving browser_cli.transport compatibility.
- Extract remote TCP auth/socket helpers and serve challenge setup out of the
  runtime paths to make connection handling easier to reason about.
- Move the extension markdown extractor into a dedicated content/markdown
  folder with separate root selection, code normalization, renderer, and utils.
- Centralize CLI Rich rendering helpers for tab/window tree and table output,
  and add rendering tests for the shared builders.
- Remove local typing ignores in SDK/decorator/script plumbing and bump the
  package and extension version to 0.15.3.
This commit is contained in:
2026-06-15 01:23:57 +02:00
parent 0b43408a8d
commit 7cb2a8b618
34 changed files with 1502 additions and 1325 deletions
+1 -1
View File
@@ -1,7 +1,7 @@
{
"manifest_version": 3,
"name": "browser-cli",
"version": "0.15.2",
"version": "0.15.3",
"description": "Control your browser from the terminal or Python SDK",
"browser_specific_settings": {
"gecko": {
-404
View File
@@ -1,404 +0,0 @@
import type { ContentArgs } from '../types';
export function extractMarkdown({ selector }: ContentArgs) {
const BLOCKS = new Set([
"article", "aside", "blockquote", "body", "div", "dl", "fieldset", "figcaption",
"figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr",
"li", "main", "nav", "ol", "p", "pre", "section", "table", "tbody", "td", "tfoot",
"th", "thead", "tr", "ul"
]);
const NOISE_SELECTOR = [
"script",
"style",
"noscript",
"template",
"svg",
"canvas",
"iframe",
"dialog",
"button",
"input",
"textarea",
"select",
"option",
"form",
"[hidden]",
"[aria-hidden='true']",
".sr-only",
"[class*='sr-only']",
"[class*='file-tile']",
"form[data-type='unified-composer']",
".composer-btn",
"[data-composer-surface='true']",
"#thread-bottom-container",
"[data-testid*='action-button']",
].join(", ");
function normalizeText(value: string) {
return value.replace(/\s+/g, " ").trim();
}
function normalizeInline(value: string) {
return value
.replace(/[ \t]+\n/g, "\n")
.replace(/\n[ \t]+/g, "\n")
.replace(/\n{3,}/g, "\n\n")
.replace(/[ \t]{2,}/g, " ")
.trim();
}
function collapseBlankLines(value: string) {
return value
.replace(/[ \t]+\n/g, "\n")
.replace(/\n{3,}/g, "\n\n")
.trim();
}
function escapeMarkdown(text: string) {
return text.replace(/([\\`[\]])/g, "\\$1");
}
function escapeTableCell(text: string) {
return text.replace(/\|/g, "\\|").replace(/\n+/g, " ").trim();
}
function absoluteUrl(attr: string | null | undefined, fallback?: string) {
return attr || fallback || "";
}
function isNoiseElement(node: Node | null): boolean {
if (!node || node.nodeType !== Node.ELEMENT_NODE) return false;
const el = node as Element;
const tag = el.tagName.toLowerCase();
if (["script", "style", "noscript", "template", "svg", "canvas", "iframe", "dialog"].includes(tag)) return true;
if (["button", "input", "textarea", "select", "option", "form"].includes(tag)) return true;
if (el.hasAttribute("hidden")) return true;
if ((el.getAttribute("aria-hidden") || "").toLowerCase() === "true") return true;
if (el.matches(".sr-only, [class*='sr-only']")) return true;
if (el.matches("[class*='file-tile'], form[data-type='unified-composer'], .composer-btn, [data-composer-surface='true'], #thread-bottom-container")) return true;
if (el.matches("[data-testid*='action-button']")) return true;
return false;
}
function stripNoise(root: Element): Element {
const clone = root.cloneNode(true) as Element;
clone.querySelectorAll(NOISE_SELECTOR).forEach(node => node.remove());
return clone;
}
function candidateScore(node: Element) {
const text = normalizeText((node as HTMLElement).innerText || "");
if (!text) return -Infinity;
const headings = node.querySelectorAll("h1, h2, h3, h4, h5, h6").length;
const paragraphs = node.querySelectorAll("p").length;
const listItems = node.querySelectorAll("li").length;
const tables = node.querySelectorAll("table").length;
const codeBlocks = node.querySelectorAll("pre, code").length;
const images = node.querySelectorAll("img, figure").length;
const mainLike = node.matches("main, article, [role='main']") ? 1 : 0;
const proseBlocks = node.matches(".markdown, .prose, [data-message-author-role='assistant']") ? 1 : 0;
const buttons = node.querySelectorAll("button, input, textarea, select").length;
const forms = node.querySelectorAll("form").length;
const svgs = node.querySelectorAll("svg, canvas").length;
return text.length
+ (mainLike * 4000)
+ (proseBlocks * 5000)
+ (headings * 250)
+ (paragraphs * 60)
+ (listItems * 35)
+ (tables * 80)
+ (codeBlocks * 60)
+ (images * 25)
- (buttons * 120)
- (forms * 200)
- (svgs * 40);
}
function pickRoot() {
if (selector) {
const matched = document.querySelector(selector);
if (!matched) throw new Error(`No element: ${selector}`);
return matched;
}
const candidates = Array.from(document.querySelectorAll(
"main, article, [role='main'], section, .markdown, .prose, [data-message-author-role]"
))
.filter(node => normalizeText((node as HTMLElement).innerText || "").length > 0);
if (!candidates.length) return document.body;
candidates.sort((a, b) => candidateScore(b) - candidateScore(a));
return candidates[0];
}
function inlineText(node: Node): string {
if (node.nodeType === Node.TEXT_NODE) {
return escapeMarkdown(node.textContent || "");
}
if (node.nodeType !== Node.ELEMENT_NODE) return "";
if (isNoiseElement(node)) return "";
const el = node as HTMLElement;
const tag = el.tagName.toLowerCase();
if (tag === "br") return "\n";
if (tag === "img") {
const img = el as HTMLImageElement;
const src = absoluteUrl(img.getAttribute("src"), img.src);
if (!src) return "";
const alt = normalizeText(img.getAttribute("alt") || "");
return alt ? `![${escapeMarkdown(alt)}](${src})` : `![](${src})`;
}
if (tag === "a") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
const href = absoluteUrl(el.getAttribute("href"), (el as HTMLAnchorElement).href);
if (!href) return text;
return `[${text || href}](${href})`;
}
if (tag === "code") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
return text ? `\`${text.replace(/`/g, "\\`")}\`` : "";
}
if (tag === "strong" || tag === "b") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
return text ? `**${text}**` : "";
}
if (tag === "em" || tag === "i") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
return text ? `*${text}*` : "";
}
const chunks: string[] = [];
for (const child of el.childNodes) {
const rendered = inlineText(child);
if (!rendered) continue;
chunks.push(rendered);
if (child.nodeType === Node.ELEMENT_NODE && BLOCKS.has((child as Element).tagName.toLowerCase())) {
chunks.push("\n");
}
}
return chunks.join("");
}
function textBlock(node: Node): string {
return collapseBlankLines(normalizeInline(Array.from(node.childNodes).map(inlineText).join("")));
}
function preserveNodeText(node: Node): string {
if (node.nodeType === Node.TEXT_NODE) {
return node.textContent || "";
}
if (node.nodeType !== Node.ELEMENT_NODE) return "";
const el = node as HTMLElement;
const tag = el.tagName.toLowerCase();
if (tag === "br") return "\n";
const parts: string[] = [];
for (const child of el.childNodes) {
const rendered = preserveNodeText(child);
if (!rendered) continue;
parts.push(rendered);
}
if (["div", "p", "li"].includes(tag)) {
return `${parts.join("")}\n`;
}
return parts.join("");
}
function repairFlattenedDiagram(text: string): string {
if (text.includes("\n")) return text;
const markerCount = (text.match(/[│▼├└]/g) || []).length;
if (markerCount < 2) return text;
let repaired = text;
repaired = repaired.replace(/\s{2,}([│▼])/g, "\n $1");
repaired = repaired.replace(/([│▼])\s{2,}/g, "$1\n");
repaired = repaired.replace(/([│▼])(?=[^\s\n│▼├└])/g, "$1\n");
repaired = repaired.replace(/(?<=[^\s\n])([├└])/g, "\n$1");
repaired = repaired.replace(/([^\s\n])(\()/g, "$1\n$2");
return repaired
.split("\n")
.map(line => line.replace(/\s+$/, ""))
.filter(line => line.trim())
.join("\n");
}
function convertDashListsToBranches(lines: string[]): string[] {
const converted: string[] = [];
let index = 0;
while (index < lines.length) {
const match = lines[index].match(/^(\s*)-\s+(.*)$/);
if (!match) {
converted.push(lines[index]);
index += 1;
continue;
}
const indent = match[1];
const items = [];
while (index < lines.length) {
const nextMatch = lines[index].match(new RegExp(`^${indent.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}-\\s+(.*)$`));
if (!nextMatch) break;
items.push(nextMatch[1]);
index += 1;
}
items.forEach((item, itemIndex) => {
const branch = itemIndex === items.length - 1 ? "└" : "├";
converted.push(`${indent}${branch} ${item}`);
});
}
return converted;
}
function normalizeCodeBlock(text: string): string {
let lines = text.replace(/\r\n?/g, "\n").split("\n").map(line => line.replace(/\s+$/, ""));
while (lines.length && !lines[0].trim()) lines.shift();
while (lines.length && !lines[lines.length - 1].trim()) lines.pop();
const flattened = repairFlattenedDiagram(lines.join("\n"));
lines = flattened ? flattened.split("\n") : [];
lines = lines.map(line => {
const trimmed = line.trim();
if ((trimmed === "│" || trimmed === "▼") && !/^\s+[│▼]\s*$/.test(line)) {
return ` ${trimmed}`;
}
return line;
});
lines = convertDashListsToBranches(lines);
return lines.join("\n");
}
function tableToMarkdown(table: Element) {
const rows = Array.from(table.querySelectorAll("tr"))
.map(row => Array.from(row.children)
.filter(cell => cell.tagName === "TD" || cell.tagName === "TH")
.map(cell => escapeTableCell(textBlock(cell)))
)
.filter(cells => cells.length > 0);
if (!rows.length) return "";
const widths = rows.reduce((max, row) => Math.max(max, row.length), 0);
const normalizedRows = rows.map(row => {
const next = row.slice();
while (next.length < widths) next.push("");
return next;
});
let headers = normalizedRows[0];
let bodyRows = normalizedRows.slice(1);
const firstRowIsBlank = headers.every(cell => !cell.trim());
if (firstRowIsBlank && normalizedRows.length > 1) {
headers = normalizedRows[1];
bodyRows = normalizedRows.slice(2);
}
const firstRow = table.querySelector("tr");
const thead = table.querySelector("thead");
const firstRowHasTh = firstRow && Array.from(firstRow.children).some(cell => cell.tagName === "TH");
if (!(thead || firstRowHasTh || firstRowIsBlank)) {
headers = new Array(widths).fill("");
bodyRows = normalizedRows;
}
const separator = new Array(widths).fill("---");
const lines = [
`| ${headers.join(" | ")} |`,
`| ${separator.join(" | ")} |`,
];
for (const row of bodyRows) {
lines.push(`| ${row.join(" | ")} |`);
}
return lines.join("\n");
}
function listToMarkdown(list: Element, depth = 0): string {
const ordered = list.tagName.toLowerCase() === "ol";
const items: string[] = [];
const children = Array.from(list.children).filter(child => child.tagName === "LI");
children.forEach((item, index) => {
const marker = ordered ? `${index + 1}. ` : "- ";
const indent = " ".repeat(depth);
const nested: string[] = [];
const content: string[] = [];
for (const child of item.childNodes) {
const childEl = child as Element;
if (child.nodeType === Node.ELEMENT_NODE && (childEl.tagName === "UL" || childEl.tagName === "OL")) {
nested.push(listToMarkdown(childEl, depth + 1));
} else {
content.push(inlineText(child));
}
}
const line = collapseBlankLines(normalizeInline(content.join("")));
if (line) {
const lineParts = line.split("\n");
items.push(`${indent}${marker}${lineParts[0]}`);
const continuationIndent = `${indent}${" ".repeat(marker.length)}`;
lineParts.slice(1).forEach(part => items.push(`${continuationIndent}${part}`));
}
nested.filter(Boolean).forEach(block => items.push(block));
});
return items.join("\n");
}
function blockToMarkdown(node: Node): string {
if (node.nodeType === Node.TEXT_NODE) {
return normalizeText(node.textContent || "");
}
if (node.nodeType !== Node.ELEMENT_NODE) return "";
if (isNoiseElement(node)) return "";
const el = node as HTMLElement;
const tag = el.tagName.toLowerCase();
if (tag === "table") return tableToMarkdown(el);
if (tag === "ul" || tag === "ol") return listToMarkdown(el);
if (el.matches(".cm-editor[data-is-code-block-view='true']")) {
const lines = Array.from(el.querySelectorAll(".cm-line")).map(line => {
const text = preserveNodeText(line);
return text === "\n" ? "" : text.replace(/\n$/, "");
});
const code = normalizeCodeBlock(lines.join("\n"));
return code ? `\`\`\`\n${code}\n\`\`\`` : "";
}
if (tag === "pre") {
const code = normalizeCodeBlock(preserveNodeText(el));
return code ? `\`\`\`\n${code}\n\`\`\`` : "";
}
if (tag === "blockquote") {
const content = collapseBlankLines(Array.from(el.childNodes).map(blockToMarkdown).join("\n\n"));
return content
.split("\n")
.map(line => line ? `> ${line}` : ">")
.join("\n");
}
if (/^h[1-6]$/.test(tag)) {
const level = Number(tag.slice(1));
const text = textBlock(el);
return text ? `${"#".repeat(level)} ${text}` : "";
}
if (tag === "p" || tag === "figcaption") {
return textBlock(el);
}
if (tag === "hr") {
return "---";
}
if (tag === "img") {
return inlineText(el);
}
const childBlocks = Array.from(el.childNodes)
.map(child => blockToMarkdown(child))
.filter(Boolean);
if (childBlocks.length) return collapseBlankLines(childBlocks.join("\n\n"));
return textBlock(node);
}
const root = stripNoise(pickRoot());
const markdown = blockToMarkdown(root);
return collapseBlankLines(markdown);
}
+63
View File
@@ -0,0 +1,63 @@
function repairFlattenedDiagram(text: string): string {
if (text.includes("\n")) return text;
const markerCount = (text.match(/[│▼├└]/g) || []).length;
if (markerCount < 2) return text;
let repaired = text;
repaired = repaired.replace(/\s{2,}([│▼])/g, "\n $1");
repaired = repaired.replace(/([│▼])\s{2,}/g, "$1\n");
repaired = repaired.replace(/([│▼])(?=[^\s\n│▼├└])/g, "$1\n");
repaired = repaired.replace(/(?<=[^\s\n])([├└])/g, "\n$1");
repaired = repaired.replace(/([^\s\n])(\()/g, "$1\n$2");
return repaired
.split("\n")
.map(line => line.replace(/\s+$/, ""))
.filter(line => line.trim())
.join("\n");
}
function convertDashListsToBranches(lines: string[]): string[] {
const converted: string[] = [];
let index = 0;
while (index < lines.length) {
const match = lines[index].match(/^(\s*)-\s+(.*)$/);
if (!match) {
converted.push(lines[index]);
index += 1;
continue;
}
const indent = match[1];
const items = [];
while (index < lines.length) {
const nextMatch = lines[index].match(new RegExp(`^${indent.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}-\\s+(.*)$`));
if (!nextMatch) break;
items.push(nextMatch[1]);
index += 1;
}
items.forEach((item, itemIndex) => {
const branch = itemIndex === items.length - 1 ? "└" : "├";
converted.push(`${indent}${branch} ${item}`);
});
}
return converted;
}
export function normalizeCodeBlock(text: string): string {
let lines = text.replace(/\r\n?/g, "\n").split("\n").map(line => line.replace(/\s+$/, ""));
while (lines.length && !lines[0].trim()) lines.shift();
while (lines.length && !lines[lines.length - 1].trim()) lines.pop();
const flattened = repairFlattenedDiagram(lines.join("\n"));
lines = flattened ? flattened.split("\n") : [];
lines = lines.map(line => {
const trimmed = line.trim();
if ((trimmed === "│" || trimmed === "▼") && !/^\s+[│▼]\s*$/.test(line)) {
return ` ${trimmed}`;
}
return line;
});
lines = convertDashListsToBranches(lines);
return lines.join("\n");
}
+9
View File
@@ -0,0 +1,9 @@
import type { ContentArgs } from '../../types';
import { pickMarkdownRoot } from './root';
import { renderMarkdown } from './renderer';
import { stripNoise } from './utils';
export function extractMarkdown({ selector }: ContentArgs) {
const root = stripNoise(pickMarkdownRoot(selector));
return renderMarkdown(root);
}
+217
View File
@@ -0,0 +1,217 @@
import { normalizeCodeBlock } from './code';
import {
absoluteUrl,
BLOCK_TAGS,
collapseBlankLines,
escapeMarkdown,
escapeTableCell,
isNoiseElement,
normalizeInline,
normalizeText,
} from './utils';
function inlineText(node: Node): string {
if (node.nodeType === Node.TEXT_NODE) {
return escapeMarkdown(node.textContent || "");
}
if (node.nodeType !== Node.ELEMENT_NODE) return "";
if (isNoiseElement(node)) return "";
const el = node as HTMLElement;
const tag = el.tagName.toLowerCase();
if (tag === "br") return "\n";
if (tag === "img") {
const img = el as HTMLImageElement;
const src = absoluteUrl(img.getAttribute("src"), img.src);
if (!src) return "";
const alt = normalizeText(img.getAttribute("alt") || "");
return alt ? `![${escapeMarkdown(alt)}](${src})` : `![](${src})`;
}
if (tag === "a") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
const href = absoluteUrl(el.getAttribute("href"), (el as HTMLAnchorElement).href);
if (!href) return text;
return `[${text || href}](${href})`;
}
if (tag === "code") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
return text ? `\`${text.replace(/`/g, "\\`")}\`` : "";
}
if (tag === "strong" || tag === "b") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
return text ? `**${text}**` : "";
}
if (tag === "em" || tag === "i") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
return text ? `*${text}*` : "";
}
const chunks: string[] = [];
for (const child of el.childNodes) {
const rendered = inlineText(child);
if (!rendered) continue;
chunks.push(rendered);
if (child.nodeType === Node.ELEMENT_NODE && BLOCK_TAGS.has((child as Element).tagName.toLowerCase())) {
chunks.push("\n");
}
}
return chunks.join("");
}
function textBlock(node: Node): string {
return collapseBlankLines(normalizeInline(Array.from(node.childNodes).map(inlineText).join("")));
}
function preserveNodeText(node: Node): string {
if (node.nodeType === Node.TEXT_NODE) {
return node.textContent || "";
}
if (node.nodeType !== Node.ELEMENT_NODE) return "";
const el = node as HTMLElement;
const tag = el.tagName.toLowerCase();
if (tag === "br") return "\n";
const parts: string[] = [];
for (const child of el.childNodes) {
const rendered = preserveNodeText(child);
if (!rendered) continue;
parts.push(rendered);
}
if (["div", "p", "li"].includes(tag)) {
return `${parts.join("")}\n`;
}
return parts.join("");
}
function tableToMarkdown(table: Element) {
const rows = Array.from(table.querySelectorAll("tr"))
.map(row => Array.from(row.children)
.filter(cell => cell.tagName === "TD" || cell.tagName === "TH")
.map(cell => escapeTableCell(textBlock(cell)))
)
.filter(cells => cells.length > 0);
if (!rows.length) return "";
const widths = rows.reduce((max, row) => Math.max(max, row.length), 0);
const normalizedRows = rows.map(row => {
const next = row.slice();
while (next.length < widths) next.push("");
return next;
});
let headers = normalizedRows[0];
let bodyRows = normalizedRows.slice(1);
const firstRowIsBlank = headers.every(cell => !cell.trim());
if (firstRowIsBlank && normalizedRows.length > 1) {
headers = normalizedRows[1];
bodyRows = normalizedRows.slice(2);
}
const firstRow = table.querySelector("tr");
const thead = table.querySelector("thead");
const firstRowHasTh = firstRow && Array.from(firstRow.children).some(cell => cell.tagName === "TH");
if (!(thead || firstRowHasTh || firstRowIsBlank)) {
headers = new Array(widths).fill("");
bodyRows = normalizedRows;
}
const separator = new Array(widths).fill("---");
const lines = [
`| ${headers.join(" | ")} |`,
`| ${separator.join(" | ")} |`,
];
for (const row of bodyRows) {
lines.push(`| ${row.join(" | ")} |`);
}
return lines.join("\n");
}
function listToMarkdown(list: Element, depth = 0): string {
const ordered = list.tagName.toLowerCase() === "ol";
const items: string[] = [];
const children = Array.from(list.children).filter(child => child.tagName === "LI");
children.forEach((item, index) => {
const marker = ordered ? `${index + 1}. ` : "- ";
const indent = " ".repeat(depth);
const nested: string[] = [];
const content: string[] = [];
for (const child of item.childNodes) {
const childEl = child as Element;
if (child.nodeType === Node.ELEMENT_NODE && (childEl.tagName === "UL" || childEl.tagName === "OL")) {
nested.push(listToMarkdown(childEl, depth + 1));
} else {
content.push(inlineText(child));
}
}
const line = collapseBlankLines(normalizeInline(content.join("")));
if (line) {
const lineParts = line.split("\n");
items.push(`${indent}${marker}${lineParts[0]}`);
const continuationIndent = `${indent}${" ".repeat(marker.length)}`;
lineParts.slice(1).forEach(part => items.push(`${continuationIndent}${part}`));
}
nested.filter(Boolean).forEach(block => items.push(block));
});
return items.join("\n");
}
function blockToMarkdown(node: Node): string {
if (node.nodeType === Node.TEXT_NODE) {
return normalizeText(node.textContent || "");
}
if (node.nodeType !== Node.ELEMENT_NODE) return "";
if (isNoiseElement(node)) return "";
const el = node as HTMLElement;
const tag = el.tagName.toLowerCase();
if (tag === "table") return tableToMarkdown(el);
if (tag === "ul" || tag === "ol") return listToMarkdown(el);
if (el.matches(".cm-editor[data-is-code-block-view='true']")) {
const lines = Array.from(el.querySelectorAll(".cm-line")).map(line => {
const text = preserveNodeText(line);
return text === "\n" ? "" : text.replace(/\n$/, "");
});
const code = normalizeCodeBlock(lines.join("\n"));
return code ? `\`\`\`\n${code}\n\`\`\`` : "";
}
if (tag === "pre") {
const code = normalizeCodeBlock(preserveNodeText(el));
return code ? `\`\`\`\n${code}\n\`\`\`` : "";
}
if (tag === "blockquote") {
const content = collapseBlankLines(Array.from(el.childNodes).map(blockToMarkdown).join("\n\n"));
return content
.split("\n")
.map(line => line ? `> ${line}` : ">")
.join("\n");
}
if (/^h[1-6]$/.test(tag)) {
const level = Number(tag.slice(1));
const text = textBlock(el);
return text ? `${"#".repeat(level)} ${text}` : "";
}
if (tag === "p" || tag === "figcaption") {
return textBlock(el);
}
if (tag === "hr") {
return "---";
}
if (tag === "img") {
return inlineText(el);
}
const childBlocks = Array.from(el.childNodes)
.map(child => blockToMarkdown(child))
.filter(Boolean);
if (childBlocks.length) return collapseBlankLines(childBlocks.join("\n\n"));
return textBlock(node);
}
export function renderMarkdown(root: Element): string {
return collapseBlankLines(blockToMarkdown(root));
}
+47
View File
@@ -0,0 +1,47 @@
import { normalizeText } from './utils';
function candidateScore(node: Element) {
const text = normalizeText((node as HTMLElement).innerText || "");
if (!text) return -Infinity;
const headings = node.querySelectorAll("h1, h2, h3, h4, h5, h6").length;
const paragraphs = node.querySelectorAll("p").length;
const listItems = node.querySelectorAll("li").length;
const tables = node.querySelectorAll("table").length;
const codeBlocks = node.querySelectorAll("pre, code").length;
const images = node.querySelectorAll("img, figure").length;
const mainLike = node.matches("main, article, [role='main']") ? 1 : 0;
const proseBlocks = node.matches(".markdown, .prose, [data-message-author-role='assistant']") ? 1 : 0;
const buttons = node.querySelectorAll("button, input, textarea, select").length;
const forms = node.querySelectorAll("form").length;
const svgs = node.querySelectorAll("svg, canvas").length;
return text.length
+ (mainLike * 4000)
+ (proseBlocks * 5000)
+ (headings * 250)
+ (paragraphs * 60)
+ (listItems * 35)
+ (tables * 80)
+ (codeBlocks * 60)
+ (images * 25)
- (buttons * 120)
- (forms * 200)
- (svgs * 40);
}
export function pickMarkdownRoot(selector?: string) {
if (selector) {
const matched = document.querySelector(selector);
if (!matched) throw new Error(`No element: ${selector}`);
return matched;
}
const candidates = Array.from(document.querySelectorAll(
"main, article, [role='main'], section, .markdown, .prose, [data-message-author-role]"
))
.filter(node => normalizeText((node as HTMLElement).innerText || "").length > 0);
if (!candidates.length) return document.body;
candidates.sort((a, b) => candidateScore(b) - candidateScore(a));
return candidates[0];
}
+85
View File
@@ -0,0 +1,85 @@
export const BLOCK_TAGS = new Set([
"article", "aside", "blockquote", "body", "div", "dl", "fieldset", "figcaption",
"figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr",
"li", "main", "nav", "ol", "p", "pre", "section", "table", "tbody", "td", "tfoot",
"th", "thead", "tr", "ul"
]);
export const NOISE_SELECTOR = [
"script",
"style",
"noscript",
"template",
"svg",
"canvas",
"iframe",
"dialog",
"button",
"input",
"textarea",
"select",
"option",
"form",
"[hidden]",
"[aria-hidden='true']",
".sr-only",
"[class*='sr-only']",
"[class*='file-tile']",
"form[data-type='unified-composer']",
".composer-btn",
"[data-composer-surface='true']",
"#thread-bottom-container",
"[data-testid*='action-button']",
].join(", ");
export function normalizeText(value: string) {
return value.replace(/\s+/g, " ").trim();
}
export function normalizeInline(value: string) {
return value
.replace(/[ \t]+\n/g, "\n")
.replace(/\n[ \t]+/g, "\n")
.replace(/\n{3,}/g, "\n\n")
.replace(/[ \t]{2,}/g, " ")
.trim();
}
export function collapseBlankLines(value: string) {
return value
.replace(/[ \t]+\n/g, "\n")
.replace(/\n{3,}/g, "\n\n")
.trim();
}
export function escapeMarkdown(text: string) {
return text.replace(/([\\`[\]])/g, "\\$1");
}
export function escapeTableCell(text: string) {
return text.replace(/\|/g, "\\|").replace(/\n+/g, " ").trim();
}
export function absoluteUrl(attr: string | null | undefined, fallback?: string) {
return attr || fallback || "";
}
export function isNoiseElement(node: Node | null): boolean {
if (!node || node.nodeType !== Node.ELEMENT_NODE) return false;
const el = node as Element;
const tag = el.tagName.toLowerCase();
if (["script", "style", "noscript", "template", "svg", "canvas", "iframe", "dialog"].includes(tag)) return true;
if (["button", "input", "textarea", "select", "option", "form"].includes(tag)) return true;
if (el.hasAttribute("hidden")) return true;
if ((el.getAttribute("aria-hidden") || "").toLowerCase() === "true") return true;
if (el.matches(".sr-only, [class*='sr-only']")) return true;
if (el.matches("[class*='file-tile'], form[data-type='unified-composer'], .composer-btn, [data-composer-surface='true'], #thread-bottom-container")) return true;
if (el.matches("[data-testid*='action-button']")) return true;
return false;
}
export function stripNoise(root: Element): Element {
const clone = root.cloneNode(true) as Element;
clone.querySelectorAll(NOISE_SELECTOR).forEach(node => node.remove());
return clone;
}