Add better Markdown extraction and noise filtering so the output is no longer broken, and allow `session list` to aggregate across multiple browsers
This commit is contained in:
@@ -122,7 +122,7 @@ browser-cli/
|
||||
|
||||
All commands are run with `uv run browser-cli [--browser ALIAS] <command>`.
|
||||
|
||||
If exactly one browser instance is connected, commands auto-target it. Use `--browser ALIAS` when multiple browser instances are connected. `tabs list`, `tabs count`, `groups list`, `groups count`, and `windows list` are the only commands that aggregate across all active browsers when `--browser` is omitted; in that mode they show the source browser alias or UUID. You can inspect the active instances with `browser-cli clients` and assign a persistent profile alias from inside the target browser with `browser-cli clients rename --browser <current-alias> <new-alias>`. Closed browsers are removed from the client registry automatically.
|
||||
If exactly one browser instance is connected, commands auto-target it. Use `--browser ALIAS` when multiple browser instances are connected. `tabs list`, `tabs count`, `groups list`, `groups count`, `windows list`, and `session list` aggregate across all active browsers when `--browser` is omitted; in that mode they show the source browser alias or UUID. You can inspect the active instances with `browser-cli clients` and assign a persistent profile alias from inside the target browser with `browser-cli clients rename --browser <current-alias> <new-alias>`. Closed browsers are removed from the client registry automatically.
|
||||
|
||||
Important: profile aliases are browser-instance aliases, not window aliases. Window aliases created with `windows rename` are only for targeting windows in commands like `nav open --window work`. If a browser instance has no explicit profile alias set, the native host gives it a generated UUID alias so multiple unaliased browsers stay distinct.
|
||||
|
||||
|
||||
@@ -346,6 +346,17 @@ class BrowserCLI:
|
||||
return self._cmd("session.diff", {"nameA": name_a, "nameB": name_b})
|
||||
|
||||
def session_list(self) -> list[dict]:
    """Return saved sessions.

    In implicit multi-browser mode each session dict includes a ``browser`` key.
    """
    per_browser = self._collect_multi_browser("session.list", {})
    if not per_browser:
        # No multi-browser results: ask the single targeted browser instead.
        return self._cmd("session.list", {})

    tagged = []
    for target, sessions in per_browser:
        # A browser may report nothing (None/empty); it contributes no rows.
        for session in sessions or []:
            tagged.append({**session, "browser": target.display_name})
    return tagged
|
||||
|
||||
def session_remove(self, name: str) -> None:
|
||||
|
||||
@@ -1,10 +1,426 @@
|
||||
import click
|
||||
import json
|
||||
import re
|
||||
from html.parser import HTMLParser
|
||||
|
||||
import click
|
||||
from browser_cli.client import send_command, BrowserNotConnected
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
console = Console()
|
||||
_FENCE_RE = re.compile(r"```(?:[^\n`]*)\n.*?\n```", re.DOTALL)
|
||||
_ESCAPED_MARKDOWN_RE = re.compile(r"\\([_-])")
|
||||
_TABLE_SEPARATOR_RE = re.compile(r"^\|(?:\s*:?-{3,}:?\s*\|)+\s*$")
|
||||
|
||||
|
||||
class _HtmlNode:
|
||||
def __init__(self, tag=None, attrs=None, text=None):
|
||||
self.tag = tag
|
||||
self.attrs = attrs or {}
|
||||
self.text = text
|
||||
self.children = []
|
||||
|
||||
|
||||
class _HtmlTreeBuilder(HTMLParser):
    """Build a lightweight ``_HtmlNode`` tree from an HTML fragment.

    The tree is rooted at ``self.root`` (a synthetic ``document`` node).
    ``self._stack`` holds the chain of currently open elements; new nodes are
    always attached to the element on top of the stack.
    """

    # Void elements never have an end tag, so they must not be pushed on the
    # open-element stack; otherwise every following sibling would be nested
    # inside them until an ancestor is closed.
    _VOID_TAGS = frozenset({
        "area", "base", "br", "col", "embed", "hr", "img",
        "input", "link", "meta", "source", "track", "wbr",
    })

    def __init__(self):
        super().__init__(convert_charrefs=True)
        self.root = _HtmlNode(tag="document")
        self._stack = [self.root]

    def _append_element(self, tag, attrs):
        """Attach a new element node to the innermost open element and return it."""
        node = _HtmlNode(tag=tag.lower(), attrs=dict(attrs))
        self._stack[-1].children.append(node)
        return node

    def handle_starttag(self, tag, attrs):
        """Add an element; keep it open unless it is a void element."""
        node = self._append_element(tag, attrs)
        if node.tag not in self._VOID_TAGS:
            self._stack.append(node)

    def handle_startendtag(self, tag, attrs):
        """Add a self-closing element (``<x/>``) without opening it."""
        self._append_element(tag, attrs)

    def handle_endtag(self, tag):
        """Close the nearest open element with this tag, plus anything inside it.

        Unmatched end tags are ignored; the synthetic root (index 0) is never
        popped.
        """
        lowered = tag.lower()
        for index in range(len(self._stack) - 1, 0, -1):
            if self._stack[index].tag == lowered:
                del self._stack[index:]
                break

    def handle_data(self, data):
        """Attach a non-empty text run to the innermost open element."""
        if data:
            self._stack[-1].children.append(_HtmlNode(text=data))
|
||||
|
||||
|
||||
def _normalize_text(value):
|
||||
return re.sub(r"\s+", " ", value or "").strip()
|
||||
|
||||
|
||||
def _normalize_inline(value):
|
||||
value = value.replace("\xa0", " ")
|
||||
value = re.sub(r"[ \t\r\f\v]+", " ", value)
|
||||
value = re.sub(r" *\n *", "\n", value)
|
||||
return value.strip()
|
||||
|
||||
|
||||
def _collapse_blank_lines(value):
|
||||
value = re.sub(r"[ \t]+\n", "\n", value)
|
||||
value = re.sub(r"\n{3,}", "\n\n", value)
|
||||
return value.strip()
|
||||
|
||||
|
||||
def _escape_markdown(text):
|
||||
return re.sub(r"([\\`[\]])", r"\\\1", text)
|
||||
|
||||
|
||||
def _escape_table_cell(text):
|
||||
return text.replace("|", r"\|").replace("\n", " ").strip()
|
||||
|
||||
|
||||
def _iter_descendants(node):
|
||||
for child in getattr(node, "children", []):
|
||||
yield child
|
||||
yield from _iter_descendants(child)
|
||||
|
||||
|
||||
def _has_class(node, class_name):
|
||||
classes = (node.attrs.get("class") or "").split()
|
||||
return class_name in classes
|
||||
|
||||
|
||||
def _is_code_block_node(node):
|
||||
if not node or not node.tag:
|
||||
return False
|
||||
if node.attrs.get("data-is-code-block-view") == "true":
|
||||
return True
|
||||
return node.tag == "pre"
|
||||
|
||||
|
||||
def _inline_text(node):
    """Render ``node`` and its descendants as inline Markdown.

    Text nodes are Markdown-escaped. ``br`` becomes a newline, ``img`` an
    image reference, ``a`` a link, ``code`` a code span, ``strong``/``b``
    bold and ``em``/``i`` emphasis. Any other element is rendered as the
    concatenation of its children, with a newline after each block-level
    child so adjacent blocks do not run together.
    """
    if node.text is not None:
        return _escape_markdown(node.text)
    if not node.tag:
        return ""

    tag = node.tag
    if tag == "br":
        return "\n"
    if tag == "img":
        src = node.attrs.get("src") or ""
        if not src:
            return ""
        alt = _normalize_text(node.attrs.get("alt") or "")
        # Emit a real Markdown image; an empty alt simply yields ``![](src)``.
        return f"![{alt}]({src})"

    def children_inline():
        """Concatenate and normalise the inline rendering of all children."""
        return _normalize_inline("".join(_inline_text(child) for child in node.children))

    if tag == "a":
        text = children_inline()
        href = node.attrs.get("href") or ""
        return f"[{text or href}]({href})" if href else text
    if tag == "code":
        text = children_inline()
        # Escaping is done outside the f-string: a backslash inside an
        # f-string expression is a SyntaxError before Python 3.12.
        escaped = text.replace("`", r"\\`")
        return f"`{escaped}`" if text else ""
    if tag in {"strong", "b"}:
        text = children_inline()
        return f"**{text}**" if text else ""
    if tag in {"em", "i"}:
        text = children_inline()
        return f"*{text}*" if text else ""

    chunks = []
    for child in node.children:
        rendered = _inline_text(child)
        if rendered:
            chunks.append(rendered)
        if child.tag in {"p", "div", "table", "ul", "ol", "pre"}:
            chunks.append("\n")
    return "".join(chunks)
|
||||
|
||||
|
||||
def _text_block(node):
    """Render the children of ``node`` as one normalised block of inline Markdown."""
    raw = "".join(_inline_text(child) for child in node.children)
    normalized = _normalize_inline(raw)
    return _collapse_blank_lines(normalized)
|
||||
|
||||
|
||||
def _inner_text_preserve(node):
|
||||
if node.text is not None:
|
||||
return node.text
|
||||
if not node.tag:
|
||||
return ""
|
||||
if node.tag == "br":
|
||||
return ""
|
||||
return "".join(_inner_text_preserve(child) for child in node.children)
|
||||
|
||||
|
||||
def _table_to_markdown(node):
    """Render a ``<table>`` node as a pipe-delimited Markdown table.

    Rows with no ``td``/``th`` cells are skipped and short rows are padded
    with empty cells. The header is chosen as follows: a blank first row is
    discarded in favour of the next row (when one exists); otherwise the
    first row is the header if the table has a direct ``thead`` child or the
    first ``tr`` contains a ``th``; otherwise an empty header is emitted and
    every row is body. Returns ``""`` for a table without cells.
    """
    tr_nodes = [item for item in _iter_descendants(node) if item.tag == "tr"]

    rows = []
    for tr in tr_nodes:
        cells = [
            _escape_table_cell(_text_block(cell))
            for cell in tr.children
            if cell.tag in {"td", "th"}
        ]
        if cells:
            rows.append(cells)
    if not rows:
        return ""

    widths = max(map(len, rows))
    padded = [cells + [""] * (widths - len(cells)) for cells in rows]

    first_row_blank = not any(cell.strip() for cell in padded[0])
    has_thead = any(child.tag == "thead" for child in node.children)
    first_row_has_th = any(cell.tag == "th" for cell in tr_nodes[0].children)

    if first_row_blank and len(padded) > 1:
        headers, body_rows = padded[1], padded[2:]
    elif has_thead or first_row_has_th or first_row_blank:
        headers, body_rows = padded[0], padded[1:]
    else:
        headers, body_rows = [""] * widths, padded

    def as_line(cells):
        return f"| {' | '.join(cells)} |"

    lines = [as_line(headers), as_line(["---"] * widths)]
    lines.extend(as_line(row) for row in body_rows)
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _list_to_markdown(node, depth=0):
    """Render a ``ul``/``ol`` node as Markdown list lines.

    Only direct ``li`` children are items. Ordered lists are numbered from 1
    (the counter advances even for items that render empty). Extra lines of
    an item are aligned under its text, and nested lists are rendered
    recursively with ``depth + 1``.
    """
    is_ordered = node.tag == "ol"
    indent = " " * depth
    output = []

    list_items = (child for child in node.children if child.tag == "li")
    for number, li in enumerate(list_items, start=1):
        marker = f"{number}. " if is_ordered else "- "

        inline_parts = []
        sublists = []
        for part in li.children:
            if part.tag in {"ul", "ol"}:
                sublists.append(_list_to_markdown(part, depth + 1))
            else:
                inline_parts.append(_inline_text(part))

        text = _collapse_blank_lines(_normalize_inline("".join(inline_parts)))
        if text:
            first, *rest = text.splitlines()
            output.append(f"{indent}{marker}{first}")
            hanging = f"{indent}{' ' * len(marker)}"
            output.extend(f"{hanging}{extra}" for extra in rest)
        # Nested lists follow the item text; empty renderings are dropped.
        output.extend(filter(None, sublists))

    return "\n".join(output)
|
||||
|
||||
|
||||
def _code_block_to_markdown(node):
    """Render a code-block node as a fenced Markdown block.

    ``<pre>`` uses its raw inner text. Any other node is treated as a
    code-editor view whose lines are the descendants carrying the
    ``cm-line`` class. Returns ``""`` when no code remains.
    """
    if node.tag == "pre":
        raw = _inner_text_preserve(node)
    else:
        raw = "\n".join(
            _inner_text_preserve(item)
            for item in _iter_descendants(node)
            if item.tag and _has_class(item, "cm-line")
        )

    code = raw.rstrip("\n")
    if not code:
        return ""
    return f"```\n{code}\n```"
|
||||
|
||||
|
||||
def _block_to_markdown(node):
    """Render ``node`` as block-level Markdown.

    Dispatches on the node kind: text, code block, table, list, heading,
    paragraph/figcaption, blockquote, horizontal rule or image. Any other
    element is rendered as its non-empty child blocks separated by blank
    lines, falling back to inline rendering when no child yields a block.
    """
    if node.text is not None:
        return _normalize_text(node.text)

    tag = node.tag
    if not tag:
        return ""
    if _is_code_block_node(node):
        return _code_block_to_markdown(node)
    if tag == "table":
        return _table_to_markdown(node)
    if tag in {"ul", "ol"}:
        return _list_to_markdown(node)

    if re.fullmatch(r"h[1-6]", tag):
        title = _text_block(node)
        if not title:
            return ""
        level = int(tag[1])
        return f"{'#' * level} {title}"

    if tag in {"p", "figcaption"}:
        return _text_block(node)

    if tag == "blockquote":
        rendered = [_block_to_markdown(child) for child in node.children]
        quoted = _collapse_blank_lines("\n\n".join(block for block in rendered if block))
        if not quoted:
            return ""
        # Blank lines inside the quote keep a bare ">" so the quote stays joined.
        return "\n".join(f"> {line}" if line else ">" for line in quoted.splitlines())

    if tag == "hr":
        return "---"
    if tag == "img":
        return _inline_text(node)

    blocks = []
    for child in node.children:
        block = _block_to_markdown(child)
        if block:
            blocks.append(block)
    if not blocks:
        return _text_block(node)
    return _collapse_blank_lines("\n\n".join(blocks))
|
||||
|
||||
|
||||
def _parse_table_row(line):
|
||||
stripped = line.strip()
|
||||
if not stripped.startswith("|") or not stripped.endswith("|"):
|
||||
return None
|
||||
return [cell.strip() for cell in stripped.strip("|").split("|")]
|
||||
|
||||
|
||||
def _repair_table_headers(lines):
    """Promote the first data row when a table starts with an all-blank header.

    Looks for three consecutive lines shaped as row / separator / row. When
    the first row has only empty cells and the third has at least one
    non-empty cell, the pair ``third row, separator`` replaces all three
    lines. Every emitted line is stripped.
    """
    output = []
    total = len(lines)
    cursor = 0
    while cursor < total:
        if cursor + 2 < total:
            header_cells = _parse_table_row(lines[cursor])
            data_cells = _parse_table_row(lines[cursor + 2])
            looks_like_table = (
                header_cells is not None
                and data_cells is not None
                and bool(_TABLE_SEPARATOR_RE.match(lines[cursor + 1].strip()))
            )
            if (
                looks_like_table
                and header_cells
                and not any(header_cells)
                and any(data_cells)
            ):
                output.append(lines[cursor + 2].strip())
                output.append(lines[cursor + 1].strip())
                cursor += 3
                continue
        output.append(lines[cursor].strip())
        cursor += 1
    return output
|
||||
|
||||
|
||||
def _repair_list_continuations(lines):
    """Indent a plain line that directly follows a list item as its continuation.

    Every line is stripped. A non-empty line right after a list item is
    indented under that item's text unless it starts a new Markdown block
    (fence, heading, quote, table row or another list item). Only the first
    such line after each item is treated as a continuation.
    """
    output = []
    # Indent to apply to the next line; None when the previous line was not
    # a list item.
    hanging_indent = None

    for raw in lines:
        text = raw.strip()
        item = re.match(r"^(\s*)([-*+]|\d+\.)\s+.+$", text)
        starts_block = (
            not text
            or text.startswith(("```", "#", ">", "|"))
            or _TABLE_SEPARATOR_RE.match(text)
            or re.match(r"^(\s*)([-*+]|\d+\.)\s+", text)
        )

        if hanging_indent is not None and text and not starts_block:
            output.append(f"{hanging_indent}{text}")
            hanging_indent = None
            continue

        output.append(text)
        if item:
            marker_width = len(item.group(2)) + 1
            hanging_indent = f"{item.group(1)}{' ' * marker_width}"
        else:
            hanging_indent = None

    return output
|
||||
|
||||
|
||||
def _repair_flattened_diagram(text):
|
||||
if "\n" in text:
|
||||
return text
|
||||
if sum(text.count(char) for char in "│▼├└") < 2:
|
||||
return text
|
||||
|
||||
text = re.sub(r"\s{2,}([│▼])", r"\n \1", text)
|
||||
text = re.sub(r"([│▼])\s{2,}", r"\1\n", text)
|
||||
text = re.sub(r"([│▼])(?=[^\s\n│▼├└])", r"\1\n", text)
|
||||
text = re.sub(r"(?<=[^\s\n])([├└])", r"\n\1", text)
|
||||
text = re.sub(r"([^\s\n])(\()", r"\1\n\2", text)
|
||||
return "\n".join(line.rstrip() for line in text.splitlines() if line.strip())
|
||||
|
||||
|
||||
def _convert_dash_lists_to_branches(lines):
|
||||
converted = []
|
||||
index = 0
|
||||
while index < len(lines):
|
||||
match = re.match(r"^(\s*)-\s+(.*)$", lines[index])
|
||||
if not match:
|
||||
converted.append(lines[index])
|
||||
index += 1
|
||||
continue
|
||||
|
||||
indent = match.group(1)
|
||||
items = []
|
||||
while index < len(lines):
|
||||
next_match = re.match(rf"^{re.escape(indent)}-\s+(.*)$", lines[index])
|
||||
if not next_match:
|
||||
break
|
||||
items.append(next_match.group(1))
|
||||
index += 1
|
||||
|
||||
for item_index, item in enumerate(items):
|
||||
branch = "└" if item_index == len(items) - 1 else "├"
|
||||
converted.append(f"{indent}{branch} {item}")
|
||||
return converted
|
||||
|
||||
|
||||
def _clean_code_block(code):
    """Normalise the body of a fenced code block.

    Trailing whitespace and blank leading/trailing lines are removed,
    flattened diagrams are re-broken, lone unindented ``│``/``▼`` connector
    lines get a leading space, and dash lists become tree branches.
    """
    trimmed = [line.rstrip() for line in code.splitlines()]

    # Slice off blank lines at both ends instead of popping them one by one.
    start, end = 0, len(trimmed)
    while start < end and not trimmed[start].strip():
        start += 1
    while end > start and not trimmed[end - 1].strip():
        end -= 1

    repaired = _repair_flattened_diagram("\n".join(trimmed[start:end]))

    result = []
    for line in repaired.splitlines() if repaired else []:
        bare = line.strip()
        if bare in {"│", "▼"} and not re.match(r"^\s+[│▼]\s*$", line):
            result.append(f" {bare}")
        else:
            result.append(line)

    return "\n".join(_convert_dash_lists_to_branches(result))
|
||||
|
||||
|
||||
def _clean_markdown_output(markdown):
    """Post-process Markdown: tidy prose and fenced code blocks separately.

    The text is split around fenced code blocks. Prose segments are
    unescaped, stripped line by line, and passed through the table-header
    and list-continuation repairs; fence bodies are passed through the
    code-block clean-up. Non-empty pieces are joined with blank lines.
    """
    if not markdown:
        return ""

    def tidy_prose(segment):
        """Clean one prose segment; an empty segment yields ``""``."""
        unescaped = _ESCAPED_MARKDOWN_RE.sub(r"\1", segment)
        stripped_lines = [line.strip() for line in unescaped.splitlines()]
        repaired = _repair_list_continuations(_repair_table_headers(stripped_lines))
        return _collapse_blank_lines("\n".join(repaired))

    pieces = []
    cursor = 0
    for fence_match in _FENCE_RE.finditer(markdown):
        pieces.append(tidy_prose(markdown[cursor:fence_match.start()]))

        # Split the fence into its opening line and body; the closing
        # fence line is discarded and re-added below.
        header, _, remainder = fence_match.group(0).partition("\n")
        body = remainder.rpartition("\n")[0]
        cleaned_body = _clean_code_block(body)
        if cleaned_body:
            pieces.append(f"{header}\n{cleaned_body}\n```")
        else:
            pieces.append(f"{header}\n```")
        cursor = fence_match.end()

    pieces.append(tidy_prose(markdown[cursor:]))

    # Segments that cleaned down to nothing are dropped here.
    return "\n\n".join(piece for piece in pieces if piece)
|
||||
|
||||
|
||||
def _convert_html_to_markdown(html):
    """Convert an HTML fragment to cleaned Markdown.

    The HTML is parsed into a node tree, rendered block by block, and the
    result is run through the Markdown clean-up. ``None`` or empty input is
    treated as an empty document.
    """
    parser = _HtmlTreeBuilder()
    parser.feed(html or "")
    # feed() may hold back trailing input it considers incomplete (for
    # example text ending in an unterminated "&"); close() flushes it so
    # nothing is silently lost.
    parser.close()
    markdown = _block_to_markdown(parser.root)
    return _clean_markdown_output(markdown)
|
||||
|
||||
|
||||
def _handle(command, args=None):
|
||||
@@ -80,4 +496,8 @@ def extract_html():
|
||||
def extract_markdown(selector):
    """Extract the page's main content as Markdown."""
    raw = _handle("extract.markdown", {"selector": selector}) or ""
    # A payload starting with "<" is treated as HTML and converted here;
    # anything else is taken to be Markdown already and only cleaned up.
    if raw.lstrip().startswith("<"):
        rendered = _convert_html_to_markdown(raw)
    else:
        rendered = _clean_markdown_output(raw)
    rendered = rendered or ""
    # Only add a newline when the output does not already end with one.
    click.echo(rendered, nl=not rendered.endswith("\n"))
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
import click
|
||||
import json
|
||||
from browser_cli.client import send_command, BrowserNotConnected
|
||||
from browser_cli.client import active_browser_targets, send_command, BrowserNotConnected
|
||||
from rich.console import Console
|
||||
|
||||
console = Console()
|
||||
|
||||
|
||||
def _handle(command, args=None):
|
||||
def _handle(command, args=None, profile=None):
|
||||
try:
|
||||
return send_command(command, args or {})
|
||||
return send_command(command, args or {}, profile=profile)
|
||||
except BrowserNotConnected as e:
|
||||
console.print(f"[red]Error:[/red] {e}")
|
||||
raise SystemExit(1)
|
||||
@@ -17,6 +16,23 @@ def _handle(command, args=None):
|
||||
raise SystemExit(1)
|
||||
|
||||
|
||||
def _handle_multi(command, args=None, profile=None):
    """Send ``command`` to one browser, returning ``None`` instead of failing.

    ``BrowserNotConnected`` and ``RuntimeError`` are swallowed so a caller
    looping over several browsers can skip the ones that do not answer.
    """
    payload = args or {}
    try:
        response = send_command(command, payload, profile=profile)
    except (BrowserNotConnected, RuntimeError):
        return None
    return response
|
||||
|
||||
|
||||
def _multi_browser_targets():
    """Return the active browser targets when implicit multi-browser mode applies.

    The result is empty when the root click context has ``browser_explicit``
    set, or when at most one browser target is active.
    """
    ctx_root = click.get_current_context().find_root()
    if not ctx_root.obj.get("browser_explicit"):
        candidates = active_browser_targets()
        if len(candidates) > 1:
            return candidates
    return []
|
||||
|
||||
|
||||
@click.group("session")
|
||||
def session_group():
|
||||
"""Save and restore browser sessions."""
|
||||
@@ -71,18 +87,35 @@ def session_diff(name_a, name_b):
|
||||
def session_list():
|
||||
"""List all saved sessions."""
|
||||
from rich.table import Table
|
||||
targets = _multi_browser_targets()
|
||||
show_browser = bool(targets)
|
||||
if targets:
|
||||
sessions = []
|
||||
for target in targets:
|
||||
result = _handle_multi("session.list", profile=target.profile)
|
||||
if result is None:
|
||||
continue
|
||||
sessions.extend({**session, "browser": target.display_name} for session in result)
|
||||
if not sessions:
|
||||
console.print("[red]Error:[/red] Cannot resolve a browser socket automatically.")
|
||||
raise SystemExit(1)
|
||||
else:
|
||||
sessions = _handle("session.list")
|
||||
if not sessions:
|
||||
console.print("[yellow]No saved sessions[/yellow]")
|
||||
return
|
||||
table = Table(show_header=True, header_style="bold cyan")
|
||||
if show_browser:
|
||||
table.add_column("Browser")
|
||||
table.add_column("Name")
|
||||
table.add_column("Tabs", width=6)
|
||||
table.add_column("Saved at")
|
||||
for s in sessions:
|
||||
from datetime import datetime
|
||||
saved = datetime.fromtimestamp(s["savedAt"] / 1000).strftime("%Y-%m-%d %H:%M") if s.get("savedAt") else ""
|
||||
table.add_row(s["name"], str(s["tabs"]), saved)
|
||||
row = [s.get("browser", "")] if show_browser else []
|
||||
row.extend([s["name"], str(s["tabs"]), saved])
|
||||
table.add_row(*row)
|
||||
console.print(table)
|
||||
|
||||
|
||||
|
||||
+200
-12
@@ -659,6 +659,32 @@ function contentDispatch(funcName, args) {
|
||||
"li", "main", "nav", "ol", "p", "pre", "section", "table", "tbody", "td", "tfoot",
|
||||
"th", "thead", "tr", "ul"
|
||||
]);
|
||||
const NOISE_SELECTOR = [
|
||||
"script",
|
||||
"style",
|
||||
"noscript",
|
||||
"template",
|
||||
"svg",
|
||||
"canvas",
|
||||
"iframe",
|
||||
"dialog",
|
||||
"button",
|
||||
"input",
|
||||
"textarea",
|
||||
"select",
|
||||
"option",
|
||||
"form",
|
||||
"[hidden]",
|
||||
"[aria-hidden='true']",
|
||||
".sr-only",
|
||||
"[class*='sr-only']",
|
||||
"[class*='file-tile']",
|
||||
"form[data-type='unified-composer']",
|
||||
".composer-btn",
|
||||
"[data-composer-surface='true']",
|
||||
"#thread-bottom-container",
|
||||
"[data-testid*='action-button']",
|
||||
].join(", ");
|
||||
|
||||
function normalizeText(value) {
|
||||
return value.replace(/\s+/g, " ").trim();
|
||||
@@ -681,7 +707,7 @@ function contentDispatch(funcName, args) {
|
||||
}
|
||||
|
||||
function escapeMarkdown(text) {
|
||||
return text.replace(/([\\`*_{}\[\]()#+\-!|>])/g, "\\$1");
|
||||
return text.replace(/([\\`[\]])/g, "\\$1");
|
||||
}
|
||||
|
||||
function escapeTableCell(text) {
|
||||
@@ -692,12 +718,55 @@ function contentDispatch(funcName, args) {
|
||||
return attr || fallback || "";
|
||||
}
|
||||
|
||||
// Returns true for elements that should never contribute to extracted
// content: non-content and form-control tags, hidden / aria-hidden nodes,
// screen-reader-only helpers, and composer / action-button chrome.
function isNoiseElement(node) {
  if (!node || node.nodeType !== Node.ELEMENT_NODE) return false;

  const noiseTags = [
    "script", "style", "noscript", "template", "svg", "canvas", "iframe", "dialog",
    "button", "input", "textarea", "select", "option", "form",
  ];
  if (noiseTags.includes(node.tagName.toLowerCase())) return true;

  if (node.hasAttribute("hidden")) return true;
  const ariaHidden = node.getAttribute("aria-hidden") || "";
  if (ariaHidden.toLowerCase() === "true") return true;

  const noiseSelectors = [
    ".sr-only, [class*='sr-only']",
    "[class*='file-tile'], form[data-type='unified-composer'], .composer-btn, [data-composer-surface='true'], #thread-bottom-container",
    "[data-testid*='action-button']",
  ];
  return noiseSelectors.some(selector => node.matches(selector));
}
|
||||
|
||||
function stripNoise(root) {
|
||||
const clone = root.cloneNode(true);
|
||||
clone.querySelectorAll("script, style, noscript, template").forEach(node => node.remove());
|
||||
clone.querySelectorAll(NOISE_SELECTOR).forEach(node => node.remove());
|
||||
return clone;
|
||||
}
|
||||
|
||||
// Scores a candidate root element. The base score is the length of its
// normalised visible text; content structure (headings, paragraphs, lists,
// tables, code, images) and main/prose containers add to it, while
// controls, forms and svg/canvas subtract. Nodes with no text get -Infinity.
function candidateScore(node) {
  const text = normalizeText(node.innerText || "");
  if (!text) return -Infinity;

  const count = selector => node.querySelectorAll(selector).length;
  const flag = selector => (node.matches(selector) ? 1 : 0);

  let score = text.length;
  score += flag("main, article, [role='main']") * 4000;
  score += flag(".markdown, .prose, [data-message-author-role='assistant']") * 5000;
  score += count("h1, h2, h3, h4, h5, h6") * 250;
  score += count("p") * 60;
  score += count("li") * 35;
  score += count("table") * 80;
  score += count("pre, code") * 60;
  score += count("img, figure") * 25;
  score -= count("button, input, textarea, select") * 120;
  score -= count("form") * 200;
  score -= count("svg, canvas") * 40;
  return score;
}
|
||||
|
||||
function pickRoot() {
|
||||
if (selector) {
|
||||
const matched = document.querySelector(selector);
|
||||
@@ -705,10 +774,12 @@ function contentDispatch(funcName, args) {
|
||||
return matched;
|
||||
}
|
||||
|
||||
const candidates = Array.from(document.querySelectorAll("main, article, [role='main']"))
|
||||
const candidates = Array.from(document.querySelectorAll(
|
||||
"main, article, [role='main'], section, .markdown, .prose, [data-message-author-role]"
|
||||
))
|
||||
.filter(node => normalizeText(node.innerText || "").length > 0);
|
||||
if (!candidates.length) return document.body;
|
||||
candidates.sort((a, b) => (b.innerText || "").length - (a.innerText || "").length);
|
||||
candidates.sort((a, b) => candidateScore(b) - candidateScore(a));
|
||||
return candidates[0];
|
||||
}
|
||||
|
||||
@@ -717,9 +788,9 @@ function contentDispatch(funcName, args) {
|
||||
return escapeMarkdown(node.textContent || "");
|
||||
}
|
||||
if (node.nodeType !== Node.ELEMENT_NODE) return "";
|
||||
if (isNoiseElement(node)) return "";
|
||||
|
||||
const tag = node.tagName.toLowerCase();
|
||||
if (tag === "script" || tag === "style" || tag === "noscript" || tag === "template") return "";
|
||||
if (tag === "br") return "\n";
|
||||
if (tag === "img") {
|
||||
const src = absoluteUrl(node.getAttribute("src"), node.src);
|
||||
@@ -762,6 +833,92 @@ function contentDispatch(funcName, args) {
|
||||
return collapseBlankLines(normalizeInline(Array.from(node.childNodes).map(inlineText).join("")));
|
||||
}
|
||||
|
||||
// Collects the text under a node without normalising whitespace.
// <br> yields a newline, and div/p/li elements end with a newline so the
// line structure of code-like content survives.
function preserveNodeText(node) {
  if (node.nodeType === Node.TEXT_NODE) return node.textContent || "";
  if (node.nodeType !== Node.ELEMENT_NODE) return "";

  const tag = node.tagName.toLowerCase();
  if (tag === "br") return "\n";

  let joined = "";
  for (const child of node.childNodes) {
    joined += preserveNodeText(child);
  }
  return ["div", "p", "li"].includes(tag) ? `${joined}\n` : joined;
}
|
||||
|
||||
// Re-inserts line breaks into a box-drawing diagram that was collapsed onto
// a single line. Text that already has a newline, or fewer than two of the
// characters │▼├└, is returned unchanged.
function repairFlattenedDiagram(text) {
  if (text.includes("\n")) return text;
  const markers = text.match(/[│▼├└]/g) || [];
  if (markers.length < 2) return text;

  // Applied in this exact order; later rules see earlier rules' output.
  const rewrites = [
    [/\s{2,}([│▼])/g, "\n $1"],
    [/([│▼])\s{2,}/g, "$1\n"],
    [/([│▼])(?=[^\s\n│▼├└])/g, "$1\n"],
    [/(?<=[^\s\n])([├└])/g, "\n$1"],
    [/([^\s\n])(\()/g, "$1\n$2"],
  ];
  const broken = rewrites.reduce(
    (current, [pattern, replacement]) => current.replace(pattern, replacement),
    text
  );

  const kept = [];
  for (const line of broken.split("\n")) {
    if (line.trim()) kept.push(line.replace(/\s+$/, ""));
  }
  return kept.join("\n");
}
|
||||
|
||||
// Rewrites runs of "- item" lines as tree branches: consecutive dash items
// with the same leading whitespace form one run, every item gets "├"
// except the last, which gets "└". Other lines pass through unchanged.
function convertDashListsToBranches(lines) {
  const output = [];
  let position = 0;
  while (position < lines.length) {
    const opener = lines[position].match(/^(\s*)-\s+(.*)$/);
    if (!opener) {
      output.push(lines[position]);
      position += 1;
      continue;
    }

    const indent = opener[1];
    // Matches further dash items at exactly this indentation.
    const escapedIndent = indent.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const siblingPattern = new RegExp("^" + escapedIndent + "-\\s+(.*)$");

    const run = [];
    for (; position < lines.length; position += 1) {
      const hit = lines[position].match(siblingPattern);
      if (!hit) break;
      run.push(hit[1]);
    }

    const last = run.length - 1;
    run.forEach((item, number) => {
      const branch = number === last ? "└" : "├";
      output.push(`${indent}${branch} ${item}`);
    });
  }
  return output;
}
|
||||
|
||||
// Normalises code-block text: unifies line endings, trims trailing
// whitespace and blank edge lines, repairs flattened diagrams, gives lone
// unindented │ / ▼ connector lines a leading space, and turns dash lists
// into tree branches.
function normalizeCodeBlock(text) {
  const trimmed = text
    .replace(/\r\n?/g, "\n")
    .split("\n")
    .map(line => line.replace(/\s+$/, ""));

  // Slice off blank lines at both ends.
  let start = 0;
  let end = trimmed.length;
  while (start < end && !trimmed[start].trim()) start += 1;
  while (end > start && !trimmed[end - 1].trim()) end -= 1;

  const repaired = repairFlattenedDiagram(trimmed.slice(start, end).join("\n"));
  const result = [];
  for (const line of repaired ? repaired.split("\n") : []) {
    const bare = line.trim();
    const isConnector = bare === "│" || bare === "▼";
    if (isConnector && !/^\s+[│▼]\s*$/.test(line)) {
      result.push(` ${bare}`);
    } else {
      result.push(line);
    }
  }
  return convertDashListsToBranches(result).join("\n");
}
|
||||
|
||||
function tableToMarkdown(table) {
|
||||
const rows = Array.from(table.querySelectorAll("tr"))
|
||||
.map(row => Array.from(row.children)
|
||||
@@ -780,10 +937,16 @@ function contentDispatch(funcName, args) {
|
||||
|
||||
let headers = normalizedRows[0];
|
||||
let bodyRows = normalizedRows.slice(1);
|
||||
const firstRowIsBlank = headers.every(cell => !cell.trim());
|
||||
if (firstRowIsBlank && normalizedRows.length > 1) {
|
||||
headers = normalizedRows[1];
|
||||
bodyRows = normalizedRows.slice(2);
|
||||
}
|
||||
|
||||
const firstRow = table.querySelector("tr");
|
||||
const thead = table.querySelector("thead");
|
||||
const firstRowHasTh = firstRow && Array.from(firstRow.children).some(cell => cell.tagName === "TH");
|
||||
if (!(thead || firstRowHasTh)) {
|
||||
if (!(thead || firstRowHasTh || firstRowIsBlank)) {
|
||||
headers = new Array(widths).fill("");
|
||||
bodyRows = normalizedRows;
|
||||
}
|
||||
@@ -818,7 +981,12 @@ function contentDispatch(funcName, args) {
|
||||
}
|
||||
|
||||
const line = collapseBlankLines(normalizeInline(content.join("")));
|
||||
if (line) items.push(`${indent}${marker}${line}`);
|
||||
if (line) {
|
||||
const lineParts = line.split("\n");
|
||||
items.push(`${indent}${marker}${lineParts[0]}`);
|
||||
const continuationIndent = `${indent}${" ".repeat(marker.length)}`;
|
||||
lineParts.slice(1).forEach(part => items.push(`${continuationIndent}${part}`));
|
||||
}
|
||||
nested.filter(Boolean).forEach(block => items.push(block));
|
||||
});
|
||||
return items.join("\n");
|
||||
@@ -829,13 +997,21 @@ function contentDispatch(funcName, args) {
|
||||
return normalizeText(node.textContent || "");
|
||||
}
|
||||
if (node.nodeType !== Node.ELEMENT_NODE) return "";
|
||||
if (isNoiseElement(node)) return "";
|
||||
|
||||
const tag = node.tagName.toLowerCase();
|
||||
if (tag === "script" || tag === "style" || tag === "noscript" || tag === "template") return "";
|
||||
if (tag === "table") return tableToMarkdown(node);
|
||||
if (tag === "ul" || tag === "ol") return listToMarkdown(node);
|
||||
if (node.matches(".cm-editor[data-is-code-block-view='true']")) {
|
||||
const lines = Array.from(node.querySelectorAll(".cm-line")).map(line => {
|
||||
const text = preserveNodeText(line);
|
||||
return text === "\n" ? "" : text.replace(/\n$/, "");
|
||||
});
|
||||
const code = normalizeCodeBlock(lines.join("\n"));
|
||||
return code ? `\`\`\`\n${code}\n\`\`\`` : "";
|
||||
}
|
||||
if (tag === "pre") {
|
||||
const code = node.innerText.replace(/\n$/, "");
|
||||
const code = normalizeCodeBlock(preserveNodeText(node));
|
||||
return code ? `\`\`\`\n${code}\n\`\`\`` : "";
|
||||
}
|
||||
if (tag === "blockquote") {
|
||||
@@ -1011,9 +1187,21 @@ async function clientsRenameProfile({ alias }) {
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Pick the tab a command should act on when no explicit tab was given.
 *
 * Collects every tab returned by `chrome.tabs.query({ active: true })` and
 * chooses, in this order:
 *   1. an active tab in a focused window whose URL passes `isScriptableUrl`,
 *   2. any active tab whose URL passes `isScriptableUrl`,
 *   3. an active tab in a focused window,
 *   4. the first active tab returned by the query.
 *
 * @returns {Promise<object>} the chosen tab object.
 * @throws {Error} "No active tab found" when the query returns no tabs.
 */
async function getActiveTab() {
  const activeTabs = await chrome.tabs.query({ active: true });
  if (!activeTabs.length) throw new Error("No active tab found");

  const windows = await chrome.windows.getAll({ populate: false });
  const focusedWindowIds = new Set(
    windows.filter(win => win.focused).map(win => win.id)
  );

  // Use pendingUrl when url is empty; fall back to "" so the check always gets a string.
  const urlOf = tab => tab.url || tab.pendingUrl || "";
  const inFocusedWindow = tab => focusedWindowIds.has(tab.windowId);
  const isScriptable = tab => isScriptableUrl(urlOf(tab));

  return activeTabs.find(tab => inFocusedWindow(tab) && isScriptable(tab))
    || activeTabs.find(isScriptable)
    || activeTabs.find(inFocusedWindow)
    || activeTabs[0];
}
|
||||
|
||||
async function resolveGroupId(nameOrId) {
|
||||
|
||||
@@ -488,6 +488,39 @@ class TestWindows:
|
||||
mock_send.assert_called_once_with("windows.open", {"url": "https://example.com"}, profile=None)
|
||||
|
||||
|
||||
class TestSession:
    """Tests for ``session_list`` on the Python API object supplied by the ``b`` fixture."""

    def test_session_list(self, b, mock_send):
        """The transport's reply is returned as-is and one ``session.list`` command is sent."""
        mock_send.return_value = [{"name": "saved", "tabs": 3, "savedAt": 1712707200000}]
        expected = [{"name": "saved", "tabs": 3, "savedAt": 1712707200000}]

        sessions = b.session_list()

        assert sessions == expected
        mock_send.assert_called_once_with("session.list", {}, profile=None)

    def test_session_list_multi_browser_adds_browser(self, b, mock_send):
        """With two targets patched in, each entry gains a ``browser`` key and each target is queried once."""
        targets = [
            BrowserTarget("default", "uuid-1", "/tmp/uuid-1.sock"),
            BrowserTarget("work", "work", "/tmp/work.sock"),
        ]
        # One reply per target, consumed in call order.
        replies = [
            [{"name": "first", "tabs": 2, "savedAt": 1712707200000}],
            [{"name": "second", "tabs": 5, "savedAt": 1712707300000}],
        ]

        with patch("browser_cli.active_browser_targets", return_value=targets):
            mock_send.side_effect = replies
            sessions = b.session_list()

        expected_sessions = [
            {"name": "first", "tabs": 2, "savedAt": 1712707200000, "browser": "uuid-1"},
            {"name": "second", "tabs": 5, "savedAt": 1712707300000, "browser": "work"},
        ]
        expected_calls = [
            call("session.list", {}, profile="default"),
            call("session.list", {}, profile="work"),
        ]
        assert sessions == expected_sessions
        assert mock_send.call_args_list == expected_calls
|
||||
|
||||
|
||||
# ── Tab model ─────────────────────────────────────────────────────────────────
|
||||
|
||||
class TestTabModel:
|
||||
|
||||
+194
-2
@@ -5,6 +5,7 @@ from unittest.mock import patch
|
||||
|
||||
from browser_cli.cli import main, _project_version
|
||||
from browser_cli.client import BrowserTarget
|
||||
from browser_cli.commands.extract import _clean_markdown_output, _convert_html_to_markdown
|
||||
|
||||
def _expected_version() -> str:
|
||||
pyproject = Path(__file__).resolve().parent.parent / "pyproject.toml"
|
||||
@@ -204,6 +205,46 @@ def test_windows_list_multi_browser_shows_browser_column():
|
||||
assert "work" in result.output
|
||||
|
||||
|
||||
def test_session_list_multi_browser_shows_browser_column():
    """``session list`` without ``--browser`` prints a Browser column and rows from both targets."""
    targets = [
        BrowserTarget("default", "uuid-1", "/tmp/default.sock"),
        BrowserTarget("work", "work", "/tmp/work.sock"),
    ]

    def reply_per_profile(command, args=None, profile=None):
        # Name the fake session after the profile so the output shows which targets were queried.
        assert command == "session.list"
        return [{"name": f"{profile}-session", "tabs": 2, "savedAt": 1712707200000}]

    targets_patch = patch(
        "browser_cli.commands.session.active_browser_targets",
        return_value=targets,
    )
    send_patch = patch(
        "browser_cli.commands.session.send_command",
        side_effect=reply_per_profile,
    )
    with targets_patch, send_patch:
        result = CliRunner().invoke(main, ["session", "list"])

    assert result.exit_code == 0
    for fragment in ("Browser", "uuid-1", "work", "default-session", "work-session"):
        assert fragment in result.output
|
||||
|
||||
|
||||
def test_session_list_with_explicit_browser_does_not_show_browser_column():
    """``--browser work session list`` sends a single command and prints no Browser column."""
    targets = [
        BrowserTarget("default", "uuid-1", "/tmp/default.sock"),
        BrowserTarget("work", "work", "/tmp/work.sock"),
    ]
    reply = [{"name": "work-session", "tabs": 2, "savedAt": 1712707200000}]

    with patch("browser_cli.commands.session.active_browser_targets", return_value=targets):
        with patch("browser_cli.commands.session.send_command", return_value=reply) as send_command:
            result = CliRunner().invoke(main, ["--browser", "work", "session", "list"])

    assert result.exit_code == 0
    assert "Browser" not in result.output
    # NOTE(review): the expected call carries profile=None even though --browser
    # was passed; how the alias reaches the transport is not visible from this test.
    send_command.assert_called_once_with("session.list", {}, profile=None)
|
||||
|
||||
|
||||
def test_windows_open_passes_url():
|
||||
with patch("browser_cli.commands.windows.send_command", return_value={"id": 7}) as send_command:
|
||||
result = CliRunner().invoke(main, ["windows", "open", "https://example.com"])
|
||||
@@ -213,7 +254,7 @@ def test_windows_open_passes_url():
|
||||
send_command.assert_called_once_with("windows.open", {"url": "https://example.com"}, profile=None)
|
||||
|
||||
def test_extract_markdown_command():
|
||||
with patch("browser_cli.commands.extract.send_command", return_value="# Title\n") as send_command:
|
||||
with patch("browser_cli.commands.extract.send_command", return_value="# Title") as send_command:
|
||||
result = CliRunner().invoke(main, ["extract", "markdown"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
@@ -221,9 +262,160 @@ def test_extract_markdown_command():
|
||||
send_command.assert_called_once_with("extract.markdown", {"selector": None})
|
||||
|
||||
def test_extract_markdown_command_with_selector():
    """``extract markdown --selector article`` forwards the selector and prints the Markdown with one trailing newline."""
    # The mocked reply has no trailing newline; the expected output does.
    with patch("browser_cli.commands.extract.send_command", return_value="## Post") as send_command:
        result = CliRunner().invoke(main, ["extract", "markdown", "--selector", "article"])

    assert result.exit_code == 0
    assert result.output == "## Post\n"
    send_command.assert_called_once_with("extract.markdown", {"selector": "article"})
|
||||
|
||||
|
||||
def test_clean_markdown_output_removes_escaped_underscores_and_dashes():
    """Backslashes in front of ``_`` and ``-`` are removed from the cleaned output."""
    escaped = r"hello\_world \- item"
    cleaned = _clean_markdown_output(escaped)
    assert cleaned == "hello_world - item"
|
||||
|
||||
|
||||
def test_clean_markdown_output_trims_useless_whitespace():
    """Whitespace at line edges is stripped and the run of blank lines collapses to one."""
    messy = " # Title \n\n\n paragraph with space \n next line\t \n"
    tidy = "# Title\n\nparagraph with space\nnext line"
    assert _clean_markdown_output(messy) == tidy
|
||||
|
||||
|
||||
def test_clean_markdown_output_repairs_empty_table_header_rows():
    """An all-empty header row is dropped and the first data row takes its place above the separator."""
    broken_rows = [
        "| | | |",
        "| --- | --- | --- |",
        "| Bereich | Plan | Ist |",
        "| A | B | C |",
    ]
    repaired_rows = [
        "| Bereich | Plan | Ist |",
        "| --- | --- | --- |",
        "| A | B | C |",
    ]
    # The input ends with a newline; the expected output does not.
    raw = "\n".join(broken_rows) + "\n"
    assert _clean_markdown_output(raw) == "\n".join(repaired_rows)
|
||||
|
||||
|
||||
def test_clean_markdown_output_preserves_graph_code_blocks():
    """Blank lines just inside the fences are removed; the indented graph lines stay as they are."""
    fenced = "```\n\nA\n │\n ▼\nB\n\n```"
    expected = "```\nA\n │\n ▼\nB\n```"
    assert _clean_markdown_output(fenced) == expected
|
||||
|
||||
|
||||
def test_clean_markdown_output_renders_code_block_list_branches():
    """Dash items inside a fenced block become ``├`` branches, with ``└`` on the last item."""
    raw = "```\nPlattformen\n- Omnifact\n- Open WebUI + Ollama\n- Le Chat\n```"
    expected_lines = [
        "```",
        "Plattformen",
        "├ Omnifact",
        "├ Open WebUI + Ollama",
        "└ Le Chat",
        "```",
    ]
    assert _clean_markdown_output(raw) == "\n".join(expected_lines)
|
||||
|
||||
|
||||
def test_clean_markdown_output_unflattens_graph_code_blocks():
    """A graph flattened onto one line inside a fence is split back into one element per line."""
    flattened = (
        "Golden Set │ ▼Promptfoo(Testausführung) │ ▼UpTrain(Qualitätsbewertung) │ "
        "▼Langfuse(Logging / Observability) │ ▼Plattformen├ Omnifact├ Open WebUI + Ollama└ Le Chat"
    )
    raw = "```\n" + flattened + "\n```"

    expected_lines = [
        "```",
        "Golden Set",
        " │",
        " ▼",
        "Promptfoo",
        "(Testausführung)",
        " │",
        " ▼",
        "UpTrain",
        "(Qualitätsbewertung)",
        " │",
        " ▼",
        "Langfuse",
        "(Logging / Observability)",
        " │",
        " ▼",
        "Plattformen",
        "├ Omnifact",
        "├ Open WebUI + Ollama",
        "└ Le Chat",
        "```",
    ]
    assert _clean_markdown_output(raw) == "\n".join(expected_lines)
|
||||
|
||||
|
||||
def test_extract_markdown_command_repairs_malformed_tables_and_code_blocks():
    """``extract markdown`` output has the empty table header removed and the flattened graph split into lines."""
    table_part = (
        "| | | |\n"
        "| --- | --- | --- |\n"
        "| Bereich | Plan | Ist |\n"
        "| Eval-Stack | Testumgebung | funktionsfähig |\n\n"
    )
    code_part = (
        "```\n"
        "Golden Set │ ▼Promptfoo(Testausführung) │ ▼Plattformen├ Omnifact└ Le Chat\n"
        "```"
    )
    raw = table_part + code_part

    with patch("browser_cli.commands.extract.send_command", return_value=raw):
        result = CliRunner().invoke(main, ["extract", "markdown"])

    output = result.output
    assert result.exit_code == 0
    assert "| Bereich | Plan | Ist |" in output
    assert "| | | |" not in output
    assert "Golden Set\n │\n ▼\nPromptfoo\n(Testausführung)" in output
    assert "├ Omnifact" in output
    assert "└ Le Chat" in output
|
||||
|
||||
|
||||
def test_convert_html_to_markdown_normalizes_blank_table_header_rows():
    """A table whose first row is all empty cells converts with the next row as header and no blank header row."""
    # Leading and trailing "" give the newline after the opening and before the
    # closing of the original multi-line literal.
    html = "\n".join([
        "",
        "<main>",
        "<table>",
        "<tr><td></td><td></td><td></td><td></td></tr>",
        "<tr><td>Risiko</td><td>Beschreibung</td><td>Auswirkung</td><td>Gegenmaßnahme</td></tr>",
        "<tr><td>Datenschutz</td><td>X</td><td>Y</td><td>Z</td></tr>",
        "</table>",
        "</main>",
        "",
    ])

    converted = _convert_html_to_markdown(html)

    assert "| Risiko | Beschreibung | Auswirkung | Gegenmaßnahme |" in converted
    assert "| | | | |" not in converted
|
||||
|
||||
|
||||
def test_convert_html_to_markdown_preserves_codemirror_graph_blocks():
    """A ``.cm-editor`` code-block view converts to a fenced block, one ``.cm-line`` per line, with list items as branches."""
    cm_lines = [
        "Golden Set",
        " │",
        " ▼",
        "Promptfoo",
        "(Testausführung)",
        " │",
        " ▼",
        "Plattformen",
        "- Omnifact",
        "- Open WebUI + Ollama",
        "- Le Chat",
    ]
    # Leading and trailing "" give the newline after the opening and before the
    # closing of the original multi-line literal.
    html = "\n".join(
        [
            "",
            "<main>",
            "<h1>Teil 5 - Eval-Stack Architektur</h1>",
            '<div class="cm-editor" data-is-code-block-view="true" contenteditable="false">',
        ]
        + [f'<div class="cm-line">{text}</div>' for text in cm_lines]
        + [
            "</div>",
            "</main>",
            "",
        ]
    )

    converted = _convert_html_to_markdown(html)

    assert "```\nGolden Set\n │\n ▼\nPromptfoo" in converted
    assert "├ Omnifact" in converted
    assert "└ Le Chat" in converted
|
||||
|
||||
|
||||
def test_convert_html_to_markdown_indents_multiline_list_items():
    """A ``<br>`` inside a list item yields a continuation line indented under the bullet."""
    # Leading and trailing "" give the newline after the opening and before the
    # closing of the original multi-line literal.
    html = "\n".join([
        "",
        "<main>",
        "<h2>2. <strong>Zielarchitektur</strong></h2>",
        "<ul>",
        "<li>",
        "<p>Unternehmensdaten → RAG → KI-Orchestrierung →<br>Local LLMs / API Modelle / Spezialmodelle</p>",
        "</li>",
        "</ul>",
        "</main>",
        "",
    ])

    converted = _convert_html_to_markdown(html)

    expected_item = (
        "- Unternehmensdaten → RAG → KI-Orchestrierung →\n"
        " Local LLMs / API Modelle / Spezialmodelle"
    )
    assert expected_item in converted
|
||||
|
||||
Reference in New Issue
Block a user