Add better Markdown extraction and noise filtering to avoid broken output; allow `session list` to aggregate across multiple browsers

2026-04-12 17:10:19 +02:00
parent 51054422fb
commit 64d804cf32
7 changed files with 899 additions and 22 deletions
@@ -122,7 +122,7 @@ browser-cli/

 All commands are run with `uv run browser-cli [--browser ALIAS] <command>`.

-If exactly one browser instance is connected, commands auto-target it. Use `--browser ALIAS` when multiple browser instances are connected. `tabs list`, `tabs count`, `groups list`, `groups count`, and `windows list` are the only commands that aggregate across all active browsers when `--browser` is omitted; in that mode they show the source browser alias or UUID. You can inspect the active instances with `browser-cli clients` and assign a persistent profile alias from inside the target browser with `browser-cli clients rename --browser <current-alias> <new-alias>`. Closed browsers are removed from the client registry automatically.
+If exactly one browser instance is connected, commands auto-target it. Use `--browser ALIAS` when multiple browser instances are connected. `tabs list`, `tabs count`, `groups list`, `groups count`, `windows list`, and `session list` aggregate across all active browsers when `--browser` is omitted; in that mode they show the source browser alias or UUID. You can inspect the active instances with `browser-cli clients` and assign a persistent profile alias from inside the target browser with `browser-cli clients rename --browser <current-alias> <new-alias>`. Closed browsers are removed from the client registry automatically.

 Important: profile aliases are browser-instance aliases, not window aliases. Window aliases created with `windows rename` are only for targeting windows in commands like `nav open --window work`. If a browser instance has no explicit profile alias set, the native host gives it a generated UUID alias so multiple unaliased browsers stay distinct.

@@ -346,6 +346,17 @@ class BrowserCLI:
        return self._cmd("session.diff", {"nameA": name_a, "nameB": name_b})

    def session_list(self) -> list[dict]:
+        """Return saved sessions.
+
+        In implicit multi-browser mode each session dict includes a ``browser`` key
+        holding the source target's ``display_name``; a falsy per-browser result
+        contributes no entries. Otherwise the plain ``session.list`` result is
+        returned unchanged.
+        """
+        # NOTE(review): presumably _collect_multi_browser returns a falsy value when
+        # a single browser is targeted -- confirm against its definition.
+        multi_results = self._collect_multi_browser("session.list", {})
+        if multi_results:
+            return [
+                {**session, "browser": target.display_name}
+                for target, sessions in multi_results
+                for session in (sessions or [])
+            ]
        return self._cmd("session.list", {})

    def session_remove(self, name: str) -> None:
@@ -1,10 +1,426 @@
-import click
 import json
+import re
+from html.parser import HTMLParser
+
+import click
 from browser_cli.client import send_command, BrowserNotConnected
 from rich.console import Console
 from rich.table import Table

 console = Console()
+_FENCE_RE = re.compile(r"```(?:[^\n`]*)\n.*?\n```", re.DOTALL)
+_ESCAPED_MARKDOWN_RE = re.compile(r"\\([_-])")
+_TABLE_SEPARATOR_RE = re.compile(r"^\|(?:\s*:?-{3,}:?\s*\|)+\s*$")
+
+
+class _HtmlNode:
+    """Lightweight tree node produced by ``_HtmlTreeBuilder``.
+
+    Element nodes carry ``tag`` and ``attrs``; text nodes carry ``text`` and
+    leave ``tag`` as ``None``. ``children`` holds nested nodes in document order.
+    """
+
+    def __init__(self, tag=None, attrs=None, text=None):
+        self.tag = tag
+        self.attrs = attrs or {}
+        self.text = text
+        self.children = []
+
+
+class _HtmlTreeBuilder(HTMLParser):
+    """Build a ``_HtmlNode`` tree from an HTML fragment.
+
+    After ``feed()``, the parsed content hangs off ``root`` (a synthetic
+    ``document`` node).
+    """
+
+    # All void elements from the HTML standard. They never have an end tag, so
+    # they must not be pushed on the open-element stack; otherwise an unclosed
+    # ``<input>`` or ``<meta>`` would swallow every following sibling.
+    _VOID_TAGS = frozenset({
+        "area", "base", "br", "col", "embed", "hr", "img", "input",
+        "link", "meta", "param", "source", "track", "wbr",
+    })
+
+    def __init__(self):
+        super().__init__(convert_charrefs=True)
+        self.root = _HtmlNode(tag="document")
+        self._stack = [self.root]
+
+    def handle_starttag(self, tag, attrs):
+        """Attach a new element to the current parent and open it unless void."""
+        node = _HtmlNode(tag=tag.lower(), attrs=dict(attrs))
+        self._stack[-1].children.append(node)
+        if node.tag not in self._VOID_TAGS:
+            self._stack.append(node)
+
+    def handle_startendtag(self, tag, attrs):
+        """Attach a self-closing element (``<br/>``) without opening it."""
+        node = _HtmlNode(tag=tag.lower(), attrs=dict(attrs))
+        self._stack[-1].children.append(node)
+
+    def handle_endtag(self, tag):
+        """Close the nearest open element named ``tag``; ignore stray end tags."""
+        lowered = tag.lower()
+        # Index 0 is the synthetic root and is never popped.
+        for index in range(len(self._stack) - 1, 0, -1):
+            if self._stack[index].tag == lowered:
+                # Also closes any still-open descendants of the matched element.
+                del self._stack[index:]
+                break
+
+    def handle_data(self, data):
+        """Attach non-empty character data as a text node."""
+        if data:
+            self._stack[-1].children.append(_HtmlNode(text=data))
+
+
+def _normalize_text(value):
+    """Collapse each whitespace run to one space and trim; falsy input gives ``""``."""
+    source = value if value else ""
+    collapsed = re.sub(r"\s+", " ", source)
+    return collapsed.strip()
+
+
+def _normalize_inline(value):
+    """Tidy inline text while keeping line breaks.
+
+    Non-breaking spaces become plain spaces, runs of horizontal whitespace
+    shrink to one space, and spaces hugging a newline are dropped.
+    """
+    without_nbsp = value.replace("\xa0", " ")
+    single_spaced = re.sub(r"[ \t\r\f\v]+", " ", without_nbsp)
+    tight_breaks = re.sub(r" *\n *", "\n", single_spaced)
+    return tight_breaks.strip()
+
+
+def _collapse_blank_lines(value):
+    """Strip trailing spaces/tabs per line and cap blank-line runs at one."""
+    no_trailing = re.sub(r"[ \t]+\n", "\n", value)
+    squeezed = re.sub(r"\n{3,}", "\n\n", no_trailing)
+    return squeezed.strip()
+
+
+def _escape_markdown(text):
+    """Backslash-escape backslashes, backticks and square brackets in ``text``."""
+    def _prefix(hit):
+        return "\\" + hit.group(1)
+
+    return re.sub(r"([\\`[\]])", _prefix, text)
+
+
+def _escape_table_cell(text):
+    """Make ``text`` safe for one table cell: escape pipes, flatten newlines, trim."""
+    piped = text.replace("|", r"\|")
+    single_line = piped.replace("\n", " ")
+    return single_line.strip()
+
+
+def _iter_descendants(node):
+    """Yield every descendant of ``node`` in document (pre-)order."""
+    # Explicit stack instead of recursion; children are pushed reversed so the
+    # left-most child is popped first.
+    pending = list(reversed(getattr(node, "children", [])))
+    while pending:
+        current = pending.pop()
+        yield current
+        pending.extend(reversed(getattr(current, "children", [])))
+
+
+def _has_class(node, class_name):
+    """Return True when ``class_name`` is one of the node's space-separated classes."""
+    raw = node.attrs.get("class") or ""
+    return class_name in raw.split()
+
+
+def _is_code_block_node(node):
+    """Return True for ``<pre>`` or any element flagged ``data-is-code-block-view="true"``."""
+    if node and node.tag:
+        flagged = node.attrs.get("data-is-code-block-view") == "true"
+        return flagged or node.tag == "pre"
+    return False
+
+
+def _inline_text(node):
+    """Render ``node`` and its descendants as inline Markdown.
+
+    Text nodes are Markdown-escaped; ``br``, ``img``, ``a``, ``code``,
+    ``strong``/``b`` and ``em``/``i`` get their Markdown form. Any other
+    element renders its children in order, adding a newline after block-like
+    children so their text does not run together.
+    """
+    if node.text is not None:
+        return _escape_markdown(node.text)
+    if not node.tag:
+        return ""
+
+    tag = node.tag
+    if tag == "br":
+        return "\n"
+    if tag == "img":
+        src = node.attrs.get("src") or ""
+        alt = _normalize_text(node.attrs.get("alt") or "")
+        if not src:
+            return ""
+        return f"![{_escape_markdown(alt)}]({src})" if alt else f"![]({src})"
+
+    if tag in {"a", "code", "strong", "b", "em", "i"}:
+        text = _normalize_inline("".join(_inline_text(child) for child in node.children))
+        if tag == "a":
+            href = node.attrs.get("href") or ""
+            return f"[{text or href}]({href})" if href else text
+        if not text:
+            return ""
+        if tag == "code":
+            # Built outside the f-string: a backslash inside a replacement
+            # field is a SyntaxError before Python 3.12.
+            # NOTE(review): text nodes were already escaped above, so a literal
+            # backtick ends up escaped twice -- confirm that is intended.
+            escaped = text.replace("`", r"\\`")
+            return f"`{escaped}`"
+        if tag in {"strong", "b"}:
+            return f"**{text}**"
+        return f"*{text}*"
+
+    chunks = []
+    for child in node.children:
+        rendered = _inline_text(child)
+        if rendered:
+            chunks.append(rendered)
+            if child.tag in {"p", "div", "table", "ul", "ol", "pre"}:
+                chunks.append("\n")
+    return "".join(chunks)
+
+
+def _text_block(node):
+    """Render the node's children as inline Markdown and tidy the whitespace."""
+    rendered = "".join(map(_inline_text, node.children))
+    return _collapse_blank_lines(_normalize_inline(rendered))
+
+
+def _inner_text_preserve(node):
+    """Concatenate the raw text under ``node`` without escaping or trimming.
+
+    ``<br>`` elements contribute nothing.
+    """
+    if node.text is not None:
+        return node.text
+    if not node.tag or node.tag == "br":
+        return ""
+    pieces = [_inner_text_preserve(child) for child in node.children]
+    return "".join(pieces)
+
+
+def _table_to_markdown(node):
+    """Convert a ``<table>`` node to a pipe table; returns ``""`` if it has no cells.
+
+    The first row becomes the header when the table has a ``<thead>`` child or
+    the first ``<tr>`` contains a ``<th>``. A blank first row is skipped in
+    favour of the second row. Otherwise an empty header row is emitted and
+    every row is treated as body.
+    """
+    tr_nodes = [item for item in _iter_descendants(node) if item.tag == "tr"]
+
+    rows = []
+    for tr in tr_nodes:
+        cells = [
+            _escape_table_cell(_text_block(cell))
+            for cell in tr.children
+            if cell.tag in {"td", "th"}
+        ]
+        if cells:
+            rows.append(cells)
+    if not rows:
+        return ""
+
+    widths = max(map(len, rows))
+    padded = [cells + [""] * (widths - len(cells)) for cells in rows]
+
+    first_row_blank = not any(cell.strip() for cell in padded[0])
+    has_thead = any(child.tag == "thead" for child in node.children)
+    # rows is non-empty here, so at least one <tr> exists.
+    first_row_has_th = any(child.tag == "th" for child in tr_nodes[0].children)
+
+    if first_row_blank and len(padded) > 1:
+        headers, body_rows = padded[1], padded[2:]
+    elif has_thead or first_row_has_th or first_row_blank:
+        headers, body_rows = padded[0], padded[1:]
+    else:
+        headers, body_rows = [""] * widths, padded
+
+    def _format(cells):
+        return f"| {' | '.join(cells)} |"
+
+    lines = [_format(headers), _format(["---"] * widths)]
+    lines.extend(_format(row) for row in body_rows)
+    return "\n".join(lines)
+
+
+def _list_to_markdown(node, depth=0):
+    """Render a ``<ul>``/``<ol>`` node as a Markdown list.
+
+    Each nesting level indents by two spaces. Extra lines of an item are
+    aligned under the item's text, and nested lists follow their parent item.
+    """
+    is_ordered = node.tag == "ol"
+    pad = "  " * depth
+    out = []
+    list_items = (child for child in node.children if child.tag == "li")
+    for number, item in enumerate(list_items, start=1):
+        bullet = f"{number}. " if is_ordered else "- "
+        inline_parts = []
+        sublists = []
+        for part in item.children:
+            if part.tag in {"ul", "ol"}:
+                sublists.append(_list_to_markdown(part, depth + 1))
+            else:
+                inline_parts.append(_inline_text(part))
+        text = _collapse_blank_lines(_normalize_inline("".join(inline_parts)))
+        if text:
+            first, *rest = text.splitlines()
+            out.append(f"{pad}{bullet}{first}")
+            hanging = f"{pad}{' ' * len(bullet)}"
+            out.extend(f"{hanging}{extra}" for extra in rest)
+        out.extend(filter(None, sublists))
+    return "\n".join(out)
+
+
+def _code_block_to_markdown(node):
+    """Render a code-block node as a fenced block, or ``""`` when it is empty.
+
+    ``<pre>`` uses its raw inner text; any other node is read line by line from
+    its descendants carrying the ``cm-line`` class.
+    """
+    if node.tag == "pre":
+        raw = _inner_text_preserve(node)
+    else:
+        raw = "\n".join(
+            _inner_text_preserve(item)
+            for item in _iter_descendants(node)
+            if item.tag and _has_class(item, "cm-line")
+        )
+    body = raw.rstrip("\n")
+    if not body:
+        return ""
+    return f"```\n{body}\n```"
+
+
+def _block_to_markdown(node):
+    """Render ``node`` as block-level Markdown.
+
+    Known block elements are dispatched to their converters; any other element
+    joins its children's blocks with blank lines and falls back to inline
+    rendering when no child produced a block.
+    """
+    if node.text is not None:
+        return _normalize_text(node.text)
+    tag = node.tag
+    if not tag:
+        return ""
+    if _is_code_block_node(node):
+        return _code_block_to_markdown(node)
+    if tag == "table":
+        return _table_to_markdown(node)
+    if tag in {"ul", "ol"}:
+        return _list_to_markdown(node)
+    if re.fullmatch(r"h[1-6]", tag):
+        heading = _text_block(node)
+        if not heading:
+            return ""
+        level = int(tag[1])
+        return f"{'#' * level} {heading}"
+    if tag in {"p", "figcaption"}:
+        return _text_block(node)
+    if tag == "blockquote":
+        inner = [_block_to_markdown(child) for child in node.children]
+        content = _collapse_blank_lines("\n\n".join(block for block in inner if block))
+        if not content:
+            return ""
+        return "\n".join(f"> {line}" if line else ">" for line in content.splitlines())
+    if tag == "hr":
+        return "---"
+    if tag == "img":
+        return _inline_text(node)
+
+    rendered = [_block_to_markdown(child) for child in node.children]
+    blocks = [block for block in rendered if block]
+    if not blocks:
+        return _text_block(node)
+    return _collapse_blank_lines("\n\n".join(blocks))
+
+
+def _parse_table_row(line):
+    """Split a ``| a | b |`` line into trimmed cells; ``None`` if it is not a table row."""
+    candidate = line.strip()
+    if candidate.startswith("|") and candidate.endswith("|"):
+        inner = candidate.strip("|")
+        return [cell.strip() for cell in inner.split("|")]
+    return None
+
+
+def _repair_table_headers(lines):
+    """Promote the first body row to header when a table's header row is blank.
+
+    For a blank row, a separator and a non-blank row in sequence, the blank
+    row is dropped and the non-blank row is emitted ahead of the separator.
+    All returned lines are stripped.
+    """
+    out = []
+    total = len(lines)
+    pos = 0
+    while pos < total:
+        promoted = False
+        if pos + 2 < total:
+            first = _parse_table_row(lines[pos])
+            third = _parse_table_row(lines[pos + 2])
+            has_separator = _TABLE_SEPARATOR_RE.match(lines[pos + 1].strip())
+            is_table_start = first is not None and has_separator and third is not None
+            # ``first`` must hold at least one cell, all of them empty.
+            promoted = bool(is_table_start and first and not any(first) and any(third))
+        if promoted:
+            out.append(lines[pos + 2].strip())
+            out.append(lines[pos + 1].strip())
+            pos += 3
+        else:
+            out.append(lines[pos].strip())
+            pos += 1
+    return out
+
+
+def _repair_list_continuations(lines):
+    """Indent the plain line that directly follows a list item.
+
+    Every line is emitted stripped. A non-empty line that comes right after a
+    list item and does not itself start a Markdown block is prefixed with the
+    continuation indent so it stays attached to that item. Only the first such
+    line is treated as a continuation, because the flag is cleared afterwards.
+    """
+    repaired = []
+    previous_was_list_item = False
+    previous_continuation_indent = ""
+
+    for line in lines:
+        stripped = line.strip()
+        # NOTE(review): the match runs on the stripped text, so group(1) (the
+        # item's own indent) is always empty here.
+        list_match = re.match(r"^(\s*)([-*+]|\d+\.)\s+.+$", stripped)
+        # Blank lines, fences, headings, quotes, table rows/separators and list
+        # items all end a pending continuation.
+        is_markdown_block_start = (
+            not stripped
+            or stripped.startswith(("```", "#", ">", "|"))
+            or _TABLE_SEPARATOR_RE.match(stripped)
+            or re.match(r"^(\s*)([-*+]|\d+\.)\s+", stripped)
+        )
+
+        if previous_was_list_item and stripped and not is_markdown_block_start:
+            repaired.append(f"{previous_continuation_indent}{stripped}")
+            previous_was_list_item = False
+            continue
+
+        repaired.append(stripped)
+        if list_match:
+            marker = list_match.group(2)
+            base_indent = list_match.group(1)
+            # Align continuation text under the item text: marker width plus one space.
+            previous_continuation_indent = f"{base_indent}{' ' * (len(marker) + 1)}"
+            previous_was_list_item = True
+        else:
+            previous_was_list_item = False
+
+    return repaired
+
+
+def _repair_flattened_diagram(text):
+    """Re-insert line breaks into a box-drawing diagram collapsed onto one line.
+
+    ``text`` is returned unchanged when it already contains a newline or holds
+    fewer than two of the characters ``│▼├└``.
+    """
+    if "\n" in text:
+        return text
+    if sum(text.count(char) for char in "│▼├└") < 2:
+        return text
+
+    # The substitutions run in order; each works on the previous one's output.
+    # Two or more whitespace chars before │/▼: break the line, indent the marker by three.
+    text = re.sub(r"\s{2,}([│▼])", r"\n   \1", text)
+    # Two or more whitespace chars after │/▼: break after the marker.
+    text = re.sub(r"([│▼])\s{2,}", r"\1\n", text)
+    # │/▼ glued to ordinary text: break after the marker.
+    text = re.sub(r"([│▼])(?=[^\s\n│▼├└])", r"\1\n", text)
+    # ├/└ glued to preceding text: break before the branch character.
+    text = re.sub(r"(?<=[^\s\n])([├└])", r"\n\1", text)
+    # "(" glued to preceding text: move the parenthesis to a new line.
+    text = re.sub(r"([^\s\n])(\()", r"\1\n\2", text)
+    # Drop whitespace-only lines and trailing whitespace.
+    return "\n".join(line.rstrip() for line in text.splitlines() if line.strip())
+
+
+def _convert_dash_lists_to_branches(lines):
+    """Rewrite runs of ``- item`` lines as tree branches.
+
+    Consecutive dash items sharing the same indent form one run; every item
+    gets ``├`` except the last, which gets ``└``. Other lines pass through.
+    """
+    result = []
+    total = len(lines)
+    pos = 0
+    while pos < total:
+        head = re.match(r"^(\s*)-\s+(.*)$", lines[pos])
+        if head is None:
+            result.append(lines[pos])
+            pos += 1
+            continue
+
+        indent = head.group(1)
+        run = []
+        while pos < total:
+            sibling = re.match(rf"^{re.escape(indent)}-\s+(.*)$", lines[pos])
+            if sibling is None:
+                break
+            run.append(sibling.group(1))
+            pos += 1
+
+        last = len(run) - 1
+        for offset, label in enumerate(run):
+            glyph = "├" if offset != last else "└"
+            result.append(f"{indent}{glyph} {label}")
+    return result
+
+
+def _clean_code_block(code):
+    """Normalise the body of a fenced code block.
+
+    Trailing whitespace and surrounding blank lines are removed, a flattened
+    diagram is re-broken, unindented lone ``│``/``▼`` connectors get a
+    three-space indent, and dash lists are turned into tree branches.
+    """
+    stripped = [line.rstrip() for line in code.splitlines()]
+    start, end = 0, len(stripped)
+    while start < end and not stripped[start].strip():
+        start += 1
+    while end > start and not stripped[end - 1].strip():
+        end -= 1
+
+    flattened = _repair_flattened_diagram("\n".join(stripped[start:end]))
+    result = []
+    for line in (flattened.splitlines() if flattened else []):
+        bare = line.strip()
+        if bare in {"│", "▼"} and not re.match(r"^\s+[│▼]\s*$", line):
+            result.append(f"   {bare}")
+        else:
+            result.append(line)
+    return "\n".join(_convert_dash_lists_to_branches(result))
+
+
+def _clean_prose(prose):
+    """Clean one non-fenced Markdown segment; returns ``""`` when nothing remains."""
+    unescaped = _ESCAPED_MARKDOWN_RE.sub(r"\1", prose)
+    lines = [line.strip() for line in unescaped.splitlines()]
+    lines = _repair_table_headers(lines)
+    lines = _repair_list_continuations(lines)
+    return _collapse_blank_lines("\n".join(lines))
+
+
+def _clean_markdown_output(markdown):
+    """Post-process extracted Markdown.
+
+    Prose between fenced code blocks is unescaped and repaired (tables, list
+    continuations, blank lines); fenced bodies go through ``_clean_code_block``.
+    The cleaned pieces are joined with blank lines. Falsy input yields ``""``.
+    """
+    if not markdown:
+        return ""
+
+    pieces = []
+    last_index = 0
+    for match in _FENCE_RE.finditer(markdown):
+        pieces.append(_clean_prose(markdown[last_index:match.start()]))
+
+        # Split the fence into its opening line and body, dropping the closing ```.
+        header, _, tail = match.group(0).partition("\n")
+        body, _, _ = tail.rpartition("\n")
+        cleaned_body = _clean_code_block(body)
+        pieces.append(f"{header}\n{cleaned_body}\n```" if cleaned_body else f"{header}\n```")
+        last_index = match.end()
+
+    pieces.append(_clean_prose(markdown[last_index:]))
+
+    # Segments that cleaned down to nothing are dropped here.
+    return "\n\n".join(piece for piece in pieces if piece)
+
+
+def _convert_html_to_markdown(html):
+    """Parse an HTML string and return cleaned Markdown; falsy input is treated as ``""``."""
+    builder = _HtmlTreeBuilder()
+    builder.feed(html if html else "")
+    return _clean_markdown_output(_block_to_markdown(builder.root))


 def _handle(command, args=None):
@@ -80,4 +496,8 @@ def extract_html():
 def extract_markdown(selector):
    """Extract the page's main content as Markdown."""
    markdown = _handle("extract.markdown", {"selector": selector})
+    # A result whose first non-blank character is "<" is converted from HTML
+    # locally; anything else only gets the Markdown clean-up pass.
+    # NOTE(review): Markdown that legitimately starts with "<" (inline HTML, an
+    # autolink) would also take the HTML path -- confirm this is acceptable.
+    if (markdown or "").lstrip().startswith("<"):
+        markdown = _convert_html_to_markdown(markdown)
+    else:
+        markdown = _clean_markdown_output(markdown or "")
    click.echo(markdown or "", nl=not (markdown or "").endswith("\n"))
@@ -1,14 +1,13 @@
 import click
-import json
-from browser_cli.client import send_command, BrowserNotConnected
+from browser_cli.client import active_browser_targets, send_command, BrowserNotConnected
 from rich.console import Console

 console = Console()


-def _handle(command, args=None):
+def _handle(command, args=None, profile=None):
    try:
-        return send_command(command, args or {})
+        return send_command(command, args or {}, profile=profile)
    except BrowserNotConnected as e:
        console.print(f"[red]Error:[/red] {e}")
        raise SystemExit(1)
@@ -17,6 +16,23 @@ def _handle(command, args=None):
        raise SystemExit(1)


+def _handle_multi(command, args=None, profile=None):
+    """Send ``command`` to one browser, returning ``None`` instead of failing.
+
+    ``BrowserNotConnected`` and ``RuntimeError`` are swallowed so one
+    unreachable browser does not abort a multi-browser listing.
+    """
+    payload = args or {}
+    try:
+        response = send_command(command, payload, profile=profile)
+    except (BrowserNotConnected, RuntimeError):
+        response = None
+    return response
+
+
+def _multi_browser_targets():
+    """Return the active browser targets to aggregate over, or ``[]``.
+
+    The list is empty when the root context marks the browser as explicitly
+    chosen, or when at most one target is active.
+    """
+    root_ctx = click.get_current_context().find_root()
+    explicit = root_ctx.obj.get("browser_explicit")
+    found = [] if explicit else active_browser_targets()
+    return found if len(found) > 1 else []
+
+
@click.group("session")
 def session_group():
    """Save and restore browser sessions."""
@@ -71,18 +87,35 @@ def session_diff(name_a, name_b):
 def session_list():
    """List all saved sessions."""
    from rich.table import Table
+    targets = _multi_browser_targets()
+    show_browser = bool(targets)
+    if targets:
+        sessions = []
+        # Track reachability separately from emptiness: browsers that answer
+        # with no saved sessions are not a connection failure.
+        reachable = False
+        for target in targets:
+            result = _handle_multi("session.list", profile=target.profile)
+            if result is None:
+                continue
+            reachable = True
+            sessions.extend({**session, "browser": target.display_name} for session in result)
+        if not reachable:
+            console.print("[red]Error:[/red] Cannot resolve a browser socket automatically.")
+            raise SystemExit(1)
+    else:
        sessions = _handle("session.list")
    if not sessions:
        console.print("[yellow]No saved sessions[/yellow]")
        return
    table = Table(show_header=True, header_style="bold cyan")
+    if show_browser:
+        table.add_column("Browser")
    table.add_column("Name")
    table.add_column("Tabs", width=6)
    table.add_column("Saved at")
    for s in sessions:
        from datetime import datetime
        saved = datetime.fromtimestamp(s["savedAt"] / 1000).strftime("%Y-%m-%d %H:%M") if s.get("savedAt") else ""
-        table.add_row(s["name"], str(s["tabs"]), saved)
+        row = [s.get("browser", "")] if show_browser else []
+        row.extend([s["name"], str(s["tabs"]), saved])
+        table.add_row(*row)
    console.print(table)


@@ -659,6 +659,32 @@ function contentDispatch(funcName, args) {
      "li", "main", "nav", "ol", "p", "pre", "section", "table", "tbody", "td", "tfoot",
      "th", "thead", "tr", "ul"
    ]);
+    // CSS selector list for elements removed before conversion: non-content
+    // tags, form controls, hidden / screen-reader-only nodes, and a few
+    // site-specific composer and action-button containers.
+    const NOISE_SELECTOR = [
+      "script",
+      "style",
+      "noscript",
+      "template",
+      "svg",
+      "canvas",
+      "iframe",
+      "dialog",
+      "button",
+      "input",
+      "textarea",
+      "select",
+      "option",
+      "form",
+      "[hidden]",
+      "[aria-hidden='true']",
+      ".sr-only",
+      "[class*='sr-only']",
+      "[class*='file-tile']",
+      "form[data-type='unified-composer']",
+      ".composer-btn",
+      "[data-composer-surface='true']",
+      "#thread-bottom-container",
+      "[data-testid*='action-button']",
+    ].join(", ");

    function normalizeText(value) {
      return value.replace(/\s+/g, " ").trim();
@@ -681,7 +707,7 @@ function contentDispatch(funcName, args) {
    }

    function escapeMarkdown(text) {
-      return text.replace(/([\\`*_{}\[\]()#+\-!|>])/g, "\\$1");
+      // Escape only backslash, backtick and square brackets.
+      return text.replace(/([\\`[\]])/g, "\\$1");
    }

    function escapeTableCell(text) {
@@ -692,12 +718,55 @@ function contentDispatch(funcName, args) {
      return attr || fallback || "";
    }

+    // Returns true when `node` is an element that must be left out of the
+    // extracted Markdown. Non-element nodes and null are never noise.
+    function isNoiseElement(node) {
+      if (!node || node.nodeType !== Node.ELEMENT_NODE) return false;
+      // NOISE_SELECTOR is the single source of truth, so this check and
+      // stripNoise() cannot drift apart.
+      if (node.matches(NOISE_SELECTOR)) return true;
+      // The selector's [aria-hidden='true'] compares case-sensitively; keep
+      // accepting other casings such as "TRUE".
+      return (node.getAttribute("aria-hidden") || "").toLowerCase() === "true";
+    }
+
    function stripNoise(root) {
+      // Work on a deep clone so the live page is never modified.
      const clone = root.cloneNode(true);
-      clone.querySelectorAll("script, style, noscript, template").forEach(node => node.remove());
+      clone.querySelectorAll(NOISE_SELECTOR).forEach(node => node.remove());
      return clone;
    }

+    // Heuristic score for how likely `node` is the page's main content.
+    // The base is the normalised text length. Content structure (headings,
+    // paragraphs, lists, tables, code, images) and main/prose containers add
+    // to it; form controls, forms and svg/canvas elements subtract.
+    // Nodes without text score -Infinity.
+    function candidateScore(node) {
+      const text = normalizeText(node.innerText || "");
+      if (!text) return -Infinity;
+
+      const headings = node.querySelectorAll("h1, h2, h3, h4, h5, h6").length;
+      const paragraphs = node.querySelectorAll("p").length;
+      const listItems = node.querySelectorAll("li").length;
+      const tables = node.querySelectorAll("table").length;
+      const codeBlocks = node.querySelectorAll("pre, code").length;
+      const images = node.querySelectorAll("img, figure").length;
+      const mainLike = node.matches("main, article, [role='main']") ? 1 : 0;
+      const proseBlocks = node.matches(".markdown, .prose, [data-message-author-role='assistant']") ? 1 : 0;
+      const buttons = node.querySelectorAll("button, input, textarea, select").length;
+      const forms = node.querySelectorAll("form").length;
+      const svgs = node.querySelectorAll("svg, canvas").length;
+
+      return text.length
+        + (mainLike * 4000)
+        + (proseBlocks * 5000)
+        + (headings * 250)
+        + (paragraphs * 60)
+        + (listItems * 35)
+        + (tables * 80)
+        + (codeBlocks * 60)
+        + (images * 25)
+        - (buttons * 120)
+        - (forms * 200)
+        - (svgs * 40);
+    }
+
    function pickRoot() {
      if (selector) {
        const matched = document.querySelector(selector);
@@ -705,10 +774,12 @@ function contentDispatch(funcName, args) {
        return matched;
      }

-      const candidates = Array.from(document.querySelectorAll("main, article, [role='main']"))
+      const candidates = Array.from(document.querySelectorAll(
+        "main, article, [role='main'], section, .markdown, .prose, [data-message-author-role]"
+      ))
        .filter(node => normalizeText(node.innerText || "").length > 0);
      if (!candidates.length) return document.body;
-      candidates.sort((a, b) => (b.innerText || "").length - (a.innerText || "").length);
+      candidates.sort((a, b) => candidateScore(b) - candidateScore(a));
      return candidates[0];
    }

@@ -717,9 +788,9 @@ function contentDispatch(funcName, args) {
        return escapeMarkdown(node.textContent || "");
      }
      if (node.nodeType !== Node.ELEMENT_NODE) return "";
+      if (isNoiseElement(node)) return "";

      const tag = node.tagName.toLowerCase();
-      if (tag === "script" || tag === "style" || tag === "noscript" || tag === "template") return "";
      if (tag === "br") return "\n";
      if (tag === "img") {
        const src = absoluteUrl(node.getAttribute("src"), node.src);
@@ -762,6 +833,92 @@ function contentDispatch(funcName, args) {
      return collapseBlankLines(normalizeInline(Array.from(node.childNodes).map(inlineText).join("")));
    }

+    // Collect the raw text under `node` without Markdown escaping.
+    // <br> becomes a newline, and div / p / li get a trailing newline.
+    function preserveNodeText(node) {
+      if (node.nodeType === Node.TEXT_NODE) {
+        return node.textContent || "";
+      }
+      if (node.nodeType !== Node.ELEMENT_NODE) return "";
+
+      const tag = node.tagName.toLowerCase();
+      if (tag === "br") return "\n";
+
+      const parts = [];
+      for (const child of node.childNodes) {
+        const rendered = preserveNodeText(child);
+        if (!rendered) continue;
+        parts.push(rendered);
+      }
+
+      // Block-like containers end their own line.
+      if (["div", "p", "li"].includes(tag)) {
+        return `${parts.join("")}\n`;
+      }
+      return parts.join("");
+    }
+
+    // Re-insert line breaks into a box-drawing diagram collapsed onto one line.
+    // Text that already has a newline, or fewer than two of │ ▼ ├ └, is
+    // returned unchanged.
+    function repairFlattenedDiagram(text) {
+      if (text.includes("\n")) return text;
+      const markerCount = (text.match(/[│▼├└]/g) || []).length;
+      if (markerCount < 2) return text;
+
+      let repaired = text;
+      // Two or more whitespace chars before │/▼: break the line, indent the marker by three.
+      repaired = repaired.replace(/\s{2,}([│▼])/g, "\n   $1");
+      // Two or more whitespace chars after │/▼: break after the marker.
+      repaired = repaired.replace(/([│▼])\s{2,}/g, "$1\n");
+      // │/▼ glued to ordinary text: break after the marker.
+      repaired = repaired.replace(/([│▼])(?=[^\s\n│▼├└])/g, "$1\n");
+      // ├/└ glued to preceding text: break before the branch character.
+      repaired = repaired.replace(/(?<=[^\s\n])([├└])/g, "\n$1");
+      // "(" glued to preceding text: move it to a new line.
+      repaired = repaired.replace(/([^\s\n])(\()/g, "$1\n$2");
+      // Trim trailing whitespace and drop blank lines.
+      return repaired
+        .split("\n")
+        .map(line => line.replace(/\s+$/, ""))
+        .filter(line => line.trim())
+        .join("\n");
+    }
+
+    // Rewrite runs of "- item" lines as tree branches. Consecutive items with
+    // the same indent form one run; every item gets "├" except the last,
+    // which gets "└". Other lines pass through unchanged.
+    function convertDashListsToBranches(lines) {
+      const converted = [];
+      let index = 0;
+      while (index < lines.length) {
+        const match = lines[index].match(/^(\s*)-\s+(.*)$/);
+        if (!match) {
+          converted.push(lines[index]);
+          index += 1;
+          continue;
+        }
+
+        const indent = match[1];
+        const items = [];
+        while (index < lines.length) {
+          // The indent is regex-escaped so it is matched literally.
+          const nextMatch = lines[index].match(new RegExp(`^${indent.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}-\\s+(.*)$`));
+          if (!nextMatch) break;
+          items.push(nextMatch[1]);
+          index += 1;
+        }
+
+        items.forEach((item, itemIndex) => {
+          const branch = itemIndex === items.length - 1 ? "└" : "├";
+          converted.push(`${indent}${branch} ${item}`);
+        });
+      }
+      return converted;
+    }
+
+    // Normalise code-block text: unify line endings, trim trailing whitespace
+    // and surrounding blank lines, re-break flattened diagrams, indent lone
+    // │/▼ connectors, and convert dash lists into tree branches.
+    function normalizeCodeBlock(text) {
+      let lines = text.replace(/\r\n?/g, "\n").split("\n").map(line => line.replace(/\s+$/, ""));
+      while (lines.length && !lines[0].trim()) lines.shift();
+      while (lines.length && !lines[lines.length - 1].trim()) lines.pop();
+
+      const flattened = repairFlattenedDiagram(lines.join("\n"));
+      lines = flattened ? flattened.split("\n") : [];
+      lines = lines.map(line => {
+        const trimmed = line.trim();
+        // A connector with no leading whitespace gets a three-space indent.
+        if ((trimmed === "│" || trimmed === "▼") && !/^\s+[│▼]\s*$/.test(line)) {
+          return `   ${trimmed}`;
+        }
+        return line;
+      });
+      lines = convertDashListsToBranches(lines);
+      return lines.join("\n");
+    }
+
    function tableToMarkdown(table) {
      const rows = Array.from(table.querySelectorAll("tr"))
        .map(row => Array.from(row.children)
@@ -780,10 +937,16 @@ function contentDispatch(funcName, args) {

      let headers = normalizedRows[0];
      let bodyRows = normalizedRows.slice(1);
+      const firstRowIsBlank = headers.every(cell => !cell.trim());
+      if (firstRowIsBlank && normalizedRows.length > 1) {
+        headers = normalizedRows[1];
+        bodyRows = normalizedRows.slice(2);
+      }
+
      const firstRow = table.querySelector("tr");
      const thead = table.querySelector("thead");
      const firstRowHasTh = firstRow && Array.from(firstRow.children).some(cell => cell.tagName === "TH");
-      if (!(thead || firstRowHasTh)) {
+      if (!(thead || firstRowHasTh || firstRowIsBlank)) {
        headers = new Array(widths).fill("");
        bodyRows = normalizedRows;
      }
@@ -818,7 +981,12 @@ function contentDispatch(funcName, args) {
        }

        const line = collapseBlankLines(normalizeInline(content.join("")));
-        if (line) items.push(`${indent}${marker}${line}`);
+        if (line) {
+          const lineParts = line.split("\n");
+          items.push(`${indent}${marker}${lineParts[0]}`);
+          const continuationIndent = `${indent}${" ".repeat(marker.length)}`;
+          lineParts.slice(1).forEach(part => items.push(`${continuationIndent}${part}`));
+        }
        nested.filter(Boolean).forEach(block => items.push(block));
      });
      return items.join("\n");
@@ -829,13 +997,21 @@ function contentDispatch(funcName, args) {
        return normalizeText(node.textContent || "");
      }
      if (node.nodeType !== Node.ELEMENT_NODE) return "";
+      if (isNoiseElement(node)) return "";

      const tag = node.tagName.toLowerCase();
-      if (tag === "script" || tag === "style" || tag === "noscript" || tag === "template") return "";
      if (tag === "table") return tableToMarkdown(node);
      if (tag === "ul" || tag === "ol") return listToMarkdown(node);
+      if (node.matches(".cm-editor[data-is-code-block-view='true']")) {
+        const lines = Array.from(node.querySelectorAll(".cm-line")).map(line => {
+          const text = preserveNodeText(line);
+          return text === "\n" ? "" : text.replace(/\n$/, "");
+        });
+        const code = normalizeCodeBlock(lines.join("\n"));
+        return code ? `\`\`\`\n${code}\n\`\`\`` : "";
+      }
      if (tag === "pre") {
-        const code = node.innerText.replace(/\n$/, "");
+        const code = normalizeCodeBlock(preserveNodeText(node));
        return code ? `\`\`\`\n${code}\n\`\`\`` : "";
      }
      if (tag === "blockquote") {
@@ -1011,9 +1187,21 @@ async function clientsRenameProfile({ alias }) {
 // ── Helpers ───────────────────────────────────────────────────────────────────

 async function getActiveTab() {
-  const [tab] = await chrome.tabs.query({ active: true, lastFocusedWindow: true });
-  if (!tab) throw new Error("No active tab found");
-  return tab;
+  // Consider the active tab of every window, not just the last focused one.
+  const activeTabs = await chrome.tabs.query({ active: true });
+  if (!activeTabs.length) throw new Error("No active tab found");
+
+  const windows = await chrome.windows.getAll({ populate: false });
+  const focusedWindowIds = new Set(windows.filter(window => window.focused).map(window => window.id));
+
+  const chooseTab = (predicate) => activeTabs.find(predicate);
+  const byFocusAndScriptable = tab => focusedWindowIds.has(tab.windowId) && isScriptableUrl(tab.url || tab.pendingUrl || "");
+  const byScriptable = tab => isScriptableUrl(tab.url || tab.pendingUrl || "");
+  const byFocus = tab => focusedWindowIds.has(tab.windowId);
+
+  // Preference order: focused and scriptable, then any scriptable, then any
+  // focused, then simply the first active tab.
+  return chooseTab(byFocusAndScriptable)
+    || chooseTab(byScriptable)
+    || chooseTab(byFocus)
+    || activeTabs[0];
 }

 async function resolveGroupId(nameOrId) {
@@ -488,6 +488,39 @@ class TestWindows:
        mock_send.assert_called_once_with("windows.open", {"url": "https://example.com"}, profile=None)


+class TestSession:
+    # Covers `session_list()` on the object provided by the `b` fixture.
+    # `b` and `mock_send` are fixtures defined outside this hunk.
+    def test_session_list(self, b, mock_send):
+        # Single-target case: the payload returned by the send mock is passed
+        # through unchanged and exactly one "session.list" call is made.
+        mock_send.return_value = [{"name": "saved", "tabs": 3, "savedAt": 1712707200000}]
+
+        result = b.session_list()
+
+        assert result == [{"name": "saved", "tabs": 3, "savedAt": 1712707200000}]
+        mock_send.assert_called_once_with("session.list", {}, profile=None)
+
+    def test_session_list_multi_browser_adds_browser(self, b, mock_send):
+        # With two active browser targets, one "session.list" call is expected
+        # per target and every returned record gains a "browser" key.
+        with patch(
+            "browser_cli.active_browser_targets",
+            return_value=[
+                BrowserTarget("default", "uuid-1", "/tmp/uuid-1.sock"),
+                BrowserTarget("work", "work", "/tmp/work.sock"),
+            ],
+        ):
+            # side_effect hands out one response list per call, in order.
+            mock_send.side_effect = [
+                [{"name": "first", "tabs": 2, "savedAt": 1712707200000}],
+                [{"name": "second", "tabs": 5, "savedAt": 1712707300000}],
+            ]
+            result = b.session_list()
+
+        # The "browser" value matches each target's second positional argument.
+        assert result == [
+            {"name": "first", "tabs": 2, "savedAt": 1712707200000, "browser": "uuid-1"},
+            {"name": "second", "tabs": 5, "savedAt": 1712707300000, "browser": "work"},
+        ]
+        # The `profile` kwarg matches each target's first positional argument.
+        assert mock_send.call_args_list == [
+            call("session.list", {}, profile="default"),
+            call("session.list", {}, profile="work"),
+        ]
+
+
 # ── Tab model ─────────────────────────────────────────────────────────────────

 class TestTabModel:
@@ -5,6 +5,7 @@ from unittest.mock import patch

 from browser_cli.cli import main, _project_version
 from browser_cli.client import BrowserTarget
+from browser_cli.commands.extract import _clean_markdown_output, _convert_html_to_markdown

 def _expected_version() -> str:
    pyproject = Path(__file__).resolve().parent.parent / "pyproject.toml"
@@ -204,6 +205,46 @@ def test_windows_list_multi_browser_shows_browser_column():
    assert "work" in result.output


+def test_session_list_multi_browser_shows_browser_column():
+    # CLI-level check: `session list` without --browser, with two active
+    # targets, prints a "Browser" column and the sessions of both targets.
+    def fake_send_command(command, args=None, profile=None):
+        # Names the returned session after the profile it was requested for,
+        # so the output shows which targets were queried.
+        assert command == "session.list"
+        return [{"name": f"{profile}-session", "tabs": 2, "savedAt": 1712707200000}]
+
+    with patch(
+        "browser_cli.commands.session.active_browser_targets",
+        return_value=[
+            BrowserTarget("default", "uuid-1", "/tmp/default.sock"),
+            BrowserTarget("work", "work", "/tmp/work.sock"),
+        ],
+    ), patch("browser_cli.commands.session.send_command", side_effect=fake_send_command):
+        result = CliRunner().invoke(main, ["session", "list"])
+
+    assert result.exit_code == 0
+    assert "Browser" in result.output
+    # Second positional argument of each BrowserTarget appears in the output.
+    assert "uuid-1" in result.output
+    assert "work" in result.output
+    # One session per queried profile, as produced by fake_send_command.
+    assert "default-session" in result.output
+    assert "work-session" in result.output
+
+
+def test_session_list_with_explicit_browser_does_not_show_browser_column():
+    # CLI-level check: with `--browser work`, `session list` issues a single
+    # request and prints no "Browser" column, even though two targets are active.
+    with patch(
+        "browser_cli.commands.session.active_browser_targets",
+        return_value=[
+            BrowserTarget("default", "uuid-1", "/tmp/default.sock"),
+            BrowserTarget("work", "work", "/tmp/work.sock"),
+        ],
+    ), patch(
+        "browser_cli.commands.session.send_command",
+        return_value=[{"name": "work-session", "tabs": 2, "savedAt": 1712707200000}],
+    ) as send_command:
+        result = CliRunner().invoke(main, ["--browser", "work", "session", "list"])
+
+    assert result.exit_code == 0
+    assert "Browser" not in result.output
+    # NOTE(review): the expected call carries profile=None although
+    # `--browser work` was given; presumably the selected browser reaches
+    # send_command by some other route than this kwarg — confirm.
+    send_command.assert_called_once_with("session.list", {}, profile=None)
+
+
 def test_windows_open_passes_url():
    with patch("browser_cli.commands.windows.send_command", return_value={"id": 7}) as send_command:
        result = CliRunner().invoke(main, ["windows", "open", "https://example.com"])
@@ -213,7 +254,7 @@ def test_windows_open_passes_url():
    send_command.assert_called_once_with("windows.open", {"url": "https://example.com"}, profile=None)

 def test_extract_markdown_command():
-    with patch("browser_cli.commands.extract.send_command", return_value="# Title\n") as send_command:
+    with patch("browser_cli.commands.extract.send_command", return_value="# Title") as send_command:
        result = CliRunner().invoke(main, ["extract", "markdown"])

    assert result.exit_code == 0
@@ -221,9 +262,160 @@ def test_extract_markdown_command():
    send_command.assert_called_once_with("extract.markdown", {"selector": None})

 def test_extract_markdown_command_with_selector():
-    with patch("browser_cli.commands.extract.send_command", return_value="## Post\n") as send_command:
+    with patch("browser_cli.commands.extract.send_command", return_value="## Post") as send_command:
+        # The mocked payload has no trailing newline; the output asserted
+        # below still ends with exactly one.
         result = CliRunner().invoke(main, ["extract", "markdown", "--selector", "article"])
 
     assert result.exit_code == 0
     assert result.output == "## Post\n"
     send_command.assert_called_once_with("extract.markdown", {"selector": "article"})
+
+
+def test_clean_markdown_output_removes_escaped_underscores_and_dashes():
+    # Backslash-escaped `\_` and `\-` are expected to come back as plain `_` and `-`.
+    assert _clean_markdown_output(r"hello\_world \- item") == "hello_world - item"
+
+
+def test_clean_markdown_output_trims_useless_whitespace():
+    # Expected: leading/trailing whitespace stripped from each line, the run of
+    # blank lines reduced to a single blank line, and no trailing newline.
+    raw = "  # Title  \n\n\n  paragraph with space  \n   next line\t \n"
+    assert _clean_markdown_output(raw) == "# Title\n\nparagraph with space\nnext line"
+
+
+def test_clean_markdown_output_repairs_empty_table_header_rows():
+    # Input table has an all-empty header row above the separator. Expected:
+    # the empty row is dropped and the first data row takes its place as the
+    # header, with the separator directly beneath it.
+    raw = (
+        "|  |  |  |\n"
+        "| --- | --- | --- |\n"
+        "| Bereich | Plan | Ist |\n"
+        "| A | B | C |\n"
+    )
+    assert _clean_markdown_output(raw) == (
+        "| Bereich | Plan | Ist |\n"
+        "| --- | --- | --- |\n"
+        "| A | B | C |"
+    )
+
+
+def test_clean_markdown_output_preserves_graph_code_blocks():
+    # Expected: the blank lines just inside the fences are removed, while the
+    # leading indentation of the graph lines inside the block is kept.
+    raw = "```\n\nA\n  │\n  ▼\nB\n\n```"
+    assert _clean_markdown_output(raw) == "```\nA\n  │\n  ▼\nB\n```"
+
+
+def test_clean_markdown_output_renders_code_block_list_branches():
+    # Inside a fenced block, the "- " items following a label line are expected
+    # to become tree branches: "├" for each item and "└" for the last one.
+    raw = "```\nPlattformen\n- Omnifact\n- Open WebUI + Ollama\n- Le Chat\n```"
+    assert _clean_markdown_output(raw) == (
+        "```\n"
+        "Plattformen\n"
+        "├ Omnifact\n"
+        "├ Open WebUI + Ollama\n"
+        "└ Le Chat\n"
+        "```"
+    )
+
+
+def test_clean_markdown_output_unflattens_graph_code_blocks():
+    # The whole graph arrives collapsed onto one line inside the fence.
+    # Expected: it is split back into one line per node, parenthesised
+    # subtitle, connector ("   │"), arrow ("   ▼") and tree branch ("├"/"└").
+    raw = (
+        "```\n"
+        "Golden Set   │   ▼Promptfoo(Testausführung)   │   ▼UpTrain(Qualitätsbewertung)   │   "
+        "▼Langfuse(Logging / Observability)   │   ▼Plattformen├ Omnifact├ Open WebUI + Ollama└ Le Chat\n"
+        "```"
+    )
+    assert _clean_markdown_output(raw) == (
+        "```\n"
+        "Golden Set\n"
+        "   │\n"
+        "   ▼\n"
+        "Promptfoo\n"
+        "(Testausführung)\n"
+        "   │\n"
+        "   ▼\n"
+        "UpTrain\n"
+        "(Qualitätsbewertung)\n"
+        "   │\n"
+        "   ▼\n"
+        "Langfuse\n"
+        "(Logging / Observability)\n"
+        "   │\n"
+        "   ▼\n"
+        "Plattformen\n"
+        "├ Omnifact\n"
+        "├ Open WebUI + Ollama\n"
+        "└ Le Chat\n"
+        "```"
+    )
+
+
+def test_extract_markdown_command_repairs_malformed_tables_and_code_blocks():
+    # End-to-end through the CLI: the mocked `extract markdown` payload holds
+    # both an empty-header table and a flattened graph code block, and the
+    # printed output is expected to contain the repaired form of each.
+    raw = (
+        "|  |  |  |\n"
+        "| --- | --- | --- |\n"
+        "| Bereich | Plan | Ist |\n"
+        "| Eval-Stack | Testumgebung | funktionsfähig |\n\n"
+        "```\n"
+        "Golden Set   │   ▼Promptfoo(Testausführung)   │   ▼Plattformen├ Omnifact└ Le Chat\n"
+        "```"
+    )
+    with patch("browser_cli.commands.extract.send_command", return_value=raw):
+        result = CliRunner().invoke(main, ["extract", "markdown"])
+
+    assert result.exit_code == 0
+    # Table: real header present, empty header row gone.
+    assert "| Bereich | Plan | Ist |" in result.output
+    assert "|  |  |  |" not in result.output
+    # Code block: graph split back into separate lines and branches.
+    assert "Golden Set\n   │\n   ▼\nPromptfoo\n(Testausführung)" in result.output
+    assert "├ Omnifact" in result.output
+    assert "└ Le Chat" in result.output
+
+
+def test_convert_html_to_markdown_normalizes_blank_table_header_rows():
+    # The HTML table starts with a row of empty cells. Expected: the markdown
+    # contains the first non-empty row as a table row and no all-empty row.
+    html = """
+    <main>
+      <table>
+        <tr><td></td><td></td><td></td><td></td></tr>
+        <tr><td>Risiko</td><td>Beschreibung</td><td>Auswirkung</td><td>Gegenmaßnahme</td></tr>
+        <tr><td>Datenschutz</td><td>X</td><td>Y</td><td>Z</td></tr>
+      </table>
+    </main>
+    """
+    markdown = _convert_html_to_markdown(html)
+    assert "| Risiko | Beschreibung | Auswirkung | Gegenmaßnahme |" in markdown
+    assert "|  |  |  |  |" not in markdown
+
+
+def test_convert_html_to_markdown_preserves_codemirror_graph_blocks():
+    # A `.cm-editor` element flagged data-is-code-block-view="true" holds one
+    # `.cm-line` div per line. Expected: a fenced code block that keeps one
+    # output line per `.cm-line` (including leading spaces) and renders the
+    # trailing "- " items as "├"/"└" branches.
+    html = """
+    <main>
+      <h1>Teil 5 - Eval-Stack Architektur</h1>
+      <div class="cm-editor" data-is-code-block-view="true" contenteditable="false">
+        <div class="cm-line">Golden Set</div>
+        <div class="cm-line">   │</div>
+        <div class="cm-line">   ▼</div>
+        <div class="cm-line">Promptfoo</div>
+        <div class="cm-line">(Testausführung)</div>
+        <div class="cm-line">   │</div>
+        <div class="cm-line">   ▼</div>
+        <div class="cm-line">Plattformen</div>
+        <div class="cm-line">- Omnifact</div>
+        <div class="cm-line">- Open WebUI + Ollama</div>
+        <div class="cm-line">- Le Chat</div>
+      </div>
+    </main>
+    """
+    markdown = _convert_html_to_markdown(html)
+    assert "```\nGolden Set\n   │\n   ▼\nPromptfoo" in markdown
+    assert "├ Omnifact" in markdown
+    assert "└ Le Chat" in markdown
+
+
+def test_convert_html_to_markdown_indents_multiline_list_items():
+    # A list item whose paragraph contains a <br>. Expected: the text after the
+    # break continues on the next line, indented by two spaces so it lines up
+    # under the "- " bullet's text.
+    html = """
+    <main>
+      <h2>2. <strong>Zielarchitektur</strong></h2>
+      <ul>
+        <li>
+          <p>Unternehmensdaten → RAG → KI-Orchestrierung →<br>Local LLMs / API Modelle / Spezialmodelle</p>
+        </li>
+      </ul>
+    </main>
+    """
+    markdown = _convert_html_to_markdown(html)
+    assert (
+        "- Unternehmensdaten → RAG → KI-Orchestrierung →\n"
+        "  Local LLMs / API Modelle / Spezialmodelle"
+    ) in markdown