Files
daniel156161 076914e5b7 refactor: reorganize client transport and extension internals
- Split client, native, remote, serve, markdown, and SDK internals into focused packages with direct imports.
- Move local and remote transport framing/protocol helpers behind clearer module boundaries.
- Break up the extension injected DOM logic into a separate content dispatch bundle and dedicated content modules.
- Add explicit client handling for passive remote discovery without noisy PQ warnings.
- Keep behavior covered with updated unit, integration, and extension tests.
2026-06-13 23:31:24 +02:00

71 lines
2.2 KiB
Python

import json
import click
from browser_cli.commands import client_from_ctx, handle_errors
from rich.console import Console
from rich.table import Table
# Shared Rich console that the commands in this module print their output through.
console = Console()
@click.group("extract")
def extract_group():
    """Extract content from the active tab."""
    # Container group only; the docstring above is what click shows as help
    # text, and the actual work lives in the subcommands attached to it.
@extract_group.command("links")
@handle_errors
def extract_links():
    """Extract all links from the active tab."""
    # Local import: keeps this command self-contained without touching the
    # module-level import block.
    from rich.text import Text

    links = client_from_ctx().extract.links()
    if not links:
        console.print("[yellow]No links found[/yellow]")
        return

    table = Table(show_header=True, header_style="bold cyan")
    table.add_column("Text", width=40)
    table.add_column("URL")
    for lnk in links:
        # Page-supplied strings are wrapped in Text so Rich renders them
        # literally. A bare str cell is parsed as console markup (and emoji
        # codes), which swallows bracketed text such as "[edit]" or a URL
        # query like "?a[b]=c", and raises MarkupError on a stray "[/x]".
        table.add_row(
            Text((lnk.get("text") or "")[:60]),  # cap link text at 60 chars
            Text(lnk.get("href") or ""),
        )
    console.print(table)
@extract_group.command("images")
@handle_errors
def extract_images():
    """Extract all images from the active tab."""
    # Local import: keeps this command self-contained without touching the
    # module-level import block.
    from rich.text import Text

    images = client_from_ctx().extract.images()
    if not images:
        console.print("[yellow]No images found[/yellow]")
        return

    table = Table(show_header=True, header_style="bold cyan")
    table.add_column("Alt", width=30)
    table.add_column("Src")
    for img in images:
        # Alt text and src come from the page, so they are wrapped in Text to
        # be shown verbatim. As bare str cells Rich would parse them as
        # console markup, dropping bracketed fragments or raising MarkupError
        # on an unmatched closing tag.
        table.add_row(
            Text((img.get("alt") or "")[:40]),  # cap alt text at 40 chars
            Text(img.get("src") or ""),
        )
    console.print(table)
@extract_group.command("text")
@handle_errors
def extract_text():
    """Extract all visible text from the active tab."""
    page_text = client_from_ctx().extract.text()
    # The text is arbitrary page content, not Rich markup: disable markup and
    # emoji-code substitution so bracketed words such as "[edit]" are printed
    # as-is and an unmatched "[/tag]" cannot raise MarkupError.
    console.print(page_text, markup=False, emoji=False)
@extract_group.command("json")
@click.argument("selector")
@handle_errors
def extract_json(selector):
    """Parse and pretty-print JSON content inside SELECTOR."""
    client = client_from_ctx()
    parsed = client.extract.json(selector)
    # print_json is given JSON text, so the value returned by the client is
    # serialized back to a string before being handed to the console.
    serialized = json.dumps(parsed)
    console.print_json(serialized)
@extract_group.command("html")
@handle_errors
def extract_html():
    """Print the full HTML of the active tab to stdout."""
    # Uses click.echo rather than the Rich console, so the HTML is written
    # without any console styling applied.
    page_html = client_from_ctx().extract.html()
    click.echo(page_html)
@extract_group.command("markdown")
@click.option("--selector", help="Extract only the DOM subtree matching this CSS selector.")
@handle_errors
def extract_markdown(selector):
    """Extract the page's main content as Markdown."""
    rendered = client_from_ctx().extract.markdown(selector)
    # A falsy result is printed as an empty string.
    output = rendered or ""
    # Let click append a newline only when the output does not already end
    # with one, so the command never emits a doubled trailing newline.
    needs_newline = not output.endswith("\n")
    click.echo(output, nl=needs_newline)