076914e5b7
- Split client, native, remote, serve, markdown, and SDK internals into focused packages with direct imports. - Move local and remote transport framing/protocol helpers behind clearer module boundaries. - Break up the extension injected DOM logic into a separate content dispatch bundle and dedicated content modules. - Add explicit client handling for passive remote discovery without noisy PQ warnings. - Keep behavior covered with updated unit, integration, and extension tests.
71 lines
2.2 KiB
Python
71 lines
2.2 KiB
Python
import json
|
|
|
|
import click
|
|
from browser_cli.commands import client_from_ctx, handle_errors
|
|
from rich.console import Console
|
|
from rich.table import Table
|
|
|
|
# Module-level rich console shared by the commands in this file for styled
# status messages, tables, and pretty-printed JSON.
console = Console()
|
|
|
|
# Parent click group; the individual extractors below attach themselves to it
# via ``@extract_group.command(...)``. The docstring doubles as the group's
# ``--help`` text, so keep it short and user-facing.
@click.group("extract")
def extract_group():
    """Extract content from the active tab."""
|
|
|
|
@extract_group.command("links")
@handle_errors
def extract_links():
    """Extract all links from the active tab."""
    # Function-scope import: rich is already a dependency of this module and
    # only the row rendering below needs the helper.
    from rich.markup import escape

    links = client_from_ctx().extract.links()
    if not links:
        console.print("[yellow]No links found[/yellow]")
        return

    table = Table(show_header=True, header_style="bold cyan")
    table.add_column("Text", width=40)
    table.add_column("URL")
    for lnk in links:
        # Link text and hrefs come from the page itself. Plain strings handed
        # to a rich table are parsed as console markup, so a "[...]" sequence
        # in page content could restyle or drop text, and a stray closing tag
        # raises MarkupError. Truncate first, then escape, so the escaping
        # backslash is never cut off by the slice.
        link_text = (lnk.get("text") or "")[:60]
        link_href = lnk.get("href") or ""
        table.add_row(escape(link_text), escape(link_href))
    console.print(table)
|
|
|
|
@extract_group.command("images")
@handle_errors
def extract_images():
    """Extract all images from the active tab."""
    # Function-scope import: rich is already a dependency of this module and
    # only the row rendering below needs the helper.
    from rich.markup import escape

    images = client_from_ctx().extract.images()
    if not images:
        console.print("[yellow]No images found[/yellow]")
        return

    table = Table(show_header=True, header_style="bold cyan")
    table.add_column("Alt", width=30)
    table.add_column("Src")
    for img in images:
        # Alt text and src values are page-supplied. Escape them so rich does
        # not interpret "[...]" inside them as console markup (which can drop
        # text or raise MarkupError on an unmatched closing tag). Truncation
        # happens before escaping so the escape sequence stays intact.
        alt_text = (img.get("alt") or "")[:40]
        src_url = img.get("src") or ""
        table.add_row(escape(alt_text), escape(src_url))
    console.print(table)
|
|
|
|
@extract_group.command("text")
@handle_errors
def extract_text():
    """Extract all visible text from the active tab."""
    page_text = client_from_ctx().extract.text()
    # The text is arbitrary page content, not a rich-formatted message:
    # disable markup parsing (so "[...]" is printed literally and an
    # unbalanced "[/tag]" cannot raise MarkupError) and emoji-code
    # substitution (so ":name:" sequences are not rewritten).
    console.print(page_text, markup=False, emoji=False)
|
|
|
|
@extract_group.command("json")
@click.argument("selector")
@handle_errors
def extract_json(selector):
    """Parse and pretty-print JSON content inside SELECTOR."""
    # Ask the client for the value found under the selector, re-serialise it
    # to a JSON string, and hand that string to rich for pretty-printing.
    extracted = client_from_ctx().extract.json(selector)
    serialized = json.dumps(extracted)
    console.print_json(serialized)
|
|
|
|
@extract_group.command("html")
@handle_errors
def extract_html():
    """Print the full HTML of the active tab to stdout."""
    # Emitted with click.echo rather than the rich console, so the HTML is
    # written as-is without rich markup processing.
    page_html = client_from_ctx().extract.html()
    click.echo(page_html)
|
|
|
|
@extract_group.command("markdown")
@click.option("--selector", help="Extract only the DOM subtree matching this CSS selector.")
@handle_errors
def extract_markdown(selector):
    """Extract the page's main content as Markdown."""
    # Normalise a falsy result (e.g. None) to an empty string once, up front.
    content = client_from_ctx().extract.markdown(selector) or ""
    # Let click append a newline only when the content does not already end
    # with one, so the output always finishes with exactly one line break.
    needs_newline = not content.endswith("\n")
    click.echo(content, nl=needs_newline)
|