Files
browser-cli/browser_cli/commands/extract.py
T
daniel156161 f2a7f85ee3
Package Extension / package-extension (push) Successful in 12s
Build & Publish Package / publish (push) Failing after 21s
adding new extract command to extract selector or main content as markdown, updating version to 0.5.0
2026-04-10 03:44:49 +02:00

84 lines
2.5 KiB
Python

import json

import click
from rich.console import Console
from rich.table import Table
from rich.text import Text

from browser_cli.client import send_command, BrowserNotConnected
# Module-wide Rich console; the commands below print their tables, status
# messages and error reports through it.
console = Console()
def _handle(command, args=None):
    """Send *command* to the browser and return its result.

    Args:
        command: Command name forwarded to ``send_command``.
        args: Argument mapping for the command; an empty dict is sent when
            it is omitted or falsy.

    Returns:
        Whatever ``send_command`` returns for the command.

    Raises:
        SystemExit: With exit code 1 after ``BrowserNotConnected`` or
            ``RuntimeError`` has been reported on the console.
    """
    try:
        return send_command(command, args or {})
    except BrowserNotConnected as e:
        # The message is passed as a Text object so that square brackets in
        # it are printed literally instead of being parsed as Rich markup.
        console.print("[red]Error:[/red]", Text(str(e)))
        raise SystemExit(1) from e
    except RuntimeError as e:
        # Same reasoning: error text may echo page content or selectors such
        # as ``a[href]``, which must not be interpreted as markup tags.
        console.print("[red]Browser error:[/red]", Text(str(e)))
        raise SystemExit(1) from e
# Click group named "extract"; the subcommands defined below attach
# themselves to it via ``@extract_group.command(...)``.
@click.group("extract")
def extract_group():
    """Extract content from the active tab."""
@extract_group.command("links")
def extract_links():
    """Extract all links from the active tab."""
    # Ask the browser for the links; _handle exits the process on failure.
    links = _handle("extract.links")
    if not links:
        console.print("[yellow]No links found[/yellow]")
        return

    table = Table(show_header=True, header_style="bold cyan")
    table.add_column("Text", width=40)
    table.add_column("URL")
    for lnk in links:
        # Link text is capped at 60 characters; missing/None fields become "".
        label = (lnk.get("text") or "")[:60]
        url = lnk.get("href") or ""
        # Cells are wrapped in Text so page-supplied content is shown
        # verbatim: a plain str would be parsed as Rich markup, which drops
        # bracketed text such as "[edit]" and can raise MarkupError.
        table.add_row(Text(label), Text(url))
    console.print(table)
@extract_group.command("images")
def extract_images():
    """Extract all images from the active tab."""
    # Ask the browser for the images; _handle exits the process on failure.
    images = _handle("extract.images")
    if not images:
        console.print("[yellow]No images found[/yellow]")
        return

    table = Table(show_header=True, header_style="bold cyan")
    table.add_column("Alt", width=30)
    table.add_column("Src")
    for img in images:
        # Alt text is capped at 40 characters; missing/None fields become "".
        alt = (img.get("alt") or "")[:40]
        src = img.get("src") or ""
        # Cells are wrapped in Text so page-supplied content is shown
        # verbatim: a plain str would be parsed as Rich markup, which drops
        # bracketed fragments and can raise MarkupError.
        table.add_row(Text(alt), Text(src))
    console.print(table)
@extract_group.command("text")
def extract_text():
    """Extract all visible text from the active tab."""
    text = _handle("extract.text")
    # The page text is arbitrary content, not Rich markup. Markup parsing,
    # emoji-code replacement and syntax highlighting are switched off so the
    # text is printed exactly as extracted (no swallowed "[...]" fragments,
    # no MarkupError on stray closing tags).
    console.print(text or "", markup=False, emoji=False, highlight=False)
@extract_group.command("json")
@click.argument("selector")
def extract_json(selector):
    """Parse and pretty-print JSON content inside SELECTOR."""
    # Forward the selector to the browser, then re-serialise the returned
    # value so the console can pretty-print it as JSON.
    request = {"selector": selector}
    parsed = _handle("extract.json", request)
    serialized = json.dumps(parsed)
    console.print_json(serialized)
@extract_group.command("html")
def extract_html():
    """Print the full HTML of the active tab to stdout."""
    page_source = _handle("extract.html")
    # A falsy result (None, empty string) is emitted as an empty line.
    if not page_source:
        page_source = ""
    # click.echo is used instead of the Rich console, so the HTML is written
    # without Rich styling or markup processing.
    click.echo(page_source)
@extract_group.command("markdown")
@click.option("--selector", help="Extract only the DOM subtree matching this CSS selector.")
def extract_markdown(selector):
    """Extract the page's main content as Markdown."""
    # selector is None when the option is not given; it is forwarded as-is.
    rendered = _handle("extract.markdown", {"selector": selector}) or ""
    # Append a newline only when the Markdown does not already end with one,
    # so the output never finishes with a doubled line break.
    already_terminated = rendered.endswith("\n")
    click.echo(rendered, nl=not already_terminated)