fd5447cbb9
Testing / remote-protocol-compat (0.9.3) (push) Successful in 42s
Testing / remote-protocol-compat (0.9.5) (push) Successful in 44s
Package Extension / package-extension (push) Successful in 43s
Build & Publish Package / publish (push) Successful in 43s
Testing / test (push) Successful in 45s
Restructure the Python API and internals around composable namespaces and a standalone transport/endpoint layer. Bump to 0.12.0. Python API: - Replace flat methods (b.tabs_list(), b.group_list()) with namespaces: b.nav, b.tabs, b.groups, b.windows, b.dom, b.extract, b.page, b.storage, b.cookies, b.session, b.perf, b.extension. - Shrink browser_cli/__init__.py to a thin composition root; move all behaviour into browser_cli/sdk/ (one module per namespace + factories, base, routing). Internals: - Add browser_cli/transport.py and remote_transport.py to isolate IPC from command logic; client.py now delegates instead of owning transport. - Add browser_cli/endpoints.py for endpoint resolution and browser_cli/errors.py for shared error types. - Extract markdown rendering into browser_cli/markdown.py (out of extract). - Add USER_AGENT to version_manager. Tooling & tests: - Add justfile with common dev tasks. - Update CLI commands and demo to the namespaced API. - Rework tests for the new layout; add test_transport.py and test_refactor_boundaries.py to lock in module boundaries. BREAKING CHANGE: flat API methods are removed in favour of namespaces (e.g. b.tabs_list() -> b.tabs.list(), b.group_list() -> b.groups.list()).
74 lines
2.4 KiB
Python
74 lines
2.4 KiB
Python
import json
|
|
|
|
import click
|
|
from browser_cli.commands import client_from_ctx, handle_errors
|
|
# Re-exported for backward compatibility: the HTML→Markdown engine now lives in
|
|
# browser_cli.markdown and is applied by the SDK (ExtractNS.markdown).
|
|
from browser_cli.markdown import _clean_markdown_output, _convert_html_to_markdown # noqa: F401
|
|
from rich.console import Console
|
|
from rich.table import Table
|
|
|
|
# Module-level Rich console shared by the extract commands for tables,
# status messages, and pretty-printed JSON.
console = Console()
|
|
|
|
# Parent click group named "extract"; the commands in this module register
# themselves on it via ``@extract_group.command(...)``.
@click.group("extract")
def extract_group():
    """Extract content from the active tab."""
|
|
|
|
@extract_group.command("links")
@handle_errors
def extract_links():
    """Extract all links from the active tab."""
    # The docstring is also the command's click help text, so it stays terse.
    links = client_from_ctx().extract.links()
    if not links:
        console.print("[yellow]No links found[/yellow]")
        return

    table = Table(show_header=True, header_style="bold cyan")
    table.add_column("Text", width=40)
    table.add_column("URL")
    for lnk in links:
        # Link text and hrefs come from the page, not from us. Plain ``str``
        # cells are parsed as Rich console markup (and emoji codes), so a URL
        # such as ``?filter[name]=x`` would lose its bracketed part and a
        # stray ``[/tag]`` would raise MarkupError. ``Text`` renders the value
        # literally.
        table.add_row(
            Text((lnk.get("text") or "")[:60]),  # cap very long link labels
            Text(lnk.get("href") or ""),
        )
    console.print(table)
|
|
|
|
@extract_group.command("images")
@handle_errors
def extract_images():
    """Extract all images from the active tab."""
    # The docstring is also the command's click help text, so it stays terse.
    images = client_from_ctx().extract.images()
    if not images:
        console.print("[yellow]No images found[/yellow]")
        return

    table = Table(show_header=True, header_style="bold cyan")
    table.add_column("Alt", width=30)
    table.add_column("Src")
    for img in images:
        # ``alt`` and ``src`` are page-supplied. Wrapping them in ``Text``
        # stops Rich from interpreting square brackets as markup tags (which
        # silently drops them or raises MarkupError) and from expanding
        # ``:emoji:`` codes.
        table.add_row(
            Text((img.get("alt") or "")[:40]),  # cap very long alt text
            Text(img.get("src") or ""),
        )
    console.print(table)
|
|
|
|
@extract_group.command("text")
@handle_errors
def extract_text():
    """Extract all visible text from the active tab."""
    page_text = client_from_ctx().extract.text()
    # Page text is arbitrary content. With Rich's defaults, ``[tag]``-like
    # sequences are consumed as markup (or raise MarkupError when unbalanced)
    # and ``:name:`` sequences are replaced by emoji. Both are disabled so the
    # text is printed as extracted.
    console.print(page_text, markup=False, emoji=False)
|
|
|
|
@extract_group.command("json")
@click.argument("selector")
@handle_errors
def extract_json(selector):
    """Parse and pretty-print JSON content inside SELECTOR."""
    # Fetch the parsed value for the selector, re-serialize it, and let the
    # shared console pretty-print the resulting JSON document.
    parsed = client_from_ctx().extract.json(selector)
    serialized = json.dumps(parsed)
    console.print_json(serialized)
|
|
|
|
@extract_group.command("html")
@handle_errors
def extract_html():
    """Print the full HTML of the active tab to stdout."""
    # Emitted through click.echo rather than the Rich console, so the HTML is
    # written without any console styling applied.
    page_html = client_from_ctx().extract.html()
    click.echo(page_html)
|
|
|
|
@extract_group.command("markdown")
@click.option("--selector", help="Extract only the DOM subtree matching this CSS selector.")
@handle_errors
def extract_markdown(selector):
    """Extract the page's main content as Markdown."""
    markdown = client_from_ctx().extract.markdown(selector)
    # Normalize a falsy result to an empty string once, then reuse it below.
    rendered = markdown or ""
    # Only let click append a newline when the output does not already end
    # with one, so the result never finishes with a doubled blank line.
    needs_newline = not rendered.endswith("\n")
    click.echo(rendered, nl=needs_newline)
|