refactor: modularize auth transport and markdown
Testing / remote-protocol-compat (0.9.5) (push) Successful in 1m4s
Testing / test (push) Successful in 1m22s
Testing / remote-protocol-compat (0.9.3) (push) Successful in 1m7s
Package Extension / package-extension (push) Successful in 1m1s
Build & Publish Package / publish (push) Successful in 1m5s

- Split auth into focused package modules for agent keys, file keys,
  signing, and post-quantum transport helpers while keeping the public
  browser_cli.auth import surface intact.
- Move transport encoding internals into a package with separate codec and
  binary-hoisting helpers, preserving browser_cli.transport compatibility.
- Extract remote TCP auth/socket helpers and serve challenge setup out of the
  runtime paths to make connection handling easier to reason about.
- Move the extension markdown extractor into a dedicated content/markdown
  folder with separate root selection, code normalization, renderer, and utils.
- Centralize CLI Rich rendering helpers for tab/window tree and table output,
  and add rendering tests for the shared builders.
- Remove local typing ignores in SDK/decorator/script plumbing and bump the
  package and extension version to 0.15.3.
This commit is contained in:
2026-06-15 01:23:57 +02:00
parent 0b43408a8d
commit 7cb2a8b618
34 changed files with 1502 additions and 1325 deletions
+5 -5
View File
@@ -12,7 +12,7 @@ from __future__ import annotations
import asyncio
import functools
from collections.abc import Callable
from typing import TypeVar
from typing import TypeVar, cast
from browser_cli.models import Group, Tab
from browser_cli.sdk import NAMESPACE_NAMES
@@ -74,7 +74,7 @@ class AsyncDecoratorsNS(WorkflowDecoratorsMixin):
finally:
if cleanup is not None:
await self._maybe_await(cleanup(value))
return wrapper # type: ignore[return-value]
return cast(F, wrapper)
return decorator(func) if func is not None else decorator
def new_tab(
@@ -117,7 +117,7 @@ class AsyncDecoratorsNS(WorkflowDecoratorsMixin):
finally:
if previous:
await self._c.perf.set_profile(previous)
return wrapper # type: ignore[return-value]
return cast(F, wrapper)
return decorator
def retry(
@@ -142,8 +142,8 @@ class AsyncDecoratorsNS(WorkflowDecoratorsMixin):
raise
if delay > 0:
await asyncio.sleep(delay)
raise last_error # type: ignore[misc]
return wrapper # type: ignore[return-value]
raise cast(BaseException, last_error)
return cast(F, wrapper)
return decorator
class AsyncBrowserCLI:
-263
View File
@@ -1,263 +0,0 @@
"""Ed25519 keypair management, ML-KEM key exchange, and auth helpers."""
import hashlib
import json
import os
import secrets
import socket
import struct
from dataclasses import dataclass
from pathlib import Path
from cryptography.exceptions import InvalidSignature
from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey, Ed25519PublicKey
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.ciphers.aead import ChaCha20Poly1305
from cryptography.hazmat.primitives.kdf.hkdf import HKDF
from cryptography.hazmat.primitives.serialization import (
Encoding,
NoEncryption,
PrivateFormat,
PublicFormat,
load_pem_private_key,
)
from browser_cli.constants import (
DEFAULT_AUTHORIZED_KEYS_PATH,
DEFAULT_KEY_PATH,
PQ_KEX_ALG,
PQ_TRANSPORT_ALG,
SSH_AGENT_IDENTITIES_ANSWER,
SSH_AGENT_SIGN_RESPONSE,
SSH_AGENTC_REQUEST_IDENTITIES,
SSH_AGENTC_SIGN_REQUEST,
)
def _pack_str(s: bytes) -> bytes:
return struct.pack(">I", len(s)) + s
def _unpack_str(data: bytes, off: int) -> tuple[bytes, int]:
n = struct.unpack_from(">I", data, off)[0]
return data[off + 4 : off + 4 + n], off + 4 + n
def _agent_roundtrip(msg: bytes) -> bytes:
sock_path = os.environ.get("SSH_AUTH_SOCK")
if not sock_path:
raise RuntimeError("SSH_AUTH_SOCK not set — is gpg-agent / ssh-agent running?")
with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
sock.settimeout(10)
sock.connect(sock_path)
sock.sendall(struct.pack(">I", len(msg)) + msg)
raw_len = b""
while len(raw_len) < 4:
chunk = sock.recv(4 - len(raw_len))
if not chunk:
raise RuntimeError("SSH agent closed connection")
raw_len += chunk
n = struct.unpack(">I", raw_len)[0]
resp = b""
while len(resp) < n:
chunk = sock.recv(n - len(resp))
if not chunk:
raise RuntimeError("SSH agent closed connection mid-response")
resp += chunk
return resp
# ── AgentKey ───────────────────────────────────────────────────────────────────
@dataclass
class AgentKey:
"""Ed25519 key backed by an SSH agent (YubiKey, TPM, ssh-agent, gpg-agent …)."""
blob: bytes
comment: str
@property
def pubkey_bytes(self) -> bytes:
_algo, off = _unpack_str(self.blob, 0)
key_bytes, _ = _unpack_str(self.blob, off)
return key_bytes
# ── Agent helpers ──────────────────────────────────────────────────────────────
def agent_list_keys() -> list[AgentKey]:
"""Return all Ed25519 keys currently held by the SSH agent."""
resp = _agent_roundtrip(bytes([SSH_AGENTC_REQUEST_IDENTITIES]))
if resp[0] != SSH_AGENT_IDENTITIES_ANSWER:
raise RuntimeError(f"Unexpected agent response: {resp[0]}")
n_keys = struct.unpack_from(">I", resp, 1)[0]
keys: list[AgentKey] = []
off = 5
for _ in range(n_keys):
blob, off = _unpack_str(resp, off)
comment, off = _unpack_str(resp, off)
algo, _ = _unpack_str(blob, 0)
if algo == b"ssh-ed25519":
keys.append(AgentKey(blob=blob, comment=comment.decode("utf-8", errors="replace")))
return keys
def agent_find_key(selector: str | None = None) -> AgentKey | None:
"""Return the first agent Ed25519 key whose comment contains selector (or any if None)."""
try:
keys = agent_list_keys()
except Exception:
return None
for key in keys:
if key.comment == "(none)":
continue
if selector is None or selector in key.comment:
return key
return None
def agent_sign_raw(key: AgentKey, data: bytes) -> bytes:
"""Ask the SSH agent to sign data and return the raw 64-byte Ed25519 signature."""
msg = (
bytes([SSH_AGENTC_SIGN_REQUEST])
+ _pack_str(key.blob)
+ _pack_str(data)
+ struct.pack(">I", 0)
)
resp = _agent_roundtrip(msg)
if resp[0] != SSH_AGENT_SIGN_RESPONSE:
raise RuntimeError(f"SSH agent refused to sign (response code {resp[0]})")
sig_blob, _ = _unpack_str(resp, 1)
_algo, soff = _unpack_str(sig_blob, 0)
raw_sig, _ = _unpack_str(sig_blob, soff)
if len(raw_sig) != 64:
raise RuntimeError(f"Unexpected signature length {len(raw_sig)}")
return raw_sig
# ── File-based key helpers ─────────────────────────────────────────────────────
def generate_keypair() -> tuple[bytes, str]:
"""Return (private_key_pem_bytes, public_key_hex)."""
priv = Ed25519PrivateKey.generate()
pem = priv.private_bytes(Encoding.PEM, PrivateFormat.PKCS8, NoEncryption())
pub_hex = priv.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw).hex()
return pem, pub_hex
def load_private_key(path: Path) -> Ed25519PrivateKey:
return load_pem_private_key(path.read_bytes(), password=None)
def public_key_hex(key: Ed25519PrivateKey | AgentKey) -> str:
if isinstance(key, AgentKey):
return key.pubkey_bytes.hex()
return key.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw).hex()
# ── Canonical payload + sign/verify ───────────────────────────────────────────
def canonical_payload(msg: dict) -> bytes:
"""Deterministic JSON encoding of msg without auth protocol fields."""
return json.dumps(
{k: v for k, v in msg.items() if k not in {"pubkey", "sig", "pq_kex"}},
sort_keys=True,
separators=(",", ":"),
).encode("utf-8")
def _auth_message(nonce: bytes, msg: dict, pq_shared_secret: bytes | None = None) -> bytes:
"""Bytes signed for auth; optionally binds a post-quantum KEX secret."""
data = nonce + hashlib.sha256(canonical_payload(msg)).digest()
if pq_shared_secret is not None:
data += hashlib.sha256(b"browser-cli ml-kem-768 v1" + pq_shared_secret).digest()
return data
def sign(key: Ed25519PrivateKey | AgentKey, nonce: bytes, msg: dict, pq_shared_secret: bytes | None = None) -> bytes:
"""Sign nonce + payload hash, optionally bound to an ML-KEM shared secret."""
data = _auth_message(nonce, msg, pq_shared_secret)
if isinstance(key, AgentKey):
return agent_sign_raw(key, data)
return key.sign(data)
def verify(pub_hex: str, nonce: bytes, msg: dict, sig_hex: str, pq_shared_secret: bytes | None = None) -> bool:
"""Return True if sig_hex is a valid signature over the canonical payload/auth secret."""
try:
pub_bytes = bytes.fromhex(pub_hex)
pub_key = Ed25519PublicKey.from_public_bytes(pub_bytes)
pub_key.verify(bytes.fromhex(sig_hex), _auth_message(nonce, msg, pq_shared_secret))
return True
except (InvalidSignature, ValueError):
return False
# ── Post-quantum key exchange (ML-KEM / Kyber) ────────────────────────────────
def pq_kex_server_keypair():
"""Return an ephemeral ML-KEM-768 private key and raw public key bytes.
Returns ``None`` when the installed cryptography/OpenSSL backend does not
support ML-KEM yet. The serve/client protocol treats this as graceful
downgrade instead of breaking local installs on older OpenSSL builds.
"""
try:
from cryptography.hazmat.primitives.asymmetric import mlkem
priv = mlkem.MLKEM768PrivateKey.generate()
pub = priv.public_key().public_bytes_raw()
return priv, pub
except Exception:
return None
def pq_kex_client_encapsulate(public_key_hex: str) -> tuple[str, bytes]:
"""Encapsulate to a server ML-KEM public key. Returns (ciphertext_hex, secret)."""
from cryptography.hazmat.primitives.asymmetric import mlkem
pub = mlkem.MLKEM768PublicKey.from_public_bytes(bytes.fromhex(public_key_hex))
shared_secret, ciphertext = pub.encapsulate()
return ciphertext.hex(), shared_secret
def pq_kex_server_decapsulate(private_key, ciphertext_hex: str) -> bytes:
"""Decapsulate a client ML-KEM ciphertext and return the shared secret."""
return private_key.decapsulate(bytes.fromhex(ciphertext_hex))
def _pq_transport_key(shared_secret: bytes, direction: str) -> bytes:
return HKDF(
algorithm=hashes.SHA256(),
length=32,
salt=None,
info=f"browser-cli pq transport v1 {direction}".encode("ascii"),
).derive(shared_secret)
def pq_encrypt(shared_secret: bytes, direction: str, plaintext: bytes) -> dict:
"""Encrypt an app-layer frame with a key derived from the ML-KEM secret."""
nonce = secrets.token_bytes(12)
key = _pq_transport_key(shared_secret, direction)
ciphertext = ChaCha20Poly1305(key).encrypt(nonce, plaintext, None)
return {"alg": PQ_TRANSPORT_ALG, "nonce": nonce.hex(), "ciphertext": ciphertext.hex()}
def pq_decrypt(shared_secret: bytes, direction: str, envelope: dict) -> bytes:
"""Decrypt an app-layer frame produced by pq_encrypt()."""
if not isinstance(envelope, dict) or envelope.get("alg") != PQ_TRANSPORT_ALG:
raise ValueError("unsupported encrypted transport envelope")
key = _pq_transport_key(shared_secret, direction)
return ChaCha20Poly1305(key).decrypt(
bytes.fromhex(str(envelope["nonce"])),
bytes.fromhex(str(envelope["ciphertext"])),
None,
)
def new_nonce() -> str:
return secrets.token_hex(32)
def load_authorized_keys_with_names(path: Path) -> list[tuple[str, str]]:
"""Return list of (pubkey_hex, name) pairs. Name is empty string if not set."""
if not path.exists():
return []
result = []
for line in path.read_text(encoding="utf-8").splitlines():
line = line.strip()
if not line or line.startswith("#"):
continue
parts = line.split(None, 1)
pubkey = parts[0]
name = parts[1].strip() if len(parts) > 1 else ""
result.append((pubkey, name))
return result
def load_authorized_keys(path: Path) -> list[str]:
return [pk for pk, _ in load_authorized_keys_with_names(path)]
def add_authorized_key(path: Path, pub_hex: str, name: str = "") -> bool:
"""Append pub_hex to authorized_keys. Returns False if already present."""
path.parent.mkdir(parents=True, exist_ok=True)
existing = {pk for pk, _ in load_authorized_keys_with_names(path)}
if pub_hex in existing:
return False
line = (f"{pub_hex} {name}".rstrip()) + "\n"
with open(path, "a", encoding="utf-8") as f:
f.write(line)
return True
+67
View File
@@ -0,0 +1,67 @@
"""Public auth API for browser-cli.
Implementation lives in focused modules:
- ``auth.agent``: SSH-agent/YubiKey helpers
- ``auth.keys``: file keys and authorized_keys management
- ``auth.signing``: canonical payload signing/verification
- ``auth.pq``: ML-KEM KEX and encrypted transport helpers
"""
from browser_cli.auth.agent import (
AgentKey,
agent_find_key,
agent_list_keys,
agent_roundtrip as _agent_roundtrip,
agent_sign_raw,
pack_ssh_string as _pack_str,
unpack_ssh_string as _unpack_str,
)
from browser_cli.auth.keys import (
add_authorized_key,
generate_keypair,
load_authorized_keys,
load_authorized_keys_with_names,
load_private_key,
public_key_hex,
)
from browser_cli.auth.pq import (
new_nonce,
pq_decrypt,
pq_encrypt,
pq_kex_client_encapsulate,
pq_kex_server_decapsulate,
pq_kex_server_keypair,
pq_transport_key as _pq_transport_key,
)
from browser_cli.auth.signing import (
auth_message as _auth_message,
canonical_payload,
sign,
verify,
)
from browser_cli.constants import DEFAULT_AUTHORIZED_KEYS_PATH, DEFAULT_KEY_PATH, PQ_KEX_ALG, PQ_TRANSPORT_ALG
__all__ = [
"AgentKey",
"DEFAULT_AUTHORIZED_KEYS_PATH",
"DEFAULT_KEY_PATH",
"PQ_KEX_ALG",
"PQ_TRANSPORT_ALG",
"add_authorized_key",
"agent_find_key",
"agent_list_keys",
"agent_sign_raw",
"canonical_payload",
"generate_keypair",
"load_authorized_keys",
"load_authorized_keys_with_names",
"load_private_key",
"new_nonce",
"pq_decrypt",
"pq_encrypt",
"pq_kex_client_encapsulate",
"pq_kex_server_decapsulate",
"pq_kex_server_keypair",
"public_key_hex",
"sign",
"verify",
]
+103
View File
@@ -0,0 +1,103 @@
"""SSH-agent backed Ed25519 key helpers."""
from __future__ import annotations
import os
import socket
import struct
from dataclasses import dataclass
from browser_cli.constants import (
SSH_AGENT_IDENTITIES_ANSWER,
SSH_AGENT_SIGN_RESPONSE,
SSH_AGENTC_REQUEST_IDENTITIES,
SSH_AGENTC_SIGN_REQUEST,
)
def pack_ssh_string(value: bytes) -> bytes:
return struct.pack(">I", len(value)) + value
def unpack_ssh_string(data: bytes, offset: int) -> tuple[bytes, int]:
length = struct.unpack_from(">I", data, offset)[0]
return data[offset + 4 : offset + 4 + length], offset + 4 + length
def agent_roundtrip(msg: bytes) -> bytes:
sock_path = os.environ.get("SSH_AUTH_SOCK")
if not sock_path:
raise RuntimeError("SSH_AUTH_SOCK not set — is gpg-agent / ssh-agent running?")
with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
sock.settimeout(10)
sock.connect(sock_path)
sock.sendall(struct.pack(">I", len(msg)) + msg)
raw_len = b""
while len(raw_len) < 4:
chunk = sock.recv(4 - len(raw_len))
if not chunk:
raise RuntimeError("SSH agent closed connection")
raw_len += chunk
length = struct.unpack(">I", raw_len)[0]
response = b""
while len(response) < length:
chunk = sock.recv(length - len(response))
if not chunk:
raise RuntimeError("SSH agent closed connection mid-response")
response += chunk
return response
@dataclass
class AgentKey:
"""Ed25519 key backed by an SSH agent (YubiKey, TPM, ssh-agent, gpg-agent …)."""
blob: bytes
comment: str
@property
def pubkey_bytes(self) -> bytes:
_algo, offset = unpack_ssh_string(self.blob, 0)
key_bytes, _ = unpack_ssh_string(self.blob, offset)
return key_bytes
def agent_list_keys() -> list[AgentKey]:
"""Return all Ed25519 keys currently held by the SSH agent."""
response = agent_roundtrip(bytes([SSH_AGENTC_REQUEST_IDENTITIES]))
if response[0] != SSH_AGENT_IDENTITIES_ANSWER:
raise RuntimeError(f"Unexpected agent response: {response[0]}")
key_count = struct.unpack_from(">I", response, 1)[0]
keys: list[AgentKey] = []
offset = 5
for _ in range(key_count):
blob, offset = unpack_ssh_string(response, offset)
comment, offset = unpack_ssh_string(response, offset)
algo, _ = unpack_ssh_string(blob, 0)
if algo == b"ssh-ed25519":
keys.append(AgentKey(blob=blob, comment=comment.decode("utf-8", errors="replace")))
return keys
def agent_find_key(selector: str | None = None) -> AgentKey | None:
"""Return the first agent Ed25519 key whose comment contains selector (or any if None)."""
try:
keys = agent_list_keys()
except Exception:
return None
for key in keys:
if key.comment == "(none)":
continue
if selector is None or selector in key.comment:
return key
return None
def agent_sign_raw(key: AgentKey, data: bytes) -> bytes:
"""Ask the SSH agent to sign data and return the raw 64-byte Ed25519 signature."""
msg = (
bytes([SSH_AGENTC_SIGN_REQUEST])
+ pack_ssh_string(key.blob)
+ pack_ssh_string(data)
+ struct.pack(">I", 0)
)
response = agent_roundtrip(msg)
if response[0] != SSH_AGENT_SIGN_RESPONSE:
raise RuntimeError(f"SSH agent refused to sign (response code {response[0]})")
sig_blob, _ = unpack_ssh_string(response, 1)
_algo, sig_offset = unpack_ssh_string(sig_blob, 0)
raw_sig, _ = unpack_ssh_string(sig_blob, sig_offset)
if len(raw_sig) != 64:
raise RuntimeError(f"Unexpected signature length {len(raw_sig)}")
return raw_sig
+59
View File
@@ -0,0 +1,59 @@
"""File-based Ed25519 keys and authorized_keys helpers."""
from __future__ import annotations
from pathlib import Path
from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey
from cryptography.hazmat.primitives.serialization import (
Encoding,
NoEncryption,
PrivateFormat,
PublicFormat,
load_pem_private_key,
)
from browser_cli.auth.agent import AgentKey
def generate_keypair() -> tuple[bytes, str]:
"""Return (private_key_pem_bytes, public_key_hex)."""
private_key = Ed25519PrivateKey.generate()
pem = private_key.private_bytes(Encoding.PEM, PrivateFormat.PKCS8, NoEncryption())
public_hex = private_key.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw).hex()
return pem, public_hex
def load_private_key(path: Path) -> Ed25519PrivateKey:
return load_pem_private_key(path.read_bytes(), password=None)
def public_key_hex(key: Ed25519PrivateKey | AgentKey) -> str:
if isinstance(key, AgentKey):
return key.pubkey_bytes.hex()
return key.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw).hex()
def load_authorized_keys_with_names(path: Path) -> list[tuple[str, str]]:
"""Return list of (pubkey_hex, name) pairs. Name is empty string if not set."""
if not path.exists():
return []
result = []
for line in path.read_text(encoding="utf-8").splitlines():
line = line.strip()
if not line or line.startswith("#"):
continue
parts = line.split(None, 1)
pubkey = parts[0]
name = parts[1].strip() if len(parts) > 1 else ""
result.append((pubkey, name))
return result
def load_authorized_keys(path: Path) -> list[str]:
return [pubkey for pubkey, _name in load_authorized_keys_with_names(path)]
def add_authorized_key(path: Path, pub_hex: str, name: str = "") -> bool:
"""Append pub_hex to authorized_keys. Returns False if already present."""
path.parent.mkdir(parents=True, exist_ok=True)
existing = {pubkey for pubkey, _name in load_authorized_keys_with_names(path)}
if pub_hex in existing:
return False
line = (f"{pub_hex} {name}".rstrip()) + "\n"
with open(path, "a", encoding="utf-8") as file:
file.write(line)
return True
+65
View File
@@ -0,0 +1,65 @@
"""Post-quantum ML-KEM key exchange and app-layer transport encryption."""
from __future__ import annotations
import secrets
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.ciphers.aead import ChaCha20Poly1305
from cryptography.hazmat.primitives.kdf.hkdf import HKDF
from browser_cli.constants import PQ_TRANSPORT_ALG
def pq_kex_server_keypair():
"""Return an ephemeral ML-KEM-768 private key and raw public key bytes.
Returns ``None`` when the installed cryptography/OpenSSL backend does not
support ML-KEM yet. The serve/client protocol treats this as graceful
downgrade instead of breaking local installs on older OpenSSL builds.
"""
try:
from cryptography.hazmat.primitives.asymmetric import mlkem
private_key = mlkem.MLKEM768PrivateKey.generate()
public_key = private_key.public_key().public_bytes_raw()
return private_key, public_key
except Exception:
return None
def pq_kex_client_encapsulate(public_key_hex: str) -> tuple[str, bytes]:
"""Encapsulate to a server ML-KEM public key. Returns (ciphertext_hex, secret)."""
from cryptography.hazmat.primitives.asymmetric import mlkem
public_key = mlkem.MLKEM768PublicKey.from_public_bytes(bytes.fromhex(public_key_hex))
shared_secret, ciphertext = public_key.encapsulate()
return ciphertext.hex(), shared_secret
def pq_kex_server_decapsulate(private_key, ciphertext_hex: str) -> bytes:
"""Decapsulate a client ML-KEM ciphertext and return the shared secret."""
return private_key.decapsulate(bytes.fromhex(ciphertext_hex))
def pq_transport_key(shared_secret: bytes, direction: str) -> bytes:
return HKDF(
algorithm=hashes.SHA256(),
length=32,
salt=None,
info=f"browser-cli pq transport v1 {direction}".encode("ascii"),
).derive(shared_secret)
def pq_encrypt(shared_secret: bytes, direction: str, plaintext: bytes) -> dict:
"""Encrypt an app-layer frame with a key derived from the ML-KEM secret."""
nonce = secrets.token_bytes(12)
key = pq_transport_key(shared_secret, direction)
ciphertext = ChaCha20Poly1305(key).encrypt(nonce, plaintext, None)
return {"alg": PQ_TRANSPORT_ALG, "nonce": nonce.hex(), "ciphertext": ciphertext.hex()}
def pq_decrypt(shared_secret: bytes, direction: str, envelope: dict) -> bytes:
"""Decrypt an app-layer frame produced by pq_encrypt()."""
if not isinstance(envelope, dict) or envelope.get("alg") != PQ_TRANSPORT_ALG:
raise ValueError("unsupported encrypted transport envelope")
key = pq_transport_key(shared_secret, direction)
return ChaCha20Poly1305(key).decrypt(
bytes.fromhex(str(envelope["nonce"])),
bytes.fromhex(str(envelope["ciphertext"])),
None,
)
def new_nonce() -> str:
return secrets.token_hex(32)
+42
View File
@@ -0,0 +1,42 @@
"""Canonical browser-cli auth payload signing and verification."""
from __future__ import annotations
import hashlib
import json
from cryptography.exceptions import InvalidSignature
from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey, Ed25519PublicKey
from browser_cli.auth.agent import AgentKey, agent_sign_raw
def canonical_payload(msg: dict) -> bytes:
"""Deterministic JSON encoding of msg without auth protocol fields."""
return json.dumps(
{key: value for key, value in msg.items() if key not in {"pubkey", "sig", "pq_kex"}},
sort_keys=True,
separators=(",", ":"),
).encode("utf-8")
def auth_message(nonce: bytes, msg: dict, pq_shared_secret: bytes | None = None) -> bytes:
"""Bytes signed for auth; optionally binds a post-quantum KEX secret."""
data = nonce + hashlib.sha256(canonical_payload(msg)).digest()
if pq_shared_secret is not None:
data += hashlib.sha256(b"browser-cli ml-kem-768 v1" + pq_shared_secret).digest()
return data
def sign(key: Ed25519PrivateKey | AgentKey, nonce: bytes, msg: dict, pq_shared_secret: bytes | None = None) -> bytes:
"""Sign nonce + payload hash, optionally bound to an ML-KEM shared secret."""
data = auth_message(nonce, msg, pq_shared_secret)
if isinstance(key, AgentKey):
return agent_sign_raw(key, data)
return key.sign(data)
def verify(pub_hex: str, nonce: bytes, msg: dict, sig_hex: str, pq_shared_secret: bytes | None = None) -> bool:
"""Return True if sig_hex is a valid signature over the canonical payload/auth secret."""
try:
pub_bytes = bytes.fromhex(pub_hex)
pub_key = Ed25519PublicKey.from_public_bytes(pub_bytes)
pub_key.verify(bytes.fromhex(sig_hex), auth_message(nonce, msg, pq_shared_secret))
return True
except (InvalidSignature, ValueError):
return False
+138 -6
View File
@@ -2,11 +2,22 @@
from __future__ import annotations
import shutil
from collections.abc import Callable, Iterable, Sequence
from typing import Any
from rich.console import Console
from rich.table import Table
from rich.text import Text
from rich.tree import Tree
Column = tuple[str, Callable[[Any], Any]]
def item_value(item: Any, name: str, default: Any = None) -> Any:
"""Read *name* from a dict-like or attribute object."""
if isinstance(item, dict):
return item.get(name, default)
return getattr(item, name, default)
def shorten(value: str | None, limit: int) -> str:
"""Return *value* shortened to *limit* cells-ish, using an ellipsis."""
value = value or ""
@@ -38,18 +49,139 @@ def no_wrap_text() -> Text:
"""Text configured for one-line tree labels with edge ellipsis."""
return Text(no_wrap=True, overflow="ellipsis")
def tab_tree_label(tab, *, title_limit: int, show_urls: bool = False, url_limit: int = 55) -> Text:
def tab_tree_label(tab: Any, *, title_limit: int, show_urls: bool = False, url_limit: int = 55) -> Text:
"""Reusable one-line label for a browser tab in tree views."""
label = no_wrap_text()
label.append(f"[{tab.id}] ", style="dim")
label.append(shorten(tab.title or "(untitled)", title_limit))
if tab.active:
label.append(f"[{item_value(tab, 'id')}] ", style="dim")
label.append(shorten(item_value(tab, 'title') or "(untitled)", title_limit))
if item_value(tab, "active", False):
label.append(" *", style="green")
if show_urls and tab.url:
url = item_value(tab, "url")
if show_urls and url:
label.append("", style="dim")
label.append(shorten(tab.url, url_limit), style="dim")
label.append(shorten(url, url_limit), style="dim")
return label
def group_tree_label(group_id: int, group: Any, *, title_limit: int) -> Text:
"""Reusable one-line label for a browser tab group in tree views."""
title = item_value(group, "title", "") or f"Group {group_id}"
color = item_value(group, "color", "") or "group"
count = item_value(group, "tab_count", item_value(group, "tabCount", 0)) or 0
collapsed = bool(item_value(group, "collapsed", False))
label = no_wrap_text()
label.append(shorten(title, title_limit), style="bold")
meta = [color]
if count:
meta.append(f"{count} tab" + ("" if count == 1 else "s"))
if collapsed:
meta.append("collapsed")
label.append(" (" + ", ".join(meta) + ")", style="dim")
return label
def tab_sort_key(tab: Any) -> tuple:
"""Stable tab ordering across multi-browser responses."""
group_id = item_value(tab, "group_id", item_value(tab, "groupId"))
return (
item_value(tab, "browser") or "",
item_value(tab, "window_id", item_value(tab, "windowId", 0)),
item_value(tab, "index", 0) or 0,
group_id if group_id is not None else -1,
item_value(tab, "id", 0),
)
def print_tree(tree: Tree, *, console: Console | None = None) -> None:
"""Render a Rich tree using the detected full terminal width."""
Console(width=terminal_width(console)).print(tree)
def print_table_rows(
rows: Sequence[Any],
columns: Sequence[Column],
*,
console: Console,
empty_message: str,
show_header: bool = True,
header_style: str = "bold cyan",
) -> None:
"""Render a small Rich table from arbitrary row objects."""
if not rows:
console.print(empty_message)
return
table = Table(show_header=show_header, header_style=header_style)
for header, _getter in columns:
table.add_column(header)
for row in rows:
table.add_row(*[str(getter(row) or "") for _header, getter in columns])
Console(width=terminal_width(console)).print(table)
def build_tabs_tree(
tabs: Iterable[Any],
groups: Iterable[Any],
*,
console: Console,
show_urls: bool = False,
) -> Tree:
"""Build a browser → window → group/tab tree from tab and group responses."""
tabs = sorted(tabs, key=tab_sort_key)
show_browser = any(item_value(tab, "browser") for tab in tabs)
title_limit = tree_title_limit(console=console, show_browser=show_browser, show_urls=show_urls)
url_limit = tree_url_limit(title_limit, console=console)
group_info = {
(
item_value(group, "browser") or "local",
item_value(group, "window_id", item_value(group, "windowId")),
item_value(group, "id"),
): group
for group in groups
}
root = Tree("[bold]Tabs[/bold]")
browser_nodes: dict[str, Tree] = {}
window_nodes: dict[tuple[str, int], Tree] = {}
group_nodes: dict[tuple[str, int, int], Tree] = {}
for tab in tabs:
browser_key = item_value(tab, "browser") or "local"
browser_node = browser_nodes.get(browser_key)
if browser_node is None:
browser_node = root.add(Text(browser_key, style="bold cyan")) if show_browser else root
browser_nodes[browser_key] = browser_node
window_id = item_value(tab, "window_id", item_value(tab, "windowId", 0))
window_key = (browser_key, window_id)
window_node = window_nodes.get(window_key)
if window_node is None:
window_node = browser_node.add(f"Window {window_id}")
window_nodes[window_key] = window_node
group_id = item_value(tab, "group_id", item_value(tab, "groupId"))
if group_id is None:
window_node.add(tab_tree_label(tab, title_limit=title_limit, show_urls=show_urls, url_limit=url_limit))
continue
group_key = (browser_key, window_id, group_id)
group_node = group_nodes.get(group_key)
if group_node is None:
group = group_info.get(group_key) or group_info.get((browser_key, None, group_id))
group_node = window_node.add(group_tree_label(group_id, group, title_limit=title_limit))
group_nodes[group_key] = group_node
group_node.add(tab_tree_label(tab, title_limit=title_limit, show_urls=show_urls, url_limit=url_limit))
return root
def build_windows_tree(windows: Iterable[dict], tabs: Iterable[Any], *, console: Console) -> Tree:
"""Build a window → tab tree from window and tab responses."""
windows = list(windows)
tabs = list(tabs)
title_limit = tree_title_limit(console=console, show_browser=any("browser" in w for w in windows), show_urls=True)
url_limit = tree_url_limit(title_limit, console=console)
root = Tree("[bold]Windows[/bold]")
for window in sorted(windows, key=lambda item: (item.get("browser", ""), item.get("id", 0))):
window_id = window.get("id")
label = f"Window {window_id}"
if window.get("alias"):
label += f" ({window['alias']})"
if window.get("browser"):
label = f"{window['browser']}: " + label
node = root.add(label)
window_tabs = [
tab for tab in tabs
if item_value(tab, "window_id", item_value(tab, "windowId")) == window_id
and (not window.get("browser") or item_value(tab, "browser") == window.get("browser"))
]
for tab in sorted(window_tabs, key=lambda item: item_value(item, "index", 0) or 0):
node.add(tab_tree_label(tab, title_limit=title_limit, show_urls=True, url_limit=url_limit))
return root
+44 -42
View File
@@ -1,7 +1,9 @@
from __future__ import annotations
import importlib
import json
from pathlib import Path
from typing import Any, cast
import click
from rich.console import Console
@@ -12,25 +14,25 @@ from browser_cli.commands import client_from_ctx, handle_errors
console = Console()
def _load_steps(path: Path):
text = path.read_text(encoding="utf-8")
if path.suffix.lower() in {".yaml", ".yml"}:
try:
import yaml # type: ignore
except Exception as exc:
raise click.ClickException("YAML scripts require PyYAML; use JSON or install PyYAML") from exc
return yaml.safe_load(text)
return json.loads(text)
text = path.read_text(encoding="utf-8")
if path.suffix.lower() in {".yaml", ".yml"}:
try:
yaml = cast(Any, importlib.import_module("yaml"))
except Exception as exc:
raise click.ClickException("YAML scripts require PyYAML; use JSON or install PyYAML") from exc
return yaml.safe_load(text)
return json.loads(text)
def _parse_step(step):
if isinstance(step, str):
return step, {}
if isinstance(step, dict):
if "command" in step:
return step["command"], step.get("args") or {}
if len(step) == 1:
command, args = next(iter(step.items()))
return command, args or {}
raise click.ClickException(f"Invalid script step: {step!r}")
if isinstance(step, str):
return step, {}
if isinstance(step, dict):
if "command" in step:
return step["command"], step.get("args") or {}
if len(step) == 1:
command, args = next(iter(step.items()))
return command, args or {}
raise click.ClickException(f"Invalid script step: {step!r}")
@click.command("script")
@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@@ -41,28 +43,28 @@ def _parse_step(step):
@click.option("--allow-dangerous", is_flag=True, help="Allow high-risk commands such as dom.eval, storage.*, screenshots")
@handle_errors
def cmd_script(file: Path, json_output: bool, continue_on_error: bool, allow_read_page: bool, allow_control: bool, allow_dangerous: bool):
"""Run a JSON/YAML batch script of browser-cli wire commands."""
steps = _load_steps(file)
if not isinstance(steps, list):
raise click.ClickException("Script root must be a list")
client = client_from_ctx()
policy = CommandPolicy(allow_read_page=allow_read_page, allow_control=allow_control, allow_dangerous=allow_dangerous)
results = []
for index, step in enumerate(steps, start=1):
command, args = _parse_step(step)
try:
assert_command_allowed(command, policy)
result = client.command(command, args)
results.append({"index": index, "command": command, "ok": True, "result": result})
if not json_output:
console.print(f"[green]✓[/green] {index}: {command}")
except Exception as exc:
results.append({"index": index, "command": command, "ok": False, "error": str(exc)})
if not continue_on_error:
if json_output:
click.echo(json.dumps(results, indent=2, default=str))
raise
if not json_output:
console.print(f"[red]✗[/red] {index}: {command}: {exc}")
if json_output:
click.echo(json.dumps(results, indent=2, default=str))
"""Run a JSON/YAML batch script of browser-cli wire commands."""
steps = _load_steps(file)
if not isinstance(steps, list):
raise click.ClickException("Script root must be a list")
client = client_from_ctx()
policy = CommandPolicy(allow_read_page=allow_read_page, allow_control=allow_control, allow_dangerous=allow_dangerous)
results = []
for index, step in enumerate(steps, start=1):
command, args = _parse_step(step)
try:
assert_command_allowed(command, policy)
result = client.command(command, args)
results.append({"index": index, "command": command, "ok": True, "result": result})
if not json_output:
console.print(f"[green]✓[/green] {index}: {command}")
except Exception as exc:
results.append({"index": index, "command": command, "ok": False, "error": str(exc)})
if not continue_on_error:
if json_output:
click.echo(json.dumps(results, indent=2, default=str))
raise
if not json_output:
console.print(f"[red]✗[/red] {index}: {command}: {exc}")
if json_output:
click.echo(json.dumps(results, indent=2, default=str))
+13 -85
View File
@@ -2,65 +2,25 @@ import base64
import binascii
import click
from browser_cli.commands import client_from_ctx, gentle_mode_option, handle_errors, print_counts, tab_option
from browser_cli.commands.rendering import no_wrap_text, print_tree, shorten, tab_tree_label, tree_title_limit, tree_url_limit
from browser_cli.commands.rendering import build_tabs_tree, print_table_rows, print_tree
from rich.console import Console
from rich.table import Table
from rich.text import Text
from rich.tree import Tree
console = Console()
def _group_tree_label(group_id: int, group, *, title_limit: int) -> Text:
title = getattr(group, "title", "") or f"Group {group_id}"
color = getattr(group, "color", "") or "group"
count = getattr(group, "tab_count", 0) or 0
collapsed = bool(getattr(group, "collapsed", False))
label = no_wrap_text()
label.append(shorten(title, title_limit), style="bold")
meta = [color]
if count:
meta.append(f"{count} tab" + ("" if count == 1 else "s"))
if collapsed:
meta.append("collapsed")
label.append(" (" + ", ".join(meta) + ")", style="dim")
return label
def _tab_sort_key(tab):
return (
tab.browser or "",
tab.window_id,
getattr(tab, "index", 0),
tab.group_id if tab.group_id is not None else -1,
tab.id,
)
def _print_tabs(tabs, *, show_browser: bool = False) -> None:
if not tabs:
console.print("[yellow]No tabs found[/yellow]")
return
table = Table(show_header=True, header_style="bold cyan")
columns = []
if show_browser:
table.add_column("Browser", no_wrap=True)
table.add_column("ID", style="dim", no_wrap=True)
table.add_column("Window", no_wrap=True)
table.add_column("Active", width=7)
table.add_column("Muted", width=7)
table.add_column("Title")
table.add_column("URL")
for t in tabs:
active = "[green]✓[/green]" if t.active else ""
muted = "[yellow]✓[/yellow]" if t.muted else ""
row = [
(t.browser or "") if show_browser else None,
str(t.id),
str(t.window_id),
active,
muted,
(t.title or "")[:60],
(t.url or "")[:80],
]
table.add_row(*[value for value in row if value is not None])
console.print(table)
columns.append(("Browser", lambda tab: tab.browser or ""))
columns.extend([
("ID", lambda tab: tab.id),
("Window", lambda tab: tab.window_id),
("Active", lambda tab: "[green]✓[/green]" if tab.active else ""),
("Muted", lambda tab: "[yellow]✓[/yellow]" if tab.muted else ""),
("Title", lambda tab: (tab.title or "")[:60]),
("URL", lambda tab: (tab.url or "")[:80]),
])
print_table_rows(tabs, columns, console=console, empty_message="[yellow]No tabs found[/yellow]")
@click.group("tabs")
def tabs_group():
@@ -79,39 +39,7 @@ def tabs_list():
def tabs_tree(show_urls):
"""Show tabs grouped as a window/group tree."""
client = client_from_ctx()
tabs = sorted(client.tabs.list(), key=_tab_sort_key)
title_limit = tree_title_limit(console=console, show_browser=any(t.browser for t in tabs), show_urls=show_urls)
url_limit = tree_url_limit(title_limit, console=console)
group_info = {
(group.browser or "local", group.id): group
for group in client.groups.list()
}
root = Tree("[bold]Tabs[/bold]")
browsers = {}
windows = {}
groups = {}
show_browser = any(t.browser for t in tabs)
for tab in tabs:
browser_key = tab.browser or "local"
browser_node = browsers.get(browser_key)
if browser_node is None:
browser_node = root.add(Text(browser_key, style="bold cyan")) if show_browser else root
browsers[browser_key] = browser_node
win_key = (browser_key, tab.window_id)
win_node = windows.get(win_key)
if win_node is None:
win_node = browser_node.add(f"Window {tab.window_id}")
windows[win_key] = win_node
if tab.group_id is None:
win_node.add(tab_tree_label(tab, title_limit=title_limit, show_urls=show_urls, url_limit=url_limit))
continue
group_key = (browser_key, tab.window_id, tab.group_id)
group_node = groups.get(group_key)
group = group_info.get((browser_key, tab.group_id))
if group_node is None:
group_node = win_node.add(_group_tree_label(tab.group_id, group, title_limit=title_limit))
groups[group_key] = group_node
group_node.add(tab_tree_label(tab, title_limit=title_limit, show_urls=show_urls, url_limit=url_limit))
root = build_tabs_tree(client.tabs.list(), client.groups.list(), console=console, show_urls=show_urls)
print_tree(root, console=console)
@tabs_group.command("close")
+11 -37
View File
@@ -1,33 +1,21 @@
import click
from browser_cli.commands import client_from_ctx, handle_errors
from browser_cli.commands.rendering import print_tree, tab_tree_label, tree_title_limit, tree_url_limit
from browser_cli.commands.rendering import build_windows_tree, print_table_rows, print_tree
from rich.console import Console
from rich.table import Table
from rich.tree import Tree
console = Console()
def _print_windows(windows: list[dict], *, show_browser: bool = False) -> None:
if not windows:
console.print("[yellow]No windows found[/yellow]")
return
table = Table(show_header=True, header_style="bold cyan")
columns = []
if show_browser:
table.add_column("Browser")
table.add_column("ID", style="dim", no_wrap=True)
table.add_column("Alias", width=20)
table.add_column("Tabs", width=6)
table.add_column("State", width=12)
for w in windows:
row = [
w.get("browser", "") if show_browser else None,
str(w.get("id", "")),
w.get("alias") or "",
str(w.get("tabCount", "")),
w.get("state") or "",
]
table.add_row(*[value for value in row if value is not None])
console.print(table)
columns.append(("Browser", lambda window: window.get("browser", "")))
columns.extend([
("ID", lambda window: window.get("id", "")),
("Alias", lambda window: window.get("alias") or ""),
("Tabs", lambda window: window.get("tabCount", "")),
("State", lambda window: window.get("state") or ""),
])
print_table_rows(windows, columns, console=console, empty_message="[yellow]No windows found[/yellow]")
@click.group("windows")
def windows_group():
@@ -45,21 +33,7 @@ def windows_list():
def windows_tree():
"""Show windows and their tabs as a tree."""
client = client_from_ctx()
windows = client.windows.list()
tabs = client.tabs.list()
root = Tree("[bold]Windows[/bold]")
title_limit = tree_title_limit(console=console, show_browser=any("browser" in w for w in windows), show_urls=True)
url_limit = tree_url_limit(title_limit, console=console)
for w in sorted(windows, key=lambda item: (item.get("browser", ""), item.get("id", 0))):
wid = w.get("id")
label = f"Window {wid}"
if w.get("alias"):
label += f" ({w['alias']})"
if w.get("browser"):
label = f"{w['browser']}: " + label
node = root.add(label)
for tab in sorted([t for t in tabs if t.window_id == wid and (not w.get("browser") or t.browser == w.get("browser"))], key=lambda t: getattr(t, "index", 0)):
node.add(tab_tree_label(tab, title_limit=title_limit, show_urls=True, url_limit=url_limit))
root = build_windows_tree(client.windows.list(), client.tabs.list(), console=console)
print_tree(root, console=console)
@windows_group.command("rename")
-20
View File
@@ -4,26 +4,6 @@ from __future__ import annotations
import re
from html.parser import HTMLParser
def _normalize_text(value):
return re.sub(r"\s+", " ", value or "").strip()
def _normalize_inline(value):
value = value.replace("\xa0", " ")
value = re.sub(r"[ \t\r\f\v]+", " ", value)
value = re.sub(r" *\n *", "\n", value)
return value.strip()
def _collapse_blank_lines(value):
value = re.sub(r"[ \t]+\n", "\n", value)
value = re.sub(r"\n{3,}", "\n\n", value)
return value.strip()
def _escape_markdown(text):
return re.sub(r"([\\`[\]])", r"\\\1", text)
def _escape_table_cell(text):
return text.replace("|", r"\|").replace("\n", " ").strip()
class _HtmlNode:
def __init__(self, tag=None, attrs=None, text=None):
self.tag = tag
+145
View File
@@ -0,0 +1,145 @@
"""Challenge/response auth helpers for remote TCP transport."""
from __future__ import annotations
import asyncio
import json
import sys
from collections.abc import Callable
from typing import TypeVar
from browser_cli.errors import BrowserNotConnected
from browser_cli.version_manager import USER_AGENT
T = TypeVar("T")
AUTH_FIELDS = {"token", "pubkey", "sig", "pq_kex", "encrypted", "_suppress_pq_warning"}
PQ_WARNING = (
"** WARNING: connection is not using a post-quantum key exchange algorithm.\n"
"** This session may be vulnerable to store now, decrypt later attacks.\n"
)
def parse_challenge(raw: bytes) -> tuple[dict | None, str | None]:
try:
challenge = json.loads(raw)
nonce_hex = challenge.get("nonce") if challenge.get("type") == "challenge" else None
return challenge, nonce_hex
except (json.JSONDecodeError, AttributeError):
return None, None
def check_min_client_version(challenge: dict | None) -> None:
min_ver = challenge.get("min_client_version") if isinstance(challenge, dict) else None
if not min_ver:
return
from browser_cli.version_manager import parse_version
try:
client_ver = USER_AGENT.split("/", 1)[1]
if parse_version(client_ver) < parse_version(min_ver):
raise BrowserNotConnected(
f"Client version {client_ver} is too old for this server "
f"(requires >= {min_ver}). Run: pip install --upgrade browser-cli"
)
except (IndexError, ValueError):
pass
def clean_message(msg: dict) -> dict:
return {key: value for key, value in msg.items() if key not in AUTH_FIELDS}
def get_pq_public_key(challenge: dict | None) -> str | None:
if not isinstance(challenge, dict):
return None
from browser_cli.auth import PQ_KEX_ALG
kex = challenge.get("pq_kex")
if isinstance(kex, dict) and kex.get("alg") == PQ_KEX_ALG and kex.get("public_key"):
return str(kex["public_key"])
return None
def signed_payload(clean_msg: dict, private_key, nonce_hex: str, pq_shared_secret: bytes | None) -> dict:
from browser_cli.auth import pq_encrypt, public_key_hex, sign
nonce = bytes.fromhex(nonce_hex)
sig = sign(private_key, nonce, clean_msg, pq_shared_secret)
pubkey = public_key_hex(private_key)
if pq_shared_secret is None:
return {**clean_msg, "pubkey": pubkey, "sig": sig.hex()}
encrypted = pq_encrypt(pq_shared_secret, "request", json.dumps(clean_msg).encode("utf-8"))
return {
"id": clean_msg.get("id"),
"user_agent": clean_msg.get("user_agent"),
"pubkey": pubkey,
"sig": sig.hex(),
"pq_kex": clean_msg["pq_kex"],
"encrypted": encrypted,
}
def emit_no_pq_warning(enabled: bool) -> None:
if enabled:
sys.stderr.write(PQ_WARNING)
def build_auth_message(
msg: dict,
challenge: dict | None,
nonce_hex: str | None,
private_key,
encapsulate: Callable[[str], tuple[str, bytes]],
*,
warn_no_pq: bool = True,
) -> tuple[dict, bytes | None]:
if not nonce_hex or private_key is None:
emit_no_pq_warning(warn_no_pq)
return msg, None
clean_msg = clean_message(msg)
pq_shared_secret = None
pq_public_key = get_pq_public_key(challenge)
if pq_public_key:
from browser_cli.auth import PQ_KEX_ALG
ciphertext_hex, pq_shared_secret = encapsulate(pq_public_key)
clean_msg["pq_kex"] = {"alg": PQ_KEX_ALG, "ciphertext": ciphertext_hex}
else:
emit_no_pq_warning(warn_no_pq)
return signed_payload(clean_msg, private_key, nonce_hex, pq_shared_secret), pq_shared_secret
async def build_auth_message_async(
msg: dict,
challenge: dict | None,
nonce_hex: str | None,
private_key,
*,
warn_no_pq: bool = True,
) -> tuple[dict, bytes | None]:
def encapsulate(public_key: str) -> tuple[str, bytes]:
from browser_cli.auth import pq_kex_client_encapsulate
return pq_kex_client_encapsulate(public_key)
return await asyncio.to_thread(
build_auth_message,
msg,
challenge,
nonce_hex,
private_key,
encapsulate,
warn_no_pq=warn_no_pq,
)
def decode_pq_response(response: bytes | None, pq_shared_secret: bytes | None) -> bytes | None:
if response is None or pq_shared_secret is None:
return response
try:
from browser_cli.auth import pq_decrypt
envelope = json.loads(response)
if isinstance(envelope, dict) and "encrypted" in envelope:
return pq_decrypt(pq_shared_secret, "response", envelope["encrypted"])
except Exception as exc:
raise BrowserNotConnected(f"Cannot decrypt post-quantum remote response: {exc}") from exc
return response
def with_challenge(challenge_raw: bytes, msg: dict, private_key, build_auth: Callable[[dict, dict | None, str | None, object], T]) -> T:
if challenge_raw is None:
raise BrowserNotConnected("No challenge received from remote endpoint")
challenge, nonce_hex = parse_challenge(challenge_raw)
check_min_client_version(challenge)
return build_auth(msg, challenge, nonce_hex, private_key)
def should_warn_no_pq(msg: dict) -> bool:
return not bool(msg.pop("_suppress_pq_warning", False))
+52
View File
@@ -0,0 +1,52 @@
"""Socket helpers for remote TCP/TLS transport."""
from __future__ import annotations
import asyncio
import socket
from contextlib import contextmanager
from browser_cli.endpoints import _resolve_connect_endpoint
from browser_cli.framing import async_recv_exact, async_recv_frame, recv_exact, recv_frame
def recv_exact_bytes(sock: socket.socket, n: int) -> bytes:
return recv_exact(sock, n) or b""
def recv_all(sock: socket.socket) -> bytes:
return recv_frame(sock, label="Response") or b""
async def async_recv_exact_bytes(reader: asyncio.StreamReader, n: int) -> bytes:
return await async_recv_exact(reader, n) or b""
async def async_recv_all(reader: asyncio.StreamReader) -> bytes:
return await async_recv_frame(reader, label="Response") or b""
def split_endpoint(endpoint: str) -> tuple[str, int]:
connect_ep = _resolve_connect_endpoint(endpoint)
host, _, port_str = connect_ep.rpartition(":")
return host, int(port_str)
@contextmanager
def open_socket(endpoint: str):
host, port = split_endpoint(endpoint)
raw_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
raw_sock.settimeout(30)
try:
raw_sock.connect((host, port))
if port == 443:
import ssl
sock = ssl.create_default_context().wrap_socket(raw_sock, server_hostname=host)
else:
sock = raw_sock
except Exception:
raw_sock.close()
raise
with sock:
yield sock
async def open_async_connection(endpoint: str) -> tuple[asyncio.StreamReader, asyncio.StreamWriter]:
host, port = split_endpoint(endpoint)
ssl_ctx = None
if port == 443:
import ssl
ssl_ctx = ssl.create_default_context()
return await asyncio.open_connection(host, port, ssl=ssl_ctx, server_hostname=host if ssl_ctx else None)
+28 -199
View File
@@ -1,202 +1,43 @@
"""TCP/TLS transport for talking to a remote ``browser-cli serve``.
Owns the wire mechanics of the remote leg: open a socket (TLS on :443),
complete the signed challenge/response handshake with an optional post-quantum
key exchange, frame the request, and read the framed (possibly encrypted)
response. The higher-level "which endpoint / which profile / which key"
decisions stay in :mod:`browser_cli.client.core`.
This module keeps the public/private compatibility surface used by older tests
and callers, while delegating socket mechanics and auth-handshake details to
focused helper modules.
"""
from __future__ import annotations
import asyncio
import json
import socket
import sys
from collections.abc import Callable
from contextlib import contextmanager
from typing import TypeVar
from browser_cli.errors import BrowserNotConnected
from browser_cli.endpoints import _resolve_connect_endpoint
from browser_cli.framing import async_recv_exact, async_recv_frame, async_send_frame, frame, recv_exact, recv_frame
from browser_cli.version_manager import USER_AGENT as _USER_AGENT
T = TypeVar("T")
_AUTH_FIELDS = {"token", "pubkey", "sig", "pq_kex", "encrypted", "_suppress_pq_warning"}
_PQ_WARNING = (
"** WARNING: connection is not using a post-quantum key exchange algorithm.\n"
"** This session may be vulnerable to store now, decrypt later attacks.\n"
from browser_cli.framing import async_send_frame, frame
from browser_cli.remote.auth import (
build_auth_message as _build_auth_message,
build_auth_message_async as _build_auth_message_async,
decode_pq_response as _decode_pq_response,
parse_challenge as _parse_challenge,
should_warn_no_pq as _should_warn_no_pq,
with_challenge as _with_challenge,
)
from browser_cli.remote.socket import (
async_recv_all as _async_recv_all,
async_recv_exact_bytes as _async_recv_exact,
open_async_connection as _open_async_connection,
open_socket as _open_socket,
recv_all as _recv_all,
recv_exact_bytes as _recv_exact,
split_endpoint as _split_endpoint,
)
def _recv_exact(sock: socket.socket, n: int) -> bytes:
return recv_exact(sock, n) or b""
def _send_remote(endpoint: str, msg: dict, private_key=None, *, warn_no_pq: bool | None = None) -> bytes | None:
warn = _should_warn_no_pq(msg) if warn_no_pq is None else warn_no_pq
def _recv_all(sock: socket.socket) -> bytes:
return recv_frame(sock, label="Response") or b""
async def _async_recv_exact(reader: asyncio.StreamReader, n: int) -> bytes:
return await async_recv_exact(reader, n) or b""
async def _async_recv_all(reader: asyncio.StreamReader) -> bytes:
return await async_recv_frame(reader, label="Response") or b""
def _split_endpoint(endpoint: str) -> tuple[str, int]:
connect_ep = _resolve_connect_endpoint(endpoint)
host, _, port_str = connect_ep.rpartition(":")
return host, int(port_str)
@contextmanager
def _open_socket(endpoint: str):
host, port = _split_endpoint(endpoint)
raw_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
raw_sock.settimeout(30)
try:
raw_sock.connect((host, port))
if port == 443:
import ssl
sock = ssl.create_default_context().wrap_socket(raw_sock, server_hostname=host)
else:
sock = raw_sock
except Exception:
raw_sock.close()
raise
with sock:
yield sock
async def _open_async_connection(endpoint: str) -> tuple[asyncio.StreamReader, asyncio.StreamWriter]:
host, port = _split_endpoint(endpoint)
ssl_ctx = None
if port == 443:
import ssl
ssl_ctx = ssl.create_default_context()
return await asyncio.open_connection(host, port, ssl=ssl_ctx, server_hostname=host if ssl_ctx else None)
def _parse_challenge(raw: bytes) -> tuple[dict | None, str | None]:
try:
challenge = json.loads(raw)
nonce_hex = challenge.get("nonce") if challenge.get("type") == "challenge" else None
return challenge, nonce_hex
except (json.JSONDecodeError, AttributeError):
return None, None
def _check_min_client_version(challenge: dict | None) -> None:
min_ver = challenge.get("min_client_version") if isinstance(challenge, dict) else None
if not min_ver:
return
from browser_cli.version_manager import parse_version
try:
client_ver = _USER_AGENT.split("/", 1)[1]
if parse_version(client_ver) < parse_version(min_ver):
raise BrowserNotConnected(
f"Client version {client_ver} is too old for this server "
f"(requires >= {min_ver}). Run: pip install --upgrade browser-cli"
)
except (IndexError, ValueError):
pass
def _clean_message(msg: dict) -> dict:
return {k: v for k, v in msg.items() if k not in _AUTH_FIELDS}
def _get_pq_public_key(challenge: dict | None) -> str | None:
if not isinstance(challenge, dict):
return None
from browser_cli.auth import PQ_KEX_ALG
kex = challenge.get("pq_kex")
if isinstance(kex, dict) and kex.get("alg") == PQ_KEX_ALG and kex.get("public_key"):
return str(kex["public_key"])
return None
def _signed_payload(clean_msg: dict, private_key, nonce_hex: str, pq_shared_secret: bytes | None) -> dict:
from browser_cli.auth import PQ_KEX_ALG, pq_encrypt, public_key_hex, sign
nonce = bytes.fromhex(nonce_hex)
sig = sign(private_key, nonce, clean_msg, pq_shared_secret)
pubkey = public_key_hex(private_key)
if pq_shared_secret is None:
return {**clean_msg, "pubkey": pubkey, "sig": sig.hex()}
encrypted = pq_encrypt(pq_shared_secret, "request", json.dumps(clean_msg).encode("utf-8"))
return {
"id": clean_msg.get("id"),
"user_agent": clean_msg.get("user_agent"),
"pubkey": pubkey,
"sig": sig.hex(),
"pq_kex": clean_msg["pq_kex"],
"encrypted": encrypted,
}
def _warn_no_pq(enabled: bool) -> None:
if enabled:
sys.stderr.write(_PQ_WARNING)
def _build_auth_message(
msg: dict,
challenge: dict | None,
nonce_hex: str | None,
private_key,
encapsulate: Callable[[str], tuple[str, bytes]],
*,
warn_no_pq: bool = True,
) -> tuple[dict, bytes | None]:
if not nonce_hex or private_key is None:
_warn_no_pq(warn_no_pq)
return msg, None
clean_msg = _clean_message(msg)
pq_shared_secret = None
pq_public_key = _get_pq_public_key(challenge)
if pq_public_key:
from browser_cli.auth import PQ_KEX_ALG
ciphertext_hex, pq_shared_secret = encapsulate(pq_public_key)
clean_msg["pq_kex"] = {"alg": PQ_KEX_ALG, "ciphertext": ciphertext_hex}
else:
_warn_no_pq(warn_no_pq)
return _signed_payload(clean_msg, private_key, nonce_hex, pq_shared_secret), pq_shared_secret
async def _build_auth_message_async(
msg: dict,
challenge: dict | None,
nonce_hex: str | None,
private_key,
*,
warn_no_pq: bool = True,
) -> tuple[dict, bytes | None]:
def encapsulate(public_key: str) -> tuple[str, bytes]:
def build_auth(sync_msg: dict, challenge: dict | None, nonce_hex: str | None, key):
from browser_cli.auth import pq_kex_client_encapsulate
return pq_kex_client_encapsulate(public_key)
return _build_auth_message(sync_msg, challenge, nonce_hex, key, pq_kex_client_encapsulate, warn_no_pq=warn)
return await asyncio.to_thread(
_build_auth_message,
msg,
challenge,
nonce_hex,
private_key,
encapsulate,
warn_no_pq=warn_no_pq,
)
def _decode_pq_response(response: bytes | None, pq_shared_secret: bytes | None) -> bytes | None:
if response is None or pq_shared_secret is None:
return response
try:
from browser_cli.auth import pq_decrypt
envelope = json.loads(response)
if isinstance(envelope, dict) and "encrypted" in envelope:
return pq_decrypt(pq_shared_secret, "response", envelope["encrypted"])
except Exception as e:
raise BrowserNotConnected(f"Cannot decrypt post-quantum remote response: {e}") from e
return response
def _with_challenge(challenge_raw: bytes, msg: dict, private_key, build_auth: Callable[[dict, dict | None, str | None, object], T]) -> T:
if challenge_raw is None:
raise BrowserNotConnected("No challenge received from remote endpoint")
challenge, nonce_hex = _parse_challenge(challenge_raw)
_check_min_client_version(challenge)
return build_auth(msg, challenge, nonce_hex, private_key)
def _should_warn_no_pq(msg: dict) -> bool:
return not bool(msg.pop("_suppress_pq_warning", False))
with _open_socket(endpoint) as sock:
payload_msg, pq_shared_secret = _with_challenge(_recv_all(sock), msg, private_key, build_auth)
sock.sendall(frame(json.dumps(payload_msg).encode("utf-8")))
return _decode_pq_response(_recv_all(sock), pq_shared_secret)
async def _send_remote_async(endpoint: str, msg: dict, private_key=None, *, warn_no_pq: bool | None = None) -> bytes | None:
reader, writer = await _open_async_connection(endpoint)
@@ -216,15 +57,3 @@ async def _send_remote_async(endpoint: str, msg: dict, private_key=None, *, warn
await writer.wait_closed()
except Exception:
pass
def _send_remote(endpoint: str, msg: dict, private_key=None, *, warn_no_pq: bool | None = None) -> bytes | None:
warn = _should_warn_no_pq(msg) if warn_no_pq is None else warn_no_pq
def build_auth(sync_msg: dict, challenge: dict | None, nonce_hex: str | None, key):
from browser_cli.auth import pq_kex_client_encapsulate
return _build_auth_message(sync_msg, challenge, nonce_hex, key, pq_kex_client_encapsulate, warn_no_pq=warn)
with _open_socket(endpoint) as sock:
payload_msg, pq_shared_secret = _with_challenge(_recv_all(sock), msg, private_key, build_auth)
sock.sendall(frame(json.dumps(payload_msg).encode("utf-8")))
return _decode_pq_response(_recv_all(sock), pq_shared_secret)
+3 -3
View File
@@ -9,7 +9,7 @@ from __future__ import annotations
from collections.abc import Callable
from functools import wraps
from typing import Any, TypeVar
from typing import Any, TypeVar, cast
F = TypeVar("F", bound=Callable)
_MISSING = object()
@@ -54,8 +54,8 @@ def sdk_command(
return _clone_default(default)
return result
wrapper._browser_cli_command = name # type: ignore[attr-defined]
return wrapper # type: ignore[return-value]
setattr(wrapper, "_browser_cli_command", name)
return cast(F, wrapper)
return decorator
+5 -5
View File
@@ -5,7 +5,7 @@ import asyncio
import functools
import inspect
from collections.abc import Callable
from typing import TypeVar
from typing import TypeVar, cast
from browser_cli.sdk.base import Namespace
from browser_cli.sdk.workflow_decorators import WorkflowDecoratorsMixin, _NO_INJECT
@@ -53,7 +53,7 @@ class DecoratorsNS(WorkflowDecoratorsMixin, Namespace):
finally:
if cleanup is not None:
await asyncio.to_thread(cleanup, value)
return async_wrapper # type: ignore[return-value]
return cast(F, async_wrapper)
return WorkflowDecoratorsMixin._value_decorator(
self, fn, get_value, keyword=keyword, cleanup=cleanup
)
@@ -74,7 +74,7 @@ class DecoratorsNS(WorkflowDecoratorsMixin, Namespace):
finally:
if previous:
await asyncio.to_thread(self._c.perf.set_profile, previous)
return async_wrapper # type: ignore[return-value]
return cast(F, async_wrapper)
return WorkflowDecoratorsMixin.performance_profile(self, profile, restore=restore)(fn)
return decorator
@@ -101,7 +101,7 @@ class DecoratorsNS(WorkflowDecoratorsMixin, Namespace):
raise
if delay > 0:
await asyncio.sleep(delay)
raise last_error # type: ignore[misc]
return async_wrapper # type: ignore[return-value]
raise cast(BaseException, last_error)
return cast(F, async_wrapper)
return WorkflowDecoratorsMixin.retry(self, times=times, delay=delay, exceptions=exceptions)(fn)
return decorator
+35 -14
View File
@@ -4,11 +4,32 @@ from __future__ import annotations
import functools
import time
from collections.abc import Callable
from typing import TypeVar
from typing import Protocol, TypeVar, cast
F = TypeVar("F", bound=Callable)
_NO_INJECT = object()
class _WorkflowTabs(Protocol):
def active(self): ...
def open(self, *args, **kwargs): ...
def watch_url(self, *args, **kwargs): ...
class _WorkflowDom(Protocol):
def wait_for(self, *args, **kwargs): ...
class _WorkflowPerf(Protocol):
def status(self): ...
def set_profile(self, profile: str): ...
class _WorkflowSession(Protocol):
def save(self, name: str): ...
class _WorkflowClient(Protocol):
tabs: _WorkflowTabs
dom: _WorkflowDom
perf: _WorkflowPerf
session: _WorkflowSession
class WorkflowDecoratorsMixin:
"""Shared implementation for sync and async workflow decorators.
@@ -17,7 +38,7 @@ class WorkflowDecoratorsMixin:
in lockstep.
"""
_c: object
_c: _WorkflowClient
@staticmethod
def _inject(kwargs: dict, keyword: str | None, value):
@@ -62,7 +83,7 @@ class WorkflowDecoratorsMixin:
finally:
if cleanup is not None:
self._run(cleanup, value)
return wrapper # type: ignore[return-value]
return cast(F, wrapper)
return decorator(func) if func is not None else decorator
@@ -72,7 +93,7 @@ class WorkflowDecoratorsMixin:
By default the tab is injected as ``tab=...``. Pass ``keyword=None`` to
pass it as the first positional argument instead.
"""
return self._value_decorator(func, self._c.tabs.active, keyword=keyword) # type: ignore[attr-defined]
return self._value_decorator(func, self._c.tabs.active, keyword=keyword)
def new_tab(
self,
@@ -93,7 +114,7 @@ class WorkflowDecoratorsMixin:
wrapped function returns or raises.
"""
def open_tab():
return self._c.tabs.open( # type: ignore[attr-defined]
return self._c.tabs.open(
url,
wait=wait,
timeout=timeout,
@@ -124,7 +145,7 @@ class WorkflowDecoratorsMixin:
the wrapped function. By default the result is not injected.
"""
def wait():
return self._c.dom.wait_for( # type: ignore[attr-defined]
return self._c.dom.wait_for(
selector,
timeout=timeout,
visible=visible,
@@ -145,7 +166,7 @@ class WorkflowDecoratorsMixin:
):
"""Wait until a tab URL matches *pattern* before calling the function."""
def wait():
return self._c.tabs.watch_url(pattern, tab_id=tab_id, timeout=timeout) # type: ignore[attr-defined]
return self._c.tabs.watch_url(pattern, tab_id=tab_id, timeout=timeout)
inject = keyword if keyword is not None else _NO_INJECT
return self._value_decorator(None, wait, keyword=inject)
@@ -157,19 +178,19 @@ class WorkflowDecoratorsMixin:
def wrapper(*args, **kwargs):
previous = None
if restore:
previous = self._run(self._c.perf.status).get("performanceProfile") # type: ignore[attr-defined]
self._run(self._c.perf.set_profile, profile) # type: ignore[attr-defined]
previous = self._run(self._c.perf.status).get("performanceProfile")
self._run(self._c.perf.set_profile, profile)
try:
return self._call_wrapped(fn, *args, **kwargs)
finally:
if previous:
self._run(self._c.perf.set_profile, previous) # type: ignore[attr-defined]
return wrapper # type: ignore[return-value]
self._run(self._c.perf.set_profile, previous)
return cast(F, wrapper)
return decorator
def save_session_before(self, name: str):
"""Save the current browser session before running the function."""
return self._value_decorator(None, lambda: self._c.session.save(name), keyword=_NO_INJECT) # type: ignore[attr-defined]
return self._value_decorator(None, lambda: self._c.session.save(name), keyword=_NO_INJECT)
def retry(
self,
@@ -194,7 +215,7 @@ class WorkflowDecoratorsMixin:
raise
if delay > 0:
self._sleep(delay)
raise last_error # type: ignore[misc]
return wrapper # type: ignore[return-value]
raise cast(BaseException, last_error)
return cast(F, wrapper)
return decorator
+31
View File
@@ -0,0 +1,31 @@
"""Challenge-frame helpers for ``browser-cli serve``."""
from __future__ import annotations
import asyncio
import secrets
from pathlib import Path
from browser_cli.version_manager import PROTOCOL_MIN_CLIENT, get_installed_version
async def load_auth_keys(auth_keys_path: Path | None) -> list[str] | None:
if auth_keys_path is None:
return None
from browser_cli.auth import load_authorized_keys
return await asyncio.to_thread(load_authorized_keys, auth_keys_path)
async def build_challenge(auth_keys_path: Path | None) -> tuple[str, object | None, dict]:
nonce = secrets.token_hex(32)
pq_private_key = None
challenge_msg = {
"type": "challenge",
"nonce": nonce,
"server_version": get_installed_version(),
"min_client_version": PROTOCOL_MIN_CLIENT,
}
if auth_keys_path is not None:
from browser_cli.auth import PQ_KEX_ALG, pq_kex_server_keypair
pq_keypair = await asyncio.to_thread(pq_kex_server_keypair)
if pq_keypair is not None:
pq_private_key, pq_public_key = pq_keypair
challenge_msg["pq_kex"] = {"alg": PQ_KEX_ALG, "public_key": pq_public_key.hex()}
return nonce, pq_private_key, challenge_msg
+1 -25
View File
@@ -8,7 +8,6 @@ from __future__ import annotations
import asyncio
import json
import secrets
import socket
from dataclasses import dataclass
from pathlib import Path
@@ -17,10 +16,10 @@ from browser_cli import transport
from browser_cli.compat import adapt_auth
from browser_cli.framing import async_recv_frame, async_send_frame
from browser_cli.serve.auth import ServeAuthMixin
from browser_cli.serve.challenge import build_challenge as _build_challenge, load_auth_keys as _load_auth_keys
from browser_cli.serve.control import ServeControlMixin
from browser_cli.serve.logging import console, log_request
from browser_cli.serve.proxy import ServeProxyMixin
from browser_cli.version_manager import PROTOCOL_MIN_CLIENT, get_installed_version
async def _async_framed_send(writer: asyncio.StreamWriter, data: bytes) -> None:
await async_send_frame(writer, data)
@@ -140,29 +139,6 @@ async def _async_handle_client(
except Exception:
pass
async def _load_auth_keys(auth_keys_path: Path | None) -> list[str] | None:
if auth_keys_path is None:
return None
from browser_cli.auth import load_authorized_keys
return await asyncio.to_thread(load_authorized_keys, auth_keys_path)
async def _build_challenge(auth_keys_path: Path | None) -> tuple[str, object | None, dict]:
nonce = secrets.token_hex(32)
pq_private_key = None
challenge_msg = {
"type": "challenge",
"nonce": nonce,
"server_version": get_installed_version(),
"min_client_version": PROTOCOL_MIN_CLIENT,
}
if auth_keys_path is not None:
from browser_cli.auth import PQ_KEX_ALG, pq_kex_server_keypair
pq_keypair = await asyncio.to_thread(pq_kex_server_keypair)
if pq_keypair is not None:
pq_private_key, pq_public_key = pq_keypair
challenge_msg["pq_kex"] = {"alg": PQ_KEX_ALG, "public_key": pq_public_key.hex()}
return nonce, pq_private_key, challenge_msg
def _handle_client(
client_sock: socket.socket,
addr: tuple,
-214
View File
@@ -1,214 +0,0 @@
"""Response payload encoding for the TCP serve <-> client leg.
The wire frame stays ``4-byte LE length + payload``. The payload is made
self-describing so old peers keep working unchanged:
* A payload that starts with ``{`` or ``[`` is plain JSON (the historical
format). Old clients and old servers only ever produce/consume this.
* Any other leading byte is a 1-byte codec tag followed by the encoded body.
The tag's high nibble selects serialization, the low nibble compression::
tag = (serialization << 4) | compression
This is only ever emitted toward a peer that advertised support for it, so it
is fully backward compatible: clients announce what they can decode via the
``accept_encoding`` field in their request, and the server encodes the
response accordingly. Requests themselves stay plain JSON (they are tiny).
Compression is the big win — response payloads (``extract.html``,
``dom.query``, ``tabs.list`` over hundreds of tabs, base64 screenshots) are
heavy and text-like. msgpack additionally lets ``tabs.screenshot`` ship the
image as raw bytes instead of a base64 data URL (~33% smaller before
compression); the client transparently rebuilds the data URL so the SDK/CLI
API is unchanged.
"""
from __future__ import annotations
import base64
import gzip
import json
import re
import zlib
from browser_cli.constants import (
COMP_GZIP,
COMP_NONE,
COMP_ZLIB,
COMP_ZSTD,
DEFAULT_TRANSPORT_THRESHOLD,
SER_JSON,
SER_MSGPACK,
)
try: # optional: better ratio + speed than zlib/gzip
import zstandard as _zstd
except Exception: # pragma: no cover - depends on optional extra
_zstd = None
try: # optional: alternate serialization + raw binary for screenshots
import msgpack as _msgpack
except Exception: # pragma: no cover - depends on optional extra
_msgpack = None
# ── codec ids ────────────────────────────────────────────────────────────────
_SER_NAME = {SER_JSON: "json", SER_MSGPACK: "msgpack"}
_SER_ID = {v: k for k, v in _SER_NAME.items()}
_COMP_NAME = {COMP_NONE: "none", COMP_ZLIB: "zlib", COMP_GZIP: "gzip", COMP_ZSTD: "zstd"}
_COMP_ID = {v: k for k, v in _COMP_NAME.items()}
# Don't compress payloads smaller than this — the header/CPU cost is not worth it.
# JSON top-level values always start with one of these bytes; a tag byte never does.
_JSON_FIRST_BYTES = frozenset(b"{[")
def msgpack_available() -> bool:
return _msgpack is not None
def zstd_available() -> bool:
return _zstd is not None
def supported_serialization() -> list[str]:
"""Serializations this build can produce/consume, best first."""
return (["msgpack"] if _msgpack is not None else []) + ["json"]
def supported_compression() -> list[str]:
"""Compression codecs this build can produce/consume, best first."""
return (["zstd"] if _zstd is not None else []) + ["gzip", "zlib"]
def client_accept_encoding() -> dict:
"""What the local client advertises it can decode (sent with each request)."""
return {"ser": supported_serialization(), "comp": supported_compression()}
# ── compression primitives ────────────────────────────────────────────────────
def _compress(comp_id: int, data: bytes) -> bytes:
if comp_id == COMP_NONE:
return data
if comp_id == COMP_ZLIB:
return zlib.compress(data, 6)
if comp_id == COMP_GZIP:
return gzip.compress(data, compresslevel=6)
if comp_id == COMP_ZSTD:
if _zstd is None:
raise ValueError("zstd compression requested but zstandard is not installed")
return _zstd.ZstdCompressor(level=10).compress(data)
raise ValueError(f"unknown compression id {comp_id}")
def _decompress(comp_id: int, data: bytes) -> bytes:
if comp_id == COMP_NONE:
return data
if comp_id == COMP_ZLIB:
return zlib.decompress(data)
if comp_id == COMP_GZIP:
return gzip.decompress(data)
if comp_id == COMP_ZSTD:
if _zstd is None:
raise ValueError("zstd payload received but zstandard is not installed")
return _zstd.ZstdDecompressor().decompress(data)
raise ValueError(f"unknown compression id {comp_id}")
# ── codec negotiation ──────────────────────────────────────────────────────────
def _choose(accept: dict | None) -> tuple[int, int]:
"""Pick (serialization_id, compression_id) the peer accepts, server preference first."""
accept = accept if isinstance(accept, dict) else {}
accept_ser = accept.get("ser") or ["json"]
accept_comp = accept.get("comp") or []
ser = SER_JSON
if _msgpack is not None and "msgpack" in accept_ser:
ser = SER_MSGPACK
comp = COMP_NONE
for name in supported_compression(): # server preference: zstd > gzip > zlib
if name in accept_comp:
comp = _COMP_ID[name]
break
return ser, comp
# ── raw-binary hoisting (screenshots) ──────────────────────────────────────────
_DATA_URL_RE = re.compile(r"^data:([^;,]+);base64,(.+)$", re.S)
_B64_MARKER = "__b64__"
def _hoist_screenshot(obj, command: str | None):
"""Replace a screenshot data URL with raw bytes so msgpack ships it unencoded.
Gated to ``tabs.screenshot`` so we never touch arbitrary page-derived data.
"""
if command != "tabs.screenshot" or not isinstance(obj, dict):
return obj
data = obj.get("data")
if not isinstance(data, dict):
return obj
url = data.get("dataUrl")
if not isinstance(url, str):
return obj
m = _DATA_URL_RE.match(url)
if not m:
return obj
try:
raw = base64.b64decode(m.group(2))
except Exception:
return obj
new_data = dict(data)
new_data["dataUrl"] = {_B64_MARKER: True, "mime": m.group(1), "raw": raw}
return {**obj, "data": new_data}
def _unhoist_binary(obj):
"""Rebuild any hoisted data URL so callers see the original string again."""
if isinstance(obj, dict):
raw = obj.get("raw")
if obj.get(_B64_MARKER) and isinstance(raw, (bytes, bytearray)):
mime = obj.get("mime") or "application/octet-stream"
return f"data:{mime};base64," + base64.b64encode(bytes(raw)).decode("ascii")
return {k: _unhoist_binary(v) for k, v in obj.items()}
if isinstance(obj, list):
return [_unhoist_binary(v) for v in obj]
return obj
# ── encode / decode ─────────────────────────────────────────────────────────────
def encode_response(obj, accept: dict | None = None, command: str | None = None,
threshold: int = DEFAULT_TRANSPORT_THRESHOLD) -> bytes:
"""Encode a response object for the chosen/accepted codec.
Returns bare JSON bytes when no encoding is negotiated, which is byte-for-byte
what an old server would have sent.
"""
ser, comp = _choose(accept)
if ser == SER_MSGPACK:
body = _msgpack.packb(_hoist_screenshot(obj, command), use_bin_type=True)
else:
body = json.dumps(obj).encode("utf-8")
if comp != COMP_NONE and len(body) >= threshold:
body = _compress(comp, body)
else:
comp = COMP_NONE
if ser == SER_JSON and comp == COMP_NONE:
return body # plain JSON — historical wire format, no tag byte
return bytes([(ser << 4) | comp]) + body
def decode_response(raw: bytes | None):
"""Decode a payload produced by :func:`encode_response` (or plain JSON)."""
if raw is None:
return None
if not raw:
raise ValueError("empty response payload")
if raw[0] in _JSON_FIRST_BYTES:
return json.loads(raw)
tag = raw[0]
ser, comp = tag >> 4, tag & 0x0F
body = _decompress(comp, raw[1:])
if ser == SER_MSGPACK:
if _msgpack is None:
raise ValueError("msgpack payload received but msgpack is not installed")
return _unhoist_binary(_msgpack.unpackb(body, raw=False))
if ser == SER_JSON:
return json.loads(body)
raise ValueError(f"unknown serialization id {ser}")
+72
View File
@@ -0,0 +1,72 @@
"""Response payload encoding for the TCP serve <-> client leg.
The wire frame stays ``4-byte LE length + payload``. Payloads are plain JSON
for legacy peers, or a 1-byte codec tag followed by serialized/compressed data
when the peer advertised support for it.
"""
from __future__ import annotations
import json
from browser_cli.constants import COMP_GZIP, COMP_NONE, COMP_ZLIB, COMP_ZSTD, DEFAULT_TRANSPORT_THRESHOLD, SER_JSON, SER_MSGPACK
from browser_cli.transport.binary import hoist_screenshot as _hoist_screenshot, unhoist_binary as _unhoist_binary
from browser_cli.transport.codecs import (
JSON_FIRST_BYTES as _JSON_FIRST_BYTES,
_msgpack,
choose_codec as _choose,
client_accept_encoding,
compress_payload as _compress,
decompress_payload as _decompress,
msgpack_available,
supported_compression,
supported_serialization,
zstd_available,
)
def encode_response(
obj,
accept: dict | None = None,
command: str | None = None,
threshold: int = DEFAULT_TRANSPORT_THRESHOLD,
) -> bytes:
"""Encode a response object for the chosen/accepted codec.
Returns bare JSON bytes when no encoding is negotiated, which is byte-for-byte
what an old server would have sent.
"""
ser, comp = _choose(accept)
if ser == SER_MSGPACK:
body = _msgpack.packb(_hoist_screenshot(obj, command), use_bin_type=True)
else:
body = json.dumps(obj).encode("utf-8")
if comp != COMP_NONE and len(body) >= threshold:
body = _compress(comp, body)
else:
comp = COMP_NONE
if ser == SER_JSON and comp == COMP_NONE:
return body # plain JSON — historical wire format, no tag byte
return bytes([(ser << 4) | comp]) + body
def decode_response(raw: bytes | None):
"""Decode a payload produced by :func:`encode_response` (or plain JSON)."""
if raw is None:
return None
if not raw:
raise ValueError("empty response payload")
if raw[0] in _JSON_FIRST_BYTES:
return json.loads(raw)
tag = raw[0]
ser, comp = tag >> 4, tag & 0x0F
body = _decompress(comp, raw[1:])
if ser == SER_MSGPACK:
if _msgpack is None:
raise ValueError("msgpack payload received but msgpack is not installed")
return _unhoist_binary(_msgpack.unpackb(body, raw=False))
if ser == SER_JSON:
return json.loads(body)
raise ValueError(f"unknown serialization id {ser}")
+44
View File
@@ -0,0 +1,44 @@
"""Raw-binary hoisting helpers for encoded response payloads."""
from __future__ import annotations
import base64
import re
DATA_URL_RE = re.compile(r"^data:([^;,]+);base64,(.+)$", re.S)
B64_MARKER = "__b64__"
def hoist_screenshot(obj, command: str | None):
"""Replace a screenshot data URL with raw bytes so msgpack ships it unencoded.
Gated to ``tabs.screenshot`` so arbitrary page-derived data is never touched.
"""
if command != "tabs.screenshot" or not isinstance(obj, dict):
return obj
data = obj.get("data")
if not isinstance(data, dict):
return obj
url = data.get("dataUrl")
if not isinstance(url, str):
return obj
match = DATA_URL_RE.match(url)
if not match:
return obj
try:
raw = base64.b64decode(match.group(2))
except Exception:
return obj
new_data = dict(data)
new_data["dataUrl"] = {B64_MARKER: True, "mime": match.group(1), "raw": raw}
return {**obj, "data": new_data}
def unhoist_binary(obj):
"""Rebuild any hoisted data URL so callers see the original string again."""
if isinstance(obj, dict):
raw = obj.get("raw")
if obj.get(B64_MARKER) and isinstance(raw, (bytes, bytearray)):
mime = obj.get("mime") or "application/octet-stream"
return f"data:{mime};base64," + base64.b64encode(bytes(raw)).decode("ascii")
return {key: unhoist_binary(value) for key, value in obj.items()}
if isinstance(obj, list):
return [unhoist_binary(value) for value in obj]
return obj
+84
View File
@@ -0,0 +1,84 @@
"""Serialization/compression primitives for TCP response payloads."""
from __future__ import annotations
import gzip
import zlib
from browser_cli.constants import COMP_GZIP, COMP_NONE, COMP_ZLIB, COMP_ZSTD, SER_JSON, SER_MSGPACK
try: # optional: better ratio + speed than zlib/gzip
import zstandard as _zstd
except Exception: # pragma: no cover - depends on optional extra
_zstd = None
try: # optional: alternate serialization + raw binary for screenshots
import msgpack as _msgpack
except Exception: # pragma: no cover - depends on optional extra
_msgpack = None
SERIALIZATION_NAME = {SER_JSON: "json", SER_MSGPACK: "msgpack"}
SERIALIZATION_ID = {value: key for key, value in SERIALIZATION_NAME.items()}
COMPRESSION_NAME = {COMP_NONE: "none", COMP_ZLIB: "zlib", COMP_GZIP: "gzip", COMP_ZSTD: "zstd"}
COMPRESSION_ID = {value: key for key, value in COMPRESSION_NAME.items()}
JSON_FIRST_BYTES = frozenset(b"{[")
def msgpack_available() -> bool:
return _msgpack is not None
def zstd_available() -> bool:
return _zstd is not None
def supported_serialization() -> list[str]:
"""Serializations this build can produce/consume, best first."""
return (["msgpack"] if _msgpack is not None else []) + ["json"]
def supported_compression() -> list[str]:
"""Compression codecs this build can produce/consume, best first."""
return (["zstd"] if _zstd is not None else []) + ["gzip", "zlib"]
def client_accept_encoding() -> dict:
"""What the local client advertises it can decode (sent with each request)."""
return {"ser": supported_serialization(), "comp": supported_compression()}
def compress_payload(comp_id: int, data: bytes) -> bytes:
if comp_id == COMP_NONE:
return data
if comp_id == COMP_ZLIB:
return zlib.compress(data, 6)
if comp_id == COMP_GZIP:
return gzip.compress(data, compresslevel=6)
if comp_id == COMP_ZSTD:
if _zstd is None:
raise ValueError("zstd compression requested but zstandard is not installed")
return _zstd.ZstdCompressor(level=10).compress(data)
raise ValueError(f"unknown compression id {comp_id}")
def decompress_payload(comp_id: int, data: bytes) -> bytes:
if comp_id == COMP_NONE:
return data
if comp_id == COMP_ZLIB:
return zlib.decompress(data)
if comp_id == COMP_GZIP:
return gzip.decompress(data)
if comp_id == COMP_ZSTD:
if _zstd is None:
raise ValueError("zstd payload received but zstandard is not installed")
return _zstd.ZstdDecompressor().decompress(data)
raise ValueError(f"unknown compression id {comp_id}")
def choose_codec(accept: dict | None) -> tuple[int, int]:
"""Pick (serialization_id, compression_id) the peer accepts, server preference first."""
accept = accept if isinstance(accept, dict) else {}
accept_ser = accept.get("ser") or ["json"]
accept_comp = accept.get("comp") or []
serialization = SER_JSON
if _msgpack is not None and "msgpack" in accept_ser:
serialization = SER_MSGPACK
compression = COMP_NONE
for name in supported_compression(): # server preference: zstd > gzip > zlib
if name in accept_comp:
compression = COMPRESSION_ID[name]
break
return serialization, compression
+1 -1
View File
@@ -1,7 +1,7 @@
{
"manifest_version": 3,
"name": "browser-cli",
"version": "0.15.2",
"version": "0.15.3",
"description": "Control your browser from the terminal or Python SDK",
"browser_specific_settings": {
"gecko": {
-404
View File
@@ -1,404 +0,0 @@
import type { ContentArgs } from '../types';
export function extractMarkdown({ selector }: ContentArgs) {
const BLOCKS = new Set([
"article", "aside", "blockquote", "body", "div", "dl", "fieldset", "figcaption",
"figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr",
"li", "main", "nav", "ol", "p", "pre", "section", "table", "tbody", "td", "tfoot",
"th", "thead", "tr", "ul"
]);
const NOISE_SELECTOR = [
"script",
"style",
"noscript",
"template",
"svg",
"canvas",
"iframe",
"dialog",
"button",
"input",
"textarea",
"select",
"option",
"form",
"[hidden]",
"[aria-hidden='true']",
".sr-only",
"[class*='sr-only']",
"[class*='file-tile']",
"form[data-type='unified-composer']",
".composer-btn",
"[data-composer-surface='true']",
"#thread-bottom-container",
"[data-testid*='action-button']",
].join(", ");
function normalizeText(value: string) {
return value.replace(/\s+/g, " ").trim();
}
function normalizeInline(value: string) {
return value
.replace(/[ \t]+\n/g, "\n")
.replace(/\n[ \t]+/g, "\n")
.replace(/\n{3,}/g, "\n\n")
.replace(/[ \t]{2,}/g, " ")
.trim();
}
function collapseBlankLines(value: string) {
return value
.replace(/[ \t]+\n/g, "\n")
.replace(/\n{3,}/g, "\n\n")
.trim();
}
function escapeMarkdown(text: string) {
return text.replace(/([\\`[\]])/g, "\\$1");
}
function escapeTableCell(text: string) {
return text.replace(/\|/g, "\\|").replace(/\n+/g, " ").trim();
}
function absoluteUrl(attr: string | null | undefined, fallback?: string) {
return attr || fallback || "";
}
function isNoiseElement(node: Node | null): boolean {
if (!node || node.nodeType !== Node.ELEMENT_NODE) return false;
const el = node as Element;
const tag = el.tagName.toLowerCase();
if (["script", "style", "noscript", "template", "svg", "canvas", "iframe", "dialog"].includes(tag)) return true;
if (["button", "input", "textarea", "select", "option", "form"].includes(tag)) return true;
if (el.hasAttribute("hidden")) return true;
if ((el.getAttribute("aria-hidden") || "").toLowerCase() === "true") return true;
if (el.matches(".sr-only, [class*='sr-only']")) return true;
if (el.matches("[class*='file-tile'], form[data-type='unified-composer'], .composer-btn, [data-composer-surface='true'], #thread-bottom-container")) return true;
if (el.matches("[data-testid*='action-button']")) return true;
return false;
}
function stripNoise(root: Element): Element {
const clone = root.cloneNode(true) as Element;
clone.querySelectorAll(NOISE_SELECTOR).forEach(node => node.remove());
return clone;
}
function candidateScore(node: Element) {
const text = normalizeText((node as HTMLElement).innerText || "");
if (!text) return -Infinity;
const headings = node.querySelectorAll("h1, h2, h3, h4, h5, h6").length;
const paragraphs = node.querySelectorAll("p").length;
const listItems = node.querySelectorAll("li").length;
const tables = node.querySelectorAll("table").length;
const codeBlocks = node.querySelectorAll("pre, code").length;
const images = node.querySelectorAll("img, figure").length;
const mainLike = node.matches("main, article, [role='main']") ? 1 : 0;
const proseBlocks = node.matches(".markdown, .prose, [data-message-author-role='assistant']") ? 1 : 0;
const buttons = node.querySelectorAll("button, input, textarea, select").length;
const forms = node.querySelectorAll("form").length;
const svgs = node.querySelectorAll("svg, canvas").length;
return text.length
+ (mainLike * 4000)
+ (proseBlocks * 5000)
+ (headings * 250)
+ (paragraphs * 60)
+ (listItems * 35)
+ (tables * 80)
+ (codeBlocks * 60)
+ (images * 25)
- (buttons * 120)
- (forms * 200)
- (svgs * 40);
}
function pickRoot() {
if (selector) {
const matched = document.querySelector(selector);
if (!matched) throw new Error(`No element: ${selector}`);
return matched;
}
const candidates = Array.from(document.querySelectorAll(
"main, article, [role='main'], section, .markdown, .prose, [data-message-author-role]"
))
.filter(node => normalizeText((node as HTMLElement).innerText || "").length > 0);
if (!candidates.length) return document.body;
candidates.sort((a, b) => candidateScore(b) - candidateScore(a));
return candidates[0];
}
function inlineText(node: Node): string {
if (node.nodeType === Node.TEXT_NODE) {
return escapeMarkdown(node.textContent || "");
}
if (node.nodeType !== Node.ELEMENT_NODE) return "";
if (isNoiseElement(node)) return "";
const el = node as HTMLElement;
const tag = el.tagName.toLowerCase();
if (tag === "br") return "\n";
if (tag === "img") {
const img = el as HTMLImageElement;
const src = absoluteUrl(img.getAttribute("src"), img.src);
if (!src) return "";
const alt = normalizeText(img.getAttribute("alt") || "");
return alt ? `![${escapeMarkdown(alt)}](${src})` : `![](${src})`;
}
if (tag === "a") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
const href = absoluteUrl(el.getAttribute("href"), (el as HTMLAnchorElement).href);
if (!href) return text;
return `[${text || href}](${href})`;
}
if (tag === "code") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
return text ? `\`${text.replace(/`/g, "\\`")}\`` : "";
}
if (tag === "strong" || tag === "b") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
return text ? `**${text}**` : "";
}
if (tag === "em" || tag === "i") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
return text ? `*${text}*` : "";
}
const chunks: string[] = [];
for (const child of el.childNodes) {
const rendered = inlineText(child);
if (!rendered) continue;
chunks.push(rendered);
if (child.nodeType === Node.ELEMENT_NODE && BLOCKS.has((child as Element).tagName.toLowerCase())) {
chunks.push("\n");
}
}
return chunks.join("");
}
function textBlock(node: Node): string {
return collapseBlankLines(normalizeInline(Array.from(node.childNodes).map(inlineText).join("")));
}
function preserveNodeText(node: Node): string {
if (node.nodeType === Node.TEXT_NODE) {
return node.textContent || "";
}
if (node.nodeType !== Node.ELEMENT_NODE) return "";
const el = node as HTMLElement;
const tag = el.tagName.toLowerCase();
if (tag === "br") return "\n";
const parts: string[] = [];
for (const child of el.childNodes) {
const rendered = preserveNodeText(child);
if (!rendered) continue;
parts.push(rendered);
}
if (["div", "p", "li"].includes(tag)) {
return `${parts.join("")}\n`;
}
return parts.join("");
}
function repairFlattenedDiagram(text: string): string {
if (text.includes("\n")) return text;
const markerCount = (text.match(/[│▼├└]/g) || []).length;
if (markerCount < 2) return text;
let repaired = text;
repaired = repaired.replace(/\s{2,}([│▼])/g, "\n $1");
repaired = repaired.replace(/([│▼])\s{2,}/g, "$1\n");
repaired = repaired.replace(/([│▼])(?=[^\s\n│▼├└])/g, "$1\n");
repaired = repaired.replace(/(?<=[^\s\n])([├└])/g, "\n$1");
repaired = repaired.replace(/([^\s\n])(\()/g, "$1\n$2");
return repaired
.split("\n")
.map(line => line.replace(/\s+$/, ""))
.filter(line => line.trim())
.join("\n");
}
function convertDashListsToBranches(lines: string[]): string[] {
const converted: string[] = [];
let index = 0;
while (index < lines.length) {
const match = lines[index].match(/^(\s*)-\s+(.*)$/);
if (!match) {
converted.push(lines[index]);
index += 1;
continue;
}
const indent = match[1];
const items = [];
while (index < lines.length) {
const nextMatch = lines[index].match(new RegExp(`^${indent.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}-\\s+(.*)$`));
if (!nextMatch) break;
items.push(nextMatch[1]);
index += 1;
}
items.forEach((item, itemIndex) => {
const branch = itemIndex === items.length - 1 ? "└" : "├";
converted.push(`${indent}${branch} ${item}`);
});
}
return converted;
}
function normalizeCodeBlock(text: string): string {
let lines = text.replace(/\r\n?/g, "\n").split("\n").map(line => line.replace(/\s+$/, ""));
while (lines.length && !lines[0].trim()) lines.shift();
while (lines.length && !lines[lines.length - 1].trim()) lines.pop();
const flattened = repairFlattenedDiagram(lines.join("\n"));
lines = flattened ? flattened.split("\n") : [];
lines = lines.map(line => {
const trimmed = line.trim();
if ((trimmed === "│" || trimmed === "▼") && !/^\s+[│▼]\s*$/.test(line)) {
return ` ${trimmed}`;
}
return line;
});
lines = convertDashListsToBranches(lines);
return lines.join("\n");
}
function tableToMarkdown(table: Element) {
const rows = Array.from(table.querySelectorAll("tr"))
.map(row => Array.from(row.children)
.filter(cell => cell.tagName === "TD" || cell.tagName === "TH")
.map(cell => escapeTableCell(textBlock(cell)))
)
.filter(cells => cells.length > 0);
if (!rows.length) return "";
const widths = rows.reduce((max, row) => Math.max(max, row.length), 0);
const normalizedRows = rows.map(row => {
const next = row.slice();
while (next.length < widths) next.push("");
return next;
});
let headers = normalizedRows[0];
let bodyRows = normalizedRows.slice(1);
const firstRowIsBlank = headers.every(cell => !cell.trim());
if (firstRowIsBlank && normalizedRows.length > 1) {
headers = normalizedRows[1];
bodyRows = normalizedRows.slice(2);
}
const firstRow = table.querySelector("tr");
const thead = table.querySelector("thead");
const firstRowHasTh = firstRow && Array.from(firstRow.children).some(cell => cell.tagName === "TH");
if (!(thead || firstRowHasTh || firstRowIsBlank)) {
headers = new Array(widths).fill("");
bodyRows = normalizedRows;
}
const separator = new Array(widths).fill("---");
const lines = [
`| ${headers.join(" | ")} |`,
`| ${separator.join(" | ")} |`,
];
for (const row of bodyRows) {
lines.push(`| ${row.join(" | ")} |`);
}
return lines.join("\n");
}
function listToMarkdown(list: Element, depth = 0): string {
const ordered = list.tagName.toLowerCase() === "ol";
const items: string[] = [];
const children = Array.from(list.children).filter(child => child.tagName === "LI");
children.forEach((item, index) => {
const marker = ordered ? `${index + 1}. ` : "- ";
const indent = " ".repeat(depth);
const nested: string[] = [];
const content: string[] = [];
for (const child of item.childNodes) {
const childEl = child as Element;
if (child.nodeType === Node.ELEMENT_NODE && (childEl.tagName === "UL" || childEl.tagName === "OL")) {
nested.push(listToMarkdown(childEl, depth + 1));
} else {
content.push(inlineText(child));
}
}
const line = collapseBlankLines(normalizeInline(content.join("")));
if (line) {
const lineParts = line.split("\n");
items.push(`${indent}${marker}${lineParts[0]}`);
const continuationIndent = `${indent}${" ".repeat(marker.length)}`;
lineParts.slice(1).forEach(part => items.push(`${continuationIndent}${part}`));
}
nested.filter(Boolean).forEach(block => items.push(block));
});
return items.join("\n");
}
function blockToMarkdown(node: Node): string {
if (node.nodeType === Node.TEXT_NODE) {
return normalizeText(node.textContent || "");
}
if (node.nodeType !== Node.ELEMENT_NODE) return "";
if (isNoiseElement(node)) return "";
const el = node as HTMLElement;
const tag = el.tagName.toLowerCase();
if (tag === "table") return tableToMarkdown(el);
if (tag === "ul" || tag === "ol") return listToMarkdown(el);
if (el.matches(".cm-editor[data-is-code-block-view='true']")) {
const lines = Array.from(el.querySelectorAll(".cm-line")).map(line => {
const text = preserveNodeText(line);
return text === "\n" ? "" : text.replace(/\n$/, "");
});
const code = normalizeCodeBlock(lines.join("\n"));
return code ? `\`\`\`\n${code}\n\`\`\`` : "";
}
if (tag === "pre") {
const code = normalizeCodeBlock(preserveNodeText(el));
return code ? `\`\`\`\n${code}\n\`\`\`` : "";
}
if (tag === "blockquote") {
const content = collapseBlankLines(Array.from(el.childNodes).map(blockToMarkdown).join("\n\n"));
return content
.split("\n")
.map(line => line ? `> ${line}` : ">")
.join("\n");
}
if (/^h[1-6]$/.test(tag)) {
const level = Number(tag.slice(1));
const text = textBlock(el);
return text ? `${"#".repeat(level)} ${text}` : "";
}
if (tag === "p" || tag === "figcaption") {
return textBlock(el);
}
if (tag === "hr") {
return "---";
}
if (tag === "img") {
return inlineText(el);
}
const childBlocks = Array.from(el.childNodes)
.map(child => blockToMarkdown(child))
.filter(Boolean);
if (childBlocks.length) return collapseBlankLines(childBlocks.join("\n\n"));
return textBlock(node);
}
const root = stripNoise(pickRoot());
const markdown = blockToMarkdown(root);
return collapseBlankLines(markdown);
}
+63
View File
@@ -0,0 +1,63 @@
function repairFlattenedDiagram(text: string): string {
if (text.includes("\n")) return text;
const markerCount = (text.match(/[│▼├└]/g) || []).length;
if (markerCount < 2) return text;
let repaired = text;
repaired = repaired.replace(/\s{2,}([│▼])/g, "\n $1");
repaired = repaired.replace(/([│▼])\s{2,}/g, "$1\n");
repaired = repaired.replace(/([│▼])(?=[^\s\n│▼├└])/g, "$1\n");
repaired = repaired.replace(/(?<=[^\s\n])([├└])/g, "\n$1");
repaired = repaired.replace(/([^\s\n])(\()/g, "$1\n$2");
return repaired
.split("\n")
.map(line => line.replace(/\s+$/, ""))
.filter(line => line.trim())
.join("\n");
}
function convertDashListsToBranches(lines: string[]): string[] {
const converted: string[] = [];
let index = 0;
while (index < lines.length) {
const match = lines[index].match(/^(\s*)-\s+(.*)$/);
if (!match) {
converted.push(lines[index]);
index += 1;
continue;
}
const indent = match[1];
const items = [];
while (index < lines.length) {
const nextMatch = lines[index].match(new RegExp(`^${indent.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}-\\s+(.*)$`));
if (!nextMatch) break;
items.push(nextMatch[1]);
index += 1;
}
items.forEach((item, itemIndex) => {
const branch = itemIndex === items.length - 1 ? "└" : "├";
converted.push(`${indent}${branch} ${item}`);
});
}
return converted;
}
export function normalizeCodeBlock(text: string): string {
let lines = text.replace(/\r\n?/g, "\n").split("\n").map(line => line.replace(/\s+$/, ""));
while (lines.length && !lines[0].trim()) lines.shift();
while (lines.length && !lines[lines.length - 1].trim()) lines.pop();
const flattened = repairFlattenedDiagram(lines.join("\n"));
lines = flattened ? flattened.split("\n") : [];
lines = lines.map(line => {
const trimmed = line.trim();
if ((trimmed === "│" || trimmed === "▼") && !/^\s+[│▼]\s*$/.test(line)) {
return ` ${trimmed}`;
}
return line;
});
lines = convertDashListsToBranches(lines);
return lines.join("\n");
}
+9
View File
@@ -0,0 +1,9 @@
import type { ContentArgs } from '../../types';
import { pickMarkdownRoot } from './root';
import { renderMarkdown } from './renderer';
import { stripNoise } from './utils';
export function extractMarkdown({ selector }: ContentArgs) {
const root = stripNoise(pickMarkdownRoot(selector));
return renderMarkdown(root);
}
+217
View File
@@ -0,0 +1,217 @@
import { normalizeCodeBlock } from './code';
import {
absoluteUrl,
BLOCK_TAGS,
collapseBlankLines,
escapeMarkdown,
escapeTableCell,
isNoiseElement,
normalizeInline,
normalizeText,
} from './utils';
function inlineText(node: Node): string {
if (node.nodeType === Node.TEXT_NODE) {
return escapeMarkdown(node.textContent || "");
}
if (node.nodeType !== Node.ELEMENT_NODE) return "";
if (isNoiseElement(node)) return "";
const el = node as HTMLElement;
const tag = el.tagName.toLowerCase();
if (tag === "br") return "\n";
if (tag === "img") {
const img = el as HTMLImageElement;
const src = absoluteUrl(img.getAttribute("src"), img.src);
if (!src) return "";
const alt = normalizeText(img.getAttribute("alt") || "");
return alt ? `![${escapeMarkdown(alt)}](${src})` : `![](${src})`;
}
if (tag === "a") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
const href = absoluteUrl(el.getAttribute("href"), (el as HTMLAnchorElement).href);
if (!href) return text;
return `[${text || href}](${href})`;
}
if (tag === "code") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
return text ? `\`${text.replace(/`/g, "\\`")}\`` : "";
}
if (tag === "strong" || tag === "b") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
return text ? `**${text}**` : "";
}
if (tag === "em" || tag === "i") {
const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join(""));
return text ? `*${text}*` : "";
}
const chunks: string[] = [];
for (const child of el.childNodes) {
const rendered = inlineText(child);
if (!rendered) continue;
chunks.push(rendered);
if (child.nodeType === Node.ELEMENT_NODE && BLOCK_TAGS.has((child as Element).tagName.toLowerCase())) {
chunks.push("\n");
}
}
return chunks.join("");
}
function textBlock(node: Node): string {
return collapseBlankLines(normalizeInline(Array.from(node.childNodes).map(inlineText).join("")));
}
function preserveNodeText(node: Node): string {
if (node.nodeType === Node.TEXT_NODE) {
return node.textContent || "";
}
if (node.nodeType !== Node.ELEMENT_NODE) return "";
const el = node as HTMLElement;
const tag = el.tagName.toLowerCase();
if (tag === "br") return "\n";
const parts: string[] = [];
for (const child of el.childNodes) {
const rendered = preserveNodeText(child);
if (!rendered) continue;
parts.push(rendered);
}
if (["div", "p", "li"].includes(tag)) {
return `${parts.join("")}\n`;
}
return parts.join("");
}
function tableToMarkdown(table: Element) {
const rows = Array.from(table.querySelectorAll("tr"))
.map(row => Array.from(row.children)
.filter(cell => cell.tagName === "TD" || cell.tagName === "TH")
.map(cell => escapeTableCell(textBlock(cell)))
)
.filter(cells => cells.length > 0);
if (!rows.length) return "";
const widths = rows.reduce((max, row) => Math.max(max, row.length), 0);
const normalizedRows = rows.map(row => {
const next = row.slice();
while (next.length < widths) next.push("");
return next;
});
let headers = normalizedRows[0];
let bodyRows = normalizedRows.slice(1);
const firstRowIsBlank = headers.every(cell => !cell.trim());
if (firstRowIsBlank && normalizedRows.length > 1) {
headers = normalizedRows[1];
bodyRows = normalizedRows.slice(2);
}
const firstRow = table.querySelector("tr");
const thead = table.querySelector("thead");
const firstRowHasTh = firstRow && Array.from(firstRow.children).some(cell => cell.tagName === "TH");
if (!(thead || firstRowHasTh || firstRowIsBlank)) {
headers = new Array(widths).fill("");
bodyRows = normalizedRows;
}
const separator = new Array(widths).fill("---");
const lines = [
`| ${headers.join(" | ")} |`,
`| ${separator.join(" | ")} |`,
];
for (const row of bodyRows) {
lines.push(`| ${row.join(" | ")} |`);
}
return lines.join("\n");
}
function listToMarkdown(list: Element, depth = 0): string {
const ordered = list.tagName.toLowerCase() === "ol";
const items: string[] = [];
const children = Array.from(list.children).filter(child => child.tagName === "LI");
children.forEach((item, index) => {
const marker = ordered ? `${index + 1}. ` : "- ";
const indent = " ".repeat(depth);
const nested: string[] = [];
const content: string[] = [];
for (const child of item.childNodes) {
const childEl = child as Element;
if (child.nodeType === Node.ELEMENT_NODE && (childEl.tagName === "UL" || childEl.tagName === "OL")) {
nested.push(listToMarkdown(childEl, depth + 1));
} else {
content.push(inlineText(child));
}
}
const line = collapseBlankLines(normalizeInline(content.join("")));
if (line) {
const lineParts = line.split("\n");
items.push(`${indent}${marker}${lineParts[0]}`);
const continuationIndent = `${indent}${" ".repeat(marker.length)}`;
lineParts.slice(1).forEach(part => items.push(`${continuationIndent}${part}`));
}
nested.filter(Boolean).forEach(block => items.push(block));
});
return items.join("\n");
}
function blockToMarkdown(node: Node): string {
if (node.nodeType === Node.TEXT_NODE) {
return normalizeText(node.textContent || "");
}
if (node.nodeType !== Node.ELEMENT_NODE) return "";
if (isNoiseElement(node)) return "";
const el = node as HTMLElement;
const tag = el.tagName.toLowerCase();
if (tag === "table") return tableToMarkdown(el);
if (tag === "ul" || tag === "ol") return listToMarkdown(el);
if (el.matches(".cm-editor[data-is-code-block-view='true']")) {
const lines = Array.from(el.querySelectorAll(".cm-line")).map(line => {
const text = preserveNodeText(line);
return text === "\n" ? "" : text.replace(/\n$/, "");
});
const code = normalizeCodeBlock(lines.join("\n"));
return code ? `\`\`\`\n${code}\n\`\`\`` : "";
}
if (tag === "pre") {
const code = normalizeCodeBlock(preserveNodeText(el));
return code ? `\`\`\`\n${code}\n\`\`\`` : "";
}
if (tag === "blockquote") {
const content = collapseBlankLines(Array.from(el.childNodes).map(blockToMarkdown).join("\n\n"));
return content
.split("\n")
.map(line => line ? `> ${line}` : ">")
.join("\n");
}
if (/^h[1-6]$/.test(tag)) {
const level = Number(tag.slice(1));
const text = textBlock(el);
return text ? `${"#".repeat(level)} ${text}` : "";
}
if (tag === "p" || tag === "figcaption") {
return textBlock(el);
}
if (tag === "hr") {
return "---";
}
if (tag === "img") {
return inlineText(el);
}
const childBlocks = Array.from(el.childNodes)
.map(child => blockToMarkdown(child))
.filter(Boolean);
if (childBlocks.length) return collapseBlankLines(childBlocks.join("\n\n"));
return textBlock(node);
}
export function renderMarkdown(root: Element): string {
return collapseBlankLines(blockToMarkdown(root));
}
+47
View File
@@ -0,0 +1,47 @@
import { normalizeText } from './utils';
function candidateScore(node: Element) {
const text = normalizeText((node as HTMLElement).innerText || "");
if (!text) return -Infinity;
const headings = node.querySelectorAll("h1, h2, h3, h4, h5, h6").length;
const paragraphs = node.querySelectorAll("p").length;
const listItems = node.querySelectorAll("li").length;
const tables = node.querySelectorAll("table").length;
const codeBlocks = node.querySelectorAll("pre, code").length;
const images = node.querySelectorAll("img, figure").length;
const mainLike = node.matches("main, article, [role='main']") ? 1 : 0;
const proseBlocks = node.matches(".markdown, .prose, [data-message-author-role='assistant']") ? 1 : 0;
const buttons = node.querySelectorAll("button, input, textarea, select").length;
const forms = node.querySelectorAll("form").length;
const svgs = node.querySelectorAll("svg, canvas").length;
return text.length
+ (mainLike * 4000)
+ (proseBlocks * 5000)
+ (headings * 250)
+ (paragraphs * 60)
+ (listItems * 35)
+ (tables * 80)
+ (codeBlocks * 60)
+ (images * 25)
- (buttons * 120)
- (forms * 200)
- (svgs * 40);
}
export function pickMarkdownRoot(selector?: string) {
if (selector) {
const matched = document.querySelector(selector);
if (!matched) throw new Error(`No element: ${selector}`);
return matched;
}
const candidates = Array.from(document.querySelectorAll(
"main, article, [role='main'], section, .markdown, .prose, [data-message-author-role]"
))
.filter(node => normalizeText((node as HTMLElement).innerText || "").length > 0);
if (!candidates.length) return document.body;
candidates.sort((a, b) => candidateScore(b) - candidateScore(a));
return candidates[0];
}
+85
View File
@@ -0,0 +1,85 @@
export const BLOCK_TAGS = new Set([
"article", "aside", "blockquote", "body", "div", "dl", "fieldset", "figcaption",
"figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr",
"li", "main", "nav", "ol", "p", "pre", "section", "table", "tbody", "td", "tfoot",
"th", "thead", "tr", "ul"
]);
export const NOISE_SELECTOR = [
"script",
"style",
"noscript",
"template",
"svg",
"canvas",
"iframe",
"dialog",
"button",
"input",
"textarea",
"select",
"option",
"form",
"[hidden]",
"[aria-hidden='true']",
".sr-only",
"[class*='sr-only']",
"[class*='file-tile']",
"form[data-type='unified-composer']",
".composer-btn",
"[data-composer-surface='true']",
"#thread-bottom-container",
"[data-testid*='action-button']",
].join(", ");
export function normalizeText(value: string) {
return value.replace(/\s+/g, " ").trim();
}
export function normalizeInline(value: string) {
return value
.replace(/[ \t]+\n/g, "\n")
.replace(/\n[ \t]+/g, "\n")
.replace(/\n{3,}/g, "\n\n")
.replace(/[ \t]{2,}/g, " ")
.trim();
}
export function collapseBlankLines(value: string) {
return value
.replace(/[ \t]+\n/g, "\n")
.replace(/\n{3,}/g, "\n\n")
.trim();
}
export function escapeMarkdown(text: string) {
return text.replace(/([\\`[\]])/g, "\\$1");
}
export function escapeTableCell(text: string) {
return text.replace(/\|/g, "\\|").replace(/\n+/g, " ").trim();
}
export function absoluteUrl(attr: string | null | undefined, fallback?: string) {
return attr || fallback || "";
}
export function isNoiseElement(node: Node | null): boolean {
if (!node || node.nodeType !== Node.ELEMENT_NODE) return false;
const el = node as Element;
const tag = el.tagName.toLowerCase();
if (["script", "style", "noscript", "template", "svg", "canvas", "iframe", "dialog"].includes(tag)) return true;
if (["button", "input", "textarea", "select", "option", "form"].includes(tag)) return true;
if (el.hasAttribute("hidden")) return true;
if ((el.getAttribute("aria-hidden") || "").toLowerCase() === "true") return true;
if (el.matches(".sr-only, [class*='sr-only']")) return true;
if (el.matches("[class*='file-tile'], form[data-type='unified-composer'], .composer-btn, [data-composer-surface='true'], #thread-bottom-container")) return true;
if (el.matches("[data-testid*='action-button']")) return true;
return false;
}
export function stripNoise(root: Element): Element {
const clone = root.cloneNode(true) as Element;
clone.querySelectorAll(NOISE_SELECTOR).forEach(node => node.remove());
return clone;
}
+1 -1
View File
@@ -1,6 +1,6 @@
[project]
name = "real-browser-cli"
version = "0.15.2"
version = "0.15.3"
description = "Control your real running browser from the terminal or Python SDK"
readme = "README.md"
license = { file = "LICENSE" }
+31
View File
@@ -3,6 +3,7 @@ from os import terminal_size
from rich.console import Console
from rich.tree import Tree
from browser_cli.models import Tab
from browser_cli.commands import rendering
def test_shorten_uses_ellipsis():
@@ -32,3 +33,33 @@ def test_print_tree_uses_detected_width(monkeypatch):
monkeypatch.setattr(rendering, "terminal_width", lambda console=None: 132)
rendering.print_tree(Tree("Root"))
assert widths == [132]
def test_build_tabs_tree_groups_by_browser_window_and_group():
tabs = [
Tab(id=1, window_id=5, active=False, muted=False, title="Before", url="https://example.com/before", group_id=None, index=0, browser="work"),
Tab(id=2, window_id=5, active=False, muted=False, title="Inside", url="https://example.com/inside", group_id=9, index=1, browser="work"),
]
groups = [{"id": 9, "windowId": 5, "browser": "work", "title": "Research", "color": "blue", "tabCount": 1, "collapsed": True}]
tree = rendering.build_tabs_tree(tabs, groups, console=Console(width=120), show_urls=True)
text = "\n".join(str(line) for line in tree.__rich_console__(Console(width=120), Console(width=120).options))
assert "work" in text
assert "Window 5" in text
assert "Research" in text
assert "collapsed" in text
assert "Inside" in text
def test_build_windows_tree_keeps_multi_browser_windows_separate():
tabs = [
Tab(id=1, window_id=5, active=False, muted=False, title="Work Tab", url="https://example.com/work", index=0, browser="work"),
Tab(id=2, window_id=5, active=False, muted=False, title="Personal Tab", url="https://example.com/personal", index=0, browser="personal"),
]
windows = [
{"id": 5, "alias": "main", "browser": "work", "tabCount": 1, "state": "normal"},
{"id": 5, "alias": "main", "browser": "personal", "tabCount": 1, "state": "normal"},
]
tree = rendering.build_windows_tree(windows, tabs, console=Console(width=120))
text = "\n".join(str(line) for line in tree.__rich_console__(Console(width=120), Console(width=120).options))
assert "work: Window 5" in text
assert "personal: Window 5" in text
assert "Work Tab" in text
assert "Personal Tab" in text
Generated
+1 -1
View File
@@ -465,7 +465,7 @@ wheels = [
[[package]]
name = "real-browser-cli"
version = "0.15.2"
version = "0.15.3"
source = { editable = "." }
dependencies = [
{ name = "click" },