From 7cb2a8b618e2c71689c3be610e5c5c13aaf2034b Mon Sep 17 00:00:00 2001 From: Daniel Dolezal Date: Mon, 15 Jun 2026 01:23:57 +0200 Subject: [PATCH] refactor: modularize auth transport and markdown - Split auth into focused package modules for agent keys, file keys, signing, and post-quantum transport helpers while keeping the public browser_cli.auth import surface intact. - Move transport encoding internals into a package with separate codec and binary-hoisting helpers, preserving browser_cli.transport compatibility. - Extract remote TCP auth/socket helpers and serve challenge setup out of the runtime paths to make connection handling easier to reason about. - Move the extension markdown extractor into a dedicated content/markdown folder with separate root selection, code normalization, renderer, and utils. - Centralize CLI Rich rendering helpers for tab/window tree and table output, and add rendering tests for the shared builders. - Remove local typing ignores in SDK/decorator/script plumbing and bump the package and extension version to 0.15.3. --- browser_cli/async_sdk.py | 10 +- browser_cli/auth.py | 263 -------------- browser_cli/auth/__init__.py | 67 ++++ browser_cli/auth/agent.py | 103 ++++++ browser_cli/auth/keys.py | 59 +++ browser_cli/auth/pq.py | 65 ++++ browser_cli/auth/signing.py | 42 +++ browser_cli/commands/rendering.py | 144 +++++++- browser_cli/commands/script.py | 86 ++--- browser_cli/commands/tabs.py | 98 +---- browser_cli/commands/windows.py | 48 +-- browser_cli/markdown/html.py | 20 - browser_cli/remote/auth.py | 145 ++++++++ browser_cli/remote/socket.py | 52 +++ browser_cli/remote/transport.py | 227 ++---------- browser_cli/sdk/base.py | 6 +- browser_cli/sdk/decorators.py | 10 +- browser_cli/sdk/workflow_decorators.py | 49 ++- browser_cli/serve/challenge.py | 31 ++ browser_cli/serve/runtime.py | 26 +- browser_cli/transport.py | 214 ----------- browser_cli/transport/__init__.py | 72 ++++ browser_cli/transport/binary.py | 44 +++ browser_cli/transport/codecs.py | 84 +++++ extension/manifest.json | 2 +- extension/src/content/markdown.ts | 404 --------------------- extension/src/content/markdown/code.ts | 63 ++++ extension/src/content/markdown/index.ts | 9 + extension/src/content/markdown/renderer.ts | 217 +++++++++++ extension/src/content/markdown/root.ts | 47 +++ extension/src/content/markdown/utils.ts | 85 +++++ pyproject.toml | 2 +- tests/test_rendering.py | 31 ++ uv.lock | 2 +- 34 files changed, 1502 insertions(+), 1325 deletions(-) delete mode 100644 browser_cli/auth.py create mode 100644 browser_cli/auth/__init__.py create mode 100644 browser_cli/auth/agent.py create mode 100644 browser_cli/auth/keys.py create mode 100644 browser_cli/auth/pq.py create mode 100644 browser_cli/auth/signing.py create mode 100644 browser_cli/remote/auth.py create mode 100644 browser_cli/remote/socket.py create mode 100644 browser_cli/serve/challenge.py delete mode 100644 browser_cli/transport.py create mode 100644 browser_cli/transport/__init__.py create mode 100644 browser_cli/transport/binary.py create mode 100644 browser_cli/transport/codecs.py delete mode 100644 extension/src/content/markdown.ts create mode 100644 extension/src/content/markdown/code.ts create mode 100644 extension/src/content/markdown/index.ts create mode 100644 extension/src/content/markdown/renderer.ts create mode 100644 extension/src/content/markdown/root.ts create mode 100644 extension/src/content/markdown/utils.ts diff --git a/browser_cli/async_sdk.py b/browser_cli/async_sdk.py index 175fede..b84cdf9 100644 --- a/browser_cli/async_sdk.py +++ b/browser_cli/async_sdk.py @@ -12,7 +12,7 @@ from __future__ import annotations import asyncio import functools from collections.abc import Callable -from typing import TypeVar +from typing import TypeVar, cast from browser_cli.models import Group, Tab from browser_cli.sdk import NAMESPACE_NAMES @@ -74,7 +74,7 @@ class AsyncDecoratorsNS(WorkflowDecoratorsMixin): finally: if cleanup is not None: await self._maybe_await(cleanup(value)) - return wrapper # type: ignore[return-value] + return cast(F, wrapper) return decorator(func) if func is not None else decorator def new_tab( @@ -117,7 +117,7 @@ class AsyncDecoratorsNS(WorkflowDecoratorsMixin): finally: if previous: await self._c.perf.set_profile(previous) - return wrapper # type: ignore[return-value] + return cast(F, wrapper) return decorator def retry( @@ -142,8 +142,8 @@ class AsyncDecoratorsNS(WorkflowDecoratorsMixin): raise if delay > 0: await asyncio.sleep(delay) - raise last_error # type: ignore[misc] - return wrapper # type: ignore[return-value] + raise cast(BaseException, last_error) + return cast(F, wrapper) return decorator class AsyncBrowserCLI: diff --git a/browser_cli/auth.py b/browser_cli/auth.py deleted file mode 100644 index 494599b..0000000 --- a/browser_cli/auth.py +++ /dev/null @@ -1,263 +0,0 @@ -"""Ed25519 keypair management, ML-KEM key exchange, and auth helpers.""" -import hashlib -import json -import os -import secrets -import socket -import struct -from dataclasses import dataclass -from pathlib import Path - -from cryptography.exceptions import InvalidSignature -from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey, Ed25519PublicKey -from cryptography.hazmat.primitives import hashes -from cryptography.hazmat.primitives.ciphers.aead import ChaCha20Poly1305 -from cryptography.hazmat.primitives.kdf.hkdf import HKDF -from cryptography.hazmat.primitives.serialization import ( - Encoding, - NoEncryption, - PrivateFormat, - PublicFormat, - load_pem_private_key, -) - -from browser_cli.constants import ( - DEFAULT_AUTHORIZED_KEYS_PATH, - DEFAULT_KEY_PATH, - PQ_KEX_ALG, - PQ_TRANSPORT_ALG, - SSH_AGENT_IDENTITIES_ANSWER, - SSH_AGENT_SIGN_RESPONSE, - SSH_AGENTC_REQUEST_IDENTITIES, - SSH_AGENTC_SIGN_REQUEST, -) - -def _pack_str(s: bytes) -> bytes: - return struct.pack(">I", len(s)) + s - -def _unpack_str(data: bytes, off: int) -> tuple[bytes, int]: - n = struct.unpack_from(">I", data, off)[0] - return data[off + 4 : off + 4 + n], off + 4 + n - -def _agent_roundtrip(msg: bytes) -> bytes: - sock_path = os.environ.get("SSH_AUTH_SOCK") - if not sock_path: - raise RuntimeError("SSH_AUTH_SOCK not set — is gpg-agent / ssh-agent running?") - with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock: - sock.settimeout(10) - sock.connect(sock_path) - sock.sendall(struct.pack(">I", len(msg)) + msg) - raw_len = b"" - while len(raw_len) < 4: - chunk = sock.recv(4 - len(raw_len)) - if not chunk: - raise RuntimeError("SSH agent closed connection") - raw_len += chunk - n = struct.unpack(">I", raw_len)[0] - resp = b"" - while len(resp) < n: - chunk = sock.recv(n - len(resp)) - if not chunk: - raise RuntimeError("SSH agent closed connection mid-response") - resp += chunk - return resp - -# ── AgentKey ─────────────────────────────────────────────────────────────────── - -@dataclass -class AgentKey: - """Ed25519 key backed by an SSH agent (YubiKey, TPM, ssh-agent, gpg-agent …).""" - blob: bytes - comment: str - - @property - def pubkey_bytes(self) -> bytes: - _algo, off = _unpack_str(self.blob, 0) - key_bytes, _ = _unpack_str(self.blob, off) - return key_bytes - -# ── Agent helpers ────────────────────────────────────────────────────────────── - -def agent_list_keys() -> list[AgentKey]: - """Return all Ed25519 keys currently held by the SSH agent.""" - resp = _agent_roundtrip(bytes([SSH_AGENTC_REQUEST_IDENTITIES])) - if resp[0] != SSH_AGENT_IDENTITIES_ANSWER: - raise RuntimeError(f"Unexpected agent response: {resp[0]}") - n_keys = struct.unpack_from(">I", resp, 1)[0] - keys: list[AgentKey] = [] - off = 5 - for _ in range(n_keys): - blob, off = _unpack_str(resp, off) - comment, off = _unpack_str(resp, off) - algo, _ = _unpack_str(blob, 0) - if algo == b"ssh-ed25519": - keys.append(AgentKey(blob=blob, comment=comment.decode("utf-8", errors="replace"))) - return keys - -def agent_find_key(selector: str | None = None) -> AgentKey | None: - """Return the first agent Ed25519 key whose comment contains selector (or any if None).""" - try: - keys = agent_list_keys() - except Exception: - return None - for key in keys: - if key.comment == "(none)": - continue - if selector is None or selector in key.comment: - return key - return None - -def agent_sign_raw(key: AgentKey, data: bytes) -> bytes: - """Ask the SSH agent to sign data and return the raw 64-byte Ed25519 signature.""" - msg = ( - bytes([SSH_AGENTC_SIGN_REQUEST]) - + _pack_str(key.blob) - + _pack_str(data) - + struct.pack(">I", 0) - ) - resp = _agent_roundtrip(msg) - if resp[0] != SSH_AGENT_SIGN_RESPONSE: - raise RuntimeError(f"SSH agent refused to sign (response code {resp[0]})") - sig_blob, _ = _unpack_str(resp, 1) - _algo, soff = _unpack_str(sig_blob, 0) - raw_sig, _ = _unpack_str(sig_blob, soff) - if len(raw_sig) != 64: - raise RuntimeError(f"Unexpected signature length {len(raw_sig)}") - return raw_sig - -# ── File-based key helpers ───────────────────────────────────────────────────── - -def generate_keypair() -> tuple[bytes, str]: - """Return (private_key_pem_bytes, public_key_hex).""" - priv = Ed25519PrivateKey.generate() - pem = priv.private_bytes(Encoding.PEM, PrivateFormat.PKCS8, NoEncryption()) - pub_hex = priv.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw).hex() - return pem, pub_hex - -def load_private_key(path: Path) -> Ed25519PrivateKey: - return load_pem_private_key(path.read_bytes(), password=None) - -def public_key_hex(key: Ed25519PrivateKey | AgentKey) -> str: - if isinstance(key, AgentKey): - return key.pubkey_bytes.hex() - return key.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw).hex() - -# ── Canonical payload + sign/verify ─────────────────────────────────────────── - -def canonical_payload(msg: dict) -> bytes: - """Deterministic JSON encoding of msg without auth protocol fields.""" - return json.dumps( - {k: v for k, v in msg.items() if k not in {"pubkey", "sig", "pq_kex"}}, - sort_keys=True, - separators=(",", ":"), - ).encode("utf-8") - -def _auth_message(nonce: bytes, msg: dict, pq_shared_secret: bytes | None = None) -> bytes: - """Bytes signed for auth; optionally binds a post-quantum KEX secret.""" - data = nonce + hashlib.sha256(canonical_payload(msg)).digest() - if pq_shared_secret is not None: - data += hashlib.sha256(b"browser-cli ml-kem-768 v1" + pq_shared_secret).digest() - return data - -def sign(key: Ed25519PrivateKey | AgentKey, nonce: bytes, msg: dict, pq_shared_secret: bytes | None = None) -> bytes: - """Sign nonce + payload hash, optionally bound to an ML-KEM shared secret.""" - data = _auth_message(nonce, msg, pq_shared_secret) - if isinstance(key, AgentKey): - return agent_sign_raw(key, data) - return key.sign(data) - -def verify(pub_hex: str, nonce: bytes, msg: dict, sig_hex: str, pq_shared_secret: bytes | None = None) -> bool: - """Return True if sig_hex is a valid signature over the canonical payload/auth secret.""" - try: - pub_bytes = bytes.fromhex(pub_hex) - pub_key = Ed25519PublicKey.from_public_bytes(pub_bytes) - pub_key.verify(bytes.fromhex(sig_hex), _auth_message(nonce, msg, pq_shared_secret)) - return True - except (InvalidSignature, ValueError): - return False - -# ── Post-quantum key exchange (ML-KEM / Kyber) ──────────────────────────────── - -def pq_kex_server_keypair(): - """Return an ephemeral ML-KEM-768 private key and raw public key bytes. - - Returns ``None`` when the installed cryptography/OpenSSL backend does not - support ML-KEM yet. The serve/client protocol treats this as graceful - downgrade instead of breaking local installs on older OpenSSL builds. - """ - try: - from cryptography.hazmat.primitives.asymmetric import mlkem - priv = mlkem.MLKEM768PrivateKey.generate() - pub = priv.public_key().public_bytes_raw() - return priv, pub - except Exception: - return None - -def pq_kex_client_encapsulate(public_key_hex: str) -> tuple[str, bytes]: - """Encapsulate to a server ML-KEM public key. Returns (ciphertext_hex, secret).""" - from cryptography.hazmat.primitives.asymmetric import mlkem - pub = mlkem.MLKEM768PublicKey.from_public_bytes(bytes.fromhex(public_key_hex)) - shared_secret, ciphertext = pub.encapsulate() - return ciphertext.hex(), shared_secret - -def pq_kex_server_decapsulate(private_key, ciphertext_hex: str) -> bytes: - """Decapsulate a client ML-KEM ciphertext and return the shared secret.""" - return private_key.decapsulate(bytes.fromhex(ciphertext_hex)) - -def _pq_transport_key(shared_secret: bytes, direction: str) -> bytes: - return HKDF( - algorithm=hashes.SHA256(), - length=32, - salt=None, - info=f"browser-cli pq transport v1 {direction}".encode("ascii"), - ).derive(shared_secret) - -def pq_encrypt(shared_secret: bytes, direction: str, plaintext: bytes) -> dict: - """Encrypt an app-layer frame with a key derived from the ML-KEM secret.""" - nonce = secrets.token_bytes(12) - key = _pq_transport_key(shared_secret, direction) - ciphertext = ChaCha20Poly1305(key).encrypt(nonce, plaintext, None) - return {"alg": PQ_TRANSPORT_ALG, "nonce": nonce.hex(), "ciphertext": ciphertext.hex()} - -def pq_decrypt(shared_secret: bytes, direction: str, envelope: dict) -> bytes: - """Decrypt an app-layer frame produced by pq_encrypt().""" - if not isinstance(envelope, dict) or envelope.get("alg") != PQ_TRANSPORT_ALG: - raise ValueError("unsupported encrypted transport envelope") - key = _pq_transport_key(shared_secret, direction) - return ChaCha20Poly1305(key).decrypt( - bytes.fromhex(str(envelope["nonce"])), - bytes.fromhex(str(envelope["ciphertext"])), - None, - ) - -def new_nonce() -> str: - return secrets.token_hex(32) - -def load_authorized_keys_with_names(path: Path) -> list[tuple[str, str]]: - """Return list of (pubkey_hex, name) pairs. Name is empty string if not set.""" - if not path.exists(): - return [] - result = [] - for line in path.read_text(encoding="utf-8").splitlines(): - line = line.strip() - if not line or line.startswith("#"): - continue - parts = line.split(None, 1) - pubkey = parts[0] - name = parts[1].strip() if len(parts) > 1 else "" - result.append((pubkey, name)) - return result - -def load_authorized_keys(path: Path) -> list[str]: - return [pk for pk, _ in load_authorized_keys_with_names(path)] - -def add_authorized_key(path: Path, pub_hex: str, name: str = "") -> bool: - """Append pub_hex to authorized_keys. Returns False if already present.""" - path.parent.mkdir(parents=True, exist_ok=True) - existing = {pk for pk, _ in load_authorized_keys_with_names(path)} - if pub_hex in existing: - return False - line = (f"{pub_hex} {name}".rstrip()) + "\n" - with open(path, "a", encoding="utf-8") as f: - f.write(line) - return True diff --git a/browser_cli/auth/__init__.py b/browser_cli/auth/__init__.py new file mode 100644 index 0000000..1c17694 --- /dev/null +++ b/browser_cli/auth/__init__.py @@ -0,0 +1,67 @@ +"""Public auth API for browser-cli. + +Implementation lives in focused modules: +- ``auth.agent``: SSH-agent/YubiKey helpers +- ``auth.keys``: file keys and authorized_keys management +- ``auth.signing``: canonical payload signing/verification +- ``auth.pq``: ML-KEM KEX and encrypted transport helpers +""" +from browser_cli.auth.agent import ( + AgentKey, + agent_find_key, + agent_list_keys, + agent_roundtrip as _agent_roundtrip, + agent_sign_raw, + pack_ssh_string as _pack_str, + unpack_ssh_string as _unpack_str, +) +from browser_cli.auth.keys import ( + add_authorized_key, + generate_keypair, + load_authorized_keys, + load_authorized_keys_with_names, + load_private_key, + public_key_hex, +) +from browser_cli.auth.pq import ( + new_nonce, + pq_decrypt, + pq_encrypt, + pq_kex_client_encapsulate, + pq_kex_server_decapsulate, + pq_kex_server_keypair, + pq_transport_key as _pq_transport_key, +) +from browser_cli.auth.signing import ( + auth_message as _auth_message, + canonical_payload, + sign, + verify, +) +from browser_cli.constants import DEFAULT_AUTHORIZED_KEYS_PATH, DEFAULT_KEY_PATH, PQ_KEX_ALG, PQ_TRANSPORT_ALG + +__all__ = [ + "AgentKey", + "DEFAULT_AUTHORIZED_KEYS_PATH", + "DEFAULT_KEY_PATH", + "PQ_KEX_ALG", + "PQ_TRANSPORT_ALG", + "add_authorized_key", + "agent_find_key", + "agent_list_keys", + "agent_sign_raw", + "canonical_payload", + "generate_keypair", + "load_authorized_keys", + "load_authorized_keys_with_names", + "load_private_key", + "new_nonce", + "pq_decrypt", + "pq_encrypt", + "pq_kex_client_encapsulate", + "pq_kex_server_decapsulate", + "pq_kex_server_keypair", + "public_key_hex", + "sign", + "verify", +] diff --git a/browser_cli/auth/agent.py b/browser_cli/auth/agent.py new file mode 100644 index 0000000..63c7329 --- /dev/null +++ b/browser_cli/auth/agent.py @@ -0,0 +1,103 @@ +"""SSH-agent backed Ed25519 key helpers.""" +from __future__ import annotations + +import os +import socket +import struct +from dataclasses import dataclass + +from browser_cli.constants import ( + SSH_AGENT_IDENTITIES_ANSWER, + SSH_AGENT_SIGN_RESPONSE, + SSH_AGENTC_REQUEST_IDENTITIES, + SSH_AGENTC_SIGN_REQUEST, +) + +def pack_ssh_string(value: bytes) -> bytes: + return struct.pack(">I", len(value)) + value + +def unpack_ssh_string(data: bytes, offset: int) -> tuple[bytes, int]: + length = struct.unpack_from(">I", data, offset)[0] + return data[offset + 4 : offset + 4 + length], offset + 4 + length + +def agent_roundtrip(msg: bytes) -> bytes: + sock_path = os.environ.get("SSH_AUTH_SOCK") + if not sock_path: + raise RuntimeError("SSH_AUTH_SOCK not set — is gpg-agent / ssh-agent running?") + with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock: + sock.settimeout(10) + sock.connect(sock_path) + sock.sendall(struct.pack(">I", len(msg)) + msg) + raw_len = b"" + while len(raw_len) < 4: + chunk = sock.recv(4 - len(raw_len)) + if not chunk: + raise RuntimeError("SSH agent closed connection") + raw_len += chunk + length = struct.unpack(">I", raw_len)[0] + response = b"" + while len(response) < length: + chunk = sock.recv(length - len(response)) + if not chunk: + raise RuntimeError("SSH agent closed connection mid-response") + response += chunk + return response + +@dataclass +class AgentKey: + """Ed25519 key backed by an SSH agent (YubiKey, TPM, ssh-agent, gpg-agent …).""" + blob: bytes + comment: str + + @property + def pubkey_bytes(self) -> bytes: + _algo, offset = unpack_ssh_string(self.blob, 0) + key_bytes, _ = unpack_ssh_string(self.blob, offset) + return key_bytes + +def agent_list_keys() -> list[AgentKey]: + """Return all Ed25519 keys currently held by the SSH agent.""" + response = agent_roundtrip(bytes([SSH_AGENTC_REQUEST_IDENTITIES])) + if response[0] != SSH_AGENT_IDENTITIES_ANSWER: + raise RuntimeError(f"Unexpected agent response: {response[0]}") + key_count = struct.unpack_from(">I", response, 1)[0] + keys: list[AgentKey] = [] + offset = 5 + for _ in range(key_count): + blob, offset = unpack_ssh_string(response, offset) + comment, offset = unpack_ssh_string(response, offset) + algo, _ = unpack_ssh_string(blob, 0) + if algo == b"ssh-ed25519": + keys.append(AgentKey(blob=blob, comment=comment.decode("utf-8", errors="replace"))) + return keys + +def agent_find_key(selector: str | None = None) -> AgentKey | None: + """Return the first agent Ed25519 key whose comment contains selector (or any if None).""" + try: + keys = agent_list_keys() + except Exception: + return None + for key in keys: + if key.comment == "(none)": + continue + if selector is None or selector in key.comment: + return key + return None + +def agent_sign_raw(key: AgentKey, data: bytes) -> bytes: + """Ask the SSH agent to sign data and return the raw 64-byte Ed25519 signature.""" + msg = ( + bytes([SSH_AGENTC_SIGN_REQUEST]) + + pack_ssh_string(key.blob) + + pack_ssh_string(data) + + struct.pack(">I", 0) + ) + response = agent_roundtrip(msg) + if response[0] != SSH_AGENT_SIGN_RESPONSE: + raise RuntimeError(f"SSH agent refused to sign (response code {response[0]})") + sig_blob, _ = unpack_ssh_string(response, 1) + _algo, sig_offset = unpack_ssh_string(sig_blob, 0) + raw_sig, _ = unpack_ssh_string(sig_blob, sig_offset) + if len(raw_sig) != 64: + raise RuntimeError(f"Unexpected signature length {len(raw_sig)}") + return raw_sig diff --git a/browser_cli/auth/keys.py b/browser_cli/auth/keys.py new file mode 100644 index 0000000..da1993a --- /dev/null +++ b/browser_cli/auth/keys.py @@ -0,0 +1,59 @@ +"""File-based Ed25519 keys and authorized_keys helpers.""" +from __future__ import annotations + +from pathlib import Path + +from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey +from cryptography.hazmat.primitives.serialization import ( + Encoding, + NoEncryption, + PrivateFormat, + PublicFormat, + load_pem_private_key, +) + +from browser_cli.auth.agent import AgentKey + +def generate_keypair() -> tuple[bytes, str]: + """Return (private_key_pem_bytes, public_key_hex).""" + private_key = Ed25519PrivateKey.generate() + pem = private_key.private_bytes(Encoding.PEM, PrivateFormat.PKCS8, NoEncryption()) + public_hex = private_key.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw).hex() + return pem, public_hex + +def load_private_key(path: Path) -> Ed25519PrivateKey: + return load_pem_private_key(path.read_bytes(), password=None) + +def public_key_hex(key: Ed25519PrivateKey | AgentKey) -> str: + if isinstance(key, AgentKey): + return key.pubkey_bytes.hex() + return key.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw).hex() + +def load_authorized_keys_with_names(path: Path) -> list[tuple[str, str]]: + """Return list of (pubkey_hex, name) pairs. Name is empty string if not set.""" + if not path.exists(): + return [] + result = [] + for line in path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + parts = line.split(None, 1) + pubkey = parts[0] + name = parts[1].strip() if len(parts) > 1 else "" + result.append((pubkey, name)) + return result + +def load_authorized_keys(path: Path) -> list[str]: + return [pubkey for pubkey, _name in load_authorized_keys_with_names(path)] + +def add_authorized_key(path: Path, pub_hex: str, name: str = "") -> bool: + """Append pub_hex to authorized_keys. Returns False if already present.""" + path.parent.mkdir(parents=True, exist_ok=True) + existing = {pubkey for pubkey, _name in load_authorized_keys_with_names(path)} + if pub_hex in existing: + return False + line = (f"{pub_hex} {name}".rstrip()) + "\n" + with open(path, "a", encoding="utf-8") as file: + file.write(line) + return True diff --git a/browser_cli/auth/pq.py b/browser_cli/auth/pq.py new file mode 100644 index 0000000..821813e --- /dev/null +++ b/browser_cli/auth/pq.py @@ -0,0 +1,65 @@ +"""Post-quantum ML-KEM key exchange and app-layer transport encryption.""" +from __future__ import annotations + +import secrets + +from cryptography.hazmat.primitives import hashes +from cryptography.hazmat.primitives.ciphers.aead import ChaCha20Poly1305 +from cryptography.hazmat.primitives.kdf.hkdf import HKDF + +from browser_cli.constants import PQ_TRANSPORT_ALG + +def pq_kex_server_keypair(): + """Return an ephemeral ML-KEM-768 private key and raw public key bytes. + + Returns ``None`` when the installed cryptography/OpenSSL backend does not + support ML-KEM yet. The serve/client protocol treats this as graceful + downgrade instead of breaking local installs on older OpenSSL builds. + """ + try: + from cryptography.hazmat.primitives.asymmetric import mlkem + private_key = mlkem.MLKEM768PrivateKey.generate() + public_key = private_key.public_key().public_bytes_raw() + return private_key, public_key + except Exception: + return None + +def pq_kex_client_encapsulate(public_key_hex: str) -> tuple[str, bytes]: + """Encapsulate to a server ML-KEM public key. Returns (ciphertext_hex, secret).""" + from cryptography.hazmat.primitives.asymmetric import mlkem + public_key = mlkem.MLKEM768PublicKey.from_public_bytes(bytes.fromhex(public_key_hex)) + shared_secret, ciphertext = public_key.encapsulate() + return ciphertext.hex(), shared_secret + +def pq_kex_server_decapsulate(private_key, ciphertext_hex: str) -> bytes: + """Decapsulate a client ML-KEM ciphertext and return the shared secret.""" + return private_key.decapsulate(bytes.fromhex(ciphertext_hex)) + +def pq_transport_key(shared_secret: bytes, direction: str) -> bytes: + return HKDF( + algorithm=hashes.SHA256(), + length=32, + salt=None, + info=f"browser-cli pq transport v1 {direction}".encode("ascii"), + ).derive(shared_secret) + +def pq_encrypt(shared_secret: bytes, direction: str, plaintext: bytes) -> dict: + """Encrypt an app-layer frame with a key derived from the ML-KEM secret.""" + nonce = secrets.token_bytes(12) + key = pq_transport_key(shared_secret, direction) + ciphertext = ChaCha20Poly1305(key).encrypt(nonce, plaintext, None) + return {"alg": PQ_TRANSPORT_ALG, "nonce": nonce.hex(), "ciphertext": ciphertext.hex()} + +def pq_decrypt(shared_secret: bytes, direction: str, envelope: dict) -> bytes: + """Decrypt an app-layer frame produced by pq_encrypt().""" + if not isinstance(envelope, dict) or envelope.get("alg") != PQ_TRANSPORT_ALG: + raise ValueError("unsupported encrypted transport envelope") + key = pq_transport_key(shared_secret, direction) + return ChaCha20Poly1305(key).decrypt( + bytes.fromhex(str(envelope["nonce"])), + bytes.fromhex(str(envelope["ciphertext"])), + None, + ) + +def new_nonce() -> str: + return secrets.token_hex(32) diff --git a/browser_cli/auth/signing.py b/browser_cli/auth/signing.py new file mode 100644 index 0000000..280718b --- /dev/null +++ b/browser_cli/auth/signing.py @@ -0,0 +1,42 @@ +"""Canonical browser-cli auth payload signing and verification.""" +from __future__ import annotations + +import hashlib +import json + +from cryptography.exceptions import InvalidSignature +from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey, Ed25519PublicKey + +from browser_cli.auth.agent import AgentKey, agent_sign_raw + +def canonical_payload(msg: dict) -> bytes: + """Deterministic JSON encoding of msg without auth protocol fields.""" + return json.dumps( + {key: value for key, value in msg.items() if key not in {"pubkey", "sig", "pq_kex"}}, + sort_keys=True, + separators=(",", ":"), + ).encode("utf-8") + +def auth_message(nonce: bytes, msg: dict, pq_shared_secret: bytes | None = None) -> bytes: + """Bytes signed for auth; optionally binds a post-quantum KEX secret.""" + data = nonce + hashlib.sha256(canonical_payload(msg)).digest() + if pq_shared_secret is not None: + data += hashlib.sha256(b"browser-cli ml-kem-768 v1" + pq_shared_secret).digest() + return data + +def sign(key: Ed25519PrivateKey | AgentKey, nonce: bytes, msg: dict, pq_shared_secret: bytes | None = None) -> bytes: + """Sign nonce + payload hash, optionally bound to an ML-KEM shared secret.""" + data = auth_message(nonce, msg, pq_shared_secret) + if isinstance(key, AgentKey): + return agent_sign_raw(key, data) + return key.sign(data) + +def verify(pub_hex: str, nonce: bytes, msg: dict, sig_hex: str, pq_shared_secret: bytes | None = None) -> bool: + """Return True if sig_hex is a valid signature over the canonical payload/auth secret.""" + try: + pub_bytes = bytes.fromhex(pub_hex) + pub_key = Ed25519PublicKey.from_public_bytes(pub_bytes) + pub_key.verify(bytes.fromhex(sig_hex), auth_message(nonce, msg, pq_shared_secret)) + return True + except (InvalidSignature, ValueError): + return False diff --git a/browser_cli/commands/rendering.py b/browser_cli/commands/rendering.py index 731ef03..ff0d3f1 100644 --- a/browser_cli/commands/rendering.py +++ b/browser_cli/commands/rendering.py @@ -2,11 +2,22 @@ from __future__ import annotations import shutil +from collections.abc import Callable, Iterable, Sequence +from typing import Any from rich.console import Console +from rich.table import Table from rich.text import Text from rich.tree import Tree +Column = tuple[str, Callable[[Any], Any]] + +def item_value(item: Any, name: str, default: Any = None) -> Any: + """Read *name* from a dict-like or attribute object.""" + if isinstance(item, dict): + return item.get(name, default) + return getattr(item, name, default) + def shorten(value: str | None, limit: int) -> str: """Return *value* shortened to *limit* cells-ish, using an ellipsis.""" value = value or "" @@ -38,18 +49,139 @@ def no_wrap_text() -> Text: """Text configured for one-line tree labels with edge ellipsis.""" return Text(no_wrap=True, overflow="ellipsis") -def tab_tree_label(tab, *, title_limit: int, show_urls: bool = False, url_limit: int = 55) -> Text: +def tab_tree_label(tab: Any, *, title_limit: int, show_urls: bool = False, url_limit: int = 55) -> Text: """Reusable one-line label for a browser tab in tree views.""" label = no_wrap_text() - label.append(f"[{tab.id}] ", style="dim") - label.append(shorten(tab.title or "(untitled)", title_limit)) - if tab.active: + label.append(f"[{item_value(tab, 'id')}] ", style="dim") + label.append(shorten(item_value(tab, 'title') or "(untitled)", title_limit)) + if item_value(tab, "active", False): label.append(" *", style="green") - if show_urls and tab.url: + url = item_value(tab, "url") + if show_urls and url: label.append(" — ", style="dim") - label.append(shorten(tab.url, url_limit), style="dim") + label.append(shorten(url, url_limit), style="dim") return label +def group_tree_label(group_id: int, group: Any, *, title_limit: int) -> Text: + """Reusable one-line label for a browser tab group in tree views.""" + title = item_value(group, "title", "") or f"Group {group_id}" + color = item_value(group, "color", "") or "group" + count = item_value(group, "tab_count", item_value(group, "tabCount", 0)) or 0 + collapsed = bool(item_value(group, "collapsed", False)) + label = no_wrap_text() + label.append(shorten(title, title_limit), style="bold") + meta = [color] + if count: + meta.append(f"{count} tab" + ("" if count == 1 else "s")) + if collapsed: + meta.append("collapsed") + label.append(" (" + ", ".join(meta) + ")", style="dim") + return label + +def tab_sort_key(tab: Any) -> tuple: + """Stable tab ordering across multi-browser responses.""" + group_id = item_value(tab, "group_id", item_value(tab, "groupId")) + return ( + item_value(tab, "browser") or "", + item_value(tab, "window_id", item_value(tab, "windowId", 0)), + item_value(tab, "index", 0) or 0, + group_id if group_id is not None else -1, + item_value(tab, "id", 0), + ) + def print_tree(tree: Tree, *, console: Console | None = None) -> None: """Render a Rich tree using the detected full terminal width.""" Console(width=terminal_width(console)).print(tree) + +def print_table_rows( + rows: Sequence[Any], + columns: Sequence[Column], + *, + console: Console, + empty_message: str, + show_header: bool = True, + header_style: str = "bold cyan", +) -> None: + """Render a small Rich table from arbitrary row objects.""" + if not rows: + console.print(empty_message) + return + table = Table(show_header=show_header, header_style=header_style) + for header, _getter in columns: + table.add_column(header) + for row in rows: + table.add_row(*[str(getter(row) or "") for _header, getter in columns]) + Console(width=terminal_width(console)).print(table) + +def build_tabs_tree( + tabs: Iterable[Any], + groups: Iterable[Any], + *, + console: Console, + show_urls: bool = False, +) -> Tree: + """Build a browser → window → group/tab tree from tab and group responses.""" + tabs = sorted(tabs, key=tab_sort_key) + show_browser = any(item_value(tab, "browser") for tab in tabs) + title_limit = tree_title_limit(console=console, show_browser=show_browser, show_urls=show_urls) + url_limit = tree_url_limit(title_limit, console=console) + group_info = { + ( + item_value(group, "browser") or "local", + item_value(group, "window_id", item_value(group, "windowId")), + item_value(group, "id"), + ): group + for group in groups + } + root = Tree("[bold]Tabs[/bold]") + browser_nodes: dict[str, Tree] = {} + window_nodes: dict[tuple[str, int], Tree] = {} + group_nodes: dict[tuple[str, int, int], Tree] = {} + for tab in tabs: + browser_key = item_value(tab, "browser") or "local" + browser_node = browser_nodes.get(browser_key) + if browser_node is None: + browser_node = root.add(Text(browser_key, style="bold cyan")) if show_browser else root + browser_nodes[browser_key] = browser_node + window_id = item_value(tab, "window_id", item_value(tab, "windowId", 0)) + window_key = (browser_key, window_id) + window_node = window_nodes.get(window_key) + if window_node is None: + window_node = browser_node.add(f"Window {window_id}") + window_nodes[window_key] = window_node + group_id = item_value(tab, "group_id", item_value(tab, "groupId")) + if group_id is None: + window_node.add(tab_tree_label(tab, title_limit=title_limit, show_urls=show_urls, url_limit=url_limit)) + continue + group_key = (browser_key, window_id, group_id) + group_node = group_nodes.get(group_key) + if group_node is None: + group = group_info.get(group_key) or group_info.get((browser_key, None, group_id)) + group_node = window_node.add(group_tree_label(group_id, group, title_limit=title_limit)) + group_nodes[group_key] = group_node + group_node.add(tab_tree_label(tab, title_limit=title_limit, show_urls=show_urls, url_limit=url_limit)) + return root + +def build_windows_tree(windows: Iterable[dict], tabs: Iterable[Any], *, console: Console) -> Tree: + """Build a window → tab tree from window and tab responses.""" + windows = list(windows) + tabs = list(tabs) + title_limit = tree_title_limit(console=console, show_browser=any("browser" in w for w in windows), show_urls=True) + url_limit = tree_url_limit(title_limit, console=console) + root = Tree("[bold]Windows[/bold]") + for window in sorted(windows, key=lambda item: (item.get("browser", ""), item.get("id", 0))): + window_id = window.get("id") + label = f"Window {window_id}" + if window.get("alias"): + label += f" ({window['alias']})" + if window.get("browser"): + label = f"{window['browser']}: " + label + node = root.add(label) + window_tabs = [ + tab for tab in tabs + if item_value(tab, "window_id", item_value(tab, "windowId")) == window_id + and (not window.get("browser") or item_value(tab, "browser") == window.get("browser")) + ] + for tab in sorted(window_tabs, key=lambda item: item_value(item, "index", 0) or 0): + node.add(tab_tree_label(tab, title_limit=title_limit, show_urls=True, url_limit=url_limit)) + return root diff --git a/browser_cli/commands/script.py b/browser_cli/commands/script.py index 9a64349..ae88387 100644 --- a/browser_cli/commands/script.py +++ b/browser_cli/commands/script.py @@ -1,7 +1,9 @@ from __future__ import annotations +import importlib import json from pathlib import Path +from typing import Any, cast import click from rich.console import Console @@ -12,25 +14,25 @@ from browser_cli.commands import client_from_ctx, handle_errors console = Console() def _load_steps(path: Path): - text = path.read_text(encoding="utf-8") - if path.suffix.lower() in {".yaml", ".yml"}: - try: - import yaml # type: ignore - except Exception as exc: - raise click.ClickException("YAML scripts require PyYAML; use JSON or install PyYAML") from exc - return yaml.safe_load(text) - return json.loads(text) + text = path.read_text(encoding="utf-8") + if path.suffix.lower() in {".yaml", ".yml"}: + try: + yaml = cast(Any, importlib.import_module("yaml")) + except Exception as exc: + raise click.ClickException("YAML scripts require PyYAML; use JSON or install PyYAML") from exc + return yaml.safe_load(text) + return json.loads(text) def _parse_step(step): - if isinstance(step, str): - return step, {} - if isinstance(step, dict): - if "command" in step: - return step["command"], step.get("args") or {} - if len(step) == 1: - command, args = next(iter(step.items())) - return command, args or {} - raise click.ClickException(f"Invalid script step: {step!r}") + if isinstance(step, str): + return step, {} + if isinstance(step, dict): + if "command" in step: + return step["command"], step.get("args") or {} + if len(step) == 1: + command, args = next(iter(step.items())) + return command, args or {} + raise click.ClickException(f"Invalid script step: {step!r}") @click.command("script") @click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path)) @@ -41,28 +43,28 @@ def _parse_step(step): @click.option("--allow-dangerous", is_flag=True, help="Allow high-risk commands such as dom.eval, storage.*, screenshots") @handle_errors def cmd_script(file: Path, json_output: bool, continue_on_error: bool, allow_read_page: bool, allow_control: bool, allow_dangerous: bool): - """Run a JSON/YAML batch script of browser-cli wire commands.""" - steps = _load_steps(file) - if not isinstance(steps, list): - raise click.ClickException("Script root must be a list") - client = client_from_ctx() - policy = CommandPolicy(allow_read_page=allow_read_page, allow_control=allow_control, allow_dangerous=allow_dangerous) - results = [] - for index, step in enumerate(steps, start=1): - command, args = _parse_step(step) - try: - assert_command_allowed(command, policy) - result = client.command(command, args) - results.append({"index": index, "command": command, "ok": True, "result": result}) - if not json_output: - console.print(f"[green]✓[/green] {index}: {command}") - except Exception as exc: - results.append({"index": index, "command": command, "ok": False, "error": str(exc)}) - if not continue_on_error: - if json_output: - click.echo(json.dumps(results, indent=2, default=str)) - raise - if not json_output: - console.print(f"[red]✗[/red] {index}: {command}: {exc}") - if json_output: - click.echo(json.dumps(results, indent=2, default=str)) + """Run a JSON/YAML batch script of browser-cli wire commands.""" + steps = _load_steps(file) + if not isinstance(steps, list): + raise click.ClickException("Script root must be a list") + client = client_from_ctx() + policy = CommandPolicy(allow_read_page=allow_read_page, allow_control=allow_control, allow_dangerous=allow_dangerous) + results = [] + for index, step in enumerate(steps, start=1): + command, args = _parse_step(step) + try: + assert_command_allowed(command, policy) + result = client.command(command, args) + results.append({"index": index, "command": command, "ok": True, "result": result}) + if not json_output: + console.print(f"[green]✓[/green] {index}: {command}") + except Exception as exc: + results.append({"index": index, "command": command, "ok": False, "error": str(exc)}) + if not continue_on_error: + if json_output: + click.echo(json.dumps(results, indent=2, default=str)) + raise + if not json_output: + console.print(f"[red]✗[/red] {index}: {command}: {exc}") + if json_output: + click.echo(json.dumps(results, indent=2, default=str)) diff --git a/browser_cli/commands/tabs.py b/browser_cli/commands/tabs.py index 761a3b8..ba3ff19 100644 --- a/browser_cli/commands/tabs.py +++ b/browser_cli/commands/tabs.py @@ -2,65 +2,25 @@ import base64 import binascii import click from browser_cli.commands import client_from_ctx, gentle_mode_option, handle_errors, print_counts, tab_option -from browser_cli.commands.rendering import no_wrap_text, print_tree, shorten, tab_tree_label, tree_title_limit, tree_url_limit +from browser_cli.commands.rendering import build_tabs_tree, print_table_rows, print_tree from rich.console import Console from rich.table import Table -from rich.text import Text -from rich.tree import Tree console = Console() -def _group_tree_label(group_id: int, group, *, title_limit: int) -> Text: - title = getattr(group, "title", "") or f"Group {group_id}" - color = getattr(group, "color", "") or "group" - count = getattr(group, "tab_count", 0) or 0 - collapsed = bool(getattr(group, "collapsed", False)) - label = no_wrap_text() - label.append(shorten(title, title_limit), style="bold") - meta = [color] - if count: - meta.append(f"{count} tab" + ("" if count == 1 else "s")) - if collapsed: - meta.append("collapsed") - label.append(" (" + ", ".join(meta) + ")", style="dim") - return label - -def _tab_sort_key(tab): - return ( - tab.browser or "", - tab.window_id, - getattr(tab, "index", 0), - tab.group_id if tab.group_id is not None else -1, - tab.id, - ) - def _print_tabs(tabs, *, show_browser: bool = False) -> None: - if not tabs: - console.print("[yellow]No tabs found[/yellow]") - return - table = Table(show_header=True, header_style="bold cyan") + columns = [] if show_browser: - table.add_column("Browser", no_wrap=True) - table.add_column("ID", style="dim", no_wrap=True) - table.add_column("Window", no_wrap=True) - table.add_column("Active", width=7) - table.add_column("Muted", width=7) - table.add_column("Title") - table.add_column("URL") - for t in tabs: - active = "[green]✓[/green]" if t.active else "" - muted = "[yellow]✓[/yellow]" if t.muted else "" - row = [ - (t.browser or "") if show_browser else None, - str(t.id), - str(t.window_id), - active, - muted, - (t.title or "")[:60], - (t.url or "")[:80], - ] - table.add_row(*[value for value in row if value is not None]) - console.print(table) + columns.append(("Browser", lambda tab: tab.browser or "")) + columns.extend([ + ("ID", lambda tab: tab.id), + ("Window", lambda tab: tab.window_id), + ("Active", lambda tab: "[green]✓[/green]" if tab.active else ""), + ("Muted", lambda tab: "[yellow]✓[/yellow]" if tab.muted else ""), + ("Title", lambda tab: (tab.title or "")[:60]), + ("URL", lambda tab: (tab.url or "")[:80]), + ]) + print_table_rows(tabs, columns, console=console, empty_message="[yellow]No tabs found[/yellow]") @click.group("tabs") def tabs_group(): @@ -79,39 +39,7 @@ def tabs_list(): def tabs_tree(show_urls): """Show tabs grouped as a window/group tree.""" client = client_from_ctx() - tabs = sorted(client.tabs.list(), key=_tab_sort_key) - title_limit = tree_title_limit(console=console, show_browser=any(t.browser for t in tabs), show_urls=show_urls) - url_limit = tree_url_limit(title_limit, console=console) - group_info = { - (group.browser or "local", group.id): group - for group in client.groups.list() - } - root = Tree("[bold]Tabs[/bold]") - browsers = {} - windows = {} - groups = {} - show_browser = any(t.browser for t in tabs) - for tab in tabs: - browser_key = tab.browser or "local" - browser_node = browsers.get(browser_key) - if browser_node is None: - browser_node = root.add(Text(browser_key, style="bold cyan")) if show_browser else root - browsers[browser_key] = browser_node - win_key = (browser_key, tab.window_id) - win_node = windows.get(win_key) - if win_node is None: - win_node = browser_node.add(f"Window {tab.window_id}") - windows[win_key] = win_node - if tab.group_id is None: - win_node.add(tab_tree_label(tab, title_limit=title_limit, show_urls=show_urls, url_limit=url_limit)) - continue - group_key = (browser_key, tab.window_id, tab.group_id) - group_node = groups.get(group_key) - group = group_info.get((browser_key, tab.group_id)) - if group_node is None: - group_node = win_node.add(_group_tree_label(tab.group_id, group, title_limit=title_limit)) - groups[group_key] = group_node - group_node.add(tab_tree_label(tab, title_limit=title_limit, show_urls=show_urls, url_limit=url_limit)) + root = build_tabs_tree(client.tabs.list(), client.groups.list(), console=console, show_urls=show_urls) print_tree(root, console=console) @tabs_group.command("close") diff --git a/browser_cli/commands/windows.py b/browser_cli/commands/windows.py index 45ba3f0..4f3367c 100644 --- a/browser_cli/commands/windows.py +++ b/browser_cli/commands/windows.py @@ -1,33 +1,21 @@ import click from browser_cli.commands import client_from_ctx, handle_errors -from browser_cli.commands.rendering import print_tree, tab_tree_label, tree_title_limit, tree_url_limit +from browser_cli.commands.rendering import build_windows_tree, print_table_rows, print_tree from rich.console import Console -from rich.table import Table -from rich.tree import Tree console = Console() def _print_windows(windows: list[dict], *, show_browser: bool = False) -> None: - if not windows: - console.print("[yellow]No windows found[/yellow]") - return - table = Table(show_header=True, header_style="bold cyan") + columns = [] if show_browser: - table.add_column("Browser") - table.add_column("ID", style="dim", no_wrap=True) - table.add_column("Alias", width=20) - table.add_column("Tabs", width=6) - table.add_column("State", width=12) - for w in windows: - row = [ - w.get("browser", "") if show_browser else None, - str(w.get("id", "")), - w.get("alias") or "", - str(w.get("tabCount", "")), - w.get("state") or "", - ] - table.add_row(*[value for value in row if value is not None]) - console.print(table) + columns.append(("Browser", lambda window: window.get("browser", ""))) + columns.extend([ + ("ID", lambda window: window.get("id", "")), + ("Alias", lambda window: window.get("alias") or ""), + ("Tabs", lambda window: window.get("tabCount", "")), + ("State", lambda window: window.get("state") or ""), + ]) + print_table_rows(windows, columns, console=console, empty_message="[yellow]No windows found[/yellow]") @click.group("windows") def windows_group(): @@ -45,21 +33,7 @@ def windows_list(): def windows_tree(): """Show windows and their tabs as a tree.""" client = client_from_ctx() - windows = client.windows.list() - tabs = client.tabs.list() - root = Tree("[bold]Windows[/bold]") - title_limit = tree_title_limit(console=console, show_browser=any("browser" in w for w in windows), show_urls=True) - url_limit = tree_url_limit(title_limit, console=console) - for w in sorted(windows, key=lambda item: (item.get("browser", ""), item.get("id", 0))): - wid = w.get("id") - label = f"Window {wid}" - if w.get("alias"): - label += f" ({w['alias']})" - if w.get("browser"): - label = f"{w['browser']}: " + label - node = root.add(label) - for tab in sorted([t for t in tabs if t.window_id == wid and (not w.get("browser") or t.browser == w.get("browser"))], key=lambda t: getattr(t, "index", 0)): - node.add(tab_tree_label(tab, title_limit=title_limit, show_urls=True, url_limit=url_limit)) + root = build_windows_tree(client.windows.list(), client.tabs.list(), console=console) print_tree(root, console=console) @windows_group.command("rename") diff --git a/browser_cli/markdown/html.py b/browser_cli/markdown/html.py index 5ab733d..39b99ce 100644 --- a/browser_cli/markdown/html.py +++ b/browser_cli/markdown/html.py @@ -4,26 +4,6 @@ from __future__ import annotations import re from html.parser import HTMLParser -def _normalize_text(value): - return re.sub(r"\s+", " ", value or "").strip() - -def _normalize_inline(value): - value = value.replace("\xa0", " ") - value = re.sub(r"[ \t\r\f\v]+", " ", value) - value = re.sub(r" *\n *", "\n", value) - return value.strip() - -def _collapse_blank_lines(value): - value = re.sub(r"[ \t]+\n", "\n", value) - value = re.sub(r"\n{3,}", "\n\n", value) - return value.strip() - -def _escape_markdown(text): - return re.sub(r"([\\`[\]])", r"\\\1", text) - -def _escape_table_cell(text): - return text.replace("|", r"\|").replace("\n", " ").strip() - class _HtmlNode: def __init__(self, tag=None, attrs=None, text=None): self.tag = tag diff --git a/browser_cli/remote/auth.py b/browser_cli/remote/auth.py new file mode 100644 index 0000000..65117d5 --- /dev/null +++ b/browser_cli/remote/auth.py @@ -0,0 +1,145 @@ +"""Challenge/response auth helpers for remote TCP transport.""" +from __future__ import annotations + +import asyncio +import json +import sys +from collections.abc import Callable +from typing import TypeVar + +from browser_cli.errors import BrowserNotConnected +from browser_cli.version_manager import USER_AGENT + +T = TypeVar("T") +AUTH_FIELDS = {"token", "pubkey", "sig", "pq_kex", "encrypted", "_suppress_pq_warning"} +PQ_WARNING = ( + "** WARNING: connection is not using a post-quantum key exchange algorithm.\n" + "** This session may be vulnerable to store now, decrypt later attacks.\n" +) + +def parse_challenge(raw: bytes) -> tuple[dict | None, str | None]: + try: + challenge = json.loads(raw) + nonce_hex = challenge.get("nonce") if challenge.get("type") == "challenge" else None + return challenge, nonce_hex + except (json.JSONDecodeError, AttributeError): + return None, None + +def check_min_client_version(challenge: dict | None) -> None: + min_ver = challenge.get("min_client_version") if isinstance(challenge, dict) else None + if not min_ver: + return + from browser_cli.version_manager import parse_version + try: + client_ver = USER_AGENT.split("/", 1)[1] + if parse_version(client_ver) < parse_version(min_ver): + raise BrowserNotConnected( + f"Client version {client_ver} is too old for this server " + f"(requires >= {min_ver}). Run: pip install --upgrade browser-cli" + ) + except (IndexError, ValueError): + pass + +def clean_message(msg: dict) -> dict: + return {key: value for key, value in msg.items() if key not in AUTH_FIELDS} + +def get_pq_public_key(challenge: dict | None) -> str | None: + if not isinstance(challenge, dict): + return None + from browser_cli.auth import PQ_KEX_ALG + kex = challenge.get("pq_kex") + if isinstance(kex, dict) and kex.get("alg") == PQ_KEX_ALG and kex.get("public_key"): + return str(kex["public_key"]) + return None + +def signed_payload(clean_msg: dict, private_key, nonce_hex: str, pq_shared_secret: bytes | None) -> dict: + from browser_cli.auth import pq_encrypt, public_key_hex, sign + + nonce = bytes.fromhex(nonce_hex) + sig = sign(private_key, nonce, clean_msg, pq_shared_secret) + pubkey = public_key_hex(private_key) + if pq_shared_secret is None: + return {**clean_msg, "pubkey": pubkey, "sig": sig.hex()} + + encrypted = pq_encrypt(pq_shared_secret, "request", json.dumps(clean_msg).encode("utf-8")) + return { + "id": clean_msg.get("id"), + "user_agent": clean_msg.get("user_agent"), + "pubkey": pubkey, + "sig": sig.hex(), + "pq_kex": clean_msg["pq_kex"], + "encrypted": encrypted, + } + +def emit_no_pq_warning(enabled: bool) -> None: + if enabled: + sys.stderr.write(PQ_WARNING) + +def build_auth_message( + msg: dict, + challenge: dict | None, + nonce_hex: str | None, + private_key, + encapsulate: Callable[[str], tuple[str, bytes]], + *, + warn_no_pq: bool = True, +) -> tuple[dict, bytes | None]: + if not nonce_hex or private_key is None: + emit_no_pq_warning(warn_no_pq) + return msg, None + + clean_msg = clean_message(msg) + pq_shared_secret = None + pq_public_key = get_pq_public_key(challenge) + if pq_public_key: + from browser_cli.auth import PQ_KEX_ALG + ciphertext_hex, pq_shared_secret = encapsulate(pq_public_key) + clean_msg["pq_kex"] = {"alg": PQ_KEX_ALG, "ciphertext": ciphertext_hex} + else: + emit_no_pq_warning(warn_no_pq) + + return signed_payload(clean_msg, private_key, nonce_hex, pq_shared_secret), pq_shared_secret + +async def build_auth_message_async( + msg: dict, + challenge: dict | None, + nonce_hex: str | None, + private_key, + *, + warn_no_pq: bool = True, +) -> tuple[dict, bytes | None]: + def encapsulate(public_key: str) -> tuple[str, bytes]: + from browser_cli.auth import pq_kex_client_encapsulate + return pq_kex_client_encapsulate(public_key) + + return await asyncio.to_thread( + build_auth_message, + msg, + challenge, + nonce_hex, + private_key, + encapsulate, + warn_no_pq=warn_no_pq, + ) + +def decode_pq_response(response: bytes | None, pq_shared_secret: bytes | None) -> bytes | None: + if response is None or pq_shared_secret is None: + return response + try: + from browser_cli.auth import pq_decrypt + envelope = json.loads(response) + if isinstance(envelope, dict) and "encrypted" in envelope: + return pq_decrypt(pq_shared_secret, "response", envelope["encrypted"]) + except Exception as exc: + raise BrowserNotConnected(f"Cannot decrypt post-quantum remote response: {exc}") from exc + return response + +def with_challenge(challenge_raw: bytes, msg: dict, private_key, build_auth: Callable[[dict, dict | None, str | None, object], T]) -> T: + if challenge_raw is None: + raise BrowserNotConnected("No challenge received from remote endpoint") + challenge, nonce_hex = parse_challenge(challenge_raw) + check_min_client_version(challenge) + return build_auth(msg, challenge, nonce_hex, private_key) + +def should_warn_no_pq(msg: dict) -> bool: + return not bool(msg.pop("_suppress_pq_warning", False)) diff --git a/browser_cli/remote/socket.py b/browser_cli/remote/socket.py new file mode 100644 index 0000000..0d4c298 --- /dev/null +++ b/browser_cli/remote/socket.py @@ -0,0 +1,52 @@ +"""Socket helpers for remote TCP/TLS transport.""" +from __future__ import annotations + +import asyncio +import socket +from contextlib import contextmanager + +from browser_cli.endpoints import _resolve_connect_endpoint +from browser_cli.framing import async_recv_exact, async_recv_frame, recv_exact, recv_frame + +def recv_exact_bytes(sock: socket.socket, n: int) -> bytes: + return recv_exact(sock, n) or b"" + +def recv_all(sock: socket.socket) -> bytes: + return recv_frame(sock, label="Response") or b"" + +async def async_recv_exact_bytes(reader: asyncio.StreamReader, n: int) -> bytes: + return await async_recv_exact(reader, n) or b"" + +async def async_recv_all(reader: asyncio.StreamReader) -> bytes: + return await async_recv_frame(reader, label="Response") or b"" + +def split_endpoint(endpoint: str) -> tuple[str, int]: + connect_ep = _resolve_connect_endpoint(endpoint) + host, _, port_str = connect_ep.rpartition(":") + return host, int(port_str) + +@contextmanager +def open_socket(endpoint: str): + host, port = split_endpoint(endpoint) + raw_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + raw_sock.settimeout(30) + try: + raw_sock.connect((host, port)) + if port == 443: + import ssl + sock = ssl.create_default_context().wrap_socket(raw_sock, server_hostname=host) + else: + sock = raw_sock + except Exception: + raw_sock.close() + raise + with sock: + yield sock + +async def open_async_connection(endpoint: str) -> tuple[asyncio.StreamReader, asyncio.StreamWriter]: + host, port = split_endpoint(endpoint) + ssl_ctx = None + if port == 443: + import ssl + ssl_ctx = ssl.create_default_context() + return await asyncio.open_connection(host, port, ssl=ssl_ctx, server_hostname=host if ssl_ctx else None) diff --git a/browser_cli/remote/transport.py b/browser_cli/remote/transport.py index d017917..92ac836 100644 --- a/browser_cli/remote/transport.py +++ b/browser_cli/remote/transport.py @@ -1,202 +1,43 @@ """TCP/TLS transport for talking to a remote ``browser-cli serve``. -Owns the wire mechanics of the remote leg: open a socket (TLS on :443), -complete the signed challenge/response handshake with an optional post-quantum -key exchange, frame the request, and read the framed (possibly encrypted) -response. The higher-level "which endpoint / which profile / which key" -decisions stay in :mod:`browser_cli.client.core`. +This module keeps the public/private compatibility surface used by older tests +and callers, while delegating socket mechanics and auth-handshake details to +focused helper modules. """ from __future__ import annotations -import asyncio import json -import socket -import sys -from collections.abc import Callable -from contextlib import contextmanager -from typing import TypeVar -from browser_cli.errors import BrowserNotConnected -from browser_cli.endpoints import _resolve_connect_endpoint -from browser_cli.framing import async_recv_exact, async_recv_frame, async_send_frame, frame, recv_exact, recv_frame -from browser_cli.version_manager import USER_AGENT as _USER_AGENT - -T = TypeVar("T") -_AUTH_FIELDS = {"token", "pubkey", "sig", "pq_kex", "encrypted", "_suppress_pq_warning"} -_PQ_WARNING = ( - "** WARNING: connection is not using a post-quantum key exchange algorithm.\n" - "** This session may be vulnerable to store now, decrypt later attacks.\n" +from browser_cli.framing import async_send_frame, frame +from browser_cli.remote.auth import ( + build_auth_message as _build_auth_message, + build_auth_message_async as _build_auth_message_async, + decode_pq_response as _decode_pq_response, + parse_challenge as _parse_challenge, + should_warn_no_pq as _should_warn_no_pq, + with_challenge as _with_challenge, +) +from browser_cli.remote.socket import ( + async_recv_all as _async_recv_all, + async_recv_exact_bytes as _async_recv_exact, + open_async_connection as _open_async_connection, + open_socket as _open_socket, + recv_all as _recv_all, + recv_exact_bytes as _recv_exact, + split_endpoint as _split_endpoint, ) -def _recv_exact(sock: socket.socket, n: int) -> bytes: - return recv_exact(sock, n) or b"" +def _send_remote(endpoint: str, msg: dict, private_key=None, *, warn_no_pq: bool | None = None) -> bytes | None: + warn = _should_warn_no_pq(msg) if warn_no_pq is None else warn_no_pq -def _recv_all(sock: socket.socket) -> bytes: - return recv_frame(sock, label="Response") or b"" - -async def _async_recv_exact(reader: asyncio.StreamReader, n: int) -> bytes: - return await async_recv_exact(reader, n) or b"" - -async def _async_recv_all(reader: asyncio.StreamReader) -> bytes: - return await async_recv_frame(reader, label="Response") or b"" - -def _split_endpoint(endpoint: str) -> tuple[str, int]: - connect_ep = _resolve_connect_endpoint(endpoint) - host, _, port_str = connect_ep.rpartition(":") - return host, int(port_str) - -@contextmanager -def _open_socket(endpoint: str): - host, port = _split_endpoint(endpoint) - raw_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - raw_sock.settimeout(30) - try: - raw_sock.connect((host, port)) - if port == 443: - import ssl - sock = ssl.create_default_context().wrap_socket(raw_sock, server_hostname=host) - else: - sock = raw_sock - except Exception: - raw_sock.close() - raise - with sock: - yield sock - -async def _open_async_connection(endpoint: str) -> tuple[asyncio.StreamReader, asyncio.StreamWriter]: - host, port = _split_endpoint(endpoint) - ssl_ctx = None - if port == 443: - import ssl - ssl_ctx = ssl.create_default_context() - return await asyncio.open_connection(host, port, ssl=ssl_ctx, server_hostname=host if ssl_ctx else None) - -def _parse_challenge(raw: bytes) -> tuple[dict | None, str | None]: - try: - challenge = json.loads(raw) - nonce_hex = challenge.get("nonce") if challenge.get("type") == "challenge" else None - return challenge, nonce_hex - except (json.JSONDecodeError, AttributeError): - return None, None - -def _check_min_client_version(challenge: dict | None) -> None: - min_ver = challenge.get("min_client_version") if isinstance(challenge, dict) else None - if not min_ver: - return - from browser_cli.version_manager import parse_version - try: - client_ver = _USER_AGENT.split("/", 1)[1] - if parse_version(client_ver) < parse_version(min_ver): - raise BrowserNotConnected( - f"Client version {client_ver} is too old for this server " - f"(requires >= {min_ver}). Run: pip install --upgrade browser-cli" - ) - except (IndexError, ValueError): - pass - -def _clean_message(msg: dict) -> dict: - return {k: v for k, v in msg.items() if k not in _AUTH_FIELDS} - -def _get_pq_public_key(challenge: dict | None) -> str | None: - if not isinstance(challenge, dict): - return None - from browser_cli.auth import PQ_KEX_ALG - kex = challenge.get("pq_kex") - if isinstance(kex, dict) and kex.get("alg") == PQ_KEX_ALG and kex.get("public_key"): - return str(kex["public_key"]) - return None - -def _signed_payload(clean_msg: dict, private_key, nonce_hex: str, pq_shared_secret: bytes | None) -> dict: - from browser_cli.auth import PQ_KEX_ALG, pq_encrypt, public_key_hex, sign - - nonce = bytes.fromhex(nonce_hex) - sig = sign(private_key, nonce, clean_msg, pq_shared_secret) - pubkey = public_key_hex(private_key) - if pq_shared_secret is None: - return {**clean_msg, "pubkey": pubkey, "sig": sig.hex()} - - encrypted = pq_encrypt(pq_shared_secret, "request", json.dumps(clean_msg).encode("utf-8")) - return { - "id": clean_msg.get("id"), - "user_agent": clean_msg.get("user_agent"), - "pubkey": pubkey, - "sig": sig.hex(), - "pq_kex": clean_msg["pq_kex"], - "encrypted": encrypted, - } - -def _warn_no_pq(enabled: bool) -> None: - if enabled: - sys.stderr.write(_PQ_WARNING) - -def _build_auth_message( - msg: dict, - challenge: dict | None, - nonce_hex: str | None, - private_key, - encapsulate: Callable[[str], tuple[str, bytes]], - *, - warn_no_pq: bool = True, -) -> tuple[dict, bytes | None]: - if not nonce_hex or private_key is None: - _warn_no_pq(warn_no_pq) - return msg, None - - clean_msg = _clean_message(msg) - pq_shared_secret = None - pq_public_key = _get_pq_public_key(challenge) - if pq_public_key: - from browser_cli.auth import PQ_KEX_ALG - ciphertext_hex, pq_shared_secret = encapsulate(pq_public_key) - clean_msg["pq_kex"] = {"alg": PQ_KEX_ALG, "ciphertext": ciphertext_hex} - else: - _warn_no_pq(warn_no_pq) - - return _signed_payload(clean_msg, private_key, nonce_hex, pq_shared_secret), pq_shared_secret - -async def _build_auth_message_async( - msg: dict, - challenge: dict | None, - nonce_hex: str | None, - private_key, - *, - warn_no_pq: bool = True, -) -> tuple[dict, bytes | None]: - def encapsulate(public_key: str) -> tuple[str, bytes]: + def build_auth(sync_msg: dict, challenge: dict | None, nonce_hex: str | None, key): from browser_cli.auth import pq_kex_client_encapsulate - return pq_kex_client_encapsulate(public_key) + return _build_auth_message(sync_msg, challenge, nonce_hex, key, pq_kex_client_encapsulate, warn_no_pq=warn) - return await asyncio.to_thread( - _build_auth_message, - msg, - challenge, - nonce_hex, - private_key, - encapsulate, - warn_no_pq=warn_no_pq, - ) - -def _decode_pq_response(response: bytes | None, pq_shared_secret: bytes | None) -> bytes | None: - if response is None or pq_shared_secret is None: - return response - try: - from browser_cli.auth import pq_decrypt - envelope = json.loads(response) - if isinstance(envelope, dict) and "encrypted" in envelope: - return pq_decrypt(pq_shared_secret, "response", envelope["encrypted"]) - except Exception as e: - raise BrowserNotConnected(f"Cannot decrypt post-quantum remote response: {e}") from e - return response - -def _with_challenge(challenge_raw: bytes, msg: dict, private_key, build_auth: Callable[[dict, dict | None, str | None, object], T]) -> T: - if challenge_raw is None: - raise BrowserNotConnected("No challenge received from remote endpoint") - challenge, nonce_hex = _parse_challenge(challenge_raw) - _check_min_client_version(challenge) - return build_auth(msg, challenge, nonce_hex, private_key) - -def _should_warn_no_pq(msg: dict) -> bool: - return not bool(msg.pop("_suppress_pq_warning", False)) + with _open_socket(endpoint) as sock: + payload_msg, pq_shared_secret = _with_challenge(_recv_all(sock), msg, private_key, build_auth) + sock.sendall(frame(json.dumps(payload_msg).encode("utf-8"))) + return _decode_pq_response(_recv_all(sock), pq_shared_secret) async def _send_remote_async(endpoint: str, msg: dict, private_key=None, *, warn_no_pq: bool | None = None) -> bytes | None: reader, writer = await _open_async_connection(endpoint) @@ -216,15 +57,3 @@ async def _send_remote_async(endpoint: str, msg: dict, private_key=None, *, warn await writer.wait_closed() except Exception: pass - -def _send_remote(endpoint: str, msg: dict, private_key=None, *, warn_no_pq: bool | None = None) -> bytes | None: - warn = _should_warn_no_pq(msg) if warn_no_pq is None else warn_no_pq - - def build_auth(sync_msg: dict, challenge: dict | None, nonce_hex: str | None, key): - from browser_cli.auth import pq_kex_client_encapsulate - return _build_auth_message(sync_msg, challenge, nonce_hex, key, pq_kex_client_encapsulate, warn_no_pq=warn) - - with _open_socket(endpoint) as sock: - payload_msg, pq_shared_secret = _with_challenge(_recv_all(sock), msg, private_key, build_auth) - sock.sendall(frame(json.dumps(payload_msg).encode("utf-8"))) - return _decode_pq_response(_recv_all(sock), pq_shared_secret) diff --git a/browser_cli/sdk/base.py b/browser_cli/sdk/base.py index 026cb4b..08ffe95 100644 --- a/browser_cli/sdk/base.py +++ b/browser_cli/sdk/base.py @@ -9,7 +9,7 @@ from __future__ import annotations from collections.abc import Callable from functools import wraps -from typing import Any, TypeVar +from typing import Any, TypeVar, cast F = TypeVar("F", bound=Callable) _MISSING = object() @@ -54,8 +54,8 @@ def sdk_command( return _clone_default(default) return result - wrapper._browser_cli_command = name # type: ignore[attr-defined] - return wrapper # type: ignore[return-value] + setattr(wrapper, "_browser_cli_command", name) + return cast(F, wrapper) return decorator diff --git a/browser_cli/sdk/decorators.py b/browser_cli/sdk/decorators.py index 07d206f..1b62f37 100644 --- a/browser_cli/sdk/decorators.py +++ b/browser_cli/sdk/decorators.py @@ -5,7 +5,7 @@ import asyncio import functools import inspect from collections.abc import Callable -from typing import TypeVar +from typing import TypeVar, cast from browser_cli.sdk.base import Namespace from browser_cli.sdk.workflow_decorators import WorkflowDecoratorsMixin, _NO_INJECT @@ -53,7 +53,7 @@ class DecoratorsNS(WorkflowDecoratorsMixin, Namespace): finally: if cleanup is not None: await asyncio.to_thread(cleanup, value) - return async_wrapper # type: ignore[return-value] + return cast(F, async_wrapper) return WorkflowDecoratorsMixin._value_decorator( self, fn, get_value, keyword=keyword, cleanup=cleanup ) @@ -74,7 +74,7 @@ class DecoratorsNS(WorkflowDecoratorsMixin, Namespace): finally: if previous: await asyncio.to_thread(self._c.perf.set_profile, previous) - return async_wrapper # type: ignore[return-value] + return cast(F, async_wrapper) return WorkflowDecoratorsMixin.performance_profile(self, profile, restore=restore)(fn) return decorator @@ -101,7 +101,7 @@ class DecoratorsNS(WorkflowDecoratorsMixin, Namespace): raise if delay > 0: await asyncio.sleep(delay) - raise last_error # type: ignore[misc] - return async_wrapper # type: ignore[return-value] + raise cast(BaseException, last_error) + return cast(F, async_wrapper) return WorkflowDecoratorsMixin.retry(self, times=times, delay=delay, exceptions=exceptions)(fn) return decorator diff --git a/browser_cli/sdk/workflow_decorators.py b/browser_cli/sdk/workflow_decorators.py index edbe698..71fb521 100644 --- a/browser_cli/sdk/workflow_decorators.py +++ b/browser_cli/sdk/workflow_decorators.py @@ -4,11 +4,32 @@ from __future__ import annotations import functools import time from collections.abc import Callable -from typing import TypeVar +from typing import Protocol, TypeVar, cast F = TypeVar("F", bound=Callable) _NO_INJECT = object() +class _WorkflowTabs(Protocol): + def active(self): ... + def open(self, *args, **kwargs): ... + def watch_url(self, *args, **kwargs): ... + +class _WorkflowDom(Protocol): + def wait_for(self, *args, **kwargs): ... + +class _WorkflowPerf(Protocol): + def status(self): ... + def set_profile(self, profile: str): ... + +class _WorkflowSession(Protocol): + def save(self, name: str): ... + +class _WorkflowClient(Protocol): + tabs: _WorkflowTabs + dom: _WorkflowDom + perf: _WorkflowPerf + session: _WorkflowSession + class WorkflowDecoratorsMixin: """Shared implementation for sync and async workflow decorators. @@ -17,7 +38,7 @@ class WorkflowDecoratorsMixin: in lockstep. """ - _c: object + _c: _WorkflowClient @staticmethod def _inject(kwargs: dict, keyword: str | None, value): @@ -62,7 +83,7 @@ class WorkflowDecoratorsMixin: finally: if cleanup is not None: self._run(cleanup, value) - return wrapper # type: ignore[return-value] + return cast(F, wrapper) return decorator(func) if func is not None else decorator @@ -72,7 +93,7 @@ class WorkflowDecoratorsMixin: By default the tab is injected as ``tab=...``. Pass ``keyword=None`` to pass it as the first positional argument instead. """ - return self._value_decorator(func, self._c.tabs.active, keyword=keyword) # type: ignore[attr-defined] + return self._value_decorator(func, self._c.tabs.active, keyword=keyword) def new_tab( self, @@ -93,7 +114,7 @@ class WorkflowDecoratorsMixin: wrapped function returns or raises. """ def open_tab(): - return self._c.tabs.open( # type: ignore[attr-defined] + return self._c.tabs.open( url, wait=wait, timeout=timeout, @@ -124,7 +145,7 @@ class WorkflowDecoratorsMixin: the wrapped function. By default the result is not injected. """ def wait(): - return self._c.dom.wait_for( # type: ignore[attr-defined] + return self._c.dom.wait_for( selector, timeout=timeout, visible=visible, @@ -145,7 +166,7 @@ class WorkflowDecoratorsMixin: ): """Wait until a tab URL matches *pattern* before calling the function.""" def wait(): - return self._c.tabs.watch_url(pattern, tab_id=tab_id, timeout=timeout) # type: ignore[attr-defined] + return self._c.tabs.watch_url(pattern, tab_id=tab_id, timeout=timeout) inject = keyword if keyword is not None else _NO_INJECT return self._value_decorator(None, wait, keyword=inject) @@ -157,19 +178,19 @@ class WorkflowDecoratorsMixin: def wrapper(*args, **kwargs): previous = None if restore: - previous = self._run(self._c.perf.status).get("performanceProfile") # type: ignore[attr-defined] - self._run(self._c.perf.set_profile, profile) # type: ignore[attr-defined] + previous = self._run(self._c.perf.status).get("performanceProfile") + self._run(self._c.perf.set_profile, profile) try: return self._call_wrapped(fn, *args, **kwargs) finally: if previous: - self._run(self._c.perf.set_profile, previous) # type: ignore[attr-defined] - return wrapper # type: ignore[return-value] + self._run(self._c.perf.set_profile, previous) + return cast(F, wrapper) return decorator def save_session_before(self, name: str): """Save the current browser session before running the function.""" - return self._value_decorator(None, lambda: self._c.session.save(name), keyword=_NO_INJECT) # type: ignore[attr-defined] + return self._value_decorator(None, lambda: self._c.session.save(name), keyword=_NO_INJECT) def retry( self, @@ -194,7 +215,7 @@ class WorkflowDecoratorsMixin: raise if delay > 0: self._sleep(delay) - raise last_error # type: ignore[misc] - return wrapper # type: ignore[return-value] + raise cast(BaseException, last_error) + return cast(F, wrapper) return decorator diff --git a/browser_cli/serve/challenge.py b/browser_cli/serve/challenge.py new file mode 100644 index 0000000..70e787a --- /dev/null +++ b/browser_cli/serve/challenge.py @@ -0,0 +1,31 @@ +"""Challenge-frame helpers for ``browser-cli serve``.""" +from __future__ import annotations + +import asyncio +import secrets +from pathlib import Path + +from browser_cli.version_manager import PROTOCOL_MIN_CLIENT, get_installed_version + +async def load_auth_keys(auth_keys_path: Path | None) -> list[str] | None: + if auth_keys_path is None: + return None + from browser_cli.auth import load_authorized_keys + return await asyncio.to_thread(load_authorized_keys, auth_keys_path) + +async def build_challenge(auth_keys_path: Path | None) -> tuple[str, object | None, dict]: + nonce = secrets.token_hex(32) + pq_private_key = None + challenge_msg = { + "type": "challenge", + "nonce": nonce, + "server_version": get_installed_version(), + "min_client_version": PROTOCOL_MIN_CLIENT, + } + if auth_keys_path is not None: + from browser_cli.auth import PQ_KEX_ALG, pq_kex_server_keypair + pq_keypair = await asyncio.to_thread(pq_kex_server_keypair) + if pq_keypair is not None: + pq_private_key, pq_public_key = pq_keypair + challenge_msg["pq_kex"] = {"alg": PQ_KEX_ALG, "public_key": pq_public_key.hex()} + return nonce, pq_private_key, challenge_msg diff --git a/browser_cli/serve/runtime.py b/browser_cli/serve/runtime.py index e257cad..2331a8d 100644 --- a/browser_cli/serve/runtime.py +++ b/browser_cli/serve/runtime.py @@ -8,7 +8,6 @@ from __future__ import annotations import asyncio import json -import secrets import socket from dataclasses import dataclass from pathlib import Path @@ -17,10 +16,10 @@ from browser_cli import transport from browser_cli.compat import adapt_auth from browser_cli.framing import async_recv_frame, async_send_frame from browser_cli.serve.auth import ServeAuthMixin +from browser_cli.serve.challenge import build_challenge as _build_challenge, load_auth_keys as _load_auth_keys from browser_cli.serve.control import ServeControlMixin from browser_cli.serve.logging import console, log_request from browser_cli.serve.proxy import ServeProxyMixin -from browser_cli.version_manager import PROTOCOL_MIN_CLIENT, get_installed_version async def _async_framed_send(writer: asyncio.StreamWriter, data: bytes) -> None: await async_send_frame(writer, data) @@ -140,29 +139,6 @@ async def _async_handle_client( except Exception: pass -async def _load_auth_keys(auth_keys_path: Path | None) -> list[str] | None: - if auth_keys_path is None: - return None - from browser_cli.auth import load_authorized_keys - return await asyncio.to_thread(load_authorized_keys, auth_keys_path) - -async def _build_challenge(auth_keys_path: Path | None) -> tuple[str, object | None, dict]: - nonce = secrets.token_hex(32) - pq_private_key = None - challenge_msg = { - "type": "challenge", - "nonce": nonce, - "server_version": get_installed_version(), - "min_client_version": PROTOCOL_MIN_CLIENT, - } - if auth_keys_path is not None: - from browser_cli.auth import PQ_KEX_ALG, pq_kex_server_keypair - pq_keypair = await asyncio.to_thread(pq_kex_server_keypair) - if pq_keypair is not None: - pq_private_key, pq_public_key = pq_keypair - challenge_msg["pq_kex"] = {"alg": PQ_KEX_ALG, "public_key": pq_public_key.hex()} - return nonce, pq_private_key, challenge_msg - def _handle_client( client_sock: socket.socket, addr: tuple, diff --git a/browser_cli/transport.py b/browser_cli/transport.py deleted file mode 100644 index c919f1a..0000000 --- a/browser_cli/transport.py +++ /dev/null @@ -1,214 +0,0 @@ -"""Response payload encoding for the TCP serve <-> client leg. - -The wire frame stays ``4-byte LE length + payload``. The payload is made -self-describing so old peers keep working unchanged: - - * A payload that starts with ``{`` or ``[`` is plain JSON (the historical - format). Old clients and old servers only ever produce/consume this. - * Any other leading byte is a 1-byte codec tag followed by the encoded body. - The tag's high nibble selects serialization, the low nibble compression:: - - tag = (serialization << 4) | compression - -This is only ever emitted toward a peer that advertised support for it, so it -is fully backward compatible: clients announce what they can decode via the -``accept_encoding`` field in their request, and the server encodes the -response accordingly. Requests themselves stay plain JSON (they are tiny). - -Compression is the big win — response payloads (``extract.html``, -``dom.query``, ``tabs.list`` over hundreds of tabs, base64 screenshots) are -heavy and text-like. msgpack additionally lets ``tabs.screenshot`` ship the -image as raw bytes instead of a base64 data URL (~33% smaller before -compression); the client transparently rebuilds the data URL so the SDK/CLI -API is unchanged. -""" -from __future__ import annotations - -import base64 -import gzip -import json -import re -import zlib - -from browser_cli.constants import ( - COMP_GZIP, - COMP_NONE, - COMP_ZLIB, - COMP_ZSTD, - DEFAULT_TRANSPORT_THRESHOLD, - SER_JSON, - SER_MSGPACK, -) - -try: # optional: better ratio + speed than zlib/gzip - import zstandard as _zstd -except Exception: # pragma: no cover - depends on optional extra - _zstd = None - -try: # optional: alternate serialization + raw binary for screenshots - import msgpack as _msgpack -except Exception: # pragma: no cover - depends on optional extra - _msgpack = None - -# ── codec ids ──────────────────────────────────────────────────────────────── -_SER_NAME = {SER_JSON: "json", SER_MSGPACK: "msgpack"} -_SER_ID = {v: k for k, v in _SER_NAME.items()} -_COMP_NAME = {COMP_NONE: "none", COMP_ZLIB: "zlib", COMP_GZIP: "gzip", COMP_ZSTD: "zstd"} -_COMP_ID = {v: k for k, v in _COMP_NAME.items()} - -# Don't compress payloads smaller than this — the header/CPU cost is not worth it. - -# JSON top-level values always start with one of these bytes; a tag byte never does. -_JSON_FIRST_BYTES = frozenset(b"{[") - -def msgpack_available() -> bool: - return _msgpack is not None - -def zstd_available() -> bool: - return _zstd is not None - -def supported_serialization() -> list[str]: - """Serializations this build can produce/consume, best first.""" - return (["msgpack"] if _msgpack is not None else []) + ["json"] - -def supported_compression() -> list[str]: - """Compression codecs this build can produce/consume, best first.""" - return (["zstd"] if _zstd is not None else []) + ["gzip", "zlib"] - -def client_accept_encoding() -> dict: - """What the local client advertises it can decode (sent with each request).""" - return {"ser": supported_serialization(), "comp": supported_compression()} - -# ── compression primitives ──────────────────────────────────────────────────── - -def _compress(comp_id: int, data: bytes) -> bytes: - if comp_id == COMP_NONE: - return data - if comp_id == COMP_ZLIB: - return zlib.compress(data, 6) - if comp_id == COMP_GZIP: - return gzip.compress(data, compresslevel=6) - if comp_id == COMP_ZSTD: - if _zstd is None: - raise ValueError("zstd compression requested but zstandard is not installed") - return _zstd.ZstdCompressor(level=10).compress(data) - raise ValueError(f"unknown compression id {comp_id}") - -def _decompress(comp_id: int, data: bytes) -> bytes: - if comp_id == COMP_NONE: - return data - if comp_id == COMP_ZLIB: - return zlib.decompress(data) - if comp_id == COMP_GZIP: - return gzip.decompress(data) - if comp_id == COMP_ZSTD: - if _zstd is None: - raise ValueError("zstd payload received but zstandard is not installed") - return _zstd.ZstdDecompressor().decompress(data) - raise ValueError(f"unknown compression id {comp_id}") - -# ── codec negotiation ────────────────────────────────────────────────────────── - -def _choose(accept: dict | None) -> tuple[int, int]: - """Pick (serialization_id, compression_id) the peer accepts, server preference first.""" - accept = accept if isinstance(accept, dict) else {} - accept_ser = accept.get("ser") or ["json"] - accept_comp = accept.get("comp") or [] - - ser = SER_JSON - if _msgpack is not None and "msgpack" in accept_ser: - ser = SER_MSGPACK - - comp = COMP_NONE - for name in supported_compression(): # server preference: zstd > gzip > zlib - if name in accept_comp: - comp = _COMP_ID[name] - break - return ser, comp - -# ── raw-binary hoisting (screenshots) ────────────────────────────────────────── - -_DATA_URL_RE = re.compile(r"^data:([^;,]+);base64,(.+)$", re.S) -_B64_MARKER = "__b64__" - -def _hoist_screenshot(obj, command: str | None): - """Replace a screenshot data URL with raw bytes so msgpack ships it unencoded. - - Gated to ``tabs.screenshot`` so we never touch arbitrary page-derived data. - """ - if command != "tabs.screenshot" or not isinstance(obj, dict): - return obj - data = obj.get("data") - if not isinstance(data, dict): - return obj - url = data.get("dataUrl") - if not isinstance(url, str): - return obj - m = _DATA_URL_RE.match(url) - if not m: - return obj - try: - raw = base64.b64decode(m.group(2)) - except Exception: - return obj - new_data = dict(data) - new_data["dataUrl"] = {_B64_MARKER: True, "mime": m.group(1), "raw": raw} - return {**obj, "data": new_data} - -def _unhoist_binary(obj): - """Rebuild any hoisted data URL so callers see the original string again.""" - if isinstance(obj, dict): - raw = obj.get("raw") - if obj.get(_B64_MARKER) and isinstance(raw, (bytes, bytearray)): - mime = obj.get("mime") or "application/octet-stream" - return f"data:{mime};base64," + base64.b64encode(bytes(raw)).decode("ascii") - return {k: _unhoist_binary(v) for k, v in obj.items()} - if isinstance(obj, list): - return [_unhoist_binary(v) for v in obj] - return obj - -# ── encode / decode ───────────────────────────────────────────────────────────── - -def encode_response(obj, accept: dict | None = None, command: str | None = None, - threshold: int = DEFAULT_TRANSPORT_THRESHOLD) -> bytes: - """Encode a response object for the chosen/accepted codec. - - Returns bare JSON bytes when no encoding is negotiated, which is byte-for-byte - what an old server would have sent. - """ - ser, comp = _choose(accept) - - if ser == SER_MSGPACK: - body = _msgpack.packb(_hoist_screenshot(obj, command), use_bin_type=True) - else: - body = json.dumps(obj).encode("utf-8") - - if comp != COMP_NONE and len(body) >= threshold: - body = _compress(comp, body) - else: - comp = COMP_NONE - - if ser == SER_JSON and comp == COMP_NONE: - return body # plain JSON — historical wire format, no tag byte - - return bytes([(ser << 4) | comp]) + body - -def decode_response(raw: bytes | None): - """Decode a payload produced by :func:`encode_response` (or plain JSON).""" - if raw is None: - return None - if not raw: - raise ValueError("empty response payload") - if raw[0] in _JSON_FIRST_BYTES: - return json.loads(raw) - - tag = raw[0] - ser, comp = tag >> 4, tag & 0x0F - body = _decompress(comp, raw[1:]) - if ser == SER_MSGPACK: - if _msgpack is None: - raise ValueError("msgpack payload received but msgpack is not installed") - return _unhoist_binary(_msgpack.unpackb(body, raw=False)) - if ser == SER_JSON: - return json.loads(body) - raise ValueError(f"unknown serialization id {ser}") diff --git a/browser_cli/transport/__init__.py b/browser_cli/transport/__init__.py new file mode 100644 index 0000000..3a7aab1 --- /dev/null +++ b/browser_cli/transport/__init__.py @@ -0,0 +1,72 @@ +"""Response payload encoding for the TCP serve <-> client leg. + +The wire frame stays ``4-byte LE length + payload``. Payloads are plain JSON +for legacy peers, or a 1-byte codec tag followed by serialized/compressed data +when the peer advertised support for it. +""" +from __future__ import annotations + +import json + +from browser_cli.constants import COMP_GZIP, COMP_NONE, COMP_ZLIB, COMP_ZSTD, DEFAULT_TRANSPORT_THRESHOLD, SER_JSON, SER_MSGPACK +from browser_cli.transport.binary import hoist_screenshot as _hoist_screenshot, unhoist_binary as _unhoist_binary +from browser_cli.transport.codecs import ( + JSON_FIRST_BYTES as _JSON_FIRST_BYTES, + _msgpack, + choose_codec as _choose, + client_accept_encoding, + compress_payload as _compress, + decompress_payload as _decompress, + msgpack_available, + supported_compression, + supported_serialization, + zstd_available, +) + +def encode_response( + obj, + accept: dict | None = None, + command: str | None = None, + threshold: int = DEFAULT_TRANSPORT_THRESHOLD, +) -> bytes: + """Encode a response object for the chosen/accepted codec. + + Returns bare JSON bytes when no encoding is negotiated, which is byte-for-byte + what an old server would have sent. + """ + ser, comp = _choose(accept) + + if ser == SER_MSGPACK: + body = _msgpack.packb(_hoist_screenshot(obj, command), use_bin_type=True) + else: + body = json.dumps(obj).encode("utf-8") + + if comp != COMP_NONE and len(body) >= threshold: + body = _compress(comp, body) + else: + comp = COMP_NONE + + if ser == SER_JSON and comp == COMP_NONE: + return body # plain JSON — historical wire format, no tag byte + + return bytes([(ser << 4) | comp]) + body + +def decode_response(raw: bytes | None): + """Decode a payload produced by :func:`encode_response` (or plain JSON).""" + if raw is None: + return None + if not raw: + raise ValueError("empty response payload") + if raw[0] in _JSON_FIRST_BYTES: + return json.loads(raw) + + tag = raw[0] + ser, comp = tag >> 4, tag & 0x0F + body = _decompress(comp, raw[1:]) + if ser == SER_MSGPACK: + if _msgpack is None: + raise ValueError("msgpack payload received but msgpack is not installed") + return _unhoist_binary(_msgpack.unpackb(body, raw=False)) + if ser == SER_JSON: + return json.loads(body) + raise ValueError(f"unknown serialization id {ser}") diff --git a/browser_cli/transport/binary.py b/browser_cli/transport/binary.py new file mode 100644 index 0000000..8abe692 --- /dev/null +++ b/browser_cli/transport/binary.py @@ -0,0 +1,44 @@ +"""Raw-binary hoisting helpers for encoded response payloads.""" +from __future__ import annotations + +import base64 +import re + +DATA_URL_RE = re.compile(r"^data:([^;,]+);base64,(.+)$", re.S) +B64_MARKER = "__b64__" + +def hoist_screenshot(obj, command: str | None): + """Replace a screenshot data URL with raw bytes so msgpack ships it unencoded. + + Gated to ``tabs.screenshot`` so arbitrary page-derived data is never touched. + """ + if command != "tabs.screenshot" or not isinstance(obj, dict): + return obj + data = obj.get("data") + if not isinstance(data, dict): + return obj + url = data.get("dataUrl") + if not isinstance(url, str): + return obj + match = DATA_URL_RE.match(url) + if not match: + return obj + try: + raw = base64.b64decode(match.group(2)) + except Exception: + return obj + new_data = dict(data) + new_data["dataUrl"] = {B64_MARKER: True, "mime": match.group(1), "raw": raw} + return {**obj, "data": new_data} + +def unhoist_binary(obj): + """Rebuild any hoisted data URL so callers see the original string again.""" + if isinstance(obj, dict): + raw = obj.get("raw") + if obj.get(B64_MARKER) and isinstance(raw, (bytes, bytearray)): + mime = obj.get("mime") or "application/octet-stream" + return f"data:{mime};base64," + base64.b64encode(bytes(raw)).decode("ascii") + return {key: unhoist_binary(value) for key, value in obj.items()} + if isinstance(obj, list): + return [unhoist_binary(value) for value in obj] + return obj diff --git a/browser_cli/transport/codecs.py b/browser_cli/transport/codecs.py new file mode 100644 index 0000000..bf88b61 --- /dev/null +++ b/browser_cli/transport/codecs.py @@ -0,0 +1,84 @@ +"""Serialization/compression primitives for TCP response payloads.""" +from __future__ import annotations + +import gzip +import zlib + +from browser_cli.constants import COMP_GZIP, COMP_NONE, COMP_ZLIB, COMP_ZSTD, SER_JSON, SER_MSGPACK + +try: # optional: better ratio + speed than zlib/gzip + import zstandard as _zstd +except Exception: # pragma: no cover - depends on optional extra + _zstd = None + +try: # optional: alternate serialization + raw binary for screenshots + import msgpack as _msgpack +except Exception: # pragma: no cover - depends on optional extra + _msgpack = None + +SERIALIZATION_NAME = {SER_JSON: "json", SER_MSGPACK: "msgpack"} +SERIALIZATION_ID = {value: key for key, value in SERIALIZATION_NAME.items()} +COMPRESSION_NAME = {COMP_NONE: "none", COMP_ZLIB: "zlib", COMP_GZIP: "gzip", COMP_ZSTD: "zstd"} +COMPRESSION_ID = {value: key for key, value in COMPRESSION_NAME.items()} +JSON_FIRST_BYTES = frozenset(b"{[") + +def msgpack_available() -> bool: + return _msgpack is not None + +def zstd_available() -> bool: + return _zstd is not None + +def supported_serialization() -> list[str]: + """Serializations this build can produce/consume, best first.""" + return (["msgpack"] if _msgpack is not None else []) + ["json"] + +def supported_compression() -> list[str]: + """Compression codecs this build can produce/consume, best first.""" + return (["zstd"] if _zstd is not None else []) + ["gzip", "zlib"] + +def client_accept_encoding() -> dict: + """What the local client advertises it can decode (sent with each request).""" + return {"ser": supported_serialization(), "comp": supported_compression()} + +def compress_payload(comp_id: int, data: bytes) -> bytes: + if comp_id == COMP_NONE: + return data + if comp_id == COMP_ZLIB: + return zlib.compress(data, 6) + if comp_id == COMP_GZIP: + return gzip.compress(data, compresslevel=6) + if comp_id == COMP_ZSTD: + if _zstd is None: + raise ValueError("zstd compression requested but zstandard is not installed") + return _zstd.ZstdCompressor(level=10).compress(data) + raise ValueError(f"unknown compression id {comp_id}") + +def decompress_payload(comp_id: int, data: bytes) -> bytes: + if comp_id == COMP_NONE: + return data + if comp_id == COMP_ZLIB: + return zlib.decompress(data) + if comp_id == COMP_GZIP: + return gzip.decompress(data) + if comp_id == COMP_ZSTD: + if _zstd is None: + raise ValueError("zstd payload received but zstandard is not installed") + return _zstd.ZstdDecompressor().decompress(data) + raise ValueError(f"unknown compression id {comp_id}") + +def choose_codec(accept: dict | None) -> tuple[int, int]: + """Pick (serialization_id, compression_id) the peer accepts, server preference first.""" + accept = accept if isinstance(accept, dict) else {} + accept_ser = accept.get("ser") or ["json"] + accept_comp = accept.get("comp") or [] + + serialization = SER_JSON + if _msgpack is not None and "msgpack" in accept_ser: + serialization = SER_MSGPACK + + compression = COMP_NONE + for name in supported_compression(): # server preference: zstd > gzip > zlib + if name in accept_comp: + compression = COMPRESSION_ID[name] + break + return serialization, compression diff --git a/extension/manifest.json b/extension/manifest.json index 6448cf7..f748d32 100644 --- a/extension/manifest.json +++ b/extension/manifest.json @@ -1,7 +1,7 @@ { "manifest_version": 3, "name": "browser-cli", - "version": "0.15.2", + "version": "0.15.3", "description": "Control your browser from the terminal or Python SDK", "browser_specific_settings": { "gecko": { diff --git a/extension/src/content/markdown.ts b/extension/src/content/markdown.ts deleted file mode 100644 index b843c57..0000000 --- a/extension/src/content/markdown.ts +++ /dev/null @@ -1,404 +0,0 @@ -import type { ContentArgs } from '../types'; - -export function extractMarkdown({ selector }: ContentArgs) { - const BLOCKS = new Set([ - "article", "aside", "blockquote", "body", "div", "dl", "fieldset", "figcaption", - "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", - "li", "main", "nav", "ol", "p", "pre", "section", "table", "tbody", "td", "tfoot", - "th", "thead", "tr", "ul" - ]); - const NOISE_SELECTOR = [ - "script", - "style", - "noscript", - "template", - "svg", - "canvas", - "iframe", - "dialog", - "button", - "input", - "textarea", - "select", - "option", - "form", - "[hidden]", - "[aria-hidden='true']", - ".sr-only", - "[class*='sr-only']", - "[class*='file-tile']", - "form[data-type='unified-composer']", - ".composer-btn", - "[data-composer-surface='true']", - "#thread-bottom-container", - "[data-testid*='action-button']", - ].join(", "); - - function normalizeText(value: string) { - return value.replace(/\s+/g, " ").trim(); - } - - function normalizeInline(value: string) { - return value - .replace(/[ \t]+\n/g, "\n") - .replace(/\n[ \t]+/g, "\n") - .replace(/\n{3,}/g, "\n\n") - .replace(/[ \t]{2,}/g, " ") - .trim(); - } - - function collapseBlankLines(value: string) { - return value - .replace(/[ \t]+\n/g, "\n") - .replace(/\n{3,}/g, "\n\n") - .trim(); - } - - function escapeMarkdown(text: string) { - return text.replace(/([\\`[\]])/g, "\\$1"); - } - - function escapeTableCell(text: string) { - return text.replace(/\|/g, "\\|").replace(/\n+/g, " ").trim(); - } - - function absoluteUrl(attr: string | null | undefined, fallback?: string) { - return attr || fallback || ""; - } - - function isNoiseElement(node: Node | null): boolean { - if (!node || node.nodeType !== Node.ELEMENT_NODE) return false; - const el = node as Element; - const tag = el.tagName.toLowerCase(); - if (["script", "style", "noscript", "template", "svg", "canvas", "iframe", "dialog"].includes(tag)) return true; - if (["button", "input", "textarea", "select", "option", "form"].includes(tag)) return true; - if (el.hasAttribute("hidden")) return true; - if ((el.getAttribute("aria-hidden") || "").toLowerCase() === "true") return true; - if (el.matches(".sr-only, [class*='sr-only']")) return true; - if (el.matches("[class*='file-tile'], form[data-type='unified-composer'], .composer-btn, [data-composer-surface='true'], #thread-bottom-container")) return true; - if (el.matches("[data-testid*='action-button']")) return true; - return false; - } - - function stripNoise(root: Element): Element { - const clone = root.cloneNode(true) as Element; - clone.querySelectorAll(NOISE_SELECTOR).forEach(node => node.remove()); - return clone; - } - - function candidateScore(node: Element) { - const text = normalizeText((node as HTMLElement).innerText || ""); - if (!text) return -Infinity; - - const headings = node.querySelectorAll("h1, h2, h3, h4, h5, h6").length; - const paragraphs = node.querySelectorAll("p").length; - const listItems = node.querySelectorAll("li").length; - const tables = node.querySelectorAll("table").length; - const codeBlocks = node.querySelectorAll("pre, code").length; - const images = node.querySelectorAll("img, figure").length; - const mainLike = node.matches("main, article, [role='main']") ? 1 : 0; - const proseBlocks = node.matches(".markdown, .prose, [data-message-author-role='assistant']") ? 1 : 0; - const buttons = node.querySelectorAll("button, input, textarea, select").length; - const forms = node.querySelectorAll("form").length; - const svgs = node.querySelectorAll("svg, canvas").length; - - return text.length - + (mainLike * 4000) - + (proseBlocks * 5000) - + (headings * 250) - + (paragraphs * 60) - + (listItems * 35) - + (tables * 80) - + (codeBlocks * 60) - + (images * 25) - - (buttons * 120) - - (forms * 200) - - (svgs * 40); - } - - function pickRoot() { - if (selector) { - const matched = document.querySelector(selector); - if (!matched) throw new Error(`No element: ${selector}`); - return matched; - } - - const candidates = Array.from(document.querySelectorAll( - "main, article, [role='main'], section, .markdown, .prose, [data-message-author-role]" - )) - .filter(node => normalizeText((node as HTMLElement).innerText || "").length > 0); - if (!candidates.length) return document.body; - candidates.sort((a, b) => candidateScore(b) - candidateScore(a)); - return candidates[0]; - } - - function inlineText(node: Node): string { - if (node.nodeType === Node.TEXT_NODE) { - return escapeMarkdown(node.textContent || ""); - } - if (node.nodeType !== Node.ELEMENT_NODE) return ""; - if (isNoiseElement(node)) return ""; - - const el = node as HTMLElement; - const tag = el.tagName.toLowerCase(); - if (tag === "br") return "\n"; - if (tag === "img") { - const img = el as HTMLImageElement; - const src = absoluteUrl(img.getAttribute("src"), img.src); - if (!src) return ""; - const alt = normalizeText(img.getAttribute("alt") || ""); - return alt ? `![${escapeMarkdown(alt)}](${src})` : `![](${src})`; - } - if (tag === "a") { - const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join("")); - const href = absoluteUrl(el.getAttribute("href"), (el as HTMLAnchorElement).href); - if (!href) return text; - return `[${text || href}](${href})`; - } - if (tag === "code") { - const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join("")); - return text ? `\`${text.replace(/`/g, "\\`")}\`` : ""; - } - if (tag === "strong" || tag === "b") { - const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join("")); - return text ? `**${text}**` : ""; - } - if (tag === "em" || tag === "i") { - const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join("")); - return text ? `*${text}*` : ""; - } - - const chunks: string[] = []; - for (const child of el.childNodes) { - const rendered = inlineText(child); - if (!rendered) continue; - chunks.push(rendered); - if (child.nodeType === Node.ELEMENT_NODE && BLOCKS.has((child as Element).tagName.toLowerCase())) { - chunks.push("\n"); - } - } - return chunks.join(""); - } - - function textBlock(node: Node): string { - return collapseBlankLines(normalizeInline(Array.from(node.childNodes).map(inlineText).join(""))); - } - - function preserveNodeText(node: Node): string { - if (node.nodeType === Node.TEXT_NODE) { - return node.textContent || ""; - } - if (node.nodeType !== Node.ELEMENT_NODE) return ""; - - const el = node as HTMLElement; - const tag = el.tagName.toLowerCase(); - if (tag === "br") return "\n"; - - const parts: string[] = []; - for (const child of el.childNodes) { - const rendered = preserveNodeText(child); - if (!rendered) continue; - parts.push(rendered); - } - - if (["div", "p", "li"].includes(tag)) { - return `${parts.join("")}\n`; - } - return parts.join(""); - } - - function repairFlattenedDiagram(text: string): string { - if (text.includes("\n")) return text; - const markerCount = (text.match(/[│▼├└]/g) || []).length; - if (markerCount < 2) return text; - - let repaired = text; - repaired = repaired.replace(/\s{2,}([│▼])/g, "\n $1"); - repaired = repaired.replace(/([│▼])\s{2,}/g, "$1\n"); - repaired = repaired.replace(/([│▼])(?=[^\s\n│▼├└])/g, "$1\n"); - repaired = repaired.replace(/(?<=[^\s\n])([├└])/g, "\n$1"); - repaired = repaired.replace(/([^\s\n])(\()/g, "$1\n$2"); - return repaired - .split("\n") - .map(line => line.replace(/\s+$/, "")) - .filter(line => line.trim()) - .join("\n"); - } - - function convertDashListsToBranches(lines: string[]): string[] { - const converted: string[] = []; - let index = 0; - while (index < lines.length) { - const match = lines[index].match(/^(\s*)-\s+(.*)$/); - if (!match) { - converted.push(lines[index]); - index += 1; - continue; - } - - const indent = match[1]; - const items = []; - while (index < lines.length) { - const nextMatch = lines[index].match(new RegExp(`^${indent.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}-\\s+(.*)$`)); - if (!nextMatch) break; - items.push(nextMatch[1]); - index += 1; - } - - items.forEach((item, itemIndex) => { - const branch = itemIndex === items.length - 1 ? "└" : "├"; - converted.push(`${indent}${branch} ${item}`); - }); - } - return converted; - } - - function normalizeCodeBlock(text: string): string { - let lines = text.replace(/\r\n?/g, "\n").split("\n").map(line => line.replace(/\s+$/, "")); - while (lines.length && !lines[0].trim()) lines.shift(); - while (lines.length && !lines[lines.length - 1].trim()) lines.pop(); - - const flattened = repairFlattenedDiagram(lines.join("\n")); - lines = flattened ? flattened.split("\n") : []; - lines = lines.map(line => { - const trimmed = line.trim(); - if ((trimmed === "│" || trimmed === "▼") && !/^\s+[│▼]\s*$/.test(line)) { - return ` ${trimmed}`; - } - return line; - }); - lines = convertDashListsToBranches(lines); - return lines.join("\n"); - } - - function tableToMarkdown(table: Element) { - const rows = Array.from(table.querySelectorAll("tr")) - .map(row => Array.from(row.children) - .filter(cell => cell.tagName === "TD" || cell.tagName === "TH") - .map(cell => escapeTableCell(textBlock(cell))) - ) - .filter(cells => cells.length > 0); - if (!rows.length) return ""; - - const widths = rows.reduce((max, row) => Math.max(max, row.length), 0); - const normalizedRows = rows.map(row => { - const next = row.slice(); - while (next.length < widths) next.push(""); - return next; - }); - - let headers = normalizedRows[0]; - let bodyRows = normalizedRows.slice(1); - const firstRowIsBlank = headers.every(cell => !cell.trim()); - if (firstRowIsBlank && normalizedRows.length > 1) { - headers = normalizedRows[1]; - bodyRows = normalizedRows.slice(2); - } - - const firstRow = table.querySelector("tr"); - const thead = table.querySelector("thead"); - const firstRowHasTh = firstRow && Array.from(firstRow.children).some(cell => cell.tagName === "TH"); - if (!(thead || firstRowHasTh || firstRowIsBlank)) { - headers = new Array(widths).fill(""); - bodyRows = normalizedRows; - } - - const separator = new Array(widths).fill("---"); - const lines = [ - `| ${headers.join(" | ")} |`, - `| ${separator.join(" | ")} |`, - ]; - for (const row of bodyRows) { - lines.push(`| ${row.join(" | ")} |`); - } - return lines.join("\n"); - } - - function listToMarkdown(list: Element, depth = 0): string { - const ordered = list.tagName.toLowerCase() === "ol"; - const items: string[] = []; - const children = Array.from(list.children).filter(child => child.tagName === "LI"); - children.forEach((item, index) => { - const marker = ordered ? `${index + 1}. ` : "- "; - const indent = " ".repeat(depth); - const nested: string[] = []; - const content: string[] = []; - - for (const child of item.childNodes) { - const childEl = child as Element; - if (child.nodeType === Node.ELEMENT_NODE && (childEl.tagName === "UL" || childEl.tagName === "OL")) { - nested.push(listToMarkdown(childEl, depth + 1)); - } else { - content.push(inlineText(child)); - } - } - - const line = collapseBlankLines(normalizeInline(content.join(""))); - if (line) { - const lineParts = line.split("\n"); - items.push(`${indent}${marker}${lineParts[0]}`); - const continuationIndent = `${indent}${" ".repeat(marker.length)}`; - lineParts.slice(1).forEach(part => items.push(`${continuationIndent}${part}`)); - } - nested.filter(Boolean).forEach(block => items.push(block)); - }); - return items.join("\n"); - } - - function blockToMarkdown(node: Node): string { - if (node.nodeType === Node.TEXT_NODE) { - return normalizeText(node.textContent || ""); - } - if (node.nodeType !== Node.ELEMENT_NODE) return ""; - if (isNoiseElement(node)) return ""; - - const el = node as HTMLElement; - const tag = el.tagName.toLowerCase(); - if (tag === "table") return tableToMarkdown(el); - if (tag === "ul" || tag === "ol") return listToMarkdown(el); - if (el.matches(".cm-editor[data-is-code-block-view='true']")) { - const lines = Array.from(el.querySelectorAll(".cm-line")).map(line => { - const text = preserveNodeText(line); - return text === "\n" ? "" : text.replace(/\n$/, ""); - }); - const code = normalizeCodeBlock(lines.join("\n")); - return code ? `\`\`\`\n${code}\n\`\`\`` : ""; - } - if (tag === "pre") { - const code = normalizeCodeBlock(preserveNodeText(el)); - return code ? `\`\`\`\n${code}\n\`\`\`` : ""; - } - if (tag === "blockquote") { - const content = collapseBlankLines(Array.from(el.childNodes).map(blockToMarkdown).join("\n\n")); - return content - .split("\n") - .map(line => line ? `> ${line}` : ">") - .join("\n"); - } - if (/^h[1-6]$/.test(tag)) { - const level = Number(tag.slice(1)); - const text = textBlock(el); - return text ? `${"#".repeat(level)} ${text}` : ""; - } - if (tag === "p" || tag === "figcaption") { - return textBlock(el); - } - if (tag === "hr") { - return "---"; - } - if (tag === "img") { - return inlineText(el); - } - - const childBlocks = Array.from(el.childNodes) - .map(child => blockToMarkdown(child)) - .filter(Boolean); - if (childBlocks.length) return collapseBlankLines(childBlocks.join("\n\n")); - - return textBlock(node); - } - - const root = stripNoise(pickRoot()); - const markdown = blockToMarkdown(root); - return collapseBlankLines(markdown); -} diff --git a/extension/src/content/markdown/code.ts b/extension/src/content/markdown/code.ts new file mode 100644 index 0000000..f1a108d --- /dev/null +++ b/extension/src/content/markdown/code.ts @@ -0,0 +1,63 @@ +function repairFlattenedDiagram(text: string): string { + if (text.includes("\n")) return text; + const markerCount = (text.match(/[│▼├└]/g) || []).length; + if (markerCount < 2) return text; + + let repaired = text; + repaired = repaired.replace(/\s{2,}([│▼])/g, "\n $1"); + repaired = repaired.replace(/([│▼])\s{2,}/g, "$1\n"); + repaired = repaired.replace(/([│▼])(?=[^\s\n│▼├└])/g, "$1\n"); + repaired = repaired.replace(/(?<=[^\s\n])([├└])/g, "\n$1"); + repaired = repaired.replace(/([^\s\n])(\()/g, "$1\n$2"); + return repaired + .split("\n") + .map(line => line.replace(/\s+$/, "")) + .filter(line => line.trim()) + .join("\n"); +} + +function convertDashListsToBranches(lines: string[]): string[] { + const converted: string[] = []; + let index = 0; + while (index < lines.length) { + const match = lines[index].match(/^(\s*)-\s+(.*)$/); + if (!match) { + converted.push(lines[index]); + index += 1; + continue; + } + + const indent = match[1]; + const items = []; + while (index < lines.length) { + const nextMatch = lines[index].match(new RegExp(`^${indent.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}-\\s+(.*)$`)); + if (!nextMatch) break; + items.push(nextMatch[1]); + index += 1; + } + + items.forEach((item, itemIndex) => { + const branch = itemIndex === items.length - 1 ? "└" : "├"; + converted.push(`${indent}${branch} ${item}`); + }); + } + return converted; +} + +export function normalizeCodeBlock(text: string): string { + let lines = text.replace(/\r\n?/g, "\n").split("\n").map(line => line.replace(/\s+$/, "")); + while (lines.length && !lines[0].trim()) lines.shift(); + while (lines.length && !lines[lines.length - 1].trim()) lines.pop(); + + const flattened = repairFlattenedDiagram(lines.join("\n")); + lines = flattened ? flattened.split("\n") : []; + lines = lines.map(line => { + const trimmed = line.trim(); + if ((trimmed === "│" || trimmed === "▼") && !/^\s+[│▼]\s*$/.test(line)) { + return ` ${trimmed}`; + } + return line; + }); + lines = convertDashListsToBranches(lines); + return lines.join("\n"); +} diff --git a/extension/src/content/markdown/index.ts b/extension/src/content/markdown/index.ts new file mode 100644 index 0000000..de790b0 --- /dev/null +++ b/extension/src/content/markdown/index.ts @@ -0,0 +1,9 @@ +import type { ContentArgs } from '../../types'; +import { pickMarkdownRoot } from './root'; +import { renderMarkdown } from './renderer'; +import { stripNoise } from './utils'; + +export function extractMarkdown({ selector }: ContentArgs) { + const root = stripNoise(pickMarkdownRoot(selector)); + return renderMarkdown(root); +} diff --git a/extension/src/content/markdown/renderer.ts b/extension/src/content/markdown/renderer.ts new file mode 100644 index 0000000..7ef631d --- /dev/null +++ b/extension/src/content/markdown/renderer.ts @@ -0,0 +1,217 @@ +import { normalizeCodeBlock } from './code'; +import { + absoluteUrl, + BLOCK_TAGS, + collapseBlankLines, + escapeMarkdown, + escapeTableCell, + isNoiseElement, + normalizeInline, + normalizeText, +} from './utils'; + +function inlineText(node: Node): string { + if (node.nodeType === Node.TEXT_NODE) { + return escapeMarkdown(node.textContent || ""); + } + if (node.nodeType !== Node.ELEMENT_NODE) return ""; + if (isNoiseElement(node)) return ""; + + const el = node as HTMLElement; + const tag = el.tagName.toLowerCase(); + if (tag === "br") return "\n"; + if (tag === "img") { + const img = el as HTMLImageElement; + const src = absoluteUrl(img.getAttribute("src"), img.src); + if (!src) return ""; + const alt = normalizeText(img.getAttribute("alt") || ""); + return alt ? `![${escapeMarkdown(alt)}](${src})` : `![](${src})`; + } + if (tag === "a") { + const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join("")); + const href = absoluteUrl(el.getAttribute("href"), (el as HTMLAnchorElement).href); + if (!href) return text; + return `[${text || href}](${href})`; + } + if (tag === "code") { + const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join("")); + return text ? `\`${text.replace(/`/g, "\\`")}\`` : ""; + } + if (tag === "strong" || tag === "b") { + const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join("")); + return text ? `**${text}**` : ""; + } + if (tag === "em" || tag === "i") { + const text = normalizeInline(Array.from(el.childNodes).map(inlineText).join("")); + return text ? `*${text}*` : ""; + } + + const chunks: string[] = []; + for (const child of el.childNodes) { + const rendered = inlineText(child); + if (!rendered) continue; + chunks.push(rendered); + if (child.nodeType === Node.ELEMENT_NODE && BLOCK_TAGS.has((child as Element).tagName.toLowerCase())) { + chunks.push("\n"); + } + } + return chunks.join(""); +} + +function textBlock(node: Node): string { + return collapseBlankLines(normalizeInline(Array.from(node.childNodes).map(inlineText).join(""))); +} + +function preserveNodeText(node: Node): string { + if (node.nodeType === Node.TEXT_NODE) { + return node.textContent || ""; + } + if (node.nodeType !== Node.ELEMENT_NODE) return ""; + + const el = node as HTMLElement; + const tag = el.tagName.toLowerCase(); + if (tag === "br") return "\n"; + + const parts: string[] = []; + for (const child of el.childNodes) { + const rendered = preserveNodeText(child); + if (!rendered) continue; + parts.push(rendered); + } + + if (["div", "p", "li"].includes(tag)) { + return `${parts.join("")}\n`; + } + return parts.join(""); +} + +function tableToMarkdown(table: Element) { + const rows = Array.from(table.querySelectorAll("tr")) + .map(row => Array.from(row.children) + .filter(cell => cell.tagName === "TD" || cell.tagName === "TH") + .map(cell => escapeTableCell(textBlock(cell))) + ) + .filter(cells => cells.length > 0); + if (!rows.length) return ""; + + const widths = rows.reduce((max, row) => Math.max(max, row.length), 0); + const normalizedRows = rows.map(row => { + const next = row.slice(); + while (next.length < widths) next.push(""); + return next; + }); + + let headers = normalizedRows[0]; + let bodyRows = normalizedRows.slice(1); + const firstRowIsBlank = headers.every(cell => !cell.trim()); + if (firstRowIsBlank && normalizedRows.length > 1) { + headers = normalizedRows[1]; + bodyRows = normalizedRows.slice(2); + } + + const firstRow = table.querySelector("tr"); + const thead = table.querySelector("thead"); + const firstRowHasTh = firstRow && Array.from(firstRow.children).some(cell => cell.tagName === "TH"); + if (!(thead || firstRowHasTh || firstRowIsBlank)) { + headers = new Array(widths).fill(""); + bodyRows = normalizedRows; + } + + const separator = new Array(widths).fill("---"); + const lines = [ + `| ${headers.join(" | ")} |`, + `| ${separator.join(" | ")} |`, + ]; + for (const row of bodyRows) { + lines.push(`| ${row.join(" | ")} |`); + } + return lines.join("\n"); +} + +function listToMarkdown(list: Element, depth = 0): string { + const ordered = list.tagName.toLowerCase() === "ol"; + const items: string[] = []; + const children = Array.from(list.children).filter(child => child.tagName === "LI"); + children.forEach((item, index) => { + const marker = ordered ? `${index + 1}. ` : "- "; + const indent = " ".repeat(depth); + const nested: string[] = []; + const content: string[] = []; + + for (const child of item.childNodes) { + const childEl = child as Element; + if (child.nodeType === Node.ELEMENT_NODE && (childEl.tagName === "UL" || childEl.tagName === "OL")) { + nested.push(listToMarkdown(childEl, depth + 1)); + } else { + content.push(inlineText(child)); + } + } + + const line = collapseBlankLines(normalizeInline(content.join(""))); + if (line) { + const lineParts = line.split("\n"); + items.push(`${indent}${marker}${lineParts[0]}`); + const continuationIndent = `${indent}${" ".repeat(marker.length)}`; + lineParts.slice(1).forEach(part => items.push(`${continuationIndent}${part}`)); + } + nested.filter(Boolean).forEach(block => items.push(block)); + }); + return items.join("\n"); +} + +function blockToMarkdown(node: Node): string { + if (node.nodeType === Node.TEXT_NODE) { + return normalizeText(node.textContent || ""); + } + if (node.nodeType !== Node.ELEMENT_NODE) return ""; + if (isNoiseElement(node)) return ""; + + const el = node as HTMLElement; + const tag = el.tagName.toLowerCase(); + if (tag === "table") return tableToMarkdown(el); + if (tag === "ul" || tag === "ol") return listToMarkdown(el); + if (el.matches(".cm-editor[data-is-code-block-view='true']")) { + const lines = Array.from(el.querySelectorAll(".cm-line")).map(line => { + const text = preserveNodeText(line); + return text === "\n" ? "" : text.replace(/\n$/, ""); + }); + const code = normalizeCodeBlock(lines.join("\n")); + return code ? `\`\`\`\n${code}\n\`\`\`` : ""; + } + if (tag === "pre") { + const code = normalizeCodeBlock(preserveNodeText(el)); + return code ? `\`\`\`\n${code}\n\`\`\`` : ""; + } + if (tag === "blockquote") { + const content = collapseBlankLines(Array.from(el.childNodes).map(blockToMarkdown).join("\n\n")); + return content + .split("\n") + .map(line => line ? `> ${line}` : ">") + .join("\n"); + } + if (/^h[1-6]$/.test(tag)) { + const level = Number(tag.slice(1)); + const text = textBlock(el); + return text ? `${"#".repeat(level)} ${text}` : ""; + } + if (tag === "p" || tag === "figcaption") { + return textBlock(el); + } + if (tag === "hr") { + return "---"; + } + if (tag === "img") { + return inlineText(el); + } + + const childBlocks = Array.from(el.childNodes) + .map(child => blockToMarkdown(child)) + .filter(Boolean); + if (childBlocks.length) return collapseBlankLines(childBlocks.join("\n\n")); + + return textBlock(node); +} + +export function renderMarkdown(root: Element): string { + return collapseBlankLines(blockToMarkdown(root)); +} diff --git a/extension/src/content/markdown/root.ts b/extension/src/content/markdown/root.ts new file mode 100644 index 0000000..6767795 --- /dev/null +++ b/extension/src/content/markdown/root.ts @@ -0,0 +1,47 @@ +import { normalizeText } from './utils'; + +function candidateScore(node: Element) { + const text = normalizeText((node as HTMLElement).innerText || ""); + if (!text) return -Infinity; + + const headings = node.querySelectorAll("h1, h2, h3, h4, h5, h6").length; + const paragraphs = node.querySelectorAll("p").length; + const listItems = node.querySelectorAll("li").length; + const tables = node.querySelectorAll("table").length; + const codeBlocks = node.querySelectorAll("pre, code").length; + const images = node.querySelectorAll("img, figure").length; + const mainLike = node.matches("main, article, [role='main']") ? 1 : 0; + const proseBlocks = node.matches(".markdown, .prose, [data-message-author-role='assistant']") ? 1 : 0; + const buttons = node.querySelectorAll("button, input, textarea, select").length; + const forms = node.querySelectorAll("form").length; + const svgs = node.querySelectorAll("svg, canvas").length; + + return text.length + + (mainLike * 4000) + + (proseBlocks * 5000) + + (headings * 250) + + (paragraphs * 60) + + (listItems * 35) + + (tables * 80) + + (codeBlocks * 60) + + (images * 25) + - (buttons * 120) + - (forms * 200) + - (svgs * 40); +} + +export function pickMarkdownRoot(selector?: string) { + if (selector) { + const matched = document.querySelector(selector); + if (!matched) throw new Error(`No element: ${selector}`); + return matched; + } + + const candidates = Array.from(document.querySelectorAll( + "main, article, [role='main'], section, .markdown, .prose, [data-message-author-role]" + )) + .filter(node => normalizeText((node as HTMLElement).innerText || "").length > 0); + if (!candidates.length) return document.body; + candidates.sort((a, b) => candidateScore(b) - candidateScore(a)); + return candidates[0]; +} diff --git a/extension/src/content/markdown/utils.ts b/extension/src/content/markdown/utils.ts new file mode 100644 index 0000000..cdc913a --- /dev/null +++ b/extension/src/content/markdown/utils.ts @@ -0,0 +1,85 @@ +export const BLOCK_TAGS = new Set([ + "article", "aside", "blockquote", "body", "div", "dl", "fieldset", "figcaption", + "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", + "li", "main", "nav", "ol", "p", "pre", "section", "table", "tbody", "td", "tfoot", + "th", "thead", "tr", "ul" +]); + +export const NOISE_SELECTOR = [ + "script", + "style", + "noscript", + "template", + "svg", + "canvas", + "iframe", + "dialog", + "button", + "input", + "textarea", + "select", + "option", + "form", + "[hidden]", + "[aria-hidden='true']", + ".sr-only", + "[class*='sr-only']", + "[class*='file-tile']", + "form[data-type='unified-composer']", + ".composer-btn", + "[data-composer-surface='true']", + "#thread-bottom-container", + "[data-testid*='action-button']", +].join(", "); + +export function normalizeText(value: string) { + return value.replace(/\s+/g, " ").trim(); +} + +export function normalizeInline(value: string) { + return value + .replace(/[ \t]+\n/g, "\n") + .replace(/\n[ \t]+/g, "\n") + .replace(/\n{3,}/g, "\n\n") + .replace(/[ \t]{2,}/g, " ") + .trim(); +} + +export function collapseBlankLines(value: string) { + return value + .replace(/[ \t]+\n/g, "\n") + .replace(/\n{3,}/g, "\n\n") + .trim(); +} + +export function escapeMarkdown(text: string) { + return text.replace(/([\\`[\]])/g, "\\$1"); +} + +export function escapeTableCell(text: string) { + return text.replace(/\|/g, "\\|").replace(/\n+/g, " ").trim(); +} + +export function absoluteUrl(attr: string | null | undefined, fallback?: string) { + return attr || fallback || ""; +} + +export function isNoiseElement(node: Node | null): boolean { + if (!node || node.nodeType !== Node.ELEMENT_NODE) return false; + const el = node as Element; + const tag = el.tagName.toLowerCase(); + if (["script", "style", "noscript", "template", "svg", "canvas", "iframe", "dialog"].includes(tag)) return true; + if (["button", "input", "textarea", "select", "option", "form"].includes(tag)) return true; + if (el.hasAttribute("hidden")) return true; + if ((el.getAttribute("aria-hidden") || "").toLowerCase() === "true") return true; + if (el.matches(".sr-only, [class*='sr-only']")) return true; + if (el.matches("[class*='file-tile'], form[data-type='unified-composer'], .composer-btn, [data-composer-surface='true'], #thread-bottom-container")) return true; + if (el.matches("[data-testid*='action-button']")) return true; + return false; +} + +export function stripNoise(root: Element): Element { + const clone = root.cloneNode(true) as Element; + clone.querySelectorAll(NOISE_SELECTOR).forEach(node => node.remove()); + return clone; +} diff --git a/pyproject.toml b/pyproject.toml index 44dbe5f..8815f1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "real-browser-cli" -version = "0.15.2" +version = "0.15.3" description = "Control your real running browser from the terminal or Python SDK" readme = "README.md" license = { file = "LICENSE" } diff --git a/tests/test_rendering.py b/tests/test_rendering.py index 7136032..95b24bc 100644 --- a/tests/test_rendering.py +++ b/tests/test_rendering.py @@ -3,6 +3,7 @@ from os import terminal_size from rich.console import Console from rich.tree import Tree +from browser_cli.models import Tab from browser_cli.commands import rendering def test_shorten_uses_ellipsis(): @@ -32,3 +33,33 @@ def test_print_tree_uses_detected_width(monkeypatch): monkeypatch.setattr(rendering, "terminal_width", lambda console=None: 132) rendering.print_tree(Tree("Root")) assert widths == [132] + +def test_build_tabs_tree_groups_by_browser_window_and_group(): + tabs = [ + Tab(id=1, window_id=5, active=False, muted=False, title="Before", url="https://example.com/before", group_id=None, index=0, browser="work"), + Tab(id=2, window_id=5, active=False, muted=False, title="Inside", url="https://example.com/inside", group_id=9, index=1, browser="work"), + ] + groups = [{"id": 9, "windowId": 5, "browser": "work", "title": "Research", "color": "blue", "tabCount": 1, "collapsed": True}] + tree = rendering.build_tabs_tree(tabs, groups, console=Console(width=120), show_urls=True) + text = "\n".join(str(line) for line in tree.__rich_console__(Console(width=120), Console(width=120).options)) + assert "work" in text + assert "Window 5" in text + assert "Research" in text + assert "collapsed" in text + assert "Inside" in text + +def test_build_windows_tree_keeps_multi_browser_windows_separate(): + tabs = [ + Tab(id=1, window_id=5, active=False, muted=False, title="Work Tab", url="https://example.com/work", index=0, browser="work"), + Tab(id=2, window_id=5, active=False, muted=False, title="Personal Tab", url="https://example.com/personal", index=0, browser="personal"), + ] + windows = [ + {"id": 5, "alias": "main", "browser": "work", "tabCount": 1, "state": "normal"}, + {"id": 5, "alias": "main", "browser": "personal", "tabCount": 1, "state": "normal"}, + ] + tree = rendering.build_windows_tree(windows, tabs, console=Console(width=120)) + text = "\n".join(str(line) for line in tree.__rich_console__(Console(width=120), Console(width=120).options)) + assert "work: Window 5" in text + assert "personal: Window 5" in text + assert "Work Tab" in text + assert "Personal Tab" in text diff --git a/uv.lock b/uv.lock index efc59c8..0857078 100644 --- a/uv.lock +++ b/uv.lock @@ -465,7 +465,7 @@ wheels = [ [[package]] name = "real-browser-cli" -version = "0.15.2" +version = "0.15.3" source = { editable = "." } dependencies = [ { name = "click" },