Files
browser-cli/browser_cli/transport.py
T
daniel156161 076914e5b7 refactor: reorganize client transport and extension internals
- Split client, native, remote, serve, markdown, and SDK internals into focused packages with direct imports.
- Move local and remote transport framing/protocol helpers behind clearer module boundaries.
- Break up the extension injected DOM logic into a separate content dispatch bundle and dedicated content modules.
- Add explicit client handling for passive remote discovery without noisy PQ warnings.
- Keep behavior covered with updated unit, integration, and extension tests.
2026-06-13 23:31:24 +02:00

215 lines
8.0 KiB
Python

"""Response payload encoding for the TCP serve <-> client leg.
The wire frame stays ``4-byte LE length + payload``. The payload is made
self-describing so old peers keep working unchanged:
* A payload that starts with ``{`` or ``[`` is plain JSON (the historical
format). Old clients and old servers only ever produce/consume this.
* Any other leading byte is a 1-byte codec tag followed by the encoded body.
The tag's high nibble selects serialization, the low nibble compression::
tag = (serialization << 4) | compression
This is only ever emitted toward a peer that advertised support for it, so it
is fully backward compatible: clients announce what they can decode via the
``accept_encoding`` field in their request, and the server encodes the
response accordingly. Requests themselves stay plain JSON (they are tiny).
Compression is the big win — response payloads (``extract.html``,
``dom.query``, ``tabs.list`` over hundreds of tabs, base64 screenshots) are
heavy and text-like. msgpack additionally lets ``tabs.screenshot`` ship the
image as raw bytes instead of a base64 data URL (~33% smaller before
compression); the client transparently rebuilds the data URL so the SDK/CLI
API is unchanged.
"""
from __future__ import annotations
import base64
import gzip
import json
import re
import zlib
from browser_cli.constants import (
COMP_GZIP,
COMP_NONE,
COMP_ZLIB,
COMP_ZSTD,
DEFAULT_TRANSPORT_THRESHOLD,
SER_JSON,
SER_MSGPACK,
)
try: # optional: better ratio + speed than zlib/gzip
import zstandard as _zstd
except Exception: # pragma: no cover - depends on optional extra
_zstd = None
try: # optional: alternate serialization + raw binary for screenshots
import msgpack as _msgpack
except Exception: # pragma: no cover - depends on optional extra
_msgpack = None
# ── codec ids ────────────────────────────────────────────────────────────────
# Serialization id <-> wire name, in both directions.
_SER_NAME = {SER_JSON: "json", SER_MSGPACK: "msgpack"}
_SER_ID = {v: k for k, v in _SER_NAME.items()}
# Compression id <-> wire name, in both directions.
_COMP_NAME = {COMP_NONE: "none", COMP_ZLIB: "zlib", COMP_GZIP: "gzip", COMP_ZSTD: "zstd"}
_COMP_ID = {v: k for k, v in _COMP_NAME.items()}
# The minimum body size worth compressing is DEFAULT_TRANSPORT_THRESHOLD (imported
# from browser_cli.constants); encode_response leaves smaller bodies uncompressed
# because the header/CPU cost is not worth it.
# A plain-JSON payload (an object or an array) starts with one of these bytes;
# decode_response treats any other leading byte as a codec tag.
# NOTE(review): assumes no ``(serialization << 4) | compression`` tag equals
# 0x7B ("{") or 0x5B ("[") — confirm against the ids in browser_cli.constants.
_JSON_FIRST_BYTES = frozenset(b"{[")
def msgpack_available() -> bool:
    """Report whether the optional ``msgpack`` package was imported successfully."""
    installed = _msgpack is not None
    return installed
def zstd_available() -> bool:
    """Report whether the optional ``zstandard`` package was imported successfully."""
    installed = _zstd is not None
    return installed
def supported_serialization() -> list[str]:
    """List the serializations this build can produce/consume, best first.

    ``"json"`` is always present; ``"msgpack"`` is placed ahead of it when the
    optional ``msgpack`` package is importable.
    """
    names = ["json"]
    if _msgpack is not None:
        names.insert(0, "msgpack")
    return names
def supported_compression() -> list[str]:
    """List the compression codecs this build can produce/consume, best first.

    ``"gzip"`` and ``"zlib"`` are always present; ``"zstd"`` is placed ahead of
    them when the optional ``zstandard`` package is importable.
    """
    codecs = ["gzip", "zlib"]
    if _zstd is not None:
        codecs.insert(0, "zstd")
    return codecs
def client_accept_encoding() -> dict:
    """Build the ``accept_encoding`` value the local client sends with each request.

    The result maps ``"ser"`` to the supported serializations and ``"comp"`` to
    the supported compression codecs, each ordered best first.
    """
    serializations = supported_serialization()
    compressions = supported_compression()
    return {"ser": serializations, "comp": compressions}
# ── compression primitives ────────────────────────────────────────────────────
def _compress(comp_id: int, data: bytes) -> bytes:
    """Compress *data* with the codec identified by *comp_id*.

    ``COMP_NONE`` hands *data* back unchanged. zlib and gzip run at level 6,
    zstd at level 10.

    Raises:
        ValueError: if *comp_id* is not a known codec, or zstd is requested
            while the optional ``zstandard`` package is not installed.
    """
    if comp_id == COMP_NONE:
        packed = data
    elif comp_id == COMP_ZLIB:
        packed = zlib.compress(data, 6)
    elif comp_id == COMP_GZIP:
        packed = gzip.compress(data, compresslevel=6)
    elif comp_id == COMP_ZSTD:
        if _zstd is None:
            raise ValueError("zstd compression requested but zstandard is not installed")
        packed = _zstd.ZstdCompressor(level=10).compress(data)
    else:
        raise ValueError(f"unknown compression id {comp_id}")
    return packed
def _decompress(comp_id: int, data: bytes) -> bytes:
    """Reverse :func:`_compress` for the codec identified by *comp_id*.

    ``COMP_NONE`` hands *data* back unchanged.

    Raises:
        ValueError: if *comp_id* is not a known codec, or a zstd payload
            arrives while the optional ``zstandard`` package is not installed.
    """
    if comp_id == COMP_NONE:
        plain = data
    elif comp_id == COMP_ZLIB:
        plain = zlib.decompress(data)
    elif comp_id == COMP_GZIP:
        plain = gzip.decompress(data)
    elif comp_id == COMP_ZSTD:
        if _zstd is None:
            raise ValueError("zstd payload received but zstandard is not installed")
        plain = _zstd.ZstdDecompressor().decompress(data)
    else:
        raise ValueError(f"unknown compression id {comp_id}")
    return plain
# ── codec negotiation ──────────────────────────────────────────────────────────
def _accepted_names(value) -> list[str]:
    """Normalise one peer-supplied ``accept_encoding`` entry to a list of names.

    The entry comes straight from the remote request, so it may not be the
    list of strings a well-behaved client sends. A bare string is split on
    commas/whitespace (so ``"zstd, gzip"`` names two codecs and matching is
    exact rather than by substring); any other iterable keeps only its string
    items; anything non-iterable yields an empty list instead of raising.
    """
    if isinstance(value, str):
        return [name for name in re.split(r"[,\s]+", value) if name]
    try:
        return [name for name in value if isinstance(name, str)]
    except TypeError:  # None, numbers, booleans, ... — nothing usable advertised
        return []


def _choose(accept: dict | None) -> tuple[int, int]:
    """Pick (serialization_id, compression_id) the peer accepts, server preference first.

    Args:
        accept: The peer's ``accept_encoding`` mapping with optional ``"ser"``
            and ``"comp"`` entries. Anything that is not a dict is treated as
            "nothing advertised"; malformed entries are ignored rather than
            raising.

    Returns:
        ``(SER_MSGPACK or SER_JSON, compression id)``. msgpack is chosen only
        when it is both installed locally and accepted by the peer. The
        compression id is the first entry of :func:`supported_compression`
        that the peer accepts, or ``COMP_NONE`` when there is none.
    """
    accept = accept if isinstance(accept, dict) else {}
    accept_ser = _accepted_names(accept.get("ser")) or ["json"]
    accept_comp = _accepted_names(accept.get("comp"))

    ser = SER_MSGPACK if _msgpack is not None and "msgpack" in accept_ser else SER_JSON
    # Server preference order (zstd > gzip > zlib) comes from supported_compression().
    comp = next(
        (_COMP_ID[name] for name in supported_compression() if name in accept_comp),
        COMP_NONE,
    )
    return ser, comp
# ── raw-binary hoisting (screenshots) ──────────────────────────────────────────
_DATA_URL_RE = re.compile(r"^data:([^;,]+);base64,(.+)$", re.S)
_B64_MARKER = "__b64__"
def _hoist_screenshot(obj, command: str | None):
"""Replace a screenshot data URL with raw bytes so msgpack ships it unencoded.
Gated to ``tabs.screenshot`` so we never touch arbitrary page-derived data.
"""
if command != "tabs.screenshot" or not isinstance(obj, dict):
return obj
data = obj.get("data")
if not isinstance(data, dict):
return obj
url = data.get("dataUrl")
if not isinstance(url, str):
return obj
m = _DATA_URL_RE.match(url)
if not m:
return obj
try:
raw = base64.b64decode(m.group(2))
except Exception:
return obj
new_data = dict(data)
new_data["dataUrl"] = {_B64_MARKER: True, "mime": m.group(1), "raw": raw}
return {**obj, "data": new_data}
def _unhoist_binary(obj):
"""Rebuild any hoisted data URL so callers see the original string again."""
if isinstance(obj, dict):
raw = obj.get("raw")
if obj.get(_B64_MARKER) and isinstance(raw, (bytes, bytearray)):
mime = obj.get("mime") or "application/octet-stream"
return f"data:{mime};base64," + base64.b64encode(bytes(raw)).decode("ascii")
return {k: _unhoist_binary(v) for k, v in obj.items()}
if isinstance(obj, list):
return [_unhoist_binary(v) for v in obj]
return obj
# ── encode / decode ─────────────────────────────────────────────────────────────
def encode_response(obj, accept: dict | None = None, command: str | None = None,
                    threshold: int = DEFAULT_TRANSPORT_THRESHOLD) -> bytes:
    """Encode a response object for the chosen/accepted codec.

    Args:
        obj: The response object to serialize.
        accept: The peer's ``accept_encoding`` mapping, passed to ``_choose``.
        command: The command name; lets a ``tabs.screenshot`` data URL be
            shipped as raw bytes when msgpack is selected.
        threshold: Serialized bodies shorter than this many bytes are not
            compressed.

    Returns:
        Bare JSON bytes when neither msgpack nor compression applies — byte for
        byte what an old server would have sent. Otherwise a 1-byte tag
        ``(serialization << 4) | compression`` followed by the encoded body.
    """
    ser_id, comp_id = _choose(accept)

    if ser_id == SER_MSGPACK:
        payload = _msgpack.packb(_hoist_screenshot(obj, command), use_bin_type=True)
    else:
        payload = json.dumps(obj).encode("utf-8")

    if comp_id != COMP_NONE and len(payload) >= threshold:
        return bytes([(ser_id << 4) | comp_id]) + _compress(comp_id, payload)

    # From here on the body is uncompressed.
    if ser_id == SER_JSON:
        return payload  # plain JSON — historical wire format, no tag byte
    return bytes([(ser_id << 4) | COMP_NONE]) + payload
def decode_response(raw: bytes | None):
    """Decode a payload produced by :func:`encode_response` (or plain JSON).

    Args:
        raw: The frame payload, or ``None``.

    Returns:
        ``None`` when *raw* is ``None``, otherwise the decoded response object.
        msgpack payloads have any hoisted data URL rebuilt into a string.

    Raises:
        ValueError: if *raw* is empty, the tag names an unknown serialization,
            or a msgpack payload arrives while ``msgpack`` is not installed.
            ``_decompress`` raises the same for compression problems.
    """
    if raw is None:
        return None
    if not raw:
        raise ValueError("empty response payload")

    lead = raw[0]
    if lead in _JSON_FIRST_BYTES:
        # Untagged payload: the historical plain-JSON format.
        return json.loads(raw)

    # High nibble selects the serialization, low nibble the compression.
    ser_id, comp_id = divmod(lead, 16)
    body = _decompress(comp_id, raw[1:])

    if ser_id == SER_JSON:
        return json.loads(body)
    if ser_id != SER_MSGPACK:
        raise ValueError(f"unknown serialization id {ser_id}")
    if _msgpack is None:
        raise ValueError("msgpack payload received but msgpack is not installed")
    return _unhoist_binary(_msgpack.unpackb(body, raw=False))