"""Response payload encoding for the TCP serve <-> client leg. The wire frame stays ``4-byte LE length + payload``. The payload is made self-describing so old peers keep working unchanged: * A payload that starts with ``{`` or ``[`` is plain JSON (the historical format). Old clients and old servers only ever produce/consume this. * Any other leading byte is a 1-byte codec tag followed by the encoded body. The tag's high nibble selects serialization, the low nibble compression:: tag = (serialization << 4) | compression This is only ever emitted toward a peer that advertised support for it, so it is fully backward compatible: clients announce what they can decode via the ``accept_encoding`` field in their request, and the server encodes the response accordingly. Requests themselves stay plain JSON (they are tiny). Compression is the big win — response payloads (``extract.html``, ``dom.query``, ``tabs.list`` over hundreds of tabs, base64 screenshots) are heavy and text-like. msgpack additionally lets ``tabs.screenshot`` ship the image as raw bytes instead of a base64 data URL (~33% smaller before compression); the client transparently rebuilds the data URL so the SDK/CLI API is unchanged. """ from __future__ import annotations import base64 import gzip import json import re import zlib try: # optional: better ratio + speed than zlib/gzip import zstandard as _zstd except Exception: # pragma: no cover - depends on optional extra _zstd = None try: # optional: alternate serialization + raw binary for screenshots import msgpack as _msgpack except Exception: # pragma: no cover - depends on optional extra _msgpack = None # ── codec ids ──────────────────────────────────────────────────────────────── SER_JSON = 0 SER_MSGPACK = 1 COMP_NONE = 0 COMP_ZLIB = 1 COMP_GZIP = 2 COMP_ZSTD = 3 _SER_NAME = {SER_JSON: "json", SER_MSGPACK: "msgpack"} _SER_ID = {v: k for k, v in _SER_NAME.items()} _COMP_NAME = {COMP_NONE: "none", COMP_ZLIB: "zlib", COMP_GZIP: "gzip", COMP_ZSTD: "zstd"} _COMP_ID = {v: k for k, v in _COMP_NAME.items()} # Don't compress payloads smaller than this — the header/CPU cost is not worth it. DEFAULT_THRESHOLD = 512 # JSON top-level values always start with one of these bytes; a tag byte never does. _JSON_FIRST_BYTES = frozenset(b"{[") def msgpack_available() -> bool: return _msgpack is not None def zstd_available() -> bool: return _zstd is not None def supported_serialization() -> list[str]: """Serializations this build can produce/consume, best first.""" return (["msgpack"] if _msgpack is not None else []) + ["json"] def supported_compression() -> list[str]: """Compression codecs this build can produce/consume, best first.""" return (["zstd"] if _zstd is not None else []) + ["gzip", "zlib"] def client_accept_encoding() -> dict: """What the local client advertises it can decode (sent with each request).""" return {"ser": supported_serialization(), "comp": supported_compression()} # ── compression primitives ──────────────────────────────────────────────────── def _compress(comp_id: int, data: bytes) -> bytes: if comp_id == COMP_NONE: return data if comp_id == COMP_ZLIB: return zlib.compress(data, 6) if comp_id == COMP_GZIP: return gzip.compress(data, compresslevel=6) if comp_id == COMP_ZSTD: if _zstd is None: raise ValueError("zstd compression requested but zstandard is not installed") return _zstd.ZstdCompressor(level=10).compress(data) raise ValueError(f"unknown compression id {comp_id}") def _decompress(comp_id: int, data: bytes) -> bytes: if comp_id == COMP_NONE: return data if comp_id == COMP_ZLIB: return zlib.decompress(data) if comp_id == COMP_GZIP: return gzip.decompress(data) if comp_id == COMP_ZSTD: if _zstd is None: raise ValueError("zstd payload received but zstandard is not installed") return _zstd.ZstdDecompressor().decompress(data) raise ValueError(f"unknown compression id {comp_id}") # ── codec negotiation ────────────────────────────────────────────────────────── def _choose(accept: dict | None) -> tuple[int, int]: """Pick (serialization_id, compression_id) the peer accepts, server preference first.""" accept = accept if isinstance(accept, dict) else {} accept_ser = accept.get("ser") or ["json"] accept_comp = accept.get("comp") or [] ser = SER_JSON if _msgpack is not None and "msgpack" in accept_ser: ser = SER_MSGPACK comp = COMP_NONE for name in supported_compression(): # server preference: zstd > gzip > zlib if name in accept_comp: comp = _COMP_ID[name] break return ser, comp # ── raw-binary hoisting (screenshots) ────────────────────────────────────────── _DATA_URL_RE = re.compile(r"^data:([^;,]+);base64,(.+)$", re.S) _B64_MARKER = "__b64__" def _hoist_screenshot(obj, command: str | None): """Replace a screenshot data URL with raw bytes so msgpack ships it unencoded. Gated to ``tabs.screenshot`` so we never touch arbitrary page-derived data. """ if command != "tabs.screenshot" or not isinstance(obj, dict): return obj data = obj.get("data") if not isinstance(data, dict): return obj url = data.get("dataUrl") if not isinstance(url, str): return obj m = _DATA_URL_RE.match(url) if not m: return obj try: raw = base64.b64decode(m.group(2)) except Exception: return obj new_data = dict(data) new_data["dataUrl"] = {_B64_MARKER: True, "mime": m.group(1), "raw": raw} return {**obj, "data": new_data} def _unhoist_binary(obj): """Rebuild any hoisted data URL so callers see the original string again.""" if isinstance(obj, dict): raw = obj.get("raw") if obj.get(_B64_MARKER) and isinstance(raw, (bytes, bytearray)): mime = obj.get("mime") or "application/octet-stream" return f"data:{mime};base64," + base64.b64encode(bytes(raw)).decode("ascii") return {k: _unhoist_binary(v) for k, v in obj.items()} if isinstance(obj, list): return [_unhoist_binary(v) for v in obj] return obj # ── encode / decode ───────────────────────────────────────────────────────────── def encode_response(obj, accept: dict | None = None, command: str | None = None, threshold: int = DEFAULT_THRESHOLD) -> bytes: """Encode a response object for the chosen/accepted codec. Returns bare JSON bytes when no encoding is negotiated, which is byte-for-byte what an old server would have sent. """ ser, comp = _choose(accept) if ser == SER_MSGPACK: body = _msgpack.packb(_hoist_screenshot(obj, command), use_bin_type=True) else: body = json.dumps(obj).encode("utf-8") if comp != COMP_NONE and len(body) >= threshold: body = _compress(comp, body) else: comp = COMP_NONE if ser == SER_JSON and comp == COMP_NONE: return body # plain JSON — historical wire format, no tag byte return bytes([(ser << 4) | comp]) + body def decode_response(raw: bytes | None): """Decode a payload produced by :func:`encode_response` (or plain JSON).""" if raw is None: return None if not raw: raise ValueError("empty response payload") if raw[0] in _JSON_FIRST_BYTES: return json.loads(raw) tag = raw[0] ser, comp = tag >> 4, tag & 0x0F body = _decompress(comp, raw[1:]) if ser == SER_MSGPACK: if _msgpack is None: raise ValueError("msgpack payload received but msgpack is not installed") return _unhoist_binary(_msgpack.unpackb(body, raw=False)) if ser == SER_JSON: return json.loads(body) raise ValueError(f"unknown serialization id {ser}")