076914e5b7
- Split client, native, remote, serve, markdown, and SDK internals into focused packages with direct imports. - Move local and remote transport framing/protocol helpers behind clearer module boundaries. - Break up the extension injected DOM logic into a separate content dispatch bundle and dedicated content modules. - Add explicit client handling for passive remote discovery without noisy PQ warnings. - Keep behavior covered with updated unit, integration, and extension tests.
215 lines
8.0 KiB
Python
215 lines
8.0 KiB
Python
"""Response payload encoding for the TCP serve <-> client leg.
|
|
|
|
The wire frame stays ``4-byte LE length + payload``. The payload is made
self-describing so old peers keep working unchanged:

* A payload that starts with ``{`` or ``[`` is plain JSON (the historical
  format). Old clients and old servers only ever produce/consume this.
* Any other leading byte is a 1-byte codec tag followed by the encoded body.
  The tag's high nibble selects serialization, the low nibble compression::

      tag = (serialization << 4) | compression

This is only ever emitted toward a peer that advertised support for it, so it
is fully backward compatible: clients announce what they can decode via the
``accept_encoding`` field in their request, and the server encodes the
response accordingly. Requests themselves stay plain JSON (they are tiny).

Compression is the big win — response payloads (``extract.html``,
``dom.query``, ``tabs.list`` over hundreds of tabs, base64 screenshots) are
heavy and text-like. msgpack additionally lets ``tabs.screenshot`` ship the
image as raw bytes instead of a base64 data URL (~33% smaller before
compression); the client transparently rebuilds the data URL so the SDK/CLI
API is unchanged.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
import gzip
|
|
import json
|
|
import re
|
|
import zlib
|
|
|
|
from browser_cli.constants import (
|
|
COMP_GZIP,
|
|
COMP_NONE,
|
|
COMP_ZLIB,
|
|
COMP_ZSTD,
|
|
DEFAULT_TRANSPORT_THRESHOLD,
|
|
SER_JSON,
|
|
SER_MSGPACK,
|
|
)
|
|
|
|
try: # optional: better ratio + speed than zlib/gzip
|
|
import zstandard as _zstd
|
|
except Exception: # pragma: no cover - depends on optional extra
|
|
_zstd = None
|
|
|
|
try: # optional: alternate serialization + raw binary for screenshots
|
|
import msgpack as _msgpack
|
|
except Exception: # pragma: no cover - depends on optional extra
|
|
_msgpack = None
|
|
|
|
# ── codec ids ────────────────────────────────────────────────────────────────
|
|
# Wire name of each serialization id (the tag byte's high nibble), and the
# reverse name -> id lookup derived from it.
_SER_NAME = {
    SER_JSON: "json",
    SER_MSGPACK: "msgpack",
}
_SER_ID = {name: ident for ident, name in _SER_NAME.items()}

# Same pair of tables for the compression ids (the tag byte's low nibble).
_COMP_NAME = {
    COMP_NONE: "none",
    COMP_ZLIB: "zlib",
    COMP_GZIP: "gzip",
    COMP_ZSTD: "zstd",
}
_COMP_ID = {name: ident for ident, name in _COMP_NAME.items()}
|
|
|
|
# Payloads smaller than DEFAULT_TRANSPORT_THRESHOLD (the default ``threshold`` of
# encode_response) are not compressed — the header/CPU cost is not worth it.
|
|
|
|
# Byte values of "{" and "[". decode_response treats a payload whose first byte
# is one of these as untagged plain JSON, so a codec tag byte must never equal
# either of them.
# NOTE(review): that guarantee depends on the SER_*/COMP_* id values — confirm.
_JSON_FIRST_BYTES = frozenset((ord("{"), ord("[")))
|
|
|
|
def msgpack_available() -> bool:
    """Report whether the optional ``msgpack`` package was importable."""
    missing = _msgpack is None
    return not missing
|
|
|
|
def zstd_available() -> bool:
    """Report whether the optional ``zstandard`` package was importable."""
    missing = _zstd is None
    return not missing
|
|
|
|
def supported_serialization() -> list[str]:
    """List the serialization names this build handles, most preferred first.

    ``"json"`` is always present; ``"msgpack"`` is placed ahead of it when the
    optional ``msgpack`` package was importable.
    """
    names = ["json"]
    if _msgpack is not None:
        names.insert(0, "msgpack")
    return names
|
|
|
|
def supported_compression() -> list[str]:
    """List the compression codec names this build handles, most preferred first.

    ``"gzip"`` and ``"zlib"`` are always present; ``"zstd"`` is placed ahead of
    them when the optional ``zstandard`` package was importable.
    """
    names = ["gzip", "zlib"]
    if _zstd is not None:
        names.insert(0, "zstd")
    return names
|
|
|
|
def client_accept_encoding() -> dict:
    """Build the ``accept_encoding`` value the local client advertises.

    Returns:
        A dict with the serializations this build can decode under ``"ser"``
        and the compression codecs under ``"comp"``, each best first.
    """
    serializations = supported_serialization()
    compressions = supported_compression()
    return {"ser": serializations, "comp": compressions}
|
|
|
|
# ── compression primitives ────────────────────────────────────────────────────
|
|
|
|
def _compress(comp_id: int, data: bytes) -> bytes:
    """Compress ``data`` with the codec identified by ``comp_id``.

    ``COMP_NONE`` hands ``data`` back untouched. zlib and gzip run at level 6,
    zstd at level 10.

    Raises:
        ValueError: ``comp_id`` is not a known codec, or zstd was requested
            while the optional ``zstandard`` package is not importable.
    """
    if comp_id == COMP_NONE:
        packed = data
    elif comp_id == COMP_ZLIB:
        packed = zlib.compress(data, 6)
    elif comp_id == COMP_GZIP:
        packed = gzip.compress(data, compresslevel=6)
    elif comp_id == COMP_ZSTD:
        if _zstd is None:
            raise ValueError("zstd compression requested but zstandard is not installed")
        packed = _zstd.ZstdCompressor(level=10).compress(data)
    else:
        raise ValueError(f"unknown compression id {comp_id}")
    return packed
|
|
|
|
def _decompress(comp_id: int, data: bytes) -> bytes:
    """Undo :func:`_compress` for the codec identified by ``comp_id``.

    ``COMP_NONE`` hands ``data`` back untouched.

    Raises:
        ValueError: ``comp_id`` is not a known codec, or a zstd payload arrived
            while the optional ``zstandard`` package is not importable.
    """
    if comp_id == COMP_NONE:
        plain = data
    elif comp_id == COMP_ZLIB:
        plain = zlib.decompress(data)
    elif comp_id == COMP_GZIP:
        plain = gzip.decompress(data)
    elif comp_id == COMP_ZSTD:
        if _zstd is None:
            raise ValueError("zstd payload received but zstandard is not installed")
        plain = _zstd.ZstdDecompressor().decompress(data)
    else:
        raise ValueError(f"unknown compression id {comp_id}")
    return plain
|
|
|
|
# ── codec negotiation ──────────────────────────────────────────────────────────
|
|
|
|
def _accepted_names(value) -> frozenset[str]:
    """Normalize one ``accept_encoding`` field into a set of codec names.

    The field comes straight from the peer's request, so it is not trusted to
    be a list of strings: a bare string counts as a single name, non-string
    entries are dropped, and any other type yields an empty set.
    """
    if isinstance(value, str):
        return frozenset((value,))
    if isinstance(value, (list, tuple, set, frozenset)):
        return frozenset(item for item in value if isinstance(item, str))
    return frozenset()


def _choose(accept: dict | None) -> tuple[int, int]:
    """Pick the ``(serialization_id, compression_id)`` pair to answer a peer with.

    Args:
        accept: The peer's ``accept_encoding`` mapping, with codec names under
            ``"ser"`` and ``"comp"``. Anything that is not a dict, and any
            malformed field inside it, is treated as "nothing advertised".

    Returns:
        ``SER_MSGPACK`` when msgpack is importable here and the peer listed it,
        otherwise ``SER_JSON``; paired with the first codec from
        :func:`supported_compression` (server preference order) that the peer
        listed, or ``COMP_NONE`` when there is no overlap.
    """
    accept = accept if isinstance(accept, dict) else {}
    accept_ser = _accepted_names(accept.get("ser"))
    accept_comp = _accepted_names(accept.get("comp"))

    ser = SER_JSON
    if _msgpack is not None and "msgpack" in accept_ser:
        ser = SER_MSGPACK

    comp = COMP_NONE
    for name in supported_compression():  # server preference: zstd > gzip > zlib
        if name in accept_comp:
            comp = _COMP_ID[name]
            break
    return ser, comp
|
|
|
|
# ── raw-binary hoisting (screenshots) ──────────────────────────────────────────
|
|
|
|
_DATA_URL_RE = re.compile(r"^data:([^;,]+);base64,(.+)$", re.S)
|
|
_B64_MARKER = "__b64__"
|
|
|
|
def _hoist_screenshot(obj, command: str | None):
|
|
"""Replace a screenshot data URL with raw bytes so msgpack ships it unencoded.
|
|
|
|
Gated to ``tabs.screenshot`` so we never touch arbitrary page-derived data.
|
|
"""
|
|
if command != "tabs.screenshot" or not isinstance(obj, dict):
|
|
return obj
|
|
data = obj.get("data")
|
|
if not isinstance(data, dict):
|
|
return obj
|
|
url = data.get("dataUrl")
|
|
if not isinstance(url, str):
|
|
return obj
|
|
m = _DATA_URL_RE.match(url)
|
|
if not m:
|
|
return obj
|
|
try:
|
|
raw = base64.b64decode(m.group(2))
|
|
except Exception:
|
|
return obj
|
|
new_data = dict(data)
|
|
new_data["dataUrl"] = {_B64_MARKER: True, "mime": m.group(1), "raw": raw}
|
|
return {**obj, "data": new_data}
|
|
|
|
def _unhoist_binary(obj):
    """Rebuild hoisted data URLs anywhere inside ``obj``.

    Walks dicts and lists recursively. A dict carrying a truthy ``__b64__``
    marker and a bytes-like ``raw`` entry is replaced by the
    ``data:<mime>;base64,...`` string it stands for (mime defaults to
    ``application/octet-stream``). Every other value is returned as is.
    """
    if isinstance(obj, list):
        return [_unhoist_binary(item) for item in obj]
    if not isinstance(obj, dict):
        return obj

    payload = obj.get("raw")
    if obj.get(_B64_MARKER) and isinstance(payload, (bytes, bytearray)):
        mime = obj.get("mime") or "application/octet-stream"
        encoded = base64.b64encode(bytes(payload)).decode("ascii")
        return f"data:{mime};base64," + encoded
    return {key: _unhoist_binary(value) for key, value in obj.items()}
|
|
|
|
# ── encode / decode ─────────────────────────────────────────────────────────────
|
|
|
|
def encode_response(obj, accept: dict | None = None, command: str | None = None,
                    threshold: int = DEFAULT_TRANSPORT_THRESHOLD) -> bytes:
    """Encode a response object for the chosen/accepted codec.

    Args:
        obj: The response object to serialize.
        accept: The peer's ``accept_encoding`` mapping; ``None`` means the peer
            advertised nothing.
        command: Name of the command being answered. With msgpack,
            ``tabs.screenshot`` responses get their data URL hoisted to raw bytes.
        threshold: Minimum serialized size, in bytes, before compression is
            attempted.

    Returns:
        Bare JSON bytes when no encoding is negotiated (or none ends up being
        applied), which is byte-for-byte what an old server would have sent.
        Otherwise a 1-byte tag ``(serialization << 4) | compression`` followed
        by the encoded body.
    """
    ser, comp = _choose(accept)

    if ser == SER_MSGPACK:
        body = _msgpack.packb(_hoist_screenshot(obj, command), use_bin_type=True)
    else:
        body = json.dumps(obj).encode("utf-8")

    compressed = None
    if comp != COMP_NONE and len(body) >= threshold:
        compressed = _compress(comp, body)

    if compressed is not None and len(compressed) < len(body):
        body = compressed
    else:
        # Either compression was skipped, or it did not shrink the body
        # (e.g. raw PNG bytes are already compressed) — send it uncompressed
        # and tag it as such so the peer does not try to decompress.
        comp = COMP_NONE

    if ser == SER_JSON and comp == COMP_NONE:
        return body  # plain JSON — historical wire format, no tag byte

    return bytes([(ser << 4) | comp]) + body
|
|
|
|
def decode_response(raw: bytes | None):
|
|
"""Decode a payload produced by :func:`encode_response` (or plain JSON)."""
|
|
if raw is None:
|
|
return None
|
|
if not raw:
|
|
raise ValueError("empty response payload")
|
|
if raw[0] in _JSON_FIRST_BYTES:
|
|
return json.loads(raw)
|
|
|
|
tag = raw[0]
|
|
ser, comp = tag >> 4, tag & 0x0F
|
|
body = _decompress(comp, raw[1:])
|
|
if ser == SER_MSGPACK:
|
|
if _msgpack is None:
|
|
raise ValueError("msgpack payload received but msgpack is not installed")
|
|
return _unhoist_binary(_msgpack.unpackb(body, raw=False))
|
|
if ser == SER_JSON:
|
|
return json.loads(body)
|
|
raise ValueError(f"unknown serialization id {ser}")
|