Files
browser-cli/tests/test_extract.py
T
daniel156161 f2a7f85ee3
Package Extension / package-extension (push) Successful in 12s
Build & Publish Package / publish (push) Failing after 21s
adding new extract command to extract selector or main content as markdown, updating version to 0.5.0
2026-04-10 03:44:49 +02:00

69 lines
2.1 KiB
Python

"""Tests for extract.* commands (require an http/https active tab)."""
import pytest
from browser_cli.client import send_command
def test_extract_links(browser, http_tab):
    """Check that ``extract.links`` yields a list of link records with unique hrefs.

    Every record must expose both an ``href`` and a ``text`` key.
    """
    # Send ``tabs.active`` for the fixture tab before extracting (presumably
    # this focuses the tab -- confirm against the command handler).
    browser("tabs.active", {"tabId": http_tab["id"]})
    found = browser("extract.links")
    assert isinstance(found, list)
    for entry in found:
        assert "href" in entry
        assert "text" in entry
    targets = [entry["href"] for entry in found]
    # A set collapses repeats, so equal sizes mean no href occurs twice.
    assert len(set(targets)) == len(targets)
def test_extract_images(browser, http_tab):
    """Check that ``extract.images`` yields a list of image records with unique sources.

    Every record must expose a non-empty ``src`` value.
    """
    # Send ``tabs.active`` for the fixture tab before extracting (presumably
    # this focuses the tab -- confirm against the command handler).
    browser("tabs.active", {"tabId": http_tab["id"]})
    found = browser("extract.images")
    assert isinstance(found, list)
    for entry in found:
        assert "src" in entry
        assert entry["src"] != ""
    urls = [entry["src"] for entry in found]
    # A set collapses repeats, so equal sizes mean no src occurs twice.
    assert len(set(urls)) == len(urls)
def test_extract_text(browser, http_tab):
    """Check that ``extract.text`` returns a non-empty string."""
    target = {"tabId": http_tab["id"]}
    browser("tabs.active", target)
    body = browser("extract.text")
    assert isinstance(body, str)
    # Already known to be a str, so "not empty" is the same as length > 0.
    assert body != ""
def test_extract_html(browser, http_tab):
    """Check that ``extract.html`` returns a string containing at least one ``<``."""
    target = {"tabId": http_tab["id"]}
    browser("tabs.active", target)
    markup = browser("extract.html")
    assert isinstance(markup, str)
    # Only a smoke check: any tag-like content must contain an opening bracket.
    assert "<" in markup
def test_extract_markdown(browser, http_tab):
    """Check that ``extract.markdown`` returns a string that is not just whitespace."""
    target = {"tabId": http_tab["id"]}
    browser("tabs.active", target)
    rendered = browser("extract.markdown")
    assert isinstance(rendered, str)
    # Strip first so a whitespace-only result counts as empty.
    trimmed = rendered.strip()
    assert trimmed != ""
def test_extract_markdown_missing_selector_errors(browser, http_tab):
    """Check that ``extract.markdown`` raises ``RuntimeError`` for an unmatched selector.

    The error message is expected to contain ``No element``.
    """
    browser("tabs.active", {"tabId": http_tab["id"]})
    # A class name chosen so that no real page should contain it.
    missing = {"selector": ".browser-cli-definitely-missing"}
    with pytest.raises(RuntimeError, match="No element"):
        browser("extract.markdown", missing)
def test_dom_exists(browser, http_tab):
    """Check that ``dom.exists`` reports exactly ``True`` for the ``body`` selector."""
    target = {"tabId": http_tab["id"]}
    browser("tabs.active", target)
    # Identity check on purpose: a merely truthy value must not pass.
    assert browser("dom.exists", {"selector": "body"}) is True
def test_dom_query(browser, http_tab):
    """Check that ``dom.query`` for ``body`` returns a non-empty list led by a ``body`` record."""
    target = {"tabId": http_tab["id"]}
    browser("tabs.active", target)
    matches = browser("dom.query", {"selector": "body"})
    assert isinstance(matches, list)
    assert len(matches) >= 1
    # The first record's "tag" entry must name the element that was queried.
    first = matches[0]
    assert first.get("tag") == "body"