Add better Markdown extraction and filter the output so it does not contain broken stuff; allow `session list` to list sessions across multiple browsers

This commit is contained in:
2026-04-12 17:10:19 +02:00
parent 51054422fb
commit 64d804cf32
7 changed files with 899 additions and 22 deletions
+194 -2
View File
@@ -5,6 +5,7 @@ from unittest.mock import patch
from browser_cli.cli import main, _project_version
from browser_cli.client import BrowserTarget
from browser_cli.commands.extract import _clean_markdown_output, _convert_html_to_markdown
def _expected_version() -> str:
pyproject = Path(__file__).resolve().parent.parent / "pyproject.toml"
@@ -204,6 +205,46 @@ def test_windows_list_multi_browser_shows_browser_column():
assert "work" in result.output
def test_session_list_multi_browser_shows_browser_column():
    """`session list` with two active browser targets prints a Browser column.

    Both targets are reported as active, and every ``session.list`` request is
    answered with one session whose name is derived from the ``profile``
    keyword it was sent with. The output must contain the ``Browser`` header,
    ``uuid-1``, ``work`` and both generated session names.
    """
    targets = [
        BrowserTarget("default", "uuid-1", "/tmp/default.sock"),
        BrowserTarget("work", "work", "/tmp/work.sock"),
    ]

    def fake_send_command(command, args=None, profile=None):
        # Only the listing command is expected to be sent.
        assert command == "session.list"
        session = {"name": f"{profile}-session", "tabs": 2, "savedAt": 1712707200000}
        return [session]

    with patch(
        "browser_cli.commands.session.active_browser_targets",
        return_value=targets,
    ), patch(
        "browser_cli.commands.session.send_command",
        side_effect=fake_send_command,
    ):
        result = CliRunner().invoke(main, ["session", "list"])

    assert result.exit_code == 0
    output = result.output
    for fragment in ("Browser", "uuid-1", "work", "default-session", "work-session"):
        assert fragment in output
def test_session_list_with_explicit_browser_does_not_show_browser_column():
    """`--browser work session list` prints no Browser column.

    Two targets are reported as active, but with ``--browser work`` given the
    command must issue exactly one ``session.list`` request and must not print
    the ``Browser`` header.
    """
    active_targets = [
        BrowserTarget("default", "uuid-1", "/tmp/default.sock"),
        BrowserTarget("work", "work", "/tmp/work.sock"),
    ]
    stored_sessions = [{"name": "work-session", "tabs": 2, "savedAt": 1712707200000}]

    with patch(
        "browser_cli.commands.session.active_browser_targets",
        return_value=active_targets,
    ), patch(
        "browser_cli.commands.session.send_command",
        return_value=stored_sessions,
    ) as send_command:
        cli_args = ["--browser", "work", "session", "list"]
        result = CliRunner().invoke(main, cli_args)

    assert result.exit_code == 0
    assert "Browser" not in result.output
    send_command.assert_called_once_with("session.list", {}, profile=None)
def test_windows_open_passes_url():
with patch("browser_cli.commands.windows.send_command", return_value={"id": 7}) as send_command:
result = CliRunner().invoke(main, ["windows", "open", "https://example.com"])
@@ -213,7 +254,7 @@ def test_windows_open_passes_url():
send_command.assert_called_once_with("windows.open", {"url": "https://example.com"}, profile=None)
def test_extract_markdown_command():
with patch("browser_cli.commands.extract.send_command", return_value="# Title\n") as send_command:
with patch("browser_cli.commands.extract.send_command", return_value="# Title") as send_command:
result = CliRunner().invoke(main, ["extract", "markdown"])
assert result.exit_code == 0
@@ -221,9 +262,160 @@ def test_extract_markdown_command():
send_command.assert_called_once_with("extract.markdown", {"selector": None})
def test_extract_markdown_command_with_selector():
    """`extract markdown --selector article` forwards the selector and prints the result.

    The mocked reply carries no trailing newline; the command output is
    expected to be that reply followed by exactly one newline.
    """
    # Only the current patch is kept: the earlier variant returning "## Post\n"
    # was shadowed by this one and never took effect.
    with patch("browser_cli.commands.extract.send_command", return_value="## Post") as send_command:
        result = CliRunner().invoke(main, ["extract", "markdown", "--selector", "article"])

    assert result.exit_code == 0
    assert result.output == "## Post\n"
    send_command.assert_called_once_with("extract.markdown", {"selector": "article"})
def test_clean_markdown_output_removes_escaped_underscores_and_dashes():
    """Backslash escapes in front of ``_`` and ``-`` are dropped by the cleaner."""
    cleaned = _clean_markdown_output(r"hello\_world \- item")
    assert cleaned == "hello_world - item"
def test_clean_markdown_output_trims_useless_whitespace():
    """Surrounding whitespace on each line and surplus blank lines are removed."""
    messy = " # Title \n\n\n paragraph with space \n next line\t \n"
    expected = "# Title\n\nparagraph with space\nnext line"
    assert _clean_markdown_output(messy) == expected
def test_clean_markdown_output_repairs_empty_table_header_rows():
    """An empty table header row is replaced by the first row that has content."""
    broken_table = (
        "| | | |\n"
        "| --- | --- | --- |\n"
        "| Bereich | Plan | Ist |\n"
        "| A | B | C |\n"
    )
    repaired_table = (
        "| Bereich | Plan | Ist |\n"
        "| --- | --- | --- |\n"
        "| A | B | C |"
    )
    assert _clean_markdown_output(broken_table) == repaired_table
def test_clean_markdown_output_preserves_graph_code_blocks():
    """Blank lines between code-block content survive; those next to the fences do not."""
    fenced = "```\n\nA\n\n\nB\n\n```"
    expected = "```\nA\n\n\nB\n```"
    assert _clean_markdown_output(fenced) == expected
def test_clean_markdown_output_renders_code_block_list_branches():
    """Dash-prefixed lines inside a code block become tree branches (├ … └)."""
    source = "```\nPlattformen\n- Omnifact\n- Open WebUI + Ollama\n- Le Chat\n```"
    expected_lines = [
        "```",
        "Plattformen",
        "├ Omnifact",
        "├ Open WebUI + Ollama",
        "└ Le Chat",
        "```",
    ]
    assert _clean_markdown_output(source) == "\n".join(expected_lines)
def test_clean_markdown_output_unflattens_graph_code_blocks():
    """A graph flattened onto one code-block line is split back into separate lines.

    The expected output puts two blank lines where the input had a ``│ ▼``
    connector, moves parenthesised annotations onto their own line, and gives
    every ``├`` / ``└`` branch its own line.
    """
    flattened = (
        "```\n"
        "Golden Set │ ▼Promptfoo(Testausführung) │ ▼UpTrain(Qualitätsbewertung) │ "
        "▼Langfuse(Logging / Observability) │ ▼Plattformen├ Omnifact├ Open WebUI + Ollama└ Le Chat\n"
        "```"
    )
    expected_lines = [
        "```",
        "Golden Set",
        "",
        "",
        "Promptfoo",
        "(Testausführung)",
        "",
        "",
        "UpTrain",
        "(Qualitätsbewertung)",
        "",
        "",
        "Langfuse",
        "(Logging / Observability)",
        "",
        "",
        "Plattformen",
        "├ Omnifact",
        "├ Open WebUI + Ollama",
        "└ Le Chat",
        "```",
    ]
    assert _clean_markdown_output(flattened) == "\n".join(expected_lines)
def test_extract_markdown_command_repairs_malformed_tables_and_code_blocks():
    """`extract markdown` output has the table header and the graph block repaired.

    The mocked reply holds a table with an empty header row followed by a code
    block whose graph sits on a single line; the printed output is checked for
    the repaired form of both.
    """
    browser_reply = (
        "| | | |\n"
        "| --- | --- | --- |\n"
        "| Bereich | Plan | Ist |\n"
        "| Eval-Stack | Testumgebung | funktionsfähig |\n\n"
        "```\n"
        "Golden Set │ ▼Promptfoo(Testausführung) │ ▼Plattformen├ Omnifact└ Le Chat\n"
        "```"
    )

    with patch("browser_cli.commands.extract.send_command", return_value=browser_reply):
        result = CliRunner().invoke(main, ["extract", "markdown"])

    assert result.exit_code == 0
    printed = result.output
    assert "| Bereich | Plan | Ist |" in printed
    assert "| | | |" not in printed
    assert "Golden Set\n\n\nPromptfoo\n(Testausführung)" in printed
    assert "├ Omnifact" in printed
    assert "└ Le Chat" in printed
def test_convert_html_to_markdown_normalizes_blank_table_header_rows():
    """An HTML table whose first row is empty yields a header built from the next row."""
    # The literal is kept flush-left: its leading whitespace is part of the input.
    page = """
<main>
<table>
<tr><td></td><td></td><td></td><td></td></tr>
<tr><td>Risiko</td><td>Beschreibung</td><td>Auswirkung</td><td>Gegenmaßnahme</td></tr>
<tr><td>Datenschutz</td><td>X</td><td>Y</td><td>Z</td></tr>
</table>
</main>
"""
    rendered = _convert_html_to_markdown(page)

    header_row = "| Risiko | Beschreibung | Auswirkung | Gegenmaßnahme |"
    assert header_row in rendered
    assert "| | | | |" not in rendered
def test_convert_html_to_markdown_preserves_codemirror_graph_blocks():
    """A CodeMirror code-block view is converted into a fenced block with the graph intact.

    Each ``cm-line`` div is one line of the graph; the dash-prefixed lines at
    the end are expected to come out as ``├`` / ``└`` branches.
    """
    # The literal is kept flush-left: its leading whitespace is part of the input.
    page = """
<main>
<h1>Teil 5 - Eval-Stack Architektur</h1>
<div class="cm-editor" data-is-code-block-view="true" contenteditable="false">
<div class="cm-line">Golden Set</div>
<div class="cm-line"> │</div>
<div class="cm-line"> ▼</div>
<div class="cm-line">Promptfoo</div>
<div class="cm-line">(Testausführung)</div>
<div class="cm-line"> │</div>
<div class="cm-line"> ▼</div>
<div class="cm-line">Plattformen</div>
<div class="cm-line">- Omnifact</div>
<div class="cm-line">- Open WebUI + Ollama</div>
<div class="cm-line">- Le Chat</div>
</div>
</main>
"""
    rendered = _convert_html_to_markdown(page)

    assert "```\nGolden Set\n\n\nPromptfoo" in rendered
    for branch in ("├ Omnifact", "└ Le Chat"):
        assert branch in rendered
def test_convert_html_to_markdown_indents_multiline_list_items():
    """A ``<br>`` inside a list item produces an indented continuation line."""
    # The literal is kept flush-left: its leading whitespace is part of the input.
    page = """
<main>
<h2>2. <strong>Zielarchitektur</strong></h2>
<ul>
<li>
<p>Unternehmensdaten → RAG → KI-Orchestrierung →<br>Local LLMs / API Modelle / Spezialmodelle</p>
</li>
</ul>
</main>
"""
    rendered = _convert_html_to_markdown(page)

    expected_item = (
        "- Unternehmensdaten → RAG → KI-Orchestrierung →\n"
        " Local LLMs / API Modelle / Spezialmodelle"
    )
    assert expected_item in rendered