diff --git a/booru_viewer/gui/_source_html.py b/booru_viewer/gui/_source_html.py new file mode 100644 index 0000000..dca5dff --- /dev/null +++ b/booru_viewer/gui/_source_html.py @@ -0,0 +1,34 @@ +"""Pure helper for the info-panel Source line. + +Lives in its own module so the helper can be unit-tested from CI +without pulling in PySide6. ``info_panel.py`` imports it. +""" + +from __future__ import annotations + +from html import escape + + +def build_source_html(source: str | None) -> str: + """Build the rich-text fragment for the Source line in the info panel. + + The fragment is inserted into a QLabel set to RichText format with + setOpenExternalLinks(True) — that means QTextBrowser parses any HTML + in *source* as markup. Without escaping, a hostile booru can break + out of the href attribute, inject ```` tracking pixels, or make + the visible text disagree with the click target. + + The href is only emitted for an http(s) URL; everything else is + rendered as escaped plain text. Both the href value and the visible + display text are HTML-escaped (audit finding #6). + """ + if not source: + return "none" + # Truncate display text but keep the full URL for the link target. + display = source if len(source) <= 60 else source[:57] + "..." + if source.startswith(("http://", "https://")): + return ( + f'{escape(display)}' + ) + return escape(display) diff --git a/tests/gui/test_source_html.py b/tests/gui/test_source_html.py new file mode 100644 index 0000000..2376678 --- /dev/null +++ b/tests/gui/test_source_html.py @@ -0,0 +1,87 @@ +"""Tests for the pure info-panel source HTML builder. + +Pure Python. No Qt, no network. Validates audit finding #6 — that the +helper escapes booru-controlled `post.source` before it's interpolated +into a QTextBrowser RichText document. +""" + +from __future__ import annotations + +from booru_viewer.gui._source_html import build_source_html + + +def test_none_returns_literal_none(): + assert build_source_html(None) == "none" + assert build_source_html("") == "none" + + +def test_plain_https_url_renders_escaped_anchor(): + out = build_source_html("https://example.test/post/1") + assert out.startswith('https://example.test/post/1" in out + + +def test_long_url_display_text_truncated_but_href_full(): + long_url = "https://example.test/" + "a" * 200 + out = build_source_html(long_url) + # href contains the full URL + assert long_url in out.replace("&", "&") + # Display text is truncated to 57 chars + "..." + assert "..." in out + + +def test_double_quote_in_url_escaped(): + """A `"` in the source must not break out of the href attribute.""" + hostile = 'https://attacker.test/">' + out = build_source_html(hostile) + # Raw must NOT appear — html.escape converts < to < + assert "" not in out + assert "<script>" in out + + +def test_non_url_source_rendered_as_escaped_plain_text(): + """A source string that isn't an http(s) URL is rendered as plain + text — no tag, but still HTML-escaped.""" + out = build_source_html("not a url at all") + assert "" not in out + assert "<b>" in out + + +def test_javascript_url_does_not_become_anchor(): + """Sources that don't start with http(s) — including `javascript:` — + must NOT be wrapped in an tag where they'd become a clickable + link target.""" + out = build_source_html("javascript:alert(1)") + assert "x") + assert "" not in out + + +def test_ampersand_in_url_escaped(): + out = build_source_html("https://example.test/?a=1&b=2") + # `&` must be `&` inside the href attribute + assert "&" in out + # Raw `&b=` is NOT acceptable as an attribute value + assert 'href="https://example.test/?a=1&b=2"' in out + + +def test_pixiv_real_world_source_unchanged_visually(): + """Realistic input — a normal pixiv link — should pass through with + no surprising changes.""" + out = build_source_html("https://www.pixiv.net/artworks/12345") + assert 'href="https://www.pixiv.net/artworks/12345"' in out + assert "https://www.pixiv.net/artworks/12345" in out