From fa4f2cb2706c9f9660691a22d23270d4f2d40a9e Mon Sep 17 00:00:00 2001 From: pax Date: Sat, 11 Apr 2026 16:19:06 -0500 Subject: [PATCH] =?UTF-8?q?security:=20fix=20#6=20=E2=80=94=20add=20pure?= =?UTF-8?q?=20source=20HTML=20escape=20helper?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extracts the rich-text Source-line builder out of info_panel.py into a Qt-free module so it can be unit-tested under CI (which installs only httpx + Pillow + pytest, no PySide6). The helper html.escape()s both the href and the visible display text, and only emits an tag for http(s) URLs — non-URL sources (including javascript: and data: schemes) get rendered as escaped plain text without a clickable anchor. Not yet wired into InfoPanel.set_post; that lands in the next commit. Audit-Ref: SECURITY_AUDIT.md finding #6 Severity: Medium --- booru_viewer/gui/_source_html.py | 34 +++++++++++++ tests/gui/test_source_html.py | 87 ++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 booru_viewer/gui/_source_html.py create mode 100644 tests/gui/test_source_html.py diff --git a/booru_viewer/gui/_source_html.py b/booru_viewer/gui/_source_html.py new file mode 100644 index 0000000..dca5dff --- /dev/null +++ b/booru_viewer/gui/_source_html.py @@ -0,0 +1,34 @@ +"""Pure helper for the info-panel Source line. + +Lives in its own module so the helper can be unit-tested from CI +without pulling in PySide6. ``info_panel.py`` imports it. +""" + +from __future__ import annotations + +from html import escape + + +def build_source_html(source: str | None) -> str: + """Build the rich-text fragment for the Source line in the info panel. + + The fragment is inserted into a QLabel set to RichText format with + setOpenExternalLinks(True) — that means QTextBrowser parses any HTML + in *source* as markup. Without escaping, a hostile booru can break + out of the href attribute, inject ```` tracking pixels, or make + the visible text disagree with the click target. + + The href is only emitted for an http(s) URL; everything else is + rendered as escaped plain text. Both the href value and the visible + display text are HTML-escaped (audit finding #6). + """ + if not source: + return "none" + # Truncate display text but keep the full URL for the link target. + display = source if len(source) <= 60 else source[:57] + "..." + if source.startswith(("http://", "https://")): + return ( + f'{escape(display)}' + ) + return escape(display) diff --git a/tests/gui/test_source_html.py b/tests/gui/test_source_html.py new file mode 100644 index 0000000..2376678 --- /dev/null +++ b/tests/gui/test_source_html.py @@ -0,0 +1,87 @@ +"""Tests for the pure info-panel source HTML builder. + +Pure Python. No Qt, no network. Validates audit finding #6 — that the +helper escapes booru-controlled `post.source` before it's interpolated +into a QTextBrowser RichText document. +""" + +from __future__ import annotations + +from booru_viewer.gui._source_html import build_source_html + + +def test_none_returns_literal_none(): + assert build_source_html(None) == "none" + assert build_source_html("") == "none" + + +def test_plain_https_url_renders_escaped_anchor(): + out = build_source_html("https://example.test/post/1") + assert out.startswith('https://example.test/post/1" in out + + +def test_long_url_display_text_truncated_but_href_full(): + long_url = "https://example.test/" + "a" * 200 + out = build_source_html(long_url) + # href contains the full URL + assert long_url in out.replace("&", "&") + # Display text is truncated to 57 chars + "..." + assert "..." in out + + +def test_double_quote_in_url_escaped(): + """A `"` in the source must not break out of the href attribute.""" + hostile = 'https://attacker.test/">' + out = build_source_html(hostile) + # Raw must NOT appear — html.escape converts < to < + assert "" not in out + assert "<script>" in out + + +def test_non_url_source_rendered_as_escaped_plain_text(): + """A source string that isn't an http(s) URL is rendered as plain + text — no tag, but still HTML-escaped.""" + out = build_source_html("not a url at all") + assert "" not in out + assert "<b>" in out + + +def test_javascript_url_does_not_become_anchor(): + """Sources that don't start with http(s) — including `javascript:` — + must NOT be wrapped in an tag where they'd become a clickable + link target.""" + out = build_source_html("javascript:alert(1)") + assert "x") + assert "" not in out + + +def test_ampersand_in_url_escaped(): + out = build_source_html("https://example.test/?a=1&b=2") + # `&` must be `&` inside the href attribute + assert "&" in out + # Raw `&b=` is NOT acceptable as an attribute value + assert 'href="https://example.test/?a=1&b=2"' in out + + +def test_pixiv_real_world_source_unchanged_visually(): + """Realistic input — a normal pixiv link — should pass through with + no surprising changes.""" + out = build_source_html("https://www.pixiv.net/artworks/12345") + assert 'href="https://www.pixiv.net/artworks/12345"' in out + assert "https://www.pixiv.net/artworks/12345" in out