security: fix #6 — add pure source HTML escape helper
Extracts the rich-text Source-line builder out of info_panel.py into a Qt-free module so it can be unit-tested under CI (which installs only httpx + Pillow + pytest, no PySide6). The helper html.escape()s both the href and the visible display text, and only emits an <a> tag for http(s) URLs — non-URL sources (including javascript: and data: schemes) get rendered as escaped plain text without a clickable anchor. Not yet wired into InfoPanel.set_post; that lands in the next commit. Audit-Ref: SECURITY_AUDIT.md finding #6 Severity: Medium
This commit is contained in:
parent
5d348fa8be
commit
fa4f2cb270
34
booru_viewer/gui/_source_html.py
Normal file
34
booru_viewer/gui/_source_html.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
"""Pure helper for the info-panel Source line.
|
||||||
|
|
||||||
|
Lives in its own module so the helper can be unit-tested from CI
|
||||||
|
without pulling in PySide6. ``info_panel.py`` imports it.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from html import escape
|
||||||
|
|
||||||
|
|
||||||
|
def build_source_html(source: str | None) -> str:
|
||||||
|
"""Build the rich-text fragment for the Source line in the info panel.
|
||||||
|
|
||||||
|
The fragment is inserted into a QLabel set to RichText format with
|
||||||
|
setOpenExternalLinks(True) — that means QTextBrowser parses any HTML
|
||||||
|
in *source* as markup. Without escaping, a hostile booru can break
|
||||||
|
out of the href attribute, inject ``<img>`` tracking pixels, or make
|
||||||
|
the visible text disagree with the click target.
|
||||||
|
|
||||||
|
The href is only emitted for an http(s) URL; everything else is
|
||||||
|
rendered as escaped plain text. Both the href value and the visible
|
||||||
|
display text are HTML-escaped (audit finding #6).
|
||||||
|
"""
|
||||||
|
if not source:
|
||||||
|
return "none"
|
||||||
|
# Truncate display text but keep the full URL for the link target.
|
||||||
|
display = source if len(source) <= 60 else source[:57] + "..."
|
||||||
|
if source.startswith(("http://", "https://")):
|
||||||
|
return (
|
||||||
|
f'<a href="{escape(source, quote=True)}" '
|
||||||
|
f'style="color: #4fc3f7;">{escape(display)}</a>'
|
||||||
|
)
|
||||||
|
return escape(display)
|
||||||
87
tests/gui/test_source_html.py
Normal file
87
tests/gui/test_source_html.py
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
"""Tests for the pure info-panel source HTML builder.
|
||||||
|
|
||||||
|
Pure Python. No Qt, no network. Validates audit finding #6 — that the
|
||||||
|
helper escapes booru-controlled `post.source` before it's interpolated
|
||||||
|
into a QTextBrowser RichText document.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from booru_viewer.gui._source_html import build_source_html
|
||||||
|
|
||||||
|
|
||||||
|
def test_none_returns_literal_none():
|
||||||
|
assert build_source_html(None) == "none"
|
||||||
|
assert build_source_html("") == "none"
|
||||||
|
|
||||||
|
|
||||||
|
def test_plain_https_url_renders_escaped_anchor():
|
||||||
|
out = build_source_html("https://example.test/post/1")
|
||||||
|
assert out.startswith('<a href="https://example.test/post/1"')
|
||||||
|
assert ">https://example.test/post/1</a>" in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_long_url_display_text_truncated_but_href_full():
|
||||||
|
long_url = "https://example.test/" + "a" * 200
|
||||||
|
out = build_source_html(long_url)
|
||||||
|
# href contains the full URL
|
||||||
|
assert long_url in out.replace("&", "&")
|
||||||
|
# Display text is truncated to 57 chars + "..."
|
||||||
|
assert "..." in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_double_quote_in_url_escaped():
|
||||||
|
"""A `"` in the source must not break out of the href attribute."""
|
||||||
|
hostile = 'https://attacker.test/"><img src=x>'
|
||||||
|
out = build_source_html(hostile)
|
||||||
|
# Raw <img> must NOT appear — html.escape converts < to <
|
||||||
|
assert "<img" not in out
|
||||||
|
# The display text must also have the raw markup escaped.
|
||||||
|
assert ">" in out or """ in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_html_tags_in_url_escaped():
|
||||||
|
hostile = 'https://attacker.test/<script>alert(1)</script>'
|
||||||
|
out = build_source_html(hostile)
|
||||||
|
assert "<script>" not in out
|
||||||
|
assert "<script>" in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_non_url_source_rendered_as_escaped_plain_text():
|
||||||
|
"""A source string that isn't an http(s) URL is rendered as plain
|
||||||
|
text — no <a> tag, but still HTML-escaped."""
|
||||||
|
out = build_source_html("not a url <b>at all</b>")
|
||||||
|
assert "<a" not in out
|
||||||
|
assert "<b>" not in out
|
||||||
|
assert "<b>" in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_javascript_url_does_not_become_anchor():
|
||||||
|
"""Sources that don't start with http(s) — including `javascript:` —
|
||||||
|
must NOT be wrapped in an <a> tag where they'd become a clickable
|
||||||
|
link target."""
|
||||||
|
out = build_source_html("javascript:alert(1)")
|
||||||
|
assert "<a " not in out
|
||||||
|
assert "alert(1)" in out # text content preserved (escaped)
|
||||||
|
|
||||||
|
|
||||||
|
def test_data_url_does_not_become_anchor():
|
||||||
|
out = build_source_html("data:text/html,<script>x</script>")
|
||||||
|
assert "<a " not in out
|
||||||
|
assert "<script>" not in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_ampersand_in_url_escaped():
|
||||||
|
out = build_source_html("https://example.test/?a=1&b=2")
|
||||||
|
# `&` must be `&` inside the href attribute
|
||||||
|
assert "&" in out
|
||||||
|
# Raw `&b=` is NOT acceptable as an attribute value
|
||||||
|
assert 'href="https://example.test/?a=1&b=2"' in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_pixiv_real_world_source_unchanged_visually():
|
||||||
|
"""Realistic input — a normal pixiv link — should pass through with
|
||||||
|
no surprising changes."""
|
||||||
|
out = build_source_html("https://www.pixiv.net/artworks/12345")
|
||||||
|
assert 'href="https://www.pixiv.net/artworks/12345"' in out
|
||||||
|
assert "https://www.pixiv.net/artworks/12345</a>" in out
|
||||||
Loading…
x
Reference in New Issue
Block a user