security: fix #2 — apply lavf protocol whitelist via property API

The previous attempt set ``demuxer_lavf_o`` as an init kwarg with a
comma-laden ``protocol_whitelist=file,http,https,tls,tcp`` value.
mpv rejected it with -7 OPT_FORMAT because python-mpv's init path
goes through ``mpv_set_option_string``, which routes through mpv's
keyvalue list parser — that parser splits on ``,`` to find entries,
shredding the protocol list into orphan tokens. Backslash-escaping
``\,`` did not unescape on this code path either.

Splits the option set into two helpers:

- ``build_mpv_kwargs`` — init kwargs only (ytdl=no, load_scripts=no,
  POSIX input_conf null, all the existing playback/audio/network
  tuning). The lavf option is intentionally absent.
- ``lavf_options`` — a dict applied post-construction via the
  python-mpv property API, which uses the node API and accepts
  dict values for keyvalue-list options without splitting on
  commas inside the value.

Tests cover both paths: that ``demuxer_lavf_o`` is NOT in the init
kwargs (regression guard), and that ``lavf_options`` returns the
expected protocol set.

Audit-Ref: SECURITY_AUDIT.md finding #2
Severity: High
This commit is contained in:
pax 2026-04-11 16:34:50 -05:00
parent 160db1f12a
commit 4db7943ac7
2 changed files with 64 additions and 21 deletions

View File

@ -1,11 +1,42 @@
"""Pure helper that builds the kwargs dict passed to ``mpv.MPV``. """Pure helpers that build the kwargs dict passed to ``mpv.MPV`` and
the post-construction options dict applied via the property API.
Kept free of any Qt or mpv imports so the options dict can be audited Kept free of any Qt or mpv imports so the options can be audited from
from a CI test that only installs the stdlib. a CI test that only installs the stdlib.
""" """
from __future__ import annotations from __future__ import annotations
# FFmpeg ``protocol_whitelist`` value applied via mpv's
# ``demuxer-lavf-o`` option (audit finding #2). ``file`` must stay so
# cached local clips and ``.part`` files keep playing; ``http``/
# ``https``/``tls``/``tcp`` are needed for fresh network video.
# ``crypto`` is intentionally omitted — it's an FFmpeg pseudo-protocol
# for AES-decrypted streams that boorus do not legitimately serve.
LAVF_PROTOCOL_WHITELIST = "file,http,https,tls,tcp"
def lavf_options() -> dict[str, str]:
"""Return the FFmpeg lavf demuxer options to apply post-construction.
These cannot be set via ``mpv.MPV(**kwargs)`` because python-mpv's
init path uses ``mpv_set_option_string``, which routes through
mpv's keyvalue list parser. That parser splits on ``,`` to find
entries, so the comma-laden ``protocol_whitelist`` value gets
shredded into orphan tokens and mpv rejects the option with
-7 OPT_FORMAT. mpv's documented backslash escape (``\\,``) is
not unescaped on this code path either.
The post-construction property API DOES accept dict values for
keyvalue-list options via the node API, so we set them after
``mpv.MPV()`` returns. Caller pattern:
m = mpv.MPV(**build_mpv_kwargs(is_windows=...))
for k, v in lavf_options().items():
m["demuxer-lavf-o"] = {k: v}
"""
return {"protocol_whitelist": LAVF_PROTOCOL_WHITELIST}
def build_mpv_kwargs(is_windows: bool) -> dict[str, object]: def build_mpv_kwargs(is_windows: bool) -> dict[str, object]:
"""Return the kwargs dict for constructing ``mpv.MPV``. """Return the kwargs dict for constructing ``mpv.MPV``.
@ -24,22 +55,15 @@ def build_mpv_kwargs(is_windows: bool) -> dict[str, object]:
``~/.config/mpv/scripts``. These scripts run in mpv's context ``~/.config/mpv/scripts``. These scripts run in mpv's context
every time the widget is created. every time the widget is created.
- ``demuxer_lavf_o="protocol_whitelist=file,http,https,tls,tcp"``:
restrict ffmpeg's lavf demuxer to HTTP(S) and local file reads.
The default accepts ``concat:``, ``subfile:``, ``data:``,
``udp://``, etc. ``concat:/etc/passwd|/dev/zero`` is the
canonical local-file-read gadget when a hostile container is
piped through lavf. ``file`` must stay in the whitelist because
cached local files (``.part``, promoted cache paths) rely on it.
``crypto`` is intentionally omitted; it's an FFmpeg pseudo-
protocol for AES-decrypted streams that boorus do not serve.
- ``input_conf="/dev/null"`` (POSIX only): skip loading - ``input_conf="/dev/null"`` (POSIX only): skip loading
``~/.config/mpv/input.conf``. The existing ``~/.config/mpv/input.conf``. The existing
``input_default_bindings=False`` + ``input_vo_keyboard=False`` ``input_default_bindings=False`` + ``input_vo_keyboard=False``
are the primary lockdown; this is defense-in-depth. Windows are the primary lockdown; this is defense-in-depth. Windows
uses a different null-device path and the load behavior varies uses a different null-device path and the load behavior varies
by mpv build, so it is skipped there. by mpv build, so it is skipped there.
The ffmpeg protocol whitelist (also part of finding #2) is NOT
in this dict see ``lavf_options`` for the explanation.
""" """
kwargs: dict[str, object] = { kwargs: dict[str, object] = {
"vo": "libmpv", "vo": "libmpv",
@ -59,7 +83,6 @@ def build_mpv_kwargs(is_windows: bool) -> dict[str, object]:
"network_timeout": "10", "network_timeout": "10",
"ytdl": "no", "ytdl": "no",
"load_scripts": "no", "load_scripts": "no",
"demuxer_lavf_o": "protocol_whitelist=file,http,https,tls,tcp",
} }
if not is_windows: if not is_windows:
kwargs["input_conf"] = "/dev/null" kwargs["input_conf"] = "/dev/null"

View File

@ -6,7 +6,11 @@ from the CI environment that installs only httpx + Pillow + pytest.
from __future__ import annotations from __future__ import annotations
from booru_viewer.gui.media._mpv_options import build_mpv_kwargs from booru_viewer.gui.media._mpv_options import (
LAVF_PROTOCOL_WHITELIST,
build_mpv_kwargs,
lavf_options,
)
def test_ytdl_disabled(): def test_ytdl_disabled():
@ -21,13 +25,27 @@ def test_load_scripts_disabled():
assert kwargs["load_scripts"] == "no" assert kwargs["load_scripts"] == "no"
def test_protocol_whitelist_restricts_to_file_and_http(): def test_protocol_whitelist_not_in_init_kwargs():
"""Finding #2 — lavf demuxer must only accept file + HTTP(S) + TLS/TCP.""" """Finding #2 — the lavf protocol whitelist must NOT be in the
init kwargs dict. python-mpv's init path uses
``mpv_set_option_string``, which trips on the comma-laden value
with -7 OPT_FORMAT. The whitelist is applied separately via the
property API in ``mpv_gl.py`` (see ``lavf_options``)."""
kwargs = build_mpv_kwargs(is_windows=False) kwargs = build_mpv_kwargs(is_windows=False)
value = kwargs["demuxer_lavf_o"] assert "demuxer_lavf_o" not in kwargs
assert isinstance(value, str) assert "demuxer-lavf-o" not in kwargs
assert value.startswith("protocol_whitelist=")
allowed = set(value.split("=", 1)[1].split(","))
def test_lavf_options_protocol_whitelist():
"""Finding #2 — lavf demuxer must only accept file + HTTP(S) + TLS/TCP.
Returned as a dict so callers can pass it through the python-mpv
property API (which uses the node API and handles comma-laden
values cleanly).
"""
opts = lavf_options()
assert opts.keys() == {"protocol_whitelist"}
allowed = set(opts["protocol_whitelist"].split(","))
# `file` must be present — cached local clips and .part files use it. # `file` must be present — cached local clips and .part files use it.
assert "file" in allowed assert "file" in allowed
# HTTP(S) + supporting protocols for network videos. # HTTP(S) + supporting protocols for network videos.
@ -38,6 +56,8 @@ def test_protocol_whitelist_restricts_to_file_and_http():
# Dangerous protocols must NOT appear. # Dangerous protocols must NOT appear.
for banned in ("concat", "subfile", "data", "udp", "rtp", "crypto"): for banned in ("concat", "subfile", "data", "udp", "rtp", "crypto"):
assert banned not in allowed assert banned not in allowed
# The constant and the helper return the same value.
assert opts["protocol_whitelist"] == LAVF_PROTOCOL_WHITELIST
def test_input_conf_nulled_on_posix(): def test_input_conf_nulled_on_posix():