Add Phase A test suite for core/ primitives
First regression-test layer for booru-viewer. Pure Python — no Qt, no
mpv, no network, no real filesystem outside tmp_path. Locks in the
security and concurrency invariants from the 54ccc40 + eb58d76 hardening
commits ahead of the upcoming popout state machine refactor (Prompt 3),
which needs a stable baseline to refactor against.
16 tests across five files mirroring the source layout under
booru_viewer/core/:
- tests/core/test_db.py (4):
- _validate_folder_name rejection rules (.., /foo, \\foo, .hidden,
~user, empty) and acceptance categories (unicode, spaces, parens)
- add_bookmark INSERT OR IGNORE collision returns the existing row
id (locks in the lastrowid=0 fix)
- get_bookmarks LIKE escaping (literal cat_ear does not match catear)
- tests/core/test_cache.py (7):
- _referer_for hostname suffix matching (gelbooru.com / donmai.us
apex rewrite, both exact-match and subdomain)
- _referer_for rejects substring attackers
(imgblahgelbooru.attacker.com does NOT pick up the booru referer)
- ugoira frame-count and uncompressed-size caps refuse zip bombs
before any decompression
- _do_download MAX_DOWNLOAD_BYTES enforced both at the
Content-Length pre-check AND in the chunk-loop running total
- _is_valid_media returns True on OSError (no delete + redownload
loop on transient EBUSY)
- tests/core/test_config.py (2):
- saved_folder_dir rejects literal .. and ../escape
- find_library_files walks root + 1 level, filters by
MEDIA_EXTENSIONS, exact post-id stem match
- tests/core/test_concurrency.py (2):
- get_app_loop raises RuntimeError before set_app_loop is called
- run_on_app_loop round-trips a coroutine result from a worker
thread loop back to the test thread
- tests/core/api/test_base.py (1):
- BooruClient._shared_client lazy singleton constructor-once under
10-thread first-call race
Plus tests/conftest.py with fixtures: tmp_db, tmp_library,
reset_app_loop, reset_shared_clients. All fixtures use tmp_path or
reset module-level globals around the test so the suite is parallel-
safe.
pyproject.toml:
- New [project.optional-dependencies] test extra: pytest>=8.0,
pytest-asyncio>=0.23
- New [tool.pytest.ini_options]: asyncio_mode = "auto",
testpaths = ["tests"]
README.md:
- Linux install section gains "Run tests" with the
pip install -e ".[test]" + pytest tests/ invocation
Phase B (post-sweep VideoPlayer regression tests for the seek slider
pin, _pending_mute lazy replay, and volume replay) is deferred to
Prompt 3's state machine work — VideoPlayer cannot be instantiated
without QApplication and a real mpv, which is out of scope for a
unit test suite. Once the state machine carves the pure-Python state
out of VideoPlayer, those tests become trivial against the helper
module.
Suite runs in 0.07s (16 tests). Independent of Qt/mpv/network/ffmpeg.
Test cases for Prompt 3:
- (already covered) — this IS the test suite Prompt 3 builds on top of
This commit is contained in:
parent
80001e64fe
commit
bf14466382
@ -147,6 +147,11 @@ booru-viewer
|
||||
|
||||
Or without installing: `python3 -m booru_viewer.main_gui`
|
||||
|
||||
**Run tests:**
|
||||
```sh
|
||||
pip install -e ".[test]"
|
||||
pytest tests/
|
||||
```
|
||||
|
||||
**Desktop entry:** To add booru-viewer to your app launcher, create `~/.local/share/applications/booru-viewer.desktop`:
|
||||
```ini
|
||||
|
||||
@ -14,6 +14,16 @@ dependencies = [
|
||||
"python-mpv>=1.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
test = [
|
||||
"pytest>=8.0",
|
||||
"pytest-asyncio>=0.23",
|
||||
]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
asyncio_mode = "auto"
|
||||
testpaths = ["tests"]
|
||||
|
||||
[project.scripts]
|
||||
booru-viewer = "booru_viewer.main_gui:main"
|
||||
|
||||
|
||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
71
tests/conftest.py
Normal file
71
tests/conftest.py
Normal file
@ -0,0 +1,71 @@
|
||||
"""Shared fixtures for the booru-viewer test suite.
|
||||
|
||||
All fixtures here are pure-Python — no Qt, no mpv, no network. Filesystem
|
||||
writes go through `tmp_path` (or fixtures that wrap it). Module-level globals
|
||||
that the production code mutates (the concurrency loop, the httpx singletons)
|
||||
get reset around each test that touches them.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_db(tmp_path):
    """Yield a `Database` backed by a file under `tmp_path`; close it on teardown."""
    from booru_viewer.core.db import Database

    database = Database(tmp_path / "test.db")
    yield database
    database.close()
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_library(tmp_path):
    """Redirect `saved_dir()` to `tmp_path/saved` while the test runs.

    Goes through `core.config.set_library_dir` — the same override hook the
    GUI uses for the user-configurable library location — and puts the
    previous value back afterwards, so test ordering never causes bleed.
    """
    from booru_viewer.core import config

    previous = config._library_dir_override
    library_root = tmp_path / "saved"
    library_root.mkdir()
    config.set_library_dir(library_root)
    yield library_root
    config.set_library_dir(previous)
|
||||
|
||||
|
||||
@pytest.fixture
def reset_app_loop():
    """Clear `concurrency._app_loop` for the test, then restore it.

    Production sets this module global exactly once at startup; tests that
    assert the unset-state behavior need it nulled out, and teardown puts
    the prior value back so nothing leaks across tests.
    """
    from booru_viewer.core import concurrency

    saved_loop = concurrency._app_loop
    concurrency._app_loop = None
    yield
    concurrency._app_loop = saved_loop
|
||||
|
||||
|
||||
@pytest.fixture
def reset_shared_clients():
    """Null out both shared httpx singletons; restore them on teardown.

    `cache._shared_client` (module global) and `BooruClient._shared_client`
    (class attribute) are lazily initialized behind a lock. Tests exercising
    that first-call race need a genuinely fresh state rather than whatever
    instance a previous test left behind.
    """
    from booru_viewer.core import cache
    from booru_viewer.core.api.base import BooruClient

    saved_class_client = BooruClient._shared_client
    saved_module_client = cache._shared_client
    BooruClient._shared_client = None
    cache._shared_client = None
    yield
    BooruClient._shared_client = saved_class_client
    cache._shared_client = saved_module_client
|
||||
0
tests/core/__init__.py
Normal file
0
tests/core/__init__.py
Normal file
0
tests/core/api/__init__.py
Normal file
0
tests/core/api/__init__.py
Normal file
77
tests/core/api/test_base.py
Normal file
77
tests/core/api/test_base.py
Normal file
@ -0,0 +1,77 @@
|
||||
"""Tests for `booru_viewer.core.api.base` — the lazy `_shared_client`
|
||||
singleton on `BooruClient`.
|
||||
|
||||
Locks in the lock-and-recheck pattern at `base.py:90-108`. Without it,
|
||||
two threads racing on first `.client` access would both see
|
||||
`_shared_client is None`, both build an `httpx.AsyncClient`, and one of
|
||||
them would leak (overwritten without aclose).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from booru_viewer.core.api.base import BooruClient
|
||||
|
||||
|
||||
class _StubClient(BooruClient):
    """Minimal concrete `BooruClient` for instantiation in tests.

    The base class declares abstract `search` / `get_post`, so a trivial
    subclass is required; both overrides return empty results.
    """

    api_type = "stub"

    async def get_post(self, post_id):
        return None

    async def search(self, tags="", page=1, limit=40):
        return []
|
||||
|
||||
|
||||
def test_shared_client_singleton_under_concurrency(reset_shared_clients):
    """N threads racing on first `.client` access must result in exactly
    one `httpx.AsyncClient` constructor call.

    The threading.Lock in the property guards the check-and-set, so every
    thread that loses the race re-reads the now-set `_shared_client` after
    acquiring the lock instead of building (and leaking) its own instance.
    """
    constructor_calls = 0
    constructor_lock = threading.Lock()

    def _fake_async_client(*args, **kwargs):
        # Count constructions; return a stand-in that looks "open" so the
        # property never decides the client needs rebuilding.
        nonlocal constructor_calls
        with constructor_lock:
            constructor_calls += 1
        m = MagicMock()
        m.is_closed = False
        return m

    # Barrier so all threads hit the property at the same moment
    n_threads = 10
    barrier = threading.Barrier(n_threads)
    results = []
    results_lock = threading.Lock()

    client_instance = _StubClient("http://example.test")

    def _worker():
        barrier.wait()
        c = client_instance.client
        with results_lock:
            results.append(c)

    with patch("booru_viewer.core.api.base.httpx.AsyncClient",
               side_effect=_fake_async_client):
        threads = [threading.Thread(target=_worker) for _ in range(n_threads)]
        for t in threads:
            t.start()
        for t in threads:
            t.join(timeout=5)
            # join(timeout=...) returns silently on expiry; a thread still
            # alive here means `.client` deadlocked — fail loudly now
            # instead of letting the counts below produce a confusing error.
            assert not t.is_alive(), "worker thread hung in .client"

    assert constructor_calls == 1, (
        f"Expected exactly one httpx.AsyncClient construction, "
        f"got {constructor_calls}"
    )
    # All threads got back the same shared instance
    assert len(results) == n_threads
    assert all(r is results[0] for r in results)
|
||||
224
tests/core/test_cache.py
Normal file
224
tests/core/test_cache.py
Normal file
@ -0,0 +1,224 @@
|
||||
"""Tests for `booru_viewer.core.cache` — Referer hostname matching, ugoira
|
||||
zip-bomb defenses, download size caps, and validity-check fallback.
|
||||
|
||||
Locks in:
|
||||
- `_referer_for` proper hostname suffix matching (`54ccc40` security fix)
|
||||
guarding against `imgblahgelbooru.attacker.com` mapping to gelbooru.com
|
||||
- `_convert_ugoira_to_gif` cap enforcement (frame count + uncompressed size)
|
||||
before any decompression — defense against ugoira zip bombs
|
||||
- `_do_download` MAX_DOWNLOAD_BYTES enforcement, both the Content-Length
|
||||
pre-check and the running-total chunk-loop guard
|
||||
- `_is_valid_media` returning True on OSError so a transient EBUSY/lock
|
||||
doesn't kick off a delete + re-download loop
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import io
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import pytest
|
||||
|
||||
from booru_viewer.core import cache
|
||||
from booru_viewer.core.cache import (
|
||||
MAX_DOWNLOAD_BYTES,
|
||||
_convert_ugoira_to_gif,
|
||||
_do_download,
|
||||
_is_valid_media,
|
||||
_referer_for,
|
||||
)
|
||||
|
||||
|
||||
# -- _referer_for hostname suffix matching --
|
||||
|
||||
def test_referer_for_exact_and_suffix_match():
    """Booru hostnames — apex or subdomain — map to the canonical Referer.

    The request's own netloc is deliberately discarded: the header should
    look like a navigation from the canonical site (gelbooru.com apex for
    gelbooru hosts, danbooru.donmai.us for any donmai host).
    """
    cases = [
        # gelbooru: exact apex and CDN subdomain both canonicalize
        ("https://gelbooru.com/index.php", "https://gelbooru.com/"),
        ("https://img3.gelbooru.com/images/abc.jpg", "https://gelbooru.com/"),
        # donmai: exact apex and sibling subdomain both canonicalize
        ("https://donmai.us/posts/123", "https://danbooru.donmai.us/"),
        ("https://safebooru.donmai.us/posts/123", "https://danbooru.donmai.us/"),
    ]
    for url, expected in cases:
        assert _referer_for(urlparse(url)) == expected
|
||||
|
||||
|
||||
def test_referer_for_rejects_substring_attacker():
    """Hosts merely CONTAINING `gelbooru.com` / `donmai.us` as a substring
    must not receive the booru Referer.

    Proper hostname-suffix matching is the `54ccc40` security fix; without
    it, `imgblahgelbooru.attacker.com` would be handed the gelbooru Referer.
    """
    leaked = _referer_for(urlparse("https://imgblahgelbooru.attacker.com/x.jpg"))
    assert "gelbooru.com" not in leaked
    assert "imgblahgelbooru.attacker.com" in leaked

    leaked = _referer_for(urlparse("https://donmai.us.attacker.com/x.jpg"))
    assert "danbooru.donmai.us" not in leaked
    assert "donmai.us.attacker.com" in leaked

    # Hosts unrelated to any booru pass through untouched.
    assert _referer_for(urlparse("https://example.test/x.jpg")) == "https://example.test/"
|
||||
|
||||
|
||||
# -- Ugoira zip-bomb defenses --
|
||||
|
||||
def _build_ugoira_zip(path: Path, n_frames: int, frame_bytes: bytes = b"x") -> Path:
|
||||
"""Build a synthetic ugoira-shaped zip with `n_frames` numbered .jpg
|
||||
entries. Content is whatever the caller passes; defaults to 1 byte.
|
||||
|
||||
The cap-enforcement tests don't need decodable JPEGs — the cap fires
|
||||
before any decode happens. The filenames just need .jpg suffixes so
|
||||
`_convert_ugoira_to_gif` recognizes them as frames.
|
||||
"""
|
||||
with zipfile.ZipFile(path, "w") as zf:
|
||||
for i in range(n_frames):
|
||||
zf.writestr(f"{i:04d}.jpg", frame_bytes)
|
||||
return path
|
||||
|
||||
|
||||
def test_ugoira_frame_count_cap_rejects_bomb(tmp_path, monkeypatch):
    """More frames than `UGOIRA_MAX_FRAMES` must be refused BEFORE any
    decompression happens.

    The cap is monkeypatched down to 2 so the fixture zip stays tiny — the
    production check compares against the cap, whatever its value, so
    nothing is lost by not building a 5001-entry archive.
    """
    monkeypatch.setattr(cache, "UGOIRA_MAX_FRAMES", 2)
    bomb = _build_ugoira_zip(tmp_path / "bomb.zip", n_frames=3)

    converted = _convert_ugoira_to_gif(bomb)

    # Refusal path: the original zip comes back and no .gif is produced.
    assert converted == bomb
    assert not bomb.with_suffix(".gif").exists()
|
||||
|
||||
|
||||
def test_ugoira_uncompressed_size_cap_rejects_bomb(tmp_path, monkeypatch):
    """Summed `ZipInfo.file_size` headers beyond
    `UGOIRA_MAX_UNCOMPRESSED_BYTES` must be refused before decompression.

    The cap is monkeypatched down so the fixture archive stays small.
    """
    monkeypatch.setattr(cache, "UGOIRA_MAX_UNCOMPRESSED_BYTES", 50)
    # Three 100-byte frames sum to 300, well past the 50-byte cap.
    bomb = _build_ugoira_zip(
        tmp_path / "bomb.zip", n_frames=3, frame_bytes=b"x" * 100
    )

    converted = _convert_ugoira_to_gif(bomb)

    assert converted == bomb
    assert not bomb.with_suffix(".gif").exists()
|
||||
|
||||
|
||||
# -- _do_download MAX_DOWNLOAD_BYTES caps --
|
||||
|
||||
|
||||
class _FakeHeaders:
|
||||
def __init__(self, mapping):
|
||||
self._m = mapping
|
||||
def get(self, key, default=None):
|
||||
return self._m.get(key.lower(), default)
|
||||
|
||||
|
||||
class _FakeResponse:
    """Minimal async-response double: canned headers plus canned body chunks."""

    def __init__(self, headers, chunks):
        lowered = {k.lower(): v for k, v in headers.items()}
        self.headers = _FakeHeaders(lowered)
        self._chunks = chunks

    def raise_for_status(self):
        # Always a "success" response; the cap tests never exercise HTTP errors.
        pass

    async def aiter_bytes(self, _size):
        for piece in self._chunks:
            yield piece
|
||||
|
||||
|
||||
class _FakeStreamCtx:
|
||||
def __init__(self, response):
|
||||
self._resp = response
|
||||
async def __aenter__(self):
|
||||
return self._resp
|
||||
async def __aexit__(self, *_args):
|
||||
return False
|
||||
|
||||
|
||||
class _FakeClient:
    """httpx.AsyncClient double whose `stream` always yields one canned response."""

    def __init__(self, response):
        self._resp = response

    def stream(self, _method, _url, headers=None):
        return _FakeStreamCtx(self._resp)
|
||||
|
||||
|
||||
def test_download_cap_content_length_pre_check(tmp_path):
    """An advertised Content-Length above MAX_DOWNLOAD_BYTES must make
    `_do_download` raise BEFORE any bytes are iterated.

    This cheap header pre-check defeats the trivial OOM/disk-fill attack —
    streaming never even starts.
    """
    oversized = str(MAX_DOWNLOAD_BYTES + 1)
    fake_response = _FakeResponse(
        headers={"content-type": "image/jpeg", "content-length": oversized},
        chunks=[b"never read"],
    )
    dest = tmp_path / "out.jpg"

    with pytest.raises(ValueError, match="Download too large"):
        asyncio.run(
            _do_download(_FakeClient(fake_response),
                         "http://example.test/x.jpg", {}, dest, None)
        )

    # Nothing may land on disk when the pre-check fires.
    assert not dest.exists()
|
||||
|
||||
|
||||
def test_download_cap_running_total_aborts(tmp_path, monkeypatch):
    """The chunk loop must enforce its own running-total cap — servers can
    lie about Content-Length — and abort mid-stream once cumulative bytes
    pass `MAX_DOWNLOAD_BYTES`. The cap is patched down to 1024 for speed.
    """
    monkeypatch.setattr(cache, "MAX_DOWNLOAD_BYTES", 1024)
    # A Content-Length of 0 ("unknown") routes through the small-payload
    # branch, so only the in-loop running total can catch the overrun.
    fake_response = _FakeResponse(
        headers={"content-type": "image/jpeg", "content-length": "0"},
        chunks=[b"x" * 600, b"x" * 600],  # 1200 total > 1024 cap
    )
    dest = tmp_path / "out.jpg"

    with pytest.raises(ValueError, match="exceeded cap mid-stream"):
        asyncio.run(
            _do_download(_FakeClient(fake_response),
                         "http://example.test/x.jpg", {}, dest, None)
        )

    # Writes are buffered until the loop completes, so a mid-stream abort
    # leaves no file behind.
    assert not dest.exists()
|
||||
|
||||
|
||||
# -- _is_valid_media OSError fallback --
|
||||
|
||||
def test_is_valid_media_returns_true_on_oserror(tmp_path):
    """A file that cannot be opened (transient EBUSY, lock, permissions)
    must still count as valid.

    Returning False here used to trigger delete + re-download on every
    access for as long as the OS-level problem lasted; treating OSError as
    "assume fine" breaks that loop.
    """
    missing = tmp_path / "definitely-not-here.jpg"
    assert _is_valid_media(missing) is True
|
||||
62
tests/core/test_concurrency.py
Normal file
62
tests/core/test_concurrency.py
Normal file
@ -0,0 +1,62 @@
|
||||
"""Tests for `booru_viewer.core.concurrency` — the persistent-loop handle.
|
||||
|
||||
Locks in:
|
||||
- `get_app_loop` raises a clear RuntimeError if `set_app_loop` was never
|
||||
called (the production code uses this to bail loudly when async work
|
||||
is scheduled before the loop thread starts)
|
||||
- `run_on_app_loop` round-trips a coroutine result from a worker-thread
|
||||
loop back to the calling thread via `concurrent.futures.Future`
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import threading
|
||||
|
||||
import pytest
|
||||
|
||||
from booru_viewer.core import concurrency
|
||||
from booru_viewer.core.concurrency import (
|
||||
get_app_loop,
|
||||
run_on_app_loop,
|
||||
set_app_loop,
|
||||
)
|
||||
|
||||
|
||||
def test_get_app_loop_raises_before_set(reset_app_loop):
    """Before `set_app_loop` runs, `get_app_loop` must raise RuntimeError.

    Calling it that early is a configuration error; the explicit exception
    lets callers bail loudly instead of scheduling work onto a None loop.
    """
    with pytest.raises(RuntimeError, match="not initialized"):
        get_app_loop()
|
||||
|
||||
|
||||
def test_run_on_app_loop_round_trips_result(reset_app_loop):
    """Start a real asyncio loop in a worker thread, register it with
    `set_app_loop`, then schedule a coroutine from this (main) thread via
    `run_on_app_loop` and read the result back through the returned
    `concurrent.futures.Future`."""
    loop = asyncio.new_event_loop()
    ready = threading.Event()

    def _run_loop():
        asyncio.set_event_loop(loop)
        ready.set()
        loop.run_forever()

    t = threading.Thread(target=_run_loop, daemon=True)
    t.start()
    # Event.wait returns False on timeout; the original ignored that, so a
    # loop thread that never started would produce a confusing downstream
    # failure. Fail fast and clearly instead.
    assert ready.wait(timeout=2), "worker loop thread failed to start"

    try:
        set_app_loop(loop)

        async def _produce():
            return 42

        fut = run_on_app_loop(_produce())
        assert fut.result(timeout=2) == 42
    finally:
        loop.call_soon_threadsafe(loop.stop)
        t.join(timeout=2)
        loop.close()
|
||||
57
tests/core/test_config.py
Normal file
57
tests/core/test_config.py
Normal file
@ -0,0 +1,57 @@
|
||||
"""Tests for `booru_viewer.core.config` — path traversal guard on
|
||||
`saved_folder_dir` and the shallow walk in `find_library_files`.
|
||||
|
||||
Locks in:
|
||||
- `saved_folder_dir` resolve-and-relative_to check (`54ccc40` defense in
|
||||
depth alongside `_validate_folder_name`)
|
||||
- `find_library_files` matching exactly the root + 1-level subdirectory
|
||||
layout that the library uses, with the right MEDIA_EXTENSIONS filter
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from booru_viewer.core import config
|
||||
from booru_viewer.core.config import find_library_files, saved_folder_dir
|
||||
|
||||
|
||||
# -- saved_folder_dir traversal guard --
|
||||
|
||||
def test_saved_folder_dir_rejects_dotdot(tmp_library):
    """Any name that resolves outside `saved_dir()` must raise ValueError
    rather than silently creating a directory somewhere unexpected.

    Only literal `..` shapes are exercised here — symlink escapes depend on
    the filesystem and would make the test flaky.
    """
    for escape_shape in ("..", "../escape", "foo/../.."):
        with pytest.raises(ValueError, match="escapes saved directory"):
            saved_folder_dir(escape_shape)
|
||||
|
||||
|
||||
# -- find_library_files shallow walk --
|
||||
|
||||
def test_find_library_files_walks_root_and_one_level(tmp_library):
    """The library layout is flat: `saved/<post_id>.<ext>` at the root plus
    one optional folder level, `saved/<folder>/<post_id>.<ext>`. The walk
    must find matches at both depths, keep only MEDIA_EXTENSIONS, and
    require an exact post-id stem.
    """
    fixture_files = (
        "123.jpg",          # root-level match
        "folder1/123.png",  # one-level subfolder match
        "folder2/456.gif",  # different post id — must be excluded
        "123.txt",          # right stem, non-media extension — excluded
    )
    for relative in fixture_files:
        target = tmp_library / relative
        target.parent.mkdir(exist_ok=True)
        target.write_bytes(b"")

    found = {p.name for p in find_library_files(123)}

    assert found == {"123.jpg", "123.png"}
|
||||
98
tests/core/test_db.py
Normal file
98
tests/core/test_db.py
Normal file
@ -0,0 +1,98 @@
|
||||
"""Tests for `booru_viewer.core.db` — folder name validation, INSERT OR
|
||||
IGNORE collision handling, and LIKE escaping.
|
||||
|
||||
These tests lock in the `54ccc40` security/correctness fixes:
|
||||
- `_validate_folder_name` rejects path-traversal shapes before they hit the
|
||||
filesystem in `saved_folder_dir`
|
||||
- `add_bookmark` re-SELECTs the actual row id after an INSERT OR IGNORE
|
||||
collision so the returned `Bookmark.id` is never the bogus 0 that broke
|
||||
`update_bookmark_cache_path`
|
||||
- `get_bookmarks` escapes the SQL LIKE wildcards `_` and `%` so a search for
|
||||
`cat_ear` doesn't bleed into `catear` / `catXear`
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from booru_viewer.core.db import _validate_folder_name
|
||||
|
||||
|
||||
# -- _validate_folder_name --
|
||||
|
||||
def test_validate_folder_name_rejects_traversal():
    """Every shape that could escape the saved-images dir, or address a
    hidden file, must raise ValueError."""
    rejected = (
        "",         # empty
        "..",       # dotdot literal
        ".",        # dot literal
        "/foo",     # forward slash
        "foo/bar",  # embedded forward slash
        "\\foo",    # backslash
        ".hidden",  # leading dot
        "~user",    # leading tilde
    )
    for bad_name in rejected:
        with pytest.raises(ValueError):
            _validate_folder_name(bad_name)
|
||||
|
||||
|
||||
def test_validate_folder_name_accepts_unicode_and_punctuation():
    """Ordinary real-world folder names pass through unchanged — the guard
    blocks escape shapes, not normal naming. Note "foo.bar": an interior
    dot is fine; only a LEADING dot is rejected."""
    for ok_name in ("miku(lewd)", "cat ear", "日本語", "foo-bar", "foo.bar"):
        assert _validate_folder_name(ok_name) == ok_name
|
||||
|
||||
|
||||
# -- add_bookmark INSERT OR IGNORE collision --
|
||||
|
||||
def test_add_bookmark_collision_returns_existing_id(tmp_db):
    """A second `add_bookmark` for the same (site_id, post_id) must hand
    back the row id of the first — never the stale `lastrowid` of 0 that
    INSERT OR IGNORE leaves behind.

    A returned id of 0 makes any downstream
    `update_bookmark_cache_path(id=0, ...)` a silent no-op, severing the
    cache-path linkage.
    """
    site = tmp_db.add_site("test", "http://example.test", "danbooru")

    def _bookmark_42():
        # Identical arguments both times → second call hits the
        # INSERT OR IGNORE collision path.
        return tmp_db.add_bookmark(
            site_id=site.id, post_id=42,
            file_url="http://example.test/42.jpg",
            preview_url=None, tags="cat",
        )

    first, second = _bookmark_42(), _bookmark_42()
    assert first.id != 0
    assert second.id == first.id
|
||||
|
||||
|
||||
# -- get_bookmarks LIKE escaping --
|
||||
|
||||
def test_get_bookmarks_like_escaping(tmp_db):
    """Searching the literal tag `cat_ear` must NOT match `catear` or
    `catXear`. SQLite's LIKE treats `_` as a one-character wildcard, so
    without explicit escaping all three rows would come back."""
    site = tmp_db.add_site("test", "http://example.test", "danbooru")
    for post_id, tag in ((1, "cat_ear"), (2, "catear"), (3, "catXear")):
        tmp_db.add_bookmark(
            site_id=site.id, post_id=post_id,
            file_url=f"http://example.test/{post_id}.jpg",
            preview_url=None, tags=tag,
        )

    hits = tmp_db.get_bookmarks(search="cat_ear")
    assert {b.tags for b in hits} == {"cat_ear"}
|
||||
Loading…
x
Reference in New Issue
Block a user