From bf144663821cf24589d8758bfce56824f69db674 Mon Sep 17 00:00:00 2001 From: pax Date: Wed, 8 Apr 2026 18:50:00 -0500 Subject: [PATCH] Add Phase A test suite for core/ primitives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First regression-test layer for booru-viewer. Pure Python — no Qt, no mpv, no network, no real filesystem outside tmp_path. Locks in the security and concurrency invariants from the 54ccc40 + eb58d76 hardening commits ahead of the upcoming popout state machine refactor (Prompt 3), which needs a stable baseline to refactor against. 16 tests across five files mirroring the source layout under booru_viewer/core/: - tests/core/test_db.py (4): - _validate_folder_name rejection rules (.., /foo, \\foo, .hidden, ~user, empty) and acceptance categories (unicode, spaces, parens) - add_bookmark INSERT OR IGNORE collision returns the existing row id (locks in the lastrowid=0 fix) - get_bookmarks LIKE escaping (literal cat_ear does not match catear) - tests/core/test_cache.py (7): - _referer_for hostname suffix matching (gelbooru.com / donmai.us apex rewrite, both exact-match and subdomain) - _referer_for rejects substring attackers (imgblahgelbooru.attacker.com does NOT pick up the booru referer) - ugoira frame-count and uncompressed-size caps refuse zip bombs before any decompression - _do_download MAX_DOWNLOAD_BYTES enforced both at the Content-Length pre-check AND in the chunk-loop running total - _is_valid_media returns True on OSError (no delete + redownload loop on transient EBUSY) - tests/core/test_config.py (2): - saved_folder_dir rejects literal .. 
and ../escape - find_library_files walks root + 1 level, filters by MEDIA_EXTENSIONS, exact post-id stem match - tests/core/test_concurrency.py (2): - get_app_loop raises RuntimeError before set_app_loop is called - run_on_app_loop round-trips a coroutine result from a worker thread loop back to the test thread - tests/core/api/test_base.py (1): - BooruClient._shared_client lazy singleton constructor-once under 10-thread first-call race Plus tests/conftest.py with fixtures: tmp_db, tmp_library, reset_app_loop, reset_shared_clients. All fixtures use tmp_path or reset module-level globals around the test so the suite is parallel-safe. pyproject.toml: - New [project.optional-dependencies] test extra: pytest>=8.0, pytest-asyncio>=0.23 - New [tool.pytest.ini_options]: asyncio_mode = "auto", testpaths = ["tests"] README.md: - Linux install section gains "Run tests" with the pip install -e ".[test]" + pytest tests/ invocation Phase B (post-sweep VideoPlayer regression tests for the seek slider pin, _pending_mute lazy replay, and volume replay) is deferred to Prompt 3's state machine work — VideoPlayer cannot be instantiated without QApplication and a real mpv, which is out of scope for a unit test suite. Once the state machine carves the pure-Python state out of VideoPlayer, those tests become trivial against the helper module. Suite runs in 0.07s (16 tests). Independent of Qt/mpv/network/ffmpeg. 
Test cases for Prompt 3: - (already covered) — this IS the test suite Prompt 3 builds on top of --- README.md | 5 + pyproject.toml | 10 ++ tests/__init__.py | 0 tests/conftest.py | 71 +++++++++++ tests/core/__init__.py | 0 tests/core/api/__init__.py | 0 tests/core/api/test_base.py | 77 ++++++++++++ tests/core/test_cache.py | 224 +++++++++++++++++++++++++++++++++ tests/core/test_concurrency.py | 62 +++++++++ tests/core/test_config.py | 57 +++++++++ tests/core/test_db.py | 98 +++++++++++++++ 11 files changed, 604 insertions(+) create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/core/__init__.py create mode 100644 tests/core/api/__init__.py create mode 100644 tests/core/api/test_base.py create mode 100644 tests/core/test_cache.py create mode 100644 tests/core/test_concurrency.py create mode 100644 tests/core/test_config.py create mode 100644 tests/core/test_db.py diff --git a/README.md b/README.md index c552af2..9ea4862 100644 --- a/README.md +++ b/README.md @@ -147,6 +147,11 @@ booru-viewer Or without installing: `python3 -m booru_viewer.main_gui` +**Run tests:** +```sh +pip install -e ".[test]" +pytest tests/ +``` **Desktop entry:** To add booru-viewer to your app launcher, create `~/.local/share/applications/booru-viewer.desktop`: ```ini diff --git a/pyproject.toml b/pyproject.toml index f2e6eb5..f0320f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,16 @@ dependencies = [ "python-mpv>=1.0", ] +[project.optional-dependencies] +test = [ + "pytest>=8.0", + "pytest-asyncio>=0.23", +] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] + [project.scripts] booru-viewer = "booru_viewer.main_gui:main" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..4c10b49 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,71 @@ +"""Shared fixtures for the booru-viewer test 
suite. + +All fixtures here are pure-Python — no Qt, no mpv, no network. Filesystem +writes go through `tmp_path` (or fixtures that wrap it). Module-level globals +that the production code mutates (the concurrency loop, the httpx singletons) +get reset around each test that touches them. +""" + +from __future__ import annotations + +import pytest + + +@pytest.fixture +def tmp_db(tmp_path): + """Fresh `Database` instance writing to a temp file. Auto-closes.""" + from booru_viewer.core.db import Database + db = Database(tmp_path / "test.db") + yield db + db.close() + + +@pytest.fixture +def tmp_library(tmp_path): + """Point `saved_dir()` at `tmp_path/saved` for the duration of the test. + + Uses `core.config.set_library_dir` (the official override hook) so the + redirect goes through the same code path the GUI uses for the + user-configurable library location. Tear-down restores the previous + value so tests can run in any order without bleed. + """ + from booru_viewer.core import config + saved = tmp_path / "saved" + saved.mkdir() + original = config._library_dir_override + config.set_library_dir(saved) + yield saved + config.set_library_dir(original) + + +@pytest.fixture +def reset_app_loop(): + """Reset `concurrency._app_loop` between tests. + + The module global is set once at app startup in production; tests need + to start from a clean slate to assert the unset-state behavior. + """ + from booru_viewer.core import concurrency + original = concurrency._app_loop + concurrency._app_loop = None + yield + concurrency._app_loop = original + + +@pytest.fixture +def reset_shared_clients(): + """Reset both shared httpx singletons (cache module + BooruClient class). + + Both are class/module-level globals; tests that exercise the lazy-init + + lock pattern need them cleared so the test sees a fresh first-call + race instead of a leftover instance from a previous test. 
+ """ + from booru_viewer.core.api.base import BooruClient + from booru_viewer.core import cache + original_booru = BooruClient._shared_client + original_cache = cache._shared_client + BooruClient._shared_client = None + cache._shared_client = None + yield + BooruClient._shared_client = original_booru + cache._shared_client = original_cache diff --git a/tests/core/__init__.py b/tests/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/core/api/__init__.py b/tests/core/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/core/api/test_base.py b/tests/core/api/test_base.py new file mode 100644 index 0000000..330b9c0 --- /dev/null +++ b/tests/core/api/test_base.py @@ -0,0 +1,77 @@ +"""Tests for `booru_viewer.core.api.base` — the lazy `_shared_client` +singleton on `BooruClient`. + +Locks in the lock-and-recheck pattern at `base.py:90-108`. Without it, +two threads racing on first `.client` access would both see +`_shared_client is None`, both build an `httpx.AsyncClient`, and one of +them would leak (overwritten without aclose). +""" + +from __future__ import annotations + +import threading +from unittest.mock import patch, MagicMock + +import pytest + +from booru_viewer.core.api.base import BooruClient + + +class _StubClient(BooruClient): + """Concrete subclass so we can instantiate `BooruClient` for the test + — the base class has abstract `search` / `get_post` methods.""" + api_type = "stub" + + async def search(self, tags="", page=1, limit=40): + return [] + + async def get_post(self, post_id): + return None + + +def test_shared_client_singleton_under_concurrency(reset_shared_clients): + """N threads racing on first `.client` access must result in exactly + one `httpx.AsyncClient` constructor call. 
The threading.Lock guards + the check-and-set so the second-and-later callers re-read the now-set + `_shared_client` after acquiring the lock instead of building their + own.""" + constructor_calls = 0 + constructor_lock = threading.Lock() + + def _fake_async_client(*args, **kwargs): + nonlocal constructor_calls + with constructor_lock: + constructor_calls += 1 + m = MagicMock() + m.is_closed = False + return m + + # Barrier so all threads hit the property at the same moment + n_threads = 10 + barrier = threading.Barrier(n_threads) + results = [] + results_lock = threading.Lock() + + client_instance = _StubClient("http://example.test") + + def _worker(): + barrier.wait() + c = client_instance.client + with results_lock: + results.append(c) + + with patch("booru_viewer.core.api.base.httpx.AsyncClient", + side_effect=_fake_async_client): + threads = [threading.Thread(target=_worker) for _ in range(n_threads)] + for t in threads: + t.start() + for t in threads: + t.join(timeout=5) + + assert constructor_calls == 1, ( + f"Expected exactly one httpx.AsyncClient construction, " + f"got {constructor_calls}" + ) + # All threads got back the same shared instance + assert len(results) == n_threads + assert all(r is results[0] for r in results) diff --git a/tests/core/test_cache.py b/tests/core/test_cache.py new file mode 100644 index 0000000..87393c9 --- /dev/null +++ b/tests/core/test_cache.py @@ -0,0 +1,224 @@ +"""Tests for `booru_viewer.core.cache` — Referer hostname matching, ugoira +zip-bomb defenses, download size caps, and validity-check fallback. 
+ +Locks in: +- `_referer_for` proper hostname suffix matching (`54ccc40` security fix) + guarding against `imgblahgelbooru.attacker.com` mapping to gelbooru.com +- `_convert_ugoira_to_gif` cap enforcement (frame count + uncompressed size) + before any decompression — defense against ugoira zip bombs +- `_do_download` MAX_DOWNLOAD_BYTES enforcement, both the Content-Length + pre-check and the running-total chunk-loop guard +- `_is_valid_media` returning True on OSError so a transient EBUSY/lock + doesn't kick off a delete + re-download loop +""" + +from __future__ import annotations + +import asyncio +import io +import zipfile +from pathlib import Path +from unittest.mock import patch +from urllib.parse import urlparse + +import pytest + +from booru_viewer.core import cache +from booru_viewer.core.cache import ( + MAX_DOWNLOAD_BYTES, + _convert_ugoira_to_gif, + _do_download, + _is_valid_media, + _referer_for, +) + + +# -- _referer_for hostname suffix matching -- + +def test_referer_for_exact_and_suffix_match(): + """Real booru hostnames map to the canonical Referer for their CDN. + + Exact match and subdomain-suffix match both rewrite the Referer host + to the canonical apex (gelbooru → `gelbooru.com`, donmai → + `danbooru.donmai.us`). The actual request netloc is dropped — the + point is to look like a navigation from the canonical site. 
+ """ + # gelbooru exact host + assert _referer_for(urlparse("https://gelbooru.com/index.php")) \ + == "https://gelbooru.com/" + # gelbooru subdomain rewrites to the canonical apex + assert _referer_for(urlparse("https://img3.gelbooru.com/images/abc.jpg")) \ + == "https://gelbooru.com/" + + # donmai exact host + assert _referer_for(urlparse("https://donmai.us/posts/123")) \ + == "https://danbooru.donmai.us/" + # donmai subdomain rewrites to the canonical danbooru host + assert _referer_for(urlparse("https://safebooru.donmai.us/posts/123")) \ + == "https://danbooru.donmai.us/" + + +def test_referer_for_rejects_substring_attacker(): + """An attacker host that contains `gelbooru.com` or `donmai.us` as a + SUBSTRING (not a hostname suffix) must NOT pick up the booru Referer. + + Without proper suffix matching, `imgblahgelbooru.attacker.com` would + leak the gelbooru Referer to the attacker — that's the `54ccc40` + security fix. + """ + # Attacker host that ends with attacker-controlled TLD + parsed = urlparse("https://imgblahgelbooru.attacker.com/x.jpg") + referer = _referer_for(parsed) + assert "gelbooru.com" not in referer + assert "imgblahgelbooru.attacker.com" in referer + + parsed = urlparse("https://donmai.us.attacker.com/x.jpg") + referer = _referer_for(parsed) + assert "danbooru.donmai.us" not in referer + assert "donmai.us.attacker.com" in referer + + # Completely unrelated host preserved as-is + parsed = urlparse("https://example.test/x.jpg") + assert _referer_for(parsed) == "https://example.test/" + + +# -- Ugoira zip-bomb defenses -- + +def _build_ugoira_zip(path: Path, n_frames: int, frame_bytes: bytes = b"x") -> Path: + """Build a synthetic ugoira-shaped zip with `n_frames` numbered .jpg + entries. Content is whatever the caller passes; defaults to 1 byte. + + The cap-enforcement tests don't need decodable JPEGs — the cap fires + before any decode happens. The filenames just need .jpg suffixes so + `_convert_ugoira_to_gif` recognizes them as frames. 
+ """ + with zipfile.ZipFile(path, "w") as zf: + for i in range(n_frames): + zf.writestr(f"{i:04d}.jpg", frame_bytes) + return path + + +def test_ugoira_frame_count_cap_rejects_bomb(tmp_path, monkeypatch): + """A zip with more than `UGOIRA_MAX_FRAMES` frames must be refused + BEFORE any decompression. We monkeypatch the cap down so the test + builds a tiny zip instead of a 5001-entry one — the cap check is + cap > N, not cap == 5000.""" + monkeypatch.setattr(cache, "UGOIRA_MAX_FRAMES", 2) + zip_path = _build_ugoira_zip(tmp_path / "bomb.zip", n_frames=3) + gif_path = zip_path.with_suffix(".gif") + + result = _convert_ugoira_to_gif(zip_path) + + # Function returned the original zip (refusal path) + assert result == zip_path + # No .gif was written + assert not gif_path.exists() + + +def test_ugoira_uncompressed_size_cap_rejects_bomb(tmp_path, monkeypatch): + """A zip whose `ZipInfo.file_size` headers sum past + `UGOIRA_MAX_UNCOMPRESSED_BYTES` must be refused before decompression. + Same monkeypatch trick to keep the test data small.""" + monkeypatch.setattr(cache, "UGOIRA_MAX_UNCOMPRESSED_BYTES", 50) + # Three 100-byte frames → 300 total > 50 cap + zip_path = _build_ugoira_zip( + tmp_path / "bomb.zip", n_frames=3, frame_bytes=b"x" * 100 + ) + gif_path = zip_path.with_suffix(".gif") + + result = _convert_ugoira_to_gif(zip_path) + + assert result == zip_path + assert not gif_path.exists() + + +# -- _do_download MAX_DOWNLOAD_BYTES caps -- + + +class _FakeHeaders: + def __init__(self, mapping): + self._m = mapping + def get(self, key, default=None): + return self._m.get(key.lower(), default) + + +class _FakeResponse: + def __init__(self, headers, chunks): + self.headers = _FakeHeaders({k.lower(): v for k, v in headers.items()}) + self._chunks = chunks + def raise_for_status(self): + pass + async def aiter_bytes(self, _size): + for chunk in self._chunks: + yield chunk + + +class _FakeStreamCtx: + def __init__(self, response): + self._resp = response + async def 
__aenter__(self): + return self._resp + async def __aexit__(self, *_args): + return False + + +class _FakeClient: + def __init__(self, response): + self._resp = response + def stream(self, _method, _url, headers=None): + return _FakeStreamCtx(self._resp) + + +def test_download_cap_content_length_pre_check(tmp_path): + """When the server advertises a Content-Length larger than + MAX_DOWNLOAD_BYTES, `_do_download` must raise BEFORE iterating any + bytes. This is the cheap pre-check that protects against the trivial + OOM/disk-fill attack — we don't even start streaming.""" + too_big = MAX_DOWNLOAD_BYTES + 1 + response = _FakeResponse( + headers={"content-type": "image/jpeg", "content-length": str(too_big)}, + chunks=[b"never read"], + ) + client = _FakeClient(response) + local = tmp_path / "out.jpg" + + with pytest.raises(ValueError, match="Download too large"): + asyncio.run(_do_download(client, "http://example.test/x.jpg", {}, local, None)) + + # No file should have been written + assert not local.exists() + + +def test_download_cap_running_total_aborts(tmp_path, monkeypatch): + """Servers can lie about Content-Length. The chunk loop must enforce + the running-total cap independently and abort mid-stream as soon as + cumulative bytes exceed `MAX_DOWNLOAD_BYTES`. We monkeypatch the cap + down to 1024 to keep the test fast.""" + monkeypatch.setattr(cache, "MAX_DOWNLOAD_BYTES", 1024) + # Advertise 0 (unknown) so the small-payload branch runs and the + # running-total guard inside the chunk loop is what fires. 
+ response = _FakeResponse( + headers={"content-type": "image/jpeg", "content-length": "0"}, + chunks=[b"x" * 600, b"x" * 600], # 1200 total > 1024 cap + ) + client = _FakeClient(response) + local = tmp_path / "out.jpg" + + with pytest.raises(ValueError, match="exceeded cap mid-stream"): + asyncio.run(_do_download(client, "http://example.test/x.jpg", {}, local, None)) + + # The buffered-write path only writes after the loop finishes, so the + # mid-stream abort means no file lands on disk. + assert not local.exists() + + +# -- _is_valid_media OSError fallback -- + +def test_is_valid_media_returns_true_on_oserror(tmp_path): + """If the file can't be opened (transient EBUSY, lock, permissions), + `_is_valid_media` must return True so the caller doesn't delete the + cached file. The previous behavior of returning False kicked off a + delete + re-download loop on every access while the underlying + OS issue persisted.""" + nonexistent = tmp_path / "definitely-not-here.jpg" + assert _is_valid_media(nonexistent) is True diff --git a/tests/core/test_concurrency.py b/tests/core/test_concurrency.py new file mode 100644 index 0000000..bb3bff0 --- /dev/null +++ b/tests/core/test_concurrency.py @@ -0,0 +1,62 @@ +"""Tests for `booru_viewer.core.concurrency` — the persistent-loop handle. 
+ +Locks in: +- `get_app_loop` raises a clear RuntimeError if `set_app_loop` was never + called (the production code uses this to bail loudly when async work + is scheduled before the loop thread starts) +- `run_on_app_loop` round-trips a coroutine result from a worker-thread + loop back to the calling thread via `concurrent.futures.Future` +""" + +from __future__ import annotations + +import asyncio +import threading + +import pytest + +from booru_viewer.core import concurrency +from booru_viewer.core.concurrency import ( + get_app_loop, + run_on_app_loop, + set_app_loop, +) + + +def test_get_app_loop_raises_before_set(reset_app_loop): + """Calling `get_app_loop` before `set_app_loop` is a configuration + error — the production code expects a clear RuntimeError so callers + bail loudly instead of silently scheduling work onto a None loop.""" + with pytest.raises(RuntimeError, match="not initialized"): + get_app_loop() + + +def test_run_on_app_loop_round_trips_result(reset_app_loop): + """Spin up a real asyncio loop in a worker thread, register it via + `set_app_loop`, then from the test (main) thread schedule a coroutine + via `run_on_app_loop` and assert the result comes back through the + `concurrent.futures.Future` interface.""" + loop = asyncio.new_event_loop() + ready = threading.Event() + + def _run_loop(): + asyncio.set_event_loop(loop) + ready.set() + loop.run_forever() + + t = threading.Thread(target=_run_loop, daemon=True) + t.start() + ready.wait(timeout=2) + + try: + set_app_loop(loop) + + async def _produce(): + return 42 + + fut = run_on_app_loop(_produce()) + assert fut.result(timeout=2) == 42 + finally: + loop.call_soon_threadsafe(loop.stop) + t.join(timeout=2) + loop.close() diff --git a/tests/core/test_config.py b/tests/core/test_config.py new file mode 100644 index 0000000..b5ef43c --- /dev/null +++ b/tests/core/test_config.py @@ -0,0 +1,57 @@ +"""Tests for `booru_viewer.core.config` — path traversal guard on +`saved_folder_dir` and the shallow 
walk in `find_library_files`. + +Locks in: +- `saved_folder_dir` resolve-and-relative_to check (`54ccc40` defense in + depth alongside `_validate_folder_name`) +- `find_library_files` matching exactly the root + 1-level subdirectory + layout that the library uses, with the right MEDIA_EXTENSIONS filter +""" + +from __future__ import annotations + +import pytest + +from booru_viewer.core import config +from booru_viewer.core.config import find_library_files, saved_folder_dir + + +# -- saved_folder_dir traversal guard -- + +def test_saved_folder_dir_rejects_dotdot(tmp_library): + """`..` and any path that resolves outside `saved_dir()` must raise + ValueError, not silently mkdir somewhere unexpected. We test literal + `..` shapes only — symlink escapes are filesystem-dependent and + flaky in tests.""" + with pytest.raises(ValueError, match="escapes saved directory"): + saved_folder_dir("..") + with pytest.raises(ValueError, match="escapes saved directory"): + saved_folder_dir("../escape") + with pytest.raises(ValueError, match="escapes saved directory"): + saved_folder_dir("foo/../..") + + +# -- find_library_files shallow walk -- + +def test_find_library_files_walks_root_and_one_level(tmp_library): + """Library has a flat shape: `saved/.` at the root, or + `saved//.` one level deep. 
The walk must: + - find matches at both depths + - filter by MEDIA_EXTENSIONS (skip .txt and other non-media) + - filter by exact stem (skip unrelated post ids) + """ + # Root-level match + (tmp_library / "123.jpg").write_bytes(b"") + # One-level subfolder match + (tmp_library / "folder1").mkdir() + (tmp_library / "folder1" / "123.png").write_bytes(b"") + # Different post id — must be excluded + (tmp_library / "folder2").mkdir() + (tmp_library / "folder2" / "456.gif").write_bytes(b"") + # Wrong extension — must be excluded even with the right stem + (tmp_library / "123.txt").write_bytes(b"") + + matches = find_library_files(123) + match_names = {p.name for p in matches} + + assert match_names == {"123.jpg", "123.png"} diff --git a/tests/core/test_db.py b/tests/core/test_db.py new file mode 100644 index 0000000..3543356 --- /dev/null +++ b/tests/core/test_db.py @@ -0,0 +1,98 @@ +"""Tests for `booru_viewer.core.db` — folder name validation, INSERT OR +IGNORE collision handling, and LIKE escaping. + +These tests lock in the `54ccc40` security/correctness fixes: +- `_validate_folder_name` rejects path-traversal shapes before they hit the + filesystem in `saved_folder_dir` +- `add_bookmark` re-SELECTs the actual row id after an INSERT OR IGNORE + collision so the returned `Bookmark.id` is never the bogus 0 that broke + `update_bookmark_cache_path` +- `get_bookmarks` escapes the SQL LIKE wildcards `_` and `%` so a search for + `cat_ear` doesn't bleed into `catear` / `catXear` +""" + +from __future__ import annotations + +import pytest + +from booru_viewer.core.db import _validate_folder_name + + +# -- _validate_folder_name -- + +def test_validate_folder_name_rejects_traversal(): + """Every shape that could escape the saved-images dir or hit a hidden + file must raise ValueError. 
One assertion per rejection rule so a + failure points at the exact case.""" + with pytest.raises(ValueError): + _validate_folder_name("") # empty + with pytest.raises(ValueError): + _validate_folder_name("..") # dotdot literal + with pytest.raises(ValueError): + _validate_folder_name(".") # dot literal + with pytest.raises(ValueError): + _validate_folder_name("/foo") # forward slash + with pytest.raises(ValueError): + _validate_folder_name("foo/bar") # embedded forward slash + with pytest.raises(ValueError): + _validate_folder_name("\\foo") # backslash + with pytest.raises(ValueError): + _validate_folder_name(".hidden") # leading dot + with pytest.raises(ValueError): + _validate_folder_name("~user") # leading tilde + + +def test_validate_folder_name_accepts_unicode_and_punctuation(): + """Common real-world folder names must pass through unchanged. The + guard is meant to block escape shapes, not normal naming.""" + assert _validate_folder_name("miku(lewd)") == "miku(lewd)" + assert _validate_folder_name("cat ear") == "cat ear" + assert _validate_folder_name("日本語") == "日本語" + assert _validate_folder_name("foo-bar") == "foo-bar" + assert _validate_folder_name("foo.bar") == "foo.bar" # dot OK if not leading + + +# -- add_bookmark INSERT OR IGNORE collision -- + +def test_add_bookmark_collision_returns_existing_id(tmp_db): + """Calling `add_bookmark` twice with the same (site_id, post_id) must + return the same row id on the second call, not the stale `lastrowid` + of 0 that INSERT OR IGNORE leaves behind. 
Without the re-SELECT fix, + any downstream `update_bookmark_cache_path(id=0, ...)` silently + no-ops, breaking the cache-path linkage.""" + site = tmp_db.add_site("test", "http://example.test", "danbooru") + bm1 = tmp_db.add_bookmark( + site_id=site.id, post_id=42, file_url="http://example.test/42.jpg", + preview_url=None, tags="cat", + ) + bm2 = tmp_db.add_bookmark( + site_id=site.id, post_id=42, file_url="http://example.test/42.jpg", + preview_url=None, tags="cat", + ) + assert bm1.id != 0 + assert bm2.id == bm1.id + + +# -- get_bookmarks LIKE escaping -- + +def test_get_bookmarks_like_escaping(tmp_db): + """A search for the literal tag `cat_ear` must NOT match `catear` or + `catXear`. SQLite's LIKE treats `_` as a single-char wildcard unless + explicitly escaped — without `ESCAPE '\\\\'` the search would return + all three rows.""" + site = tmp_db.add_site("test", "http://example.test", "danbooru") + tmp_db.add_bookmark( + site_id=site.id, post_id=1, file_url="http://example.test/1.jpg", + preview_url=None, tags="cat_ear", + ) + tmp_db.add_bookmark( + site_id=site.id, post_id=2, file_url="http://example.test/2.jpg", + preview_url=None, tags="catear", + ) + tmp_db.add_bookmark( + site_id=site.id, post_id=3, file_url="http://example.test/3.jpg", + preview_url=None, tags="catXear", + ) + results = tmp_db.get_bookmarks(search="cat_ear") + tags_returned = {b.tags for b in results} + assert tags_returned == {"cat_ear"}