library_save: require category_fetcher to prevent silent category drop

behavior change: save_post_file's category_fetcher argument is now
keyword-only with no default, so every call site has to pass something
explicit (fetcher instance or None). Previously the =None default let
bookmark→library save and bookmark Save As slip through without a
fetcher at all, silently rendering %artist%/%character% tokens as
empty strings and producing filenames like '_12345.jpg' instead of
'greatartist_12345.jpg'.

BookmarksView now takes a category_fetcher_factory callable in its
constructor (wired to BooruApp._get_category_fetcher), called at save
time so it picks up the fetcher for whatever site is currently active.

tests/core/test_library_save.py pins the signature shape and the
three relevant paths: fetcher populates empty categories, None
accepted when categories are pre-populated (Danbooru/e621 inline),
fetcher skipped when template has no category tokens.
This commit is contained in:
pax 2026-04-15 17:32:25 -05:00
parent bbf0d3107b
commit cf8bc0ad89
5 changed files with 165 additions and 4 deletions

View File

@ -7,6 +7,7 @@
### Fixed ### Fixed
- `category_fetcher._do_ensure` no longer permanently flips `_batch_api_works` to False when a transient network error drops a tag-API request mid-call; the unprobed path now routes through `_probe_batch_api`, which distinguishes clean 200-with-zero-matches (structurally broken, flip) from timeout/HTTP-error (transient, retry next call) - `category_fetcher._do_ensure` no longer permanently flips `_batch_api_works` to False when a transient network error drops a tag-API request mid-call; the unprobed path now routes through `_probe_batch_api`, which distinguishes clean 200-with-zero-matches (structurally broken, flip) from timeout/HTTP-error (transient, retry next call)
- Bookmark→library save and bookmark Save As now plumb the active site's `CategoryFetcher` through to the filename template, so `%artist%`/`%character%` tokens render correctly instead of silently dropping out when saving a post that wasn't previewed first
### Refactored ### Refactored
- `category_fetcher` batch tag-API params are now built by a shared `_build_tag_api_params` helper instead of duplicated across `fetch_via_tag_api` and `_probe_batch_api` - `category_fetcher` batch tag-API params are now built by a shared `_build_tag_api_params` helper instead of duplicated across `fetch_via_tag_api` and `_probe_batch_api`

View File

@ -24,6 +24,7 @@ from .db import Database
if TYPE_CHECKING: if TYPE_CHECKING:
from .api.base import Post from .api.base import Post
from .api.category_fetcher import CategoryFetcher
_CATEGORY_TOKENS = {"%artist%", "%character%", "%copyright%", "%general%", "%meta%", "%species%"} _CATEGORY_TOKENS = {"%artist%", "%character%", "%copyright%", "%general%", "%meta%", "%species%"}
@ -36,7 +37,8 @@ async def save_post_file(
db: Database, db: Database,
in_flight: set[str] | None = None, in_flight: set[str] | None = None,
explicit_name: str | None = None, explicit_name: str | None = None,
category_fetcher=None, *,
category_fetcher: "CategoryFetcher | None",
) -> Path: ) -> Path:
"""Copy a Post's already-cached media file into `dest_dir`. """Copy a Post's already-cached media file into `dest_dir`.
@ -89,6 +91,13 @@ async def save_post_file(
explicit_name: optional override. When set, the template is explicit_name: optional override. When set, the template is
bypassed and this basename (already including extension) bypassed and this basename (already including extension)
is used as the starting point for collision resolution. is used as the starting point for collision resolution.
category_fetcher: keyword-only, required. The CategoryFetcher
for the post's site, or None when the site categorises tags
inline (Danbooru, e621) so ``post.tag_categories`` is always
pre-populated. Pass ``None`` explicitly rather than omitting
the argument the ``=None`` default was removed so saves
can't silently render templates with empty category tokens
just because a caller forgot to plumb the fetcher through.
Returns: Returns:
The actual `Path` the file landed at after collision The actual `Path` the file landed at after collision

View File

@ -4,6 +4,7 @@ from __future__ import annotations
import logging import logging
from pathlib import Path from pathlib import Path
from typing import Callable, TYPE_CHECKING
from PySide6.QtCore import Qt, Signal, QObject, QTimer from PySide6.QtCore import Qt, Signal, QObject, QTimer
from PySide6.QtGui import QPixmap from PySide6.QtGui import QPixmap
@ -27,6 +28,9 @@ from ..core.cache import download_thumbnail
from ..core.concurrency import run_on_app_loop from ..core.concurrency import run_on_app_loop
from .grid import ThumbnailGrid from .grid import ThumbnailGrid
if TYPE_CHECKING:
from ..core.api.category_fetcher import CategoryFetcher
log = logging.getLogger("booru") log = logging.getLogger("booru")
@ -43,9 +47,19 @@ class BookmarksView(QWidget):
bookmarks_changed = Signal() # emitted after bookmark add/remove/unsave bookmarks_changed = Signal() # emitted after bookmark add/remove/unsave
open_in_browser_requested = Signal(int, int) # (site_id, post_id) open_in_browser_requested = Signal(int, int) # (site_id, post_id)
def __init__(self, db: Database, parent: QWidget | None = None) -> None: def __init__(
self,
db: Database,
category_fetcher_factory: Callable[[], "CategoryFetcher | None"],
parent: QWidget | None = None,
) -> None:
super().__init__(parent) super().__init__(parent)
self._db = db self._db = db
# Factory returns the fetcher for the currently-active site, or
# None when the site categorises tags inline (Danbooru, e621).
# Called at save time so a site switch between BookmarksView
# construction and a save picks up the new site's fetcher.
self._category_fetcher_factory = category_fetcher_factory
self._bookmarks: list[Bookmark] = [] self._bookmarks: list[Bookmark] = []
self._signals = BookmarkThumbSignals() self._signals = BookmarkThumbSignals()
self._signals.thumb_ready.connect(self._on_thumb_ready, Qt.ConnectionType.QueuedConnection) self._signals.thumb_ready.connect(self._on_thumb_ready, Qt.ConnectionType.QueuedConnection)
@ -296,9 +310,14 @@ class BookmarksView(QWidget):
src = Path(fav.cached_path) src = Path(fav.cached_path)
post = self._bookmark_to_post(fav) post = self._bookmark_to_post(fav)
fetcher = self._category_fetcher_factory()
async def _do(): async def _do():
try: try:
await save_post_file(src, post, dest_dir, self._db) await save_post_file(
src, post, dest_dir, self._db,
category_fetcher=fetcher,
)
self._signals.save_done.emit(fav.post_id) self._signals.save_done.emit(fav.post_id)
except Exception as e: except Exception as e:
log.warning(f"Bookmark→library save #{fav.post_id} failed: {e}") log.warning(f"Bookmark→library save #{fav.post_id} failed: {e}")
@ -412,12 +431,14 @@ class BookmarksView(QWidget):
dest = save_file(self, "Save Image", default_name, f"Images (*{src.suffix})") dest = save_file(self, "Save Image", default_name, f"Images (*{src.suffix})")
if dest: if dest:
dest_path = Path(dest) dest_path = Path(dest)
fetcher = self._category_fetcher_factory()
async def _do_save_as(): async def _do_save_as():
try: try:
await save_post_file( await save_post_file(
src, post, dest_path.parent, self._db, src, post, dest_path.parent, self._db,
explicit_name=dest_path.name, explicit_name=dest_path.name,
category_fetcher=fetcher,
) )
except Exception as e: except Exception as e:
log.warning(f"Bookmark Save As #{fav.post_id} failed: {e}") log.warning(f"Bookmark Save As #{fav.post_id} failed: {e}")

View File

@ -315,7 +315,9 @@ class BooruApp(QMainWindow):
self._grid.nav_before_start.connect(self._search_ctrl.on_nav_before_start) self._grid.nav_before_start.connect(self._search_ctrl.on_nav_before_start)
self._stack.addWidget(self._grid) self._stack.addWidget(self._grid)
self._bookmarks_view = BookmarksView(self._db) self._bookmarks_view = BookmarksView(
self._db, self._get_category_fetcher,
)
self._bookmarks_view.bookmark_selected.connect(self._on_bookmark_selected) self._bookmarks_view.bookmark_selected.connect(self._on_bookmark_selected)
self._bookmarks_view.bookmark_activated.connect(self._on_bookmark_activated) self._bookmarks_view.bookmark_activated.connect(self._on_bookmark_activated)
self._bookmarks_view.bookmarks_changed.connect(self._post_actions.refresh_browse_saved_dots) self._bookmarks_view.bookmarks_changed.connect(self._post_actions.refresh_browse_saved_dots)

View File

@ -0,0 +1,128 @@
"""Tests for save_post_file.
Pins the contract that category_fetcher is a *required* keyword arg
(no silent default) so a forgotten plumb can't result in a save that
drops category tokens from the filename template.
"""
from __future__ import annotations
import asyncio
import inspect
from dataclasses import dataclass, field
from pathlib import Path
import pytest
from booru_viewer.core.library_save import save_post_file
@dataclass
class FakePost:
id: int = 12345
tags: str = "1girl greatartist"
tag_categories: dict = field(default_factory=dict)
score: int = 0
rating: str = ""
source: str = ""
file_url: str = ""
class PopulatingFetcher:
"""ensure_categories fills in the artist category from scratch,
emulating the HTML-scrape/batch-API happy path."""
def __init__(self, categories: dict[str, list[str]]):
self._categories = categories
self.calls = 0
async def ensure_categories(self, post) -> None:
self.calls += 1
post.tag_categories = dict(self._categories)
def _run(coro):
return asyncio.new_event_loop().run_until_complete(coro)
def test_category_fetcher_is_keyword_only_and_required():
"""Signature check: category_fetcher must be explicit at every
call site no ``= None`` default that callers can forget."""
sig = inspect.signature(save_post_file)
param = sig.parameters["category_fetcher"]
assert param.kind == inspect.Parameter.KEYWORD_ONLY, (
"category_fetcher should be keyword-only"
)
assert param.default is inspect.Parameter.empty, (
"category_fetcher must not have a default — forcing every caller "
"to pass it (even as None) is the whole point of this contract"
)
def test_template_category_populated_via_fetcher(tmp_path, tmp_db):
"""Post with empty tag_categories + a template using %artist% +
a working fetcher saved filename includes the fetched artist
instead of falling back to the bare id."""
src = tmp_path / "src.jpg"
src.write_bytes(b"fake-image-bytes")
dest_dir = tmp_path / "dest"
tmp_db.set_setting("library_filename_template", "%artist%_%id%")
post = FakePost(id=12345, tag_categories={})
fetcher = PopulatingFetcher({"Artist": ["greatartist"]})
result = _run(save_post_file(
src, post, dest_dir, tmp_db,
category_fetcher=fetcher,
))
assert fetcher.calls == 1, "fetcher should be invoked exactly once"
assert result.name == "greatartist_12345.jpg", (
f"expected templated filename, got {result.name!r}"
)
assert result.exists()
def test_none_fetcher_accepted_when_categories_prepopulated(tmp_path, tmp_db):
"""Pass-None contract: sites like Danbooru/e621 return ``None``
from ``_get_category_fetcher`` because Post already arrives with
tag_categories populated. ``save_post_file`` must accept None
explicitly the change is about forcing callers to think, not
about forbidding None."""
src = tmp_path / "src.jpg"
src.write_bytes(b"x")
dest_dir = tmp_path / "dest"
tmp_db.set_setting("library_filename_template", "%artist%_%id%")
post = FakePost(id=999, tag_categories={"Artist": ["inlineartist"]})
result = _run(save_post_file(
src, post, dest_dir, tmp_db,
category_fetcher=None,
))
assert result.name == "inlineartist_999.jpg"
assert result.exists()
def test_fetcher_not_called_when_template_has_no_category_tokens(tmp_path, tmp_db):
"""Purely-id template → fetcher ``ensure_categories`` never
invoked, even when categories are empty (the fetch is expensive
and would be wasted)."""
src = tmp_path / "src.jpg"
src.write_bytes(b"x")
dest_dir = tmp_path / "dest"
tmp_db.set_setting("library_filename_template", "%id%")
post = FakePost(id=42, tag_categories={})
fetcher = PopulatingFetcher({"Artist": ["unused"]})
_run(save_post_file(
src, post, dest_dir, tmp_db,
category_fetcher=fetcher,
))
assert fetcher.calls == 0