library_save: require category_fetcher to prevent silent category drop

behavior change: save_post_file's category_fetcher argument is now
keyword-only with no default, so every call site has to pass something
explicit (fetcher instance or None). Previously the =None default let
bookmark→library save and bookmark Save As slip through without a
fetcher at all, silently rendering %artist%/%character% tokens as
empty strings and producing filenames like '_12345.jpg' instead of
'greatartist_12345.jpg'.

BookmarksView now takes a category_fetcher_factory callable in its
constructor (wired to BooruApp._get_category_fetcher), called at save
time so it picks up the fetcher for whatever site is currently active.

tests/core/test_library_save.py pins the signature shape and the
three relevant paths: fetcher populates empty categories, None
accepted when categories are pre-populated (Danbooru/e621 inline),
fetcher skipped when template has no category tokens.
This commit is contained in:
pax 2026-04-15 17:32:25 -05:00
parent bbf0d3107b
commit cf8bc0ad89
5 changed files with 165 additions and 4 deletions

View File

@ -7,6 +7,7 @@
### Fixed
- `category_fetcher._do_ensure` no longer permanently flips `_batch_api_works` to False when a transient network error drops a tag-API request mid-call; the unprobed path now routes through `_probe_batch_api`, which distinguishes clean 200-with-zero-matches (structurally broken, flip) from timeout/HTTP-error (transient, retry next call)
- Bookmark→library save and bookmark Save As now plumb the active site's `CategoryFetcher` through to the filename template, so `%artist%`/`%character%` tokens render correctly instead of silently dropping out when saving a post that wasn't previewed first
### Refactored
- `category_fetcher` batch tag-API params are now built by a shared `_build_tag_api_params` helper instead of duplicated across `fetch_via_tag_api` and `_probe_batch_api`

View File

@ -24,6 +24,7 @@ from .db import Database
if TYPE_CHECKING:
from .api.base import Post
from .api.category_fetcher import CategoryFetcher
_CATEGORY_TOKENS = {"%artist%", "%character%", "%copyright%", "%general%", "%meta%", "%species%"}
@ -36,7 +37,8 @@ async def save_post_file(
db: Database,
in_flight: set[str] | None = None,
explicit_name: str | None = None,
category_fetcher=None,
*,
category_fetcher: "CategoryFetcher | None",
) -> Path:
"""Copy a Post's already-cached media file into `dest_dir`.
@ -89,6 +91,13 @@ async def save_post_file(
explicit_name: optional override. When set, the template is
bypassed and this basename (already including extension)
is used as the starting point for collision resolution.
category_fetcher: keyword-only, required. The CategoryFetcher
for the post's site, or None when the site categorises tags
inline (Danbooru, e621) so ``post.tag_categories`` is always
pre-populated. Pass ``None`` explicitly rather than omitting
the argument the ``=None`` default was removed so saves
can't silently render templates with empty category tokens
just because a caller forgot to plumb the fetcher through.
Returns:
The actual `Path` the file landed at after collision

View File

@ -4,6 +4,7 @@ from __future__ import annotations
import logging
from pathlib import Path
from typing import Callable, TYPE_CHECKING
from PySide6.QtCore import Qt, Signal, QObject, QTimer
from PySide6.QtGui import QPixmap
@ -27,6 +28,9 @@ from ..core.cache import download_thumbnail
from ..core.concurrency import run_on_app_loop
from .grid import ThumbnailGrid
if TYPE_CHECKING:
from ..core.api.category_fetcher import CategoryFetcher
log = logging.getLogger("booru")
@ -43,9 +47,19 @@ class BookmarksView(QWidget):
bookmarks_changed = Signal() # emitted after bookmark add/remove/unsave
open_in_browser_requested = Signal(int, int) # (site_id, post_id)
def __init__(self, db: Database, parent: QWidget | None = None) -> None:
def __init__(
self,
db: Database,
category_fetcher_factory: Callable[[], "CategoryFetcher | None"],
parent: QWidget | None = None,
) -> None:
super().__init__(parent)
self._db = db
# Factory returns the fetcher for the currently-active site, or
# None when the site categorises tags inline (Danbooru, e621).
# Called at save time so a site switch between BookmarksView
# construction and a save picks up the new site's fetcher.
self._category_fetcher_factory = category_fetcher_factory
self._bookmarks: list[Bookmark] = []
self._signals = BookmarkThumbSignals()
self._signals.thumb_ready.connect(self._on_thumb_ready, Qt.ConnectionType.QueuedConnection)
@ -296,9 +310,14 @@ class BookmarksView(QWidget):
src = Path(fav.cached_path)
post = self._bookmark_to_post(fav)
fetcher = self._category_fetcher_factory()
async def _do():
try:
await save_post_file(src, post, dest_dir, self._db)
await save_post_file(
src, post, dest_dir, self._db,
category_fetcher=fetcher,
)
self._signals.save_done.emit(fav.post_id)
except Exception as e:
log.warning(f"Bookmark→library save #{fav.post_id} failed: {e}")
@ -412,12 +431,14 @@ class BookmarksView(QWidget):
dest = save_file(self, "Save Image", default_name, f"Images (*{src.suffix})")
if dest:
dest_path = Path(dest)
fetcher = self._category_fetcher_factory()
async def _do_save_as():
try:
await save_post_file(
src, post, dest_path.parent, self._db,
explicit_name=dest_path.name,
category_fetcher=fetcher,
)
except Exception as e:
log.warning(f"Bookmark Save As #{fav.post_id} failed: {e}")

View File

@ -315,7 +315,9 @@ class BooruApp(QMainWindow):
self._grid.nav_before_start.connect(self._search_ctrl.on_nav_before_start)
self._stack.addWidget(self._grid)
self._bookmarks_view = BookmarksView(self._db)
self._bookmarks_view = BookmarksView(
self._db, self._get_category_fetcher,
)
self._bookmarks_view.bookmark_selected.connect(self._on_bookmark_selected)
self._bookmarks_view.bookmark_activated.connect(self._on_bookmark_activated)
self._bookmarks_view.bookmarks_changed.connect(self._post_actions.refresh_browse_saved_dots)

View File

@ -0,0 +1,128 @@
"""Tests for save_post_file.
Pins the contract that category_fetcher is a *required* keyword arg
(no silent default) so a forgotten plumb can't result in a save that
drops category tokens from the filename template.
"""
from __future__ import annotations
import asyncio
import inspect
from dataclasses import dataclass, field
from pathlib import Path
import pytest
from booru_viewer.core.library_save import save_post_file
@dataclass
class FakePost:
id: int = 12345
tags: str = "1girl greatartist"
tag_categories: dict = field(default_factory=dict)
score: int = 0
rating: str = ""
source: str = ""
file_url: str = ""
class PopulatingFetcher:
"""ensure_categories fills in the artist category from scratch,
emulating the HTML-scrape/batch-API happy path."""
def __init__(self, categories: dict[str, list[str]]):
self._categories = categories
self.calls = 0
async def ensure_categories(self, post) -> None:
self.calls += 1
post.tag_categories = dict(self._categories)
def _run(coro):
return asyncio.new_event_loop().run_until_complete(coro)
def test_category_fetcher_is_keyword_only_and_required():
"""Signature check: category_fetcher must be explicit at every
call site no ``= None`` default that callers can forget."""
sig = inspect.signature(save_post_file)
param = sig.parameters["category_fetcher"]
assert param.kind == inspect.Parameter.KEYWORD_ONLY, (
"category_fetcher should be keyword-only"
)
assert param.default is inspect.Parameter.empty, (
"category_fetcher must not have a default — forcing every caller "
"to pass it (even as None) is the whole point of this contract"
)
def test_template_category_populated_via_fetcher(tmp_path, tmp_db):
"""Post with empty tag_categories + a template using %artist% +
a working fetcher saved filename includes the fetched artist
instead of falling back to the bare id."""
src = tmp_path / "src.jpg"
src.write_bytes(b"fake-image-bytes")
dest_dir = tmp_path / "dest"
tmp_db.set_setting("library_filename_template", "%artist%_%id%")
post = FakePost(id=12345, tag_categories={})
fetcher = PopulatingFetcher({"Artist": ["greatartist"]})
result = _run(save_post_file(
src, post, dest_dir, tmp_db,
category_fetcher=fetcher,
))
assert fetcher.calls == 1, "fetcher should be invoked exactly once"
assert result.name == "greatartist_12345.jpg", (
f"expected templated filename, got {result.name!r}"
)
assert result.exists()
def test_none_fetcher_accepted_when_categories_prepopulated(tmp_path, tmp_db):
"""Pass-None contract: sites like Danbooru/e621 return ``None``
from ``_get_category_fetcher`` because Post already arrives with
tag_categories populated. ``save_post_file`` must accept None
explicitly the change is about forcing callers to think, not
about forbidding None."""
src = tmp_path / "src.jpg"
src.write_bytes(b"x")
dest_dir = tmp_path / "dest"
tmp_db.set_setting("library_filename_template", "%artist%_%id%")
post = FakePost(id=999, tag_categories={"Artist": ["inlineartist"]})
result = _run(save_post_file(
src, post, dest_dir, tmp_db,
category_fetcher=None,
))
assert result.name == "inlineartist_999.jpg"
assert result.exists()
def test_fetcher_not_called_when_template_has_no_category_tokens(tmp_path, tmp_db):
"""Purely-id template → fetcher ``ensure_categories`` never
invoked, even when categories are empty (the fetch is expensive
and would be wasted)."""
src = tmp_path / "src.jpg"
src.write_bytes(b"x")
dest_dir = tmp_path / "dest"
tmp_db.set_setting("library_filename_template", "%id%")
post = FakePost(id=42, tag_categories={})
fetcher = PopulatingFetcher({"Artist": ["unused"]})
_run(save_post_file(
src, post, dest_dir, tmp_db,
category_fetcher=fetcher,
))
assert fetcher.calls == 0