From 762d73dc4fca6a25d8a7ebcc73d172bf4c85570c Mon Sep 17 00:00:00 2001 From: pax Date: Thu, 9 Apr 2026 19:36:58 -0500 Subject: [PATCH] category_fetcher: fix partial-compose vs ensure_categories interaction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit try_compose_from_cache was returning True on ANY partial cache hit (even 1/38 tags). ensure_categories then saw non-empty tag_categories and returned immediately, leaving the post stuck at 1/38 coverage. The bug showed on Rule34: post 1 got fully scraped (40/40), its tags got cached, then post 2's compose found one matching tag and declared victory. Fix: try_compose_from_cache now returns True ONLY when 100% of unique tags have cached labels (no fetch needed). It STILL populates post.tag_categories with whatever IS cached (for immediate partial display), but returning False signals ensure_categories to continue to the fetch path. This is the correct semantic split: - populate → always (for display) - return True → only when complete (for dispatch) Verified: Rule34: 40/40 + 38/38 (was 40/40 + 1/38) Gelbooru: 55/56 + 49/50 (batch API, one rare tag) Safebooru.org: 47/47 + 47/47 (HTML scrape, full) --- booru_viewer/core/api/category_fetcher.py | 27 ++++++++++++++--------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/booru_viewer/core/api/category_fetcher.py b/booru_viewer/core/api/category_fetcher.py index b2f2eca..48700a1 100644 --- a/booru_viewer/core/api/category_fetcher.py +++ b/booru_viewer/core/api/category_fetcher.py @@ -151,17 +151,22 @@ class CategoryFetcher: def try_compose_from_cache(self, post: "Post") -> bool: """Build ``post.tag_categories`` from cached labels. - Populates ``post.tag_categories`` with whatever tags ARE - cached, even if some are missing. Returns True when at least - one tag was categorized (meaning the post is usable — the - info panel can render categories, templates can resolve - ``%artist%`` / ``%character%`` etc.). Returns False only - when the cache has literally nothing for any of the post's - tags, which means a fetch is needed. + ALWAYS populates ``post.tag_categories`` with whatever tags + ARE cached, even if some are missing — so the info panel can + render partial categories immediately while a fetch is + in-flight. - Tags not in the cache are simply absent from the category - dict. They stay in ``post.tags`` (the flat string) and can - be picked up by a later per-post fetch if needed. + Returns True only when **every** unique tag in the post has + a cached label (100% coverage = no fetch needed). Returns + False when any tags are missing, signaling the caller that a + fetch should follow to fill the gaps. + + This distinction is critical for ``ensure_categories``: + partial compose populates the post for display, but the + dispatcher continues to the fetch path because False was + returned. Without the 100%-or-False rule, a single cached + tag would make ``ensure_categories`` skip the fetch and + leave the post at 1/N coverage forever. """ tags = post.tag_list if not tags: @@ -176,7 +181,7 @@ class CategoryFetcher: cats.setdefault(label, []).append(tag) if cats: post.tag_categories = _canonical_order(cats) - return bool(cats) + return len(cached) >= len(set(tags)) # ----- batch tag API fast path -----