From f168bece00b2820f1bc76cc7e854dc38abd6096e Mon Sep 17 00:00:00 2001
From: pax
Date: Thu, 9 Apr 2026 19:53:20 -0500
Subject: [PATCH] category_fetcher: fix _do_ensure to try batch API when not yet probed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_do_ensure only tried the batch API when _batch_api_works was True,
but after removing the search-time prefetch (where the probe used to
run), _batch_api_works stayed None forever. Gelbooru's only viable
path IS the batch API (its post-view HTML has no tag links), so
clicks on Gelbooru posts produced zero categories.

Fix: _do_ensure now tries the batch API when _batch_api_works is not
False (i.e., both True and None). When None, the call doubles as an
inline probe: if the batch produced categories, save True; if nothing
useful came back, save False and fall back to HTML.

This is simpler than the old prefetch_batch probe because it runs on
ONE post at a time — no batch/HTML mixing concerns, no "single path
per invocation" rule. The probe result is persisted to DB so it only
fires once per site ever.

Dispatch matrix in _do_ensure:

  _batch_api_works True + auth  → batch API (Gelbooru proper)
  _batch_api_works None + auth  → batch as probe → True or False
  _batch_api_works False        → HTML scrape (Rule34)
  no auth                       → HTML scrape (Safebooru.org)
  transient error               → stays None, retry next click

Verified all three sites from clean cache: Gelbooru 55/56+49/50
(batch), Rule34 40/40+38/38 (HTML), Safebooru.org 47/47+47/47 (HTML).
--- booru_viewer/core/api/category_fetcher.py | 35 ++++++++++++++++++----- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/booru_viewer/core/api/category_fetcher.py b/booru_viewer/core/api/category_fetcher.py index 3319f0f..7e3a81b 100644 --- a/booru_viewer/core/api/category_fetcher.py +++ b/booru_viewer/core/api/category_fetcher.py @@ -332,13 +332,34 @@ class CategoryFetcher: self._inflight.pop(post.id, None) async def _do_ensure(self, post: "Post") -> None: - """Inner dispatch for ensure_categories.""" - # Batch API path (for single-post ensure, e.g. click or save) - if self._batch_api_works is True and self._batch_api_available(): - await self.fetch_via_tag_api([post]) - if post.tag_categories: - return - # HTML fallback + """Inner dispatch for ensure_categories. + + Tries the batch API when it's known to work (True) OR not yet + probed (None). The result doubles as an inline probe: if the + batch produced categories, it works (save True); if it + returned nothing useful, it's broken (save False). Falls + through to HTML scrape as the universal fallback. + """ + if self._batch_api_works is not False and self._batch_api_available(): + try: + await self.fetch_via_tag_api([post]) + except Exception as e: + log.debug("Batch API ensure failed (transient): %s", e) + # Leave _batch_api_works at None → retry next call + else: + if post.tag_categories: + if self._batch_api_works is None: + self._batch_api_works = True + self._save_probe_result(True) + return + # Batch returned nothing → broken API (Rule34) or + # the specific post has only unknown tags (very rare). + if self._batch_api_works is None: + self._batch_api_works = False + self._save_probe_result(False) + # HTML scrape fallback (works on Rule34/Safebooru.org/Moebooru, + # returns empty on Gelbooru proper which is fine because the + # batch path above covers Gelbooru) await self.fetch_post(post) # ----- dispatch: prefetch (batch, fire-and-forget) -----