From bbf0d3107b5cf902b7a9284ffc21d709422e09d7 Mon Sep 17 00:00:00 2001 From: pax Date: Wed, 15 Apr 2026 17:29:01 -0500 Subject: [PATCH] category_fetcher: stop flipping _batch_api_works=False on transient errors in single-post path behavior change: a single mid-call network drop could previously poison _batch_api_works=False for the whole site, forcing every future ensure_categories onto the slower HTML scrape path. _do_ensure now routes the unprobed case through _probe_batch_api, which only flips the flag on a clean HTTP 200 with zero matching names; timeout and non-200 responses leave the flag None so the next call retries the probe. The bug surfaced because fetch_via_tag_api swallows per-chunk failures with 'except Exception: continue', so the previous code path couldn't distinguish 'API returned zero matches' from 'the network dropped halfway through.' _probe_batch_api already made that distinction for prefetch_batch; _do_ensure now reuses it. Tests in tests/core/api/test_category_fetcher.py pin the three routes (transient raise, clean-200-zero-matches, non-200). --- CHANGELOG.md | 3 + booru_viewer/core/api/category_fetcher.py | 48 ++++++++----- tests/core/api/test_category_fetcher.py | 86 +++++++++++++++++++++++ 3 files changed, 119 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d814d74..0a4fba3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ ### Changed - Thumbnail drag-start threshold raised from 10px to 30px to match the rubber band's gate — small mouse wobbles on a thumb no longer trigger a file drag +### Fixed +- `category_fetcher._do_ensure` no longer permanently flips `_batch_api_works` to False when a transient network error drops a tag-API request mid-call; the unprobed path now routes through `_probe_batch_api`, which distinguishes clean 200-with-zero-matches (structurally broken, flip) from timeout/HTTP-error (transient, retry next call) + ### Refactored - `category_fetcher` batch tag-API params are now built by a shared `_build_tag_api_params` helper instead of duplicated across `fetch_via_tag_api` and `_probe_batch_api` diff --git a/booru_viewer/core/api/category_fetcher.py b/booru_viewer/core/api/category_fetcher.py index 2304149..888eb37 100644 --- a/booru_viewer/core/api/category_fetcher.py +++ b/booru_viewer/core/api/category_fetcher.py @@ -357,29 +357,41 @@ class CategoryFetcher: async def _do_ensure(self, post: "Post") -> None: """Inner dispatch for ensure_categories. - Tries the batch API when it's known to work (True) OR not yet - probed (None). The result doubles as an inline probe: if the - batch produced categories, it works (save True); if it - returned nothing useful, it's broken (save False). Falls - through to HTML scrape as the universal fallback. + Dispatch: + - ``_batch_api_works is True``: call ``fetch_via_tag_api`` + directly. If it populates categories we're done; a + transient failure leaves them empty and we fall through + to the HTML scrape. + - ``_batch_api_works is None``: route through + ``_probe_batch_api``, which only flips the flag to + True/False on a clean HTTP response. Transient errors + leave it ``None`` so the next call retries the probe. + Previously this path called ``fetch_via_tag_api`` and + inferred the result from empty ``tag_categories`` — but + ``fetch_via_tag_api`` swallows per-chunk failures with + ``continue``, so a mid-call network drop poisoned + ``_batch_api_works = False`` for the site permanently. + - ``_batch_api_works is False`` or unavailable: straight + to HTML scrape. """ - if self._batch_api_works is not False and self._batch_api_available(): + if self._batch_api_works is True and self._batch_api_available(): try: await self.fetch_via_tag_api([post]) except Exception as e: log.debug("Batch API ensure failed (transient): %s", e) - # Leave _batch_api_works at None → retry next call - else: - if post.tag_categories: - if self._batch_api_works is None: - self._batch_api_works = True - self._save_probe_result(True) - return - # Batch returned nothing → broken API (Rule34) or - # the specific post has only unknown tags (very rare). - if self._batch_api_works is None: - self._batch_api_works = False - self._save_probe_result(False) + if post.tag_categories: + return + elif self._batch_api_works is None and self._batch_api_available(): + try: + result = await self._probe_batch_api([post]) + except Exception as e: + log.info("Batch API probe error (will retry next call): %s: %s", + type(e).__name__, e) + result = None + if result is True: + # Probe succeeded — results cached and post composed. + return + # result is False (broken API) or None (transient) — fall through # HTML scrape fallback (works on Rule34/Safebooru.org/Moebooru, # returns empty on Gelbooru proper which is fine because the # batch path above covers Gelbooru) diff --git a/tests/core/api/test_category_fetcher.py b/tests/core/api/test_category_fetcher.py index 5c8f30f..6d5cfd4 100644 --- a/tests/core/api/test_category_fetcher.py +++ b/tests/core/api/test_category_fetcher.py @@ -454,3 +454,89 @@ class TestMaps: assert _GELBOORU_TYPE_MAP[4] == "Character" assert _GELBOORU_TYPE_MAP[5] == "Meta" assert 2 not in _GELBOORU_TYPE_MAP # Deprecated intentionally omitted + + +# --------------------------------------------------------------------------- +# _do_ensure dispatch — regression cover for transient-error poisoning +# --------------------------------------------------------------------------- + +class TestDoEnsureProbeRouting: + """When _batch_api_works is None, _do_ensure must route through + _probe_batch_api so transient errors stay transient. The prior + implementation called fetch_via_tag_api directly and inferred + False from empty tag_categories — but fetch_via_tag_api swallows + per-chunk exceptions, so a network drop silently poisoned the + probe flag to False for the whole site.""" + + def test_transient_error_leaves_flag_none(self, tmp_db): + """All chunks fail → _batch_api_works must stay None, + not flip to False.""" + client = FakeClient( + tag_api_url="http://example.com/tags", + api_key="k", + api_user="u", + ) + + async def raising_request(method, url, params=None): + raise RuntimeError("network down") + client._request = raising_request + + fetcher = CategoryFetcher(client, tmp_db, site_id=1) + assert fetcher._batch_api_works is None + post = FakePost(tags="miku 1girl") + + asyncio.new_event_loop().run_until_complete(fetcher._do_ensure(post)) + + assert fetcher._batch_api_works is None, ( + "Transient error must not poison the probe flag" + ) + # Persistence side: nothing was saved + reloaded = CategoryFetcher(FakeClient(), tmp_db, site_id=1) + assert reloaded._batch_api_works is None + + def test_clean_200_zero_matches_flips_to_false(self, tmp_db): + """Clean HTTP 200 + no names matching the request → flips + the flag to False (structurally broken endpoint).""" + client = FakeClient( + tag_api_url="http://example.com/tags", + api_key="k", + api_user="u", + ) + + async def empty_ok_request(method, url, params=None): + # 200 with a valid but empty tag list + return FakeResponse( + json.dumps({"@attributes": {"count": 0}, "tag": []}), + status_code=200, + ) + client._request = empty_ok_request + + fetcher = CategoryFetcher(client, tmp_db, site_id=1) + post = FakePost(tags="definitely_not_a_real_tag") + + asyncio.new_event_loop().run_until_complete(fetcher._do_ensure(post)) + + assert fetcher._batch_api_works is False, ( + "Clean 200 with zero matches must flip flag to False" + ) + reloaded = CategoryFetcher(FakeClient(), tmp_db, site_id=1) + assert reloaded._batch_api_works is False + + def test_non_200_leaves_flag_none(self, tmp_db): + """500-family responses are transient, must not poison.""" + client = FakeClient( + tag_api_url="http://example.com/tags", + api_key="k", + api_user="u", + ) + + async def five_hundred(method, url, params=None): + return FakeResponse("", status_code=503) + client._request = five_hundred + + fetcher = CategoryFetcher(client, tmp_db, site_id=1) + post = FakePost(tags="miku") + + asyncio.new_event_loop().run_until_complete(fetcher._do_ensure(post)) + + assert fetcher._batch_api_works is None