category_fetcher: persist batch API probe result across sessions
The probe that detects whether a site's batch tag API works (Gelbooru proper: yes, Rule34: no) now persists its result in the tag_types table using a sentinel key (__batch_api_probe__). On subsequent app launches, the fetcher reads the saved result at construction time and skips the probe entirely. Before: every session with Rule34 wasted ~0.6s on a probe request that always fails (Rule34 returns unrelated tags for the names= filter). During that time the background prefetch couldn't start HTML scraping, so the first few post clicks paid ~0.3s each. After: the first ever session probes Rule34 once and stores False. Every subsequent session reads False from the DB, skips the probe, and the background prefetch immediately starts HTML scraping. By the time the user clicks any post, the scrape is usually done. Gelbooru proper: the probe succeeds on the first session and stores True. Future sessions use the batch API without probing. No change in speed (already fast), just saves the probe round-trip. Persisted per site_id so different Gelbooru-shaped sites get their own probe result. The clear_tag_cache method wipes probe results along with tag data (the sentinel key lives in the same table).
This commit is contained in:
parent
1547cbe55a
commit
7d11aeab06
@ -127,24 +127,36 @@ class CategoryFetcher:
|
|||||||
self._sem = asyncio.Semaphore(self._PREFETCH_CONCURRENCY)
|
self._sem = asyncio.Semaphore(self._PREFETCH_CONCURRENCY)
|
||||||
self._inflight: dict[int, asyncio.Task] = {}
|
self._inflight: dict[int, asyncio.Task] = {}
|
||||||
|
|
||||||
self._batch_api_works: bool | None = None
|
# Probe state for the batch tag API. Persisted to DB so
|
||||||
# Probe state for the batch tag API:
|
# the probe runs at most ONCE per site, ever. Rule34's
|
||||||
|
# broken batch API is detected on the first session; every
|
||||||
|
# subsequent session skips the probe and goes straight to
|
||||||
|
# HTML prefetch (saving ~0.6s of wasted probe time).
|
||||||
#
|
#
|
||||||
# None — not yet probed, OR last probe hit a transient
|
# None — not yet probed, OR last probe hit a transient
|
||||||
# error (HTTP error, timeout, parse exception).
|
# error. Next prefetch_batch retries the probe.
|
||||||
# Next prefetch_batch will retry the probe.
|
# True — probe succeeded (Gelbooru proper). Permanent.
|
||||||
# True — probe succeeded (response contained >=1 of the
|
# False — clean 200 + zero matching names (Rule34).
|
||||||
# requested names). Batch API used for all future
|
# Permanent. Per-post HTML from now on.
|
||||||
# calls on this instance.
|
self._batch_api_works = self._load_probe_result()
|
||||||
# False — probe got a clean HTTP 200 with zero matching
|
|
||||||
# names for ANY of the requested tags. The API
|
# ----- probe result persistence -----
|
||||||
# is structurally broken on this site (Rule34's
|
|
||||||
# ``names=`` filter returns unrelated tags).
|
_PROBE_KEY = "__batch_api_probe__" # sentinel name in tag_types
|
||||||
# Per-post HTML used for all future calls.
|
|
||||||
#
|
def _load_probe_result(self) -> bool | None:
|
||||||
# Transition to False is permanent for the instance lifetime.
|
"""Read the persisted probe result from the DB, or None."""
|
||||||
# Transition to True is permanent for the instance lifetime.
|
row = self._db.get_tag_labels(self._site_id, [self._PROBE_KEY])
|
||||||
# None -> None on transient error preserves retry ability.
|
val = row.get(self._PROBE_KEY)
|
||||||
|
if val == "true":
|
||||||
|
return True
|
||||||
|
elif val == "false":
|
||||||
|
return False
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _save_probe_result(self, result: bool) -> None:
|
||||||
|
"""Persist the probe result so future sessions skip the probe."""
|
||||||
|
self._db.set_tag_labels(self._site_id, {self._PROBE_KEY: "true" if result else "false"})
|
||||||
|
|
||||||
# ----- cache compose (instant, no HTTP) -----
|
# ----- cache compose (instant, no HTTP) -----
|
||||||
|
|
||||||
@ -421,8 +433,10 @@ class CategoryFetcher:
|
|||||||
cached = self._db.get_tag_labels(self._site_id, list(all_tags))
|
cached = self._db.get_tag_labels(self._site_id, list(all_tags))
|
||||||
missing = [t for t in all_tags if t not in cached]
|
missing = [t for t in all_tags if t not in cached]
|
||||||
if not missing:
|
if not missing:
|
||||||
# Everything's cached — can't probe, assume batch works
|
# Everything's cached — can't probe, skip
|
||||||
|
if self._batch_api_works is None:
|
||||||
self._batch_api_works = True
|
self._batch_api_works = True
|
||||||
|
self._save_probe_result(True)
|
||||||
for p in posts:
|
for p in posts:
|
||||||
self.try_compose_from_cache(p)
|
self.try_compose_from_cache(p)
|
||||||
return True
|
return True
|
||||||
@ -477,6 +491,7 @@ class CategoryFetcher:
|
|||||||
|
|
||||||
if got_any:
|
if got_any:
|
||||||
self._batch_api_works = True
|
self._batch_api_works = True
|
||||||
|
self._save_probe_result(True)
|
||||||
if matched:
|
if matched:
|
||||||
self._db.set_tag_labels(self._site_id, matched)
|
self._db.set_tag_labels(self._site_id, matched)
|
||||||
# Fetch any remaining missing tags via the batch path
|
# Fetch any remaining missing tags via the batch path
|
||||||
@ -485,6 +500,7 @@ class CategoryFetcher:
|
|||||||
else:
|
else:
|
||||||
# Clean 200 but zero matching names → structurally broken
|
# Clean 200 but zero matching names → structurally broken
|
||||||
self._batch_api_works = False
|
self._batch_api_works = False
|
||||||
|
self._save_probe_result(False)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user