From 44a20ac0576ffd2aa182b0a1a474c5e2ba983616 Mon Sep 17 00:00:00 2001
From: pax <paxxe@protonmail.com>
Date: Wed, 8 Apr 2026 16:32:32 -0500
Subject: [PATCH] Search: instrument _do_search and _on_reached_bottom with
 per-filter drop counts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Note #3 in REFACTOR_NOTES.md (search result count + end-of-results
flag mismatch) reproduced once during the refactor verification sweep
and not again at later commits, so it's intermittent — likely
scenario-dependent (specific tag, blacklist hit rate, page-size /
limit interaction). The bug is real but not reliably repro-able, so
the right move is to add logging now and capture real data on the
next reproduction instead of guessing at a fix.

Both _do_search (paginated) and _on_reached_bottom (infinite scroll
backfill) now log a `do_search:` / `on_reached_bottom:` line with the
following fields:

  - limit              the configured page_size
  - api_returned_total raw count of posts the API returned across all
                       fetched pages (sum of every batch the loop saw)
  - kept               post-filter, post-clamp count actually emitted
  - drops_bl_tags      posts dropped by the blacklist-tags filter
  - drops_bl_posts     posts dropped by the blacklist-posts filter
  - drops_dedup        posts dropped by the dedup-against-seen filter
  - api_short_signal   (do_search only) whether the LAST batch came
                       back smaller than limit — the implicit "API ran
                       out" hint
  - api_exhausted      (on_reached_bottom only) the explicit
                       api_exhausted flag the loop sets when len(batch)
                       falls short
  - last_page          (on_reached_bottom only) the highest page index
                       the backfill loop touched

_on_search_done also gets a one-liner with displayed_count, limit,
and the at_end decision so the user-visible "(end)" flag can be
correlated with the upstream numbers.

Implementation note: the per-filter drop counters live in a closure-
captured `drops` dict that the `_filter` closure mutates as it walks
its three passes (bl_tags → bl_posts → dedup). Same dict shape in
both `_do_search` and `_on_reached_bottom` so the two log lines are
directly comparable. Both async closures also accumulate `raw_total`
across the loop iterations to capture the API's true return count,
since the existing locals only kept the last batch's length.

All logging is `log.debug` so it's off at default INFO level. To
capture: bump booru_viewer logger level (or run with debug logging
enabled in main_window.py:440 — already DEBUG by default per the
existing setLevel call).

This commit DOES NOT fix #3 — the symptom is still intermittent and
the root cause is unknown. It just makes the next reproduction
diagnosable in one shot instead of requiring a second instrumented
run.
---
 booru_viewer/gui/main_window.py | 41 +++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/booru_viewer/gui/main_window.py b/booru_viewer/gui/main_window.py
index 5465566..2a1d64e 100644
--- a/booru_viewer/gui/main_window.py
+++ b/booru_viewer/gui/main_window.py
@@ -638,23 +638,36 @@ class BooruApp(QMainWindow):
         shown_ids = self._search.shown_post_ids.copy()
         seen = shown_ids.copy()  # local dedup for this backfill round
 
+        # Per-pass drop counters — same shape as _do_search's instrumentation
+        # so the two code paths produce comparable log lines.
+        drops = {"bl_tags": 0, "bl_posts": 0, "dedup": 0}
+
         def _filter(posts):
+            n0 = len(posts)
             if bl_tags:
                 posts = [p for p in posts if not bl_tags.intersection(p.tag_list)]
+            n1 = len(posts)
+            drops["bl_tags"] += n0 - n1
             if bl_posts:
                 posts = [p for p in posts if p.file_url not in bl_posts]
+            n2 = len(posts)
+            drops["bl_posts"] += n1 - n2
             posts = [p for p in posts if p.id not in seen]
+            n3 = len(posts)
+            drops["dedup"] += n2 - n3
             seen.update(p.id for p in posts)
             return posts
 
         async def _search():
             client = self._make_client()
             collected = []
+            raw_total = 0
             last_page = page
             api_exhausted = False
             try:
                 current_page = page
                 batch = await client.search(tags=search_tags, page=current_page, limit=limit)
+                raw_total += len(batch)
                 last_page = current_page
                 filtered = _filter(batch)
                 collected.extend(filtered)
@@ -665,6 +678,7 @@ class BooruApp(QMainWindow):
                         await asyncio.sleep(0.3)
                         current_page += 1
                         batch = await client.search(tags=search_tags, page=current_page, limit=limit)
+                        raw_total += len(batch)
                         last_page = current_page
                         filtered = _filter(batch)
                         collected.extend(filtered)
@@ -678,6 +692,11 @@ class BooruApp(QMainWindow):
             finally:
                 self._search.infinite_last_page = last_page
                 self._search.infinite_api_exhausted = api_exhausted
+                log.debug(
+                    f"on_reached_bottom: limit={limit} api_returned_total={raw_total} kept={len(collected[:limit])} "
+                    f"drops_bl_tags={drops['bl_tags']} drops_bl_posts={drops['bl_posts']} drops_dedup={drops['dedup']} "
+                    f"api_exhausted={api_exhausted} last_page={last_page}"
+                )
                 self._signals.search_append.emit(collected[:limit])
                 await client.close()
 
@@ -779,12 +798,25 @@ class BooruApp(QMainWindow):
         shown_ids = self._search.shown_post_ids.copy()
         seen = shown_ids.copy()
 
+        # Per-pass drop counters for the at-end-flag instrumentation. The
+        # filter mutates this dict via closure capture so the outer scope
+        # can read the totals after the loop. Lets us distinguish "API
+        # ran out" from "client-side filter trimmed the page".
+        drops = {"bl_tags": 0, "bl_posts": 0, "dedup": 0}
+
         def _filter(posts):
+            n0 = len(posts)
             if bl_tags:
                 posts = [p for p in posts if not bl_tags.intersection(p.tag_list)]
+            n1 = len(posts)
+            drops["bl_tags"] += n0 - n1
             if bl_posts:
                 posts = [p for p in posts if p.file_url not in bl_posts]
+            n2 = len(posts)
+            drops["bl_posts"] += n1 - n2
             posts = [p for p in posts if p.id not in seen]
+            n3 = len(posts)
+            drops["dedup"] += n2 - n3
             seen.update(p.id for p in posts)
             return posts
 
@@ -792,8 +824,10 @@ class BooruApp(QMainWindow):
             client = self._make_client()
             try:
                 collected = []
+                raw_total = 0
                 current_page = page
                 batch = await client.search(tags=search_tags, page=current_page, limit=limit)
+                raw_total += len(batch)
                 filtered = _filter(batch)
                 collected.extend(filtered)
                 # Backfill only if first page didn't return enough after filtering
@@ -802,11 +836,17 @@ class BooruApp(QMainWindow):
                         await asyncio.sleep(0.3)
                         current_page += 1
                         batch = await client.search(tags=search_tags, page=current_page, limit=limit)
+                        raw_total += len(batch)
                         filtered = _filter(batch)
                         collected.extend(filtered)
                         log.debug(f"Backfill: page={current_page} batch={len(batch)} filtered={len(filtered)} total={len(collected)}/{limit}")
                         if len(collected) >= limit or len(batch) < limit:
                             break
+                log.debug(
+                    f"do_search: limit={limit} api_returned_total={raw_total} kept={len(collected[:limit])} "
+                    f"drops_bl_tags={drops['bl_tags']} drops_bl_posts={drops['bl_posts']} drops_dedup={drops['dedup']} "
+                    f"last_batch_size={len(batch)} api_short_signal={len(batch) < limit}"
+                )
                 self._signals.search_done.emit(collected[:limit])
             except Exception as e:
                 self._signals.search_error.emit(str(e))
@@ -828,6 +868,7 @@ class BooruApp(QMainWindow):
             del ss.page_cache[oldest]
         limit = self._db.get_setting_int("page_size") or 40
         at_end = len(posts) < limit
+        log.debug(f"on_search_done: displayed_count={len(posts)} limit={limit} at_end={at_end}")
         if at_end:
             self._status.showMessage(f"{len(posts)} results (end)")
         else: