From 539556921339cd2f7f807e812918b2eebcb8516b Mon Sep 17 00:00:00 2001
From: pax <paxxe@protonmail.com>
Date: Thu, 9 Apr 2026 19:10:37 -0500
Subject: [PATCH] db: re-add tag_types cache table with string labels +
 auto-prune
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per-site tag-type cache for boorus that don't return categories
inline. Uses string labels ("Artist", "Character", "Copyright",
"General", "Meta") instead of the integer codes the reverted
version used — the labels come directly from HTML class names,
no mapping step needed.

Schema: tag_types(site_id, name, label TEXT, fetched_at)
        PRIMARY KEY (site_id, name)

Methods:
  get_tag_labels(site_id, names) — chunked 500-name SELECT
  set_tag_labels(site_id, mapping) — bulk INSERT OR REPLACE,
    auto-prunes oldest entries when the table exceeds 50k rows
  clear_tag_cache(site_id=None) — manual wipe, for future
    Settings UI "Clear tag cache" button

The 50k row cap (table-wide, across all sites) prevents unbounded
growth over months of browsing multiple boorus. Normal usage (a
few thousand unique tags per site) never reaches it. When
exceeded, the oldest entries by fetched_at are pruned first.
fetched_at is written by set_tag_labels and is not refreshed on
cache hits, so eviction is oldest-fetched-first rather than
least-recently-used; pruned tags would be re-fetched cheaply if
needed.

Migration: CREATE TABLE IF NOT EXISTS in both the schema SQL
string and _migrate(), non-breaking for existing databases.
---
 booru_viewer/core/db.py | 98 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/booru_viewer/core/db.py b/booru_viewer/core/db.py
index 27af201..42c3d8c 100644
--- a/booru_viewer/core/db.py
+++ b/booru_viewer/core/db.py
@@ -124,6 +124,14 @@ CREATE TABLE IF NOT EXISTS saved_searches (
     query TEXT NOT NULL,
     site_id INTEGER
 );
+
+CREATE TABLE IF NOT EXISTS tag_types (
+    site_id    INTEGER NOT NULL,
+    name       TEXT NOT NULL,
+    label      TEXT NOT NULL,
+    fetched_at TEXT NOT NULL,
+    PRIMARY KEY (site_id, name)
+);
 """
 
 _DEFAULTS = {
@@ -252,6 +260,21 @@ class Database:
                 # Add tag_categories to favorites if missing
                 if "tag_categories" not in cols:
                     self._conn.execute("ALTER TABLE favorites ADD COLUMN tag_categories TEXT DEFAULT ''")
+                # Tag-type cache for boorus that don't return
+                # categorized tags inline (Gelbooru-shape, Moebooru).
+                # Per-site keying so forks don't cross-contaminate.
+                # Uses string labels ("Artist", "Character", ...)
+                # instead of integer codes — the labels come from
+                # the HTML class names directly.
+                self._conn.execute("""
+                    CREATE TABLE IF NOT EXISTS tag_types (
+                        site_id    INTEGER NOT NULL,
+                        name       TEXT NOT NULL,
+                        label      TEXT NOT NULL,
+                        fetched_at TEXT NOT NULL,
+                        PRIMARY KEY (site_id, name)
+                    )
+                """)
 
     def close(self) -> None:
         if self._conn:
@@ -727,6 +750,81 @@ class Database:
         with self._write():
             self.conn.execute("DELETE FROM library_meta WHERE post_id = ?", (post_id,))
 
+    # -- Tag-type cache --
+
+    def get_tag_labels(self, site_id: int, names: list[str]) -> dict[str, str]:
+        """Return the cached name -> label mapping for `names` on `site_id`.
+
+        Names with no cache row are absent from the result — callers
+        fetch the misses via CategoryFetcher and call set_tag_labels
+        to backfill. An empty `names` returns {} without running a query.
+        """
+        if not names:
+            return {}
+        result: dict[str, str] = {}
+        BATCH = 500  # chunk size: keeps each SELECT (names + site_id = 501 parameters) under SQLite's variable limit
+        for i in range(0, len(names), BATCH):
+            chunk = names[i:i + BATCH]
+            placeholders = ",".join("?" * len(chunk))  # one "?" per name; the names themselves are bound, not interpolated
+            rows = self.conn.execute(
+                f"SELECT name, label FROM tag_types WHERE site_id = ? AND name IN ({placeholders})",
+                [site_id, *chunk],
+            ).fetchall()
+            for r in rows:  # rows are read by column name
+                result[r["name"]] = r["label"]
+        return result
+
+    def set_tag_labels(self, site_id: int, mapping: dict[str, str]) -> None:
+        """Bulk INSERT OR REPLACE (name -> label) entries for one site.
+
+        Replacing an existing name also refreshes its fetched_at. After the
+        insert, _prune_tag_cache trims the table; empty mapping is a no-op.
+        """
+        if not mapping:
+            return
+        now = datetime.now(timezone.utc).isoformat()  # one UTC timestamp shared by every row in this call
+        rows = [(site_id, name, label, now) for name, label in mapping.items()]
+        with self._write():
+            self.conn.executemany(
+                "INSERT OR REPLACE INTO tag_types (site_id, name, label, fetched_at) "
+                "VALUES (?, ?, ?, ?)",
+                rows,
+            )
+            self._prune_tag_cache()  # runs inside the same _write() block as the insert
+
+    _TAG_CACHE_MAX_ROWS = 50_000  # table-wide cap (all sites combined); a safety valve, not expected to be hit in normal use
+
+    def _prune_tag_cache(self) -> None:
+        """Delete the oldest tag_types rows if the table exceeds the cap.
+
+        Rows are counted and pruned across every site_id, ordered by
+        fetched_at. That column is set by set_tag_labels; get_tag_labels
+        does not refresh it, so the oldest-fetched rows go first.
+        Opens no transaction itself: set_tag_labels calls it from
+        inside its own _write() block.
+        """
+        count = self.conn.execute("SELECT COUNT(*) FROM tag_types").fetchone()[0]
+        if count <= self._TAG_CACHE_MAX_ROWS:
+            return
+        excess = count - self._TAG_CACHE_MAX_ROWS  # how many rows sit above the cap
+        self.conn.execute(  # no tie-breaker: rows sharing a fetched_at are cut in whatever order SQLite returns them
+            "DELETE FROM tag_types WHERE rowid IN ("
+            "  SELECT rowid FROM tag_types ORDER BY fetched_at ASC LIMIT ?"
+            ")",
+            (excess,),
+        )
+
+    def clear_tag_cache(self, site_id: int | None = None) -> int:
+        """Delete cached tag labels: one site's rows when site_id is given,
+        every row when it is None. Returns the DELETE cursor's rowcount.
+        Exposed for a future Settings UI "Clear tag cache" button."""
+        with self._write():
+            if site_id is not None:  # explicit None test, so site_id 0 is still a per-site clear
+                cur = self.conn.execute("DELETE FROM tag_types WHERE site_id = ?", (site_id,))
+            else:
+                cur = self.conn.execute("DELETE FROM tag_types")
+            return cur.rowcount
+
     # -- Settings --
 
     def get_setting(self, key: str) -> str: