From 539556921339cd2f7f807e812918b2eebcb8516b Mon Sep 17 00:00:00 2001
From: pax
Date: Thu, 9 Apr 2026 19:10:37 -0500
Subject: [PATCH] db: re-add tag_types cache table with string labels +
 auto-prune
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per-site tag-type cache for boorus that don't return categories
inline. Uses string labels ("Artist", "Character", "Copyright",
"General", "Meta") instead of the integer codes the reverted
version used — the labels come directly from HTML class names,
no mapping step needed.

Schema: tag_types(site_id, name, label TEXT, fetched_at)
        PRIMARY KEY (site_id, name)

Methods:
  get_tag_labels(site_id, names) — chunked 500-name SELECT
  set_tag_labels(site_id, mapping) — bulk INSERT OR REPLACE,
    auto-prunes oldest entries when the table exceeds 50k rows
  clear_tag_cache(site_id=None) — manual wipe, for future
    Settings UI "Clear tag cache" button

The 50k row cap prevents unbounded growth over months of browsing
multiple boorus. Normal usage (a few thousand unique tags per site)
never reaches it. When exceeded, the oldest entries by fetched_at
are pruned first — these are the tags the user hasn't encountered
recently and would be re-fetched cheaply if needed.

Migration: CREATE TABLE IF NOT EXISTS in _migrate(), non-breaking
for existing databases.
---
NOTE(review): line breaks and indentation in this diff were restored from a
whitespace-collapsed copy; context-line indentation is assumed — verify
against the commit before applying.

 booru_viewer/core/db.py | 98 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/booru_viewer/core/db.py b/booru_viewer/core/db.py
index 27af201..42c3d8c 100644
--- a/booru_viewer/core/db.py
+++ b/booru_viewer/core/db.py
@@ -124,6 +124,14 @@ CREATE TABLE IF NOT EXISTS saved_searches (
     query TEXT NOT NULL,
     site_id INTEGER
 );
+
+CREATE TABLE IF NOT EXISTS tag_types (
+    site_id INTEGER NOT NULL,
+    name TEXT NOT NULL,
+    label TEXT NOT NULL,
+    fetched_at TEXT NOT NULL,
+    PRIMARY KEY (site_id, name)
+);
 """
 
 _DEFAULTS = {
@@ -252,6 +260,21 @@ class Database:
             # Add tag_categories to favorites if missing
             if "tag_categories" not in cols:
                 self._conn.execute("ALTER TABLE favorites ADD COLUMN tag_categories TEXT DEFAULT ''")
+            # Tag-type cache for boorus that don't return
+            # categorized tags inline (Gelbooru-shape, Moebooru).
+            # Per-site keying so forks don't cross-contaminate.
+            # Uses string labels ("Artist", "Character", ...)
+            # instead of integer codes — the labels come from
+            # the HTML class names directly.
+            self._conn.execute("""
+                CREATE TABLE IF NOT EXISTS tag_types (
+                    site_id INTEGER NOT NULL,
+                    name TEXT NOT NULL,
+                    label TEXT NOT NULL,
+                    fetched_at TEXT NOT NULL,
+                    PRIMARY KEY (site_id, name)
+                )
+            """)
 
     def close(self) -> None:
         if self._conn:
@@ -727,6 +750,81 @@ class Database:
         with self._write():
             self.conn.execute("DELETE FROM library_meta WHERE post_id = ?", (post_id,))
 
+    # -- Tag-type cache --
+
+    def get_tag_labels(self, site_id: int, names: list[str]) -> dict[str, str]:
+        """Return cached string labels for `names` on `site_id`.
+
+        Names without a cache entry are simply absent from the result;
+        callers fetch those via CategoryFetcher and backfill with
+        set_tag_labels. Queried in 500-name chunks (SQLite variable limit).
+        """
+        if not names:
+            return {}
+        result: dict[str, str] = {}
+        BATCH = 500
+        for i in range(0, len(names), BATCH):
+            chunk = names[i:i + BATCH]
+            placeholders = ",".join("?" * len(chunk))
+            rows = self.conn.execute(
+                f"SELECT name, label FROM tag_types WHERE site_id = ? AND name IN ({placeholders})",
+                [site_id, *chunk],
+            ).fetchall()
+            for r in rows:
+                result[r["name"]] = r["label"]
+        return result
+
+    def set_tag_labels(self, site_id: int, mapping: dict[str, str]) -> None:
+        """Bulk INSERT OR REPLACE (name -> label) entries for one site.
+
+        All rows get the same UTC fetched_at; afterwards the oldest rows
+        are pruned if the table exceeds _TAG_CACHE_MAX_ROWS.
+        """
+        if not mapping:
+            return
+        now = datetime.now(timezone.utc).isoformat()
+        rows = [(site_id, name, label, now) for name, label in mapping.items()]
+        with self._write():
+            self.conn.executemany(
+                "INSERT OR REPLACE INTO tag_types (site_id, name, label, fetched_at) "
+                "VALUES (?, ?, ?, ?)",
+                rows,
+            )
+            self._prune_tag_cache()
+
+    _TAG_CACHE_MAX_ROWS = 50_000  # ~50k tags ≈ several months of browsing
+
+    def _prune_tag_cache(self) -> None:
+        """Delete the oldest tag_types rows if the table exceeds the cap.
+
+        Keeps the most-recently-fetched entries (ordering is by the
+        fetched_at string). Opens no transaction of its own: it relies
+        on the _write() context held by set_tag_labels. The cap is
+        generous enough that normal usage never hits it; it's a safety
+        valve for users who browse many boorus for months without clearing.
+        """
+        count = self.conn.execute("SELECT COUNT(*) FROM tag_types").fetchone()[0]
+        if count <= self._TAG_CACHE_MAX_ROWS:
+            return
+        excess = count - self._TAG_CACHE_MAX_ROWS
+        self.conn.execute(
+            "DELETE FROM tag_types WHERE rowid IN ("
+            " SELECT rowid FROM tag_types ORDER BY fetched_at ASC LIMIT ?"
+            ")",
+            (excess,),
+        )
+
+    def clear_tag_cache(self, site_id: int | None = None) -> int:
+        """Delete cached tag labels; return the number of rows deleted.
+        Pass site_id to clear one site, or None to clear every site.
+        Exposed for a future Settings UI "Clear tag cache" button."""
+        with self._write():
+            if site_id is not None:
+                cur = self.conn.execute("DELETE FROM tag_types WHERE site_id = ?", (site_id,))
+            else:
+                cur = self.conn.execute("DELETE FROM tag_types")
+        return cur.rowcount
+
     # -- Settings --
 
     def get_setting(self, key: str) -> str: