db: re-add tag_types cache table with string labels + auto-prune
Per-site tag-type cache for boorus that don't return categories
inline. Uses string labels ("Artist", "Character", "Copyright",
"General", "Meta") instead of the integer codes the reverted
version used — the labels come directly from HTML class names,
no mapping step needed.
Schema: tag_types(site_id, name, label TEXT, fetched_at)
PRIMARY KEY (site_id, name)
Methods:
get_tag_labels(site_id, names) — chunked 500-name SELECT
set_tag_labels(site_id, mapping) — bulk INSERT OR REPLACE,
auto-prunes oldest entries when the table exceeds 50k rows
clear_tag_cache(site_id=None) — manual wipe, for a future
Settings UI "Clear tag cache" button
The 50k row cap prevents unbounded growth over months of
browsing multiple boorus. Normal usage (a few thousand unique
tags per site) never reaches it. When the cap is exceeded, the
entries with the oldest fetched_at are pruned first — these are
the tags that were cached longest ago (fetched_at is set on write
and is not refreshed by cache reads), and they would be re-fetched
cheaply if needed.
Migration: CREATE TABLE IF NOT EXISTS in _migrate(), non-breaking
for existing databases.
This commit is contained in:
parent
81fc4d93eb
commit
5395569213
@ -124,6 +124,14 @@ CREATE TABLE IF NOT EXISTS saved_searches (
|
|||||||
query TEXT NOT NULL,
|
query TEXT NOT NULL,
|
||||||
site_id INTEGER
|
site_id INTEGER
|
||||||
);
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS tag_types (
|
||||||
|
site_id INTEGER NOT NULL,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
label TEXT NOT NULL,
|
||||||
|
fetched_at TEXT NOT NULL,
|
||||||
|
PRIMARY KEY (site_id, name)
|
||||||
|
);
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_DEFAULTS = {
|
_DEFAULTS = {
|
||||||
@ -252,6 +260,21 @@ class Database:
|
|||||||
# Add tag_categories to favorites if missing
|
# Add tag_categories to favorites if missing
|
||||||
if "tag_categories" not in cols:
|
if "tag_categories" not in cols:
|
||||||
self._conn.execute("ALTER TABLE favorites ADD COLUMN tag_categories TEXT DEFAULT ''")
|
self._conn.execute("ALTER TABLE favorites ADD COLUMN tag_categories TEXT DEFAULT ''")
|
||||||
|
# Tag-type cache for boorus that don't return
|
||||||
|
# categorized tags inline (Gelbooru-shape, Moebooru).
|
||||||
|
# Per-site keying so forks don't cross-contaminate.
|
||||||
|
# Uses string labels ("Artist", "Character", ...)
|
||||||
|
# instead of integer codes — the labels come from
|
||||||
|
# the HTML class names directly.
|
||||||
|
self._conn.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS tag_types (
|
||||||
|
site_id INTEGER NOT NULL,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
label TEXT NOT NULL,
|
||||||
|
fetched_at TEXT NOT NULL,
|
||||||
|
PRIMARY KEY (site_id, name)
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
|
||||||
def close(self) -> None:
|
def close(self) -> None:
|
||||||
if self._conn:
|
if self._conn:
|
||||||
@ -727,6 +750,81 @@ class Database:
|
|||||||
with self._write():
|
with self._write():
|
||||||
self.conn.execute("DELETE FROM library_meta WHERE post_id = ?", (post_id,))
|
self.conn.execute("DELETE FROM library_meta WHERE post_id = ?", (post_id,))
|
||||||
|
|
||||||
|
# -- Tag-type cache --
|
||||||
|
|
||||||
|
def get_tag_labels(self, site_id: int, names: list[str]) -> dict[str, str]:
    """Look up cached string labels for `names` on the given site.

    Only tags that have a cache entry appear in the returned dict;
    callers are expected to fetch the misses via CategoryFetcher and
    backfill them with set_tag_labels. Lookups are issued in chunks
    so each query stays under SQLite's bound-variable limit.
    """
    if not names:
        return {}
    labels: dict[str, str] = {}
    chunk_size = 500
    for start in range(0, len(names), chunk_size):
        batch = names[start:start + chunk_size]
        # One "?" per name in this batch; site_id is bound separately.
        marks = ",".join("?" for _ in batch)
        cursor = self.conn.execute(
            f"SELECT name, label FROM tag_types WHERE site_id = ? AND name IN ({marks})",
            [site_id, *batch],
        )
        labels.update((row["name"], row["label"]) for row in cursor.fetchall())
    return labels
|
||||||
|
|
||||||
|
def set_tag_labels(self, site_id: int, mapping: dict[str, str]) -> None:
    """Upsert (name -> label) cache entries for a single site.

    Every entry written by one call shares the same UTC timestamp in
    fetched_at. After the write, _prune_tag_cache() is invoked so the
    table is trimmed when it exceeds _TAG_CACHE_MAX_ROWS. An empty
    mapping is a no-op.
    """
    if not mapping:
        return
    stamp = datetime.now(timezone.utc).isoformat()
    upsert_sql = (
        "INSERT OR REPLACE INTO tag_types (site_id, name, label, fetched_at) "
        "VALUES (?, ?, ?, ?)"
    )
    params = [(site_id, tag, label, stamp) for tag, label in mapping.items()]
    with self._write():
        self.conn.executemany(upsert_sql, params)
        # Prune inside the same write context as the insert.
        self._prune_tag_cache()
|
||||||
|
|
||||||
|
_TAG_CACHE_MAX_ROWS = 50_000  # ~50k tags ≈ several months of browsing

def _prune_tag_cache(self) -> None:
    """Delete the oldest tag_types rows once the table exceeds the cap.

    Counts all rows (across every site) and, when the count is above
    _TAG_CACHE_MAX_ROWS, deletes exactly the surplus, starting with
    the smallest fetched_at. Does nothing when the table is at or
    under the cap.

    This method opens no write context of its own; set_tag_labels
    calls it from inside its _write() block.
    """
    total = self.conn.execute("SELECT COUNT(*) FROM tag_types").fetchone()[0]
    excess = total - self._TAG_CACHE_MAX_ROWS
    if excess <= 0:
        return
    # All rows written by one set_tag_labels call share a single
    # fetched_at, so ties are the common case. SQLite leaves the order
    # of tied rows undefined; rowid as a secondary key makes the choice
    # of evicted rows deterministic.
    self.conn.execute(
        "DELETE FROM tag_types WHERE rowid IN ("
        "  SELECT rowid FROM tag_types"
        "  ORDER BY fetched_at ASC, rowid ASC LIMIT ?"
        ")",
        (excess,),
    )
|
||||||
|
|
||||||
|
def clear_tag_cache(self, site_id: int | None = None) -> int:
|
||||||
|
"""Delete cached tag types. Pass site_id to clear one site,
|
||||||
|
or None to clear all. Returns rows deleted. Exposed for
|
||||||
|
future Settings UI "Clear tag cache" button."""
|
||||||
|
with self._write():
|
||||||
|
if site_id is not None:
|
||||||
|
cur = self.conn.execute("DELETE FROM tag_types WHERE site_id = ?", (site_id,))
|
||||||
|
else:
|
||||||
|
cur = self.conn.execute("DELETE FROM tag_types")
|
||||||
|
return cur.rowcount
|
||||||
|
|
||||||
# -- Settings --
|
# -- Settings --
|
||||||
|
|
||||||
def get_setting(self, key: str) -> str:
|
def get_setting(self, key: str) -> str:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user