From 6075f319179d8751721c910210984a4dc25b3d9c Mon Sep 17 00:00:00 2001 From: pax Date: Thu, 9 Apr 2026 16:57:36 -0500 Subject: [PATCH] library: scaffold filename templates + DB column MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the foundation that the unified save flow refactor builds on. No behavior change at this commit — empty default template means every save site still produces {id}{ext} like v0.2.3. - core/db.py: library_meta.filename column with non-breaking migration for legacy databases. Index on filename. New get_library_post_id_by_filename() lookup. filename kwarg on save_library_meta (defaults to "" for legacy callers). library_filename_template added to _DEFAULTS. - core/config.py: render_filename_template() with %id% %md5% %ext% %rating% %score% %artist% %character% %copyright% %general% %meta% %species% tokens. Sanitizes filesystem-reserved chars, collapses whitespace, strips leading dots/.., caps the rendered stem at 200 characters, falls back to post id when sanitization yields empty. - gui/settings.py: Library filename template input field next to the Library directory row, with a help label listing tokens and noting that Gelbooru/Moebooru can only resolve the basic ones. 
def render_filename_template(template: str, post: "Post", ext: str) -> str:
    """Render a filename template against a Post into a filesystem-safe basename.

    Tokens supported:
        %id%         post id
        %md5%        md5 hash extracted from file_url (empty if URL doesn't carry one)
        %ext%        extension without the leading dot
        %rating%     post.rating or empty
        %score%      post.score
        %artist%     underscore-joined names from post.tag_categories["artist"]
        %character%  same, character category
        %copyright%  same, copyright category
        %general%    same, general category
        %meta%       same, meta category
        %species%    same, species category

    Tokens are substituted in a single left-to-right pass, so a substituted
    value that happens to contain token-like text (for example a tag literally
    named ``%meta%``) is kept verbatim instead of being expanded a second time.

    Sanitization replaces filesystem-reserved characters (`/\\:*?"<>|`) and
    control characters below 0x20 with underscores, collapses whitespace runs
    to a single underscore, and strips leading dots/underscores and trailing
    dots/underscores/spaces so the rendered name can't escape the destination
    directory or trip Windows' trailing-dot quirk.

    The rendered stem is capped at 200 characters. When the template contains
    ``%ext%`` and the rendered name ends with ``.<ext>``, that extension is set
    aside before truncating and re-attached afterwards, so a long name never
    loses its extension. The cap counts characters, not bytes, so templates
    heavy in multi-byte characters can still exceed a 255-byte filesystem
    limit.

    Args:
        template: The user-supplied template. Empty means "use the post id".
        post: Object exposing ``id``, ``file_url``, ``rating``, ``score`` and
            ``tag_categories`` (a mapping of category name to tag names, or a
            falsy value).
        ext: File extension. It is appended verbatim when the template has no
            ``%ext%`` token, so it is expected to carry its leading dot.

    Returns:
        A basename including the extension. Falls back to ``f"{post.id}{ext}"``
        when ``template`` is empty or sanitization leaves nothing usable.
    """
    from urllib.parse import urlparse

    fallback = f"{post.id}{ext}"
    if not template:
        return fallback

    max_stem_chars = 200
    bare_ext = ext.lstrip(".")
    categories = post.tag_categories or {}

    def _join_cat(name: str) -> str:
        return "_".join(categories.get(name) or [])

    # %md5% — accept the URL's filename stem only if it is exactly 32 hex
    # characters. A missing / non-string URL or one urlparse rejects simply
    # yields an empty token rather than an error.
    md5 = ""
    file_url = getattr(post, "file_url", None)
    if isinstance(file_url, str):
        try:
            url_stem = Path(urlparse(file_url).path).stem
        except ValueError:
            url_stem = ""
        if re.fullmatch(r"[0-9a-fA-F]{32}", url_stem):
            md5 = url_stem

    has_ext_token = "%ext%" in template
    replacements = {
        "%id%": str(post.id),
        "%md5%": md5,
        "%ext%": bare_ext,
        "%rating%": post.rating or "",
        "%score%": str(post.score),
        "%artist%": _join_cat("artist"),
        "%character%": _join_cat("character"),
        "%copyright%": _join_cat("copyright"),
        "%general%": _join_cat("general"),
        "%meta%": _join_cat("meta"),
        "%species%": _join_cat("species"),
    }

    # One regex pass over the template: inserted values are never rescanned,
    # which chained str.replace() calls could not guarantee.
    token_pattern = re.compile("|".join(re.escape(token) for token in replacements))
    rendered = token_pattern.sub(lambda m: replacements[m.group(0)], template)

    # Reserved and control characters first, then whitespace collapse.
    rendered = re.sub(r'[/\\:*?"<>|\x00-\x1f]', "_", rendered)
    rendered = re.sub(r"\s+", "_", rendered)
    # lstrip("._") removes every leading dot, which also covers ".." prefixes.
    rendered = rendered.lstrip("._").rstrip("._ ")

    if len(rendered) > max_stem_chars:
        # Protect a trailing ".<ext>" produced by the %ext% token so the cap
        # only ever eats into the stem.
        ext_tail = f".{bare_ext}" if has_ext_token and bare_ext else ""
        if ext_tail and rendered.endswith(ext_tail):
            stem = rendered[: -len(ext_tail)]
        else:
            stem, ext_tail = rendered, ""
        stem = stem[:max_stem_chars].rstrip("._ ")
        rendered = stem + ext_tail if stem else ""

    if not rendered:
        return fallback

    if not has_ext_token:
        rendered += ext

    return rendered
+ if "filename" not in meta_cols: + self._conn.execute("ALTER TABLE library_meta ADD COLUMN filename TEXT NOT NULL DEFAULT ''") + self._conn.execute("CREATE INDEX IF NOT EXISTS idx_library_meta_filename ON library_meta(filename)") # Add tag_categories to favorites if missing if "tag_categories" not in cols: self._conn.execute("ALTER TABLE favorites ADD COLUMN tag_categories TEXT DEFAULT ''") @@ -559,17 +572,38 @@ class Database: def save_library_meta(self, post_id: int, tags: str = "", tag_categories: dict = None, score: int = 0, rating: str = None, source: str = None, - file_url: str = None) -> None: + file_url: str = None, filename: str = "") -> None: cats_json = json.dumps(tag_categories) if tag_categories else "" with self._write(): self.conn.execute( "INSERT OR REPLACE INTO library_meta " - "(post_id, tags, tag_categories, score, rating, source, file_url, saved_at) " - "VALUES (?, ?, ?, ?, ?, ?, ?, ?)", + "(post_id, tags, tag_categories, score, rating, source, file_url, saved_at, filename) " + "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", (post_id, tags, cats_json, score, rating, source, file_url, - datetime.now(timezone.utc).isoformat()), + datetime.now(timezone.utc).isoformat(), filename), ) + def get_library_post_id_by_filename(self, filename: str) -> int | None: + """Look up which post a saved-library file belongs to, by basename. + + Returns the post_id if a `library_meta` row exists with that + filename, or None if no row matches. Used by the unified save + flow's same-post-on-disk check to make re-saves idempotent and + to apply sequential `_1`, `_2`, ... suffixes only when a name + collides with a *different* post. + + Empty-string filenames (the legacy v0.2.3 sentinel) deliberately + do not match — callers fall back to the digit-stem heuristic for + those rows. + """ + if not filename: + return None + row = self.conn.execute( + "SELECT post_id FROM library_meta WHERE filename = ? 
LIMIT 1", + (filename,), + ).fetchone() + return row["post_id"] if row else None + def get_library_meta(self, post_id: int) -> dict | None: row = self.conn.execute("SELECT * FROM library_meta WHERE post_id = ?", (post_id,)).fetchone() if not row: diff --git a/booru_viewer/gui/settings.py b/booru_viewer/gui/settings.py index 0e5e884..744e00c 100644 --- a/booru_viewer/gui/settings.py +++ b/booru_viewer/gui/settings.py @@ -430,6 +430,26 @@ class SettingsDialog(QDialog): layout.addWidget(QLabel("Library directory:")) layout.addLayout(lib_row) + # Library filename template (editable). Applies to every save action + # — Save to Library, Save As, batch downloads, multi-select bulk + # operations, and bookmark→library copies. Empty = post id. + layout.addWidget(QLabel("Library filename template:")) + self._library_filename_template = QLineEdit( + self._db.get_setting("library_filename_template") or "" + ) + self._library_filename_template.setPlaceholderText("e.g. %artist%_%id% (leave blank for post id)") + layout.addWidget(self._library_filename_template) + tmpl_help = QLabel( + "Tokens: %id% %md5% %ext% %rating% %score% " + "%artist% %character% %copyright% %general% %meta% %species%\n" + "Applies to every save action: Save to Library, Save As, Batch Download, " + "multi-select bulk operations, and bookmark→library copies.\n" + "Note: Gelbooru and Moebooru only support %id% / %md5% / %score% / %rating% / %ext%." 
+ ) + tmpl_help.setWordWrap(True) + tmpl_help.setStyleSheet("color: palette(mid); font-size: 10pt;") + layout.addWidget(tmpl_help) + open_btn = QPushButton("Open Data Folder") open_btn.clicked.connect(self._open_data_folder) layout.addWidget(open_btn) @@ -761,6 +781,7 @@ class SettingsDialog(QDialog): self._db.set_setting("infinite_scroll", "1" if self._infinite_scroll.isChecked() else "0") self._db.set_setting("slideshow_monitor", self._monitor_combo.currentText()) self._db.set_setting("library_dir", self._library_dir.text().strip()) + self._db.set_setting("library_filename_template", self._library_filename_template.text().strip()) self._db.set_setting("max_cache_mb", str(self._max_cache.value())) self._db.set_setting("max_thumb_cache_mb", str(self._max_thumb_cache.value())) self._db.set_setting("auto_evict", "1" if self._auto_evict.isChecked() else "0")