db: add reconcile_library_meta to clean up orphan meta rows
The old delete_from_library deleted files from disk but never cleaned up the matching library_meta row. As a result, in pathological cases the meta table can hold many more rows than there are files on disk. This was harmless when the only consumer was tag-search (the meta would just match nothing useful), but it becomes a real problem the moment is_post_in_library / get_saved_post_ids start driving UI state — the saved-dot indicator would light up for posts whose files have been gone for ages. reconcile_library_meta() walks saved_dir() shallowly (root + one level of subdirs), collects every present post_id (digit-stem files plus templated filenames looked up via library_meta.filename), and DELETEs every meta row whose post_id isn't in that set. It returns the count of removed rows. Defensive: if saved_dir() is missing, or exists but contains no media files (e.g. a removable drive temporarily unmounted), the method refuses to reconcile and returns 0. The cost of a false positive — wiping every meta row for a perfectly intact library — is higher than the cost of leaving stale rows around for one more session. The cache.py fix in the next commit makes future delete_from_library calls clean up after themselves. This method is the one-time catch-up for libraries that were already polluted before that fix.
This commit is contained in:
parent
6f59de0c64
commit
5976a81bb6
@ -583,6 +583,76 @@ class Database:
|
||||
datetime.now(timezone.utc).isoformat(), filename),
|
||||
)
|
||||
|
||||
def reconcile_library_meta(self) -> int:
    """Drop library_meta rows whose files are no longer on disk.

    Scans saved_dir() shallowly (the root plus one level of
    subdirectories) for files with a media extension and derives the
    set of post ids still present on disk:

    * digit-stem files (legacy v0.2.3 naming) contribute
      ``int(stem)`` directly;
    * any other (templated, post-refactor) file is resolved to a
      post id through ``library_meta.filename``; files with no
      matching row contribute nothing.

    Every library_meta row whose post_id is not in that set is then
    deleted, in batches, inside a single ``self._write()`` block.

    Cleans up the orphan rows that were leaked by the old
    delete_from_library before it learned to clean up after itself.
    Safe to call repeatedly: once the DB is consistent with disk the
    stale set is empty and nothing is written.

    Returns:
        The number of stale post ids whose rows were deleted (equal
        to the number of rows removed, assuming post_id is unique in
        library_meta). Returns 0 without touching the DB when
        saved_dir() is not a directory or contains no media files --
        a removable drive that is temporarily unmounted must not
        trigger a wholesale meta wipe.
    """
    from .config import saved_dir, MEDIA_EXTENSIONS

    sd = saved_dir()
    if not sd.is_dir():
        return 0

    # Collect media files from the root and one level of
    # subdirectories; deeper nesting is intentionally not visited.
    on_disk_files: list[Path] = []
    for entry in sd.iterdir():
        if entry.is_file() and entry.suffix.lower() in MEDIA_EXTENSIONS:
            on_disk_files.append(entry)
        elif entry.is_dir():
            for sub in entry.iterdir():
                if sub.is_file() and sub.suffix.lower() in MEDIA_EXTENSIONS:
                    on_disk_files.append(sub)

    if not on_disk_files:
        # No files at all -- refuse to reconcile. Could be an
        # unmounted drive, a freshly-cleared library, etc. The cost
        # of a false positive (wiping every meta row) is higher than
        # the cost of leaving stale rows.
        return 0

    present_post_ids: set[int] = set()
    for f in on_disk_files:
        stem = f.stem
        # isdecimal(), not isdigit(): isdigit() is also true for
        # characters such as superscript digits that int() rejects,
        # so a single oddly named file would abort the whole
        # reconcile with ValueError.
        if stem.isdecimal():
            present_post_ids.add(int(stem))
            continue
        # Templated file: map it back to its post id by filename.
        row = self.conn.execute(
            "SELECT post_id FROM library_meta WHERE filename = ? LIMIT 1",
            (f.name,),
        ).fetchone()
        if row is not None:
            present_post_ids.add(row["post_id"])

    all_meta_ids = self.get_saved_post_ids()
    stale = all_meta_ids - present_post_ids
    if not stale:
        return 0

    with self._write():
        # Delete in chunks so one statement never carries an
        # unbounded number of bound parameters.
        BATCH = 500
        stale_list = list(stale)
        for i in range(0, len(stale_list), BATCH):
            chunk = stale_list[i:i + BATCH]
            placeholders = ",".join("?" * len(chunk))
            self.conn.execute(
                f"DELETE FROM library_meta WHERE post_id IN ({placeholders})",
                chunk,
            )
    return len(stale)
|
||||
|
||||
def is_post_in_library(self, post_id: int) -> bool:
|
||||
"""True iff a `library_meta` row exists for `post_id`.
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user