From a90d71da478e67e9cc82aa39291d782d7afd24dd Mon Sep 17 00:00:00 2001 From: pax Date: Thu, 9 Apr 2026 23:58:56 -0500 Subject: [PATCH] tests: add 36 tests for CategoryFetcher (parser, cache, probe, dispatch) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New test_category_fetcher.py covering: HTML parser (10): Rule34/Moebooru/Konachan markup, Gelbooru-empty, metadata->Meta mapping, URL-encoded names, edge cases Tag API parser (6): JSON, XML, empty, flat list, malformed Canonical ordering (4): standard order, species, unknown, empty Cache compose (6): full/partial/zero coverage, empty tags, order, per-site isolation Probe persistence (5): save/load True/False, per-site, clear wipes Batch API availability (3): URL+auth combinations Map coverage (2): label and type map constants All pure Python — synthetic HTML, FakePost/FakeClient/FakeResponse. No network, no Qt. Uses tmp_db fixture from conftest. Total suite: 117 tests, 0.19s. --- tests/core/api/test_category_fetcher.py | 456 ++++++++++++++++++++++++ 1 file changed, 456 insertions(+) create mode 100644 tests/core/api/test_category_fetcher.py diff --git a/tests/core/api/test_category_fetcher.py b/tests/core/api/test_category_fetcher.py new file mode 100644 index 0000000..5c8f30f --- /dev/null +++ b/tests/core/api/test_category_fetcher.py @@ -0,0 +1,456 @@ +"""Tests for CategoryFetcher: HTML parser, tag API parser, cache compose, +probe persistence, dispatch logic, and canonical ordering. + +All pure Python — no Qt, no network. Uses tmp_db fixture for cache tests +and synthetic HTML/JSON/XML for parser tests. +""" + +from __future__ import annotations + +import asyncio +import json +from dataclasses import dataclass, field +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from booru_viewer.core.api.category_fetcher import ( + CategoryFetcher, + _canonical_order, + _parse_post_html, + _parse_tag_response, + _LABEL_MAP, + _GELBOORU_TYPE_MAP, +) + + +# --------------------------------------------------------------------------- +# Synthetic data helpers +# --------------------------------------------------------------------------- + +@dataclass +class FakePost: + id: int = 1 + tags: str = "" + tag_categories: dict = field(default_factory=dict) + + @property + def tag_list(self) -> list[str]: + return self.tags.split() if self.tags else [] + + +class FakeClient: + """Minimal mock of BooruClient for CategoryFetcher construction.""" + api_key = None + api_user = None + + def __init__(self, post_view_url=None, tag_api_url=None, api_key=None, api_user=None): + self._pv_url = post_view_url + self._ta_url = tag_api_url + self.api_key = api_key + self.api_user = api_user + + def _post_view_url(self, post): + return self._pv_url + + def _tag_api_url(self): + return self._ta_url + + async def _request(self, method, url, params=None): + raise NotImplementedError("mock _request not configured") + + +class FakeResponse: + """Minimal httpx.Response stand-in for parser tests.""" + def __init__(self, text: str, status_code: int = 200): + self.text = text + self.status_code = status_code + + def json(self): + return json.loads(self.text) + + def raise_for_status(self): + if self.status_code >= 400: + raise Exception(f"HTTP {self.status_code}") + + +# --------------------------------------------------------------------------- +# HTML parser tests (_parse_post_html) +# --------------------------------------------------------------------------- + +class TestParsePostHtml: + """Test the two-pass regex HTML parser against synthetic markup.""" + + def test_rule34_style_two_links(self): + """Standard Gelbooru-fork layout: ? wiki link + tag search link.""" + html = ''' +
  • + ? + hatsune miku + 12345 +
  • +
  • + ? + someartist + 100 +
  • +
  • + ? + 1girl + 9999999 +
  • + ''' + cats, labels = _parse_post_html(html) + assert "Character" in cats + assert "Artist" in cats + assert "General" in cats + assert cats["Character"] == ["hatsune_miku"] + assert cats["Artist"] == ["someartist"] + assert cats["General"] == ["1girl"] + assert labels["hatsune_miku"] == "Character" + assert labels["someartist"] == "Artist" + + def test_moebooru_style(self): + """yande.re / Konachan: /post?tags=NAME format.""" + html = ''' +
  • + ? + anmi +
  • + + ''' + cats, labels = _parse_post_html(html) + assert cats["Artist"] == ["anmi"] + assert cats["Copyright"] == ["vocaloid"] + + def test_combined_class_konachan(self): + """Konachan uses class="tag-link tag-type-character".""" + html = ''' + + ? + hatsune miku + + ''' + cats, _ = _parse_post_html(html) + assert cats["Character"] == ["hatsune_miku"] + + def test_gelbooru_proper_returns_empty(self): + """Gelbooru proper only has ? links with no tags= param.""" + html = ''' +
  • + ? +
  • +
  • + ? +
  • + ''' + cats, labels = _parse_post_html(html) + assert cats == {} + assert labels == {} + + def test_metadata_maps_to_meta(self): + """class="tag-type-metadata" should map to label "Meta".""" + html = ''' +
  • + ? + highres +
  • + ''' + cats, labels = _parse_post_html(html) + assert "Meta" in cats + assert cats["Meta"] == ["highres"] + + def test_url_encoded_tag_names(self): + """Tags with special chars get URL-encoded in the href.""" + html = ''' +
  • + ? + miku (shinkalion) +
  • + ''' + cats, labels = _parse_post_html(html) + assert cats["Character"] == ["miku_(shinkalion)"] + + def test_empty_html(self): + cats, labels = _parse_post_html("") + assert cats == {} + assert labels == {} + + def test_no_tag_type_elements(self): + html = '

    Hello world

    ' + cats, labels = _parse_post_html(html) + assert cats == {} + + def test_unknown_type_class_ignored(self): + """Tag types not in _LABEL_MAP are silently skipped.""" + html = ''' +
  • + ? + broken +
  • + ''' + cats, _ = _parse_post_html(html) + assert cats == {} + + def test_multiple_tags_same_category(self): + html = ''' +
  • + ? + miku +
  • +
  • + ? + rin +
  • + ''' + cats, _ = _parse_post_html(html) + assert cats["Character"] == ["miku", "rin"] + + +# --------------------------------------------------------------------------- +# Tag API response parser tests (_parse_tag_response) +# --------------------------------------------------------------------------- + +class TestParseTagResponse: + + def test_json_response(self): + resp = FakeResponse(json.dumps({ + "@attributes": {"limit": 100, "offset": 0, "count": 2}, + "tag": [ + {"id": 1, "name": "hatsune_miku", "count": 12345, "type": 4, "ambiguous": 0}, + {"id": 2, "name": "1girl", "count": 9999, "type": 0, "ambiguous": 0}, + ] + })) + result = _parse_tag_response(resp) + assert ("hatsune_miku", 4) in result + assert ("1girl", 0) in result + + def test_xml_response(self): + resp = FakeResponse( + '' + '' + '' + '' + '' + ) + result = _parse_tag_response(resp) + assert ("hatsune_miku", 4) in result + assert ("1girl", 0) in result + + def test_empty_response(self): + resp = FakeResponse("") + assert _parse_tag_response(resp) == [] + + def test_json_flat_list(self): + """Some endpoints return a flat list instead of wrapping in {"tag": [...]}.""" + resp = FakeResponse(json.dumps([ + {"name": "solo", "type": 0, "count": 5000}, + ])) + result = _parse_tag_response(resp) + assert ("solo", 0) in result + + def test_malformed_xml(self): + resp = FakeResponse("