From ab44735f28e5c2df3a36bcaef6528aeb7a39dd9a Mon Sep 17 00:00:00 2001 From: pax Date: Wed, 15 Apr 2026 17:43:49 -0500 Subject: [PATCH] http: consolidate httpx.AsyncClient construction into make_client Three call sites built near-identical httpx.AsyncClient instances: the cache download pool, BooruClient's shared API pool, and detect_site_type's reach into that same pool. They differed only in timeout (60s vs 20s), Accept header (cache pool only), and which extra request hooks to attach. core/http.py:make_client is the single constructor now. Each call site still keeps its own singleton + lock (separate connection pools for large transfers vs short JSON), so this is a constructor consolidation, not a pool consolidation. No behavior change. Drops now-unused USER_AGENT imports from cache.py and base.py; make_client pulls it from core.config. --- CHANGELOG.md | 1 + booru_viewer/core/api/base.py | 18 ++------ booru_viewer/core/api/detect.py | 20 ++------- booru_viewer/core/cache.py | 21 +++------- booru_viewer/core/http.py | 73 +++++++++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 46 deletions(-) create mode 100644 booru_viewer/core/http.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 80c7e87..59638d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ ### Refactored - `category_fetcher` batch tag-API params are now built by a shared `_build_tag_api_params` helper instead of duplicated across `fetch_via_tag_api` and `_probe_batch_api` - `detect.detect_site_type` — removed the leftover `if True:` indent marker; no behavior change +- `core.http.make_client` — single constructor for the three `httpx.AsyncClient` instances (cache download pool, API pool, detect probe). Each call site still keeps its own singleton and connection pool; only the construction is shared ## v0.2.7 diff --git a/booru_viewer/core/api/base.py b/booru_viewer/core/api/base.py index f030fb4..679c408 100644 --- a/booru_viewer/core/api/base.py +++ b/booru_viewer/core/api/base.py @@ -10,9 +10,9 @@ from dataclasses import dataclass, field import httpx -from ..config import USER_AGENT, DEFAULT_PAGE_SIZE +from ..config import DEFAULT_PAGE_SIZE from ..cache import log_connection -from ._safety import redact_url, validate_public_request +from ._safety import redact_url log = logging.getLogger("booru") @@ -100,21 +100,11 @@ class BooruClient(ABC): return c # Slow path: build it. Lock so two coroutines on the same loop don't # both construct + leak. + from ..http import make_client with BooruClient._shared_client_lock: c = BooruClient._shared_client if c is None or c.is_closed: - c = httpx.AsyncClient( - headers={"User-Agent": USER_AGENT}, - follow_redirects=True, - timeout=20.0, - event_hooks={ - "request": [ - validate_public_request, - self._log_request, - ], - }, - limits=httpx.Limits(max_connections=10, max_keepalive_connections=5), - ) + c = make_client(extra_request_hooks=[self._log_request]) BooruClient._shared_client = c return c diff --git a/booru_viewer/core/api/detect.py b/booru_viewer/core/api/detect.py index 99a25e0..034e8fd 100644 --- a/booru_viewer/core/api/detect.py +++ b/booru_viewer/core/api/detect.py @@ -4,10 +4,7 @@ from __future__ import annotations import logging -import httpx - -from ..config import USER_AGENT -from ._safety import validate_public_request +from ..http import make_client from .danbooru import DanbooruClient from .gelbooru import GelbooruClient from .moebooru import MoebooruClient @@ -29,22 +26,11 @@ async def detect_site_type( url = url.rstrip("/") from .base import BooruClient as _BC - # Reuse shared client for site detection. event_hooks mirrors + # Reuse shared client for site detection. Event hooks mirror # BooruClient.client so detection requests get the same SSRF # validation and connection logging as regular API calls. if _BC._shared_client is None or _BC._shared_client.is_closed: - _BC._shared_client = httpx.AsyncClient( - headers={"User-Agent": USER_AGENT}, - follow_redirects=True, - timeout=20.0, - event_hooks={ - "request": [ - validate_public_request, - _BC._log_request, - ], - }, - limits=httpx.Limits(max_connections=10, max_keepalive_connections=5), - ) + _BC._shared_client = make_client(extra_request_hooks=[_BC._log_request]) client = _BC._shared_client # Try Danbooru / e621 first — /posts.json is a definitive endpoint try: diff --git a/booru_viewer/core/cache.py b/booru_viewer/core/cache.py index 9bf48b9..c978392 100644 --- a/booru_viewer/core/cache.py +++ b/booru_viewer/core/cache.py @@ -17,7 +17,7 @@ from urllib.parse import urlparse import httpx from PIL import Image -from .config import cache_dir, thumbnails_dir, USER_AGENT +from .config import cache_dir, thumbnails_dir log = logging.getLogger("booru") @@ -77,23 +77,14 @@ def _get_shared_client(referer: str = "") -> httpx.AsyncClient: c = _shared_client if c is not None and not c.is_closed: return c - # Lazy import: core.api.base imports log_connection from this - # module, so a top-level `from .api._safety import ...` would - # circular-import through api/__init__.py during cache.py load. - from .api._safety import validate_public_request + # Lazy import: core.http imports from core.api._safety, which + # lives inside the api package that imports this module, so a + # top-level import would circular through cache.py's load. + from .http import make_client with _shared_client_lock: c = _shared_client if c is None or c.is_closed: - c = httpx.AsyncClient( - headers={ - "User-Agent": USER_AGENT, - "Accept": "image/*,video/*,*/*", - }, - follow_redirects=True, - timeout=60.0, - event_hooks={"request": [validate_public_request]}, - limits=httpx.Limits(max_connections=10, max_keepalive_connections=5), - ) + c = make_client(timeout=60.0, accept="image/*,video/*,*/*") _shared_client = c return c diff --git a/booru_viewer/core/http.py b/booru_viewer/core/http.py new file mode 100644 index 0000000..a352d64 --- /dev/null +++ b/booru_viewer/core/http.py @@ -0,0 +1,73 @@ +"""Shared httpx.AsyncClient constructor. + +Three call sites build near-identical clients: the cache module's +download pool, ``BooruClient``'s shared API pool, and +``detect.detect_site_type``'s reach into that same pool. Centralising +the construction in one place means a future change (new SSRF hook, +new connection limit, different default UA) doesn't have to be made +three times and kept in sync. + +The module does NOT manage the singletons themselves — each call site +keeps its own ``_shared_client`` and its own lock, so the cache +pool's long-lived large transfers don't compete with short JSON +requests from the API layer. ``make_client`` is a pure constructor. +""" + +from __future__ import annotations + +from typing import Callable, Iterable + +import httpx + +from .config import USER_AGENT +from .api._safety import validate_public_request + + +# Connection pool limits are identical across all three call sites. +# Keeping the default here centralises any future tuning. +_DEFAULT_LIMITS = httpx.Limits(max_connections=10, max_keepalive_connections=5) + + +def make_client( + *, + timeout: float = 20.0, + accept: str | None = None, + extra_request_hooks: Iterable[Callable] | None = None, +) -> httpx.AsyncClient: + """Return a fresh ``httpx.AsyncClient`` with the project's defaults. + + Defaults applied unconditionally: + - ``User-Agent`` header from ``core.config.USER_AGENT`` + - ``follow_redirects=True`` + - ``validate_public_request`` SSRF hook (always first on the + request-hook chain; extras run after it) + - Connection limits: 10 max, 5 keepalive + + Parameters: + timeout: per-request timeout in seconds. Cache downloads pass + 60s for large videos; the API pool uses 20s. + accept: optional ``Accept`` header value. The cache pool sets + ``image/*,video/*,*/*``; the API pool leaves it unset so + httpx's ``*/*`` default takes effect. + extra_request_hooks: optional extra callables to run after + ``validate_public_request``. The API clients pass their + connection-logging hook here; detect passes the same. + + Call sites are responsible for their own singleton caching — + ``make_client`` always returns a fresh instance. + """ + headers: dict[str, str] = {"User-Agent": USER_AGENT} + if accept is not None: + headers["Accept"] = accept + + hooks: list[Callable] = [validate_public_request] + if extra_request_hooks: + hooks.extend(extra_request_hooks) + + return httpx.AsyncClient( + headers=headers, + follow_redirects=True, + timeout=timeout, + event_hooks={"request": hooks}, + limits=_DEFAULT_LIMITS, + )