http: consolidate httpx.AsyncClient construction into make_client

Three call sites built near-identical httpx.AsyncClient instances:
the cache download pool, BooruClient's shared API pool, and
detect_site_type's reach into that same pool. They differed only
in timeout (60s vs 20s), Accept header (cache pool only), and
which extra request hooks to attach.

core/http.py:make_client is the single constructor now. Each call
site still keeps its own singleton + lock (separate connection
pools for large transfers vs short JSON), so this is a constructor
consolidation, not a pool consolidation.

No behavior change. Drops now-unused USER_AGENT imports from
cache.py and base.py; make_client pulls it from core.config.
This commit is contained in:
pax 2026-04-15 17:43:49 -05:00
parent 90b27fe36a
commit ab44735f28
5 changed files with 87 additions and 46 deletions

View File

@ -16,6 +16,7 @@
### Refactored
- `category_fetcher` batch tag-API params are now built by a shared `_build_tag_api_params` helper instead of duplicated across `fetch_via_tag_api` and `_probe_batch_api`
- `detect.detect_site_type` — removed the leftover `if True:` indent marker; no behavior change
- `core.http.make_client` — single constructor for the three `httpx.AsyncClient` instances (cache download pool, API pool, detect probe). Each call site still keeps its own singleton and connection pool; only the construction is shared
## v0.2.7

View File

@ -10,9 +10,9 @@ from dataclasses import dataclass, field
import httpx
from ..config import USER_AGENT, DEFAULT_PAGE_SIZE
from ..config import DEFAULT_PAGE_SIZE
from ..cache import log_connection
from ._safety import redact_url, validate_public_request
from ._safety import redact_url
log = logging.getLogger("booru")
@ -100,21 +100,11 @@ class BooruClient(ABC):
return c
# Slow path: build it. Lock so two coroutines on the same loop don't
# both construct + leak.
from ..http import make_client
with BooruClient._shared_client_lock:
c = BooruClient._shared_client
if c is None or c.is_closed:
c = httpx.AsyncClient(
headers={"User-Agent": USER_AGENT},
follow_redirects=True,
timeout=20.0,
event_hooks={
"request": [
validate_public_request,
self._log_request,
],
},
limits=httpx.Limits(max_connections=10, max_keepalive_connections=5),
)
c = make_client(extra_request_hooks=[self._log_request])
BooruClient._shared_client = c
return c

View File

@ -4,10 +4,7 @@ from __future__ import annotations
import logging
import httpx
from ..config import USER_AGENT
from ._safety import validate_public_request
from ..http import make_client
from .danbooru import DanbooruClient
from .gelbooru import GelbooruClient
from .moebooru import MoebooruClient
@ -29,22 +26,11 @@ async def detect_site_type(
url = url.rstrip("/")
from .base import BooruClient as _BC
# Reuse shared client for site detection. event_hooks mirrors
# Reuse shared client for site detection. Event hooks mirror
# BooruClient.client so detection requests get the same SSRF
# validation and connection logging as regular API calls.
if _BC._shared_client is None or _BC._shared_client.is_closed:
_BC._shared_client = httpx.AsyncClient(
headers={"User-Agent": USER_AGENT},
follow_redirects=True,
timeout=20.0,
event_hooks={
"request": [
validate_public_request,
_BC._log_request,
],
},
limits=httpx.Limits(max_connections=10, max_keepalive_connections=5),
)
_BC._shared_client = make_client(extra_request_hooks=[_BC._log_request])
client = _BC._shared_client
# Try Danbooru / e621 first — /posts.json is a definitive endpoint
try:

View File

@ -17,7 +17,7 @@ from urllib.parse import urlparse
import httpx
from PIL import Image
from .config import cache_dir, thumbnails_dir, USER_AGENT
from .config import cache_dir, thumbnails_dir
log = logging.getLogger("booru")
@ -77,23 +77,14 @@ def _get_shared_client(referer: str = "") -> httpx.AsyncClient:
c = _shared_client
if c is not None and not c.is_closed:
return c
# Lazy import: core.api.base imports log_connection from this
# module, so a top-level `from .api._safety import ...` would
# circular-import through api/__init__.py during cache.py load.
from .api._safety import validate_public_request
# Lazy import: core.http imports from core.api._safety, which
# lives inside the api package that imports this module, so a
# top-level import would circular through cache.py's load.
from .http import make_client
with _shared_client_lock:
c = _shared_client
if c is None or c.is_closed:
c = httpx.AsyncClient(
headers={
"User-Agent": USER_AGENT,
"Accept": "image/*,video/*,*/*",
},
follow_redirects=True,
timeout=60.0,
event_hooks={"request": [validate_public_request]},
limits=httpx.Limits(max_connections=10, max_keepalive_connections=5),
)
c = make_client(timeout=60.0, accept="image/*,video/*,*/*")
_shared_client = c
return c

73
booru_viewer/core/http.py Normal file
View File

@ -0,0 +1,73 @@
"""Shared httpx.AsyncClient constructor.
Three call sites build near-identical clients: the cache module's
download pool, ``BooruClient``'s shared API pool, and
``detect.detect_site_type``'s reach into that same pool. Centralising
the construction in one place means a future change (new SSRF hook,
new connection limit, different default UA) doesn't have to be made
three times and kept in sync.
The module does NOT manage the singletons themselves — each call site
keeps its own ``_shared_client`` and its own lock, so the cache
pool's long-lived large transfers don't compete with short JSON
requests from the API layer. ``make_client`` is a pure constructor.
"""
from __future__ import annotations
from typing import Callable, Iterable
import httpx
from .config import USER_AGENT
from .api._safety import validate_public_request
# Connection pool limits are identical across all three call sites.
# Keeping the default here centralises any future tuning.
# NOTE: httpx.Limits instances are immutable config objects, so sharing
# one across every client built by make_client is safe.
_DEFAULT_LIMITS = httpx.Limits(max_connections=10, max_keepalive_connections=5)
def make_client(
    *,
    timeout: float = 20.0,
    accept: str | None = None,
    extra_request_hooks: Iterable[Callable] | None = None,
) -> httpx.AsyncClient:
    """Build and return a new ``httpx.AsyncClient`` with project defaults.

    Every client constructed here gets, unconditionally:

    - a ``User-Agent`` header taken from ``core.config.USER_AGENT``
    - ``follow_redirects=True``
    - the ``validate_public_request`` SSRF hook, installed as the first
      request hook (caller-supplied hooks always run after it)
    - the shared connection limits (10 max connections, 5 keepalive)

    Parameters:
        timeout: per-request timeout in seconds. The cache download pool
            passes 60s for large videos; the API pool keeps the 20s default.
        accept: optional value for the ``Accept`` header. When ``None``
            the header is omitted so httpx's ``*/*`` default applies; the
            cache pool passes ``image/*,video/*,*/*``.
        extra_request_hooks: optional extra callables appended after
            ``validate_public_request`` — the API clients and detect both
            pass their connection-logging hook here.

    Callers own their singleton caching; this function always constructs
    a fresh instance.
    """
    request_hooks: list[Callable] = [validate_public_request]
    request_hooks.extend(extra_request_hooks or ())

    default_headers = {"User-Agent": USER_AGENT}
    if accept is not None:
        default_headers["Accept"] = accept

    return httpx.AsyncClient(
        headers=default_headers,
        follow_redirects=True,
        timeout=timeout,
        event_hooks={"request": request_hooks},
        limits=_DEFAULT_LIMITS,
    )