"""FastAPI application entry point.""" import logging from concurrent.futures import ThreadPoolExecutor from contextlib import asynccontextmanager import httpx from fastapi import FastAPI import app.resources as resources from app.config import ( DET_SIZE, DOWNLOAD_TIMEOUT, HTTP_POOL_MAX_CONNECTIONS, HTTP_POOL_MAX_KEEPALIVE, INFERENCE_THREADS, MODEL_NAME, USE_TENSORRT, ) from app.face import load_face_app from app.routes import embed logging.basicConfig(level=logging.INFO) logger = logging.getLogger("face_service") @asynccontextmanager async def lifespan(app: FastAPI): """Application lifespan handler - load models and shared resources on startup.""" logger.info("Starting face service...") # Create HTTP client with connection pooling limits = httpx.Limits( max_connections=HTTP_POOL_MAX_CONNECTIONS, max_keepalive_connections=HTTP_POOL_MAX_KEEPALIVE, ) resources.http_client = httpx.AsyncClient( timeout=DOWNLOAD_TIMEOUT, limits=limits, # http2=True requires 'h2' package - disable for now ) logger.info( f"HTTP client initialized (max_conn={HTTP_POOL_MAX_CONNECTIONS}, " f"keepalive={HTTP_POOL_MAX_KEEPALIVE})" ) # Create thread pool for blocking operations (GPU inference, image decode) resources.inference_executor = ThreadPoolExecutor( max_workers=INFERENCE_THREADS, thread_name_prefix="inference", ) logger.info(f"Thread pool initialized (workers={INFERENCE_THREADS})") # Load face model (may take time if TensorRT engines need building) load_face_app() logger.info("Face service ready") yield # Cleanup logger.info("Shutting down face service...") await resources.http_client.aclose() resources.inference_executor.shutdown(wait=True) logger.info("Cleanup complete") app = FastAPI( title="Face Service", version="1.0.0", lifespan=lifespan, ) # Include routers app.include_router(embed.router) @app.get("/healthz") def healthz(): """Health check endpoint.""" return { "status": "ok", "model": MODEL_NAME, "det_size": DET_SIZE, "tensorrt": USE_TENSORRT, }