face/app/main.py

"""FastAPI application entry point."""
import logging
from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager
import httpx
from fastapi import FastAPI
import app.resources as resources
from app.config import (
    DET_SIZE,
    DOWNLOAD_TIMEOUT,
    HTTP_POOL_MAX_CONNECTIONS,
    HTTP_POOL_MAX_KEEPALIVE,
    INFERENCE_THREADS,
    MODEL_NAME,
    USE_TENSORRT,
)
from app.face import load_face_app
from app.routes import embed

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("face_service")

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler - load models and shared resources on startup."""
    logger.info("Starting face service...")

    # Create HTTP client with connection pooling
    limits = httpx.Limits(
        max_connections=HTTP_POOL_MAX_CONNECTIONS,
        max_keepalive_connections=HTTP_POOL_MAX_KEEPALIVE,
    )
    resources.http_client = httpx.AsyncClient(
        timeout=DOWNLOAD_TIMEOUT,
        limits=limits,
        # http2=True requires 'h2' package - disable for now
    )
    logger.info(
        f"HTTP client initialized (max_conn={HTTP_POOL_MAX_CONNECTIONS}, "
        f"keepalive={HTTP_POOL_MAX_KEEPALIVE})"
    )

    # Create thread pool for blocking operations (GPU inference, image decode)
    resources.inference_executor = ThreadPoolExecutor(
        max_workers=INFERENCE_THREADS,
        thread_name_prefix="inference",
    )
    logger.info(f"Thread pool initialized (workers={INFERENCE_THREADS})")

    # Load face model (may take time if TensorRT engines need building)
    load_face_app()
    logger.info("Face service ready")

    yield

    # Cleanup
    logger.info("Shutting down face service...")
    await resources.http_client.aclose()
    resources.inference_executor.shutdown(wait=True)
    logger.info("Cleanup complete")

app = FastAPI(
    title="Face Service",
    version="1.0.0",
    lifespan=lifespan,
)

# Include routers
app.include_router(embed.router)

@app.get("/healthz")
def healthz():
    """Health check endpoint."""
    return {
        "status": "ok",
        "model": MODEL_NAME,
        "det_size": DET_SIZE,
        "tensorrt": USE_TENSORRT,
    }
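
# Example invocation (a typical way to serve a FastAPI app; the host, port, and
# worker count below are illustrative assumptions, not taken from this repo):
#
#   uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers 1
#
# A single worker is usually preferable here because the lifespan handler loads
# the face model and builds a shared thread pool per process.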