"""FastAPI application entry point."""
|
|
|
|
import logging
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from contextlib import asynccontextmanager
|
|
|
|
import httpx
|
|
from fastapi import FastAPI
|
|
|
|
import app.resources as resources
|
|
from app.config import (
|
|
DET_SIZE,
|
|
DOWNLOAD_TIMEOUT,
|
|
HTTP_POOL_MAX_CONNECTIONS,
|
|
HTTP_POOL_MAX_KEEPALIVE,
|
|
INFERENCE_THREADS,
|
|
MODEL_NAME,
|
|
USE_TENSORRT,
|
|
)
|
|
from app.face import load_face_app
|
|
from app.routes import embed
|
|
|
|
# Root logging config: INFO level so startup/shutdown progress is visible.
logging.basicConfig(level=logging.INFO)
# Named logger shared by the lifespan hooks and module-level handlers below.
logger = logging.getLogger("face_service")
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler - load models and shared resources on startup.

    On startup: creates a pooled async HTTP client, a thread pool for
    blocking work (GPU inference, image decode), and loads the face model.
    On shutdown: closes the HTTP client and drains the thread pool.

    Args:
        app: The FastAPI application instance (required by the lifespan
            protocol; not used directly here).

    Yields:
        None, once all shared resources are initialized.
    """
    logger.info("Starting face service...")

    # Create HTTP client with connection pooling
    limits = httpx.Limits(
        max_connections=HTTP_POOL_MAX_CONNECTIONS,
        max_keepalive_connections=HTTP_POOL_MAX_KEEPALIVE,
    )
    resources.http_client = httpx.AsyncClient(
        timeout=DOWNLOAD_TIMEOUT,
        limits=limits,
        # http2=True requires 'h2' package - disable for now
    )
    # Lazy %-style args: no string formatting cost when INFO is disabled.
    logger.info(
        "HTTP client initialized (max_conn=%s, keepalive=%s)",
        HTTP_POOL_MAX_CONNECTIONS,
        HTTP_POOL_MAX_KEEPALIVE,
    )

    # Create thread pool for blocking operations (GPU inference, image decode)
    resources.inference_executor = ThreadPoolExecutor(
        max_workers=INFERENCE_THREADS,
        thread_name_prefix="inference",
    )
    logger.info("Thread pool initialized (workers=%s)", INFERENCE_THREADS)

    # Load face model (may take time if TensorRT engines need building)
    load_face_app()

    logger.info("Face service ready")
    try:
        yield
    finally:
        # try/finally ensures cleanup runs even if the application errors
        # during its lifetime; otherwise the client and pool would leak.
        logger.info("Shutting down face service...")
        await resources.http_client.aclose()
        resources.inference_executor.shutdown(wait=True)
        logger.info("Cleanup complete")
|
|
|
|
|
|
# Application object; `lifespan` manages startup/shutdown of shared resources
# (HTTP client, inference thread pool, face model).
app = FastAPI(
    title="Face Service",
    version="1.0.0",
    lifespan=lifespan,
)

# Include routers
app.include_router(embed.router)
|
|
|
|
|
|
@app.get("/healthz")
def healthz():
    """Liveness probe: report service status and the active model configuration."""
    payload = dict(
        status="ok",
        model=MODEL_NAME,
        det_size=DET_SIZE,
        tensorrt=USE_TENSORRT,
    )
    return payload
|
|
|