Compare commits

2 commits: master ... tensorrt-e

| Author | SHA1 | Date |
|---|---|---|
| | a7595603b4 | |
| | b8f167d336 | |
.gitignore

@@ -25,6 +25,10 @@ ENV/
models/
*.onnx

+# TensorRT artifacts
+*.engine
+*.profile

# OS files
.DS_Store
Thumbs.db

@@ -32,4 +36,3 @@ Thumbs.db
# Test/temp files
*.tmp
*.bak
49 README.md

@@ -1,18 +1,23 @@
## Genealog Face Service

-FastAPI-based face embedding and matching microservice using InsightFace + ONNX Runtime GPU. This service is designed to be called from the `genealog-api` backend via HTTP.
+FastAPI-based face embedding microservice using InsightFace + ONNX Runtime GPU. This service generates face embeddings from images and is designed to be called from the `genealog-api` backend via HTTP.

### Endpoints

- `GET /healthz` – basic health check and model info.
- `POST /embed-avatar` – JSON body: `{ "image_url": "https://..." }`, returns a single best face embedding for an avatar image.
- `POST /embed-image` – JSON body: `{ "image_url": "https://..." }`, returns all detected faces and embeddings.
- `POST /test-avatar` – multipart form with fields:
  - `tag`: string tag for logging / correlation
  - `avatar`: avatar image file (face to match)
  - `image`: target image file (search space)

-All embeddings are normalized float vectors suitable for cosine-similarity comparison.
+All embeddings are normalized float vectors suitable for cosine-similarity comparison. Face matching/comparison is handled by the calling service (`genealog-api`).
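Because the vectors are unit-normalized, cosine similarity reduces to a plain dot product. A minimal sketch of the comparison the calling service would perform (illustrative only, not part of this diff; 512 matches the buffalo_l embedding size):

```python
import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    # For unit-length vectors, the cosine equals the dot product
    return float(np.dot(a, b))

rng = np.random.default_rng(0)
emb = rng.normal(size=512).astype(np.float32)
emb /= np.linalg.norm(emb)  # normalize to unit length, as the service does
assert abs(cosine_similarity(emb, emb) - 1.0) < 1e-5
```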
### Features

- **Async HTTP downloads** with retry logic (httpx + tenacity)
- **Image validation**: size limits (20MB max), dimension limits (32px-8192px), decompression bomb protection
- **Robust image decoding**: handles all color modes (RGB, RGBA, L, LA, PA, CMYK, I, F), EXIF orientation correction
- **Face detection fallback**: If no face is detected in `/embed-avatar`, falls back to center crop embedding
- **Embedding validation**: Checks for NaN/Inf values before returning
- **Modular structure**: Clean separation of concerns (config, models, face processing, image handling, routes)

`/embed-avatar` notes:
@@ -29,6 +34,14 @@ source .venv/bin/activate
pip install -r requirements.txt
```

+**Dependencies:**
+- `fastapi`, `uvicorn` - Web framework
+- `insightface` - Face detection and recognition
+- `onnxruntime-gpu` - GPU-accelerated inference
+- `httpx` - Async HTTP client for image downloads
+- `tenacity` - Retry logic
+- `opencv-python-headless`, `numpy`, `Pillow` - Image processing

GPU support assumes:

- WSL2 with GPU enabled.
@@ -60,6 +73,10 @@ You can override via environment variables:
PORT=18081 \
FACE_MODEL_NAME=buffalo_l \
FACE_DET_SIZE=1024 \
+MAX_DOWNLOAD_SIZE=20971520 \
+MAX_IMAGE_DIMENSION=8192 \
+DOWNLOAD_TIMEOUT=15.0 \
+MAX_RETRIES=3 \
UVICORN_WORKERS=20 \
./run_face_service.sh
```
@@ -72,6 +89,26 @@ nohup ./run_face_service.sh > face_service.log 2>&1 &

Logs are written to `face_service.log` in the repo root.

### Project Structure

```
genealog-face/
├── app/
│   ├── __init__.py
│   ├── main.py       # FastAPI app, lifespan handler, health endpoint
│   ├── config.py     # Environment variables and constants
│   ├── models.py     # Pydantic request/response models
│   ├── face.py       # FaceAnalysis loading, embedding logic
│   ├── image.py      # Image download, decode, validation
│   └── routes/
│       ├── __init__.py
│       └── embed.py  # /embed-avatar, /embed-image endpoints
├── .gitignore
├── requirements.txt
├── run_face_service.sh
└── README.md
```

### Integration with genealog-api (Docker)

The `genealog-api` service expects this face service to be reachable at:
app/config.py

@@ -5,16 +5,27 @@ import os
# Model configuration
MODEL_NAME = os.getenv("FACE_MODEL_NAME", "buffalo_l")
DET_SIZE = int(os.getenv("FACE_DET_SIZE", "1024"))
+# Fallback det_size for large faces (close-up selfies)
+FALLBACK_DET_SIZE = int(os.getenv("FACE_DET_SIZE_FALLBACK", "640"))
+USE_TENSORRT = os.getenv("USE_TENSORRT", "true").lower() in ("true", "1", "yes")

# Image processing limits
MAX_DOWNLOAD_SIZE = int(os.getenv("MAX_DOWNLOAD_SIZE", 20 * 1024 * 1024))  # 20MB
-MAX_IMAGE_DIMENSION = int(os.getenv("MAX_IMAGE_DIMENSION", 8192))  # 8192px
+MAX_IMAGE_DIMENSION = int(os.getenv("MAX_IMAGE_DIMENSION", 4096))  # 4096px (reduced from 8192)
MIN_IMAGE_DIMENSION = int(os.getenv("MIN_IMAGE_DIMENSION", 32))  # 32px
+TARGET_MAX_DIMENSION = int(os.getenv("TARGET_MAX_DIMENSION", 2048))  # Downscale large images

# HTTP client settings
DOWNLOAD_TIMEOUT = float(os.getenv("DOWNLOAD_TIMEOUT", 15.0))  # 15 seconds
MAX_RETRIES = int(os.getenv("MAX_RETRIES", 3))

+# HTTP connection pool settings
+HTTP_POOL_MAX_CONNECTIONS = int(os.getenv("HTTP_POOL_MAX_CONNECTIONS", 100))
+HTTP_POOL_MAX_KEEPALIVE = int(os.getenv("HTTP_POOL_MAX_KEEPALIVE", 20))

+# Thread pool for blocking operations (GPU inference, image decode)
+INFERENCE_THREADS = int(os.getenv("INFERENCE_THREADS", 4))

+# TODO [PROD]: Add URL allowlist for SSRF protection
+# ALLOWED_URL_PATTERNS = os.getenv("ALLOWED_URL_PATTERNS", "").split(",")
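Since these constants are read once via `os.getenv` when the module loads, overrides must be in the environment before `app.config` is first imported. A minimal sketch of that ordering constraint (illustrative, not part of the diff):

```python
import os

# Must happen before anything imports app.config
os.environ["FACE_DET_SIZE"] = "640"
os.environ["USE_TENSORRT"] = "false"

from app import config

assert config.DET_SIZE == 640
assert config.USE_TENSORRT is False  # "false" is not in ("true", "1", "yes")
```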
174 app/face.py
@@ -1,33 +1,167 @@
"""Face analysis and embedding logic using InsightFace."""

import asyncio
import logging
from concurrent.futures import ThreadPoolExecutor

import numpy as np
from insightface.app import FaceAnalysis

-from app.config import DET_SIZE, MODEL_NAME
+from app.config import DET_SIZE, FALLBACK_DET_SIZE, MODEL_NAME, USE_TENSORRT
from app.models import BBox

logger = logging.getLogger("face_service")

-face_app: FaceAnalysis | None = None
+# Two face apps for different image types (lazy loaded)
+face_app_large: FaceAnalysis | None = None  # det_size=1024 for group photos
+face_app_small: FaceAnalysis | None = None  # det_size=640 for selfies/portraits


-def load_face_app() -> FaceAnalysis:
-    """Load and initialize the FaceAnalysis model (singleton)."""
-    global face_app
-    if face_app is not None:
-        return face_app
-
-    logger.info(f"Loading InsightFace model pack={MODEL_NAME}, det_size={DET_SIZE}")
+def _check_tensorrt_available() -> bool:
+    """Check if TensorRT libraries are actually installed."""
+    try:
+        import tensorrt
+        return True
+    except ImportError:
+        return False


+def _get_providers() -> list:
+    """Get ONNX Runtime execution providers based on configuration."""
+    import onnxruntime as ort
+
+    available_providers = ort.get_available_providers()
+    logger.info(f"Available ONNX providers: {available_providers}")
+
+    providers = []
+
+    # Try TensorRT first if enabled and libraries are actually installed
+    if USE_TENSORRT and "TensorrtExecutionProvider" in available_providers:
+        if _check_tensorrt_available():
+            providers.append(
+                (
+                    "TensorrtExecutionProvider",
+                    {
+                        "trt_max_workspace_size": 2 * 1024 * 1024 * 1024,  # 2GB
+                        "trt_fp16_enable": True,  # FP16 for faster inference
+                        "trt_engine_cache_enable": True,  # Cache TensorRT engines
+                    },
+                )
+            )
+        else:
+            logger.warning("TensorRT requested but libnvinfer.so.10 not found, skipping")
+
+    # CUDA fallback
+    if "CUDAExecutionProvider" in available_providers:
+        providers.append("CUDAExecutionProvider")
+
+    # CPU fallback (always available)
+    providers.append("CPUExecutionProvider")
+
+    logger.info(f"Using providers: {[p[0] if isinstance(p, tuple) else p for p in providers]}")
+    return providers
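The returned list mixes plain provider names with `(name, options)` tuples, which is the format `onnxruntime.InferenceSession` accepts. A minimal sketch of consuming such a list directly (the model path is hypothetical, not from this repo):

```python
import onnxruntime as ort

providers = [
    ("TensorrtExecutionProvider", {"trt_fp16_enable": True}),
    "CUDAExecutionProvider",
    "CPUExecutionProvider",
]
# ORT tries providers in order and falls back to the next entry when a
# provider is unavailable or cannot handle the model.
session = ort.InferenceSession("detector.onnx", providers=providers)  # hypothetical path
print(session.get_providers())  # providers actually in use
```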

+def load_face_app(det_size: int = DET_SIZE) -> FaceAnalysis:
+    """Load and initialize the FaceAnalysis model for given det_size (cached)."""
+    global face_app_large, face_app_small
+
+    # Return cached instance if available
+    if det_size >= 1024 and face_app_large is not None:
+        return face_app_large
+    if det_size < 1024 and face_app_small is not None:
+        return face_app_small
+
+    providers = _get_providers()
+    logger.info(
+        f"Loading InsightFace model pack={MODEL_NAME}, det_size={det_size}, "
+        f"tensorrt={USE_TENSORRT}"
+    )
    fa = FaceAnalysis(
        name=MODEL_NAME,
-        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
+        providers=providers,
    )
-    fa.prepare(ctx_id=0, det_size=(DET_SIZE, DET_SIZE))
-    face_app = fa
-    logger.info("FaceAnalysis initialized")
-    return face_app
+    fa.prepare(ctx_id=0, det_size=(det_size, det_size))
+
+    # Cache the instance
+    if det_size >= 1024:
+        face_app_large = fa
+    else:
+        face_app_small = fa
+
+    logger.info(f"FaceAnalysis initialized (det_size={det_size})")
+    return fa


+def get_optimal_det_size(img_height: int, img_width: int) -> int:
+    """
+    Choose optimal det_size based on image characteristics.
+
+    - Portrait/selfie (tall, narrow) → 640 (face likely fills frame)
+    - Landscape/group photo → 1024 (need to detect small faces)
+    - Small images → 640 (no benefit from larger det_size)
+    """
+    max_dim = max(img_height, img_width)
+    min_dim = min(img_height, img_width)
+    aspect_ratio = max_dim / min_dim if min_dim > 0 else 1.0
+
+    # Small images - 640 is sufficient
+    if max_dim <= 1024:
+        return FALLBACK_DET_SIZE  # 640
+
+    # Portrait orientation (height > width) with tall aspect ratio
+    # Likely a selfie or single-person portrait
+    if img_height > img_width and aspect_ratio >= 1.3:
+        return FALLBACK_DET_SIZE  # 640
+
+    # Landscape or square, larger image - likely group photo
+    return DET_SIZE  # 1024
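A quick illustration of how the heuristic classifies a few hypothetical image sizes:

```python
# max_dim <= 1024: small image, 640 is enough
get_optimal_det_size(800, 600)     # -> 640

# 1920x1080 portrait: height > width and aspect ratio 1.78 >= 1.3 -> selfie
get_optimal_det_size(1920, 1080)   # -> 640

# 3000x4000 landscape: large and wide -> likely a group photo
get_optimal_det_size(3000, 4000)   # -> 1024

# 2000x2000 square: large but not tall -> treated like a group photo
get_optimal_det_size(2000, 2000)   # -> 1024
```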

+class FaceServiceError(Exception):
+    """Error during face detection/embedding."""
+    pass


+async def get_faces_async(
+    img: np.ndarray,
+    executor: ThreadPoolExecutor,
+) -> list:
+    """
+    Run face detection/embedding in thread pool to not block event loop.
+
+    Automatically selects optimal det_size based on image dimensions:
+    - Portrait/selfie → 640 (handles large faces)
+    - Landscape/group → 1024 (detects small faces)
+
+    Raises:
+        FaceServiceError: If face detection fails (GPU OOM, ONNX errors, etc.)
+    """
+    h, w = img.shape[:2]
+    det_size = get_optimal_det_size(h, w)
+    fa = load_face_app(det_size)
+
+    loop = asyncio.get_running_loop()
+    try:
+        faces = await loop.run_in_executor(executor, fa.get, img)
+    except Exception as e:
+        logger.error(
+            "get_faces_async: face detection failed det_size=%d image=%dx%d error=%s",
+            det_size, w, h, str(e)
+        )
+        raise FaceServiceError(f"Face detection failed: {str(e)}") from e
+
+    logger.debug(
+        "get_faces_async: det_size=%d, image=%dx%d, faces=%d",
+        det_size, w, h, len(faces)
+    )
+
+    return faces


+def face_area(face) -> float:
+    """Calculate face bounding box area for sorting."""
+    return (face.bbox[2] - face.bbox[0]) * (face.bbox[3] - face.bbox[1])


def to_pixel_bbox(bbox, width: int, height: int) -> BBox:
@@ -52,11 +186,19 @@ def validate_embedding(embedding: np.ndarray) -> bool:


def normalize_embedding(embedding: np.ndarray) -> np.ndarray:
-    """Normalize embedding vector to unit length."""
+    """Normalize embedding vector to unit length.
+
+    Returns None-equivalent behavior if embedding is near-zero vector,
+    which would cause NaN in similarity calculations.
+    """
    emb = embedding.astype(np.float32)
    norm = float(np.linalg.norm(emb))
-    if norm > 0.0:
-        emb = emb / norm
+    # Near-zero vectors can't be normalized meaningfully
+    # and would cause issues in similarity calculations
+    if norm < 0.01:
+        logger.warning("normalize_embedding: near-zero vector detected (norm=%.6f)", norm)
+        return emb  # Return as-is, validate_embedding will catch it
+    emb = emb / norm
    return emb
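Why the guard matters, as a small illustration (not part of the diff): dividing a true zero vector by its norm yields NaN, which would silently poison downstream cosine scores.

```python
import numpy as np

zero = np.zeros(512, dtype=np.float32)
norm = float(np.linalg.norm(zero))  # 0.0
with np.errstate(invalid="ignore"):
    bad = zero / norm               # 0/0 -> all NaN
print(np.isnan(bad).all())          # True: validate_embedding must reject this
```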

73 app/image.py
@@ -1,6 +1,8 @@
"""Image download, decoding, and validation utilities."""

+import asyncio
import logging
+from concurrent.futures import ThreadPoolExecutor
from io import BytesIO

import cv2

@@ -21,6 +23,7 @@ from app.config import (
    MAX_IMAGE_DIMENSION,
    MAX_RETRIES,
    MIN_IMAGE_DIMENSION,
+    TARGET_MAX_DIMENSION,
)

logger = logging.getLogger("face_service")

@@ -114,6 +117,9 @@ def _decode_image_bytes(data: bytes, source: str) -> np.ndarray:
    img = np.array(pil_image)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

+    # Downscale large images for faster processing
+    img = _maybe_downscale(img)

    logger.info(
        "decode_image_bytes: source=%s shape=%s mode=%s",
        source,
@@ -123,6 +129,25 @@ def _decode_image_bytes(data: bytes, source: str) -> np.ndarray:
    return img


+def _maybe_downscale(img: np.ndarray, max_dim: int = TARGET_MAX_DIMENSION) -> np.ndarray:
+    """Downscale image if larger than max_dim while preserving aspect ratio."""
+    h, w = img.shape[:2]
+    if max(h, w) <= max_dim:
+        return img
+
+    scale = max_dim / max(h, w)
+    new_w = int(w * scale)
+    new_h = int(h * scale)
+
+    logger.info(
+        "downscaling image from %dx%d to %dx%d (scale=%.2f)",
+        w, h, new_w, new_h, scale,
+    )
+
+    # Use INTER_AREA for downscaling (best quality)
+    return cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)
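With the default `TARGET_MAX_DIMENSION` of 2048, a hypothetical 4000x3000 photo works out like this:

```python
# h, w = 3000, 4000; max(h, w) = 4000 > 2048, so we downscale
scale = 2048 / 4000        # 0.512
new_w = int(4000 * scale)  # 2048
new_h = int(3000 * scale)  # 1536
# Result: 2048x1536, same 4:3 aspect ratio, roughly 3.8x fewer pixels
# for decoding and face detection to chew through.
```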

@retry(
    retry=retry_if_exception_type((httpx.TimeoutException, httpx.NetworkError)),
    stop=stop_after_attempt(MAX_RETRIES),

@@ -158,19 +183,46 @@ async def _download_with_retry(client: httpx.AsyncClient, url: str) -> bytes:
    return content


-async def download_image(image_url: str) -> np.ndarray:
+async def download_image(
+    image_url: str,
+    client: httpx.AsyncClient | None = None,
+    executor: ThreadPoolExecutor | None = None,
+) -> np.ndarray:
    """
    Download and decode an image from URL.

    Features:
-    - Async HTTP with connection pooling
+    - Async HTTP with connection pooling (uses shared client if provided)
    - Retry with exponential backoff for transient failures
    - Size validation before and after download
    - Comprehensive image decoding
+    - Async image decoding in thread pool
+
+    Args:
+        image_url: URL to download image from
+        client: Shared httpx client (falls back to creating new one if None)
+        executor: Thread pool for blocking decode (runs sync if None)
    """
+    # Use shared client or create temporary one
+    if client is None:
+        from app.resources import http_client
+        client = http_client
+
+    # Fallback to temporary client if still None (e.g., during tests)
+    if client is None:
+        async with httpx.AsyncClient(timeout=DOWNLOAD_TIMEOUT) as temp_client:
+            return await _download_and_decode(temp_client, image_url, executor)
+
+    return await _download_and_decode(client, image_url, executor)
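A minimal sketch of calling this helper from application code (names match this diff; the wrapper function is hypothetical):

```python
from app.image import download_image

async def handle(image_url: str):
    # Passing no client lets download_image resolve the shared pooled client
    # lazily from app.resources at call time; it falls back to a temporary
    # httpx.AsyncClient if the lifespan has not created one yet (e.g. tests).
    img = await download_image(image_url)
    return img.shape  # (height, width, 3) BGR ndarray
```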

+async def _download_and_decode(
+    client: httpx.AsyncClient,
+    image_url: str,
+    executor: ThreadPoolExecutor | None,
+) -> np.ndarray:
+    """Internal helper to download and decode image."""
    try:
-        async with httpx.AsyncClient(timeout=DOWNLOAD_TIMEOUT) as client:
-            data = await _download_with_retry(client, image_url)
+        data = await _download_with_retry(client, image_url)
    except httpx.TimeoutException:
        logger.exception("Timeout downloading image")
        raise HTTPException(status_code=408, detail="Timeout downloading image")

@@ -186,8 +238,15 @@ async def download_image(image_url: str) -> np.ndarray:
        logger.exception("Failed to download image")
        raise HTTPException(status_code=400, detail=f"Failed to download image: {e}")

+    # Decode in thread pool to avoid blocking event loop
    try:
-        img = _decode_image_bytes(data, image_url)
+        if executor is not None:
+            loop = asyncio.get_running_loop()
+            img = await loop.run_in_executor(
+                executor, _decode_image_bytes, data, image_url
+            )
+        else:
+            img = _decode_image_bytes(data, image_url)
    except (ImageDecodeError, ImageValidationError) as e:
        raise HTTPException(status_code=400, detail=str(e))

49 app/main.py
@@ -1,13 +1,24 @@
"""FastAPI application entry point."""

import logging
+from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager

+import httpx
from fastapi import FastAPI

-from app.config import DET_SIZE, MODEL_NAME
+import app.resources as resources
+from app.config import (
+    DET_SIZE,
+    DOWNLOAD_TIMEOUT,
+    HTTP_POOL_MAX_CONNECTIONS,
+    HTTP_POOL_MAX_KEEPALIVE,
+    INFERENCE_THREADS,
+    MODEL_NAME,
+    USE_TENSORRT,
+)
from app.face import load_face_app
-from app.routes import embed
+from app.routes import benchmark, embed

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("face_service")
@@ -15,12 +26,42 @@ logger = logging.getLogger("face_service")

@asynccontextmanager
async def lifespan(app: FastAPI):
-    """Application lifespan handler - load models on startup."""
+    """Application lifespan handler - load models and shared resources on startup."""
    logger.info("Starting face service...")

+    # Create HTTP client with connection pooling
+    limits = httpx.Limits(
+        max_connections=HTTP_POOL_MAX_CONNECTIONS,
+        max_keepalive_connections=HTTP_POOL_MAX_KEEPALIVE,
+    )
+    resources.http_client = httpx.AsyncClient(
+        timeout=DOWNLOAD_TIMEOUT,
+        limits=limits,
+        # http2=True requires 'h2' package - disable for now
+    )
+    logger.info(
+        f"HTTP client initialized (max_conn={HTTP_POOL_MAX_CONNECTIONS}, "
+        f"keepalive={HTTP_POOL_MAX_KEEPALIVE})"
+    )
+
+    # Create thread pool for blocking operations (GPU inference, image decode)
+    resources.inference_executor = ThreadPoolExecutor(
+        max_workers=INFERENCE_THREADS,
+        thread_name_prefix="inference",
+    )
+    logger.info(f"Thread pool initialized (workers={INFERENCE_THREADS})")
+
+    # Load face model (may take time if TensorRT engines need building)
    load_face_app()
+
    logger.info("Face service ready")
    yield

+    # Cleanup
+    logger.info("Shutting down face service...")
+    await resources.http_client.aclose()
+    resources.inference_executor.shutdown(wait=True)
+    logger.info("Cleanup complete")

app = FastAPI(

@@ -31,6 +72,7 @@ app = FastAPI(

# Include routers
app.include_router(embed.router)
+app.include_router(benchmark.router)


@app.get("/healthz")

@@ -40,5 +82,6 @@ def healthz():
        "status": "ok",
        "model": MODEL_NAME,
        "det_size": DET_SIZE,
+        "tensorrt": USE_TENSORRT,
    }
app/models.py

@@ -1,7 +1,5 @@
"""Pydantic models for request/response schemas."""

-from typing import List
-
from pydantic import BaseModel, HttpUrl

@@ -19,17 +17,21 @@ class BBox(BaseModel):
class FaceEmbedding(BaseModel):
    bbox: BBox
    score: float
-    embedding: List[float]
+    embedding: list[float]


class EmbedAvatarResponse(BaseModel):
-    embedding: List[float]
+    embedding: list[float]
    bbox: BBox
    score: float
+    processed_width: int | None = None
+    processed_height: int | None = None


class EmbedImageResponse(BaseModel):
-    faces: List[FaceEmbedding]
+    faces: list[FaceEmbedding]
+    processed_width: int | None = None
+    processed_height: int | None = None


class MatchResult(BaseModel):
app/resources.py (new file)

@@ -0,0 +1,9 @@
"""Shared application resources (HTTP client, thread pool, etc.)."""

from concurrent.futures import ThreadPoolExecutor

import httpx

# Global shared resources (initialized in app lifespan)
http_client: httpx.AsyncClient | None = None
inference_executor: ThreadPoolExecutor | None = None
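One subtlety this module relies on, shown as a sketch (not part of the diff): writers must rebind the attribute through the module object, and readers that imported the name directly keep the stale `None` snapshot. That is why main.py uses `import app.resources as resources` for assignment and why `download_image` re-imports `http_client` lazily inside the function body.

```python
import app.resources as resources
from app.resources import http_client  # snapshot taken at import time: None

resources.http_client = object()       # rebinding the module attribute works

print(http_client)            # None: the early snapshot never updates
print(resources.http_client)  # <object ...>: attribute lookup sees the new value
```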

app/routes/benchmark.py (new file)

@@ -0,0 +1,516 @@
"""Benchmark UI for face comparison."""

import asyncio
import base64
import logging
from io import BytesIO

import cv2
import numpy as np
from fastapi import APIRouter, File, UploadFile
from fastapi.responses import HTMLResponse

from app.face import face_area, get_faces_async, validate_embedding
from app.image import read_upload_image
from app.resources import inference_executor

logger = logging.getLogger("face_service")

router = APIRouter(prefix="/benchmark", tags=["benchmark"])


def cosine_similarity(emb1: np.ndarray, emb2: np.ndarray) -> float:
    """Compute cosine similarity between two embeddings."""
    # Embeddings are already normalized, so dot product = cosine similarity
    return float(np.dot(emb1, emb2))

def draw_faces_on_image(img: np.ndarray, faces: list, face_indices: list[int]) -> np.ndarray:
    """Draw bounding boxes and indices on image."""
    img_copy = img.copy()
    for idx, face in zip(face_indices, faces):
        bbox = face.bbox.astype(int)
        x1, y1, x2, y2 = bbox
        # Draw rectangle
        cv2.rectangle(img_copy, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Draw index label
        label = f"#{idx}"
        (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)
        cv2.rectangle(img_copy, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
        cv2.putText(img_copy, label, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
    return img_copy


def encode_image_to_base64(img: np.ndarray, max_dim: int = 800) -> str:
    """Encode image to base64 for display in HTML, resizing if needed."""
    h, w = img.shape[:2]
    if max(h, w) > max_dim:
        scale = max_dim / max(h, w)
        img = cv2.resize(img, (int(w * scale), int(h * scale)))

    # Convert BGR to RGB for proper display
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    from PIL import Image
    pil_img = Image.fromarray(img_rgb)
    buffer = BytesIO()
    pil_img.save(buffer, format="JPEG", quality=85)
    return base64.b64encode(buffer.getvalue()).decode()


def crop_face(img: np.ndarray, bbox: np.ndarray, padding: float = 0.2) -> np.ndarray:
    """Crop face from image with padding."""
    h, w = img.shape[:2]
    x1, y1, x2, y2 = bbox.astype(int)

    # Add padding
    face_w = x2 - x1
    face_h = y2 - y1
    pad_x = int(face_w * padding)
    pad_y = int(face_h * padding)

    x1 = max(0, x1 - pad_x)
    y1 = max(0, y1 - pad_y)
    x2 = min(w, x2 + pad_x)
    y2 = min(h, y2 + pad_y)

    return img[y1:y2, x1:x2]
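With the default padding of 0.2, a hypothetical 100x120 px face box grows by 20 px horizontally and 24 px vertically on each side, clamped at the image borders:

```python
# bbox = (200, 300, 300, 420): face_w = 100, face_h = 120
pad_x = int(100 * 0.2)  # 20
pad_y = int(120 * 0.2)  # 24
# Crop becomes (180, 276) .. (320, 444), i.e. 140x168 px,
# unless the max(0, ...) / min(w or h, ...) clamps hit an image edge.
```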

@router.get("/", response_class=HTMLResponse)
async def benchmark_ui():
    """Serve the benchmark UI."""
    return """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Face Benchmark</title>
    <style>
        * { box-sizing: border-box; }
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            max-width: 1400px;
            margin: 0 auto;
            padding: 20px;
            background: #1a1a2e;
            color: #eee;
        }
        h1 { text-align: center; color: #00d4ff; }
        .upload-section {
            display: flex;
            gap: 20px;
            margin-bottom: 20px;
            flex-wrap: wrap;
        }
        .upload-box {
            flex: 1;
            min-width: 300px;
            border: 2px dashed #444;
            border-radius: 10px;
            padding: 20px;
            text-align: center;
            background: #16213e;
        }
        .upload-box h3 { margin-top: 0; color: #00d4ff; }
        .upload-box input[type="file"] {
            display: block;
            margin: 10px auto;
        }
        .preview {
            max-width: 100%;
            max-height: 300px;
            margin-top: 10px;
            border-radius: 5px;
        }
        button {
            display: block;
            width: 100%;
            max-width: 300px;
            margin: 20px auto;
            padding: 15px 30px;
            font-size: 18px;
            background: #00d4ff;
            color: #1a1a2e;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            font-weight: bold;
        }
        button:hover { background: #00b8e6; }
        button:disabled { background: #444; cursor: not-allowed; }
        #results {
            margin-top: 30px;
        }
        .results-grid {
            display: flex;
            gap: 20px;
            flex-wrap: wrap;
        }
        .result-box {
            flex: 1;
            min-width: 300px;
            background: #16213e;
            border-radius: 10px;
            padding: 20px;
        }
        .result-box h3 { margin-top: 0; color: #00d4ff; }
        .result-box img {
            max-width: 100%;
            border-radius: 5px;
        }
        .similarity-table {
            width: 100%;
            margin-top: 20px;
            background: #16213e;
            border-radius: 10px;
            padding: 20px;
        }
        .similarity-table h3 { margin-top: 0; color: #00d4ff; }
        table {
            width: 100%;
            border-collapse: collapse;
            margin-top: 10px;
        }
        th, td {
            padding: 10px;
            text-align: center;
            border: 1px solid #333;
        }
        th { background: #0f3460; }
        .match-high { background: #00c853; color: #000; font-weight: bold; }
        .match-medium { background: #ffab00; color: #000; }
        .match-low { background: #333; }
        .loading {
            text-align: center;
            padding: 40px;
            font-size: 18px;
        }
        .spinner {
            border: 4px solid #333;
            border-top: 4px solid #00d4ff;
            border-radius: 50%;
            width: 40px;
            height: 40px;
            animation: spin 1s linear infinite;
            margin: 20px auto;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
        .error { color: #ff5252; text-align: center; padding: 20px; }
        .stats { margin-top: 10px; font-size: 14px; color: #888; }
        .best-match {
            background: #16213e;
            border-radius: 10px;
            padding: 20px;
            margin-top: 20px;
        }
        .best-match h3 { color: #00d4ff; margin-top: 0; }
        .match-item {
            display: flex;
            align-items: center;
            gap: 15px;
            padding: 15px;
            background: #0f3460;
            border-radius: 8px;
            margin-bottom: 15px;
        }
        .match-score {
            font-size: 28px;
            font-weight: bold;
            min-width: 90px;
            text-align: center;
        }
        .face-crop {
            width: 100px;
            height: 100px;
            object-fit: cover;
            border-radius: 8px;
            border: 2px solid #00d4ff;
        }
        .match-label {
            font-size: 14px;
            color: #888;
            margin-left: auto;
        }
    </style>
</head>
<body>
    <h1>Face Comparison Benchmark</h1>

    <form id="uploadForm">
        <div class="upload-section">
            <div class="upload-box">
                <h3>Image 1 (Source)</h3>
                <p>Upload an image to find faces from</p>
                <input type="file" id="image1" name="image1" accept="image/*" required>
                <img id="preview1" class="preview" style="display:none;">
            </div>
            <div class="upload-box">
                <h3>Image 2 (Target)</h3>
                <p>Upload an image to search for matching faces</p>
                <input type="file" id="image2" name="image2" accept="image/*" required>
                <img id="preview2" class="preview" style="display:none;">
            </div>
        </div>
        <button type="submit" id="compareBtn">Compare Faces</button>
    </form>

    <div id="results"></div>

    <script>
        // Preview images on selection
        document.getElementById('image1').addEventListener('change', function(e) {
            previewImage(e.target, 'preview1');
        });
        document.getElementById('image2').addEventListener('change', function(e) {
            previewImage(e.target, 'preview2');
        });

        function previewImage(input, previewId) {
            const preview = document.getElementById(previewId);
            if (input.files && input.files[0]) {
                const reader = new FileReader();
                reader.onload = function(e) {
                    preview.src = e.target.result;
                    preview.style.display = 'block';
                };
                reader.readAsDataURL(input.files[0]);
            }
        }

        document.getElementById('uploadForm').addEventListener('submit', async function(e) {
            e.preventDefault();

            const image1 = document.getElementById('image1').files[0];
            const image2 = document.getElementById('image2').files[0];

            if (!image1 || !image2) {
                alert('Please select both images');
                return;
            }

            const resultsDiv = document.getElementById('results');
            const btn = document.getElementById('compareBtn');

            btn.disabled = true;
            btn.textContent = 'Processing...';
            resultsDiv.innerHTML = '<div class="loading"><div class="spinner"></div>Detecting faces and computing embeddings...</div>';

            const formData = new FormData();
            formData.append('image1', image1);
            formData.append('image2', image2);

            try {
                const startTime = performance.now();
                const response = await fetch('/benchmark/compare', {
                    method: 'POST',
                    body: formData
                });
                const endTime = performance.now();

                if (!response.ok) {
                    const error = await response.json();
                    throw new Error(error.detail || 'Comparison failed');
                }

                const data = await response.json();
                displayResults(data, endTime - startTime);
            } catch (error) {
                resultsDiv.innerHTML = `<div class="error">Error: ${error.message}</div>`;
            } finally {
                btn.disabled = false;
                btn.textContent = 'Compare Faces';
            }
        });

        function displayResults(data, processingTime) {
            const resultsDiv = document.getElementById('results');

            let html = `
                <div class="stats">Processing time: ${(processingTime/1000).toFixed(2)}s |
                Image 1: ${data.image1_faces} face(s) | Image 2: ${data.image2_faces} face(s)</div>

                <div class="results-grid">
                    <div class="result-box">
                        <h3>Image 1 - ${data.image1_faces} face(s) detected</h3>
                        <img src="data:image/jpeg;base64,${data.image1_annotated}" alt="Image 1">
                    </div>
                    <div class="result-box">
                        <h3>Image 2 - ${data.image2_faces} face(s) detected</h3>
                        <img src="data:image/jpeg;base64,${data.image2_annotated}" alt="Image 2">
                    </div>
                </div>
            `;

            if (data.similarities.length > 0) {
                html += `
                    <div class="best-match">
                        <h3>Best Matches (Image 1 faces found in Image 2)</h3>
                `;

                for (const match of data.best_matches) {
                    const scorePercent = (match.similarity * 100).toFixed(1);
                    const scoreClass = match.similarity >= 0.5 ? 'match-high' :
                                       match.similarity >= 0.3 ? 'match-medium' : 'match-low';
                    html += `
                        <div class="match-item">
                            <img src="data:image/jpeg;base64,${match.face1_crop}" class="face-crop" alt="Face ${match.face1_idx}">
                            <span class="match-score ${scoreClass}">${scorePercent}%</span>
                            <img src="data:image/jpeg;base64,${match.face2_crop}" class="face-crop" alt="Face ${match.face2_idx}">
                            <span class="match-label">Face #${match.face1_idx} ↔ Face #${match.face2_idx}</span>
                        </div>
                    `;
                }

                html += '</div>';

                // Similarity matrix
                html += `
                    <div class="similarity-table">
                        <h3>Full Similarity Matrix</h3>
                        <table>
                            <tr>
                                <th>Image 1 \\ Image 2</th>
                `;

                for (let j = 0; j < data.image2_faces; j++) {
                    html += `<th>Face #${j}</th>`;
                }
                html += '</tr>';

                for (let i = 0; i < data.image1_faces; i++) {
                    html += `<tr><th>Face #${i}</th>`;
                    for (let j = 0; j < data.image2_faces; j++) {
                        const sim = data.similarity_matrix[i][j];
                        const percent = (sim * 100).toFixed(1);
                        const cls = sim >= 0.5 ? 'match-high' : sim >= 0.3 ? 'match-medium' : 'match-low';
                        html += `<td class="${cls}">${percent}%</td>`;
                    }
                    html += '</tr>';
                }

                html += '</table></div>';
            } else if (data.image1_faces === 0 || data.image2_faces === 0) {
                html += '<div class="error">No faces detected in one or both images</div>';
            }

            resultsDiv.innerHTML = html;
        }
    </script>
</body>
</html>
"""

@router.post("/compare")
async def compare_faces(
    image1: UploadFile = File(...),
    image2: UploadFile = File(...),
):
    """
    Compare faces between two uploaded images.

    Returns:
        - Annotated images with face bounding boxes
        - Similarity matrix between all detected faces
        - Best matches for each face in image1
    """
    logger.info("benchmark/compare: image1=%s image2=%s", image1.filename, image2.filename)

    # Read both images
    data1 = await image1.read()
    data2 = await image2.read()

    img1 = read_upload_image(data1, image1.filename or "image1")
    img2 = read_upload_image(data2, image2.filename or "image2")

    # Detect faces in both images concurrently
    faces1, faces2 = await asyncio.gather(
        get_faces_async(img1, inference_executor),
        get_faces_async(img2, inference_executor),
    )

    logger.info(
        "benchmark/compare: detected %d faces in image1, %d faces in image2",
        len(faces1), len(faces2)
    )

    # Sort faces by area (largest first)
    faces1.sort(key=face_area, reverse=True)
    faces2.sort(key=face_area, reverse=True)

    # Draw faces on images
    face1_indices = list(range(len(faces1)))
    face2_indices = list(range(len(faces2)))

    img1_annotated = draw_faces_on_image(img1, faces1, face1_indices)
    img2_annotated = draw_faces_on_image(img2, faces2, face2_indices)

    # Encode images for response
    img1_b64 = encode_image_to_base64(img1_annotated)
    img2_b64 = encode_image_to_base64(img2_annotated)

    # Compute similarity matrix
    similarity_matrix: list[list[float]] = []
    best_matches: list[dict] = []

    for i, f1 in enumerate(faces1):
        emb1 = f1.normed_embedding.astype(np.float32)
        if not validate_embedding(emb1):
            similarity_matrix.append([0.0] * len(faces2))
            continue

        row = []
        best_sim = -1.0
        best_j = -1

        for j, f2 in enumerate(faces2):
            emb2 = f2.normed_embedding.astype(np.float32)
            if not validate_embedding(emb2):
                row.append(0.0)
                continue

            sim = cosine_similarity(emb1, emb2)
            row.append(sim)

            if sim > best_sim:
                best_sim = sim
                best_j = j

        similarity_matrix.append(row)

        if best_j >= 0:
            best_matches.append({
                "face1_idx": i,
                "face2_idx": best_j,
                "similarity": best_sim,
            })

    # Sort best matches by similarity and keep top 3
    best_matches.sort(key=lambda m: m["similarity"], reverse=True)
    best_matches = best_matches[:3]
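The nested loop is fine at these face counts; for larger batches the same matrix falls out of a single matrix multiply, since the embeddings are already row-normalized. A sketch of that alternative (illustrative only, assumes both face lists are non-empty; not what the diff does):

```python
import numpy as np

E1 = np.stack([f.normed_embedding for f in faces1]).astype(np.float32)  # (n1, 512)
E2 = np.stack([f.normed_embedding for f in faces2]).astype(np.float32)  # (n2, 512)
sim = E1 @ E2.T             # (n1, n2) cosine similarity matrix in one shot
best_j = sim.argmax(axis=1) # best image-2 match for each image-1 face
```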

    # Add cropped face images for top 3 matches
    for match in best_matches:
        i, j = match["face1_idx"], match["face2_idx"]
        crop1 = crop_face(img1, faces1[i].bbox)
        crop2 = crop_face(img2, faces2[j].bbox)
        match["face1_crop"] = encode_image_to_base64(crop1, max_dim=150)
        match["face2_crop"] = encode_image_to_base64(crop2, max_dim=150)

    return {
        "image1_faces": len(faces1),
        "image2_faces": len(faces2),
        "image1_annotated": img1_b64,
        "image2_annotated": img2_b64,
        "similarity_matrix": similarity_matrix,
        "similarities": [
            {"face1": i, "face2": j, "score": similarity_matrix[i][j]}
            for i in range(len(faces1))
            for j in range(len(faces2))
        ],
        "best_matches": best_matches,
    }
app/routes/embed.py

@@ -1,45 +1,65 @@
"""Face embedding endpoints."""

import logging
-from typing import List

import numpy as np
from fastapi import APIRouter, HTTPException

from app.face import (
    FaceServiceError,
    face_area,
    fallback_avatar_embedding,
    get_faces_async,
    load_face_app,
    to_pixel_bbox,
    validate_embedding,
)
from app.image import download_image
from app.models import (
    BBox,
    EmbedAvatarResponse,
    EmbedImageResponse,
    EmbedRequest,
    FaceEmbedding,
)
+from app.resources import http_client, inference_executor

+# Expected embedding dimension from buffalo_l model
+EXPECTED_EMBEDDING_DIM = 512

logger = logging.getLogger("face_service")

router = APIRouter()


+def validate_face_embedding(emb: np.ndarray, context: str) -> tuple[bool, str | None]:
+    """
+    Validate embedding dimension and values.
+    Returns (is_valid, error_message).
+    """
+    if len(emb) != EXPECTED_EMBEDDING_DIM:
+        return False, f"{context}: unexpected embedding dimension {len(emb)}, expected {EXPECTED_EMBEDDING_DIM}"
+    if not validate_embedding(emb):
+        return False, f"{context}: embedding contains NaN/Inf values"
+    return True, None


@router.post("/embed-avatar", response_model=EmbedAvatarResponse)
async def embed_avatar(req: EmbedRequest):
    """
    Extract face embedding from an avatar image.

    Returns the largest detected face. If no face is detected,
    falls back to center crop embedding with score=0.0.
    """
    logger.info("embed_avatar: image_url=%s", req.image_url)
-    fa = load_face_app()
-    img = await download_image(str(req.image_url))
+    img = await download_image(str(req.image_url), http_client, inference_executor)
    h, w = img.shape[:2]

-    faces = fa.get(img)
+    try:
+        faces = await get_faces_async(img, inference_executor)
+    except FaceServiceError as e:
+        logger.error("embed_avatar: face service error: %s", str(e))
+        raise HTTPException(status_code=503, detail="Face service unavailable")
    if len(faces) == 0:
        logger.warning(
            "embed_avatar: no faces detected image_url=%s size=%dx%d, using fallback",

@@ -47,6 +67,7 @@ async def embed_avatar(req: EmbedRequest):
            w,
            h,
        )
+        fa = load_face_app()  # Need face_app for recognition model
        fallback = fallback_avatar_embedding(fa, img, w, h)
        if fallback is None:
            raise HTTPException(

@@ -61,25 +82,28 @@ async def embed_avatar(req: EmbedRequest):
            score,
            len(emb),
        )
-        return EmbedAvatarResponse(embedding=emb, bbox=bbox, score=score)
+        return EmbedAvatarResponse(
+            embedding=emb,
+            bbox=bbox,
+            score=score,
+            processed_width=w,
+            processed_height=h,
+        )

    # Sort by face area (largest first)
-    faces.sort(
-        key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
-        reverse=True,
-    )
+    faces.sort(key=face_area, reverse=True)
    face = faces[0]

    emb = face.normed_embedding.astype(np.float32)

-    # Validate embedding
-    if not validate_embedding(emb):
-        logger.error("embed_avatar: embedding contains NaN/Inf values")
+    is_valid, error_msg = validate_face_embedding(emb, "embed_avatar")
+    if not is_valid:
+        logger.error(error_msg)
        raise HTTPException(
            status_code=422,
            detail="Failed to generate valid face embedding",
        )

    emb_list = emb.tolist()
    bbox = to_pixel_bbox(face.bbox, w, h)
    score = float(getattr(face, "det_score", 1.0))

@@ -91,22 +115,32 @@ async def embed_avatar(req: EmbedRequest):
        len(emb_list),
    )

-    return EmbedAvatarResponse(embedding=emb_list, bbox=bbox, score=score)
+    return EmbedAvatarResponse(
+        embedding=emb_list,
+        bbox=bbox,
+        score=score,
+        processed_width=w,
+        processed_height=h,
+    )


@router.post("/embed-image", response_model=EmbedImageResponse)
async def embed_image(req: EmbedRequest):
    """
    Extract face embeddings from all faces in an image.

    Returns all detected faces sorted by detection score (highest first).
    Returns empty list if no faces detected.
    """
-    fa = load_face_app()
-    img = await download_image(str(req.image_url))
+    img = await download_image(str(req.image_url), http_client, inference_executor)
    h, w = img.shape[:2]

-    faces = fa.get(img)
+    try:
+        faces = await get_faces_async(img, inference_executor)
+    except FaceServiceError as e:
+        logger.error("embed_image: face service error: %s", str(e))
+        raise HTTPException(status_code=503, detail="Face service unavailable")

    if len(faces) == 0:
        logger.warning(
            "embed_image: no faces detected image_url=%s size=%dx%d",

@@ -114,7 +148,7 @@ async def embed_image(req: EmbedRequest):
            w,
            h,
        )
-        return EmbedImageResponse(faces=[])
+        return EmbedImageResponse(faces=[], processed_width=w, processed_height=h)

    logger.info(
        "embed_image: detected %d faces image_url=%s size=%dx%d",

@@ -130,19 +164,19 @@ async def embed_image(req: EmbedRequest):
        reverse=True,
    )

-    result: List[FaceEmbedding] = []
+    result: list[FaceEmbedding] = []
    for f in faces:
        emb = f.normed_embedding.astype(np.float32)

-        # Skip faces with invalid embeddings
-        if not validate_embedding(emb):
-            logger.warning("embed_image: skipping face with NaN/Inf embedding")
+        is_valid, error_msg = validate_face_embedding(emb, "embed_image")
+        if not is_valid:
+            logger.warning(error_msg)
            continue

        emb_list = emb.tolist()
        bbox = to_pixel_bbox(f.bbox, w, h)
        score = float(getattr(f, "det_score", 1.0))
        result.append(FaceEmbedding(bbox=bbox, score=score, embedding=emb_list))

    return EmbedImageResponse(faces=result, processed_width=w, processed_height=h)
run_face_service.sh

@@ -5,27 +5,66 @@ set -euo pipefail
if [ -d ".venv" ]; then
    # shellcheck disable=SC1091
    source ".venv/bin/activate"
+
+    # Add TensorRT libs to library path if installed via pip
+    TENSORRT_LIBS=".venv/lib/python3.12/site-packages/tensorrt_libs"
+    if [ -d "$TENSORRT_LIBS" ]; then
+        export LD_LIBRARY_PATH="${TENSORRT_LIBS}:${LD_LIBRARY_PATH:-}"
+    fi
fi

-# Model configuration (can be overridden via env)
+# =============================================================================
+# Model Configuration
+# =============================================================================
export FACE_MODEL_NAME="${FACE_MODEL_NAME:-buffalo_l}"
export FACE_DET_SIZE="${FACE_DET_SIZE:-1024}"

-# Tune CPU thread usage and workers
+# TensorRT acceleration (2-3x faster inference)
+# First startup is slow (~30-60s) while TensorRT builds optimized engines
+# Engines are cached in ~/.cache/onnxruntime/ for subsequent runs
+export USE_TENSORRT="${USE_TENSORRT:-true}"

+# =============================================================================
+# Performance Tuning
+# =============================================================================
CPU_CORES="$(nproc || echo 4)"
-DEFAULT_WORKERS="${CPU_CORES}"
-if [ "$DEFAULT_WORKERS" -lt 2 ]; then
-    DEFAULT_WORKERS=2
-fi
-
-export OMP_NUM_THREADS="${OMP_NUM_THREADS:-2}"
-export MKL_NUM_THREADS="${MKL_NUM_THREADS:-2}"
+# GPU inference is the bottleneck - use 1 worker to avoid loading multiple
+# copies of the model into GPU memory. Concurrency is handled via thread pool.
+DEFAULT_WORKERS=1

+# Thread pool for blocking operations (GPU inference, image decode)
+# 4 threads allows overlapping I/O with GPU work
+export INFERENCE_THREADS="${INFERENCE_THREADS:-4}"

+# CPU threading for numpy/BLAS operations
+export OMP_NUM_THREADS="${OMP_NUM_THREADS:-4}"
+export MKL_NUM_THREADS="${MKL_NUM_THREADS:-4}"

+# =============================================================================
+# HTTP Connection Pool
+# =============================================================================
+export HTTP_POOL_MAX_CONNECTIONS="${HTTP_POOL_MAX_CONNECTIONS:-100}"
+export HTTP_POOL_MAX_KEEPALIVE="${HTTP_POOL_MAX_KEEPALIVE:-20}"

+# =============================================================================
+# Image Processing
+# =============================================================================
+# Max dimension for input images (reject larger)
+export MAX_IMAGE_DIMENSION="${MAX_IMAGE_DIMENSION:-4096}"
+# Downscale large images to this size before processing (det_size handles rest)
+export TARGET_MAX_DIMENSION="${TARGET_MAX_DIMENSION:-2048}"

+# =============================================================================
+# Server Configuration
+# =============================================================================
WORKERS="${UVICORN_WORKERS:-$DEFAULT_WORKERS}"
# Match genealog-api FACE_SERVICE_URL: http://host.docker.internal:18081
PORT="${PORT:-18081}"

-echo "Starting face service on port ${PORT} with ${WORKERS} workers (CPU cores: ${CPU_CORES})"
+echo "Starting face service on port ${PORT} with ${WORKERS} workers"
+echo "  Model: ${FACE_MODEL_NAME}, det_size: ${FACE_DET_SIZE}, TensorRT: ${USE_TENSORRT}"
+echo "  Thread pool: ${INFERENCE_THREADS} workers"
+echo "  CPU cores: ${CPU_CORES}"

exec uvicorn app.main:app \
    --host 0.0.0.0 \