face/app/face.py

285 lines
8.5 KiB
Python

"""Face analysis and embedding logic using InsightFace."""
import asyncio
import logging
from concurrent.futures import ThreadPoolExecutor
import numpy as np
from insightface.app import FaceAnalysis
from app.config import DET_SIZE, FALLBACK_DET_SIZE, MODEL_NAME, USE_TENSORRT
from app.models import BBox
logger = logging.getLogger("face_service")
# Two face apps for different image types (lazy loaded)
face_app_large: FaceAnalysis | None = None # det_size=1024 for group photos
face_app_small: FaceAnalysis | None = None # det_size=640 for selfies/portraits
def _check_tensorrt_available() -> bool:
"""Check if TensorRT libraries are actually installed."""
try:
import tensorrt
return True
except ImportError:
return False
def _get_providers() -> list:
    """Build the ONNX Runtime execution-provider list from configuration.

    Preference order: TensorRT (when enabled and importable) -> CUDA -> CPU.
    TensorRT entries carry their session options as a (name, options) tuple.
    """
    import onnxruntime as ort

    available = ort.get_available_providers()
    logger.info(f"Available ONNX providers: {available}")

    providers: list = []

    # TensorRT goes first, but only when the runtime advertises it AND the
    # python bindings are actually importable on this host.
    if USE_TENSORRT and "TensorrtExecutionProvider" in available:
        if _check_tensorrt_available():
            trt_options = {
                "trt_max_workspace_size": 2 * 1024 * 1024 * 1024,  # 2GB
                "trt_fp16_enable": True,  # FP16 for faster inference
                "trt_engine_cache_enable": True,  # Cache TensorRT engines
            }
            providers.append(("TensorrtExecutionProvider", trt_options))
        else:
            logger.warning("TensorRT requested but libnvinfer.so.10 not found, skipping")

    # CUDA fallback
    if "CUDAExecutionProvider" in available:
        providers.append("CUDAExecutionProvider")

    # CPU fallback (always available)
    providers.append("CPUExecutionProvider")

    provider_names = [p[0] if isinstance(p, tuple) else p for p in providers]
    logger.info(f"Using providers: {provider_names}")
    return providers
def load_face_app(det_size: int = DET_SIZE) -> FaceAnalysis:
    """Return the FaceAnalysis instance for this det_size, loading it on first use.

    Two instances are cached at module level: "large" (det_size >= 1024, group
    photos) and "small" (det_size < 1024, selfies/portraits).
    """
    global face_app_large, face_app_small

    use_large = det_size >= 1024

    # Serve from cache when this size class was already initialized.
    cached = face_app_large if use_large else face_app_small
    if cached is not None:
        return cached

    providers = _get_providers()
    logger.info(
        f"Loading InsightFace model pack={MODEL_NAME}, det_size={det_size}, "
        f"tensorrt={USE_TENSORRT}"
    )
    app = FaceAnalysis(
        name=MODEL_NAME,
        providers=providers,
    )
    app.prepare(ctx_id=0, det_size=(det_size, det_size))

    # Cache for subsequent calls in the same size class.
    if use_large:
        face_app_large = app
    else:
        face_app_small = app

    logger.info(f"FaceAnalysis initialized (det_size={det_size})")
    return app
def get_optimal_det_size(img_height: int, img_width: int) -> int:
    """Pick a detector input size from the image geometry.

    Heuristics:
    - small images (longest side <= 1024) -> 640, larger det_size adds nothing
    - tall portrait orientation (likely a selfie/single portrait) -> 640
    - large landscape/square images (likely group photos with small faces) -> 1024
    """
    longer = max(img_height, img_width)
    shorter = min(img_height, img_width)
    aspect_ratio = longer / shorter if shorter > 0 else 1.0

    # Small images - 640 is sufficient.
    if longer <= 1024:
        return FALLBACK_DET_SIZE  # 640

    # Tall portrait: the face likely fills the frame.
    if img_height > img_width and aspect_ratio >= 1.3:
        return FALLBACK_DET_SIZE  # 640

    # Large landscape or square - need to detect small faces.
    return DET_SIZE  # 1024
class FaceServiceError(Exception):
    """Raised when face detection or embedding fails."""
async def get_faces_async(
    img: np.ndarray,
    executor: ThreadPoolExecutor,
) -> list:
    """Detect faces and compute embeddings without blocking the event loop.

    The detector input size is chosen automatically from the image dimensions:
    portrait/selfie -> 640 (handles large faces), landscape/group -> 1024
    (detects small faces). The actual inference runs in the given executor.

    Raises:
        FaceServiceError: If face detection fails (GPU OOM, ONNX errors, etc.)
    """
    height, width = img.shape[:2]
    det_size = get_optimal_det_size(height, width)
    fa = load_face_app(det_size)

    loop = asyncio.get_running_loop()
    try:
        faces = await loop.run_in_executor(executor, fa.get, img)
    except Exception as e:
        logger.error(
            "get_faces_async: face detection failed det_size=%d image=%dx%d error=%s",
            det_size, width, height, str(e)
        )
        raise FaceServiceError(f"Face detection failed: {str(e)}") from e

    logger.debug(
        "get_faces_async: det_size=%d, image=%dx%d, faces=%d",
        det_size, width, height, len(faces)
    )
    return faces
def face_area(face) -> float:
    """Area of the face's bounding box (used as a sort key)."""
    x1, y1, x2, y2 = face.bbox[:4]
    return (x2 - x1) * (y2 - y1)
def to_pixel_bbox(bbox, width: int, height: int) -> BBox:
    """Convert an InsightFace (x1, y1, x2, y2) bbox to a clamped pixel BBox."""

    def clamp(value, upper: int) -> int:
        # Pin an integer coordinate to [0, upper].
        return max(0, min(int(value), upper))

    left = clamp(bbox[0], width)
    top = clamp(bbox[1], height)
    right = clamp(bbox[2], width)
    bottom = clamp(bbox[3], height)

    # Guarantee at least a 1x1 box even after clamping.
    return BBox(x=left, y=top, w=max(right - left, 1), h=max(bottom - top, 1))
def validate_embedding(embedding: np.ndarray) -> bool:
    """Return True when every component of the embedding is finite (no NaN/Inf)."""
    has_bad_values = np.isnan(embedding).any() or np.isinf(embedding).any()
    return not bool(has_bad_values)
def normalize_embedding(embedding: np.ndarray) -> np.ndarray:
    """Scale the embedding to unit L2 norm, as float32.

    Near-zero vectors cannot be normalized meaningfully and would produce
    NaN in similarity calculations, so they are returned unchanged (still
    cast to float32) for validate_embedding to reject downstream.
    """
    vec = embedding.astype(np.float32)
    norm = float(np.linalg.norm(vec))
    if norm < 0.01:
        logger.warning("normalize_embedding: near-zero vector detected (norm=%.6f)", norm)
        return vec
    return vec / norm
def fallback_avatar_embedding(
    fa: FaceAnalysis,
    img: np.ndarray,
    width: int,
    height: int,
) -> tuple[list[float], BBox, float] | None:
    """
    Generate an embedding from a center crop when no face is detected.

    Runs the recognition sub-model directly on a center square crop, which is
    useful for avatar images where the detector misses the face.

    Args:
        fa: Initialized FaceAnalysis whose "recognition" sub-model is used.
        img: Source image array (rows x cols x channels).
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        (embedding, bbox, score) on success — score is always 0.0 to mark the
        result as a fallback with no real detector confidence — or None when
        any step fails (missing model, bad size, resize/inference error,
        non-finite embedding).
    """
    # NOTE: log prefixes match the function name (previously they said
    # "embed_avatar_fallback", which made log-to-code navigation fail).
    rec_model = getattr(fa, "models", {}).get("recognition")
    if rec_model is None:
        logger.warning("fallback_avatar_embedding: recognition model is not available")
        return None

    side = min(height, width)
    if side <= 0:
        logger.warning(
            "fallback_avatar_embedding: invalid image size width=%d height=%d",
            width,
            height,
        )
        return None

    # Center square crop, clamped to the image bounds.
    cy, cx = height // 2, width // 2
    x1 = max(cx - side // 2, 0)
    y1 = max(cy - side // 2, 0)
    x2 = min(x1 + side, width)
    y2 = min(y1 + side, height)
    crop = img[y1:y2, x1:x2]
    if crop.size == 0:
        logger.warning("fallback_avatar_embedding: empty crop region")
        return None

    try:
        import cv2

        # input_size is the (w, h) the recognition model expects.
        target_size = getattr(rec_model, "input_size", None)
        if not target_size:
            logger.warning("fallback_avatar_embedding: recognition model has no input_size")
            return None
        face_img = cv2.resize(crop, target_size)
    except Exception as e:
        logger.exception("fallback_avatar_embedding: failed to resize crop: %s", e)
        return None

    try:
        feat = rec_model.get_feat(face_img)[0]
    except Exception as e:
        logger.exception("fallback_avatar_embedding: get_feat failed: %s", e)
        return None

    emb = normalize_embedding(feat)
    if not validate_embedding(emb):
        logger.warning("fallback_avatar_embedding: embedding contains NaN/Inf values")
        return None

    bbox = BBox(
        x=int(x1),
        y=int(y1),
        w=int(x2 - x1),
        h=int(y2 - y1),
    )
    # 0.0 marks this as a fallback result with no detector confidence.
    score = 0.0
    logger.info(
        "fallback_avatar_embedding: generated embedding bbox=(%d,%d,%d,%d) score=%.4f len=%d",
        bbox.x,
        bbox.y,
        bbox.w,
        bbox.h,
        score,
        len(emb),
    )
    return emb.tolist(), bbox, score