204 lines
5.7 KiB
Python
204 lines
5.7 KiB
Python
"""Face analysis and embedding logic using InsightFace."""
|
|
|
|
import asyncio
|
|
import logging
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
|
|
import numpy as np
|
|
from insightface.app import FaceAnalysis
|
|
|
|
from app.config import DET_SIZE, MODEL_NAME, USE_TENSORRT
|
|
from app.models import BBox
|
|
|
|
# Logger shared by every function in this module, registered under the
# fixed name "face_service".
logger = logging.getLogger("face_service")
|
|
|
|
# Module-level cache for the FaceAnalysis instance. It starts as None and is
# assigned by load_face_app() once the model has been created and prepared.
face_app: FaceAnalysis | None = None
|
|
|
|
|
|
def _check_tensorrt_available() -> bool:
    """Report whether the ``tensorrt`` Python package can be imported.

    Returns:
        True when ``import tensorrt`` succeeds, False when it raises
        ImportError. Any other exception raised by the import propagates.
    """
    try:
        # Imported purely as a probe; the module object itself is not used.
        import tensorrt  # noqa: F401
    except ImportError:
        return False
    else:
        return True
|
|
|
|
|
|
def _get_providers() -> list:
    """Build the ordered ONNX Runtime execution-provider list.

    The order is TensorRT (only when ``USE_TENSORRT`` is set, ONNX Runtime
    lists the provider, and the ``tensorrt`` package imports), then CUDA
    (when ONNX Runtime lists it), then CPU, which is always appended last.

    Returns:
        A list whose items are either a provider name or a
        ``(provider name, options dict)`` tuple for TensorRT.
    """
    import onnxruntime as ort

    supported = ort.get_available_providers()
    logger.info(f"Available ONNX providers: {supported}")

    chosen = []

    # TensorRT goes first, but only if it is both requested and usable.
    wants_trt = USE_TENSORRT and "TensorrtExecutionProvider" in supported
    if wants_trt and _check_tensorrt_available():
        trt_options = {
            "trt_max_workspace_size": 2 * 1024 * 1024 * 1024,  # 2GB
            "trt_fp16_enable": True,  # FP16 for faster inference
            "trt_engine_cache_enable": True,  # Cache TensorRT engines
        }
        chosen.append(("TensorrtExecutionProvider", trt_options))
    elif wants_trt:
        logger.warning("TensorRT requested but libnvinfer.so.10 not found, skipping")

    # CUDA is the next preference when ONNX Runtime exposes it.
    if "CUDAExecutionProvider" in supported:
        chosen.append("CUDAExecutionProvider")

    # CPU is unconditionally the last resort.
    chosen.append("CPUExecutionProvider")

    # Log names only; the TensorRT entry is a tuple carrying its options.
    chosen_names = [
        entry[0] if isinstance(entry, tuple) else entry for entry in chosen
    ]
    logger.info(f"Using providers: {chosen_names}")
    return chosen
|
|
|
|
|
|
def load_face_app() -> FaceAnalysis:
    """Return the shared FaceAnalysis instance, creating it on first use.

    On the first call the model pack ``MODEL_NAME`` is constructed with the
    providers from ``_get_providers()``, prepared with ``ctx_id=0`` and a
    square detection size of ``DET_SIZE``, and stored in the module-level
    ``face_app``. Later calls return that stored instance unchanged.

    Returns:
        The initialised FaceAnalysis object.
    """
    global face_app

    if face_app is None:
        providers = _get_providers()
        logger.info(
            f"Loading InsightFace model pack={MODEL_NAME}, det_size={DET_SIZE}, "
            f"tensorrt={USE_TENSORRT}"
        )
        model = FaceAnalysis(name=MODEL_NAME, providers=providers)
        model.prepare(ctx_id=0, det_size=(DET_SIZE, DET_SIZE))
        # Publish only after prepare() succeeded, so a failed initialisation
        # leaves the cache empty and the next call retries.
        face_app = model
        logger.info("FaceAnalysis initialized")

    return face_app
|
|
|
|
|
|
async def get_faces_async(
    fa: FaceAnalysis,
    img: np.ndarray,
    executor: ThreadPoolExecutor,
) -> list:
    """Run ``fa.get(img)`` on ``executor`` and await its result.

    Handing the call to the executor keeps it off the running event loop.

    Args:
        fa: Object whose ``get`` method is invoked with ``img``.
        img: Image passed through to ``fa.get`` unchanged.
        executor: Executor that runs the call.

    Returns:
        Whatever ``fa.get(img)`` returns.
    """
    pending = asyncio.get_running_loop().run_in_executor(executor, fa.get, img)
    faces = await pending
    return faces
|
|
|
|
|
|
def to_pixel_bbox(bbox, width: int, height: int) -> BBox:
    """Convert an ``(x1, y1, x2, y2)`` box to an integer pixel BBox inside the image.

    Coordinates are truncated with ``int()`` and clamped so that, for an
    image of at least 1x1 pixels, the result always satisfies
    ``0 <= x``, ``w >= 1``, ``x + w <= width`` (and the same for y/h/height).

    Args:
        bbox: Iterable of four numbers ``x1, y1, x2, y2``.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        BBox with top-left corner ``(x, y)`` and size ``(w, h)``.

    Raises:
        ValueError: If ``bbox`` does not hold exactly four values, or a
            value cannot be converted with ``int()`` (e.g. NaN).
    """
    x1, y1, x2, y2 = (int(v) for v in bbox)

    # The origin must be a valid pixel index, so its upper bound is size - 1.
    # Clamping it to `size` (as before) let a box that starts at or beyond
    # the right/bottom edge come out as x == width with w == 1, i.e. outside
    # the image. max(..., 0) keeps a degenerate zero-sized image at origin 0.
    x1 = min(max(x1, 0), max(width - 1, 0))
    y1 = min(max(y1, 0), max(height - 1, 0))

    # The far edge may not exceed the image, and must stay at least one pixel
    # past the origin so the box never collapses to zero width/height.
    x2 = max(min(x2, width), x1 + 1)
    y2 = max(min(y2, height), y1 + 1)

    return BBox(x=x1, y=y1, w=x2 - x1, h=y2 - y1)
|
|
|
|
|
|
def validate_embedding(embedding: np.ndarray) -> bool:
    """Return True when every element of ``embedding`` is finite.

    An embedding containing NaN, +Inf or -Inf yields False. An empty array
    yields True, since there is no offending element.
    """
    finite_mask = np.isfinite(embedding)
    return bool(np.all(finite_mask))
|
|
|
|
|
|
def normalize_embedding(embedding: np.ndarray) -> np.ndarray:
    """Return a float32 copy of ``embedding`` scaled to unit L2 norm.

    The input is never modified. If the norm is not strictly positive
    (a zero vector, or a NaN norm) the float32 copy is returned unscaled.
    """
    vec = embedding.astype(np.float32)
    length = float(np.linalg.norm(vec))

    # Phrased as "not (length > 0)" so a NaN norm also takes this branch.
    if not length > 0.0:
        return vec

    return vec / length
|
|
|
|
|
|
def fallback_avatar_embedding(
    fa: FaceAnalysis,
    img: np.ndarray,
    width: int,
    height: int,
) -> tuple[list[float], BBox, float] | None:
    """
    Embed the centre square of an image when no face was detected.

    The largest centred square is cropped from ``img``, resized to the
    recognition model's ``input_size`` and passed straight to that model's
    ``get_feat``. The feature is normalized and checked for NaN/Inf.

    Args:
        fa: FaceAnalysis whose ``models["recognition"]`` entry is used.
        img: Image array, indexed as ``img[y, x]``.
        width: Image width used to place the crop.
        height: Image height used to place the crop.

    Returns:
        ``(embedding as list of floats, BBox of the crop, score 0.0)``, or
        None when the recognition model or its ``input_size`` is missing,
        the size is invalid, the crop is empty, resizing or inference
        raises, or the embedding is not finite. Failures are logged.
    """
    recognizer = getattr(fa, "models", {}).get("recognition")
    if recognizer is None:
        logger.warning("embed_avatar_fallback: recognition model is not available")
        return None

    edge = min(height, width)
    if edge <= 0:
        logger.warning(
            "embed_avatar_fallback: invalid image size width=%d height=%d",
            width,
            height,
        )
        return None

    # Centre a square of side `edge`, keeping all four bounds in the image.
    half_edge = edge // 2
    left = max(width // 2 - half_edge, 0)
    top = max(height // 2 - half_edge, 0)
    right = min(left + edge, width)
    bottom = min(top + edge, height)

    square = img[top:bottom, left:right]
    if square.size == 0:
        logger.warning("embed_avatar_fallback: empty crop region")
        return None

    try:
        import cv2

        net_input_size = getattr(recognizer, "input_size", None)
        if not net_input_size:
            logger.warning("embed_avatar_fallback: recognition model has no input_size")
            return None
        resized = cv2.resize(square, net_input_size)
    except Exception as exc:
        logger.exception("embed_avatar_fallback: failed to resize crop: %s", exc)
        return None

    try:
        # get_feat's result is indexed at 0 to pick the feature for this image.
        raw_feature = recognizer.get_feat(resized)[0]
    except Exception as exc:
        logger.exception("embed_avatar_fallback: get_feat failed: %s", exc)
        return None

    unit_vector = normalize_embedding(raw_feature)
    if not validate_embedding(unit_vector):
        logger.warning("embed_avatar_fallback: embedding contains NaN/Inf values")
        return None

    region = BBox(
        x=int(left),
        y=int(top),
        w=int(right - left),
        h=int(bottom - top),
    )
    # No detector ran on this path, so the score is a fixed 0.0.
    confidence = 0.0

    logger.info(
        "embed_avatar_fallback: generated embedding bbox=(%d,%d,%d,%d) score=%.4f len=%d",
        region.x,
        region.y,
        region.w,
        region.h,
        confidence,
        len(unit_vector),
    )

    return unit_vector.tolist(), region, confidence
|
|
|