diff --git a/app/config.py b/app/config.py
index 3feecf9..29b6081 100644
--- a/app/config.py
+++ b/app/config.py
@@ -5,6 +5,8 @@ import os
 # Model configuration
 MODEL_NAME = os.getenv("FACE_MODEL_NAME", "buffalo_l")
 DET_SIZE = int(os.getenv("FACE_DET_SIZE", "1024"))
+# Fallback det_size for large faces (close-up selfies)
+FALLBACK_DET_SIZE = int(os.getenv("FACE_DET_SIZE_FALLBACK", "640"))
 USE_TENSORRT = os.getenv("USE_TENSORRT", "true").lower() in ("true", "1", "yes")
 
 # Image processing limits
diff --git a/app/face.py b/app/face.py
index 9163a8e..1ddb1ca 100644
--- a/app/face.py
+++ b/app/face.py
@@ -7,12 +7,14 @@ from concurrent.futures import ThreadPoolExecutor
 import numpy as np
 from insightface.app import FaceAnalysis
 
-from app.config import DET_SIZE, MODEL_NAME, USE_TENSORRT
+from app.config import DET_SIZE, FALLBACK_DET_SIZE, MODEL_NAME, USE_TENSORRT
 from app.models import BBox
 
 logger = logging.getLogger("face_service")
 
-face_app: FaceAnalysis | None = None
+# Two face apps for different image types (lazy loaded)
+face_app_large: FaceAnalysis | None = None  # det_size=1024 for group photos
+face_app_small: FaceAnalysis | None = None  # det_size=640 for selfies/portraits
 
 
 def _check_tensorrt_available() -> bool:
@@ -60,35 +62,106 @@
     return providers
 
 
-def load_face_app() -> FaceAnalysis:
-    """Load and initialize the FaceAnalysis model (singleton)."""
-    global face_app
-    if face_app is not None:
-        return face_app
+def load_face_app(det_size: int = DET_SIZE) -> FaceAnalysis:
+    """Load and initialize the FaceAnalysis model for given det_size (cached)."""
+    global face_app_large, face_app_small
+
+    # Return cached instance if available
+    if det_size >= 1024 and face_app_large is not None:
+        return face_app_large
+    if det_size < 1024 and face_app_small is not None:
+        return face_app_small
 
     providers = _get_providers()
     logger.info(
-        f"Loading InsightFace model pack={MODEL_NAME}, det_size={DET_SIZE}, "
+        f"Loading InsightFace model pack={MODEL_NAME}, det_size={det_size}, "
         f"tensorrt={USE_TENSORRT}"
     )
     fa = FaceAnalysis(
         name=MODEL_NAME,
         providers=providers,
     )
-    fa.prepare(ctx_id=0, det_size=(DET_SIZE, DET_SIZE))
-    face_app = fa
-    logger.info("FaceAnalysis initialized")
-    return face_app
+    fa.prepare(ctx_id=0, det_size=(det_size, det_size))
+
+    # Cache the instance
+    if det_size >= 1024:
+        face_app_large = fa
+    else:
+        face_app_small = fa
+
+    logger.info(f"FaceAnalysis initialized (det_size={det_size})")
+    return fa
+
+
+def get_optimal_det_size(img_height: int, img_width: int) -> int:
+    """
+    Choose optimal det_size based on image characteristics.
+
+    - Portrait/selfie (tall, narrow) → 640 (face likely fills frame)
+    - Landscape/group photo → 1024 (need to detect small faces)
+    - Small images → 640 (no benefit from larger det_size)
+    """
+    max_dim = max(img_height, img_width)
+    min_dim = min(img_height, img_width)
+    aspect_ratio = max_dim / min_dim if min_dim > 0 else 1.0
+
+    # Small images - 640 is sufficient
+    if max_dim <= 1024:
+        return FALLBACK_DET_SIZE  # 640
+
+    # Portrait orientation (height > width) with tall aspect ratio
+    # Likely a selfie or single-person portrait
+    if img_height > img_width and aspect_ratio >= 1.3:
+        return FALLBACK_DET_SIZE  # 640
+
+    # Landscape or square, larger image - likely group photo
+    return DET_SIZE  # 1024
+
+
+class FaceServiceError(Exception):
+    """Error during face detection/embedding."""
+    pass
 
 
 async def get_faces_async(
-    fa: FaceAnalysis,
     img: np.ndarray,
     executor: ThreadPoolExecutor,
 ) -> list:
-    """Run face detection/embedding in thread pool to not block event loop."""
+    """
+    Run face detection/embedding in thread pool to not block event loop.
+
+    Automatically selects optimal det_size based on image dimensions:
+    - Portrait/selfie → 640 (handles large faces)
+    - Landscape/group → 1024 (detects small faces)
+
+    Raises:
+        FaceServiceError: If face detection fails (GPU OOM, ONNX errors, etc.)
+    """
+    h, w = img.shape[:2]
+    det_size = get_optimal_det_size(h, w)
+    fa = load_face_app(det_size)
+
     loop = asyncio.get_running_loop()
-    return await loop.run_in_executor(executor, fa.get, img)
+    try:
+        faces = await loop.run_in_executor(executor, fa.get, img)
+    except Exception as e:
+        logger.error(
+            "get_faces_async: face detection failed det_size=%d image=%dx%d error=%s",
+            det_size, w, h, str(e)
+        )
+        raise FaceServiceError(f"Face detection failed: {str(e)}") from e
+
+    logger.debug(
+        "get_faces_async: det_size=%d, image=%dx%d, faces=%d",
+        det_size, w, h, len(faces)
+    )
+
+    return faces
+
+
+def face_area(face) -> float:
+    """Calculate face bounding box area for sorting."""
+    return (face.bbox[2] - face.bbox[0]) * (face.bbox[3] - face.bbox[1])
 
 
 def to_pixel_bbox(bbox, width: int, height: int) -> BBox:
@@ -113,11 +186,19 @@ def validate_embedding(embedding: np.ndarray) -> bool:
 
 
 def normalize_embedding(embedding: np.ndarray) -> np.ndarray:
-    """Normalize embedding vector to unit length."""
+    """Normalize embedding vector to unit length.
+
+    Near-zero vectors are returned unnormalized, since dividing by a
+    tiny norm would produce NaN/unstable similarity calculations.
+    """
     emb = embedding.astype(np.float32)
     norm = float(np.linalg.norm(emb))
-    if norm > 0.0:
-        emb = emb / norm
+    # Near-zero vectors can't be normalized meaningfully
+    # and would cause issues in similarity calculations
+    if norm < 0.01:
+        logger.warning("normalize_embedding: near-zero vector detected (norm=%.6f)", norm)
+        return emb  # Return as-is, validate_embedding will catch it
+    emb = emb / norm
     return emb
diff --git a/app/main.py b/app/main.py
index 824ac1e..a6adfb3 100644
--- a/app/main.py
+++ b/app/main.py
@@ -18,7 +18,7 @@ from app.config import (
     USE_TENSORRT,
 )
 from app.face import load_face_app
-from app.routes import embed
+from app.routes import benchmark, embed
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("face_service")
@@ -72,6 +72,7 @@ app = FastAPI(
 
 # Include routers
 app.include_router(embed.router)
+app.include_router(benchmark.router)
 
 
 @app.get("/healthz")
diff --git a/app/models.py b/app/models.py
index f4b5caa..b07fd32 100644
--- a/app/models.py
+++ b/app/models.py
@@ -1,7 +1,5 @@
 """Pydantic models for request/response schemas."""
 
-from typing import List
-
 from pydantic import BaseModel, HttpUrl
 
 
@@ -19,17 +17,21 @@ class BBox(BaseModel):
 class FaceEmbedding(BaseModel):
     bbox: BBox
     score: float
-    embedding: List[float]
+    embedding: list[float]
 
 
 class EmbedAvatarResponse(BaseModel):
-    embedding: List[float]
+    embedding: list[float]
     bbox: BBox
     score: float
+    processed_width: int | None = None
+    processed_height: int | None = None
 
 
 class EmbedImageResponse(BaseModel):
-    faces: List[FaceEmbedding]
+    faces: list[FaceEmbedding]
+    processed_width: int | None = None
+    processed_height: int | None = None
 
 
 class MatchResult(BaseModel):
diff --git a/app/routes/benchmark.py b/app/routes/benchmark.py
new file mode 100644
index 0000000..a3052b8
--- /dev/null
+++ b/app/routes/benchmark.py
@@ -0,0 +1,516 @@
+"""Benchmark UI for face comparison."""
+
+import asyncio
+import base64
+import logging
+from io import BytesIO
+
+import cv2
+import numpy as np
+from fastapi import APIRouter, File, UploadFile
+from fastapi.responses import HTMLResponse
+
+from app.face import face_area, get_faces_async, validate_embedding
+from app.image import read_upload_image
+from app.resources import inference_executor
+
+logger = logging.getLogger("face_service")
+
+router = APIRouter(prefix="/benchmark", tags=["benchmark"])
+
+
+def cosine_similarity(emb1: np.ndarray, emb2: np.ndarray) -> float:
+    """Compute cosine similarity between two embeddings."""
+    # Embeddings are already normalized, so dot product = cosine similarity
+    return float(np.dot(emb1, emb2))
+
+
+def draw_faces_on_image(img: np.ndarray, faces: list, face_indices: list[int]) -> np.ndarray:
+    """Draw bounding boxes and indices on image."""
+    img_copy = img.copy()
+    for idx, face in zip(face_indices, faces):
+        bbox = face.bbox.astype(int)
+        x1, y1, x2, y2 = bbox
+        # Draw rectangle
+        cv2.rectangle(img_copy, (x1, y1), (x2, y2), (0, 255, 0), 2)
+        # Draw index label
+        label = f"#{idx}"
+        (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)
+        cv2.rectangle(img_copy, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
+        cv2.putText(img_copy, label, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
+    return img_copy
+
+
+def encode_image_to_base64(img: np.ndarray, max_dim: int = 800) -> str:
+    """Encode image to base64 for display in HTML, resizing if needed."""
+    h, w = img.shape[:2]
+    if max(h, w) > max_dim:
+        scale = max_dim / max(h, w)
+        img = cv2.resize(img, (int(w * scale), int(h * scale)))
+
+    # Convert BGR to RGB for proper display
+    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    from PIL import Image
+    pil_img = Image.fromarray(img_rgb)
+    buffer = BytesIO()
+    pil_img.save(buffer, format="JPEG", quality=85)
+    return base64.b64encode(buffer.getvalue()).decode()
+
+
+def crop_face(img: np.ndarray, bbox: np.ndarray, padding: float = 0.2) -> np.ndarray:
+    """Crop face from image with padding."""
+    h, w = img.shape[:2]
+    x1, y1, x2, y2 = bbox.astype(int)
+
+    # Add padding
+    face_w = x2 - x1
+    face_h = y2 - y1
+    pad_x = int(face_w * padding)
+    pad_y = int(face_h * padding)
+
+    x1 = max(0, x1 - pad_x)
+    y1 = max(0, y1 - pad_y)
+    x2 = min(w, x2 + pad_x)
+    y2 = min(h, y2 + pad_y)
+
+    return img[y1:y2, x1:x2]
+
+
+@router.get("/", response_class=HTMLResponse)
+async def benchmark_ui():
+    """Serve the benchmark UI."""
+    return """
+
+
+
+