From a7595603b4a536afe2676b3a1c8258e2e5f7589c Mon Sep 17 00:00:00 2001 From: Hung Luu Date: Thu, 5 Feb 2026 10:49:50 +0700 Subject: [PATCH] Add adaptive det_size, benchmark UI, and code simplification - Add adaptive det_size selection based on image characteristics (portraits use 640, landscapes use 1024) - Add FaceServiceError for better error handling in face detection - Add benchmark UI for comparing faces between two images - Extract face_area() helper to eliminate duplicate sorting logic - Modernize type hints (List -> list, Tuple -> tuple) - Add processed_width/height to embed responses - Add embedding dimension validation Co-Authored-By: Claude Opus 4.5 --- app/config.py | 2 + app/face.py | 117 +++++++-- app/main.py | 3 +- app/models.py | 12 +- app/routes/benchmark.py | 516 ++++++++++++++++++++++++++++++++++++++++ app/routes/embed.py | 84 +++++-- 6 files changed, 684 insertions(+), 50 deletions(-) create mode 100644 app/routes/benchmark.py diff --git a/app/config.py b/app/config.py index 3feecf9..29b6081 100644 --- a/app/config.py +++ b/app/config.py @@ -5,6 +5,8 @@ import os # Model configuration MODEL_NAME = os.getenv("FACE_MODEL_NAME", "buffalo_l") DET_SIZE = int(os.getenv("FACE_DET_SIZE", "1024")) +# Fallback det_size for large faces (close-up selfies) +FALLBACK_DET_SIZE = int(os.getenv("FACE_DET_SIZE_FALLBACK", "640")) USE_TENSORRT = os.getenv("USE_TENSORRT", "true").lower() in ("true", "1", "yes") # Image processing limits diff --git a/app/face.py b/app/face.py index 9163a8e..1ddb1ca 100644 --- a/app/face.py +++ b/app/face.py @@ -7,12 +7,14 @@ from concurrent.futures import ThreadPoolExecutor import numpy as np from insightface.app import FaceAnalysis -from app.config import DET_SIZE, MODEL_NAME, USE_TENSORRT +from app.config import DET_SIZE, FALLBACK_DET_SIZE, MODEL_NAME, USE_TENSORRT from app.models import BBox logger = logging.getLogger("face_service") -face_app: FaceAnalysis | None = None +# Two face apps for different image types (lazy loaded) +face_app_large: FaceAnalysis | None = None # det_size=1024 for group photos +face_app_small: FaceAnalysis | None = None # det_size=640 for selfies/portraits def _check_tensorrt_available() -> bool: @@ -60,35 +62,106 @@ def _get_providers() -> list: return providers -def load_face_app() -> FaceAnalysis: - """Load and initialize the FaceAnalysis model (singleton).""" - global face_app - if face_app is not None: - return face_app +def load_face_app(det_size: int = DET_SIZE) -> FaceAnalysis: + """Load and initialize the FaceAnalysis model for given det_size (cached).""" + global face_app_large, face_app_small + + # Return cached instance if available + if det_size >= 1024 and face_app_large is not None: + return face_app_large + if det_size < 1024 and face_app_small is not None: + return face_app_small providers = _get_providers() logger.info( - f"Loading InsightFace model pack={MODEL_NAME}, det_size={DET_SIZE}, " + f"Loading InsightFace model pack={MODEL_NAME}, det_size={det_size}, " f"tensorrt={USE_TENSORRT}" ) fa = FaceAnalysis( name=MODEL_NAME, providers=providers, ) - fa.prepare(ctx_id=0, det_size=(DET_SIZE, DET_SIZE)) - face_app = fa - logger.info("FaceAnalysis initialized") - return face_app + fa.prepare(ctx_id=0, det_size=(det_size, det_size)) + + # Cache the instance + if det_size >= 1024: + face_app_large = fa + else: + face_app_small = fa + + logger.info(f"FaceAnalysis initialized (det_size={det_size})") + return fa + + +def get_optimal_det_size(img_height: int, img_width: int) -> int: + """ + Choose optimal det_size based on image characteristics. + + - Portrait/selfie (tall, narrow) → 640 (face likely fills frame) + - Landscape/group photo → 1024 (need to detect small faces) + - Small images → 640 (no benefit from larger det_size) + """ + max_dim = max(img_height, img_width) + min_dim = min(img_height, img_width) + aspect_ratio = max_dim / min_dim if min_dim > 0 else 1.0 + + # Small images - 640 is sufficient + if max_dim <= 1024: + return FALLBACK_DET_SIZE # 640 + + # Portrait orientation (height > width) with tall aspect ratio + # Likely a selfie or single-person portrait + if img_height > img_width and aspect_ratio >= 1.3: + return FALLBACK_DET_SIZE # 640 + + # Landscape or square, larger image - likely group photo + return DET_SIZE # 1024 + + +class FaceServiceError(Exception): + """Error during face detection/embedding.""" + pass async def get_faces_async( - fa: FaceAnalysis, img: np.ndarray, executor: ThreadPoolExecutor, ) -> list: - """Run face detection/embedding in thread pool to not block event loop.""" + """ + Run face detection/embedding in thread pool to not block event loop. + + Automatically selects optimal det_size based on image dimensions: + - Portrait/selfie → 640 (handles large faces) + - Landscape/group → 1024 (detects small faces) + + Raises: + FaceServiceError: If face detection fails (GPU OOM, ONNX errors, etc.) + """ + h, w = img.shape[:2] + det_size = get_optimal_det_size(h, w) + fa = load_face_app(det_size) + loop = asyncio.get_running_loop() - return await loop.run_in_executor(executor, fa.get, img) + try: + faces = await loop.run_in_executor(executor, fa.get, img) + except Exception as e: + logger.error( + "get_faces_async: face detection failed det_size=%d image=%dx%d error=%s", + det_size, w, h, str(e) + ) + raise FaceServiceError(f"Face detection failed: {str(e)}") from e + + logger.debug( + "get_faces_async: det_size=%d, image=%dx%d, faces=%d", + det_size, w, h, len(faces) + ) + + return faces + + +def face_area(face) -> float: + """Calculate face bounding box area for sorting.""" + return (face.bbox[2] - face.bbox[0]) * (face.bbox[3] - face.bbox[1]) def to_pixel_bbox(bbox, width: int, height: int) -> BBox: @@ -113,11 +186,19 @@ def validate_embedding(embedding: np.ndarray) -> bool: def normalize_embedding(embedding: np.ndarray) -> np.ndarray: - """Normalize embedding vector to unit length.""" + """Normalize embedding vector to unit length. + + Returns None-equivalent behavior if embedding is near-zero vector, + which would cause NaN in similarity calculations. + """ emb = embedding.astype(np.float32) norm = float(np.linalg.norm(emb)) - if norm > 0.0: - emb = emb / norm + # Near-zero vectors can't be normalized meaningfully + # and would cause issues in similarity calculations + if norm < 0.01: + logger.warning("normalize_embedding: near-zero vector detected (norm=%.6f)", norm) + return emb # Return as-is, validate_embedding will catch it + emb = emb / norm return emb diff --git a/app/main.py b/app/main.py index 824ac1e..a6adfb3 100644 --- a/app/main.py +++ b/app/main.py @@ -18,7 +18,7 @@ from app.config import ( USE_TENSORRT, ) from app.face import load_face_app -from app.routes import embed +from app.routes import benchmark, embed logging.basicConfig(level=logging.INFO) logger = logging.getLogger("face_service") @@ -72,6 +72,7 @@ app = FastAPI( # Include routers app.include_router(embed.router) +app.include_router(benchmark.router) @app.get("/healthz") diff --git a/app/models.py b/app/models.py index f4b5caa..b07fd32 100644 --- a/app/models.py +++ b/app/models.py @@ -1,7 +1,5 @@ """Pydantic models for request/response schemas.""" -from typing import List - from pydantic import BaseModel, HttpUrl @@ -19,17 +17,21 @@ class BBox(BaseModel): class FaceEmbedding(BaseModel): bbox: BBox score: float - embedding: List[float] + embedding: list[float] class EmbedAvatarResponse(BaseModel): - embedding: List[float] + embedding: list[float] bbox: BBox score: float + processed_width: int | None = None + processed_height: int | None = None class EmbedImageResponse(BaseModel): - faces: List[FaceEmbedding] + faces: list[FaceEmbedding] + processed_width: int | None = None + processed_height: int | None = None class MatchResult(BaseModel): diff --git a/app/routes/benchmark.py b/app/routes/benchmark.py new file mode 100644 index 0000000..a3052b8 --- /dev/null +++ b/app/routes/benchmark.py @@ -0,0 +1,516 @@ +"""Benchmark UI for face comparison.""" + +import asyncio +import base64 +import logging +from io import BytesIO + +import cv2 +import numpy as np +from fastapi import APIRouter, File, UploadFile +from fastapi.responses import HTMLResponse + +from app.face import face_area, get_faces_async, validate_embedding +from app.image import read_upload_image +from app.resources import inference_executor + +logger = logging.getLogger("face_service") + +router = APIRouter(prefix="/benchmark", tags=["benchmark"]) + + +def cosine_similarity(emb1: np.ndarray, emb2: np.ndarray) -> float: + """Compute cosine similarity between two embeddings.""" + # Embeddings are already normalized, so dot product = cosine similarity + return float(np.dot(emb1, emb2)) + + +def draw_faces_on_image(img: np.ndarray, faces: list, face_indices: list[int]) -> np.ndarray: + """Draw bounding boxes and indices on image.""" + img_copy = img.copy() + for idx, face in zip(face_indices, faces): + bbox = face.bbox.astype(int) + x1, y1, x2, y2 = bbox + # Draw rectangle + cv2.rectangle(img_copy, (x1, y1), (x2, y2), (0, 255, 0), 2) + # Draw index label + label = f"#{idx}" + (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2) + cv2.rectangle(img_copy, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1) + cv2.putText(img_copy, label, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2) + return img_copy + + +def encode_image_to_base64(img: np.ndarray, max_dim: int = 800) -> str: + """Encode image to base64 for display in HTML, resizing if needed.""" + h, w = img.shape[:2] + if max(h, w) > max_dim: + scale = max_dim / max(h, w) + img = cv2.resize(img, (int(w * scale), int(h * scale))) + + # Convert BGR to RGB for proper display + img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + from PIL import Image + pil_img = Image.fromarray(img_rgb) + buffer = BytesIO() + pil_img.save(buffer, format="JPEG", quality=85) + return base64.b64encode(buffer.getvalue()).decode() + + +def crop_face(img: np.ndarray, bbox: np.ndarray, padding: float = 0.2) -> np.ndarray: + """Crop face from image with padding.""" + h, w = img.shape[:2] + x1, y1, x2, y2 = bbox.astype(int) + + # Add padding + face_w = x2 - x1 + face_h = y2 - y1 + pad_x = int(face_w * padding) + pad_y = int(face_h * padding) + + x1 = max(0, x1 - pad_x) + y1 = max(0, y1 - pad_y) + x2 = min(w, x2 + pad_x) + y2 = min(h, y2 + pad_y) + + return img[y1:y2, x1:x2] + + +@router.get("/", response_class=HTMLResponse) +async def benchmark_ui(): + """Serve the benchmark UI.""" + return """ + + + + + + Face Benchmark + + + +

Face Comparison Benchmark

+ +
+
+
+

Image 1 (Source)

+

Upload an image to find faces from

+ + +
+
+

Image 2 (Target)

+

Upload an image to search for matching faces

+ + +
+
+ +
+ +
+ + + + +""" + + +@router.post("/compare") +async def compare_faces( + image1: UploadFile = File(...), + image2: UploadFile = File(...), +): + """ + Compare faces between two uploaded images. + + Returns: + - Annotated images with face bounding boxes + - Similarity matrix between all detected faces + - Best matches for each face in image1 + """ + logger.info("benchmark/compare: image1=%s image2=%s", image1.filename, image2.filename) + + # Read both images + data1 = await image1.read() + data2 = await image2.read() + + img1 = read_upload_image(data1, image1.filename or "image1") + img2 = read_upload_image(data2, image2.filename or "image2") + + # Detect faces in both images concurrently + faces1, faces2 = await asyncio.gather( + get_faces_async(img1, inference_executor), + get_faces_async(img2, inference_executor), + ) + + logger.info( + "benchmark/compare: detected %d faces in image1, %d faces in image2", + len(faces1), len(faces2) + ) + + # Sort faces by area (largest first) + faces1.sort(key=face_area, reverse=True) + faces2.sort(key=face_area, reverse=True) + + # Draw faces on images + face1_indices = list(range(len(faces1))) + face2_indices = list(range(len(faces2))) + + img1_annotated = draw_faces_on_image(img1, faces1, face1_indices) + img2_annotated = draw_faces_on_image(img2, faces2, face2_indices) + + # Encode images for response + img1_b64 = encode_image_to_base64(img1_annotated) + img2_b64 = encode_image_to_base64(img2_annotated) + + # Compute similarity matrix + similarity_matrix: list[list[float]] = [] + best_matches: list[dict] = [] + + for i, f1 in enumerate(faces1): + emb1 = f1.normed_embedding.astype(np.float32) + if not validate_embedding(emb1): + similarity_matrix.append([0.0] * len(faces2)) + continue + + row = [] + best_sim = -1.0 + best_j = -1 + + for j, f2 in enumerate(faces2): + emb2 = f2.normed_embedding.astype(np.float32) + if not validate_embedding(emb2): + row.append(0.0) + continue + + sim = cosine_similarity(emb1, emb2) + row.append(sim) + + if sim > best_sim: + best_sim = sim + best_j = j + + similarity_matrix.append(row) + + if best_j >= 0: + best_matches.append({ + "face1_idx": i, + "face2_idx": best_j, + "similarity": best_sim, + }) + + # Sort best matches by similarity and keep top 3 + best_matches.sort(key=lambda m: m["similarity"], reverse=True) + best_matches = best_matches[:3] + + # Add cropped face images for top 3 matches + for match in best_matches: + i, j = match["face1_idx"], match["face2_idx"] + crop1 = crop_face(img1, faces1[i].bbox) + crop2 = crop_face(img2, faces2[j].bbox) + match["face1_crop"] = encode_image_to_base64(crop1, max_dim=150) + match["face2_crop"] = encode_image_to_base64(crop2, max_dim=150) + + return { + "image1_faces": len(faces1), + "image2_faces": len(faces2), + "image1_annotated": img1_b64, + "image2_annotated": img2_b64, + "similarity_matrix": similarity_matrix, + "similarities": [ + {"face1": i, "face2": j, "score": similarity_matrix[i][j]} + for i in range(len(faces1)) + for j in range(len(faces2)) + ], + "best_matches": best_matches, + } diff --git a/app/routes/embed.py b/app/routes/embed.py index 62f2aec..06fe970 100644 --- a/app/routes/embed.py +++ b/app/routes/embed.py @@ -1,12 +1,13 @@ """Face embedding endpoints.""" import logging -from typing import List import numpy as np from fastapi import APIRouter, HTTPException from app.face import ( + FaceServiceError, + face_area, fallback_avatar_embedding, get_faces_async, load_face_app, @@ -14,20 +15,34 @@ from app.face import ( validate_embedding, ) from app.image import download_image -from app.resources import http_client, inference_executor from app.models import ( - BBox, EmbedAvatarResponse, EmbedImageResponse, EmbedRequest, FaceEmbedding, ) +from app.resources import http_client, inference_executor + +# Expected embedding dimension from buffalo_l model +EXPECTED_EMBEDDING_DIM = 512 logger = logging.getLogger("face_service") router = APIRouter() +def validate_face_embedding(emb: np.ndarray, context: str) -> tuple[bool, str | None]: + """ + Validate embedding dimension and values. + Returns (is_valid, error_message). + """ + if len(emb) != EXPECTED_EMBEDDING_DIM: + return False, f"{context}: unexpected embedding dimension {len(emb)}, expected {EXPECTED_EMBEDDING_DIM}" + if not validate_embedding(emb): + return False, f"{context}: embedding contains NaN/Inf values" + return True, None + + @router.post("/embed-avatar", response_model=EmbedAvatarResponse) async def embed_avatar(req: EmbedRequest): """ @@ -37,11 +52,14 @@ async def embed_avatar(req: EmbedRequest): falls back to center crop embedding with score=0.0. """ logger.info("embed_avatar: image_url=%s", req.image_url) - fa = load_face_app() img = await download_image(str(req.image_url), http_client, inference_executor) h, w = img.shape[:2] - faces = await get_faces_async(fa, img, inference_executor) + try: + faces = await get_faces_async(img, inference_executor) + except FaceServiceError as e: + logger.error("embed_avatar: face service error: %s", str(e)) + raise HTTPException(status_code=503, detail="Face service unavailable") if len(faces) == 0: logger.warning( "embed_avatar: no faces detected image_url=%s size=%dx%d, using fallback", @@ -49,6 +67,7 @@ async def embed_avatar(req: EmbedRequest): w, h, ) + fa = load_face_app() # Need face_app for recognition model fallback = fallback_avatar_embedding(fa, img, w, h) if fallback is None: raise HTTPException( @@ -63,25 +82,28 @@ async def embed_avatar(req: EmbedRequest): score, len(emb), ) - return EmbedAvatarResponse(embedding=emb, bbox=bbox, score=score) + return EmbedAvatarResponse( + embedding=emb, + bbox=bbox, + score=score, + processed_width=w, + processed_height=h, + ) # Sort by face area (largest first) - faces.sort( - key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]), - reverse=True, - ) + faces.sort(key=face_area, reverse=True) face = faces[0] emb = face.normed_embedding.astype(np.float32) - - # Validate embedding - if not validate_embedding(emb): - logger.error("embed_avatar: embedding contains NaN/Inf values") + + is_valid, error_msg = validate_face_embedding(emb, "embed_avatar") + if not is_valid: + logger.error(error_msg) raise HTTPException( status_code=422, detail="Failed to generate valid face embedding", ) - + emb_list = emb.tolist() bbox = to_pixel_bbox(face.bbox, w, h) score = float(getattr(face, "det_score", 1.0)) @@ -93,7 +115,13 @@ async def embed_avatar(req: EmbedRequest): len(emb_list), ) - return EmbedAvatarResponse(embedding=emb_list, bbox=bbox, score=score) + return EmbedAvatarResponse( + embedding=emb_list, + bbox=bbox, + score=score, + processed_width=w, + processed_height=h, + ) @router.post("/embed-image", response_model=EmbedImageResponse) @@ -104,11 +132,15 @@ async def embed_image(req: EmbedRequest): Returns all detected faces sorted by detection score (highest first). Returns empty list if no faces detected. """ - fa = load_face_app() img = await download_image(str(req.image_url), http_client, inference_executor) h, w = img.shape[:2] - faces = await get_faces_async(fa, img, inference_executor) + try: + faces = await get_faces_async(img, inference_executor) + except FaceServiceError as e: + logger.error("embed_image: face service error: %s", str(e)) + raise HTTPException(status_code=503, detail="Face service unavailable") + if len(faces) == 0: logger.warning( "embed_image: no faces detected image_url=%s size=%dx%d", @@ -116,7 +148,7 @@ async def embed_image(req: EmbedRequest): w, h, ) - return EmbedImageResponse(faces=[]) + return EmbedImageResponse(faces=[], processed_width=w, processed_height=h) logger.info( "embed_image: detected %d faces image_url=%s size=%dx%d", @@ -132,19 +164,19 @@ async def embed_image(req: EmbedRequest): reverse=True, ) - result: List[FaceEmbedding] = [] + result: list[FaceEmbedding] = [] for f in faces: emb = f.normed_embedding.astype(np.float32) - - # Skip faces with invalid embeddings - if not validate_embedding(emb): - logger.warning("embed_image: skipping face with NaN/Inf embedding") + + is_valid, error_msg = validate_face_embedding(emb, "embed_image") + if not is_valid: + logger.warning(error_msg) continue - + emb_list = emb.tolist() bbox = to_pixel_bbox(f.bbox, w, h) score = float(getattr(f, "det_score", 1.0)) result.append(FaceEmbedding(bbox=bbox, score=score, embedding=emb_list)) - return EmbedImageResponse(faces=result) + return EmbedImageResponse(faces=result, processed_width=w, processed_height=h)