From a7595603b4a536afe2676b3a1c8258e2e5f7589c Mon Sep 17 00:00:00 2001
From: Hung Luu <luuhung061296@gmail.com>
Date: Thu, 5 Feb 2026 10:49:50 +0700
Subject: [PATCH] Add adaptive det_size, benchmark UI, and code simplification

- Add adaptive det_size selection based on image dimensions
  (small images and tall portraits use 640, everything else uses 1024)
- Add FaceServiceError for better error handling in face detection
- Add benchmark UI for comparing faces between two images
- Extract face_area() helper to eliminate duplicate sorting logic
- Modernize type hints (typing.List -> built-in list)
- Add processed_width/height to embed responses
- Add embedding dimension validation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 app/config.py           |   2 +
 app/face.py             | 117 +++++++--
 app/main.py             |   3 +-
 app/models.py           |  12 +-
 app/routes/benchmark.py | 516 ++++++++++++++++++++++++++++++++++++++++
 app/routes/embed.py     |  84 +++++--
 6 files changed, 684 insertions(+), 50 deletions(-)
 create mode 100644 app/routes/benchmark.py

diff --git a/app/config.py b/app/config.py
index 3feecf9..29b6081 100644
--- a/app/config.py
+++ b/app/config.py
@@ -5,6 +5,8 @@ import os
 # Model configuration
 MODEL_NAME = os.getenv("FACE_MODEL_NAME", "buffalo_l")
 DET_SIZE = int(os.getenv("FACE_DET_SIZE", "1024"))
+# Smaller det_size used for small images and tall portraits (close-up selfies)
+FALLBACK_DET_SIZE = int(os.getenv("FACE_DET_SIZE_FALLBACK", "640"))
 USE_TENSORRT = os.getenv("USE_TENSORRT", "true").lower() in ("true", "1", "yes")
 
 # Image processing limits
diff --git a/app/face.py b/app/face.py
index 9163a8e..1ddb1ca 100644
--- a/app/face.py
+++ b/app/face.py
@@ -7,12 +7,14 @@ from concurrent.futures import ThreadPoolExecutor
 import numpy as np
 from insightface.app import FaceAnalysis
 
-from app.config import DET_SIZE, MODEL_NAME, USE_TENSORRT
+from app.config import DET_SIZE, FALLBACK_DET_SIZE, MODEL_NAME, USE_TENSORRT
 from app.models import BBox
 
 logger = logging.getLogger("face_service")
 
-face_app: FaceAnalysis | None = None
+# Two face apps for different image types (lazy loaded)
+face_app_large: FaceAnalysis | None = None   # det_size=1024 for group photos
+face_app_small: FaceAnalysis | None = None   # det_size=640 for selfies/portraits
 
 
 def _check_tensorrt_available() -> bool:
@@ -60,35 +62,106 @@ def _get_providers() -> list:
     return providers
 
 
-def load_face_app() -> FaceAnalysis:
-    """Load and initialize the FaceAnalysis model (singleton)."""
-    global face_app
-    if face_app is not None:
-        return face_app
+def load_face_app(det_size: int = DET_SIZE) -> FaceAnalysis:
+    """Load and initialize the FaceAnalysis model for given det_size (cached)."""
+    global face_app_large, face_app_small
+
+    # Return cached instance (two slots: det_size >= 1024 and det_size < 1024)
+    if det_size >= 1024 and face_app_large is not None:
+        return face_app_large
+    if det_size < 1024 and face_app_small is not None:
+        return face_app_small
 
     providers = _get_providers()
     logger.info(
-        f"Loading InsightFace model pack={MODEL_NAME}, det_size={DET_SIZE}, "
+        f"Loading InsightFace model pack={MODEL_NAME}, det_size={det_size}, "
         f"tensorrt={USE_TENSORRT}"
     )
     fa = FaceAnalysis(
         name=MODEL_NAME,
         providers=providers,
     )
-    fa.prepare(ctx_id=0, det_size=(DET_SIZE, DET_SIZE))
-    face_app = fa
-    logger.info("FaceAnalysis initialized")
-    return face_app
+    fa.prepare(ctx_id=0, det_size=(det_size, det_size))
+
+    # Cache the instance
+    if det_size >= 1024:
+        face_app_large = fa
+    else:
+        face_app_small = fa
+
+    logger.info(f"FaceAnalysis initialized (det_size={det_size})")
+    return fa
+
+
+def get_optimal_det_size(img_height: int, img_width: int) -> int:
+    """
+    Choose det_size from the image dimensions; the first matching rule wins.
+
+    - Longest side <= 1024 → FALLBACK_DET_SIZE, default 640 (no benefit from larger det_size)
+    - Taller than wide, aspect ratio >= 1.3 → FALLBACK_DET_SIZE (face likely fills frame)
+    - Otherwise (e.g. landscape or square) → DET_SIZE, default 1024 (need to detect small faces)
+    """
+    max_dim = max(img_height, img_width)
+    min_dim = min(img_height, img_width)
+    aspect_ratio = max_dim / min_dim if min_dim > 0 else 1.0
+
+    # Small images - 640 is sufficient
+    if max_dim <= 1024:
+        return FALLBACK_DET_SIZE  # 640
+
+    # Portrait orientation (height > width) with tall aspect ratio
+    # Likely a selfie or single-person portrait
+    if img_height > img_width and aspect_ratio >= 1.3:
+        return FALLBACK_DET_SIZE  # 640
+
+    # Landscape or square, larger image - likely group photo
+    return DET_SIZE  # 1024
+
+
+class FaceServiceError(Exception):
+    """Error during face detection/embedding."""
+    pass
 
 
 async def get_faces_async(
-    fa: FaceAnalysis,
     img: np.ndarray,
     executor: ThreadPoolExecutor,
 ) -> list:
-    """Run face detection/embedding in thread pool to not block event loop."""
+    """
+    Run face detection/embedding in thread pool to not block event loop.
+
+    Automatically selects optimal det_size based on image dimensions:
+    - Portrait/selfie → 640 (handles large faces)
+    - Landscape/group → 1024 (detects small faces)
+
+    Raises:
+        FaceServiceError: If face detection fails (GPU OOM, ONNX errors, etc.)
+    """
+    h, w = img.shape[:2]
+    det_size = get_optimal_det_size(h, w)
+    fa = load_face_app(det_size)
+
     loop = asyncio.get_running_loop()
-    return await loop.run_in_executor(executor, fa.get, img)
+    try:
+        faces = await loop.run_in_executor(executor, fa.get, img)
+    except Exception as e:
+        logger.error(
+            "get_faces_async: face detection failed det_size=%d image=%dx%d error=%s",
+            det_size, w, h, str(e)
+        )
+        raise FaceServiceError(f"Face detection failed: {str(e)}") from e
+
+    logger.debug(
+        "get_faces_async: det_size=%d, image=%dx%d, faces=%d",
+        det_size, w, h, len(faces)
+    )
+
+    return faces
+
+
+def face_area(face) -> float:
+    """Calculate face bounding box area for sorting."""
+    return (face.bbox[2] - face.bbox[0]) * (face.bbox[3] - face.bbox[1])
 
 
 def to_pixel_bbox(bbox, width: int, height: int) -> BBox:
@@ -113,11 +186,19 @@ def validate_embedding(embedding: np.ndarray) -> bool:
 
 
 def normalize_embedding(embedding: np.ndarray) -> np.ndarray:
-    """Normalize embedding vector to unit length."""
+    """Normalize embedding vector to unit length.
+
+    A near-zero vector (norm < 0.01) is returned unnormalized (as float32)
+    and a warning is logged, because it cannot be normalized meaningfully.
+    """
     emb = embedding.astype(np.float32)
     norm = float(np.linalg.norm(emb))
-    if norm > 0.0:
-        emb = emb / norm
+    # Near-zero vectors can't be normalized meaningfully
+    # and would cause issues in similarity calculations
+    if norm < 0.01:
+        logger.warning("normalize_embedding: near-zero vector detected (norm=%.6f)", norm)
+        return emb  # Return as-is, validate_embedding will catch it
+    emb = emb / norm
     return emb
 
 
diff --git a/app/main.py b/app/main.py
index 824ac1e..a6adfb3 100644
--- a/app/main.py
+++ b/app/main.py
@@ -18,7 +18,7 @@ from app.config import (
     USE_TENSORRT,
 )
 from app.face import load_face_app
-from app.routes import embed
+from app.routes import benchmark, embed
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("face_service")
@@ -72,6 +72,7 @@ app = FastAPI(
 
 # Include routers
 app.include_router(embed.router)
+app.include_router(benchmark.router)
 
 
 @app.get("/healthz")
diff --git a/app/models.py b/app/models.py
index f4b5caa..b07fd32 100644
--- a/app/models.py
+++ b/app/models.py
@@ -1,7 +1,5 @@
 """Pydantic models for request/response schemas."""
 
-from typing import List
-
 from pydantic import BaseModel, HttpUrl
 
 
@@ -19,17 +17,21 @@ class BBox(BaseModel):
 class FaceEmbedding(BaseModel):
     bbox: BBox
     score: float
-    embedding: List[float]
+    embedding: list[float]
 
 
 class EmbedAvatarResponse(BaseModel):
-    embedding: List[float]
+    embedding: list[float]
     bbox: BBox
     score: float
+    processed_width: int | None = None
+    processed_height: int | None = None
 
 
 class EmbedImageResponse(BaseModel):
-    faces: List[FaceEmbedding]
+    faces: list[FaceEmbedding]
+    processed_width: int | None = None
+    processed_height: int | None = None
 
 
 class MatchResult(BaseModel):
diff --git a/app/routes/benchmark.py b/app/routes/benchmark.py
new file mode 100644
index 0000000..a3052b8
--- /dev/null
+++ b/app/routes/benchmark.py
@@ -0,0 +1,516 @@
+"""Benchmark UI for face comparison."""
+
+import asyncio
+import base64
+import logging
+from io import BytesIO
+
+import cv2
+import numpy as np
+from fastapi import APIRouter, File, UploadFile
+from fastapi.responses import HTMLResponse
+
+from app.face import face_area, get_faces_async, validate_embedding
+from app.image import read_upload_image
+from app.resources import inference_executor
+
+logger = logging.getLogger("face_service")
+
+router = APIRouter(prefix="/benchmark", tags=["benchmark"])
+
+
+def cosine_similarity(emb1: np.ndarray, emb2: np.ndarray) -> float:
+    """Compute cosine similarity between two embeddings."""
+    # Embeddings are already normalized, so dot product = cosine similarity
+    return float(np.dot(emb1, emb2))
+
+
+def draw_faces_on_image(img: np.ndarray, faces: list, face_indices: list[int]) -> np.ndarray:
+    """Draw bounding boxes and indices on image."""
+    img_copy = img.copy()
+    for idx, face in zip(face_indices, faces):
+        bbox = face.bbox.astype(int)
+        x1, y1, x2, y2 = bbox
+        # Draw rectangle
+        cv2.rectangle(img_copy, (x1, y1), (x2, y2), (0, 255, 0), 2)
+        # Draw index label
+        label = f"#{idx}"
+        (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)
+        cv2.rectangle(img_copy, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
+        cv2.putText(img_copy, label, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
+    return img_copy
+
+
+def encode_image_to_base64(img: np.ndarray, max_dim: int = 800) -> str:
+    """Encode image to base64 for display in HTML, resizing if needed."""
+    h, w = img.shape[:2]
+    if max(h, w) > max_dim:
+        scale = max_dim / max(h, w)
+        img = cv2.resize(img, (int(w * scale), int(h * scale)))
+
+    # Convert BGR to RGB for proper display
+    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    from PIL import Image
+    pil_img = Image.fromarray(img_rgb)
+    buffer = BytesIO()
+    pil_img.save(buffer, format="JPEG", quality=85)
+    return base64.b64encode(buffer.getvalue()).decode()
+
+
+def crop_face(img: np.ndarray, bbox: np.ndarray, padding: float = 0.2) -> np.ndarray:
+    """Crop face from image with padding."""
+    h, w = img.shape[:2]
+    x1, y1, x2, y2 = bbox.astype(int)
+
+    # Add padding
+    face_w = x2 - x1
+    face_h = y2 - y1
+    pad_x = int(face_w * padding)
+    pad_y = int(face_h * padding)
+
+    x1 = max(0, x1 - pad_x)
+    y1 = max(0, y1 - pad_y)
+    x2 = min(w, x2 + pad_x)
+    y2 = min(h, y2 + pad_y)
+
+    return img[y1:y2, x1:x2]
+
+
+@router.get("/", response_class=HTMLResponse)
+async def benchmark_ui():
+    """Serve the benchmark UI."""
+    return """
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Face Benchmark</title>
+    <style>
+        * { box-sizing: border-box; }
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+            max-width: 1400px;
+            margin: 0 auto;
+            padding: 20px;
+            background: #1a1a2e;
+            color: #eee;
+        }
+        h1 { text-align: center; color: #00d4ff; }
+        .upload-section {
+            display: flex;
+            gap: 20px;
+            margin-bottom: 20px;
+            flex-wrap: wrap;
+        }
+        .upload-box {
+            flex: 1;
+            min-width: 300px;
+            border: 2px dashed #444;
+            border-radius: 10px;
+            padding: 20px;
+            text-align: center;
+            background: #16213e;
+        }
+        .upload-box h3 { margin-top: 0; color: #00d4ff; }
+        .upload-box input[type="file"] {
+            display: block;
+            margin: 10px auto;
+        }
+        .preview {
+            max-width: 100%;
+            max-height: 300px;
+            margin-top: 10px;
+            border-radius: 5px;
+        }
+        button {
+            display: block;
+            width: 100%;
+            max-width: 300px;
+            margin: 20px auto;
+            padding: 15px 30px;
+            font-size: 18px;
+            background: #00d4ff;
+            color: #1a1a2e;
+            border: none;
+            border-radius: 5px;
+            cursor: pointer;
+            font-weight: bold;
+        }
+        button:hover { background: #00b8e6; }
+        button:disabled { background: #444; cursor: not-allowed; }
+        #results {
+            margin-top: 30px;
+        }
+        .results-grid {
+            display: flex;
+            gap: 20px;
+            flex-wrap: wrap;
+        }
+        .result-box {
+            flex: 1;
+            min-width: 300px;
+            background: #16213e;
+            border-radius: 10px;
+            padding: 20px;
+        }
+        .result-box h3 { margin-top: 0; color: #00d4ff; }
+        .result-box img {
+            max-width: 100%;
+            border-radius: 5px;
+        }
+        .similarity-table {
+            width: 100%;
+            margin-top: 20px;
+            background: #16213e;
+            border-radius: 10px;
+            padding: 20px;
+        }
+        .similarity-table h3 { margin-top: 0; color: #00d4ff; }
+        table {
+            width: 100%;
+            border-collapse: collapse;
+            margin-top: 10px;
+        }
+        th, td {
+            padding: 10px;
+            text-align: center;
+            border: 1px solid #333;
+        }
+        th { background: #0f3460; }
+        .match-high { background: #00c853; color: #000; font-weight: bold; }
+        .match-medium { background: #ffab00; color: #000; }
+        .match-low { background: #333; }
+        .loading {
+            text-align: center;
+            padding: 40px;
+            font-size: 18px;
+        }
+        .spinner {
+            border: 4px solid #333;
+            border-top: 4px solid #00d4ff;
+            border-radius: 50%;
+            width: 40px;
+            height: 40px;
+            animation: spin 1s linear infinite;
+            margin: 20px auto;
+        }
+        @keyframes spin {
+            0% { transform: rotate(0deg); }
+            100% { transform: rotate(360deg); }
+        }
+        .error { color: #ff5252; text-align: center; padding: 20px; }
+        .stats { margin-top: 10px; font-size: 14px; color: #888; }
+        .best-match {
+            background: #16213e;
+            border-radius: 10px;
+            padding: 20px;
+            margin-top: 20px;
+        }
+        .best-match h3 { color: #00d4ff; margin-top: 0; }
+        .match-item {
+            display: flex;
+            align-items: center;
+            gap: 15px;
+            padding: 15px;
+            background: #0f3460;
+            border-radius: 8px;
+            margin-bottom: 15px;
+        }
+        .match-score {
+            font-size: 28px;
+            font-weight: bold;
+            min-width: 90px;
+            text-align: center;
+        }
+        .face-crop {
+            width: 100px;
+            height: 100px;
+            object-fit: cover;
+            border-radius: 8px;
+            border: 2px solid #00d4ff;
+        }
+        .match-label {
+            font-size: 14px;
+            color: #888;
+            margin-left: auto;
+        }
+    </style>
+</head>
+<body>
+    <h1>Face Comparison Benchmark</h1>
+
+    <form id="uploadForm">
+        <div class="upload-section">
+            <div class="upload-box">
+                <h3>Image 1 (Source)</h3>
+                <p>Upload an image to find faces from</p>
+                <input type="file" id="image1" name="image1" accept="image/*" required>
+                <img id="preview1" class="preview" style="display:none;">
+            </div>
+            <div class="upload-box">
+                <h3>Image 2 (Target)</h3>
+                <p>Upload an image to search for matching faces</p>
+                <input type="file" id="image2" name="image2" accept="image/*" required>
+                <img id="preview2" class="preview" style="display:none;">
+            </div>
+        </div>
+        <button type="submit" id="compareBtn">Compare Faces</button>
+    </form>
+
+    <div id="results"></div>
+
+    <script>
+        // Preview images on selection
+        document.getElementById('image1').addEventListener('change', function(e) {
+            previewImage(e.target, 'preview1');
+        });
+        document.getElementById('image2').addEventListener('change', function(e) {
+            previewImage(e.target, 'preview2');
+        });
+
+        function previewImage(input, previewId) {
+            const preview = document.getElementById(previewId);
+            if (input.files && input.files[0]) {
+                const reader = new FileReader();
+                reader.onload = function(e) {
+                    preview.src = e.target.result;
+                    preview.style.display = 'block';
+                };
+                reader.readAsDataURL(input.files[0]);
+            }
+        }
+
+        document.getElementById('uploadForm').addEventListener('submit', async function(e) {
+            e.preventDefault();
+
+            const image1 = document.getElementById('image1').files[0];
+            const image2 = document.getElementById('image2').files[0];
+
+            if (!image1 || !image2) {
+                alert('Please select both images');
+                return;
+            }
+
+            const resultsDiv = document.getElementById('results');
+            const btn = document.getElementById('compareBtn');
+
+            btn.disabled = true;
+            btn.textContent = 'Processing...';
+            resultsDiv.innerHTML = '<div class="loading"><div class="spinner"></div>Detecting faces and computing embeddings...</div>';
+
+            const formData = new FormData();
+            formData.append('image1', image1);
+            formData.append('image2', image2);
+
+            try {
+                const startTime = performance.now();
+                const response = await fetch('/benchmark/compare', {
+                    method: 'POST',
+                    body: formData
+                });
+                const endTime = performance.now();
+
+                if (!response.ok) {
+                    const error = await response.json();
+                    throw new Error(error.detail || 'Comparison failed');
+                }
+
+                const data = await response.json();
+                displayResults(data, endTime - startTime);
+            } catch (error) {
+                resultsDiv.innerHTML = `<div class="error">Error: ${error.message}</div>`;
+            } finally {
+                btn.disabled = false;
+                btn.textContent = 'Compare Faces';
+            }
+        });
+
+        function displayResults(data, processingTime) {
+            const resultsDiv = document.getElementById('results');
+
+            let html = `
+                <div class="stats">Processing time: ${(processingTime/1000).toFixed(2)}s |
+                Image 1: ${data.image1_faces} face(s) | Image 2: ${data.image2_faces} face(s)</div>
+
+                <div class="results-grid">
+                    <div class="result-box">
+                        <h3>Image 1 - ${data.image1_faces} face(s) detected</h3>
+                        <img src="data:image/jpeg;base64,${data.image1_annotated}" alt="Image 1">
+                    </div>
+                    <div class="result-box">
+                        <h3>Image 2 - ${data.image2_faces} face(s) detected</h3>
+                        <img src="data:image/jpeg;base64,${data.image2_annotated}" alt="Image 2">
+                    </div>
+                </div>
+            `;
+
+            if (data.similarities.length > 0) {
+                html += `
+                    <div class="best-match">
+                        <h3>Best Matches (Image 1 faces found in Image 2)</h3>
+                `;
+
+                for (const match of data.best_matches) {
+                    const scorePercent = (match.similarity * 100).toFixed(1);
+                    const scoreClass = match.similarity >= 0.5 ? 'match-high' :
+                                       match.similarity >= 0.3 ? 'match-medium' : 'match-low';
+                    html += `
+                        <div class="match-item">
+                            <img src="data:image/jpeg;base64,${match.face1_crop}" class="face-crop" alt="Face ${match.face1_idx}">
+                            <span class="match-score ${scoreClass}">${scorePercent}%</span>
+                            <img src="data:image/jpeg;base64,${match.face2_crop}" class="face-crop" alt="Face ${match.face2_idx}">
+                            <span class="match-label">Face #${match.face1_idx} ↔ Face #${match.face2_idx}</span>
+                        </div>
+                    `;
+                }
+
+                html += '</div>';
+
+                // Similarity matrix
+                html += `
+                    <div class="similarity-table">
+                        <h3>Full Similarity Matrix</h3>
+                        <table>
+                            <tr>
+                                <th>Image 1 \\ Image 2</th>
+                `;
+
+                for (let j = 0; j < data.image2_faces; j++) {
+                    html += `<th>Face #${j}</th>`;
+                }
+                html += '</tr>';
+
+                for (let i = 0; i < data.image1_faces; i++) {
+                    html += `<tr><th>Face #${i}</th>`;
+                    for (let j = 0; j < data.image2_faces; j++) {
+                        const sim = data.similarity_matrix[i][j];
+                        const percent = (sim * 100).toFixed(1);
+                        const cls = sim >= 0.5 ? 'match-high' : sim >= 0.3 ? 'match-medium' : 'match-low';
+                        html += `<td class="${cls}">${percent}%</td>`;
+                    }
+                    html += '</tr>';
+                }
+
+                html += '</table></div>';
+            } else if (data.image1_faces === 0 || data.image2_faces === 0) {
+                html += '<div class="error">No faces detected in one or both images</div>';
+            }
+
+            resultsDiv.innerHTML = html;
+        }
+    </script>
+</body>
+</html>
+"""
+
+
+@router.post("/compare")
+async def compare_faces(
+    image1: UploadFile = File(...),
+    image2: UploadFile = File(...),
+):
+    """
+    Compare faces between two uploaded images.
+
+    Returns:
+        - Annotated images with face bounding boxes
+        - Similarity matrix between all detected faces
+        - Top 3 best matches (one candidate per image1 face), with face crops
+    """
+    logger.info("benchmark/compare: image1=%s image2=%s", image1.filename, image2.filename)
+
+    # Read both images
+    data1 = await image1.read()
+    data2 = await image2.read()
+
+    img1 = read_upload_image(data1, image1.filename or "image1")
+    img2 = read_upload_image(data2, image2.filename or "image2")
+
+    # Detect faces in both images concurrently
+    faces1, faces2 = await asyncio.gather(
+        get_faces_async(img1, inference_executor),
+        get_faces_async(img2, inference_executor),
+    )
+
+    logger.info(
+        "benchmark/compare: detected %d faces in image1, %d faces in image2",
+        len(faces1), len(faces2)
+    )
+
+    # Sort faces by area (largest first)
+    faces1.sort(key=face_area, reverse=True)
+    faces2.sort(key=face_area, reverse=True)
+
+    # Draw faces on images
+    face1_indices = list(range(len(faces1)))
+    face2_indices = list(range(len(faces2)))
+
+    img1_annotated = draw_faces_on_image(img1, faces1, face1_indices)
+    img2_annotated = draw_faces_on_image(img2, faces2, face2_indices)
+
+    # Encode images for response
+    img1_b64 = encode_image_to_base64(img1_annotated)
+    img2_b64 = encode_image_to_base64(img2_annotated)
+
+    # Compute similarity matrix
+    similarity_matrix: list[list[float]] = []
+    best_matches: list[dict] = []
+
+    for i, f1 in enumerate(faces1):
+        emb1 = f1.normed_embedding.astype(np.float32)
+        if not validate_embedding(emb1):
+            similarity_matrix.append([0.0] * len(faces2))
+            continue
+
+        row = []
+        best_sim = -1.0
+        best_j = -1
+
+        for j, f2 in enumerate(faces2):
+            emb2 = f2.normed_embedding.astype(np.float32)
+            if not validate_embedding(emb2):
+                row.append(0.0)
+                continue
+
+            sim = cosine_similarity(emb1, emb2)
+            row.append(sim)
+
+            if sim > best_sim:
+                best_sim = sim
+                best_j = j
+
+        similarity_matrix.append(row)
+
+        if best_j >= 0:
+            best_matches.append({
+                "face1_idx": i,
+                "face2_idx": best_j,
+                "similarity": best_sim,
+            })
+
+    # Sort best matches by similarity and keep top 3
+    best_matches.sort(key=lambda m: m["similarity"], reverse=True)
+    best_matches = best_matches[:3]
+
+    # Add cropped face images for top 3 matches
+    for match in best_matches:
+        i, j = match["face1_idx"], match["face2_idx"]
+        crop1 = crop_face(img1, faces1[i].bbox)
+        crop2 = crop_face(img2, faces2[j].bbox)
+        match["face1_crop"] = encode_image_to_base64(crop1, max_dim=150)
+        match["face2_crop"] = encode_image_to_base64(crop2, max_dim=150)
+
+    return {
+        "image1_faces": len(faces1),
+        "image2_faces": len(faces2),
+        "image1_annotated": img1_b64,
+        "image2_annotated": img2_b64,
+        "similarity_matrix": similarity_matrix,
+        "similarities": [
+            {"face1": i, "face2": j, "score": similarity_matrix[i][j]}
+            for i in range(len(faces1))
+            for j in range(len(faces2))
+        ],
+        "best_matches": best_matches,
+    }
diff --git a/app/routes/embed.py b/app/routes/embed.py
index 62f2aec..06fe970 100644
--- a/app/routes/embed.py
+++ b/app/routes/embed.py
@@ -1,12 +1,13 @@
 """Face embedding endpoints."""
 
 import logging
-from typing import List
 
 import numpy as np
 from fastapi import APIRouter, HTTPException
 
 from app.face import (
+    FaceServiceError,
+    face_area,
     fallback_avatar_embedding,
     get_faces_async,
     load_face_app,
@@ -14,20 +15,34 @@ from app.face import (
     validate_embedding,
 )
 from app.image import download_image
-from app.resources import http_client, inference_executor
 from app.models import (
-    BBox,
     EmbedAvatarResponse,
     EmbedImageResponse,
     EmbedRequest,
     FaceEmbedding,
 )
+from app.resources import http_client, inference_executor
+
+# Expected embedding dimension from buffalo_l model
+EXPECTED_EMBEDDING_DIM = 512
 
 logger = logging.getLogger("face_service")
 
 router = APIRouter()
 
 
+def validate_face_embedding(emb: np.ndarray, context: str) -> tuple[bool, str | None]:
+    """
+    Validate embedding dimension and values.
+    Returns (is_valid, error_message).
+    """
+    if len(emb) != EXPECTED_EMBEDDING_DIM:
+        return False, f"{context}: unexpected embedding dimension {len(emb)}, expected {EXPECTED_EMBEDDING_DIM}"
+    if not validate_embedding(emb):
+        return False, f"{context}: embedding contains NaN/Inf values"
+    return True, None
+
+
 @router.post("/embed-avatar", response_model=EmbedAvatarResponse)
 async def embed_avatar(req: EmbedRequest):
     """
@@ -37,11 +52,14 @@ async def embed_avatar(req: EmbedRequest):
     falls back to center crop embedding with score=0.0.
     """
     logger.info("embed_avatar: image_url=%s", req.image_url)
-    fa = load_face_app()
     img = await download_image(str(req.image_url), http_client, inference_executor)
     h, w = img.shape[:2]
 
-    faces = await get_faces_async(fa, img, inference_executor)
+    try:
+        faces = await get_faces_async(img, inference_executor)
+    except FaceServiceError as e:
+        logger.error("embed_avatar: face service error: %s", str(e))
+        raise HTTPException(status_code=503, detail="Face service unavailable")
     if len(faces) == 0:
         logger.warning(
             "embed_avatar: no faces detected image_url=%s size=%dx%d, using fallback",
@@ -49,6 +67,7 @@ async def embed_avatar(req: EmbedRequest):
             w,
             h,
         )
+        fa = load_face_app()  # Need face_app for recognition model
         fallback = fallback_avatar_embedding(fa, img, w, h)
         if fallback is None:
             raise HTTPException(
@@ -63,25 +82,28 @@ async def embed_avatar(req: EmbedRequest):
             score,
             len(emb),
         )
-        return EmbedAvatarResponse(embedding=emb, bbox=bbox, score=score)
+        return EmbedAvatarResponse(
+            embedding=emb,
+            bbox=bbox,
+            score=score,
+            processed_width=w,
+            processed_height=h,
+        )
 
     # Sort by face area (largest first)
-    faces.sort(
-        key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
-        reverse=True,
-    )
+    faces.sort(key=face_area, reverse=True)
     face = faces[0]
 
     emb = face.normed_embedding.astype(np.float32)
-    
-    # Validate embedding
-    if not validate_embedding(emb):
-        logger.error("embed_avatar: embedding contains NaN/Inf values")
+
+    is_valid, error_msg = validate_face_embedding(emb, "embed_avatar")
+    if not is_valid:
+        logger.error(error_msg)
         raise HTTPException(
             status_code=422,
             detail="Failed to generate valid face embedding",
         )
-    
+
     emb_list = emb.tolist()
     bbox = to_pixel_bbox(face.bbox, w, h)
     score = float(getattr(face, "det_score", 1.0))
@@ -93,7 +115,13 @@ async def embed_avatar(req: EmbedRequest):
         len(emb_list),
     )
 
-    return EmbedAvatarResponse(embedding=emb_list, bbox=bbox, score=score)
+    return EmbedAvatarResponse(
+        embedding=emb_list,
+        bbox=bbox,
+        score=score,
+        processed_width=w,
+        processed_height=h,
+    )
 
 
 @router.post("/embed-image", response_model=EmbedImageResponse)
@@ -104,11 +132,15 @@ async def embed_image(req: EmbedRequest):
     Returns all detected faces sorted by detection score (highest first).
     Returns empty list if no faces detected.
     """
-    fa = load_face_app()
     img = await download_image(str(req.image_url), http_client, inference_executor)
     h, w = img.shape[:2]
 
-    faces = await get_faces_async(fa, img, inference_executor)
+    try:
+        faces = await get_faces_async(img, inference_executor)
+    except FaceServiceError as e:
+        logger.error("embed_image: face service error: %s", str(e))
+        raise HTTPException(status_code=503, detail="Face service unavailable")
+
     if len(faces) == 0:
         logger.warning(
             "embed_image: no faces detected image_url=%s size=%dx%d",
@@ -116,7 +148,7 @@ async def embed_image(req: EmbedRequest):
             w,
             h,
         )
-        return EmbedImageResponse(faces=[])
+        return EmbedImageResponse(faces=[], processed_width=w, processed_height=h)
 
     logger.info(
         "embed_image: detected %d faces image_url=%s size=%dx%d",
@@ -132,19 +164,19 @@ async def embed_image(req: EmbedRequest):
         reverse=True,
     )
 
-    result: List[FaceEmbedding] = []
+    result: list[FaceEmbedding] = []
     for f in faces:
         emb = f.normed_embedding.astype(np.float32)
-        
-        # Skip faces with invalid embeddings
-        if not validate_embedding(emb):
-            logger.warning("embed_image: skipping face with NaN/Inf embedding")
+
+        is_valid, error_msg = validate_face_embedding(emb, "embed_image")
+        if not is_valid:
+            logger.warning(error_msg)
             continue
-        
+
         emb_list = emb.tolist()
         bbox = to_pixel_bbox(f.bbox, w, h)
         score = float(getattr(f, "det_score", 1.0))
         result.append(FaceEmbedding(bbox=bbox, score=score, embedding=emb_list))
 
-    return EmbedImageResponse(faces=result)
+    return EmbedImageResponse(faces=result, processed_width=w, processed_height=h)