Add adaptive det_size, benchmark UI, and code simplification

- Add adaptive det_size selection based on image characteristics
  (portraits use 640, landscapes use 1024)
- Add FaceServiceError for better error handling in face detection
- Add benchmark UI for comparing faces between two images
- Extract face_area() helper to eliminate duplicate sorting logic
- Modernize type hints (List -> list, Tuple -> tuple)
- Add processed_width/height to embed responses
- Add embedding dimension validation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Hung Luu 2026-02-05 10:49:50 +07:00
parent b8f167d336
commit a7595603b4
6 changed files with 684 additions and 50 deletions

View File

@ -5,6 +5,8 @@ import os
# Model configuration
MODEL_NAME = os.getenv("FACE_MODEL_NAME", "buffalo_l")
DET_SIZE = int(os.getenv("FACE_DET_SIZE", "1024"))
# Fallback det_size for large faces (close-up selfies)
FALLBACK_DET_SIZE = int(os.getenv("FACE_DET_SIZE_FALLBACK", "640"))
USE_TENSORRT = os.getenv("USE_TENSORRT", "true").lower() in ("true", "1", "yes")
# Image processing limits

View File

@ -7,12 +7,14 @@ from concurrent.futures import ThreadPoolExecutor
import numpy as np
from insightface.app import FaceAnalysis
from app.config import DET_SIZE, MODEL_NAME, USE_TENSORRT
from app.config import DET_SIZE, FALLBACK_DET_SIZE, MODEL_NAME, USE_TENSORRT
from app.models import BBox
logger = logging.getLogger("face_service")
face_app: FaceAnalysis | None = None
# Two face apps for different image types (lazy loaded)
face_app_large: FaceAnalysis | None = None # det_size=1024 for group photos
face_app_small: FaceAnalysis | None = None # det_size=640 for selfies/portraits
def _check_tensorrt_available() -> bool:
@ -60,35 +62,106 @@ def _get_providers() -> list:
return providers
def load_face_app() -> FaceAnalysis:
"""Load and initialize the FaceAnalysis model (singleton)."""
global face_app
if face_app is not None:
return face_app
def load_face_app(det_size: int = DET_SIZE) -> FaceAnalysis:
"""Load and initialize the FaceAnalysis model for given det_size (cached)."""
global face_app_large, face_app_small
# Return cached instance if available
if det_size >= 1024 and face_app_large is not None:
return face_app_large
if det_size < 1024 and face_app_small is not None:
return face_app_small
providers = _get_providers()
logger.info(
f"Loading InsightFace model pack={MODEL_NAME}, det_size={DET_SIZE}, "
f"Loading InsightFace model pack={MODEL_NAME}, det_size={det_size}, "
f"tensorrt={USE_TENSORRT}"
)
fa = FaceAnalysis(
name=MODEL_NAME,
providers=providers,
)
fa.prepare(ctx_id=0, det_size=(DET_SIZE, DET_SIZE))
face_app = fa
logger.info("FaceAnalysis initialized")
return face_app
fa.prepare(ctx_id=0, det_size=(det_size, det_size))
# Cache the instance
if det_size >= 1024:
face_app_large = fa
else:
face_app_small = fa
logger.info(f"FaceAnalysis initialized (det_size={det_size})")
return fa
def get_optimal_det_size(img_height: int, img_width: int) -> int:
    """Pick a detector input size suited to the image's geometry.

    Heuristics:
      - Small images (longest side <= 1024 px) -> 640; a bigger detector
        input would only upsample, with no detection benefit.
      - Tall portrait images (aspect ratio >= 1.3) -> 640; likely a
        selfie or single-person portrait where the face fills the frame.
      - Large landscape/square images -> 1024; likely a group photo
        where small faces need the extra resolution.

    Args:
        img_height: Image height in pixels.
        img_width: Image width in pixels.

    Returns:
        Either FALLBACK_DET_SIZE (640) or DET_SIZE (1024).
    """
    longest = max(img_height, img_width)
    shortest = min(img_height, img_width)
    # Guard against degenerate zero-sized dimensions.
    ratio = (longest / shortest) if shortest > 0 else 1.0

    # Small image: 640 is sufficient.
    if longest <= 1024:
        return FALLBACK_DET_SIZE  # 640
    # Tall portrait orientation: most likely a single close-up subject.
    if img_height > img_width and ratio >= 1.3:
        return FALLBACK_DET_SIZE  # 640
    # Large landscape or square image: assume group photo with small faces.
    return DET_SIZE  # 1024
class FaceServiceError(Exception):
    """Raised when face detection or embedding extraction fails."""
async def get_faces_async(
fa: FaceAnalysis,
img: np.ndarray,
executor: ThreadPoolExecutor,
) -> list:
"""Run face detection/embedding in thread pool to not block event loop."""
"""
Run face detection/embedding in thread pool to not block event loop.
Automatically selects optimal det_size based on image dimensions:
- Portrait/selfie -> 640 (handles large faces)
- Landscape/group -> 1024 (detects small faces)
Raises:
FaceServiceError: If face detection fails (GPU OOM, ONNX errors, etc.)
"""
h, w = img.shape[:2]
det_size = get_optimal_det_size(h, w)
fa = load_face_app(det_size)
loop = asyncio.get_running_loop()
return await loop.run_in_executor(executor, fa.get, img)
try:
faces = await loop.run_in_executor(executor, fa.get, img)
except Exception as e:
logger.error(
"get_faces_async: face detection failed det_size=%d image=%dx%d error=%s",
det_size, w, h, str(e)
)
raise FaceServiceError(f"Face detection failed: {str(e)}") from e
logger.debug(
"get_faces_async: det_size=%d, image=%dx%d, faces=%d",
det_size, w, h, len(faces)
)
return faces
def face_area(face) -> float:
    """Return the bounding-box area of a detected face (sort key helper)."""
    x1, y1, x2, y2 = face.bbox[0], face.bbox[1], face.bbox[2], face.bbox[3]
    return (x2 - x1) * (y2 - y1)
def to_pixel_bbox(bbox, width: int, height: int) -> BBox:
@ -113,11 +186,19 @@ def validate_embedding(embedding: np.ndarray) -> bool:
def normalize_embedding(embedding: np.ndarray) -> np.ndarray:
"""Normalize embedding vector to unit length."""
"""Normalize embedding vector to unit length.
Returns None-equivalent behavior if embedding is near-zero vector,
which would cause NaN in similarity calculations.
"""
emb = embedding.astype(np.float32)
norm = float(np.linalg.norm(emb))
if norm > 0.0:
emb = emb / norm
# Near-zero vectors can't be normalized meaningfully
# and would cause issues in similarity calculations
if norm < 0.01:
logger.warning("normalize_embedding: near-zero vector detected (norm=%.6f)", norm)
return emb # Return as-is, validate_embedding will catch it
emb = emb / norm
return emb

View File

@ -18,7 +18,7 @@ from app.config import (
USE_TENSORRT,
)
from app.face import load_face_app
from app.routes import embed
from app.routes import benchmark, embed
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("face_service")
@ -72,6 +72,7 @@ app = FastAPI(
# Include routers
app.include_router(embed.router)
app.include_router(benchmark.router)
@app.get("/healthz")

View File

@ -1,7 +1,5 @@
"""Pydantic models for request/response schemas."""
from typing import List
from pydantic import BaseModel, HttpUrl
@ -19,17 +17,21 @@ class BBox(BaseModel):
class FaceEmbedding(BaseModel):
bbox: BBox
score: float
embedding: List[float]
embedding: list[float]
class EmbedAvatarResponse(BaseModel):
embedding: List[float]
embedding: list[float]
bbox: BBox
score: float
processed_width: int | None = None
processed_height: int | None = None
class EmbedImageResponse(BaseModel):
faces: List[FaceEmbedding]
faces: list[FaceEmbedding]
processed_width: int | None = None
processed_height: int | None = None
class MatchResult(BaseModel):

516
app/routes/benchmark.py Normal file
View File

@ -0,0 +1,516 @@
"""Benchmark UI for face comparison."""
import asyncio
import base64
import logging
from io import BytesIO
import cv2
import numpy as np
from fastapi import APIRouter, File, UploadFile
from fastapi.responses import HTMLResponse
from app.face import face_area, get_faces_async, validate_embedding
from app.image import read_upload_image
from app.resources import inference_executor
logger = logging.getLogger("face_service")
router = APIRouter(prefix="/benchmark", tags=["benchmark"])
def cosine_similarity(emb1: np.ndarray, emb2: np.ndarray) -> float:
    """Cosine similarity of two embeddings.

    Both inputs are assumed to be L2-normalized already, so the plain
    dot product equals the cosine of the angle between them.
    """
    similarity = np.dot(emb1, emb2)
    return float(similarity)
def draw_faces_on_image(img: np.ndarray, faces: list, face_indices: list[int]) -> np.ndarray:
    """Return a copy of *img* with a labelled green box around each face.

    Args:
        img: BGR image (OpenCV convention); not modified.
        faces: Detected face objects with a ``bbox`` attribute.
        face_indices: Display index for each face, zipped positionally.
    """
    annotated = img.copy()
    for idx, face in zip(face_indices, faces):
        x1, y1, x2, y2 = face.bbox.astype(int)
        # Face outline.
        cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Filled label background sized to the rendered text, then the
        # index drawn in black on top of it.
        label = f"#{idx}"
        (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)
        cv2.rectangle(annotated, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
        cv2.putText(annotated, label, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
    return annotated
def encode_image_to_base64(img: np.ndarray, max_dim: int = 800) -> str:
    """Encode a BGR image as a base64 JPEG string for inline HTML display.

    The image is downscaled so its longest side is at most *max_dim*
    before encoding, keeping response payloads small.

    Args:
        img: BGR image (OpenCV convention).
        max_dim: Maximum allowed width/height in pixels after resizing.

    Returns:
        Base64-encoded JPEG bytes (no ``data:`` URI prefix).

    Raises:
        ValueError: If JPEG encoding fails.
    """
    h, w = img.shape[:2]
    if max(h, w) > max_dim:
        scale = max_dim / max(h, w)
        # INTER_AREA is the recommended interpolation for shrinking.
        img = cv2.resize(img, (int(w * scale), int(h * scale)), interpolation=cv2.INTER_AREA)
    # cv2.imencode expects BGR input, so the previous BGR->RGB conversion
    # and PIL round-trip were unnecessary; encode directly instead.
    ok, buf = cv2.imencode(".jpg", img, [cv2.IMWRITE_JPEG_QUALITY, 85])
    if not ok:
        raise ValueError("Failed to encode image as JPEG")
    return base64.b64encode(buf.tobytes()).decode()
def crop_face(img: np.ndarray, bbox: np.ndarray, padding: float = 0.2) -> np.ndarray:
    """Crop a face region from *img*, expanded by *padding* on each side.

    The padded box is clamped to the image bounds, so crops near an edge
    may be smaller than requested.

    Args:
        img: Source image array (H x W x C).
        bbox: Float [x1, y1, x2, y2] box; truncated to ints.
        padding: Fraction of the box width/height to add on each side.
    """
    img_h, img_w = img.shape[:2]
    x1, y1, x2, y2 = bbox.astype(int)
    pad_x = int((x2 - x1) * padding)
    pad_y = int((y2 - y1) * padding)
    left = max(0, x1 - pad_x)
    top = max(0, y1 - pad_y)
    right = min(img_w, x2 + pad_x)
    bottom = min(img_h, y2 + pad_y)
    return img[top:bottom, left:right]
@router.get("/", response_class=HTMLResponse)
async def benchmark_ui():
    """Serve the benchmark UI.

    Returns a single self-contained HTML page (markup, CSS and JS all
    inlined, no static assets). The page lets the user pick two images,
    POSTs them as multipart form data to /benchmark/compare, and renders
    the annotated images, top matches, and full similarity matrix from
    the JSON response. The 0.5/0.3 thresholds in the JS only control the
    colour coding of scores, not the backend comparison itself.
    """
    return """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Face Benchmark</title>
<style>
* { box-sizing: border-box; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
max-width: 1400px;
margin: 0 auto;
padding: 20px;
background: #1a1a2e;
color: #eee;
}
h1 { text-align: center; color: #00d4ff; }
.upload-section {
display: flex;
gap: 20px;
margin-bottom: 20px;
flex-wrap: wrap;
}
.upload-box {
flex: 1;
min-width: 300px;
border: 2px dashed #444;
border-radius: 10px;
padding: 20px;
text-align: center;
background: #16213e;
}
.upload-box h3 { margin-top: 0; color: #00d4ff; }
.upload-box input[type="file"] {
display: block;
margin: 10px auto;
}
.preview {
max-width: 100%;
max-height: 300px;
margin-top: 10px;
border-radius: 5px;
}
button {
display: block;
width: 100%;
max-width: 300px;
margin: 20px auto;
padding: 15px 30px;
font-size: 18px;
background: #00d4ff;
color: #1a1a2e;
border: none;
border-radius: 5px;
cursor: pointer;
font-weight: bold;
}
button:hover { background: #00b8e6; }
button:disabled { background: #444; cursor: not-allowed; }
#results {
margin-top: 30px;
}
.results-grid {
display: flex;
gap: 20px;
flex-wrap: wrap;
}
.result-box {
flex: 1;
min-width: 300px;
background: #16213e;
border-radius: 10px;
padding: 20px;
}
.result-box h3 { margin-top: 0; color: #00d4ff; }
.result-box img {
max-width: 100%;
border-radius: 5px;
}
.similarity-table {
width: 100%;
margin-top: 20px;
background: #16213e;
border-radius: 10px;
padding: 20px;
}
.similarity-table h3 { margin-top: 0; color: #00d4ff; }
table {
width: 100%;
border-collapse: collapse;
margin-top: 10px;
}
th, td {
padding: 10px;
text-align: center;
border: 1px solid #333;
}
th { background: #0f3460; }
.match-high { background: #00c853; color: #000; font-weight: bold; }
.match-medium { background: #ffab00; color: #000; }
.match-low { background: #333; }
.loading {
text-align: center;
padding: 40px;
font-size: 18px;
}
.spinner {
border: 4px solid #333;
border-top: 4px solid #00d4ff;
border-radius: 50%;
width: 40px;
height: 40px;
animation: spin 1s linear infinite;
margin: 20px auto;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
.error { color: #ff5252; text-align: center; padding: 20px; }
.stats { margin-top: 10px; font-size: 14px; color: #888; }
.best-match {
background: #16213e;
border-radius: 10px;
padding: 20px;
margin-top: 20px;
}
.best-match h3 { color: #00d4ff; margin-top: 0; }
.match-item {
display: flex;
align-items: center;
gap: 15px;
padding: 15px;
background: #0f3460;
border-radius: 8px;
margin-bottom: 15px;
}
.match-score {
font-size: 28px;
font-weight: bold;
min-width: 90px;
text-align: center;
}
.face-crop {
width: 100px;
height: 100px;
object-fit: cover;
border-radius: 8px;
border: 2px solid #00d4ff;
}
.match-label {
font-size: 14px;
color: #888;
margin-left: auto;
}
</style>
</head>
<body>
<h1>Face Comparison Benchmark</h1>
<form id="uploadForm">
<div class="upload-section">
<div class="upload-box">
<h3>Image 1 (Source)</h3>
<p>Upload an image to find faces from</p>
<input type="file" id="image1" name="image1" accept="image/*" required>
<img id="preview1" class="preview" style="display:none;">
</div>
<div class="upload-box">
<h3>Image 2 (Target)</h3>
<p>Upload an image to search for matching faces</p>
<input type="file" id="image2" name="image2" accept="image/*" required>
<img id="preview2" class="preview" style="display:none;">
</div>
</div>
<button type="submit" id="compareBtn">Compare Faces</button>
</form>
<div id="results"></div>
<script>
// Preview images on selection
document.getElementById('image1').addEventListener('change', function(e) {
previewImage(e.target, 'preview1');
});
document.getElementById('image2').addEventListener('change', function(e) {
previewImage(e.target, 'preview2');
});
function previewImage(input, previewId) {
const preview = document.getElementById(previewId);
if (input.files && input.files[0]) {
const reader = new FileReader();
reader.onload = function(e) {
preview.src = e.target.result;
preview.style.display = 'block';
};
reader.readAsDataURL(input.files[0]);
}
}
document.getElementById('uploadForm').addEventListener('submit', async function(e) {
e.preventDefault();
const image1 = document.getElementById('image1').files[0];
const image2 = document.getElementById('image2').files[0];
if (!image1 || !image2) {
alert('Please select both images');
return;
}
const resultsDiv = document.getElementById('results');
const btn = document.getElementById('compareBtn');
btn.disabled = true;
btn.textContent = 'Processing...';
resultsDiv.innerHTML = '<div class="loading"><div class="spinner"></div>Detecting faces and computing embeddings...</div>';
const formData = new FormData();
formData.append('image1', image1);
formData.append('image2', image2);
try {
const startTime = performance.now();
const response = await fetch('/benchmark/compare', {
method: 'POST',
body: formData
});
const endTime = performance.now();
if (!response.ok) {
const error = await response.json();
throw new Error(error.detail || 'Comparison failed');
}
const data = await response.json();
displayResults(data, endTime - startTime);
} catch (error) {
resultsDiv.innerHTML = `<div class="error">Error: ${error.message}</div>`;
} finally {
btn.disabled = false;
btn.textContent = 'Compare Faces';
}
});
function displayResults(data, processingTime) {
const resultsDiv = document.getElementById('results');
let html = `
<div class="stats">Processing time: ${(processingTime/1000).toFixed(2)}s |
Image 1: ${data.image1_faces} face(s) | Image 2: ${data.image2_faces} face(s)</div>
<div class="results-grid">
<div class="result-box">
<h3>Image 1 - ${data.image1_faces} face(s) detected</h3>
<img src="data:image/jpeg;base64,${data.image1_annotated}" alt="Image 1">
</div>
<div class="result-box">
<h3>Image 2 - ${data.image2_faces} face(s) detected</h3>
<img src="data:image/jpeg;base64,${data.image2_annotated}" alt="Image 2">
</div>
</div>
`;
if (data.similarities.length > 0) {
html += `
<div class="best-match">
<h3>Best Matches (Image 1 faces found in Image 2)</h3>
`;
for (const match of data.best_matches) {
const scorePercent = (match.similarity * 100).toFixed(1);
const scoreClass = match.similarity >= 0.5 ? 'match-high' :
match.similarity >= 0.3 ? 'match-medium' : 'match-low';
html += `
<div class="match-item">
<img src="data:image/jpeg;base64,${match.face1_crop}" class="face-crop" alt="Face ${match.face1_idx}">
<span class="match-score ${scoreClass}">${scorePercent}%</span>
<img src="data:image/jpeg;base64,${match.face2_crop}" class="face-crop" alt="Face ${match.face2_idx}">
<span class="match-label">Face #${match.face1_idx} ↔ Face #${match.face2_idx}</span>
</div>
`;
}
html += '</div>';
// Similarity matrix
html += `
<div class="similarity-table">
<h3>Full Similarity Matrix</h3>
<table>
<tr>
<th>Image 1 \\ Image 2</th>
`;
for (let j = 0; j < data.image2_faces; j++) {
html += `<th>Face #${j}</th>`;
}
html += '</tr>';
for (let i = 0; i < data.image1_faces; i++) {
html += `<tr><th>Face #${i}</th>`;
for (let j = 0; j < data.image2_faces; j++) {
const sim = data.similarity_matrix[i][j];
const percent = (sim * 100).toFixed(1);
const cls = sim >= 0.5 ? 'match-high' : sim >= 0.3 ? 'match-medium' : 'match-low';
html += `<td class="${cls}">${percent}%</td>`;
}
html += '</tr>';
}
html += '</table></div>';
} else if (data.image1_faces === 0 || data.image2_faces === 0) {
html += '<div class="error">No faces detected in one or both images</div>';
}
resultsDiv.innerHTML = html;
}
</script>
</body>
</html>
"""
@router.post("/compare")
async def compare_faces(
    image1: UploadFile = File(...),
    image2: UploadFile = File(...),
):
    """
    Compare faces between two uploaded images.

    Returns:
        - Annotated images with face bounding boxes
        - Similarity matrix between all detected faces
        - Best matches for each face in image1
    """
    # NOTE(review): get_faces_async may raise FaceServiceError, which is
    # not caught here — FastAPI will surface it as a 500. Presumably
    # acceptable for an internal benchmark page; confirm.
    logger.info("benchmark/compare: image1=%s image2=%s", image1.filename, image2.filename)
    # Read both images
    data1 = await image1.read()
    data2 = await image2.read()
    img1 = read_upload_image(data1, image1.filename or "image1")
    img2 = read_upload_image(data2, image2.filename or "image2")
    # Detect faces in both images concurrently
    faces1, faces2 = await asyncio.gather(
        get_faces_async(img1, inference_executor),
        get_faces_async(img2, inference_executor),
    )
    logger.info(
        "benchmark/compare: detected %d faces in image1, %d faces in image2",
        len(faces1), len(faces2)
    )
    # Sort faces by area (largest first) so display index #0 is the most
    # prominent face in each image.
    faces1.sort(key=face_area, reverse=True)
    faces2.sort(key=face_area, reverse=True)
    # Draw faces on images
    face1_indices = list(range(len(faces1)))
    face2_indices = list(range(len(faces2)))
    img1_annotated = draw_faces_on_image(img1, faces1, face1_indices)
    img2_annotated = draw_faces_on_image(img2, faces2, face2_indices)
    # Encode images for response
    img1_b64 = encode_image_to_base64(img1_annotated)
    img2_b64 = encode_image_to_base64(img2_annotated)
    # Compute similarity matrix (rows: image1 faces, cols: image2 faces).
    # Faces with invalid (NaN/Inf) embeddings contribute 0.0 entries and
    # never participate in best-match selection.
    similarity_matrix: list[list[float]] = []
    best_matches: list[dict] = []
    for i, f1 in enumerate(faces1):
        emb1 = f1.normed_embedding.astype(np.float32)
        if not validate_embedding(emb1):
            # Entire row zeroed; skip best-match tracking for this face.
            similarity_matrix.append([0.0] * len(faces2))
            continue
        row = []
        best_sim = -1.0
        best_j = -1
        for j, f2 in enumerate(faces2):
            emb2 = f2.normed_embedding.astype(np.float32)
            if not validate_embedding(emb2):
                row.append(0.0)
                continue
            sim = cosine_similarity(emb1, emb2)
            row.append(sim)
            if sim > best_sim:
                best_sim = sim
                best_j = j
        similarity_matrix.append(row)
        if best_j >= 0:
            best_matches.append({
                "face1_idx": i,
                "face2_idx": best_j,
                "similarity": best_sim,
            })
    # Sort best matches by similarity and keep top 3
    best_matches.sort(key=lambda m: m["similarity"], reverse=True)
    best_matches = best_matches[:3]
    # Add cropped face images for top 3 matches (thumbnails for the UI).
    for match in best_matches:
        i, j = match["face1_idx"], match["face2_idx"]
        crop1 = crop_face(img1, faces1[i].bbox)
        crop2 = crop_face(img2, faces2[j].bbox)
        match["face1_crop"] = encode_image_to_base64(crop1, max_dim=150)
        match["face2_crop"] = encode_image_to_base64(crop2, max_dim=150)
    return {
        "image1_faces": len(faces1),
        "image2_faces": len(faces2),
        "image1_annotated": img1_b64,
        "image2_annotated": img2_b64,
        "similarity_matrix": similarity_matrix,
        # Flat pairwise list; empty when either image has no faces, which
        # the UI uses to show the "no faces detected" message.
        "similarities": [
            {"face1": i, "face2": j, "score": similarity_matrix[i][j]}
            for i in range(len(faces1))
            for j in range(len(faces2))
        ],
        "best_matches": best_matches,
    }

View File

@ -1,12 +1,13 @@
"""Face embedding endpoints."""
import logging
from typing import List
import numpy as np
from fastapi import APIRouter, HTTPException
from app.face import (
FaceServiceError,
face_area,
fallback_avatar_embedding,
get_faces_async,
load_face_app,
@ -14,20 +15,34 @@ from app.face import (
validate_embedding,
)
from app.image import download_image
from app.resources import http_client, inference_executor
from app.models import (
BBox,
EmbedAvatarResponse,
EmbedImageResponse,
EmbedRequest,
FaceEmbedding,
)
from app.resources import http_client, inference_executor
# Expected embedding dimension from buffalo_l model
EXPECTED_EMBEDDING_DIM = 512
logger = logging.getLogger("face_service")
router = APIRouter()
def validate_face_embedding(emb: np.ndarray, context: str) -> tuple[bool, str | None]:
"""
Validate embedding dimension and values.
Returns (is_valid, error_message).
"""
if len(emb) != EXPECTED_EMBEDDING_DIM:
return False, f"{context}: unexpected embedding dimension {len(emb)}, expected {EXPECTED_EMBEDDING_DIM}"
if not validate_embedding(emb):
return False, f"{context}: embedding contains NaN/Inf values"
return True, None
@router.post("/embed-avatar", response_model=EmbedAvatarResponse)
async def embed_avatar(req: EmbedRequest):
"""
@ -37,11 +52,14 @@ async def embed_avatar(req: EmbedRequest):
falls back to center crop embedding with score=0.0.
"""
logger.info("embed_avatar: image_url=%s", req.image_url)
fa = load_face_app()
img = await download_image(str(req.image_url), http_client, inference_executor)
h, w = img.shape[:2]
faces = await get_faces_async(fa, img, inference_executor)
try:
faces = await get_faces_async(img, inference_executor)
except FaceServiceError as e:
logger.error("embed_avatar: face service error: %s", str(e))
raise HTTPException(status_code=503, detail="Face service unavailable")
if len(faces) == 0:
logger.warning(
"embed_avatar: no faces detected image_url=%s size=%dx%d, using fallback",
@ -49,6 +67,7 @@ async def embed_avatar(req: EmbedRequest):
w,
h,
)
fa = load_face_app() # Need face_app for recognition model
fallback = fallback_avatar_embedding(fa, img, w, h)
if fallback is None:
raise HTTPException(
@ -63,25 +82,28 @@ async def embed_avatar(req: EmbedRequest):
score,
len(emb),
)
return EmbedAvatarResponse(embedding=emb, bbox=bbox, score=score)
return EmbedAvatarResponse(
embedding=emb,
bbox=bbox,
score=score,
processed_width=w,
processed_height=h,
)
# Sort by face area (largest first)
faces.sort(
key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
reverse=True,
)
faces.sort(key=face_area, reverse=True)
face = faces[0]
emb = face.normed_embedding.astype(np.float32)
# Validate embedding
if not validate_embedding(emb):
logger.error("embed_avatar: embedding contains NaN/Inf values")
is_valid, error_msg = validate_face_embedding(emb, "embed_avatar")
if not is_valid:
logger.error(error_msg)
raise HTTPException(
status_code=422,
detail="Failed to generate valid face embedding",
)
emb_list = emb.tolist()
bbox = to_pixel_bbox(face.bbox, w, h)
score = float(getattr(face, "det_score", 1.0))
@ -93,7 +115,13 @@ async def embed_avatar(req: EmbedRequest):
len(emb_list),
)
return EmbedAvatarResponse(embedding=emb_list, bbox=bbox, score=score)
return EmbedAvatarResponse(
embedding=emb_list,
bbox=bbox,
score=score,
processed_width=w,
processed_height=h,
)
@router.post("/embed-image", response_model=EmbedImageResponse)
@ -104,11 +132,15 @@ async def embed_image(req: EmbedRequest):
Returns all detected faces sorted by detection score (highest first).
Returns empty list if no faces detected.
"""
fa = load_face_app()
img = await download_image(str(req.image_url), http_client, inference_executor)
h, w = img.shape[:2]
faces = await get_faces_async(fa, img, inference_executor)
try:
faces = await get_faces_async(img, inference_executor)
except FaceServiceError as e:
logger.error("embed_image: face service error: %s", str(e))
raise HTTPException(status_code=503, detail="Face service unavailable")
if len(faces) == 0:
logger.warning(
"embed_image: no faces detected image_url=%s size=%dx%d",
@ -116,7 +148,7 @@ async def embed_image(req: EmbedRequest):
w,
h,
)
return EmbedImageResponse(faces=[])
return EmbedImageResponse(faces=[], processed_width=w, processed_height=h)
logger.info(
"embed_image: detected %d faces image_url=%s size=%dx%d",
@ -132,19 +164,19 @@ async def embed_image(req: EmbedRequest):
reverse=True,
)
result: List[FaceEmbedding] = []
result: list[FaceEmbedding] = []
for f in faces:
emb = f.normed_embedding.astype(np.float32)
# Skip faces with invalid embeddings
if not validate_embedding(emb):
logger.warning("embed_image: skipping face with NaN/Inf embedding")
is_valid, error_msg = validate_face_embedding(emb, "embed_image")
if not is_valid:
logger.warning(error_msg)
continue
emb_list = emb.tolist()
bbox = to_pixel_bbox(f.bbox, w, h)
score = float(getattr(f, "det_score", 1.0))
result.append(FaceEmbedding(bbox=bbox, score=score, embedding=emb_list))
return EmbedImageResponse(faces=result)
return EmbedImageResponse(faces=result, processed_width=w, processed_height=h)