Add adaptive det_size, benchmark UI, and code simplification
- Add adaptive det_size selection based on image characteristics (portraits use 640, landscapes use 1024)
- Add FaceServiceError for better error handling in face detection
- Add benchmark UI for comparing faces between two images
- Extract face_area() helper to eliminate duplicate sorting logic
- Modernize type hints (List -> list, Tuple -> tuple)
- Add processed_width/height to embed responses
- Add embedding dimension validation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
b8f167d336
commit
a7595603b4
|
|
@ -5,6 +5,8 @@ import os
|
|||
# Model configuration
|
||||
MODEL_NAME = os.getenv("FACE_MODEL_NAME", "buffalo_l")
|
||||
DET_SIZE = int(os.getenv("FACE_DET_SIZE", "1024"))
|
||||
# Fallback det_size for large faces (close-up selfies)
|
||||
FALLBACK_DET_SIZE = int(os.getenv("FACE_DET_SIZE_FALLBACK", "640"))
|
||||
USE_TENSORRT = os.getenv("USE_TENSORRT", "true").lower() in ("true", "1", "yes")
|
||||
|
||||
# Image processing limits
|
||||
|
|
|
|||
117
app/face.py
117
app/face.py
|
|
@ -7,12 +7,14 @@ from concurrent.futures import ThreadPoolExecutor
|
|||
import numpy as np
|
||||
from insightface.app import FaceAnalysis
|
||||
|
||||
from app.config import DET_SIZE, MODEL_NAME, USE_TENSORRT
|
||||
from app.config import DET_SIZE, FALLBACK_DET_SIZE, MODEL_NAME, USE_TENSORRT
|
||||
from app.models import BBox
|
||||
|
||||
logger = logging.getLogger("face_service")
|
||||
|
||||
face_app: FaceAnalysis | None = None
|
||||
# Two face apps for different image types (lazy loaded)
|
||||
face_app_large: FaceAnalysis | None = None # det_size=1024 for group photos
|
||||
face_app_small: FaceAnalysis | None = None # det_size=640 for selfies/portraits
|
||||
|
||||
|
||||
def _check_tensorrt_available() -> bool:
|
||||
|
|
@ -60,35 +62,106 @@ def _get_providers() -> list:
|
|||
return providers
|
||||
|
||||
|
||||
# Prepared FaceAnalysis instances keyed by the exact det_size they were
# prepared with, so a request for one size can never be served by a model
# prepared for a different size.
_face_apps_by_det_size: dict[int, FaceAnalysis] = {}


def load_face_app(det_size: int = DET_SIZE) -> FaceAnalysis:
    """Load and initialize the FaceAnalysis model for ``det_size`` (cached).

    The first call for a given ``det_size`` builds a ``FaceAnalysis`` with the
    configured model pack and execution providers and prepares it with a
    square ``(det_size, det_size)`` detection input. Later calls with the same
    ``det_size`` return the same instance.

    Args:
        det_size: Side length of the detector input. Defaults to ``DET_SIZE``.

    Returns:
        The prepared ``FaceAnalysis`` instance for ``det_size``.
    """
    global face_app_large, face_app_small

    cached = _face_apps_by_det_size.get(det_size)
    if cached is not None:
        return cached

    providers = _get_providers()
    logger.info(
        f"Loading InsightFace model pack={MODEL_NAME}, det_size={det_size}, "
        f"tensorrt={USE_TENSORRT}"
    )
    fa = FaceAnalysis(
        name=MODEL_NAME,
        providers=providers,
    )
    fa.prepare(ctx_id=0, det_size=(det_size, det_size))

    _face_apps_by_det_size[det_size] = fa

    # Keep the module-level slots updated for any code that reads them
    # directly: "large" holds the most recent instance with det_size >= 1024,
    # "small" the most recent one below that.
    if det_size >= 1024:
        face_app_large = fa
    else:
        face_app_small = fa

    logger.info(f"FaceAnalysis initialized (det_size={det_size})")
    return fa
|
||||
|
||||
|
||||
def get_optimal_det_size(img_height: int, img_width: int) -> int:
    """Pick the detector input size from the image dimensions.

    Rules, checked in order:

    1. Longest side <= 1024 px: ``FALLBACK_DET_SIZE`` (a larger detector
       input brings no benefit for small images).
    2. Taller than wide with an aspect ratio >= 1.3: ``FALLBACK_DET_SIZE``
       (treated as a selfie / single-person portrait).
    3. Anything else (landscape, square, or mildly tall): ``DET_SIZE``
       (treated as a likely group photo with small faces).

    Args:
        img_height: Image height in pixels.
        img_width: Image width in pixels.

    Returns:
        ``FALLBACK_DET_SIZE`` (640 by default) or ``DET_SIZE`` (1024 by default).
    """
    longest = max(img_height, img_width)
    if longest <= 1024:
        return FALLBACK_DET_SIZE

    shortest = min(img_height, img_width)
    # A non-positive short side would divide by zero; treat it as square.
    ratio = longest / shortest if shortest > 0 else 1.0

    is_tall_portrait = img_height > img_width and ratio >= 1.3
    return FALLBACK_DET_SIZE if is_tall_portrait else DET_SIZE
|
||||
|
||||
|
||||
class FaceServiceError(Exception):
    """Raised when face detection or embedding fails."""
|
||||
|
||||
|
||||
async def get_faces_async(
    img: np.ndarray,
    executor: ThreadPoolExecutor,
) -> list:
    """Run face detection/embedding in a thread pool so the event loop is not blocked.

    The detector size is chosen per image with ``get_optimal_det_size`` and
    the matching model is obtained from ``load_face_app``.

    Args:
        img: Image array; only ``img.shape[:2]`` (height, width) is read here
            before the array is handed to the model.
        executor: Thread pool on which ``FaceAnalysis.get`` is executed.

    Returns:
        The list of faces returned by ``FaceAnalysis.get``.

    Raises:
        FaceServiceError: If loading the model or running detection fails
            (GPU OOM, ONNX errors, etc.). The original exception is chained.
    """
    h, w = img.shape[:2]
    det_size = get_optimal_det_size(h, w)

    loop = asyncio.get_running_loop()
    try:
        # Model loading is inside the try so that a failing lazy load is
        # reported to callers the same way as a failing inference.
        fa = load_face_app(det_size)
        faces = await loop.run_in_executor(executor, fa.get, img)
    except Exception as e:
        logger.error(
            "get_faces_async: face detection failed det_size=%d image=%dx%d error=%s",
            det_size, w, h, str(e)
        )
        raise FaceServiceError(f"Face detection failed: {str(e)}") from e

    logger.debug(
        "get_faces_async: det_size=%d, image=%dx%d, faces=%d",
        det_size, w, h, len(faces)
    )

    return faces
|
||||
|
||||
|
||||
def face_area(face) -> float:
    """Return the bounding-box area of ``face``, used as a sort key.

    Args:
        face: Object with a ``bbox`` sequence laid out as ``(x1, y1, x2, y2)``.

    Returns:
        ``(x2 - x1) * (y2 - y1)`` as a plain Python float, so the result does
        not leak a numpy scalar type to callers.
    """
    x1, y1, x2, y2 = face.bbox[0], face.bbox[1], face.bbox[2], face.bbox[3]
    return float((x2 - x1) * (y2 - y1))
|
||||
|
||||
|
||||
def to_pixel_bbox(bbox, width: int, height: int) -> BBox:
|
||||
|
|
@ -113,11 +186,19 @@ def validate_embedding(embedding: np.ndarray) -> bool:
|
|||
|
||||
|
||||
def normalize_embedding(embedding: np.ndarray) -> np.ndarray:
    """Scale an embedding vector to unit L2 length.

    The input is first converted to ``float32``. If its norm is below 0.01 the
    vector cannot be normalized meaningfully, so a warning is logged and the
    (float32) vector is returned unscaled.

    Args:
        embedding: Embedding vector.

    Returns:
        A ``float32`` array: the unit-length vector, or the unscaled input
        when its norm is below 0.01.
    """
    emb = embedding.astype(np.float32)
    norm = float(np.linalg.norm(emb))

    if norm < 0.01:
        logger.warning("normalize_embedding: near-zero vector detected (norm=%.6f)", norm)
        # Returned as-is. NOTE(review): the original comment states that
        # validate_embedding rejects this case downstream - confirm.
        return emb

    return emb / norm
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ from app.config import (
|
|||
USE_TENSORRT,
|
||||
)
|
||||
from app.face import load_face_app
|
||||
from app.routes import embed
|
||||
from app.routes import benchmark, embed
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger("face_service")
|
||||
|
|
@ -72,6 +72,7 @@ app = FastAPI(
|
|||
|
||||
# Include routers
|
||||
app.include_router(embed.router)
|
||||
app.include_router(benchmark.router)
|
||||
|
||||
|
||||
@app.get("/healthz")
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
"""Pydantic models for request/response schemas."""
|
||||
|
||||
from typing import List
|
||||
|
||||
from pydantic import BaseModel, HttpUrl
|
||||
|
||||
|
||||
|
|
@ -19,17 +17,21 @@ class BBox(BaseModel):
|
|||
class FaceEmbedding(BaseModel):
    """One detected face: bounding box, detection score and embedding vector."""

    # Bounding box of the face.
    bbox: BBox
    # Detection score reported for the face.
    score: float
    # Embedding vector as a plain list of floats.
    embedding: list[float]
|
||||
|
||||
|
||||
class EmbedAvatarResponse(BaseModel):
    """Response of the avatar embedding endpoint: a single face embedding."""

    # Embedding vector as a plain list of floats.
    embedding: list[float]
    # Bounding box the embedding was computed for.
    bbox: BBox
    # Detection score for the chosen face.
    score: float
    # Width and height of the image the service actually processed.
    # Optional so responses built without them remain valid.
    processed_width: int | None = None
    processed_height: int | None = None
|
||||
|
||||
|
||||
class EmbedImageResponse(BaseModel):
    """Response of the image embedding endpoint: every face that was embedded."""

    # Embedded faces; empty when no face was detected.
    faces: list[FaceEmbedding]
    # Width and height of the image the service actually processed.
    # Optional so responses built without them remain valid.
    processed_width: int | None = None
    processed_height: int | None = None
|
||||
|
||||
|
||||
class MatchResult(BaseModel):
|
||||
|
|
|
|||
|
|
@ -0,0 +1,516 @@
|
|||
"""Benchmark UI for face comparison."""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import logging
|
||||
from io import BytesIO
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from fastapi import APIRouter, File, UploadFile
|
||||
from fastapi.responses import HTMLResponse
|
||||
|
||||
from app.face import face_area, get_faces_async, validate_embedding
|
||||
from app.image import read_upload_image
|
||||
from app.resources import inference_executor
|
||||
|
||||
logger = logging.getLogger("face_service")
|
||||
|
||||
router = APIRouter(prefix="/benchmark", tags=["benchmark"])
|
||||
|
||||
|
||||
def cosine_similarity(emb1: np.ndarray, emb2: np.ndarray) -> float:
    """Compute the cosine similarity between two embedding vectors.

    The dot product is divided by the product of the vector norms, so the
    result is correct whether or not the inputs are already unit-length. For
    unit vectors this equals the plain dot product.

    Args:
        emb1: First embedding vector.
        emb2: Second embedding vector, same length as ``emb1``.

    Returns:
        The cosine similarity as a Python float, or ``0.0`` when either
        vector has zero norm (the similarity is undefined in that case).
    """
    denom = float(np.linalg.norm(emb1)) * float(np.linalg.norm(emb2))
    if denom == 0.0:
        return 0.0
    return float(np.dot(emb1, emb2)) / denom
|
||||
|
||||
|
||||
def draw_faces_on_image(img: np.ndarray, faces: list, face_indices: list[int]) -> np.ndarray:
    """Draw a green bounding box and an index label for each face.

    Args:
        img: Image to annotate; it is copied and not modified.
        faces: Face objects exposing a ``bbox`` array ``(x1, y1, x2, y2)``.
        face_indices: Label numbers, paired positionally with ``faces``.

    Returns:
        An annotated copy of ``img``.
    """
    img_copy = img.copy()
    font = cv2.FONT_HERSHEY_SIMPLEX
    green = (0, 255, 0)

    for idx, face in zip(face_indices, faces):
        x1, y1, x2, y2 = (int(v) for v in face.bbox.astype(int))
        cv2.rectangle(img_copy, (x1, y1), (x2, y2), green, 2)

        label = f"#{idx}"
        (tw, th), _ = cv2.getTextSize(label, font, 0.8, 2)
        tag_height = th + 10

        # The tag normally sits directly above the box. When the box touches
        # the top of the image there is no room there, so place the tag just
        # inside the box instead of drawing it off-image.
        tag_bottom = y1 if y1 - tag_height >= 0 else y1 + tag_height

        cv2.rectangle(img_copy, (x1, tag_bottom - tag_height), (x1 + tw + 10, tag_bottom), green, -1)
        cv2.putText(img_copy, label, (x1 + 5, tag_bottom - 5), font, 0.8, (0, 0, 0), 2)

    return img_copy
|
||||
|
||||
|
||||
def encode_image_to_base64(img: np.ndarray, max_dim: int = 800) -> str:
    """Encode an image as a base64 JPEG string for embedding in HTML.

    Images whose longest side exceeds ``max_dim`` are scaled down
    proportionally first.

    Args:
        img: Image array; converted from BGR to RGB before encoding.
        max_dim: Maximum length of the longest side after resizing.

    Returns:
        Base64 text of the JPEG bytes (quality 85), without a data-URI prefix.
    """
    h, w = img.shape[:2]
    longest = max(h, w)
    if longest > max_dim:
        scale = max_dim / longest
        # Clamp to 1 px: for very elongated images the short side would
        # otherwise round down to 0, which cv2.resize rejects.
        new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
        img = cv2.resize(img, new_size)

    # Convert BGR to RGB for proper display
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    from PIL import Image

    buffer = BytesIO()
    Image.fromarray(img_rgb).save(buffer, format="JPEG", quality=85)
    return base64.b64encode(buffer.getvalue()).decode()
|
||||
|
||||
|
||||
def crop_face(img: np.ndarray, bbox: np.ndarray, padding: float = 0.2) -> np.ndarray:
    """Crop a face from ``img`` with proportional padding around the box.

    Args:
        img: Source image; ``img.shape[:2]`` is read as (height, width).
        bbox: Array ``(x1, y1, x2, y2)``; values are truncated to integers.
        padding: Fraction of the box width/height added on each side.

    Returns:
        A view into ``img`` covering the padded box, clipped to the image.
        For a non-empty image the crop is at least 1x1 pixel, even when the
        box is degenerate or lies outside the frame.
    """
    h, w = img.shape[:2]
    x1, y1, x2, y2 = (int(v) for v in bbox.astype(int))

    pad_x = int((x2 - x1) * padding)
    pad_y = int((y2 - y1) * padding)

    # Clip the top-left corner into the image, then force the bottom-right
    # corner to lie at least one pixel past it so the crop is never empty.
    left = min(max(0, x1 - pad_x), max(w - 1, 0))
    top = min(max(0, y1 - pad_y), max(h - 1, 0))
    right = max(min(w, x2 + pad_x), left + 1)
    bottom = max(min(h, y2 + pad_y), top + 1)

    return img[top:bottom, left:right]
|
||||
|
||||
|
||||
@router.get("/", response_class=HTMLResponse)
async def benchmark_ui():
    """Serve the benchmark UI.

    Returns a self-contained HTML page (inline CSS and JavaScript) with two
    file inputs. On submit the page posts both files as multipart form data
    to ``/benchmark/compare`` and renders the annotated images, the best
    matches and the full similarity matrix from the JSON response.
    """
    # NOTE: this is a regular (non-raw) string, so every backslash that must
    # reach the browser's JavaScript has to be doubled here.
    return """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Face Benchmark</title>
    <style>
        * { box-sizing: border-box; }
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            max-width: 1400px;
            margin: 0 auto;
            padding: 20px;
            background: #1a1a2e;
            color: #eee;
        }
        h1 { text-align: center; color: #00d4ff; }
        .upload-section {
            display: flex;
            gap: 20px;
            margin-bottom: 20px;
            flex-wrap: wrap;
        }
        .upload-box {
            flex: 1;
            min-width: 300px;
            border: 2px dashed #444;
            border-radius: 10px;
            padding: 20px;
            text-align: center;
            background: #16213e;
        }
        .upload-box h3 { margin-top: 0; color: #00d4ff; }
        .upload-box input[type="file"] {
            display: block;
            margin: 10px auto;
        }
        .preview {
            max-width: 100%;
            max-height: 300px;
            margin-top: 10px;
            border-radius: 5px;
        }
        button {
            display: block;
            width: 100%;
            max-width: 300px;
            margin: 20px auto;
            padding: 15px 30px;
            font-size: 18px;
            background: #00d4ff;
            color: #1a1a2e;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            font-weight: bold;
        }
        button:hover { background: #00b8e6; }
        button:disabled { background: #444; cursor: not-allowed; }
        #results {
            margin-top: 30px;
        }
        .results-grid {
            display: flex;
            gap: 20px;
            flex-wrap: wrap;
        }
        .result-box {
            flex: 1;
            min-width: 300px;
            background: #16213e;
            border-radius: 10px;
            padding: 20px;
        }
        .result-box h3 { margin-top: 0; color: #00d4ff; }
        .result-box img {
            max-width: 100%;
            border-radius: 5px;
        }
        .similarity-table {
            width: 100%;
            margin-top: 20px;
            background: #16213e;
            border-radius: 10px;
            padding: 20px;
        }
        .similarity-table h3 { margin-top: 0; color: #00d4ff; }
        table {
            width: 100%;
            border-collapse: collapse;
            margin-top: 10px;
        }
        th, td {
            padding: 10px;
            text-align: center;
            border: 1px solid #333;
        }
        th { background: #0f3460; }
        .match-high { background: #00c853; color: #000; font-weight: bold; }
        .match-medium { background: #ffab00; color: #000; }
        .match-low { background: #333; }
        .loading {
            text-align: center;
            padding: 40px;
            font-size: 18px;
        }
        .spinner {
            border: 4px solid #333;
            border-top: 4px solid #00d4ff;
            border-radius: 50%;
            width: 40px;
            height: 40px;
            animation: spin 1s linear infinite;
            margin: 20px auto;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
        .error { color: #ff5252; text-align: center; padding: 20px; }
        .stats { margin-top: 10px; font-size: 14px; color: #888; }
        .best-match {
            background: #16213e;
            border-radius: 10px;
            padding: 20px;
            margin-top: 20px;
        }
        .best-match h3 { color: #00d4ff; margin-top: 0; }
        .match-item {
            display: flex;
            align-items: center;
            gap: 15px;
            padding: 15px;
            background: #0f3460;
            border-radius: 8px;
            margin-bottom: 15px;
        }
        .match-score {
            font-size: 28px;
            font-weight: bold;
            min-width: 90px;
            text-align: center;
        }
        .face-crop {
            width: 100px;
            height: 100px;
            object-fit: cover;
            border-radius: 8px;
            border: 2px solid #00d4ff;
        }
        .match-label {
            font-size: 14px;
            color: #888;
            margin-left: auto;
        }
    </style>
</head>
<body>
    <h1>Face Comparison Benchmark</h1>

    <form id="uploadForm">
        <div class="upload-section">
            <div class="upload-box">
                <h3>Image 1 (Source)</h3>
                <p>Upload an image to find faces from</p>
                <input type="file" id="image1" name="image1" accept="image/*" required>
                <img id="preview1" class="preview" style="display:none;">
            </div>
            <div class="upload-box">
                <h3>Image 2 (Target)</h3>
                <p>Upload an image to search for matching faces</p>
                <input type="file" id="image2" name="image2" accept="image/*" required>
                <img id="preview2" class="preview" style="display:none;">
            </div>
        </div>
        <button type="submit" id="compareBtn">Compare Faces</button>
    </form>

    <div id="results"></div>

    <script>
        // Preview images on selection
        document.getElementById('image1').addEventListener('change', function(e) {
            previewImage(e.target, 'preview1');
        });
        document.getElementById('image2').addEventListener('change', function(e) {
            previewImage(e.target, 'preview2');
        });

        function previewImage(input, previewId) {
            const preview = document.getElementById(previewId);
            if (input.files && input.files[0]) {
                const reader = new FileReader();
                reader.onload = function(e) {
                    preview.src = e.target.result;
                    preview.style.display = 'block';
                };
                reader.readAsDataURL(input.files[0]);
            }
        }

        document.getElementById('uploadForm').addEventListener('submit', async function(e) {
            e.preventDefault();

            const image1 = document.getElementById('image1').files[0];
            const image2 = document.getElementById('image2').files[0];

            if (!image1 || !image2) {
                alert('Please select both images');
                return;
            }

            const resultsDiv = document.getElementById('results');
            const btn = document.getElementById('compareBtn');

            btn.disabled = true;
            btn.textContent = 'Processing...';
            resultsDiv.innerHTML = '<div class="loading"><div class="spinner"></div>Detecting faces and computing embeddings...</div>';

            const formData = new FormData();
            formData.append('image1', image1);
            formData.append('image2', image2);

            try {
                const startTime = performance.now();
                const response = await fetch('/benchmark/compare', {
                    method: 'POST',
                    body: formData
                });
                const endTime = performance.now();

                if (!response.ok) {
                    let detail = 'Comparison failed';
                    try {
                        const error = await response.json();
                        if (typeof error.detail === 'string') {
                            detail = error.detail;
                        } else if (error.detail) {
                            // Validation errors carry a structured detail, not a string.
                            detail = JSON.stringify(error.detail);
                        }
                    } catch (parseError) {
                        // Error body was not JSON; keep the generic message.
                    }
                    throw new Error(detail);
                }

                const data = await response.json();
                displayResults(data, endTime - startTime);
            } catch (error) {
                // Insert the message as text, never as markup.
                const errorDiv = document.createElement('div');
                errorDiv.className = 'error';
                errorDiv.textContent = `Error: ${error.message}`;
                resultsDiv.innerHTML = '';
                resultsDiv.appendChild(errorDiv);
            } finally {
                btn.disabled = false;
                btn.textContent = 'Compare Faces';
            }
        });

        function displayResults(data, processingTime) {
            const resultsDiv = document.getElementById('results');

            let html = `
                <div class="stats">Processing time: ${(processingTime/1000).toFixed(2)}s |
                Image 1: ${data.image1_faces} face(s) | Image 2: ${data.image2_faces} face(s)</div>

                <div class="results-grid">
                    <div class="result-box">
                        <h3>Image 1 - ${data.image1_faces} face(s) detected</h3>
                        <img src="data:image/jpeg;base64,${data.image1_annotated}" alt="Image 1">
                    </div>
                    <div class="result-box">
                        <h3>Image 2 - ${data.image2_faces} face(s) detected</h3>
                        <img src="data:image/jpeg;base64,${data.image2_annotated}" alt="Image 2">
                    </div>
                </div>
            `;

            if (data.similarities.length > 0) {
                html += `
                    <div class="best-match">
                        <h3>Best Matches (Image 1 faces found in Image 2)</h3>
                `;

                for (const match of data.best_matches) {
                    const scorePercent = (match.similarity * 100).toFixed(1);
                    const scoreClass = match.similarity >= 0.5 ? 'match-high' :
                                       match.similarity >= 0.3 ? 'match-medium' : 'match-low';
                    html += `
                        <div class="match-item">
                            <img src="data:image/jpeg;base64,${match.face1_crop}" class="face-crop" alt="Face ${match.face1_idx}">
                            <span class="match-score ${scoreClass}">${scorePercent}%</span>
                            <img src="data:image/jpeg;base64,${match.face2_crop}" class="face-crop" alt="Face ${match.face2_idx}">
                            <span class="match-label">Face #${match.face1_idx} ↔ Face #${match.face2_idx}</span>
                        </div>
                    `;
                }

                html += '</div>';

                // Similarity matrix
                html += `
                    <div class="similarity-table">
                        <h3>Full Similarity Matrix</h3>
                        <table>
                            <tr>
                                <th>Image 1 \\\\ Image 2</th>
                `;

                for (let j = 0; j < data.image2_faces; j++) {
                    html += `<th>Face #${j}</th>`;
                }
                html += '</tr>';

                for (let i = 0; i < data.image1_faces; i++) {
                    html += `<tr><th>Face #${i}</th>`;
                    for (let j = 0; j < data.image2_faces; j++) {
                        const sim = data.similarity_matrix[i][j];
                        const percent = (sim * 100).toFixed(1);
                        const cls = sim >= 0.5 ? 'match-high' : sim >= 0.3 ? 'match-medium' : 'match-low';
                        html += `<td class="${cls}">${percent}%</td>`;
                    }
                    html += '</tr>';
                }

                html += '</table></div>';
            } else if (data.image1_faces === 0 || data.image2_faces === 0) {
                html += '<div class="error">No faces detected in one or both images</div>';
            }

            resultsDiv.innerHTML = html;
        }
    </script>
</body>
</html>
"""
|
||||
|
||||
|
||||
@router.post("/compare")
async def compare_faces(
    image1: UploadFile = File(...),
    image2: UploadFile = File(...),
):
    """Compare faces between two uploaded images.

    Faces in each image are sorted by bounding-box area (largest first), so
    face index 0 is the largest face of its image.

    Args:
        image1: Source image upload.
        image2: Target image upload.

    Returns:
        A dict with:
        - ``image1_faces`` / ``image2_faces``: number of detected faces
        - ``image1_annotated`` / ``image2_annotated``: base64 JPEGs with boxes drawn
        - ``similarity_matrix``: rows are image-1 faces, columns image-2 faces;
          pairs involving an invalid embedding are reported as 0.0
        - ``similarities``: the same scores as a flat list of
          ``{"face1", "face2", "score"}`` entries
        - ``best_matches``: up to three best image-1 -> image-2 pairs, each
          with base64 face crops

    Raises:
        HTTPException: 503 when face detection fails, matching the behavior
            of the embedding endpoints.
    """
    # Imported locally so this handler carries its own error-handling
    # dependencies without touching the module's import block.
    from fastapi import HTTPException

    from app.face import FaceServiceError

    logger.info("benchmark/compare: image1=%s image2=%s", image1.filename, image2.filename)

    # Read both images
    data1 = await image1.read()
    data2 = await image2.read()

    img1 = read_upload_image(data1, image1.filename or "image1")
    img2 = read_upload_image(data2, image2.filename or "image2")

    # Detect faces in both images concurrently
    try:
        faces1, faces2 = await asyncio.gather(
            get_faces_async(img1, inference_executor),
            get_faces_async(img2, inference_executor),
        )
    except FaceServiceError as e:
        logger.error("benchmark/compare: face service error: %s", str(e))
        raise HTTPException(status_code=503, detail="Face service unavailable") from e

    logger.info(
        "benchmark/compare: detected %d faces in image1, %d faces in image2",
        len(faces1), len(faces2)
    )

    # Sort faces by area (largest first)
    faces1.sort(key=face_area, reverse=True)
    faces2.sort(key=face_area, reverse=True)

    # Draw faces on images, labelling each with its position in the sorted list
    img1_annotated = draw_faces_on_image(img1, faces1, list(range(len(faces1))))
    img2_annotated = draw_faces_on_image(img2, faces2, list(range(len(faces2))))

    # Encode images for response
    img1_b64 = encode_image_to_base64(img1_annotated)
    img2_b64 = encode_image_to_base64(img2_annotated)

    def _usable_embedding(face):
        """Return the face's float32 embedding, or None when it fails validation."""
        emb = face.normed_embedding.astype(np.float32)
        return emb if validate_embedding(emb) else None

    # Image-2 embeddings are converted and validated once, not once per
    # image-1 face.
    embeddings2 = [_usable_embedding(f2) for f2 in faces2]

    # Compute similarity matrix
    similarity_matrix: list[list[float]] = []
    best_matches: list[dict] = []

    for i, f1 in enumerate(faces1):
        emb1 = _usable_embedding(f1)
        if emb1 is None:
            similarity_matrix.append([0.0] * len(faces2))
            continue

        row: list[float] = []
        best_sim = -1.0
        best_j = -1

        for j, emb2 in enumerate(embeddings2):
            if emb2 is None:
                row.append(0.0)
                continue

            sim = cosine_similarity(emb1, emb2)
            row.append(sim)

            if sim > best_sim:
                best_sim = sim
                best_j = j

        similarity_matrix.append(row)

        if best_j >= 0:
            best_matches.append({
                "face1_idx": i,
                "face2_idx": best_j,
                "similarity": best_sim,
            })

    # Sort best matches by similarity and keep top 3
    best_matches.sort(key=lambda m: m["similarity"], reverse=True)
    best_matches = best_matches[:3]

    # Add cropped face images for top 3 matches
    for match in best_matches:
        i, j = match["face1_idx"], match["face2_idx"]
        crop1 = crop_face(img1, faces1[i].bbox)
        crop2 = crop_face(img2, faces2[j].bbox)
        match["face1_crop"] = encode_image_to_base64(crop1, max_dim=150)
        match["face2_crop"] = encode_image_to_base64(crop2, max_dim=150)

    return {
        "image1_faces": len(faces1),
        "image2_faces": len(faces2),
        "image1_annotated": img1_b64,
        "image2_annotated": img2_b64,
        "similarity_matrix": similarity_matrix,
        "similarities": [
            {"face1": i, "face2": j, "score": score}
            for i, row in enumerate(similarity_matrix)
            for j, score in enumerate(row)
        ],
        "best_matches": best_matches,
    }
|
||||
|
|
@ -1,12 +1,13 @@
|
|||
"""Face embedding endpoints."""
|
||||
|
||||
import logging
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from app.face import (
|
||||
FaceServiceError,
|
||||
face_area,
|
||||
fallback_avatar_embedding,
|
||||
get_faces_async,
|
||||
load_face_app,
|
||||
|
|
@ -14,20 +15,34 @@ from app.face import (
|
|||
validate_embedding,
|
||||
)
|
||||
from app.image import download_image
|
||||
from app.resources import http_client, inference_executor
|
||||
from app.models import (
|
||||
BBox,
|
||||
EmbedAvatarResponse,
|
||||
EmbedImageResponse,
|
||||
EmbedRequest,
|
||||
FaceEmbedding,
|
||||
)
|
||||
from app.resources import http_client, inference_executor
|
||||
|
||||
# Expected embedding dimension from buffalo_l model
|
||||
EXPECTED_EMBEDDING_DIM = 512
|
||||
|
||||
logger = logging.getLogger("face_service")
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def validate_face_embedding(emb: np.ndarray, context: str) -> tuple[bool, str | None]:
    """Check an embedding's length and values.

    Args:
        emb: Embedding to check.
        context: Prefix for the error message, e.g. the calling endpoint's name.

    Returns:
        ``(True, None)`` when ``emb`` has ``EXPECTED_EMBEDDING_DIM`` entries
        and passes ``validate_embedding``; otherwise ``(False, message)``
        where ``message`` describes the first failed check.
    """
    has_expected_length = len(emb) == EXPECTED_EMBEDDING_DIM
    if not has_expected_length:
        return False, f"{context}: unexpected embedding dimension {len(emb)}, expected {EXPECTED_EMBEDDING_DIM}"

    if validate_embedding(emb):
        return True, None

    return False, f"{context}: embedding contains NaN/Inf values"
|
||||
|
||||
|
||||
@router.post("/embed-avatar", response_model=EmbedAvatarResponse)
|
||||
async def embed_avatar(req: EmbedRequest):
|
||||
"""
|
||||
|
|
@ -37,11 +52,14 @@ async def embed_avatar(req: EmbedRequest):
|
|||
falls back to center crop embedding with score=0.0.
|
||||
"""
|
||||
logger.info("embed_avatar: image_url=%s", req.image_url)
|
||||
fa = load_face_app()
|
||||
img = await download_image(str(req.image_url), http_client, inference_executor)
|
||||
h, w = img.shape[:2]
|
||||
|
||||
faces = await get_faces_async(fa, img, inference_executor)
|
||||
try:
|
||||
faces = await get_faces_async(img, inference_executor)
|
||||
except FaceServiceError as e:
|
||||
logger.error("embed_avatar: face service error: %s", str(e))
|
||||
raise HTTPException(status_code=503, detail="Face service unavailable")
|
||||
if len(faces) == 0:
|
||||
logger.warning(
|
||||
"embed_avatar: no faces detected image_url=%s size=%dx%d, using fallback",
|
||||
|
|
@ -49,6 +67,7 @@ async def embed_avatar(req: EmbedRequest):
|
|||
w,
|
||||
h,
|
||||
)
|
||||
fa = load_face_app() # Need face_app for recognition model
|
||||
fallback = fallback_avatar_embedding(fa, img, w, h)
|
||||
if fallback is None:
|
||||
raise HTTPException(
|
||||
|
|
@ -63,25 +82,28 @@ async def embed_avatar(req: EmbedRequest):
|
|||
score,
|
||||
len(emb),
|
||||
)
|
||||
return EmbedAvatarResponse(embedding=emb, bbox=bbox, score=score)
|
||||
return EmbedAvatarResponse(
|
||||
embedding=emb,
|
||||
bbox=bbox,
|
||||
score=score,
|
||||
processed_width=w,
|
||||
processed_height=h,
|
||||
)
|
||||
|
||||
# Sort by face area (largest first)
|
||||
faces.sort(
|
||||
key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
|
||||
reverse=True,
|
||||
)
|
||||
faces.sort(key=face_area, reverse=True)
|
||||
face = faces[0]
|
||||
|
||||
emb = face.normed_embedding.astype(np.float32)
|
||||
|
||||
# Validate embedding
|
||||
if not validate_embedding(emb):
|
||||
logger.error("embed_avatar: embedding contains NaN/Inf values")
|
||||
|
||||
is_valid, error_msg = validate_face_embedding(emb, "embed_avatar")
|
||||
if not is_valid:
|
||||
logger.error(error_msg)
|
||||
raise HTTPException(
|
||||
status_code=422,
|
||||
detail="Failed to generate valid face embedding",
|
||||
)
|
||||
|
||||
|
||||
emb_list = emb.tolist()
|
||||
bbox = to_pixel_bbox(face.bbox, w, h)
|
||||
score = float(getattr(face, "det_score", 1.0))
|
||||
|
|
@ -93,7 +115,13 @@ async def embed_avatar(req: EmbedRequest):
|
|||
len(emb_list),
|
||||
)
|
||||
|
||||
return EmbedAvatarResponse(embedding=emb_list, bbox=bbox, score=score)
|
||||
return EmbedAvatarResponse(
|
||||
embedding=emb_list,
|
||||
bbox=bbox,
|
||||
score=score,
|
||||
processed_width=w,
|
||||
processed_height=h,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/embed-image", response_model=EmbedImageResponse)
|
||||
|
|
@ -104,11 +132,15 @@ async def embed_image(req: EmbedRequest):
|
|||
Returns all detected faces sorted by detection score (highest first).
|
||||
Returns empty list if no faces detected.
|
||||
"""
|
||||
fa = load_face_app()
|
||||
img = await download_image(str(req.image_url), http_client, inference_executor)
|
||||
h, w = img.shape[:2]
|
||||
|
||||
faces = await get_faces_async(fa, img, inference_executor)
|
||||
try:
|
||||
faces = await get_faces_async(img, inference_executor)
|
||||
except FaceServiceError as e:
|
||||
logger.error("embed_image: face service error: %s", str(e))
|
||||
raise HTTPException(status_code=503, detail="Face service unavailable")
|
||||
|
||||
if len(faces) == 0:
|
||||
logger.warning(
|
||||
"embed_image: no faces detected image_url=%s size=%dx%d",
|
||||
|
|
@ -116,7 +148,7 @@ async def embed_image(req: EmbedRequest):
|
|||
w,
|
||||
h,
|
||||
)
|
||||
return EmbedImageResponse(faces=[])
|
||||
return EmbedImageResponse(faces=[], processed_width=w, processed_height=h)
|
||||
|
||||
logger.info(
|
||||
"embed_image: detected %d faces image_url=%s size=%dx%d",
|
||||
|
|
@ -132,19 +164,19 @@ async def embed_image(req: EmbedRequest):
|
|||
reverse=True,
|
||||
)
|
||||
|
||||
result: List[FaceEmbedding] = []
|
||||
result: list[FaceEmbedding] = []
|
||||
for f in faces:
|
||||
emb = f.normed_embedding.astype(np.float32)
|
||||
|
||||
# Skip faces with invalid embeddings
|
||||
if not validate_embedding(emb):
|
||||
logger.warning("embed_image: skipping face with NaN/Inf embedding")
|
||||
|
||||
is_valid, error_msg = validate_face_embedding(emb, "embed_image")
|
||||
if not is_valid:
|
||||
logger.warning(error_msg)
|
||||
continue
|
||||
|
||||
|
||||
emb_list = emb.tolist()
|
||||
bbox = to_pixel_bbox(f.bbox, w, h)
|
||||
score = float(getattr(f, "det_score", 1.0))
|
||||
result.append(FaceEmbedding(bbox=bbox, score=score, embedding=emb_list))
|
||||
|
||||
return EmbedImageResponse(faces=result)
|
||||
return EmbedImageResponse(faces=result, processed_width=w, processed_height=h)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue