From d34b27ecc5f2eeec5f221df934f24226795ff24b Mon Sep 17 00:00:00 2001 From: Hung Luu Date: Mon, 1 Dec 2025 11:38:50 +0700 Subject: [PATCH] Initial commit: Face embedding service with modular structure - FastAPI-based face embedding service using InsightFace - Modular package structure (app/config, app/models, app/face, app/image, app/routes) - Async HTTP downloads with retry logic (httpx + tenacity) - Image size/dimension limits and decompression bomb protection - Comprehensive image decoding with color mode handling - BBox coordinate clamping and embedding validation - Production-ready structure with TODOs for security features --- .gitignore | 35 +++++++ README.md | 101 +++++++++++++++++++ app/__init__.py | 2 + app/config.py | 27 +++++ app/face.py | 142 ++++++++++++++++++++++++++ app/image.py | 221 +++++++++++++++++++++++++++++++++++++++++ app/main.py | 44 ++++++++ app/models.py | 40 ++++++++ app/routes/__init__.py | 2 + app/routes/embed.py | 148 +++++++++++++++++++++++++++ requirements.txt | 10 ++ run_face_service.sh | 35 +++++++ 12 files changed, 807 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 app/__init__.py create mode 100644 app/config.py create mode 100644 app/face.py create mode 100644 app/image.py create mode 100644 app/main.py create mode 100644 app/models.py create mode 100644 app/routes/__init__.py create mode 100644 app/routes/embed.py create mode 100644 requirements.txt create mode 100755 run_face_service.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7d413ff --- /dev/null +++ b/.gitignore @@ -0,0 +1,35 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Virtual environments +.venv/ +venv/ +ENV/ + +# Environment variables +.env +.env.local + +# Logs +*.log + +# IDE +.vscode/ +.idea/ +*.swp +*.swo + +# InsightFace models (usually in ~/.insightface, but just in case) +models/ +*.onnx + +# OS files +.DS_Store +Thumbs.db + +# Test/temp files 
+*.tmp +*.bak + diff --git a/README.md b/README.md new file mode 100644 index 0000000..2780411 --- /dev/null +++ b/README.md @@ -0,0 +1,101 @@ +## Genealog Face Service + +FastAPI-based face embedding and matching microservice using InsightFace + ONNX Runtime GPU. This service is designed to be called from the `genealog-api` backend via HTTP. + +### Endpoints + +- `GET /healthz` – basic health check and model info. +- `POST /embed-avatar` – JSON body: `{ "image_url": "https://..." }`, returns a single best face embedding for an avatar image. +- `POST /embed-image` – JSON body: `{ "image_url": "https://..." }`, returns all detected faces and embeddings. +- `POST /test-avatar` – multipart form with fields: + - `tag`: string tag for logging / correlation + - `avatar`: avatar image file (face to match) + - `image`: target image file (search space) + +All embeddings are normalized float vectors suitable for cosine-similarity comparison. + +`/embed-avatar` notes: + +- Images are decoded with Pillow and EXIF orientation is applied (e.g. iPhone photos) before running detection. +- If no face is detected, the service will fall back to a center square crop and run the recognition model directly to still produce an embedding. In this case, the `score` field will be `0.0` and `bbox` is the used crop. + +### Installation (WSL2, Python venv) + +From `/home/hung/genealog-face`: + +```bash +python -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +``` + +GPU support assumes: + +- WSL2 with GPU enabled. +- NVIDIA drivers installed on Windows. +- `nvidia-smi` works inside WSL. + +The service uses `insightface` with `CUDAExecutionProvider` first, falling back to CPU if needed. 
+ +### Running the service + +Use the helper script (recommended): + +```bash +cd /home/hung/genealog-face +./run_face_service.sh +``` + +Defaults: + +- Host: `0.0.0.0` +- Port: `18081` +- Model: `buffalo_l` +- Detection size: `1024` +- Workers: `nproc` (all CPU cores detected) + +You can override via environment variables: + +```bash +PORT=18081 \ +FACE_MODEL_NAME=buffalo_l \ +FACE_DET_SIZE=1024 \ +UVICORN_WORKERS=20 \ +./run_face_service.sh +``` + +To run in the background: + +```bash +nohup ./run_face_service.sh > face_service.log 2>&1 & +``` + +Logs are written to `face_service.log` in the repo root. + +### Integration with genealog-api (Docker) + +The `genealog-api` service expects this face service to be reachable at: + +- `FACE_SERVICE_URL: http://host.docker.internal:18081` + +You only need to ensure the service is running in WSL on port `18081` before starting the Docker stack. + +### Autostart on Windows reboot (via WSL2) + +You can have Windows start this service automatically at logon using Task Scheduler: + +1. Open **Task Scheduler** → **Create Task…**. +2. **General** tab: + - Name: `GenealogFaceService`. + - Configure to run for your Windows user. +3. **Triggers** tab: + - New → Begin the task: **At log on**. +4. **Actions** tab: + - Program/script: `wsl.exe` + - Arguments: + ```text + -d Ubuntu -- bash -lc "cd /home/hung/genealog-face && nohup ./run_face_service.sh >> face_service.log 2>&1" + ``` +5. Save the task (provide credentials if prompted). + +After this, logging into Windows will start WSL and launch the face service in the background, ready to be used by `genealog-api`. 
# Process-wide singleton populated by load_face_app().
face_app: "FaceAnalysis | None" = None


def load_face_app() -> "FaceAnalysis":
    """Return the shared FaceAnalysis instance, creating it on first use.

    The model pack named by ``MODEL_NAME`` is constructed with the CUDA
    execution provider listed ahead of the CPU provider and prepared with a
    square detection size of ``DET_SIZE``. Later calls return the cached
    instance stored in the module-level ``face_app``.
    """
    global face_app
    if face_app is not None:
        return face_app

    # Lazy %-style arguments: same rendered text as before, no eager formatting.
    logger.info("Loading InsightFace model pack=%s, det_size=%s", MODEL_NAME, DET_SIZE)
    analysis = FaceAnalysis(
        name=MODEL_NAME,
        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
    )
    analysis.prepare(ctx_id=0, det_size=(DET_SIZE, DET_SIZE))
    face_app = analysis
    logger.info("FaceAnalysis initialized")
    return face_app


def to_pixel_bbox(bbox, width: int, height: int) -> "BBox":
    """Convert an ``(x1, y1, x2, y2)`` box to a pixel ``BBox`` inside the image.

    Args:
        bbox: Four numeric corner coordinates (top-left, bottom-right).
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        A ``BBox`` with ``w >= 1`` and ``h >= 1``. For a non-empty image the
        box satisfies ``x + w <= width`` and ``y + h <= height``.
    """
    x1, y1, x2, y2 = (int(v) for v in bbox)

    # The top-left corner must be a valid pixel index. Clamping it to
    # ``width``/``height`` (as was done previously) allowed a box that starts
    # one past the last column/row and therefore lies outside the image.
    x1 = min(max(x1, 0), max(width - 1, 0))
    y1 = min(max(y1, 0), max(height - 1, 0))

    # The bottom-right corner is exclusive: at least one pixel beyond the
    # top-left corner, and never beyond the image border.
    x2 = min(max(x2, x1 + 1), max(width, x1 + 1))
    y2 = min(max(y2, y1 + 1), max(height, y1 + 1))

    return BBox(x=x1, y=y1, w=x2 - x1, h=y2 - y1)


def validate_embedding(embedding: np.ndarray) -> bool:
    """Return True when every value of *embedding* is finite (no NaN/Inf)."""
    return bool(np.isfinite(embedding).all())


def normalize_embedding(embedding: np.ndarray) -> np.ndarray:
    """Return a float32 copy of *embedding* scaled to unit L2 norm.

    A vector whose norm is not greater than zero is returned unscaled (still
    as a float32 copy), so the caller never divides by zero.
    """
    # np.array(...) always copies, matching the previous ``astype`` behaviour,
    # and additionally accepts plain sequences.
    emb = np.array(embedding, dtype=np.float32)
    norm = float(np.linalg.norm(emb))
    if norm > 0.0:
        emb = emb / norm
    return emb


def fallback_avatar_embedding(
    fa: "FaceAnalysis",
    img: np.ndarray,
    width: int,
    height: int,
) -> "tuple[list[float], BBox, float] | None":
    """Embed a centre square crop when the detector found no face.

    The recognition model is looked up under ``fa.models["recognition"]`` and
    run directly on the crop resized to the model's ``input_size``.

    Args:
        fa: Prepared FaceAnalysis instance.
        img: Image array; it is sliced as ``img[y1:y2, x1:x2]``.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        ``(embedding, bbox, score)`` where ``bbox`` is the crop that was used
        and ``score`` is always ``0.0``; ``None`` when any step fails (missing
        model, empty crop, resize/inference error, non-finite embedding).
    """
    rec_model = getattr(fa, "models", {}).get("recognition")
    if rec_model is None:
        logger.warning("embed_avatar_fallback: recognition model is not available")
        return None

    side = min(height, width)
    if side <= 0:
        logger.warning(
            "embed_avatar_fallback: invalid image size width=%d height=%d",
            width,
            height,
        )
        return None

    # Largest centred square that fits in the image.
    cy, cx = height // 2, width // 2
    x1 = max(cx - side // 2, 0)
    y1 = max(cy - side // 2, 0)
    x2 = min(x1 + side, width)
    y2 = min(y1 + side, height)

    crop = img[y1:y2, x1:x2]
    if crop.size == 0:
        logger.warning("embed_avatar_fallback: empty crop region")
        return None

    target_size = getattr(rec_model, "input_size", None)
    if not target_size:
        logger.warning("embed_avatar_fallback: recognition model has no input_size")
        return None

    try:
        import cv2

        # cv2.resize requires a tuple for dsize; input_size may be a list.
        face_img = cv2.resize(crop, tuple(target_size))
    except Exception as e:
        logger.exception("embed_avatar_fallback: failed to resize crop: %s", e)
        return None

    try:
        feat = rec_model.get_feat(face_img)[0]
    except Exception as e:
        logger.exception("embed_avatar_fallback: get_feat failed: %s", e)
        return None

    emb = normalize_embedding(feat)
    if not validate_embedding(emb):
        logger.warning("embed_avatar_fallback: embedding contains NaN/Inf values")
        return None

    bbox = BBox(x=int(x1), y=int(y1), w=int(x2 - x1), h=int(y2 - y1))
    # No detection happened, so there is no detector confidence to report.
    score = 0.0

    logger.info(
        "embed_avatar_fallback: generated embedding bbox=(%d,%d,%d,%d) score=%.4f len=%d",
        bbox.x,
        bbox.y,
        bbox.w,
        bbox.h,
        score,
        len(emb),
    )

    return emb.tolist(), bbox, score
class ImageDownloadError(Exception):
    """Raised when an image cannot be downloaded or exceeds the size limit."""


class ImageDecodeError(Exception):
    """Raised when image bytes cannot be opened or fully decoded."""


class ImageValidationError(Exception):
    """Raised when an image violates the configured dimension limits."""


def _check_dimensions(width: int, height: int) -> None:
    """Raise ImageValidationError unless both sides are within the limits."""
    if width < MIN_IMAGE_DIMENSION or height < MIN_IMAGE_DIMENSION:
        raise ImageValidationError(
            f"Image too small: {width}x{height}, minimum is {MIN_IMAGE_DIMENSION}x{MIN_IMAGE_DIMENSION}"
        )
    if width > MAX_IMAGE_DIMENSION or height > MAX_IMAGE_DIMENSION:
        raise ImageValidationError(
            f"Image too large: {width}x{height}, maximum is {MAX_IMAGE_DIMENSION}x{MAX_IMAGE_DIMENSION}"
        )


def _decode_image_bytes(data: bytes, source: str) -> np.ndarray:
    """Decode raw image bytes into a BGR numpy array.

    Steps: open, validate dimensions, force a full decode, apply EXIF
    orientation, flatten any transparency onto white, convert to 8-bit RGB
    and finally to BGR.

    Args:
        data: Encoded image bytes.
        source: URL or filename, used only in log messages.

    Returns:
        The image as a BGR array produced by ``cv2.cvtColor``.

    Raises:
        ImageDecodeError: The bytes cannot be opened or are truncated/corrupt.
        ImageValidationError: Width or height is outside the configured range.
    """
    try:
        pil_image = Image.open(BytesIO(data))
    except Exception as e:
        logger.exception("Could not open image from %s", source)
        raise ImageDecodeError(f"Could not decode image: {e}") from e

    # Image.open() only identifies the file; pixel data is not decoded yet.
    # Checking the size here rejects oversized images before memory is spent
    # on decoding them. EXIF transposition can only swap the two sides and the
    # limits are the same for both, so the result equals a post-decode check.
    _check_dimensions(*pil_image.size)

    # Force a full decode so truncated/corrupted files fail here.
    try:
        pil_image.load()
    except Exception as e:
        logger.exception("Image data is corrupted or truncated from %s", source)
        raise ImageDecodeError(f"Image data is corrupted or truncated: {e}") from e

    # Best effort: a broken EXIF block must not reject an otherwise valid image.
    try:
        pil_image = ImageOps.exif_transpose(pil_image)
    except Exception:
        logger.warning("Failed to apply EXIF orientation for %s", source)

    mode = pil_image.mode
    has_alpha = mode in ("RGBA", "LA", "PA") or (
        # Palette images carry transparency in ``info`` rather than in a band.
        mode == "P" and "transparency" in pil_image.info
    )
    if has_alpha:
        # Composite on white so transparent regions do not turn black.
        rgba = pil_image.convert("RGBA")
        background = Image.new("RGB", rgba.size, (255, 255, 255))
        background.paste(rgba, mask=rgba.split()[-1])
        pil_image = background
    elif mode in ("I", "F") or mode.startswith("I;16"):
        arr = np.array(pil_image)
        if mode == "F":
            # assumes float samples are in [0, 1] - TODO confirm for real inputs
            arr = (arr * 255).clip(0, 255).astype(np.uint8)
        else:
            # assumes integer samples use a 16-bit range - TODO confirm
            arr = (arr / 256).clip(0, 255).astype(np.uint8)
        # A 2-D uint8 array is interpreted as mode "L"; the explicit ``mode``
        # argument is deprecated in recent Pillow releases.
        pil_image = Image.fromarray(arr).convert("RGB")
    elif mode != "RGB":
        # L, CMYK, P (opaque), YCbCr, 1, ...
        pil_image = pil_image.convert("RGB")

    # OpenCV / InsightFace expect BGR channel order.
    img = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)

    logger.info(
        "decode_image_bytes: source=%s shape=%s mode=%s",
        source,
        img.shape,
        mode,
    )
    return img


@retry(
    retry=retry_if_exception_type((httpx.TimeoutException, httpx.NetworkError)),
    stop=stop_after_attempt(MAX_RETRIES),
    wait=wait_exponential(multiplier=1, min=1, max=10),
    reraise=True,
)
async def _download_with_retry(client: httpx.AsyncClient, url: str) -> bytes:
    """Download *url* and return its body, enforcing ``MAX_DOWNLOAD_SIZE``.

    The body is streamed and the transfer is aborted as soon as the limit is
    exceeded, so a server that omits or understates ``Content-Length`` cannot
    make the service buffer an arbitrarily large response. Timeouts and
    network errors are retried per the decorator; other errors propagate.

    Raises:
        ImageDownloadError: The declared or actual size exceeds the limit.
        httpx.HTTPStatusError: The server answered with a 4xx/5xx status.
    """
    # TODO [PROD]: Add URL validation for SSRF protection
    # - Block internal IPs (10.x, 172.16-31.x, 192.168.x, 127.x, 169.254.x)
    # - Block cloud metadata endpoints
    # - Validate against allowlist if configured

    logger.info("download_image: url=%s", url)  # TODO [PROD]: Redact query params

    async with client.stream("GET", url, follow_redirects=True) as response:
        response.raise_for_status()

        # Cheap early rejection when the server declares the size up front.
        # A malformed header is ignored; the streaming check below still applies.
        declared = response.headers.get("content-length", "")
        if declared.isdigit() and int(declared) > MAX_DOWNLOAD_SIZE:
            raise ImageDownloadError(
                f"Image too large: {int(declared)} bytes, maximum is {MAX_DOWNLOAD_SIZE} bytes"
            )

        buffer = bytearray()
        async for chunk in response.aiter_bytes():
            buffer.extend(chunk)
            if len(buffer) > MAX_DOWNLOAD_SIZE:
                raise ImageDownloadError(
                    f"Image too large: more than {MAX_DOWNLOAD_SIZE} bytes, "
                    f"maximum is {MAX_DOWNLOAD_SIZE} bytes"
                )

    return bytes(buffer)


async def download_image(image_url: str) -> np.ndarray:
    """Download an image from *image_url* and decode it to a BGR array.

    A new ``httpx.AsyncClient`` is created for each call, so connections are
    not reused between requests. Download and decode failures are translated
    into ``HTTPException`` so route handlers can let them propagate.

    Raises:
        HTTPException: 408 on download timeout; 400 on HTTP error status,
            size-limit violation, any other download failure, or an image
            that cannot be decoded / fails dimension validation.
    """
    try:
        async with httpx.AsyncClient(timeout=DOWNLOAD_TIMEOUT) as client:
            data = await _download_with_retry(client, image_url)
    except httpx.TimeoutException as e:
        logger.exception("Timeout downloading image")
        raise HTTPException(status_code=408, detail="Timeout downloading image") from e
    except httpx.HTTPStatusError as e:
        logger.exception("HTTP error downloading image")
        raise HTTPException(
            status_code=400,
            detail=f"Failed to download image: HTTP {e.response.status_code}",
        ) from e
    except ImageDownloadError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        logger.exception("Failed to download image")
        raise HTTPException(status_code=400, detail=f"Failed to download image: {e}") from e

    try:
        img = _decode_image_bytes(data, image_url)
    except (ImageDecodeError, ImageValidationError) as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    logger.info(
        "download_image: success url=%s shape=%s",
        image_url,
        img.shape,
    )
    return img


def read_upload_image(data: bytes, filename: str | None) -> np.ndarray:
    """Decode an uploaded image file to a BGR array.

    Args:
        data: Raw image bytes.
        filename: Original filename, used only for logging; may be empty.

    Raises:
        HTTPException: 400 when the bytes cannot be decoded or the image
            fails dimension validation.
    """
    # _decode_image_bytes either returns an array or raises, so no separate
    # "result is None" check is needed.
    try:
        return _decode_image_bytes(data, filename or "")
    except (ImageDecodeError, ImageValidationError) as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
async def _run_blocking(func, *args):
    """Run a blocking callable on the worker thread pool and await its result.

    Model inference is synchronous; calling it directly inside an ``async``
    handler would stall the event loop for every other request on this worker.
    """
    from fastapi.concurrency import run_in_threadpool

    return await run_in_threadpool(func, *args)


def _bbox_area(face) -> float:
    """Area of a detected face's ``(x1, y1, x2, y2)`` box."""
    return (face.bbox[2] - face.bbox[0]) * (face.bbox[3] - face.bbox[1])


def _det_score(face) -> float:
    """Detection score of a face, defaulting to 1.0 when it is absent."""
    return float(getattr(face, "det_score", 1.0))


@router.post("/embed-avatar", response_model=EmbedAvatarResponse)
async def embed_avatar(req: EmbedRequest):
    """Return one embedding for an avatar image.

    The face with the largest bounding-box area is used. When no face is
    detected, a centre-crop fallback embedding is returned with ``score=0.0``.

    Raises:
        HTTPException: 422 when neither detection nor the fallback yields an
            embedding, or when the embedding contains NaN/Inf values. Errors
            raised by ``download_image`` propagate unchanged.
    """
    logger.info("embed_avatar: image_url=%s", req.image_url)
    fa = load_face_app()
    img = await download_image(str(req.image_url))
    h, w = img.shape[:2]

    faces = await _run_blocking(fa.get, img)
    if not faces:
        logger.warning(
            "embed_avatar: no faces detected image_url=%s size=%dx%d, using fallback",
            req.image_url,
            w,
            h,
        )
        fallback = await _run_blocking(fallback_avatar_embedding, fa, img, w, h)
        if fallback is None:
            raise HTTPException(
                status_code=422,
                detail="No face detected in avatar image",
            )

        emb, bbox, score = fallback
        logger.info(
            "embed_avatar: using fallback bbox=%s score=%.4f embedding_len=%d",
            bbox,
            score,
            len(emb),
        )
        return EmbedAvatarResponse(embedding=emb, bbox=bbox, score=score)

    # max() returns the first of equally large faces, the same pick a stable
    # descending sort followed by [0] would make.
    face = max(faces, key=_bbox_area)

    emb = face.normed_embedding.astype(np.float32)
    if not validate_embedding(emb):
        logger.error("embed_avatar: embedding contains NaN/Inf values")
        raise HTTPException(
            status_code=422,
            detail="Failed to generate valid face embedding",
        )

    emb_list = emb.tolist()
    bbox = to_pixel_bbox(face.bbox, w, h)
    score = _det_score(face)

    # Log the box that is actually returned, not the raw detector output.
    logger.info(
        "embed_avatar: using face bbox=%s score=%.4f embedding_len=%d",
        bbox,
        score,
        len(emb_list),
    )

    return EmbedAvatarResponse(embedding=emb_list, bbox=bbox, score=score)


@router.post("/embed-image", response_model=EmbedImageResponse)
async def embed_image(req: EmbedRequest):
    """Return embeddings for every face detected in an image.

    Faces are ordered by detection score, highest first. Faces whose embedding
    contains NaN/Inf values are skipped. An empty list is returned when no
    face is detected. Errors raised by ``download_image`` propagate unchanged.
    """
    fa = load_face_app()
    img = await download_image(str(req.image_url))
    h, w = img.shape[:2]

    faces = await _run_blocking(fa.get, img)
    if not faces:
        logger.warning(
            "embed_image: no faces detected image_url=%s size=%dx%d",
            req.image_url,
            w,
            h,
        )
        return EmbedImageResponse(faces=[])

    logger.info(
        "embed_image: detected %d faces image_url=%s size=%dx%d",
        len(faces),
        req.image_url,
        w,
        h,
    )

    result: List[FaceEmbedding] = []
    for f in sorted(faces, key=_det_score, reverse=True):
        emb = f.normed_embedding.astype(np.float32)

        if not validate_embedding(emb):
            logger.warning("embed_image: skipping face with NaN/Inf embedding")
            continue

        result.append(
            FaceEmbedding(
                bbox=to_pixel_bbox(f.bbox, w, h),
                score=_det_score(f),
                embedding=emb.tolist(),
            )
        )

    return EmbedImageResponse(faces=result)
OMP_NUM_THREADS="${OMP_NUM_THREADS:-2}" +export MKL_NUM_THREADS="${MKL_NUM_THREADS:-2}" + +WORKERS="${UVICORN_WORKERS:-$DEFAULT_WORKERS}" +# Match genealog-api FACE_SERVICE_URL: http://host.docker.internal:18081 +PORT="${PORT:-18081}" + +echo "Starting face service on port ${PORT} with ${WORKERS} workers (CPU cores: ${CPU_CORES})" + +exec uvicorn app.main:app \ + --host 0.0.0.0 \ + --port "${PORT}" \ + --workers "${WORKERS}" \ + --loop uvloop \ + --http httptools