From e60b95cdec5f3c46b581258d672c54ee9d6c14c4 Mon Sep 17 00:00:00 2001 From: Ho Ngoc Hai Date: Wed, 8 Apr 2026 06:13:29 +0700 Subject: [PATCH] =?UTF-8?q?fix(infra):=20harden=20AI=20service=20=E2=80=94?= =?UTF-8?q?=20graceful=20shutdown,=20rate=20limiting,=20API=20key=20auth,?= =?UTF-8?q?=20pinned=20deps,=20Grafana=20secrets?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add dumb-init + --timeout-graceful-shutdown 30 to AI service Dockerfile - Add slowapi rate limiting (configurable via AI_RATE_LIMIT) and X-API-Key auth middleware - Pin all Python dependencies to exact versions for reproducible builds - Move Grafana admin credentials from env vars to Docker secrets in production compose Co-Authored-By: Paperclip --- docker-compose.prod.yml | 16 ++++++++++++++-- libs/ai-services/Dockerfile | 24 +++++++++++++----------- libs/ai-services/app/main.py | 11 ++++++++++- libs/ai-services/app/middleware.py | 23 +++++++++++++++++++++++ libs/ai-services/pyproject.toml | 17 +++++++++-------- 5 files changed, 69 insertions(+), 22 deletions(-) create mode 100644 libs/ai-services/app/middleware.py diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 01ed032..9472e3c 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -18,6 +18,7 @@ services: MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY} MINIO_SECRET_KEY: ${MINIO_SECRET_KEY} AI_SERVICES_URL: http://ai-services:8000 + AI_SERVICES_API_KEY: ${AI_API_KEY} depends_on: postgres: condition: service_healthy @@ -136,6 +137,8 @@ services: environment: AI_DEBUG: 'false' AI_LOG_LEVEL: info + AI_API_KEY: ${AI_API_KEY} + AI_RATE_LIMIT: ${AI_RATE_LIMIT:-60/minute} healthcheck: test: ['CMD', 'python', '-c', 'import httpx; httpx.get("http://localhost:8000/health").raise_for_status()'] interval: 30s @@ -172,10 +175,13 @@ services: ports: - '${GRAFANA_PORT:-3002}:3000' environment: - GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER} - GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD} + GF_SECURITY_ADMIN_USER__FILE: /run/secrets/grafana_admin_user + GF_SECURITY_ADMIN_PASSWORD__FILE: /run/secrets/grafana_admin_password GF_USERS_ALLOW_SIGN_UP: 'false' GF_SERVER_ROOT_URL: ${GRAFANA_ROOT_URL:-http://localhost:3002} + secrets: + - grafana_admin_user + - grafana_admin_password volumes: - ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro - ./monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro @@ -206,6 +212,12 @@ volumes: grafana_data: driver: local +secrets: + grafana_admin_user: + environment: GRAFANA_ADMIN_USER + grafana_admin_password: + environment: GRAFANA_ADMIN_PASSWORD + networks: goodgo-net: driver: bridge diff --git a/libs/ai-services/Dockerfile b/libs/ai-services/Dockerfile index 8096154..15bd3b5 100644 --- a/libs/ai-services/Dockerfile +++ b/libs/ai-services/Dockerfile @@ -2,21 +2,22 @@ FROM python:3.12-slim WORKDIR /app -# Install system deps for underthesea / numpy +# Install system deps for underthesea / numpy + dumb-init for signal handling RUN apt-get update && \ - apt-get install -y --no-install-recommends gcc g++ && \ + apt-get install -y --no-install-recommends gcc g++ dumb-init && \ rm -rf /var/lib/apt/lists/* COPY pyproject.toml . RUN pip install --no-cache-dir . 2>/dev/null || pip install --no-cache-dir \ - "fastapi>=0.115.0" \ - "uvicorn[standard]>=0.32.0" \ - "xgboost>=2.1.0" \ - "numpy>=1.26.0" \ - "underthesea>=6.8.0" \ - "pydantic>=2.9.0" \ - "pydantic-settings>=2.5.0" \ - "httpx>=0.27.0" + "fastapi==0.115.0" \ + "uvicorn[standard]==0.32.0" \ + "xgboost==2.1.0" \ + "numpy==1.26.4" \ + "underthesea==6.8.0" \ + "pydantic==2.9.0" \ + "pydantic-settings==2.5.0" \ + "httpx==0.27.0" \ + "slowapi==0.1.9" COPY app/ ./app/ @@ -28,4 +29,5 @@ EXPOSE 8000 HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \ CMD python -c "import httpx; httpx.get('http://localhost:8000/health').raise_for_status()" -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] +ENTRYPOINT ["dumb-init", "--"] +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--timeout-graceful-shutdown", "30"] diff --git a/libs/ai-services/app/main.py b/libs/ai-services/app/main.py index 6243ed6..cc27225 100644 --- a/libs/ai-services/app/main.py +++ b/libs/ai-services/app/main.py @@ -1,15 +1,24 @@ -from fastapi import FastAPI +from fastapi import Depends, FastAPI from fastapi.middleware.cors import CORSMiddleware +from slowapi import Limiter, _rate_limit_exceeded_handler +from slowapi.errors import RateLimitExceeded +from slowapi.util import get_remote_address from app.config import settings +from app.middleware import verify_api_key from app.routers import avm, moderation +limiter = Limiter(key_func=get_remote_address, default_limits=[settings.rate_limit]) + app = FastAPI( title=settings.app_name, version="0.1.0", docs_url="/docs", redoc_url="/redoc", + dependencies=[Depends(verify_api_key)], ) +app.state.limiter = limiter +app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) if not settings.cors_origin_list: raise RuntimeError("AI_CORS_ORIGINS must be set (comma-separated list of allowed origins)") diff --git a/libs/ai-services/app/middleware.py b/libs/ai-services/app/middleware.py new file mode 100644 index 0000000..d771650 --- /dev/null +++ b/libs/ai-services/app/middleware.py @@ -0,0 +1,23 @@ +import hmac +from typing import Optional + +from fastapi import Depends, HTTPException, Security, status +from fastapi.security import APIKeyHeader + +from app.config import settings + +api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False) + + +async def verify_api_key( + api_key: Optional[str] = Security(api_key_header), +) -> str: + """Validate X-API-Key header. Skipped when AI_API_KEY is not configured.""" + if not settings.api_key: + return "no-auth" + if not api_key or not hmac.compare_digest(api_key, settings.api_key): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid or missing API key", + ) + return api_key diff --git a/libs/ai-services/pyproject.toml b/libs/ai-services/pyproject.toml index 548ab5f..7727d91 100644 --- a/libs/ai-services/pyproject.toml +++ b/libs/ai-services/pyproject.toml @@ -4,14 +4,15 @@ version = "0.1.0" description = "AI/ML services for Goodgo Platform — AVM, feature extraction, moderation" requires-python = ">=3.12" dependencies = [ - "fastapi>=0.115.0", - "uvicorn[standard]>=0.32.0", - "xgboost>=2.1.0", - "numpy>=1.26.0", - "underthesea>=6.8.0", - "pydantic>=2.9.0", - "pydantic-settings>=2.5.0", - "httpx>=0.27.0", + "fastapi==0.115.0", + "uvicorn[standard]==0.32.0", + "xgboost==2.1.0", + "numpy==1.26.4", + "underthesea==6.8.0", + "pydantic==2.9.0", + "pydantic-settings==2.5.0", + "httpx==0.27.0", + "slowapi==0.1.9", ] [project.optional-dependencies]