fix(docker): harden production deployment config for all services

- Add resource limits (memory/CPU) and reservations for all services
- Add security hardening: read_only, no-new-privileges, tmpfs for temp dirs
- Add services that existed only in the dev compose file to prod: loki, promtail, pg-backup
- Fix API healthcheck: append .catch(() => process.exit(1)) so a failed or non-OK request exits with a non-zero code
- Add json-file logging driver with rotation limits across all services
- Remove exposed PostgreSQL port in prod (internal only)
- Add shm_size for PostgreSQL shared memory
- Add non-root user (appuser) to AI services Dockerfile
- Add --chown=node:node to COPY directives in API/Web Dockerfiles
- Harden .dockerignore: exclude IDE files, OS files, docker-compose files
- Fix Redis URL to include password authentication
- Add JWT_REFRESH_SECRET to API environment
- Add Grafana dependency on Loki for log datasource

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Ho Ngoc Hai
2026-04-08 13:44:44 +07:00
parent 0c84c2ddae
commit 767afb56d5
5 changed files with 291 additions and 24 deletions

View File

@@ -2,16 +2,43 @@ node_modules
.next .next
dist dist
*.tsbuildinfo *.tsbuildinfo
# Version control
.git .git
.github .github
.husky .husky
# Documentation and tests
docs docs
e2e e2e
playwright-report playwright-report
monitoring
*.md *.md
!README.md !README.md
# Environment and secrets
.env* .env*
!.env.example
# IDE and editor
.vscode
.idea
*.swp
*.swo
# Build caches
.eslintcache .eslintcache
coverage coverage
.turbo .turbo
.cache
# OS files
.DS_Store
Thumbs.db
# Compose and monitoring config (not needed in the image build context)
docker-compose*.yml
monitoring
# Dev tools
scripts/backup
*.log

View File

@@ -31,16 +31,16 @@ WORKDIR /app
ENV NODE_ENV=production ENV NODE_ENV=production
COPY --from=build /app/apps/api/dist ./dist COPY --from=build --chown=node:node /app/apps/api/dist ./dist
COPY --from=build /app/node_modules ./node_modules COPY --from=build --chown=node:node /app/node_modules ./node_modules
COPY --from=build /app/apps/api/node_modules ./apps/api/node_modules COPY --from=build --chown=node:node /app/apps/api/node_modules ./apps/api/node_modules
COPY --from=build /app/prisma ./prisma COPY --from=build --chown=node:node /app/prisma ./prisma
COPY --from=build /app/apps/api/package.json ./package.json COPY --from=build --chown=node:node /app/apps/api/package.json ./package.json
EXPOSE 3001 EXPOSE 3001
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \ HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
CMD node -e "fetch('http://localhost:3001/health').then(r => { if (!r.ok) throw 1 })" CMD node -e "fetch('http://localhost:3001/health').then(r => { if (!r.ok) throw 1 }).catch(() => process.exit(1))"
USER node USER node

View File

@@ -27,9 +27,9 @@ ENV NEXT_TELEMETRY_DISABLED=1
ENV HOSTNAME=0.0.0.0 ENV HOSTNAME=0.0.0.0
ENV PORT=3000 ENV PORT=3000
COPY --from=build /app/apps/web/public ./public COPY --from=build --chown=node:node /app/apps/web/public ./public
COPY --from=build /app/apps/web/.next/standalone ./ COPY --from=build --chown=node:node /app/apps/web/.next/standalone ./
COPY --from=build /app/apps/web/.next/static ./.next/static COPY --from=build --chown=node:node /app/apps/web/.next/static ./.next/static
EXPOSE 3000 EXPOSE 3000

View File

@@ -1,4 +1,5 @@
services: services:
# ── Application Services ──────────────────────────────────────────────────────
api: api:
image: ${REGISTRY_URL:-ghcr.io/goodgo}/goodgo-api:${IMAGE_TAG:-latest} image: ${REGISTRY_URL:-ghcr.io/goodgo}/goodgo-api:${IMAGE_TAG:-latest}
container_name: goodgo-api container_name: goodgo-api
@@ -8,11 +9,12 @@ services:
environment: environment:
NODE_ENV: production NODE_ENV: production
DATABASE_URL: postgresql://${DB_USER}:${DB_PASSWORD}@postgres:5432/${DB_NAME} DATABASE_URL: postgresql://${DB_USER}:${DB_PASSWORD}@postgres:5432/${DB_NAME}
REDIS_URL: redis://redis:6379 REDIS_URL: redis://:${REDIS_PASSWORD}@redis:6379
TYPESENSE_HOST: typesense TYPESENSE_HOST: typesense
TYPESENSE_PORT: 8108 TYPESENSE_PORT: 8108
TYPESENSE_API_KEY: ${TYPESENSE_API_KEY} TYPESENSE_API_KEY: ${TYPESENSE_API_KEY}
JWT_SECRET: ${JWT_SECRET} JWT_SECRET: ${JWT_SECRET}
JWT_REFRESH_SECRET: ${JWT_REFRESH_SECRET}
MINIO_ENDPOINT: minio MINIO_ENDPOINT: minio
MINIO_PORT: 9000 MINIO_PORT: 9000
MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY} MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY}
@@ -27,11 +29,28 @@ services:
typesense: typesense:
condition: service_healthy condition: service_healthy
healthcheck: healthcheck:
test: ['CMD', 'node', '-e', "fetch('http://localhost:3001/health').then(r => { if (!r.ok) throw 1 })"] test: ['CMD', 'node', '-e', "fetch('http://localhost:3001/health').then(r => { if (!r.ok) throw 1 }).catch(() => process.exit(1))"]
interval: 30s interval: 30s
timeout: 5s timeout: 5s
retries: 5 retries: 5
start_period: 30s start_period: 30s
deploy:
resources:
limits:
memory: 1g
cpus: '1.0'
reservations:
memory: 512m
security_opt:
- no-new-privileges:true
read_only: true
tmpfs:
- /tmp
logging:
driver: json-file
options:
max-size: '10m'
max-file: '5'
networks: networks:
- goodgo-net - goodgo-net
@@ -53,15 +72,66 @@ services:
timeout: 5s timeout: 5s
retries: 3 retries: 3
start_period: 15s start_period: 15s
deploy:
resources:
limits:
memory: 512m
cpus: '0.5'
reservations:
memory: 256m
security_opt:
- no-new-privileges:true
read_only: true
tmpfs:
- /tmp
logging:
driver: json-file
options:
max-size: '10m'
max-file: '5'
networks: networks:
- goodgo-net - goodgo-net
ai-services:
image: ${REGISTRY_URL:-ghcr.io/goodgo}/goodgo-ai-services:${IMAGE_TAG:-latest}
container_name: goodgo-ai-services
restart: unless-stopped
environment:
AI_DEBUG: 'false'
AI_LOG_LEVEL: info
AI_API_KEY: ${AI_API_KEY}
AI_RATE_LIMIT: ${AI_RATE_LIMIT:-60/minute}
healthcheck:
test: ['CMD', 'python', '-c', 'import httpx; httpx.get("http://localhost:8000/health").raise_for_status()']
interval: 30s
timeout: 5s
retries: 5
start_period: 30s
deploy:
resources:
limits:
memory: 1g
cpus: '1.0'
reservations:
memory: 512m
security_opt:
- no-new-privileges:true
read_only: true
tmpfs:
- /tmp
logging:
driver: json-file
options:
max-size: '10m'
max-file: '5'
networks:
- goodgo-net
# ── Data Services ─────────────────────────────────────────────────────────────
postgres: postgres:
image: postgis/postgis:16-3.4 image: postgis/postgis:16-3.4
container_name: goodgo-postgres container_name: goodgo-postgres
restart: unless-stopped restart: unless-stopped
ports:
- '${DB_PORT:-5432}:5432'
environment: environment:
POSTGRES_DB: ${DB_NAME} POSTGRES_DB: ${DB_NAME}
POSTGRES_USER: ${DB_USER} POSTGRES_USER: ${DB_USER}
@@ -74,6 +144,19 @@ services:
timeout: 5s timeout: 5s
retries: 5 retries: 5
start_period: 30s start_period: 30s
deploy:
resources:
limits:
memory: 2g
cpus: '2.0'
reservations:
memory: 1g
shm_size: 256m
logging:
driver: json-file
options:
max-size: '10m'
max-file: '5'
networks: networks:
- goodgo-net - goodgo-net
@@ -90,6 +173,23 @@ services:
timeout: 5s timeout: 5s
retries: 5 retries: 5
start_period: 10s start_period: 10s
deploy:
resources:
limits:
memory: 768m
cpus: '0.5'
reservations:
memory: 256m
security_opt:
- no-new-privileges:true
read_only: true
tmpfs:
- /tmp
logging:
driver: json-file
options:
max-size: '10m'
max-file: '3'
networks: networks:
- goodgo-net - goodgo-net
@@ -108,6 +208,18 @@ services:
timeout: 5s timeout: 5s
retries: 5 retries: 5
start_period: 15s start_period: 15s
deploy:
resources:
limits:
memory: 1g
cpus: '1.0'
reservations:
memory: 512m
logging:
driver: json-file
options:
max-size: '10m'
max-file: '3'
networks: networks:
- goodgo-net - goodgo-net
@@ -127,24 +239,114 @@ services:
timeout: 5s timeout: 5s
retries: 5 retries: 5
start_period: 15s start_period: 15s
deploy:
resources:
limits:
memory: 1g
cpus: '0.5'
reservations:
memory: 256m
logging:
driver: json-file
options:
max-size: '10m'
max-file: '3'
networks: networks:
- goodgo-net - goodgo-net
ai-services: # ── Database Backup ───────────────────────────────────────────────────────────
image: ${REGISTRY_URL:-ghcr.io/goodgo}/goodgo-ai-services:${IMAGE_TAG:-latest} pg-backup:
container_name: goodgo-ai-services image: postgis/postgis:16-3.4
container_name: goodgo-pg-backup
restart: unless-stopped restart: unless-stopped
entrypoint: /bin/bash
command:
- -c
- |
apt-get update -qq && apt-get install -y -qq cron > /dev/null 2>&1
echo "0 2 * * * PGHOST=postgres PGPORT=5432 PGUSER=${DB_USER} PGDATABASE=${DB_NAME} PGPASSWORD=${DB_PASSWORD} BACKUP_DIR=/backups RETENTION_DAYS=${BACKUP_RETENTION_DAYS:-7} /scripts/pg-backup.sh >> /var/log/pg-backup.log 2>&1" | crontab -
/scripts/pg-backup.sh
cron -f
environment: environment:
AI_DEBUG: 'false' PGHOST: postgres
AI_LOG_LEVEL: info PGPORT: '5432'
AI_API_KEY: ${AI_API_KEY} PGUSER: ${DB_USER}
AI_RATE_LIMIT: ${AI_RATE_LIMIT:-60/minute} PGDATABASE: ${DB_NAME}
PGPASSWORD: ${DB_PASSWORD}
BACKUP_DIR: /backups
RETENTION_DAYS: ${BACKUP_RETENTION_DAYS:-7}
volumes:
- ./scripts/backup:/scripts:ro
- pg_backups:/backups
depends_on:
postgres:
condition: service_healthy
deploy:
resources:
limits:
memory: 512m
cpus: '0.5'
logging:
driver: json-file
options:
max-size: '5m'
max-file: '3'
networks:
- goodgo-net
# ── Monitoring & Logging ──────────────────────────────────────────────────────
loki:
image: grafana/loki:3.0.0
container_name: goodgo-loki
restart: unless-stopped
command: -config.file=/etc/loki/loki-config.yml
volumes:
- ./monitoring/loki/loki-config.yml:/etc/loki/loki-config.yml:ro
- loki_data:/loki
healthcheck: healthcheck:
test: ['CMD', 'python', '-c', 'import httpx; httpx.get("http://localhost:8000/health").raise_for_status()'] test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3100/ready']
interval: 30s interval: 15s
timeout: 5s timeout: 5s
retries: 5 retries: 5
start_period: 30s start_period: 20s
deploy:
resources:
limits:
memory: 512m
cpus: '0.5'
reservations:
memory: 256m
logging:
driver: json-file
options:
max-size: '10m'
max-file: '3'
networks:
- goodgo-net
promtail:
image: grafana/promtail:3.0.0
container_name: goodgo-promtail
restart: unless-stopped
command: -config.file=/etc/promtail/promtail-config.yml
volumes:
- ./monitoring/promtail/promtail-config.yml:/etc/promtail/promtail-config.yml:ro
- /var/run/docker.sock:/var/run/docker.sock:ro
depends_on:
loki:
condition: service_healthy
deploy:
resources:
limits:
memory: 256m
cpus: '0.25'
reservations:
memory: 128m
logging:
driver: json-file
options:
max-size: '5m'
max-file: '3'
networks: networks:
- goodgo-net - goodgo-net
@@ -165,6 +367,20 @@ services:
timeout: 5s timeout: 5s
retries: 3 retries: 3
start_period: 10s start_period: 10s
deploy:
resources:
limits:
memory: 1g
cpus: '0.5'
reservations:
memory: 512m
security_opt:
- no-new-privileges:true
logging:
driver: json-file
options:
max-size: '10m'
max-file: '3'
networks: networks:
- goodgo-net - goodgo-net
@@ -189,12 +405,28 @@ services:
depends_on: depends_on:
prometheus: prometheus:
condition: service_healthy condition: service_healthy
loki:
condition: service_healthy
healthcheck: healthcheck:
test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3000/api/health'] test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3000/api/health']
interval: 15s interval: 15s
timeout: 5s timeout: 5s
retries: 3 retries: 3
start_period: 15s start_period: 15s
deploy:
resources:
limits:
memory: 512m
cpus: '0.5'
reservations:
memory: 256m
security_opt:
- no-new-privileges:true
logging:
driver: json-file
options:
max-size: '10m'
max-file: '3'
networks: networks:
- goodgo-net - goodgo-net
@@ -207,6 +439,10 @@ volumes:
driver: local driver: local
minio_data: minio_data:
driver: local driver: local
pg_backups:
driver: local
loki_data:
driver: local
prometheus_data: prometheus_data:
driver: local driver: local
grafana_data: grafana_data:

View File

@@ -24,10 +24,14 @@ COPY app/ ./app/
# Pre-download underthesea models at build time # Pre-download underthesea models at build time
RUN python -c "from underthesea import word_tokenize; word_tokenize('test')" 2>/dev/null || true RUN python -c "from underthesea import word_tokenize; word_tokenize('test')" 2>/dev/null || true
RUN useradd --create-home --shell /bin/bash appuser && chown -R appuser:appuser /app
EXPOSE 8000 EXPOSE 8000
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \ HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
CMD python -c "import httpx; httpx.get('http://localhost:8000/health').raise_for_status()" CMD python -c "import httpx; httpx.get('http://localhost:8000/health').raise_for_status()"
USER appuser
ENTRYPOINT ["dumb-init", "--"] ENTRYPOINT ["dumb-init", "--"]
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--timeout-graceful-shutdown", "30"] CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--timeout-graceful-shutdown", "30"]