fix(docker): harden production deployment config for all services

- Add resource limits (memory/CPU) and reservations for all services
- Add security hardening: read_only, no-new-privileges, tmpfs for temp dirs
- Add missing prod services: loki, promtail, pg-backup from dev compose
- Fix API healthcheck to include catch() for proper exit codes
- Add json-file logging driver with rotation limits across all services
- Remove exposed PostgreSQL port in prod (internal only)
- Add shm_size for PostgreSQL shared memory
- Add non-root user (appuser) to AI services Dockerfile
- Add --chown=node:node to COPY directives in API/Web Dockerfiles
- Harden .dockerignore: exclude IDE files, OS files, docker-compose files
- Fix Redis URL to include password authentication
- Add JWT_REFRESH_SECRET to API environment
- Add Grafana dependency on Loki for log datasource

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Ho Ngoc Hai
2026-04-08 13:44:44 +07:00
parent 0c84c2ddae
commit 767afb56d5
5 changed files with 291 additions and 24 deletions

View File

@@ -2,16 +2,43 @@ node_modules
.next
dist
*.tsbuildinfo
# Version control
.git
.github
.husky
# Documentation and tests
docs
e2e
playwright-report
monitoring
*.md
!README.md
# Environment and secrets
.env*
!.env.example
# IDE and editor
.vscode
.idea
*.swp
*.swo
# Build caches
.eslintcache
coverage
.turbo
.cache
# OS files
.DS_Store
Thumbs.db
# Docker files (avoid recursive context)
docker-compose*.yml
monitoring
# Dev tools
scripts/backup
*.log

View File

@@ -31,16 +31,16 @@ WORKDIR /app
ENV NODE_ENV=production
COPY --from=build /app/apps/api/dist ./dist
COPY --from=build /app/node_modules ./node_modules
COPY --from=build /app/apps/api/node_modules ./apps/api/node_modules
COPY --from=build /app/prisma ./prisma
COPY --from=build /app/apps/api/package.json ./package.json
COPY --from=build --chown=node:node /app/apps/api/dist ./dist
COPY --from=build --chown=node:node /app/node_modules ./node_modules
COPY --from=build --chown=node:node /app/apps/api/node_modules ./apps/api/node_modules
COPY --from=build --chown=node:node /app/prisma ./prisma
COPY --from=build --chown=node:node /app/apps/api/package.json ./package.json
EXPOSE 3001
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
CMD node -e "fetch('http://localhost:3001/health').then(r => { if (!r.ok) throw 1 })"
CMD node -e "fetch('http://localhost:3001/health').then(r => { if (!r.ok) throw 1 }).catch(() => process.exit(1))"
USER node

View File

@@ -27,9 +27,9 @@ ENV NEXT_TELEMETRY_DISABLED=1
ENV HOSTNAME=0.0.0.0
ENV PORT=3000
COPY --from=build /app/apps/web/public ./public
COPY --from=build /app/apps/web/.next/standalone ./
COPY --from=build /app/apps/web/.next/static ./.next/static
COPY --from=build --chown=node:node /app/apps/web/public ./public
COPY --from=build --chown=node:node /app/apps/web/.next/standalone ./
COPY --from=build --chown=node:node /app/apps/web/.next/static ./.next/static
EXPOSE 3000

View File

@@ -1,4 +1,5 @@
services:
# ── Application Services ──────────────────────────────────────────────────────
api:
image: ${REGISTRY_URL:-ghcr.io/goodgo}/goodgo-api:${IMAGE_TAG:-latest}
container_name: goodgo-api
@@ -8,11 +9,12 @@ services:
environment:
NODE_ENV: production
DATABASE_URL: postgresql://${DB_USER}:${DB_PASSWORD}@postgres:5432/${DB_NAME}
REDIS_URL: redis://redis:6379
REDIS_URL: redis://:${REDIS_PASSWORD}@redis:6379
TYPESENSE_HOST: typesense
TYPESENSE_PORT: 8108
TYPESENSE_API_KEY: ${TYPESENSE_API_KEY}
JWT_SECRET: ${JWT_SECRET}
JWT_REFRESH_SECRET: ${JWT_REFRESH_SECRET}
MINIO_ENDPOINT: minio
MINIO_PORT: 9000
MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY}
@@ -27,11 +29,28 @@ services:
typesense:
condition: service_healthy
healthcheck:
test: ['CMD', 'node', '-e', "fetch('http://localhost:3001/health').then(r => { if (!r.ok) throw 1 })"]
test: ['CMD', 'node', '-e', "fetch('http://localhost:3001/health').then(r => { if (!r.ok) throw 1 }).catch(() => process.exit(1))"]
interval: 30s
timeout: 5s
retries: 5
start_period: 30s
deploy:
resources:
limits:
memory: 1g
cpus: '1.0'
reservations:
memory: 512m
security_opt:
- no-new-privileges:true
read_only: true
tmpfs:
- /tmp
logging:
driver: json-file
options:
max-size: '10m'
max-file: '5'
networks:
- goodgo-net
@@ -53,15 +72,66 @@ services:
timeout: 5s
retries: 3
start_period: 15s
deploy:
resources:
limits:
memory: 512m
cpus: '0.5'
reservations:
memory: 256m
security_opt:
- no-new-privileges:true
read_only: true
tmpfs:
- /tmp
logging:
driver: json-file
options:
max-size: '10m'
max-file: '5'
networks:
- goodgo-net
ai-services:
image: ${REGISTRY_URL:-ghcr.io/goodgo}/goodgo-ai-services:${IMAGE_TAG:-latest}
container_name: goodgo-ai-services
restart: unless-stopped
environment:
AI_DEBUG: 'false'
AI_LOG_LEVEL: info
AI_API_KEY: ${AI_API_KEY}
AI_RATE_LIMIT: ${AI_RATE_LIMIT:-60/minute}
healthcheck:
test: ['CMD', 'python', '-c', 'import httpx; httpx.get("http://localhost:8000/health").raise_for_status()']
interval: 30s
timeout: 5s
retries: 5
start_period: 30s
deploy:
resources:
limits:
memory: 1g
cpus: '1.0'
reservations:
memory: 512m
security_opt:
- no-new-privileges:true
read_only: true
tmpfs:
- /tmp
logging:
driver: json-file
options:
max-size: '10m'
max-file: '5'
networks:
- goodgo-net
# ── Data Services ─────────────────────────────────────────────────────────────
postgres:
image: postgis/postgis:16-3.4
container_name: goodgo-postgres
restart: unless-stopped
ports:
- '${DB_PORT:-5432}:5432'
environment:
POSTGRES_DB: ${DB_NAME}
POSTGRES_USER: ${DB_USER}
@@ -74,6 +144,19 @@ services:
timeout: 5s
retries: 5
start_period: 30s
deploy:
resources:
limits:
memory: 2g
cpus: '2.0'
reservations:
memory: 1g
shm_size: 256m
logging:
driver: json-file
options:
max-size: '10m'
max-file: '5'
networks:
- goodgo-net
@@ -90,6 +173,23 @@ services:
timeout: 5s
retries: 5
start_period: 10s
deploy:
resources:
limits:
memory: 768m
cpus: '0.5'
reservations:
memory: 256m
security_opt:
- no-new-privileges:true
read_only: true
tmpfs:
- /tmp
logging:
driver: json-file
options:
max-size: '10m'
max-file: '3'
networks:
- goodgo-net
@@ -108,6 +208,18 @@ services:
timeout: 5s
retries: 5
start_period: 15s
deploy:
resources:
limits:
memory: 1g
cpus: '1.0'
reservations:
memory: 512m
logging:
driver: json-file
options:
max-size: '10m'
max-file: '3'
networks:
- goodgo-net
@@ -127,24 +239,114 @@ services:
timeout: 5s
retries: 5
start_period: 15s
deploy:
resources:
limits:
memory: 1g
cpus: '0.5'
reservations:
memory: 256m
logging:
driver: json-file
options:
max-size: '10m'
max-file: '3'
networks:
- goodgo-net
ai-services:
image: ${REGISTRY_URL:-ghcr.io/goodgo}/goodgo-ai-services:${IMAGE_TAG:-latest}
container_name: goodgo-ai-services
# ── Database Backup ───────────────────────────────────────────────────────────
pg-backup:
image: postgis/postgis:16-3.4
container_name: goodgo-pg-backup
restart: unless-stopped
entrypoint: /bin/bash
command:
- -c
- |
apt-get update -qq && apt-get install -y -qq cron > /dev/null 2>&1
echo "0 2 * * * PGHOST=postgres PGPORT=5432 PGUSER=${DB_USER} PGDATABASE=${DB_NAME} PGPASSWORD=${DB_PASSWORD} BACKUP_DIR=/backups RETENTION_DAYS=${BACKUP_RETENTION_DAYS:-7} /scripts/pg-backup.sh >> /var/log/pg-backup.log 2>&1" | crontab -
/scripts/pg-backup.sh
cron -f
environment:
AI_DEBUG: 'false'
AI_LOG_LEVEL: info
AI_API_KEY: ${AI_API_KEY}
AI_RATE_LIMIT: ${AI_RATE_LIMIT:-60/minute}
PGHOST: postgres
PGPORT: '5432'
PGUSER: ${DB_USER}
PGDATABASE: ${DB_NAME}
PGPASSWORD: ${DB_PASSWORD}
BACKUP_DIR: /backups
RETENTION_DAYS: ${BACKUP_RETENTION_DAYS:-7}
volumes:
- ./scripts/backup:/scripts:ro
- pg_backups:/backups
depends_on:
postgres:
condition: service_healthy
deploy:
resources:
limits:
memory: 512m
cpus: '0.5'
logging:
driver: json-file
options:
max-size: '5m'
max-file: '3'
networks:
- goodgo-net
# ── Monitoring & Logging ──────────────────────────────────────────────────────
loki:
image: grafana/loki:3.0.0
container_name: goodgo-loki
restart: unless-stopped
command: -config.file=/etc/loki/loki-config.yml
volumes:
- ./monitoring/loki/loki-config.yml:/etc/loki/loki-config.yml:ro
- loki_data:/loki
healthcheck:
test: ['CMD', 'python', '-c', 'import httpx; httpx.get("http://localhost:8000/health").raise_for_status()']
interval: 30s
test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3100/ready']
interval: 15s
timeout: 5s
retries: 5
start_period: 30s
start_period: 20s
deploy:
resources:
limits:
memory: 512m
cpus: '0.5'
reservations:
memory: 256m
logging:
driver: json-file
options:
max-size: '10m'
max-file: '3'
networks:
- goodgo-net
promtail:
image: grafana/promtail:3.0.0
container_name: goodgo-promtail
restart: unless-stopped
command: -config.file=/etc/promtail/promtail-config.yml
volumes:
- ./monitoring/promtail/promtail-config.yml:/etc/promtail/promtail-config.yml:ro
- /var/run/docker.sock:/var/run/docker.sock:ro
depends_on:
loki:
condition: service_healthy
deploy:
resources:
limits:
memory: 256m
cpus: '0.25'
reservations:
memory: 128m
logging:
driver: json-file
options:
max-size: '5m'
max-file: '3'
networks:
- goodgo-net
@@ -165,6 +367,20 @@ services:
timeout: 5s
retries: 3
start_period: 10s
deploy:
resources:
limits:
memory: 1g
cpus: '0.5'
reservations:
memory: 512m
security_opt:
- no-new-privileges:true
logging:
driver: json-file
options:
max-size: '10m'
max-file: '3'
networks:
- goodgo-net
@@ -189,12 +405,28 @@ services:
depends_on:
prometheus:
condition: service_healthy
loki:
condition: service_healthy
healthcheck:
test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3000/api/health']
interval: 15s
timeout: 5s
retries: 3
start_period: 15s
deploy:
resources:
limits:
memory: 512m
cpus: '0.5'
reservations:
memory: 256m
security_opt:
- no-new-privileges:true
logging:
driver: json-file
options:
max-size: '10m'
max-file: '3'
networks:
- goodgo-net
@@ -207,6 +439,10 @@ volumes:
driver: local
minio_data:
driver: local
pg_backups:
driver: local
loki_data:
driver: local
prometheus_data:
driver: local
grafana_data:

View File

@@ -24,10 +24,14 @@ COPY app/ ./app/
# Pre-download underthesea models at build time
RUN python -c "from underthesea import word_tokenize; word_tokenize('test')" 2>/dev/null || true
RUN useradd --create-home --shell /bin/bash appuser && chown -R appuser:appuser /app
EXPOSE 8000
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
CMD python -c "import httpx; httpx.get('http://localhost:8000/health').raise_for_status()"
USER appuser
ENTRYPOINT ["dumb-init", "--"]
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--timeout-graceful-shutdown", "30"]