# Review summary of this patch. These '#' lines precede the first "diff --git"
# header and are not part of any hunk.
#
# NOTE(review): the patch text below appears to have had its newlines collapsed
# (several file diffs and many hunks share one physical line), so the line
# counts in the "@@" headers no longer correspond to physical lines. It likely
# needs to be regenerated from the original commit before it can be applied.
#
# .dockerignore
#   - Groups the existing entries under section comments.
#   - Adds !.env.example, .vscode, .idea, *.swp, *.swo, .cache, .DS_Store,
#     Thumbs.db, docker-compose*.yml, scripts/backup and *.log.
#   - "monitoring" is removed from its old position and re-added under the
#     "Docker files" section.
#
# apps/api/Dockerfile
#   - Adds --chown=node:node to the five COPY --from=build instructions.
#   - Appends .catch(() => process.exit(1)) to the HEALTHCHECK fetch probe.
#
# apps/web/Dockerfile
#   - Adds --chown=node:node to the three COPY --from=build instructions.
#
# docker-compose.prod.yml
#   - api: REDIS_URL now carries ${REDIS_PASSWORD}; JWT_REFRESH_SECRET is added;
#     the healthcheck gets the same .catch(...) suffix as the API Dockerfile.
#   - Each service hunk gains deploy.resources limits and json-file logging
#     with max-size / max-file options; several also gain
#     security_opt no-new-privileges, read_only: true and a tmpfs on /tmp.
#   - ai-services: the block is removed from its old position and re-added
#     ahead of postgres with the extra deploy / security / logging keys.
#   - postgres: the published "ports" mapping is removed; shm_size: 256m is added.
#   - New services: pg-backup, loki, promtail.
#   - The service whose healthcheck probes http://localhost:3000/api/health now
#     also depends on loki being healthy.
#   - New named volumes: pg_backups, loki_data.
#
# libs/ai-services/Dockerfile
#   - Creates "appuser", chowns /app to it, and adds USER appuser ahead of the
#     ENTRYPOINT.
#
# NOTE(review): REDIS_URL now includes a password, but no redis-side password
#   setting is visible in these hunks — confirm the redis service is started
#   with the matching password.
# NOTE(review): read_only: true is paired with a tmpfs on /tmp only — confirm
#   none of those services write anywhere else on the root filesystem.
# NOTE(review): pg-backup runs "apt-get update" / "apt-get install cron" on
#   every container start with output sent to /dev/null, so an install failure
#   would not be visible in the logs — consider a purpose-built image.
# NOTE(review): the generated crontab entry embeds PGPASSWORD inline; cron
#   presumably treats an unescaped '%' in the command specially — verify
#   against crontab(5) if the password may contain one.
# NOTE(review): the cron job redirects its output to /var/log/pg-backup.log, so
#   output from the scheduled runs does not go through the json-file logging
#   configured for the service.
# NOTE(review): promtail mounts /var/run/docker.sock (read-only) — confirm this
#   access is intended.
# NOTE(review): in libs/ai-services/Dockerfile the underthesea pre-download RUN
#   executes before USER appuser — confirm the downloaded models end up
#   somewhere appuser can read.
diff --git a/.dockerignore b/.dockerignore index 9033418..1e4ba7f 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,16 +2,43 @@ node_modules .next dist *.tsbuildinfo + +# Version control .git .github .husky + +# Documentation and tests docs e2e playwright-report -monitoring *.md !README.md + +# Environment and secrets .env* +!.env.example + +# IDE and editor +.vscode +.idea +*.swp +*.swo + +# Build caches .eslintcache coverage .turbo +.cache + +# OS files +.DS_Store +Thumbs.db + +# Docker files (avoid recursive context) +docker-compose*.yml +monitoring + +# Dev tools +scripts/backup +*.log diff --git a/apps/api/Dockerfile b/apps/api/Dockerfile index 4dac4fa..b8ae19a 100644 --- a/apps/api/Dockerfile +++ b/apps/api/Dockerfile @@ -31,16 +31,16 @@ WORKDIR /app ENV NODE_ENV=production -COPY --from=build /app/apps/api/dist ./dist -COPY --from=build /app/node_modules ./node_modules -COPY --from=build /app/apps/api/node_modules ./apps/api/node_modules -COPY --from=build /app/prisma ./prisma -COPY --from=build /app/apps/api/package.json ./package.json +COPY --from=build --chown=node:node /app/apps/api/dist ./dist +COPY --from=build --chown=node:node /app/node_modules ./node_modules +COPY --from=build --chown=node:node /app/apps/api/node_modules ./apps/api/node_modules +COPY --from=build --chown=node:node /app/prisma ./prisma +COPY --from=build --chown=node:node /app/apps/api/package.json ./package.json EXPOSE 3001 HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \ - CMD node -e "fetch('http://localhost:3001/health').then(r => { if (!r.ok) throw 1 })" + CMD node -e "fetch('http://localhost:3001/health').then(r => { if (!r.ok) throw 1 }).catch(() => process.exit(1))" USER node diff --git a/apps/web/Dockerfile b/apps/web/Dockerfile index e7d3d5a..73413db 100644 --- a/apps/web/Dockerfile +++ b/apps/web/Dockerfile @@ -27,9 +27,9 @@ ENV NEXT_TELEMETRY_DISABLED=1 ENV HOSTNAME=0.0.0.0 ENV PORT=3000 -COPY --from=build /app/apps/web/public ./public -COPY 
--from=build /app/apps/web/.next/standalone ./ -COPY --from=build /app/apps/web/.next/static ./.next/static +COPY --from=build --chown=node:node /app/apps/web/public ./public +COPY --from=build --chown=node:node /app/apps/web/.next/standalone ./ +COPY --from=build --chown=node:node /app/apps/web/.next/static ./.next/static EXPOSE 3000 diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 9472e3c..2afcda7 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -1,4 +1,5 @@ services: + # ── Application Services ────────────────────────────────────────────────────── api: image: ${REGISTRY_URL:-ghcr.io/goodgo}/goodgo-api:${IMAGE_TAG:-latest} container_name: goodgo-api @@ -8,11 +9,12 @@ services: environment: NODE_ENV: production DATABASE_URL: postgresql://${DB_USER}:${DB_PASSWORD}@postgres:5432/${DB_NAME} - REDIS_URL: redis://redis:6379 + REDIS_URL: redis://:${REDIS_PASSWORD}@redis:6379 TYPESENSE_HOST: typesense TYPESENSE_PORT: 8108 TYPESENSE_API_KEY: ${TYPESENSE_API_KEY} JWT_SECRET: ${JWT_SECRET} + JWT_REFRESH_SECRET: ${JWT_REFRESH_SECRET} MINIO_ENDPOINT: minio MINIO_PORT: 9000 MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY} @@ -27,11 +29,28 @@ services: typesense: condition: service_healthy healthcheck: - test: ['CMD', 'node', '-e', "fetch('http://localhost:3001/health').then(r => { if (!r.ok) throw 1 })"] + test: ['CMD', 'node', '-e', "fetch('http://localhost:3001/health').then(r => { if (!r.ok) throw 1 }).catch(() => process.exit(1))"] interval: 30s timeout: 5s retries: 5 start_period: 30s + deploy: + resources: + limits: + memory: 1g + cpus: '1.0' + reservations: + memory: 512m + security_opt: + - no-new-privileges:true + read_only: true + tmpfs: + - /tmp + logging: + driver: json-file + options: + max-size: '10m' + max-file: '5' networks: - goodgo-net @@ -53,15 +72,66 @@ services: timeout: 5s retries: 3 start_period: 15s + deploy: + resources: + limits: + memory: 512m + cpus: '0.5' + reservations: + memory: 256m + security_opt: + - 
no-new-privileges:true + read_only: true + tmpfs: + - /tmp + logging: + driver: json-file + options: + max-size: '10m' + max-file: '5' networks: - goodgo-net + ai-services: + image: ${REGISTRY_URL:-ghcr.io/goodgo}/goodgo-ai-services:${IMAGE_TAG:-latest} + container_name: goodgo-ai-services + restart: unless-stopped + environment: + AI_DEBUG: 'false' + AI_LOG_LEVEL: info + AI_API_KEY: ${AI_API_KEY} + AI_RATE_LIMIT: ${AI_RATE_LIMIT:-60/minute} + healthcheck: + test: ['CMD', 'python', '-c', 'import httpx; httpx.get("http://localhost:8000/health").raise_for_status()'] + interval: 30s + timeout: 5s + retries: 5 + start_period: 30s + deploy: + resources: + limits: + memory: 1g + cpus: '1.0' + reservations: + memory: 512m + security_opt: + - no-new-privileges:true + read_only: true + tmpfs: + - /tmp + logging: + driver: json-file + options: + max-size: '10m' + max-file: '5' + networks: + - goodgo-net + + # ── Data Services ───────────────────────────────────────────────────────────── postgres: image: postgis/postgis:16-3.4 container_name: goodgo-postgres restart: unless-stopped - ports: - - '${DB_PORT:-5432}:5432' environment: POSTGRES_DB: ${DB_NAME} POSTGRES_USER: ${DB_USER} @@ -74,6 +144,19 @@ services: timeout: 5s retries: 5 start_period: 30s + deploy: + resources: + limits: + memory: 2g + cpus: '2.0' + reservations: + memory: 1g + shm_size: 256m + logging: + driver: json-file + options: + max-size: '10m' + max-file: '5' networks: - goodgo-net @@ -90,6 +173,23 @@ services: timeout: 5s retries: 5 start_period: 10s + deploy: + resources: + limits: + memory: 768m + cpus: '0.5' + reservations: + memory: 256m + security_opt: + - no-new-privileges:true + read_only: true + tmpfs: + - /tmp + logging: + driver: json-file + options: + max-size: '10m' + max-file: '3' networks: - goodgo-net @@ -108,6 +208,18 @@ services: timeout: 5s retries: 5 start_period: 15s + deploy: + resources: + limits: + memory: 1g + cpus: '1.0' + reservations: + memory: 512m + logging: + driver: json-file 
+ options: + max-size: '10m' + max-file: '3' networks: - goodgo-net @@ -127,24 +239,114 @@ services: timeout: 5s retries: 5 start_period: 15s + deploy: + resources: + limits: + memory: 1g + cpus: '0.5' + reservations: + memory: 256m + logging: + driver: json-file + options: + max-size: '10m' + max-file: '3' networks: - goodgo-net - ai-services: - image: ${REGISTRY_URL:-ghcr.io/goodgo}/goodgo-ai-services:${IMAGE_TAG:-latest} - container_name: goodgo-ai-services + # ── Database Backup ─────────────────────────────────────────────────────────── + pg-backup: + image: postgis/postgis:16-3.4 + container_name: goodgo-pg-backup restart: unless-stopped + entrypoint: /bin/bash + command: + - -c + - | + apt-get update -qq && apt-get install -y -qq cron > /dev/null 2>&1 + echo "0 2 * * * PGHOST=postgres PGPORT=5432 PGUSER=${DB_USER} PGDATABASE=${DB_NAME} PGPASSWORD=${DB_PASSWORD} BACKUP_DIR=/backups RETENTION_DAYS=${BACKUP_RETENTION_DAYS:-7} /scripts/pg-backup.sh >> /var/log/pg-backup.log 2>&1" | crontab - + /scripts/pg-backup.sh + cron -f environment: - AI_DEBUG: 'false' - AI_LOG_LEVEL: info - AI_API_KEY: ${AI_API_KEY} - AI_RATE_LIMIT: ${AI_RATE_LIMIT:-60/minute} + PGHOST: postgres + PGPORT: '5432' + PGUSER: ${DB_USER} + PGDATABASE: ${DB_NAME} + PGPASSWORD: ${DB_PASSWORD} + BACKUP_DIR: /backups + RETENTION_DAYS: ${BACKUP_RETENTION_DAYS:-7} + volumes: + - ./scripts/backup:/scripts:ro + - pg_backups:/backups + depends_on: + postgres: + condition: service_healthy + deploy: + resources: + limits: + memory: 512m + cpus: '0.5' + logging: + driver: json-file + options: + max-size: '5m' + max-file: '3' + networks: + - goodgo-net + + # ── Monitoring & Logging ────────────────────────────────────────────────────── + loki: + image: grafana/loki:3.0.0 + container_name: goodgo-loki + restart: unless-stopped + command: -config.file=/etc/loki/loki-config.yml + volumes: + - ./monitoring/loki/loki-config.yml:/etc/loki/loki-config.yml:ro + - loki_data:/loki healthcheck: - test: ['CMD', 
'python', '-c', 'import httpx; httpx.get("http://localhost:8000/health").raise_for_status()'] - interval: 30s + test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3100/ready'] + interval: 15s timeout: 5s retries: 5 - start_period: 30s + start_period: 20s + deploy: + resources: + limits: + memory: 512m + cpus: '0.5' + reservations: + memory: 256m + logging: + driver: json-file + options: + max-size: '10m' + max-file: '3' + networks: + - goodgo-net + + promtail: + image: grafana/promtail:3.0.0 + container_name: goodgo-promtail + restart: unless-stopped + command: -config.file=/etc/promtail/promtail-config.yml + volumes: + - ./monitoring/promtail/promtail-config.yml:/etc/promtail/promtail-config.yml:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + depends_on: + loki: + condition: service_healthy + deploy: + resources: + limits: + memory: 256m + cpus: '0.25' + reservations: + memory: 128m + logging: + driver: json-file + options: + max-size: '5m' + max-file: '3' networks: - goodgo-net @@ -165,6 +367,20 @@ services: timeout: 5s retries: 3 start_period: 10s + deploy: + resources: + limits: + memory: 1g + cpus: '0.5' + reservations: + memory: 512m + security_opt: + - no-new-privileges:true + logging: + driver: json-file + options: + max-size: '10m' + max-file: '3' networks: - goodgo-net @@ -189,12 +405,28 @@ services: depends_on: prometheus: condition: service_healthy + loki: + condition: service_healthy healthcheck: test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3000/api/health'] interval: 15s timeout: 5s retries: 3 start_period: 15s + deploy: + resources: + limits: + memory: 512m + cpus: '0.5' + reservations: + memory: 256m + security_opt: + - no-new-privileges:true + logging: + driver: json-file + options: + max-size: '10m' + max-file: '3' networks: - goodgo-net @@ -207,6 +439,10 @@ volumes: driver: local minio_data: driver: local + pg_backups: + driver: local + loki_data: + driver: local prometheus_data: driver: local grafana_data: diff --git 
a/libs/ai-services/Dockerfile b/libs/ai-services/Dockerfile index 15bd3b5..ce673b5 100644 --- a/libs/ai-services/Dockerfile +++ b/libs/ai-services/Dockerfile @@ -24,10 +24,14 @@ COPY app/ ./app/ # Pre-download underthesea models at build time RUN python -c "from underthesea import word_tokenize; word_tokenize('test')" 2>/dev/null || true +RUN useradd --create-home --shell /bin/bash appuser && chown -R appuser:appuser /app + EXPOSE 8000 HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \ CMD python -c "import httpx; httpx.get('http://localhost:8000/health').raise_for_status()" +USER appuser + ENTRYPOINT ["dumb-init", "--"] CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--timeout-graceful-shutdown", "30"]