feat(ops): add database backup strategy and log aggregation stack
- Add pg-backup container with daily automated pg_dump (02:00 UTC) and 7-day retention - Add backup/restore scripts with documented recovery procedure - Add Loki + Promtail for centralized log aggregation from all Docker containers - Add Loki as Grafana datasource with correlation ID derived fields - Add Grafana logs dashboard with volume, error rate, HTTP request, and log viewer panels - Configure Promtail to parse Pino structured JSON logs with level/context labels - Enhance LoggerService with string-level formatter and service base field - Configure 15-day log retention in Loki Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
@@ -14,10 +14,14 @@ export class LoggerService implements NestLoggerService {
|
|||||||
? { target: 'pino-pretty', options: { colorize: true } }
|
? { target: 'pino-pretty', options: { colorize: true } }
|
||||||
: undefined,
|
: undefined,
|
||||||
formatters: {
|
formatters: {
|
||||||
|
level(label) {
|
||||||
|
return { level: label };
|
||||||
|
},
|
||||||
log(object) {
|
log(object) {
|
||||||
return maskPii(object) as Record<string, unknown>;
|
return maskPii(object) as Record<string, unknown>;
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
base: { service: 'goodgo-api' },
|
||||||
timestamp: pino.stdTimeFunctions.isoTime,
|
timestamp: pino.stdTimeFunctions.isoTime,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -101,6 +101,70 @@ services:
|
|||||||
networks:
|
networks:
|
||||||
- goodgo-net
|
- goodgo-net
|
||||||
|
|
||||||
|
# ── Database Backup ──
|
||||||
|
pg-backup:
  # Backup sidecar: takes one backup at start-up, then runs
  # /scripts/pg-backup.sh from cron every day at 02:00 (container clock).
  image: postgis/postgis:16-3.4
  container_name: goodgo-pg-backup
  restart: unless-stopped
  entrypoint: /bin/bash
  command:
    - -c
    # The script below deliberately contains no dollar signs, so Compose
    # interpolation never touches it; all settings come from `environment`.
    #  * `set -e` makes a failed cron install stop the container before any
    #    backup is taken, instead of looping through restarts that each
    #    produce a new dump.
    #  * cron does not inherit the container environment, so the connection
    #    settings are exported to a root-only env file that the job sources.
    #    This also keeps the password out of the crontab, where a `%` would
    #    be interpreted as a newline.
    #  * The start-up backup stays best-effort: a failure is reported but the
    #    schedule is still installed and cron keeps running.
    - |
      set -eo pipefail
      apt-get update -qq
      apt-get install -y -qq cron > /dev/null
      declare -px PGHOST PGPORT PGUSER PGDATABASE PGPASSWORD BACKUP_DIR RETENTION_DAYS > /etc/pg-backup.env
      chmod 600 /etc/pg-backup.env
      printf '%s\n' 'SHELL=/bin/bash' '0 2 * * * . /etc/pg-backup.env && /scripts/pg-backup.sh >> /var/log/pg-backup.log 2>&1' | crontab -
      /scripts/pg-backup.sh || echo "[backup] initial backup failed; scheduled backups remain active" >&2
      cron -f
  environment:
    PGHOST: postgres
    PGPORT: '5432'
    PGUSER: ${DB_USER:-goodgo}
    PGDATABASE: ${DB_NAME:-goodgo}
    PGPASSWORD: ${DB_PASSWORD:-goodgo_secret}
    BACKUP_DIR: /backups
    RETENTION_DAYS: ${BACKUP_RETENTION_DAYS:-7}
  volumes:
    # Scripts are mounted read-only; dumps are written to the named volume.
    - ./scripts/backup:/scripts:ro
    - pg_backups:/backups
  depends_on:
    postgres:
      condition: service_healthy
  networks:
    - goodgo-net
|
||||||
|
|
||||||
|
# ── Log Aggregation ──
|
||||||
|
loki:
  # Log store. Configuration is bind-mounted read-only; chunks and index
  # are kept in the loki_data named volume.
  container_name: goodgo-loki
  image: grafana/loki:3.0.0
  restart: unless-stopped
  command: -config.file=/etc/loki/loki-config.yml
  ports:
    - "${LOKI_PORT:-3100}:3100"
  volumes:
    - ./monitoring/loki/loki-config.yml:/etc/loki/loki-config.yml:ro
    - loki_data:/loki
  networks:
    - goodgo-net
  healthcheck:
    # Healthy once Loki's readiness endpoint answers.
    test:
      - CMD
      - wget
      - --spider
      - -q
      - http://localhost:3100/ready
    interval: 15s
    timeout: 5s
    retries: 5
    start_period: 20s
|
||||||
|
|
||||||
|
promtail:
  # Log shipper. Reads container logs through the Docker socket (mounted
  # read-only) and starts only after Loki reports healthy.
  container_name: goodgo-promtail
  image: grafana/promtail:3.0.0
  restart: unless-stopped
  command: -config.file=/etc/promtail/promtail-config.yml
  volumes:
    - ./monitoring/promtail/promtail-config.yml:/etc/promtail/promtail-config.yml:ro
    - /var/run/docker.sock:/var/run/docker.sock:ro
  networks:
    - goodgo-net
  depends_on:
    loki:
      condition: service_healthy
|
||||||
|
|
||||||
prometheus:
|
prometheus:
|
||||||
image: prom/prometheus:v2.51.0
|
image: prom/prometheus:v2.51.0
|
||||||
container_name: goodgo-prometheus
|
container_name: goodgo-prometheus
|
||||||
@@ -142,6 +206,8 @@ services:
|
|||||||
depends_on:
|
depends_on:
|
||||||
prometheus:
|
prometheus:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
loki:
|
||||||
|
condition: service_healthy
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3000/api/health']
|
test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3000/api/health']
|
||||||
interval: 15s
|
interval: 15s
|
||||||
@@ -160,6 +226,10 @@ volumes:
|
|||||||
driver: local
|
driver: local
|
||||||
minio_data:
|
minio_data:
|
||||||
driver: local
|
driver: local
|
||||||
|
pg_backups:
|
||||||
|
driver: local
|
||||||
|
loki_data:
|
||||||
|
driver: local
|
||||||
prometheus_data:
|
prometheus_data:
|
||||||
driver: local
|
driver: local
|
||||||
grafana_data:
|
grafana_data:
|
||||||
|
|||||||
102
docs/backup-restore.md
Normal file
102
docs/backup-restore.md
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
# Database Backup & Restore Procedures
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Automated daily PostgreSQL backups run inside the `pg-backup` Docker container using `pg_dump` in its compressed custom archive format. Backups are stored in the `pg_backups` Docker volume.
|
||||||
|
|
||||||
|
## Backup Configuration
|
||||||
|
|
||||||
|
| Setting | Default | Environment Variable |
|
||||||
|
|---------|---------|---------------------|
|
||||||
|
| Schedule | Daily at 02:00 UTC | Cron in `pg-backup` service |
|
||||||
|
| Retention | 7 days | `BACKUP_RETENTION_DAYS` |
|
||||||
|
| Format | Custom (`pg_dump --format=custom`); despite the `.sql.gz` suffix, files are `pg_restore` archives, not gzipped SQL | — |
|
||||||
|
| Compression | Level 6 | — |
|
||||||
|
| Storage | `pg_backups` Docker volume | — |
|
||||||
|
|
||||||
|
## Listing Backups
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker exec goodgo-pg-backup ls -lh /backups/
|
||||||
|
```
|
||||||
|
|
||||||
|
## Manual Backup
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker exec goodgo-pg-backup /scripts/pg-backup.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
## Restore Procedure
|
||||||
|
|
||||||
|
### 1. Identify the backup to restore
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker exec goodgo-pg-backup ls -lht /backups/
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Stop application services
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose stop ai-services
|
||||||
|
# Stop any NestJS API processes
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Run restore
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker exec -it goodgo-pg-backup /scripts/pg-restore.sh /backups/goodgo_YYYYMMDD_HHMMSS.sql.gz
|
||||||
|
```
|
||||||
|
|
||||||
|
The restore script will:
|
||||||
|
- Terminate active database connections
|
||||||
|
- Drop and recreate the database
|
||||||
|
- Restore from the selected backup
|
||||||
|
|
||||||
|
### 4. Verify restore
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker exec goodgo-postgres psql -U goodgo -d goodgo -c '\dt'
|
||||||
|
docker exec goodgo-postgres psql -U goodgo -d goodgo -c 'SELECT count(*) FROM "User";'
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Run Prisma migrations (if needed)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm prisma migrate deploy
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6. Restart services
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
## Backup Verification
|
||||||
|
|
||||||
|
Check the backup log:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker exec goodgo-pg-backup cat /var/log/pg-backup.log
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify backup integrity without restoring:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker exec goodgo-pg-backup pg_restore --list /backups/goodgo_YYYYMMDD_HHMMSS.sql.gz
|
||||||
|
```
|
||||||
|
|
||||||
|
## Disaster Recovery
|
||||||
|
|
||||||
|
For complete data loss (volume destroyed):
|
||||||
|
|
||||||
|
1. Retrieve backup from external storage (if configured)
|
||||||
|
2. Recreate the `pg_backups` volume and copy backup file in
|
||||||
|
3. Follow the restore procedure above
|
||||||
|
|
||||||
|
## Log Aggregation
|
||||||
|
|
||||||
|
Logs are aggregated via Loki + Promtail and viewable in Grafana:
|
||||||
|
|
||||||
|
- **Grafana**: http://localhost:3002 (dashboard: "GoodGo - Logs")
|
||||||
|
- **Loki**: http://localhost:3100
|
||||||
|
- **Log retention**: 15 days (configured in `monitoring/loki/loki-config.yml`)
|
||||||
137
monitoring/grafana/dashboards/logs.json
Normal file
137
monitoring/grafana/dashboards/logs.json
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
{
|
||||||
|
"annotations": { "list": [] },
|
||||||
|
"editable": true,
|
||||||
|
"fiscalYearStartMonth": 0,
|
||||||
|
"graphTooltip": 1,
|
||||||
|
"id": null,
|
||||||
|
"links": [],
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"title": "Log Volume by Level",
|
||||||
|
"type": "timeseries",
|
||||||
|
"gridPos": { "h": 6, "w": 24, "x": 0, "y": 0 },
|
||||||
|
"datasource": { "type": "loki", "uid": "loki" },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum by (level) (count_over_time({compose_service=~\"$service\"} | json [$__interval]))",
|
||||||
|
"legendFormat": "{{ level }}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"custom": {
|
||||||
|
"drawStyle": "bars",
|
||||||
|
"stacking": { "mode": "normal" },
|
||||||
|
"fillOpacity": 80
|
||||||
|
},
|
||||||
|
"color": { "mode": "palette-classic" }
|
||||||
|
},
|
||||||
|
"overrides": [
|
||||||
|
{ "matcher": { "id": "byName", "options": "error" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] },
|
||||||
|
{ "matcher": { "id": "byName", "options": "fatal" }, "properties": [{ "id": "color", "value": { "fixedColor": "dark-red", "mode": "fixed" } }] },
|
||||||
|
{ "matcher": { "id": "byName", "options": "warn" }, "properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }] },
|
||||||
|
{ "matcher": { "id": "byName", "options": "info" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] },
|
||||||
|
{ "matcher": { "id": "byName", "options": "debug" }, "properties": [{ "id": "color", "value": { "fixedColor": "blue", "mode": "fixed" } }] }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Error Rate",
|
||||||
|
"type": "stat",
|
||||||
|
"gridPos": { "h": 4, "w": 6, "x": 0, "y": 6 },
|
||||||
|
"datasource": { "type": "loki", "uid": "loki" },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum(count_over_time({compose_service=~\"$service\"} | json | level = `error` or level = `fatal` [$__range]))",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "green", "value": null },
|
||||||
|
{ "color": "yellow", "value": 10 },
|
||||||
|
{ "color": "red", "value": 50 }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Errors by Service",
|
||||||
|
"type": "piechart",
|
||||||
|
"gridPos": { "h": 8, "w": 6, "x": 6, "y": 6 },
|
||||||
|
"datasource": { "type": "loki", "uid": "loki" },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "sum by (compose_service) (count_over_time({compose_service=~\".+\"} | json | level = `error` or level = `fatal` [$__range]))",
|
||||||
|
"legendFormat": "{{ compose_service }}",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "HTTP Request Logs (4xx/5xx)",
|
||||||
|
"type": "table",
|
||||||
|
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 },
|
||||||
|
"datasource": { "type": "loki", "uid": "loki" },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "{compose_service=~\"$service\", component=\"http\"} | json | statusCode >= 400 | line_format \"{{.method}} {{.url}} {{.statusCode}} {{.duration}}ms\"",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "All Logs",
|
||||||
|
"type": "logs",
|
||||||
|
"gridPos": { "h": 12, "w": 24, "x": 0, "y": 14 },
|
||||||
|
"datasource": { "type": "loki", "uid": "loki" },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "{compose_service=~\"$service\"} | json |= `$search`",
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"options": {
|
||||||
|
"showTime": true,
|
||||||
|
"showLabels": true,
|
||||||
|
"showCommonLabels": false,
|
||||||
|
"wrapLogMessage": true,
|
||||||
|
"prettifyLogMessage": true,
|
||||||
|
"enableLogDetails": true,
|
||||||
|
"sortOrder": "Descending"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"refresh": "10s",
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"tags": ["goodgo", "logs", "loki"],
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"name": "service",
|
||||||
|
"type": "query",
|
||||||
|
"datasource": { "type": "loki", "uid": "loki" },
|
||||||
|
"query": "label_values(compose_service)",
|
||||||
|
"includeAll": true,
|
||||||
|
"allValue": ".+",
|
||||||
|
"current": { "text": "All", "value": "$__all" },
|
||||||
|
"refresh": 2,
|
||||||
|
"multi": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "search",
|
||||||
|
"type": "textbox",
|
||||||
|
"current": { "text": "", "value": "" },
|
||||||
|
"label": "Search"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"time": { "from": "now-1h", "to": "now" },
|
||||||
|
"title": "GoodGo - Logs",
|
||||||
|
"uid": "goodgo-logs"
|
||||||
|
}
|
||||||
@@ -7,3 +7,15 @@ datasources:
|
|||||||
url: http://prometheus:9090
|
url: http://prometheus:9090
|
||||||
isDefault: true
|
isDefault: true
|
||||||
editable: true
|
editable: true
|
||||||
|
|
||||||
|
- name: Loki
  type: loki
  # Pin the UID: dashboards that reference this datasource as
  # { "type": "loki", "uid": "loki" } cannot resolve it if Grafana is left
  # to generate a random UID at provisioning time.
  uid: loki
  access: proxy
  url: http://loki:3100
  editable: true
  jsonData:
    derivedFields:
      # Extracts the correlationId value from JSON log lines and exposes it
      # as a link. `$$` escapes the dollar sign so Grafana's provisioning
      # does not expand it as an environment variable.
      - datasourceUid: prometheus
        matcherRegex: 'correlationId":"([^"]+)'
        name: correlationId
        url: '$${__value.raw}'
|
||||||
|
|||||||
58
monitoring/loki/loki-config.yml
Normal file
58
monitoring/loki/loki-config.yml
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
auth_enabled: false

# Single-binary Loki: local filesystem storage, TSDB index, and
# compactor-driven retention.

server:
  http_listen_port: 3100
  grpc_listen_port: 9096

common:
  path_prefix: /loki
  storage:
    filesystem:
      chunks_directory: /loki/chunks
      rules_directory: /loki/rules
  replication_factor: 1
  ring:
    instance_addr: 127.0.0.1
    kvstore:
      store: inmemory

schema_config:
  configs:
    - from: 2024-01-01
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h

limits_config:
  # Retention: keep logs for 15 days.
  # With compactor.retention_enabled the retention period is read from here.
  # If it is left unset the compactor never deletes anything.
  retention_period: 360h # 15 days
  reject_old_samples: true
  reject_old_samples_max_age: 168h # 7 days
  max_entries_limit_per_query: 5000
  ingestion_rate_mb: 4
  ingestion_burst_size_mb: 6

storage_config:
  tsdb_shipper:
    active_index_directory: /loki/tsdb-index
    cache_location: /loki/tsdb-cache

compactor:
  working_directory: /loki/compactor
  compaction_interval: 10m
  # Applies limits_config.retention_period; deletions are delayed by
  # retention_delete_delay after chunks are marked.
  retention_enabled: true
  retention_delete_delay: 2h
  retention_delete_worker_count: 150
  delete_request_store: filesystem

chunk_store_config:
  chunk_cache_config:
    embedded_cache:
      enabled: true
      max_size_mb: 100

# NOTE: the former `table_manager` retention block was removed. Table-manager
# retention does not apply to the TSDB index with a filesystem object store,
# so it never enforced the 15-day limit.
|
||||||
66
monitoring/promtail/promtail-config.yml
Normal file
66
monitoring/promtail/promtail-config.yml
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
server:
  http_listen_port: 9080
  grpc_listen_port: 0

# Read offsets are tracked in this file.
positions:
  filename: /tmp/positions.yaml

# Push endpoint of the Loki service.
clients:
  - url: http://loki:3100/loki/api/v1/push

scrape_configs:
  # Discover containers through the Docker socket, limited to the
  # goodgo-net network.
  - job_name: docker
    docker_sd_configs:
      - host: unix:///var/run/docker.sock
        refresh_interval: 5s
        filters:
          - name: network
            values:
              - goodgo-net
    relabel_configs:
      # container: Docker container name without the leading slash
      - source_labels: ["__meta_docker_container_name"]
        regex: "/(.*)"
        target_label: container
      # service: container name with the goodgo- prefix stripped
      - source_labels: ["__meta_docker_container_name"]
        regex: "/goodgo-(.*)"
        target_label: service
      # compose_service: value of the com.docker.compose.service label
      - source_labels: ["__meta_docker_container_label_com_docker_compose_service"]
        target_label: compose_service
    pipeline_stages:
      # Extract fields from JSON log lines (Pino structured output)
      - json:
          expressions:
            level: level
            msg: msg
            context: context
            method: method
            url: url
            statusCode: statusCode
            duration: duration
            correlationId: correlationId
            component: component
            timestamp: time
      # Translate Pino numeric levels to names; other values pass through
      - template:
          source: level
          template: '{{ if eq .Value "10" }}trace{{ else if eq .Value "20" }}debug{{ else if eq .Value "30" }}info{{ else if eq .Value "40" }}warn{{ else if eq .Value "50" }}error{{ else if eq .Value "60" }}fatal{{ else }}{{ .Value }}{{ end }}'
      # Promote these extracted fields to stream labels
      - labels:
          level:
          context:
          component:
      # Attach per-request fields as structured metadata rather than labels
      - structured_metadata:
          method:
          url:
          statusCode:
          correlationId:
      # Use the timestamp emitted in the log line
      - timestamp:
          source: timestamp
          format: RFC3339Nano
          fallback_formats:
            - "2006-01-02T15:04:05.999Z07:00"
          action_on_failure: fudge
|
||||||
42
scripts/backup/pg-backup.sh
Executable file
42
scripts/backup/pg-backup.sh
Executable file
@@ -0,0 +1,42 @@
|
|||||||
|
#!/bin/bash
set -euo pipefail

# ── PostgreSQL Automated Backup Script ──
# Runs daily via cron inside the pg-backup container.
# Dumps the database and manages retention.
#
# Environment (all optional):
#   BACKUP_DIR       directory that receives the dumps   (default: /backups)
#   RETENTION_DAYS   age threshold in days for pruning   (default: 7)
#   PGHOST / PGPORT / PGUSER / PGDATABASE
#                    connection settings                 (default: postgres / 5432 / goodgo / goodgo)
#
# Exits non-zero if the configuration is invalid or pg_dump fails. Old
# backups are pruned only after a new dump has completed successfully.

BACKUP_DIR="${BACKUP_DIR:-/backups}"
RETENTION_DAYS="${RETENTION_DAYS:-7}"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_FILE="${BACKUP_DIR}/goodgo_${TIMESTAMP}.sql.gz"
# The dump is written under a temporary name so that an interrupted or failed
# pg_dump never leaves a truncated file that looks like a valid backup.
PARTIAL_FILE="${BACKUP_FILE}.partial"

# Reject a non-numeric retention before doing any work; `find -mtime` would
# otherwise fail only after the dump had already been taken.
case "${RETENTION_DAYS}" in
  ''|*[!0-9]*)
    echo "[backup] ERROR: RETENTION_DAYS must be a non-negative integer, got '${RETENTION_DAYS}'" >&2
    exit 1
    ;;
esac

# Remove the temporary file on any exit; after a successful rename this is a no-op.
cleanup_partial() {
  rm -f "${PARTIAL_FILE}"
}
trap cleanup_partial EXIT

echo "[backup] Starting PostgreSQL backup at $(date -Iseconds)"

# Ensure backup directory exists
mkdir -p "${BACKUP_DIR}"

# Run pg_dump with compression (custom archive format, restorable with pg_restore)
pg_dump \
  -h "${PGHOST:-postgres}" \
  -p "${PGPORT:-5432}" \
  -U "${PGUSER:-goodgo}" \
  -d "${PGDATABASE:-goodgo}" \
  --no-owner \
  --no-privileges \
  --format=custom \
  --compress=6 \
  -f "${PARTIAL_FILE}"

# Publish the finished dump under its final name (rename within one directory).
mv "${PARTIAL_FILE}" "${BACKUP_FILE}"

FILESIZE=$(du -h "${BACKUP_FILE}" | cut -f1)
echo "[backup] Backup completed: ${BACKUP_FILE} (${FILESIZE})"

# Prune old backups beyond retention period.
# `-mtime +N` matches files whose age, counted in whole 24-hour periods, is
# greater than N. `.partial` files do not match the pattern.
echo "[backup] Pruning backups older than ${RETENTION_DAYS} days..."
PRUNED=$(find "${BACKUP_DIR}" -name "goodgo_*.sql.gz" -type f -mtime "+${RETENTION_DAYS}" -print -delete | wc -l)
echo "[backup] Pruned ${PRUNED} old backup(s)"

# List current backups
echo "[backup] Current backups:"
ls -lh "${BACKUP_DIR}"/goodgo_*.sql.gz 2>/dev/null || echo "  (none)"

echo "[backup] Done at $(date -Iseconds)"
|
||||||
72
scripts/backup/pg-restore.sh
Executable file
72
scripts/backup/pg-restore.sh
Executable file
@@ -0,0 +1,72 @@
|
|||||||
|
#!/bin/bash
set -euo pipefail

# ── PostgreSQL Restore Script ──
# Restores a database from a backup file.
#
# Usage:
#   ./pg-restore.sh <backup-file>
#   ./pg-restore.sh /backups/goodgo_20260408_020000.sql.gz
#
# Inside Docker:
#   docker exec -it goodgo-pg-backup /scripts/pg-restore.sh /backups/<file>
#
# Environment (all optional): PGHOST, PGPORT, PGUSER, PGDATABASE, BACKUP_DIR.
#
# Exit status:
#   0   restore finished without pg_restore errors, or the user aborted
#   1   usage error, missing file, or unreadable archive (nothing was dropped)
#   N   pg_restore's own non-zero exit status if the restore reported errors

BACKUP_FILE="${1:-}"

if [ -z "${BACKUP_FILE}" ]; then
  echo "Usage: $0 <backup-file>"
  echo ""
  echo "Available backups:"
  ls -lht "${BACKUP_DIR:-/backups}"/goodgo_*.sql.gz 2>/dev/null || echo "  (none found)"
  exit 1
fi

if [ ! -f "${BACKUP_FILE}" ]; then
  echo "[restore] ERROR: Backup file not found: ${BACKUP_FILE}"
  exit 1
fi

# Read the archive's table of contents BEFORE anything destructive happens, so
# a truncated or non-archive file cannot cause the live database to be dropped
# with nothing to restore from.
if ! pg_restore --list "${BACKUP_FILE}" > /dev/null; then
  echo "[restore] ERROR: Backup file is not a readable pg_dump archive: ${BACKUP_FILE}" >&2
  exit 1
fi

PGHOST="${PGHOST:-postgres}"
PGPORT="${PGPORT:-5432}"
PGUSER="${PGUSER:-goodgo}"
PGDATABASE="${PGDATABASE:-goodgo}"

echo "[restore] WARNING: This will DROP and recreate the '${PGDATABASE}' database."
echo "[restore] Backup file: ${BACKUP_FILE}"
echo "[restore] Target: ${PGHOST}:${PGPORT}/${PGDATABASE}"
echo ""

# If running interactively, prompt for confirmation
if [ -t 0 ]; then
  read -rp "Continue? (yes/no): " CONFIRM
  if [ "${CONFIRM}" != "yes" ]; then
    echo "[restore] Aborted."
    exit 0
  fi
fi

echo "[restore] Starting restore at $(date -Iseconds)..."

# Terminate existing connections (best effort: a failure here is not fatal,
# the DROP DATABASE below will report any session that is still attached).
psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d postgres -c \
  "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '${PGDATABASE}' AND pid <> pg_backend_pid();" \
  2>/dev/null || true

# Drop and recreate database
psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d postgres -c "DROP DATABASE IF EXISTS \"${PGDATABASE}\";"
psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d postgres -c "CREATE DATABASE \"${PGDATABASE}\";"

# Restore from backup. The exit status is captured rather than discarded so
# that a failed restore is never reported as a success.
RESTORE_STATUS=0
pg_restore \
  -h "${PGHOST}" \
  -p "${PGPORT}" \
  -U "${PGUSER}" \
  -d "${PGDATABASE}" \
  --no-owner \
  --no-privileges \
  --clean \
  --if-exists \
  "${BACKUP_FILE}" || RESTORE_STATUS=$?

if [ "${RESTORE_STATUS}" -ne 0 ]; then
  echo "[restore] ERROR: pg_restore exited with status ${RESTORE_STATUS} at $(date -Iseconds); the database may be incomplete. Review the messages above." >&2
  exit "${RESTORE_STATUS}"
fi

echo "[restore] Restore completed at $(date -Iseconds)"
echo "[restore] Verify with: psql -h ${PGHOST} -U ${PGUSER} -d ${PGDATABASE} -c '\\dt'"
|
||||||
Reference in New Issue
Block a user