feat(monitoring): add Prometheus metrics endpoint and Grafana dashboards

Add observability stack with @willsoto/nestjs-prometheus for /metrics endpoint,
Prometheus scraping config, and 4 auto-provisioned Grafana dashboards
(API overview, database, search, business metrics).

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Ho Ngoc Hai
2026-04-08 03:08:54 +07:00
parent b392bc3570
commit d99dfbafbc
13 changed files with 770 additions and 2 deletions

View File

@@ -0,0 +1,149 @@
{
"annotations": { "list": [] },
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"links": [],
"panels": [
{
"title": "Request Rate (req/s)",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"targets": [
{
"expr": "sum(rate(http_requests_total[5m])) by (method)",
"legendFormat": "{{method}}",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "reqps",
"custom": { "drawStyle": "line", "fillOpacity": 10 }
}
}
},
{
"title": "Error Rate (5xx)",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
"targets": [
{
"expr": "sum(rate(http_requests_total{status_code=~\"5..\"}[5m]))",
"legendFormat": "5xx errors/s",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "reqps",
"custom": { "drawStyle": "line", "fillOpacity": 10 },
"color": { "mode": "fixed", "fixedColor": "red" }
}
}
},
{
"title": "Request Latency (p50 / p95 / p99)",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p50",
"refId": "A"
},
{
"expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p95",
"refId": "B"
},
{
"expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p99",
"refId": "C"
}
],
"fieldConfig": {
"defaults": {
"unit": "s",
"custom": { "drawStyle": "line", "fillOpacity": 5 }
}
}
},
{
"title": "Requests by Route",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
"targets": [
{
"expr": "sum(rate(http_requests_total[5m])) by (route)",
"legendFormat": "{{route}}",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "reqps",
"custom": { "drawStyle": "bars", "fillOpacity": 50 }
}
}
},
{
"title": "Requests by Status Code",
"type": "piechart",
"gridPos": { "h": 8, "w": 8, "x": 0, "y": 16 },
"targets": [
{
"expr": "sum(increase(http_requests_total[1h])) by (status_code)",
"legendFormat": "{{status_code}}",
"refId": "A"
}
]
},
{
"title": "Process Memory (RSS)",
"type": "timeseries",
"gridPos": { "h": 8, "w": 8, "x": 8, "y": 16 },
"targets": [
{
"expr": "process_resident_memory_bytes{job=\"goodgo-api\"}",
"legendFormat": "RSS",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "bytes",
"custom": { "drawStyle": "line", "fillOpacity": 10 }
}
}
},
{
"title": "Node.js Event Loop Lag",
"type": "timeseries",
"gridPos": { "h": 8, "w": 8, "x": 16, "y": 16 },
"targets": [
{
"expr": "nodejs_eventloop_lag_seconds{job=\"goodgo-api\"}",
"legendFormat": "Event Loop Lag",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "s",
"custom": { "drawStyle": "line", "fillOpacity": 10 }
}
}
}
],
"schemaVersion": 39,
"tags": ["goodgo", "api"],
"templating": { "list": [] },
"time": { "from": "now-1h", "to": "now" },
"timepicker": {},
"timezone": "browser",
"title": "GoodGo API Overview",
"uid": "goodgo-api-overview",
"version": 1
}

View File

@@ -0,0 +1,117 @@
{
"annotations": { "list": [] },
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"links": [],
"panels": [
{
"title": "Listings Created (rate)",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"targets": [
{
"expr": "sum(rate(listings_created_total[5m])) by (category)",
"legendFormat": "{{category}}",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "ops",
"custom": { "drawStyle": "bars", "fillOpacity": 50 }
}
}
},
{
"title": "Listings Created (last 24h)",
"type": "stat",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
"targets": [
{
"expr": "sum(increase(listings_created_total[24h]))",
"legendFormat": "Last 24h",
"refId": "A"
}
],
"fieldConfig": {
"defaults": { "unit": "short" }
}
},
{
"title": "Payments Processed (rate)",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
"targets": [
{
"expr": "sum(rate(payments_processed_total[5m])) by (status)",
"legendFormat": "{{status}}",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "ops",
"custom": { "drawStyle": "line", "fillOpacity": 10 }
}
}
},
{
"title": "Payment Success Rate",
"type": "gauge",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
"targets": [
{
"expr": "sum(rate(payments_processed_total{status=\"success\"}[1h])) / sum(rate(payments_processed_total[1h]))",
"legendFormat": "Success Rate",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "percentunit",
"min": 0,
"max": 1,
"thresholds": {
"steps": [
{ "color": "red", "value": null },
{ "color": "yellow", "value": 0.9 },
{ "color": "green", "value": 0.95 }
]
}
}
}
},
{
"title": "Active Subscriptions by Plan",
"type": "bargauge",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
"targets": [
{
"expr": "active_subscriptions",
"legendFormat": "{{plan}}",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "short",
"thresholds": {
"steps": [
{ "color": "blue", "value": null }
]
}
}
}
}
],
"schemaVersion": 39,
"tags": ["goodgo", "business"],
"templating": { "list": [] },
"time": { "from": "now-1h", "to": "now" },
"timepicker": {},
"timezone": "browser",
"title": "GoodGo Business Metrics",
"uid": "goodgo-business",
"version": 1
}

View File

@@ -0,0 +1,108 @@
{
"annotations": { "list": [] },
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"links": [],
"panels": [
{
"title": "Query Latency (p50 / p95 / p99)",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(db_query_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p50",
"refId": "A"
},
{
"expr": "histogram_quantile(0.95, sum(rate(db_query_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p95",
"refId": "B"
},
{
"expr": "histogram_quantile(0.99, sum(rate(db_query_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p99",
"refId": "C"
}
],
"fieldConfig": {
"defaults": {
"unit": "s",
"custom": { "drawStyle": "line", "fillOpacity": 10 }
}
}
},
{
"title": "Query Rate by Operation",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
"targets": [
{
"expr": "sum(rate(db_query_duration_seconds_count[5m])) by (operation)",
"legendFormat": "{{operation}}",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "ops",
"custom": { "drawStyle": "bars", "fillOpacity": 50 }
}
}
},
{
"title": "Active DB Connections",
"type": "gauge",
"gridPos": { "h": 8, "w": 8, "x": 0, "y": 8 },
"targets": [
{
"expr": "db_pool_active_connections",
"legendFormat": "Active",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"thresholds": {
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 15 },
{ "color": "red", "value": 25 }
]
},
"max": 30
}
}
},
{
"title": "Queries Completed Within 100ms (%)",
"type": "timeseries",
"gridPos": { "h": 8, "w": 16, "x": 8, "y": 8 },
"targets": [
{
"expr": "sum(rate(db_query_duration_seconds_bucket{le=\"0.1\"}[5m])) / sum(rate(db_query_duration_seconds_count[5m]))",
"legendFormat": "% queries < 100ms",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "percentunit",
"min": 0,
"max": 1,
"custom": { "drawStyle": "line", "fillOpacity": 10 }
}
}
}
],
"schemaVersion": 39,
"tags": ["goodgo", "database"],
"templating": { "list": [] },
"time": { "from": "now-1h", "to": "now" },
"timepicker": {},
"timezone": "browser",
"title": "GoodGo Database",
"uid": "goodgo-database",
"version": 1
}

View File

@@ -0,0 +1,82 @@
{
"annotations": { "list": [] },
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"links": [],
"panels": [
{
"title": "Search Latency (p50 / p95 / p99)",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
"targets": [
{
"expr": "histogram_quantile(0.50, sum(rate(search_query_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p50",
"refId": "A"
},
{
"expr": "histogram_quantile(0.95, sum(rate(search_query_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p95",
"refId": "B"
},
{
"expr": "histogram_quantile(0.99, sum(rate(search_query_duration_seconds_bucket[5m])) by (le))",
"legendFormat": "p99",
"refId": "C"
}
],
"fieldConfig": {
"defaults": {
"unit": "s",
"custom": { "drawStyle": "line", "fillOpacity": 10 }
}
}
},
{
"title": "Search Query Rate",
"type": "timeseries",
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
"targets": [
{
"expr": "sum(rate(search_query_duration_seconds_count[5m])) by (collection)",
"legendFormat": "{{collection}}",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "ops",
"custom": { "drawStyle": "line", "fillOpacity": 10 }
}
}
},
{
"title": "Search Query Rate by Type",
"type": "timeseries",
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 8 },
"targets": [
{
"expr": "sum(rate(search_query_duration_seconds_count[5m])) by (type)",
"legendFormat": "{{type}}",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"unit": "ops",
"custom": { "drawStyle": "bars", "fillOpacity": 50 }
}
}
}
],
"schemaVersion": 39,
"tags": ["goodgo", "search"],
"templating": { "list": [] },
"time": { "from": "now-1h", "to": "now" },
"timepicker": {},
"timezone": "browser",
"title": "GoodGo Search (Typesense)",
"uid": "goodgo-search",
"version": 1
}

View File

@@ -0,0 +1,12 @@
---
# Grafana dashboard provisioning: declares one file-based dashboard provider.
apiVersion: 1

providers:
  # Provider identity and where its dashboards appear in Grafana.
  - name: "GoodGo Dashboards"
    type: file
    orgId: 1
    folder: "GoodGo"
    # Dashboards stay editable and deletable from the Grafana UI.
    editable: true
    disableDeletion: false
    options:
      # Directory (inside the Grafana container) holding the dashboard JSON.
      path: /var/lib/grafana/dashboards
      # Do not mirror the on-disk directory layout as Grafana folders;
      # everything is placed in the `folder` named above.
      foldersFromFilesStructure: false

View File

@@ -0,0 +1,9 @@
---
# Grafana datasource provisioning: registers the Prometheus server.
apiVersion: 1

datasources:
  - name: "Prometheus"
    type: prometheus
    # Marked as default, so panels that name no datasource query this one.
    isDefault: true
    # "proxy" access: queries are issued by the Grafana backend.
    access: proxy
    url: "http://prometheus:9090"
    editable: true

View File

@@ -0,0 +1,16 @@
---
# Prometheus configuration: global timing plus two static scrape jobs.
global:
  # Scrape targets and evaluate rules every 15 seconds.
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  # GoodGo API. The job name is what the dashboards select on
  # (e.g. process_resident_memory_bytes{job="goodgo-api"}).
  - job_name: goodgo-api
    metrics_path: /metrics
    static_configs:
      # NOTE(review): host.docker.internal presumably reaches an API process
      # running on the Docker host at port 3001 - confirm it resolves on the
      # platforms this stack is run on.
      - targets:
          - "host.docker.internal:3001"
        labels:
          service: goodgo-api
          environment: development

  # Prometheus scraping its own metrics endpoint.
  - job_name: prometheus
    static_configs:
      - targets:
          - "localhost:9090"