diff --git a/apps/web-docs/src/components/mdx/Mermaid.tsx b/apps/web-docs/src/components/mdx/Mermaid.tsx index 1a8e8c02..6d3384eb 100644 --- a/apps/web-docs/src/components/mdx/Mermaid.tsx +++ b/apps/web-docs/src/components/mdx/Mermaid.tsx @@ -18,7 +18,7 @@ const Mermaid: React.FC = ({ chart }) => { // Initialize mermaid mermaid.initialize({ startOnLoad: false, - theme: 'default', + theme: 'base', securityLevel: 'loose', fontFamily: 'inherit', }); diff --git a/apps/web-docs/src/lib/docs-generator.ts b/apps/web-docs/src/lib/docs-generator.ts index 765684ce..335430bc 100644 --- a/apps/web-docs/src/lib/docs-generator.ts +++ b/apps/web-docs/src/lib/docs-generator.ts @@ -48,7 +48,12 @@ const ITEM_ORDER: Record = { ], 'architecture': [ 'system-design', // EN: Overview first / VI: Overview trước - 'service-communication', // EN: Then communication / VI: Sau đó là communication + 'microservices-communication', + 'caching-architecture', + 'data-consistency-patterns', + 'event-driven-architecture', + 'security-architecture', + 'observability-architecture', ], 'skills': [ 'project-rules', // EN: Project rules first / VI: Project rules trước diff --git a/docs/en/architecture/microservices-communication.md b/docs/en/architecture/microservices-communication.md index 3a110c87..57f7f96d 100644 --- a/docs/en/architecture/microservices-communication.md +++ b/docs/en/architecture/microservices-communication.md @@ -18,9 +18,13 @@ graph TD ServiceA --> SD[Service Discovery
Docker DNS / K8s DNS] ServiceB --> SD - style Gateway fill:#e1f5ff - style Kafka fill:#fff4e1 - style SD fill:#d4edda + classDef blue fill:#253041,stroke:#4b6584,color:#ffffff + classDef orange fill:#3a2e1e,stroke:#7a5f3c,color:#ffffff + classDef green fill:#1e3a29,stroke:#3c7a52,color:#ffffff + + class Gateway blue + class Kafka orange + class SD green ``` ## System Context @@ -162,7 +166,8 @@ graph LR LB --> Service1A[Instance A] LB --> Service1B[Instance B] - style Gateway fill:#e1f5ff + classDef blue fill:#253041,stroke:#4b6584,color:#ffffff + class Gateway blue ``` Single entry point for all client requests with routing, auth, rate limiting. @@ -252,10 +257,15 @@ graph TD ServiceB1 & ServiceB2 --> Redis end - style Gateway fill:#e1f5ff - style Kafka fill:#fff4e1 - style DB fill:#d4edda - style Redis fill:#ffe1e1 + classDef blue fill:#253041,stroke:#4b6584,color:#ffffff + classDef orange fill:#3a2e1e,stroke:#7a5f3c,color:#ffffff + classDef green fill:#1e3a29,stroke:#3c7a52,color:#ffffff + classDef red fill:#3a1e1e,stroke:#7a3c3c,color:#ffffff + + class Gateway blue + class Kafka orange + class DB green + class Redis red ``` ### Deployment Environments @@ -358,6 +368,31 @@ readinessProbe: --- +## Quick Tips + +### Mermaid Common Issues +- **Arrow Syntax**: `-->` (solid), `-.->` (dotted), `==>` (thick) +- **Special Characters**: Escape with quote marks `"` +- **Subgraphs**: Use `subgraph "Title"` ... 
`end` + +### Color Pattern Quick Reference +| Element | Color | Hex | Stroke | Usage | +|---------|-------|-----|--------|-------| +| **Core** | Blue | `#253041` | `#4b6584` | Primary components | +| **Logic** | Purple | `#2e1e3a` | `#5f3c7a` | Processing steps | +| **Data** | Green | `#1e3a29` | `#3c7a52` | Database, Cache | +| **External** | Orange | `#3a2e1e` | `#7a5f3c` | External APIs | +| **Error** | Red | `#3a1e1e` | `#7a3c3c` | Failures, Alerts | + +### Visual Indicators +- 🔵 **Blue**: Core Infrastructure +- 🟢 **Green**: Data Operations +- 🟠 **Orange**: Event/External +- 🔴 **Red**: Critical/Error +- ⚪ **Grey**: Neutral/Boundary + +--- + **Last Updated**: 2026-01-07 -**Author**: VelikHo (hongochai10@icloud.com) +**Authors**: GoodGo Architecture Team **Reviewers**: To be assigned diff --git a/docs/en/architecture/observability-architecture.md b/docs/en/architecture/observability-architecture.md index f0d4dcaa..7f29a58e 100644 --- a/docs/en/architecture/observability-architecture.md +++ b/docs/en/architecture/observability-architecture.md @@ -1,9 +1,8 @@ -# Observability Architecture / Kiến trúc Khả năng Quan sát +# Observability Architecture -> **EN**: Comprehensive observability with metrics, logging, and tracing -> **VI**: Khả năng quan sát toàn diện với metrics, logging và tracing +> **Note**: Comprehensive observability with metrics, logging, and tracing -## Overview Diagram / Sơ đồ Tổng quan +## Overview Diagram ```mermaid graph TD @@ -30,12 +29,51 @@ graph TD Jaeger --> JaegerUI[Jaeger UI] end - style Prom fill:#d4edda - style Loki fill:#fff4e1 - style Jaeger fill:#e1f5ff + classDef service fill:#2D3748,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef metrics fill:#2F855A,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef logging fill:#C05621,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef tracing fill:#2C5282,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef dashboard 
fill:#4A5568,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + + class Service1,Service2 service; + class Prom metrics; + class Loki logging; + class Jaeger,JaegerUI tracing; + class Grafana,GrafanaLogs dashboard; ``` -## Three Pillars of Observability / Ba Trụ cột +## System Context + +```mermaid +C4Context + title Observability System Context + + Person(dev, "Developer", "Uses dashboards to monitor system") + Person(sre, "SRE", "Manages infrastructure & alerts") + + System(obs, "Observability Stack", "Prometheus, Loki, Jaeger, Grafana") + + System_Ext(service, "Microservices", "Sends telemetry data") + System_Ext(k8s, "Kubernetes", "Sends cluster metrics") + + Rel(dev, obs, "Views Dashboards", "HTTPS") + Rel(sre, obs, "Configures Alerts", "HTTPS") + Rel(service, obs, "Push/Pull Telemetry", "HTTP/gRPC") + Rel(k8s, obs, "Exposes Metrics", "HTTP") + + UpdateElementStyle(dev, $fontColor="white", $bgColor="#2D3748", $borderColor="white") + UpdateElementStyle(sre, $fontColor="white", $bgColor="#2D3748", $borderColor="white") + UpdateElementStyle(obs, $fontColor="white", $bgColor="#2C5282", $borderColor="white") + UpdateElementStyle(service, $fontColor="white", $bgColor="#4A5568", $borderColor="white") + UpdateElementStyle(k8s, $fontColor="white", $bgColor="#4A5568", $borderColor="white") +``` + +### Context Description +- **Observability Stack**: Central hub for collecting and displaying data (Prometheus, Loki, Jaeger, Grafana). +- **Microservices**: Send logs, metrics, and traces (OpenTelemetry). +- **Developer/SRE**: Use Grafana to monitor system health and debug. + +## Three Pillars of Observability ### 1. 
Metrics (Prometheus + Grafana) @@ -48,13 +86,15 @@ graph LR Grafana --> Dashboard2[Error Dashboard] Grafana --> Dashboard3[Performance Dashboard] - style Prom fill:#d4edda - style Grafana fill:#e1f5ff + classDef default fill:#2D3748,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef prom fill:#2F855A,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef grafana fill:#2C5282,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + + class Prom prom; + class Grafana grafana; ``` -**EN**: Numerical measurements over time (requests/sec, latency, errors). - -**VI**: Các phép đo số theo thời gian (requests/sec, latency, errors). +**Description**: Numerical measurements over time (requests/sec, latency, errors). **Implementation**: ```typescript @@ -125,9 +165,7 @@ sequenceDiagram Loki-->>Grafana: Log results ``` -**EN**: Structured logging with correlation IDs for request tracing. - -**VI**: Structured logging với correlation IDs để tracing requests. +**Description**: Structured logging with correlation IDs for request tracing. **Implementation**: ```typescript @@ -192,13 +230,15 @@ graph LR Jaeger --> Timeline[Trace Timeline] - style Trace fill:#e1f5ff - style Jaeger fill:#d4edda + classDef default fill:#2D3748,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef trace fill:#2C5282,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef jaeger fill:#2F855A,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + + class Trace trace; + class Jaeger jaeger; ``` -**EN**: Distributed tracing to track requests across services. - -**VI**: Distributed tracing để track requests giữa các services. +**Description**: Distributed tracing to track requests across services. **Implementation**: ```typescript @@ -238,7 +278,7 @@ async getUserWithTracing(userId: string): Promise { } ``` -## Health Checks / Kiểm tra Sức khỏe +## Health Checks ```typescript // Liveness probe - is service running? 
@@ -273,7 +313,7 @@ async function checkDatabase(): Promise { } ``` -## Alerting Rules / Quy tắc Cảnh báo +## Alerting Rules ```yaml # Prometheus alerting rules @@ -313,22 +353,111 @@ groups: summary: "Service is down" ``` -## Performance Targets / Mục tiêu Hiệu suất +## Performance Targets -| Metric | Target | Alert Threshold | -|--------|--------|-----------------| -| Response Time (P95) | < 200ms | > 500ms | -| Response Time (P99) | < 500ms | > 1s | -| Error Rate | < 1% | > 5% | -| Availability | > 99.9% | < 99% | -| Cache Hit Rate | > 80% | < 50% | +### Performance Goals +| Metric | Target | Notes | +|--------|--------|-------| +| **Metric Scrape Interval** | 15s | Critical services | +| **Log Ingestion Latency** | < 1s | Time from emit to queryable | +| **Trace Sampling Rate** | 10% | Production (100% in Dev/Staging) | +| **Dashboard Load Time** | < 2s | P95 Latency | +| **Alert Evaluation** | Every 1m | Evaluation interval | +| **Retention Policy** | 14 days | Logs & Traces (Metrics: 30 days) | -## Related Documentation / Tài liệu Liên quan +## Security Considerations + +### Observability Security +- **Log Scrubbing**: Automatically remove PII (emails, ssn, credit cards) and secrets from logs before ingestion. +- **Access Control**: Grafana integrated with OAuth2/OIDC, with Viewer/Editor/Admin roles. +- **Network Policy**: Only allow traffic from internal namespace to ingestion ports (9090, 3100, 14268). +- **TLS**: Encrypt traffic between agents and collectors. 
+ +## Deployment + +```mermaid +graph TD + subgraph "Kubernetes Monitoring Namespace" + Grafana[Grafana] + Prom[Prometheus Server] + Loki[Loki Gateway] + Jaeger[Jaeger Collector] + end + + subgraph "App Namespace" + App[Application Pods] + Agent[Grafana Agent / Promtail] + end + + App -->|Push Logs| Agent + Agent -->|Push| Loki + + Prom -->|Pull Metrics| App + Prom -->|Pull Metrics| Agent + + App -->|Push Traces| Jaeger + + Grafana --> Prom + Grafana --> Loki + Grafana --> Jaeger + + classDef k8s fill:#2D3748,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef app fill:#4A5568,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef grafana fill:#2C5282,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef loki fill:#C05621,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef jaeger fill:#2F855A,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef prom fill:#2F855A,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + + class Grafana grafana; + class Loki loki; + class Jaeger jaeger; + class Prom prom; + class App,Agent app; +``` + +**Deployment Description**: +- **Agent**: Promtail or Grafana Agent runs as DaemonSet or Sidecar to collect logs. +- **Pull Model**: Prometheus scrapes metrics from `/metrics` endpoints. +- **Push Model**: Traces and Logs are pushed to collectors. +- **Resources**: Dedicated nodes for monitoring stack in production to avoid impacting main workload. + +## Related Documentation - [System Design](./system-design.md) - Overall architecture - [Caching Architecture](./caching-architecture.md) - Cache metrics +## Quick Tips + +### Mermaid Common Issues + +| Issue | Solution | +|-------|----------| +| **Parse Error** | Check for special characters like `()` or `[]` inside node text without quotes. Use `"text"` for complex strings. | +| **Color Not Showing** | Ensure `style` or `classDef` definitions are correct and IDs match. | +| **Arrow Direction** | `TD` = Top-Down, `LR` = Left-Right. Choose appropriately for layout. 
| + +### Color Pattern Quick Reference + +| Element | Color | Hex | Use Case | +|---------|-------|-----|----------| +| **Primary** | Dark Blue | `#2D3748` | System components, core services | +| **Secondary** | Grey | `#4A5568` | Supporting modules, libraries | +| **Accent** | Blue | `#2C5282` | Databases, external APIs | +| **Highlight** | Teal | `#285E61` | User interactions, highlights | +| **Success** | Green | `#2F855A` | Successful states, active | +| **Warning** | Orange | `#C05621` | Warning/Caution states | +| **Error** | Red | `#C53030` | Error states, failures | + +### Visual Indicators + +| Indicator | Meaning | +|-----------|---------| +| 🟢 | Safe / Recommended | +| 🟡 | Warning / Caution | +| 🔴 | Danger / Anti-pattern | +| 💡 | Tip / Best Practice | + --- -**Last Updated**: 2026-01-07 -**Author**: VelikHo (hongochai10@icloud.com) +**Last Updated**: 2026-01-10 +**Author**: GoodGo Architecture Team diff --git a/docs/en/architecture/service-communication.md b/docs/en/architecture/service-communication.md deleted file mode 100644 index 1b615442..00000000 --- a/docs/en/architecture/service-communication.md +++ /dev/null @@ -1,58 +0,0 @@ -# Service Communication - -## Communication Patterns - -### Synchronous Communication (HTTP/REST) - -Services communicate synchronously via HTTP REST APIs through Traefik API Gateway. 
- -**Example:** -```typescript -// Web App -> Auth Service -const response = await fetch('http://api.goodgo.vn/api/v1/auth/login', { - method: 'POST', - body: JSON.stringify({ email, password }), -}); -``` - -### Service-to-Service Communication - -Services can communicate directly via internal network: - -```typescript -// Auth Service -> Notification Service (future) -const response = await fetch('http://notification-service:5003/api/v1/notifications', { - method: 'POST', - headers: { 'X-Service-Auth': process.env.INTERNAL_API_KEY }, - body: JSON.stringify({ userId, message }), -}); -``` - -## API Gateway Routing - -Traefik routes requests based on: -- Host header (`api.goodgo.vn`) -- Path prefix (`/api/v1/auth`) - -## Error Handling - -All services follow consistent error response format: - -```json -{ - "success": false, - "error": { - "code": "AUTH_001", - "message": "Invalid credentials", - "details": {} - }, - "timestamp": "2024-01-01T00:00:00.000Z" -} -``` - -## Retry and Circuit Breaker - -Future implementation: -- Exponential backoff for retries -- Circuit breaker pattern for fault tolerance -- Fallback mechanisms diff --git a/docs/vi/architecture/caching-architecture.md b/docs/vi/architecture/caching-architecture.md index 07a36584..09a4b696 100644 --- a/docs/vi/architecture/caching-architecture.md +++ b/docs/vi/architecture/caching-architecture.md @@ -1,9 +1,8 @@ -# Kiến trúc Caching / Caching Architecture +# Kiến trúc Caching -> **VI**: Chiến lược caching nhiều tầng để tối ưu hiệu suất -> **EN**: Multi-layer caching strategy for optimal performance +> Chiến lược caching nhiều tầng để tối ưu hiệu suất -## Sơ đồ Tổng quan / Overview Diagram +## Sơ đồ Tổng quan ```mermaid graph TD @@ -19,18 +18,21 @@ graph TD DB --> StoreL2[Store L2 + L1] StoreL2 --> Return3[Return
< 50ms] - style L1 fill:#d4edda - style L2 fill:#fff4e1 - style DB fill:#f0e1ff + classDef memory fill:#1b5e20,stroke:#2e7d32,color:#fff + classDef redis fill:#e65100,stroke:#ef6c00,color:#fff + classDef db fill:#212121,stroke:#424242,color:#fff + classDef default fill:#202020,stroke:#505050,color:#fff + + class L1,Return1,WarmL1 memory + class L2,Return2,StoreL2 redis + class DB,Return3 db ``` -``` - -## Bối cảnh Hệ thống / System Context +## Bối cảnh Hệ thống ```mermaid C4Context - title Sơ đồ Bối cảnh Hệ thống Caching / Caching System Context + title Sơ đồ Bối cảnh Hệ thống Caching System(service, "Microservice", "Client service using cache") System_Ext(db, "Neon PostgreSQL", "Primary database") @@ -44,23 +46,22 @@ C4Context Rel(service, l2, "Reads/Writes", "Redis Protocol") Rel(l1, l2, "Fills from", "On miss") Rel(l2, db, "Cache aside", "On miss") + + UpdateElementStyle(service, $fontColor="white", $bgColor="#1a237e", $borderColor="#3949ab") + UpdateElementStyle(db, $fontColor="white", $bgColor="#212121", $borderColor="#424242") + UpdateElementStyle(l1, $fontColor="white", $bgColor="#1b5e20", $borderColor="#2e7d32") + UpdateElementStyle(l2, $fontColor="white", $bgColor="#e65100", $borderColor="#ef6c00") ``` -### VI Mô tả Bối cảnh +### Mô tả Bối cảnh - **Service**: Giao tiếp trực tiếp với L1 Cache (in-memory) để đạt độ trễ thấp nhất. - **L1 Cache**: Cache cục bộ, không chia sẻ, tự động hết hạn (TTL ngắn). - **L2 Cache**: Redis cluster chia sẻ, giữ dữ liệu lâu dài hơn và đồng bộ giữa các instances. - **Database**: Nguồn dữ liệu gốc (source of truth), chỉ được truy cập khi cache miss. -### EN Context Description -- **Service**: Communicates directly with L1 Cache (in-memory) for lowest latency. -- **L1 Cache**: Local cache, not shared, automatic expiration (short TTL). -- **L2 Cache**: Shared Redis cluster, holds data longer and syncs across instances. -- **Database**: Source of truth, accessed only on cache miss. 
+## Mô tả Kiến trúc -## Mô tả Kiến trúc / Architecture Description - -### VI: Caching Nhiều Tầng +### Caching Nhiều Tầng Nền tảng GoodGo sử dụng caching 2 tầng để tối ưu hiệu suất: @@ -86,54 +87,25 @@ Request → L1 → L2 → Database hit rate hit rate rate ``` -### EN: Multi-Layer Caching - -GoodGo platform uses 2-layer caching for performance: - -**L1 Cache (Memory)**: -- In-memory cache per service instance -- Very fast access (< 1ms) -- Limited capacity (10k keys default) -- Short TTL (60 seconds default, max 5 minutes) -- Not shared across instances - -**L2 Cache (Redis)**: -- Shared distributed cache -- Fast access (< 5ms) -- Large capacity -- Longer TTL (configurable, typically 5-15 minutes) -- Shared across all service instances - -**Cache Flow**: -``` -Request → L1 → L2 → Database - ↓ ↓ ↓ ↓ -40-50% 80-90% 10-20% Cache miss -hit rate hit rate rate -``` - -## Triển khai Cache / Cache Implementation +## Triển khai Cache ### Multi-Layer Cache Service ```typescript -// VI: Triển khai multi-layer cache -// EN: Multi-layer cache implementation +// Triển khai multi-layer cache export class MultiLayerCache { private l1Cache: NodeCache; private l2Cache: Redis; constructor() { - // VI: L1: Memory cache - // EN: L1: Memory cache + // L1: Memory cache this.l1Cache = new NodeCache({ - stdTTL: 60, // VI: 60 giây mặc định / EN: 60 seconds default - maxKeys: 10000, // VI: Tối đa 10k keys / EN: Max 10k keys - checkperiod: 120 // VI: Kiểm tra expired keys mỗi 2 phút / EN: Check for expired keys every 2min + stdTTL: 60, // 60 giây mặc định + maxKeys: 10000, // Tối đa 10k keys + checkperiod: 120 // Kiểm tra expired keys mỗi 2 phút }); - // VI: L2: Redis cache - // EN: L2: Redis cache + // L2: Redis cache this.l2Cache = new Redis({ host: process.env.REDIS_HOST, port: parseInt(process.env.REDIS_PORT), @@ -142,23 +114,20 @@ export class MultiLayerCache { } async get(key: string): Promise { - // VI: Thử L1 trước - // EN: Try L1 first + // Thử L1 trước const l1Value = 
this.l1Cache.get(key); if (l1Value) { logger.debug('L1 cache hit', { key }); return l1Value; } - // VI: Thử L2 - // EN: Try L2 + // Thử L2 const l2Value = await this.l2Cache.get(key); if (l2Value) { logger.debug('L2 cache hit', { key }); const parsed = JSON.parse(l2Value) as T; - // VI: Làm ấm L1 cache - // EN: Warm L1 cache + // Làm ấm L1 cache this.l1Cache.set(key, parsed); return parsed; } @@ -168,9 +137,8 @@ export class MultiLayerCache { } async set(key: string, value: any, ttl: number = 300): Promise { - // VI: Lưu vào cả L1 và L2 - // EN: Store in both L1 and L2 - this.l1Cache.set(key, value, Math.min(ttl, 300)); // VI: L1 tối đa 5 phút / EN: L1 max 5min + // Lưu vào cả L1 và L2 + this.l1Cache.set(key, value, Math.min(ttl, 300)); // L1 tối đa 5 phút await this.l2Cache.setex(key, ttl, JSON.stringify(value)); } @@ -180,12 +148,10 @@ export class MultiLayerCache { } async invalidatePattern(pattern: string): Promise { - // VI: L1: Xóa tất cả (cách đơn giản) - // EN: L1: Clear all (simple approach) + // L1: Xóa tất cả (cách đơn giản) this.l1Cache.flushAll(); - // VI: L2: Xóa theo pattern - // EN: L2: Delete by pattern + // L2: Xóa theo pattern const keys = await this.l2Cache.keys(pattern); if (keys.length > 0) { await this.l2Cache.del(...keys); @@ -194,14 +160,13 @@ export class MultiLayerCache { } ``` -### Quy ước Đặt tên Key / Cache Key Naming +### Quy ước Đặt tên Key **Pattern**: `{service}:{entity}:{identifier}:{sub-resource}` -**Ví dụ / Examples**: +**Ví dụ**: ```typescript -// VI: User cache keys -// EN: User cache keys +// User cache keys const keys = { user: (userId: string) => `iam:user:${userId}`, userPermissions: (userId: string) => `iam:user:${userId}:permissions`, @@ -209,13 +174,12 @@ const keys = { session: (sessionId: string) => `iam:session:${sessionId}`, }; -// VI: Sử dụng -// EN: Usage +// Sử dụng const user = await cache.get(keys.user('user_123')); const permissions = await cache.get(keys.userPermissions('user_123')); ``` -## Chiến lược TTL / 
TTL Strategies +## Chiến lược TTL ```mermaid graph LR @@ -234,22 +198,28 @@ graph LR Long --> Config[Static Config] Long --> RefData[Reference Data] - %% style Short fill:#f8d7da - %% style Medium fill:#fff3cd - %% style Long fill:#d4edda + classDef tier fill:#202020,stroke:#505050,color:#fff + classDef short fill:#b71c1c,stroke:#f44336,color:#fff + classDef medium fill:#e65100,stroke:#ef6c00,color:#fff + classDef long fill:#1b5e20,stroke:#2e7d32,color:#fff + + class Short short + class Medium medium + class Long long + class Permissions,Sessions,UserProfiles,OrgData,Config,RefData tier ``` -**Hướng dẫn TTL / TTL Guidelines**: -| Loại Dữ liệu / Data Type | TTL | Lý do / Reason | +**Hướng dẫn TTL**: +| Loại Dữ liệu | TTL | Lý do | |---------------------------|-----|----------------| -| User permissions | 5 min | Security-sensitive / Nhạy cảm bảo mật | -| Session data | Varies | Based on session length / Dựa trên độ dài session | -| User profiles | 10 min | Moderate update frequency / Tần suất cập nhật vừa phải | -| Organization data | 15 min | Infrequent updates / Cập nhật không thường xuyên | -| Static config | 30-60 min | Very stable / Rất ổn định | -| Reference data | 1-2 hours | Almost never changes / Hầu như không thay đổi | +| User permissions | 5 min | Nhạy cảm bảo mật | +| Session data | Varies | Dựa trên độ dài session | +| User profiles | 10 min | Tần suất cập nhật vừa phải | +| Organization data | 15 min | Cập nhật không thường xuyên | +| Static config | 30-60 min | Rất ổn định | +| Reference data | 1-2 hours | Hầu như không thay đổi | -## Vô hiệu hóa Cache / Cache Invalidation +## Vô hiệu hóa Cache ```mermaid sequenceDiagram @@ -272,47 +242,39 @@ sequenceDiagram Note over Service,Cache: Next request will fetch fresh data ``` -**Chiến lược Invalidation / Invalidation Strategies**: +**Chiến lược Invalidation**: ```typescript -// VI: 1. Invalidation single key -// EN: 1. Single key invalidation +// 1. 
Invalidation single key async updateUser(userId: string, data: UpdateUserDto): Promise { const user = await userRepository.update(userId, data); - // VI: Vô hiệu hóa user cache - // EN: Invalidate user cache + // Vô hiệu hóa user cache await cache.del(cacheKeys.user(userId)); return user; } -// VI: 2. Invalidation theo pattern -// EN: 2. Pattern-based invalidation +// 2. Invalidation theo pattern async updateUserRole(userId: string, roleId: string): Promise { await userRoleRepository.assign(userId, roleId); - // VI: Vô hiệu hóa tất cả cache liên quan đến user - // EN: Invalidate all user-related cache + // Vô hiệu hóa tất cả cache liên quan đến user await cache.invalidatePattern(`iam:user:${userId}:*`); } -// VI: 3. Invalidation theo thời gian (TTL expiry) -// EN: 3. Time-based invalidation (TTL expiry) -// VI: Tự động xử lý bởi cache -// EN: Automatically handled by cache +// 3. Invalidation theo thời gian (TTL expiry) +// Tự động xử lý bởi cache ``` -## Làm ấm Cache / Cache Warming +## Làm ấm Cache ```typescript -// VI: Preload dữ liệu thường xuyên truy cập -// EN: Preload frequently accessed data +// Preload dữ liệu thường xuyên truy cập async warmCache(): Promise { logger.info('Starting cache warming'); - // VI: Làm ấm user permissions cho active users - // EN: Warm user permissions for active users + // Làm ấm user permissions cho active users const activeUsers = await userRepository.findActive({ limit: 1000 }); for (const user of activeUsers) { @@ -321,40 +283,32 @@ async warmCache(): Promise { await cache.set( cacheKeys.userPermissions(user.id), permissions, - 300 // VI: 5 phút / EN: 5 minutes + 300 // 5 phút ); } logger.info('Cache warming completed', { count: activeUsers.length }); } -// VI: Chạy khi service khởi động -// EN: Run on service startup +// Chạy khi service khởi động warmCache().catch(err => logger.error('Cache warming failed', { err })); ``` -## Quyết định Thiết kế / Design Decisions +## Quyết định Thiết kế ### Quyết định 1: Multi-layer 
Caching (L1 + L2) -**VI Bối cảnh**: Cần giảm tải cho Redis và đạt độ trễ cực thấp cho dữ liệu hot. -**VI Quyết định**: Sử dụng kết hợp L1 (NodeCache) và L2 (Redis). -**VI Hậu quả**: +**Bối cảnh**: Cần giảm tải cho Redis và đạt độ trễ cực thấp cho dữ liệu hot. +**Quyết định**: Sử dụng kết hợp L1 (NodeCache) và L2 (Redis). +**Hậu quả**: - ✅ Độ trễ < 1ms cho 40-50% requests. - ✅ Giảm network traffic tới Redis. - ❌ Phức tạp trong đồng bộ (L1 có thể stale trong thời gian ngắn). -**EN Context**: Need to reduce load on Redis and achieve ultra-low latency for hot data. -**EN Decision**: Use combination of L1 (NodeCache) and L2 (Redis). -**EN Consequences**: -- ✅ Latency < 1ms for 40-50% requests. -- ✅ Reduced network traffic to Redis. -- ❌ synchronization complexity (L1 might be stale for short duration). +## Đặc điểm Hiệu suất -## Đặc điểm Hiệu suất / Performance Characteristics - -### VI: Mục tiêu Hiệu suất -| Chỉ số / Metric | Mục tiêu / Target | Ghi chú / Notes | +### Mục tiêu Hiệu suất +| Chỉ số | Mục tiêu | Ghi chú | |-----------------|-------------------|-----------------| | **L1 Hit Latency** | < 0.5ms | In-memory lookup | | **L2 Hit Latency** | < 5ms | Network RTT + Redis processing | @@ -362,233 +316,15 @@ warmCache().catch(err => logger.error('Cache warming failed', { err })); | **L1 Capacity** | 10k items | Per instance limit to protect heap | | **Cache Warmup Time** | < 30s | At service startup | -### EN: Performance Targets -| Metric | Target | Notes | -|--------|--------|-------| -| **L1 Hit Latency** | < 0.5ms | In-memory lookup | -| **L2 Hit Latency** | < 5ms | Network RTT + Redis processing | -| **Combine Hit Rate** | > 90% | L1 + L2 combined | -| **L1 Capacity** | 10k items | Per instance limit to protect heap | -| **Cache Warmup Time** | < 30s | At service startup | +## Cân nhắc Bảo mật -## Cân nhắc Bảo mật / Security Considerations - -### VI: Bảo mật Cache +### Bảo mật Cache - **Encryption**: Dữ liệu nhạy cảm (PII) PHẢI được mã hóa trước khi lưu vào L2 
Redis (AES-256). L1 có thể lưu plaintext vì nằm trong memory process (trừ khi memory dump). - **Isolation**: Redis instance được bảo vệ bằng mật khẩu và Network Policy (chỉ allow traffic từ nội bộ K8s). - **TLS**: Kết nối tới Redis qua TLS 1.2+. - **Data Sanitization**: Không cache toàn bộ user object nếu chứa password hash hoặc secrets. -### EN: Cache Security -- **Encryption**: Sensitive data (PII) MUST be encrypted before storing in L2 Redis (AES-256). L1 can store plaintext as it is in process memory (unless memory dump). -- **Isolation**: Redis instance protected by password and Network Policy (allow internal K8s traffic only). -- **TLS**: Connect to Redis via TLS 1.2+. -- **Data Sanitization**: Do not cache entire user objects if they contain password hashes or secrets. - -## Triển khai / Deployment - -```mermaid -graph TD - subgraph "Kubernetes Pod" - Service[Microservice Container] - L1[L1 Cache (RAM)] - Service --- L1 - end - - subgraph "Infrastructure" - RedisMaster[Redis Master] - RedisSlave1[Redis Slave 1] - RedisSlave2[Redis Slave 2] - end - - Service -->|Write| RedisMaster - Service -->|Read| RedisSlave1 - Service -->|Read| RedisSlave2 - - RedisMaster -.->|Replication| RedisSlave1 - RedisMaster -.->|Replication| RedisSlave2 - - style Service fill:#e1f5ff - style L1 fill:#d4edda - style RedisMaster fill:#fff4e1 -``` - -**VI Mô tả Triển khai**: -- **L1**: Nhúng trực tiếp trong process của Microservice, scale theo số lượng Pods. -- **L2**: Cụm Redis (Cluster hoặc Sentinel) với ít nhất 3 nodes cho High Availability. -- **Connection Pooling**: Sử dụng ioredis với connection pooling để quản lý kết nối hiệu quả. - -**EN Deployment Description**: -- **L1**: Embedded directly in Microservice process, scales with number of Pods. -- **L2**: Redis Cluster (or Sentinel) with at least 3 nodes for High Availability. -- **Connection Pooling**: Use ioredis with connection pooling for efficient connection management. 
- -## Giám sát & Khả năng quan sát / Monitoring & Observability - -### VI: Các chỉ số giám sát -- **Metrics**: Prometheus metrics cho hit rate, miss rate, latency, memory usage. -- **Logs**: Log cache miss/hit ở level debug (sample), log connection errors ở level error. -- **Health Checks**: Readiness probe kiểm tra kết nối tới Redis. - -### EN: Monitoring Metrics -- **Metrics**: Prometheus metrics for hit rate, miss rate, latency, memory usage. -- **Logs**: Log cache miss/hit at debug level (sampled), log connection errors at error level. -- **Health Checks**: Readiness probe checks connection to Redis. - -### Code Giám sát / Monitoring Code - -**Cache Hit Rates**: -```typescript -// VI: Theo dõi hiệu suất cache -// EN: Track cache performance -export class CacheMetrics { - private hits = new Counter({ - name: 'cache_hits_total', - help: 'Total cache hits', - labelNames: ['layer', 'key_prefix'] - }); - - private misses = new Counter({ - name: 'cache_misses_total', - help: 'Total cache misses', - labelNames: ['layer', 'key_prefix'] - }); - - recordHit(layer: 'l1' | 'l2', key: string): void { - const prefix = key.split(':')[0]; - this.hits.inc({ layer, key_prefix: prefix }); - } - - recordMiss(key: string): void { - const prefix = key.split(':')[0]; - this.misses.inc({ layer: 'db', key_prefix: prefix }); - } -} -``` - -**Hiệu suất Kỳ vọng / Expected Performance**: -| Chỉ số / Metric | L1 Cache | L2 Cache | Database | -|-----------------|----------|----------|----------| -| Độ trễ / Latency | < 1ms | < 5ms | < 50ms | -| Tỷ lệ Hit / Hit Rate | 40-50% | 80-90% | - | -| Dung lượng / Capacity | 10k keys | Unlimited | - | - -## Best Practices - -**NÊN / DO**: -- ✅ Sử dụng cache cho dữ liệu thường xuyên truy cập / Use cache for frequently accessed data -- ✅ Đặt TTL phù hợp dựa trên tần suất thay đổi dữ liệu / Set appropriate TTLs based on data change frequency -- ✅ Vô hiệu hóa cache khi cập nhật dữ liệu / Invalidate cache on data updates -- ✅ Sử dụng cache key namespacing 
/ Use cache key namespacing -- ✅ Giám sát cache hit rates / Monitor cache hit rates -- ✅ Làm ấm cache khi khởi động cho dữ liệu quan trọng / Warm cache on startup for critical data - -**KHÔNG NÊN / DON'T**: -- ❌ Cache dữ liệu thay đổi rất thường xuyên / Cache data that changes very frequently -- ❌ Đặt TTL quá dài (nguy cơ dữ liệu cũ) / Set TTL too long (stale data risk) -- ❌ Đặt TTL quá ngắn (mất lợi ích cache) / Set TTL too short (negates cache benefit) -- ❌ Cache dữ liệu nhạy cảm không mã hóa / Cache sensitive data without encryption -- ❌ Bỏ qua cache invalidation khi cập nhật / Ignore cache invalidation on updates -- ❌ Sử dụng cache làm primary data store / Use cache as primary data store - -## Bối cảnh Hệ thống / System Context - -```mermaid -C4Context - title Sơ đồ Bối cảnh Caching Architecture - - System(services, "Microservices", "Application services") - - System_Ext(redis, "Redis Cluster", "L2 distributed cache") - System_Ext(db, "Neon PostgreSQL", "Primary data store") - System_Ext(monitoring, "Monitoring", "Cache metrics & alerts") - - Rel(services, redis, "Cache operations", "Redis Protocol") - Rel(services, db, "Data operations", "PostgreSQL") - Rel(redis, monitoring, "Sends metrics", "Prometheus") - - BiRel(services, redis, "L2 cache miss → DB query") -``` - -**VI Mô tả**: -- **Microservices**: Sử dụng multi-layer cache (L1: Memory, L2: Redis) -- **Redis Cluster**: L2 cache shared giữa tất cả service instances -- **PostgreSQL**: Primary data store, fallback khi cache miss -- **Monitoring**: Thu thập cache metrics (hit rate, latency, evictions) - -**EN Description**: -- **Microservices**: Use multi-layer cache (L1: Memory, L2: Redis) -- **Redis Cluster**: L2 cache shared across all service instances -- **PostgreSQL**: Primary data store, fallback on cache miss -- **Monitoring**: Collects cache metrics (hit rate, latency, evictions) - -## Cân nhắc Bảo mật / Security Considerations - -### VI: Phần Tiếng Việt - -**Access Control**: -- Redis AUTH password 
cho authentication -- Network isolation: Redis chỉ accessible từ service pods -- Kubernetes Network Policies: Whitelist specific services - -**Encryption**: -- TLS cho Redis connections (optional, recommended for production) -- Encryption at rest: Redis persistence files encrypted -- Sensitive data: Encrypt before caching (AES-256-GCM) - -**Data Sensitivity**: -- **KHÔNG cache**: Passwords, tokens, credit cards, SSN -- **Cache với encryption**: PII (email, phone, address) -- **Cache plaintext**: Non-sensitive data (public info, configs) - -**Cache Poisoning Prevention**: -- Validate data before caching -- Use signed cache keys để prevent tampering -- Implement cache key namespacing per service - -**TTL Management**: -- Short TTL (< 5 min) cho security-sensitive data -- Invalidate cache immediately khi data changes -- Auto-expire sessions on logout - -**Audit**: -- Log cache access cho sensitive data -- Monitor unusual cache patterns (high miss rate, frequent invalidations) -- Alert on cache security events - -### EN: English Section - -**Access Control**: -- Redis AUTH password for authentication -- Network isolation: Redis only accessible from service pods -- Kubernetes Network Policies: Whitelist specific services - -**Encryption**: -- TLS for Redis connections (optional, recommended for production) -- Encryption at rest: Redis persistence files encrypted -- Sensitive data: Encrypt before caching (AES-256-GCM) - -**Data Sensitivity**: -- **DON'T cache**: Passwords, tokens, credit cards, SSN -- **Cache with encryption**: PII (email, phone, address) -- **Cache plaintext**: Non-sensitive data (public info, configs) - -**Cache Poisoning Prevention**: -- Validate data before caching -- Use signed cache keys to prevent tampering -- Implement cache key namespacing per service - -**TTL Management**: -- Short TTL (< 5 min) for security-sensitive data -- Invalidate cache immediately when data changes -- Auto-expire sessions on logout - -**Audit**: -- Log cache access for 
sensitive data -- Monitor unusual cache patterns (high miss rate, frequent invalidations) -- Alert on cache security events - -## Triển khai / Deployment +## Triển khai ```mermaid graph TD @@ -634,13 +370,19 @@ graph TD Service3 --> M2 Service3 --> M3 - style M1 fill:#e1f5ff - style M2 fill:#fff4e1 - style M3 fill:#d4edda - style Sentinel fill:#f0e1ff + classDef master fill:#e65100,stroke:#ef6c00,color:#fff + classDef slave fill:#f57c00,stroke:#e65100,color:#fff + classDef sentinel fill:#4a148c,stroke:#7b1fa2,color:#fff + classDef service fill:#1a237e,stroke:#3949ab,color:#fff + classDef default fill:#202020,stroke:#505050,color:#fff + + class M1,M2,M3 master + class S1,S2,S3 slave + class Sentinel sentinel + class Service1,Service2,Service3 service ``` -### VI: Chiến lược Triển khai +### Chiến lược Triển khai **Redis Cluster Configuration**: - **Mode**: Cluster mode với 3 masters + 3 slaves @@ -684,89 +426,42 @@ cluster-replica-validity-factor 0 - **Read Scaling**: Route reads to slaves - **Monitoring**: Auto-alert khi memory usage > 80% -### EN: Deployment Strategy +## Giám sát & Khả năng quan sát -**Redis Cluster Configuration**: -- **Mode**: Cluster mode with 3 masters + 3 slaves -- **Replication**: Each master has 1 slave for high availability -- **Sentinel**: 3-node Sentinel ensemble for automatic failover -- **Sharding**: 16384 hash slots distributed evenly across 3 masters -- **Persistence**: RDB snapshots every 5 minutes, AOF disabled (performance) - -**Resource Allocation**: -| Component | CPU | Memory | Disk | Replicas | -|-----------|-----|--------|------|----------| -| **Redis Master** | 1 core | 2GB | 10GB SSD | 3 | -| **Redis Slave** | 1 core | 2GB | 10GB SSD | 3 | -| **Sentinel** | 500m | 512MB | 5GB | 3 | - -**Redis Configuration**: -```yaml -# redis.conf -maxmemory 2gb -maxmemory-policy allkeys-lru # Evict least recently used keys -timeout 300 # Close idle connections after 5min -tcp-keepalive 60 -save 300 10 # RDB snapshot every 5min if 10+ keys 
changed -appendonly no # Disable AOF for performance - -# Cluster config -cluster-enabled yes -cluster-node-timeout 5000 -cluster-replica-validity-factor 0 -``` - -**High Availability**: -- Automatic failover with Redis Sentinel -- Slave promotion when master fails -- Client-side retry logic -- Connection pooling (max 50 connections per service) - -**Scaling Strategy**: -- **Vertical**: Increase memory per node (2GB → 4GB → 8GB) -- **Horizontal**: Add master nodes (3 → 5 → 7) -- **Read Scaling**: Route reads to slaves -- **Monitoring**: Auto-alert when memory usage > 80% - -## Giám sát & Khả năng quan sát / Monitoring & Observability - -### VI: Chỉ số Chính +### Chỉ số Chính **Cache Performance Metrics**: ```typescript -// VI: Custom metrics cho cache performance -// EN: Custom metrics for cache performance - +// Custom metrics cho cache performance import { Counter, Histogram, Gauge } from 'prom-client'; export const cacheHits = new Counter({ name: 'cache_hits_total', - help: 'Total cache hits', labelNames: ['layer', 'key_prefix'] // layer: l1/l2, key_prefix: user/session/etc }); export const cacheMisses = new Counter({ name: 'cache_misses_total', - help: 'Total cache misses', + help: 'Tổng số cache misses', labelNames: ['key_prefix'] }); export const cacheLatency = new Histogram({ name: 'cache_operation_duration_seconds', - help: 'Cache operation duration', + help: 'Thời gian thực hiện cache operation', labelNames: ['operation', 'layer'], // operation: get/set/del buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1] }); export const cacheSize = new Gauge({ name: 'cache_size_bytes', - help: 'Cache size in bytes', + help: 'Kích thước cache (bytes)', labelNames: ['layer'] }); export const cacheEvictions = new Counter({ name: 'cache_evictions_total', - help: 'Total cache evictions', + help: 'Tổng số cache evictions', labelNames: ['layer', 'reason'] // reason: ttl_expired/memory_full }); ``` @@ -801,9 +496,7 @@ redis_used_memory_bytes / redis_maxmemory_bytes * 100 
**Alerting Rules**: ```yaml -# VI: Quy tắc cảnh báo cho cache -# EN: Alerting rules for cache - +# Quy tắc cảnh báo cho cache groups: - name: cache_alerts interval: 30s @@ -817,8 +510,8 @@ groups: labels: severity: warning annotations: - summary: "Low cache hit rate" - description: "Cache hit rate is {{ $value | humanizePercentage }}" + summary: "Tỷ lệ cache hit thấp" + description: "Tỷ lệ cache hit là {{ $value | humanizePercentage }}" # High memory usage - alert: HighRedisMemoryUsage @@ -827,8 +520,8 @@ groups: labels: severity: warning annotations: - summary: "High Redis memory usage" - description: "Redis memory usage is {{ $value | humanizePercentage }}" + summary: "Sử dụng bộ nhớ Redis cao" + description: "Bộ nhớ Redis sử dụng là {{ $value | humanizePercentage }}" # High eviction rate - alert: HighEvictionRate @@ -837,8 +530,8 @@ groups: labels: severity: warning annotations: - summary: "High cache eviction rate" - description: "Eviction rate is {{ $value }}/sec" + summary: "Tỷ lệ cache eviction cao" + description: "Tỷ lệ eviction là {{ $value }}/giây" # Redis down - alert: RedisDown @@ -847,7 +540,7 @@ groups: labels: severity: critical annotations: - summary: "Redis is down" + summary: "Redis bị down" # High replication lag - alert: HighReplicationLag @@ -856,8 +549,8 @@ groups: labels: severity: warning annotations: - summary: "High Redis replication lag" - description: "Replication lag is {{ $value }}s" + summary: "Độ trễ replication cao" + description: "Độ trễ replication là {{ $value }}s" ``` **Dashboards**: @@ -868,9 +561,7 @@ groups: **Logging**: ```typescript -// VI: Structured logging cho cache operations -// EN: Structured logging for cache operations - +// Structured logging cho cache operations logger.debug('Cache operation', { operation: 'get', layer: 'l1', @@ -897,187 +588,7 @@ logger.error('Cache error', { **Health Checks**: ```typescript -// VI: Health check cho Redis -// EN: Health check for Redis -async function checkRedisHealth(): Promise 
{ - try { - await redis.ping(); - const info = await redis.info('memory'); - const memoryUsage = parseMemoryUsage(info); - - return memoryUsage < 0.9; // Healthy if < 90% memory - } catch (error) { - logger.error('Redis health check failed', { error }); - return false; - } -} -``` - -### EN: Key Metrics - -**Cache Performance Metrics**: -```typescript -// Custom metrics for cache performance - -import { Counter, Histogram, Gauge } from 'prom-client'; - -export const cacheHits = new Counter({ - name: 'cache_hits_total', - help: 'Total cache hits', - labelNames: ['layer', 'key_prefix'] // layer: l1/l2, key_prefix: user/session/etc -}); - -export const cacheMisses = new Counter({ - name: 'cache_misses_total', - help: 'Total cache misses', - labelNames: ['key_prefix'] -}); - -export const cacheLatency = new Histogram({ - name: 'cache_operation_duration_seconds', - help: 'Cache operation duration', - labelNames: ['operation', 'layer'], // operation: get/set/del - buckets: [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1] -}); - -export const cacheSize = new Gauge({ - name: 'cache_size_bytes', - help: 'Cache size in bytes', - labelNames: ['layer'] -}); - -export const cacheEvictions = new Counter({ - name: 'cache_evictions_total', - help: 'Total cache evictions', - labelNames: ['layer', 'reason'] // reason: ttl_expired/memory_full -}); -``` - -**Redis Metrics**: -- `redis_connected_clients` - Connected clients -- `redis_used_memory_bytes` - Memory usage -- `redis_memory_fragmentation_ratio` - Memory fragmentation -- `redis_keyspace_hits_total` - Cache hits -- `redis_keyspace_misses_total` - Cache misses -- `redis_evicted_keys_total` - Evicted keys -- `redis_expired_keys_total` - Expired keys -- `redis_commands_processed_total` - Commands processed - -**Calculated Metrics**: -```promql -# Cache hit rate -rate(cache_hits_total[5m]) / (rate(cache_hits_total[5m]) + rate(cache_misses_total[5m])) - -# L1 hit rate -rate(cache_hits_total{layer="l1"}[5m]) / rate(cache_hits_total[5m]) - -# 
L2 hit rate -rate(cache_hits_total{layer="l2"}[5m]) / rate(cache_hits_total[5m]) - -# Average cache latency -histogram_quantile(0.95, cache_operation_duration_seconds_bucket) - -# Memory usage percentage -redis_used_memory_bytes / redis_maxmemory_bytes * 100 -``` - -**Alerting Rules**: -```yaml -# Alerting rules for cache - -groups: - - name: cache_alerts - interval: 30s - rules: - # Low cache hit rate - - alert: LowCacheHitRate - expr: | - rate(cache_hits_total[5m]) / - (rate(cache_hits_total[5m]) + rate(cache_misses_total[5m])) < 0.5 - for: 10m - labels: - severity: warning - annotations: - summary: "Low cache hit rate" - description: "Cache hit rate is {{ $value | humanizePercentage }}" - - # High memory usage - - alert: HighRedisMemoryUsage - expr: redis_used_memory_bytes / redis_maxmemory_bytes > 0.8 - for: 5m - labels: - severity: warning - annotations: - summary: "High Redis memory usage" - description: "Redis memory usage is {{ $value | humanizePercentage }}" - - # High eviction rate - - alert: HighEvictionRate - expr: rate(redis_evicted_keys_total[5m]) > 100 - for: 5m - labels: - severity: warning - annotations: - summary: "High cache eviction rate" - description: "Eviction rate is {{ $value }}/sec" - - # Redis down - - alert: RedisDown - expr: redis_up == 0 - for: 1m - labels: - severity: critical - annotations: - summary: "Redis is down" - - # High replication lag - - alert: HighReplicationLag - expr: redis_replication_lag_seconds > 5 - for: 2m - labels: - severity: warning - annotations: - summary: "High Redis replication lag" - description: "Replication lag is {{ $value }}s" -``` - -**Dashboards**: -- **Cache Overview**: Hit rate, miss rate, latency, size -- **Redis Cluster**: Memory usage, connections, commands/sec -- **Performance**: L1 vs L2 hit rates, operation latency -- **Evictions**: Eviction rate, reasons, trends - -**Logging**: -```typescript -// Structured logging for cache operations - -logger.debug('Cache operation', { - operation: 'get', - 
layer: 'l1', - key: cacheKey, - hit: true, - latency: duration, - correlationId: req.correlationId -}); - -logger.warn('Cache eviction', { - layer: 'l2', - reason: 'memory_full', - evictedKeys: count, - memoryUsage: usagePercent -}); - -logger.error('Cache error', { - operation: 'set', - layer: 'l2', - error: error.message, - key: cacheKey -}); -``` - -**Health Checks**: -```typescript -// Health check for Redis +// Health check cho Redis async function checkRedisHealth(): Promise { try { await redis.ping(); @@ -1093,12 +604,33 @@ async function checkRedisHealth(): Promise { ``` -## Tài liệu Liên quan / Related Documentation +## Tài liệu Liên quan -- [System Design](./system-design.md) - Kiến trúc tổng thể với caching / Overall architecture with caching +- [System Design](./system-design.md) - Kiến trúc tổng thể với caching - [Data Consistency Patterns](./data-consistency-patterns.md) - Cache invalidation patterns --- -**Cập nhật Lần cuối / Last Updated**: 2026-01-07 -**Tác giả / Authors**: GoodGo Architecture Team +**Cập nhật Lần cuối**: 2026-01-07 +**Tác giả**: GoodGo Architecture Team + +## Quick Tips + +### Mermaid Common Issues +- **Arrow Syntax**: Use `-->` for solid arrows, `-.->` for dotted arrows. +- **Node IDs**: Avoid spaces/special chars in IDs (e.g., `Node-A` not `Node A`). +- **Subgraphs**: Ensure `subgraph` names are unique and descriptive. 
+ +### Color Pattern Quick Reference +| Element | Dark Color | Text Color | +|---------|------------|------------| +| **Service (Blue)** | `#1a237e` | `#ffffff` | +| **Storage (Gray)** | `#212121` | `#ffffff` | +| **Cache L2 (Orange)** | `#e65100` | `#ffffff` | +| **Cache L1 (Green)** | `#1b5e20` | `#ffffff` | +| **Monitoring (Purple)** | `#4a148c` | `#ffffff` | + +### Visual Indicators +- ✅ **Recommended / Khuyên dùng** +- ❌ **Not Recommended / Không khuyên dùng** +- ⚠️ **Warning / Cảnh báo** diff --git a/docs/vi/architecture/data-consistency-patterns.md b/docs/vi/architecture/data-consistency-patterns.md index 50b95bd8..dd54d649 100644 --- a/docs/vi/architecture/data-consistency-patterns.md +++ b/docs/vi/architecture/data-consistency-patterns.md @@ -1,9 +1,8 @@ -# Patterns Đồng bộ Dữ liệu / Data Consistency Patterns +# Kiến trúc Patterns Đồng bộ Dữ liệu -> **VI**: Các patterns để duy trì tính nhất quán dữ liệu trong kiến trúc microservices phân tán -> **EN**: Patterns for maintaining data consistency in distributed microservices architecture +> Các patterns để duy trì tính nhất quán dữ liệu trong kiến trúc microservices phân tán -## Sơ đồ Tổng quan / Overview Diagram +## Sơ đồ Tổng quan ```mermaid graph TD @@ -25,15 +24,22 @@ graph TD OptimisticLock --> StrongConsistency[Strong Consistency] CQRS --> EventualConsistency - style Saga fill:#e1f5ff - style Outbox fill:#fff4e1 - style Idempotency fill:#f0e1ff - style CQRS fill:#d4edda + %% Dark color palette with white text + style Saga fill:#1d4ed8,stroke:#3b82f6,color:#ffffff + style Outbox fill:#b45309,stroke:#f59e0b,color:#ffffff + style Idempotency fill:#7e22ce,stroke:#a855f7,color:#ffffff + style OptimisticLock fill:#15803d,stroke:#22c55e,color:#ffffff + style CQRS fill:#15803d,stroke:#22c55e,color:#ffffff + style EventualConsistency fill:#374151,stroke:#6b7280,color:#ffffff + style StrongConsistency fill:#374151,stroke:#6b7280,color:#ffffff + style Service1 fill:#4527a0,stroke:#7c4dff,color:#ffffff + style 
Service2 fill:#4527a0,stroke:#7c4dff,color:#ffffff + style Service3 fill:#4527a0,stroke:#7c4dff,color:#ffffff ``` -## Mô tả Kiến trúc / Architecture Description +## Mô tả Kiến trúc -### VI: Tổng quan Kiến trúc +### Tổng quan Kiến trúc Nền tảng GoodGo sử dụng nhiều consistency patterns để xử lý dữ liệu phân tán: @@ -50,24 +56,7 @@ Nền tảng GoodGo sử dụng nhiều consistency patterns để xử lý dữ - **Optimistic Locking**: Cho concurrent updates - **CQRS**: Cho tối ưu read/write -### EN: Architecture Overview - -GoodGo platform uses multiple consistency patterns to handle distributed data: - -**Core Challenges**: -- No distributed transactions (2PC too slow) -- Services own their data (database per service) -- Network failures can cause partial completion -- Need to maintain data integrity across services - -**Pattern Selection**: -- **Saga**: For multi-service workflows -- **Outbox**: For guaranteed event publishing -- **Idempotency**: For safe retries -- **Optimistic Locking**: For concurrent updates -- **CQRS**: For read/write optimization - -## Bối cảnh Hệ thống / System Context +## Bối cảnh Hệ thống ```mermaid C4Context @@ -106,11 +95,9 @@ C4Context UpdateRelStyle(saga_orchestrator, inventory_service, $lineColor="red", $textColor="red") ``` -**VI**: Nền tảng GoodGo sử dụng kiến trúc database-per-service nơi mỗi service sở hữu dữ liệu riêng. Tính nhất quán dữ liệu giữa các services đạt được thông qua các patterns như Saga (cho workflows phối hợp), Outbox (cho event publishing đáng tin cậy), Idempotency (cho retries an toàn), và Optimistic Locking (cho concurrent updates). Các patterns này cho phép eventual consistency đồng thời duy trì data integrity. +Nền tảng GoodGo sử dụng kiến trúc database-per-service nơi mỗi service sở hữu dữ liệu riêng. 
Tính nhất quán dữ liệu giữa các services đạt được thông qua các patterns như Saga (cho workflows phối hợp), Outbox (cho event publishing đáng tin cậy), Idempotency (cho retries an toàn), và Optimistic Locking (cho concurrent updates). Các patterns này cho phép eventual consistency đồng thời duy trì data integrity. -**EN**: The GoodGo platform uses a database-per-service architecture where each service owns its data. Data consistency across services is achieved through patterns like Saga (for coordinated workflows), Outbox (for reliable event publishing), Idempotency (for safe retries), and Optimistic Locking (for concurrent updates). These patterns enable eventual consistency while maintaining data integrity. - -## Pattern Saga / Saga Pattern +## Pattern Saga ```mermaid sequenceDiagram @@ -139,14 +126,11 @@ sequenceDiagram end ``` -**VI Mô tả**: Saga quản lý distributed transactions dưới dạng chuỗi local transactions với compensation. +**Mô tả**: Saga quản lý distributed transactions dưới dạng chuỗi local transactions với compensation. -**EN Description**: Saga manages distributed transactions as sequence of local transactions with compensation. 
- -**Triển khai / Implementation**: +**Triển khai**: ```typescript -// VI: Saga orchestrator -// EN: Saga orchestrator +// Saga orchestrator class OrderSaga { async execute(orderData: OrderData): Promise { const sagaContext = { @@ -156,24 +140,19 @@ class OrderSaga { }; try { - // VI: Bước 1: Tạo đơn hàng - // EN: Step 1: Create order + // Bước 1: Tạo đơn hàng sagaContext.orderId = await orderService.create(orderData); - // VI: Bước 2: Xử lý thanh toán - // EN: Step 2: Process payment + // Bước 2: Xử lý thanh toán sagaContext.paymentId = await paymentService.process(orderData.payment); - // VI: Bước 3: Đặt trước kho - // EN: Step 3: Reserve inventory + // Bước 3: Đặt trước kho sagaContext.inventoryId = await inventoryService.reserve(orderData.items); - // VI: Tất cả thành công - commit - // EN: All success - commit + // Tất cả thành công - commit await this.completeSaga(sagaContext); } catch (error) { - // VI: Compensate theo thứ tự ngược lại - // EN: Compensate in reverse order + // Compensate theo thứ tự ngược lại await this.compensate(sagaContext, error); throw error; } @@ -193,7 +172,7 @@ class OrderSaga { } ``` -## Pattern Outbox / Outbox Pattern +## Pattern Outbox ```mermaid sequenceDiagram @@ -217,22 +196,17 @@ sequenceDiagram end ``` -**VI**: Đảm bảo event publishing bằng cách lưu events trong database cùng transaction với business data. +**Mô tả**: Đảm bảo event publishing bằng cách lưu events trong database cùng transaction với business data. -**EN**: Guarantees event publishing by storing events in database within same transaction as business data. 
- -**Triển khai / Implementation**: +**Triển khai**: ```typescript -// VI: Lưu event trong outbox -// EN: Store event in outbox +// Lưu event trong outbox async createUser(userData: CreateUserDto): Promise { return await prisma.$transaction(async (tx) => { - // VI: Business operation - // EN: Business operation + // Business operation const user = await tx.user.create({ data: userData }); - // VI: Lưu event trong outbox (cùng transaction) - // EN: Store event in outbox (same transaction) + // Lưu event trong outbox (cùng transaction) await tx.outbox.create({ data: { aggregateId: user.id, @@ -247,8 +221,7 @@ async createUser(userData: CreateUserDto): Promise { }); } -// VI: Outbox processor (chạy định kỳ) -// EN: Outbox processor (runs periodically) +// Outbox processor (chạy định kỳ) async processOutbox(): Promise { const events = await prisma.outbox.findMany({ where: { publishedAt: null }, @@ -273,7 +246,7 @@ async processOutbox(): Promise { } ``` -## Pattern Idempotency / Idempotency Pattern +## Pattern Idempotency ```mermaid graph LR @@ -290,43 +263,43 @@ graph LR Request2 --> Check Return --> Response2[Same Response] - style Check fill:#fff3cd - style Store fill:#d4edda + %% Dark color palette with white text + style Request1 fill:#374151,stroke:#6b7280,color:#ffffff + style Request2 fill:#374151,stroke:#6b7280,color:#ffffff + style Check fill:#b45309,stroke:#f59e0b,color:#ffffff + style Process fill:#1d4ed8,stroke:#3b82f6,color:#ffffff + style Store fill:#15803d,stroke:#22c55e,color:#ffffff + style Return fill:#7e22ce,stroke:#a855f7,color:#ffffff + style Response1 fill:#15803d,stroke:#22c55e,color:#ffffff + style Response2 fill:#15803d,stroke:#22c55e,color:#ffffff ``` -**VI**: Đảm bảo operations có thể retry an toàn mà không có side effects bằng cách sử dụng idempotency keys. +**Mô tả**: Đảm bảo operations có thể retry an toàn mà không có side effects bằng cách sử dụng idempotency keys. 
-**EN**: Ensures operations can be safely retried without side effects by using idempotency keys. - -**Triển khai / Implementation**: +**Triển khai**: ```typescript -// VI: Idempotency middleware -// EN: Idempotency middleware +// Idempotency middleware async function idempotentOperation( key: string, operation: () => Promise, - ttl: number = 86400 // VI: 24 giờ / EN: 24 hours + ttl: number = 86400 ): Promise { - // VI: Kiểm tra đã xử lý chưa - // EN: Check if already processed + // Kiểm tra đã xử lý chưa const cached = await redis.get(`idempotency:${key}`); if (cached) { return JSON.parse(cached); } - // VI: Xử lý operation - // EN: Process operation + // Xử lý operation const result = await operation(); - // VI: Lưu kết quả - // EN: Store result + // Lưu kết quả await redis.setex(`idempotency:${key}`, ttl, JSON.stringify(result)); return result; } -// VI: Sử dụng trong controller -// EN: Usage in controller +// Sử dụng trong controller async createPayment(req: Request, res: Response): Promise { const idempotencyKey = req.headers['idempotency-key'] as string; @@ -343,7 +316,7 @@ async createPayment(req: Request, res: Response): Promise { } ``` -## Khóa Lạc quan / Optimistic Locking +## Khóa Lạc quan (Optimistic Locking) ```mermaid sequenceDiagram @@ -369,34 +342,30 @@ sequenceDiagram Service-->>User2: Success ``` -**VI**: Ngăn chặn lost updates bằng cách kiểm tra version khi update. +**Mô tả**: Ngăn chặn lost updates bằng cách kiểm tra version khi update. -**EN**: Prevents lost updates by checking version on update. 
- -**Triển khai / Implementation**: +**Triển khai**: ```prisma -// VI: Prisma schema -// EN: Prisma schema +// Prisma schema model User { id String @id @default(cuid()) email String @unique name String - version Int @default(1) // VI: Trường version / EN: Version field + version Int @default(1) } ``` ```typescript -// VI: Update với optimistic locking -// EN: Update with optimistic locking +// Update với optimistic locking async updateUser(userId: string, data: UpdateUserDto, currentVersion: number): Promise { const result = await prisma.user.updateMany({ where: { id: userId, - version: currentVersion // VI: Kiểm tra version / EN: Check version + version: currentVersion }, data: { ...data, - version: { increment: 1 } // VI: Tăng version / EN: Increment version + version: { increment: 1 } } }); @@ -426,105 +395,104 @@ graph LR WriteModel --> DB1[(Write DB)] ReadModel --> DB2[(Read DB
Optimized)] - style WriteModel fill:#f0e1ff - style ReadModel fill:#d4edda + %% Dark color palette with white text + style Command fill:#1d4ed8,stroke:#3b82f6,color:#ffffff + style WriteModel fill:#7e22ce,stroke:#a855f7,color:#ffffff + style Events fill:#b45309,stroke:#f59e0b,color:#ffffff + style Projection fill:#1d4ed8,stroke:#3b82f6,color:#ffffff + style ReadModel fill:#15803d,stroke:#22c55e,color:#ffffff + style Query fill:#15803d,stroke:#22c55e,color:#ffffff + style DB1 fill:#374151,stroke:#6b7280,color:#ffffff + style DB2 fill:#374151,stroke:#6b7280,color:#ffffff ``` -**VI**: Tách biệt read và write models để tối ưu hiệu suất. +**Mô tả**: Tách biệt read và write models để tối ưu hiệu suất. -**EN**: Separates read and write models for optimal performance. +## Đặc điểm Hiệu suất -## Đặc điểm Hiệu suất / Performance Characteristics +Chỉ số hiệu suất và chiến lược tối ưu cho patterns đồng bộ dữ liệu. -**VI**: Chỉ số hiệu suất và chiến lược tối ưu cho patterns đồng bộ dữ liệu. +| Pattern | Tác động Độ trễ | Thông lượng | Ghi chú | +|---------|-----------------|-------------|---------| +| **Thực thi Saga** | 500ms - 2s | 100-500 sagas/s | Phụ thuộc số bước và compensation | +| **Xử lý Outbox** | < 100ms | 10,000 events/s | Xử lý bất đồng bộ, tác động tối thiểu | +| **Kiểm tra Idempotency** | < 10ms | 50,000 checks/s | Redis lookup, rất nhanh | +| **Cập nhật Optimistic Lock** | < 50ms | 5,000 updates/s | Single DB operation với version check | +| **CQRS Projection** | 100ms - 1s | 1,000 events/s | Xử lý event sang read model | +| **Thực thi Compensation** | 200ms - 1s | Varies | Rollback operations trong saga | -**EN**: Performance metrics and optimization strategies for data consistency patterns. 
- -| Pattern / Pattern | Tác động Độ trễ / Latency Impact | Thông lượng / Throughput | Ghi chú / Notes | -|-------------------|----------------------------------|--------------------------|-----------------| -| **Thực thi Saga / Saga Execution** | 500ms - 2s | 100-500 sagas/s | Phụ thuộc số bước và compensation / Depends on number of steps and compensation | -| **Xử lý Outbox / Outbox Processing** | < 100ms | 10,000 events/s | Xử lý bất đồng bộ, tác động tối thiểu / Async processing, minimal user impact | -| **Kiểm tra Idempotency / Idempotency Check** | < 10ms | 50,000 checks/s | Redis lookup, rất nhanh / Redis lookup, very fast | -| **Cập nhật Optimistic Lock / Optimistic Lock Update** | < 50ms | 5,000 updates/s | Single DB operation với version check / Single DB operation with version check | -| **CQRS Projection** | 100ms - 1s | 1,000 events/s | Xử lý event sang read model / Event processing to read model | -| **Thực thi Compensation / Compensation Execution** | 200ms - 1s | Varies | Rollback operations trong saga / Rollback operations in saga | - -### Chiến lược Tối ưu Hiệu suất / Performance Optimization Strategies +### Chiến lược Tối ưu Hiệu suất **Saga Pattern**: -- Giảm thiểu số bước (< 5 bước lý tưởng) / Minimize number of steps (< 5 steps ideal) -- Thực thi song song khi có thể / Parallel execution where possible +- Giảm thiểu số bước (< 5 bước lý tưởng) +- Thực thi song song khi có thể - Cache service responses -- Đặt timeouts phù hợp (30s mặc định) / Set appropriate timeouts (30s default) +- Đặt timeouts phù hợp (30s mặc định) **Outbox Pattern**: -- Batch process outbox events (100-500 mỗi batch / per batch) -- Index cột `publishedAt` cho hiệu suất / Index `publishedAt` column for performance -- Archive processed events định kỳ / Archive processed events periodically -- Sử dụng connection pooling cho Kafka / Use connection pooling for Kafka +- Batch process outbox events (100-500 mỗi batch) +- Index cột `publishedAt` cho hiệu suất +- Archive processed 
events định kỳ +- Sử dụng connection pooling cho Kafka **Idempotency**: -- Sử dụng Redis cho fast key lookups / Use Redis for fast key lookups -- Đặt TTL 24-48 giờ / Set TTL to 24-48 hours +- Sử dụng Redis cho fast key lookups +- Đặt TTL 24-48 giờ - Hash long idempotency keys -- Clean expired keys thường xuyên / Clean expired keys regularly +- Clean expired keys thường xuyên **Optimistic Locking**: -- Hoạt động tốt nhất cho low-contention scenarios / Works best for low-contention scenarios -- Triển khai retry với exponential backoff / Implement retry with exponential backoff -- Giám sát conflict rates (nên < 5%) / Monitor conflict rates (should be < 5%) -- Cân nhắc pessimistic locking nếu conflicts > 10% / Consider pessimistic locking if conflicts > 10% +- Hoạt động tốt nhất cho low-contention scenarios +- Triển khai retry với exponential backoff +- Giám sát conflict rates (nên < 5%) +- Cân nhắc pessimistic locking nếu conflicts > 10% -## Cân nhắc Bảo mật / Security Considerations +## Cân nhắc Bảo mật -**VI**: Biện pháp bảo mật để bảo vệ các operations đồng bộ dữ liệu. +Biện pháp bảo mật để bảo vệ các operations đồng bộ dữ liệu. -**EN**: Security measures for protecting data consistency operations. 
+### Bảo mật Saga -### Bảo mật Saga / Saga Security - -**Bảo vệ Compensation / Compensation Protection**: -- Xác thực saga execution permissions ở mỗi bước / Validate saga execution permissions at each step -- Mã hóa sensitive data trong saga context / Encrypt sensitive data in saga context -- Log tất cả saga executions cho audit / Log all saga executions for audit -- Triển khai timeout để ngăn hanging sagas / Implement timeout to prevent hanging sagas +**Bảo vệ Compensation**: +- Xác thực saga execution permissions ở mỗi bước +- Mã hóa sensitive data trong saga context +- Log tất cả saga executions cho audit +- Triển khai timeout để ngăn hanging sagas ```typescript -// VI: Saga context bảo mật -// EN: Secure saga context +// Saga context bảo mật interface SecureSagaContext { sagaId: string; - userId: string; // VI: User khởi tạo / EN: User who initiated - permissions: string[]; // VI: Quyền yêu cầu / EN: Required permissions - encryptedData: string; // VI: Dữ liệu nhạy cảm đã mã hóa / EN: Encrypted sensitive data - auditLog: AuditEntry[]; // VI: Audit trail / EN: Audit trail + userId: string; + permissions: string[]; + encryptedData: string; + auditLog: AuditEntry[]; } ``` -### Bảo mật Outbox / Outbox Security +### Bảo mật Outbox -**Mã hóa Event Payload / Event Payload Encryption**: -- Mã hóa PII (Personally Identifiable Information) trước khi lưu trong outbox / Encrypt PII before storing in outbox -- Sử dụng AES-256-GCM cho event payload encryption / Use AES-256-GCM for event payload encryption -- Giải mã chỉ khi publishing sang Kafka / Decrypt only when publishing to Kafka -- Rotate encryption keys hàng quý / Rotate encryption keys quarterly +**Mã hóa Event Payload**: +- Mã hóa PII (Personally Identifiable Information) trước khi lưu trong outbox +- Sử dụng AES-256-GCM cho event payload encryption +- Giải mã chỉ khi publishing sang Kafka +- Rotate encryption keys hàng quý -**Kiểm soát Truy cập / Access Control**: -- Hạn chế truy cập outbox table chỉ cho outbox 
processor / Restrict outbox table access to outbox processor only -- Sử dụng database roles và permissions / Use database roles and permissions -- Giám sát outbox table access patterns / Monitor outbox table access patterns +**Kiểm soát Truy cập**: +- Hạn chế truy cập outbox table chỉ cho outbox processor +- Sử dụng database roles và permissions +- Giám sát outbox table access patterns -### Bảo mật Idempotency / Idempotency Security +### Bảo mật Idempotency -**Bảo mật Key / Key Security**: -- Sử dụng cryptographic hashing cho idempotency keys (SHA-256) / Use cryptographic hashing for idempotency keys (SHA-256) -- Bao gồm user context trong key generation / Include user context in key generation -- Xác thực key ownership trước khi xử lý / Validate key ownership before processing -- Clear keys khi user logout cho sensitive operations / Clear keys on user logout for sensitive operations +**Bảo mật Key**: +- Sử dụng cryptographic hashing cho idempotency keys (SHA-256) +- Bao gồm user context trong key generation +- Xác thực key ownership trước khi xử lý +- Clear keys khi user logout cho sensitive operations ```typescript -// VI: Tạo idempotency key bảo mật -// EN: Secure idempotency key generation +// Tạo idempotency key bảo mật function generateIdempotencyKey( operation: string, userId: string, @@ -535,19 +503,17 @@ function generateIdempotencyKey( } ``` -### Bảo mật Optimistic Lock / Optimistic Locking Security +### Bảo mật Optimistic Lock -**Ngăn chặn Giả mạo Version / Version Tampering Prevention**: -- Xác thực version field chỉ ở server-side / Validate version field on server-side only -- Không bao giờ chấp nhận version từ client trực tiếp / Never accept version from client directly -- Log version conflicts cho security monitoring / Log version conflicts for security monitoring +**Ngăn chặn Giả mạo Version**: +- Xác thực version field chỉ ở server-side +- Không bao giờ chấp nhận version từ client trực tiếp +- Log version conflicts cho security monitoring - Rate 
limit update attempts per user -## Triển khai / Deployment +## Triển khai -**VI**: Cách các patterns đồng bộ dữ liệu được triển khai và mở rộng. - -**EN**: How data consistency patterns are deployed and scaled. +Cách các patterns đồng bộ dữ liệu được triển khai và mở rộng. ```mermaid graph TD @@ -578,116 +544,118 @@ graph TD OP1 & OP2 --> Kafka[Kafka Cluster\n5 brokers] end - style SO1 fill:#e1f5ff - style SO2 fill:#e1f5ff - style OP1 fill:#fff4e1 - style OP2 fill:#fff4e1 - style DB fill:#d4edda - style Kafka fill:#ffe1e1 + %% Dark color palette with white text + style OS1 fill:#4527a0,stroke:#7c4dff,color:#ffffff + style OS2 fill:#4527a0,stroke:#7c4dff,color:#ffffff + style OS3 fill:#4527a0,stroke:#7c4dff,color:#ffffff + style SO1 fill:#1d4ed8,stroke:#3b82f6,color:#ffffff + style SO2 fill:#1d4ed8,stroke:#3b82f6,color:#ffffff + style OP1 fill:#b45309,stroke:#f59e0b,color:#ffffff + style OP2 fill:#b45309,stroke:#f59e0b,color:#ffffff + style DB fill:#15803d,stroke:#22c55e,color:#ffffff + style Redis fill:#7e22ce,stroke:#a855f7,color:#ffffff + style Kafka fill:#b91c1c,stroke:#ef4444,color:#ffffff + style PS fill:#374151,stroke:#6b7280,color:#ffffff + style IS fill:#374151,stroke:#6b7280,color:#ffffff ``` -### Cấu hình Triển khai / Deployment Configuration +### Cấu hình Triển khai -| Thành phần / Component | Replicas | Resources | HA Strategy | -|------------------------|----------|-----------|-------------| -| **Saga Orchestrator** | 2-3 | 512Mi RAM, 500m CPU | Leader election với etcd / Leader election with etcd | +| Thành phần | Replicas | Resources | HA Strategy | +|------------|----------|-----------|-------------| +| **Saga Orchestrator** | 2-3 | 512Mi RAM, 500m CPU | Leader election với etcd | | **Outbox Processor** | 2-5 | 256Mi RAM, 250m CPU | Distributed lock per event batch | -| **Services với Outbox / Services with Outbox** | 3+ | Varies | Standard service scaling | -| **Redis (Idempotency)** | 3 nodes | 1Gi RAM each | Redis Cluster với replication / Redis 
Cluster with replication | +| **Services với Outbox** | 3+ | Varies | Standard service scaling | +| **Redis (Idempotency)** | 3 nodes | 1Gi RAM each | Redis Cluster với replication | -### Chiến lược Mở rộng / Scaling Strategy +### Chiến lược Mở rộng **Saga Orchestrator**: -- Scale dựa trên pending saga count / Scale based on pending saga count -- Sử dụng queue-based load distribution / Use queue-based load distribution -- Giám sát saga execution duration / Monitor saga execution duration +- Scale dựa trên pending saga count +- Sử dụng queue-based load distribution +- Giám sát saga execution duration **Outbox Processor**: -- Scale với database sharding (1 processor per shard) / Scale with database sharding (1 processor per shard) -- Tăng batch size trước khi thêm replicas / Increase batch size before adding replicas -- Giám sát outbox table size và age / Monitor outbox table size and age +- Scale với database sharding (1 processor per shard) +- Tăng batch size trước khi thêm replicas +- Giám sát outbox table size và age **Idempotency Store (Redis)**: - Scale Redis cluster horizontally -- Sử dụng consistent hashing cho key distribution / Use consistent hashing for key distribution -- Giám sát memory usage (nên < 70%) / Monitor memory usage (should be < 70%) +- Sử dụng consistent hashing cho key distribution +- Giám sát memory usage (nên < 70%) -## Giám sát & Khả năng quan sát / Monitoring & Observability +## Giám sát & Khả năng quan sát -**VI**: Chiến lược giám sát cho patterns đồng bộ dữ liệu. +Chiến lược giám sát cho patterns đồng bộ dữ liệu. -**EN**: Monitoring strategies for data consistency patterns. 
- -### Chỉ số Chính / Key Metrics +### Chỉ số Chính **Saga Metrics**: -- `saga_executions_total` - Tổng saga executions (success/failure) / Total saga executions (success/failure) +- `saga_executions_total` - Tổng saga executions (success/failure) - `saga_duration_seconds` - Saga execution time histogram -- `saga_compensations_total` - Tổng compensation executions / Total compensation executions -- `saga_timeout_total` - Sagas timeout / Sagas that timed out -- `saga_pending_count` - Sagas đang thực thi / Sagas currently executing +- `saga_compensations_total` - Tổng compensation executions +- `saga_timeout_total` - Sagas timeout +- `saga_pending_count` - Sagas đang thực thi **Outbox Metrics**: -- `outbox_events_total` - Events ghi vào outbox / Events written to outbox -- `outbox_published_total` - Events published sang Kafka / Events published to Kafka -- `outbox_processing_lag_seconds` - Thời gian từ write đến publish / Time from write to publish -- `outbox_table_size` - Số dòng outbox table / Outbox table row count +- `outbox_events_total` - Events ghi vào outbox +- `outbox_published_total` - Events published sang Kafka +- `outbox_processing_lag_seconds` - Thời gian từ write đến publish +- `outbox_table_size` - Số dòng outbox table - `outbox_failed_events_total` - Failed event publications **Idempotency Metrics**: -- `idempotency_checks_total` - Tổng idempotency checks / Total idempotency checks +- `idempotency_checks_total` - Tổng idempotency checks - `idempotency_hits_total` - Duplicate requests prevented - `idempotency_key_ttl_seconds` - Average key TTL - `idempotency_redis_errors_total` - Redis failures **Optimistic Lock Metrics**: - `optimistic_lock_conflicts_total` - Version conflicts detected -- `optimistic_lock_retries_total` - Retry attempts sau conflict / Retry attempts after conflict +- `optimistic_lock_retries_total` - Retry attempts sau conflict - `optimistic_lock_success_rate` - Update success percentage -### Cảnh báo / Alerts +### Cảnh báo 
**Critical Alerts**: ```yaml -# VI: Saga timeout rate quá cao -# EN: Saga timeout rate too high +# Saga timeout rate quá cao alert: HighSagaTimeoutRate expr: rate(saga_timeout_total[5m]) > 0.05 for: 5m severity: critical -# VI: Outbox processing lag -# EN: Outbox processing lag +# Outbox processing lag alert: OutboxProcessingLag expr: outbox_processing_lag_seconds > 300 for: 10m severity: critical -# VI: High optimistic lock conflict rate -# EN: High optimistic lock conflict rate +# High optimistic lock conflict rate alert: HighOptimisticLockConflicts expr: rate(optimistic_lock_conflicts_total[5m]) / rate(optimistic_lock_attempts_total[5m]) > 0.1 for: 5m severity: warning ``` -### Dashboard Giám sát / Monitoring Dashboard +### Dashboard Giám sát **Grafana Panels**: -1. **Tổng quan Saga Orchestration / Saga Orchestration Overview**: +1. **Tổng quan Saga Orchestration**: - Saga execution rate (success/failure) - Average saga duration - Compensation rate - Pending saga count -2. **Sức khỏe Outbox Processing / Outbox Processing Health**: +2. **Sức khỏe Outbox Processing**: - Outbox publishing rate - Processing lag (P95, P99) - Failed events - Table size trend -3. **Hiệu quả Idempotency / Idempotency Effectiveness**: +3. 
**Hiệu quả Idempotency**: - Duplicate prevention rate - Redis hit rate - Key distribution @@ -697,12 +665,11 @@ severity: warning - Mean time to consistency (MTTC) - Conflict resolution success rate -### Tracing Phân tán / Distributed Tracing +### Tracing Phân tán **Trace Saga Execution**: ```typescript -// VI: Saga step được trace -// EN: Traced saga step +// Saga step được trace async function executeStepWithTracing( step: SagaStep, context: SagaContext @@ -729,17 +696,48 @@ async function executeStepWithTracing( } ``` -## Tài liệu Liên quan / Related Documentation +## Tài liệu Liên quan -- [Event-Driven Architecture](./event-driven-architecture.md) - Event sourcing và Kafka / Event sourcing and Kafka -- [System Design](./system-design.md) - Kiến trúc tổng thể / Overall architecture -- [Microservices Communication](./microservices-communication.md) - Patterns giao tiếp service / Service communication patterns -- [Resilience Patterns](../skills/resilience-patterns.md) - Circuit breaker, retry cho saga steps / Circuit breaker, retry for saga steps -- [Caching Patterns](../skills/caching-patterns.md) - Caching cho idempotency keys / Caching for idempotency keys -- [Database Prisma](../skills/database-prisma.md) - Prisma transactions cho outbox pattern / Prisma transactions for outbox pattern +- [Event-Driven Architecture](./event-driven-architecture.md) - Event sourcing và Kafka +- [System Design](./system-design.md) - Kiến trúc tổng thể +- [Microservices Communication](./microservices-communication.md) - Patterns giao tiếp service +- [Resilience Patterns](../skills/resilience-patterns.md) - Circuit breaker, retry cho saga steps +- [Caching Patterns](../skills/caching-patterns.md) - Caching cho idempotency keys +- [Database Prisma](../skills/database-prisma.md) - Prisma transactions cho outbox pattern --- -**Cập nhật Lần cuối / Last Updated**: 2026-01-07 -**Tác giả / Authors**: GoodGo Architecture Team -**Người Đánh giá / Reviewers**: To be assigned +**Cập nhật Lần 
cuối**: 2026-01-10 +**Tác giả**: GoodGo Architecture Team + +## Quick Tips + +### Mermaid Common Issues +- ⚠️ **Syntax Error**: Kiểm tra dấu `(` `)` `[` `]` `{` `}` +- ⚠️ **Render Error**: Kiểm tra `graph` vs `flowchart`, sử dụng `graph` cho compatibility +- ⚠️ **Arrow Direction**: Sử dụng `-->` (solid) hoặc `-.->` (dashed) +- ✅ **Color**: Luôn sử dụng dark palette với white text + +### Color Palette Reference + +| Color | Fill | Stroke | Use Case | +|-------|------|--------|----------| +| **Blue** | `#1d4ed8` | `#3b82f6` | Primary Components, Saga | +| **Green** | `#15803d` | `#22c55e` | Success, DB, Stable States | +| **Purple** | `#7e22ce` | `#a855f7` | Feature, Logic, Idempotency | +| **Orange** | `#b45309` | `#f59e0b` | Warning, External, Outbox | +| **Red** | `#b91c1c` | `#ef4444` | Error, Failure, Critical | +| **Gray** | `#374151` | `#6b7280` | Background, Secondary | + +**Pattern áp dụng**: +``` +style NodeName fill:#1d4ed8,stroke:#3b82f6,color:#ffffff +``` + +### Visual Indicators + +- ✅ **Recommended**: Best practices, khuyến nghị sử dụng +- ⚠️ **Warning**: Cần chú ý, có điều kiện +- ❌ **Avoid**: Anti-patterns, tránh sử dụng +- 🔒 **Security**: Liên quan đến bảo mật +- ⚡ **Performance**: Liên quan đến hiệu suất diff --git a/docs/vi/architecture/event-driven-architecture.md b/docs/vi/architecture/event-driven-architecture.md index 9fc8d0bd..c62a8eb3 100644 --- a/docs/vi/architecture/event-driven-architecture.md +++ b/docs/vi/architecture/event-driven-architecture.md @@ -1,9 +1,8 @@ -# Kiến trúc Hướng Sự kiện / Event-Driven Architecture +# Kiến trúc Hướng Sự kiện -> **VI**: Kiến trúc hướng sự kiện cho giao tiếp bất đồng bộ sử dụng Apache Kafka -> **EN**: Event-driven architecture for asynchronous communication using Apache Kafka +> Kiến trúc hướng sự kiện cho giao tiếp bất đồng bộ sử dụng Apache Kafka -## Sơ đồ Tổng quan / Overview Diagram +## Sơ đồ Tổng quan ```mermaid graph TD @@ -28,47 +27,28 @@ graph TD Topics -->|Subscribe| Consumer1 Topics 
-->|Subscribe| Consumer2 - style Kafka fill:#e1f5ff - style Topics fill:#fff4e1 + style Kafka fill:#1E88E5,stroke:#1565C0,color:#ffffff + style Topics fill:#FB8C00,stroke:#EF6C00,color:#ffffff ``` -## Mô tả Kiến trúc / Architecture Description - -### VI: Phần Tiếng Việt +## Mô tả Kiến trúc Nền tảng GoodGo triển khai Kiến trúc Hướng Sự kiện (EDA) cho giao tiếp bất đồng bộ giữa microservices. **Nguyên tắc Cốt lõi**: 1. **Event-First Design**: Mọi thay đổi trạng thái phát ra domain events 2. **Loose Coupling**: Services giao tiếp qua events -3. **Eventual Consistency**: Chấp nhận inconsistency tạm thời +3. **Eventual Consistency**: Chấp nhận inconsistency tạm thời 4. **Event Sourcing**: Lưu thay đổi dưới dạng chuỗi event 5. **CQRS Pattern**: Tách biệt read/write operations **Công nghệ**: -- Apache Kafka - Nền tảng event streaming -- Schema Registry - Avro schemas để validation -- KafkaJS - Thư viện Node.js client -- Event Sourcing - Triển khai tùy chỉnh trong IAM +- **Apache Kafka** - Nền tảng event streaming +- **Schema Registry** - Avro schemas để validation +- **KafkaJS** - Thư viện Node.js client +- **Event Sourcing** - Triển khai tùy chỉnh trong IAM -### EN: English Section - -The GoodGo platform implements Event-Driven Architecture (EDA) for asynchronous communication between microservices. - -**Core Principles**: -1. **Event-First Design**: All state changes emit domain events -2. **Loose Coupling**: Services communicate through events -3. **Eventual Consistency**: Accept temporary inconsistency -4. **Event Sourcing**: Store changes as event sequence -5. 
**CQRS Pattern**: Separate read/write operations - -**Technology Stack**: -- Apache Kafka - Event streaming platform -- Schema Registry - Avro schemas for validation -- KafkaJS - Node.js client library -- Event Sourcing - Custom implementation in IAM - -## Luồng Sự kiện / Event Flow +## Luồng Sự kiện ```mermaid sequenceDiagram @@ -82,11 +62,9 @@ sequenceDiagram Consumer-->>Kafka: Acknowledge ``` -**VI Các Bước**: Publish → Distribute → Consume → Retry (nếu thất bại) → DLQ (sau retry tối đa) → Acknowledge +**Các Bước**: Publish → Distribute → Consume → Retry (nếu thất bại) → DLQ (sau retry tối đa) → Acknowledge -**EN Steps**: Publish → Distribute → Consume → Retry (if failed) → DLQ (after max retries) → Acknowledge - -## Cấu trúc Sự kiện / Event Structure +## Cấu trúc Sự kiện ```typescript interface BaseEvent { @@ -100,7 +78,7 @@ interface BaseEvent { } ``` -**Ví dụ / Example**: +**Ví dụ**: ```json { "eventId": "550e8400-e29b-41d4-a716-446655440000", @@ -122,19 +100,19 @@ graph LR AuthLogin[auth.login.success
Partitions: 5] AuditEvents[audit.events
Partitions: 10] - style UserCreated fill:#e1f5ff - style AuthLogin fill:#fff4e1 - style AuditEvents fill:#f8d7da + style UserCreated fill:#1E88E5,stroke:#1565C0,color:#ffffff + style AuthLogin fill:#43A047,stroke:#2E7D32,color:#ffffff + style AuditEvents fill:#E53935,stroke:#C62828,color:#ffffff ``` -**Quy ước Đặt tên / Naming Convention**: `{domain}.{action}.{version}` +**Quy ước Đặt tên**: `{domain}.{action}.{version}` -**Ví dụ / Examples**: +**Ví dụ**: - `user.created.v1` - `auth.login.success.v1` - `audit.event.logged.v1` -## Xử lý Lỗi / Error Handling +## Xử lý Lỗi ```mermaid graph TD @@ -143,15 +121,22 @@ graph TD Process -->|Failure| Retry[Retry 3x] Retry -->|Max Retries| DLQ[Dead Letter Queue] DLQ --> Alert[Alert Team] + + style Event fill:#757575,stroke:#616161,color:#ffffff + style Process fill:#1E88E5,stroke:#1565C0,color:#ffffff + style Ack fill:#43A047,stroke:#2E7D32,color:#ffffff + style Retry fill:#FB8C00,stroke:#EF6C00,color:#ffffff + style DLQ fill:#E53935,stroke:#C62828,color:#ffffff + style Alert fill:#E53935,stroke:#C62828,color:#ffffff ``` -**Chiến lược / Strategy**: +**Chiến lược**: 1. Retry với exponential backoff (100ms → 200ms → 400ms) -2. Tối đa 3 lần thử / Max 3 attempts -3. Chuyển sang DLQ sau retry tối đa / Move to DLQ after max retries -4. Xem xét thủ công và xử lý lại / Manual review and reprocess +2. Tối đa 3 lần thử +3. Chuyển sang DLQ sau retry tối đa +4. 
Xem xét thủ công và xử lý lại -## Bối cảnh Hệ thống / System Context +## Bối cảnh Hệ thống ```mermaid C4Context @@ -174,23 +159,16 @@ C4Context Rel(kafka, monitoring, "Sends metrics", "JMX") ``` -**VI Mô tả**: +**Mô tả Các Thành phần**: - **Producers**: IAM Service và các services khác publish domain events - **Kafka**: Event broker trung tâm, quản lý topics và partitions - **Consumers**: Notification và Audit services consume events - **Schema Registry**: Quản lý và validate Avro schemas - **Monitoring**: Thu thập metrics từ Kafka cluster -**EN Description**: -- **Producers**: IAM Service and other services publish domain events -- **Kafka**: Central event broker, manages topics and partitions -- **Consumers**: Notification and Audit services consume events -- **Schema Registry**: Manages and validates Avro schemas -- **Monitoring**: Collects metrics from Kafka cluster +## Đặc điểm Hiệu suất -## Đặc điểm Hiệu suất / Performance Characteristics - -| Chỉ số / Metric | Mục tiêu / Target | Ghi chú / Notes | +| Chỉ số | Mục tiêu | Ghi chú | |-----------------|-------------------|-----------------| | **Event Publish Latency (P95)** | < 10ms | Fire-and-forget, async | | **Event Delivery Latency (P95)** | < 100ms | End-to-end from publish to consume | @@ -200,80 +178,42 @@ C4Context | **Retention** | 7 days | Default, configurable per topic | | **Replication Factor** | 3 | For fault tolerance | -**VI Tối ưu hóa Hiệu suất**: +**Tối ưu hóa Hiệu suất**: - **Batch Publishing**: Group multiple events để giảm network overhead - **Compression**: Sử dụng Snappy hoặc LZ4 compression - **Partitioning**: Phân chia topics thành multiple partitions cho parallel processing - **Consumer Groups**: Multiple consumers trong cùng group để scale horizontally - **Async Publishing**: Fire-and-forget pattern, không block request handlers -**EN Performance Optimizations**: -- **Batch Publishing**: Group multiple events to reduce network overhead -- **Compression**: Use Snappy or LZ4 compression 
-- **Partitioning**: Divide topics into multiple partitions for parallel processing -- **Consumer Groups**: Multiple consumers in same group for horizontal scaling -- **Async Publishing**: Fire-and-forget pattern, don't block request handlers +## Cân nhắc Bảo mật -## Cân nhắc Bảo mật / Security Considerations - -### VI: Phần Tiếng Việt - -**Event Encryption**: +**Mã hóa Sự kiện**: - TLS in-transit cho tất cả Kafka connections - Optional payload encryption cho sensitive data - End-to-end encryption với custom encryption layer -**Access Control**: +**Kiểm soát Truy cập**: - Kafka ACLs (Access Control Lists) per topic - SASL/SCRAM authentication cho producers và consumers - Separate credentials cho mỗi service - Principle of least privilege - chỉ grant quyền cần thiết -**Schema Validation**: +**Xác thực Schema**: - Avro schemas trong Schema Registry - Schema evolution với backward/forward compatibility - Reject events không match schema -**Audit**: +**Kiểm toán**: - Log tất cả event publishes và consumes - Correlation IDs để trace event flow - Retention policy cho audit logs (7 years) -**Data Retention**: +**Lưu trữ Dữ liệu**: - Default 7 days retention - Configurable per topic - Automatic deletion sau retention period - Compliance với GDPR (right to erasure) -### EN: English Section - -**Event Encryption**: -- TLS in-transit for all Kafka connections -- Optional payload encryption for sensitive data -- End-to-end encryption with custom encryption layer - -**Access Control**: -- Kafka ACLs (Access Control Lists) per topic -- SASL/SCRAM authentication for producers and consumers -- Separate credentials per service -- Principle of least privilege - grant only necessary permissions - -**Schema Validation**: -- Avro schemas in Schema Registry -- Schema evolution with backward/forward compatibility -- Reject events that don't match schema - -**Audit**: -- Log all event publishes and consumes -- Correlation IDs to trace event flow -- Retention policy for audit logs (7 
years) - -**Data Retention**: -- Default 7 days retention -- Configurable per topic -- Automatic deletion after retention period -- GDPR compliance (right to erasure) - ## Triển khai / Deployment ```mermaid @@ -320,29 +260,29 @@ graph TD Broker2 --> Audit Broker3 --> Audit - style Broker1 fill:#e1f5ff - style Broker2 fill:#fff4e1 - style Broker3 fill:#d4edda - style ZK fill:#f0e1ff + style Broker1 fill:#1E88E5,stroke:#1565C0,color:#ffffff + style Broker2 fill:#1E88E5,stroke:#1565C0,color:#ffffff + style Broker3 fill:#1E88E5,stroke:#1565C0,color:#ffffff + style ZK fill:#8E24AA,stroke:#7B1FA2,color:#ffffff ``` -### VI: Chiến lược Triển khai +### Chiến lược Triển khai / Deployment Strategy -**Kafka Cluster Configuration**: +**Cấu hình Kafka Cluster / Kafka Cluster Configuration**: - **Brokers**: 3 brokers minimum (5 for production) - **Replication Factor**: 3 (for fault tolerance) - **Min In-Sync Replicas**: 2 (ensure data durability) - **Partitions**: 3-10 per topic (based on throughput needs) - **Zookeeper**: 3-node ensemble (for coordination) -**Resource Allocation**: +**Phân bổ Tài nguyên / Resource Allocation**: | Component | CPU | Memory | Disk | |-----------|-----|--------|------| | **Kafka Broker** | 2 cores | 4GB RAM | 100GB SSD | | **Zookeeper** | 1 core | 2GB RAM | 20GB SSD | | **Schema Registry** | 500m | 1GB RAM | 10GB | -**Topic Configuration**: +**Cấu hình Topic / Topic Configuration**: ```yaml user.created: partitions: 3 @@ -363,58 +303,15 @@ audit.events: compression-type: lz4 ``` -**High Availability**: -- Multiple brokers với partition replication -- Automatic leader election khi broker fails +**Tính Sẵn sàng Cao / High Availability**: +- Multiple brokers với partition replication / Multiple brokers with partition replication +- Automatic leader election khi broker fails / Automatic leader election when broker fails - Consumer group rebalancing -- Monitoring và alerting cho broker health - -### EN: Deployment Strategy - -**Kafka Cluster 
Configuration**: -- **Brokers**: 3 brokers minimum (5 for production) -- **Replication Factor**: 3 (for fault tolerance) -- **Min In-Sync Replicas**: 2 (ensure data durability) -- **Partitions**: 3-10 per topic (based on throughput needs) -- **Zookeeper**: 3-node ensemble (for coordination) - -**Resource Allocation**: -| Component | CPU | Memory | Disk | -|-----------|-----|--------|------| -| **Kafka Broker** | 2 cores | 4GB RAM | 100GB SSD | -| **Zookeeper** | 1 core | 2GB RAM | 20GB SSD | -| **Schema Registry** | 500m | 1GB RAM | 10GB | - -**Topic Configuration**: -```yaml -user.created: - partitions: 3 - replication-factor: 3 - retention-ms: 604800000 # 7 days - compression-type: snappy - -auth.login.success: - partitions: 5 - replication-factor: 3 - retention-ms: 604800000 - compression-type: snappy - -audit.events: - partitions: 10 - replication-factor: 3 - retention-ms: 220752000000 # 7 years - compression-type: lz4 -``` - -**High Availability**: -- Multiple brokers with partition replication -- Automatic leader election when broker fails -- Consumer group rebalancing -- Monitoring and alerting for broker health +- Monitoring và alerting cho broker health / Monitoring and alerting for broker health ## Giám sát & Khả năng quan sát / Monitoring & Observability -### VI: Chỉ số Chính +### Chỉ số Chính / Key Metrics **Kafka Broker Metrics**: - `kafka_server_brokertopicmetrics_messagesinpersec` - Messages in/sec @@ -435,8 +332,7 @@ audit.events: **Application Metrics**: ```typescript -// VI: Custom metrics cho event processing -// EN: Custom metrics for event processing +// Custom metrics cho event processing / Custom metrics for event processing const eventPublished = new Counter({ name: 'events_published_total', help: 'Total events published', @@ -457,11 +353,8 @@ const eventProcessingDuration = new Histogram({ }); ``` -**Alerting Rules**: +**Quy tắc Cảnh báo / Alerting Rules**: ```yaml -# VI: Quy tắc cảnh báo -# EN: Alerting rules - # High consumer lag - alert: 
HighConsumerLag expr: kafka_consumer_fetch_manager_records_lag_max > 10000 @@ -502,115 +395,9 @@ const eventProcessingDuration = new Histogram({ - Consumer Performance (lag, throughput, errors) - Topic Metrics (messages/sec, bytes/sec, retention) -**Logging**: +**Logging / Ghi nhật ký**: ```typescript -// VI: Structured logging cho events -// EN: Structured logging for events -logger.info('Event published', { - eventId: event.eventId, - eventType: event.eventType, - topic: 'user.created', - correlationId: event.correlationId -}); - -logger.info('Event consumed', { - eventId: event.eventId, - eventType: event.eventType, - topic: 'user.created', - consumerGroup: 'notifications', - processingTime: duration -}); -``` - -### EN: Key Metrics - -**Kafka Broker Metrics**: -- `kafka_server_brokertopicmetrics_messagesinpersec` - Messages in/sec -- `kafka_server_brokertopicmetrics_bytesinpersec` - Bytes in/sec -- `kafka_server_brokertopicmetrics_bytesoutpersec` - Bytes out/sec -- `kafka_controller_kafkacontroller_activecontrollercount` - Active controller -- `kafka_server_replicamanager_underreplicatedpartitions` - Under-replicated partitions - -**Consumer Metrics**: -- `kafka_consumer_fetch_manager_records_lag_max` - Max consumer lag -- `kafka_consumer_fetch_manager_records_consumed_rate` - Records consumed/sec -- `kafka_consumer_coordinator_commit_latency_avg` - Commit latency - -**Producer Metrics**: -- `kafka_producer_record_send_total` - Total records sent -- `kafka_producer_record_error_total` - Total send errors -- `kafka_producer_request_latency_avg` - Request latency - -**Application Metrics**: -```typescript -// Custom metrics for event processing -const eventPublished = new Counter({ - name: 'events_published_total', - help: 'Total events published', - labelNames: ['event_type', 'topic'] -}); - -const eventConsumed = new Counter({ - name: 'events_consumed_total', - help: 'Total events consumed', - labelNames: ['event_type', 'topic', 'consumer_group'] -}); - -const 
eventProcessingDuration = new Histogram({ - name: 'event_processing_duration_seconds', - help: 'Event processing duration', - labelNames: ['event_type'], - buckets: [0.01, 0.05, 0.1, 0.5, 1, 2, 5] -}); -``` - -**Alerting Rules**: -```yaml -# Alerting rules - -# High consumer lag -- alert: HighConsumerLag - expr: kafka_consumer_fetch_manager_records_lag_max > 10000 - for: 5m - severity: warning - annotations: - summary: "High consumer lag detected" - description: "Consumer lag is {{ $value }} messages" - -# Broker down -- alert: KafkaBrokerDown - expr: kafka_server_kafkaserver_brokerstate != 3 - for: 1m - severity: critical - annotations: - summary: "Kafka broker is down" - -# Under-replicated partitions -- alert: UnderReplicatedPartitions - expr: kafka_server_replicamanager_underreplicatedpartitions > 0 - for: 5m - severity: warning - annotations: - summary: "Under-replicated partitions detected" - -# Offline partitions -- alert: OfflinePartitions - expr: kafka_controller_kafkacontroller_offlinepartitionscount > 0 - for: 1m - severity: critical - annotations: - summary: "Offline partitions detected" -``` - -**Dashboards**: -- Kafka Cluster Overview (brokers, topics, partitions) -- Producer Performance (throughput, latency, errors) -- Consumer Performance (lag, throughput, errors) -- Topic Metrics (messages/sec, bytes/sec, retention) - -**Logging**: -```typescript -// Structured logging for events +// Structured logging cho events / Structured logging for events logger.info('Event published', { eventId: event.eventId, eventType: event.eventType, @@ -637,3 +424,23 @@ logger.info('Event consumed', { **Cập nhật Lần cuối / Last Updated**: 2026-01-07 **Tác giả / Authors**: GoodGo Architecture Team + +## Quick Tips / Mẹo Nhanh + +### Mermaid Diagram Color Palette + +| Node Type | Fill Color | Stroke Color | Text Color | Usage | +|-----------|------------|--------------|------------|-------| +| **Core/Broker** | `#1E88E5` (Blue) | `#1565C0` | `#ffffff` | Kafka Brokers, 
Main Components | +| **Topic/Data** | `#FB8C00` (Orange) | `#EF6C00` | `#ffffff` | Topics, Queues, Data Stores | +| **Success/Safe** | `#43A047` (Green) | `#2E7D32` | `#ffffff` | Successful flows, Safe states | +| **Error/Danger** | `#E53935` (Red) | `#C62828` | `#ffffff` | Errors, DLQ, Critical issues | +| **Coordination** | `#8E24AA` (Purple) | `#7B1FA2` | `#ffffff` | Zookeeper, Orchestrators | + +### Visual Indicators / Các Chỉ báo Trực quan + +- 🔄 **Retry Loop**: Indicates automatic retries (Chỉ báo thử lại tự động) +- ⚠️ **DLQ/Warning**: Indicates error handling path (Đường dẫn xử lý lỗi) +- 📝 **Log/Audit**: Indicates logging point (Điểm ghi log) +- 🔐 **Lock/Auth**: Indicates security check (Kiểm tra bảo mật) + diff --git a/docs/vi/architecture/iam-proposal.md b/docs/vi/architecture/iam-proposal.md index e54d8dee..4a7dfcb3 100644 --- a/docs/vi/architecture/iam-proposal.md +++ b/docs/vi/architecture/iam-proposal.md @@ -95,51 +95,105 @@ Tài liệu này mô tả đề xuất kiến trúc cho IAM Service (Identity an ``` services/iam-service/ ├── src/ -│ ├── config/ # Configuration files +│ ├── config/ # Các file cấu hình hệ thống │ ├── core/ -│ │ ├── cache/ # Multi-layer cache -│ │ ├── security/ # Zero-trust, encryption -│ │ ├── events/ # Event sourcing -│ │ └── workflows/ # Workflow engine (NEW) +│ │ ├── cache/ # Cache đa lớp (Multi-layer cache) +│ │ ├── security/ # Zero-trust, mã hóa +│ │ ├── events/ # Xử lý sự kiện (Event sourcing) +│ │ └── workflows/ # Workflow engine (MỚI) │ ├── modules/ -│ │ ├── auth/ # ✅ Core authentication -│ │ ├── rbac/ # ✅ RBAC system -│ │ ├── social/ # ✅ Social authentication -│ │ ├── oidc/ # ✅ OIDC implementation -│ │ ├── token/ # ✅ JWT & Cookie management -│ │ ├── session/ # ✅ Session management -│ │ ├── mfa/ # ✅ Multi-factor auth +│ │ ├── auth/ # ✅ Xác thực cốt lõi (Core authentication) +│ │ ├── rbac/ # ✅ Hệ thống phân quyền (RBAC) +│ │ ├── social/ # ✅ Xác thực mạng xã hội +│ │ ├── oidc/ # ✅ Triển khai OIDC +│ │ ├── token/ # ✅ Quản lý JWT & 
Cookie +│ │ ├── session/ # ✅ Quản lý phiên làm việc +│ │ ├── mfa/ # ✅ Xác thực đa yếu tố │ │ │ -│ │ ├── identity/ # 🆕 Identity Management -│ │ │ ├── user/ # User lifecycle -│ │ │ ├── profile/ # Profile management -│ │ │ ├── verification/ # Identity verification -│ │ │ └── organization/ # Organizations & groups +│ │ ├── identity/ # 🆕 Quản lý danh tính (Identity Management) +│ │ │ ├── user/ # Vòng đời người dùng +│ │ │ ├── profile/ # Quản lý hồ sơ +│ │ │ ├── verification/ # Xác minh danh tính +│ │ │ └── organization/ # Tổ chức & nhóm │ │ │ -│ │ ├── access/ # 🆕 Access Management -│ │ │ ├── request/ # Access requests -│ │ │ ├── review/ # Access reviews -│ │ │ ├── pam/ # Privileged access -│ │ │ └── analytics/ # Access analytics +│ │ ├── access/ # 🆕 Quản lý truy cập (Access Management) +│ │ │ ├── request/ # Yêu cầu truy cập +│ │ │ ├── review/ # Đánh giá truy cập +│ │ │ ├── pam/ # Truy cập đặc quyền (PAM) +│ │ │ └── analytics/ # Phân tích truy cập │ │ │ -│ │ ├── governance/ # 🆕 Governance & Compliance -│ │ │ ├── compliance/ # Compliance reporting -│ │ │ ├── policy/ # Policy governance -│ │ │ ├── risk/ # Risk management -│ │ │ └── reporting/ # Reporting & dashboards +│ │ ├── governance/ # 🆕 Quản trị & Tuân thủ (Governance & Compliance) +│ │ │ ├── compliance/ # Báo cáo tuân thủ +│ │ │ ├── policy/ # Quản trị chính sách +│ │ │ ├── risk/ # Quản lý rủi ro +│ │ │ └── reporting/ # Báo cáo & dashboards │ │ │ │ │ └── workflow/ # 🆕 Workflow Engine -│ │ ├── engine/ # Workflow engine -│ │ ├── approval/ # Approval workflows -│ │ └── automation/ # Automated workflows +│ │ ├── engine/ # Core engine +│ │ ├── approval/ # Quy trình phê duyệt +│ │ └── automation/ # Quy trình tự động │ │ │ ├── middlewares/ # Express middlewares -│ ├── repositories/ # Data access layer -│ └── routes/ # Route definitions +│ ├── repositories/ # Lớp truy cập dữ liệu +│ └── routes/ # Định nghĩa routes └── prisma/ - └── schema.prisma # Database schema (mở rộng) -``` + └── schema.prisma # Cấu trúc CSDL (mở rộng) 
+``` + +### Sơ Đồ Kiến Trúc Modules + +```mermaid +graph TD + %% Styling Configuration + classDef base fill:#202020,stroke:#505050,color:#fff,stroke-width:1px; + classDef core fill:#1a237e,stroke:#3949ab,color:#fff,stroke-width:1px; + classDef newModule fill:#1b5e20,stroke:#43a047,color:#fff,stroke-width:1px; + classDef database fill:#4a148c,stroke:#7b1fa2,color:#fff,stroke-width:1px; + + %% Main Service Node + IAM[IAM Service]:::core + + %% Identity Management Subgraph + subgraph Identity [Identity Management] + direction TB + User[User Lifecycle]:::newModule + Profile[Profile Mgmt]:::newModule + Verify[Verification]:::newModule + Org[Org & Groups]:::newModule + end + + %% Access Management Subgraph + subgraph Access [Access Management] + direction TB + Req[Access Requests]:::newModule + Review[Access Reviews]:::newModule + PAM[PAM]:::newModule + Analytics[Analytics]:::newModule + end + + %% Governance Subgraph + subgraph Governance [Governance & Compliance] + direction TB + Comp[Compliance]:::newModule + Policy[Policy Gov]:::newModule + Risk[Risk Mgmt]:::newModule + end + + %% Database + DB[(Neon Database)]:::database + + %% Relationships + IAM --> Identity + IAM --> Access + IAM --> Governance + + Identity -.-> DB + Access -.-> DB + Governance -.-> DB + + %% Internal Dependencies + Access --> Identity + Governance ---> Access +``` --- ## 3. Database Schema Mở Rộng @@ -337,3 +391,32 @@ GET /api/v1/governance/reports/security-events - **Workflow automation** linh hoạt Điều này biến service từ authentication/authorization cơ bản thành một IAM platform toàn diện, phù hợp cho enterprise. + +--- + +## Quick Tips + +### Mermaid Common Issues + +- **Syntax Error**: Kiểm tra kỹ các dấu ngoặc `[]`, `{}`, `()` trong node label. +- **Connection**: Đảm bảo các mũi tên `-->`, `-.->` đúng cú pháp. +- **Indentation**: Subgraph cần thụt đầu dòng đúng cách. 
+ +### Color Pattern Reference + +| Element | Fill Color | Stroke | Text | Usage | +|---------|------------|--------|------|-------| +| **Base** | `#202020` | `#505050` | `#fff` | Node thông thường | +| **Core** | `#1a237e` | `#3949ab` | `#fff` | Node trung tâm, quan trọng | +| **Module**| `#1b5e20` | `#43a047` | `#fff` | Module, service con | +| **DB** | `#4a148c` | `#7b1fa2` | `#fff` | Database, storage | +| **Warn** | `#b71c1c` | `#f44336` | `#fff` | Cảnh báo, lỗi | + +### Visual Indicators + +| Icon | Meaning | +|------|---------| +| ✅ | Đã hoàn thành / Tốt | +| 🔄 | Đang xử lý / Thay đổi | +| ⚠️ | Cảnh báo / Lưu ý | +| ❌ | Lỗi / Không khuyến khích | diff --git a/docs/vi/architecture/microservices-communication.md b/docs/vi/architecture/microservices-communication.md index 6733aa94..e8172d39 100644 --- a/docs/vi/architecture/microservices-communication.md +++ b/docs/vi/architecture/microservices-communication.md @@ -1,9 +1,81 @@ -# Giao tiếp Microservices / Microservices Communication +# Kiến trúc Giao tiếp Microservices -> **VI**: Các patterns và protocols giao tiếp giữa các services -> **EN**: Communication patterns and protocols for inter-service communication +> Các patterns và protocols giao tiếp giữa các services -## Sơ đồ Tổng quan / Overview Diagram +## Quick Overview + +Hướng dẫn nhanh về các patterns giao tiếp cơ bản trong hệ thống GoodGo. 
+ +### Mô hình Giao tiếp Cơ bản + +```mermaid +graph TD + %% Nodes + Client[Web App / Mobile App] + Traefik[Traefik API Gateway] + Auth[Auth Service] + Notify[Notification Service] + + %% Relationships + Client -->|HTTP Request| Traefik + Traefik -->|Routing| Auth + Auth -.->|Internal HTTP| Notify + + %% Styles using dark color palette + style Client fill:#1565c0,stroke:#fff,stroke-width:2px,color:#fff + style Traefik fill:#0f4c81,stroke:#fff,stroke-width:2px,color:#fff + style Auth fill:#283593,stroke:#fff,stroke-width:2px,color:#fff + style Notify fill:#4527a0,stroke:#fff,stroke-width:2px,color:#fff +``` + +### Giao tiếp Đồng bộ (HTTP/REST) + +Các service giao tiếp đồng bộ qua HTTP REST APIs thông qua Traefik API Gateway. + +**Ví dụ Client → Service:** +```typescript +// Web App -> Auth Service +const response = await fetch('http://api.goodgo.vn/api/v1/auth/login', { + method: 'POST', + body: JSON.stringify({ email, password }), +}); +``` + +**Ví dụ Service → Service:** +```typescript +// Auth Service -> Notification Service +const response = await fetch('http://notification-service:5003/api/v1/notifications', { + method: 'POST', + headers: { 'X-Service-Auth': process.env.INTERNAL_API_KEY }, + body: JSON.stringify({ userId, message }), +}); +``` + +### API Gateway Routing + +Traefik định tuyến requests dựa trên: +- **Host header**: `api.goodgo.vn` +- **Path prefix**: `/api/v1/auth`, `/api/v1/users` + +### Format Error Response Chuẩn + +Tất cả services tuân theo định dạng error response nhất quán: + +```json +{ + "success": false, + "error": { + "code": "AUTH_001", + "message": "Invalid credentials", + "details": {} + }, + "timestamp": "2024-01-01T00:00:00.000Z" +} +``` + +--- + +## Sơ đồ Tổng quan ```mermaid graph TD @@ -19,12 +91,16 @@ graph TD ServiceA --> SD[Service Discovery
Docker DNS / K8s DNS] ServiceB --> SD - style Gateway fill:#e1f5ff - style Kafka fill:#fff4e1 - style SD fill:#d4edda + classDef blue fill:#253041,stroke:#4b6584,color:#ffffff + classDef orange fill:#3a2e1e,stroke:#7a5f3c,color:#ffffff + classDef green fill:#1e3a29,stroke:#3c7a52,color:#ffffff + + class Gateway blue + class Kafka orange + class SD green ``` -## Bối cảnh Hệ thống / System Context +## Bối cảnh Hệ thống ```mermaid C4Context @@ -54,13 +130,11 @@ C4Context Rel(services, external_api, "Integrates", "HTTPS") ``` -**VI**: Nền tảng GoodGo sử dụng kiến trúc microservices nơi tất cả client requests đi qua API Gateway (Traefik), được route đến các microservices phù hợp. Các services giao tiếp đồng bộ qua REST/HTTP cho patterns request-response và bất đồng bộ qua Kafka cho workflows event-driven. Service discovery được xử lý bởi Docker DNS trong môi trường local và Kubernetes DNS trong production. +Nền tảng GoodGo sử dụng kiến trúc microservices nơi tất cả client requests đi qua API Gateway (Traefik), được route đến các microservices phù hợp. Các services giao tiếp đồng bộ qua REST/HTTP cho patterns request-response và bất đồng bộ qua Kafka cho workflows event-driven. Service discovery được xử lý bởi Docker DNS trong môi trường local và Kubernetes DNS trong production. -**EN**: The GoodGo platform uses a microservices architecture where all client requests flow through an API Gateway (Traefik), which routes them to appropriate microservices. Services communicate synchronously via REST/HTTP for request-response patterns and asynchronously via Kafka for event-driven workflows. Service discovery is handled by Docker DNS in local environments and Kubernetes DNS in production. 
+## Protocols Giao tiếp -## Protocols Giao tiếp / Communication Protocols - -### So sánh Protocols / Protocol Comparison +### So sánh Protocols | Protocol | Latency | Complexity | Use Case | |----------|---------|------------|----------| @@ -86,11 +160,9 @@ sequenceDiagram Gateway-->>Client: JSON Response ``` -**VI**: Request-response đồng bộ sử dụng HTTP/REST. +Request-response đồng bộ sử dụng HTTP/REST. -**EN**: Synchronous request-response using HTTP/REST. - -**Triển khai / Implementation**: +**Triển khai**: ```typescript // Service-to-service HTTP client import axios from 'axios'; @@ -130,11 +202,9 @@ sequenceDiagram end ``` -**VI**: Giao tiếp bất đồng bộ dựa trên events qua Kafka. +Giao tiếp bất đồng bộ dựa trên events qua Kafka. -**EN**: Asynchronous event-based communication via Kafka. - -### Khám phá Dịch vụ / Service Discovery +### Khám phá Dịch vụ **Local (Docker Compose)**: ```yaml @@ -169,64 +239,66 @@ graph LR LB --> Service1A[Instance A] LB --> Service1B[Instance B] - style Gateway fill:#e1f5ff + %% Dark color palette with white text + classDef clientBlue fill:#1565c0,stroke:#fff,stroke-width:2px,color:#fff + classDef gatewayBlue fill:#0f4c81,stroke:#fff,stroke-width:2px,color:#fff + classDef featurePurple fill:#4527a0,stroke:#fff,stroke-width:2px,color:#fff + classDef serviceGreen fill:#1e3a29,stroke:#3c7a52,stroke-width:2px,color:#fff + + class Client clientBlue + class Gateway gatewayBlue + class Route,LB,Auth,Rate,CORS featurePurple + class Service1,Service2,Service1A,Service1B serviceGreen ``` -**VI**: Điểm vào duy nhất cho tất cả client requests với routing, auth, rate limiting. +Điểm vào duy nhất cho tất cả client requests với routing, auth, rate limiting. -**EN**: Single entry point for all client requests with routing, auth, rate limiting. +## Đặc điểm Hiệu suất -## Đặc điểm Hiệu suất / Performance Characteristics +Kỳ vọng hiệu suất và chiến lược tối ưu cho giao tiếp giữa các services. 
-**VI**: Kỳ vọng hiệu suất và chiến lược tối ưu cho giao tiếp giữa các services. - -**EN**: Performance expectations and optimization strategies for inter-service communication. - -| Chỉ số / Metric | Mục tiêu / Target | Ghi chú / Notes | +| Chỉ số | Mục tiêu | Ghi chú | |------------------|-------------------|-----------------| -| **Thời gian phản hồi REST API / REST API Response Time** | < 100ms | P95 for internal service-to-service calls | -| **Độ trễ publish event / Event Publishing Latency** | < 50ms | Time to publish to Kafka | -| **Service discovery lookup** | < 10ms | DNS resolution time | -| **Chi phí routing của Gateway / Gateway Routing Overhead** | < 20ms | Additional latency added by Traefik | -| **Thông lượng / Throughput** | 10,000 req/s | Per service instance | -| **Xử lý Kafka event / Kafka Event Processing** | < 500ms | P95 end-to-end event processing | +| **Thời gian phản hồi REST API** | < 100ms | P95 cho các cuộc gọi service-to-service nội bộ | +| **Độ trễ publish event** | < 50ms | Thời gian publish tới Kafka | +| **Service discovery lookup** | < 10ms | Thời gian phân giải DNS | +| **Chi phí routing của Gateway** | < 20ms | Độ trễ thêm vào bởi Traefik | +| **Thông lượng** | 10,000 req/s | Mỗi service instance | +| **Xử lý Kafka event** | < 500ms | P95 xử lý event end-to-end | -**Chiến lược Tối ưu / Optimization Strategies**: -- **Connection Pooling**: Reuse HTTP connections between services / Tái sử dụng HTTP connections giữa services -- **Circuit Breaker**: Prevent cascading failures with Opossum library / Ngăn chặn cascading failures với thư viện Opossum -- **Retry with Backoff**: Exponential backoff for transient failures / Exponential backoff cho transient failures -- **Compression**: Enable gzip for large payloads / Bật gzip cho payloads lớn -- **Caching**: Cache service discovery results and responses / Cache kết quả service discovery và responses +**Chiến lược Tối ưu**: +- **Connection Pooling**: Tái sử dụng HTTP connections giữa 
services +- **Circuit Breaker**: Ngăn chặn cascading failures với thư viện Opossum +- **Retry with Backoff**: Exponential backoff cho transient failures +- **Compression**: Bật gzip cho payloads lớn +- **Caching**: Cache kết quả service discovery và responses -## Cân nhắc Bảo mật / Security Considerations +## Cân nhắc Bảo mật -**VI**: Biện pháp bảo mật để bảo vệ giao tiếp giữa các services. +Biện pháp bảo mật để bảo vệ giao tiếp giữa các services. -**EN**: Security measures for protecting inter-service communication. +### Xác thực Service-to-Service -### Xác thực Service-to-Service / Service-to-Service Authentication +- **Internal API Keys**: Services xác thực sử dụng `x-service-auth` header +- **JWT Tokens**: Để truyền user context giữa services +- **Mutual TLS (mTLS)**: Tùy chọn cho môi trường production (Kubernetes service mesh) -- **Internal API Keys**: Services authenticate using `x-service-auth` header / Services xác thực sử dụng `x-service-auth` header -- **JWT Tokens**: For user context propagation between services / Để truyền user context giữa services -- **Mutual TLS (mTLS)**: Optional for production environments (Kubernetes service mesh) / Tùy chọn cho môi trường production +### Bảo mật Mạng -### Bảo mật Mạng / Network Security +- **Network Policies**: Kubernetes NetworkPolicies hạn chế traffic service-to-service +- **Service Mesh**: Istio/Linkerd cho security policies nâng cao (tùy chọn) +- **Private Networks**: Services giao tiếp trong private VPC/cluster network -- **Network Policies**: Kubernetes NetworkPolicies restrict service-to-service traffic / Hạn chế traffic service-to-service -- **Service Mesh**: Istio/Linkerd for advanced security policies (optional) / Cho security policies nâng cao (tùy chọn) -- **Private Networks**: Services communicate within private VPC/cluster network / Services giao tiếp trong private VPC/cluster network +### Bảo vệ Dữ liệu -### Bảo vệ Dữ liệu / Data Protection +- **Encryption in Transit**: TLS 1.2+ cho mọi external 
communication +- **Event Payload Encryption**: Dữ liệu nhạy cảm được mã hóa trước khi publish tới Kafka +- **API Gateway**: Xử lý SSL termination và request validation -- **Encryption in Transit**: TLS 1.2+ for all external communication / TLS 1.2+ cho mọi external communication -- **Event Payload Encryption**: Sensitive data encrypted before publishing to Kafka / Dữ liệu nhạy cảm được mã hóa trước khi publish tới Kafka -- **API Gateway**: Traefik handles SSL termination and request validation / Xử lý SSL termination và request validation - -### Best Practices Bảo mật / Security Best Practices +### Best Practices Bảo mật ```typescript -// VI: Service client với xác thực -// EN: Service client with authentication +// Service client với xác thực export class SecureServiceClient { private client = axios.create({ baseURL: process.env.SERVICE_URL, @@ -236,45 +308,53 @@ export class SecureServiceClient { 'x-correlation-id': generateCorrelationId() }, httpsAgent: new https.Agent({ - rejectUnauthorized: true // VI: Xác minh SSL certificates / EN: Verify SSL certificates + rejectUnauthorized: true // Xác minh SSL certificates }) }); } ``` -## Triển khai / Deployment +## Triển khai -**VI**: Cách giao tiếp microservices được triển khai và mở rộng qua các môi trường. - -**EN**: How microservices communication is deployed and scaled across environments. +Cách giao tiếp microservices được triển khai và mở rộng qua các môi trường. ```mermaid graph TD subgraph "Production Cluster" - LB[Load Balancer] --> Gateway[API Gateway\n3 replicas] + LB[Load Balancer] --> Gateway[API Gateway
3 replicas] - Gateway --> ServiceA1[Service A\nInstance 1] - Gateway --> ServiceA2[Service A\nInstance 2] - Gateway --> ServiceB1[Service B\nInstance 1] - Gateway --> ServiceB2[Service B\nInstance 2] + Gateway --> ServiceA1[Service A<br/>Instance 1] + Gateway --> ServiceA2[Service A<br/>Instance 2] + Gateway --> ServiceB1[Service B<br/>Instance 1] + Gateway --> ServiceB2[Service B<br/>Instance 2] - ServiceA1 & ServiceA2 --> Kafka[Kafka Cluster\n3 brokers] + ServiceA1 & ServiceA2 --> Kafka[Kafka Cluster<br/>3 brokers] ServiceB1 & ServiceB2 --> Kafka - ServiceA1 & ServiceA2 --> DB[(PostgreSQL\nPrimary + Replica)] + ServiceA1 & ServiceA2 --> DB[(PostgreSQL<br/>Primary + Replica)] ServiceB1 & ServiceB2 --> DB - ServiceA1 & ServiceA2 --> Redis[(Redis Cluster\n3 nodes)] + ServiceA1 & ServiceA2 --> Redis[(Redis Cluster
3 nodes)] ServiceB1 & ServiceB2 --> Redis end - style Gateway fill:#e1f5ff - style Kafka fill:#fff4e1 - style DB fill:#d4edda - style Redis fill:#ffe1e1 + %% Dark color palette with white text and white strokes + classDef lbGrey fill:#424242,stroke:#fff,stroke-width:2px,color:#fff + classDef gatewayBlue fill:#0f4c81,stroke:#fff,stroke-width:2px,color:#fff + classDef servicePurple fill:#4527a0,stroke:#fff,stroke-width:2px,color:#fff + classDef kafkaOrange fill:#3a2e1e,stroke:#fff,stroke-width:2px,color:#fff + classDef dbGreen fill:#1e3a29,stroke:#fff,stroke-width:2px,color:#fff + classDef redisRed fill:#3a1e1e,stroke:#fff,stroke-width:2px,color:#fff + + class LB lbGrey + class Gateway gatewayBlue + class ServiceA1,ServiceA2,ServiceB1,ServiceB2 servicePurple + class Kafka kafkaOrange + class DB dbGreen + class Redis redisRed ``` -### Môi trường Triển khai / Deployment Environments +### Môi trường Triển khai | Environment | Gateway | Services | Kafka | Service Discovery | |-------------|---------|----------|-------|-------------------| @@ -282,20 +362,18 @@ graph TD | **Staging** | Traefik (2 replicas) | 2 replicas per service | 3 brokers | Kubernetes DNS | | **Production** | Traefik (3+ replicas) | 3+ replicas per service | 5+ brokers | Kubernetes DNS + Service Mesh | -### Chiến lược Mở rộng / Scaling Strategy +### Chiến lược Mở rộng -- **Horizontal Pod Autoscaler (HPA)**: Auto-scale based on CPU/memory / Tự động scale dựa trên CPU/memory -- **Kafka Partitions**: Scale event processing by increasing partitions / Scale event processing bằng cách tăng partitions -- **Load Balancing**: Kubernetes Service load balances across pod replicas / Cân bằng tải giữa pod replicas -- **Gateway Scaling**: Traefik scales independently from backend services / Traefik scale độc lập với backend services +- **Horizontal Pod Autoscaler (HPA)**: Tự động scale dựa trên CPU/memory +- **Kafka Partitions**: Scale event processing bằng cách tăng partitions +- **Load Balancing**: Cân bằng tải 
giữa pod replicas +- **Gateway Scaling**: Traefik scale độc lập với backend services -## Giám sát & Khả năng quan sát / Monitoring & Observability +## Giám sát & Khả năng quan sát -**VI**: Cách giám sát và quan sát giao tiếp microservices. +Cách giám sát và quan sát giao tiếp microservices. -**EN**: How to monitor and observe microservices communication. - -### Chỉ số Chính / Key Metrics +### Chỉ số Chính **Service-to-Service Metrics**: - `http_request_duration_seconds` - Request latency histogram @@ -313,18 +391,16 @@ graph TD - `kafka_consumer_lag` - Consumer lag - `kafka_consumer_records_consumed_total` - Events consumed -### Kiểm tra Sức khỏe / Health Checks +### Kiểm tra Sức khỏe **Service Endpoints**: ```typescript -// VI: Liveness - service có đang chạy không? -// EN: Liveness - is service running? +// Liveness - service có đang chạy không? app.get('/health/live', (req, res) => { res.json({ status: 'ok', timestamp: new Date().toISOString() }); }); -// VI: Readiness - service có thể xử lý traffic không? -// EN: Readiness - can service handle traffic? +// Readiness - service có thể xử lý traffic không? 
app.get('/health/ready', async (req, res) => { const checks = { database: await checkDatabase(), @@ -354,13 +430,13 @@ readinessProbe: periodSeconds: 5 ``` -### Tracing Phân tán / Distributed Tracing +### Tracing Phân tán -- **OpenTelemetry**: Instrument all service-to-service calls / Instrument tất cả service-to-service calls -- **Jaeger**: Visualize distributed traces / Hiển thị distributed traces -- **Correlation IDs**: Propagate via `x-correlation-id` header for request tracking / Truyền qua `x-correlation-id` header để tracking requests +- **OpenTelemetry**: Instrument tất cả service-to-service calls +- **Jaeger**: Hiển thị distributed traces +- **Correlation IDs**: Truyền qua `x-correlation-id` header để tracking requests -### Dashboard Giám sát / Monitoring Dashboard +### Dashboard Giám sát **Grafana Panels**: - Service Communication Overview (request rate, latency, errors) @@ -368,9 +444,9 @@ readinessProbe: - Event Bus Health (Kafka lag, throughput) - Service Dependencies (service map from traces) -## Tài liệu Liên quan / Related Documentation +## Tài liệu Liên quan -- [System Design](./system-design.md) - Overall architecture / Kiến trúc tổng thể +- [System Design](./system-design.md) - Kiến trúc tổng thể - [Event-Driven Architecture](./event-driven-architecture.md) - Event patterns - [API Gateway Advanced](../skills/api-gateway-advanced.md) - Gateway patterns - [Inter-Service Communication](../skills/inter-service-communication.md) - Communication patterns @@ -378,6 +454,31 @@ readinessProbe: --- +## Quick Tips + +### Mermaid Common Issues +- **Arrow Syntax**: `-->` (solid), `-.->` (dotted), `==>` (thick) +- **Special Characters**: Escape with quote marks `"` +- **Subgraphs**: Use `subgraph "Title"` ... 
`end` + +### Color Pattern Quick Reference +| Element | Color | Hex | Stroke | Usage | +|---------|-------|-----|--------|-------| +| **Core** | Blue | `#253041` | `#4b6584` | Primary components | +| **Logic** | Purple | `#2e1e3a` | `#5f3c7a` | Processing steps | +| **Data** | Green | `#1e3a29` | `#3c7a52` | Database, Cache | +| **External** | Orange | `#3a2e1e` | `#7a5f3c` | External APIs | +| **Error** | Red | `#3a1e1e` | `#7a3c3c` | Failures, Alerts | + +### Visual Indicators +- 🔵 **Blue**: Core Infrastructure +- 🟢 **Green**: Data Operations +- 🟠 **Orange**: Event/External +- 🔴 **Red**: Critical/Error +- ⚪ **Grey**: Neutral/Boundary + +--- + **Cập nhật lần cuối / Last Updated**: 2026-01-07 **Tác giả / Authors**: GoodGo Architecture Team **Reviewers**: To be assigned diff --git a/docs/vi/architecture/observability-architecture.md b/docs/vi/architecture/observability-architecture.md index 4d82262a..7a8d5eb8 100644 --- a/docs/vi/architecture/observability-architecture.md +++ b/docs/vi/architecture/observability-architecture.md @@ -1,9 +1,8 @@ -# Kiến trúc Khả năng Quan sát / Observability Architecture +# Kiến trúc Khả năng Quan sát -> **VI**: Khả năng quan sát toàn diện với metrics, logging và tracing -> **EN**: Comprehensive observability with metrics, logging, and tracing +> **Note**: Khả năng quan sát toàn diện với metrics, logging và tracing -## Sơ đồ Tổng quan / Overview Diagram +## Sơ đồ Tổng quan ```mermaid graph TD @@ -30,18 +29,24 @@ graph TD Jaeger --> JaegerUI[Jaeger UI] end - style Prom fill:#d4edda - style Loki fill:#fff4e1 - style Jaeger fill:#e1f5ff + classDef service fill:#2D3748,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef metrics fill:#2F855A,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef logging fill:#C05621,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef tracing fill:#2C5282,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef dashboard fill:#4A5568,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + + class 
Service1,Service2 service; + class Prom metrics; + class Loki logging; + class Jaeger,JaegerUI tracing; + class Grafana,GrafanaLogs dashboard; ``` -``` - -## Bối cảnh Hệ thống / System Context +## Bối cảnh Hệ thống ```mermaid C4Context - title Sơ đồ Bối cảnh Khả năng Quan sát / Observability System Context + title Sơ đồ Bối cảnh Khả năng Quan sát Person(dev, "Developer", "Uses dashboards to monitor system") Person(sre, "SRE", "Manages infrastructure & alerts") @@ -55,19 +60,20 @@ C4Context Rel(sre, obs, "Configures Alerts", "HTTPS") Rel(service, obs, "Push/Pull Telemetry", "HTTP/gRPC") Rel(k8s, obs, "Exposes Metrics", "HTTP") + + UpdateElementStyle(dev, $fontColor="white", $bgColor="#2D3748", $borderColor="white") + UpdateElementStyle(sre, $fontColor="white", $bgColor="#2D3748", $borderColor="white") + UpdateElementStyle(obs, $fontColor="white", $bgColor="#2C5282", $borderColor="white") + UpdateElementStyle(service, $fontColor="white", $bgColor="#4A5568", $borderColor="white") + UpdateElementStyle(k8s, $fontColor="white", $bgColor="#4A5568", $borderColor="white") ``` -### VI Mô tả Bối cảnh +### Mô tả Bối cảnh - **Observability Stack**: Trung tâm thu thập và hiển thị dữ liệu (Prometheus, Loki, Jaeger, Grafana). - **Microservices**: Gửi logs, metrics và traces (OpenTelemetry). - **Developer/SRE**: Sử dụng Grafana để theo dõi sức khỏe hệ thống và debug. -### EN Context Description -- **Observability Stack**: Central collection and visualization (Prometheus, Loki, Jaeger, Grafana). -- **Microservices**: Send logs, metrics, and traces (OpenTelemetry). -- **Developer/SRE**: Use Grafana to monitor system health and debug. - -## Ba Trụ cột Khả năng Quan sát / Three Pillars of Observability +## Ba Trụ cột Khả năng Quan sát ### 1. 
Metrics (Prometheus + Grafana) @@ -80,20 +86,21 @@ graph LR Grafana --> Dashboard2[Error Dashboard] Grafana --> Dashboard3[Performance Dashboard] - style Prom fill:#d4edda - style Grafana fill:#e1f5ff + classDef default fill:#2D3748,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef prom fill:#2F855A,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef grafana fill:#2C5282,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + + class Prom prom; + class Grafana grafana; ``` -**VI**: Các phép đo số theo thời gian (requests/sec, latency, errors). +**Mô tả**: Các phép đo số theo thời gian (requests/sec, latency, errors). -**EN**: Numerical measurements over time (requests/sec, latency, errors). - -**Triển khai / Implementation**: +**Triển khai**: ```typescript import { Counter, Histogram, Gauge } from 'prom-client'; -// VI: HTTP request metrics -// EN: HTTP request metrics +// HTTP request metrics export const httpRequestDuration = new Histogram({ name: 'http_request_duration_seconds', help: 'Duration of HTTP requests in seconds', @@ -112,8 +119,7 @@ export const activeRequests = new Gauge({ help: 'Number of active HTTP requests' }); -// VI: Middleware để track metrics -// EN: Middleware to track metrics +// Middleware để track metrics export function metricsMiddleware(req, res, next) { const start = Date.now(); activeRequests.inc(); @@ -159,11 +165,9 @@ sequenceDiagram Loki-->>Grafana: Log results ``` -**VI**: Structured logging với correlation IDs để tracing requests. +**Mô tả**: Structured logging với correlation IDs để tracing requests. -**EN**: Structured logging with correlation IDs for request tracing. 
- -**Triển khai / Implementation**: +**Triển khai**: ```typescript import winston from 'winston'; @@ -180,13 +184,11 @@ export const logger = winston.createLogger({ }, transports: [ new winston.transports.Console(), - // VI: Loki transport (nếu configured) - // EN: Loki transport (if configured) + // Loki transport (nếu configured) ] }); -// VI: Logger middleware -// EN: Logger middleware +// Logger middleware export function loggerMiddleware(req, res, next) { const correlationId = req.headers['x-correlation-id'] || generateId(); @@ -228,20 +230,21 @@ graph LR Jaeger --> Timeline[Trace Timeline] - style Trace fill:#e1f5ff - style Jaeger fill:#d4edda + classDef default fill:#2D3748,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef trace fill:#2C5282,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef jaeger fill:#2F855A,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + + class Trace trace; + class Jaeger jaeger; ``` -**VI**: Distributed tracing để track requests giữa các services. +**Mô tả**: Distributed tracing để track requests giữa các services. -**EN**: Distributed tracing to track requests across services. - -**Triển khai / Implementation**: +**Triển khai**: ```typescript import { trace, SpanStatusCode } from '@opentelemetry/api'; -// VI: Tạo traced function -// EN: Create traced function +// Tạo traced function export function traced( name: string, fn: () => Promise @@ -267,8 +270,7 @@ export function traced( }); } -// VI: Sử dụng -// EN: Usage +// Sử dụng async getUserWithTracing(userId: string): Promise { return traced('getUserById', async () => { return await userRepository.findById(userId); @@ -276,17 +278,15 @@ async getUserWithTracing(userId: string): Promise { } ``` -## Kiểm tra Sức khỏe / Health Checks +## Kiểm tra Sức khỏe ```typescript -// VI: Liveness probe - service có đang chạy không? -// EN: Liveness probe - is service running? +// Liveness probe - service có đang chạy không? 
app.get('/health/live', (req, res) => { res.json({ status: 'ok', timestamp: new Date().toISOString() }); }); -// VI: Readiness probe - service có sẵn sàng nhận traffic không? -// EN: Readiness probe - is service ready for traffic? +// Readiness probe - service có sẵn sàng nhận traffic không? app.get('/health/ready', async (req, res) => { const checks = { database: await checkDatabase(), @@ -313,17 +313,15 @@ async function checkDatabase(): Promise { } ``` -## Quy tắc Cảnh báo / Alerting Rules +## Quy tắc Cảnh báo ```yaml -# VI: Prometheus alerting rules -# EN: Prometheus alerting rules +# Prometheus alerting rules groups: - name: service_alerts interval: 30s rules: - # VI: Tỷ lệ lỗi cao - # EN: High error rate + # Tỷ lệ lỗi cao - alert: HighErrorRate expr: | rate(http_requests_total{status=~"5.."}[5m]) > 0.05 @@ -334,8 +332,7 @@ groups: summary: "High error rate detected" description: "Error rate is {{ $value }} (> 5%)" - # VI: Độ trễ cao - # EN: High latency + # Độ trễ cao - alert: HighLatency expr: | histogram_quantile(0.95, http_request_duration_seconds_bucket) > 1 @@ -346,8 +343,7 @@ groups: summary: "High latency detected" description: "P95 latency is {{ $value }}s" - # VI: Service down - # EN: Service down + # Service down - alert: ServiceDown expr: up == 0 for: 1m @@ -357,11 +353,11 @@ groups: summary: "Service is down" ``` -## Đặc điểm Hiệu suất / Performance Characteristics +## Đặc điểm Hiệu suất -### VI: Mục tiêu Hiệu suất -| Chỉ số / Metric | Mục tiêu / Target | Ghi chú / Notes | -|-----------------|-------------------|-----------------| +### Mục tiêu Hiệu suất +| Chỉ số | Mục tiêu | Ghi chú | +|--------|----------|---------| | **Metric Scrape Interval** | 15s | Critical services | | **Log Ingestion Latency** | < 1s | Time from emit to queryable | | **Trace Sampling Rate** | 10% | Production (100% in Dev/Staging) | @@ -369,31 +365,15 @@ groups: | **Alert Evaluation** | Every 1m | Evaluation interval | | **Retention Policy** | 14 days | Logs & Traces 
(Metrics: 30 days) | -### EN: Performance Targets -| Metric | Target | Notes | -|--------|--------|-------| -| **Metric Scrape Interval** | 15s | Critical services | -| **Log Ingestion Latency** | < 1s | Time from emit to queryable | -| **Trace Sampling Rate** | 10% | Production (100% in Dev/Staging) | -| **Dashboard Load Time** | < 2s | P95 Latency | -| **Alert Evaluation** | Every 1m | Evaluation interval | -| **Retention Policy** | 14 days | Logs & Traces (Metrics: 30 days) | +## Cân nhắc Bảo mật -## Cân nhắc Bảo mật / Security Considerations - -### VI: Bảo mật Observability +### Bảo mật Observability - **Log Scrubbing**: Tự động loại bỏ PII (emails, ssn, credit cards) và secrets khỏi logs trước khi ingestion. - **Access Control**: Grafana integrated với OAuth2/OIDC, phân quyền Viewer/Editor/Admin. - **Network Policy**: Chỉ cho phép traffic từ namespace nội bộ tới các cổng ingestion (9090, 3100, 14268). - **TLS**: Mã hóa traffic giữa agents và collectors. -### EN: Observability Security -- **Log Scrubbing**: Automatically scrub PII (emails, ssn, credit cards) and secrets from logs before ingestion. -- **Access Control**: Grafana integrated with OAuth2/OIDC, roles for Viewer/Editor/Admin. -- **Network Policy**: Allow traffic only from internal namespaces to ingestion ports (9090, 3100, 14268). -- **TLS**: Encrypt traffic between agents and collectors. 
- -## Triển khai / Deployment +## Triển khai ```mermaid graph TD @@ -421,30 +401,63 @@ graph TD Grafana --> Loki Grafana --> Jaeger - style Grafana fill:#ffe1e1 - style Prom fill:#d4edda - style Loki fill:#fff4e1 - style Jaeger fill:#e1f5ff + classDef k8s fill:#2D3748,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef app fill:#4A5568,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef grafana fill:#2C5282,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef loki fill:#C05621,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef jaeger fill:#2F855A,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + classDef prom fill:#2F855A,stroke:#FFFFFF,stroke-width:2px,color:#FFFFFF; + + class Grafana grafana; + class Loki loki; + class Jaeger jaeger; + class Prom prom; + class App,Agent app; ``` -**VI Mô tả Triển khai**: +**Mô tả Triển khai**: - **Agent**: Promtail hoặc Grafana Agent chạy như DaemonSet hoặc Sidecar để thu thập logs. - **Pull Model**: Prometheus scrape metrics từ endpoints `/metrics`. - **Push Model**: Traces và Logs được push tới collectors. - **Resources**: Dedicated nodes cho monitoring stack trong production để tránh ảnh hưởng workload chính. -**EN Deployment Description**: -- **Agent**: Promtail or Grafana Agent runs as DaemonSet or Sidecar to collect logs. -- **Pull Model**: Prometheus scrapes metrics from `/metrics` endpoints. -- **Push Model**: Traces and Logs are pushed to collectors. -- **Resources**: Dedicated nodes for monitoring stack in production to prevent impact on main workload. 
+## Tài liệu Liên quan -## Tài liệu Liên quan / Related Documentation - -- [System Design](./system-design.md) - Kiến trúc tổng thể / Overall architecture +- [System Design](./system-design.md) - Kiến trúc tổng thể - [Caching Architecture](./caching-architecture.md) - Cache metrics +## Quick Tips + +### Mermaid Common Issues + +| Issue | Solution | +|-------|----------| +| **Parse Error** | Check for special characters like `()` or `[]` inside node text without quotes. Use `"text"` for complex strings. | +| **Color Not Showing** | Ensure `style` or `classDef` definitions are correct and IDs match. | +| **Arrow Direction** | `TD` = Top-Down, `LR` = Left-Right. Choose appropriately for layout. | + +### Color Pattern Quick Reference + +| Element | Color | Hex | Use Case | +|---------|-------|-----|----------| +| **Primary** | Dark Blue | `#2D3748` | System components, core services | +| **Secondary** | Grey | `#4A5568` | Supporting modules, libraries | +| **Accent** | Blue | `#2C5282` | Databases, external APIs | +| **Highlight** | Teal | `#285E61` | User interactions, highlights | +| **Success** | Green | `#2F855A` | Successful states, active | +| **Warning** | Orange | `#C05621` | Warning/Caution states | +| **Error** | Red | `#C53030` | Error states, failures | + +### Visual Indicators + +| Indicator | Meaning | +|-----------|---------| +| 🟢 | Safe / Recommended | +| 🟡 | Warning / Caution | +| 🔴 | Danger / Anti-pattern | +| 💡 | Tip / Best Practice | + --- -**Cập nhật Lần cuối / Last Updated**: 2026-01-07 -**Tác giả / Authors**: GoodGo Architecture Team +**Cập nhật Lần cuối**: 2026-01-07 +**Tác giả**: GoodGo Architecture Team diff --git a/docs/vi/architecture/security-architecture.md b/docs/vi/architecture/security-architecture.md index ffd824ea..74a17dde 100644 --- a/docs/vi/architecture/security-architecture.md +++ b/docs/vi/architecture/security-architecture.md @@ -20,10 +20,10 @@ graph TD Service --> Audit[Audit Logging] Audit --> AuditDB[(Audit Trail
7-year retention)] - style TLS fill:#d4edda - style JWT fill:#e1f5ff - style Encrypt fill:#f8d7da - style Audit fill:#fff4e1 + style TLS fill:#15803d,stroke:#fff,stroke-width:2px,color:#fff + style JWT fill:#1d4ed8,stroke:#fff,stroke-width:2px,color:#fff + style Encrypt fill:#b91c1c,stroke:#fff,stroke-width:2px,color:#fff + style Audit fill:#c2410c,stroke:#fff,stroke-width:2px,color:#fff ``` ## Mô tả Kiến trúc / Architecture Description @@ -69,6 +69,7 @@ The GoodGo Security Architecture implements defense-in-depth with multiple secur ## Luồng Xác thực / Authentication Flow ```mermaid +%%{init: {'theme': 'dark'}}%% sequenceDiagram participant Client participant API as API Gateway @@ -158,8 +159,8 @@ graph TD Perm[Permission
resource:action:scope] end - style Check fill:#e1f5ff - style Perm fill:#fff4e1 + style Check fill:#1d4ed8,stroke:#fff,stroke-width:2px,color:#fff + style Perm fill:#c2410c,stroke:#fff,stroke-width:2px,color:#fff ``` ### VI: RBAC (Role-Based Access Control) @@ -181,12 +182,9 @@ SuperAdmin > OrgAdmin > Manager > User > Guest **3. Permission Caching**: ```typescript -// VI: Cache key: user:{userId}:permissions -// EN: Cache key: user:{userId}:permissions -// VI: TTL: 5 phút -// EN: TTL: 5 minutes -// VI: Invalidate khi: role change, permission change -// EN: Invalidate on: role change, permission change +Cache key: user:{userId}:permissions +TTL: 5 phút +Invalidate khi: role change, permission change ``` ### EN: RBAC (Role-Based Access Control) @@ -208,9 +206,9 @@ SuperAdmin > OrgAdmin > Manager > User > Guest **3. Permission Caching**: ```typescript -// Cache key: user:{userId}:permissions -// TTL: 5 minutes -// Invalidate on: role change, permission change +Cache key: user:{userId}:permissions +TTL: 5 minutes +Invalidate on: role change, permission change ``` ## Kiến trúc Zero-Trust / Zero-Trust Architecture @@ -226,9 +224,9 @@ graph TD Session -->|Suspicious| MFA[Require MFA] Session -->|Anomaly| Block[Block + Alert] - style Block fill:#f8d7da - style MFA fill:#fff3cd - style Allow fill:#d4edda + style Block fill:#b91c1c,stroke:#fff,stroke-width:2px,color:#fff + style MFA fill:#c2410c,stroke:#fff,stroke-width:2px,color:#fff + style Allow fill:#15803d,stroke:#fff,stroke-width:2px,color:#fff ``` ### VI: Thành phần Zero-Trust @@ -337,8 +335,6 @@ graph TD **3. 
Audit Trail**: ```typescript -// VI: Event sourcing cho tất cả auth events -// EN: Event sourcing for all auth events { eventType: 'auth.login.success', userId: 'user_123', @@ -364,7 +360,6 @@ graph TD - Incident response plan ```typescript -// Event sourcing for all auth events { eventType: 'auth.login.success', userId: 'user_123', @@ -378,6 +373,7 @@ graph TD ## Bối cảnh Hệ thống / System Context ```mermaid +%%{init: {'theme': 'dark'}}%% C4Context title Sơ đồ Bối cảnh Security Architecture @@ -423,6 +419,7 @@ C4Context ## Kiến trúc Database / Database Architecture ```mermaid +%%{init: {'theme': 'dark'}}%% erDiagram User ||--o{ Session : has User ||--o{ UserRole : has @@ -744,12 +741,12 @@ graph TD SIEM -.->|Alerts| Alerts - style LB fill:#d4edda - style WAF fill:#fff3cd - style DB fill:#f0e1ff - style Cache fill:#fff4e1 - style Vault fill:#f8d7da - style SIEM fill:#e1f5ff + style LB fill:#15803d,stroke:#fff,stroke-width:2px,color:#fff + style WAF fill:#c2410c,stroke:#fff,stroke-width:2px,color:#fff + style DB fill:#7e22ce,stroke:#fff,stroke-width:2px,color:#fff + style Cache fill:#1f2937,stroke:#fff,stroke-width:2px,color:#fff + style Vault fill:#b91c1c,stroke:#fff,stroke-width:2px,color:#fff + style SIEM fill:#1d4ed8,stroke:#fff,stroke-width:2px,color:#fff ``` ### VI: Chiến lược Triển khai @@ -770,7 +767,6 @@ graph TD **Security Configuration**: ```yaml -# K8s Network Policy apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: @@ -837,7 +833,6 @@ spec: **Security Configuration**: ```yaml -# K8s Network Policy apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata: @@ -916,15 +911,12 @@ spec: **Application Code**: ```typescript -// VI: Custom metrics cho authentication -// EN: Custom metrics for authentication - import { Counter, Histogram, Gauge } from 'prom-client'; export const loginAttempts = new Counter({ name: 'auth_login_attempts_total', help: 'Total login attempts', - labelNames: ['status'] // success, failure + labelNames: ['status'] }); 
export const loginDuration = new Histogram({ @@ -936,17 +928,15 @@ export const loginDuration = new Histogram({ export const permissionChecks = new Counter({ name: 'auth_permission_checks_total', help: 'Total permission checks', - labelNames: ['result'] // granted, denied + labelNames: ['result'] }); export const suspiciousActivities = new Counter({ name: 'auth_suspicious_activities_total', help: 'Suspicious activities detected', - labelNames: ['type'] // new_device, new_ip, brute_force, etc. + labelNames: ['type'] }); -// VI: Sử dụng -// EN: Usage loginAttempts.inc({ status: 'success' }); loginDuration.observe(duration); permissionChecks.inc({ result: 'granted' }); @@ -955,14 +945,10 @@ suspiciousActivities.inc({ type: 'new_device' }); **Alerting Rules**: ```yaml -# VI: Quy tắc cảnh báo bảo mật -# EN: Security alerting rules - groups: - name: security_alerts interval: 30s rules: - # High failed login rate - alert: HighFailedLoginRate expr: rate(auth_login_attempts_total{status="failure"}[5m]) > 10 for: 2m @@ -972,7 +958,6 @@ groups: summary: "High failed login rate detected" description: "Failed login rate is {{ $value }}/sec" - # Brute force attack - alert: BruteForceAttack expr: | sum by (user_id) ( @@ -985,7 +970,6 @@ groups: summary: "Potential brute force attack" description: "User {{ $labels.user_id }} has > 5 failed logins/min" - # Account lockout spike - alert: AccountLockoutSpike expr: rate(auth_account_lockouts_total[5m]) > 5 for: 2m @@ -995,7 +979,6 @@ groups: summary: "Account lockout spike detected" description: "Lockout rate is {{ $value }}/sec" - # Suspicious activity - alert: SuspiciousActivity expr: rate(auth_suspicious_activities_total[5m]) > 10 for: 1m @@ -1005,7 +988,6 @@ groups: summary: "Suspicious activity detected" description: "Suspicious activity rate: {{ $value }}/sec" - # Anomaly detection - alert: AnomalyDetected expr: auth_anomalies_detected_total > 0 for: 1m @@ -1015,7 +997,6 @@ groups: summary: "Security anomaly detected" 
description: "{{ $labels.anomaly_type }} detected" - # Permission denied spike - alert: PermissionDeniedSpike expr: rate(auth_permission_checks_total{result="denied"}[5m]) > 50 for: 2m @@ -1035,9 +1016,6 @@ groups: **Logging**: ```typescript -// VI: Structured logging cho security events -// EN: Structured logging for security events - logger.info('Login successful', { eventType: 'auth.login.success', userId: user.id, @@ -1102,14 +1080,12 @@ logger.error('Login failed', { **Application Code**: ```typescript -// Custom metrics for authentication - import { Counter, Histogram, Gauge } from 'prom-client'; export const loginAttempts = new Counter({ name: 'auth_login_attempts_total', help: 'Total login attempts', - labelNames: ['status'] // success, failure + labelNames: ['status'] }); export const loginDuration = new Histogram({ @@ -1121,16 +1097,15 @@ export const loginDuration = new Histogram({ export const permissionChecks = new Counter({ name: 'auth_permission_checks_total', help: 'Total permission checks', - labelNames: ['result'] // granted, denied + labelNames: ['result'] }); export const suspiciousActivities = new Counter({ name: 'auth_suspicious_activities_total', help: 'Suspicious activities detected', - labelNames: ['type'] // new_device, new_ip, brute_force, etc. 
+ labelNames: ['type'] }); -// Usage loginAttempts.inc({ status: 'success' }); loginDuration.observe(duration); permissionChecks.inc({ result: 'granted' }); @@ -1139,13 +1114,10 @@ suspiciousActivities.inc({ type: 'new_device' }); **Alerting Rules**: ```yaml -# Security alerting rules - groups: - name: security_alerts interval: 30s rules: - # High failed login rate - alert: HighFailedLoginRate expr: rate(auth_login_attempts_total{status="failure"}[5m]) > 10 for: 2m @@ -1155,7 +1127,6 @@ groups: summary: "High failed login rate detected" description: "Failed login rate is {{ $value }}/sec" - # Brute force attack - alert: BruteForceAttack expr: | sum by (user_id) ( @@ -1168,7 +1139,6 @@ groups: summary: "Potential brute force attack" description: "User {{ $labels.user_id }} has > 5 failed logins/min" - # Account lockout spike - alert: AccountLockoutSpike expr: rate(auth_account_lockouts_total[5m]) > 5 for: 2m @@ -1178,7 +1148,6 @@ groups: summary: "Account lockout spike detected" description: "Lockout rate is {{ $value }}/sec" - # Suspicious activity - alert: SuspiciousActivity expr: rate(auth_suspicious_activities_total[5m]) > 10 for: 1m @@ -1188,7 +1157,6 @@ groups: summary: "Suspicious activity detected" description: "Suspicious activity rate: {{ $value }}/sec" - # Anomaly detection - alert: AnomalyDetected expr: auth_anomalies_detected_total > 0 for: 1m @@ -1198,7 +1166,6 @@ groups: summary: "Security anomaly detected" description: "{{ $labels.anomaly_type }} detected" - # Permission denied spike - alert: PermissionDeniedSpike expr: rate(auth_permission_checks_total{result="denied"}[5m]) > 50 for: 2m @@ -1218,8 +1185,6 @@ groups: **Logging**: ```typescript -// Structured logging for security events - logger.info('Login successful', { eventType: 'auth.login.success', userId: user.id, @@ -1256,7 +1221,6 @@ logger.error('Login failed', { - Retention policy compliance - Anonymization after retention period - ## Tài liệu Liên quan / Related Documentation - [System 
Design](./system-design.md) - Kiến trúc tổng thể / Overall architecture @@ -1267,3 +1231,88 @@ logger.error('Login failed', { **Cập nhật Lần cuối / Last Updated**: 2026-01-07 **Tác giả / Authors**: GoodGo Security Team + +## Quick Tips + +### 🎨 Color Palette Reference (Dark Theme) + +| Node Type | Color | Hex | Tailwind | Usage | Example | +|-----------|-------|-----|----------|-------|---------| +| **Primary** | Blue | `#1d4ed8` | `bg-blue-700` | Core components, Identity, IAM, Permission Checks | JWT Validation, Auth Services | +| **Secondary**| Purple| `#7e22ce` | `bg-purple-700`| Data stores, Database, Queues | PostgreSQL, Redis | +| **Success** | Green | `#15803d` | `bg-green-700` | Valid, Allowed, Safe, Completed, TLS | Allow Request, Secure Connection | +| **Error** | Red | `#b91c1c` | `bg-red-700` | Blocked, Invalid, Failed, Critical, Encryption Keys | Block + Alert, Vault, Critical Errors | +| **Warning** | Orange| `#c2410c` | `bg-orange-700`| MFA, Suspicious, Latency, Cache, Alerts | Require MFA, WAF, SIEM | +| **Base** | Grey | `#1f2937` | `bg-gray-800` | External systems, Infrastructure, Logs | Cache, Monitoring | + +### 🔧 Mermaid Common Issues + +| Issue | Sign | Solution | +|-------|------|----------| +| **Parse Error** | Unexpected PIPE/character | Check for missing spaces after `graph TD` | +| **Box Not Showing** | Node missing in diagram | Verify node syntax: `Node[Label]` | +| **Color Not Applied** | Node has no color | Add style: `style Node fill:#1d4ed8,stroke:#fff,stroke-width:2px,color:#fff` | +| **Arrow Issues** | Connection not visible | Check arrow syntax: `-->` (solid), `-.->`(dashed) | +| **Text Not Readable** | Dark text on dark bg | Always use `color:#fff` (white text) | +| **Subgraph Issues** | Broken layout | Ensure proper indentation and `end` statement | + +### 📊 Color Pattern Quick Reference + +```mermaid +graph LR + A[Input] --> B[Process] + B --> C{Decision} + C -->|Yes| D[Success] + C -->|No| E[Error] + + style A 
fill:#1f2937,stroke:#fff,stroke-width:2px,color:#fff + style B fill:#1d4ed8,stroke:#fff,stroke-width:2px,color:#fff + style C fill:#c2410c,stroke:#fff,stroke-width:2px,color:#fff + style D fill:#15803d,stroke:#fff,stroke-width:2px,color:#fff + style E fill:#b91c1c,stroke:#fff,stroke-width:2px,color:#fff +``` + +**Pattern Template**: +``` +style NodeName fill:#color,stroke:#fff,stroke-width:2px,color:#fff +``` + +### 🎯 Visual Indicators + +| Emoji | Meaning | Color | Usage | +|-------|---------|-------|-------| +| ✅ | Secure/Allowed/Valid | Green (#15803d) | Successful auth, allowed access | +| ❌ | Blocked/Denied/Invalid | Red (#b91c1c) | Failed login, access denied | +| ⚠️ | Warning/MFA/Alert | Orange (#c2410c) | Require MFA, suspicious activity | +| 🔒 | Encrypted/Secure | Blue/Purple (#1d4ed8, #7e22ce) | Encrypted data, secure channel | +| ☁️ | Cloud/External | Grey (#1f2937) | External services, cloud resources | +| 🔑 | Authentication | Orange (#c2410c) | Auth tokens, keys, credentials | +| 🛡️ | Security Layer | Green (#15803d) | Security controls, protection | +| 📊 | Monitoring | Blue (#1d4ed8) | Metrics, dashboards, logs | + +### 🚀 Diagram Best Practices + +1. **Always use dark palette** with white text (`color:#fff`) +2. **Consistent stroke**: `stroke:#fff,stroke-width:2px` +3. **Logical color mapping**: + - Blue = Core processes + - Green = Success/Allow + - Red = Error/Block + - Orange = Warning/MFA + - Purple = Data stores + - Grey = External systems + +4. **Readable labels**: Use `
` for line breaks in labels +5. **Arrow clarity**: Solid (`-->`) for main flow, dashed (`-.->`) for secondary/async +6. **Subgraph organization**: Group related components + +### 🔍 Mermaid Debugging Checklist + +- [ ] Graph type declared? (`graph TD`, `sequenceDiagram`, `erDiagram`) +- [ ] All nodes have unique IDs? +- [ ] Arrows have proper syntax? (`-->`, `-.->`, `==>`) +- [ ] Style definitions after graph content? +- [ ] All subgraphs have `end` statement? +- [ ] Labels escaped properly? (use quotes for special chars) +- [ ] Color values correct? (3- or 6-digit hex with #) +- [ ] White text applied? (`color:#fff`) diff --git a/docs/vi/architecture/service-communication.md b/docs/vi/architecture/service-communication.md deleted file mode 100644 index 3028fa06..00000000 --- a/docs/vi/architecture/service-communication.md +++ /dev/null @@ -1,58 +0,0 @@ -# Giao Tiếp Giữa Các Service - -## Các Mẫu Giao Tiếp - -### Giao Tiếp Đồng Bộ (HTTP/REST) - -Các service giao tiếp đồng bộ qua HTTP REST APIs thông qua Traefik API Gateway. 
- -**Ví dụ:** -```typescript -// Web App -> Auth Service -const response = await fetch('http://api.goodgo.vn/api/v1/auth/login', { - method: 'POST', - body: JSON.stringify({ email, password }), -}); -``` - -### Giao Tiếp Service-to-Service - -Các service có thể giao tiếp trực tiếp qua mạng nội bộ: - -```typescript -// Auth Service -> Notification Service (tương lai) -const response = await fetch('http://notification-service:5003/api/v1/notifications', { - method: 'POST', - headers: { 'X-Service-Auth': process.env.INTERNAL_API_KEY }, - body: JSON.stringify({ userId, message }), -}); -``` - -## API Gateway Routing - -Traefik định tuyến requests dựa trên: -- Host header (`api.goodgo.vn`) -- Path prefix (`/api/v1/auth`) - -## Xử Lý Lỗi - -Tất cả services tuân theo định dạng error response nhất quán: - -```json -{ - "success": false, - "error": { - "code": "AUTH_001", - "message": "Invalid credentials", - "details": {} - }, - "timestamp": "2024-01-01T00:00:00.000Z" -} -``` - -## Retry và Circuit Breaker - -Triển khai trong tương lai: -- Exponential backoff cho retries -- Circuit breaker pattern cho fault tolerance -- Fallback mechanisms diff --git a/docs/vi/architecture/system-design.md b/docs/vi/architecture/system-design.md index 4a3de7e9..9ca46a90 100644 --- a/docs/vi/architecture/system-design.md +++ b/docs/vi/architecture/system-design.md @@ -1,11 +1,26 @@ -# Thiết Kế Hệ Thống / System Design +# Kiến Trúc Thiết Kế Hệ Thống -> **VI**: Kiến trúc tổng thể của nền tảng GoodGo Microservices -> **EN**: Overall architecture of GoodGo Microservices Platform +Kiến trúc tổng thể của nền tảng GoodGo Microservices -## Sơ đồ Tổng quan / Overview Diagram +## Sơ đồ Tổng quan ```mermaid +%%{init: {'theme':'base', 'themeVariables': { + 'primaryTextColor':'#000', + 'secondaryTextColor':'#000', + 'tertiaryTextColor':'#000', + 'textColor':'#000', + 'mainBkg':'#fff', + 'secondBkg':'#fff', + 'lineColor':'#333', + 'border1':'#000', + 'border2':'#000', + 'clusterBkg':'#fff', + 
'clusterBorder':'#000', + 'titleColor':'#000', + 'edgeLabelBackground':'#fff', + 'nodeTextColor':'#fff' +}}}%% graph TD subgraph "Client Layer" Web[Web App
Next.js] @@ -70,16 +85,22 @@ graph TD Loki --> Grafana Jaeger --> Grafana - style Traefik fill:#e1f5ff - style DB fill:#f0e1ff - style Cache fill:#fff4e1 - style Kafka fill:#d4edda - style Grafana fill:#ffe1e1 + style Web fill:#1565c0,stroke:#fff,stroke-width:2px,color:#fff + style Mobile fill:#1565c0,stroke:#fff,stroke-width:2px,color:#fff + style Traefik fill:#0f4c81,stroke:#fff,stroke-width:2px,color:#fff + style IAM fill:#283593,stroke:#fff,stroke-width:2px,color:#fff + style Future1 fill:#4527a0,stroke:#fff,stroke-width:2px,color:#fff + style Future2 fill:#4527a0,stroke:#fff,stroke-width:2px,color:#fff + style DB fill:#5e35b1,stroke:#fff,stroke-width:2px,color:#fff + style Cache fill:#ef6c00,stroke:#fff,stroke-width:2px,color:#fff + style Kafka fill:#2e7d32,stroke:#fff,stroke-width:2px,color:#fff + style Prom fill:#c62828,stroke:#fff,stroke-width:2px,color:#fff + style Loki fill:#d84315,stroke:#fff,stroke-width:2px,color:#fff + style Jaeger fill:#e65100,stroke:#fff,stroke-width:2px,color:#fff + style Grafana fill:#b71c1c,stroke:#fff,stroke-width:2px,color:#fff ``` -## Mô tả Kiến trúc / Architecture Description - -### VI: Phần Tiếng Việt +## Mô tả Kiến trúc GoodGo Platform được xây dựng theo kiến trúc microservices với các nguyên tắc sau: @@ -99,27 +120,7 @@ GoodGo Platform được xây dựng theo kiến trúc microservices với các - **API Gateway**: Traefik - **Observability**: Prometheus, Grafana, Loki, Jaeger -### EN: English Section - -GoodGo Platform is built on microservices architecture with the following principles: - -**Core Principles**: -1. **Service Independence**: Each service has its own database and can be deployed independently -2. **API Gateway Pattern**: Traefik handles routing, load balancing, and cross-cutting concerns -3. **Shared Libraries**: Common functionality extracted into shared packages (`@goodgo/*`) -4. **Infrastructure as Code**: All infrastructure configuration is version controlled -5. 
**Observability First**: Complete metrics, logging, and distributed tracing - -**Technology Stack**: -- **Frontend**: Next.js 14+ (App Router), Flutter 3.x -- **Backend**: Node.js 20+, TypeScript 5+, Express -- **Database**: Neon PostgreSQL (serverless) -- **Cache**: Redis (multi-layer caching) -- **Message Broker**: Apache Kafka -- **API Gateway**: Traefik -- **Observability**: Prometheus, Grafana, Loki, Jaeger - -## Bối cảnh Hệ thống / System Context +## Bối cảnh Hệ thống ```mermaid C4Context @@ -146,7 +147,7 @@ C4Context Rel(platform, monitoring, "Sends metrics, logs, traces", "HTTP, gRPC") ``` -## Thành phần / Components +## Thành phần ### Frontend Layer @@ -262,7 +263,7 @@ C4Context **Vị trí File**: [`infra/kafka/`](file:///Users/velikho/Desktop/WORKING/Base/infra/kafka) -## Luồng Dữ liệu / Data Flow +## Luồng Dữ liệu ```mermaid sequenceDiagram @@ -297,7 +298,7 @@ sequenceDiagram Note over Kafka: Event consumers process asynchronously ``` -**VI Giải thích chi tiết**: +**Giải thích chi tiết**: 1. **Request**: Client gửi HTTPS request đến Traefik 2. **Gateway Processing**: Traefik thực hiện rate limiting và JWT validation 3. **Routing**: Traefik route request đến service phù hợp @@ -308,18 +309,7 @@ sequenceDiagram 8. **Event Publishing**: Publish domain events đến Kafka (async) 9. **Response**: Trả về response cho client qua Traefik -**EN Detailed Explanation**: -1. **Request**: Client sends HTTPS request to Traefik -2. **Gateway Processing**: Traefik performs rate limiting and JWT validation -3. **Routing**: Traefik routes request to appropriate service -4. **Cache Check**: Service checks L1 (memory) → L2 (Redis) cache -5. **Database Query**: If cache miss, query from PostgreSQL -6. **Cache Update**: Store result in cache with appropriate TTL -7. **Business Logic**: Process business logic -8. **Event Publishing**: Publish domain events to Kafka (async) -9. 
**Response**: Return response to client via Traefik - -## Kiến trúc Database / Database Architecture +## Kiến trúc Database ```mermaid erDiagram @@ -383,27 +373,21 @@ erDiagram } ``` -**VI Mô tả**: +**Mô tả**: - **Database per Service**: Mỗi service có database schema riêng - **Shared Database**: Hiện tại sử dụng shared Neon PostgreSQL, schemas isolated bằng Prisma - **Event Sourcing**: Audit events lưu tất cả thay đổi quan trọng - **Soft Delete**: Sử dụng `deletedAt` field thay vì hard delete -**EN Description**: -- **Database per Service**: Each service has its own database schema -- **Shared Database**: Currently using shared Neon PostgreSQL, schemas isolated by Prisma -- **Event Sourcing**: Audit events store all important changes -- **Soft Delete**: Use `deletedAt` field instead of hard delete - -## Quyết định Thiết kế / Design Decisions +## Quyết định Thiết kế ### Quyết định 1: Microservices Architecture -**VI Bối cảnh**: Cần khả năng scale độc lập và deploy riêng biệt cho từng business domain +**Bối cảnh**: Cần khả năng scale độc lập và deploy riêng biệt cho từng business domain -**VI Quyết định**: Sử dụng microservices architecture với database per service pattern +**Quyết định**: Sử dụng microservices architecture với database per service pattern -**VI Hậu quả**: +**Hậu quả**: - ✅ **Tích cực**: - Scale độc lập từng service theo nhu cầu - Deploy riêng biệt, giảm risk khi release @@ -415,35 +399,17 @@ erDiagram - Distributed transactions phức tạp (Saga pattern) - Operational overhead (monitoring, deployment) -**VI Các lựa chọn thay thế**: Monolith, Modular Monolith - -**EN Context**: Need independent scaling and deployment for each business domain - -**EN Decision**: Use microservices architecture with database per service pattern - -**EN Consequences**: -- ✅ **Positive**: - - Independent scaling per service based on demand - - Independent deployment, reduced release risk - - Fault isolation - one service failure doesn't affect entire system - - Technology 
flexibility - each service can use different tech stack -- ❌ **Negative**: - - More complex than monolith (distributed systems challenges) - - Eventual consistency instead of strong consistency - - Complex distributed transactions (Saga pattern) - - Operational overhead (monitoring, deployment) - -**EN Alternatives**: Monolith, Modular Monolith +**Các lựa chọn thay thế**: Monolith, Modular Monolith --- ### Quyết định 2: Traefik as API Gateway -**VI Bối cảnh**: Cần reverse proxy, load balancing, SSL termination, và service discovery +**Bối cảnh**: Cần reverse proxy, load balancing, SSL termination, và service discovery -**VI Quyết định**: Sử dụng Traefik thay vì Kong, NGINX, hoặc AWS API Gateway +**Quyết định**: Sử dụng Traefik thay vì Kong, NGINX, hoặc AWS API Gateway -**VI Hậu quả**: +**Hậu quả**: - ✅ **Tích cực**: - Auto service discovery với Docker labels - Dynamic configuration không cần restart @@ -455,35 +421,17 @@ erDiagram - Plugin ecosystem nhỏ hơn Kong - Community nhỏ hơn NGINX -**VI Các lựa chọn thay thế**: Kong, NGINX, AWS API Gateway, Envoy - -**EN Context**: Need reverse proxy, load balancing, SSL termination, and service discovery - -**EN Decision**: Use Traefik instead of Kong, NGINX, or AWS API Gateway - -**EN Consequences**: -- ✅ **Positive**: - - Auto service discovery with Docker labels - - Dynamic configuration without restart - - Built-in Let's Encrypt support - - Native Kubernetes integration - - Built-in metrics and tracing -- ❌ **Negative**: - - Higher learning curve than NGINX - - Smaller plugin ecosystem than Kong - - Smaller community than NGINX - -**EN Alternatives**: Kong, NGINX, AWS API Gateway, Envoy +**Các lựa chọn thay thế**: Kong, NGINX, AWS API Gateway, Envoy --- ### Quyết định 3: Neon PostgreSQL (Serverless) -**VI Bối cảnh**: Cần database với auto-scaling, branching, và cost-effective cho development +**Bối cảnh**: Cần database với auto-scaling, branching, và cost-effective cho development -**VI Quyết định**: Sử dụng Neon 
PostgreSQL (serverless) thay vì self-hosted PostgreSQL hoặc AWS RDS +**Quyết định**: Sử dụng Neon PostgreSQL (serverless) thay vì self-hosted PostgreSQL hoặc AWS RDS -**VI Hậu quả**: +**Hậu quả**: - ✅ **Tích cực**: - Auto-scaling theo usage - Database branching cho dev/staging @@ -495,27 +443,9 @@ erDiagram - Cold start latency (mitigated by connection pooling) - Limited control over database configuration -**VI Các lựa chọn thay thế**: Self-hosted PostgreSQL, AWS RDS, Google Cloud SQL +**Các lựa chọn thay thế**: Self-hosted PostgreSQL, AWS RDS, Google Cloud SQL -**EN Context**: Need database with auto-scaling, branching, and cost-effective for development - -**EN Decision**: Use Neon PostgreSQL (serverless) instead of self-hosted PostgreSQL or AWS RDS - -**EN Consequences**: -- ✅ **Positive**: - - Auto-scaling based on usage - - Database branching for dev/staging - - Pay-per-use pricing model - - Automatic backups and point-in-time recovery - - No infrastructure management -- ❌ **Negative**: - - Vendor lock-in - - Cold start latency (mitigated by connection pooling) - - Limited control over database configuration - -**EN Alternatives**: Self-hosted PostgreSQL, AWS RDS, Google Cloud SQL - -## Đặc điểm Hiệu suất / Performance Characteristics +## Đặc điểm Hiệu suất | Chỉ số / Metric | Mục tiêu / Target | Ghi chú / Notes | |-----------------|-------------------|-----------------| @@ -529,7 +459,7 @@ erDiagram | **Service Availability** | > 99.9% | Monthly uptime target | | **Error Rate** | < 1% | 4xx + 5xx errors | -**VI Tối ưu hóa Hiệu suất**: +**Tối ưu hóa Hiệu suất**: - Multi-layer caching (L1: Memory, L2: Redis) - Connection pooling cho database - Pagination cho list endpoints (max 100 items) @@ -537,17 +467,7 @@ erDiagram - Async event publishing (fire-and-forget) - CDN cho static assets (Next.js) -**EN Performance Optimizations**: -- Multi-layer caching (L1: Memory, L2: Redis) -- Connection pooling for database -- Pagination for list endpoints (max 100 items) -- 
Database indexes for frequently queried fields -- Async event publishing (fire-and-forget) -- CDN for static assets (Next.js) - -## Cân nhắc Bảo mật / Security Considerations - -### VI: Phần Tiếng Việt +## Cân nhắc Bảo mật **Authentication**: - JWT với RS256 (asymmetric signing) @@ -588,50 +508,7 @@ erDiagram - 7-year retention cho compliance - Immutable audit logs - Correlation IDs cho request tracing - -### EN: English Section - -**Authentication**: -- JWT with RS256 (asymmetric signing) -- Access token: 15 minutes expiry -- Refresh token: 7 days expiry, rotation on use -- httpOnly cookies for token storage -- MFA support (TOTP, backup codes) - -**Authorization**: -- RBAC (Role-Based Access Control) -- ABAC (Attribute-Based Access Control) -- Permission format: `resource:action:scope` -- Permission caching (5 min TTL) -- Zero-trust device validation - -**Network Security**: -- TLS 1.2+ enforcement -- HTTPS-only (HSTS headers) -- Rate limiting: 100 req/15min (standard), 10 req/hour (strict) -- CORS whitelist from environment variables -- Network policies (Kubernetes) - -**Data Protection**: -- AES-256-GCM encryption for PII at rest -- bcrypt (cost 12) for password hashing -- SHA-256 hashing for tokens before storage -- Database encryption at rest (Neon) -- TLS in-transit for all connections - -**Secrets Management**: -- Kubernetes secrets for production -- Environment variables validation with Zod -- No hardcoded secrets in code -- Quarterly secret rotation - -**Audit Trail**: -- Event sourcing for all auth events -- 7-year retention for compliance -- Immutable audit logs -- Correlation IDs for request tracing - -## Triển khai / Deployment +## Triển khai ```mermaid graph TD @@ -697,14 +574,21 @@ graph TD Loki --> Grafana Jaeger --> Grafana - style LB fill:#e1f5ff - style DB fill:#f0e1ff - style Redis fill:#fff4e1 - style Kafka fill:#d4edda - style Grafana fill:#ffe1e1 + style LB fill:#1565c0,stroke:#fff,stroke-width:2px,color:#fff + style Traefik 
fill:#0f4c81,stroke:#fff,stroke-width:2px,color:#fff + style IAM fill:#283593,stroke:#fff,stroke-width:2px,color:#fff + style Service1 fill:#4527a0,stroke:#fff,stroke-width:2px,color:#fff + style Service2 fill:#4527a0,stroke:#fff,stroke-width:2px,color:#fff + style DB fill:#5e35b1,stroke:#fff,stroke-width:2px,color:#fff + style Redis fill:#ef6c00,stroke:#fff,stroke-width:2px,color:#fff + style Kafka fill:#2e7d32,stroke:#fff,stroke-width:2px,color:#fff + style Prom fill:#c62828,stroke:#fff,stroke-width:2px,color:#fff + style Loki fill:#d84315,stroke:#fff,stroke-width:2px,color:#fff + style Jaeger fill:#e65100,stroke:#fff,stroke-width:2px,color:#fff + style Grafana fill:#b71c1c,stroke:#fff,stroke-width:2px,color:#fff ``` -### VI: Chiến lược Triển khai +### Chiến lược Triển khai **Deployment Strategy**: - Rolling updates (maxSurge: 1, maxUnavailable: 0) @@ -736,43 +620,9 @@ graph TD - **Local**: Docker Compose - **Staging**: Kubernetes cluster (shared) - **Production**: Kubernetes cluster (dedicated) +## Giám sát & Khả năng quan sát -### EN: Deployment Strategy - -**Deployment Strategy**: -- Rolling updates (maxSurge: 1, maxUnavailable: 0) -- Zero-downtime deployments -- Blue-green deployment for major releases -- Canary deployment for high-risk changes - -**Auto-scaling**: -- Horizontal Pod Autoscaler (HPA) - - Min replicas: 2 - - Max replicas: 10 - - Target CPU: 70% - - Target Memory: 80% - -**Resource Allocation**: -| Service | Requests | Limits | -|---------|----------|--------| -| **Microservices** | 256Mi RAM, 250m CPU | 512Mi RAM, 500m CPU | -| **Traefik** | 512Mi RAM, 500m CPU | 1Gi RAM, 1000m CPU | -| **Redis** | 2Gi RAM, 1 CPU | 4Gi RAM, 2 CPU | -| **Prometheus** | 4Gi RAM, 2 CPU | 8Gi RAM, 4 CPU | - -**Health Checks**: -- Liveness probe: `/health/live` (K8s restarts if fails) -- Readiness probe: `/health/ready` (K8s removes from LB if fails) -- Startup probe: `/health/live` (initial delay 30s) - -**Environments**: -- **Local**: Docker Compose -- 
**Staging**: Kubernetes cluster (shared) -- **Production**: Kubernetes cluster (dedicated) - -## Giám sát & Khả năng quan sát / Monitoring & Observability - -### VI: Chỉ số Chính +### Chỉ số Chính **Application Metrics**: - `http_requests_total` - Total HTTP requests (counter) @@ -838,82 +688,16 @@ graph TD - Trace sampling: 10% in production, 100% in staging - Span attributes: service, operation, user_id, correlation_id -### EN: Key Metrics +## Tài liệu Liên quan -**Application Metrics**: -- `http_requests_total` - Total HTTP requests (counter) -- `http_request_duration_seconds` - Request duration (histogram) -- `http_requests_active` - Active requests (gauge) -- `cache_hits_total` / `cache_misses_total` - Cache performance -- `db_query_duration_seconds` - Database query duration +- [Event-Driven Architecture](./event-driven-architecture.md) - Kiến trúc hướng sự kiện +- [Caching Architecture](./caching-architecture.md) - Chiến lược caching +- [Security Architecture](./security-architecture.md) - Kiến trúc bảo mật +- [Observability Architecture](./observability-architecture.md) - Khả năng quan sát +- [Data Consistency Patterns](./data-consistency-patterns.md) - Mẫu nhất quán dữ liệu +- [Microservices Communication](./microservices-communication.md) - Giao tiếp microservices -**Infrastructure Metrics**: -- CPU usage, Memory usage per pod -- Network I/O, Disk I/O -- Pod restart count -- Node resource utilization - -**Business Metrics**: -- User registrations per day -- Login success/failure rate -- API usage by endpoint -- Error rate by service - -**Health Checks**: -- `/health/live` - Liveness probe (service running?) -- `/health/ready` - Readiness probe (ready for traffic?) 
-- `/metrics` - Prometheus metrics endpoint - -**Alerting Rules**: -```yaml -# High error rate -- alert: HighErrorRate - expr: rate(http_requests_total{status=~"5.."}[5m]) > 0.05 - for: 2m - severity: warning - -# High latency -- alert: HighLatency - expr: histogram_quantile(0.95, http_request_duration_seconds_bucket) > 0.5 - for: 5m - severity: warning - -# Service down -- alert: ServiceDown - expr: up == 0 - for: 1m - severity: critical - -# High memory usage -- alert: HighMemoryUsage - expr: container_memory_usage_bytes / container_spec_memory_limit_bytes > 0.85 - for: 5m - severity: warning -``` - -**Logging**: -- Structured JSON logging with Winston -- Correlation IDs for request tracing -- Log levels: error, warn, info, debug -- Log aggregation with Loki -- 7 days retention - -**Distributed Tracing**: -- OpenTelemetry instrumentation -- Jaeger backend -- Trace sampling: 10% in production, 100% in staging -- Span attributes: service, operation, user_id, correlation_id - -## Tài liệu Liên quan / Related Documentation - -- [Event-Driven Architecture](./event-driven-architecture.md) - Kiến trúc hướng sự kiện / Event-driven architecture -- [Caching Architecture](./caching-architecture.md) - Chiến lược caching / Caching strategy -- [Security Architecture](./security-architecture.md) - Kiến trúc bảo mật / Security architecture -- [Observability Architecture](./observability-architecture.md) - Khả năng quan sát / Observability -- [Data Consistency Patterns](./data-consistency-patterns.md) - Mẫu nhất quán dữ liệu / Data consistency patterns -- [Microservices Communication](./microservices-communication.md) - Giao tiếp microservices / Microservices communication - -## Tham khảo / References +## Tham khảo - [Microservices Patterns](https://microservices.io/patterns/index.html) - Microservices pattern catalog - [Twelve-Factor App](https://12factor.net/) - Best practices for cloud-native apps @@ -923,6 +707,27 @@ graph TD --- -**Cập nhật Lần cuối / Last Updated**: 
2026-01-07 -**Tác giả / Authors**: GoodGo Architecture Team -**Người review / Reviewers**: GoodGo Development Team +**Cập nhật Lần cuối**: 2026-01-07 +**Tác giả**: GoodGo Architecture Team +**Người review**: GoodGo Development Team + +## Quick Tips + +### Mermaid Common Issues +- **Arrow Syntax**: Use `-->` for solid arrows, `-.->` for dotted arrows. +- **Node IDs**: Avoid spaces/special chars in IDs (e.g., `NodeA` not `Node A`). +- **Subgraphs**: Ensure `subgraph` names are unique and descriptive. + +### Color Pattern Quick Reference +| Element | Dark Color | Text Color | +|---------|------------|------------| +| **Blue (Primary)** | `#0f4c81` | `#ffffff` | +| **Purple (DB)** | `#5e35b1` | `#ffffff` | +| **Orange (Cache)** | `#ef6c00` | `#ffffff` | +| **Green (Success)** | `#2e7d32` | `#ffffff` | +| **Red (Alert)** | `#c62828` | `#ffffff` | + +### Visual Indicators +- ✅ **Khuyên dùng** +- ❌ **Không khuyên dùng** +- ⚠️ **Cảnh báo**