From 4ccfb220bee6be54d0e7e0deb8564e94ae6673ed Mon Sep 17 00:00:00 2001 From: Ho Ngoc Hai Date: Thu, 8 Jan 2026 10:13:36 +0700 Subject: [PATCH] . --- .../docs/en/api/openapi/iam-service.yaml | 102 -- .../en/architecture/caching-architecture.md | 382 ----- .../architecture/data-consistency-patterns.md | 684 --------- .../architecture/event-driven-architecture.md | 375 ----- .../docs/en/architecture/iam-proposal.md | 339 ----- .../microservices-communication.md | 363 ----- .../observability-architecture.md | 334 ----- .../en/architecture/security-architecture.md | 569 -------- .../en/architecture/service-communication.md | 58 - .../docs/en/architecture/system-design.md | 929 ------------ .../content/docs/en/guides/deployment.md | 234 --- .../content/docs/en/guides/development.md | 211 --- .../content/docs/en/guides/getting-started.md | 214 --- .../content/docs/en/guides/iam-migration.md | 209 --- .../docs/en/guides/kubernetes-local.md | 273 ---- .../docs/en/guides/local-deployment.md | 263 ---- .../docs/en/guides/local-development.md | 250 ---- .../content/docs/en/guides/mermaid.md | 567 -------- .../content/docs/en/guides/neon-database.md | 215 --- .../content/docs/en/guides/observability.md | 89 -- .../content/docs/en/guides/troubleshooting.md | 218 --- .../docs/en/onboarding/new-developer-guide.md | 89 -- .../docs/en/runbooks/incident-response.md | 65 - .../docs/en/runbooks/rollback-procedure.md | 71 - .../web-docs/content/docs/en/skills/README.md | 222 --- .../content/docs/en/skills/api-design.md | 602 -------- .../docs/en/skills/api-gateway-advanced.md | 204 --- .../docs/en/skills/api-versioning-strategy.md | 404 ------ .../docs/en/skills/caching-patterns.md | 369 ----- .../docs/en/skills/cicd-advanced-patterns.md | 544 ------- .../content/docs/en/skills/comment-code.md | 489 ------- .../en/skills/configuration-management.md | 131 -- .../en/skills/data-consistency-patterns.md | 363 ----- .../content/docs/en/skills/database-prisma.md | 571 -------- 
.../docs/en/skills/deployment-kubernetes.md | 618 -------- .../content/docs/en/skills/documentation.md | 507 ------- .../docs/en/skills/error-handling-patterns.md | 460 ------ .../en/skills/event-driven-architecture.md | 452 ------ .../docs/en/skills/infrastructure-as-code.md | 224 --- .../en/skills/inter-service-communication.md | 280 ---- .../microservices-development-process.md | 660 --------- .../docs/en/skills/middleware-patterns.md | 413 ------ .../en/skills/observability-monitoring.md | 658 --------- .../en/skills/performance-optimization.md | 158 -- .../content/docs/en/skills/project-rules.md | 400 ------ .../docs/en/skills/repository-pattern.md | 334 ----- .../docs/en/skills/resilience-patterns.md | 239 ---- .../content/docs/en/skills/security.md | 925 ------------ .../en/skills/service-discovery-registry.md | 323 ----- .../docs/en/skills/service-layer-patterns.md | 338 ----- .../docs/en/skills/testing-patterns.md | 589 -------- .../content/docs/en/templates/README.md | 482 ------- .../content/docs/en/templates/architecture.md | 228 --- .../content/docs/en/templates/guide.md | 257 ---- .../docs/en/templates/mermaid-guide.md | 539 ------- .../docs/en/templates/skill-pattern.md | 475 ------ .../docs/vi/api/openapi/iam-service.yaml | 102 -- .../vi/architecture/caching-architecture.md | 1104 -------------- .../architecture/data-consistency-patterns.md | 745 ---------- .../architecture/event-driven-architecture.md | 639 --------- .../docs/vi/architecture/iam-proposal.md | 339 ----- .../microservices-communication.md | 383 ----- .../observability-architecture.md | 450 ------ .../vi/architecture/security-architecture.md | 1269 ----------------- .../vi/architecture/service-communication.md | 58 - .../docs/vi/architecture/system-design.md | 928 ------------ .../content/docs/vi/guides/deployment.md | 234 --- .../content/docs/vi/guides/development.md | 211 --- .../content/docs/vi/guides/getting-started.md | 214 --- .../content/docs/vi/guides/iam-migration.md | 204 
--- .../docs/vi/guides/kubernetes-local.md | 273 ---- .../docs/vi/guides/local-deployment.md | 261 ---- .../docs/vi/guides/local-development.md | 250 ---- .../content/docs/vi/guides/mermaid.md | 583 -------- .../content/docs/vi/guides/neon-database.md | 215 --- .../content/docs/vi/guides/observability.md | 89 -- .../content/docs/vi/guides/troubleshooting.md | 218 --- .../docs/vi/onboarding/new-developer-guide.md | 89 -- .../docs/vi/runbooks/incident-response.md | 65 - .../docs/vi/runbooks/rollback-procedure.md | 71 - .../web-docs/content/docs/vi/skills/README.md | 277 ---- .../content/docs/vi/skills/api-design.md | 578 -------- .../docs/vi/skills/api-gateway-advanced.md | 197 --- .../docs/vi/skills/api-versioning-strategy.md | 456 ------ .../docs/vi/skills/caching-patterns.md | 420 ------ .../docs/vi/skills/cicd-advanced-patterns.md | 578 -------- .../content/docs/vi/skills/comment-code.md | 682 --------- .../vi/skills/configuration-management.md | 119 -- .../vi/skills/data-consistency-patterns.md | 378 ----- .../content/docs/vi/skills/database-prisma.md | 683 --------- .../docs/vi/skills/deployment-kubernetes.md | 585 -------- .../content/docs/vi/skills/documentation.md | 602 -------- .../docs/vi/skills/error-handling-patterns.md | 488 ------- .../vi/skills/event-driven-architecture.md | 505 ------- .../docs/vi/skills/infrastructure-as-code.md | 244 ---- .../vi/skills/inter-service-communication.md | 303 ---- .../microservices-development-process.md | 344 ----- .../docs/vi/skills/middleware-patterns.md | 462 ------ .../vi/skills/observability-monitoring.md | 723 ---------- .../vi/skills/performance-optimization.md | 154 -- .../content/docs/vi/skills/project-rules.md | 564 -------- .../docs/vi/skills/repository-pattern.md | 421 ------ .../docs/vi/skills/resilience-patterns.md | 217 --- .../content/docs/vi/skills/security.md | 853 ----------- .../vi/skills/service-discovery-registry.md | 344 ----- .../docs/vi/skills/service-layer-patterns.md | 416 ------ 
.../docs/vi/skills/testing-patterns.md | 841 ----------- .../content/docs/vi/templates/README.md | 205 --- .../content/docs/vi/templates/architecture.md | 227 --- .../content/docs/vi/templates/guide.md | 256 ---- .../docs/vi/templates/mermaid-guide.md | 543 ------- .../docs/vi/templates/skill-pattern.md | 474 ------ apps/web-docs/src/app/[locale]/layout.tsx | 2 +- .../src/components/docs/DocsContentClient.tsx | 17 +- .../src/components/docs/DocsSearch.tsx | 29 +- 115 files changed, 28 insertions(+), 43714 deletions(-) delete mode 100644 apps/web-docs/content/docs/en/api/openapi/iam-service.yaml delete mode 100644 apps/web-docs/content/docs/en/architecture/caching-architecture.md delete mode 100644 apps/web-docs/content/docs/en/architecture/data-consistency-patterns.md delete mode 100644 apps/web-docs/content/docs/en/architecture/event-driven-architecture.md delete mode 100644 apps/web-docs/content/docs/en/architecture/iam-proposal.md delete mode 100644 apps/web-docs/content/docs/en/architecture/microservices-communication.md delete mode 100644 apps/web-docs/content/docs/en/architecture/observability-architecture.md delete mode 100644 apps/web-docs/content/docs/en/architecture/security-architecture.md delete mode 100644 apps/web-docs/content/docs/en/architecture/service-communication.md delete mode 100644 apps/web-docs/content/docs/en/architecture/system-design.md delete mode 100644 apps/web-docs/content/docs/en/guides/deployment.md delete mode 100644 apps/web-docs/content/docs/en/guides/development.md delete mode 100644 apps/web-docs/content/docs/en/guides/getting-started.md delete mode 100644 apps/web-docs/content/docs/en/guides/iam-migration.md delete mode 100644 apps/web-docs/content/docs/en/guides/kubernetes-local.md delete mode 100644 apps/web-docs/content/docs/en/guides/local-deployment.md delete mode 100644 apps/web-docs/content/docs/en/guides/local-development.md delete mode 100644 apps/web-docs/content/docs/en/guides/mermaid.md delete mode 100644 
apps/web-docs/content/docs/en/guides/neon-database.md delete mode 100644 apps/web-docs/content/docs/en/guides/observability.md delete mode 100644 apps/web-docs/content/docs/en/guides/troubleshooting.md delete mode 100644 apps/web-docs/content/docs/en/onboarding/new-developer-guide.md delete mode 100644 apps/web-docs/content/docs/en/runbooks/incident-response.md delete mode 100644 apps/web-docs/content/docs/en/runbooks/rollback-procedure.md delete mode 100644 apps/web-docs/content/docs/en/skills/README.md delete mode 100644 apps/web-docs/content/docs/en/skills/api-design.md delete mode 100644 apps/web-docs/content/docs/en/skills/api-gateway-advanced.md delete mode 100644 apps/web-docs/content/docs/en/skills/api-versioning-strategy.md delete mode 100644 apps/web-docs/content/docs/en/skills/caching-patterns.md delete mode 100644 apps/web-docs/content/docs/en/skills/cicd-advanced-patterns.md delete mode 100644 apps/web-docs/content/docs/en/skills/comment-code.md delete mode 100644 apps/web-docs/content/docs/en/skills/configuration-management.md delete mode 100644 apps/web-docs/content/docs/en/skills/data-consistency-patterns.md delete mode 100644 apps/web-docs/content/docs/en/skills/database-prisma.md delete mode 100644 apps/web-docs/content/docs/en/skills/deployment-kubernetes.md delete mode 100644 apps/web-docs/content/docs/en/skills/documentation.md delete mode 100644 apps/web-docs/content/docs/en/skills/error-handling-patterns.md delete mode 100644 apps/web-docs/content/docs/en/skills/event-driven-architecture.md delete mode 100644 apps/web-docs/content/docs/en/skills/infrastructure-as-code.md delete mode 100644 apps/web-docs/content/docs/en/skills/inter-service-communication.md delete mode 100644 apps/web-docs/content/docs/en/skills/microservices-development-process.md delete mode 100644 apps/web-docs/content/docs/en/skills/middleware-patterns.md delete mode 100644 apps/web-docs/content/docs/en/skills/observability-monitoring.md delete mode 100644 
apps/web-docs/content/docs/en/skills/performance-optimization.md delete mode 100644 apps/web-docs/content/docs/en/skills/project-rules.md delete mode 100644 apps/web-docs/content/docs/en/skills/repository-pattern.md delete mode 100644 apps/web-docs/content/docs/en/skills/resilience-patterns.md delete mode 100644 apps/web-docs/content/docs/en/skills/security.md delete mode 100644 apps/web-docs/content/docs/en/skills/service-discovery-registry.md delete mode 100644 apps/web-docs/content/docs/en/skills/service-layer-patterns.md delete mode 100644 apps/web-docs/content/docs/en/skills/testing-patterns.md delete mode 100644 apps/web-docs/content/docs/en/templates/README.md delete mode 100644 apps/web-docs/content/docs/en/templates/architecture.md delete mode 100644 apps/web-docs/content/docs/en/templates/guide.md delete mode 100644 apps/web-docs/content/docs/en/templates/mermaid-guide.md delete mode 100644 apps/web-docs/content/docs/en/templates/skill-pattern.md delete mode 100644 apps/web-docs/content/docs/vi/api/openapi/iam-service.yaml delete mode 100644 apps/web-docs/content/docs/vi/architecture/caching-architecture.md delete mode 100644 apps/web-docs/content/docs/vi/architecture/data-consistency-patterns.md delete mode 100644 apps/web-docs/content/docs/vi/architecture/event-driven-architecture.md delete mode 100644 apps/web-docs/content/docs/vi/architecture/iam-proposal.md delete mode 100644 apps/web-docs/content/docs/vi/architecture/microservices-communication.md delete mode 100644 apps/web-docs/content/docs/vi/architecture/observability-architecture.md delete mode 100644 apps/web-docs/content/docs/vi/architecture/security-architecture.md delete mode 100644 apps/web-docs/content/docs/vi/architecture/service-communication.md delete mode 100644 apps/web-docs/content/docs/vi/architecture/system-design.md delete mode 100644 apps/web-docs/content/docs/vi/guides/deployment.md delete mode 100644 apps/web-docs/content/docs/vi/guides/development.md delete mode 100644 
apps/web-docs/content/docs/vi/guides/getting-started.md delete mode 100644 apps/web-docs/content/docs/vi/guides/iam-migration.md delete mode 100644 apps/web-docs/content/docs/vi/guides/kubernetes-local.md delete mode 100644 apps/web-docs/content/docs/vi/guides/local-deployment.md delete mode 100644 apps/web-docs/content/docs/vi/guides/local-development.md delete mode 100644 apps/web-docs/content/docs/vi/guides/mermaid.md delete mode 100644 apps/web-docs/content/docs/vi/guides/neon-database.md delete mode 100644 apps/web-docs/content/docs/vi/guides/observability.md delete mode 100644 apps/web-docs/content/docs/vi/guides/troubleshooting.md delete mode 100644 apps/web-docs/content/docs/vi/onboarding/new-developer-guide.md delete mode 100644 apps/web-docs/content/docs/vi/runbooks/incident-response.md delete mode 100644 apps/web-docs/content/docs/vi/runbooks/rollback-procedure.md delete mode 100644 apps/web-docs/content/docs/vi/skills/README.md delete mode 100644 apps/web-docs/content/docs/vi/skills/api-design.md delete mode 100644 apps/web-docs/content/docs/vi/skills/api-gateway-advanced.md delete mode 100644 apps/web-docs/content/docs/vi/skills/api-versioning-strategy.md delete mode 100644 apps/web-docs/content/docs/vi/skills/caching-patterns.md delete mode 100644 apps/web-docs/content/docs/vi/skills/cicd-advanced-patterns.md delete mode 100644 apps/web-docs/content/docs/vi/skills/comment-code.md delete mode 100644 apps/web-docs/content/docs/vi/skills/configuration-management.md delete mode 100644 apps/web-docs/content/docs/vi/skills/data-consistency-patterns.md delete mode 100644 apps/web-docs/content/docs/vi/skills/database-prisma.md delete mode 100644 apps/web-docs/content/docs/vi/skills/deployment-kubernetes.md delete mode 100644 apps/web-docs/content/docs/vi/skills/documentation.md delete mode 100644 apps/web-docs/content/docs/vi/skills/error-handling-patterns.md delete mode 100644 apps/web-docs/content/docs/vi/skills/event-driven-architecture.md delete mode 
100644 apps/web-docs/content/docs/vi/skills/infrastructure-as-code.md delete mode 100644 apps/web-docs/content/docs/vi/skills/inter-service-communication.md delete mode 100644 apps/web-docs/content/docs/vi/skills/microservices-development-process.md delete mode 100644 apps/web-docs/content/docs/vi/skills/middleware-patterns.md delete mode 100644 apps/web-docs/content/docs/vi/skills/observability-monitoring.md delete mode 100644 apps/web-docs/content/docs/vi/skills/performance-optimization.md delete mode 100644 apps/web-docs/content/docs/vi/skills/project-rules.md delete mode 100644 apps/web-docs/content/docs/vi/skills/repository-pattern.md delete mode 100644 apps/web-docs/content/docs/vi/skills/resilience-patterns.md delete mode 100644 apps/web-docs/content/docs/vi/skills/security.md delete mode 100644 apps/web-docs/content/docs/vi/skills/service-discovery-registry.md delete mode 100644 apps/web-docs/content/docs/vi/skills/service-layer-patterns.md delete mode 100644 apps/web-docs/content/docs/vi/skills/testing-patterns.md delete mode 100644 apps/web-docs/content/docs/vi/templates/README.md delete mode 100644 apps/web-docs/content/docs/vi/templates/architecture.md delete mode 100644 apps/web-docs/content/docs/vi/templates/guide.md delete mode 100644 apps/web-docs/content/docs/vi/templates/mermaid-guide.md delete mode 100644 apps/web-docs/content/docs/vi/templates/skill-pattern.md diff --git a/apps/web-docs/content/docs/en/api/openapi/iam-service.yaml b/apps/web-docs/content/docs/en/api/openapi/iam-service.yaml deleted file mode 100644 index 411997e5..00000000 --- a/apps/web-docs/content/docs/en/api/openapi/iam-service.yaml +++ /dev/null @@ -1,102 +0,0 @@ -openapi: 3.0.0 -info: - title: IAM Service API - version: 1.0.0 - description: Identity and Access Management Service API - -servers: - - url: http://localhost/api/v1 - description: Local development - - url: https://api.goodgo.vn/api/v1 - description: Production - -paths: - /auth/register: - post: - summary: 
Register new user - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - email: - type: string - format: email - password: - type: string - minLength: 6 - confirmPassword: - type: string - responses: - '201': - description: User registered successfully - '400': - description: Validation error - - /auth/login: - post: - summary: Login user - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - email: - type: string - format: email - password: - type: string - responses: - '200': - description: Login successful - '401': - description: Invalid credentials - - /auth/logout: - post: - summary: Logout user - security: - - bearerAuth: [] - responses: - '200': - description: Logout successful - - /auth/refresh: - post: - summary: Refresh access token - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - refreshToken: - type: string - responses: - '200': - description: Token refreshed - '401': - description: Invalid refresh token - - /users/me: - get: - summary: Get current user - security: - - bearerAuth: [] - responses: - '200': - description: User information - '401': - description: Unauthorized - -components: - securitySchemes: - bearerAuth: - type: http - scheme: bearer - bearerFormat: JWT diff --git a/apps/web-docs/content/docs/en/architecture/caching-architecture.md b/apps/web-docs/content/docs/en/architecture/caching-architecture.md deleted file mode 100644 index d8f5e6a3..00000000 --- a/apps/web-docs/content/docs/en/architecture/caching-architecture.md +++ /dev/null @@ -1,382 +0,0 @@ -# Caching Architecture - -> Multi-layer caching strategy for optimal performance - -## Overview Diagram - -```mermaid -graph TD - Request[API Request] --> L1{L1 Cache
Memory} - - L1 -->|Hit| Return1[Return
< 1ms] - L1 -->|Miss| L2{L2 Cache
Redis} - - L2 -->|Hit| WarmL1[Warm L1] - WarmL1 --> Return2[Return
< 5ms] - - L2 -->|Miss| DB[(Database)] - DB --> StoreL2[Store L2 + L1] - StoreL2 --> Return3[Return
< 50ms] - - style L1 fill:#d4edda - style L2 fill:#fff4e1 - style DB fill:#f0e1ff -``` - -## System Context - -```mermaid -C4Context - title Caching System Context - - System(service, "Microservice", "Client service using cache") - System_Ext(db, "Neon PostgreSQL", "Primary database") - - Boundary(caching, "Caching Layer") { - System(l1, "L1 Cache", "In-memory NodeCache") - System(l2, "L2 Cache", "Redis Cluster") - } - - Rel(service, l1, "Reads/Writes", "In-process") - Rel(service, l2, "Reads/Writes", "Redis Protocol") - Rel(l1, l2, "Fills from", "On miss") - Rel(l2, db, "Cache aside", "On miss") -``` - -### Context Description -- **Service**: Communicates directly with L1 Cache (in-memory) for lowest latency. -- **L1 Cache**: Local cache, not shared, automatic expiration (short TTL). -- **L2 Cache**: Shared Redis cluster, holds data longer and syncs across instances. -- **Database**: Source of truth, accessed only on cache miss. - -## Architecture Description - -### Multi-Layer Caching - -GoodGo platform uses 2-layer caching for performance: - -**L1 Cache (Memory)**: -- In-memory cache per service instance -- Very fast access (< 1ms) -- Limited capacity (10k keys default) -- Short TTL (60 seconds default, max 5 minutes) -- Not shared across instances - -**L2 Cache (Redis)**: -- Shared distributed cache -- Fast access (< 5ms) -- Large capacity -- Longer TTL (configurable, typically 5-15 minutes) -- Shared across all service instances - -**Cache Flow**: -``` -Request → L1 → L2 → Database - ↓ ↓ ↓ ↓ -40-50% 80-90% 10-20% Cache miss -hit rate hit rate rate -``` - -## Cache Implementation - -### Multi-Layer Cache Service - -```typescript -export class MultiLayerCache { - private l1Cache: NodeCache; - private l2Cache: Redis; - - constructor() { - // L1: Memory cache - this.l1Cache = new NodeCache({ - stdTTL: 60, // 60 seconds default - maxKeys: 10000, // Max 10k keys - checkperiod: 120 // Check for expired keys every 2min - }); - - // L2: Redis cache - this.l2Cache = new 
Redis({ - host: process.env.REDIS_HOST, - port: parseInt(process.env.REDIS_PORT), - db: 0 - }); - } - - async get(key: string): Promise { - // Try L1 first - const l1Value = this.l1Cache.get(key); - if (l1Value) { - logger.debug('L1 cache hit', { key }); - return l1Value; - } - - // Try L2 - const l2Value = await this.l2Cache.get(key); - if (l2Value) { - logger.debug('L2 cache hit', { key }); - const parsed = JSON.parse(l2Value) as T; - - // Warm L1 cache - this.l1Cache.set(key, parsed); - return parsed; - } - - logger.debug('Cache miss', { key }); - return null; - } - - async set(key: string, value: any, ttl: number = 300): Promise { - // Store in both L1 and L2 - this.l1Cache.set(key, value, Math.min(ttl, 300)); // L1 max 5min - await this.l2Cache.setex(key, ttl, JSON.stringify(value)); - } - - async del(key: string): Promise { - this.l1Cache.del(key); - await this.l2Cache.del(key); - } - - async invalidatePattern(pattern: string): Promise { - // L1: Clear all (simple approach) - this.l1Cache.flushAll(); - - // L2: Delete by pattern - const keys = await this.l2Cache.keys(pattern); - if (keys.length > 0) { - await this.l2Cache.del(...keys); - } - } -} -``` - -### Cache Key Naming - -**Pattern**: `{service}:{entity}:{identifier}:{sub-resource}` - -**Examples**: -```typescript -const keys = { - user: (userId: string) => `iam:user:${userId}`, - userPermissions: (userId: string) => `iam:user:${userId}:permissions`, - userRoles: (userId: string) => `iam:user:${userId}:roles`, - session: (sessionId: string) => `iam:session:${sessionId}`, -}; - -// Usage -const user = await cache.get(keys.user('user_123')); -const permissions = await cache.get(keys.userPermissions('user_123')); -``` - -## TTL Strategies - -```mermaid -graph LR - subgraph "TTL Tiers" - Short[Short TTL
60-300s
Frequently changing] - Medium[Medium TTL
300-1800s
Moderately changing] - Long[Long TTL
1800-3600s
Rarely changing] - end - - Short --> Permissions[User Permissions] - Short --> Sessions[Session Data] - - Medium --> UserProfiles[User Profiles] - Medium --> OrgData[Organization Data] - - Long --> Config[Static Config] - Long --> RefData[Reference Data] - - style Short fill:#f8d7da - style Medium fill:#fff3cd - style Long fill:#d4edda -``` - -**TTL Guidelines**: -| Data Type | TTL | Reason | -|-----------|-----|--------| -| User permissions | 5 min | Security-sensitive | -| Session data | Varies | Based on session length | -| User profiles | 10 min | Moderate update frequency | -| Organization data | 15 min | Infrequent updates | -| Static config | 30-60 min | Very stable | -| Reference data | 1-2 hours | Almost never changes | - -## Cache Invalidation - -```mermaid -sequenceDiagram - participant API - participant Service - participant Cache - participant DB - - API->>Service: Update User - Service->>DB: UPDATE user - DB-->>Service: Success - - Service->>Cache: Invalidate user:123 - Service->>Cache: Invalidate user:123:permissions - Service->>Cache: Invalidate user:123:roles - Cache-->>Service: Cleared - - Service-->>API: Success - - Note over Service,Cache: Next request will fetch fresh data -``` - -**Invalidation Strategies**: - -```typescript -// 1. Single key invalidation -async updateUser(userId: string, data: UpdateUserDto): Promise { - const user = await userRepository.update(userId, data); - - // Invalidate user cache - await cache.del(cacheKeys.user(userId)); - - return user; -} - -// 2. Pattern-based invalidation -async updateUserRole(userId: string, roleId: string): Promise { - await userRoleRepository.assign(userId, roleId); - - // Invalidate all user-related cache - await cache.invalidatePattern(`iam:user:${userId}:*`); -} - -// 3. 
Time-based invalidation (TTL expiry) -// Automatically handled by cache -``` - -## Cache Warming - -```typescript -// Preload frequently accessed data -async warmCache(): Promise { - logger.info('Starting cache warming'); - - // Warm user permissions for active users - const activeUsers = await userRepository.findActive({ limit: 1000 }); - - for (const user of activeUsers) { - const permissions = await rbacService.getUserPermissions(user.id); - - await cache.set( - cacheKeys.userPermissions(user.id), - permissions, - 300 // 5 minutes - ); - } - - logger.info('Cache warming completed', { count: activeUsers.length }); -} - -// Run on service startup -warmCache().catch(err => logger.error('Cache warming failed', { err })); -``` - -## Design Decisions - -### Decision 1: Multi-layer Caching (L1 + L2) - -**Context**: Need to reduce load on Redis and achieve ultra-low latency for hot data. -**Decision**: Use combination of L1 (NodeCache) and L2 (Redis). -**Consequences**: -- ✅ Latency < 1ms for 40-50% requests. -- ✅ Reduced network traffic to Redis. -- ❌ Synchronization complexity (L1 might be stale for short duration). - -## Performance Characteristics - -### Performance Targets -| Metric | Target | Notes | -|--------|--------|-------| -| **L1 Hit Latency** | < 0.5ms | In-memory lookup | -| **L2 Hit Latency** | < 5ms | Network RTT + Redis processing | -| **Combine Hit Rate** | > 90% | L1 + L2 combined | -| **L1 Capacity** | 10k items | Per instance limit to protect heap | -| **Cache Warmup Time** | < 30s | At service startup | - -## Security Considerations - -### Cache Security -- **Encryption**: Sensitive data (PII) MUST be encrypted before storing in L2 Redis (AES-256). L1 can store plaintext as it is in process memory (unless memory dump). -- **Isolation**: Redis instance protected by password and Network Policy (allow internal K8s traffic only). -- **TLS**: Connect to Redis via TLS 1.2+. 
-- **Data Sanitization**: Do not cache entire user objects if they contain password hashes or secrets. - -## Deployment - -```mermaid -graph TD - subgraph "Kubernetes Pod" - Service[Microservice Container] - L1[L1 Cache (RAM)] - Service --- L1 - end - - subgraph "Infrastructure" - RedisMaster[Redis Master] - RedisSlave1[Redis Slave 1] - RedisSlave2[Redis Slave 2] - end - - Service -->|Write| RedisMaster - Service -->|Read| RedisSlave1 - Service -->|Read| RedisSlave2 - - RedisMaster -.->|Replication| RedisSlave1 - RedisMaster -.->|Replication| RedisSlave2 - - style Service fill:#e1f5ff - style L1 fill:#d4edda - style RedisMaster fill:#fff4e1 -``` - -**Deployment Description**: -- **L1**: Embedded directly in Microservice process, scales with number of Pods. -- **L2**: Redis Cluster (or Sentinel) with at least 3 nodes for High Availability. -- **Connection Pooling**: Use ioredis with connection pooling for efficient connection management. - -## Monitoring & Observability - -### Monitoring Metrics -- **Metrics**: Prometheus metrics for hit rate, miss rate, latency, memory usage. -- **Logs**: Log cache miss/hit at debug level (sampled), log connection errors at error level. -- **Health Checks**: Readiness probe checks connection to Redis. - -### Monitoring Code - -**Cache Hit Rates**: -```typescript -// Track cache performance -export class CacheMetrics { - // ... Prometheus Implementation ... 
-} -``` - -**Expected Performance**: -| Metric | L1 Cache | L2 Cache | Database | -|--------|----------|----------|----------| -| Latency | < 1ms | < 5ms | < 50ms | -| Hit Rate | 40-50% | 80-90% | - | -| Capacity | 10k keys | Unlimited | - | - -## Best Practices - -**DO**: -- ✅ Use cache for frequently accessed data -- ✅ Set appropriate TTLs based on data change frequency -- ✅ Invalidate cache on data updates -- ✅ Use cache key namespacing -- ✅ Monitor cache hit rates -- ✅ Warm cache on startup for critical data - -**DON'T**: -- ❌ Cache data that changes very frequently -- ❌ Set TTL too long (stale data risk) -- ❌ Set TTL too short (negates cache benefit) -- ❌ Cache sensitive data without encryption -- ❌ Ignore cache invalidation on updates -- ❌ Use cache as primary data store diff --git a/apps/web-docs/content/docs/en/architecture/data-consistency-patterns.md b/apps/web-docs/content/docs/en/architecture/data-consistency-patterns.md deleted file mode 100644 index 8e8ac26b..00000000 --- a/apps/web-docs/content/docs/en/architecture/data-consistency-patterns.md +++ /dev/null @@ -1,684 +0,0 @@ -# Data Consistency Patterns - -> Patterns for maintaining data consistency in distributed microservices architecture - -## Overview Diagram - -```mermaid -graph TD - subgraph "Consistency Patterns" - Saga[Saga Pattern
Distributed Transactions] - Outbox[Outbox Pattern
Reliable Events] - Idempotency[Idempotency
Retry Safety] - OptimisticLock[Optimistic Locking
Concurrent Updates] - CQRS[CQRS
Read/Write Separation] - end - - Service1[Service A] --> Saga - Service2[Service B] --> Outbox - Service3[Service C] --> Idempotency - - Saga --> EventualConsistency[Eventual Consistency] - Outbox --> EventualConsistency - Idempotency --> EventualConsistency - OptimisticLock --> StrongConsistency[Strong Consistency] - CQRS --> EventualConsistency - - style Saga fill:#e1f5ff - style Outbox fill:#fff4e1 - style Idempotency fill:#f0e1ff - style CQRS fill:#d4edda -``` - -## Architecture Description - -### Architecture Overview - -GoodGo platform uses multiple consistency patterns to handle distributed data: - -**Core Challenges**: -- No distributed transactions (2PC too slow) -- Services own their data (database per service) -- Network failures can cause partial completion -- Need to maintain data integrity across services - -**Pattern Selection**: -- **Saga**: For multi-service workflows -- **Outbox**: For guaranteed event publishing -- **Idempotency**: For safe retries -- **Optimistic Locking**: For concurrent updates -- **CQRS**: For read/write optimization - -## System Context - -```mermaid -C4Context - title System Context for Data Consistency in GoodGo Platform - - Person(user, "User", "End user performing actions") - - System_Boundary(goodgo, "GoodGo Microservices") { - System(order_service, "Order Service", "Manages orders with Saga") - System(payment_service, "Payment Service", "Processes payments") - System(inventory_service, "Inventory Service", "Manages stock") - System(saga_orchestrator, "Saga Orchestrator", "Coordinates distributed transactions") - System(outbox_processor, "Outbox Processor", "Publishes events reliably") - } - - System_Ext(db_order, "Order DB", "PostgreSQL with Outbox table") - System_Ext(db_payment, "Payment DB", "PostgreSQL with version field") - System_Ext(db_inventory, "Inventory DB", "PostgreSQL") - System_Ext(kafka, "Event Bus", "Kafka - Event streaming") - System_Ext(redis, "Cache", "Redis - Idempotency keys") - - Rel(user, 
order_service, "Places order", "HTTPS") - Rel(order_service, saga_orchestrator, "Starts saga", "Internal") - Rel(saga_orchestrator, payment_service, "Process payment", "HTTP") - Rel(saga_orchestrator, inventory_service, "Reserve stock", "HTTP") - - Rel(order_service, db_order, "Writes + Outbox", "SQL") - Rel(payment_service, db_payment, "Updates with version", "SQL") - Rel(inventory_service, db_inventory, "Reads/Writes", "SQL") - - Rel(outbox_processor, db_order, "Polls outbox", "SQL") - Rel(outbox_processor, kafka, "Publishes events", "Kafka Protocol") - Rel(order_service, redis, "Checks idempotency key", "Redis Protocol") - - UpdateRelStyle(saga_orchestrator, payment_service, $lineColor="red", $textColor="red") - UpdateRelStyle(saga_orchestrator, inventory_service, $lineColor="red", $textColor="red") -``` - -The GoodGo platform uses a database-per-service architecture where each service owns its data. Data consistency across services is achieved through patterns like Saga (for coordinated workflows), Outbox (for reliable event publishing), Idempotency (for safe retries), and Optimistic Locking (for concurrent updates). These patterns enable eventual consistency while maintaining data integrity. - -## Saga Pattern - -```mermaid -sequenceDiagram - participant Orchestrator - participant OrderService - participant PaymentService - participant InventoryService - - Orchestrator->>OrderService: 1. Create Order - OrderService-->>Orchestrator: Order Created - - Orchestrator->>PaymentService: 2. Process Payment - PaymentService-->>Orchestrator: Payment Success - - Orchestrator->>InventoryService: 3. 
Reserve Inventory - - alt Inventory Reserved - InventoryService-->>Orchestrator: Success - Orchestrator->>Orchestrator: Complete Saga ✓ - else Inventory Failed - InventoryService-->>Orchestrator: Failed ✗ - Orchestrator->>PaymentService: Compensate: Refund - PaymentService-->>Orchestrator: Refunded - Orchestrator->>OrderService: Compensate: Cancel Order - OrderService-->>Orchestrator: Cancelled - end -``` - -**Description**: Saga manages distributed transactions as sequence of local transactions with compensation. - -**Implementation**: -```typescript -// Saga orchestrator -class OrderSaga { - async execute(orderData: OrderData): Promise { - const sagaContext = { - orderId: null, - paymentId: null, - inventoryId: null - }; - - try { - // Step 1: Create order - sagaContext.orderId = await orderService.create(orderData); - - // Step 2: Process payment - sagaContext.paymentId = await paymentService.process(orderData.payment); - - // Step 3: Reserve inventory - sagaContext.inventoryId = await inventoryService.reserve(orderData.items); - - // All success - commit - await this.completeSaga(sagaContext); - } catch (error) { - // Compensate in reverse order - await this.compensate(sagaContext, error); - throw error; - } - } - - private async compensate(context: SagaContext, error: Error): Promise { - if (context.inventoryId) { - await inventoryService.release(context.inventoryId); - } - if (context.paymentId) { - await paymentService.refund(context.paymentId); - } - if (context.orderId) { - await orderService.cancel(context.orderId); - } - } -} -``` - -## Outbox Pattern - -```mermaid -sequenceDiagram - participant Service - participant DB as Database - participant OutboxTable as Outbox Table - participant Processor as Outbox Processor - participant Kafka - - Service->>DB: Begin Transaction - Service->>DB: Update Business Data - Service->>OutboxTable: Insert Event - Service->>DB: Commit Transaction - - loop Every 5 seconds - Processor->>OutboxTable: SELECT unpublished 
events - OutboxTable-->>Processor: Events - Processor->>Kafka: Publish Events - Kafka-->>Processor: Ack - Processor->>OutboxTable: Mark as published - end -``` - -**Description**: Guarantees event publishing by storing events in database within same transaction as business data. - -**Implementation**: -```typescript -// Store event in outbox -async createUser(userData: CreateUserDto): Promise { - return await prisma.$transaction(async (tx) => { - // Business operation - const user = await tx.user.create({ data: userData }); - - // Store event in outbox (same transaction) - await tx.outbox.create({ - data: { - aggregateId: user.id, - aggregateType: 'User', - eventType: 'user.created.v1', - payload: JSON.stringify(user), - createdAt: new Date() - } - }); - - return user; - }); -} - -// Outbox processor (runs periodically) -async processOutbox(): Promise { - const events = await prisma.outbox.findMany({ - where: { publishedAt: null }, - take: 100 - }); - - for (const event of events) { - try { - await kafkaProducer.send({ - topic: event.eventType, - messages: [{ value: event.payload }] - }); - - await prisma.outbox.update({ - where: { id: event.id }, - data: { publishedAt: new Date() } - }); - } catch (error) { - logger.error('Failed to publish event', { event, error }); - } - } -} -``` - -## Idempotency Pattern - -```mermaid -graph LR - Request1[Request with
Idempotency Key] - Request2[Retry with
Same Key] - - Request1 --> Check{Key Exists?} - Check -->|No| Process[Process Request] - Check -->|Yes| Return[Return Cached Result] - - Process --> Store[Store Result
with Key] - Store --> Response1[Response] - - Request2 --> Check - Return --> Response2[Same Response] - - style Check fill:#fff3cd - style Store fill:#d4edda -``` - -**Description**: Ensures operations can be safely retried without side effects by using idempotency keys. - -**Implementation**: -```typescript -// Idempotency middleware -async function idempotentOperation( - key: string, - operation: () => Promise, - ttl: number = 86400 // 24 hours -): Promise { - // Check if already processed - const cached = await redis.get(`idempotency:${key}`); - if (cached) { - return JSON.parse(cached); - } - - // Process operation - const result = await operation(); - - // Store result - await redis.setex(`idempotency:${key}`, ttl, JSON.stringify(result)); - - return result; -} - -// Usage in controller -async createPayment(req: Request, res: Response): Promise { - const idempotencyKey = req.headers['idempotency-key'] as string; - - if (!idempotencyKey) { - return res.status(400).json({ error: 'Idempotency-Key header required' }); - } - - const result = await idempotentOperation( - idempotencyKey, - () => paymentService.process(req.body) - ); - - res.json({ success: true, data: result }); -} -``` - -## Optimistic Locking - -```mermaid -sequenceDiagram - participant User1 - participant User2 - participant Service - participant DB - - User1->>Service: Read (version=1) - User2->>Service: Read (version=1) - - User1->>Service: Update (version=1) - Service->>DB: UPDATE WHERE version=1 - DB-->>Service: Success, version→2 - Service-->>User1: Success - - User2->>Service: Update (version=1) - Service->>DB: UPDATE WHERE version=1 - DB-->>Service: No rows updated - Service-->>User2: Conflict - version mismatch - User2->>Service: Read (version=2) - User2->>Service: Update (version=2) - Service-->>User2: Success -``` - -**Description**: Prevents lost updates by checking version on update. 
- -**Implementation**: -```prisma -// Prisma schema -model User { - id String @id @default(cuid()) - email String @unique - name String - version Int @default(1) // Version field -} -``` - -```typescript -// Update with optimistic locking -async updateUser(userId: string, data: UpdateUserDto, currentVersion: number): Promise { - const result = await prisma.user.updateMany({ - where: { - id: userId, - version: currentVersion // Check version - }, - data: { - ...data, - version: { increment: 1 } // Increment version - } - }); - - if (result.count === 0) { - throw new ConflictError('Version mismatch - data was modified by another user'); - } - - return await prisma.user.findUnique({ where: { id: userId } }); -} -``` - -## CQRS Pattern - -```mermaid -graph LR - subgraph "Write Side" - Command[Command] --> WriteModel[Write Model
Normalized] - WriteModel --> Events[Domain Events] - end - - subgraph "Read Side" - Events --> Projection[Event Projection] - Projection --> ReadModel[Read Model
Denormalized] - Query[Query] --> ReadModel - end - - WriteModel --> DB1[(Write DB)] - ReadModel --> DB2[(Read DB
Optimized)] - - style WriteModel fill:#f0e1ff - style ReadModel fill:#d4edda -``` - -**Description**: Separates read and write models for optimal performance. - -## Performance Characteristics - -Performance metrics and optimization strategies for data consistency patterns. - -| Pattern | Latency Impact | Throughput | Notes | -|---------|----------------|------------|-------| -| **Saga Execution** | 500ms - 2s | 100-500 sagas/s | Depends on number of steps and compensation | -| **Outbox Processing** | < 100ms | 10,000 events/s | Async processing, minimal user impact | -| **Idempotency Check** | < 10ms | 50,000 checks/s | Redis lookup, very fast | -| **Optimistic Lock Update** | < 50ms | 5,000 updates/s | Single DB operation with version check | -| **CQRS Projection** | 100ms - 1s | 1,000 events/s | Event processing to read model | -| **Compensation Execution** | 200ms - 1s | Varies | Rollback operations in saga | - -### Performance Optimization Strategies - -**Saga Pattern**: -- Minimize number of steps (< 5 steps ideal) -- Parallel execution where possible -- Cache service responses -- Set appropriate timeouts (30s default) - -**Outbox Pattern**: -- Batch process outbox events (100-500 per batch) -- Index `publishedAt` column for performance -- Archive processed events periodically -- Use connection pooling for Kafka - -**Idempotency**: -- Use Redis for fast key lookups -- Set TTL to 24-48 hours -- Hash long idempotency keys -- Clean expired keys regularly - -**Optimistic Locking**: -- Works best for low-contention scenarios -- Implement retry with exponential backoff -- Monitor conflict rates (should be < 5%) -- Consider pessimistic locking if conflicts > 10% - -## Security Considerations - -Security measures for protecting data consistency operations. 
- -### Saga Security - -**Compensation Protection**: -- Validate saga execution permissions at each step -- Encrypt sensitive data in saga context -- Log all saga executions for audit -- Implement timeout to prevent hanging sagas - -```typescript -// Secure saga context -interface SecureSagaContext { - sagaId: string; - userId: string; // User who initiated - permissions: string[]; // Required permissions - encryptedData: string; // Encrypted sensitive data - auditLog: AuditEntry[]; // Audit trail -} -``` - -### Outbox Security - -**Event Payload Encryption**: -- Encrypt PII (Personally Identifiable Information) before storing in outbox -- Use AES-256-GCM for event payload encryption -- Decrypt only when publishing to Kafka -- Rotate encryption keys quarterly - -**Access Control**: -- Restrict outbox table access to outbox processor only -- Use database roles and permissions -- Monitor outbox table access patterns - -### Idempotency Security - -**Key Security**: -- Use cryptographic hashing for idempotency keys (SHA-256) -- Include user context in key generation -- Validate key ownership before processing -- Clear keys on user logout for sensitive operations - -```typescript -// Secure idempotency key generation -function generateIdempotencyKey( - operation: string, - userId: string, - data: any -): string { - const payload = JSON.stringify({ operation, userId, data }); - return crypto.createHash('sha256').update(payload).digest('hex'); -} -``` - -### Optimistic Locking Security - -**Version Tampering Prevention**: -- Validate version field on server-side only -- Never accept version from client directly -- Log version conflicts for security monitoring -- Rate limit update attempts per user - -## Deployment - -How data consistency patterns are deployed and scaled. 
- -```mermaid -graph TD - subgraph "Production Deployment" - subgraph "Order Service Cluster" - OS1[Order Service\nPod 1] - OS2[Order Service\nPod 2] - OS3[Order Service\nPod 3] - end - - subgraph "Saga Orchestrator" - SO1[Saga Orchestrator\nPod 1] - SO2[Saga Orchestrator\nPod 2] - end - - subgraph "Outbox Processor" - OP1[Outbox Processor\nPod 1] - OP2[Outbox Processor\nPod 2] - end - - OS1 & OS2 & OS3 --> DB[(Order DB\nwith Outbox)] - OS1 & OS2 & OS3 --> Redis[(Redis\nIdempotency Keys)] - - SO1 & SO2 --> PS[Payment Service] - SO1 & SO2 --> IS[Inventory Service] - - OP1 & OP2 --> DB - OP1 & OP2 --> Kafka[Kafka Cluster\n5 brokers] - end - - style SO1 fill:#e1f5ff - style SO2 fill:#e1f5ff - style OP1 fill:#fff4e1 - style OP2 fill:#fff4e1 - style DB fill:#d4edda - style Kafka fill:#ffe1e1 -``` - -### Deployment Configuration - -| Component | Replicas | Resources | HA Strategy | -|-----------|----------|-----------|-------------| -| **Saga Orchestrator** | 2-3 | 512Mi RAM, 500m CPU | Leader election with etcd | -| **Outbox Processor** | 2-5 | 256Mi RAM, 250m CPU | Distributed lock per event batch | -| **Services with Outbox** | 3+ | Varies | Standard service scaling | -| **Redis (Idempotency)** | 3 nodes | 1Gi RAM each | Redis Cluster with replication | - -### Scaling Strategy - -**Saga Orchestrator**: -- Scale based on pending saga count -- Use queue-based load distribution -- Monitor saga execution duration - -**Outbox Processor**: -- Scale with database sharding (1 processor per shard) -- Increase batch size before adding replicas -- Monitor outbox table size and age - -**Idempotency Store (Redis)**: -- Scale Redis cluster horizontally -- Use consistent hashing for key distribution -- Monitor memory usage (should be < 70%) - -## Monitoring & Observability - -Monitoring strategies for data consistency patterns. 
- -### Key Metrics - -**Saga Metrics**: -- `saga_executions_total` - Total saga executions (success/failure) -- `saga_duration_seconds` - Saga execution time histogram -- `saga_compensations_total` - Total compensation executions -- `saga_timeout_total` - Sagas that timed out -- `saga_pending_count` - Sagas currently executing - -**Outbox Metrics**: -- `outbox_events_total` - Events written to outbox -- `outbox_published_total` - Events published to Kafka -- `outbox_processing_lag_seconds` - Time from write to publish -- `outbox_table_size` - Outbox table row count -- `outbox_failed_events_total` - Failed event publications - -**Idempotency Metrics**: -- `idempotency_checks_total` - Total idempotency checks -- `idempotency_hits_total` - Duplicate requests prevented -- `idempotency_key_ttl_seconds` - Average key TTL -- `idempotency_redis_errors_total` - Redis failures - -**Optimistic Lock Metrics**: -- `optimistic_lock_conflicts_total` - Version conflicts detected -- `optimistic_lock_retries_total` - Retry attempts after conflict -- `optimistic_lock_success_rate` - Update success percentage - -### Alerts - -**Critical Alerts**: -```yaml -# Saga timeout rate too high -alert: HighSagaTimeoutRate -expr: rate(saga_timeout_total[5m]) > 0.05 -for: 5m -severity: critical - -# Outbox processing lag -alert: OutboxProcessingLag -expr: outbox_processing_lag_seconds > 300 -for: 10m -severity: critical - -# High optimistic lock conflict rate -alert: HighOptimisticLockConflicts -expr: rate(optimistic_lock_conflicts_total[5m]) / rate(optimistic_lock_attempts_total[5m]) > 0.1 -for: 5m -severity: warning -``` - -### Monitoring Dashboard - -**Grafana Panels**: - -1. **Saga Orchestration Overview**: - - Saga execution rate (success/failure) - - Average saga duration - - Compensation rate - - Pending saga count - -2. **Outbox Processing Health**: - - Outbox publishing rate - - Processing lag (P95, P99) - - Failed events - - Table size trend - -3. 
**Idempotency Effectiveness**: - - Duplicate prevention rate - - Redis hit rate - - Key distribution - -4. **Data Consistency SLA**: - - Overall consistency rate (target: 99.9%) - - Mean time to consistency (MTTC) - - Conflict resolution success rate - -### Distributed Tracing - -**Trace Saga Execution**: -```typescript -// Traced saga step -async function executeStepWithTracing( - step: SagaStep, - context: SagaContext -): Promise { - const tracer = trace.getTracer('saga-orchestrator'); - const span = tracer.startSpan(`saga.step.${step.name}`, { - attributes: { - 'saga.id': context.sagaId, - 'saga.step': step.name, - 'saga.attempt': context.currentAttempt - } - }); - - try { - await step.execute(context); - span.setStatus({ code: SpanStatusCode.OK }); - } catch (error) { - span.setStatus({ code: SpanStatusCode.ERROR, message: error.message }); - span.recordException(error); - throw error; - } finally { - span.end(); - } -} -``` - -## Related Documentation - -- [Event-Driven Architecture](./event-driven-architecture.md) - Event sourcing and Kafka -- [System Design](./system-design.md) - Overall architecture -- [Microservices Communication](./microservices-communication.md) - Service communication patterns -- [Resilience Patterns](../skills/resilience-patterns.md) - Circuit breaker, retry for saga steps -- [Caching Patterns](../skills/caching-patterns.md) - Caching for idempotency keys -- [Database Prisma](../skills/database-prisma.md) - Prisma transactions for outbox pattern - ---- - -**Last Updated**: 2026-01-07 -**Author**: VelikHo (hongochai10@icloud.com) -**Reviewers**: To be assigned diff --git a/apps/web-docs/content/docs/en/architecture/event-driven-architecture.md b/apps/web-docs/content/docs/en/architecture/event-driven-architecture.md deleted file mode 100644 index ea563d0c..00000000 --- a/apps/web-docs/content/docs/en/architecture/event-driven-architecture.md +++ /dev/null @@ -1,375 +0,0 @@ -# Event-Driven Architecture - -> Event-driven architecture for 
asynchronous communication using Apache Kafka - -## Overview Diagram - -```mermaid -graph TD - subgraph "Event Producers" - IAM[IAM Service] - Service1[Service A] - end - - subgraph "Event Broker" - Kafka[Apache Kafka] - Topics[Topics: user.events, auth.events] - end - - subgraph "Event Consumers" - Consumer1[Notification Service] - Consumer2[Audit Service] - end - - IAM -->|Publish| Kafka - Service1 -->|Publish| Kafka - Kafka --> Topics - Topics -->|Subscribe| Consumer1 - Topics -->|Subscribe| Consumer2 - - style Kafka fill:#e1f5ff - style Topics fill:#fff4e1 -``` - -## Architecture Description - -The GoodGo platform implements Event-Driven Architecture (EDA) for asynchronous communication between microservices. - -**Core Principles**: -1. **Event-First Design**: All state changes emit domain events -2. **Loose Coupling**: Services communicate through events -3. **Eventual Consistency**: Accept temporary inconsistency -4. **Event Sourcing**: Store changes as event sequence -5. **CQRS Pattern**: Separate read/write operations - -**Technology Stack**: -- Apache Kafka - Event streaming platform -- Schema Registry - Avro schemas for validation -- KafkaJS - Node.js client library -- Event Sourcing - Custom implementation in IAM - -## Event Flow - -```mermaid -sequenceDiagram - participant Producer as IAM Service - participant Kafka as Kafka Broker - participant Consumer as Notification Service - - Producer->>Kafka: Publish Event (user.created) - Kafka->>Consumer: Deliver Event - Consumer->>Consumer: Process Event - Consumer-->>Kafka: Acknowledge -``` - -**Steps**: Publish → Distribute → Consume → Retry (if failed) → DLQ (after max retries) → Acknowledge - -## Event Structure - -```typescript -interface BaseEvent { - eventId: string; // UUID - eventType: string; // user.created.v1 - eventVersion: string; // 1.0.0 - timestamp: string; // ISO 8601 - source: string; // iam-service - correlationId?: string; // Request correlation - data: unknown; // Event payload -} -``` - 
-**Example**: -```json -{ - "eventId": "550e8400-e29b-41d4-a716-446655440000", - "eventType": "user.created.v1", - "timestamp": "2024-01-15T10:30:00Z", - "source": "iam-service", - "data": { - "userId": "user_123", - "email": "user@example.com" - } -} -``` - -## Kafka Topics - -```mermaid -graph LR - UserCreated[user.created
Partitions: 3] - AuthLogin[auth.login.success
Partitions: 5] - AuditEvents[audit.events
Partitions: 10] - - style UserCreated fill:#e1f5ff - style AuthLogin fill:#fff4e1 - style AuditEvents fill:#f8d7da -``` - -**Naming Convention**: `{domain}.{action}.{version}` - -**Examples**: -- `user.created.v1` -- `auth.login.success.v1` -- `audit.event.logged.v1` - -## Error Handling - -```mermaid -graph TD - Event[Event] --> Process[Process] - Process -->|Success| Ack[Acknowledge] - Process -->|Failure| Retry[Retry 3x] - Retry -->|Max Retries| DLQ[Dead Letter Queue] - DLQ --> Alert[Alert Team] -``` - -**Strategy**: -1. Retry with exponential backoff (100ms → 200ms → 400ms) -2. Max 3 attempts -3. Move to DLQ after max retries -4. Manual review and reprocess - -## System Context - -```mermaid -C4Context - title Event-Driven Architecture Context - - System(iam, "IAM Service", "Event producer") - System(service_a, "Service A", "Event producer") - System(notification, "Notification Service", "Event consumer") - System(audit, "Audit Service", "Event consumer") - - System_Ext(kafka, "Apache Kafka", "Event streaming platform") - System_Ext(registry, "Schema Registry", "Schema management") - System_Ext(monitoring, "Monitoring", "Kafka metrics & alerts") - - Rel(iam, kafka, "Publishes events", "Kafka Protocol") - Rel(service_a, kafka, "Publishes events", "Kafka Protocol") - Rel(kafka, notification, "Delivers events", "Kafka Protocol") - Rel(kafka, audit, "Delivers events", "Kafka Protocol") - Rel(kafka, registry, "Validates schemas", "HTTP") - Rel(kafka, monitoring, "Sends metrics", "JMX") -``` - -**Context Description**: -- **Producers**: IAM Service and other services publish domain events -- **Kafka**: Central event broker, manages topics and partitions -- **Consumers**: Notification and Audit services consume events -- **Schema Registry**: Manages and validates Avro schemas -- **Monitoring**: Collects metrics from Kafka cluster - -## Performance Characteristics - -| Metric | Target | Notes | -|--------|--------|-------| -| **Event Publish Latency (P95)** | < 10ms | 
Fire-and-forget, async | -| **Event Delivery Latency (P95)** | < 100ms | End-to-end from publish to consume | -| **Throughput** | 10,000 events/s | Per topic, scalable with partitions | -| **Consumer Lag** | < 1000 messages | Per partition, monitored | -| **Event Size** | < 1MB | Recommended max size | -| **Retention** | 7 days | Default, configurable per topic | -| **Replication Factor** | 3 | For fault tolerance | - -**Performance Optimizations**: -- **Batch Publishing**: Group multiple events to reduce network overhead -- **Compression**: Use Snappy or LZ4 compression -- **Partitioning**: Divide topics into multiple partitions for parallel processing -- **Consumer Groups**: Multiple consumers in same group for horizontal scaling -- **Async Publishing**: Fire-and-forget pattern, don't block request handlers - -## Security Considerations - -**Event Encryption**: -- TLS in-transit for all Kafka connections -- Optional payload encryption for sensitive data -- End-to-end encryption with custom encryption layer - -**Access Control**: -- Kafka ACLs (Access Control Lists) per topic -- SASL/SCRAM authentication for producers and consumers -- Separate credentials per service -- Principle of least privilege - grant only necessary permissions - -**Schema Validation**: -- Avro schemas in Schema Registry -- Schema evolution with backward/forward compatibility -- Reject events that don't match schema - -**Audit**: -- Log all event publishes and consumes -- Correlation IDs to trace event flow -- Retention policy for audit logs (7 years) - -**Data Retention**: -- Default 7 days retention -- Configurable per topic -- Automatic deletion after retention period -- GDPR compliance (right to erasure) - -## Deployment - -```mermaid -graph TD - subgraph "Kafka Cluster" - subgraph "Brokers" - Broker1[Kafka Broker 1
Leader for partitions 0,3,6] - Broker2[Kafka Broker 2
Leader for partitions 1,4,7] - Broker3[Kafka Broker 3
Leader for partitions 2,5,8] - end - - subgraph "Coordination" - ZK[Zookeeper Ensemble
3 nodes] - end - - Broker1 --> ZK - Broker2 --> ZK - Broker3 --> ZK - end - - subgraph "Producers" - IAM[IAM Service] - ServiceA[Service A] - end - - subgraph "Consumers" - Notification[Notification Service
Consumer Group: notifications] - Audit[Audit Service
Consumer Group: audit] - end - - IAM --> Broker1 - IAM --> Broker2 - IAM --> Broker3 - - ServiceA --> Broker1 - ServiceA --> Broker2 - ServiceA --> Broker3 - - Broker1 --> Notification - Broker2 --> Notification - Broker3 --> Notification - - Broker1 --> Audit - Broker2 --> Audit - Broker3 --> Audit - - style Broker1 fill:#e1f5ff - style Broker2 fill:#fff4e1 - style Broker3 fill:#d4edda - style ZK fill:#f0e1ff -``` - -**Kafka Cluster Configuration**: -- **Brokers**: 3 brokers minimum (5 for production) -- **Replication Factor**: 3 (for fault tolerance) -- **Min In-Sync Replicas**: 2 (ensure data durability) -- **Partitions**: 3-10 per topic (based on throughput needs) -- **Zookeeper**: 3-node ensemble (for coordination) - -**Resource Allocation**: -| Component | CPU | Memory | Disk | -|-----------|-----|--------|------| -| **Kafka Broker** | 2 cores | 4GB RAM | 100GB SSD | -| **Zookeeper** | 1 core | 2GB RAM | 20GB SSD | -| **Schema Registry** | 500m | 1GB RAM | 10GB | - -**Topic Configuration**: -```yaml -user.created: - partitions: 3 - replication-factor: 3 - retention-ms: 604800000 # 7 days - compression-type: snappy - -auth.login.success: - partitions: 5 - replication-factor: 3 - retention-ms: 604800000 - compression-type: snappy - -audit.events: - partitions: 10 - replication-factor: 3 - retention-ms: 220752000000 # 7 years - compression-type: lz4 -``` - -**High Availability**: -- Multiple brokers with partition replication -- Automatic leader election when broker fails -- Consumer group rebalancing -- Monitoring and alerting for broker health - -## Monitoring & Observability - -**Key Metrics**: - -**Kafka Broker Metrics**: -- `kafka_server_brokertopicmetrics_messagesinpersec` - Messages in/sec -- `kafka_server_brokertopicmetrics_bytesinpersec` - Bytes in/sec -- `kafka_server_brokertopicmetrics_bytesoutpersec` - Bytes out/sec -- `kafka_controller_kafkacontroller_activecontrollercount` - Active controller -- 
`kafka_server_replicamanager_underreplicatedpartitions` - Under-replicated partitions - -**Consumer Metrics**: -- `kafka_consumer_fetch_manager_records_lag_max` - Max consumer lag -- `kafka_consumer_fetch_manager_records_consumed_rate` - Records consumed/sec -- `kafka_consumer_coordinator_commit_latency_avg` - Commit latency - -**Producer Metrics**: -- `kafka_producer_record_send_total` - Total records sent -- `kafka_producer_record_error_total` - Total send errors -- `kafka_producer_request_latency_avg` - Request latency - -**Application Metrics**: -```typescript -// Custom metrics for event processing -const eventPublished = new Counter({ - name: 'events_published_total', - help: 'Total events published', - labelNames: ['event_type', 'topic'] -}); - -const eventConsumed = new Counter({ - name: 'events_consumed_total', - help: 'Total events consumed', - labelNames: ['event_type', 'topic', 'consumer_group'] -}); - -const eventProcessingDuration = new Histogram({ - name: 'event_processing_duration_seconds', - help: 'Event processing duration', - labelNames: ['event_type'], - buckets: [0.01, 0.05, 0.1, 0.5, 1, 2, 5] -}); -``` - -**Dashboards**: -- Kafka Cluster Overview (brokers, topics, partitions) -- Producer Performance (throughput, latency, errors) -- Consumer Performance (lag, throughput, errors) -- Topic Metrics (messages/sec, bytes/sec, retention) - -**Logging**: -```typescript -// Structured logging for events -logger.info('Event published', { - eventId: event.eventId, - eventType: event.eventType, - topic: 'user.created', - correlationId: event.correlationId -}); - -logger.info('Event consumed', { - eventId: event.eventId, - eventType: event.eventType, - topic: 'user.created', - consumerGroup: 'notifications', - processingTime: duration -}); -``` - -## Related Documentation - -- [System Design](./system-design.md) - Overall architecture -- [IAM Architecture](./iam-proposal.md) - Event sourcing implementation diff --git 
a/apps/web-docs/content/docs/en/architecture/iam-proposal.md b/apps/web-docs/content/docs/en/architecture/iam-proposal.md deleted file mode 100644 index e54d8dee..00000000 --- a/apps/web-docs/content/docs/en/architecture/iam-proposal.md +++ /dev/null @@ -1,339 +0,0 @@ -# Đề Xuất Kiến Trúc IAM Service - -Tài liệu này mô tả đề xuất kiến trúc cho IAM Service (Identity and Access Management Service), mở rộng từ auth-service hiện tại. - -## Tổng Quan: Auth Service → IAM Service - -**Auth Service hiện tại** tập trung vào: -- Authentication (xác thực) -- Authorization (phân quyền) -- Session & Token management -- RBAC/ABAC - -**IAM Service** mở rộng thêm: -- **Identity Management** (quản lý danh tính toàn diện) -- **Access Governance** (quản trị truy cập) -- **Compliance & Reporting** (tuân thủ và báo cáo) -- **Lifecycle Management** (quản lý vòng đời tài khoản) - ---- - -## 1. Phạm Vi IAM Service - -### 1.1 Identity Management (Quản Lý Danh Tính) - -#### A. User Lifecycle Management -- User CRUD operations -- User provisioning/deprovisioning workflows -- Bulk user operations (import/export) -- User deactivation/reactivation với approval workflow -- Account merging/deduplication -- User archival (soft delete với retention policy) - -#### B. Profile Management -- Extended attributes (custom fields) -- Profile picture upload & management -- Contact information (phone, address) -- Preferences & settings -- Profile versioning/audit trail - -#### C. Identity Verification -- Email verification -- Phone/SMS verification -- Identity document verification (KYC) -- Multi-level verification (verified, pending, rejected) - -#### D. Organizations & Groups -- Organization management (multi-tenant) -- Group/Team management -- Organization hierarchy -- Group-based access control -- Organization-level policies - -### 1.2 Access Management (Quản Lý Truy Cập) - -#### A. 
Advanced Access Control -- Just-In-Time (JIT) access provisioning -- Privileged Access Management (PAM) -- Temporary access grants -- Access request/approval workflows -- Delegation & impersonation (admin view) -- Conditional access policies (location, time, device) - -#### B. Access Reviews & Certifications -- Periodic access reviews -- Access certification campaigns -- Access analytics & reporting -- Risk scoring for access decisions -- Anomaly detection (unusual access patterns) - -### 1.3 Governance & Compliance (Quản Trị & Tuân Thủ) - -#### A. Audit & Logging -- Compliance reporting (GDPR, SOC2, ISO 27001) -- Data retention policies -- Audit log search & analytics -- Export audit logs - -#### B. Policy Governance -- Policy versioning & rollback -- Policy templates library -- Policy testing & validation -- Policy compliance checks - -#### C. Risk Management -- Risk scoring engine -- Risk-based authentication -- Threat detection -- Incident response workflows -- Security posture dashboard - ---- - -## 2. 
Kiến Trúc Module Structure - -``` -services/iam-service/ -├── src/ -│ ├── config/ # Configuration files -│ ├── core/ -│ │ ├── cache/ # Multi-layer cache -│ │ ├── security/ # Zero-trust, encryption -│ │ ├── events/ # Event sourcing -│ │ └── workflows/ # Workflow engine (NEW) -│ ├── modules/ -│ │ ├── auth/ # ✅ Core authentication -│ │ ├── rbac/ # ✅ RBAC system -│ │ ├── social/ # ✅ Social authentication -│ │ ├── oidc/ # ✅ OIDC implementation -│ │ ├── token/ # ✅ JWT & Cookie management -│ │ ├── session/ # ✅ Session management -│ │ ├── mfa/ # ✅ Multi-factor auth -│ │ │ -│ │ ├── identity/ # 🆕 Identity Management -│ │ │ ├── user/ # User lifecycle -│ │ │ ├── profile/ # Profile management -│ │ │ ├── verification/ # Identity verification -│ │ │ └── organization/ # Organizations & groups -│ │ │ -│ │ ├── access/ # 🆕 Access Management -│ │ │ ├── request/ # Access requests -│ │ │ ├── review/ # Access reviews -│ │ │ ├── pam/ # Privileged access -│ │ │ └── analytics/ # Access analytics -│ │ │ -│ │ ├── governance/ # 🆕 Governance & Compliance -│ │ │ ├── compliance/ # Compliance reporting -│ │ │ ├── policy/ # Policy governance -│ │ │ ├── risk/ # Risk management -│ │ │ └── reporting/ # Reporting & dashboards -│ │ │ -│ │ └── workflow/ # 🆕 Workflow Engine -│ │ ├── engine/ # Workflow engine -│ │ ├── approval/ # Approval workflows -│ │ └── automation/ # Automated workflows -│ │ -│ ├── middlewares/ # Express middlewares -│ ├── repositories/ # Data access layer -│ └── routes/ # Route definitions -└── prisma/ - └── schema.prisma # Database schema (mở rộng) -``` - ---- - -## 3. 
Database Schema Mở Rộng - -### 3.1 Identity Management Models - -- **Organization**: Quản lý tổ chức với hierarchy -- **Group**: Quản lý nhóm trong organization -- **GroupMember**: Thành viên của group -- **GroupPermission**: Permissions cho group -- **UserProfile**: Thông tin profile mở rộng của user -- **IdentityVerification**: Xác thực danh tính (email, phone, document) - -### 3.2 Access Management Models - -- **AccessRequest**: Yêu cầu truy cập -- **AccessRequestApprover**: Người phê duyệt request -- **AccessReview**: Đánh giá truy cập định kỳ -- **AccessReviewItem**: Item trong review - -### 3.3 Governance Models - -- **ComplianceReport**: Báo cáo tuân thủ (GDPR, SOC2, ISO27001) -- **PolicyTemplate**: Template cho policies -- **RiskScore**: Điểm rủi ro của user - ---- - -## 4. API Endpoints Mở Rộng - -### 4.1 Identity Management APIs - -``` -# User Management -GET /api/v1/identity/users -POST /api/v1/identity/users -GET /api/v1/identity/users/:id -PUT /api/v1/identity/users/:id -DELETE /api/v1/identity/users/:id -POST /api/v1/identity/users/bulk-import -GET /api/v1/identity/users/bulk-export - -# Profile Management -GET /api/v1/identity/users/:id/profile -PUT /api/v1/identity/users/:id/profile -POST /api/v1/identity/users/:id/profile/avatar - -# Identity Verification -POST /api/v1/identity/verification/email/request -POST /api/v1/identity/verification/email/verify -POST /api/v1/identity/verification/phone/request -POST /api/v1/identity/verification/phone/verify - -# Organizations & Groups -GET /api/v1/identity/organizations -POST /api/v1/identity/organizations -GET /api/v1/identity/organizations/:id/groups -POST /api/v1/identity/organizations/:id/groups -GET /api/v1/identity/groups/:id/members -POST /api/v1/identity/groups/:id/members -``` - -### 4.2 Access Management APIs - -``` -# Access Requests -GET /api/v1/access/requests -POST /api/v1/access/requests -PUT /api/v1/access/requests/:id/approve -PUT /api/v1/access/requests/:id/reject - -# Access Reviews -GET 
/api/v1/access/reviews -POST /api/v1/access/reviews -POST /api/v1/access/reviews/:id/start -POST /api/v1/access/reviews/:id/complete -GET /api/v1/access/reviews/:id/items - -# Access Analytics -GET /api/v1/access/analytics/usage -GET /api/v1/access/analytics/permissions -GET /api/v1/access/analytics/risks -``` - -### 4.3 Governance APIs - -``` -# Compliance Reports -GET /api/v1/governance/compliance/reports -POST /api/v1/governance/compliance/reports/generate -GET /api/v1/governance/compliance/reports/:id/export - -# Policy Governance -GET /api/v1/governance/policies/templates -POST /api/v1/governance/policies/templates -GET /api/v1/governance/policies/:id/versions -POST /api/v1/governance/policies/:id/test - -# Risk Management -GET /api/v1/governance/risk/scores -GET /api/v1/governance/risk/scores/:userId -POST /api/v1/governance/risk/calculate - -# Reporting -GET /api/v1/governance/reports/access-summary -GET /api/v1/governance/reports/user-activity -GET /api/v1/governance/reports/security-events -``` - ---- - -## 5. 
Implementation Roadmap - -### Phase 1: Foundation (Weeks 1-4) -- ✅ Migrate từ auth-service sang iam-service -- 🔄 Tổ chức lại modules theo IAM structure -- 🔄 Mở rộng database schema với identity models -- 🔄 Implement User Profile module - -### Phase 2: Identity Management (Weeks 5-8) -- 🔄 User lifecycle management -- 🔄 Identity verification (email, phone, document) -- 🔄 Organization & Group management -- 🔄 Profile management with extended attributes - -### Phase 3: Access Management (Weeks 9-12) -- 🔄 Access request/approval workflows -- 🔄 Access review & certification system -- 🔄 Access analytics -- 🔄 Privileged Access Management (PAM) - -### Phase 4: Governance (Weeks 13-16) -- 🔄 Compliance reporting engine -- 🔄 Policy governance & versioning -- 🔄 Risk scoring & management -- 🔄 Reporting dashboards - -### Phase 5: Advanced Features (Weeks 17-20) -- 🔄 Workflow engine -- 🔄 Advanced analytics & ML-based insights -- 🔄 Integration APIs (SCIM, LDAP sync) -- 🔄 Performance optimization & scaling - ---- - -## 6. Lợi Ích Của IAM Service - -### 6.1 Cho Doanh Nghiệp -- ✅ Tuân thủ (GDPR, SOC2, ISO 27001) -- ✅ Quản lý rủi ro bảo mật tốt hơn -- ✅ Tự động hóa quy trình quản lý truy cập -- ✅ Báo cáo và audit trail đầy đủ -- ✅ Hỗ trợ multi-tenant/organization - -### 6.2 Cho Developers -- ✅ API thống nhất cho identity & access -- ✅ Workflow engine linh hoạt -- ✅ Extensible architecture -- ✅ Comprehensive documentation -- ✅ SDK support - -### 6.3 Cho End Users -- ✅ Self-service profile management -- ✅ Transparent access requests -- ✅ Better user experience -- ✅ Enhanced security với MFA & verification - ---- - -## 7. Migration Strategy - -### Từ Auth Service → IAM Service - -1. **Rename Service**: `services/auth-service` → `services/iam-service` -2. **Update Package Name**: `@goodgo/auth-service` → `@goodgo/iam-service` -3. **Update Routes**: - - Giữ backward compatibility với `/api/v1/auth/*` - - Thêm routes mới cho `/api/v1/identity/*`, `/api/v1/access/*`, `/api/v1/governance/*` -4. 
**Database Migration**: - - Thêm schema mới cho identity, access, governance - - Giữ nguyên các tables hiện có (backward compatible) -5. **Gradual Rollout**: - - Phase 1: Deploy cùng auth-service (dual deployment) - - Phase 2: Migrate clients dần dần - - Phase 3: Deprecate auth-service khi migration hoàn tất - ---- - -## Kết Luận - -Đề xuất này mở rộng `auth-service` thành `IAM Service` với đầy đủ các tính năng: -- **Identity Management** đầy đủ -- **Access Management** nâng cao -- **Governance & Compliance** toàn diện -- **Workflow automation** linh hoạt - -Điều này biến service từ authentication/authorization cơ bản thành một IAM platform toàn diện, phù hợp cho enterprise. diff --git a/apps/web-docs/content/docs/en/architecture/microservices-communication.md b/apps/web-docs/content/docs/en/architecture/microservices-communication.md deleted file mode 100644 index 3a110c87..00000000 --- a/apps/web-docs/content/docs/en/architecture/microservices-communication.md +++ /dev/null @@ -1,363 +0,0 @@ -# Microservices Communication - -> Communication patterns and protocols for inter-service communication - -## Overview Diagram - -```mermaid -graph TD - Client[Client Apps] --> Gateway[API Gateway
Traefik] - - Gateway --> ServiceA[Service A] - Gateway --> ServiceB[Service B] - - ServiceA <-->|REST/HTTP| ServiceB - ServiceA -->|Events| Kafka[Kafka Broker] - ServiceB <-.->|Sub| Kafka - - ServiceA --> SD[Service Discovery
Docker DNS / K8s DNS] - ServiceB --> SD - - style Gateway fill:#e1f5ff - style Kafka fill:#fff4e1 - style SD fill:#d4edda -``` - -## System Context - -```mermaid -C4Context - title System Context Diagram for GoodGo Microservices Communication - - Person(client_web, "Web Client", "Browser/Mobile App") - Person(client_api, "API Consumer", "External API clients") - - System_Boundary(goodgo, "GoodGo Platform") { - System(gateway, "API Gateway", "Traefik - Routes requests to services") - System(services, "Microservices", "IAM, User, Order, Product services") - System(kafka, "Event Bus", "Kafka - Async communication") - System(discovery, "Service Discovery", "Docker DNS / K8s DNS") - } - - System_Ext(db, "Database", "Neon PostgreSQL") - System_Ext(cache, "Cache", "Redis") - System_Ext(external_api, "External APIs", "Payment, Email, SMS") - - Rel(client_web, gateway, "Uses", "HTTPS") - Rel(client_api, gateway, "Calls", "HTTPS/REST") - Rel(gateway, services, "Routes to", "HTTP") - Rel(services, kafka, "Pub/Sub", "Kafka Protocol") - Rel(services, discovery, "Lookup", "DNS") - Rel(services, db, "Reads/Writes", "PostgreSQL") - Rel(services, cache, "Gets/Sets", "Redis Protocol") - Rel(services, external_api, "Integrates", "HTTPS") -``` - -The GoodGo platform uses a microservices architecture where all client requests flow through an API Gateway (Traefik), which routes them to appropriate microservices. Services communicate synchronously via REST/HTTP for request-response patterns and asynchronously via Kafka for event-driven workflows. Service discovery is handled by Docker DNS in local environments and Kubernetes DNS in production. 
- -## Communication Protocols - -### Protocol Comparison - -| Protocol | Latency | Complexity | Use Case | -|----------|---------|------------|----------| -| **REST** | Medium | Low | External APIs, CRUD | -| **gRPC** | Low | High | Internal high-performance | -| **Events** | Async | Medium | Decoupled workflows | -| **GraphQL** | Medium | Medium | Complex data fetching | - -### REST/HTTP Pattern - -```mermaid -sequenceDiagram - participant Client - participant Gateway as API Gateway - participant ServiceA as Service A - participant ServiceB as Service B - - Client->>Gateway: GET /api/v1/users/123 - Gateway->>ServiceA: Forward Request - ServiceA->>ServiceB: GET /internal/permissions/123 - ServiceB-->>ServiceA: Permissions - ServiceA-->>Gateway: User + Permissions - Gateway-->>Client: JSON Response -``` - -Synchronous request-response using HTTP/REST. - -**Implementation**: -```typescript -// Service-to-service HTTP client -import axios from 'axios'; - -export class UserServiceClient { - private client = axios.create({ - baseURL: process.env.USER_SERVICE_URL, - timeout: 5000, - headers: { - 'x-service-auth': process.env.INTERNAL_API_KEY - } - }); - - async getUser(userId: string): Promise { - const response = await this.client.get(`/users/${userId}`); - return response.data; - } -} -``` - -### Event-Driven Pattern - -```mermaid -sequenceDiagram - participant ServiceA - participant Kafka - participant ServiceB - participant ServiceC - - ServiceA->>Kafka: Publish: user.created - Kafka->>ServiceB: Deliver event - Kafka->>ServiceC: Deliver event - - par Parallel Processing - ServiceB->>ServiceB: Send welcome email - ServiceC->>ServiceC: Create user profile - end -``` - -Asynchronous event-based communication via Kafka. 
- -### Service Discovery - -**Local (Docker Compose)**: -```yaml -# Services discover via Docker DNS -http://service-name:port -http://iam-service:3001 -``` - -**Kubernetes**: -```yaml -# Services discover via K8s DNS -http://service-name.namespace.svc.cluster.local -http://iam-service.default.svc.cluster.local:3001 -``` - -## API Gateway Pattern - -```mermaid -graph LR - Client --> Gateway[API Gateway
Traefik] - - subgraph "Gateway Features" - Gateway --> Route[Routing] - Gateway --> LB[Load Balancing] - Gateway --> Auth[Authentication] - Gateway --> Rate[Rate Limiting] - Gateway --> CORS - end - - Route --> Service1[Service 1] - Route --> Service2[Service 2] - LB --> Service1A[Instance A] - LB --> Service1B[Instance B] - - style Gateway fill:#e1f5ff -``` - -Single entry point for all client requests with routing, auth, rate limiting. - -## Performance Characteristics - -Performance expectations and optimization strategies for inter-service communication. - -| Metric | Target | Notes | -|--------|--------|-------| -| **REST API Response Time** | < 100ms | P95 for internal service-to-service calls | -| **Event Publishing Latency** | < 50ms | Time to publish to Kafka | -| **Service Discovery Lookup** | < 10ms | DNS resolution time | -| **Gateway Routing Overhead** | < 20ms | Additional latency added by Traefik | -| **Throughput** | 10,000 req/s | Per service instance | -| **Kafka Event Processing** | < 500ms | P95 end-to-end event processing | - -**Optimization Strategies**: -- **Connection Pooling**: Reuse HTTP connections between services -- **Circuit Breaker**: Prevent cascading failures with Opossum library -- **Retry with Backoff**: Exponential backoff for transient failures -- **Compression**: Enable gzip for large payloads -- **Caching**: Cache service discovery results and responses - -## Security Considerations - -Security measures for protecting inter-service communication. 
- -### Service-to-Service Authentication - -- **Internal API Keys**: Services authenticate using `x-service-auth` header -- **JWT Tokens**: For user context propagation between services -- **Mutual TLS (mTLS)**: Optional for production environments (Kubernetes service mesh) - -### Network Security - -- **Network Policies**: Kubernetes NetworkPolicies restrict service-to-service traffic -- **Service Mesh**: Istio/Linkerd for advanced security policies (optional) -- **Private Networks**: Services communicate within private VPC/cluster network - -### Data Protection - -- **Encryption in Transit**: TLS 1.2+ for all external communication -- **Event Payload Encryption**: Sensitive data encrypted before publishing to Kafka -- **API Gateway**: Traefik handles SSL termination and request validation - -### Security Best Practices - -```typescript -// Service client with authentication -export class SecureServiceClient { - private client = axios.create({ - baseURL: process.env.SERVICE_URL, - timeout: 5000, - headers: { - 'x-service-auth': process.env.INTERNAL_API_KEY, - 'x-correlation-id': generateCorrelationId() - }, - httpsAgent: new https.Agent({ - rejectUnauthorized: true // Verify SSL certificates - }) - }); -} -``` - -## Deployment - -How microservices communication is deployed and scaled across environments. 
- -```mermaid -graph TD - subgraph "Production Cluster" - LB[Load Balancer] --> Gateway[API Gateway\n3 replicas] - - Gateway --> ServiceA1[Service A\nInstance 1] - Gateway --> ServiceA2[Service A\nInstance 2] - Gateway --> ServiceB1[Service B\nInstance 1] - Gateway --> ServiceB2[Service B\nInstance 2] - - ServiceA1 & ServiceA2 --> Kafka[Kafka Cluster\n3 brokers] - ServiceB1 & ServiceB2 --> Kafka - - ServiceA1 & ServiceA2 --> DB[(PostgreSQL\nPrimary + Replica)] - ServiceB1 & ServiceB2 --> DB - - ServiceA1 & ServiceA2 --> Redis[(Redis Cluster\n3 nodes)] - ServiceB1 & ServiceB2 --> Redis - end - - style Gateway fill:#e1f5ff - style Kafka fill:#fff4e1 - style DB fill:#d4edda - style Redis fill:#ffe1e1 -``` - -### Deployment Environments - -| Environment | Gateway | Services | Kafka | Service Discovery | -|-------------|---------|----------|-------|-------------------| -| **Local** | Traefik (Docker) | Single instance per service | Single broker | Docker DNS | -| **Staging** | Traefik (2 replicas) | 2 replicas per service | 3 brokers | Kubernetes DNS | -| **Production** | Traefik (3+ replicas) | 3+ replicas per service | 5+ brokers | Kubernetes DNS + Service Mesh | - -### Scaling Strategy - -- **Horizontal Pod Autoscaler (HPA)**: Auto-scale based on CPU/memory -- **Kafka Partitions**: Scale event processing by increasing partitions -- **Load Balancing**: Kubernetes Service load balances across pod replicas -- **Gateway Scaling**: Traefik scales independently from backend services - -## Monitoring & Observability - -How to monitor and observe microservices communication. 
- -### Key Metrics - -**Service-to-Service Metrics**: -- `http_request_duration_seconds` - Request latency histogram -- `http_requests_total` - Total requests counter -- `http_request_errors_total` - Failed requests counter -- `service_client_timeout_total` - Timeout counter - -**Gateway Metrics**: -- `traefik_service_requests_total` - Requests per service -- `traefik_service_request_duration_seconds` - Routing latency -- `traefik_service_retries_total` - Retry attempts - -**Kafka Metrics**: -- `kafka_producer_record_send_total` - Events published -- `kafka_consumer_lag` - Consumer lag -- `kafka_consumer_records_consumed_total` - Events consumed - -### Health Checks - -**Service Endpoints**: -```typescript -// Liveness - is service running? -app.get('/health/live', (req, res) => { - res.json({ status: 'ok', timestamp: new Date().toISOString() }); -}); - -// Readiness - can service handle traffic? -app.get('/health/ready', async (req, res) => { - const checks = { - database: await checkDatabase(), - redis: await checkRedis(), - kafka: await checkKafka() - }; - - const healthy = Object.values(checks).every(c => c); - res.status(healthy ? 
200 : 503).json({ ready: healthy, checks }); -}); -``` - -**Kubernetes Probes**: -```yaml -livenessProbe: - httpGet: - path: /health/live - port: 3000 - initialDelaySeconds: 30 - periodSeconds: 10 - -readinessProbe: - httpGet: - path: /health/ready - port: 3000 - initialDelaySeconds: 5 - periodSeconds: 5 -``` - -### Distributed Tracing - -- **OpenTelemetry**: Instrument all service-to-service calls -- **Jaeger**: Visualize distributed traces -- **Correlation IDs**: Propagate via `x-correlation-id` header for request tracking - -### Monitoring Dashboard - -**Grafana Panels**: -- Service Communication Overview (request rate, latency, errors) -- Gateway Performance (routing time, backend health) -- Event Bus Health (Kafka lag, throughput) -- Service Dependencies (service map from traces) - -## Related Documentation - -- [System Design](./system-design.md) - Overall architecture -- [Event-Driven Architecture](./event-driven-architecture.md) - Event patterns -- [API Gateway Advanced](../skills/api-gateway-advanced.md) - Gateway patterns -- [Inter-Service Communication](../skills/inter-service-communication.md) - Communication patterns -- [Resilience Patterns](../skills/resilience-patterns.md) - Circuit breaker, retry - ---- - -**Last Updated**: 2026-01-07 -**Author**: VelikHo (hongochai10@icloud.com) -**Reviewers**: To be assigned diff --git a/apps/web-docs/content/docs/en/architecture/observability-architecture.md b/apps/web-docs/content/docs/en/architecture/observability-architecture.md deleted file mode 100644 index f0d4dcaa..00000000 --- a/apps/web-docs/content/docs/en/architecture/observability-architecture.md +++ /dev/null @@ -1,334 +0,0 @@ -# Observability Architecture / Kiến trúc Khả năng Quan sát - -> **EN**: Comprehensive observability with metrics, logging, and tracing -> **VI**: Khả năng quan sát toàn diện với metrics, logging và tracing - -## Overview Diagram / Sơ đồ Tổng quan - -```mermaid -graph TD - subgraph "Services" - Service1[Service A] - 
Service2[Service B] - end - - subgraph "Metrics" - Service1 -->|/metrics| Prom[Prometheus] - Service2 -->|/metrics| Prom - Prom --> Grafana[Grafana
Dashboards] - end - - subgraph "Logging" - Service1 -->|JSON Logs| Loki - Service2 -->|JSON Logs| Loki - Loki --> GrafanaLogs[Grafana
Log Explorer] - end - - subgraph "Tracing" - Service1 -->|Spans| Jaeger - Service2 -->|Spans| Jaeger - Jaeger --> JaegerUI[Jaeger UI] - end - - style Prom fill:#d4edda - style Loki fill:#fff4e1 - style Jaeger fill:#e1f5ff -``` - -## Three Pillars of Observability / Ba Trụ cột - -### 1. Metrics (Prometheus + Grafana) - -```mermaid -graph LR - Service[Service] -->|Expose /metrics| Prom[Prometheus] - Prom -->|Scrape every 15s| Metrics[Time Series DB] - Metrics --> Grafana[Grafana] - Grafana --> Dashboard1[Request Dashboard] - Grafana --> Dashboard2[Error Dashboard] - Grafana --> Dashboard3[Performance Dashboard] - - style Prom fill:#d4edda - style Grafana fill:#e1f5ff -``` - -**EN**: Numerical measurements over time (requests/sec, latency, errors). - -**VI**: Các phép đo số theo thời gian (requests/sec, latency, errors). - -**Implementation**: -```typescript -import { Counter, Histogram, Gauge } from 'prom-client'; - -// HTTP request metrics -export const httpRequestDuration = new Histogram({ - name: 'http_request_duration_seconds', - help: 'Duration of HTTP requests in seconds', - labelNames: ['method', 'route', 'status'], - buckets: [0.001, 0.01, 0.05, 0.1, 0.5, 1, 2, 5] -}); - -export const httpRequestTotal = new Counter({ - name: 'http_requests_total', - help: 'Total HTTP requests', - labelNames: ['method', 'route', 'status'] -}); - -export const activeRequests = new Gauge({ - name: 'http_requests_active', - help: 'Number of active HTTP requests' -}); - -// Middleware to track metrics -export function metricsMiddleware(req, res, next) { - const start = Date.now(); - activeRequests.inc(); - - res.on('finish', () => { - const duration = (Date.now() - start) / 1000; - - httpRequestDuration.observe( - { method: req.method, route: req.route?.path || req.path, status: res.statusCode }, - duration - ); - - httpRequestTotal.inc({ - method: req.method, - route: req.route?.path || req.path, - status: res.statusCode - }); - - activeRequests.dec(); - }); - - next(); -} -``` - 
-### 2. Logging (Winston + Loki) - -```mermaid -sequenceDiagram - participant Service - participant Winston as Winston Logger - participant Loki - participant Grafana - - Service->>Winston: Log event - Winston->>Winston: Format JSON - Winston->>Winston: Add metadata
(correlation ID, trace ID) - Winston->>Loki: Push logs - Loki->>Loki: Index & store - - User->>Grafana: Query logs - Grafana->>Loki: LogQL query - Loki-->>Grafana: Log results -``` - -**EN**: Structured logging with correlation IDs for request tracing. - -**VI**: Structured logging với correlation IDs để tracing requests. - -**Implementation**: -```typescript -import winston from 'winston'; - -export const logger = winston.createLogger({ - level: process.env.LOG_LEVEL || 'info', - format: winston.format.combine( - winston.format.timestamp(), - winston.format.errors({ stack: true }), - winston.format.json() - ), - defaultMeta: { - service: process.env.SERVICE_NAME || 'unknown-service', - environment: process.env.NODE_ENV || 'development' - }, - transports: [ - new winston.transports.Console(), - // Loki transport (if configured) - ] -}); - -// Logger middleware -export function loggerMiddleware(req, res, next) { - const correlationId = req.headers['x-correlation-id'] || generateId(); - - req.correlationId = correlationId; - req.logger = logger.child({ correlationId }); - - req.logger.info('Incoming request', { - method: req.method, - path: req.path, - ip: req.ip - }); - - res.on('finish', () => { - req.logger.info('Request completed', { - method: req.method, - path: req.path, - status: res.statusCode, - duration: Date.now() - req.startTime - }); - }); - - next(); -} -``` - -### 3. Tracing (OpenTelemetry + Jaeger) - -```mermaid -graph LR - Request[Incoming Request] --> Trace[Create Trace] - Trace --> SpanA[Span: HTTP Request] - SpanA --> SpanB[Span: DB Query] - SpanA --> SpanC[Span: Cache Check] - SpanA --> SpanD[Span: External API] - - SpanB --> Jaeger[Jaeger] - SpanC --> Jaeger - SpanD --> Jaeger - - Jaeger --> Timeline[Trace Timeline] - - style Trace fill:#e1f5ff - style Jaeger fill:#d4edda -``` - -**EN**: Distributed tracing to track requests across services. - -**VI**: Distributed tracing để track requests giữa các services. 
- -**Implementation**: -```typescript -import { trace, SpanStatusCode } from '@opentelemetry/api'; - -// Create traced function -export function traced( - name: string, - fn: () => Promise -): Promise { - const tracer = trace.getTracer('app'); - const span = tracer.startSpan(name); - - return fn() - .then(result => { - span.setStatus({ code: SpanStatusCode.OK }); - return result; - }) - .catch(error => { - span.setStatus({ - code: SpanStatusCode.ERROR, - message: error.message - }); - span.recordException(error); - throw error; - }) - .finally(() => { - span.end(); - }); -} - -// Usage -async getUserWithTracing(userId: string): Promise { - return traced('getUserById', async () => { - return await userRepository.findById(userId); - }); -} -``` - -## Health Checks / Kiểm tra Sức khỏe - -```typescript -// Liveness probe - is service running? -app.get('/health/live', (req, res) => { - res.json({ status: 'ok', timestamp: new Date().toISOString() }); -}); - -// Readiness probe - is service ready for traffic? -app.get('/health/ready', async (req, res) => { - const checks = { - database: await checkDatabase(), - redis: await checkRedis(), - disk: await checkDiskSpace() - }; - - const ready = Object.values(checks).every(check => check === true); - - res.status(ready ? 
200 : 503).json({ - ready, - checks, - timestamp: new Date().toISOString() - }); -}); - -async function checkDatabase(): Promise { - try { - await prisma.$queryRaw`SELECT 1`; - return true; - } catch { - return false; - } -} -``` - -## Alerting Rules / Quy tắc Cảnh báo - -```yaml -# Prometheus alerting rules -groups: - - name: service_alerts - interval: 30s - rules: - # High error rate - - alert: HighErrorRate - expr: | - rate(http_requests_total{status=~"5.."}[5m]) > 0.05 - for: 2m - labels: - severity: warning - annotations: - summary: "High error rate detected" - description: "Error rate is {{ $value }} (> 5%)" - - # High latency - - alert: HighLatency - expr: | - histogram_quantile(0.95, http_request_duration_seconds_bucket) > 1 - for: 5m - labels: - severity: warning - annotations: - summary: "High latency detected" - description: "P95 latency is {{ $value }}s" - - # Service down - - alert: ServiceDown - expr: up == 0 - for: 1m - labels: - severity: critical - annotations: - summary: "Service is down" -``` - -## Performance Targets / Mục tiêu Hiệu suất - -| Metric | Target | Alert Threshold | -|--------|--------|-----------------| -| Response Time (P95) | < 200ms | > 500ms | -| Response Time (P99) | < 500ms | > 1s | -| Error Rate | < 1% | > 5% | -| Availability | > 99.9% | < 99% | -| Cache Hit Rate | > 80% | < 50% | - -## Related Documentation / Tài liệu Liên quan - -- [System Design](./system-design.md) - Overall architecture -- [Caching Architecture](./caching-architecture.md) - Cache metrics - ---- - -**Last Updated**: 2026-01-07 -**Author**: VelikHo (hongochai10@icloud.com) diff --git a/apps/web-docs/content/docs/en/architecture/security-architecture.md b/apps/web-docs/content/docs/en/architecture/security-architecture.md deleted file mode 100644 index 6f137d72..00000000 --- a/apps/web-docs/content/docs/en/architecture/security-architecture.md +++ /dev/null @@ -1,569 +0,0 @@ -# Security Architecture - -> Comprehensive security architecture for GoodGo 
platform with zero-trust model, RBAC, and compliance - -## Overview Diagram - -```mermaid -graph TD - Request[Client Request] --> TLS[TLS/HTTPS Layer] - TLS --> RateLimit[Rate Limiting] - RateLimit --> JWT[JWT Validation] - JWT --> RBAC[RBAC Authorization] - RBAC --> ZeroTrust[Zero-Trust Checks] - ZeroTrust --> Service[Service Logic] - - Service --> Encrypt[Data Encryption
AES-256-GCM] - Encrypt --> DB[(Encrypted Data)] - - Service --> Audit[Audit Logging] - Audit --> AuditDB[(Audit Trail
7-year retention)] - - style TLS fill:#d4edda - style JWT fill:#e1f5ff - style Encrypt fill:#f8d7da - style Audit fill:#fff4e1 -``` - -## Architecture Description - -The GoodGo Security Architecture implements defense-in-depth with multiple security layers: - -**Security Principles**: -1. **Zero Trust**: Never trust, always verify -2. **Least Privilege**: Minimum required permissions -3. **Defense in Depth**: Multiple security layers -4. **Audit Everything**: Complete audit trail -5. **Encryption**: Data encrypted at rest and in transit - -**Key Components**: -- JWT Authentication (15min access, 7d refresh) -- RBAC + ABAC Authorization -- Zero-Trust Device Validation -- AES-256-GCM Encryption -- Event Sourcing for Audit Trail -- Compliance (GDPR, SOC2, ISO27001, HIPAA) - -## Authentication Flow - -```mermaid -sequenceDiagram - participant Client - participant API as API Gateway - participant IAM as IAM Service - participant DB as Database - participant Cache as Redis - - Client->>API: Login Request
(email + password) - API->>IAM: Forward Request - IAM->>DB: Verify Credentials - DB-->>IAM: User + Hash - IAM->>IAM: bcrypt.compare()
(cost 12) - - alt Valid Credentials - IAM->>IAM: Generate Tokens
(Access + Refresh) - IAM->>DB: Store Refresh Token
(hashed SHA-256) - IAM->>Cache: Cache Permissions
(5min TTL) - IAM-->>API: Tokens + User - API-->>Client: Set httpOnly Cookies - else Invalid - IAM-->>Client: 401 Unauthorized - end -``` - -**Authentication Details**: - -**1. Password Hashing**: -- Algorithm: bcrypt with cost factor 12 -- Never store plaintext passwords -- Minimum password: 8 chars with complexity rules - -**2. JWT Tokens**: -- Access Token: 15 minutes expiry -- Refresh Token: 7 days expiry -- Algorithm: RS256 (asymmetric signing) -- Payload: userId, roles, permissions - -**3. Token Storage**: -- Access: httpOnly cookie (secure, sameSite) -- Refresh: Database SHA-256 hash -- Rotation: New refresh token on each use - -**4. MFA Support**: -- TOTP (Time-based One-Time Password) -- Backup codes (10 single-use) -- Recovery email verification - -## Authorization Model - -```mermaid -graph TD - User[User] --> Roles[Roles] - User --> DirectPerms[Direct Permissions] - - Roles --> RolePerms[Role Permissions] - - RolePerms --> Check{Permission Check} - DirectPerms --> Check - - Check -->|Granted| Resource[Access Resource] - Check -->|Denied| Reject[403 Forbidden] - - subgraph "Permission Model" - Perm[Permission
resource:action:scope] - end - - style Check fill:#e1f5ff - style Perm fill:#fff4e1 -``` - -**RBAC (Role-Based Access Control)**: - -**1. Role Hierarchy**: -``` -SuperAdmin > OrgAdmin > Manager > User > Guest -``` - -**2. Permission Format**: `resource:action:scope` -- Resource: `users`, `roles`, `permissions` -- Action: `create`, `read`, `update`, `delete` -- Scope: `own`, `org`, `global` - -**Examples**: -- `users:read:own` - Read own user profile -- `users:update:org` - Update users in organization -- `roles:create:global` - Create roles globally - -**3. Permission Caching**: -```typescript -// Cache key: user:{userId}:permissions -// TTL: 5 minutes -// Invalidate on: role change, permission change -``` - -## Zero-Trust Architecture - -```mermaid -graph TD - Request[Request] --> Device[Device Fingerprint] - Device --> IP[IP Address Validation] - IP --> Behavior[Behavioral Analysis] - Behavior --> Session[Session Binding] - - Session -->|Valid| Allow[Allow Request] - Session -->|Suspicious| MFA[Require MFA] - Session -->|Anomaly| Block[Block + Alert] - - style Block fill:#f8d7da - style MFA fill:#fff3cd - style Allow fill:#d4edda -``` - -**Zero-Trust Components**: - -**1. Device Fingerprinting**: -- Browser: User-Agent, Canvas, WebGL -- Screen resolution, timezone, language -- Plugin detection, fonts available -- Hash fingerprint → Store with session - -**2. IP Address Validation**: -- Whitelist known IPs per user -- Alert on new IP + require MFA -- Block suspicious IPs (VPN, Tor) - -**3. Behavioral Analysis**: -- Login patterns (time, location) -- API usage patterns -- Failed auth attempts -- Alert on anomalies - -**4. Session Binding**: -- Bind session to device fingerprint -- Bind session to IP address -- Invalidate on mismatch - -## Data Protection - -**Encryption Strategy**: - -**1. Data at Rest**: -- PII: AES-256-GCM encryption -- Passwords: bcrypt (cost 12) -- Tokens: SHA-256 hash -- Keys: Environment variables + K8s secrets - -**2. 
Data in Transit**: -- TLS 1.2+ for all communications -- HTTPS enforcement -- Certificate pinning (mobile clients) - -**3. Key Management**: -- Unique key per encryption operation -- 32+ character ENCRYPTION_KEY -- Rotate keys quarterly -- Never hardcode secrets - -## Compliance & Audit - -**Compliance Requirements**: - -**1. GDPR**: -- Right to erasure (soft delete + hard delete after 90 days) -- Data portability (export user data) -- Consent management -- Breach notification (72 hours) - -**2. SOC2**: -- Access controls (RBAC) -- Encryption at rest and in transit -- Audit logging (7-year retention) -- Incident response plan - -```typescript -// Event sourcing for all auth events -{ - eventType: 'auth.login.success', - userId: 'user_123', - timestamp: '2024-01-15T10:30:00Z', - ipAddress: '192.168.1.1', - deviceFingerprint: 'fp_xyz', - metadata: {...} -} -``` - -## System Context - -```mermaid -C4Context - title Security Architecture Context - - Person(user, "User", "End user accessing platform") - Person(admin, "Admin", "System administrator") - Person(attacker, "Attacker", "Potential threat actor") - - System(iam, "IAM Service", "Authentication & Authorization") - - System_Ext(db, "Neon PostgreSQL", "Encrypted user credentials & sessions") - System_Ext(cache, "Redis", "Permission & session cache") - System_Ext(audit, "Audit Service", "Security event logging") - System_Ext(mfa, "MFA Provider", "TOTP verification") - System_Ext(monitoring, "Security Monitoring", "SIEM & alerting") - - Rel(user, iam, "Authenticates", "HTTPS + TLS 1.2+") - Rel(admin, iam, "Manages permissions", "HTTPS + TLS 1.2+") - Rel(attacker, iam, "Blocked by security layers", "") - - Rel(iam, db, "Stores credentials", "PostgreSQL + TLS") - Rel(iam, cache, "Caches permissions", "Redis + TLS") - Rel(iam, audit, "Logs security events", "Kafka") - Rel(iam, mfa, "Verifies MFA", "HTTPS") - Rel(iam, monitoring, "Sends security metrics", "Prometheus + Loki") -``` - -**Context Description**: -- **IAM 
Service**: Central authentication and authorization -- **Database**: Stores encrypted credentials, sessions, permissions -- **Cache**: Caches permissions and sessions to reduce database load -- **Audit Service**: Receives and stores all security events -- **MFA Provider**: External TOTP verification service (Google Authenticator compatible) -- **Security Monitoring**: SIEM (Security Information and Event Management) and alerting - -## Database Architecture - -```mermaid -erDiagram - User ||--o{ Session : has - User ||--o{ UserRole : has - User ||--o{ UserPermission : has - User ||--o{ MFADevice : has - User ||--o{ LoginHistory : has - User ||--o{ DeviceFingerprint : has - - Role ||--o{ UserRole : assigned_to - Role ||--o{ RolePermission : has - - Permission ||--o{ RolePermission : granted_to - Permission ||--o{ UserPermission : granted_to - - Organization ||--o{ User : contains - Organization ||--o{ Role : defines - - User { - string id PK "CUID" - string email UK "Unique, indexed" - string passwordHash "bcrypt cost 12" - string organizationId FK - boolean mfaEnabled "MFA required?" - datetime lastLoginAt "Tracking" - datetime createdAt "Timestamp" - datetime updatedAt "Timestamp" - datetime deletedAt "Soft delete" - } - - Session { - string id PK "CUID" - string userId FK - string refreshTokenHash "SHA-256" - string deviceFingerprint "Hashed" - string ipAddress "IPv4/IPv6" - string userAgent "Browser info" - datetime expiresAt "7 days TTL" - datetime lastActivityAt "Tracking" - datetime createdAt "Timestamp" - } - - Role { - string id PK "CUID" - string name "role-name" - string organizationId FK - int hierarchy "Priority level" - boolean isSystem "Built-in?" 
- datetime createdAt "Timestamp" - } - - Permission { - string id PK "CUID" - string resource "users, roles, etc" - string action "create, read, update, delete" - string scope "own, org, global" - datetime createdAt "Timestamp" - } - - MFADevice { - string id PK "CUID" - string userId FK - string type "totp, backup" - string secret "Encrypted TOTP secret" - boolean verified "Verified?" - datetime lastUsedAt "Tracking" - datetime createdAt "Timestamp" - } - - LoginHistory { - string id PK "CUID" - string userId FK - boolean success "Success/Failure" - string ipAddress "IPv4/IPv6" - string deviceFingerprint "Hashed" - string failureReason "If failed" - datetime timestamp "Event time" - } - - DeviceFingerprint { - string id PK "CUID" - string userId FK - string fingerprint "Hashed" - boolean trusted "Auto-approved?" - datetime firstSeenAt "First use" - datetime lastSeenAt "Last use" - } -``` - -**Description**: -- **User**: Stores hashed credentials, MFA settings, organization membership -- **Session**: Stores hashed refresh tokens, device fingerprint, IP tracking -- **Role & Permission**: RBAC hierarchy with system roles and custom roles -- **MFADevice**: TOTP secrets (encrypted), backup codes -- **LoginHistory**: Audit trail for all login attempts (success/failure) -- **DeviceFingerprint**: Trusted device tracking for zero-trust model - -**Database Security**: -- Password hashes: bcrypt with cost factor 12 -- Token hashes: SHA-256 -- MFA secrets: AES-256-GCM encryption -- Soft deletes: `deletedAt` field, hard delete after 90 days (GDPR) -- Indexes: email (unique), userId (foreign keys), timestamps - -## Design Decisions - -### Decision 1: JWT with RS256 (Asymmetric) - -**Context**: Need stateless authentication with ability to verify tokens in multiple services - -**Decision**: Use JWT with RS256 (RSA asymmetric signing) instead of HS256 (HMAC symmetric) - -**Consequences**: -- ✅ **Positive**: - - Services can verify tokens with public key, don't need secret - - 
Easier key rotation (only distribute new public key) - - Higher security (private key only in IAM service) - - Compliance: Clear audit trail of who signs tokens -- ❌ **Negative**: - - Slightly slower than HS256 (~10-20% slower) - - More complex key management - - Public/private key pair must be carefully protected - -**Alternatives**: HS256 (symmetric), EdDSA, OAuth 2.0 with Opaque Tokens - -### Decision 2: Zero-Trust Model with Device Fingerprinting - -**Context**: Need to protect against credential theft, session hijacking, and unauthorized access - -**Decision**: Implement zero-trust model with device fingerprinting, IP validation, behavioral analysis - -**Consequences**: -- ✅ **Positive**: - - Detect anomalies (new device, new IP, unusual behavior) - - Increased security by detecting and blocking suspicious activities - - Compliance: SOC2, ISO27001 requirements - - User experience: Auto-approve trusted devices -- ❌ **Negative**: - - Higher complexity - - Potential false positives (legitimate users blocked) - - Performance overhead (fingerprint hash, IP check) - - Privacy concerns (tracking devices, IPs) - -**Alternatives**: Basic authentication only, IP whitelist only, MFA required for all - -### Decision 3: Event Sourcing for Audit Trail - -**Context**: Need immutable audit trail for compliance (GDPR, SOC2, HIPAA) and security forensics - -**Decision**: Use event sourcing pattern to store all auth/security events - -**Consequences**: -- ✅ **Positive**: - - Immutable audit trail (cannot modify/delete) - - Complete history of all security events - - Compliance: GDPR (7-year retention), SOC2, HIPAA - - Security forensics: Trace back attacks, breaches - - Replay events to reconstruct state -- ❌ **Negative**: - - High storage cost (retain 7 years) - - Complexity in event schema versioning - - Performance: Event publishing overhead - - Data privacy: Must anonymize PII after retention period - -**Alternatives**: Database audit logs only, External SIEM only, No audit 
trail - -## Performance Characteristics - -| Metric | Target | Notes | -|--------|--------|-------| -| **Login Time (P95)** | < 500ms | Including bcrypt verification | -| **Login Time (P99)** | < 1s | Peak load | -| **Token Generation (P95)** | < 50ms | JWT sign with RS256 | -| **Token Verification (P95)** | < 10ms | JWT verify with public key | -| **Permission Check (P95)** | < 5ms | From cache (L1 or L2) | -| **Permission Check (Cache Miss)** | < 50ms | Database query | -| **MFA Verification (P95)** | < 100ms | TOTP validation | -| **Session Lookup (P95)** | < 10ms | Redis cache | -| **Password Hash (P95)** | < 200ms | bcrypt cost 12 | -| **Device Fingerprint Hash** | < 5ms | SHA-256 | -| **Failed Login Rate Limit** | 5 attempts / 15min | Per user | -| **Auth Throughput** | 500 req/s | Per IAM instance | - -**Performance Optimizations**: -- **Permission Caching**: L1 (memory) + L2 (Redis), TTL 5 minutes -- **Token Caching**: Cache public key in memory for JWT verification -- **Connection Pooling**: Reuse database connections -- **Async Operations**: Event publishing, audit logging (fire-and-forget) -- **Rate Limiting**: Prevent brute force attacks, reduce load -- **Horizontal Scaling**: Multiple IAM service instances - -## Deployment - -```mermaid -graph TD - subgraph "Security Layer" - LB[Load Balancer
<br/>TLS Termination] - WAF[WAF / Firewall<br/>Rate Limiting<br/>DDoS Protection] - end - - subgraph "IAM Service Layer" - IAM1[IAM Service Pod 1<br/>Stateless] - IAM2[IAM Service Pod 2<br/>Stateless] - IAM3[IAM Service Pod 3<br/>Stateless] - end - - subgraph "Data Layer" - DB[(Neon PostgreSQL<br/>Encrypted at Rest)] - Cache[(Redis Cluster<br/>TLS Enabled)] - Vault[Secrets Manager<br/>
K8s Secrets] - end - - subgraph "Security Monitoring" - SIEM[SIEM / Security Monitoring] - Alerts[Alerting System] - end - - Client[Clients] --> LB - LB --> WAF - WAF --> IAM1 - WAF --> IAM2 - WAF --> IAM3 - - IAM1 --> DB - IAM1 --> Cache - IAM1 --> Vault - - IAM2 --> DB - IAM2 --> Cache - IAM2 --> Vault - - IAM3 --> DB - IAM3 --> Cache - IAM3 --> Vault - - IAM1 -.->|Security Events| SIEM - IAM2 -.->|Security Events| SIEM - IAM3 -.->|Security Events| SIEM - - SIEM -.->|Alerts| Alerts - - style LB fill:#d4edda - style WAF fill:#fff3cd - style DB fill:#f0e1ff - style Cache fill:#fff4e1 - style Vault fill:#f8d7da - style SIEM fill:#e1f5ff -``` - -**Deployment Strategy**: - -**Security Deployment**: -- **TLS 1.2+ Enforcement**: All connections require TLS -- **Network Policies (K8s)**: Deny all by default, whitelist specific services -- **Pod Security Policies**: Non-root user, read-only filesystem, no privilege escalation -- **Secrets Management**: Kubernetes secrets with encryption at rest -- **Image Scanning**: Trivy/Clair scan before deployment -- **RBAC (K8s)**: Least privilege for service accounts - -**Resource Allocation**: -| Component | CPU | Memory | Replicas | -|-----------|-----|--------|----------| -| **IAM Service** | 500m | 1GB | 3-10 (HPA) | -| **Redis** | 1 core | 2GB | 3 masters + 3 slaves | - -**Security Configuration**: -```yaml -# K8s Network Policy -apiVersion: networking.k8s.io/v1 -kind: NetworkPolicy -metadata: - name: iam-service-policy -spec: - podSelector: - matchLabels: - app: iam-service - policyTypes: - - Ingress - - Egress - ingress: - - from: - - podSelector: - matchLabels: - app: api-gateway - ports: - - protocol: TCP - port: 5000 - egress: - - to: - - podSelector: - matchLabels: - app: postgresql - ports: - - protocol: TCP - port: 5432 -``` diff --git a/apps/web-docs/content/docs/en/architecture/service-communication.md b/apps/web-docs/content/docs/en/architecture/service-communication.md deleted file mode 100644 index 
1b615442..00000000 --- a/apps/web-docs/content/docs/en/architecture/service-communication.md +++ /dev/null @@ -1,58 +0,0 @@ -# Service Communication - -## Communication Patterns - -### Synchronous Communication (HTTP/REST) - -Services communicate synchronously via HTTP REST APIs through Traefik API Gateway. - -**Example:** -```typescript -// Web App -> Auth Service -const response = await fetch('http://api.goodgo.vn/api/v1/auth/login', { - method: 'POST', - body: JSON.stringify({ email, password }), -}); -``` - -### Service-to-Service Communication - -Services can communicate directly via internal network: - -```typescript -// Auth Service -> Notification Service (future) -const response = await fetch('http://notification-service:5003/api/v1/notifications', { - method: 'POST', - headers: { 'X-Service-Auth': process.env.INTERNAL_API_KEY }, - body: JSON.stringify({ userId, message }), -}); -``` - -## API Gateway Routing - -Traefik routes requests based on: -- Host header (`api.goodgo.vn`) -- Path prefix (`/api/v1/auth`) - -## Error Handling - -All services follow consistent error response format: - -```json -{ - "success": false, - "error": { - "code": "AUTH_001", - "message": "Invalid credentials", - "details": {} - }, - "timestamp": "2024-01-01T00:00:00.000Z" -} -``` - -## Retry and Circuit Breaker - -Future implementation: -- Exponential backoff for retries -- Circuit breaker pattern for fault tolerance -- Fallback mechanisms diff --git a/apps/web-docs/content/docs/en/architecture/system-design.md b/apps/web-docs/content/docs/en/architecture/system-design.md deleted file mode 100644 index a8a41966..00000000 --- a/apps/web-docs/content/docs/en/architecture/system-design.md +++ /dev/null @@ -1,929 +0,0 @@ -# System Design / Thiết kế Hệ thống - -> **EN**: Comprehensive system architecture for the GoodGo Microservices Platform -> **VI**: Kiến trúc hệ thống toàn diện cho GoodGo Microservices Platform - -## System Overview / Tổng quan Hệ thống - -```mermaid -graph TD - 
subgraph "Client Layer / Tầng Client" - WebApp[Web Application<br/>Next.js 14+] - MobileApp[Mobile Application<br/>Flutter/React Native] - end - - subgraph "API Gateway Layer / Tầng API Gateway" - Traefik[Traefik Gateway<br/>Load Balancer + Routing] - end - - subgraph "Services Layer / Tầng Services" - IAM[IAM Service<br/>Authentication & Authorization] - Template[Template Service<br/>Example Microservice] - Future1[Future Service 1<br/>TBD] - Future2[Future Service 2<br/>TBD] - end - - subgraph "Data Layer / Tầng Dữ liệu" - PostgreSQL[(PostgreSQL 14+<br/>Primary Database)] - Redis[(Redis 6+<br/>Cache & Sessions)] - end - - subgraph "Observability / Khả năng quan sát" - Prometheus[Prometheus<br/>Metrics Collection] - Grafana[Grafana<br/>Metrics Visualization] - Loki[Loki<br/>Log Aggregation] - Jaeger[Jaeger<br/>
Distributed Tracing] - end - - WebApp --> Traefik - MobileApp --> Traefik - - Traefik --> IAM - Traefik --> Template - Traefik --> Future1 - Traefik --> Future2 - - IAM --> PostgreSQL - Template --> PostgreSQL - Future1 --> PostgreSQL - Future2 --> PostgreSQL - - IAM --> Redis - Template --> Redis - Future1 --> Redis - Future2 --> Redis - - IAM -.->|Metrics| Prometheus - Template -.->|Metrics| Prometheus - Prometheus --> Grafana - - IAM -.->|Logs| Loki - Template -.->|Logs| Loki - - IAM -.->|Traces| Jaeger - Template -.->|Traces| Jaeger - - style Traefik fill:#e1f5ff - style PostgreSQL fill:#f0e1ff - style Redis fill:#fff4e1 - style Prometheus fill:#d4edda -``` - -### EN: Architecture Principles - -The GoodGo Microservices Platform follows these core principles: - -1. **Service Independence**: Each microservice: - - Has its own database schema (database per service pattern) - - Can be deployed independently without affecting others - - Owns its data and exposes APIs for data access - - Uses standardized communication patterns - -2. **API Gateway Pattern**: Traefik provides: - - Single entry point for all client requests - - Path-based routing to appropriate services - - Load balancing across service instances - - SSL/TLS termination - - Rate limiting and security headers - -3. **Shared Infrastructure**: Common concerns handled by: - - Shared packages (@goodgo/logger, @goodgo/types, @goodgo/http-client) - - Centralized observability stack - - Distributed caching layer (Redis) - - Common monitoring and alerting - -4. **Infrastructure as Code**: All configurations versioned: - - Docker Compose for local development - - Kubernetes manifests for production - - Traefik dynamic configuration - - Database migrations with Prisma - -5. 
**Observability First**: Built-in monitoring: - - Prometheus metrics from all services - - Structured logging with correlation IDs - - Distributed tracing with OpenTelemetry - - Health check endpoints (liveness/readiness) - -### VI: Nguyên tắc Kiến trúc - -GoodGo Microservices Platform tuân theo các nguyên tắc cốt lõi sau: - -1. **Độc lập Service**: Mỗi microservice: - - Có schema database riêng (pattern database per service) - - Có thể deploy độc lập mà không ảnh hưởng đến các service khác - - Sở hữu dữ liệu của mình và expose APIs để truy cập dữ liệu - - Sử dụng patterns giao tiếp chuẩn hóa - -2. **Pattern API Gateway**: Traefik cung cấp: - - Điểm vào duy nhất cho tất cả client requests - - Routing dựa trên path tới các service phù hợp - - Load balancing giữa các service instances - - SSL/TLS termination - - Rate limiting và security headers - -3. **Infrastructure Chia sẻ**: Các concerns chung được xử lý bởi: - - Shared packages (@goodgo/logger, @goodgo/types, @goodgo/http-client) - - Stack observability tập trung - - Tầng caching phân tán (Redis) - - Monitoring và alerting chung - -4. **Infrastructure as Code**: Tất cả cấu hình được version: - - Docker Compose cho local development - - Kubernetes manifests cho production - - Traefik dynamic configuration - - Database migrations với Prisma - -5. **Observability First**: Monitoring tích hợp sẵn: - - Prometheus metrics từ tất cả services - - Structured logging với correlation IDs - - Distributed tracing với OpenTelemetry - - Health check endpoints (liveness/readiness) - ---- - -## Detailed Component Architecture / Kiến trúc Component Chi tiết - -### 1. Client Layer / Tầng Client - -```mermaid -graph LR - User((User)) --> WebBrowser[Web Browser] - User --> MobileDevice[Mobile Device] - - WebBrowser --> NextJS[Next.js App
<br/>Port 3000] - MobileDevice --> Flutter[Flutter App<br/>iOS/Android] - - NextJS --> APIClient[@goodgo/http-client] - Flutter --> HTTPPackage[HTTP Package] - - APIClient --> Gateway[API Gateway<br/>
localhost or api.goodgo.com] - HTTPPackage --> Gateway - - style User fill:#e1f5ff - style Gateway fill:#d4edda -``` - -**EN Components**: -- **Web Application**: Next.js 14+ with App Router - - Server-side rendering (SSR) - - Static site generation (SSG) - - API routes for BFF pattern - - Uses `@goodgo/http-client` for API calls - -- **Mobile Application**: Flutter or React Native - - Cross-platform (iOS + Android) - - Offline-first architecture (future) - - Native HTTP client - -**VI Thành phần**: -- **Web Application**: Next.js 14+ với App Router - - Server-side rendering (SSR) - - Static site generation (SSG) - - API routes cho BFF pattern - - Sử dụng `@goodgo/http-client` cho API calls - -- **Mobile Application**: Flutter hoặc React Native - - Cross-platform (iOS + Android) - - Kiến trúc offline-first (tương lai) - - Native HTTP client - ---- - -### 2. API Gateway Layer / Tầng API Gateway - -```mermaid -graph TD - Client[Client Request] --> Traefik - - subgraph "Traefik API Gateway" - Traefik[Traefik Router] --> Middlewares - - subgraph Middlewares - M1[CORS] - M2[Rate Limiting] - M3[Headers] - M4[Compression] - end - - Middlewares --> Router[Dynamic Router] - Router --> LB[Load Balancer] - end - - LB --> Service1[Service Instance 1] - LB --> Service2[Service Instance 2] - LB --> Service3[Service Instance 3] - - style Traefik fill:#e1f5ff - style Router fill:#fff4e1 - style LB fill:#d4edda -``` - -**EN: Traefik Configuration** - -**Static Configuration** (`infra/traefik/traefik.yml`): -- Entry points (HTTP: 80, HTTPS: 443) -- Docker provider for service discovery -- Certificate resolvers (Let's Encrypt) -- Dashboard configuration (port 8080) - -**Dynamic Configuration** (`infra/traefik/dynamic/`): -- Middlewares (CORS, rate limiting, security headers) -- Routes (defined via Docker labels or YAML files) -- Services (load balancing strategies) - -**Routing Pattern**: -```yaml -http: - routers: - iam-service: - rule: "PathPrefix(`/api/v1/auth`)" - service: 
iam-service - middlewares: - - cors - - rate-limit - - secure-headers -``` - -**Service Discovery**: Automatic via Docker labels: -```yaml -labels: - - "traefik.enable=true" - - "traefik.http.routers.iam.rule=PathPrefix(`/api/v1/auth`)" - - "traefik.http.services.iam.loadbalancer.server.port=3001" - - "traefik.http.services.iam.loadbalancer.healthcheck.path=/health/live" -``` - -**VI: Cấu hình Traefik** - -**Cấu hình Tĩnh** (`infra/traefik/traefik.yml`): -- Entry points (HTTP: 80, HTTPS: 443) -- Docker provider cho service discovery -- Certificate resolvers (Let's Encrypt) -- Cấu hình dashboard (port 8080) - -**Cấu hình Động** (`infra/traefik/dynamic/`): -- Middlewares (CORS, rate limiting, security headers) -- Routes (định nghĩa qua Docker labels hoặc YAML files) -- Services (chiến lược load balancing) - -**Pattern Routing**: -```yaml -http: - routers: - iam-service: - rule: "PathPrefix(`/api/v1/auth`)" - service: iam-service - middlewares: - - cors - - rate-limit - - secure-headers -``` - -**Service Discovery**: Tự động qua Docker labels: -```yaml -labels: - - "traefik.enable=true" - - "traefik.http.routers.iam.rule=PathPrefix(`/api/v1/auth`)" - - "traefik.http.services.iam.loadbalancer.server.port=3001" - - "traefik.http.services.iam.loadbalancer.healthcheck.path=/health/live" -``` - ---- - -### 3. 
Services Layer / Tầng Services - -#### Microservice Template Structure / Cấu trúc Template Microservice - -```mermaid -graph TD - subgraph "Microservice (Template Pattern)" - HTTP[HTTP Request] --> MW[Middleware Stack] - - MW --> Routes[Routes] - - subgraph "Feature Module" - Routes --> Controller - Controller --> Service - Service --> Repository - Repository --> Prisma[Prisma ORM] - end - - Service --> Cache[Cache Service] - Cache --> Redis[(Redis)] - - Prisma --> DB[(PostgreSQL)] - - MW --> Metrics[Metrics Middleware] - Metrics --> Prom[Prometheus] - end - - style MW fill:#e1f5ff - style Service fill:#f0e1ff - style Cache fill:#fff4e1 -``` - -**EN: Standard Microservice Structure** - -Each microservice follows this pattern (from `services/_template/`): - -``` -src/ -├── config/ # Configuration with Zod validation -│ ├── app.config.ts -│ ├── database.config.ts -│ └── redis.config.ts -├── core/ # Core utilities (IAM service only) -│ ├── cache/ # Multi-layer caching -│ ├── events/ # Event sourcing -│ └── security/ # Zero-trust validator -├── middlewares/ # Express middlewares -│ ├── correlation.middleware.ts -│ ├── logger.middleware.ts -│ ├── metrics.middleware.ts -│ └── error.middleware.ts -├── modules/ # Feature modules -│ ├── common/ # Shared (BaseRepository) -│ ├── feature/ # Example feature -│ ├── health/ # Health checks -│ └── metrics/ # Prometheus metrics -├── routes/ # Route definitions -│ └── index.ts -└── main.ts # Application entry point -``` - -**Middleware Execution Order**: -1. Correlation ID → 2. Logger → 3. Metrics → 4. CORS → 5. Rate Limit → 6. Body Parser → 7. Routes → 8. 
Error Handler - -**VI: Cấu trúc Microservice Chuẩn** - -Mỗi microservice tuân theo pattern này (từ `services/_template/`): - -``` -src/ -├── config/ # Configuration với Zod validation -│ ├── app.config.ts -│ ├── database.config.ts -│ └── redis.config.ts -├── core/ # Core utilities (chỉ IAM service) -│ ├── cache/ # Multi-layer caching -│ ├── events/ # Event sourcing -│ └── security/ # Zero-trust validator -├── middlewares/ # Express middlewares -│ ├── correlation.middleware.ts -│ ├── logger.middleware.ts -│ ├── metrics.middleware.ts -│ └── error.middleware.ts -├── modules/ # Feature modules -│ ├── common/ # Shared (BaseRepository) -│ ├── feature/ # Example feature -│ ├── health/ # Health checks -│ └── metrics/ # Prometheus metrics -├── routes/ # Route definitions -│ └── index.ts -└── main.ts # Application entry point -``` - -**Thứ tự Thực thi Middleware**: -1. Correlation ID → 2. Logger → 3. Metrics → 4. CORS → 5. Rate Limit → 6. Body Parser → 7. Routes → 8. Error Handler - ---- - -### 4. Data Layer / Tầng Dữ liệu - -#### Database Architecture / Kiến trúc Database - -```mermaid -graph TD - subgraph "Database Per Service Pattern" - Service1[IAM Service] --> Schema1[(iam_db
<br/>30+ tables)] - Service2[Template Service] --> Schema2[(template_db<br/>Example tables)] - Service3[Future Service] --> Schema3[(future_db<br/>TBD)] - end - - subgraph "Shared Infrastructure" - Schema1 -.->|Connection Pool| PG[PostgreSQL 14+<br/>Neon Cloud] - Schema2 -.->|Connection Pool| PG - Schema3 -.->|Connection Pool| PG - end - - subgraph "Cache Layer" - Service1 --> L1_1[L1: Memory<br/>60s TTL] - Service2 --> L1_2[L1: Memory<br/>60s TTL] - - L1_1 --> L2[L2: Redis<br/>
5-15min TTL] - L1_2 --> L2 - - L2 -.->|Cache Miss| Schema1 - L2 -.->|Cache Miss| Schema2 - end - - style PG fill:#f0e1ff - style L2 fill:#fff4e1 - style L1_1 fill:#d4edda - style L1_2 fill:#d4edda -``` - -**EN: Data Management** - -**Database per Service**: -- Each service has its own database schema -- Services own their data exclusively -- Cross-service data access via APIs only -- Independent scaling and optimization - -**Multi-Layer Caching** (IAM Service): -``` -Request → L1 (Memory, 60s) → L2 (Redis, 5-15min) → L3 (Database) -``` - -**Cache Hit Rates**: -- L1: ~40-50% (hot data) -- L2: ~80-90% (permissions, user data) -- L3: 10-20% (cache miss, fetch from DB) - -**Database Technology**: -- **Provider**: Neon (Serverless PostgreSQL) -- **Version**: PostgreSQL 14+ -- **ORM**: Prisma -- **Connection Pooling**: Prisma (10 connections default) -- **Migrations**: Prisma Migrate - -**VI: Quản lý Dữ liệu** - -**Database per Service**: -- Mỗi service có schema database riêng -- Services sở hữu dữ liệu độc quyền -- Truy cập dữ liệu cross-service chỉ qua APIs -- Scaling và optimization độc lập - -**Multi-Layer Caching** (IAM Service): -``` -Request → L1 (Memory, 60s) → L2 (Redis, 5-15min) → L3 (Database) -``` - -**Tỷ lệ Cache Hit**: -- L1: ~40-50% (hot data) -- L2: ~80-90% (permissions, user data) -- L3: 10-20% (cache miss, fetch từ DB) - -**Công nghệ Database**: -- **Provider**: Neon (Serverless PostgreSQL) -- **Version**: PostgreSQL 14+ -- **ORM**: Prisma -- **Connection Pooling**: Prisma (10 connections mặc định) -- **Migrations**: Prisma Migrate - ---- - -## Communication Patterns / Patterns Giao tiếp - -### Request Flow / Luồng Request - -```mermaid -sequenceDiagram - participant Client - participant Traefik as Traefik Gateway - participant Service - participant Cache as Redis Cache - participant DB as PostgreSQL - - Client->>Traefik: HTTP Request - Traefik->>Traefik: Apply Middlewares
(CORS, Rate Limit) - Traefik->>Service: Forward Request - Service->>Cache: Check Cache - - alt Cache Hit - Cache-->>Service: Cached Data - Service-->>Traefik: Response (from cache) - else Cache Miss - Cache-->>Service: null - Service->>DB: Query Database - DB-->>Service: Data - Service->>Cache: Store in Cache - Service-->>Traefik: Response (from DB) - end - - Traefik-->>Client: HTTP Response -``` - -**EN: Communication Patterns** - -1. **Synchronous (HTTP/REST)**: - - Request-response pattern - - RESTful API design - - JSON payload format - - Standard HTTP status codes - -2. **Service-to-Service**: - - Internal HTTP calls via `@goodgo/http-client` - - Service authentication with internal API keys - - Circuit breaker pattern for resilience - - Correlation ID propagation - -3. **Service Discovery**: - - **Local**: Docker DNS (`http://service-name:port`) - - **Kubernetes**: Service DNS (`http://service-name.namespace.svc.cluster.local`) - - **Traefik**: Dynamic configuration via labels - -4. **Asynchronous (Future)**: - - Message queues (RabbitMQ/Kafka) - - Event-driven architecture - - Pub/Sub patterns - -**VI: Patterns Giao tiếp** - -1. **Đồng bộ (HTTP/REST)**: - - Pattern request-response - - Thiết kế RESTful API - - Format payload JSON - - HTTP status codes chuẩn - -2. **Service-to-Service**: - - Internal HTTP calls qua `@goodgo/http-client` - - Service authentication với internal API keys - - Circuit breaker pattern cho resilience - - Correlation ID propagation - -3. **Service Discovery**: - - **Local**: Docker DNS (`http://service-name:port`) - - **Kubernetes**: Service DNS (`http://service-name.namespace.svc.cluster.local`) - - **Traefik**: Dynamic configuration qua labels - -4. 
**Bất đồng bộ (Tương lai)**: - - Message queues (RabbitMQ/Kafka) - - Event-driven architecture - - Pub/Sub patterns - ---- - -## Security Architecture / Kiến trúc Bảo mật - -```mermaid -graph TD - Request[Client Request] --> TLS[TLS/HTTPS] - TLS --> RateLimit[Rate Limiting] - RateLimit --> JWT[JWT Validation] - JWT --> RBAC[RBAC Authorization] - RBAC --> ZeroTrust[Zero-Trust Validation] - ZeroTrust --> Service[Service Logic] - - Service --> Encrypt[Data Encryption
<br/>AES-256-GCM] - Encrypt --> DB[(Encrypted Data<br/>at Rest)] - - Service --> Audit[Audit Logging<br/>Event Sourcing] - Audit --> AuditDB[(Audit Trail<br/>
7-year retention)] - - style TLS fill:#d4edda - style JWT fill:#e1f5ff - style Encrypt fill:#f8d7da - style Audit fill:#fff4e1 -``` - -**EN: Security Layers** - -1. **Network Security**: - - TLS 1.2+ for all communications - - HTTPS enforcement - - CORS configuration - - Rate limiting (Redis-backed, distributed) - -2. **Authentication**: - - JWT tokens (15min access, 7 days refresh) - - bcrypt password hashing (cost 12) - - Refresh token rotation - - Multi-factor authentication (TOTP) - -3. **Authorization**: - - Role-Based Access Control (RBAC) - - Attribute-Based Access Control (ABAC) - - Permission model: `resource:action:scope` - - Permission caching (5min TTL) - -4. **Data Protection**: - - AES-256-GCM encryption for PII - - Token hashing (SHA-256) - - Secrets management (environment variables, K8s secrets) - -5. **Zero-Trust**: - - Device fingerprinting - - IP address validation - - Behavioral analysis - - Session binding - -6. **Audit & Compliance**: - - Event sourcing for all auth events - - 7-year retention (GDPR, SOC2) - - Correlation ID tracking - - Compliance reporting (GDPR, SOC2, ISO27001, HIPAA) - -**VI: Các Tầng Bảo mật** - -1. **Network Security**: - - TLS 1.2+ cho mọi giao tiếp - - HTTPS enforcement - - Cấu hình CORS - - Rate limiting (Redis-backed, phân tán) - -2. **Authentication**: - - JWT tokens (15min access, 7 ngày refresh) - - bcrypt password hashing (cost 12) - - Refresh token rotation - - Multi-factor authentication (TOTP) - -3. **Authorization**: - - Role-Based Access Control (RBAC) - - Attribute-Based Access Control (ABAC) - - Permission model: `resource:action:scope` - - Permission caching (5min TTL) - -4. **Data Protection**: - - AES-256-GCM encryption cho PII - - Token hashing (SHA-256) - - Secrets management (environment variables, K8s secrets) - -5. **Zero-Trust**: - - Device fingerprinting - - IP address validation - - Behavioral analysis - - Session binding - -6. 
**Audit & Compliance**: - - Event sourcing cho tất cả auth events - - 7-year retention (GDPR, SOC2) - - Correlation ID tracking - - Compliance reporting (GDPR, SOC2, ISO27001, HIPAA) - ---- - -## Observability Stack / Stack Khả năng quan sát - -```mermaid -graph LR - subgraph "Services" - S1[IAM Service] - S2[Template Service] - end - - subgraph "Metrics" - S1 -->|/metrics| Prom[Prometheus] - S2 -->|/metrics| Prom - Prom --> Grafana[Grafana Dashboard] - end - - subgraph "Logging" - S1 -->|JSON Logs| Loki[Loki] - S2 -->|JSON Logs| Loki - Loki --> GrafanaLog[Grafana Explore] - end - - subgraph "Tracing" - S1 -->|Spans| Jaeger[Jaeger] - S2 -->|Spans| Jaeger - Jaeger --> JaegerUI[Jaeger UI] - end - - style Prom fill:#d4edda - style Loki fill:#fff4e1 - style Jaeger fill:#e1f5ff -``` - -**EN: Three Pillars of Observability** - -1. **Metrics (Prometheus)**: - - HTTP request duration (histogram) - - HTTP request count (counter) - - Active requests (gauge) - - Cache hit/miss ratio - - Database query duration - - Custom business metrics - -2. **Logging (Winston + Loki)**: - - Structured JSON logs - - Correlation IDs in every log - - Request/response logging - - Error stack traces (dev only) - - Log levels: error, warn, info, debug - -3. **Tracing (OpenTelemetry + Jaeger)**: - - Distributed tracing across services - - HTTP request spans - - Database query spans - - Cache operation spans - - End-to-end latency tracking - -**Health Checks**: -- `/health` - Overall health status -- `/health/live` - Liveness probe (K8s) -- `/health/ready` - Readiness probe (K8s, checks DB + Redis) - -**VI: Ba Trụ cột của Khả năng quan sát** - -1. **Metrics (Prometheus)**: - - HTTP request duration (histogram) - - HTTP request count (counter) - - Active requests (gauge) - - Cache hit/miss ratio - - Database query duration - - Custom business metrics - -2. 
**Logging (Winston + Loki)**: - - Structured JSON logs - - Correlation IDs trong mọi log - - Request/response logging - - Error stack traces (chỉ dev) - - Log levels: error, warn, info, debug - -3. **Tracing (OpenTelemetry + Jaeger)**: - - Distributed tracing giữa các services - - HTTP request spans - - Database query spans - - Cache operation spans - - End-to-end latency tracking - -**Health Checks**: -- `/health` - Overall health status -- `/health/live` - Liveness probe (K8s) -- `/health/ready` - Readiness probe (K8s, kiểm tra DB + Redis) - ---- - -## Deployment Architecture / Kiến trúc Triển khai - -### Local Development / Phát triển Local - -```mermaid -graph TD - subgraph "Docker Compose (deployments/local)" - Traefik[Traefik
<br/>Port 80, 8080] - IAM[IAM Service<br/>Port 3001] - Template[Template Service<br/>Port 5000] - PostgreSQL[PostgreSQL<br/>Port 5432] - Redis[Redis<br/>Port 6379] - - Traefik --> IAM - Traefik --> Template - IAM --> PostgreSQL - IAM --> Redis - Template --> PostgreSQL - Template --> Redis - end - - Dev[Developer] -->|localhost| Traefik - Dev -->|:8080| TraefikDash[Traefik Dashboard] - - style Traefik fill:#e1f5ff - style PostgreSQL fill:#f0e1ff - style Redis fill:#fff4e1 -``` - -### Production Deployment / Triển khai Production - -```mermaid -graph TD - subgraph "Kubernetes Cluster" - Ingress[Ingress Controller<br/>Traefik] - - subgraph "IAM Service" - IAM1[IAM Pod 1] - IAM2[IAM Pod 2] - IAM3[IAM Pod 3] - end - - subgraph "Template Service" - T1[Template Pod 1] - T2[Template Pod 2] - end - - Ingress --> IAM1 - Ingress --> IAM2 - Ingress --> IAM3 - Ingress --> T1 - Ingress --> T2 - end - - subgraph "Managed Services" - Neon[(Neon PostgreSQL<br/>
Serverless)] - RedisCloud[(Redis Cloud)] - end - - IAM1 --> Neon - IAM2 --> Neon - IAM3 --> Neon - T1 --> Neon - T2 --> Neon - - IAM1 --> RedisCloud - IAM2 --> RedisCloud - T1 --> RedisCloud - T2 --> RedisCloud - - style Ingress fill:#e1f5ff - style Neon fill:#f0e1ff - style RedisCloud fill:#fff4e1 -``` - -**EN: Deployment Environments** - -1. **Local (Docker Compose)**: - - All services run in Docker containers - - Shared network for service communication - - Local PostgreSQL and Redis - - Traefik for routing - - Hot reload for development - -2. **Staging (Kubernetes)**: - - Kubernetes cluster in cloud (GKE/EKS/AKS) - - 2 replicas per service - - Managed PostgreSQL (Neon) - - Managed Redis (Redis Cloud) - - Horizontal Pod Autoscaling (HPA) - -3. **Production (Kubernetes)**: - - Production K8s cluster - - 3+ replicas per service - - Managed databases with backups - - Auto-scaling (HPA + VPA) - - Blue-green deployments - - Rolling updates with health checks - -**VI: Môi trường Triển khai** - -1. **Local (Docker Compose)**: - - Tất cả services chạy trong Docker containers - - Shared network cho service communication - - Local PostgreSQL và Redis - - Traefik cho routing - - Hot reload cho development - -2. **Staging (Kubernetes)**: - - Kubernetes cluster trên cloud (GKE/EKS/AKS) - - 2 replicas mỗi service - - Managed PostgreSQL (Neon) - - Managed Redis (Redis Cloud) - - Horizontal Pod Autoscaling (HPA) - -3. 
**Production (Kubernetes)**: - - Production K8s cluster - - 3+ replicas mỗi service - - Managed databases với backups - - Auto-scaling (HPA + VPA) - - Blue-green deployments - - Rolling updates với health checks - ---- - -## Performance Characteristics / Đặc điểm Hiệu suất - -**EN: Performance Targets** - -| Metric | Target | Notes | -|--------|--------|-------| -| **API Response Time (P95)** | < 100ms | Excluding cold starts | -| **API Response Time (P99)** | < 200ms | | -| **Throughput** | 1000 req/s | Per service instance | -| **Cache Hit Rate** | > 80% | Redis cache | -| **Database Query Time (P95)** | < 50ms | Simple queries | -| **Memory Usage** | < 512MB | Per service instance | -| **CPU Usage** | < 60% | Under normal load | - -**Optimization Strategies**: -- Multi-layer caching (L1: Memory, L2: Redis) -- Database connection pooling -- Query optimization with indexes -- Horizontal scaling with HPA -- CDN for static assets - -**VI: Mục tiêu Hiệu suất** - -| Metric | Mục tiêu | Ghi chú | -|--------|----------|---------| -| **API Response Time (P95)** | < 100ms | Không bao gồm cold starts | -| **API Response Time (P99)** | < 200ms | | -| **Throughput** | 1000 req/s | Mỗi service instance | -| **Cache Hit Rate** | > 80% | Redis cache | -| **Database Query Time (P95)** | < 50ms | Queries đơn giản | -| **Memory Usage** | < 512MB | Mỗi service instance | -| **CPU Usage** | < 60% | Ở normal load | - -**Chiến lược Tối ưu**: -- Multi-layer caching (L1: Memory, L2: Redis) -- Database connection pooling -- Query optimization với indexes -- Horizontal scaling với HPA -- CDN cho static assets - ---- - -## Related Documentation / Tài liệu Liên quan - -- [Service Communication](./service-communication.md) - EN: Detailed inter-service communication patterns / VI: Patterns giao tiếp giữa services chi tiết -- [IAM Proposal](./iam-proposal.md) - EN: IAM service architecture and features / VI: Kiến trúc và tính năng IAM service -- [Deployment Guide](../guides/deployment.md) - 
EN: Step-by-step deployment instructions / VI: Hướng dẫn triển khai từng bước -- [Local Development](../guides/local-development.md) - EN: Setting up local environment / VI: Thiết lập môi trường local -- [Project Rules](../skills/project-rules.md) - EN: Project structure and conventions / VI: Cấu trúc dự án và quy ước - ---- - -**Last Updated / Cập nhật lần cuối**: 2026-01-06 -**Authors / Tác giả**: DevOps Team -**Reviewers / Người review**: Architecture Team diff --git a/apps/web-docs/content/docs/en/guides/deployment.md b/apps/web-docs/content/docs/en/guides/deployment.md deleted file mode 100644 index a7210445..00000000 --- a/apps/web-docs/content/docs/en/guides/deployment.md +++ /dev/null @@ -1,234 +0,0 @@ -# Deployment Guide - -> **Note**: This guide covers deployment strategies for GoodGo Microservices Platform across Local, Staging, and Production environments using Kubernetes and Neon PostgreSQL. - -## Table of Contents - -1. [Deployment Architecture](#deployment-architecture) -2. [Prerequisites](#prerequisites) -3. [Database Setup (Neon)](#database-setup-neon) -4. [Local Deployment](#local-deployment) -5. [CI/CD Pipeline](#cicd-pipeline) -6. [Staging Deployment](#staging-deployment) -7. [Production Deployment](#production-deployment) -8. [Scaling & Resilience](#scaling--resilience) -9. 
[Rollback Procedures](#rollback-procedures) - ---- - -## Deployment Architecture - -```mermaid -graph TD - subgraph "CI/CD Pipeline (GitHub Actions)" - Code[Code Push] --> Test[Run Tests] - Test --> Build[Build Docker Image] - Build --> Registry[Push to Registry] - Registry --> Deploy[Deploy to K8s] - end - - subgraph "Infrastructure (Kubernetes)" - Ingress[Traefik Ingress] --> Service[K8s Service] - Service --> Pods[Application Pods] - Pods --> Secrets[K8s Secrets] - end - - subgraph "External Services" - Pods --> Neon[(Neon PostgreSQL)] - Pods --> Redis[(Redis Cloud)] - end - - Deploy --> Ingress -``` - ---- - -## Prerequisites - -Before deploying, ensure you have: - -* **Tools**: `kubectl`, `helm`, `docker` installed. -* **Access**: - * Kubernetes Cluster (EKS/GKE/DigitalOcean). - * Container Registry (GHCR/DockerHub). - * Neon Console Account. -* **Configuration**: - * `KUBECONFIG` file set up. - * GitHub Secrets configured for CI/CD. - ---- - -## Database Setup (Neon) - -We use **Neon Serverless PostgreSQL** for all environments to leverage branching and auto-scaling. - -1. **Create Project**: Log in to [neon.tech](https://neon.tech) and create a project `goodgo-platform`. -2. **Create Branches**: - * `main` -> For Development/Local. - * `staging` -> For Staging environment. - * `production` -> For Production environment (Protected). -3. **Get Connection Strings**: - * Note the connection string for each branch (Pooler mode recommended). - ---- - -## Local Deployment - -For local development, we use Docker Compose. - -```bash -# 1. Setup Environment -cp deployments/local/env.local.example deployments/local/.env.local -# Edit .env.local with Neon `main` branch connection string - -# 2. Start Infrastructure (Redis, Traefik, etc.) -cd deployments/local -docker-compose up -d - -# 3. Start Services (Hot-reload) -pnpm dev -``` - ---- - -## CI/CD Pipeline - -We use GitHub Actions for automated deployments. 
- -| Workflow | Trigger | Description | -| :--- | :--- | :--- | -| `ci-check.yml` | Pull Request | Runs unit tests, linting, and build check. | -| `deploy-staging.yml` | Push to `develop` | Build image -> Deploy to Staging Namespace. | -| `deploy-prod.yml` | Release / Tag | Build image -> Deploy to Production Namespace. | - -### Secrets Configuration (GitHub) - -Set these secrets in your repository settings: - -* `NEON_DATABASE_URL_STAGING`: Connection string for staging branch. -* `NEON_DATABASE_URL_PRODUCTION`: Connection string for production branch. -* `KUBECONFIG_STAGING`: Base64 encoded kubeconfig for staging. -* `KUBECONFIG_PRODUCTION`: Base64 encoded kubeconfig for production. -* `DOCKER_REGISTRY_TOKEN`: For pushing images. - ---- - -## Staging Deployment - -Staging mirrors production but uses cost-effective resources. - -### Manual Deployment - -```bash -# 1. Create Secrets -kubectl create secret generic iam-service-secrets \ - --from-literal=database-url='' \ - --from-literal=jwt-secret='' \ - -n staging - -# 2. Apply Manifests -kubectl apply -f deployments/staging/kubernetes/ -n staging - -# 3. Verify -kubectl get pods -n staging -``` - -### via CI/CD - -Push code to `develop` branch. The action will: -1. Run tests. -2. Run `prisma migrate deploy` against Staging DB. -3. Update Kubernetes deployment image. - ---- - -## Production Deployment - -Production uses high-availability configurations. - -### 1. Database Preparation - -* Ensure Production branch in Neon is **protected**. -* Configure **Point-in-Time Recovery (PITR)** window (e.g., 7 days). - -### 2. Manual Deployment Steps - -```bash -# 1. Create Namespace -kubectl create namespace production - -# 2. Create Sealed Secrets (Recommended) or Standard Secrets -kubectl create secret generic iam-service-secrets \ - --from-literal=database-url='' \ - --from-literal=jwt-secret='' \ - --from-literal=jwt-refresh-secret='' \ - -n production - -# 3. 
Deploy -kubectl apply -f deployments/production/kubernetes/ -n production -``` - -### 3. Verification - -```bash -# Check Rollout Status -kubectl rollout status deployment/iam-service -n production - -# Check Logs -kubectl logs -l app=iam-service -n production -``` - ---- - -## Scaling & Resilience - -### Horizontal Pod Autoscaler (HPA) - -We use HPA to automatically scale pods based on CPU/Memory. - -```yaml -# Example HPA Config -apiVersion: autoscaling/v2 -kind: HorizontalPodAutoscaler -metadata: - name: iam-service-hpa -spec: - minReplicas: 2 - maxReplicas: 10 - metrics: - - type: Resource - resource: - name: cpu - target: - type: Utilization - averageUtilization: 70 -``` - -### Zero-Downtime Deployment - -Kubernetes handles this via Rolling Updates. -* **MaxSurge**: 25% (Add new pods before removing old ones). -* **MaxUnavailable**: 0 (Ensure no downtime during update). - ---- - -## Rollback Procedures - -If a deployment fails or introduces a critical bug: - -### Kubernetes Rollback - -```bash -# Undo last deployment -kubectl rollout undo deployment/iam-service -n production - -# Undo to specific revision -kubectl rollout undo deployment/iam-service -n production --to-revision=2 -``` - -### Database Rollback - -Since Neon supports branching and PITR: -1. Go to Neon Console. -2. Restore the `production` branch to a timestamp before the bad migration. -3. **Warning**: This may result in data loss for new transactions. Use with caution. diff --git a/apps/web-docs/content/docs/en/guides/development.md b/apps/web-docs/content/docs/en/guides/development.md deleted file mode 100644 index d06941c8..00000000 --- a/apps/web-docs/content/docs/en/guides/development.md +++ /dev/null @@ -1,211 +0,0 @@ -# Development Guide - -> **Note**: This guide provides comprehensive standards and workflows for contributing to the GoodGo Microservices Platform. - -## Table of Contents - -1. [Project Structure](#project-structure) -2. [Code Standards](#code-standards) -3. 
[Git Workflow](#git-workflow) -4. [Backend Development](#backend-development) -5. [Testing Strategy](#testing-strategy) -6. [Database Workflow](#database-workflow) -7. [Kubernetes Deployment](#kubernetes-deployment) - ---- - -## Project Structure - -We follow a strict monorepo structure managed by PNPM Workspaces. - -``` -Base/ -├── apps/ # Frontend applications -│ ├── web-client/ # Next.js 14+ (App Router) -│ └── mobile-client/ # Flutter -├── services/ # Backend microservices -│ ├── _template/ # Template for new services -│ ├── iam-service/ # Identity & Access Management -│ └── ... -├── packages/ # Shared libraries -│ ├── logger/ # Structured logging (Winston) -│ ├── types/ # Shared DTOs & Interfaces -│ ├── http-client/ # Internal Service Client -│ └── tracing/ # OpenTelemetry configuration -├── infra/ # Infrastructure-as-Code -│ ├── traefik/ # API Gateway -│ └── databases/ # Database setup scripts -└── docs/ # Documentation (EN & VI) -``` - ---- - -## Code Standards - -### Naming Conventions - -* **Files**: `kebab-case.ts` (e.g., `user.controller.ts`, `app.config.ts`) -* **Classes**: `PascalCase` (e.g., `UserController`, `AuthService`) -* **Functions/Variables**: `camelCase` (e.g., `getUserById`, `isValid`) -* **Constants**: `UPPER_SNAKE_CASE` (e.g., `MAX_RETRIES`, `DEFAULT_TIMEOUT`) -* **Interfaces**: `PascalCase` (e.g., `User`, `CreateUserDto`) - *No 'I' prefix* - -### Bilingual Comments - -For core logic and public APIs, assume both international and Vietnamese developers reading the code. - -```typescript -/** - * EN: Validates user credentials and returns a token - * VI: Xác thực thông tin người dùng và trả về token - */ -async login(dto: LoginDto): Promise { ... } -``` - -### TypeScript Usage - -* **Strict Mode**: Enabled in `tsconfig.json`. No `any` allowed (use `unknown` if needed). -* **DTOs**: Use Zod for runtime validation and type inference. -* **Return Types**: Explicitly declare return types for all public methods. 
- ---- - -## Git Workflow - -### Branching Strategy - -* `main`: Production-ready code. -* `develop`: Integration branch for next release. -* `feature/xyz`: New features (branch off `develop`). -* `fix/xyz`: Bug fixes (branch off `develop`). -* `hotfix/xyz`: Critical fixes (branch off `main`). - -### Commit Messages - -We follow [Conventional Commits](https://www.conventionalcommits.org/): - -``` -feat(iam): add multi-factor authentication -fix(db): correct unique constraint on email -docs(guide): update development setup -style: format code with prettier -refactor: simplify auth middleware -test: add unit tests for user service -chore: update dependencies -``` - ---- - -## Backend Development - -### Creating a New API Endpoint - -1. **Define DTO** (`modules/user/user.dto.ts`): - ```typescript - export const CreateUserDto = z.object({ - email: z.string().email(), - name: z.string().min(2), - }); - export type CreateUserDto = z.infer<typeof CreateUserDto>; - ``` - -2. **Create Service Method** (`modules/user/user.service.ts`): - * Implement business logic. - * Use `BaseRepository`. - * Throw `HttpError` (e.g., `NotFound`, `BadRequest`). - -3. **Create Controller** (`modules/user/user.controller.ts`): - * Parse body with DTO: `const dto = CreateUserDto.parse(req.body)`. - * Call service. - * Return success response: `res.json({ success: true, data: result })`. - -4. **Register Route** (`modules/user/index.ts`): - * Add to Express router with middlewares. - -### Error Handling - -Always use the custom error classes from `core/errors`: - -```typescript -import { NotFoundError, ConflictError } from '../../core/errors'; - -if (!user) { - throw new NotFoundError('User not found'); -} -``` - ---- - -## Testing Strategy - -### Unit Tests (`*.test.ts`) - -* **Scope**: Individual classes/functions. -* **Mocking**: Mock all external dependencies (DB, other services) using `jest-mock-extended`. -* **Location**: Co-located with source files. 
-* **Run**: `pnpm test` - -### E2E Tests (`tests/**/*.e2e.ts`) - -* **Scope**: Full API flows (Controller -> Service -> DB). -* **Database**: Use a separate test database (Dockerized). -* **Run**: `pnpm test:e2e` - -### Linting & Formatting - -* **Lint**: `pnpm lint` (ESLint) -* **Format**: `pnpm format` (Prettier) -* **Typecheck**: `pnpm typecheck` (TSC) - ---- - -## Database Workflow - -We use **Prisma** with **Neon PostgreSQL**. - -### Migrations - -1. Modify `prisma/schema.prisma`. -2. Create migration (Dev): - ```bash - ./scripts/db/migrate.sh iam-service dev --name add_user_profile - ``` -3. Apply to Production (CI/CD): - ```bash - ./scripts/db/migrate.sh iam-service deploy - ``` - -### Seed Data - -Populate database with initial data: -```bash -./scripts/db/seed.sh iam-service -``` - -### Visualizing Data - -Use Prisma Studio: -```bash -pnpm --filter @goodgo/iam-service prisma studio -``` - ---- - -## Kubernetes Deployment - -For local Kubernetes testing (Docker Desktop / Minikube): - -```bash -# 1. Build images -docker build -t goodgo/iam-service:latest -f services/iam-service/Dockerfile . - -# 2. Deploy -cd deployments/local/kubernetes -./deploy.sh - -# 3. Verify -kubectl get pods -n iam-local -kubectl logs -f -l app=iam-service -n iam-local -``` - -See [Kubernetes Guide](./kubernetes-local.md) for detailed setup. diff --git a/apps/web-docs/content/docs/en/guides/getting-started.md b/apps/web-docs/content/docs/en/guides/getting-started.md deleted file mode 100644 index 32533211..00000000 --- a/apps/web-docs/content/docs/en/guides/getting-started.md +++ /dev/null @@ -1,214 +0,0 @@ -# Getting Started - -> **Note**: This guide assumes you are setting up the project on macOS or Linux. Windows users should use WSL2. - -## Table of Contents - -1. [Prerequisites](#prerequisites) -2. [Architecture Overview](#architecture-overview) -3. [Project Structure](#project-structure) -4. [Installation & Setup](#installation--setup) -5. 
[Development Workflow](#development-workflow) -6. [Common Commands](#common-commands) -7. [Troubleshooting](#troubleshooting) - -## Prerequisites - -Before starting, ensure you have the following installed: - -* **Node.js**: v20.0.0 or higher - ```bash - node -v - # v20.10.0 - ``` -* **PNPM**: v8.0.0 or higher (we use pnpm workspaces) - ```bash - pnpm -v - # 8.12.0 - ``` -* **Docker & Docker Compose**: For local infrastructure - ```bash - docker -v - # Docker version 24.0.0 - ``` -* **Git**: For version control -* **Neon Account**: Serverless PostgreSQL (https://neon.tech) - -## Architecture Overview - -GoodGo Platform uses a microservices architecture with a shared infrastructure layer. - -```mermaid -graph TD - Client[Client Apps] --> Traefik[Traefik Gateway] - - Traefik --> IAM[IAM Service] - Traefik --> Template[Template Service] - - IAM --> DB[(Neon PostgreSQL)] - IAM --> Redis[(Redis Cache)] - IAM --> Kafka[Kafka Events] - - style Traefik fill:#e1f5ff - style DB fill:#f0e1ff - style Redis fill:#fff4e1 -``` - -## Project Structure - -The repository follows a monorepo structure: - -``` -Base/ -├── apps/ # Frontend applications -│ ├── web-client/ # Next.js web application -│ └── mobile-client/ # Flutter mobile application -├── services/ # Backend microservices -│ ├── iam-service/ # Authentication & Authorization -│ └── _template/ # Template for new services -├── packages/ # Shared libraries -│ ├── logger/ # Structured logging -│ ├── types/ # Shared TypeScript types -│ └── http-client/ # Internal HTTP client -├── infra/ # Infrastructure configuration -│ ├── traefik/ # API Gateway config -│ └── databases/ # Database setup scripts -├── deployments/ # Deployment configurations -│ ├── local/ # Docker Compose for dev -│ └── k8s/ # Kubernetes manifests -└── docs/ # Documentation -``` - -## Installation & Setup - -### 1. Clone the Repository - -```bash -git clone -cd Base -``` - -### 2. 
Configure Environment - -Each service and the local infrastructure needs environment variables. We provide templates for these. - -```bash -# Initialize project setup (copies .env.example to .env) -./scripts/setup/init-project.sh -``` - -### 3. Setup Neon Database - -We use Neon (Serverless PostgreSQL) for all environments (Dev, Staging, Prod). - -1. Create a project at [neon.tech](https://neon.tech). -2. Create a branch named `dev` (or use `main`). -3. Get the Connection String from the Neon dashboard. -4. Update `deployments/local/.env.local`: - -```env -DATABASE_URL="postgres://user:pass@ep-xyz.region.neon.tech/neondb" -``` - -### 4. Start Infrastructure - -Start the supporting infrastructure (Redis, Traefik, Observability) using Docker Compose. - -```bash -cd deployments/local -docker-compose up -d -# Expected output: Containers for traefik, redis, kafka created -``` - -### 5. Install Dependencies - -```bash -pnpm install -``` - -### 6. Setup Database Schema - -Push the Prisma schema to your Neon database. - -```bash -# Run migrations for IAM service -pnpm --filter @goodgo/iam-service prisma migrate dev -``` - -### 7. Start Services - -Start all backend services in development mode. - -```bash -pnpm dev -# or start specific service -pnpm --filter @goodgo/iam-service dev -``` - -## Development Workflow - -### Creating a New Service - -1. Copy the template: - ```bash - cp -r services/_template services/my-new-service - ``` -2. Update `package.json` name. -3. Add logic in `src/modules/`. -4. Register in `deployments/local/docker-compose.yml`. - -### Making Changes - -1. Create a new branch: `feature/my-feature`. -2. Implement changes. -3. Run tests: `pnpm test`. -4. Commit with conventional commits: `feat(iam): add login endpoint`. 
- -## Common Commands - -| Command | Description | -| :--- | :--- | -| `pnpm install` | Install all dependencies | -| `pnpm dev` | Start all services in dev mode | -| `pnpm build` | Build all packages and services | -| `pnpm test` | Run unit tests | -| `pnpm lint` | Lint code | -| `docker-compose up -d` | Start local infra | -| `docker-compose down` | Stop local infra | - -## Troubleshooting - -### Port Conflicts - -**Error**: `Bind for 0.0.0.0:80 failed: port is already allocated` - -**Solution**: Check what's using port 80 (likely another web server) and stop it, or change Traefik ports in `docker-compose.yml`. - -```bash -lsof -i :80 -kill -9 -``` - -### Database Connection Failed - -**Error**: `P1001: Can't reach database server` - -**Solution**: -1. Check your internet connection (Neon is cloud-based). -2. Verify `DATABASE_URL` in `deployments/local/.env.local`. -3. Ensure your IP is allowed in Neon dashboard settings. - -### Service Not Found in Gateway - -**Error**: `404 Not Found` from api.localhost - -**Solution**: -1. Check if service is running. -2. Check Traefik dashboard at http://localhost:8080. -3. Verify `PathPrefix` labels in `docker-compose.yml`. - -## Next Steps - -* [Development Guide](development.md) - Deep dive into coding standards -* [API Documentation](../api/openapi/) - Explore the APIs -* [Architecture](../architecture/system-design.md) - Understand the system design diff --git a/apps/web-docs/content/docs/en/guides/iam-migration.md b/apps/web-docs/content/docs/en/guides/iam-migration.md deleted file mode 100644 index 0ae8ed20..00000000 --- a/apps/web-docs/content/docs/en/guides/iam-migration.md +++ /dev/null @@ -1,209 +0,0 @@ -# Migration Guide: Auth Service → IAM Service - -Tài liệu này hướng dẫn cách migrate từ `auth-service` sang `iam-service`. - -## Tổng Quan - -IAM Service là phiên bản mở rộng của Auth Service với các tính năng bổ sung về Identity Management, Access Management, và Governance & Compliance. 
Tất cả các API endpoints của Auth Service vẫn được giữ nguyên để đảm bảo backward compatibility. - -## Backward Compatibility - -✅ **Tất cả các endpoints hiện tại vẫn hoạt động bình thường:** - -- `/api/v1/auth/*` - Authentication endpoints -- `/api/v1/rbac/*` - RBAC endpoints -- `/api/v1/mfa/*` - MFA endpoints -- `/api/v1/sessions/*` - Session management endpoints -- `/api/v1/oidc/*` - OIDC endpoints - -Không có breaking changes. Các clients hiện tại có thể tiếp tục sử dụng các endpoints này mà không cần thay đổi. - -## Các Thay Đổi - -### 1. Service Name - -- **Cũ**: `auth-service` -- **Mới**: `iam-service` - -### 2. Package Name - -- **Cũ**: `@goodgo/auth-service` -- **Mới**: `@goodgo/iam-service` - -### 3. Database Schema - -Database schema được mở rộng với các models mới nhưng **không xóa hoặc thay đổi** các models hiện có: - -**Models mới được thêm:** -- `Organization` - Quản lý tổ chức -- `Group` - Quản lý nhóm -- `GroupMember` - Thành viên nhóm -- `GroupPermission` - Quyền nhóm -- `UserProfile` - Profile mở rộng -- `IdentityVerification` - Xác thực danh tính -- `AccessRequest` - Yêu cầu truy cập -- `AccessReview` - Đánh giá truy cập -- `ComplianceReport` - Báo cáo tuân thủ -- `PolicyTemplate` - Template policy -- `RiskScore` - Điểm rủi ro - -**User model được mở rộng:** -- Thêm field `organizationId` (optional) -- Thêm các relations mới (optional) - -### 4. 
API Endpoints Mới - -#### Identity Management -- `/api/v1/identity/users/*` - User lifecycle management -- `/api/v1/identity/users/:id/profile` - Profile management -- `/api/v1/identity/verification/*` - Identity verification -- `/api/v1/identity/organizations/*` - Organization management -- `/api/v1/identity/groups/*` - Group management - -#### Access Management -- `/api/v1/access/requests/*` - Access requests -- `/api/v1/access/reviews/*` - Access reviews -- `/api/v1/access/analytics/*` - Access analytics - -#### Governance -- `/api/v1/governance/compliance/*` - Compliance reports -- `/api/v1/governance/policies/*` - Policy governance -- `/api/v1/governance/risk/*` - Risk management -- `/api/v1/governance/reports/*` - Reporting dashboard - -### 5. Environment Variables - -Một số biến môi trường mới có thể được thêm trong tương lai cho các tính năng IAM nâng cao (email service, SMS service, file storage), nhưng không ảnh hưởng đến các biến hiện tại. - -### 6. Deployment Configuration - -**Docker Compose:** -- Service name: `auth-service` → `iam-service` -- Container name: `auth-service-local` → `iam-service-local` -- Traefik labels: Thêm routes mới cho `/api/v1/identity/*`, `/api/v1/access/*`, `/api/v1/governance/*` - -**Kubernetes:** -- Deployment name: `auth-service` → `iam-service` -- Service name: `auth-service` → `iam-service` -- ConfigMap: `auth-service-config` → `iam-service-config` -- Secrets: `auth-service-secrets` → `iam-service-secrets` - -## Migration Steps - -### Bước 1: Backup - -```bash -# Backup database -pg_dump $DATABASE_URL > auth-service-backup.sql - -# Backup code -cp -r services/auth-service services/auth-service.backup -``` - -### Bước 2: Database Migration - -```bash -cd services/iam-service - -# Generate Prisma client với schema mới -pnpm prisma generate - -# Tạo migration -pnpm prisma migrate dev --name add_iam_models - -# Verify migration -pnpm prisma studio # Check database structure -``` - -### Bước 3: Update Dependencies - -```bash -# 
Install dependencies (nếu có package mới) -pnpm install - -# Verify types compile -pnpm typecheck -``` - -### Bước 4: Update Deployment - -**Local Development:** -```bash -cd deployments/local -# Update docker-compose.yml (đã được cập nhật) -docker-compose up -d iam-service -``` - -**Staging/Production:** -- Update Kubernetes manifests -- Update ingress routes -- Update ConfigMaps và Secrets - -### Bước 5: Verify Backward Compatibility - -Test tất cả các endpoints cũ vẫn hoạt động: - -```bash -# Test auth endpoints -curl http://localhost/api/v1/auth/me - -# Test RBAC endpoints -curl http://localhost/api/v1/rbac/permissions - -# Test MFA endpoints -curl http://localhost/api/v1/mfa/devices -``` - -### Bước 6: Gradual Rollout - -1. **Dual Deployment** (Optional): - - Deploy cả `auth-service` và `iam-service` cùng lúc - - Route traffic dần dần sang `iam-service` - - Monitor errors và performance - -2. **Update Clients**: - - Update clients để sử dụng các endpoints mới nếu cần - - Clients không cần update nếu chỉ dùng endpoints cũ - -3. **Deprecate Old Service**: - - Sau khi verify mọi thứ hoạt động tốt, có thể deprecate `auth-service` - - Đảm bảo tất cả clients đã migrate sang `iam-service` - -## Rollback Plan - -Nếu cần rollback: - -1. **Database Rollback**: - ```bash - # Revert Prisma migration - cd services/iam-service - pnpm prisma migrate resolve --rolled-back - - # Hoặc restore từ backup - psql $DATABASE_URL < auth-service-backup.sql - ``` - -2. **Service Rollback**: - ```bash - # Switch back to auth-service in docker-compose - # Hoặc revert Kubernetes deployment - kubectl rollout undo deployment/auth-service - ``` - -## Breaking Changes - -**Không có breaking changes** trong migration này. Tất cả các API endpoints và database models hiện có đều được giữ nguyên. 
- -## Notes - -- Migration này là **additive** - chỉ thêm các tính năng mới, không xóa hoặc thay đổi tính năng cũ -- Database migrations là **non-destructive** - không xóa hoặc modify dữ liệu hiện có -- Clients có thể tiếp tục sử dụng các endpoints cũ mà không cần thay đổi - -## Support - -Nếu gặp vấn đề trong quá trình migration, vui lòng: -1. Check logs: `docker-compose logs iam-service` -2. Verify database connection -3. Check environment variables -4. Review error messages và stack traces diff --git a/apps/web-docs/content/docs/en/guides/kubernetes-local.md b/apps/web-docs/content/docs/en/guides/kubernetes-local.md deleted file mode 100644 index cbc51c24..00000000 --- a/apps/web-docs/content/docs/en/guides/kubernetes-local.md +++ /dev/null @@ -1,273 +0,0 @@ -# Local Kubernetes Deployment Guide - -> **EN**: Local Kubernetes Deployment Guide -> -> **VI**: Hướng dẫn triển khai Kubernetes cục bộ - -**Last Updated**: 2026-01-05 -**Difficulty**: Intermediate -**Duration**: 30-45 minutes - -## Workflow - -```mermaid -graph TD - Start([Start]) --> EnvPrep[1. Environment Prep] - EnvPrep --> BuildImg[2. Build Docker Image] - BuildImg --> LoadImg[3. Load Image to Cluster
(Kind/Docker Desktop)] - LoadImg --> Secrets[4. Configure Secrets
& Environment] - Secrets --> Deploy[5. Deploy Service
(K8s Manifests)] - Deploy --> Verify[6. Verify Deployment] - Verify --> Test[7. Test Service
(Port Forward & Curl)] - Test --> End([Complete]) - - subgraph "Deployment Details" - Deploy --> |Apply| ConfigMap - Deploy --> |Apply| Deployment - Deploy --> |Apply| Service - end - - style Start fill:#d4edda,stroke:#28a745,stroke-width:2px - style End fill:#d4edda,stroke:#28a745,stroke-width:2px - style EnvPrep fill:#e2e3e5,stroke:#6c757d - style BuildImg fill:#fff3cd,stroke:#ffc107 - style LoadImg fill:#fff3cd,stroke:#ffc107 - style Secrets fill:#f8d7da,stroke:#dc3545 - style Deploy fill:#cce5ff,stroke:#007bff - style Verify fill:#cce5ff,stroke:#007bff -``` - -## Overview - -This guide details how to deploy the IAM Service (or any microservice in the GoodGo ecosystem) to a local Kubernetes cluster using Docker Desktop on macOS. - -> **Important Note**: This guide assumes you are using **Docker Desktop** with **Kubernetes enabled**. If you are using Minikube or plain Kind, the steps might differ slightly (especially the image loading part). - -## 1. Prerequisites - -### Software -- **Docker Desktop 4.0+**: [Download Link](https://www.docker.com/products/docker-desktop/) - - Kubernetes must be enabled in settings. -- **kubectl CLI**: Command-line tool for interacting with K8s. - ```bash - brew install kubectl - ``` -- **kind CLI**: Required to load images into the cluster if using Kind backend explicitely. - ```bash - brew install kind - ``` -- **pnpm 8+**: Project package manager. - ```bash - npm install -g pnpm - ``` - -### Knowledge -- Basic understanding of Kubernetes concepts: **Pod**, **Deployment**, **Service**, **Secret**, **ConfigMap**. -- Familiarity with basic Docker commands (`docker build`, `docker images`). -- Ability to navigate and run commands in the Terminal. - -## 2. Environment Preparation - -### 2.1 Enable Kubernetes in Docker Desktop - -1. Open **Docker Desktop**. -2. Click the **Settings (⚙️)** icon. -3. Select the **Kubernetes** tab. -4. Check **Enable Kubernetes**. -5. 
Select **Show system containers (advanced)** for easier debugging (optional). -6. Click **Apply & Restart**. -7. Wait 2-3 minutes until the Kubernetes icon in the bottom corner turns green. - -### 2.2 Verify Kubernetes Connection - -Check if `kubectl` is connected to the correct context: - -```bash -# EN: Check current context -# VI: Kiểm tra context hiện tại -kubectl config current-context -# Expected Output: docker-desktop - -# EN: List all nodes in the cluster -# VI: Liệt kê các node trong cluster -kubectl get nodes -# Expected Output: -# NAME STATUS ROLES AGE VERSION -# docker-desktop Ready control-plane 10m v1.29.1 -``` - -## 3. Build Docker Image - -We need to build the service image before deploying. Taking `iam-service` as an example. - -```bash -# EN: Navigate to the kubernetes deployment directory -# VI: Di chuyển đến thư mục deployments/local/kubernetes -cd deployments/local/kubernetes - -# EN: Build the Docker image from the root context -# VI: Build Docker image từ root context -# Note: -f points to service Dockerfile, context is root (../../..) -docker build -t iam-service:local -f ../../../services/iam-service/Dockerfile ../../.. - -# EN: Verify the image was built successfully -# VI: Kiểm tra image đã build thành công chưa -docker images | grep iam-service -# Expected Output: -# iam-service local [IMAGE_ID] [SIZE] [CREATED] -``` - -## 4. Load Image into Cluster - -**⚠️ IMPORTANT**: Docker Desktop can use different backends. If you are running Kubernetes inside Docker Desktop, sometimes it doesn't immediately see local images if using a `kind` node underneath. 
- -If you are using **Kind** (Kubernetes in Docker) separately or a specific Docker Desktop config, you need to load the image: - -```bash -# EN: Load image into kind cluster (if using kind explicitly) -# VI: Load image vào kind cluster (nếu dùng kind rõ ràng) -kind load docker-image iam-service:local --name desktop - -# EN: Validating image presence (optional, hard with Docker Desktop K8s directly) -# VI: Kiểm tra sự tồn tại của image (tùy chọn) -``` - -> **Tip**: With default Docker Desktop, building the local image (`docker build ...`) is usually automatically available to Docker Desktop's K8s cluster. This loading step is mainly for those using `kind` CLI to create separate clusters. - -## 5. Configure Secrets & ConfigMap - -Kubernetes environments need sensitive environment variables (Secrets) and general configuration (ConfigMap). - -### 5.1 Create Secrets (Manually) - -You can run a script or the following commands to create secrets securely. - -```bash -# EN: Create a dedicated namespace for local testing -# VI: Tạo namespace riêng cho local testing -kubectl create namespace iam-local - -# EN: Generate random secrets and store in Kubernetes -# VI: Tạo secrets ngẫu nhiên và lưu vào Kubernetes -kubectl create secret generic iam-service-secrets \ - --from-literal=DATABASE_URL="postgresql://user:password@host.docker.internal:5432/iam_db?schema=public" \ - --from-literal=JWT_SECRET="$(openssl rand -base64 32)" \ - --from-literal=JWT_REFRESH_SECRET="$(openssl rand -base64 32)" \ - --from-literal=ENCRYPTION_KEY="$(openssl rand -base64 32)" \ - -n iam-local - -# EN: Verify secrets creation -# VI: Kiểm tra secrets đã tạo -kubectl get secrets -n iam-local -``` - -> **Note on `host.docker.internal`**: On macOS, for a K8s pod to connect to PostgreSQL running on the host machine (or another container via port mapping), we use `host.docker.internal`. 
- -### 5.2 ConfigMap - -The `iam-service-configmap.yaml` file typically contains non-sensitive variables like `NODE_ENV`, `LOG_LEVEL`. - -```bash -# EN: Apply ConfigMap -# VI: Apply ConfigMap -kubectl apply -f iam-service-configmap.yaml -n iam-local -``` - -## 6. Deploy Service - -Now we will deploy the main resources. - -```bash -# EN: Apply Deployment manifest -# VI: Apply file Deployment manifest -kubectl apply -f iam-service-deployment.yaml -n iam-local - -# EN: Apply Service manifest (LoadBalancer/NodePort) -# VI: Apply file Service manifest -kubectl apply -f iam-service-service.yaml -n iam-local -``` - -## 7. Verify & Debug - -After deployment, ensure the Pod is stable (Running). - -### 7.1 Check Pods - -```bash -# EN: Get all pods in the namespace -# VI: Lấy danh sách pod trong namespace -kubectl get pods -n iam-local - -# Expected Output: -# NAME READY STATUS RESTARTS AGE -# iam-service-68994fdc79-gh2mj 1/1 Running 0 30s -``` - -### 7.2 View Detailed Logs - -If Status is not `Running` (e.g., `CrashLoopBackOff` or `ImagePullBackOff`), check logs: - -```bash -# EN: Stream logs from the pod -# VI: Xem logs thời gian thực từ pod -kubectl logs -f -n iam-local -l app=iam-service - -# EN: Describe pod to see events (pull error, mounts, scheduling) -# VI: Xem chi tiết pod để check events (lỗi pull, mount, scheduling) -kubectl describe pod -n iam-local -l app=iam-service -``` - -### 7.3 Common Errors - -1. **ImagePullBackOff**: - - **Reason**: K8s cannot find `iam-service:local` image. - - **Fix**: Ensure `imagePullPolicy: IfNotPresent` or `Never` in local deployment yaml. If using Kind, remember to run `kind load`. - -2. **CrashLoopBackOff**: - - **Reason**: Runtime error, usually unable to connect to Database. - - **Fix**: Check `DATABASE_URL` in Secret. Ensure Postgres is running and accessible from K8s (use `host.docker.internal`). - -3. **Pending Service**: - - **Reason**: `LoadBalancer` type on local sometimes hangs pending IP. 
- - **Fix**: Not a problem, we can use `port-forward` or access via `localhost`. - -## 8. Test Service Access - -To access the service from your local machine, the safest way is `port-forward`. - -```bash -# EN: Port forward from local port 5002 to service port 80 -# VI: Port forward từ cổng local 5002 tới cổng 80 của service -kubectl port-forward svc/iam-service 5002:80 -n iam-local - -# Terminal will hang and show: Forwarding from 127.0.0.1:5002 -> 8000 -``` - -Open another terminal and test: - -```bash -# EN: Test Health Check -# VI: Test Health Check -curl http://localhost:5002/health/live -# Response: {"status":"ok", ...} - -# EN: View Swagger/OpenAPI docs (if enabled) -# VI: Xem tài liệu Swagger/OpenAPI (nếu bật) -open http://localhost:5002/api-docs -``` - -## 9. Cleanup - -When done, delete resources to free up capacity. - -```bash -# EN: Delete the namespace (removes all resources within) -# VI: Xóa namespace (xóa tất cả resource bên trong) -kubectl delete namespace iam-local -``` - -## References - -- [Kubernetes Documentation](https://kubernetes.io/docs/) -- [Docker Desktop for Mac](https://docs.docker.com/desktop/mac/networking/) -- [Prisma Deployment Guide](https://www.prisma.io/docs/guides/deployment/deployment-guides/deploying-to-kubernetes) diff --git a/apps/web-docs/content/docs/en/guides/local-deployment.md b/apps/web-docs/content/docs/en/guides/local-deployment.md deleted file mode 100644 index aac1905e..00000000 --- a/apps/web-docs/content/docs/en/guides/local-deployment.md +++ /dev/null @@ -1,263 +0,0 @@ -# Local Development Deployment - -This directory contains Docker Compose configuration for running the entire GoodGo platform locally. - -## Quick Start - -```bash -# 1. Setup environment variables -cp env.local.example .env.local -# Edit .env.local with your values (JWT_SECRET, DATABASE_URL, etc.) - -# 2. Start all services -docker-compose up -d - -# 3. Check service status -docker-compose ps - -# 4. 
View logs -docker-compose logs -f -``` - -## Access Points - -| Service | URL | Description | -|---------|-----|-------------| -| **Traefik Dashboard** | http://localhost:8080 | API Gateway dashboard | -| **Auth Service** | http://localhost/api/v1/auth | Authentication API | -| **Web Admin** | http://admin.localhost | Admin dashboard | -| **Web Client** | http://localhost | Client application | -| **Redis** | localhost:6379 | Cache (direct access) | - -## Services - -### Infrastructure - -- **Traefik** (Port 80, 8080): API Gateway with automatic service discovery -- **Redis** (Port 6379): Shared cache and session store - -### Backend Services - -- **iam-service** (Port 5001): Authentication and user management - - Routes: `/api/v1/auth`, `/api/v1/users` - - Health: http://localhost/api/v1/auth/health - -### Frontend Applications - -- **web-admin** (Port 3000): Admin dashboard (Next.js) -- **web-client** (Port 3001): Client application (Next.js) - -## Environment Configuration - -Environment variables are managed in `.env.local`: - -### Required Variables - -```bash -# Authentication (MUST be same across all services) -JWT_SECRET=your-super-secret-jwt-key-min-32-characters -JWT_REFRESH_SECRET=your-super-secret-refresh-key-min-32-characters - -# Database (Neon PostgreSQL) -DATABASE_URL=postgresql://user:pass@host.neon.tech/db?sslmode=require -``` - -### Optional Variables - -```bash -# Redis -REDIS_HOST=redis -REDIS_PORT=6379 - -# Observability -TRACING_ENABLED=false -JAEGER_ENDPOINT=http://jaeger:14268/api/traces - -# CORS -CORS_ORIGIN=http://localhost:3000,http://localhost:3001 -``` - -## Common Commands - -```bash -# Start all services -docker-compose up -d - -# Start specific service -docker-compose up -d iam-service - -# Stop all services -docker-compose down - -# Stop and remove volumes -docker-compose down -v - -# View logs (all services) -docker-compose logs -f - -# View logs (specific service) -docker-compose logs -f iam-service - -# Restart service 
-docker-compose restart iam-service - -# Rebuild service -docker-compose up -d --build iam-service - -# Check service status -docker-compose ps - -# Execute command in container -docker-compose exec iam-service sh -``` - -## Adding New Service - -1. **Add service to docker-compose.yml**: - -```yaml -services: - my-new-service: - build: - context: ../.. - dockerfile: services/my-new-service/Dockerfile - container_name: my-new-service-local - env_file: - - .env.local - environment: - - PORT=5002 - - SERVICE_NAME=my-new-service - - DATABASE_URL=${DATABASE_URL} - - REDIS_HOST=${REDIS_HOST} - - JWT_SECRET=${JWT_SECRET} - ports: - - "5002:5002" - depends_on: - redis: - condition: service_healthy - traefik: - condition: service_started - networks: - - microservices-network - restart: unless-stopped - labels: - - "traefik.enable=true" - - "traefik.http.routers.my-new-service.rule=PathPrefix(`/api/v1/my-new-service`)" - - "traefik.http.services.my-new-service.loadbalancer.server.port=5002" -``` - -2. **Start the service**: - -```bash -docker-compose up -d my-new-service -``` - -3. **Access the service**: - - Via Traefik: http://localhost/api/v1/my-new-service - - Direct: http://localhost:5002 - -## Traefik Configuration - -Traefik is configured via: -- **Static config**: `infra/traefik/traefik.yml` -- **Dynamic config**: `infra/traefik/dynamic/` -- **Service discovery**: Docker labels in this file - -Services are automatically discovered by Traefik using Docker labels. No manual route configuration needed. 
- -## Troubleshooting - -### Port Already in Use - -```bash -# Find process using port -lsof -i :80 -lsof -i :5001 - -# Kill process -kill -9 -``` - -### Service Won't Start - -```bash -# Check logs -docker-compose logs service-name - -# Rebuild without cache -docker-compose build --no-cache service-name -docker-compose up -d service-name -``` - -### Database Connection Issues - -```bash -# Verify DATABASE_URL in .env.local -cat .env.local | grep DATABASE_URL - -# Test connection from service -docker-compose exec iam-service sh -# Inside container: -# curl $DATABASE_URL (won't work, but shows if var is set) -``` - -### Redis Connection Issues - -```bash -# Check Redis is running -docker-compose ps redis - -# Test Redis connection -docker-compose exec redis redis-cli ping -# Should return: PONG -``` - -### Traefik Not Routing - -```bash -# Check Traefik dashboard -open http://localhost:8080 - -# Verify service has correct labels -docker-compose config | grep -A 5 "labels:" - -# Check Traefik logs -docker-compose logs traefik -``` - -## Network Architecture - -``` -┌─────────────────────────────────────────────────────────────┐ -│ Client │ -└───────────────────────────┬─────────────────────────────────┘ - │ - ▼ - ┌───────────────┐ - │ Traefik │ :80, :8080 - │ API Gateway │ - └───────┬───────┘ - │ - ┌───────────────────┼───────────────────┐ - │ │ │ - ▼ ▼ ▼ -┌──────────────┐ ┌──────────────┐ ┌──────────────┐ -│ iam-service │ │ web-admin │ │ web-client │ -│ :5001 │ │ :3000 │ │ :3001 │ -└──────┬───────┘ └──────────────┘ └──────────────┘ - │ - ├─────────────┐ - │ │ - ▼ ▼ -┌──────────┐ ┌─────────────┐ -│ Redis │ │ PostgreSQL │ -│ :6379 │ │ (Neon) │ -└──────────┘ └─────────────┘ -``` - -## Resources - -- [Traefik Configuration](../../infra/traefik/) -- [Service Template](../../services/_template/) -- [Development Guide](../../docs/en/guides/development.md) -- [Neon Database Guide](../../docs/en/guides/neon-database.md) diff --git 
a/apps/web-docs/content/docs/en/guides/local-development.md b/apps/web-docs/content/docs/en/guides/local-development.md deleted file mode 100644 index d222d149..00000000 --- a/apps/web-docs/content/docs/en/guides/local-development.md +++ /dev/null @@ -1,250 +0,0 @@ -# Local Development Guide - -> **EN**: Local Development Guide -> -> **VI**: Hướng dẫn phát triển cục bộ - -**Last Updated**: 2026-01-05 -**Difficulty**: Intermediate -**Setup Time**: 15-30 minutes - -## Workflow - -```mermaid -graph TD - Start([Start]) --> Prerequisites[1. System Prerequisites] - Prerequisites --> Clone[2. Clone & Install] - Clone --> Env["3. Configure Environment<br/>(Shared & Service-Specific)"] - Env --> DB["4. Setup Database<br/>(Migrate & Seed)"] - DB --> Run["5. Run Project<br/>(Native/Docker/Hybrid)"] - Run --> Dev["6. Development Loop<br/>(Watch Mode)"] - Dev --> Test[7. Testing & Verify] - Test --> End([Complete]) - - subgraph "Run Modes" - Run --> Mode1["Mode 1: Native (Fastest)"] - Run --> Mode2["Mode 2: Hybrid (Flexible)"] - Run --> Mode3["Mode 3: Full Docker (Production-like)"] - end - - style Start fill:#d4edda,stroke:#28a745,stroke-width:2px - style End fill:#d4edda,stroke:#28a745,stroke-width:2px - style Env fill:#fff3cd,stroke:#ffc107 - style DB fill:#fff3cd,stroke:#ffc107 - style Run fill:#cce5ff,stroke:#007bff -``` - -## Overview - -This guide provides a detailed process for setting up a development environment for the GoodGo Microservices ecosystem. You will learn how to run services, set up databases, and establish an efficient workflow with hot-reload. - -## 1. Prerequisites - -Before starting, ensure your machine has the following tools installed: - -- **Node.js**: Latest LTS version (v20+). -- **PNPM**: Main project package manager (`npm install -g pnpm`). -- **Docker Desktop**: Required for running infrastructure services (Redis, Local Database). -- **Git**: For source code management. -- **Neon Account** (Optional): If using Neon Database on cloud (recommended for dev). - -## 2. Initial Setup - -### 2.1 Clone and Install Dependencies - -```bash -# EN: Clone the repository -# VI: Clone repository về máy -git clone <repository-url> -cd Base - -# EN: Install dependencies using pnpm -# VI: Cài đặt các thư viện phụ thuộc bằng pnpm -pnpm install -``` - -### 2.2 Quick Init Script (Recommended) - -The project includes an automation script for basic initialization: - -```bash -# EN: Run initialization script -# VI: Chạy script khởi tạo -./scripts/setup/init-project.sh -``` - -> This script will: -> - Install dependencies. -> - Copy example environment files (`.env.example` -> `.env`). -> - Generate Prisma client. - -## 3. 
Environment Configuration - -The project uses a **Hybrid Environment** strategy to optimize configuration management: - -### 3.1 Shared Configuration - -File: `deployments/local/.env.local` -Contains variables shared across the entire system (JWT, Redis, Logging). - -```bash -# EN: Create shared env file from example -# VI: Tạo file môi trường chung từ file mẫu -cp deployments/local/env.local.example deployments/local/.env.local -``` - -### 3.2 Service-Specific Configuration - -Each service (e.g., `iam-service`) needs its own `.env.local` file containing specific details like Database URL and Port. - -```bash -# EN: Create service-specific env file -# VI: Tạo file môi trường riêng cho service -cp services/iam-service/env.local.example services/iam-service/.env.local -``` - -**Key contents to check in `services/iam-service/.env.local`**: - -```properties -# Database URL (Use Neon Tech or Local Postgres) -DATABASE_URL=postgresql://user:password@host:5432/db_name?sslmode=require - -# Service Port (Must be unique per service) -PORT=5001 - -# Service Name -SERVICE_NAME=iam-service - -# Redis Host (localhost for Native Dev, redis for Docker Dev) -REDIS_HOST=localhost -``` - -## 4. Setup Database - -After configuring `DATABASE_URL`, you need to sync the schema and seed initial data. - -```bash -# EN: Run migrations for iam-service -# VI: Chạy migration cho iam-service -./scripts/db/migrate.sh iam-service dev - -# EN: Seed initial data (optional) -# VI: Tạo dữ liệu mẫu (tùy chọn) -./scripts/db/seed.sh iam-service -``` - -## 5. Run Modes - -You can run the project in 3 ways depending on your needs: - -### Mode 1: Native Development (Recommended for Backend Dev) - -Run directly on the host machine. Fastest speed, fastest hot-reload. - -1. **Start Infrastructure**: - ```bash - # EN: Start Redis and Traefik in Docker background - # VI: Khởi động Redis và Traefik chạy ngầm bằng Docker - cd deployments/local - docker-compose up -d redis traefik - cd ../.. - ``` - -2. 
**Run Service**: - ```bash - # EN: Start iam-service in watch mode - # VI: Chạy iam-service ở chế độ watch - pnpm --filter @goodgo/iam-service dev - ``` - -### Mode 2: Hybrid Development (Flexible) - -Use when you need to run multiple auxiliary services in Docker but want to dev the main service directly. - -```bash -# EN: Start dependent services in Docker -# VI: Chạy các service phụ thuộc trong Docker -docker-compose -f deployments/local/docker-compose.yml up -d user-service payment-service - -# EN: Run the service you are working on natively -# VI: Chạy service bạn đang làm việc trực tiếp trên máy -pnpm --filter @goodgo/iam-service dev -``` - -### Mode 3: Full Docker (Production Simulation) - -Run the entire system in Docker. Good for Integration Testing but no hot-reload. - -```bash -# EN: Start everything with Docker Compose -# VI: Chạy tất cả bằng Docker Compose -cd deployments/local -docker-compose up -d -``` - -## 6. Access & Verification - -After startup, you can access the following endpoints: - -| Service | URL | Description | -|---------|-----|-------------| -| **API Gateway** | `http://localhost/api/v1` | Main entry point via Traefik | -| **IAM Service** | `http://localhost:5001` | Direct service access | -| **Health Check** | `http://localhost:5001/health` | Service status check | -| **Metrics** | `http://localhost:5001/metrics` | Prometheus metrics | -| **API Docs** | `http://localhost:5001/api-docs` | Swagger UI | - -### Health Check Validation - -```bash -# EN: Check liveness -# VI: Kiểm tra liveness -curl http://localhost:5001/health/live - -# EN: Check readiness -# VI: Kiểm tra readiness -curl http://localhost:5001/health/ready -``` - -## 7. 
Troubleshooting - -### Port Already In Use - -**Error**: `Error: listen EADDRINUSE: address already in use :::5001` - -**Solution**: -```bash -# EN: Find process using port 5001 -# VI: Tìm process đang chiếm port 5001 -lsof -i :5001 - -# EN: Kill the process -# VI: Tắt process đó -kill -9 <PID> -``` - -### Database Connection Error - -**Error**: `P1001: Can't reach database server` - -**Solution**: -- Re-check the `DATABASE_URL` variable. -- If using local Postgres Docker, ensure the container is running (`docker ps`). -- If using Neon, check your internet connection. - -### Module Not Found - -**Error**: Cannot find internal packages (e.g., `@goodgo/logger`). - -**Solution**: -```bash -# EN: Re-install dependencies and build packages -# VI: Cài lại dependencies và build lại packages -pnpm install -pnpm build -``` - -## References - -- [Kubernetes Guide](kubernetes-local.md) - Deploy to Local K8s. -- [Project Architecture](../../docs/en/ARCHITECTURE.en.md) - System architecture overview. diff --git a/apps/web-docs/content/docs/en/guides/mermaid.md b/apps/web-docs/content/docs/en/guides/mermaid.md deleted file mode 100644 index 54d7b7a2..00000000 --- a/apps/web-docs/content/docs/en/guides/mermaid.md +++ /dev/null @@ -1,567 +0,0 @@ -# Mermaid Diagram Guide / Hướng dẫn Sơ đồ Mermaid - -## Overview / Tổng quan - -**EN**: This guide helps you choose the right Mermaid diagram type for your documentation and provides examples for common use cases. - -**VI**: Hướng dẫn này giúp bạn chọn loại sơ đồ Mermaid phù hợp cho tài liệu của bạn và cung cấp ví dụ cho các trường hợp sử dụng phổ biến. 
- -## Quick Reference / Tham chiếu Nhanh - -| Diagram Type / Loại | Use For / Sử dụng cho | Complexity / Độ phức tạp | -|----------------------|------------------------|---------------------------| -| **Flowchart** | Workflows, decision trees / Quy trình, cây quyết định | ⭐⭐ | -| **Sequence Diagram** | API interactions, request flows / Tương tác API, luồng request | ⭐⭐⭐ | -| **Class Diagram** | Code structure, patterns / Cấu trúc code, patterns | ⭐⭐⭐ | -| **Graph** | System architecture, dependencies / Kiến trúc hệ thống, dependencies | ⭐⭐ | -| **ER Diagram** | Database schema / Schema database | ⭐⭐⭐ | -| **Gantt** | Timeline, project schedule / Timeline, lịch trình dự án | ⭐⭐ | -| **C4 Diagram** | System context, containers / Bối cảnh hệ thống, containers | ⭐⭐⭐⭐ | - ---- - -## 1. Flowcharts / Sơ đồ Luồng - -### When to Use / Khi nào sử dụng - -**EN**: Use flowcharts for: -- Step-by-step guides and workflows (e.g., **"Onboarding process"**) -- Decision trees and conditional logic (e.g., **"Discount calculation"**) -- Process flows with multiple branches (e.g., **"Order fulfillment"**) -- Troubleshooting procedures (e.g., **"Login issue diagnosis"**) - -**VI**: Sử dụng flowcharts cho: -- Hướng dẫn từng bước và quy trình -- Cây quyết định và logic điều kiện -- Luồng quy trình với nhiều nhánh -- Thủ tục khắc phục sự cố - -### Basic Flowchart - -```mermaid -flowchart TD - Start([Start]) --> Input[Get Input] - Input --> Check{Valid?} - Check -->|Yes| Process[Process Data] - Check -->|No| Error[Show Error] - Process --> Output[Return Result] - Output --> End([End]) - Error --> End - - style Start fill:#e1f5ff - style End fill:#d4edda - style Check fill:#fff3cd - style Error fill:#f8d7da -``` - -**Explanation**: A basic flowchart starting with an input, followed by a validation check. If valid, it proceeds to processing and returns a result; otherwise, it shows an error and ends. 
- -**Code**: -````markdown -```mermaid -flowchart TD - Start([Start]) --> Input[Get Input] - Input --> Check{Valid?} - Check -->|Yes| Process[Process Data] - Check -->|No| Error[Show Error] - Process --> Output[Return Result] - Output --> End([End]) - Error --> End - - style Start fill:#e1f5ff - style End fill:#d4edda - style Check fill:#fff3cd - style Error fill:#f8d7da -``` -```` - -### Advanced Flowchart with Subgraphs - -```mermaid -flowchart LR - A[Client Request] --> B{Auth?} - B -->|No| C[401 Unauthorized] - B -->|Yes| D[Process] - - subgraph Processing["Request Processing"] - D --> E[Validate Input] - E --> F[Execute Logic] - F --> G[Format Response] - end - - G --> H[Return 200 OK] - C --> I[End] - H --> I - - style Processing fill:#f0e1ff -``` - -**Explanation**: A flowchart featuring an authorization check and a dedicated **Subgraph** for detailed request processing steps. - ---- - -## 2. Sequence Diagrams / Sơ đồ Tuần tự - -### When to Use / Khi nào sử dụng - -**EN**: Use sequence diagrams for: -- API communication flows (e.g., **"New order creation API"**) -- Authentication/authorization flows (e.g., **"OAuth2 login flow"**) -- Service-to-service interactions (e.g., **"Microservices sync/async calls"**) -- Request/response cycles (e.g., **"Checkout process"**) - -**VI**: Sử dụng sequence diagrams cho: -- Luồng giao tiếp API -- Luồng xác thực/phân quyền -- Tương tác giữa các service -- Chu kỳ request/response - -### Basic Sequence Diagram - -```mermaid -sequenceDiagram - participant Client - participant API - participant Service - participant DB - - Client->>API: POST /login - API->>Service: authenticate(credentials) - Service->>DB: findUser(email) - DB-->>Service: user - Service->>Service: verifyPassword() - Service-->>API: JWT token - API-->>Client: 200 OK {token} -``` - -**Explanation**: A login sequence illustrating the interaction between the Client, API, Service Layer, and Database, including password verification. 
- -**Code**: -````markdown -```mermaid -sequenceDiagram - participant Client - participant API - participant Service - participant DB - - Client->>API: POST /login - API->>Service: authenticate(credentials) - Service->>DB: findUser(email) - DB-->>Service: user - Service->>Service: verifyPassword() - Service-->>API: JWT token - API-->>Client: 200 OK {token} -``` -```` - -### Advanced with Alt/Opt/Loop - -```mermaid -sequenceDiagram - participant Client - participant API - participant Cache - participant DB - - Client->>API: GET /users/:id - API->>Cache: get(key) - - alt Cache Hit - Cache-->>API: cached data - API-->>Client: 200 OK (from cache) - else Cache Miss - Cache-->>API: null - API->>DB: SELECT * FROM users - DB-->>API: user data - API->>Cache: set(key, data, ttl) - API-->>Client: 200 OK (from DB) - end -``` - -**Explanation**: A data retrieval sequence using **Alt/Else** blocks to handle both Cache Hit and Cache Miss scenarios. - ---- - -## 3. Class Diagrams / Sơ đồ Class - -### When to Use / Khi nào sử dụng - -**EN**: Use class diagrams for: -- Design patterns and code structure (e.g., **"Singleton Pattern for Logger"**) -- Object-oriented architecture (e.g., **"Domain-Driven Design (DDD) models"**) -- Inheritance and relationships (e.g., **"Repository base and concrete classes"**) -- Module dependencies (e.g., **"Service layer dependencies"**) - -**VI**: Sử dụng class diagrams cho: -- Design patterns và cấu trúc code -- Kiến trúc hướng đối tượng -- Kế thừa và mối quan hệ -- Dependencies giữa các module - -### Basic Class Diagram - -```mermaid -classDiagram - class BaseRepository { - #prisma: PrismaClient - #modelName: string - +findById(id: string) T - +findAll(options: QueryOptions) T[] - +create(data: CreateDto) T - +update(id: string, data: UpdateDto) T - +delete(id: string) void - } - - class UserRepository { - +findByEmail(email: string) User - +findByUsername(username: string) User - } - - class FeatureRepository { - +findByName(name: string) Feature 
- +toggleStatus(id: string) Feature - } - - BaseRepository <|-- UserRepository - BaseRepository <|-- FeatureRepository -``` - -**Explanation**: A class diagram showing inheritance relationships between a generic `BaseRepository` and specific repository implementations. - ---- - -## 4. Graph Diagrams / Sơ đồ Graph - -### When to Use / Khi nào sử dụng - -**EN**: Use graph diagrams for: -- System architecture overview (e.g., **"Microservices Architecture"**) -- Component relationships (e.g., **"Service-to-database mapping"**) -- Data flow diagrams (e.g., **"Request processing pipeline"**) -- Dependency graphs (e.g., **"Package to package dependencies"**) - -**VI**: Sử dụng graph diagrams cho: -- Tổng quan kiến trúc hệ thống -- Mối quan hệ giữa các thành phần -- Sơ đồ luồng dữ liệu -- Đồ thị dependencies - -### System Architecture - -```mermaid -graph TD - Client[Web Client] --> Gateway[Traefik Gateway] - Gateway --> Auth[Auth Service] - Gateway --> IAM[IAM Service] - Gateway --> User[User Service] - - Auth --> DB[(PostgreSQL)] - IAM --> DB - User --> DB - - User --> Cache - - style Gateway fill:#2980b9,stroke:#333,stroke-width:2px,color:#fff - style Auth fill:#8e44ad,stroke:#333,stroke-width:2px,color:#fff - style IAM fill:#8e44ad,stroke:#333,stroke-width:2px,color:#fff - style User fill:#8e44ad,stroke:#333,stroke-width:2px,color:#fff - style DB fill:#f39c12,stroke:#333,stroke-width:2px,color:#fff - style Cache fill:#f39c12,stroke:#333,stroke-width:2px,color:#fff -``` - -**Explanation**: A high-level view of the system architecture showing how the Gateway routes requests to different services, all connected to a shared Database and Cache. 
- -### Data Flow - -```mermaid -graph LR - Input[User Input] --> Validation[Zod Validation] - Validation --> Controller[Controller] - Controller --> Service[Service Layer] - Service --> Repository[Repository] - Repository --> Prisma[Prisma ORM] - Prisma --> DB[(Database)] - - Cache --> Redis[(Redis)] - - style Validation fill:#27ae60,stroke:#333,stroke-width:2px,color:#fff - style Service fill:#2980b9,stroke:#333,stroke-width:2px,color:#fff - style Cache fill:#f39c12,stroke:#333,stroke-width:2px,color:#fff -``` - -**Explanation**: A detailed data flow showing Input going through Validation -> Controller -> Service -> Repository -> ORM -> DB, while also interacting with the Cache. - ---- - -## 5. ER Diagrams / Sơ đồ ER - -### When to Use / Khi nào sử dụng - -**EN**: Use ER diagrams for: -- Database schema documentation (e.g., **"User and IAM tables"**) -- Entity relationships (e.g., **"One-to-many between User and Sessions"**) -- Data modeling (e.g., **"Designing new feature entities"**) -- Prisma schema visualization (e.g., **"Mapping code entities to DB tables"**) - -**VI**: Sử dụng ER diagrams cho: -- Tài liệu schema database -- Mối quan hệ giữa các entity -- Mô hình dữ liệu -- Visualization Prisma schema - -### Database Schema - -```mermaid -erDiagram - User ||--o{ Session : has - User ||--o{ RefreshToken : has - User ||--o{ UserRole : has - Role ||--o{ UserRole : has - Role ||--o{ RolePermission : has - Permission ||--o{ RolePermission : has - - User { - string id PK - string email UK - string passwordHash - boolean mfaEnabled - datetime createdAt - datetime updatedAt - } - - Session { - string id PK - string userId FK - string token UK - string deviceId - string ipAddress - datetime expiresAt - } - - Role { - string id PK - string name UK - string description - datetime createdAt - } - - Permission { - string id PK - string code UK - string resource - string action - string scope - } -``` - -**Explanation**: An Entity-Relationship diagram illustrating a 
typical IAM schema with Users, Sessions, Roles, and Permissions. - ---- - -## 6. Gantt Charts / Biểu đồ Gantt - -### When to Use / Khi nào sử dụng - -**EN**: Use Gantt charts for: -- Project timelines -- Implementation phases -- Migration schedules -- Deployment plans - -**VI**: Sử dụng Gantt charts cho: -- Timeline dự án -- Các giai đoạn triển khai -- Lịch trình migration -- Kế hoạch deployment - -### Project Timeline - -```mermaid -gantt - title Documentation Update Project Timeline - dateFormat YYYY-MM-DD - section Phase 1 - Analysis & Research :done, p1, 2024-01-01, 1d - section Phase 2 - Templates & Strategy :active, p2, 2024-01-02, 0.5d - section Phase 3 - High Priority Docs :p3, 2024-01-03, 2d - section Phase 4 - Remaining Docs :p4, 2024-01-05, 3d - section Phase 5 - QA & Verification :p5, 2024-01-08, 1d -``` - ---- - -## 7. C4 Diagrams / Sơ đồ C4 - -### When to Use / Khi nào sử dụng - -**EN**: Use C4 diagrams for: -- System context (highest level) -- Container diagrams (services, databases) -- Component diagrams (modules within services) -- Code diagrams (classes, functions) - -**VI**: Sử dụng C4 diagrams cho: -- Bối cảnh hệ thống (cấp cao nhất) -- Sơ đồ container (services, databases) -- Sơ đồ component (modules trong services) -- Sơ đồ code (classes, functions) - -### System Context - -```mermaid -C4Context - title System Context Diagram for IAM System - - Person(user, "User", "System user needing authentication") - Person(admin, "Admin", "System administrator") - - System(iam, "IAM System", "Identity and Access Management") - - System_Ext(email, "Email Service", "SendGrid/AWS SES") - System_Ext(oauth, "OAuth Providers", "Google, Facebook, GitHub") - - Rel(user, iam, "Authenticates with", "HTTPS") - Rel(admin, iam, "Manages users and permissions", "HTTPS") - Rel(iam, email, "Sends emails", "SMTP/API") - Rel(iam, oauth, "OAuth login", "OAuth 2.0") -``` - ---- - -## Styling Tips / Mẹo Styling - -### Color Palette / Bảng màu - -```mermaid -graph LR - 
A["Primary<br/>#e1f5ff"] --> B["Secondary<br/>#fff4e1"] - B --> C["Success<br/>#d4edda"] - C --> D["Warning<br/>#fff3cd"] - D --> E["Error<br/>#f8d7da"] - E --> F["Info<br/>#f0e1ff"] - - style A fill:#2980B9,color:#fff - style B fill:#F39C12,color:#fff - style C fill:#27AE60,color:#fff - style D fill:#E67E22,color:#fff - style E fill:#C0392B,color:#fff - style F fill:#8E44AD,color:#fff -``` - -**Explanation**: The recommended color palette for consistent diagram styling within the project. - -### Style Syntax - -```markdown -style NodeId fill:#colorcode,stroke:#bordercolor,stroke-width:2px -``` - -**Examples**: -```markdown -style Start fill:#e1f5ff -style Error fill:#f8d7da -style Process fill:#d4edda,stroke:#28a745,stroke-width:2px -``` - ---- - -## Best Practices / Thực hành Tốt nhất - -### EN: Guidelines - -1. **Keep it Simple**: Don't overcomplicate diagrams -2. **Use Consistent Styling**: Apply color scheme consistently -3. **Add Legends**: Explain symbols and colors when needed -4. **Limit Complexity**: Break into multiple diagrams if too complex -5. **Test Rendering**: Always test that diagrams render correctly - -### VI: Hướng dẫn - -1. **Giữ đơn giản**: Đừng làm phức tạp sơ đồ quá mức -2. **Sử dụng Styling nhất quán**: Áp dụng bảng màu nhất quán -3. **Thêm Chú giải**: Giải thích ký hiệu và màu sắc khi cần -4. **Giới hạn Độ phức tạp**: Chia thành nhiều sơ đồ nếu quá phức tạp -5. 
**Test Rendering**: Luôn test sơ đồ render chính xác - ---- - -## Common Pitfalls / Lỗi Thường gặp - -### ❌ Too Complex - -```mermaid -graph TD - A --> B - A --> C - B --> D - B --> E - C --> F - C --> G - D --> H - E --> H - F --> I - G --> I - H --> J - I --> J -``` - -### ✅ Simplified with Subgraphs - -```mermaid -graph TD - A[Start] --> B[Process A] - B --> C[Process B] - - subgraph "Detailed Processing" - C --> D[Step 1] - D --> E[Step 2] - end - - E --> F[End] -``` - ---- - -## Testing Diagrams / Test Sơ đồ - -**EN**: Always test your diagrams before committing: - -**VI**: Luôn test sơ đồ trước khi commit: - -```bash -# Install mermaid-cli -npm install -g @mermaid-js/mermaid-cli - -# Test render (SVG) -mmdc -i your-doc.md -o test-output.svg - -# Render high-quality PNG with black background -mmdc -i your-doc.md -o test-output.png -b black -t dark -s 3 - -# Render ALL diagrams in a markdown file -mmdc -i your-doc.md - -# Parameter Explanations: -# -i: Input file (.md or .mmd) -# -o: Output file (format based on extension .svg, .png, .pdf) -# -b: Background color (hex code or color names like black, white, transparent) -# -t: Theme (default, forest, dark, neutral) -# -s: Scale (increase resolution, e.g., 3 for sharper images) -``` - ---- - -## Resources / Tài nguyên - -- [Mermaid Official Documentation](https://mermaid.js.org/) - Complete reference -- [Mermaid Live Editor](https://mermaid.live/) - Test diagrams online -- [Mermaid CheatSheet](https://jojozhuang.github.io/tutorial/mermaid-cheat-sheet/) - Quick reference - ---- - -**Last Updated**: 2026-01-05 diff --git a/apps/web-docs/content/docs/en/guides/neon-database.md b/apps/web-docs/content/docs/en/guides/neon-database.md deleted file mode 100644 index 41568825..00000000 --- a/apps/web-docs/content/docs/en/guides/neon-database.md +++ /dev/null @@ -1,215 +0,0 @@ -# Neon Database Guide - -This project uses [Neon PostgreSQL](https://neon.tech) for all environments. - -## Why Neon? 
- -- ✅ **Serverless**: No infrastructure management -- ✅ **Branching**: Separate databases for dev/staging/prod -- ✅ **Auto-scaling**: Handles traffic spikes automatically -- ✅ **Point-in-time restore**: Easy recovery from mistakes -- ✅ **Free tier**: Perfect for development -- ✅ **Connection pooling**: Built-in PgBouncer support - -## Quick Start - -### 1. Create Neon Account - -1. Sign up at https://neon.tech -2. Create a new project: `goodgo-platform` - -### 2. Create Branches - -In Neon Console, create branches: -- `main` (development) - already exists -- `staging` - create from main -- `production` - create from main - -### 3. Get Connection Strings - -For each branch, copy the connection string: -- Format: `postgresql://user:password@ep-xxx.region.neon.tech/dbname?sslmode=require` -- Add `?pgbouncer=true` for connection pooling (recommended) - -### 4. Configure Local Development - -```bash -# Create .env.local -cp deployments/local/env.local.example deployments/local/.env.local - -# Edit .env.local and add: -DATABASE_URL=postgresql://user:pass@ep-xxx.region.neon.tech/dbname?sslmode=require&pgbouncer=true -``` - -### 5. Run Migrations - -```bash -./scripts/db/migrate.sh iam-service dev -``` - -## Connection String Format - -``` -postgresql://[user]:[password]@[endpoint]/[dbname]?sslmode=require&pgbouncer=true -``` - -**Parameters**: -- `sslmode=require` - Required for Neon -- `pgbouncer=true` - Enable connection pooling (recommended) - -## Environment Configuration - -### Local Development - -File: `deployments/local/.env.local` - -```bash -DATABASE_URL=postgresql://user:pass@ep-xxx.region.neon.tech/dbname?sslmode=require&pgbouncer=true -``` - -### Staging - -Store in GitHub Secrets: `NEON_DATABASE_URL_STAGING` - -Or in Kubernetes: -```bash -kubectl create secret generic iam-service-secrets \ - --from-literal=database-url='postgresql://...' 
\ - -n staging -``` - -### Production - -Store in GitHub Secrets: `NEON_DATABASE_URL_PRODUCTION` - -Or in Kubernetes: -```bash -kubectl create secret generic iam-service-secrets \ - --from-literal=database-url='postgresql://...' \ - -n production -``` - -## Migrations - -### Development - -```bash -# Create new migration -./scripts/db/migrate.sh iam-service dev - -# This will: -# 1. Create migration file -# 2. Apply to database -# 3. Update Prisma Client -``` - -### Staging/Production - -Migrations run automatically in CI/CD: -- Before deployment to staging -- Before deployment to production (with approval) - -Manual migration: -```bash -./scripts/db/migrate.sh iam-service deploy -``` - -## Backup & Restore - -### Automatic Backups - -Neon provides automatic backups. Access via Neon Console: -- Point-in-time restore -- Branch restore -- Export data - -### Manual Backup - -```bash -./scripts/db/backup.sh iam-service -``` - -This creates a SQL dump file in `backups/` directory. - -### Restore - -```bash -# From Neon Console (recommended) -# Or using psql: -psql $DATABASE_URL < backup.sql -``` - -## Monitoring - -Monitor your databases via Neon Console: -- Connection metrics -- Query performance -- Storage usage -- Branch status - -## Troubleshooting - -### Connection Issues - -1. **Check connection string format** - - Must include `?sslmode=require` - - Verify credentials - -2. **Check IP allowlist** - - Neon may restrict IPs - - Add your IP in Neon Console - -3. **Check branch status** - - Ensure branch is active - - Check for maintenance - -### Migration Issues - -1. **DATABASE_URL not set** - ```bash - export DATABASE_URL="your-neon-url" - ``` - -2. **Schema mismatch** - ```bash - # Reset and re-migrate (dev only!) - pnpm prisma migrate reset - ``` - -3. **Connection timeout** - - Add `?pgbouncer=true` for pooling - - Check Neon console for limits - -### Performance Issues - -1. **Enable connection pooling** - - Add `?pgbouncer=true` to connection string - -2. 
**Check query performance** - - Use Neon Console query analyzer - - Review slow queries - -3. **Optimize indexes** - - Review Prisma schema - - Add indexes for frequent queries - -## Cost Optimization - -- **Free tier**: 0.5 GB storage, sufficient for dev -- **Staging**: Use free tier or minimal paid plan -- **Production**: Scale based on usage -- **Branching**: Free branches for testing - -## Best Practices - -1. **Always use connection pooling**: `?pgbouncer=true` -2. **Use SSL**: `?sslmode=require` -3. **Separate branches**: One per environment -4. **Regular backups**: Use Neon's automatic backups -5. **Monitor usage**: Check Neon Console regularly - -## Resources - -- [Neon Documentation](https://neon.tech/docs) -- [Neon Console](https://console.neon.tech) -- [Prisma + Neon Guide](https://neon.tech/docs/guides/prisma) diff --git a/apps/web-docs/content/docs/en/guides/observability.md b/apps/web-docs/content/docs/en/guides/observability.md deleted file mode 100644 index d3202a81..00000000 --- a/apps/web-docs/content/docs/en/guides/observability.md +++ /dev/null @@ -1,89 +0,0 @@ -# Observability Stack Guide - -This guide explains how to use the observability stack (Grafana, Prometheus, Loki, Promtail) included in the infrastructure. - -## Architecture Overview - -The stack consists of the following components: - -- **Prometheus**: Collects metrics from services. -- **Loki**: Collects logs. -- **Promtail**: Scrapes logs from Docker containers and pushes them to Loki. -- **Grafana**: Visualization dashboard for metrics (from Prometheus) and logs (from Loki). - -## Getting Started - -### Prerequisites - -- Docker and Docker Compose installed. -- Existing `microservices-network` (created by the main application stack or manually). 
- -### Starting the Stack - -You can easily start the stack using the provided script: - -```bash -./scripts/observability/start.sh -``` - -Or manually: - -```bash -# Ensure network exists -docker network create microservices-network || true - -cd infra/observability -docker-compose -f docker-compose.observability.yml up -d -``` - -Check if all containers are running: - -```bash -docker ps -``` - -You should see `grafana`, `prometheus`, `loki`, and `promtail`. - -## Accessing Services - -| Service | URL | Credentials (if applicable) | Description | -| :--- | :--- | :--- | :--- | -| **Grafana** | [http://localhost:3001](http://localhost:3001) | `admin` / `admin` | Main dashboard for visualization. | -| **Prometheus** | [http://localhost:9090](http://localhost:9090) | N/A | Raw metrics and target status. | -| **Loki** | [http://localhost:3100](http://localhost:3100) | N/A | Log aggregation API (no UI). | - -## Using Grafana - -1. **Login**: Access [http://localhost:3001](http://localhost:3001) and login with `admin`/`admin`. -2. **Explore Data**: - - Go to **Explore** (compass icon) in the sidebar. - - Select **Loki** from the datasource dropdown to search logs. - - Select **Prometheus** from the datasource dropdown to query metrics. - -### Viewing Logs (Loki) - -In the **Explore** view with **Loki** selected: - -1. Click **Label browser**. -2. Select a label, e.g., `container`. -3. Choose a specific container (e.g., `iam-service` or `traefik`). -4. Click **Show logs**. - -You can also write LogQL queries manually, for example: - -```logql -{container="iam-service"} -``` - -### Viewing Metrics (Prometheus) - -In the **Explore** view with **Prometheus** selected: - -1. Type a metric name in the query field (e.g., `up`, `container_memory_usage_bytes`). -2. Click **Run query**. - -## Configuration - -- **Prometheus**: Rules and targets are configured in `infra/observability/prometheus/prometheus.yml`. 
-- **Promtail**: Log scraping rules are configured in `infra/observability/promtail/promtail-config.yml`. -- **Grafana**: Datasources and dashboards provisioning are in `infra/observability/grafana/`. diff --git a/apps/web-docs/content/docs/en/guides/troubleshooting.md b/apps/web-docs/content/docs/en/guides/troubleshooting.md deleted file mode 100644 index 8dd969de..00000000 --- a/apps/web-docs/content/docs/en/guides/troubleshooting.md +++ /dev/null @@ -1,218 +0,0 @@ -# Troubleshooting Guide - -> **Note**: This guide focuses on debugging the GoodGo Microservices Platform in a local development environment (Docker Compose). - -## Table of Contents - -1. [General Diagnosis](#general-diagnosis) -2. [Infrastructure Issues](#infrastructure-issues) - - [Database (Neon/PostgreSQL)](#database-neonpostgresql) - - [Redis](#redis) - - [Traefik Gateway](#traefik-gateway) -3. [Service Issues](#service-issues) - - [Service Fails to Start](#service-fails-to-start) - - [Prisma/Database Errors](#prismadatabase-errors) - - [Authentication Errors](#authentication-errors) -4. [Debugging Tools](#debugging-tools) -5. [FAQ](#faq) - ---- - -## General Diagnosis - -When something goes wrong, follow this checklist: - -1. **Check Service Status**: - ```bash - cd deployments/local - docker-compose ps - ``` - *All services should be `Up` or `Running`.* - -2. **Check Logs**: - ```bash - # View logs for a specific service - docker-compose logs -f - - # View last 100 lines for all - docker-compose logs --tail=100 - ``` - -3. **Check Connectivity**: - * Can you reach the Gateway? `curl http://localhost/health` - * Can you reach the Dashboard? http://localhost:8080 - ---- - -## Infrastructure Issues - -### Database (Neon/PostgreSQL) - -**Problem**: `P1001: Can't reach database server` or `Connection timed out` - -* **Cause 1**: Internet connectivity issues (Neon is cloud-based). -* **Cause 2**: Incorrect `DATABASE_URL` in `.env`. -* **Cause 3**: IP address blocked by Neon. - -**Solution**: -1. 
Verify internet connection: `ping neon.tech`. -2. Check `deployments/local/.env.local`. The URL should look like: - `postgres://user:pass@ep-xyz.aws.neon.tech/neondb` -3. Go to Neon Dashboard -> Settings, ensure "Allow all IPs" or add your current IP. - -**Problem**: `P1003: Database does not exist` - -* **Reason**: You are connecting to the wrong database name. -* **Fix**: Check the end of your connection string (e.g., `/neondb` usually). If you are using a custom DB name, ensure it exists in Neon. - -### Redis - -**Problem**: `Redis connection refused` or `ECONNREFUSED` - -* **Cause**: Redis container is not running or port mapping is wrong. - -**Solution**: -1. Check Redis status: `docker-compose ps redis`. -2. Restart Redis: `docker-compose restart redis`. -3. Check logs: `docker-compose logs redis`. -4. Connection string from services: - * **Inside Docker**: `redis:6379` - * **From Host**: `localhost:6379` - -### Traefik Gateway - -**Problem**: `404 Not Found` when accessing APIs (e.g., `http://localhost/api/v1/auth`) - -* **Cause**: Service is down or Labels are misconfigured. - -**Solution**: -1. Check Traefik Dashboard at http://localhost:8080. - * Look for "HTTP Routers" and "Services". - * If your service is missing, check `docker-compose.yml` labels. -2. Verify `PathPrefix` in labels matches your request. - ```yaml - - "traefik.http.routers.iam.rule=PathPrefix(`/api/v1/auth`)" - ``` -3. Check if the service passed health checks (Health status in dashboard). - -**Problem**: `Bad Gateway` or `Gateway Timeout` - -* **Cause**: Service is crashing or taking too long to respond. -* **Fix**: Check the specific service logs (`docker-compose logs iam-service`). - ---- - -## Service Issues - -### Service Fails to Start - -**Symptom**: Container status is `Exited (1)` or `Restarting`. - -**Debugging**: -1. Check logs immediately: - ```bash - docker-compose logs iam-service - ``` -2. 
**Common Error**: `Config validation error` - * **Fix**: Check environment variables. Using `./scripts/setup/init-project.sh` ensures `.env` exists. -3. **Common Error**: `PrismaClientInitializationError` - * **Fix**: Database connectivity issue (see Infrastructure section). - -### Prisma/Database Errors - -**Error**: `P2025: Record to update not found` - -* **Fix**: Logic error. Ensure the ID exists before updating. - -**Error**: `P2002: Unique constraint failed` - -* **Fix**: You are trying to insert duplicate data (e.g., same email). - -**Error**: `Migration failed` - -* **Fix**: - 1. Delete `prisma/migrations` folder (only in dev!). - 2. Reset database: `pnpm prisma migrate reset`. - 3. Regenerate client: `pnpm prisma generate`. - -### Authentication Errors - -**Problem**: `401 Unauthorized` despite valid token - -* **Cause 1**: Token expired. -* **Cause 2**: Public key mismatch (Service can't verify token signed by IAM). -* **Cause 3**: Clock skew (Docker time vs Host time). - -**Solution**: -1. Check server logs for JWT verification errors. -2. Restart services to refresh keys. -3. Sync Docker time: restart Docker Desktop. - ---- - -## Debugging Tools - -### 1. Accessing Container Shell - -To inspect files or run commands inside a running container: - -```bash -docker-compose exec iam-service sh -# or /bin/bash -``` - -### 2. Inspecting Database (via Prisma Studio) - -Use Prisma Studio to view/edit data visually: - -```bash -pnpm --filter @goodgo/iam-service prisma studio -# Opens http://localhost:5555 -``` - -### 3. Inspecting Redis - -```bash -docker-compose exec redis redis-cli -> PING -PONG -> KEYS * -1) "user:123:session" -``` - -### 4. Direct API Testing - -Use `curl` or Postman. 
- -```bash -# Health Check -curl -v http://localhost/api/v1/auth/health/live - -# Login (example) -curl -X POST http://localhost/api/v1/auth/login \ - -H "Content-Type: application/json" \ - -d '{"email":"admin@example.com", "password":"password"}' -``` - ---- - -## FAQ - -**Q: Why is my change not reflecting?** -A: If you changed `.env` or `docker-compose.yml`, you must restart: -```bash -docker-compose down && docker-compose up -d -``` -If you changed code, hot-reloading (nodemon) should pick it up. If not, restart container. - -**Q: How do I reset everything?** -A: Be careful, this deletes all data! -```bash -docker-compose down -v -# -v removes volumes (Redis data, etc.) -``` - -**Q: My computer is slow when running everything.** -A: Docker consumes RAM. -1. Stop unused services (e.g., `future-service`). -2. Increase Docker resource limits in Docker Desktop settings. diff --git a/apps/web-docs/content/docs/en/onboarding/new-developer-guide.md b/apps/web-docs/content/docs/en/onboarding/new-developer-guide.md deleted file mode 100644 index b4769311..00000000 --- a/apps/web-docs/content/docs/en/onboarding/new-developer-guide.md +++ /dev/null @@ -1,89 +0,0 @@ -# New Developer Guide - -Welcome to VelikHo's Microservices Platform project! - -## First Day Checklist - -- [ ] Access to GitHub repository -- [ ] Access to development environment -- [ ] Docker installed and running -- [ ] Node.js and PNPM installed -- [ ] IDE configured (VS Code recommended) -- [ ] Read this guide - -## Setup Your Development Environment - -1. **Clone the repository** - ```bash - git clone <repository-url> - cd Base - ``` - -2. **Run initialization script** - ```bash - ./scripts/setup/init-project.sh - ``` - -3. **Start local infrastructure** - ```bash - cd deployments/local - docker-compose up -d - ``` - -4. 
**Verify setup** - - Check Traefik: http://localhost:8080 - - Check API: http://localhost/api/v1/health - -## Development Tools - -### Recommended VS Code Extensions - -- ESLint -- Prettier -- Prisma -- Docker -- GitLens - -### Useful Commands - -```bash -# Start all services -./scripts/dev/start-all.sh - -# Start specific service -./scripts/dev/start-service.sh iam-service - -# View logs -./scripts/dev/logs.sh iam-service - -# Run migrations -./scripts/db/migrate.sh iam-service dev - -# Run tests -pnpm test -``` - -## Code Standards - -- **TypeScript**: Strict mode enabled -- **Linting**: ESLint with shared config -- **Formatting**: Prettier -- **Commits**: Conventional Commits format -- **Tests**: Minimum 80% coverage - -## Getting Help - -- Check [Documentation](../guides/) -- Contact maintainer: hongochai10@icloud.com -- Review existing code examples -- Open an issue on GitHub - -## Next Steps - -1. Pick a small task from backlog -2. Create feature branch -3. Implement and test -4. Create pull request -5. Get code review - -Good luck! 🚀 diff --git a/apps/web-docs/content/docs/en/runbooks/incident-response.md b/apps/web-docs/content/docs/en/runbooks/incident-response.md deleted file mode 100644 index 9af6b5b4..00000000 --- a/apps/web-docs/content/docs/en/runbooks/incident-response.md +++ /dev/null @@ -1,65 +0,0 @@ -# Incident Response Runbook - -## Severity Levels - -- **P0 - Critical**: Service completely down, data loss -- **P1 - High**: Major functionality broken, affecting many users -- **P2 - Medium**: Minor functionality broken, workaround available -- **P3 - Low**: Cosmetic issues, no user impact - -## Response Process - -### 1. Acknowledge Incident - -- Identify severity level -- Notify team via Slack/email -- Create incident ticket - -### 2. Investigate - -- Check service health endpoints -- Review logs: `./scripts/dev/logs.sh ` -- Check monitoring dashboards (Grafana) -- Review recent deployments - -### 3. 
Mitigate - -- Apply quick fixes if available -- Rollback if recent deployment caused issue -- Scale up if resource constraint - -### 4. Resolve - -- Implement permanent fix -- Verify resolution -- Update documentation - -### 5. Post-Mortem - -- Document incident -- Identify root cause -- Create action items -- Update runbooks - -## Common Scenarios - -### Service Down - -1. Check Kubernetes pods: `kubectl get pods -n <namespace>` -2. Check pod logs: `kubectl logs <pod-name> -n <namespace>` -3. Restart service: `kubectl rollout restart deployment/<service-name> -n <namespace>` -4. If persistent, rollback: `kubectl rollout undo deployment/<service-name> -n <namespace>` - -### Database Issues - -1. Check database connectivity -2. Review slow queries -3. Check connection pool -4. Scale database if needed - -### High Error Rate - -1. Check error logs -2. Review recent changes -3. Check external dependencies -4. Implement circuit breaker if needed diff --git a/apps/web-docs/content/docs/en/runbooks/rollback-procedure.md b/apps/web-docs/content/docs/en/runbooks/rollback-procedure.md deleted file mode 100644 index decb798e..00000000 --- a/apps/web-docs/content/docs/en/runbooks/rollback-procedure.md +++ /dev/null @@ -1,71 +0,0 @@ -# Rollback Procedure - -## When to Rollback - -- Service is down or unstable -- Critical bugs introduced -- Performance degradation -- Data corruption risk - -## Rollback Steps - -### Kubernetes Rollback - -1. **Identify current version** - ```bash - kubectl get deployment iam-service -n production -o jsonpath='{.spec.template.spec.containers[0].image}' - ``` - -2. **Rollback to previous version** - ```bash - kubectl rollout undo deployment/iam-service -n production - ``` - -3. **Verify rollback** - ```bash - kubectl rollout status deployment/iam-service -n production - ``` - -4. **Check service health** - ```bash - curl https://api.goodgo.vn/health - ``` - -### Database Migration Rollback - -**Note**: Prisma doesn't support automatic rollback. Create a new migration to reverse changes. - -1. 
Create reverse migration: - ```bash - cd services/iam-service - pnpm prisma migrate dev --name rollback_previous_change - ``` - -2. Apply reverse migration: - ```bash - pnpm prisma migrate deploy - ``` - -### Docker Compose Rollback - -1. Stop current containers: - ```bash - docker-compose down - ``` - -2. Checkout previous version: - ```bash - git checkout - ``` - -3. Rebuild and start: - ```bash - docker-compose up -d --build - ``` - -## Post-Rollback - -1. Verify functionality -2. Monitor metrics -3. Document rollback reason -4. Plan fix for next deployment diff --git a/apps/web-docs/content/docs/en/skills/README.md b/apps/web-docs/content/docs/en/skills/README.md deleted file mode 100644 index 90533c9b..00000000 --- a/apps/web-docs/content/docs/en/skills/README.md +++ /dev/null @@ -1,222 +0,0 @@ -# Cursor Skills Documentation - -> Comprehensive documentation for all Cursor AI skills used in the GoodGo Microservices Platform - -## Overview - -Cursor Skills are specialized knowledge modules that guide AI assistants in following project-specific patterns, standards, and best practices. This directory contains detailed documentation for each skill, including when to use them, key concepts, common patterns, and real-world examples from the codebase. - -Each skill documentation includes **Mermaid diagrams** that visually illustrate workflows, architectures, patterns, and relationships to enhance understanding of complex concepts. - -## Available Skills - -The GoodGo platform includes **26 Cursor Skills** organized by category: - -### API & Data Layer - -#### [API Design](./api-design.md) -RESTful API design standards for GoodGo microservices. Use when creating new API endpoints, designing DTOs, implementing controllers, writing OpenAPI documentation, or standardizing API responses. - -#### [Database & Prisma](./database-prisma.md) -Prisma ORM and database patterns for GoodGo microservices. 
Use when working with databases, creating Prisma schemas, writing migrations, implementing repositories, or optimizing queries. - -#### [Error Handling Patterns](./error-handling-patterns.md) -Error handling patterns and conventions for GoodGo microservices. Use when implementing error handling, creating custom error classes, handling exceptions, standardizing error responses, or debugging error scenarios. - -#### [Repository Pattern](./repository-pattern.md) -Repository pattern implementation and best practices for GoodGo microservices. Use when implementing data access layers, extending BaseRepository, writing database queries, handling transactions, or optimizing database operations. - -#### [Caching Patterns](./caching-patterns.md) -Caching strategies and patterns for GoodGo microservices including multi-layer cache, Redis caching, cache key naming, TTL strategies, cache invalidation, and cache-aside patterns. - -### Code Quality & Testing - -#### [Testing Patterns](./testing-patterns.md) -Testing best practices for GoodGo microservices. Use when writing unit tests, integration tests, E2E tests, setting up Jest, mocking dependencies, or debugging test failures. - -#### [Code Comments](./comment-code.md) -Add bilingual code comments in Vietnamese and English for better documentation. Use when adding comments to code, documenting functions/classes, or when user requests Vietnamese/English documentation. - -#### [Middleware Patterns](./middleware-patterns.md) -Express middleware patterns and best practices for GoodGo microservices. Use when creating custom middleware, organizing middleware chains, handling request/response transformation, or implementing cross-cutting concerns. - -#### [Service Layer Patterns](./service-layer-patterns.md) -Service layer organization and patterns for GoodGo microservices. 
Use when implementing business logic, organizing service classes, using dependency injection, composing services, or separating concerns between controllers and repositories. - -### Infrastructure & Operations - -#### [Kubernetes Deployment](./deployment-kubernetes.md) -Kubernetes deployment patterns for GoodGo microservices. Use when deploying to staging/production, creating K8s manifests, configuring HPA, setting up ingress, or troubleshooting K8s deployments. - -#### [Event-Driven Architecture](./event-driven-architecture.md) -Event-driven architecture patterns with Apache Kafka for GoodGo microservices. Use when implementing async communication, event publishing/consuming, event sourcing, CQRS, or integrating event streams with HTTP endpoints. - -#### [Inter-Service Communication](./inter-service-communication.md) -Inter-service communication patterns for GoodGo microservices including gRPC, GraphQL, service-to-service authentication, protocol selection, and client patterns. Use when implementing service-to-service calls, choosing communication protocols, or building service clients. - -#### [Data Consistency Patterns](./data-consistency-patterns.md) -Data consistency patterns for distributed microservices including Saga patterns, distributed transactions, eventual consistency, compensation, and idempotency. Use when handling distributed transactions, implementing eventual consistency, or managing data synchronization across services. - -#### [API Gateway Advanced](./api-gateway-advanced.md) -Advanced API Gateway patterns for GoodGo microservices including API composition, request/response transformation, service mesh integration, advanced routing, and gateway-level resilience. Use when implementing API aggregation, service composition, or advanced gateway features. 
- -#### [Configuration Management](./configuration-management.md) -Configuration management patterns for GoodGo microservices including feature flags, dynamic configuration reloading, environment-specific configurations, and secrets management. Use when implementing feature toggles, managing configuration, or handling environment variables. - -#### [Performance Optimization](./performance-optimization.md) -Performance optimization patterns for GoodGo microservices including database query optimization, memory leak detection, profiling, connection pooling, and caching strategies. Use when optimizing performance, profiling applications, or detecting performance bottlenecks. - -#### [Observability & Monitoring](./observability-monitoring.md) -Observability and monitoring patterns for GoodGo microservices. Use when adding metrics, implementing logging, setting up tracing, creating health checks, or debugging production issues. - -#### [Resilience Patterns](./resilience-patterns.md) -Resilience patterns for GoodGo microservices including circuit breaker, retry strategies, timeout handling, and graceful degradation for improved fault tolerance and system reliability. - -#### [Microservices Development Process](./microservices-development-process.md) -Standard development process for creating and maintaining microservices in GoodGo platform. Use when creating new services, migrating services, refactoring services, or planning service implementations. - -#### [CI/CD Advanced Patterns](./cicd-advanced-patterns.md) -Advanced CI/CD patterns for GoodGo microservices including blue-green deployments, canary releases, automated rollback, deployment verification, and progressive delivery. Use when implementing advanced deployment strategies, automated rollbacks, or progressive delivery pipelines. 
- -#### [Infrastructure as Code](./infrastructure-as-code.md) -Infrastructure as Code patterns for GoodGo platform including Terraform modules, Kubernetes operators, infrastructure testing, GitOps workflows, and multi-environment management. Use when managing infrastructure, implementing GitOps, or creating reusable infrastructure modules. - -#### [API Versioning Strategy](./api-versioning-strategy.md) -API versioning strategies for GoodGo microservices including semantic versioning, backward compatibility patterns, API deprecation, version negotiation, and breaking changes handling. Use when versioning APIs, handling breaking changes, or implementing API deprecation strategies. - -#### [Service Discovery & Registry](./service-discovery-registry.md) -Service discovery and registry patterns for GoodGo microservices including service registry, health check orchestration, load balancing strategies, and service mesh integration. Use when implementing service discovery, managing service health, or integrating with service mesh. - -### Standards & Security - -#### [Project Rules](./project-rules.md) -GoodGo Microservices Platform coding standards and architecture patterns. Use when working with services, apps, packages, or infrastructure. - -#### [Security](./security.md) -Security best practices and patterns for GoodGo microservices platform. Use when implementing authentication, authorization, data protection, input validation, rate limiting, secrets management, or security testing across all services. - -#### [Documentation](./documentation.md) -Guidelines for writing technical documentation in the GoodGo project. Use when creating or updating README files, guides, architecture docs, or API documentation. Ensures bilingual (EN/VI) consistency and proper structure. 
- -## Quick Reference - -### By Use Case - -| Task | Recommended Skills | -|------|-------------------| -| Create new API endpoint | API Design, Security, Testing Patterns | -| Setup new service | Project Rules, Database & Prisma, Observability | -| Write tests | Testing Patterns, Comment Code | -| Deploy to production | Kubernetes Deployment, Observability, Security | -| Debug production issues | Observability & Monitoring, Security | -| Write documentation | Documentation, Comment Code | -| Implement authentication | Security, API Design, Database & Prisma | -| Optimize database queries | Database & Prisma, Observability | -| Implement event-driven communication | Event-Driven Architecture, Resilience Patterns | -| Implement service-to-service calls | Inter-Service Communication, Security, Resilience Patterns | -| Handle distributed transactions | Data Consistency Patterns, Event-Driven Architecture | -| Optimize performance | Performance Optimization, Observability & Monitoring | -| Manage feature flags | Configuration Management | -| Deploy with zero downtime | CI/CD Advanced Patterns, Deployment Kubernetes | - -### Skill Dependencies - -``` -Project Rules (Foundation) - ├── API Design - ├── Database & Prisma - ├── Security - ├── Microservices Development Process - └── Testing Patterns - └── Comment Code - -Data Layer - ├── Repository Pattern - │ ├── Database & Prisma - │ └── Error Handling Patterns - └── Caching Patterns - └── Repository Pattern - -Application Layer - ├── Service Layer Patterns - │ ├── Repository Pattern - │ ├── Caching Patterns - │ └── Error Handling Patterns - └── Middleware Patterns - └── Error Handling Patterns - -Infrastructure - ├── Event-Driven Architecture - │ ├── Resilience Patterns - │ ├── Error Handling Patterns - │ └── Observability & Monitoring - ├── Inter-Service Communication - │ ├── API Design - │ ├── Security - │ └── Resilience Patterns - ├── Data Consistency Patterns - │ ├── Event-Driven Architecture - │ ├── Database & Prisma 
- │ └── Error Handling Patterns - ├── API Gateway Advanced - │ ├── Middleware Patterns - │ ├── Security - │ └── API Design - ├── Configuration Management - │ └── Observability & Monitoring - ├── Performance Optimization - │ ├── Database & Prisma - │ ├── Caching Patterns - │ └── Observability & Monitoring - ├── CI/CD Advanced Patterns - │ ├── Deployment Kubernetes - │ └── Testing Patterns - ├── Infrastructure as Code - │ └── Deployment Kubernetes - ├── API Versioning Strategy - │ ├── API Design - │ └── Middleware Patterns - ├── Service Discovery & Registry - │ ├── Deployment Kubernetes - │ └── Observability & Monitoring - └── Resilience Patterns - ├── Error Handling Patterns - └── Service Layer Patterns - -Documentation (Cross-cutting) - └── All skills -Observability (Cross-cutting) - └── All services -Kubernetes Deployment (Infrastructure) - └── All services -``` - -## How to Use Skills - -1. **When starting new task**: Review relevant skills in this directory -2. **During development**: Reference skill documentation for patterns and examples -3. **When stuck**: Check skill docs for best practices and common solutions -4. **During code review**: Use skills as checklist to ensure standards compliance - -## Related Documentation - -- [System Architecture](../architecture/system-design.md) - System design patterns -- [Development Guide](../guides/development.md) - Development workflow -- [Deployment Guide](../guides/deployment.md) - Deployment procedures -- [API Documentation](../api/openapi/) - OpenAPI specifications - -## Contributing - -When updating or adding new skills: - -1. Update skill source file in `.cursor/skills/{skill-name}/SKILL.md` -2. Update corresponding documentation in `docs/en/skills/{skill-name}.md` -3. Update Vietnamese translation in `docs/vi/skills/{skill-name}.md` -4. Update this index file with any changes -5. 
Ensure bilingual consistency - -## Resources - -- [Cursor Skills Documentation](https://cursor.sh/docs) - Official Cursor documentation -- [Cursor AI](https://cursor.sh) - Cursor IDE homepage -- Project Skills: `.cursor/skills/` - Source skill files \ No newline at end of file diff --git a/apps/web-docs/content/docs/en/skills/api-design.md b/apps/web-docs/content/docs/en/skills/api-design.md deleted file mode 100644 index 1b0f3a89..00000000 --- a/apps/web-docs/content/docs/en/skills/api-design.md +++ /dev/null @@ -1,602 +0,0 @@ ---- -name: api-design -description: RESTful API design standards for GoodGo microservices. Use when creating new API endpoints, designing DTOs, implementing controllers, writing OpenAPI documentation, or standardizing API responses. ---- - -# RESTful API Design Standards - -## When to Use This Skill - -Use this skill when: -- Creating new API endpoints -- Designing request/response DTOs -- Implementing controllers and routes -- Writing OpenAPI/Swagger documentation -- Standardizing error responses -- Implementing pagination, filtering, and sorting -- Setting up API versioning -- Designing resource relationships - -## Core Principles - -1. **Consistency**: All APIs follow the same patterns -2. **Predictability**: Developers can guess endpoint behavior -3. **Simplicity**: Easy to understand and use -4. **Documentation**: Self-documenting through OpenAPI -5. **Error Handling**: Clear, actionable error messages - -## Request/Response Flow - -The following diagram illustrates how a request flows through the API layers: - -```mermaid -sequenceDiagram - participant Client - participant Middleware - participant Controller - participant Service - participant Repository - participant Database - - Client->>Middleware: HTTP Request - Middleware->>Middleware: Authentication
Rate Limiting
Validation - Middleware->>Controller: Validated Request - Controller->>Controller: Parse DTO
Extract Params - Controller->>Service: Business Logic Call - Service->>Repository: Data Access Call - Repository->>Database: Query Execution - Database-->>Repository: Data Result - Repository-->>Service: Entity/Entities - Service-->>Controller: Business Result - Controller->>Controller: Transform to DTO
Format Response - Controller-->>Middleware: Response Object - Middleware-->>Client: HTTP Response -``` - -## API Structure Hierarchy - -The following diagram shows the hierarchical structure of RESTful API endpoints: - -```mermaid -graph TD - A[API Base URL
https://api.goodgo.com] --> B[Version
/v1] - B --> C[Resource Collection
/users] - B --> D[Resource Collection
/orders] - B --> E[Resource Collection
/products] - - C --> F[Resource Instance
/users/:id] - C --> G[Sub-Resource
/users/:id/orders] - - F --> H[GET /users/:id
Retrieve user] - F --> I[PUT /users/:id
Update user] - F --> J[DELETE /users/:id
Delete user] - - C --> K[GET /users
List users] - C --> L[POST /users
Create user] - - G --> M[GET /users/:id/orders
List user orders] - G --> N[POST /users/:id/orders
Create order] - - style A fill:#e1f5ff - style B fill:#b3e5fc - style C fill:#81d4fa - style D fill:#81d4fa - style E fill:#81d4fa - style F fill:#4fc3f7 - style G fill:#4fc3f7 -``` - -## URL Structure - -``` -https://api.goodgo.com/v1/{resource}/{id}/{sub-resource} - -Examples: -GET /v1/users # List users -POST /v1/users # Create user -GET /v1/users/123 # Get user by ID -PUT /v1/users/123 # Update user -DELETE /v1/users/123 # Delete user -GET /v1/users/123/orders # Get user's orders -POST /v1/users/123/orders # Create order for user -``` - -## HTTP Methods - -- **GET**: Retrieve resource(s) - Safe, Idempotent -- **POST**: Create new resource - Not idempotent -- **PUT**: Full update - Idempotent -- **PATCH**: Partial update - Not guaranteed idempotent (depends on the patch semantics) -- **DELETE**: Remove resource - Idempotent - -## Standard Response Format - -### Success Response - -```typescript -interface SuccessResponse<T> { - success: true; - data: T; - metadata?: { - timestamp: string; - version: string; - requestId: string; - }; - pagination?: { - page: number; - limit: number; - total: number; - totalPages: number; - }; -} - -// Example -{ - "success": true, - "data": { - "id": "123", - "email": "user@example.com", - "name": "John Doe" - }, - "metadata": { - "timestamp": "2024-01-01T00:00:00Z", - "version": "1.0.0", - "requestId": "req_abc123" - } -} -``` - -### Error Response - -```typescript -interface ErrorResponse { - success: false; - error: { - code: string; - message: string; - details?: any; - field?: string; - stack?: string; // Only in development - }; - metadata?: { - timestamp: string; - requestId: string; - }; -} - -// Example -{ - "success": false, - "error": { - "code": "VALIDATION_ERROR", - "message": "Invalid email format", - "field": "email", - "details": { - "provided": "invalid-email", - "expected": "valid email address" - } - } -} -``` - -## Status Codes - -```typescript -// Success codes -200 OK // GET, PUT, PATCH success -201 Created // POST success with resource creation -204 No 
Content // DELETE success - -// Client errors -400 Bad Request // Invalid request data -401 Unauthorized // Missing/invalid authentication -403 Forbidden // Valid auth but no permission -404 Not Found // Resource doesn't exist -409 Conflict // Resource conflict (duplicate) -422 Unprocessable // Validation errors - -// Server errors -500 Internal Error // Unexpected server error -502 Bad Gateway // External service error -503 Service Unavailable // Service temporarily down -504 Gateway Timeout // External service timeout -``` - -## DTOs (Data Transfer Objects) - -### Request DTOs - -```typescript -// create.dto.ts -import { IsEmail, IsNotEmpty, IsOptional, MinLength } from 'class-validator'; - -export class CreateUserDto { - @IsEmail() - @IsNotEmpty() - email: string; - - @MinLength(6) - @IsNotEmpty() - password: string; - - @IsOptional() - name?: string; -} - -// update.dto.ts -export class UpdateUserDto { - @IsEmail() - @IsOptional() - email?: string; - - @IsOptional() - name?: string; - - @IsOptional() - avatar?: string; -} - -// query.dto.ts -export class QueryUsersDto { - @IsOptional() - @Type(() => Number) - @Min(1) - page?: number = 1; - - @IsOptional() - @Type(() => Number) - @Min(1) - @Max(100) - limit?: number = 10; - - @IsOptional() - search?: string; - - @IsOptional() - @IsIn(['createdAt', 'name', 'email']) - sortBy?: string = 'createdAt'; - - @IsOptional() - @IsIn(['asc', 'desc']) - order?: 'asc' | 'desc' = 'desc'; -} -``` - -### Response DTOs - -```typescript -// user.response.dto.ts -export class UserResponseDto { - id: string; - email: string; - name: string; - avatar?: string; - role: string; - createdAt: Date; - updatedAt: Date; - - // Hide sensitive data - static fromEntity(user: User): UserResponseDto { - const { password, ...data } = user; - return data; - } -} - -// paginated.response.dto.ts -export class PaginatedResponseDto<T> { - data: T[]; - pagination: { - page: number; - limit: number; - total: number; - totalPages: number; - }; -} -``` - 
-## Controller Implementation - -```typescript -// user.controller.ts -@Controller('users') -@ApiTags('Users') -export class UserController { - constructor(private readonly userService: UserService) {} - - @Get() - @ApiOperation({ summary: 'List users' }) - @ApiQuery({ type: QueryUsersDto }) - @ApiResponse({ status: 200, type: PaginatedResponseDto }) - async list(@Query() query: QueryUsersDto): Promise { - const { data, total } = await this.userService.findAll(query); - - return { - success: true, - data: data.map(UserResponseDto.fromEntity), - pagination: { - page: query.page, - limit: query.limit, - total, - totalPages: Math.ceil(total / query.limit) - } - }; - } - - @Get(':id') - @ApiOperation({ summary: 'Get user by ID' }) - @ApiParam({ name: 'id', type: 'string' }) - @ApiResponse({ status: 200, type: UserResponseDto }) - @ApiResponse({ status: 404, description: 'User not found' }) - async getById(@Param('id') id: string): Promise { - const user = await this.userService.findById(id); - - if (!user) { - throw new HttpException( - { - success: false, - error: { - code: 'USER_NOT_FOUND', - message: `User with ID ${id} not found` - } - }, - HttpStatus.NOT_FOUND - ); - } - - return { - success: true, - data: UserResponseDto.fromEntity(user) - }; - } - - @Post() - @ApiOperation({ summary: 'Create user' }) - @ApiBody({ type: CreateUserDto }) - @ApiResponse({ status: 201, type: UserResponseDto }) - async create(@Body() dto: CreateUserDto): Promise { - const user = await this.userService.create(dto); - - return { - success: true, - data: UserResponseDto.fromEntity(user) - }; - } - - @Put(':id') - @ApiOperation({ summary: 'Update user' }) - @UseGuards(AuthGuard) - async update( - @Param('id') id: string, - @Body() dto: UpdateUserDto - ): Promise { - const user = await this.userService.update(id, dto); - - return { - success: true, - data: UserResponseDto.fromEntity(user) - }; - } - - @Delete(':id') - @ApiOperation({ summary: 'Delete user' }) - @UseGuards(AuthGuard, 
RolesGuard) - @Roles('admin') - async delete(@Param('id') id: string): Promise { - await this.userService.delete(id); - - return { - success: true, - data: { deleted: true } - }; - } -} -``` - -## OpenAPI/Swagger Documentation - -```yaml -# openapi/user-service.yaml -openapi: 3.0.0 -info: - title: User Service API - version: 1.0.0 - description: User management endpoints -servers: - - url: https://api.goodgo.com/v1 -paths: - /users: - get: - summary: List users - parameters: - - name: page - in: query - schema: - type: integer - default: 1 - - name: limit - in: query - schema: - type: integer - default: 10 - responses: - '200': - description: List of users - content: - application/json: - schema: - $ref: '#/components/schemas/UserListResponse' - post: - summary: Create user - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/CreateUserRequest' - responses: - '201': - description: User created - '400': - description: Validation error -``` - -## Pagination Pattern - -```typescript -// pagination.service.ts -export class PaginationService { - paginate( - query: any, - options: { - page: number; - limit: number; - sortBy?: string; - order?: 'asc' | 'desc'; - } - ) { - const skip = (options.page - 1) * options.limit; - - return { - skip, - take: options.limit, - orderBy: options.sortBy ? { - [options.sortBy]: options.order || 'desc' - } : undefined - }; - } -} -``` - -## Error Handling - -The following diagram illustrates the error handling flow in the API: - -```mermaid -flowchart TD - A[Request Received] --> B{Validation
Middleware} - B -->|Invalid| C[ValidationError] - B -->|Valid| D[Controller] - - D --> E{Business Logic} - E -->|Not Found| F[NotFoundError] - E -->|Unauthorized| G[UnauthorizedError] - E -->|Forbidden| H[ForbiddenError] - E -->|Conflict| I[ConflictError] - E -->|Success| J[Return Success Response] - - E -->|Unexpected Error| K[Generic Error] - - C --> L[Error Handler
Middleware] - F --> L - G --> L - H --> L - I --> L - K --> L - - L --> M{Error Type?} - M -->|ValidationError| N[400 Bad Request
VALIDATION_ERROR] - M -->|UnauthorizedError| O[401 Unauthorized
UNAUTHORIZED] - M -->|ForbiddenError| P[403 Forbidden
FORBIDDEN] - M -->|NotFoundError| Q[404 Not Found
NOT_FOUND] - M -->|ConflictError| R[409 Conflict
CONFLICT] - M -->|Unknown Error| S{Development
Mode?} - - S -->|Yes| T[500 Internal Error
INTERNAL_ERROR
+ Stack Trace] - S -->|No| U[500 Internal Error
INTERNAL_ERROR
Generic Message] - - N --> V[Format Error Response
success: false
error: code, message, details] - O --> V - P --> V - Q --> V - R --> V - T --> V - U --> V - - V --> W[Log Error
Server-Side] - W --> X[Send HTTP Response] - J --> X - - style A fill:#e1f5ff - style J fill:#c8e6c9 - style L fill:#fff9c4 - style V fill:#ffccbc - style X fill:#e1f5ff -``` - -### Error Handler Implementation - -```typescript -// error.middleware.ts -export function errorHandler( - err: Error, - req: Request, - res: Response, - next: NextFunction -) { - const isDev = process.env.NODE_ENV === 'development'; - - // Known errors - if (err instanceof ValidationError) { - return res.status(400).json({ - success: false, - error: { - code: 'VALIDATION_ERROR', - message: err.message, - details: err.errors - } - }); - } - - if (err instanceof UnauthorizedError) { - return res.status(401).json({ - success: false, - error: { - code: 'UNAUTHORIZED', - message: 'Authentication required' - } - }); - } - - // Unknown errors - logger.error('Unhandled error:', err); - - res.status(500).json({ - success: false, - error: { - code: 'INTERNAL_ERROR', - message: isDev ? err.message : 'Internal server error', - stack: isDev ? err.stack : undefined - } - }); -} -``` - -## Best Practices - -1. **Resource Naming** - - Use plural nouns (`/users` not `/user`) - - Use kebab-case for multi-word resources - - Keep URLs as short as possible - -2. **Versioning** - - Include version in URL (`/v1/users`) - - Maintain backward compatibility - - Deprecate old versions gracefully - -3. **Security** - - Always use HTTPS - - Implement rate limiting - - Validate all inputs - - Use proper authentication/authorization - -4. **Performance** - - Implement pagination for lists - - Use field filtering when possible - - Cache responses appropriately - - Compress responses (gzip) - -5. 
**Documentation** - - Keep OpenAPI spec up to date - - Include examples in documentation - - Document error responses - - Version your documentation \ No newline at end of file diff --git a/apps/web-docs/content/docs/en/skills/api-gateway-advanced.md b/apps/web-docs/content/docs/en/skills/api-gateway-advanced.md deleted file mode 100644 index c7055a31..00000000 --- a/apps/web-docs/content/docs/en/skills/api-gateway-advanced.md +++ /dev/null @@ -1,204 +0,0 @@ ---- -name: api-gateway-advanced -description: Advanced API Gateway patterns for GoodGo microservices including API composition, request/response transformation, service mesh integration, advanced routing, and gateway-level resilience. ---- - -# API Gateway Advanced Patterns - -## When to Use This Skill - -Use this skill when: -- Implementing API composition and aggregation -- Transforming requests/responses at gateway level -- Integrating service mesh with Traefik -- Implementing advanced routing strategies -- Adding gateway-level circuit breakers -- Implementing API versioning at gateway - -## API Gateway Architecture - -The API Gateway serves as the single entry point for all client requests, handling routing, composition, transformation, and resilience patterns. 
- -```mermaid -graph TB - Client[Client Application] --> Gateway[API Gateway] - - subgraph Gateway["API Gateway Components"] - Router[Request Router] - Auth[Authentication/Authorization] - RateLimit[Rate Limiting] - CircuitBreaker[Circuit Breaker] - Cache[Gateway Cache] - Transformer[Request/Response Transformer] - Composition[API Composition Engine] - end - - Gateway --> Router - Router --> Auth - Auth --> RateLimit - RateLimit --> CircuitBreaker - CircuitBreaker --> Cache - Cache --> Transformer - Transformer --> Composition - - Composition --> Service1[User Service] - Composition --> Service2[Order Service] - Composition --> Service3[Payment Service] - Composition --> Service4[Other Services] - - CircuitBreaker -.-> Service1 - CircuitBreaker -.-> Service2 - CircuitBreaker -.-> Service3 - CircuitBreaker -.-> Service4 - - Cache --> Redis[(Redis Cache)] - - style Gateway fill:#e1f5ff - style Composition fill:#fff4e1 - style CircuitBreaker fill:#ffe1e1 -``` - -## Request Routing Flow - -Requests flow through the gateway middleware chain in a specific order, ensuring proper handling at each stage. 
- -```mermaid -sequenceDiagram - participant Client - participant Gateway - participant RateLimit - participant Auth - participant CircuitBreaker - participant Cache - participant Transformer - participant Service - - Client->>Gateway: HTTP Request - Gateway->>RateLimit: Check Rate Limit - RateLimit-->>Gateway: Allowed - - Gateway->>Auth: Validate Token - Auth-->>Gateway: Authenticated - - Gateway->>CircuitBreaker: Check Circuit State - alt Circuit Open - CircuitBreaker-->>Gateway: Service Unavailable - Gateway-->>Client: 503 Error - else Circuit Closed/Half-Open - Gateway->>Cache: Check Cache - alt Cache Hit - Cache-->>Gateway: Cached Response - Gateway->>Transformer: Transform Response - Transformer-->>Gateway: Transformed - Gateway-->>Client: Response - else Cache Miss - Gateway->>Transformer: Transform Request - Transformer-->>Gateway: Transformed - Gateway->>Service: Forward Request - Service-->>Gateway: Response - Gateway->>Cache: Store in Cache - Gateway->>Transformer: Transform Response - Transformer-->>Gateway: Transformed - Gateway-->>Client: Response - end - end -``` - -## API Composition Patterns - -API composition enables the gateway to aggregate data from multiple services, reducing client round trips and improving performance. 
- -### Fan-Out / Fan-In Pattern (Parallel Aggregation) - -```mermaid -graph LR - Client[Client Request] --> Gateway[API Gateway] - - subgraph Gateway["API Composition"] - Comp[Composition Handler] - end - - Gateway --> Comp - - Comp -->|Parallel Calls| S1[User Service] - Comp -->|Parallel Calls| S2[Order Service] - Comp -->|Parallel Calls| S3[Payment Service] - - S1 -->|Response| Comp - S2 -->|Response| Comp - S3 -->|Response| Comp - - Comp -->|Aggregated Response| Client - - style Comp fill:#fff4e1 - style S1 fill:#e1ffe1 - style S2 fill:#e1ffe1 - style S3 fill:#e1ffe1 -``` - -### Chaining Pattern (Sequential Calls with Compensation) - -```mermaid -sequenceDiagram - participant Client - participant Gateway - participant OrderService - participant PaymentService - - Client->>Gateway: Create Order Request - Gateway->>OrderService: POST /orders - OrderService-->>Gateway: Order Created - - Gateway->>PaymentService: POST /payments - alt Payment Success - PaymentService-->>Gateway: Payment Processed - Gateway-->>Client: Success Response - else Payment Failed - PaymentService-->>Gateway: Payment Error - Gateway->>OrderService: DELETE /orders/:id (Compensate) - OrderService-->>Gateway: Order Deleted - Gateway-->>Client: Error Response - end -``` - -## Key Patterns - -### API Composition - -```typescript -// Aggregate multiple service responses -const [user, orders, payments] = await Promise.all([ - userClient.get(`/users/${userId}`), - orderClient.get(`/orders?userId=${userId}`), - paymentClient.get(`/payments?userId=${userId}`), -]); -``` - -### Request/Response Transformation - -```typescript -// Transform at gateway level -transformer.addRule({ - path: '/api/v1/users', - requestTransform: (req) => { - if (!req.query.page) req.query.page = '1'; - return req; - }, - responseTransform: (res, data) => { - return { success: true, data }; - }, -}); -``` - -## Best Practices - -1. Use API composition for aggregating related data -2. 
Cache at gateway for frequently accessed data -3. Implement circuit breaker at gateway level -4. Keep transformations simple and testable - -## Resources - -- [Traefik Documentation](https://doc.traefik.io/traefik/) -- [Middleware Patterns](./middleware-patterns.md) -- Skill Source: `.cursor/skills/api-gateway-advanced/SKILL.md` diff --git a/apps/web-docs/content/docs/en/skills/api-versioning-strategy.md b/apps/web-docs/content/docs/en/skills/api-versioning-strategy.md deleted file mode 100644 index efed41cd..00000000 --- a/apps/web-docs/content/docs/en/skills/api-versioning-strategy.md +++ /dev/null @@ -1,404 +0,0 @@ ---- -name: api-versioning-strategy -description: API versioning strategies for GoodGo microservices including semantic versioning, backward compatibility patterns, API deprecation, version negotiation, and breaking changes handling. ---- - -# API Versioning Strategy - -## When to Use This Skill - -Use this skill when: -- Versioning APIs -- Handling breaking changes -- Implementing API deprecation -- Maintaining backward compatibility -- Implementing version negotiation -- Managing multiple API versions -- Planning API evolution -- Communicating API changes to consumers - -## Core Concepts - -### Versioning Strategies - -1. **URL Path Versioning**: `/api/v1/users`, `/api/v2/users` -2. **Header Versioning**: `Accept: application/vnd.goodgo.v1+json` -3. **Query Parameter**: `/api/users?version=1` -4. **Semantic Versioning**: Major.Minor.Patch (e.g., 1.2.3) - -### Compatibility Types - -- **Backward Compatible**: New version works with old clients -- **Forward Compatible**: Old version works with new clients -- **Breaking Changes**: Incompatible changes requiring new version - -## Version Negotiation - -Version negotiation allows clients to request a specific API version through headers while maintaining clean URLs. The middleware extracts the version from the `Accept` header and routes to the appropriate handler. 
- -```mermaid -sequenceDiagram - participant Client - participant Middleware as Version Negotiation
Middleware - participant Controller as Version-Aware
Controller - participant Service - - Client->>Middleware: Request with Accept header
Accept: application/vnd.goodgo.v1+json - Middleware->>Middleware: Extract version from header - alt Version specified - Middleware->>Middleware: Parse version number - alt Version supported - Middleware->>Controller: Set req.apiVersion = 1 - Controller->>Controller: Check version - Controller->>Service: Call service method - Service-->>Controller: Return data - Controller->>Controller: Format response for v1 - Controller-->>Client: v1 response format - else Version not supported - Middleware-->>Client: 400 Unsupported Version - end - else No version specified - Middleware->>Controller: Set req.apiVersion = latest (2) - Controller->>Service: Call service method - Service-->>Controller: Return data - Controller->>Controller: Format response for v2 - Controller-->>Client: v2 response format (default) - end -``` - -### Implementation - -```typescript -// src/middlewares/version-negotiation.middleware.ts -import { Request, Response, NextFunction } from 'express'; -import { logger } from '@goodgo/logger'; - -export function versionNegotiation( - req: Request, - res: Response, - next: NextFunction -): void { - const acceptHeader = req.headers.accept || ''; - const versionMatch = acceptHeader.match(/application\/vnd\.goodgo\.v(\d+)\+json/); - - if (versionMatch) { - const requestedVersion = parseInt(versionMatch[1], 10); - req.apiVersion = requestedVersion; - - const supportedVersions = [1, 2]; - if (!supportedVersions.includes(requestedVersion)) { - return res.status(400).json({ - success: false, - error: { - code: 'UNSUPPORTED_VERSION', - message: `API version ${requestedVersion} is not supported. Supported versions: ${supportedVersions.join(', ')}`, - }, - }); - } - } else { - req.apiVersion = 2; // Default to latest - } - - next(); -} -``` - -## API Deprecation Timeline - -API deprecation follows a structured timeline to give consumers adequate time to migrate. The lifecycle progresses through active, deprecated, sunset, and removed phases. 
- -```mermaid -gantt - title API Version Lifecycle Timeline - dateFormat YYYY-MM-DD - section Version 1 - Active (v1 only) :active, v1-active, 2024-01-01, 2024-06-01 - Deprecated (v1 + v2) :crit, v1-deprecated, 2024-06-01, 2024-12-31 - Sunset Period :v1-sunset, 2024-12-31, 2025-01-31 - Removed :v1-removed, 2025-01-31, 1d - section Version 2 - Development :v2-dev, 2024-03-01, 2024-06-01 - Active (v1 + v2) :active, v2-active, 2024-06-01, 2025-12-31 -``` - -### Deprecation Phases - -```mermaid -stateDiagram-v2 - [*] --> Active: Version Released - Active --> Deprecated: New Version Released
Add Deprecation Headers - Deprecated --> Sunset: Sunset Date Reached
Stop Accepting New Requests - Sunset --> Removed: Grace Period Ended
Remove Routes - Removed --> [*] - - note right of Active - - Version fully supported - - No warnings - - All features available - end note - - note right of Deprecated - - Deprecation header set - - Warning headers added - - Migration guide provided - - Still functional - end note - - note right of Sunset - - Read-only mode - - No new requests accepted - - Existing requests honored - end note -``` - -### Deprecation Headers - -```typescript -// src/middlewares/deprecation.middleware.ts -export function deprecationMiddleware(version: string, sunsetDate: string) { - return (req: Request, res: Response, next: NextFunction): void => { - if (req.apiVersion && parseInt(req.apiVersion.toString()) < parseInt(version)) { - res.setHeader('Deprecation', 'true'); - res.setHeader('Sunset', sunsetDate); - res.setHeader('Link', `<${req.url.replace(/\/v\d+/, `/v${version}`)}>; rel="successor-version"`); - res.setHeader('Warning', `299 - "API version ${req.apiVersion} is deprecated. Please migrate to version ${version} by ${sunsetDate}"`); - } - - next(); - }; -} -``` - -## Migration Flow - -Breaking changes require a careful 3-phase migration strategy to ensure zero downtime and smooth client transitions. - -```mermaid -flowchart TD - Start([Breaking Change Identified]) --> Phase1[Phase 1: Support Both Versions] - - Phase1 --> DeployV2[Deploy v2 alongside v1] - DeployV2 --> Monitor1[Monitor v1 and v2 usage] - Monitor1 --> Wait1[Wait for client adoption] - Wait1 --> Phase2{Sufficient
v2 adoption?} - - Phase2 -->|No| Wait1 - Phase2 -->|Yes| Phase2Start[Phase 2: Deprecate v1] - - Phase2Start --> AddHeaders[Add deprecation headers to v1] - AddHeaders --> NotifyClients[Notify clients via
deprecation warnings] - NotifyClients --> ProvideGuide[Provide migration guide] - ProvideGuide --> Monitor2[Monitor migration progress] - Monitor2 --> Wait2[Wait until sunset date] - Wait2 --> Phase3{Sunset date
reached?} - - Phase3 -->|No| Monitor2 - Phase3 -->|Yes| Phase3Start[Phase 3: Remove v1] - - Phase3Start --> StopAccepting[Stop accepting new v1 requests] - StopAccepting --> GracePeriod[Grace period for
existing requests] - GracePeriod --> RemoveRoutes[Remove v1 routes] - RemoveRoutes --> End([Migration Complete]) - - style Phase1 fill:#e1f5ff - style Phase2Start fill:#fff4e1 - style Phase3Start fill:#ffe1e1 - style End fill:#e1ffe1 -``` - -### Implementation Strategy - -```typescript -// src/core/api/migration.strategy.ts -export class MigrationStrategy { - /** - * Phase 1: Support both versions - */ - phase1SupportBoth(): void { - router.use('/v1', v1Router); - router.use('/v2', v2Router); - } - - /** - * Phase 2: Deprecate v1 - */ - phase2DeprecateV1(): void { - router.use('/v1', deprecationMiddleware('2', '2024-12-31'), v1Router); - router.use('/v2', v2Router); - } - - /** - * Phase 3: Remove v1 - */ - phase3RemoveV1(): void { - router.use('/v2', v2Router); - // v1 routes removed after sunset date - } -} -``` - -## URL Path Versioning - -### Implementation - -```typescript -// src/routes/index.ts -import { Router } from 'express'; -import v1Router from './v1'; -import v2Router from './v2'; - -const router = Router(); - -router.use('/v1', v1Router); -router.use('/v2', v2Router); - -export default router; -``` - -## Semantic Versioning - -### Version Structure - -``` -MAJOR.MINOR.PATCH - -MAJOR: Breaking changes -MINOR: Backward-compatible additions -PATCH: Backward-compatible bug fixes -``` - -### Version Response - -```typescript -// src/core/api/version.middleware.ts -export function versionMiddleware(req: Request, res: Response, next: NextFunction): void { - const originalJson = res.json.bind(res); - - res.json = (data: any) => { - const response = { - ...data, - metadata: { - ...data.metadata, - apiVersion: req.apiVersion || '2.0.0', - serviceVersion: process.env.SERVICE_VERSION || '1.0.0', - }, - }; - - return originalJson(response); - }; - - next(); -} -``` - -## Backward Compatibility - -### Compatibility Layer - -```typescript -// src/core/api/compatibility.adapter.ts -export class CompatibilityAdapter { - adaptV1ToV2(v1Data: any): any { - return { - 
success: true, - data: { - user: { - ...v1Data, - profile: null, // Add default for new field - }, - }, - metadata: { - version: '2.0.0', - adapted: true, - }, - }; - } - - adaptV2RequestToV1(v2Request: any): any { - return { - email: v2Request.email, - name: v2Request.name, - // Ignore new fields - }; - } -} -``` - -## Best Practices - -1. **Versioning Strategy**: Choose URL path or header, be consistent -2. **Semantic Versioning**: Use MAJOR.MINOR.PATCH -3. **Deprecation**: Always deprecate before removing -4. **Migration Guide**: Provide clear migration documentation -5. **Backward Compatibility**: Maintain compatibility when possible -6. **Communication**: Clearly communicate version changes - -## Common Mistakes - -1. **No Deprecation Period**: Breaking clients suddenly - ```typescript - // ❌ BAD: Remove v1 immediately - router.use('/v2', v2Router); - - // ✅ GOOD: Deprecate with sunset date - router.use('/v1', deprecationMiddleware('2', '2024-12-31'), v1Router); - router.use('/v2', v2Router); - ``` - -2. **Breaking Changes Without Major Version**: Client confusion - ``` - # ❌ BAD: Breaking change in minor version - v1.1.0 → Changed response format - - # ✅ GOOD: Breaking change = new major version - v1.x.x → v2.0.0 (new response format) - ``` - -3. 
**Inconsistent Versioning Strategy**: Mixed approaches - ```typescript - // ❌ BAD: Mix URL and header versioning - /api/v1/users + Accept: application/vnd.v2+json - - // ✅ GOOD: Choose one approach - /api/v1/users OR Accept: application/vnd.goodgo.v1+json - ``` - -## Quick Reference - -| Strategy | Pros | Cons | Use When | -|----------|------|------|----------| -| **URL Path** | Clear, cacheable | URL changes | Public APIs | -| **Header** | Clean URLs | Less visible | Internal APIs | -| **Query Param** | Simple | Not RESTful | Quick prototypes | - -**Semantic Versioning:** -``` -MAJOR.MINOR.PATCH - │ │ └── Bug fixes (backward compatible) - │ └──────── New features (backward compatible) - └────────────── Breaking changes -``` - -**Version Lifecycle:** -``` -v1 Active → v2 Released → v1 Deprecated → v1 Sunset → v1 Removed - │ │ │ │ │ - │ │ Add headers Remove from Delete - │ Support + warnings docs routes - Solo both -``` - -**Deprecation Headers:** -```http -Deprecation: true -Sunset: Tue, 31 Dec 2024 23:59:59 GMT -Warning: 299 - "API v1 is deprecated. Migrate to v2 by 2024-12-31" -Link: </api/v2/users>; rel="successor-version" -``` - -## Resources - -- [API Design](./api-design.md) - API design patterns -- [Middleware Patterns](./middleware-patterns.md) - Middleware patterns -- [Project Rules](./project-rules.md) - GoodGo standards -- Skill Source: `.cursor/skills/api-versioning-strategy/SKILL.md` diff --git a/apps/web-docs/content/docs/en/skills/caching-patterns.md b/apps/web-docs/content/docs/en/skills/caching-patterns.md deleted file mode 100644 index 8266cb40..00000000 --- a/apps/web-docs/content/docs/en/skills/caching-patterns.md +++ /dev/null @@ -1,369 +0,0 @@ ---- -name: caching-patterns -description: Caching strategies and patterns for GoodGo microservices including multi-layer cache, Redis caching, cache key naming, TTL strategies, cache invalidation, and cache-aside patterns. 
---- - -# Caching Patterns - -## When to Use This Skill - -Use this skill when: -- Implementing caching for frequently accessed data -- Optimizing database queries with caching -- Designing cache key naming conventions -- Setting TTL (Time To Live) strategies -- Implementing cache invalidation patterns -- Using multi-layer cache (L1: Memory, L2: Redis) -- Handling cache failures gracefully - -## Core Concepts - -### Multi-Layer Cache Strategy - -The platform uses a two-layer cache architecture to balance speed and capacity: - -```mermaid -graph TB - subgraph Application["Application Layer"] - App[Application Code] - end - - subgraph L1Layer["L1 Cache - Memory (NodeCache)"] - L1[In-Memory Cache] - L1Props["• Speed: < 1ms
• Capacity: 10k keys
• TTL: 60s-5min
• Scope: Per-instance"] - end - - subgraph L2Layer["L2 Cache - Redis (Distributed)"] - L2[Redis Cache] - L2Props["• Speed: < 5ms
• Capacity: Large
• TTL: Configurable
• Scope: Shared"] - end - - subgraph DataLayer["Data Source"] - DB[(Database)] - API[External API] - end - - App -->|Check First| L1 - L1 -->|Miss| L2 - L2 -->|Miss| DB - L2 -->|Miss| API - DB -->|Store| L2 - API -->|Store| L2 - L2 -->|Warm| L1 - - L1 -.-> L1Props - L2 -.-> L2Props - - style L1 fill:#e1f5ff - style L2 fill:#fff4e1 - style DB fill:#ffe1e1 - style API fill:#ffe1e1 -``` - -**Layer Characteristics:** - -1. **L1 Cache (Memory)**: NodeCache in-memory cache - - Very fast (< 1ms access time) - - Limited capacity (10k keys default) - - Short TTL (60 seconds default, max 5 minutes) - - Per-instance (not shared across instances) - -2. **L2 Cache (Redis)**: Distributed Redis cache - - Fast (< 5ms access time) - - Large capacity - - Longer TTL (configurable) - - Shared across all service instances - -### Cache Flow - -The cache lookup follows a multi-layer approach, checking L1 first, then L2, and finally the data source. - -```mermaid -flowchart TD - Start([Request Data]) --> CheckL1{Check L1 Cache
Memory} - CheckL1 -->|Hit| ReturnL1[Return Data
from L1] - CheckL1 -->|Miss| CheckL2{Check L2 Cache
Redis} - CheckL2 -->|Hit| StoreL1[Store in L1
Warm Cache] - StoreL1 --> ReturnL2[Return Data
from L2] - CheckL2 -->|Miss| FetchSource[Fetch from
Data Source] - FetchSource --> StoreBoth[Store in L1 & L2] - StoreBoth --> ReturnSource[Return Data
from Source] - - ReturnL1 --> End([End]) - ReturnL2 --> End - ReturnSource --> End - - style CheckL1 fill:#e1f5ff - style CheckL2 fill:#fff4e1 - style FetchSource fill:#ffe1e1 - style ReturnL1 fill:#e1ffe1 - style ReturnL2 fill:#e1ffe1 - style ReturnSource fill:#e1ffe1 -``` - -## Patterns - -### Cache Service Usage - -```typescript -import { cacheService } from '../core/cache'; - -// Simple get/set -const cached = await cacheService.get('user:123'); -await cacheService.set('user:123', userData, 300); // 5 minutes TTL - -// Get or set pattern (cache-aside) -const user = await cacheService.getOrSet( - 'user:123', - async () => { - return await userRepository.findById('123'); - }, - 300 // TTL in seconds -); -``` - -### Cache Key Naming Conventions - -Use consistent naming patterns: - -```typescript -// Pattern: {entity}:{identifier} -'user:123' -'user:email:user@example.com' -'user:123:permissions' -'user:123:roles' - -// Pattern: {entity}:{identifier}:{sub-resource} -'session:abc123' -'permission:perm_123' -'role:role_123' -``` - -Cache service provides key generators: - -```typescript -cacheService.keys = { - user: (userId: string) => `user:${userId}`, - userPermissions: (userId: string) => `user:${userId}:permissions`, - userRoles: (userId: string) => `user:${userId}:roles`, - token: (token: string) => `token:${token}`, - session: (sessionId: string) => `session:${sessionId}`, -}; -``` - -### Cache-Aside Pattern - -Most common pattern - check cache first, fetch if miss: - -```typescript -async getUserPermissions(userId: string): Promise { - const cacheKey = cacheService.keys.userPermissions(userId); - - // Try cache first - const cached = await cacheService.get(cacheKey); - if (cached) { - return cached; - } - - // Cache miss - fetch from source - const permissions = await calculatePermissions(userId); - - // Store in cache - await cacheService.set(cacheKey, permissions, 300); // 5 min TTL - - return permissions; -} -``` - -### Get or Set Pattern - -Simplified 
cache-aside pattern: - -```typescript -const permissions = await cacheService.getOrSet( - cacheService.keys.userPermissions(userId), - async () => { - // This function only runs on cache miss - return await calculatePermissions(userId); - }, - 300 // TTL -); -``` - -### TTL Strategies - -Choose TTL based on data characteristics: - -**Short TTL (60-300s)**: Frequently changing data -- User permissions (300s) -- Session data (varies) -- Real-time statistics - -**Medium TTL (300-1800s)**: Moderately changing data -- User profiles (600s) -- Organization data (900s) -- Configuration (1800s) - -**Long TTL (1800-7200s)**: Rarely changing data -- Static configurations (3600s) -- Reference data (7200s) - -**No TTL**: Very stable data (use with caution) -- Rarely use - prefer long TTL instead - -### Cache Invalidation - -Invalidate cache when data changes to prevent serving stale data. The platform supports multiple invalidation strategies: - -```mermaid -flowchart TD - Start([Data Changed]) --> ChooseStrategy{Choose
Invalidation
Strategy} - - ChooseStrategy -->|Single Key| SingleKey[Single Key
Invalidation] - SingleKey --> DelL1[Delete from L1] - DelL1 --> DelL2[Delete from L2] - DelL2 --> Done1([Complete]) - - ChooseStrategy -->|Pattern Match| PatternMatch[Pattern-Based
Invalidation] - PatternMatch --> FindKeys[Find Matching Keys
user:123:*] - FindKeys --> DelManyL1[Delete from L1
All Matching] - DelManyL1 --> DelManyL2[Delete from L2
All Matching] - DelManyL2 --> Done2([Complete]) - - ChooseStrategy -->|Multiple Keys| MultipleKeys[Multiple Keys
Invalidation] - MultipleKeys --> ListKeys[List Keys to Delete
user:123
user:123:permissions
user:123:roles] - ListKeys --> BatchDelL1[Batch Delete from L1] - BatchDelL1 --> BatchDelL2[Batch Delete from L2] - BatchDelL2 --> Done3([Complete]) - - style SingleKey fill:#e1f5ff - style PatternMatch fill:#fff4e1 - style MultipleKeys fill:#ffe1e1 - style Done1 fill:#e1ffe1 - style Done2 fill:#e1ffe1 - style Done3 fill:#e1ffe1 -``` - -**Implementation Examples:** - -```typescript -// Single key invalidation -await cacheService.del(cacheService.keys.user(userId)); -await cacheService.del(cacheService.keys.userPermissions(userId)); - -// Pattern-based invalidation -await cacheService.invalidatePattern('user:123:*'); - -// Multiple keys -await cacheService.delMany([ - cacheService.keys.user(userId), - cacheService.keys.userPermissions(userId), - cacheService.keys.userRoles(userId), -]); -``` - -### Cache Warming - -Pre-populate cache with frequently accessed data: - -```typescript -async warmCache() { - const activeUsers = await userRepository.findActiveUsers(); - - for (const user of activeUsers) { - // Pre-cache user data - await cacheService.set( - cacheService.keys.user(user.id), - user, - 600 - ); - - // Pre-cache permissions - const permissions = await calculatePermissions(user.id); - await cacheService.set( - cacheService.keys.userPermissions(user.id), - permissions, - 300 - ); - } -} -``` - -### Error Handling - -Cache failures should not break the application: - -```typescript -async getWithCache(key: string): Promise { - try { - // Try cache first - const cached = await cacheService.get(key); - if (cached) return cached; - } catch (error) { - // Log but continue - fallback to source - logger.warn('Cache get failed, falling back to source', { key, error }); - } - - // Fallback to data source - return await fetchFromSource(); -} -``` - -## Best Practices - -1. **Cache Keys**: Use consistent naming conventions -2. **TTL Selection**: Choose TTL based on data change frequency -3. **Invalidation**: Invalidate cache when data changes -4. 
**Error Handling**: Don't let cache failures break the app -5. **Cache-Aside**: Use cache-aside pattern for most cases -6. **Avoid Over-Caching**: Don't cache data that changes too frequently -7. **Monitor Hit Rates**: Track cache hit rates to optimize TTL -8. **Warm Cache**: Pre-populate cache for critical data -9. **Use Multi-Layer**: Leverage both L1 and L2 cache -10. **Serialize Properly**: Ensure data is JSON serializable - -## Common Mistakes - -1. **Cache Key Collisions**: Using generic keys that collide -2. **Stale Data**: Not invalidating cache when data changes -3. **Too Short TTL**: Setting TTL too short, negating cache benefits -4. **Too Long TTL**: Setting TTL too long, serving stale data -5. **No Error Handling**: Letting cache errors break the application -6. **Caching Everything**: Caching data that doesn't benefit from caching -7. **Not Warming Cache**: Not pre-populating critical cache data -8. **Ignoring Hit Rates**: Not monitoring cache performance - -## Troubleshooting - -### Low Cache Hit Rate - -**Problem**: Cache hit rate is low, cache not effective -**Solution**: -- Review TTL values - may be too short -- Check cache key patterns - ensure consistent usage -- Verify cache invalidation isn't too aggressive -- Monitor what data is being cached - -### Stale Data Issues - -**Problem**: Serving stale data from cache -**Solution**: -- Review TTL values - may be too long -- Ensure cache invalidation on data updates -- Use shorter TTL for frequently changing data -- Implement cache versioning if needed - -### Cache Performance Issues - -**Problem**: Cache operations are slow -**Solution**: -- Check Redis connection and network latency -- Monitor Redis memory usage -- Review cache key patterns for efficiency -- Consider L1 cache hit rate (should be high) - -## Resources - -- [Multi-Layer Cache](../../services/iam-service/src/core/cache/multi-layer-cache.ts) - Multi-layer cache implementation -- [Cache 
Service](../../services/iam-service/src/core/cache/cache.service.ts) - Cache service wrapper -- [Cache Usage Example](../../services/iam-service/src/modules/rbac/rbac.service.ts) - Real-world cache usage diff --git a/apps/web-docs/content/docs/en/skills/cicd-advanced-patterns.md b/apps/web-docs/content/docs/en/skills/cicd-advanced-patterns.md deleted file mode 100644 index d7b95e2b..00000000 --- a/apps/web-docs/content/docs/en/skills/cicd-advanced-patterns.md +++ /dev/null @@ -1,544 +0,0 @@ ---- -name: cicd-advanced-patterns -description: Advanced CI/CD patterns for GoodGo microservices including blue-green deployments, canary releases, automated rollback, deployment verification, and progressive delivery. ---- - -# CI/CD Advanced Patterns - -## When to Use This Skill - -Use this skill when: -- Implementing blue-green deployments -- Setting up canary releases -- Implementing automated rollback mechanisms -- Creating deployment verification pipelines -- Implementing progressive delivery -- Setting up deployment gates -- Implementing smoke tests -- Managing deployment strategies in Kubernetes - -## Core Concepts - -### Deployment Strategies - -1. **Rolling Update**: Gradual replacement (default K8s) -2. **Blue-Green**: Two identical environments, switch traffic -3. **Canary**: Gradual rollout to subset of users -4. **Recreate**: Stop old, start new (downtime) - -### Deployment Verification - -- Smoke tests -- Health checks -- Performance tests -- Rollback triggers - -## Blue-Green Deployment - -Blue-green deployment maintains two identical production environments (blue and green). At any time, only one environment serves live traffic. The new version is deployed to the idle environment, verified, and then traffic is switched. 
- -```mermaid -flowchart TD - Start([Deployment Triggered]) --> DeployGreen[Deploy to Green Environment] - DeployGreen --> WaitRollout[Wait for Rollout Complete] - WaitRollout --> RunSmokeTests[Run Smoke Tests] - RunSmokeTests --> TestsPassed{Tests Passed?} - - TestsPassed -->|Yes| SwitchTraffic[Switch Service Selector to Green] - TestsPassed -->|No| RollbackToBlue[Rollback: Keep Blue Active] - - SwitchTraffic --> MonitorHealth[Monitor Health Metrics] - MonitorHealth --> HealthOK{Health OK?} - - HealthOK -->|Yes| Complete([Deployment Complete]) - HealthOK -->|No| AutoRollback[Auto Rollback to Blue] - - AutoRollback --> Complete - RollbackToBlue --> Fail([Deployment Failed]) - - style Start fill:#e1f5ff - style Complete fill:#d4edda - style Fail fill:#f8d7da - style TestsPassed fill:#fff3cd - style HealthOK fill:#fff3cd -``` - -### Kubernetes Implementation - -```yaml -# deployments/production/kubernetes/user-service-blue.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: user-service-blue - labels: - app: user-service - version: blue -spec: - replicas: 3 - selector: - matchLabels: - app: user-service - version: blue - template: - metadata: - labels: - app: user-service - version: blue - spec: - containers: - - name: user-service - image: goodgo/user-service:v1.0.0 - ports: - - containerPort: 5000 - ---- -# deployments/production/kubernetes/user-service-green.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: user-service-green - labels: - app: user-service - version: green -spec: - replicas: 3 - selector: - matchLabels: - app: user-service - version: green - template: - metadata: - labels: - app: user-service - version: green - spec: - containers: - - name: user-service - image: goodgo/user-service:v1.1.0 - ports: - - containerPort: 5000 - ---- -# Service selector switches between blue/green -apiVersion: v1 -kind: Service -metadata: - name: user-service -spec: - selector: - app: user-service - version: blue # Switch to green after 
verification - ports: - - port: 80 - targetPort: 5000 -``` - -## Canary Deployment - -Canary deployment gradually rolls out changes to a small subset of users before making them available to everyone. This allows for real-world testing with minimal risk. - -```mermaid -flowchart TD - Start([Canary Deployment Started]) --> DeployCanary[Deploy Canary Version
1 Replica] - DeployCanary --> Route10[Route 10% Traffic to Canary] - Route10 --> Wait10[Wait 5-10 minutes] - Wait10 --> Check10{Health & Metrics OK?} - - Check10 -->|No| RollbackCanary[Rollback: Route 0% to Canary] - Check10 -->|Yes| Route25[Route 25% Traffic to Canary] - - Route25 --> Wait25[Wait 5-10 minutes] - Wait25 --> Check25{Health & Metrics OK?} - - Check25 -->|No| RollbackCanary - Check25 -->|Yes| Route50[Route 50% Traffic to Canary] - - Route50 --> Wait50[Wait 5-10 minutes] - Wait50 --> Check50{Health & Metrics OK?} - - Check50 -->|No| RollbackCanary - Check50 -->|Yes| Route75[Route 75% Traffic to Canary] - - Route75 --> Wait75[Wait 5-10 minutes] - Wait75 --> Check75{Health & Metrics OK?} - - Check75 -->|No| RollbackCanary - Check75 -->|Yes| Route100[Route 100% Traffic to Canary] - - Route100 --> PromoteCanary[Promote Canary to Stable] - PromoteCanary --> Complete([Canary Complete]) - - RollbackCanary --> Fail([Canary Failed]) - - style Start fill:#e1f5ff - style Complete fill:#d4edda - style Fail fill:#f8d7da - style Check10 fill:#fff3cd - style Check25 fill:#fff3cd - style Check50 fill:#fff3cd - style Check75 fill:#fff3cd -``` - -### Kubernetes Canary with Service Mesh - -```yaml -# deployments/production/kubernetes/user-service-canary.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: user-service-canary - labels: - app: user-service - version: canary -spec: - replicas: 1 # Start with 1 replica (10% traffic) - selector: - matchLabels: - app: user-service - version: canary - template: - metadata: - labels: - app: user-service - version: canary - spec: - containers: - - name: user-service - image: goodgo/user-service:v1.1.0 - ---- -# VirtualService splits traffic -apiVersion: networking.istio.io/v1alpha3 -kind: VirtualService -metadata: - name: user-service -spec: - hosts: - - user-service - http: - - match: - - headers: - canary: - exact: "true" - route: - - destination: - host: user-service - subset: canary - weight: 100 - - route: - - 
destination: - host: user-service - subset: stable - weight: 90 - - destination: - host: user-service - subset: canary - weight: 10 # 10% traffic to canary -``` - -## Automated Rollback - -Automated rollback mechanisms detect deployment failures and automatically revert to the previous stable version, minimizing downtime and impact. - -```mermaid -flowchart TD - Start([Deployment Completed]) --> RunSmokeTests[Run Smoke Tests] - RunSmokeTests --> SmokePassed{Smoke Tests Pass?} - - SmokePassed -->|No| GetPreviousRev[Get Previous Revision] - GetPreviousRev --> RollbackDeploy[Rollback Deployment] - RollbackDeploy --> VerifyRollback[Verify Rollback Success] - VerifyRollback --> RollbackComplete([Rollback Complete]) - - SmokePassed -->|Yes| MonitorHealth[Monitor Health Metrics] - MonitorHealth --> HealthOK{Health OK?} - - HealthOK -->|Yes| MonitorErrors[Monitor Error Rates] - HealthOK -->|No| GetPreviousRev - - MonitorErrors --> ErrorRateOK{Error Rate < Threshold?} - - ErrorRateOK -->|Yes| MonitorPerformance[Monitor Performance] - ErrorRateOK -->|No| GetPreviousRev - - MonitorPerformance --> PerfOK{Performance OK?} - - PerfOK -->|Yes| DeploymentSuccess([Deployment Successful]) - PerfOK -->|No| GetPreviousRev - - style Start fill:#e1f5ff - style DeploymentSuccess fill:#d4edda - style RollbackComplete fill:#f8d7da - style SmokePassed fill:#fff3cd - style HealthOK fill:#fff3cd - style ErrorRateOK fill:#fff3cd - style PerfOK fill:#fff3cd -``` - -### Rollback Script - -```bash -#!/bin/bash -# scripts/deployment/rollback.sh -# Automated rollback to previous version - -SERVICE_NAME=$1 -NAMESPACE=${2:-production} - -# Get previous deployment revision -PREVIOUS_REVISION=$(kubectl rollout history deployment/$SERVICE_NAME -n $NAMESPACE --no-headers | tail -1 | awk '{print $1}') - -if [ -z "$PREVIOUS_REVISION" ]; then - echo "No previous revision found" - exit 1 -fi - -echo "Rolling back to revision $PREVIOUS_REVISION" - -# Rollback deployment -kubectl rollout undo 
deployment/$SERVICE_NAME -n $NAMESPACE --to-revision=$PREVIOUS_REVISION - -# Wait for rollout -kubectl rollout status deployment/$SERVICE_NAME -n $NAMESPACE - -echo "Rollback complete" -``` - -### Automated Rollback on Failure - -```yaml -# .github/workflows/deploy-production.yml -name: Deploy Production - -on: - push: - branches: [main] - -jobs: - deploy: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Deploy to Kubernetes - run: | - kubectl apply -f deployments/production/kubernetes/ - kubectl rollout status deployment/user-service - - - name: Run Smoke Tests - run: ./scripts/deployment/smoke-tests.sh user-service - - - name: Rollback on Failure - if: failure() - run: ./scripts/deployment/rollback.sh user-service production -``` - -## Deployment Verification - -### Smoke Tests - -```typescript -// scripts/deployment/smoke-tests.ts -// Smoke tests for deployment verification -import axios from 'axios'; - -const SERVICE_URL = process.env.SERVICE_URL || 'http://localhost'; - -async function runSmokeTests(): Promise { - try { - // Health check - const healthResponse = await axios.get(`${SERVICE_URL}/health`); - if (healthResponse.status !== 200) { - console.error('Health check failed'); - return false; - } - - // Basic functionality test - const testResponse = await axios.get(`${SERVICE_URL}/api/v1/users`, { - timeout: 5000, - }); - - if (testResponse.status !== 200) { - console.error('Functionality test failed'); - return false; - } - - console.log('Smoke tests passed'); - return true; - } catch (error) { - console.error('Smoke tests failed', error); - return false; - } -} - -runSmokeTests().then((success) => { - process.exit(success ? 
0 : 1); -}); -``` - -### Health Check Script - -```bash -#!/bin/bash -# scripts/deployment/health-checks.sh -# Comprehensive health checks - -SERVICE_NAME=$1 -NAMESPACE=${2:-production} - -echo "Running health checks for $SERVICE_NAME" - -# Check pods are ready -READY_PODS=$(kubectl get pods -n $NAMESPACE -l app=$SERVICE_NAME --field-selector=status.phase=Running --no-headers | wc -l) - -if [ $READY_PODS -eq 0 ]; then - echo "No ready pods found" - exit 1 -fi - -# Check service endpoints -ENDPOINTS=$(kubectl get endpoints $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.subsets[0].addresses[*].ip}' | wc -w) - -if [ $ENDPOINTS -eq 0 ]; then - echo "No service endpoints found" - exit 1 -fi - -# Check health endpoint -SERVICE_URL=$(kubectl get service $SERVICE_NAME -n $NAMESPACE -o jsonpath='{.status.loadBalancer.ingress[0].hostname}') - -if [ -z "$SERVICE_URL" ]; then - SERVICE_URL="http://$SERVICE_NAME.$NAMESPACE.svc.cluster.local" -fi - -HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" $SERVICE_URL/health) - -if [ $HTTP_CODE -ne 200 ]; then - echo "Health endpoint returned $HTTP_CODE" - exit 1 -fi - -echo "Health checks passed" -``` - -## Deployment Gates - -Deployment gates add checkpoints in the CI/CD pipeline that must pass before proceeding to the next stage. 
- -```yaml -# .github/workflows/deploy-with-gates.yml -name: Deploy with Gates - -jobs: - deploy: - steps: - - name: Deploy - run: kubectl apply -f deployments/ - - - name: Wait for Rollout - run: kubectl rollout status deployment/service - - - name: Smoke Tests Gate - id: smoke-tests - run: ./scripts/deployment/smoke-tests.sh - - - name: Performance Tests Gate - if: steps.smoke-tests.outcome == 'success' - run: ./scripts/deployment/performance-tests.sh - - - name: Manual Approval Gate - if: steps.smoke-tests.outcome == 'success' - uses: trstringer/manual-approval@v1 - with: - secret: ${{ secrets.GITHUB_TOKEN }} - approvers: team-leads - minimum-approvals: 1 - issue-title: "Approve deployment" -``` - -## Best Practices - -1. **Blue-Green**: Use for zero-downtime deployments -2. **Canary**: Use for gradual rollouts with monitoring -3. **Automated Rollback**: Always have rollback plan -4. **Smoke Tests**: Run immediately after deployment -5. **Health Checks**: Monitor health continuously -6. **Gates**: Use deployment gates for critical deployments - -## Common Mistakes - -1. **No Rollback Plan**: Can't recover from failed deployment - ```yaml - # ✅ Always have rollback command ready - kubectl rollout undo deployment/service - ``` - -2. **Skipping Smoke Tests**: Catching issues too late - ```yaml - # ✅ Run smoke tests immediately after deploy - - name: Smoke Tests - run: ./scripts/smoke-tests.sh - ``` - -3. **100% Traffic Switch**: All-or-nothing failures - ```yaml - # ❌ BAD: Immediate full switch - # ✅ GOOD: Gradual rollout (10% → 50% → 100%) - ``` - -4. 
**No Health Monitoring**: Missing deployment issues - ```yaml - # ✅ Monitor health after deployment - - name: Monitor Health - run: kubectl rollout status deployment/service --timeout=5m - ``` - -## Quick Reference - -| Strategy | Risk | Downtime | Resource Cost | -|----------|------|----------|---------------| -| **Blue-Green** | Low | Zero | 2x (temporary) | -| **Canary** | Low | Zero | +10-20% | -| **Rolling** | Medium | Zero | 1x | -| **Recreate** | High | Yes | 1x | - -**Deployment Commands:** -```bash -# Apply deployment -kubectl apply -f kubernetes/ - -# Check rollout status -kubectl rollout status deployment/service - -# Rollback -kubectl rollout undo deployment/service - -# Canary traffic split (Istio) -kubectl apply -f virtualservice-canary.yaml -``` - -**GitHub Actions Triggers:** -```yaml -on: - push: - branches: [main] # Deploy to prod - tags: ['v*'] # Release - pull_request: - branches: [main] # PR checks -``` - -**Deployment Gates:** -``` -Build → Test → Security Scan → Deploy Staging -→ Smoke Tests → Manual Approval → Deploy Prod -``` - -## Resources - -- [Kubernetes Deployment](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) -- [Istio Traffic Management](https://istio.io/latest/docs/tasks/traffic-management/) -- [Deployment Kubernetes](./deployment-kubernetes.md) - K8s deployment patterns -- [Testing Patterns](./testing-patterns.md) - Testing strategies -- [Project Rules](./project-rules.md) - GoodGo coding standards -- Skill Source: `.cursor/skills/cicd-advanced-patterns/SKILL.md` diff --git a/apps/web-docs/content/docs/en/skills/comment-code.md b/apps/web-docs/content/docs/en/skills/comment-code.md deleted file mode 100644 index b760170e..00000000 --- a/apps/web-docs/content/docs/en/skills/comment-code.md +++ /dev/null @@ -1,489 +0,0 @@ ---- -name: comment-code -description: Add bilingual code comments in Vietnamese and English for better documentation. 
Use when adding comments to code, documenting functions/classes, or when user requests Vietnamese/English documentation. ---- - -# Bilingual Code Comments - -Add comprehensive code comments in both Vietnamese and English to improve code readability for international and Vietnamese teams. - -## When to Use - -- Adding comments to new code -- Documenting existing code -- Creating JSDoc/TSDoc documentation -- Writing function/class descriptions -- Explaining complex logic -- Adding inline comments - -## Comment Structure - -The following diagram illustrates the structure and hierarchy of comment types used in the GoodGo codebase: - -```mermaid -graph TB - subgraph CommentTypes["Comment Types"] - SingleLine["Single-line Comments
// EN: ...
// VI: ..."] - MultiLine["Multi-line Comments
/* EN: ...
VI: ... */"] - JSDoc["JSDoc Comments
/** EN: ...
VI: ... */"] - Prisma["Prisma Comments
/// EN: ...
VI: ..."] - end - - subgraph Contexts["Code Contexts"] - Functions["Functions"] - Classes["Classes"] - Interfaces["Interfaces/Types"] - Components["React Components"] - Controllers["API Controllers"] - Middleware["Middleware"] - Schema["Prisma Schema"] - Config["Configuration"] - end - - subgraph SpecialTypes["Special Comment Types"] - TODO["TODO Comments"] - FIXME["FIXME Comments"] - WARNING["WARNING Comments"] - NOTE["NOTE Comments"] - end - - subgraph Format["Bilingual Format"] - EN["English (EN)
First"] - VI["Vietnamese (VI)
Second"] - end - - JSDoc --> Functions - JSDoc --> Classes - JSDoc --> Interfaces - JSDoc --> Components - JSDoc --> Controllers - JSDoc --> Middleware - - SingleLine --> Config - SingleLine --> SpecialTypes - - MultiLine --> Functions - MultiLine --> Classes - - Prisma --> Schema - - Format --> CommentTypes - Format --> Contexts - Format --> SpecialTypes - - style CommentTypes fill:#e1f5ff - style Contexts fill:#fff4e1 - style SpecialTypes fill:#ffe1f5 - style Format fill:#e1ffe1 -``` - -## Documentation Flow - -The following diagram shows the decision flow for adding comments to code: - -```mermaid -flowchart TD - Start([Start: Writing Code]) --> CheckType{What type of
code element?} - - CheckType -->|Public API| HighPriority[High Priority:
Always Document] - CheckType -->|Complex Logic| HighPriority - CheckType -->|Security Code| HighPriority - CheckType -->|Config/Setup| HighPriority - CheckType -->|Error Handling| HighPriority - - CheckType -->|Helper Function| MediumPriority[Medium Priority:
Document if Helpful] - CheckType -->|Data Transform| MediumPriority - CheckType -->|External Integration| MediumPriority - - CheckType -->|Simple Getter/Setter| LowPriority[Low Priority:
Optional] - CheckType -->|Self-explanatory| LowPriority - CheckType -->|Standard CRUD| LowPriority - - HighPriority --> ChooseFormat{Choose Comment
Format} - MediumPriority --> ChooseFormat - LowPriority --> ChooseFormat - - ChooseFormat -->|Function/Class| UseJSDoc[Use JSDoc Format
/** EN: ...
VI: ... */] - ChooseFormat -->|Brief Explanation| UseSingleLine[Use Single-line
// EN: ...
// VI: ...] - ChooseFormat -->|Multi-step Process| UseMultiLine[Use Multi-line
/* EN: ...
VI: ... */] - ChooseFormat -->|Prisma Schema| UsePrisma[Use Prisma Format
/// EN: ...
VI: ...] - - UseJSDoc --> AddParams[Add @param tags
Add @returns tag
Add @throws if needed] - UseSingleLine --> WriteBilingual[Write Bilingual:
EN first, VI second] - UseMultiLine --> WriteBilingual - UsePrisma --> WriteBilingual - - AddParams --> WriteBilingual - WriteBilingual --> CheckSpecial{Special
Comment Type?} - - CheckSpecial -->|Future Work| AddTODO[Add TODO prefix] - CheckSpecial -->|Needs Fix| AddFIXME[Add FIXME prefix] - CheckSpecial -->|Important Warning| AddWARNING[Add WARNING prefix] - CheckSpecial -->|Important Note| AddNOTE[Add NOTE prefix] - CheckSpecial -->|No| End([Done]) - - AddTODO --> End - AddFIXME --> End - AddWARNING --> End - AddNOTE --> End - - style HighPriority fill:#ffcccc - style MediumPriority fill:#ffffcc - style LowPriority fill:#ccffcc - style UseJSDoc fill:#cce5ff - style UseSingleLine fill:#cce5ff - style UseMultiLine fill:#cce5ff - style UsePrisma fill:#cce5ff -``` - -## Comment Format - -### Single-line Comments - -```typescript -// EN: Initialize database connection -// VI: Khởi tạo kết nối database -const db = await createConnection(); -``` - -### Multi-line Comments - -```typescript -/** - * EN: Validates user credentials and returns JWT token - * VI: Xác thực thông tin đăng nhập và trả về JWT token - * - * @param email - User email address / Địa chỉ email người dùng - * @param password - User password / Mật khẩu người dùng - * @returns JWT token / Mã JWT token - * @throws AuthenticationError if credentials invalid / Lỗi xác thực nếu thông tin không hợp lệ - */ -async function login(email: string, password: string): Promise { - // Implementation -} -``` - -## Core Comment Patterns - -### Function Documentation -```typescript -/** - * EN: Calculates the total price including tax and discount - * VI: Tính tổng giá bao gồm thuế và giảm giá - * - * @param basePrice - Original price / Giá gốc - * @param taxRate - Tax rate (0-1) / Tỷ lệ thuế (0-1) - * @param discount - Discount amount / Số tiền giảm giá - * @returns Final price / Giá cuối cùng - */ -function calculateTotal( - basePrice: number, - taxRate: number, - discount: number -): number { - // EN: Apply discount first - // VI: Áp dụng giảm giá trước - const discountedPrice = basePrice - discount; - - // EN: Then calculate tax - // VI: Sau đó tính thuế - const tax = 
discountedPrice * taxRate; - - return discountedPrice + tax; -} -``` - -### Class Documentation -```typescript -/** - * EN: Handles user authentication and authorization - * VI: Xử lý xác thực và phân quyền người dùng - */ -export class AuthService { - /** - * EN: JWT secret key from environment - * VI: Khóa bí mật JWT từ biến môi trường - */ - private readonly jwtSecret: string; - - /** - * EN: Verify JWT token and return user payload - * VI: Xác minh JWT token và trả về thông tin người dùng - * - * @param token - JWT token to verify / JWT token cần xác minh - * @returns User payload / Thông tin người dùng - * @throws TokenExpiredError if token expired / Lỗi token hết hạn - */ - async verifyToken(token: string): Promise { - // Implementation - } -} -``` - -### Interface/Type Documentation -```typescript -/** - * EN: User data transfer object - * VI: Đối tượng truyền dữ liệu người dùng - */ -interface UserDto { - /** EN: Unique user identifier / VI: Mã định danh duy nhất */ - id: string; - - /** EN: User email address / VI: Địa chỉ email người dùng */ - email: string; - - /** EN: User display name / VI: Tên hiển thị người dùng */ - name: string; - - /** EN: User role for authorization / VI: Vai trò người dùng để phân quyền */ - role: 'admin' | 'user' | 'guest'; -} -``` - -### React Components -```typescript -/** - * EN: User profile card component - * VI: Component thẻ hồ sơ người dùng - * - * @param user - User data to display / Dữ liệu người dùng để hiển thị - * @param onEdit - Callback when edit button clicked / Callback khi nhấn nút chỉnh sửa - */ -export function UserCard({ user, onEdit }: UserCardProps) { - // EN: Local state for loading status - // VI: State cục bộ cho trạng thái loading - const [isLoading, setIsLoading] = useState(false); - - return ( -
- {/* EN: Display user avatar / VI: Hiển thị avatar người dùng */} - {user.name} - - {/* EN: User information section / VI: Phần thông tin người dùng */} -
-

{user.name}

-

{user.email}

-
-
- ); -} -``` - -### Prisma Schema -```prisma -/// EN: User model for authentication and profile -/// VI: Model người dùng cho xác thực và hồ sơ -model User { - /// EN: Unique identifier / VI: Mã định danh duy nhất - id String @id @default(cuid()) - - /// EN: User email (unique) / VI: Email người dùng (duy nhất) - email String @unique - - /// EN: Hashed password / VI: Mật khẩu đã mã hóa - password String - - /// EN: Display name / VI: Tên hiển thị - name String - - /// EN: Account creation timestamp / VI: Thời gian tạo tài khoản - createdAt DateTime @default(now()) - - /// EN: Last update timestamp / VI: Thời gian cập nhật cuối - updatedAt DateTime @updatedAt - - @@map("users") -} -``` - -### API Controllers -```typescript -/** - * EN: User management controller - * VI: Controller quản lý người dùng - */ -export class UserController { - /** - * EN: Get user by ID - * VI: Lấy thông tin người dùng theo ID - * - * GET /api/users/:id - */ - async getById(req: Request, res: Response) { - try { - // EN: Extract user ID from params - // VI: Lấy ID người dùng từ params - const { id } = req.params; - - // EN: Fetch user from database - // VI: Lấy người dùng từ database - const user = await this.userService.findById(id); - - if (!user) { - return res.status(404).json({ - success: false, - error: { - code: 'USER_NOT_FOUND', - message: 'User not found / Không tìm thấy người dùng', - }, - }); - } - - return res.json({ - success: true, - data: user, - }); - } catch (error) { - logger.error('Failed to get user', { error, userId: req.params.id }); - return res.status(500).json({ - success: false, - error: { - code: 'INTERNAL_ERROR', - message: 'Internal server error / Lỗi máy chủ nội bộ', - }, - }); - } - } -} -``` - -### Middleware -```typescript -/** - * EN: Authentication middleware to verify JWT tokens - * VI: Middleware xác thực để kiểm tra JWT token - */ -export function authMiddleware( - req: Request, - res: Response, - next: NextFunction -) { - // EN: Extract token from 
Authorization header - // VI: Lấy token từ header Authorization - const authHeader = req.headers.authorization; - const token = authHeader?.replace('Bearer ', ''); - - if (!token) { - return res.status(401).json({ - success: false, - error: { - code: 'NO_TOKEN', - message: 'Authentication required / Yêu cầu xác thực', - }, - }); - } - - try { - // EN: Verify token and extract payload - // VI: Xác minh token và lấy payload - const payload = jwt.verify(token, JWT_SECRET); - req.user = payload; - next(); - } catch (error) { - return res.status(401).json({ - success: false, - error: { - code: 'INVALID_TOKEN', - message: 'Invalid or expired token / Token không hợp lệ hoặc hết hạn', - }, - }); - } -} -``` - -## Best Practices - -### 1. Comment Placement -- Place bilingual comments together (EN first, then VI) -- Keep comments close to the code they describe -- Use JSDoc format for functions and classes - -### 2. Comment Content -- **DO**: Explain WHY, not WHAT (code shows what) -- **DO**: Document complex logic and business rules -- **DO**: Include parameter descriptions and return types -- **DO**: Document error conditions and exceptions -- **DON'T**: State the obvious -- **DON'T**: Write redundant comments - -### 3. Language Guidelines -- **English**: Use clear, concise technical English -- **Vietnamese**: Use proper Vietnamese technical terms -- Keep translations accurate and natural -- Use consistent terminology across codebase - -### 4. Special Cases - -#### TODO Comments -```typescript -// TODO EN: Implement caching for better performance -// TODO VI: Triển khai caching để cải thiện hiệu suất -``` - -#### FIXME Comments -```typescript -// FIXME EN: This causes memory leak, needs refactoring -// FIXME VI: Đoạn này gây rò rỉ bộ nhớ, cần refactor -``` - -#### WARNING Comments -```typescript -// WARNING EN: Do not modify this without updating the database schema -// WARNING VI: Không sửa đổi phần này mà không cập nhật schema database -``` - -### 5. 
Documentation Priority - -**High Priority** (Always document): -- Public APIs and exported functions -- Complex algorithms and business logic -- Security-critical code -- Configuration and environment setup -- Error handling strategies - -**Medium Priority** (Document when helpful): -- Helper functions with non-obvious behavior -- Data transformations -- Integration points with external services - -**Low Priority** (Optional): -- Simple getters/setters -- Self-explanatory code -- Standard CRUD operations - - -## Integration with Project Rules - -When commenting code in this project: -- Follow the code organization from `project-rules` skill -- Use consistent terminology with project documentation -- Align with the API response format standards -- Document according to the testing standards -- Include security considerations where relevant - -## Quick Reference - -### Function Comment Template -```typescript -/** - * EN: [Brief description in English] - * VI: [Mô tả ngắn gọn bằng tiếng Việt] - * - * @param paramName - EN description / VI mô tả - * @returns EN description / VI mô tả - * @throws ErrorType EN when / VI khi nào - */ -``` - -### Inline Comment Template -```typescript -// EN: [English explanation] -// VI: [Giải thích tiếng Việt] -``` - -### Complex Block Template -```typescript -// EN: Step N: [What this block does] -// VI: Bước N: [Block này làm gì] -``` diff --git a/apps/web-docs/content/docs/en/skills/configuration-management.md b/apps/web-docs/content/docs/en/skills/configuration-management.md deleted file mode 100644 index b0db6753..00000000 --- a/apps/web-docs/content/docs/en/skills/configuration-management.md +++ /dev/null @@ -1,131 +0,0 @@ ---- -name: configuration-management -description: Configuration management patterns for GoodGo microservices including feature flags, dynamic configuration reloading, environment-specific configurations, and secrets management. 
---- - -# Configuration Management Patterns - -## When to Use This Skill - -Use this skill when: -- Implementing feature flags and feature toggles -- Managing environment-specific configurations -- Implementing dynamic configuration reloading -- Managing secrets and sensitive configuration -- Implementing configuration validation - -## Key Patterns - -### Configuration Loading Flow - -The configuration loading process fetches configuration from multiple sources, validates it, and supports dynamic reloading: - -```mermaid -flowchart TD - Start([Application Startup]) --> LoadConfig[Load Configuration] - LoadConfig --> FetchSource{Fetch from Source} - - FetchSource --> |Environment Variables| EnvVars[Read Env Vars] - FetchSource --> |Config Files| ConfigFiles[Read JSON/YAML] - FetchSource --> |Database| Database[Query Config Table] - FetchSource --> |External Service| ExternalService[Call Config API] - - EnvVars --> Validate[Validate with Zod Schema] - ConfigFiles --> Validate - Database --> Validate - ExternalService --> Validate - - Validate --> |Valid| StoreConfig[Store in Memory Map] - Validate --> |Invalid| LogError[Log Validation Error] - LogError --> ThrowError[Throw Error] - ThrowError --> End([Application Fails to Start]) - - StoreConfig --> CheckChange{Value Changed?} - CheckChange --> |Yes| EmitEvent[Emit 'config-changed' Event] - CheckChange --> |No| SkipEvent[Skip Event] - - EmitEvent --> Ready[Configuration Ready] - SkipEvent --> Ready - Ready --> End - - Ready --> AutoReload{Auto-Reload Enabled?} - AutoReload --> |Yes| SetInterval[Set Interval Timer] - AutoReload --> |No| End - SetInterval --> Wait[Wait Interval] - Wait --> LoadConfig -``` - -### Feature Flag Evaluation Flow - -Feature flags support multiple evaluation strategies including global flags, user-specific flags, and percentage-based rollouts: - -```mermaid -flowchart TD - Start([Check Feature Flag]) --> GetFlag[Get Flag by Key] - GetFlag --> FlagExists{Flag Exists?} - - FlagExists --> |No| 
ReturnFalse[Return false] - ReturnFalse --> End([End]) - - FlagExists --> |Yes| CheckEnabled{Flag Enabled?} - CheckEnabled --> |No| ReturnFalse - - CheckEnabled --> |Yes| HasUserId{User ID Provided?} - HasUserId --> |No| ReturnTrue[Return true] - ReturnTrue --> End - - HasUserId --> |Yes| CheckUserSpecific{User-Specific Flag?} - CheckUserSpecific --> |Yes| MatchUser{User ID Matches?} - MatchUser --> |Yes| ReturnTrue - MatchUser --> |No| CheckPercentage - - CheckUserSpecific --> |No| CheckPercentage{Percentage Rollout?} - CheckPercentage --> |No| ReturnTrue - - CheckPercentage --> |Yes| HashUser[Hash User ID] - HashUser --> CalcHash[Calculate Hash % 100] - CalcHash --> CompareHash{Hash < Percentage?} - - CompareHash --> |Yes| ReturnTrue - CompareHash --> |No| ReturnFalse -``` - -### Feature Flags - -```typescript -// Check if feature is enabled -const enabled = await featureFlagService.isEnabled('new-feature', userId); - -if (enabled) { - // Use new feature -} -``` - -### Dynamic Configuration - -```typescript -// Load and auto-reload configuration -await configService.load(); -configService.startAutoReload(60000); // Reload every minute - -const value = configService.get('config-key', 'default-value'); -``` - -### Configuration Validation - -```typescript -// Validate with Zod -const config = validateConfig(process.env); -``` - -## Best Practices - -1. Always validate configuration at startup -2. Provide sensible defaults -3. Never commit secrets to code -4. 
Use feature flags for gradual rollouts - -## Resources - -- [Feature Flags Pattern](https://martinfowler.com/articles/feature-toggles.html) -- Skill Source: `.cursor/skills/configuration-management/SKILL.md` diff --git a/apps/web-docs/content/docs/en/skills/data-consistency-patterns.md b/apps/web-docs/content/docs/en/skills/data-consistency-patterns.md deleted file mode 100644 index d3845a80..00000000 --- a/apps/web-docs/content/docs/en/skills/data-consistency-patterns.md +++ /dev/null @@ -1,363 +0,0 @@ ---- -name: data-consistency-patterns -description: Data consistency patterns for distributed microservices including Saga patterns, distributed transactions, eventual consistency, compensation, and idempotency. Use when handling distributed transactions, implementing eventual consistency, or managing data synchronization across services. ---- - -# Data Consistency Patterns - -## When to Use This Skill - -Use this skill when: -- Implementing distributed transactions across multiple services -- Handling eventual consistency in microservices -- Implementing Saga patterns for distributed workflows -- Designing compensation strategies for failed transactions -- Implementing idempotent operations -- Managing data synchronization across services -- Handling conflict resolution - -## Core Concepts - -### ACID vs BASE - -**ACID (Traditional):** Atomicity, Consistency, Isolation, Durability - -**BASE (Distributed):** Basic Availability, Soft state, Eventual consistency - -### Consistency Models - -- **Strong Consistency**: All nodes see same data at same time -- **Eventual Consistency**: System becomes consistent over time -- **Weak Consistency**: No guarantees about when consistency occurs - -## Key Patterns - -### Saga Orchestrator Pattern - -#### Saga Orchestration Flow - -The following diagram illustrates how a Saga orchestrator executes steps sequentially and handles compensation on failure: - -```mermaid -sequenceDiagram - participant Client - participant Orchestrator 
- participant Step1 as Step 1: Create Order - participant Step2 as Step 2: Reserve Inventory - participant Step3 as Step 3: Process Payment - - Client->>Orchestrator: Execute Saga - Orchestrator->>Step1: Execute Step 1 - Step1-->>Orchestrator: Success (Order Created) - Orchestrator->>Step2: Execute Step 2 - Step2-->>Orchestrator: Success (Inventory Reserved) - Orchestrator->>Step3: Execute Step 3 - Step3-->>Orchestrator: Failure (Payment Failed) - Orchestrator->>Step2: Compensate Step 2 - Step2-->>Orchestrator: Compensation Complete - Orchestrator->>Step1: Compensate Step 1 - Step1-->>Orchestrator: Compensation Complete - Orchestrator-->>Client: Saga Failed (Compensated) -``` - -#### Compensation Flow - -When a step fails, the orchestrator compensates all previously completed steps in reverse order: - -```mermaid -flowchart TD - Start([Saga Execution Starts]) --> Step1[Execute Step 1] - Step1 -->|Success| Step2[Execute Step 2] - Step1 -->|Failure| Fail1[Saga Failed
No Compensation Needed] - - Step2 -->|Success| Step3[Execute Step 3] - Step2 -->|Failure| Comp1[Compensate Step 1] - - Step3 -->|Success| Complete([Saga Completed]) - Step3 -->|Failure| Comp2[Compensate Step 2] - - Comp2 --> Comp1 - Comp1 --> Fail2[Saga Failed
All Steps Compensated] - - style Start fill:#e1f5ff - style Complete fill:#d4edda - style Fail1 fill:#f8d7da - style Fail2 fill:#f8d7da - style Comp1 fill:#fff3cd - style Comp2 fill:#fff3cd -``` - -#### Eventual Consistency Flow - -This diagram shows how data becomes consistent across services over time through event propagation: - -```mermaid -flowchart LR - subgraph ServiceA[Service A: Write Model] - Write[Write Operation] --> EventStore[Event Store] - EventStore --> Publish[Publish Event] - end - - subgraph EventBus[Event Bus] - Publish --> Queue[Event Queue] - end - - subgraph ServiceB[Service B: Read Model] - Queue --> Consume[Consume Event] - Consume --> Update[Update Read Model] - Update --> Consistent[Eventually Consistent] - end - - subgraph ServiceC[Service C: Read Model] - Queue --> Consume2[Consume Event] - Consume2 --> Update2[Update Read Model] - Update2 --> Consistent2[Eventually Consistent] - end - - style Write fill:#e1f5ff - style Consistent fill:#d4edda - style Consistent2 fill:#d4edda - style Queue fill:#fff3cd -``` - -```typescript -// Centralized orchestrator coordinates steps -const saga = new SagaOrchestrator(); -await saga.execute({ - sagaId: 'saga_123', - steps: [ - { name: 'create-order', execute: createOrder, compensate: cancelOrder }, - { name: 'reserve-inventory', execute: reserveInventory, compensate: releaseInventory }, - { name: 'process-payment', execute: chargePayment, compensate: refundPayment }, - ], - data: {}, - status: 'pending', -}); -``` - -### Saga Choreography Pattern - -In choreography, services react to events without a central coordinator: - -```typescript -// Services react to events -eventConsumer.on('order.created', async (event) => { - await inventoryService.reserve(event.data.items); - await eventPublisher.publish('inventory.reserved', {...}); -}); -``` - -#### Saga Choreography Flow - -The following diagram shows how services coordinate through events in a choreography pattern: - -```mermaid -sequenceDiagram - 
participant OrderService - participant EventBus - participant InventoryService - participant PaymentService - participant NotificationService - - OrderService->>EventBus: Publish order.created - EventBus->>InventoryService: order.created event - InventoryService->>InventoryService: Reserve Inventory - InventoryService->>EventBus: Publish inventory.reserved - EventBus->>PaymentService: inventory.reserved event - PaymentService->>PaymentService: Process Payment - PaymentService->>EventBus: Publish payment.processed - EventBus->>NotificationService: payment.processed event - NotificationService->>NotificationService: Send Confirmation - - Note over InventoryService,PaymentService: If payment fails,
compensation events are published - PaymentService->>EventBus: Publish payment.failed - EventBus->>InventoryService: payment.failed event - InventoryService->>InventoryService: Release Inventory - EventBus->>OrderService: payment.failed event - OrderService->>OrderService: Cancel Order -``` - -### Idempotency - -Idempotency ensures operations can be safely retried without side effects: - -```typescript -// Execute operation with idempotency check -await idempotencyHandler.execute( - idempotencyKey, - async () => await userService.create(data) -); -``` - -#### Idempotency Flow - -```mermaid -flowchart TD - Request[Client Request] --> Check{Idempotency Key
Exists?} - Check -->|Yes| Return[Return Cached Result] - Check -->|No| Execute[Execute Operation] - Execute --> Store[Store Result with Key] - Store --> Return2[Return Result] - - Return --> Client[Client Response] - Return2 --> Client - - style Check fill:#fff3cd - style Return fill:#d4edda - style Return2 fill:#d4edda -``` - -### Optimistic Locking - -Optimistic locking prevents lost updates using version fields: - -```typescript -// Update with version check -await optimisticLockService.updateWithLock( - repository, - id, - (current) => ({ ...current, name: newName }) -); -``` - -#### Optimistic Locking Flow - -```mermaid -sequenceDiagram - participant Client1 - participant Client2 - participant Service - participant DB[(Database)] - - Client1->>Service: Read Entity (version=1) - Service->>DB: SELECT * WHERE id=123 - DB-->>Service: Entity (version=1) - Service-->>Client1: Entity Data - - Client2->>Service: Read Entity (version=1) - Service->>DB: SELECT * WHERE id=123 - DB-->>Service: Entity (version=1) - Service-->>Client2: Entity Data - - Client1->>Service: Update (version=1) - Service->>DB: UPDATE WHERE id=123 AND version=1 - DB-->>Service: Success (version=2) - - Client2->>Service: Update (version=1) - Service->>DB: UPDATE WHERE id=123 AND version=1 - DB-->>Service: Conflict (version=2 exists) - Service-->>Client2: OptimisticLockError - Note over Client2: Retry with new version -``` - -### CQRS Pattern - -Command Query Responsibility Segregation separates read and write operations for optimized performance and scalability. - -#### CQRS Architecture Flow - -The following diagram illustrates how CQRS separates write and read paths: - -```mermaid -flowchart TB - subgraph WritePath[Write Path] - Command[Command] --> WriteModel[Write Model
Normalized] - WriteModel --> Event[Domain Event] - Event --> EventStore[(Event Store)] - end - - subgraph ReadPath[Read Path] - Query[Query] --> ReadModel[Read Model
Denormalized] - ReadModel --> Response[Query Response] - end - - subgraph Sync[Eventual Consistency] - EventStore --> EventHandler[Event Handler] - EventHandler --> Projection[Projection] - Projection --> ReadModel - end - - style WritePath fill:#e1f5ff - style ReadPath fill:#d4edda - style Sync fill:#fff3cd - style EventStore fill:#f8d7da -``` - -```typescript -// Write path: Command handler -await commandHandler.handle({ - type: 'CREATE_ORDER', - data: { userId, items } -}); - -// Read path: Optimized query -const orders = await readModel.findOrdersByUser(userId); -``` - -### Outbox Pattern - -The Outbox pattern ensures reliable event publishing by storing events in the same database transaction as business data. - -#### Outbox Pattern Flow - -This diagram shows how the Outbox pattern guarantees event delivery: - -```mermaid -sequenceDiagram - participant Client - participant Service - participant DB[(Database)] - participant OutboxTable[(Outbox Table)] - participant Processor[Outbox Processor] - participant EventBus[Event Bus] - - Client->>Service: Business Operation - Service->>DB: Begin Transaction - Service->>DB: Update Business Data - Service->>OutboxTable: Insert Event (same transaction) - Service->>DB: Commit Transaction - - Note over Service,OutboxTable: Event stored atomically
with business data - - Processor->>OutboxTable: Poll for Unpublished Events - OutboxTable-->>Processor: Return Events - Processor->>EventBus: Publish Event - EventBus-->>Processor: Publish Confirmed - Processor->>OutboxTable: Mark as Published - - Note over Processor,EventBus: Background processor
ensures delivery -``` - -```typescript -// Execute business operation and store event in same transaction -await prisma.$transaction(async (tx) => { - // Business operation - await tx.order.create({ data: orderData }); - - // Store event in outbox (same transaction) - await tx.outboxEvent.create({ - data: { - eventType: 'order.created', - payload: orderData, - status: 'pending' - } - }); -}); - -// Background processor publishes events from outbox -``` - -## Best Practices - -1. **Design Compensations**: Every step needs compensation -2. **Idempotent Steps**: Make steps idempotent for retries -3. **Conflict Resolution**: Define resolution strategies -4. **Monitoring**: Track saga execution and consistency lag -5. **Read Models**: Use separate read models for queries (CQRS) - -## Resources - -- [Saga Pattern](https://microservices.io/patterns/data/saga.html) -- [Event-Driven Architecture](./event-driven-architecture.md) -- [Error Handling Patterns](./error-handling-patterns.md) -- Skill Source: `.cursor/skills/data-consistency-patterns/SKILL.md` diff --git a/apps/web-docs/content/docs/en/skills/database-prisma.md b/apps/web-docs/content/docs/en/skills/database-prisma.md deleted file mode 100644 index 9887f581..00000000 --- a/apps/web-docs/content/docs/en/skills/database-prisma.md +++ /dev/null @@ -1,571 +0,0 @@ ---- -name: database-prisma -description: Prisma ORM and database patterns for GoodGo microservices. Use when working with databases, creating Prisma schemas, writing migrations, implementing repositories, or optimizing queries. 
---- - -# Prisma Database Patterns - -## When to Use This Skill - -Use this skill when: -- Setting up Prisma for a new service -- Creating or modifying database schemas -- Writing database migrations -- Implementing repository patterns -- Optimizing database queries -- Setting up database connections -- Implementing transactions -- Working with Neon PostgreSQL - -## Core Concepts - -### Architecture -- Repository pattern for data access -- Prisma as ORM for type safety -- Neon PostgreSQL as primary database -- Connection pooling for performance -- Transaction support for data consistency - -## Prisma Setup - -### Installation - -```bash -npm install @prisma/client prisma -npm install --save-dev @types/node -``` - -### Configuration - -```typescript -// prisma/schema.prisma -generator client { - provider = "prisma-client-js" -} - -datasource db { - provider = "postgresql" - url = env("DATABASE_URL") -} - -// Base model with common fields -model User { - id String @id @default(cuid()) - email String @unique - name String? - password String - role Role @default(USER) - createdAt DateTime @default(now()) - updatedAt DateTime @updatedAt - - // Relations - posts Post[] - profile Profile? - - // Indexes for performance - @@index([email]) - @@index([createdAt]) - @@map("users") -} - -model Post { - id String @id @default(cuid()) - title String - content String? - published Boolean @default(false) - authorId String - author User @relation(fields: [authorId], references: [id]) - createdAt DateTime @default(now()) - updatedAt DateTime @updatedAt - - @@index([authorId]) - @@index([published, createdAt]) - @@map("posts") -} - -model Profile { - id String @id @default(cuid()) - bio String? - avatar String? 
- userId String @unique - user User @relation(fields: [userId], references: [id]) - - @@map("profiles") -} - -enum Role { - USER - ADMIN - MODERATOR -} -``` - -### Schema Relationships - -The following diagram illustrates the relationships between User, Post, and Profile models: - -```mermaid -erDiagram - User ||--o{ Post : "has many" - User ||--o| Profile : "has one" - - User { - string id PK - string email UK - string name - string password - enum role - datetime createdAt - datetime updatedAt - } - - Post { - string id PK - string title - string content - boolean published - string authorId FK - datetime createdAt - datetime updatedAt - } - - Profile { - string id PK - string bio - string avatar - string userId FK,UK - } -``` - -## Database Connection - -```typescript -// src/lib/prisma.ts -import { PrismaClient } from '@prisma/client'; - -const globalForPrisma = global as unknown as { - prisma: PrismaClient | undefined; -}; - -export const prisma = globalForPrisma.prisma ?? - new PrismaClient({ - log: process.env.NODE_ENV === 'development' - ? 
['query', 'error', 'warn'] - : ['error'], - }); - -if (process.env.NODE_ENV !== 'production') { - globalForPrisma.prisma = prisma; -} - -// Middleware for soft delete -prisma.$use(async (params, next) => { - if (params.model && params.action === 'delete') { - return next({ - ...params, - action: 'update', - args: { - ...params.args, - data: { deletedAt: new Date() } - } - }); - } - return next(params); -}); -``` - -## Repository Pattern - -```typescript -// src/repositories/base.repository.ts -export abstract class BaseRepository { - constructor(protected prisma: PrismaClient) {} - - abstract findById(id: string): Promise; - abstract findAll(options?: any): Promise; - abstract create(data: any): Promise; - abstract update(id: string, data: any): Promise; - abstract delete(id: string): Promise; -} - -// src/repositories/user.repository.ts -export class UserRepository extends BaseRepository { - async findById(id: string): Promise { - return this.prisma.user.findUnique({ - where: { id }, - include: { profile: true } - }); - } - - async findByEmail(email: string): Promise { - return this.prisma.user.findUnique({ - where: { email } - }); - } - - async findAll(options: { - page?: number; - limit?: number; - search?: string; - sortBy?: string; - order?: 'asc' | 'desc'; - } = {}): Promise<{ data: User[]; total: number }> { - const { - page = 1, - limit = 10, - search, - sortBy = 'createdAt', - order = 'desc' - } = options; - - const where = search ? 
{ - OR: [ - { email: { contains: search, mode: 'insensitive' } }, - { name: { contains: search, mode: 'insensitive' } } - ] - } : {}; - - const [data, total] = await Promise.all([ - this.prisma.user.findMany({ - where, - skip: (page - 1) * limit, - take: limit, - orderBy: { [sortBy]: order }, - include: { profile: true } - }), - this.prisma.user.count({ where }) - ]); - - return { data, total }; - } - - async create(data: CreateUserDto): Promise { - return this.prisma.user.create({ - data: { - email: data.email, - password: data.password, - name: data.name, - profile: data.bio ? { - create: { bio: data.bio } - } : undefined - }, - include: { profile: true } - }); - } - - async update(id: string, data: UpdateUserDto): Promise { - return this.prisma.user.update({ - where: { id }, - data, - include: { profile: true } - }); - } - - async delete(id: string): Promise { - await this.prisma.user.delete({ - where: { id } - }); - } -} -``` - -## Transactions - -```typescript -// Transaction example -export class TransferService { - async transferFunds( - fromAccountId: string, - toAccountId: string, - amount: number - ) { - return await this.prisma.$transaction(async (tx) => { - // Check balance - const fromAccount = await tx.account.findUnique({ - where: { id: fromAccountId } - }); - - if (!fromAccount || fromAccount.balance < amount) { - throw new Error('Insufficient funds'); - } - - // Deduct from sender - const updatedFrom = await tx.account.update({ - where: { id: fromAccountId }, - data: { balance: { decrement: amount } } - }); - - // Add to receiver - const updatedTo = await tx.account.update({ - where: { id: toAccountId }, - data: { balance: { increment: amount } } - }); - - // Create transaction record - const transaction = await tx.transaction.create({ - data: { - fromAccountId, - toAccountId, - amount, - type: 'TRANSFER', - status: 'COMPLETED' - } - }); - - return transaction; - }, { - maxWait: 5000, - timeout: 10000, - }); - } -} -``` - -## Migrations - -### 
Migration Workflow - -The following diagram shows the typical migration workflow from development to production: - -```mermaid -flowchart TD - Start([Start Migration]) --> EditSchema[Edit schema.prisma] - EditSchema --> CreateMigration[Run: prisma migrate dev] - CreateMigration --> GenerateSQL[Prisma generates SQL] - GenerateSQL --> ReviewSQL{Review SQL?} - ReviewSQL -->|Yes| CheckSQL[Check migration SQL] - ReviewSQL -->|No| ApplyDev[Apply to dev database] - CheckSQL --> ApplyDev - ApplyDev --> GenerateClient[Generate Prisma Client] - GenerateClient --> TestDev[Test in development] - TestDev --> TestPass{Tests pass?} - TestPass -->|No| FixIssues[Fix issues] - FixIssues --> EditSchema - TestPass -->|Yes| CommitMigration[Commit migration files] - CommitMigration --> DeployProd[Deploy to production] - DeployProd --> RunDeploy[Run: prisma migrate deploy] - RunDeploy --> End([Migration Complete]) - - style Start fill:#e1f5e1 - style End fill:#e1f5e1 - style TestPass fill:#fff4e1 - style ReviewSQL fill:#fff4e1 -``` - -```bash -# Create migration -npx prisma migrate dev --name add_user_table - -# Apply migrations -npx prisma migrate deploy - -# Reset database -npx prisma migrate reset - -# Generate Prisma Client -npx prisma generate -``` - -### Migration Files - -```sql --- migrations/20240101000000_add_user_table/migration.sql -CREATE TABLE "users" ( - "id" TEXT NOT NULL, - "email" TEXT NOT NULL, - "name" TEXT, - "password" TEXT NOT NULL, - "role" TEXT NOT NULL DEFAULT 'USER', - "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, - "updatedAt" TIMESTAMP(3) NOT NULL, - - CONSTRAINT "users_pkey" PRIMARY KEY ("id") -); - -CREATE UNIQUE INDEX "users_email_key" ON "users"("email"); -CREATE INDEX "users_createdAt_idx" ON "users"("createdAt"); -``` - -## Query Optimization - -### Query Execution Flow - -The following sequence diagram illustrates how Prisma queries flow from the application layer to the database: - -```mermaid -sequenceDiagram - participant App as 
Application - participant Repo as Repository - participant Client as Prisma Client - participant Pool as Connection Pool - participant DB as PostgreSQL - - App->>Repo: findById(id) - Repo->>Client: prisma.user.findUnique() - Client->>Client: Validate query - Client->>Client: Generate SQL - Client->>Pool: Request connection - Pool->>DB: Execute SQL query - DB-->>Pool: Return results - Pool-->>Client: Return data - Client->>Client: Transform to TypeScript types - Client-->>Repo: Return typed result - Repo-->>App: Return User | null - - Note over App,DB: Prisma ensures type safety
throughout the flow -``` - -```typescript -// Optimized queries -export class OptimizedUserRepository { - // Select only needed fields - async findUsersLight() { - return this.prisma.user.findMany({ - select: { - id: true, - email: true, - name: true - } - }); - } - - // Use pagination - async findPaginated(cursor?: string) { - return this.prisma.user.findMany({ - take: 10, - skip: cursor ? 1 : 0, - cursor: cursor ? { id: cursor } : undefined, - orderBy: { createdAt: 'desc' } - }); - } - - // Batch operations - async createMany(users: CreateUserDto[]) { - return this.prisma.user.createMany({ - data: users, - skipDuplicates: true - }); - } - - // Use raw SQL for complex queries - async getStatistics() { - return this.prisma.$queryRaw` - SELECT - COUNT(*) as total, - COUNT(CASE WHEN role = 'ADMIN' THEN 1 END) as admins, - COUNT(CASE WHEN created_at > NOW() - INTERVAL '30 days' THEN 1 END) as new_users - FROM users - `; - } -} -``` - -## Seeding - -```typescript -// prisma/seed.ts -import { PrismaClient } from '@prisma/client'; -import bcrypt from 'bcrypt'; - -const prisma = new PrismaClient(); - -async function main() { - // Create admin user - const adminPassword = await bcrypt.hash('admin123', 10); - const admin = await prisma.user.upsert({ - where: { email: 'admin@goodgo.com' }, - update: {}, - create: { - email: 'admin@goodgo.com', - name: 'Admin User', - password: adminPassword, - role: 'ADMIN' - } - }); - - // Create test users - const testUsers = Array.from({ length: 10 }, (_, i) => ({ - email: `user${i}@example.com`, - name: `Test User ${i}`, - password: bcrypt.hashSync('password123', 10) - })); - - await prisma.user.createMany({ - data: testUsers, - skipDuplicates: true - }); - - console.log('Database seeded successfully'); -} - -main() - .catch(console.error) - .finally(() => prisma.$disconnect()); -``` - -## Neon PostgreSQL Configuration - -```typescript -// .env 
-DATABASE_URL="postgresql://user:password@ep-xxx.us-east-1.aws.neon.tech/dbname?sslmode=require" - -// Connection pooling for serverless -DIRECT_URL="postgresql://user:password@ep-xxx.us-east-1.aws.neon.tech/dbname?sslmode=require" -``` - -## Testing with Prisma - -```typescript -// __tests__/user.repository.test.ts -import { mockDeep, mockReset } from 'jest-mock-extended'; -import { PrismaClient } from '@prisma/client'; - -jest.mock('../src/lib/prisma', () => ({ - __esModule: true, - prisma: mockDeep() -})); - -describe('UserRepository', () => { - beforeEach(() => { - mockReset(prismaMock); - }); - - it('should create user', async () => { - const user = { id: '1', email: 'test@example.com' }; - prismaMock.user.create.mockResolvedValue(user); - - const result = await repository.create({ - email: 'test@example.com', - password: 'password' - }); - - expect(result).toEqual(user); - }); -}); -``` - -## Best Practices - -1. **Schema Design** - - Use appropriate field types - - Add indexes for frequently queried fields - - Use relations instead of storing JSON - - Implement soft deletes when needed - -2. **Performance** - - Use select to fetch only needed fields - - Implement pagination for large datasets - - Use connection pooling - - Cache frequently accessed data - -3. **Security** - - Never expose sensitive fields - - Use parameterized queries - - Validate input before database operations - - Implement row-level security - -4. 
**Maintenance** - - Keep migrations small and focused - - Test migrations before production - - Backup before major changes - - Monitor query performance \ No newline at end of file diff --git a/apps/web-docs/content/docs/en/skills/deployment-kubernetes.md b/apps/web-docs/content/docs/en/skills/deployment-kubernetes.md deleted file mode 100644 index b55581ba..00000000 --- a/apps/web-docs/content/docs/en/skills/deployment-kubernetes.md +++ /dev/null @@ -1,618 +0,0 @@ ---- -name: deployment-kubernetes -description: Kubernetes deployment patterns for GoodGo microservices. Use when deploying to staging/production, creating K8s manifests, configuring HPA, setting up ingress, or troubleshooting K8s deployments. ---- - -# Kubernetes Deployment Patterns - -## When to Use This Skill - -Use this skill when: -- Deploying services to staging/production environments -- Creating or updating Kubernetes manifests -- Configuring autoscaling (HPA/VPA) -- Setting up ingress and load balancing -- Managing secrets and configmaps -- Troubleshooting deployment issues -- Implementing health checks and probes -- Setting up monitoring and logging - -## Core Concepts - -### Kubernetes Architecture - -The following diagram illustrates the key Kubernetes components and their relationships in a typical GoodGo service deployment: - -```mermaid -graph TB - subgraph External["External Traffic"] - Client[Client Request] - end - - subgraph IngressLayer["Ingress Layer"] - Ingress[Ingress
api.goodgo.com] - end - - subgraph ServiceLayer["Service Layer"] - Service[Service<br/>ClusterIP] - end - - subgraph DeploymentLayer["Deployment Layer"] - Deployment[Deployment<br/>iam-service] - HPA[HorizontalPodAutoscaler<br/>HPA] - end - - subgraph PodLayer["Pod Layer"] - Pod1[Pod 1<br/>Container] - Pod2[Pod 2<br/>Container] - Pod3[Pod 3<br/>Container] - end - - subgraph ConfigLayer["Configuration Layer"] - ConfigMap[ConfigMap<br/>app-config] - Secret[Secret
database-secrets] - end - - Client -->|HTTPS| Ingress - Ingress -->|Route /auth| Service - Service -->|Load Balance| Pod1 - Service -->|Load Balance| Pod2 - Service -->|Load Balance| Pod3 - - Deployment -->|Manages| Pod1 - Deployment -->|Manages| Pod2 - Deployment -->|Manages| Pod3 - - HPA -->|Scales| Deployment - - Pod1 -->|Reads| ConfigMap - Pod2 -->|Reads| ConfigMap - Pod3 -->|Reads| ConfigMap - - Pod1 -->|Reads| Secret - Pod2 -->|Reads| Secret - Pod3 -->|Reads| Secret -``` - -### Deployment Strategy -- Rolling updates for zero-downtime deployments -- Resource limits and requests for stability -- Health checks (liveness/readiness probes) -- Horizontal Pod Autoscaler (HPA) for auto-scaling -- ConfigMaps for configuration -- Secrets for sensitive data - -### Pod Lifecycle - -Pods go through various states during their lifecycle. Health checks (liveness and readiness probes) determine pod availability: - -```mermaid -stateDiagram-v2 - [*] --> Pending: Pod Created - - Pending --> ContainerCreating: Scheduler Assigned - ContainerCreating --> Running: Containers Started - - Running --> Running: Liveness Check Pass - Running --> Restarting: Liveness Check Fail (3x) - Restarting --> Running: Container Restarted - - Running --> Ready: Readiness Check Pass - Ready --> Running: Readiness Check Fail (3x) - - Ready --> Terminating: Pod Deleted - Terminating --> [*]: Cleanup Complete - - note right of Ready - Pod receives traffic - from Service - end note - - note right of Running - Liveness probe checks - if container is alive - end note - - note right of Restarting - Container restarted - after 3 failures - end note -``` - -### Service Discovery Flow - -Kubernetes provides built-in service discovery through DNS. The following diagram shows how requests flow from external clients to pods: - -```mermaid -sequenceDiagram - participant Client - participant Ingress - participant Service - participant Pod1 - participant Pod2 - participant Pod3 - - Client->>Ingress: HTTPS Request
<br/>api.goodgo.com/auth/login - Ingress->>Ingress: TLS Termination - Ingress->>Ingress: Path Routing<br/>
/auth → iam-service - - Ingress->>Service: HTTP Request<br/>
iam-service:80 - Service->>Service: DNS Resolution<br/>
iam-service.goodgo.svc.cluster.local - - Service->>Service: Endpoint Selection<br/>
Load Balancing - - alt Pod1 Selected - Service->>Pod1: Forward Request - Pod1->>Pod1: Process Request - Pod1->>Service: Response - else Pod2 Selected - Service->>Pod2: Forward Request - Pod2->>Pod2: Process Request - Pod2->>Service: Response - else Pod3 Selected - Service->>Pod3: Forward Request - Pod3->>Pod3: Process Request - Pod3->>Service: Response - end - - Service->>Ingress: Response - Ingress->>Client: HTTPS Response -``` - -## Service Deployment Manifest - -```yaml -# kubernetes/iam-service.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: iam-service - namespace: goodgo - labels: - app: iam-service - version: v1 -spec: - replicas: 3 - selector: - matchLabels: - app: iam-service - template: - metadata: - labels: - app: iam-service - version: v1 - spec: - containers: - - name: iam-service - image: goodgo/iam-service:latest - imagePullPolicy: IfNotPresent - ports: - - containerPort: 3000 - name: http - env: - - name: NODE_ENV - value: "production" - - name: PORT - value: "3000" - - name: DATABASE_URL - valueFrom: - secretKeyRef: - name: database-secrets - key: url - - name: JWT_SECRET - valueFrom: - secretKeyRef: - name: auth-secrets - key: jwt-secret - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: redis-config - key: url - resources: - requests: - memory: "256Mi" - cpu: "250m" - limits: - memory: "512Mi" - cpu: "500m" - livenessProbe: - httpGet: - path: /health - port: 3000 - initialDelaySeconds: 30 - periodSeconds: 10 - readinessProbe: - httpGet: - path: /ready - port: 3000 - initialDelaySeconds: 5 - periodSeconds: 5 ---- -apiVersion: v1 -kind: Service -metadata: - name: iam-service - namespace: goodgo -spec: - type: ClusterIP - selector: - app: iam-service - ports: - - port: 80 - targetPort: 3000 - protocol: TCP -``` - -## Horizontal Pod Autoscaler - -```yaml -# kubernetes/hpa.yaml -apiVersion: autoscaling/v2 -kind: HorizontalPodAutoscaler -metadata: - name: iam-service-hpa - namespace: goodgo -spec: - scaleTargetRef: - apiVersion: 
apps/v1 - kind: Deployment - name: iam-service - minReplicas: 2 - maxReplicas: 10 - metrics: - - type: Resource - resource: - name: cpu - target: - type: Utilization - averageUtilization: 70 - - type: Resource - resource: - name: memory - target: - type: Utilization - averageUtilization: 80 - behavior: - scaleDown: - stabilizationWindowSeconds: 300 - policies: - - type: Percent - value: 50 - periodSeconds: 60 - scaleUp: - stabilizationWindowSeconds: 0 - policies: - - type: Percent - value: 100 - periodSeconds: 15 -``` - -## ConfigMap & Secrets - -```yaml -# kubernetes/configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: app-config - namespace: goodgo -data: - NODE_ENV: "production" - LOG_LEVEL: "info" - REDIS_URL: "redis://redis-service:6379" - METRICS_ENABLED: "true" - ---- -# kubernetes/secrets.yaml (example - use sealed-secrets in production) -apiVersion: v1 -kind: Secret -metadata: - name: database-secrets - namespace: goodgo -type: Opaque -stringData: - url: "postgresql://user:pass@postgres:5432/db" - ---- -apiVersion: v1 -kind: Secret -metadata: - name: auth-secrets - namespace: goodgo -type: Opaque -stringData: - jwt-secret: "your-secret-key" - refresh-secret: "your-refresh-secret" -``` - -## Ingress Configuration - -```yaml -# kubernetes/ingress.yaml -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: api-ingress - namespace: goodgo - annotations: - kubernetes.io/ingress.class: nginx - cert-manager.io/cluster-issuer: letsencrypt-prod - nginx.ingress.kubernetes.io/rate-limit: "100" - nginx.ingress.kubernetes.io/ssl-redirect: "true" -spec: - tls: - - hosts: - - api.goodgo.com - secretName: api-tls-secret - rules: - - host: api.goodgo.com - http: - paths: - - path: /auth - pathType: Prefix - backend: - service: - name: iam-service - port: - number: 80 - - path: /users - pathType: Prefix - backend: - service: - name: user-service - port: - number: 80 -``` - -## Database Deployment (Development Only) - -```yaml -# 
kubernetes/postgres.yaml -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: postgres - namespace: goodgo -spec: - serviceName: postgres - replicas: 1 - selector: - matchLabels: - app: postgres - template: - metadata: - labels: - app: postgres - spec: - containers: - - name: postgres - image: postgres:14-alpine - ports: - - containerPort: 5432 - env: - - name: POSTGRES_DB - value: goodgo - - name: POSTGRES_USER - value: postgres - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: postgres-secret - key: password - volumeMounts: - - name: postgres-storage - mountPath: /var/lib/postgresql/data - volumeClaimTemplates: - - metadata: - name: postgres-storage - spec: - accessModes: ["ReadWriteOnce"] - resources: - requests: - storage: 10Gi -``` - -## Deployment Scripts - -```bash -#!/bin/bash -# scripts/deploy-k8s.sh - -# Set namespace -NAMESPACE="goodgo" -ENVIRONMENT="${1:-staging}" - -# Create namespace if not exists -kubectl create namespace $NAMESPACE --dry-run=client -o yaml | kubectl apply -f - - -# Apply configurations -echo "Applying ConfigMaps..." -kubectl apply -f kubernetes/configmap-$ENVIRONMENT.yaml - -echo "Applying Secrets..." -kubectl apply -f kubernetes/secrets-$ENVIRONMENT.yaml - -echo "Deploying services..." -kubectl apply -f kubernetes/iam-service.yaml -kubectl apply -f kubernetes/user-service.yaml - -echo "Configuring autoscaling..." -kubectl apply -f kubernetes/hpa.yaml - -echo "Setting up ingress..." -kubectl apply -f kubernetes/ingress.yaml - -# Wait for rollout -kubectl rollout status deployment/iam-service -n $NAMESPACE -kubectl rollout status deployment/user-service -n $NAMESPACE - -echo "Deployment complete!" -``` - -## Health Check Implementation - -```typescript -// src/modules/health/health.controller.ts -export class HealthController { - constructor( - private prisma: PrismaClient, - private redis: Redis - ) {} - - // Liveness probe - is the service alive? 
- async liveness(req: Request, res: Response) { - res.status(200).json({ status: 'ok' }); - } - - // Readiness probe - is the service ready to accept traffic? - async readiness(req: Request, res: Response) { - try { - // Check database connection - await this.prisma.$queryRaw`SELECT 1`; - - // Check Redis connection - await this.redis.ping(); - - res.status(200).json({ - status: 'ready', - checks: { - database: 'ok', - redis: 'ok' - } - }); - } catch (error) { - res.status(503).json({ - status: 'not ready', - error: error.message - }); - } - } -} -``` - -## Monitoring with Prometheus - -```yaml -# kubernetes/servicemonitor.yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: iam-service-monitor - namespace: goodgo -spec: - selector: - matchLabels: - app: iam-service - endpoints: - - port: http - path: /metrics - interval: 30s -``` - -## Common Commands - -```bash -# Deploy to staging -kubectl apply -f kubernetes/ -n goodgo-staging - -# Check deployment status -kubectl get deployments -n goodgo -kubectl get pods -n goodgo -kubectl get svc -n goodgo - -# View logs -kubectl logs -f deployment/iam-service -n goodgo -kubectl logs -f pod-name -n goodgo --tail=100 - -# Scale manually -kubectl scale deployment iam-service --replicas=5 -n goodgo - -# Update image -kubectl set image deployment/iam-service iam-service=goodgo/iam-service:v1.2.3 -n goodgo - -# Rollback -kubectl rollout undo deployment/iam-service -n goodgo - -# Port forward for debugging -kubectl port-forward service/iam-service 3000:80 -n goodgo - -# Execute command in pod -kubectl exec -it pod-name -n goodgo -- /bin/sh - -# View HPA status -kubectl get hpa -n goodgo -kubectl describe hpa iam-service-hpa -n goodgo - -# View resource usage -kubectl top nodes -kubectl top pods -n goodgo -``` - -## Troubleshooting - -### Pod Not Starting - -```bash -# Check pod status -kubectl describe pod pod-name -n goodgo - -# Check events -kubectl get events -n goodgo --sort-by='.lastTimestamp' 
- -# Check logs -kubectl logs pod-name -n goodgo --previous -``` - -### ImagePullBackOff - -```bash -# Check image name and tag -kubectl describe pod pod-name -n goodgo | grep -i image - -# Check image pull secrets -kubectl get secrets -n goodgo -``` - -### CrashLoopBackOff - -```bash -# Check logs of crashed container -kubectl logs pod-name -n goodgo --previous - -# Check resource limits -kubectl describe pod pod-name -n goodgo | grep -A 5 Limits -``` - -## Best Practices - -1. **Resource Management** - - Always set resource requests and limits - - Monitor actual usage and adjust accordingly - - Use HPA for automatic scaling - -2. **Configuration** - - Use ConfigMaps for non-sensitive config - - Use Secrets for sensitive data - - Never hardcode configuration in images - -3. **Health Checks** - - Implement both liveness and readiness probes - - Set appropriate timeouts and thresholds - - Include dependency checks in readiness probe - -4. **Deployment** - - Use rolling updates for zero-downtime - - Set maxSurge and maxUnavailable appropriately - - Test deployments in staging first - -5. **Security** - - Run containers as non-root user - - Use network policies to restrict traffic - - Regularly update base images - - Use sealed-secrets or external secret manager - -6. **Monitoring** - - Expose metrics endpoint - - Set up alerts for critical issues - - Monitor resource usage and performance \ No newline at end of file diff --git a/apps/web-docs/content/docs/en/skills/documentation.md b/apps/web-docs/content/docs/en/skills/documentation.md deleted file mode 100644 index b75b697e..00000000 --- a/apps/web-docs/content/docs/en/skills/documentation.md +++ /dev/null @@ -1,507 +0,0 @@ ---- -name: documentation -description: Guidelines for writing technical documentation in the GoodGo project. Use when creating or updating README files, guides, architecture docs, or API documentation. Ensures bilingual (EN/VI) consistency and proper structure. 
---- - -# Documentation Writing Guidelines - -## Documentation Structure - -The project follows a structured documentation hierarchy organized by location and content type: - -``` -docs/ -├── en/ # English documentation -│ ├── guides/ # How-to guides -│ │ ├── getting-started.md -│ │ ├── development.md -│ │ ├── deployment.md -│ │ └── local-development.md -│ ├── architecture/ # System design docs -│ │ ├── system-design.md -│ │ └── service-communication.md -│ ├── api/ # API documentation -│ │ └── openapi/ -│ └── runbooks/ # Operational guides -│ ├── incident-response.md -│ └── rollback-procedure.md -├── vi/ # Vietnamese documentation (mirror structure) -└── README.md # Documentation index -``` - -### Documentation Structure Diagram - -The following diagram illustrates where different types of documentation should be placed: - -```mermaid -graph TD - Start[Documentation Need] --> TypeDecision{Documentation Type?} - - TypeDecision -->|Project-Level| ProjectDocs[Project-Level Documentation] - TypeDecision -->|Service/Package| ServiceDocs[Service/Package Documentation] - TypeDecision -->|Deployment| DeployDocs[Deployment Documentation] - TypeDecision -->|Infrastructure| InfraDocs[Infrastructure Documentation] - - ProjectDocs --> ProjectLoc["docs/en/
<br/>docs/vi/"] - ProjectLoc --> ProjectSub{Content Type?} - ProjectSub -->|Guides| GuidesLoc["guides/<br/>
(getting-started.md,<br/>
deployment.md)"] - ProjectSub -->|Architecture| ArchLoc["architecture/<br/>
(system-design.md)"] - ProjectSub -->|API Specs| APILoc["api/openapi/<br/>
(*.yaml)"] - ProjectSub -->|Runbooks| RunbookLoc["runbooks/<br/>
(incident-response.md)"] - - ServiceDocs --> ServiceLoc["services/[name]/README.md<br/>
packages/[name]/README.md"] - ServiceLoc --> ServiceFormat[Format: Side-by-side bilingual] - - DeployDocs --> DeployLoc["deployments/[env]/README.md"] - DeployLoc --> DeployFormat[Format: Technical, operations-focused] - - InfraDocs --> InfraLoc["infra/[component]/README.md"] - InfraLoc --> InfraFormat[Format: Side-by-side bilingual] - - style ProjectDocs fill:#e1f5ff - style ServiceDocs fill:#fff4e1 - style DeployDocs fill:#ffe1f5 - style InfraDocs fill:#e1ffe1 -``` - -## Where to Put Documentation - -### Project-Level Documentation -- **Location**: `docs/en/` and `docs/vi/` -- **Examples**: Getting started, deployment guides, architecture -- **Format**: Markdown with bilingual support - -### Service/Package Documentation -- **Location**: `services/[service-name]/README.md` or `packages/[package-name]/README.md` -- **Content**: Service-specific setup, API endpoints, configuration -- **Format**: Single README with bilingual sections - -### Deployment Documentation -- **Location**: `deployments/[environment]/README.md` -- **Content**: Environment-specific deployment instructions -- **Format**: Technical, operations-focused - -### Infrastructure Documentation -- **Location**: `infra/[component]/README.md` -- **Content**: Infrastructure component configuration and usage -- **Examples**: `infra/traefik/README.md`, `infra/observability/README.md` - -## Bilingual Documentation Rules - -### Format Options - -**Option 1: Side-by-side (Recommended for short content)** -```markdown -# Service Name / Tên Dịch Vụ - -This is a description. -Đây là mô tả. -``` - -**Option 2: Separate files (Recommended for long content)** -``` -docs/ -├── en/ -│ └── guides/ -│ └── deployment.md -└── vi/ - └── guides/ - └── deployment.md -``` - -**Option 3: Sections (For mixed content)** -```markdown -# English Section - -Content in English... - ---- - -# Phần Tiếng Việt - -Nội dung bằng tiếng Việt... 
-``` - -### When to Use Each Format - -- **Side-by-side**: README files, short guides, configuration docs -- **Separate files**: Long guides (>200 lines), architecture docs, runbooks -- **Sections**: API documentation, technical specifications - -### Bilingual Format Decision Flow - -Use the following decision tree to choose the appropriate bilingual format: - -```mermaid -flowchart TD - Start[Creating Documentation] --> CheckLength{Content Length?} - - CheckLength -->|Short
<br/>< 200 lines| CheckLocation{Document Location?} - CheckLength -->|Long<br/>
> 200 lines| SeparateFiles[Use Separate Files Format] - - CheckLocation -->|README files<br/>
Service/Package docs<br/>
Infrastructure docs| SideBySide[Use Side-by-side Format] - CheckLocation -->|docs/guides/<br/>
Short configuration docs| SideBySide - - CheckLength -->|Medium| CheckType{Content Type?} - CheckType -->|API Documentation<br/>
Technical Specifications| Sections[Use Sections Format] - CheckType -->|Mixed Content| Sections - - SeparateFiles --> SeparateAction["Create docs/en/[path]/file.md<br/>
Create docs/vi/[path]/file.md<br/>
(Mirror structure)"] - SideBySide --> SideBySideAction["Single file with<br/>
EN / VI inline<br/>
Example: 'Title / Tiêu Đề'"] - Sections --> SectionsAction["Single file with<br/>
--- separator<br/>
EN section then VI section"] - - SeparateAction --> Done[Documentation Complete] - SideBySideAction --> Done - SectionsAction --> Done - - style SideBySide fill:#e1f5ff - style SeparateFiles fill:#fff4e1 - style Sections fill:#ffe1f5 - style Done fill:#e1ffe1 -``` - -## Documentation Templates - -### Service README Template - -```markdown -# Service Name / Tên Dịch Vụ - -> **EN**: Brief description in English -> **VI**: Mô tả ngắn gọn bằng tiếng Việt - -## Features / Tính Năng - -- Feature 1 / Tính năng 1 -- Feature 2 / Tính năng 2 - -## Prerequisites / Yêu Cầu - -- Node.js 20+ -- PostgreSQL (Neon) -- Redis - -## Quick Start / Bắt Đầu Nhanh - -```bash -# Install dependencies / Cài đặt dependencies -pnpm install - -# Setup environment / Thiết lập môi trường -cp .env.example .env - -# Start service / Khởi động service -pnpm dev -``` - -## Configuration / Cấu Hình - -| Variable | Description / Mô Tả | Default | Required | -|----------|---------------------|---------|----------| -| PORT | Server port / Cổng server | 5000 | No | - -## API Endpoints - -See [API Documentation](../../docs/api/openapi/service-name.yaml) - -## Development / Phát Triển - -[Development instructions...] - -## Testing / Kiểm Thử - -```bash -pnpm test -``` - -## Deployment / Triển Khai - -See [Deployment Guide](../../docs/en/guides/deployment.md) -``` - -### Guide Template (docs/en/guides/) - -```markdown -# Guide Title - -**Last Updated**: 2024-01-01 -**Difficulty**: Beginner/Intermediate/Advanced - -## Overview - -Brief overview of what this guide covers. - -## Prerequisites - -- Requirement 1 -- Requirement 2 - -## Step-by-Step Instructions - -### Step 1: Title - -Description and commands... - -```bash -command here -``` - -### Step 2: Title - -Description and commands... 
- -## Troubleshooting - -### Issue 1 - -**Problem**: Description -**Solution**: Steps to fix - -## Next Steps - -- Link to related guide -- Link to another resource - -## Resources - -- [Related Doc](../path/to/doc.md) -- [External Link](https://example.com) -``` - -### Architecture Document Template - -```markdown -# Component Architecture - -## Overview - -High-level description of the component. - -## Architecture Diagram - -```mermaid -graph TD - A[Component A] --> B[Component B] - B --> C[Component C] -``` - -## Components - -### Component Name - -**Purpose**: What it does -**Technology**: Tech stack -**Dependencies**: What it depends on - -## Data Flow - -1. Step 1 -2. Step 2 -3. Step 3 - -## Design Decisions - -### Decision 1 - -**Context**: Why this decision was needed -**Decision**: What was decided -**Consequences**: Impact of the decision - -## Deployment - -How this component is deployed. - -## Monitoring - -How to monitor this component. -``` - -## Writing Style Guidelines - -### Technical Writing Principles - -1. **Clear and Concise**: Use simple language, avoid jargon -2. **Action-Oriented**: Start with verbs (Install, Configure, Deploy) -3. **Structured**: Use headings, lists, and tables -4. **Examples**: Provide code examples and commands -5. **Visual**: Use diagrams where helpful - -### Code Examples - -```markdown -# Good: With context and explanation -Install dependencies using pnpm: - -```bash -pnpm install -``` - -# Bad: No context -```bash -pnpm install -``` -``` - -### Commands - -- Always show the full command -- Include comments for clarity -- Show expected output when helpful - -```bash -# Good -docker-compose up -d -# Expected output: Creating network, Starting containers... - -# Bad -docker-compose up -``` - -### Links - -- Use relative links for internal docs -- Use descriptive link text (not "click here") - -```markdown -# Good -See the [Deployment Guide](../guides/deployment.md) for details. 
- -# Bad -Click [here](../guides/deployment.md) for more info. -``` - -## Documentation Checklist - -### Before Writing - -- [ ] Determine correct location (docs/ vs service README) -- [ ] Choose bilingual format (side-by-side vs separate) -- [ ] Review existing docs for consistency - -### While Writing - -- [ ] Use clear, concise language -- [ ] Include code examples -- [ ] Add diagrams where helpful -- [ ] Provide troubleshooting section -- [ ] Link to related documentation - -### After Writing - -- [ ] Test all commands and code examples -- [ ] Check all links work -- [ ] Ensure bilingual consistency -- [ ] Update documentation index (docs/README.md) -- [ ] Request review from team - -## Common Mistakes to Avoid - -### ❌ Don't - -- Write documentation in only one language -- Put detailed guides in service README (use docs/) -- Use absolute paths in links -- Assume prior knowledge -- Skip code examples -- Forget to update when code changes - -### ✅ Do - -- Maintain bilingual documentation -- Use appropriate location (docs/ vs README) -- Use relative links -- Explain prerequisites -- Provide working examples -- Keep docs up-to-date with code - -## Documentation Maintenance - -### When to Update Documentation - -- New feature added -- API changes -- Configuration changes -- Deployment process changes -- Bug fixes affecting usage -- Architecture changes - -### Version Documentation - -For major changes, consider: -- Adding "Last Updated" date -- Creating versioned docs (v1/, v2/) -- Maintaining changelog - -## Tools and Resources - -### Markdown Tools - -- **Mermaid**: For diagrams -- **Tables Generator**: For complex tables -- **Markdown Linter**: For consistency - -### Documentation Testing - -```bash -# Check for broken links -find docs -name "*.md" -exec markdown-link-check {} \; - -# Lint markdown files -markdownlint docs/**/*.md -``` - -## Examples from Project - -### Good Documentation Examples - -- `docs/en/guides/getting-started.md` - Clear step-by-step 
guide -- `services/_template/README.md` - Comprehensive service README -- `deployments/local/README.md` - Operations-focused deployment guide - -### Documentation Locations Reference - -| Content Type | Location | Format | -|--------------|----------|--------| -| Getting Started | `docs/en/guides/getting-started.md` | Separate files | -| Service Setup | `services/[name]/README.md` | Side-by-side | -| Deployment | `docs/en/guides/deployment.md` | Separate files | -| Architecture | `docs/en/architecture/` | Separate files | -| API Specs | `docs/en/api/openapi/` | OpenAPI YAML | -| Runbooks | `docs/en/runbooks/` | Separate files | -| Infrastructure | `infra/[component]/README.md` | Side-by-side | -| Environment Config | `deployments/[env]/README.md` | Technical only | - -## Quick Reference - -### File Naming - -- Use kebab-case: `getting-started.md` -- Be descriptive: `local-development.md` not `dev.md` -- Match EN and VI filenames - -### Heading Levels - -```markdown -# H1: Document Title (only one per file) -## H2: Major Sections -### H3: Subsections -#### H4: Details (use sparingly) -``` - -### Bilingual Patterns - -```markdown -# Pattern 1: Inline -Description / Mô tả - -# Pattern 2: After slash -PORT=5000 # Server port / Cổng server - -# Pattern 3: Table -| Variable | Description / Mô Tả | - -# Pattern 4: Code comments -# EN: Install dependencies -# VI: Cài đặt dependencies -pnpm install -``` diff --git a/apps/web-docs/content/docs/en/skills/error-handling-patterns.md b/apps/web-docs/content/docs/en/skills/error-handling-patterns.md deleted file mode 100644 index fc243ee9..00000000 --- a/apps/web-docs/content/docs/en/skills/error-handling-patterns.md +++ /dev/null @@ -1,460 +0,0 @@ ---- -name: error-handling-patterns -description: Error handling patterns and conventions for GoodGo microservices. Use when implementing error handling, creating custom error classes, handling exceptions, standardizing error responses, or debugging error scenarios. 
---- - -# Error Handling Patterns - -## When to Use This Skill - -Use this skill when: -- Implementing error handling in services, controllers, or repositories -- Creating custom error classes for specific error scenarios -- Standardizing error responses across APIs -- Handling exceptions from external services or database operations -- Implementing error middleware and global error handlers -- Debugging error scenarios and improving error messages -- Distinguishing between operational and programming errors - -## Core Concepts - -### Error Types - -1. **Operational Errors**: Expected errors that occur during normal operation - - Examples: Validation errors, authentication failures, resource not found - - Should be handled gracefully and return appropriate HTTP status codes - - Safe to expose error details to clients (with caution) - -2. **Programming Errors**: Unexpected errors due to bugs in code - - Examples: Null pointer exceptions, type errors, logic bugs - - Should be logged with full details for debugging - - Should return generic error messages to clients (hide implementation details) - -### Error Propagation Flow - -The following diagram illustrates how errors propagate through the application layers: - -```mermaid -flowchart TD - A[Request] --> B[Controller] - B --> C[Service Layer] - C --> D[Repository Layer] - D --> E{Error Occurs?} - E -->|Yes| F[Throw HttpError] - E -->|No| G[Return Data] - F --> H[Error Middleware] - H --> I{Error Type?} - I -->|HttpError| J[Extract Status/Code] - I -->|Prisma Error| K[Map to HttpError] - I -->|Zod Error| L[Map to ValidationError] - I -->|Unknown| M[Map to InternalServerError] - J --> N[Log Error] - K --> N - L --> N - M --> N - N --> O{Is Operational?} - O -->|Yes| P[Log as Warning] - O -->|No| Q[Log as Error] - P --> R[Format Response] - Q --> R - R --> S{Is Production?} - S -->|Yes & 5xx| T[Generic Message] - S -->|No or < 5xx| U[Detailed Message] - T --> V[Send Response] - U --> V - G --> V -``` - -### Error 
Hierarchy Structure - -The error class hierarchy shows the relationship between different error types: - -```mermaid -classDiagram - class Error { - <> - +message: string - +stack: string - } - class HttpError { - +statusCode: number - +errorCode: string - +isOperational: boolean - +details?: any - +toApiResponse() - } - class NotFoundError { - +statusCode: 404 - } - class BadRequestError { - +statusCode: 400 - } - class ValidationError { - +statusCode: 422 - } - class UnauthorizedError { - +statusCode: 401 - } - class ForbiddenError { - +statusCode: 403 - } - class ConflictError { - +statusCode: 409 - } - class RateLimitError { - +statusCode: 429 - } - class InternalServerError { - +statusCode: 500 - } - class ServiceUnavailableError { - +statusCode: 503 - } - class DatabaseError { - +statusCode: 500 - } - class ExternalServiceError { - +statusCode: 502 - } - - Error <|-- HttpError - HttpError <|-- NotFoundError - HttpError <|-- BadRequestError - HttpError <|-- ValidationError - HttpError <|-- UnauthorizedError - HttpError <|-- ForbiddenError - HttpError <|-- ConflictError - HttpError <|-- RateLimitError - HttpError <|-- InternalServerError - HttpError <|-- ServiceUnavailableError - HttpError <|-- DatabaseError - HttpError <|-- ExternalServiceError -``` - -### Error Handling Decision Tree - -Use this decision tree to determine which error class to use: - -```mermaid -flowchart TD - A[Error Occurs] --> B{Error Type?} - B -->|Resource Not Found| C[NotFoundError
<br/>404] - B -->|Invalid Input| D{Validation?} - B -->|Authentication| E{Type?} - B -->|Resource Conflict| F[ConflictError<br/>
409] - B -->|Rate Limit| G[RateLimitError<br/>
429] - B -->|Database| H[DatabaseError<br/>
500] - B -->|External Service| I[ExternalServiceError<br/>
502] - B -->|Service Unavailable| J[ServiceUnavailableError<br/>
503] - B -->|Unknown/Programming| K[InternalServerError<br/>
500] - D -->|Schema Validation| L[ValidationError<br/>
422] - D -->|Bad Request Format| M[BadRequestError<br/>
400] - E -->|No Token/Invalid| N[UnauthorizedError<br/>
401] - E -->|No Permission| O[ForbiddenError<br/>
403] - C --> P[Set isOperational: true] - L --> P - M --> P - N --> P - O --> P - F --> P - G --> P - H --> Q{Is Operational?} - I --> Q - J --> Q - K --> R[Set isOperational: false] - Q -->|Yes| P - Q -->|No| R - P --> S[Include Error Code] - R --> S - S --> T[Add Context Details] - T --> U[Throw Error] -``` - -### Error Code System - -The platform uses a centralized error code system (`ErrorCode` enum) that: -- Provides unique identifiers for each error type -- Maps to HTTP status codes consistently -- Enables error tracking and analytics -- Supports internationalization - -Error codes follow the pattern: `{CATEGORY}_{NUMBER}` -- `AUTH_001` - Authentication errors -- `VALIDATION_001` - Validation errors -- `RESOURCE_001` - Resource errors -- `DB_001` - Database errors - -## Patterns - -### Base Error Class: HttpError - -All custom errors extend the `HttpError` base class: - -```typescript -export class HttpError extends Error { - public readonly statusCode: number; - public readonly errorCode: string; - public readonly isOperational: boolean; - public readonly details?: any; - - constructor( - message: string, - statusCode: number = 500, - errorCode: string = 'INTERNAL_ERROR', - isOperational: boolean = true, - details?: any - ) { - super(message); - this.statusCode = statusCode; - this.errorCode = errorCode; - this.isOperational = isOperational; - this.details = details; - Error.captureStackTrace(this, this.constructor); - } - - toApiResponse() { - return { - success: false, - error: { - code: this.errorCode, - message: this.message, - ...(this.details && { details: this.details }), - }, - timestamp: new Date().toISOString(), - }; - } -} -``` - -### Standard Error Classes - -Use these predefined error classes for common scenarios: - -**Resource Errors:** -- `NotFoundError` - 404: Resource not found -- `ConflictError` - 409: Resource conflict (e.g., duplicate) - -**Validation Errors:** -- `ValidationError` - 422: Input validation failed -- `BadRequestError` - 
400: Invalid request - -**Authentication/Authorization:** -- `UnauthorizedError` - 401: Authentication required -- `ForbiddenError` - 403: Access denied - -**System Errors:** -- `InternalServerError` - 500: Internal server error (programming error) -- `ServiceUnavailableError` - 503: Service temporarily unavailable -- `DatabaseError` - 500: Database operation failed -- `ExternalServiceError` - 502: External service error - -**Rate Limiting:** -- `RateLimitError` - 429: Too many requests - -### Error Code Enum - -Centralized error codes in `ErrorCode` enum: - -```typescript -export enum ErrorCode { - // Authentication & Authorization - UNAUTHORIZED = 'AUTH_001', - FORBIDDEN = 'AUTH_002', - INVALID_TOKEN = 'AUTH_003', - TOKEN_EXPIRED = 'AUTH_004', - - // Validation - VALIDATION_ERROR = 'VALIDATION_001', - INVALID_FORMAT = 'VALIDATION_002', - - // Resources - NOT_FOUND = 'RESOURCE_001', - ALREADY_EXISTS = 'RESOURCE_002', - CONFLICT = 'RESOURCE_003', - - // Database - DATABASE_ERROR = 'DB_001', - CONSTRAINT_VIOLATION = 'DB_004', - - // System - INTERNAL_ERROR = 'SYS_001', - RATE_LIMIT_EXCEEDED = 'SYS_003', -} -``` - -### Using Errors in Services - -```typescript -import { NotFoundError, ConflictError } from '../errors/http-error'; -import { ErrorCode } from '../errors/error-codes'; - -export class UserService { - async getUserById(id: string) { - const user = await this.repository.findById(id); - - if (!user) { - throw new NotFoundError('User', { id }); - } - - return user; - } - - async createUser(data: CreateUserInput) { - const existing = await this.repository.findByEmail(data.email); - - if (existing) { - throw new ConflictError('User with this email already exists'); - } - - return await this.repository.create(data); - } -} -``` - -### Error Middleware Pattern - -Global error handler middleware processes all errors: - -```typescript -export const errorHandler = ( - err: any, - req: express.Request, - res: express.Response, - _next: express.NextFunction -): void => 
{ - let statusCode = 500; - let errorCode = ErrorCode.INTERNAL_ERROR; - let message = 'Internal server error'; - let isOperational = false; - - // Handle HttpError instances - if (err instanceof HttpError) { - statusCode = err.statusCode; - errorCode = err.errorCode as ErrorCode; - message = err.message; - isOperational = err.isOperational; - } - // Handle Prisma errors - else if (err.code === 'P2002') { - statusCode = 409; - errorCode = ErrorCode.CONSTRAINT_VIOLATION; - message = 'Resource already exists'; - isOperational = true; - } - // Handle Zod validation errors - else if (err.name === 'ZodError') { - statusCode = 422; - errorCode = ErrorCode.VALIDATION_ERROR; - message = 'Validation failed'; - // Extract validation details - } - - // Log error - if (!isOperational || statusCode >= 500) { - logger.error('Unhandled error', { error: err, statusCode, errorCode }); - } else { - logger.warn('Operational error', { error: err, statusCode, errorCode }); - } - - // Send response - const response = { - success: false, - error: { - code: errorCode, - message: isProduction && statusCode >= 500 - ? 
'Internal server error' - : message, - }, - timestamp: new Date().toISOString(), - }; - - res.status(statusCode).json(response); -}; -``` - -### Async Error Wrapper - -Wrap async route handlers to catch promise rejections: - -```typescript -export const asyncHandler = (fn: Function) => { - return (req: express.Request, res: express.Response, next: express.NextFunction) => { - Promise.resolve(fn(req, res, next)).catch(next); - }; -}; - -// Usage -router.get('/users/:id', asyncHandler(async (req, res) => { - const user = await userService.getUserById(req.params.id); - res.json({ success: true, data: user }); -})); -``` - -### Error Response Format - -Standardized error response format: - -```typescript -{ - success: false, - error: { - code: "RESOURCE_001", - message: "User not found", - details?: { - // Optional additional details (not in production for 5xx errors) - } - }, - timestamp: "2024-01-01T00:00:00.000Z" -} -``` - -## Best Practices - -1. **Use Specific Error Classes**: Use the most specific error class available -2. **Include Context**: Provide helpful error messages with context -3. **Mark Operational Errors**: Set `isOperational: true` for expected errors -4. **Don't Expose Internal Details**: Hide implementation details in production -5. **Log Appropriately**: Use `logger.error()` for programming errors, `logger.warn()` for operational errors -6. **Handle Database Errors**: Map Prisma errors to appropriate HTTP errors -7. **Use Error Codes**: Always use `ErrorCode` enum for consistency -8. **Validate Early**: Validate input early to catch errors before processing - -## Common Mistakes - -1. **Not Using Error Classes**: Using generic `Error` instead of specific error classes -2. **Exposing Stack Traces**: Including stack traces in production responses -3. **Ignoring Errors**: Not handling errors in async operations -4. **Generic Error Messages**: Using vague error messages without context -5. **Not Logging**: Forgetting to log errors for debugging -6. 
**Wrong HTTP Status Codes**: Using incorrect status codes for error types -7. **Not Using Error Middleware**: Handling errors manually instead of using middleware - -## Troubleshooting - -### Error Not Caught by Middleware - -**Problem**: Error not being caught by error middleware -**Solution**: Ensure error middleware is added last, after all routes. Use `asyncHandler` for async route handlers. - -### Generic Error Messages in Production - -**Problem**: Generic "Internal server error" shown even for operational errors -**Solution**: Check `isOperational` flag is set correctly. Verify error middleware handles all error types. - -### Error Code Not Found - -**Problem**: Error code not in `ErrorCode` enum -**Solution**: Add error code to enum following naming convention. Update `ERROR_CODE_TO_STATUS` mapping. - -### Stack Traces Exposed - -**Problem**: Stack traces visible in API responses -**Solution**: Ensure production environment checks are in place. Use error middleware to filter stack traces. - -## Resources - -- [Error Classes](../../services/iam-service/src/errors/http-error.ts) - Base error classes -- [Error Codes](../../services/iam-service/src/errors/error-codes.ts) - Error code definitions -- [Error Middleware](../../services/iam-service/src/middlewares/error.middleware.ts) - Global error handler -- [API Design](../api-design/SKILL.md) - API response formats -- [Security](../security/SKILL.md) - Security error handling diff --git a/apps/web-docs/content/docs/en/skills/event-driven-architecture.md b/apps/web-docs/content/docs/en/skills/event-driven-architecture.md deleted file mode 100644 index 6dbb8c5f..00000000 --- a/apps/web-docs/content/docs/en/skills/event-driven-architecture.md +++ /dev/null @@ -1,452 +0,0 @@ ---- -name: event-driven-architecture -description: Event-driven architecture patterns with Apache Kafka for GoodGo microservices. 
Use when implementing async communication, event publishing/consuming, event sourcing, CQRS, or integrating event streams with HTTP endpoints. ---- - -# Event-Driven Architecture Patterns - -## When to Use This Skill - -Use this skill when: -- Implementing asynchronous communication between services -- Decoupling services for better scalability -- Publishing domain events for downstream consumers -- Consuming events from other services -- Implementing event sourcing patterns -- Implementing CQRS (Command Query Responsibility Segregation) -- Exposing event streams via HTTP (SSE/WebSocket) -- Handling eventual consistency across services -- Building reactive systems that respond to changes -- Integrating with Apache Kafka message broker - -## Core Concepts - -### Event-Driven vs Request-Response - -**Request-Response (Synchronous):** -- Client waits for response -- Tight coupling between services -- Blocking operations -- Immediate consistency -- Use Traefik API Gateway for HTTP/REST - -**Event-Driven (Asynchronous):** -- Fire-and-forget publishing -- Loose coupling between services -- Non-blocking operations -- Eventual consistency -- Use Kafka for message broker - -### Kafka Fundamentals - -- **Topics**: Named streams of events (e.g., `user.created`, `order.placed`) -- **Partitions**: Physical division of topics for parallelism and scaling -- **Consumer Groups**: Groups of consumers that work together to process events -- **Producers**: Services that publish events to topics -- **Consumers**: Services that subscribe to topics and process events - -#### Consumer Groups Architecture - -The following diagram illustrates how consumer groups distribute work across partitions: - -```mermaid -graph TB - subgraph Topic["Topic: user.created"] - P0["Partition 0"] - P1["Partition 1"] - P2["Partition 2"] - end - - subgraph ConsumerGroup["Consumer Group: notification-service"] - C1["Consumer 1"] - C2["Consumer 2"] - end - - subgraph ConsumerGroup2["Consumer Group: 
analytics-service"] - C3["Consumer 3"] - C4["Consumer 4"] - C5["Consumer 5"] - end - - P0 --> C1 - P1 --> C2 - P2 --> C1 - - P0 --> C3 - P1 --> C4 - P2 --> C5 - - style Topic fill:#e1f5ff - style ConsumerGroup fill:#fff4e1 - style ConsumerGroup2 fill:#e8f5e9 -``` - -**Key Points:** -- Each partition is consumed by only one consumer per consumer group -- Multiple consumer groups can independently consume from the same topic -- Consumers in a group automatically rebalance when members join or leave -- More partitions enable better parallelism within a consumer group - -### Traefik Integration - -Traefik serves dual purpose: -- **API Gateway**: Routes synchronous HTTP/REST requests -- **Event Streaming Gateway**: Routes SSE/WebSocket connections to event streaming endpoints - -Services publish events to Kafka, then expose SSE/WebSocket endpoints that consume from Kafka for HTTP clients. - -## Key Patterns - -### Event Publishing - -```typescript -// src/core/events/event-publisher.ts -import { producer } from '../config/kafka.config'; -import { logger } from '@goodgo/logger'; -import { v4 as uuidv4 } from 'uuid'; - -export class EventPublisher { - async publish( - topic: string, - event: Omit, - options?: { partitionKey?: string } - ): Promise { - const fullEvent: T = { - ...event, - eventId: uuidv4(), - timestamp: new Date().toISOString(), - source: this.serviceName, - } as T; - - await producer.send({ - topic, - messages: [{ - key: options?.partitionKey || fullEvent.eventId, - value: JSON.stringify(fullEvent), - headers: { - 'event-type': event.eventType, - 'event-version': event.eventVersion, - }, - }], - }); - } -} -``` - -#### Event Publishing Flow - -The following sequence diagram shows how events are published from a service to Kafka: - -```mermaid -sequenceDiagram - participant Service as Service Layer - participant Publisher as EventPublisher - participant Kafka as Kafka Broker - participant Topic as Topic Partition - - Service->>Publisher: publish(topic, 
event, options) - activate Publisher - Publisher->>Publisher: Generate eventId - Publisher->>Publisher: Add timestamp & source - Publisher->>Publisher: Determine partition key - Publisher->>Kafka: send({ topic, messages }) - activate Kafka - Kafka->>Topic: Route to partition - activate Topic - Topic-->>Kafka: Acknowledge - deactivate Topic - Kafka-->>Publisher: Success - deactivate Kafka - Publisher-->>Service: Complete (fire-and-forget) - deactivate Publisher - Note over Service,Publisher: Non-blocking operation -``` - -**Key Points:** -- Publishing is asynchronous and non-blocking -- Partition key determines which partition receives the event -- Events are acknowledged by Kafka before completion -- Fire-and-forget pattern prevents blocking request handlers - -### Event Consuming - -```typescript -// src/core/events/event-consumer.ts -import { kafka } from '../config/kafka.config'; - -export class EventConsumer { - private handlers: Map = new Map(); - - on(eventType: string, handler: EventHandler): void { - if (!this.handlers.has(eventType)) { - this.handlers.set(eventType, []); - } - this.handlers.get(eventType)!.push(handler); - } - - async start(topics: string[]): Promise { - await this.consumer.connect(); - await this.consumer.subscribe({ topics, fromBeginning: false }); - - await this.consumer.run({ - eachMessage: async ({ topic, partition, message }) => { - const event: BaseEvent = JSON.parse(message.value?.toString() || '{}'); - const handlers = this.handlers.get(event.eventType) || []; - await Promise.all(handlers.map(h => h.handle(event))); - }, - }); - } -} -``` - -### Outbox Pattern for Transactional Publishing - -The Outbox pattern ensures transactional consistency by storing events in the database within the same transaction as business data, then publishing them asynchronously. 
- -#### Outbox Pattern Flow - -The following sequence diagram illustrates the outbox pattern workflow: - -```mermaid -sequenceDiagram - participant Service as Service Layer - participant DB as Database - participant Outbox as Outbox Table - participant Processor as Outbox Processor - participant Publisher as EventPublisher - participant Kafka as Kafka Broker - - Service->>DB: Begin Transaction - activate DB - Service->>DB: Create business entity - Service->>Outbox: Insert event (status: PENDING) - Outbox-->>DB: Stored - Service->>DB: Commit Transaction - deactivate DB - Note over Service,DB: Event stored atomically with business data - - loop Polling Interval - Processor->>Outbox: Find PENDING events - Outbox-->>Processor: Return events - Processor->>Publisher: publish(event) - activate Publisher - Publisher->>Kafka: Send to topic - Kafka-->>Publisher: Acknowledge - Publisher-->>Processor: Success - deactivate Publisher - Processor->>Outbox: Update status to PUBLISHED - end -``` - -**Key Points:** -- Events are stored in the database within the same transaction as business data -- A separate background process (Outbox Processor) publishes events to Kafka -- Ensures at-least-once delivery guarantee -- Prevents lost events if Kafka is temporarily unavailable - -```typescript -// Store event in database within transaction -await prisma.outboxEvent.create({ - data: { - eventType: 'user.created', - eventData: userData, - topic: 'user.created', - status: 'PENDING', - }, -}); - -// Separate process publishes from outbox to Kafka -async function processOutbox() { - const events = await prisma.outboxEvent.findMany({ - where: { status: 'PENDING' }, - }); - - for (const event of events) { - await eventPublisher.publish(event.topic, event.eventData); - await prisma.outboxEvent.update({ - where: { id: event.id }, - data: { status: 'PUBLISHED' }, - }); - } -} -``` - -### SSE Endpoint for Event Streaming - -```typescript -// src/modules/events/events.controller.ts -async 
streamEvents(req: Request, res: Response): Promise { - res.setHeader('Content-Type', 'text/event-stream'); - res.setHeader('Cache-Control', 'no-cache'); - res.setHeader('Connection', 'keep-alive'); - - const topic = req.query.topic as string; - const consumer = kafka.consumer({ groupId: `sse-${Date.now()}` }); - - await consumer.connect(); - await consumer.subscribe({ topic, fromBeginning: false }); - - await consumer.run({ - eachMessage: async ({ message }) => { - const event = JSON.parse(message.value?.toString() || '{}'); - res.write(`data: ${JSON.stringify(event)}\n\n`); - }, - }); - - req.on('close', async () => { - await consumer.disconnect(); - }); -} -``` - -## Event Structure - -```typescript -interface BaseEvent { - eventId: string; - eventType: string; - eventVersion: string; - timestamp: string; - source: string; - correlationId?: string; - traceId?: string; - data: unknown; -} -``` - -## Best Practices - -### Event Naming Conventions - -- **Event Type**: `{domain}.{action}.v{version}` (e.g., `user.created.v1`) -- **Topic**: `{domain}.{entity}.{action}` (e.g., `user.created`) -- Use lowercase with dots as separators -- Keep names descriptive and consistent - -### Partition Key Selection - -- Use entity ID for ordering guarantees (same entity → same partition) -- Use correlation ID for request tracing -- Use user ID for user-scoped events -- Avoid low-cardinality keys, which concentrate traffic on a few hot partitions; prefer high-cardinality keys, which distribute events evenly - -### Event Ordering Guarantees - -- Kafka guarantees ordering **per partition** -- Use partition key to ensure related events go to same partition -- Events in different partitions have no ordering guarantee -- Don't rely on global ordering across all events - -### Error Handling - -- Implement Dead Letter Queue (DLQ) for failed events -- Use retry with exponential backoff -- Log all event processing failures -- Monitor consumer lag and DLQ size - -### Observability - -- Log all published and consumed events -- Track metrics: events published/consumed, processing
duration, consumer lag -- Add distributed tracing to event flows -- Include correlation IDs for request tracking - -## Infrastructure Setup - -### Docker Compose (Local) - -```yaml -services: - kafka: - image: confluentinc/cp-kafka:7.4.0 - ports: - - "9092:9092" - environment: - KAFKA_BROKER_ID: 1 - KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 - KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://localhost:9092 - - schema-registry: - image: confluentinc/cp-schema-registry:7.4.0 - ports: - - "8081:8081" - environment: - SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: kafka:9092 -``` - -## Testing - -### Unit Testing - -```typescript -import { EventPublisher } from '../event-publisher'; -import { producer } from '../../config/kafka.config'; - -jest.mock('../../config/kafka.config'); - -describe('EventPublisher', () => { - it('should publish event successfully', async () => { - const publisher = new EventPublisher(); - const mockSend = jest.fn().mockResolvedValue({}); - (producer.send as jest.Mock) = mockSend; - - await publisher.publish('user.created', { - eventType: 'user.created', - eventVersion: '1.0.0', - data: { userId: '123' }, - }); - - expect(mockSend).toHaveBeenCalled(); - }); -}); -``` - -### Integration Testing with Test Containers - -```typescript -import { KafkaContainer } from '@testcontainers/kafka'; - -describe('Event Flow E2E', () => { - let kafkaContainer: StartedKafkaContainer; - - beforeAll(async () => { - kafkaContainer = await new KafkaContainer().start(); - process.env.KAFKA_BROKERS = kafkaContainer.getBootstrapServer(); - }); - - it('should publish and consume event', async () => { - // Test implementation - }); -}); -``` - -## Common Use Cases - -### User Created Event Flow - -1. Auth Service creates user in database -2. Publishes `user.created` event to Kafka -3. Notification Service consumes event and sends welcome email -4. Analytics Service consumes event and updates metrics - -### Order Processing with Multiple Consumers - -1. 
Order Service publishes `order.placed` event -2. Payment Service processes payment -3. Inventory Service reserves items -4. Notification Service sends confirmation - -## Related Skills - -- [Resilience Patterns](./resilience-patterns.md) - Circuit breaker, retry patterns -- [Error Handling Patterns](./error-handling-patterns.md) - Error handling best practices -- [Observability & Monitoring](./observability-monitoring.md) - Logging, metrics, tracing -- [Middleware Patterns](./middleware-patterns.md) - SSE endpoint middleware -- [Project Rules](./project-rules.md) - GoodGo coding standards - -## Resources - -- [KafkaJS Documentation](https://kafka.js.org/) - Node.js Kafka client -- [Confluent Schema Registry](https://docs.confluent.io/platform/current/schema-registry/index.html) - Schema versioning -- [Kafka Best Practices](https://kafka.apache.org/documentation/#best_practices) - Official Kafka documentation -- Skill Source: `.cursor/skills/event-driven-architecture/SKILL.md` diff --git a/apps/web-docs/content/docs/en/skills/infrastructure-as-code.md b/apps/web-docs/content/docs/en/skills/infrastructure-as-code.md deleted file mode 100644 index 280d06ca..00000000 --- a/apps/web-docs/content/docs/en/skills/infrastructure-as-code.md +++ /dev/null @@ -1,224 +0,0 @@ ---- -name: infrastructure-as-code -description: Infrastructure as Code patterns for GoodGo platform including Terraform modules, Kubernetes operators, infrastructure testing, GitOps workflows, and multi-environment management. 
---- - -# Infrastructure as Code Patterns - -## When to Use This Skill - -Use this skill when: -- Managing infrastructure with code -- Implementing Terraform modules -- Setting up GitOps workflows -- Creating Kubernetes operators -- Testing infrastructure changes - -## Infrastructure as Code Workflow - -The following diagram illustrates the complete IaC workflow from code changes to infrastructure deployment: - -```mermaid -flowchart TD - A[Developer writes IaC code] --> B[Commit to Git repository] - B --> C{Code Review} - C -->|Rejected| D[Fix issues] - D --> B - C -->|Approved| E[Merge to branch] - E --> F[CI/CD Pipeline triggers] - F --> G{Terraform or
Kubernetes?} - G -->|Terraform| H[Terraform Workflow] - G -->|Kubernetes| I[GitOps Workflow] - H --> J[terraform init] - J --> K[terraform validate] - K --> L[terraform plan] - L --> M{Plan review} - M -->|Issues found| D - M -->|Approved| N[terraform apply] - N --> O[Infrastructure updated] - I --> P[GitOps tool detects changes] - P --> Q[Sync to Kubernetes cluster] - Q --> O - O --> R[Health checks] - R --> S{Deployment successful?} - S -->|No| T[Rollback] - S -->|Yes| U[Monitor infrastructure] -``` - -## GitOps Flow - -GitOps enables automated synchronization of Kubernetes manifests from Git to clusters: - -```mermaid -sequenceDiagram - participant Dev as Developer - participant Git as Git Repository - participant ArgoCD as ArgoCD/Flux - participant K8s as Kubernetes Cluster - - Dev->>Git: Push manifest changes - Git->>ArgoCD: Detect changes (poll/webhook) - ArgoCD->>Git: Fetch latest manifests - ArgoCD->>ArgoCD: Compare desired vs actual state - alt Drift detected - ArgoCD->>K8s: Apply changes (sync) - K8s->>K8s: Update resources - K8s->>ArgoCD: Status update - else Auto-heal enabled - ArgoCD->>K8s: Self-heal (correct drift) - end - ArgoCD->>Git: Update sync status -``` - -## Terraform Execution Flow - -The Terraform workflow ensures safe and predictable infrastructure changes: - -```mermaid -flowchart LR - A[terraform init] --> B[Load providers & modules] - B --> C[terraform validate] - C --> D{Syntax valid?} - D -->|No| E[Fix errors] - E --> C - D -->|Yes| F[terraform plan] - F --> G[Read state] - G --> H[Build dependency graph] - H --> I[Calculate changes] - I --> J[Generate plan] - J --> K{Review plan} - K -->|Issues| L[Adjust code] - L --> F - K -->|Approved| M[terraform apply] - M --> N[Lock state] - N --> O[Execute changes] - O --> P{Success?} - P -->|No| Q[Rollback] - P -->|Yes| R[Update state] - R --> S[Unlock state] - S --> T[Save state to backend] -``` - -## Key Patterns - -### Terraform Modules - -Terraform modules enable reusable infrastructure 
components across environments: - -```hcl -# Reusable module -module "postgresql" { - source = "../../modules/postgresql" - database_name = "goodgo" - environment = "staging" -} -``` - -### Module Structure - -The following diagram shows the typical Terraform module structure and how modules are composed: - -```mermaid -graph TD - A[Module: postgresql] --> B[variables.tf<br/>Input parameters] - A --> C[main.tf<br/>Resource definitions] - A --> D[outputs.tf<br/>Exported values] - E[Environment: staging] --> F[main.tf] - F --> G[Module: postgresql] - F --> H[Module: redis] - F --> I[Module: kubernetes-cluster] - G --> J[Output: database_url] - H --> K[Output: redis_url] - I --> L[Output: cluster_endpoint] - F --> M[terraform.tfvars<br/>Environment config] -``` - -### GitOps with ArgoCD - -GitOps tools like ArgoCD and Flux automatically sync Kubernetes manifests from Git repositories: - -```yaml -# Automated sync from Git -spec: - source: - repoURL: https://github.com/goodgo/platform - path: deployments/production/kubernetes - syncPolicy: - automated: - prune: true - selfHeal: true -``` - -### Multi-Environment Management - -Managing infrastructure across multiple environments requires clear separation and consistent patterns: - -```mermaid -graph TB - subgraph "Git Repository" - A[infra/terraform] - end - subgraph "Modules (Reusable)" - B[modules/postgresql] - C[modules/redis] - D[modules/kubernetes-cluster] - end - subgraph "Environments" - E[environments/staging] - F[environments/production] - end - A --> B - A --> C - A --> D - A --> E - A --> F - E --> B - E --> C - E --> D - F --> B - F --> C - F --> D - E --> G[terraform.tfvars
staging config] - F --> H[terraform.tfvars
production config] - G --> I[Remote State Backend
staging/terraform.tfstate] - H --> J[Remote State Backend
production/terraform.tfstate] -``` - -## Infrastructure Testing - -Always validate infrastructure changes before applying them: - -```bash -# Validate Terraform syntax -terraform init -terraform validate - -# Preview changes -terraform plan -out=tfplan - -# Review plan before applying -terraform show tfplan -``` - -## Best Practices - -1. **Version Control**: Keep all infrastructure in version control -2. **Modules**: Create reusable Terraform modules for common components -3. **Testing**: Test infrastructure changes before applying to production -4. **GitOps**: Use GitOps (ArgoCD/Flux) for Kubernetes deployments -5. **Environment Isolation**: Separate environments completely with different state backends -6. **State Management**: Use remote state backends (S3, GCS) with state locking -7. **Secrets**: Never commit secrets - use environment variables or secrets managers - -### Common Mistakes to Avoid - -1. **Committing Secrets**: Never hardcode passwords or API keys -2. **Local State Only**: Always use remote state backends for team collaboration -3. **No State Locking**: Enable state locking to prevent concurrent modifications -4. **Direct Apply**: Always review `terraform plan` output before applying - -## Resources - -- [Terraform Documentation](https://www.terraform.io/docs) -- [Deployment Kubernetes](./deployment-kubernetes.md) -- Skill Source: `.cursor/skills/infrastructure-as-code/SKILL.md` diff --git a/apps/web-docs/content/docs/en/skills/inter-service-communication.md b/apps/web-docs/content/docs/en/skills/inter-service-communication.md deleted file mode 100644 index 4649c318..00000000 --- a/apps/web-docs/content/docs/en/skills/inter-service-communication.md +++ /dev/null @@ -1,280 +0,0 @@ ---- -name: inter-service-communication -description: Inter-service communication patterns for GoodGo microservices including gRPC, GraphQL, service-to-service authentication, protocol selection, and client patterns. 
Use when implementing service-to-service calls, choosing communication protocols, or building service clients. ---- - -# Inter-Service Communication Patterns - -## When to Use This Skill - -Use this skill when: -- Implementing service-to-service communication -- Choosing between REST, gRPC, or GraphQL protocols -- Setting up gRPC services and clients -- Implementing GraphQL services and resolvers -- Implementing service-to-service authentication -- Building resilient service clients with circuit breakers -- Managing connection pooling for service clients -- Implementing request/response interceptors -- Handling service discovery for internal calls -- Optimizing inter-service communication performance - -## Core Concepts - -### Communication Protocol Options - -**HTTP/REST:** -- Human-readable, easy to debug -- Browser-compatible -- Standard HTTP semantics -- JSON payloads -- Good for external APIs - -**gRPC:** -- Binary protocol (Protocol Buffers) -- High performance, low latency -- Streaming support -- Strong typing with .proto files -- HTTP/2 based - -**GraphQL:** -- Flexible query language -- Single endpoint -- Client-controlled data fetching -- Strong typing with schema - -### Protocol Selection Guidelines - -Choose protocol based on use case, performance requirements, team expertise, and ecosystem needs. - -#### Protocol Selection Decision Tree - -```mermaid -flowchart TD - Start([Need Inter-Service Communication]) --> CheckExternal{External/Public API?} - - CheckExternal -->|Yes| UseREST[Use REST] - CheckExternal -->|No| CheckPerformance{High Performance
Required?} - - CheckPerformance -->|Yes| CheckStreaming{Need Streaming?} - CheckPerformance -->|No| CheckFlexible{Need Flexible
Queries?} - - CheckStreaming -->|Yes| UseGRPC[Use gRPC] - CheckStreaming -->|No| CheckLowLatency{Ultra Low
Latency?} - - CheckLowLatency -->|Yes| UseGRPC - CheckLowLatency -->|No| UseREST - - CheckFlexible -->|Yes| UseGraphQL[Use GraphQL] - CheckFlexible -->|No| UseREST - - UseREST --> RESTDesc["REST: External APIs
Browser clients
Simple CRUD"] - UseGRPC --> GRPCDesc["gRPC: Internal services
High performance
Streaming support"] - UseGraphQL --> GraphQLDesc["GraphQL: Complex queries
Mobile apps
Flexible data"] - - style UseREST fill:#e1f5ff - style UseGRPC fill:#fff4e1 - style UseGraphQL fill:#e8f5e9 -``` - -## Key Patterns - -### Service-to-Service Call Flow - -The following diagram illustrates the complete flow of a service-to-service call, including authentication, interceptors, and error handling: - -```mermaid -sequenceDiagram - participant ClientService as Client Service - participant ClientLib as Service Client
(HTTP/gRPC/GraphQL) - participant Interceptor as Request
Interceptor - participant Auth as Auth
Middleware - participant TargetService as Target Service - participant Logger as Logger - participant Metrics as Metrics - - ClientService->>ClientLib: Make request - ClientLib->>Interceptor: Add correlation ID
Add service auth header
Add request ID - Interceptor->>Logger: Log request start - Interceptor->>Metrics: Track request start - Interceptor->>TargetService: HTTP/gRPC/GraphQL Request
(with headers) - - TargetService->>Auth: Validate x-service-auth - alt Invalid Auth - Auth-->>TargetService: 403 Forbidden - TargetService-->>ClientLib: Error Response - ClientLib->>Logger: Log error - ClientLib->>Metrics: Track failure - ClientLib-->>ClientService: ServiceError - else Valid Auth - Auth->>TargetService: Authenticated - TargetService->>TargetService: Process request - TargetService-->>ClientLib: Success Response - ClientLib->>Logger: Log success
(with correlation ID) - ClientLib->>Metrics: Track success
(duration, status) - ClientLib-->>ClientService: Response Data - end -``` - -### HTTP/REST Service Client - -```typescript -// Base service client with circuit breaker and interceptors -import { ServiceClient } from '../../core/clients/service-client'; - -const notificationClient = new ServiceClient({ - baseURL: process.env.NOTIFICATION_SERVICE_URL || 'http://notification-service:5003', - serviceName: 'notification-service', - timeout: 5000, - enableCircuitBreaker: true, -}); - -// Usage -await notificationClient.post('/api/v1/notifications', { - userId, - message, -}); -``` - -### gRPC Service - -```typescript -// gRPC server implementation -import { UserGrpcServer } from './user.grpc.service'; - -const grpcServer = new UserGrpcServer(userService); -grpcServer.start(50051); - -// gRPC client -import { GrpcClient } from '../../core/clients/grpc-client'; - -const userGrpcClient = new GrpcClient({ - protoPath: './proto/user_service.proto', - packageName: 'goodgo.user.v1', - serviceName: 'UserService', - serverUrl: 'localhost:50051', -}); - -const user = await userGrpcClient.call('getUser', { user_id: '123' }); -``` - -### GraphQL Service - -```typescript -// GraphQL client -import { GraphQLServiceClient } from '../../core/clients/graphql-client'; - -const userGraphQLClient = new GraphQLServiceClient({ - endpoint: 'http://user-service:5002/graphql', -}); - -const GET_USER_QUERY = ` - query GetUser($id: ID!) { - user(id: $id) { - id - email - name - } - } -`; - -const user = await userGraphQLClient.query(GET_USER_QUERY, { id: '123' }); -``` - -### Service-to-Service Authentication - -The authentication flow ensures secure communication between services: - -```mermaid -sequenceDiagram - participant ClientService as Client Service - participant ServiceClient as Service Client - participant Env as Environment
Variables - participant TargetService as Target Service - participant AuthMiddleware as Auth
Middleware - - ClientService->>ServiceClient: Create client instance - ServiceClient->>Env: Read INTERNAL_API_KEY - Env-->>ServiceClient: API Key - - ClientService->>ServiceClient: Make request - ServiceClient->>ServiceClient: Auto-add x-service-auth
header with API key - - ServiceClient->>TargetService: HTTP Request
(x-service-auth: token) - - TargetService->>AuthMiddleware: Extract x-service-auth - AuthMiddleware->>AuthMiddleware: Compare with
INTERNAL_API_KEY - - alt Token Matches - AuthMiddleware->>TargetService: Auth Success - TargetService->>TargetService: Process request - TargetService-->>ServiceClient: 200 OK + Data - else Token Mismatch - AuthMiddleware->>TargetService: Auth Failed - TargetService-->>ServiceClient: 403 Forbidden
(INVALID_SERVICE_AUTH) - ServiceClient->>ServiceClient: Throw ServiceError - ServiceClient-->>ClientService: Error Response - end -``` - -#### Implementation - -```typescript -// Internal auth middleware -import { internalAuthMiddleware } from '../../middlewares/internal-auth.middleware'; - -router.use('/internal', internalAuthMiddleware); - -// Client automatically adds auth header -const client = new ServiceClient({ - baseURL: 'http://service:5000', - serviceName: 'service', -}); -// X-Service-Auth header is added automatically -``` - -## Best Practices - -### Protocol Selection - -- **REST**: External APIs, browser clients, simple CRUD -- **gRPC**: Internal services, high performance, streaming -- **GraphQL**: Complex queries, mobile apps, flexible data - -### Performance - -- Use connection pooling -- Enable HTTP keep-alive -- Set appropriate timeouts -- Implement circuit breakers - -### Security - -- Always authenticate internal calls -- Use TLS/mTLS -- Store secrets securely -- Implement rate limiting - -### Observability - -- Log with correlation IDs -- Track metrics (duration, success rate) -- Add distributed tracing -- Monitor service health - -## Testing - -```typescript -// Mock service client -const mockClient = createMockServiceClient(); -mockClient.get.mockResolvedValue({ id: '123' }); -``` - -## Resources - -- [gRPC Documentation](https://grpc.io/docs/) -- [GraphQL Documentation](https://graphql.org/learn/) -- [Protocol Buffers](https://developers.google.com/protocol-buffers) -- [Resilience Patterns](./resilience-patterns.md) -- [Security](./security.md) -- Skill Source: `.cursor/skills/inter-service-communication/SKILL.md` diff --git a/apps/web-docs/content/docs/en/skills/microservices-development-process.md b/apps/web-docs/content/docs/en/skills/microservices-development-process.md deleted file mode 100644 index c97c5674..00000000 --- a/apps/web-docs/content/docs/en/skills/microservices-development-process.md +++ /dev/null @@ -1,660 +0,0 @@ ---- 
-name: microservices-development-process -description: Standard development process for creating and maintaining microservices in GoodGo platform. Use when creating new services, migrating services, refactoring services, or planning service implementations. ---- - -# Microservices Development Process - -## When to Use This Skill - -Use this skill when: -- Creating a new microservice from scratch -- Migrating or refactoring an existing service -- Planning service implementation with multiple phases -- Ensuring comprehensive coverage of all development aspects -- Needing a structured approach to service development - -## Development Process Overview - -The microservices development process follows these phases: -1. **Planning & Impact Analysis** - Define scope, impact, dependencies -2. **Foundation Setup** - Service structure, configs, infrastructure -3. **Core Implementation** - Business logic, APIs, data layer -4. **Integration** - Routes, middleware, external services -5. **Testing** - Unit, integration, E2E tests -6. **Documentation** - API docs, README, guides -7. **Cleanup & Verification** - Remove temporary files, verify completeness -8. **Deployment** - Staging deployment, production deployment - -### Process Flow Diagram - -This diagram shows the complete 8-phase development process with decision points and feedback loops. - -```mermaid -graph TD - Start([Start: New Service Requirements]) --> Phase1[Phase 1: Planning & Impact Analysis] - Phase1 --> ImpactCheck{Impact Analysis
Complete?} - ImpactCheck -->|No| Phase1 - ImpactCheck -->|Yes| Phase2[Phase 2: Foundation Setup] - - Phase2 --> FoundationCheck{Service Starts
& Health Check Passes?} - FoundationCheck -->|No| Phase2 - FoundationCheck -->|Yes| Phase3[Phase 3: Core Implementation] - - Phase3 --> ImplementationCheck{Business Logic
Implemented?} - ImplementationCheck -->|No| Phase3 - ImplementationCheck -->|Yes| Phase4[Phase 4: Integration] - - Phase4 --> IntegrationCheck{Routes & Middleware
Working?} - IntegrationCheck -->|No| Phase4 - IntegrationCheck -->|Yes| Phase5[Phase 5: Testing] - - Phase5 --> TestCheck{Tests Pass
& Coverage Met?} - TestCheck -->|No| Phase5 - TestCheck -->|Yes| Phase6[Phase 6: Documentation] - - Phase6 --> DocCheck{Docs
Complete?} - DocCheck -->|No| Phase6 - DocCheck -->|Yes| Phase7[Phase 7: Cleanup & Verification] - - Phase7 --> VerificationCheck{All Checks
Pass?} - VerificationCheck -->|No| Phase7 - VerificationCheck -->|Yes| Phase8[Phase 8: Deployment] - - Phase8 --> DeployCheck{Staging
Deployed?} - DeployCheck -->|No| Phase8 - DeployCheck -->|Yes| Production{Deploy to
Production?} - Production -->|Yes| ProdDeploy[Production Deployment] - Production -->|No| Complete([Complete]) - ProdDeploy --> Complete - - style Phase1 fill:#e1f5ff - style Phase2 fill:#fff4e1 - style Phase3 fill:#f0e1ff - style Phase4 fill:#e1ffe1 - style Phase5 fill:#ffe1e1 - style Phase6 fill:#e1ffff - style Phase7 fill:#fff0e1 - style Phase8 fill:#ffe1f5 - style Complete fill:#d4edda -``` - -### Detailed Phase Flow - -This diagram breaks down the tasks within each phase and shows the sequential flow between phases. - -```mermaid -graph LR - subgraph Planning["Phase 1: Planning"] - P1A[Define Scope] --> P1B[Impact Analysis] - P1B --> P1C[Dependencies Map] - P1C --> P1D[Acceptance Criteria] - end - - subgraph Foundation["Phase 2: Foundation"] - F2A[Copy Template] --> F2B[Configure Package] - F2B --> F2C[Setup Database] - F2C --> F2D[Configure Docker] - F2D --> F2E[Setup Traefik] - end - - subgraph Implementation["Phase 3: Implementation"] - I3A[DTOs] --> I3B[Repository] - I3B --> I3C[Service] - I3C --> I3D[Controller] - I3D --> I3E[Module] - end - - subgraph Integration["Phase 4: Integration"] - IN4A[Register Routes] --> IN4B[Setup Middleware] - IN4B --> IN4C[External Services] - IN4C --> IN4D[Health Checks] - end - - subgraph Testing["Phase 5: Testing"] - T5A[Unit Tests] --> T5B[Integration Tests] - T5B --> T5C[E2E Tests] - T5C --> T5D[Coverage Check] - end - - subgraph Documentation["Phase 6: Documentation"] - D6A[README] --> D6B[API Docs] - D6B --> D6C[Architecture Docs] - end - - subgraph Cleanup["Phase 7: Cleanup"] - C7A[Remove Temp Files] --> C7B[Update References] - C7B --> C7C[Verify Everything] - end - - subgraph Deployment["Phase 8: Deployment"] - DEP8A[Staging] --> DEP8B[Verification] - DEP8B --> DEP8C[Production] - end - - Planning --> Foundation - Foundation --> Implementation - Implementation --> Integration - Integration --> Testing - Testing --> Documentation - Documentation --> Cleanup - Cleanup --> Deployment - - style Planning fill:#e1f5ff - 
style Foundation fill:#fff4e1 - style Implementation fill:#f0e1ff - style Integration fill:#e1ffe1 - style Testing fill:#ffe1e1 - style Documentation fill:#e1ffff - style Cleanup fill:#fff0e1 - style Deployment fill:#ffe1f5 -``` - -## Phase 1: Planning & Impact Analysis - -### Scope Definition - -Define clearly: -- **Service Purpose**: What business capability does it provide? -- **API Surface**: What endpoints are needed? -- **Data Models**: What data structures are required? -- **Dependencies**: What services/packages does it depend on? -- **Breaking Changes**: Any backward compatibility concerns? - -### Impact Analysis Checklist - -Before starting implementation, identify all affected areas: - -**Files to Create:** -- [ ] Service directory: `services/service-name/` -- [ ] Prisma schema: `services/service-name/prisma/schema.prisma` -- [ ] Dockerfile: `services/service-name/Dockerfile` -- [ ] Service README: `services/service-name/README.md` - -**Files to Update:** -- [ ] Root `package.json` workspace config -- [ ] `deployments/local/docker-compose.yml` - Add service -- [ ] `infra/traefik/dynamic/routes.yml` - Add routes -- [ ] `.github/workflows/ci-*.yml` - Add CI workflow (if needed) -- [ ] Documentation: `docs/en/guides/`, `docs/vi/guides/` -- [ ] Scripts: `scripts/db/*.sh`, `scripts/dev/*.sh` (if service-specific) - -**Infrastructure Changes:** -- [ ] Database: New schema/tables -- [ ] Redis: New cache keys/patterns (if needed) -- [ ] Traefik: New routes and services -- [ ] Observability: New service metrics/traces - -**Dependencies:** -- [ ] External: Database, Redis, third-party APIs -- [ ] Internal: Shared packages (@goodgo/logger, @goodgo/types, etc.) 
-- [ ] Other Services: List dependent services - -## Phase 2: Foundation Setup - -### Service Structure Creation - -**Template Usage:** -```bash -cp -r services/_template services/new-service-name -cd services/new-service-name -# Update package.json name to @goodgo/new-service-name -``` - -**Required Files:** -- Service structure from template -- `package.json` with correct name and dependencies -- `src/config/app.config.ts` - Configuration with Zod validation -- `.env.example` - Environment variables template -- `prisma/schema.prisma` - Database schema -- `Dockerfile` - Container configuration -- `jest.config.ts` - Test configuration - -### Database Setup - -```bash -# Create initial migration -cd services/service-name -pnpm prisma migrate dev --name init -pnpm prisma generate -``` - -### Docker & Infrastructure - -**Docker Compose Integration:** -Add service to `deployments/local/docker-compose.yml` with: -- Build context and dockerfile -- Environment variables -- Traefik labels for routing -- Health check configuration - -**Traefik Routes:** -Update `infra/traefik/dynamic/routes.yml` with: -- Router rules (PathPrefix) -- Service configuration -- Middleware chain (CORS, rate-limit, auth) - -### Acceptance Criteria for Phase 2 - -- [ ] Service directory created from template -- [ ] `package.json` configured correctly -- [ ] Environment variables defined -- [ ] Prisma schema created and migration run -- [ ] Service starts: `pnpm dev` (health check passes) -- [ ] Docker build succeeds -- [ ] Service accessible via Traefik -- [ ] No TypeScript errors: `pnpm typecheck` - -## Phase 3: Core Implementation - -### Module Structure - -Each feature module follows this pattern: -``` -modules/feature-name/ -├── feature.controller.ts # HTTP handlers -├── feature.service.ts # Business logic -├── feature.repository.ts # Data access -├── feature.dto.ts # Validation schemas (Zod) -├── feature.module.ts # Module registration -└── index.ts # Public exports -``` - -### Implementation 
Flow - -This diagram shows the step-by-step implementation order for each feature module within Phase 3. - -```mermaid -graph TD - Start[Start Implementation] --> DTOs[1. Create DTOs
Zod Validation Schemas] - DTOs --> Repo[2. Create Repository
Prisma Data Access] - Repo --> Service[3. Create Service
Business Logic] - Service --> Controller[4. Create Controller
HTTP Handlers] - Controller --> Module[5. Create Module
Wire Up Components] - Module --> Test[Manual Testing] - Test --> Pass{Tests Pass?} - Pass -->|No| Repo - Pass -->|Yes| Next[Next Feature Module] - - style DTOs fill:#e1f5ff - style Repo fill:#fff4e1 - style Service fill:#f0e1ff - style Controller fill:#e1ffe1 - style Module fill:#ffe1e1 -``` - -### Implementation Order - -1. **DTOs** - Zod schemas for request/response validation -2. **Repository** - Prisma-based data access, CRUD operations -3. **Service** - Business logic, error handling, validation -4. **Controller** - HTTP request handling, standardized responses -5. **Module** - Wire up components, export router - -### Code Patterns - -**Repository:** Extend base Repository, use Prisma client for data access -**Service:** Inject repository, implement business logic, use logger -**Controller:** Handle HTTP requests, validate with DTOs, call services -**Module:** Wire up dependencies, export router - -### Acceptance Criteria for Phase 3 - -- [ ] All DTOs defined with Zod validation -- [ ] Repository methods implemented -- [ ] Service business logic implemented -- [ ] Controllers handle requests correctly -- [ ] Modules configured properly -- [ ] No TypeScript errors -- [ ] Manual API testing successful - -## Phase 4: Integration - -### Route Registration - -Update `src/routes/index.ts`: -- Import feature modules -- Create router instances -- Register routes with path prefixes -- Mount to main app with `/api/v1/service-name` prefix - -### Middleware Setup - -**Required Middlewares (in order):** -1. Correlation middleware -2. Logging middleware -3. Metrics middleware -4. CORS middleware -5. Rate limiting middleware -6. Authentication middleware (if needed) -7. 
Error middleware (always last) - -### External Service Integration - -- HTTP clients: Use `@goodgo/http-client` for external APIs -- Redis caching: Implement cache patterns for frequently accessed data -- Error handling: Handle external service failures gracefully - -### Acceptance Criteria for Phase 4 - -- [ ] All routes registered and accessible -- [ ] Middlewares applied in correct order -- [ ] Error handling works for all scenarios -- [ ] External services integrated (if any) -- [ ] Caching implemented (if needed) -- [ ] Health check endpoint works: `/health` - -## Phase 5: Testing - -### Test Structure - -**Unit Tests:** Next to source files (`*.test.ts`), mock all dependencies -**Integration Tests:** `src/__tests__/`, test component interactions -**E2E Tests:** `src/__tests__/*.e2e.ts`, test full API workflows - -### Test Coverage Targets - -- Minimum: 70% coverage (branches, functions, lines, statements) -- Critical paths: 90%+ coverage -- Repositories: 80%+ coverage -- Services: 80%+ coverage -- Controllers: 70%+ coverage - -### Testing Checklist - -**Unit Tests:** -- [ ] Repository tests: All CRUD operations -- [ ] Service tests: Business logic, error handling -- [ ] Controller tests: Request/response handling -- [ ] DTO tests: Validation rules - -**Integration Tests:** -- [ ] Module integration: Controller → Service → Repository -- [ ] Database operations: Real Prisma client with test DB -- [ ] Middleware chain: Request flow through middlewares - -**E2E Tests:** -- [ ] API endpoints: Full request/response cycle -- [ ] Authentication: Protected routes -- [ ] Error scenarios: 400, 401, 403, 404, 500 -- [ ] Health checks: /health endpoint - -### Acceptance Criteria for Phase 5 - -- [ ] All unit tests pass: `pnpm test` -- [ ] Integration tests pass -- [ ] E2E tests pass -- [ ] Coverage meets thresholds: `pnpm test:coverage` -- [ ] No test warnings or errors -- [ ] Tests run in CI pipeline successfully - -## Phase 6: Documentation - -### Required Documentation 
- -**Service README:** -- Service overview (bilingual EN/VI) -- Features list -- Prerequisites -- Quick start guide -- Configuration reference (environment variables table) -- API endpoints overview -- Development guide -- Testing instructions - -**API Documentation:** -- Swagger/OpenAPI spec: `src/docs/swagger.ts` -- Document all endpoints -- Request/response schemas -- Examples - -**Architecture Documentation (if complex):** -- `ARCHITECTURE.en.md` / `ARCHITECTURE.vi.md` -- System design, data flow, component interactions - -### Documentation Checklist - -- [ ] README is comprehensive and bilingual -- [ ] Swagger docs accessible: `/api-docs` -- [ ] All endpoints appear in Swagger -- [ ] Examples are clear and accurate -- [ ] Environment variables documented -- [ ] Architecture docs created (if needed) - -### Acceptance Criteria for Phase 6 - -- [ ] README is comprehensive and bilingual -- [ ] Swagger docs accessible: `/api-docs` -- [ ] All endpoints documented with examples -- [ ] Documentation reviewed and accurate - -## Phase 7: Cleanup & Verification - -### Verification Process Flow - -This diagram illustrates the cleanup and verification workflow for Phase 7, including the decision point for migrations and the comprehensive verification steps. - -```mermaid -graph TD - Start[Start Cleanup] --> Remove[Remove Temporary Files] - Remove --> Update{Is Migration?} - Update -->|Yes| RefUpdate[Update References
grep & replace] - Update -->|No| Verify[Run Verification] - RefUpdate --> Verify - - Verify --> TypeCheck[TypeScript Check] - TypeCheck --> LintCheck[Lint Check] - LintCheck --> TestCheck[Test Check] - TestCheck --> BuildCheck[Build Check] - BuildCheck --> DockerCheck[Docker Build] - DockerCheck --> HealthCheck[Health Check] - HealthCheck --> TraefikCheck[Traefik Check] - TraefikCheck --> AllPass{All Pass?} - - AllPass -->|No| Fix[Fix Issues] - Fix --> Verify - AllPass -->|Yes| Complete[Phase Complete] - - style Remove fill:#ffe1e1 - style RefUpdate fill:#fff4e1 - style Verify fill:#e1ffe1 - style Complete fill:#d4edda -``` - -### Cleanup Checklist - -**Remove Temporary Files:** -- [ ] Remove backup directories (e.g., `service-name.backup/`) -- [ ] Remove temporary status files (e.g., `*_STATUS.md`, `*_CHECKLIST.md`) -- [ ] Remove debug/scratch files -- [ ] Clean up unused imports -- [ ] Remove commented-out code - -**Reference Updates (for migrations/renames):** -```bash -# Find all references -grep -r "old-service-name" . --exclude-dir=node_modules --exclude-dir=.git - -# Update checklist: -- [ ] Package names: `@goodgo/old-name` → `@goodgo/new-name` -- [ ] Service paths: `services/old-name` → `services/new-name` -- [ ] Docker images: `goodgo/old-name` → `goodgo/new-name` -- [ ] Deployment names: `old-name` → `new-name` -- [ ] Environment variables updated -- [ ] CI/CD workflows updated -- [ ] Scripts updated (if needed) -- [ ] Documentation updated (except historical context) -``` - -### Verification Steps - -**Comprehensive Verification:** -```bash -# 1. Service starts successfully -pnpm dev && curl http://localhost:5000/health - -# 2. Type checking passes -pnpm typecheck - -# 3. Linting passes -pnpm lint - -# 4. Tests pass with coverage -pnpm test && pnpm test:coverage - -# 5. Build succeeds -pnpm build - -# 6. Docker build succeeds -docker build -t service-name . - -# 7. Service accessible via Traefik -curl http://localhost/api/v1/service-name/health - -# 8. 
No broken references (if migration) -grep -r "old-reference" . --exclude-dir=node_modules -``` - -### Final Verification Checklist - -**Code Quality:** -- [ ] No TypeScript errors -- [ ] No linting errors -- [ ] No unused imports/variables -- [ ] Code follows project conventions -- [ ] Comments are clear (bilingual if needed) - -**Functionality:** -- [ ] Service starts without errors -- [ ] Health check works -- [ ] All API endpoints functional -- [ ] Database operations work -- [ ] External integrations work (if any) - -**Testing:** -- [ ] All tests pass -- [ ] Coverage meets requirements -- [ ] E2E tests verify full workflows - -**Documentation:** -- [ ] README is complete and accurate -- [ ] API documentation is up-to-date -- [ ] Code comments are helpful - -**Infrastructure:** -- [ ] Docker image builds -- [ ] Service works in Docker Compose -- [ ] Traefik routes configured correctly -- [ ] Environment variables documented - -**Cleanup:** -- [ ] Temporary files removed -- [ ] All references updated (if migration) -- [ ] No orphaned files - -### Acceptance Criteria for Phase 7 - -- [ ] All cleanup tasks completed -- [ ] All verification steps pass -- [ ] No broken references or links -- [ ] Code is production-ready -- [ ] Documentation is complete - -## Phase 8: Deployment - -### Staging Deployment - -**Pre-deployment Checklist:** -- [ ] Database migrations tested: `pnpm prisma migrate deploy` -- [ ] Environment variables configured in staging -- [ ] Kubernetes manifests reviewed -- [ ] Secrets configured in Kubernetes -- [ ] Health checks configured - -**Deployment Steps:** -```bash -# 1. Build and push Docker image -docker build -t goodgo/service-name:latest . -docker push goodgo/service-name:latest - -# 2. Apply Kubernetes configs -kubectl apply -f deployments/staging/kubernetes/service-name.yaml -kubectl apply -f deployments/staging/kubernetes/service-name-configmap.yaml - -# 3. 
Wait for rollout -kubectl rollout status deployment/service-name -n staging - -# 4. Verify deployment -kubectl get pods -n staging -l app=service-name -curl https://staging-api.example.com/api/v1/service-name/health -``` - -### Production Deployment - -**Pre-production Checklist:** -- [ ] Staging tests passed -- [ ] Database backup created -- [ ] Rollback plan documented -- [ ] Monitoring dashboards ready -- [ ] Alerting configured - -### Acceptance Criteria for Phase 8 - -- [ ] Service deployed to staging successfully -- [ ] All staging tests pass -- [ ] Monitoring shows healthy metrics -- [ ] Production deployment completed (if applicable) -- [ ] Post-deployment verification successful - -## Rollback Strategy - -### When to Rollback - -- Critical errors in staging/production -- Performance degradation -- Data integrity issues -- Security vulnerabilities discovered - -### Rollback Steps - -```bash -# 1. Identify previous working version -kubectl rollout history deployment/service-name -n staging - -# 2. Rollback to previous version -kubectl rollout undo deployment/service-name -n staging - -# 3. Verify rollback -kubectl rollout status deployment/service-name -n staging - -# 4. Database rollback (if needed) -# Revert migrations if schema changes were made -``` - -## Best Practices Summary - -1. **Always Plan First**: Complete impact analysis before coding -2. **Follow Phases**: Don't skip verification steps -3. **Test Early**: Write tests alongside implementation -4. **Document as You Go**: Don't leave documentation for the end -5. **Clean Up Regularly**: Remove temporary files during development -6. **Verify Comprehensively**: Use checklists to ensure nothing is missed -7. **Plan for Rollback**: Always have a rollback strategy - -## Common Pitfalls to Avoid - -1. **Skipping Impact Analysis**: Leads to missing updates in scripts/configs -2. **No Verification Steps**: Misses broken references or incomplete implementation -3. 
**Deferring Cleanup**: Accumulates technical debt -4. **Incomplete Testing**: Missing edge cases and error scenarios -5. **Poor Documentation**: Makes maintenance difficult -6. **No Rollback Plan**: Difficult to recover from failures - -## Resources - -- [Project Rules](../project-rules/SKILL.md) - Architecture and conventions -- [API Design](../api-design/SKILL.md) - API design patterns -- [Testing Patterns](../testing-patterns/SKILL.md) - Testing best practices -- [Documentation](../documentation/SKILL.md) - Documentation guidelines -- [Database Prisma](../database-prisma/SKILL.md) - Prisma patterns -- Service Template: `services/_template/` diff --git a/apps/web-docs/content/docs/en/skills/middleware-patterns.md b/apps/web-docs/content/docs/en/skills/middleware-patterns.md deleted file mode 100644 index b5af1845..00000000 --- a/apps/web-docs/content/docs/en/skills/middleware-patterns.md +++ /dev/null @@ -1,413 +0,0 @@ ---- -name: middleware-patterns -description: Express middleware patterns and best practices for GoodGo microservices. Use when creating custom middleware, organizing middleware chains, handling request/response transformation, or implementing cross-cutting concerns. ---- - -# Middleware Patterns - -## When to Use This Skill - -Use this skill when: -- Creating custom Express middleware -- Organizing middleware chains and ordering -- Implementing authentication/authorization middleware -- Creating request/response transformation middleware -- Handling cross-cutting concerns (logging, metrics, validation) -- Implementing async middleware patterns -- Testing middleware implementations - -## Core Concepts - -### Middleware Function Signature - -Express middleware functions have this signature: - -```typescript -(req: Request, res: Response, next: NextFunction) => void | Promise<void> -``` - -### Middleware Types - -1. **Application-level**: Applied to all routes (`app.use()`) -2. **Router-level**: Applied to specific routes (`router.use()`) -3. 
**Route-level**: Applied to specific route handlers - -### Middleware Execution Order - -**Critical**: Middleware order matters! Execution flows top-to-bottom: - -``` -Request → Middleware 1 → Middleware 2 → ... → Route Handler → Response -``` - -The following diagram illustrates the complete middleware chain flow in GoodGo services: - -```mermaid -flowchart TD - Start([HTTP Request]) --> Security["Security Middleware
Helmet, CORS"] - Security --> RateLimit["Rate Limiting
Middleware"] - RateLimit --> Correlation["Correlation ID
Middleware"] - Correlation --> BodyParsing["Body Parsing
JSON, URLEncoded, Cookies"] - BodyParsing --> Logging["Request Logging
Middleware"] - Logging --> Metrics["Metrics Collection
Middleware"] - Metrics --> Routes["Route Handlers"] - Routes -->|Success| Response([HTTP Response]) - Routes -->|Error| ErrorHandler["Error Handler
Middleware"] - Routes -->|Not Found| NotFound["Not Found
Handler"] - ErrorHandler --> Response - NotFound --> Response -``` - -## Patterns - -### Middleware Chain Order - -Standard middleware order in GoodGo services: - -```typescript -// 1. Security (Helmet, CORS) -app.use(helmet()); -app.use(cors({ ... })); - -// 2. Rate Limiting -app.use('/api', rateLimitMiddleware); - -// 3. Correlation ID (early for tracing) -app.use(correlationMiddleware()); - -// 4. Body Parsing -app.use(express.json()); -app.use(express.urlencoded({ extended: true })); -app.use(cookieParser()); - -// 5. Request Logging -app.use(loggerMiddleware); - -// 6. Metrics -app.use(metricsMiddleware); - -// 7. Routes -app.use(createRouter()); - -// 8. Error Handling (ALWAYS LAST) -app.use(notFoundHandler); -app.use(errorHandler); -``` - -### Correlation Middleware Pattern - -Adds correlation ID for request tracing: - -```typescript -export const correlationMiddleware = () => { - return (req: Request, res: Response, next: NextFunction) => { - const correlationId = req.headers['x-correlation-id'] || generateId(); - req.correlationId = correlationId; - res.setHeader('x-correlation-id', correlationId); - next(); - }; -}; -``` - -### Authentication Middleware Pattern - -Verifies JWT tokens and attaches user to request: - -```typescript -export const authenticate = () => { - return async (req: Request, res: Response, next: NextFunction) => { - try { - const token = extractToken(req); - if (!token) { - return res.status(401).json({ error: 'Unauthorized' }); - } - - const payload = await jwtService.verify(token); - req.user = payload; - next(); - } catch (error) { - return res.status(401).json({ error: 'Invalid token' }); - } - }; -}; -``` - -The following sequence diagram illustrates the authentication middleware flow: - -```mermaid -sequenceDiagram - participant Client - participant AuthMW as Authentication Middleware - participant JWTService as JWT Service - participant RouteHandler as Route Handler - - Client->>AuthMW: HTTP Request with Token - 
AuthMW->>AuthMW: Extract token from headers - alt Token exists - AuthMW->>JWTService: Verify token - alt Token valid - JWTService-->>AuthMW: Payload (user data) - AuthMW->>AuthMW: Attach user to req.user - AuthMW->>RouteHandler: next() - Continue - RouteHandler->>Client: HTTP Response (200) - else Token invalid - JWTService-->>AuthMW: Verification error - AuthMW->>Client: HTTP Response (401 Unauthorized) - end - else No token - AuthMW->>Client: HTTP Response (401 Unauthorized) - end -``` - -### Validation Middleware Pattern - -Validates request data using Zod: - -```typescript -export const validateDto = (schema: AnyZodObject, property: 'body' | 'query' | 'params' = 'body') => { - return (req: Request, res: Response, next: NextFunction) => { - try { - const validatedData = schema.parse(req[property]); - (req as any)[property] = validatedData; - next(); - } catch (error) { - if (error instanceof ZodError) { - return res.status(400).json({ - success: false, - error: { - code: 'VALIDATION_ERROR', - details: error.errors, - }, - }); - } - next(error); - } - }; -}; -``` - -The following sequence diagram shows the validation middleware flow: - -```mermaid -sequenceDiagram - participant Client - participant ValidateMW as Validation Middleware - participant ZodSchema as Zod Schema - participant RouteHandler as Route Handler - - Client->>ValidateMW: HTTP Request with Data - ValidateMW->>ValidateMW: Extract data from req[property] - ValidateMW->>ZodSchema: schema.parse(data) - alt Validation successful - ZodSchema-->>ValidateMW: Validated data - ValidateMW->>ValidateMW: Replace req[property] with validated data - ValidateMW->>RouteHandler: next() - Continue - RouteHandler->>Client: HTTP Response (200) - else Validation failed - ZodSchema-->>ValidateMW: ZodError with details - ValidateMW->>ValidateMW: Format error response - ValidateMW->>Client: HTTP Response (400 Validation Error) - end -``` - -### Conditional Middleware - -Apply middleware conditionally: - -```typescript 
-const conditionalAuth = (options: { optional?: boolean } = {}) => { - return async (req: Request, res: Response, next: NextFunction) => { - try { - const token = extractToken(req); - if (token) { - const payload = await jwtService.verify(token); - req.user = payload; - } else if (!options.optional) { - return res.status(401).json({ error: 'Unauthorized' }); - } - next(); - } catch (error) { - if (!options.optional) { - return res.status(401).json({ error: 'Invalid token' }); - } - next(); - } - }; -}; -``` - -### Async Middleware Pattern - -Handle async operations properly: - -```typescript -export const asyncMiddleware = (fn: Function) => { - return (req: Request, res: Response, next: NextFunction) => { - Promise.resolve(fn(req, res, next)).catch(next); - }; -}; - -// Usage -app.get('/users', asyncMiddleware(async (req, res) => { - const users = await userService.findAll(); - res.json({ success: true, data: users }); -})); -``` - -The following sequence diagram illustrates async middleware error handling: - -```mermaid -sequenceDiagram - participant Client - participant AsyncMW as Async Middleware Wrapper - participant AsyncHandler as Async Route Handler - participant ErrorHandler as Error Handler - - Client->>AsyncMW: HTTP Request - AsyncMW->>AsyncHandler: Execute async function - alt Async operation succeeds - AsyncHandler->>AsyncHandler: Process request - AsyncHandler->>Client: HTTP Response (200) - else Async operation fails - AsyncHandler-->>AsyncMW: Promise rejection (Error) - AsyncMW->>ErrorHandler: next(error) - ErrorHandler->>ErrorHandler: Format error response - ErrorHandler->>Client: HTTP Response (500 Error) - end -``` - -### Request/Response Transformation - -Transform request or response data: - -```typescript -export const transformResponse = () => { - return (req: Request, res: Response, next: NextFunction) => { - const originalJson = res.json.bind(res); - - res.json = function(data: any) { - const transformed = { - success: true, - data, - 
timestamp: new Date().toISOString(), - }; - return originalJson(transformed); - }; - - next(); - }; -}; -``` - -The following sequence diagram shows how request and response transformation middleware works: - -```mermaid -sequenceDiagram - participant Client - participant TransformMW as Transform Middleware - participant RouteHandler as Route Handler - participant Response as Response Object - - Client->>TransformMW: HTTP Request - Note over TransformMW: Intercept res.json() - TransformMW->>TransformMW: Store original res.json - TransformMW->>TransformMW: Override res.json with transformation logic - TransformMW->>RouteHandler: next() - Continue chain - - RouteHandler->>RouteHandler: Process request, Generate data - RouteHandler->>Response: res.json(rawData) - - Note over Response: Transformed res.json executes - Response->>Response: Wrap data: success, data, timestamp - Response->>Client: HTTP Response (transformed) -``` - -### Logging Middleware Pattern - -Log request details: - -```typescript -export const requestLogger = (req: Request, res: Response, next: NextFunction) => { - const startTime = Date.now(); - - res.on('finish', () => { - const duration = Date.now() - startTime; - logger.info('Request completed', { - method: req.method, - url: req.url, - statusCode: res.statusCode, - duration, - correlationId: req.correlationId, - }); - }); - - next(); -}; -``` - -The following sequence diagram illustrates how logging middleware tracks request lifecycle: - -```mermaid -sequenceDiagram - participant Client - participant LogMW as Logging Middleware - participant RouteHandler as Route Handler - participant Logger as Logger Service - participant Response as Response Object - - Client->>LogMW: HTTP Request - LogMW->>LogMW: Record startTime = Date.now() - LogMW->>RouteHandler: next() - Continue chain - - RouteHandler->>RouteHandler: Process request - RouteHandler->>Response: res.json(data) or res.send() - - Response->>Response: Set statusCode, send response - 
Response->>Response: Emit 'finish' event - - Response->>LogMW: 'finish' event triggered - LogMW->>LogMW: Calculate duration = Date.now() - startTime - LogMW->>Logger: logger.info('Request completed', {
method, url, statusCode,
duration, correlationId}) - Logger->>Logger: Write structured log entry - - Response->>Client: HTTP Response -``` - -## Best Practices - -1. **Order Matters**: Place middleware in correct order (security → correlation → parsing → logging → routes → errors) -2. **Error Handling**: Always handle errors and call `next(error)` for error middleware -3. **Async Support**: Wrap async middleware properly to catch promise rejections -4. **Early Returns**: Use early returns for validation failures (don't call `next()`) -5. **Request Extension**: Use TypeScript declaration merging to extend Request type -6. **Conditional Logic**: Use middleware factories for conditional middleware -7. **Reusability**: Create reusable middleware functions -8. **Performance**: Keep middleware lightweight, avoid heavy operations - -## Common Mistakes - -1. **Wrong Order**: Placing middleware in incorrect order (e.g., error handler before routes) -2. **Not Calling Next**: Forgetting to call `next()` or `next(error)` -3. **Async Errors**: Not handling promise rejections in async middleware -4. **Early Return Issues**: Calling `next()` after sending response -5. **Type Safety**: Not extending Express Request type properly -6. **Performance**: Doing heavy operations in middleware - -## Troubleshooting - -### Middleware Not Executing - -**Problem**: Middleware not being called -**Solution**: Check middleware order, ensure it's added before routes. Verify `next()` is called. - -### Async Errors Not Caught - -**Problem**: Unhandled promise rejections in async middleware -**Solution**: Use the `asyncMiddleware` wrapper or wrap async code in try-catch with `next(error)`. 
- -## Resources - -- [Correlation Middleware](../../services/iam-service/src/middlewares/correlation.middleware.ts) -- [Auth Middleware](../../services/iam-service/src/middlewares/auth.middleware.ts) -- [Validation Middleware](../../services/iam-service/src/middlewares/validation.middleware.ts) -- [Error Handling](../error-handling-patterns/SKILL.md) - Error middleware patterns diff --git a/apps/web-docs/content/docs/en/skills/observability-monitoring.md b/apps/web-docs/content/docs/en/skills/observability-monitoring.md deleted file mode 100644 index 8622cc3a..00000000 --- a/apps/web-docs/content/docs/en/skills/observability-monitoring.md +++ /dev/null @@ -1,658 +0,0 @@ ---- -name: observability-monitoring -description: Observability and monitoring patterns for GoodGo microservices. Use when adding metrics, implementing logging, setting up tracing, creating health checks, or debugging production issues. ---- - -# Observability & Monitoring Patterns - -## When to Use This Skill - -Use this skill when: -- Setting up logging infrastructure -- Implementing metrics collection -- Adding distributed tracing -- Creating health check endpoints -- Setting up monitoring dashboards -- Debugging production issues -- Implementing alerting rules -- Analyzing performance bottlenecks - -## Core Concepts - -### Three Pillars of Observability -1. **Logs**: Event records for debugging -2. **Metrics**: Numerical measurements over time -3. **Traces**: Request flow across services - -### Tech Stack -- **Logging**: Winston, Pino -- **Metrics**: Prometheus + Grafana -- **Tracing**: OpenTelemetry + Jaeger -- **APM**: DataDog or New Relic (optional) - -### Observability Stack Architecture - -The observability stack consists of three pillars working together to provide comprehensive visibility into system behavior: - -```mermaid -graph TB - subgraph "Application Layer" - App[Microservice] - end - - subgraph "Three Pillars of Observability" - Logs[Logs
Winston/Pino] - Metrics[Metrics
Prometheus] - Traces[Traces
OpenTelemetry] - end - - subgraph "Aggregation & Storage" - Loki[Loki
Log Aggregation] - Prom[Prometheus
Metrics Storage] - Jaeger[Jaeger
Trace Storage] - end - - subgraph "Visualization & Alerting" - Grafana[Grafana
Dashboards] - AlertManager[AlertManager
Alerts] - end - - App -->|Structured Logs| Logs - App -->|HTTP Metrics| Metrics - App -->|Distributed Spans| Traces - - Logs -->|Collect| Loki - Metrics -->|Scrape /metrics| Prom - Traces -->|Export| Jaeger - - Loki -->|Query| Grafana - Prom -->|Query| Grafana - Prom -->|Alerts| AlertManager - Jaeger -->|Query| Grafana - - style App fill:#e1f5ff - style Logs fill:#fff4e1 - style Metrics fill:#e1ffe1 - style Traces fill:#ffe1f5 - style Grafana fill:#e1e1ff -``` - -## Structured Logging - -```typescript -// src/lib/logger.ts -import winston from 'winston'; - -const logFormat = winston.format.combine( - winston.format.timestamp(), - winston.format.errors({ stack: true }), - winston.format.json() -); - -export const logger = winston.createLogger({ - level: process.env.LOG_LEVEL || 'info', - format: logFormat, - defaultMeta: { - service: process.env.SERVICE_NAME || 'unknown', - environment: process.env.NODE_ENV || 'development' - }, - transports: [ - new winston.transports.Console({ - format: process.env.NODE_ENV === 'development' - ? winston.format.combine( - winston.format.colorize(), - winston.format.simple() - ) - : logFormat - }), - // Production: Send to log aggregation service - ...(process.env.NODE_ENV === 'production' - ? 
[new winston.transports.Http({ - host: 'logs.example.com', - path: '/collect', - ssl: true - })] - : []) - ] -}); - -// Request logger middleware -export const requestLogger = (req: Request, res: Response, next: NextFunction) => { - const start = Date.now(); - - res.on('finish', () => { - const duration = Date.now() - start; - - logger.info('HTTP Request', { - method: req.method, - url: req.url, - status: res.statusCode, - duration, - ip: req.ip, - userAgent: req.get('user-agent'), - correlationId: req.headers['x-correlation-id'] - }); - }); - - next(); -}; -``` - -### Logging Flow - -The logging flow shows how requests are logged with correlation IDs and flow through the system: - -```mermaid -sequenceDiagram - participant Client - participant Service as Microservice - participant Logger as Winston/Pino Logger - participant Aggregator as Log Aggregator
(Loki) - participant Dashboard as Grafana Dashboard - - Client->>Service: HTTP Request
(with x-correlation-id) - Service->>Service: Generate/Extract
Correlation ID - Service->>Logger: Log Request Start
{correlationId, method, url} - Service->>Service: Process Request - Service->>Logger: Log Business Event
{correlationId, event, data} - Service->>Client: HTTP Response
(with x-correlation-id) - Service->>Logger: Log Request End
{correlationId, status, duration} - - Logger->>Aggregator: Send Structured Logs
(JSON format) - Aggregator->>Dashboard: Index & Store Logs - Dashboard->>Dashboard: Query by correlationId
to trace request flow -``` - -## Metrics Collection - -```typescript -// src/lib/metrics.ts -import { Registry, Counter, Histogram, Gauge } from 'prom-client'; - -export const register = new Registry(); - -// HTTP metrics -export const httpRequestDuration = new Histogram({ - name: 'http_request_duration_seconds', - help: 'Duration of HTTP requests in seconds', - labelNames: ['method', 'route', 'status'], - buckets: [0.1, 0.3, 0.5, 0.7, 1, 3, 5, 7, 10] -}); - -export const httpRequestTotal = new Counter({ - name: 'http_requests_total', - help: 'Total number of HTTP requests', - labelNames: ['method', 'route', 'status'] -}); - -// Business metrics -export const userRegistrations = new Counter({ - name: 'user_registrations_total', - help: 'Total number of user registrations', - labelNames: ['type'] -}); - -export const activeUsers = new Gauge({ - name: 'active_users', - help: 'Number of active users', - labelNames: ['status'] -}); - -// Register metrics -register.registerMetric(httpRequestDuration); -register.registerMetric(httpRequestTotal); -register.registerMetric(userRegistrations); -register.registerMetric(activeUsers); - -// Metrics middleware -export const metricsMiddleware = (req: Request, res: Response, next: NextFunction) => { - const start = Date.now(); - - res.on('finish', () => { - const duration = (Date.now() - start) / 1000; - const route = req.route?.path || req.path; - - httpRequestDuration - .labels(req.method, route, res.statusCode.toString()) - .observe(duration); - - httpRequestTotal - .labels(req.method, route, res.statusCode.toString()) - .inc(); - }); - - next(); -}; - -// Metrics endpoint -export const metricsHandler = async (req: Request, res: Response) => { - res.set('Content-Type', register.contentType); - res.end(await register.metrics()); -}; -``` - -### Metrics Collection Flow - -Metrics are collected from services and exposed to Prometheus for monitoring and alerting: - -```mermaid -graph LR - subgraph "Service Instance" - 
App[Application] - Middleware[Metrics Middleware] - Registry[Prometheus Registry] - Endpoint[/metrics Endpoint] - end - - subgraph "Metrics Types" - Counter[Counter
http_requests_total] - Gauge[Gauge
active_users] - Histogram[Histogram
request_duration] - end - - subgraph "Collection" - Prometheus[Prometheus
Scraper] - end - - subgraph "Storage & Query" - PromDB[(Prometheus
Time Series DB)] - end - - subgraph "Visualization" - Grafana[Grafana
Dashboards] - Alerts[AlertManager
Rules] - end - - App -->|HTTP Request| Middleware - Middleware -->|Record| Counter - Middleware -->|Record| Histogram - App -->|Update| Gauge - - Counter --> Registry - Gauge --> Registry - Histogram --> Registry - Registry --> Endpoint - - Prometheus -->|Scrape every 15s| Endpoint - Prometheus -->|Store| PromDB - - PromDB -->|Query| Grafana - PromDB -->|Evaluate| Alerts - Alerts -->|Trigger| Grafana - - style App fill:#e1f5ff - style Prometheus fill:#ffe1e1 - style Grafana fill:#e1e1ff -``` - -## Distributed Tracing - -```typescript -// src/lib/tracing.ts -import { NodeSDK } from '@opentelemetry/sdk-node'; -import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node'; -import { Resource } from '@opentelemetry/resources'; -import { SemanticResourceAttributes } from '@opentelemetry/semantic-conventions'; -import { JaegerExporter } from '@opentelemetry/exporter-jaeger'; - -export const initTracing = () => { - const jaegerExporter = new JaegerExporter({ - endpoint: process.env.JAEGER_ENDPOINT || 'http://localhost:14268/api/traces', - }); - - const sdk = new NodeSDK({ - resource: new Resource({ - [SemanticResourceAttributes.SERVICE_NAME]: process.env.SERVICE_NAME || 'unknown', - [SemanticResourceAttributes.SERVICE_VERSION]: process.env.SERVICE_VERSION || '1.0.0', - }), - traceExporter: jaegerExporter, - instrumentations: [getNodeAutoInstrumentations()] - }); - - sdk.start(); - - process.on('SIGTERM', () => { - sdk.shutdown() - .then(() => console.log('Tracing terminated')) - .catch((error) => console.log('Error terminating tracing', error)) - .finally(() => process.exit(0)); - }); -}; - -// Custom span creation -import { trace, SpanStatusCode } from '@opentelemetry/api'; - -export const tracedOperation = async (name: string, fn: Function) => { - const tracer = trace.getTracer('application'); - const span = tracer.startSpan(name); - - try { - const result = await fn(); - span.setStatus({ code: SpanStatusCode.OK }); - return result; - } catch 
(error) { - span.setStatus({ - code: SpanStatusCode.ERROR, - message: error.message - }); - span.recordException(error); - throw error; - } finally { - span.end(); - } -}; -``` - -### Distributed Tracing Flow - -Distributed tracing tracks requests across multiple services using OpenTelemetry: - -```mermaid -sequenceDiagram - participant Client - participant Gateway as API Gateway - participant ServiceA as Service A
(User Service) - participant ServiceB as Service B
(Order Service) - participant DB as Database - participant Jaeger as Jaeger
Collector - - Client->>Gateway: Request
(Trace ID: abc123) - Gateway->>Gateway: Create Root Span
Span: gateway.request - Gateway->>ServiceA: HTTP Call
(Trace ID: abc123,
Span ID: span-1) - - ServiceA->>ServiceA: Create Child Span
Span: user.getById - ServiceA->>DB: Query User
(Trace ID: abc123,
Span ID: span-2) - DB-->>ServiceA: User Data - ServiceA->>ServiceA: End Span span-2 - ServiceA->>ServiceB: HTTP Call
(Trace ID: abc123,
Span ID: span-3) - - ServiceB->>ServiceB: Create Child Span
Span: order.getByUserId - ServiceB->>DB: Query Orders
(Trace ID: abc123,
Span ID: span-4) - DB-->>ServiceB: Orders Data - ServiceB->>ServiceB: End Span span-4 - ServiceB->>ServiceB: End Span span-3 - ServiceB-->>ServiceA: Response - ServiceA->>ServiceA: End Span span-1 - ServiceA-->>Gateway: Response - Gateway->>Gateway: End Span gateway.request - Gateway-->>Client: Final Response - - ServiceA->>Jaeger: Export Spans
(Trace ID: abc123) - ServiceB->>Jaeger: Export Spans
(Trace ID: abc123) - Gateway->>Jaeger: Export Spans
(Trace ID: abc123) - - Note over Jaeger: All spans linked by
Trace ID: abc123 -``` - -## Health Checks - -```typescript -// src/modules/health/health.controller.ts -export class HealthController { - constructor( - private prisma: PrismaClient, - private redis: Redis - ) {} - - // Liveness probe - is the service running? - async liveness(req: Request, res: Response) { - res.json({ - status: 'ok', - timestamp: new Date().toISOString() - }); - } - - // Readiness probe - is the service ready for traffic? - async readiness(req: Request, res: Response) { - const checks = await this.runHealthChecks(); - const isHealthy = Object.values(checks).every(check => check.status === 'healthy'); - - res.status(isHealthy ? 200 : 503).json({ - status: isHealthy ? 'ready' : 'not ready', - checks, - timestamp: new Date().toISOString() - }); - } - - // Detailed health check - async health(req: Request, res: Response) { - const checks = await this.runHealthChecks(); - const isHealthy = Object.values(checks).every(check => check.status === 'healthy'); - - res.status(isHealthy ? 200 : 503).json({ - status: isHealthy ? 'healthy' : 'unhealthy', - version: process.env.SERVICE_VERSION || '1.0.0', - uptime: process.uptime(), - checks, - timestamp: new Date().toISOString() - }); - } - - private async runHealthChecks() { - const checks: Record = {}; - - // Database check - try { - const start = Date.now(); - await this.prisma.$queryRaw`SELECT 1`; - checks.database = { - status: 'healthy', - responseTime: Date.now() - start - }; - } catch (error) { - checks.database = { - status: 'unhealthy', - error: error.message - }; - } - - // Redis check - try { - const start = Date.now(); - await this.redis.ping(); - checks.redis = { - status: 'healthy', - responseTime: Date.now() - start - }; - } catch (error) { - checks.redis = { - status: 'unhealthy', - error: error.message - }; - } - - // Memory check - const memUsage = process.memoryUsage(); - checks.memory = { - status: memUsage.heapUsed < 500 * 1024 * 1024 ? 
'healthy' : 'warning', - heapUsed: Math.round(memUsage.heapUsed / 1024 / 1024), - heapTotal: Math.round(memUsage.heapTotal / 1024 / 1024), - rss: Math.round(memUsage.rss / 1024 / 1024) - }; - - return checks; - } -} -``` - -## Error Tracking - -```typescript -// src/lib/error-tracking.ts -import * as Sentry from '@sentry/node'; - -export const initErrorTracking = () => { - if (process.env.SENTRY_DSN) { - Sentry.init({ - dsn: process.env.SENTRY_DSN, - environment: process.env.NODE_ENV, - tracesSampleRate: 0.1, - beforeSend(event, hint) { - // Filter sensitive data - if (event.request?.cookies) { - delete event.request.cookies; - } - return event; - } - }); - } -}; - -// Error handler middleware -export const errorHandler = ( - err: Error, - req: Request, - res: Response, - next: NextFunction -) => { - // Log error - logger.error('Unhandled error', { - error: err.message, - stack: err.stack, - url: req.url, - method: req.method, - correlationId: req.headers['x-correlation-id'] - }); - - // Report to Sentry - Sentry.captureException(err, { - tags: { - service: process.env.SERVICE_NAME - }, - user: { - id: req.user?.id - } - }); - - // Send response - res.status(500).json({ - success: false, - error: { - code: 'INTERNAL_ERROR', - message: process.env.NODE_ENV === 'production' - ? 
'Internal server error' - : err.message - } - }); -}; -``` - -## Performance Monitoring - -```typescript -// src/middlewares/performance.middleware.ts -export const performanceMiddleware = (req: Request, res: Response, next: NextFunction) => { - const start = process.hrtime.bigint(); - - res.on('finish', () => { - const end = process.hrtime.bigint(); - const duration = Number(end - start) / 1000000; // Convert to milliseconds - - // Log slow requests - if (duration > 1000) { - logger.warn('Slow request detected', { - method: req.method, - url: req.url, - duration, - threshold: 1000 - }); - } - - // Add to response header - res.set('X-Response-Time', `${duration}ms`); - }); - - next(); -}; -``` - -## Grafana Dashboard Config - -```json -{ - "dashboard": { - "title": "Service Metrics", - "panels": [ - { - "title": "Request Rate", - "targets": [{ - "expr": "rate(http_requests_total[5m])" - }] - }, - { - "title": "Request Duration", - "targets": [{ - "expr": "histogram_quantile(0.95, http_request_duration_seconds)" - }] - }, - { - "title": "Error Rate", - "targets": [{ - "expr": "rate(http_requests_total{status=~\"5..\"}[5m])" - }] - }, - { - "title": "Active Users", - "targets": [{ - "expr": "active_users" - }] - } - ] - } -} -``` - -## Alerting Rules - -```yaml -# prometheus/alerts.yml -groups: - - name: service_alerts - rules: - - alert: HighErrorRate - expr: rate(http_requests_total{status=~"5.."}[5m]) > 0.05 - for: 5m - annotations: - summary: "High error rate detected" - description: "Error rate is above 5% for 5 minutes" - - - alert: HighLatency - expr: histogram_quantile(0.95, http_request_duration_seconds) > 1 - for: 5m - annotations: - summary: "High latency detected" - description: "95th percentile latency is above 1s" - - - alert: ServiceDown - expr: up{job="service"} == 0 - for: 1m - annotations: - summary: "Service is down" - description: "Service has been down for 1 minute" -``` - -## Best Practices - -1. 
**Logging** - - Use structured logging (JSON format) - - Include correlation IDs for request tracing - - Log at appropriate levels (ERROR, WARN, INFO, DEBUG) - - Avoid logging sensitive data - -2. **Metrics** - - Use standard metric types (Counter, Gauge, Histogram) - - Keep cardinality low (avoid high-cardinality labels) - - Define SLIs and SLOs for critical paths - - Monitor business metrics, not just technical ones - -3. **Tracing** - - Add traces for critical operations - - Include relevant context in spans - - Sample appropriately to control costs - - Use distributed tracing for microservices - -4. **Alerting** - - Alert on symptoms, not causes - - Include runbook links in alerts - - Avoid alert fatigue with proper thresholds - - Test alerting rules regularly \ No newline at end of file diff --git a/apps/web-docs/content/docs/en/skills/performance-optimization.md b/apps/web-docs/content/docs/en/skills/performance-optimization.md deleted file mode 100644 index 6a13b6cf..00000000 --- a/apps/web-docs/content/docs/en/skills/performance-optimization.md +++ /dev/null @@ -1,158 +0,0 @@ ---- -name: performance-optimization -description: Performance optimization patterns for GoodGo microservices including database query optimization, memory leak detection, profiling, connection pooling, and caching strategies. ---- - -# Performance Optimization Patterns - -## When to Use This Skill - -Use this skill when: -- Optimizing database queries -- Detecting and fixing memory leaks -- Profiling application performance -- Optimizing connection pooling -- Improving caching strategies -- Identifying N+1 query problems - -## Performance Optimization Workflow - -The performance optimization process follows a systematic approach to identify, analyze, and resolve performance bottlenecks. 
- -```mermaid -flowchart TD - Start([Start Optimization]) --> Identify[Identify Performance Issue] - Identify --> Monitor[Monitor Metrics] - Monitor --> Profiling[Profile Application] - Profiling --> Analyze[Analyze Results] - Analyze --> IdentifyBottleneck{Identify Bottleneck} - - IdentifyBottleneck -->|Database| OptimizeDB[Optimize Queries] - IdentifyBottleneck -->|Memory| OptimizeMem[Fix Memory Leaks] - IdentifyBottleneck -->|Network| OptimizeNet[Optimize Connections] - IdentifyBottleneck -->|Cache| OptimizeCache[Improve Caching] - - OptimizeDB --> Implement[Implement Optimization] - OptimizeMem --> Implement - OptimizeNet --> Implement - OptimizeCache --> Implement - - Implement --> Test[Test Changes] - Test --> Verify{Performance Improved?} - Verify -->|Yes| Monitor - Verify -->|No| Analyze - Monitor --> Threshold{Meets Targets?} - Threshold -->|Yes| Complete([Optimization Complete]) - Threshold -->|No| Profiling -``` - -## Query Optimization Flow - -Database query optimization is a critical aspect of performance. This flow shows how to systematically optimize queries. 
- -```mermaid -flowchart TD - Start([Query Performance Issue]) --> Analyze[Analyze Query Performance] - Analyze --> CheckIndexes[Check Indexes] - CheckIndexes --> Explain[Run EXPLAIN ANALYZE] - Explain --> IdentifyIssues{Identify Issues} - - IdentifyIssues -->|N+1 Queries| FixN1[Use Include/Join] - IdentifyIssues -->|Missing Index| AddIndex[Add Database Index] - IdentifyIssues -->|Slow Query| OptimizeQuery[Rewrite Query] - IdentifyIssues -->|Unbounded| AddPagination[Add Pagination] - - FixN1 --> VerifyQuery[Verify Query Performance] - AddIndex --> VerifyQuery - OptimizeQuery --> VerifyQuery - AddPagination --> VerifyQuery - - VerifyQuery --> TestQuery{Query Time < 50ms?} - TestQuery -->|Yes| Complete([Optimization Complete]) - TestQuery -->|No| Analyze - - style FixN1 fill:#e1f5e1 - style AddIndex fill:#e1f5e1 - style OptimizeQuery fill:#e1f5e1 - style AddPagination fill:#e1f5e1 -``` - -## Profiling Process - -The profiling process helps identify performance bottlenecks through systematic data collection and analysis. 
- -```mermaid -flowchart TD - Start([Start Profiling]) --> Setup[Setup Profiling Tools] - Setup --> ChooseTool{Choose Profiling Type} - - ChooseTool -->|CPU| CPUProf[CPU Profiling] - ChooseTool -->|Memory| MemProf[Memory Profiling] - ChooseTool -->|Database| DBProf[Database Query Profiling] - - CPUProf --> CollectCPU[Collect CPU Metrics] - MemProf --> CollectMem[Collect Memory Metrics] - DBProf --> CollectDB[Collect Query Metrics] - - CollectCPU --> Analyze[Analyze Profiling Data] - CollectMem --> Analyze - CollectDB --> Analyze - - Analyze --> IdentifyHotspots[Identify Hotspots] - IdentifyHotspots --> Prioritize[Prioritize Issues] - Prioritize --> Optimize[Optimize Critical Paths] - Optimize --> ReProfile[Re-run Profiling] - ReProfile --> Compare{Performance Improved?} - Compare -->|Yes| Complete([Profiling Complete]) - Compare -->|No| IdentifyHotspots - - style CPUProf fill:#e3f2fd - style MemProf fill:#e3f2fd - style DBProf fill:#e3f2fd -``` - -## Key Patterns - -### Database Query Optimization - -```typescript -// Avoid N+1 queries -// Bad: Multiple queries -for (const user of users) { - user.orders = await orderRepository.findByUserId(user.id); -} - -// Good: Single query with join -const users = await userRepository.findAll({ - include: { orders: true }, -}); -``` - -### Memory Profiling - -```typescript -// Monitor memory usage -const profiler = new MemoryProfiler(); -profiler.start(); // Monitor every minute -``` - -### Batch Operations - -```typescript -// Batch database operations -await batchOperations.batchCreate(items, 100); // Process 100 at a time -``` - -## Best Practices - -1. Use indexes, avoid N+1 queries -2. Monitor memory usage, detect leaks -3. Cache frequently accessed data -4. Configure connection pools appropriately -5. 
Profile regularly to identify bottlenecks - -## Resources - -- [Caching Patterns](./caching-patterns.md) -- [Observability & Monitoring](./observability-monitoring.md) -- Skill Source: `.cursor/skills/performance-optimization/SKILL.md` diff --git a/apps/web-docs/content/docs/en/skills/project-rules.md b/apps/web-docs/content/docs/en/skills/project-rules.md deleted file mode 100644 index 8219bf1e..00000000 --- a/apps/web-docs/content/docs/en/skills/project-rules.md +++ /dev/null @@ -1,400 +0,0 @@ ---- -name: project-rules -description: GoodGo Microservices Platform coding standards and architecture patterns. Use when working with services, apps, packages, or infrastructure. ---- - -# GoodGo Project Rules - -## Architecture - -**Monorepo Structure:** -- **Apps**: Next.js (web) + Flutter (mobile) -- **Services**: Node.js/TypeScript microservices (Express) -- **Packages**: Shared libraries (logger, types, http-client, auth-sdk, tracing) -- **Infrastructure**: Traefik (API Gateway), Redis, Neon PostgreSQL, Observability -- **Deployments**: Local (Docker Compose), Staging/Production (Kubernetes) - -**Template Location**: `services/_template/` - Use as starting point for new services - -### Monorepo Architecture - -```mermaid -graph TB - subgraph apps[Apps Layer] - webAdmin[web-admin
Next.js Admin] - webClient[web-client
Next.js Client] - appAdmin[app-admin
Flutter Admin] - appClient[app-client
Flutter Client] - end - - subgraph gateway[API Gateway] - traefik[Traefik
Path-based Routing] - end - - subgraph services[Services Layer] - iamService[iam-service
IAM Service] - templateService[_template
Service Template] - otherServices[Other Services
Node.js/TypeScript] - end - - subgraph packages[Shared Packages] - loggerPackage[@goodgo/logger
Centralized Logging] - typesPackage[@goodgo/types
TypeScript Types] - httpClientPackage[@goodgo/http-client
API Client] - authSdkPackage[@goodgo/auth-sdk
Auth Utilities] - tracingPackage[@goodgo/tracing
OpenTelemetry] - configPackage[@goodgo/config
Shared Configs] - end - - subgraph infrastructure[Infrastructure] - postgres[Neon PostgreSQL
Database] - redis[Redis
Cache] - prometheus[Prometheus
Metrics] - grafana[Grafana
Dashboards] - loki[Loki
Log Aggregation] - end - - subgraph deployments[Deployments] - dockerCompose[Docker Compose
Local Development] - kubernetes[Kubernetes
Staging/Production] - end - - webAdmin --> traefik - webClient --> traefik - appAdmin --> traefik - appClient --> traefik - traefik --> iamService - traefik --> otherServices - iamService --> packages - otherServices --> packages - iamService --> postgres - otherServices --> postgres - iamService --> redis - otherServices --> redis - services --> prometheus - services --> loki - prometheus --> grafana - loki --> grafana - services --> deployments -``` - -## Tech Stack - -**Frontend:** -- Next.js 14+ (App Router), TypeScript, Tailwind CSS, Zustand -- Flutter 3.x with Provider pattern -- Use `@goodgo/types` and `@goodgo/http-client` - -**Backend:** -- Node.js 20+, TypeScript 5+, Express -- Prisma ORM + Neon PostgreSQL -- Zod validation, `@goodgo/logger`, `@goodgo/tracing`, `@goodgo/auth-sdk` - -**Infrastructure:** -- Traefik (path-based routing), Redis (cache), Prometheus + Grafana + Loki - -## Project Structure - -**Service:** `src/{config,modules,middlewares,routes,main.ts}` + `prisma/` + `Dockerfile` -**Package:** `src/index.ts` + `package.json` + `tsconfig.json` + `README.md` -**App:** `src/{app,services/api,stores}` + `Dockerfile` - -### Detailed Structure Diagram - -```mermaid -graph TB - subgraph service[Service Structure] - serviceRoot[service-name/] - serviceSrc[src/] - serviceConfig[config/
Configuration] - serviceModules[modules/
Feature Modules] - serviceMiddlewares[middlewares/
Express Middlewares] - serviceRoutes[routes/
Route Definitions] - serviceMain[main.ts
Entry Point] - servicePrisma[prisma/
Schema & Migrations] - serviceDockerfile[Dockerfile
Container Definition] - servicePackageJson[package.json
Dependencies] - - serviceRoot --> serviceSrc - serviceRoot --> servicePrisma - serviceRoot --> serviceDockerfile - serviceRoot --> servicePackageJson - serviceSrc --> serviceConfig - serviceSrc --> serviceModules - serviceSrc --> serviceMiddlewares - serviceSrc --> serviceRoutes - serviceSrc --> serviceMain - end - - subgraph package[Package Structure] - packageRoot[package-name/] - packageSrc[src/] - packageIndex[index.ts
Main Export] - packagePackageJson[package.json
Package Metadata] - packageTsconfig[tsconfig.json
TypeScript Config] - packageReadme[README.md
Documentation] - - packageRoot --> packageSrc - packageRoot --> packagePackageJson - packageRoot --> packageTsconfig - packageRoot --> packageReadme - packageSrc --> packageIndex - end - - subgraph app[App Structure - Next.js] - appRoot[app-name/] - appSrc[src/] - appApp[app/
Next.js App Router] - appServicesApi[services/api/
API Clients] - appStores[stores/
State Management] - appDockerfile[Dockerfile
Container Definition] - appPackageJson[package.json
Dependencies] - - appRoot --> appSrc - appRoot --> appDockerfile - appRoot --> appPackageJson - appSrc --> appApp - appSrc --> appServicesApi - appSrc --> appStores - end - - subgraph module[Module Structure inside modules/] - moduleRoot[modules/feature-name/] - moduleController[feature.controller.ts
HTTP Handlers] - moduleService[feature.service.ts
Business Logic] - moduleRepository[feature.repository.ts
Data Access] - moduleDto[feature.dto.ts
Zod Schemas] - moduleTypes[feature.types.ts
TypeScript Types] - moduleTest[feature.controller.test.ts
Unit Tests] - - moduleRoot --> moduleController - moduleRoot --> moduleService - moduleRoot --> moduleRepository - moduleRoot --> moduleDto - moduleRoot --> moduleTypes - moduleRoot --> moduleTest - moduleController --> moduleService - moduleService --> moduleRepository - end - - serviceModules --> moduleRoot -``` - -## Naming Conventions - -- **Services/Packages**: `kebab-case` (e.g., `auth-service`, `http-client`) -- **Files**: `kebab-case.type.ts` (e.g., `user.controller.ts`) -- **Components**: `PascalCase.tsx` (React), `snake_case.dart` (Flutter) -- **Classes**: `PascalCase`, **Functions**: `camelCase`, **Constants**: `UPPER_SNAKE_CASE` -- **Package Names**: `@goodgo/package-name` - -## Workflows - -**New Service:** -1. Copy `services/_template/` -2. Update `package.json` name to `@goodgo/service-name` -3. Add to `deployments/local/docker-compose.yml` with Traefik labels -4. Configure Prisma schema if needed -5. Add health check endpoint - -**New Package:** -1. Create in `packages/`, export from `src/index.ts` -2. Add to `pnpm-workspace.yaml` -3. Use TypeScript strict mode - -**Dependencies:** -```bash -pnpm --filter @goodgo/service-name add package-name -pnpm --filter @goodgo/service-name add @goodgo/logger # workspace -pnpm --filter @goodgo/service-name add -D @types/pkg # dev -``` - -**Database:** -```bash -pnpm --filter @goodgo/service-name prisma migrate dev -pnpm --filter @goodgo/service-name prisma generate -``` - -## Code Standards - -**TypeScript:** -- Strict mode, no `any` (use `unknown`) -- Zod for runtime validation -- Export shared types from `@goodgo/types` - -**API Responses:** -```typescript -// Success: { success: true, data: any } -// Error: { success: false, error: { code, message, details? 
} } -``` - -**Logging:** -```typescript -import { logger } from '@goodgo/logger'; -logger.info('Message', { context }); -logger.error('Error', { error, context }); -``` - -**Environment:** -- Use `.env.example` template, never commit `.env` -- Validate with Zod at startup -- Document all vars in README - -## Testing - -- **Unit**: Place tests next to source (`*.test.ts`), use Jest, mock dependencies, >80% coverage -- **Integration**: Test API endpoints, use test database, cleanup after -- **Commands**: `pnpm test`, `pnpm --filter @goodgo/service-name test`, `pnpm test:coverage` - -## Docker - -**Multi-stage Build Pattern:** -```dockerfile -FROM node:20-alpine AS builder -# ... build stage -FROM node:20-alpine -# ... production stage with non-root user -``` - -**Image Naming:** `goodgo/service-name:version` (semantic versioning) - -## Git Workflow - -**Branches:** `feature/`, `fix/`, `hotfix/`, `release/` - -**Commits:** Conventional Commits format -``` -type(scope): subject -``` -Types: `feat`, `fix`, `docs`, `style`, `refactor`, `test`, `chore` - -**PRs:** Use template, link issues, ensure CI passes, squash merge to main - -## CI/CD - -**GitHub Actions:** PR (lint, test, build) → `develop` (staging) → `main` (production) - -**Deployment Checklist:** Tests pass, no lint errors, env vars set, migrations applied, docs updated, monitoring configured - -## Security - -**Auth:** JWT (15min access, 7d refresh), httpOnly cookies, use `@goodgo/auth-sdk` -**Authorization:** RBAC, check permissions at service level, middleware for routes -**Data:** bcrypt (cost 12), HTTPS, sanitize inputs, Zod validation -**Secrets:** Environment variables, Kubernetes secrets, never hardcode, rotate regularly - -## Performance - -**Backend:** Redis caching, connection pooling, pagination, database indexes, rate limiting -**Frontend:** Next.js Image optimization, code splitting, lazy loading, React.memo, bundle optimization -**Database:** Prisma optimization, indexes, transactions, soft 
deletes - -## Observability - -**Metrics:** Prometheus (request count, duration, errors), set alerts -**Logging:** `@goodgo/logger` with trace IDs, levels (error, warn, info, debug), Loki aggregation -**Tracing:** OpenTelemetry via `@goodgo/tracing`, trace cross-service requests - -## Documentation - -**Code:** JSDoc for public APIs, inline comments for complex logic, README per service/package -**API:** OpenAPI/Swagger specs in `docs/api/openapi/`, document endpoints with examples -**Architecture:** System design in `docs/architecture/`, service communication, data flows, ADRs - -## Architecture Patterns - -**Modular Structure:** Controller → Service → Repository pattern -**DTO Validation:** Zod schemas with type inference -**Error Handling:** Custom error classes, global error middleware -**Dependency Injection:** Constructor injection for testability - -**Example Module:** -```typescript -// DTO with Zod -export const CreateFeatureDto = z.object({ - name: z.string().min(1), - email: z.string().email() -}); -export type CreateFeatureDto = z.infer; - -// Controller -export class FeatureController { - constructor(private service: FeatureService) {} - async create(req: Request, res: Response, next: NextFunction) { - try { - const dto = CreateFeatureDto.parse(req.body); - const result = await this.service.create(dto); - res.json({ success: true, data: result }); - } catch (error) { next(error); } - } -} - -// Service -export class FeatureService { - constructor(private repository: FeatureRepository) {} - async create(dto: CreateFeatureDto) { - return this.repository.create(dto); - } -} - -// Repository -export class FeatureRepository extends BaseRepository { - async create(data: CreateFeatureDto) { - return this.prisma.feature.create({ data }); - } -} -``` - -## Deployment & Traefik - -**Service Registration:** -Services are deployed via `deployments/local/docker-compose.yml` and auto-discovered by Traefik: - -```yaml -services: - my-service: - build: - context: 
../.. - dockerfile: services/my-service/Dockerfile - labels: - - "traefik.enable=true" - - "traefik.http.routers.my-service.rule=PathPrefix(`/api/v1/my-service`)" - - "traefik.http.services.my-service.loadbalancer.server.port=5002" - - "traefik.http.services.my-service.loadbalancer.healthcheck.path=/health/live" -``` - -**Traefik Configuration:** -- **Location**: `infra/traefik/` (platform-level, not per-service) -- **Static Config**: `traefik.yml` - Entry points, providers, dashboard -- **Dynamic Config**: `dynamic/middlewares.yml`, `dynamic/routes.yml` -- **Dashboard**: http://localhost:8080 - -**Access Points:** -- API: `http://localhost/api/v1/service-name` -- Health: `http://localhost/api/v1/service-name/health` -- Docs: `http://localhost/api/v1/service-name/api-docs` - -## Troubleshooting - -**Common Issues:** -- Port conflicts: Check `deployments/local/docker-compose.yml` -- Database: Verify `DATABASE_URL` in `.env.local` -- Module not found: Run `pnpm install` -- Type errors: Run `pnpm --filter @goodgo/service-name prisma generate` - -**Debug:** -```bash -cd deployments/local -docker-compose logs -f service-name -docker-compose ps -docker-compose up -d --build -``` - -## Resources - -- [Architecture Docs](../../docs/architecture/) -- [API Specs](../../docs/api/openapi/) -- [Development Guide](../../docs/guides/development.md) -- [Deployment Guide](../../docs/guides/deployment.md) -- [Neon Database Guide](../../docs/guides/neon-database.md) -- [Contributing Guide](../../CONTRIBUTING.md) diff --git a/apps/web-docs/content/docs/en/skills/repository-pattern.md b/apps/web-docs/content/docs/en/skills/repository-pattern.md deleted file mode 100644 index b1a41d84..00000000 --- a/apps/web-docs/content/docs/en/skills/repository-pattern.md +++ /dev/null @@ -1,334 +0,0 @@ ---- -name: repository-pattern -description: Repository pattern implementation and best practices for GoodGo microservices. 
Use when implementing data access layers, extending BaseRepository, writing database queries, handling transactions, or optimizing database operations. ---- - -# Repository Pattern - -## When to Use This Skill - -Use this skill when: -- Implementing data access layers for new modules -- Extending BaseRepository for specific entity types -- Writing custom database queries -- Handling database transactions -- Optimizing database queries and operations -- Testing repository implementations -- Organizing data access code - -## Core Concepts - -### Repository Pattern Benefits - -1. **Abstraction**: Separates business logic from data access -2. **Testability**: Easy to mock repositories for testing -3. **Maintainability**: Centralized database operations -4. **Consistency**: Standardized CRUD operations -5. **Type Safety**: TypeScript generics provide type safety - -### Repository Architecture - -The repository pattern creates an abstraction layer between the service layer and data access layer, providing a clean separation of concerns. - -```mermaid -graph TB - Controller["Controller
(HTTP Handler)"] --> Service["Service Layer
(Business Logic)"] - Service --> Repository["Repository
(Data Access)"] - Repository --> Prisma["Prisma Client
(ORM)"] - Prisma --> Database[("Database
(PostgreSQL)")] - - style Controller fill:#e1f5ff - style Service fill:#fff4e1 - style Repository fill:#e8f5e9 - style Prisma fill:#f3e5f5 - style Database fill:#ffebee -``` - -### BaseRepository Class - -The `BaseRepository` abstract class provides common database operations that can be extended: - -- `findById(id)` - Find entity by ID -- `findByUnique(field, value)` - Find by unique field -- `findAll(options)` - Find all with filtering, pagination, sorting -- `create(data)` - Create new entity -- `update(id, data)` - Update entity -- `delete(id)` - Delete entity -- `count(where)` - Count entities -- `exists(id)` - Check if entity exists -- `transaction(callback)` - Execute transaction - -### Class Hierarchy - -Repositories extend the `BaseRepository` abstract class, inheriting common CRUD operations while allowing custom query methods. - -```mermaid -classDiagram - class BaseRepository { - <> - #prisma: PrismaClient - #modelName: string - +findById(id: string) Promise~T~null~ - +findByUnique(field: string, value: any) Promise~T~null~ - +findAll(options?: any) Promise~T[]~ - +create(data: CreateInput) Promise~T~ - +update(id: string, data: UpdateInput) Promise~T~ - +delete(id: string) Promise~boolean~ - +count(where?: any) Promise~number~ - +exists(id: string) Promise~boolean~ - +transaction(callback: Function) Promise~R~ - } - - class IRepository { - <> - +findById(id: string) Promise~T~null~ - +findByUnique(field: string, value: any) Promise~T~null~ - +findAll(options?: any) Promise~T[]~ - +create(data: CreateInput) Promise~T~ - +update(id: string, data: UpdateInput) Promise~T~ - +delete(id: string) Promise~boolean~ - +count(where?: any) Promise~number~ - +exists(id: string) Promise~boolean~ - } - - class UserRepository { - +findByEmail(email: string) Promise~User~null~ - +findByUsername(username: string) Promise~User~null~ - +findWithPermissions(userId: string) Promise~User~null~ - +findActiveUsers(organizationId?: string) Promise~User[]~ - } - - class 
ProductRepository { - +findByCategory(categoryId: string) Promise~Product[]~ - +findActiveProducts() Promise~Product[]~ - } - - BaseRepository <|-- UserRepository : extends - BaseRepository <|-- ProductRepository : extends - IRepository <|.. UserRepository : implements -``` - -**Note**: Specific repositories like `UserRepository` and `ProductRepository` extend `BaseRepository` and can implement the `IRepository` interface for additional type safety. They inherit all base CRUD methods and add domain-specific query methods. - -## Patterns - -### Extending BaseRepository - -```typescript -import { PrismaClient, User } from '@prisma/client'; -import { BaseRepository } from '../modules/common/repository'; - -export class UserRepository extends BaseRepository { - constructor(prisma: PrismaClient) { - super(prisma, 'user'); - } - - // Add custom methods - async findByEmail(email: string): Promise { - return this.prisma.user.findUnique({ - where: { email }, - }); - } - - async findByUsername(username: string): Promise { - return this.prisma.user.findUnique({ - where: { username }, - }); - } -} -``` - -### Custom Query Methods - -Add domain-specific query methods: - -```typescript -export class UserRepository extends BaseRepository { - // Find user with related data - async findWithPermissions(userId: string): Promise { - return this.prisma.user.findUnique({ - where: { id: userId }, - include: { - userRoles: { - include: { role: true }, - }, - userPermissions: { - include: { permission: true }, - }, - }, - }); - } - - // Complex query with filtering - async findActiveUsers(organizationId?: string): Promise { - return this.prisma.user.findMany({ - where: { - isActive: true, - ...(organizationId && { organizationId }), - }, - orderBy: { createdAt: 'desc' }, - }); - } -} -``` - -### Using Repository Interface - -Implement the `IRepository` interface for type safety: - -```typescript -import { IRepository } from '../modules/common/repository'; - -export class UserRepository - 
extends BaseRepository - implements IRepository { - - // Implementation... -} -``` - -### Error Handling - -BaseRepository handles errors automatically: - -```typescript -async findById(id: string): Promise { - try { - // Database operation - const entity = await this.prisma.user.findUnique({ where: { id } }); - return entity; - } catch (error: any) { - logger.error(`Failed to find ${this.modelName} by ID`, { error, id }); - throw new DatabaseError(`Failed to find ${this.modelName}`, { id, originalError: error }); - } -} -``` - -### Transactions - -Use repository transaction method for multiple operations: - -```typescript -await repository.transaction(async (tx) => { - const user = await tx.user.create({ data: userData }); - await tx.userProfile.create({ data: { userId: user.id, ...profileData } }); - return user; -}); -``` - -**Transaction Flow**: - -```mermaid -sequenceDiagram - participant Service - participant Repository - participant Prisma as Prisma Client - participant DB as Database - - Service->>Repository: transaction(callback) - Repository->>Prisma: $transaction(callback) - Prisma->>DB: BEGIN TRANSACTION - - Note over Service,DB: All operations use transaction client (tx) - - Service->>Repository: tx.user.create(data) - Repository->>Prisma: tx.user.create(data) - Prisma->>DB: INSERT INTO users ... - DB-->>Prisma: User created - Prisma-->>Repository: User entity - Repository-->>Service: User entity - - Service->>Repository: tx.userProfile.create(data) - Repository->>Prisma: tx.userProfile.create(data) - Prisma->>DB: INSERT INTO user_profiles ... 
- DB-->>Prisma: Profile created - Prisma-->>Repository: Profile entity - Repository-->>Service: Profile entity - - alt All operations succeed - Prisma->>DB: COMMIT - DB-->>Prisma: Transaction committed - Prisma-->>Repository: Success result - Repository-->>Service: Success result - else Error occurs - Prisma->>DB: ROLLBACK - DB-->>Prisma: Transaction rolled back - Prisma-->>Repository: Error thrown - Repository-->>Service: DatabaseError thrown - end -``` - -**Important**: All operations within the transaction callback must use the transaction client (`tx`) parameter, not the main Prisma client, to ensure atomicity. - -### Query Options - -Use Prisma query options in findAll: - -```typescript -// Pagination -const users = await userRepository.findAll({ - skip: (page - 1) * limit, - take: limit, -}); - -// Filtering -const activeUsers = await userRepository.findAll({ - where: { isActive: true }, -}); - -// Sorting -const recentUsers = await userRepository.findAll({ - orderBy: { createdAt: 'desc' }, -}); - -// Including relations -const usersWithRoles = await userRepository.findAll({ - include: { userRoles: true }, -}); -``` - -## Best Practices - -1. **Extend BaseRepository**: Always extend BaseRepository instead of implementing from scratch -2. **Custom Methods**: Add domain-specific query methods in repository subclasses -3. **Type Safety**: Use TypeScript generics for type safety -4. **Error Handling**: Let BaseRepository handle common errors, handle domain-specific errors in custom methods -5. **Logging**: BaseRepository handles logging automatically -6. **Transactions**: Use repository transaction method for multi-step operations -7. **Query Optimization**: Use Prisma query options (select, include) to optimize queries -8. **Single Responsibility**: Each repository handles one entity type -9. **Dependency Injection**: Inject PrismaClient in constructor for testability - -## Common Mistakes - -1. 
**Not Extending BaseRepository**: Implementing CRUD from scratch instead of extending -2. **Business Logic in Repository**: Putting business logic in the repository instead of the service layer -3. **Exposing Prisma Client**: Exposing raw Prisma client instead of using repository methods -4. **Missing Error Handling**: Not handling errors in custom query methods -5. **Over-fetching Data**: Using `include` unnecessarily, fetching too much data -6. **No Type Safety**: Not using TypeScript generics properly -7. **Transaction Mistakes**: Not using the repository transaction method for related operations - -## Troubleshooting - -### Type Errors with Prisma - -**Problem**: TypeScript errors when using Prisma client methods -**Solution**: Ensure the Prisma client is generated: `pnpm prisma generate`. Use proper type assertions if needed. - -### Transaction Rollback Issues - -**Problem**: Transaction not rolling back on error -**Solution**: Ensure all operations in the transaction callback use the transaction client (`tx`) parameter, not the main Prisma client. - -### Performance Issues - -**Problem**: Slow queries or N+1 query problems -**Solution**: Use `include` to fetch related data in a single query. Use `select` to limit fields. Add database indexes. 
- -## Resources - -- [BaseRepository](../../services/iam-service/src/modules/common/repository.ts) - Base repository implementation -- [User Repository](../../services/iam-service/src/repositories/user.repository.ts) - Example repository -- [Database Prisma](../database-prisma/SKILL.md) - Prisma ORM patterns -- [Error Handling](../error-handling-patterns/SKILL.md) - Error handling in repositories diff --git a/apps/web-docs/content/docs/en/skills/resilience-patterns.md b/apps/web-docs/content/docs/en/skills/resilience-patterns.md deleted file mode 100644 index 147eb049..00000000 --- a/apps/web-docs/content/docs/en/skills/resilience-patterns.md +++ /dev/null @@ -1,239 +0,0 @@ ---- -name: resilience-patterns -description: Resilience patterns for GoodGo microservices including circuit breaker, retry strategies, timeout handling, and graceful degradation. Use when implementing fault tolerance, handling external service failures, or improving system reliability. ---- - -# Resilience Patterns - -## When to Use This Skill - -Use this skill when: -- Implementing circuit breaker patterns for external services -- Adding retry logic for transient failures -- Setting timeout handling for long-running operations -- Implementing graceful degradation strategies -- Handling external service failures -- Improving system fault tolerance - -## Core Concepts - -### Resilience Patterns - -1. **Circuit Breaker**: Prevents cascading failures by stopping calls to failing services -2. **Retry**: Automatically retries failed operations with backoff -3. **Timeout**: Sets maximum time limits for operations -4. **Bulkhead**: Isolates failures to prevent spread -5. 
**Graceful Degradation**: Provides fallback behavior when services fail - -## Patterns - -### Circuit Breaker Pattern - -Protects against cascading failures: - -The circuit breaker has three states that transition based on error rates and timeouts: - -```mermaid -stateDiagram-v2 - [*] --> CLOSED: Initial State - CLOSED --> OPEN: Errors exceed threshold
(errorThresholdPercentage: 50%) - OPEN --> HALF_OPEN: Reset timeout expires
(resetTimeout: 30s) - HALF_OPEN --> CLOSED: Request succeeds - HALF_OPEN --> OPEN: Request fails - CLOSED --> [*]: Normal operation - OPEN --> [*]: Circuit open (rejecting requests) - HALF_OPEN --> [*]: Testing recovery -``` - -**Circuit Breaker States:** -- **CLOSED**: Normal operation, requests pass through -- **OPEN**: Circuit is open, requests are immediately rejected -- **HALF-OPEN**: Testing if service has recovered, allows limited requests - -```typescript -import CircuitBreaker from 'opossum'; -import { logger } from '@goodgo/logger'; - -export const createCircuitBreaker = ( - action: (...args: TArgs) => Promise, - name: string, - options: Partial = {} -): CircuitBreaker => { - const breaker = new CircuitBreaker(action, { - timeout: 3000, - errorThresholdPercentage: 50, - resetTimeout: 30000, - ...options, - name, - }); - - breaker.on('open', () => { - logger.warn(`Circuit Breaker OPEN: ${name}`); - }); - - breaker.on('halfOpen', () => { - logger.info(`Circuit Breaker HALF-OPEN: ${name}`); - }); - - breaker.on('close', () => { - logger.info(`Circuit Breaker CLOSED: ${name}`); - }); - - return breaker; -}; - -// Usage -const externalApiBreaker = createCircuitBreaker( - async (data) => await externalApi.call(data), - 'external-api' -); - -try { - const result = await externalApiBreaker.fire(requestData); -} catch (error) { - // Handle circuit breaker error or fallback -} -``` - -### Retry Pattern - -Retry transient failures with exponential backoff: - -The retry pattern attempts an operation multiple times with increasing delays between attempts: - -```mermaid -flowchart TD - Start([Start Operation]) --> Attempt[Attempt Operation] - Attempt --> Success{Success?} - Success -->|Yes| Return([Return Result]) - Success -->|No| CheckRetries{Attempt < Max Retries?} - CheckRetries -->|No| ThrowError([Throw Error]) - CheckRetries -->|Yes| CalculateDelay[Calculate Delay:
baseDelay × 2^attempt] - CalculateDelay --> Wait[Wait for Delay] - Wait --> IncrementAttempt[Increment Attempt] - IncrementAttempt --> Attempt - - style Start fill:#e1f5e1 - style Return fill:#e1f5e1 - style ThrowError fill:#ffe1e1 - style CalculateDelay fill:#fff4e1 -``` - -**Exponential Backoff Example:** -- Attempt 1: 1s delay -- Attempt 2: 2s delay -- Attempt 3: 4s delay -- Attempt 4: 8s delay - -```typescript -async function retryWithBackoff<T>( - fn: () => Promise<T>, - maxRetries: number = 3, - baseDelay: number = 1000 -): Promise<T> { - for (let attempt = 0; attempt <= maxRetries; attempt++) { - try { - return await fn(); - } catch (error) { - if (attempt === maxRetries) throw error; - - const delay = baseDelay * Math.pow(2, attempt); - await new Promise(resolve => setTimeout(resolve, delay)); - } - } - throw new Error('Retry exhausted'); -} -``` - -### Timeout Pattern - -Set maximum time limits: - -The timeout pattern uses Promise.race to enforce a maximum execution time: - -```mermaid -sequenceDiagram - participant Client - participant TimeoutWrapper - participant Operation - participant TimeoutTimer - - Client->>TimeoutWrapper: Execute with timeout - TimeoutWrapper->>Operation: Start operation - TimeoutWrapper->>TimeoutTimer: Start timeout timer - - alt Operation completes first - Operation-->>TimeoutWrapper: Return result - TimeoutWrapper-->>Client: Return result - TimeoutWrapper->>TimeoutTimer: Cancel timer - else Timeout expires first - TimeoutTimer-->>TimeoutWrapper: Timeout error - TimeoutWrapper->>Operation: (Operation may continue) - TimeoutWrapper-->>Client: Reject with timeout error - end -``` - -**Timeout Behavior:** -- Uses `Promise.race()` to race the operation against the timeout -- First to resolve/reject wins -- Operation may continue after timeout, but result is ignored - -```typescript -async function withTimeout<T>( - promise: Promise<T>, - timeoutMs: number -): Promise<T> { - const timeout = new Promise<never>((_, reject) => { - setTimeout(() => reject(new 
Error('Operation timeout')), timeoutMs); - }); - - return Promise.race([promise, timeout]); -} - -// Usage -try { - const result = await withTimeout( - externalService.call(), - 5000 // 5 second timeout - ); -} catch (error) { - if (error.message === 'Operation timeout') { - // Handle timeout - } -} -``` - -### Graceful Degradation - -Provide fallback behavior: - -```typescript -async function getDataWithFallback() { - try { - return await primaryDataSource.get(); - } catch (error) { - logger.warn('Primary source failed, using fallback', { error }); - return await fallbackDataSource.get(); - } -} -``` - -## Best Practices - -1. **Circuit Breaker**: Use for external service calls -2. **Retry**: Retry only transient failures (network, timeout) -3. **Timeout**: Set appropriate timeouts for all external calls -4. **Fallback**: Always provide fallback behavior -5. **Monitoring**: Monitor circuit breaker states and retry rates -6. **Logging**: Log all resilience actions for debugging - -## Common Mistakes - -1. **Retrying Non-Retryable Errors**: Retrying 4xx errors (client errors) -2. **No Timeout**: Missing timeouts on external calls -3. **No Fallback**: No graceful degradation strategy -4. **Too Many Retries**: Excessive retries causing performance issues - -## Resources - -- [Circuit Breaker](../../services/iam-service/src/modules/common/circuit-breaker.ts) - Circuit breaker implementation diff --git a/apps/web-docs/content/docs/en/skills/security.md b/apps/web-docs/content/docs/en/skills/security.md deleted file mode 100644 index c7a082dc..00000000 --- a/apps/web-docs/content/docs/en/skills/security.md +++ /dev/null @@ -1,925 +0,0 @@ ---- -name: security -description: Security best practices and patterns for GoodGo microservices platform. Use when implementing authentication, authorization, data protection, input validation, rate limiting, secrets management, or security testing across all services. 
---- - -# Security Patterns for GoodGo Microservices - -## When to Use This Skill - -Use this skill when: -- Implementing authentication and authorization in any service -- Protecting sensitive data (PII, credentials, tokens) -- Validating user inputs and file uploads -- Implementing rate limiting and DDoS protection -- Setting up audit logging and security monitoring -- Encrypting data at rest and in transit -- Managing secrets and credentials -- Implementing security testing -- Handling security incidents -- Designing secure API endpoints - -## Core Security Principles - -1. **Defense in Depth**: Multiple layers of security controls -2. **Least Privilege**: Grant minimum required permissions -3. **Fail Secure**: Default to deny access -4. **Separation of Duties**: Critical operations require multiple approvals -5. **Audit Everything**: Log all security-relevant events -6. **Encrypt Sensitive Data**: PII, tokens, credentials must be encrypted -7. **Validate All Inputs**: Never trust user input -8. **Principle of Least Exposure**: Minimize attack surface -9. **Secure by Default**: Security built-in, not bolted on -10. **Assume Breach**: Design for detection and response - -## Authentication & Authorization - -### JWT Token Validation - -The following diagram illustrates the authentication flow when a client makes a request with a JWT token: - -```mermaid -sequenceDiagram - participant Client - participant Middleware as Auth Middleware - participant JWTService as JWT Service - participant Request as Express Request - - Client->>Middleware: HTTP Request with Token - Middleware->>Middleware: Extract token from
Authorization header or cookie - - alt Token not found - Middleware->>Client: 401 Unauthorized
(AUTH_REQUIRED) - else Token found - Middleware->>JWTService: verifyAccessToken(token) - - alt Token invalid or expired - JWTService->>Middleware: Verification failed - Middleware->>Client: 401 Unauthorized
(INVALID_TOKEN) - else Token valid - JWTService->>Middleware: Payload (sub, email, roles, permissions) - Middleware->>Request: Attach user to req.user - Middleware->>Client: Continue to next middleware - end - end -``` - -```typescript -// src/middlewares/auth.middleware.ts -import { Request, Response, NextFunction } from 'express'; -import { jwtService } from '@goodgo/auth-sdk'; -import { logger } from '@goodgo/logger'; - -export const authenticate = () => { - return async (req: Request, res: Response, next: NextFunction) => { - try { - // Extract token from Authorization header or cookie - let token: string | null = null; - - const authHeader = req.headers.authorization; - if (authHeader?.startsWith('Bearer ')) { - token = authHeader.substring(7); - } else if (req.cookies?.access_token) { - token = req.cookies.access_token; - } - - if (!token) { - return res.status(401).json({ - success: false, - error: { code: 'AUTH_REQUIRED', message: 'Authentication required' } - }); - } - - // Verify token - const payload = await jwtService.verifyAccessToken(token); - - // Attach user to request - req.user = { - id: payload.sub, - userId: payload.sub, - email: payload.email, - roles: payload.roles || [], - permissions: payload.permissions || [] - }; - - next(); - } catch (error) { - logger.warn('Authentication failed', { error: error.message }); - return res.status(401).json({ - success: false, - error: { code: 'INVALID_TOKEN', message: 'Invalid or expired token' } - }); - } - }; -}; -``` - -### Role-Based Authorization - -The authorization decision flow determines whether a user has the required permissions to access a resource: - -```mermaid -flowchart TD - Start([Request Received]) --> CheckAuth{User
Authenticated?} - - CheckAuth -->|No| Return401[Return 401
AUTH_REQUIRED] - CheckAuth -->|Yes| CheckType{Authorization
Type?} - - CheckType -->|Role-Based| CheckRole{User has
Required Role?} - CheckType -->|Permission-Based| CheckPermission{User has
Resource:Action
Permission?} - CheckType -->|Ownership| CheckOwnership{Resource ID
matches User ID?} - - CheckRole -->|No| LogDenial[Log Permission Denied
with user roles] - CheckPermission -->|No| LogDenial - CheckOwnership -->|No| LogDenial - - LogDenial --> Return403[Return 403
FORBIDDEN] - - CheckRole -->|Yes| Allow[Allow Request
Continue to Handler] - CheckPermission -->|Yes| Allow - CheckOwnership -->|Yes| Allow - - Return401 --> End([End]) - Return403 --> End - Allow --> End - - style CheckAuth fill:#e1f5ff - style CheckType fill:#e1f5ff - style Return401 fill:#ffebee - style Return403 fill:#ffebee - style Allow fill:#e8f5e9 -``` - -```typescript -// src/middlewares/rbac.middleware.ts -export const requireRole = (...allowedRoles: string[]) => { - return (req: Request, res: Response, next: NextFunction) => { - if (!req.user) { - return res.status(401).json({ - success: false, - error: { code: 'AUTH_REQUIRED', message: 'Authentication required' } - }); - } - - const userRoles = req.user.roles || []; - const hasRole = userRoles.some(role => allowedRoles.includes(role)); - - if (!hasRole) { - logger.warn('Access denied - insufficient role', { - userId: req.user.id, - userRoles, - requiredRoles: allowedRoles - }); - - return res.status(403).json({ - success: false, - error: { code: 'FORBIDDEN', message: 'Insufficient permissions' } - }); - } - - next(); - }; -}; - -// Permission-based authorization -export const requirePermission = (resource: string, action: string) => { - return async (req: Request, res: Response, next: NextFunction) => { - if (!req.user) { - return res.status(401).json({ - success: false, - error: { code: 'AUTH_REQUIRED', message: 'Authentication required' } - }); - } - - const permission = `${resource}:${action}`; - const hasPermission = req.user.permissions?.includes(permission); - - if (!hasPermission) { - logger.warn('Access denied - insufficient permission', { - userId: req.user.id, - required: permission - }); - - return res.status(403).json({ - success: false, - error: { code: 'FORBIDDEN', message: 'Insufficient permissions' } - }); - } - - next(); - }; -}; - -// Usage in routes -router.post( - '/api/v1/users', - authenticate(), - requirePermission('users', 'create'), - userController.create -); -``` - -### Resource Ownership Validation - -```typescript -// Ensure 
users can only access their own resources -export const requireOwnership = (resourceIdParam: string = 'id') => { - return (req: Request, res: Response, next: NextFunction) => { - const resourceId = req.params[resourceIdParam]; - const userId = req.user?.id; - - if (resourceId !== userId) { - logger.warn('Access denied - resource ownership mismatch', { - userId, - resourceId - }); - - return res.status(403).json({ - success: false, - error: { code: 'FORBIDDEN', message: 'Access denied' } - }); - } - - next(); - }; -}; -``` - -## Data Protection - -### Encryption Service - -The encryption and decryption flow for protecting sensitive data at rest: - -```mermaid -sequenceDiagram - participant Service - participant EncryptionService - participant Crypto as Node.js Crypto - participant Database - - Note over Service,Database: Encryption Flow - Service->>EncryptionService: encrypt(plaintext) - EncryptionService->>Crypto: Generate random IV
(16 bytes) - EncryptionService->>Crypto: Create cipher
(AES-256-GCM) - EncryptionService->>Crypto: Encrypt plaintext - Crypto->>EncryptionService: Encrypted data + Auth Tag - EncryptionService->>Service: Format: iv:tag:ciphertext - Service->>Database: Store encrypted data - - Note over Service,Database: Decryption Flow - Service->>Database: Retrieve encrypted data - Database->>Service: iv:tag:ciphertext - Service->>EncryptionService: decrypt(encryptedText) - EncryptionService->>EncryptionService: Split iv, tag, ciphertext - EncryptionService->>Crypto: Create decipher
(AES-256-GCM) - EncryptionService->>Crypto: Set auth tag - EncryptionService->>Crypto: Decrypt ciphertext - Crypto->>EncryptionService: Plaintext - EncryptionService->>Service: Return plaintext -``` - -```typescript -// src/core/security/encryption.service.ts -import crypto from 'crypto'; - -const ALGORITHM = 'aes-256-gcm'; -const IV_LENGTH = 16; -const TAG_LENGTH = 16; - -export class EncryptionService { - private getKey(): Buffer { - const secret = process.env.ENCRYPTION_KEY; - if (!secret || secret.length < 32) { - throw new Error('ENCRYPTION_KEY must be at least 32 characters'); - } - return crypto.scryptSync(secret, 'salt', 32); - } - - encrypt(text: string): string { - const key = this.getKey(); - const iv = crypto.randomBytes(IV_LENGTH); - const cipher = crypto.createCipheriv(ALGORITHM, key, iv); - - let encrypted = cipher.update(text, 'utf8', 'hex'); - encrypted += cipher.final('hex'); - const tag = cipher.getAuthTag(); - - return `${iv.toString('hex')}:${tag.toString('hex')}:${encrypted}`; - } - - decrypt(encryptedText: string): string { - const [ivHex, tagHex, encrypted] = encryptedText.split(':'); - const iv = Buffer.from(ivHex, 'hex'); - const tag = Buffer.from(tagHex, 'hex'); - - const key = this.getKey(); - const decipher = crypto.createDecipheriv(ALGORITHM, key, iv); - decipher.setAuthTag(tag); - - let decrypted = decipher.update(encrypted, 'hex', 'utf8'); - decrypted += decipher.final('utf8'); - return decrypted; - } -} - -// Usage: Encrypt PII before storing -const encryption = new EncryptionService(); -const encryptedPhone = encryption.encrypt(user.phone); -``` - -### Password Hashing - -The password hashing and verification flow: - -```mermaid -sequenceDiagram - participant User - participant Service - participant PasswordService - participant Bcrypt - participant Database - - Note over User,Database: Registration/Password Change - User->>Service: Submit password - Service->>PasswordService: hash(password) - PasswordService->>Bcrypt: 
bcrypt.hash(password, 12) - Bcrypt->>Bcrypt: Generate salt - Bcrypt->>Bcrypt: Hash with cost factor 12 - Bcrypt->>PasswordService: Hashed password - PasswordService->>Service: Return hash - Service->>Database: Store passwordHash - Service->>Service: Sanitize password
before logging - - Note over User,Database: Login Verification - User->>Service: Submit credentials - Service->>Database: Fetch user by email - Database->>Service: User with passwordHash - Service->>PasswordService: verify(password, hash) - PasswordService->>Bcrypt: bcrypt.compare(password, hash) - Bcrypt->>PasswordService: Boolean result - PasswordService->>Service: Return verification result - - alt Password matches - Service->>User: Authentication success - else Password mismatch - Service->>User: Invalid credentials
(generic error) - end -``` - -```typescript -// Always use bcrypt with appropriate cost factor -import bcrypt from 'bcrypt'; - -const SALT_ROUNDS = 12; // Production: 12, Development: 10 - -export class PasswordService { - async hash(password: string): Promise<string> { - return bcrypt.hash(password, SALT_ROUNDS); - } - - async verify(password: string, hash: string): Promise<boolean> { - return bcrypt.compare(password, hash); - } - - // Never log passwords - sanitizeForLogging(data: any): any { - const sanitized = { ...data }; - if (sanitized.password) sanitized.password = '[REDACTED]'; - if (sanitized.passwordHash) sanitized.passwordHash = '[REDACTED]'; - return sanitized; - } -} -``` - -### Token Hashing - -```typescript -// Hash tokens before storing in database -import crypto from 'crypto'; - -export class TokenService { - hashToken(token: string): string { - const salt = process.env.TOKEN_SALT || 'default-salt-change-in-production'; - return crypto - .createHash('sha256') - .update(token + salt) - .digest('hex'); - } - - generateSecureToken(length: number = 32): string { - return crypto.randomBytes(length).toString('hex'); - } -} -``` - -## Input Validation - -### Zod Schema Validation - -```typescript -// Always validate inputs with Zod -import { z } from 'zod'; - -// DTO with validation -export const CreateUserDto = z.object({ - email: z.string().email('Invalid email format'), - password: z.string() - .min(8, 'Password must be at least 8 characters') - .regex(/[A-Z]/, 'Password must contain uppercase letter') - .regex(/[a-z]/, 'Password must contain lowercase letter') - .regex(/[0-9]/, 'Password must contain number') - .regex(/[^A-Za-z0-9]/, 'Password must contain special character'), - phone: z.string() - .regex(/^\+[1-9]\d{1,14}$/, 'Invalid phone format (E.164)') - .optional(), - name: z.string().min(1).max(255) -}); - -// In controller -export class UserController { - async create(req: Request, res: Response) { - try { - const dto = CreateUserDto.parse(req.body); - const 
user = await this.service.create(dto); - res.status(201).json({ success: true, data: user }); - } catch (error) { - if (error instanceof z.ZodError) { - return res.status(400).json({ - success: false, - error: { - code: 'VALIDATION_ERROR', - message: 'Invalid input data', - details: error.errors - } - }); - } - throw error; - } - } -} -``` - -### File Upload Validation - -```typescript -// Validate file uploads -import fileType from 'file-type'; - -export class FileValidationService { - private readonly MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB - private readonly ALLOWED_TYPES = ['image/jpeg', 'image/png', 'application/pdf']; - - async validateFile(file: Express.Multer.File): Promise { - // Size check - if (file.size > this.MAX_FILE_SIZE) { - throw new HttpError(400, 'FILE_TOO_LARGE', 'File exceeds maximum size'); - } - - // Type check - if (!this.ALLOWED_TYPES.includes(file.mimetype)) { - throw new HttpError(400, 'INVALID_FILE_TYPE', 'File type not allowed'); - } - - // Content validation (prevent MIME type spoofing) - const type = await fileType.fromBuffer(file.buffer); - if (!type || !this.ALLOWED_TYPES.includes(type.mime)) { - throw new HttpError(400, 'INVALID_FILE_CONTENT', 'File content mismatch'); - } - - // TODO: Add virus scanning for production - } -} -``` - -### SQL Injection Prevention - -```typescript -// Always use Prisma parameterized queries (automatic) -// Never use string concatenation for queries - -// ❌ BAD - Never do this -const query = `SELECT * FROM users WHERE email = '${email}'`; - -// ✅ GOOD - Use Prisma -const user = await prisma.user.findUnique({ - where: { email } -}); - -// ✅ GOOD - For dynamic queries -const where: any = {}; -if (email) where.email = email; -if (status) where.status = status; - -const users = await prisma.user.findMany({ where }); -``` - -## Rate Limiting - -```typescript -// Implement rate limiting for all endpoints -import rateLimit from 'express-rate-limit'; -import RedisStore from 'rate-limit-redis'; -import Redis 
from 'ioredis'; - -const redis = new Redis(process.env.REDIS_URL); - -// Standard rate limit -export const standardLimiter = rateLimit({ - store: new RedisStore({ - client: redis, - prefix: 'rl:standard:' - }), - windowMs: 15 * 60 * 1000, // 15 minutes - max: 100, // 100 requests per window - message: 'Too many requests, please try again later', - standardHeaders: true, - legacyHeaders: false -}); - -// Strict rate limit for sensitive operations -export const strictLimiter = rateLimit({ - store: new RedisStore({ - client: redis, - prefix: 'rl:strict:' - }), - windowMs: 60 * 60 * 1000, // 1 hour - max: 10, - message: 'Rate limit exceeded for this operation' -}); - -// Login-specific rate limit -export const loginLimiter = rateLimit({ - store: new RedisStore({ - client: redis, - prefix: 'rl:login:' - }), - windowMs: 15 * 60 * 1000, - max: 5, // 5 login attempts per 15 minutes - skipSuccessfulRequests: true, - message: 'Too many login attempts, please try again later' -}); - -// Usage -router.post('/api/v1/auth/login', loginLimiter, authController.login); -router.post('/api/v1/users', authenticate(), strictLimiter, userController.create); -``` - -## Error Handling Security - -```typescript -// Sanitize error messages to prevent information disclosure -export class SecureErrorHandler { - handleError(error: Error, req: Request, res: Response) { - const isDev = process.env.NODE_ENV === 'development'; - const isProd = process.env.NODE_ENV === 'production'; - - // Log full error internally - logger.error('Request error', { - error: error.message, - stack: error.stack, - path: req.path, - method: req.method, - userId: req.user?.id - }); - - // Don't expose user existence - if (error.message.includes('user not found') || - error.message.includes('invalid credentials')) { - return res.status(401).json({ - success: false, - error: { - code: 'INVALID_CREDENTIALS', - message: 'Invalid email or password' - } - }); - } - - // Validation errors - safe to expose - if (error 
instanceof z.ZodError) { - return res.status(400).json({ - success: false, - error: { - code: 'VALIDATION_ERROR', - message: 'Invalid input data', - details: error.errors - } - }); - } - - // Generic errors for production - if (isProd) { - return res.status(500).json({ - success: false, - error: { - code: 'INTERNAL_ERROR', - message: 'An error occurred. Please try again later.' - } - }); - } - - // Detailed errors only in development - return res.status(500).json({ - success: false, - error: { - code: 'INTERNAL_ERROR', - message: error.message, - stack: isDev ? error.stack : undefined - } - }); - } -} -``` - -## Secrets Management - -```typescript -// Never hardcode secrets -// Always use environment variables with validation -import { z } from 'zod'; - -const secretsSchema = z.object({ - JWT_SECRET: z.string().min(32, 'JWT_SECRET must be at least 32 characters'), - JWT_REFRESH_SECRET: z.string().min(32), - DATABASE_URL: z.string().url(), - REDIS_URL: z.string().url().optional(), - ENCRYPTION_KEY: z.string().min(32).optional() -}); - -export const secrets = secretsSchema.parse(process.env); - -// For production, use secret management: -// - AWS Secrets Manager -// - HashiCorp Vault -// - Kubernetes Secrets -// - Azure Key Vault - -// Rotate secrets regularly (quarterly recommended) -``` - -## Audit Logging - -```typescript -// Log all security-relevant events -export class AuditService { - async logSecurityEvent( - event: string, - userId: string | null, - details: Record, - req?: Request - ) { - await this.prisma.auditLog.create({ - data: { - event, - userId, - type: 'SECURITY', - details: this.sanitizeDetails(details), - ipAddress: req?.ip || details.ipAddress, - userAgent: req?.get('user-agent'), - timestamp: new Date() - } - }); - } - - // Sanitize PII from logs - private sanitizeDetails(details: Record): Record { - const sensitive = ['password', 'token', 'secret', 'ssn', 'creditCard']; - const sanitized = { ...details }; - - for (const key of sensitive) { - if 
(sanitized[key]) { - sanitized[key] = '[REDACTED]'; - } - } - - return sanitized; - } -} - -// Usage -await auditService.logSecurityEvent('LOGIN_SUCCESS', user.id, { - email: user.email, - ipAddress: req.ip -}, req); - -await auditService.logSecurityEvent('PERMISSION_DENIED', user.id, { - resource: 'users', - action: 'delete', - targetId: targetUserId -}, req); -``` - -## Security Headers - -```typescript -// Add security headers middleware -import helmet from 'helmet'; - -app.use(helmet({ - contentSecurityPolicy: { - directives: { - defaultSrc: ["'self'"], - styleSrc: ["'self'", "'unsafe-inline'"], - scriptSrc: ["'self'"], - imgSrc: ["'self'", "data:", "https:"] - } - }, - hsts: { - maxAge: 31536000, - includeSubDomains: true, - preload: true - } -})); - -// Additional headers -app.use((req, res, next) => { - res.setHeader('X-Content-Type-Options', 'nosniff'); - res.setHeader('X-Frame-Options', 'DENY'); - res.setHeader('X-XSS-Protection', '1; mode=block'); - res.setHeader('Referrer-Policy', 'strict-origin-when-cross-origin'); - next(); -}); -``` - -## CORS Configuration - -```typescript -// Configure CORS securely -import cors from 'cors'; - -const allowedOrigins = process.env.CORS_ORIGIN?.split(',') || []; - -app.use(cors({ - origin: (origin, callback) => { - if (!origin || allowedOrigins.includes(origin)) { - callback(null, true); - } else { - callback(new Error('Not allowed by CORS')); - } - }, - credentials: true, - methods: ['GET', 'POST', 'PUT', 'PATCH', 'DELETE', 'OPTIONS'], - allowedHeaders: ['Content-Type', 'Authorization'], - exposedHeaders: ['X-Request-ID'], - maxAge: 86400 // 24 hours -})); -``` - -## Security Testing - -```typescript -// Security test patterns -describe('Security Tests', () => { - it('should prevent SQL injection', async () => { - const maliciousInput = "'; DROP TABLE users; --"; - const response = await request(app) - .get(`/api/v1/users?search=${encodeURIComponent(maliciousInput)}`) - .set('Authorization', `Bearer ${token}`); - - 
expect(response.status).not.toBe(500); - // Should return 400 or empty results, not crash - }); - - it('should prevent XSS attacks', async () => { - const xssPayload = ''; - const response = await request(app) - .post('/api/v1/users') - .send({ email: xssPayload, password: 'test123' }); - - // Response should sanitize or reject - expect(response.body.data?.email).not.toContain('