diff --git a/.env.example b/.env.example index 393fab7..d9d1312 100644 --- a/.env.example +++ b/.env.example @@ -32,6 +32,19 @@ REDIS_PORT=6379 REDIS_PASSWORD=CHANGE_ME_IN_PRODUCTION REDIS_URL=redis://:${REDIS_PASSWORD}@${REDIS_HOST}:${REDIS_PORT} +# ----------------------------------------------------------------------------- +# Redis — Queue (BullMQ) +# +# RFC-004 Phase 3: the async backbone (BullMQ) can point at a Redis instance +# separate from cache / throttler / websocket to keep hot cache traffic from +# starving queue operations. If unset, queue traffic falls back to the cache +# REDIS_* vars above (single-instance dev and small deployments keep working +# unchanged). +# ----------------------------------------------------------------------------- +# REDIS_QUEUE_HOST= +# REDIS_QUEUE_PORT= +# REDIS_QUEUE_PASSWORD= + # ----------------------------------------------------------------------------- # Typesense # ----------------------------------------------------------------------------- diff --git a/apps/api/package.json b/apps/api/package.json index 9839f9e..67148eb 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -16,6 +16,9 @@ "@anthropic-ai/sdk": "^0.89.0", "@aws-sdk/client-s3": "^3.1026.0", "@aws-sdk/s3-request-presigner": "^3.1026.0", + "@bull-board/api": "^7.0.0", + "@bull-board/express": "^7.0.0", + "@bull-board/nestjs": "^7.0.0", "@goodgo/mcp-servers": "workspace:*", "@goodgo/contracts-events": "workspace:*", "@nest-lab/throttler-storage-redis": "^1.2.0", @@ -50,6 +53,7 @@ "handlebars": "^4.7.9", "helmet": "^8.1.0", "ioredis": "^5.4.0", + "jsonwebtoken": "^9.0.3", "nodemailer": "^8.0.5", "otplib": "^13.4.0", "passport": "^0.7.0", @@ -76,6 +80,7 @@ "@types/bcrypt": "^6.0.0", "@types/cookie-parser": "^1.4.10", "@types/express": "^5.0.0", + "@types/jsonwebtoken": "^9.0.10", "@types/node": "^25.5.2", "@types/nodemailer": "^8.0.0", "@types/passport-google-oauth20": "^2.0.17", diff --git a/apps/api/src/app.module.ts b/apps/api/src/app.module.ts index cc98d4e..b58601b 100644 --- a/apps/api/src/app.module.ts +++ b/apps/api/src/app.module.ts @@ -22,6 +22,7 @@ import { HttpMetricsInterceptor, MetricsModule } from '@modules/metrics'; import { NotificationsModule } from '@modules/notifications'; import { PaymentsModule } from '@modules/payments'; import { ProjectsModule } from '@modules/projects'; +import { QueuesModule } from '@modules/queues/queues.module'; import { ReportsModule } from '@modules/reports'; import { ReviewsModule } from '@modules/reviews'; import { SearchModule } from '@modules/search'; @@ -29,6 +30,7 @@ import { SharedModule } from '@modules/shared'; import { ThrottlerBehindProxyGuard } from '@modules/shared/infrastructure/guards/throttler-behind-proxy.guard'; import { CsrfMiddleware } from '@modules/shared/infrastructure/middleware/csrf.middleware'; import { SanitizeInputMiddleware } from '@modules/shared/infrastructure/middleware/sanitize-input.middleware'; +import { getRedisConnection } from '@modules/shared/infrastructure/redis-connection.config'; import { SubscriptionsModule } from '@modules/subscriptions'; import { TransferModule } from '@modules/transfer'; import { AppController } from './app.controller'; @@ -37,11 +39,11 @@ import { AppController } from './app.controller'; imports: [ SentryModule.forRoot(), BullModule.forRoot({ - connection: { - host: process.env['REDIS_HOST'] ?? 'localhost', - port: Number(process.env['REDIS_PORT'] ?? 6379), - password: process.env['REDIS_PASSWORD'] ?? undefined, - }, + // RFC-004 Phase 3 — use the queue-specific Redis connection so ops can + // split cache traffic from queue traffic without a code change. Falls + // back to REDIS_HOST/PORT/PASSWORD when the queue-specific vars are + // unset. See shared/infrastructure/redis-connection.config.ts. + connection: getRedisConnection('queue'), }), CqrsModule.forRoot(), ScheduleModule.forRoot(), @@ -61,6 +63,7 @@ import { AppController } from './app.controller'; AdminModule, AnalyticsModule, MetricsModule, + MetricsModule.withQueueMetrics(), McpIntegrationModule, MessagingModule, ReportsModule, @@ -68,6 +71,9 @@ import { AppController } from './app.controller'; IndustrialModule, TransferModule, + // ── Bull Board UI (RFC-004 Phase 3 WS3b) ── + QueuesModule, + // ── Rate Limiting ── // Default: 60 requests per 60 seconds per IP // Override per-route with @Throttle() decorator @@ -142,6 +148,8 @@ export class AppModule implements NestModule { { path: 'health/(.*)', method: RequestMethod.GET }, { path: 'api/v1/web-vitals', method: RequestMethod.POST }, // sendBeacon cannot send CSRF headers { path: 'web-vitals', method: RequestMethod.POST }, // middleware exclude uses controller-relative path + { path: 'api/v1/admin/queues', method: RequestMethod.ALL }, + { path: 'api/v1/admin/queues/(.*)', method: RequestMethod.ALL }, ) .forRoutes('*'); } diff --git a/apps/api/src/modules/auth/application/__tests__/login-user.handler.spec.ts b/apps/api/src/modules/auth/application/__tests__/login-user.handler.spec.ts index 18b6319..276fc06 100644 --- a/apps/api/src/modules/auth/application/__tests__/login-user.handler.spec.ts +++ b/apps/api/src/modules/auth/application/__tests__/login-user.handler.spec.ts @@ -5,6 +5,8 @@ describe('LoginUserHandler', () => { let handler: LoginUserHandler; let mockTokenService: { generateTokenPair: ReturnType }; let mockChallengeRepo: { create: ReturnType }; + let mockUserRepo: { updateMfaGraceStartedAt: ReturnType }; + let mockLogger: { error: ReturnType; warn: ReturnType }; const tokenPair = { accessToken: 'access-jwt', @@ -15,22 +17,30 @@ describe('LoginUserHandler', () => { beforeEach(() => { mockTokenService = { generateTokenPair: vi.fn().mockResolvedValue(tokenPair) }; mockChallengeRepo = { create: vi.fn().mockResolvedValue({}) }; - handler = new LoginUserHandler(mockTokenService as any, mockChallengeRepo as any); + mockUserRepo = { updateMfaGraceStartedAt: vi.fn().mockResolvedValue(undefined) }; + mockLogger = { error: vi.fn(), warn: vi.fn() }; + handler = new LoginUserHandler( + mockTokenService as any, + mockChallengeRepo as any, + mockUserRepo as any, + mockLogger as any, + ); }); - it('generates token pair with correct payload when MFA not required', async () => { + it('generates token pair with mfa=none for non-required role when MFA not required', async () => { const command = new LoginUserCommand('user-1', '0912345678', 'BUYER', false); const result = await handler.execute(command); - expect(result).toEqual({ requiresMfa: false, tokens: tokenPair }); + expect(result).toEqual({ requiresMfa: false, tokens: tokenPair, mfaGraceRemainingDays: undefined }); expect(mockTokenService.generateTokenPair).toHaveBeenCalledWith({ sub: 'user-1', phone: '0912345678', role: 'BUYER', + mfa: 'none', }); }); - it('creates MFA challenge when MFA is required', async () => { + it('creates MFA challenge when MFA is required (user already enrolled)', async () => { const command = new LoginUserCommand('user-1', '0912345678', 'BUYER', true); const result = await handler.execute(command); @@ -49,7 +59,7 @@ describe('LoginUserHandler', () => { ); }); - it('passes AGENT role correctly', async () => { + it('AGENT role does not require MFA — issues mfa=none claim', async () => { const command = new LoginUserCommand('user-2', '0987654321', 'AGENT'); await handler.execute(command); @@ -57,17 +67,51 @@ describe('LoginUserHandler', () => { sub: 'user-2', phone: '0987654321', role: 'AGENT', + mfa: 'none', }); }); - it('passes ADMIN role correctly', async () => { - const command = new LoginUserCommand('admin-1', '0901234567', 'ADMIN'); - await handler.execute(command); + it('ADMIN without TOTP enters grace period on first login under enforcement', async () => { + const command = new LoginUserCommand( + 'admin-1', + '0901234567', + 'ADMIN', + false, + false, // totpEnabled + null, // mfaGraceStartedAt — first login + ); + const result = await handler.execute(command); + // Grace was started lazily + expect(mockUserRepo.updateMfaGraceStartedAt).toHaveBeenCalledWith('admin-1', expect.any(Date)); + expect(result.mfaGraceRemainingDays).toBe(14); expect(mockTokenService.generateTokenPair).toHaveBeenCalledWith({ sub: 'admin-1', phone: '0901234567', role: 'ADMIN', + mfa: 'grace', + }); + }); + + it('ADMIN past grace window receives mfa=enrollment_required claim', async () => { + const longAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000); // 30 days ago + const command = new LoginUserCommand( + 'admin-1', + '0901234567', + 'ADMIN', + false, + false, + longAgo, + ); + const result = await handler.execute(command); + + expect(mockUserRepo.updateMfaGraceStartedAt).not.toHaveBeenCalled(); + expect(result.mfaGraceRemainingDays).toBe(0); + expect(mockTokenService.generateTokenPair).toHaveBeenCalledWith({ + sub: 'admin-1', + phone: '0901234567', + role: 'ADMIN', + mfa: 'enrollment_required', }); }); }); diff --git a/apps/api/src/modules/auth/application/commands/login-user/login-user.command.ts b/apps/api/src/modules/auth/application/commands/login-user/login-user.command.ts index 46422df..0e36eff 100644 --- a/apps/api/src/modules/auth/application/commands/login-user/login-user.command.ts +++ b/apps/api/src/modules/auth/application/commands/login-user/login-user.command.ts @@ -4,5 +4,7 @@ export class LoginUserCommand { public readonly phone: string, public readonly role: string, public readonly isMfaRequired: boolean = false, + public readonly totpEnabled: boolean = false, + public readonly mfaGraceStartedAt: Date | null = null, ) {} } diff --git a/apps/api/src/modules/auth/application/commands/login-user/login-user.handler.ts b/apps/api/src/modules/auth/application/commands/login-user/login-user.handler.ts index bcca12a..349d247 100644 --- a/apps/api/src/modules/auth/application/commands/login-user/login-user.handler.ts +++ b/apps/api/src/modules/auth/application/commands/login-user/login-user.handler.ts @@ -1,12 +1,18 @@ import { Inject, InternalServerErrorException } from '@nestjs/common'; import { CommandHandler, type ICommandHandler } from '@nestjs/cqrs'; +import { type UserRole } from '@prisma/client'; import { createId } from '@paralleldrive/cuid2'; import { LoggerService, DomainException } from '@modules/shared'; +import { MFA_GRACE_PERIOD_DAYS, MFA_REQUIRED_ROLES } from '../../../domain/mfa-policy'; import { MFA_CHALLENGE_REPOSITORY, type IMfaChallengeRepository, } from '../../../domain/repositories/mfa-challenge.repository'; -import { TokenService, type TokenPair } from '../../../infrastructure/services/token.service'; +import { + USER_REPOSITORY, + type IUserRepository, +} from '../../../domain/repositories/user.repository'; +import { TokenService, type MfaClaim, type TokenPair } from '../../../infrastructure/services/token.service'; import { LoginUserCommand } from './login-user.command'; const MFA_CHALLENGE_TTL_MINUTES = 5; @@ -15,6 +21,7 @@ export interface LoginResult { requiresMfa: boolean; challengeId?: string; tokens?: TokenPair; + mfaGraceRemainingDays?: number; } @CommandHandler(LoginUserCommand) @@ -23,12 +30,14 @@ export class LoginUserHandler implements ICommandHandler { private readonly tokenService: TokenService, @Inject(MFA_CHALLENGE_REPOSITORY) private readonly challengeRepo: IMfaChallengeRepository, + @Inject(USER_REPOSITORY) + private readonly userRepo: IUserRepository, private readonly logger: LoggerService, ) {} async execute(command: LoginUserCommand): Promise { try { - // If MFA is required, create a challenge instead of tokens + // If MFA is required (user already enrolled), create a challenge if (command.isMfaRequired) { const challengeId = createId(); const expiresAt = new Date(); @@ -50,16 +59,32 @@ export class LoginUserHandler implements ICommandHandler { }; } - // No MFA — issue tokens directly + // Determine MFA claim for non-enrolled users + const roleRequiresMfa = MFA_REQUIRED_ROLES.includes(command.role as UserRole); + + let mfaClaim: MfaClaim = 'none'; + let mfaGraceRemainingDays: number | undefined; + + if (roleRequiresMfa && !command.totpEnabled) { + const result = await this.resolveMfaGraceClaim( + command.userId, + command.mfaGraceStartedAt, + ); + mfaClaim = result.claim; + mfaGraceRemainingDays = result.remainingDays; + } + const tokens = await this.tokenService.generateTokenPair({ sub: command.userId, phone: command.phone, role: command.role, + mfa: mfaClaim, }); return { requiresMfa: false, tokens, + mfaGraceRemainingDays, }; } catch (error) { if (error instanceof DomainException) throw error; @@ -71,5 +96,33 @@ export class LoginUserHandler implements ICommandHandler { throw new InternalServerErrorException('Không thể tạo phiên đăng nhập, vui lòng thử lại'); } } -} + /** + * Lazy-initialises mfaGraceStartedAt if the role requires MFA but + * the user hasn't enrolled yet. Returns the appropriate MFA claim + * and the number of grace days remaining (if any). + */ + private async resolveMfaGraceClaim( + userId: string, + mfaGraceStartedAt: Date | null, + ): Promise<{ claim: MfaClaim; remainingDays?: number }> { + const now = new Date(); + + if (!mfaGraceStartedAt) { + // First login since enforcement — start the grace period + await this.userRepo.updateMfaGraceStartedAt(userId, now); + return { claim: 'grace', remainingDays: MFA_GRACE_PERIOD_DAYS }; + } + + const elapsedMs = now.getTime() - mfaGraceStartedAt.getTime(); + const elapsedDays = elapsedMs / (1000 * 60 * 60 * 24); + const remainingDays = Math.max(0, Math.ceil(MFA_GRACE_PERIOD_DAYS - elapsedDays)); + + if (remainingDays > 0) { + return { claim: 'grace', remainingDays }; + } + + // Grace period expired — enrollment is now mandatory + return { claim: 'enrollment_required', remainingDays: 0 }; + } +} diff --git a/apps/api/src/modules/auth/domain/entities/user.entity.ts b/apps/api/src/modules/auth/domain/entities/user.entity.ts index ecd5180..b42b627 100644 --- a/apps/api/src/modules/auth/domain/entities/user.entity.ts +++ b/apps/api/src/modules/auth/domain/entities/user.entity.ts @@ -22,6 +22,8 @@ export interface UserProps { totpEnabled: boolean; totpBackupCodes: string[]; totpEnabledAt: Date | null; + mfaGraceStartedAt: Date | null; + mfaLastVerifiedAt: Date | null; } export class UserEntity extends AggregateRoot { @@ -39,6 +41,8 @@ export class UserEntity extends AggregateRoot { private _totpEnabled: boolean; private _totpBackupCodes: string[]; private _totpEnabledAt: Date | null; + private _mfaGraceStartedAt: Date | null; + private _mfaLastVerifiedAt: Date | null; constructor(id: string, props: UserProps, createdAt?: Date, updatedAt?: Date) { super(id, createdAt, updatedAt); @@ -56,6 +60,8 @@ export class UserEntity extends AggregateRoot { this._totpEnabled = props.totpEnabled; this._totpBackupCodes = props.totpBackupCodes; this._totpEnabledAt = props.totpEnabledAt; + this._mfaGraceStartedAt = props.mfaGraceStartedAt; + this._mfaLastVerifiedAt = props.mfaLastVerifiedAt; } get email(): Email | null { return this._email; } @@ -72,6 +78,8 @@ export class UserEntity extends AggregateRoot { get totpEnabled(): boolean { return this._totpEnabled; } get totpBackupCodes(): string[] { return this._totpBackupCodes; } get totpEnabledAt(): Date | null { return this._totpEnabledAt; } + get mfaGraceStartedAt(): Date | null { return this._mfaGraceStartedAt; } + get mfaLastVerifiedAt(): Date | null { return this._mfaLastVerifiedAt; } static createNew( id: string, @@ -96,6 +104,8 @@ export class UserEntity extends AggregateRoot { totpEnabled: false, totpBackupCodes: [], totpEnabledAt: null, + mfaGraceStartedAt: null, + mfaLastVerifiedAt: null, }); user.addDomainEvent(new UserRegisteredEvent(id, phone.value, role)); @@ -133,6 +143,8 @@ export class UserEntity extends AggregateRoot { totpEnabled: false, totpBackupCodes: [], totpEnabledAt: null, + mfaGraceStartedAt: null, + mfaLastVerifiedAt: null, }); user.addDomainEvent(new UserRegisteredEvent(id, phone.value, role)); diff --git a/apps/api/src/modules/auth/domain/mfa-policy.ts b/apps/api/src/modules/auth/domain/mfa-policy.ts new file mode 100644 index 0000000..f515d60 --- /dev/null +++ b/apps/api/src/modules/auth/domain/mfa-policy.ts @@ -0,0 +1,28 @@ +import { UserRole } from '@prisma/client'; + +/** + * MFA enrolment policy — central source of truth for which roles require + * TOTP and how long the grace period lasts. + * + * Backed by `User.mfaGraceStartedAt` and `User.mfaLastVerifiedAt` columns. + * + * Policy summary: + * - On first login under enforcement, `mfaGraceStartedAt` is stamped. + * - For `MFA_GRACE_PERIOD_DAYS` after that timestamp, the user keeps full + * access but receives `mfa: 'grace'` in their JWT (UI nudges enrollment). + * - After grace expires, the JWT carries `mfa: 'enrollment_required'` and + * sensitive routes (admin guards) reject until the user enrols. + */ + +/** Roles for which TOTP is mandatory after the grace window expires. */ +export const MFA_REQUIRED_ROLES: ReadonlyArray = ['ADMIN']; + +/** Length of the grace window before MFA enrolment becomes mandatory. */ +export const MFA_GRACE_PERIOD_DAYS = 14; + +/** + * Re-auth window for "step-up" admin operations (e.g. user impersonation, + * mass actions). After this many minutes since `mfaLastVerifiedAt`, the + * admin re-auth interceptor must challenge again. + */ +export const MFA_REAUTH_WINDOW_MINUTES = 15; diff --git a/apps/api/src/modules/auth/domain/repositories/user.repository.ts b/apps/api/src/modules/auth/domain/repositories/user.repository.ts index d916afd..65cc91b 100644 --- a/apps/api/src/modules/auth/domain/repositories/user.repository.ts +++ b/apps/api/src/modules/auth/domain/repositories/user.repository.ts @@ -12,4 +12,6 @@ export interface IUserRepository { updateMfaEnabled(userId: string, enabled: boolean, secret: string, backupCodes: string[]): Promise; updateMfaDisabled(userId: string): Promise; updateBackupCodes(userId: string, backupCodes: string[]): Promise; + updateMfaGraceStartedAt(userId: string, date: Date): Promise; + updateMfaLastVerifiedAt(userId: string, date: Date): Promise; } diff --git a/apps/api/src/modules/auth/infrastructure/__tests__/jwt-rotation.spec.ts b/apps/api/src/modules/auth/infrastructure/__tests__/jwt-rotation.spec.ts new file mode 100644 index 0000000..db0dfa8 --- /dev/null +++ b/apps/api/src/modules/auth/infrastructure/__tests__/jwt-rotation.spec.ts @@ -0,0 +1,27 @@ +import { sign as jwtSign } from 'jsonwebtoken'; +import { describe, it, expect } from 'vitest'; +import { verifyWithRotation, makeSecretOrKeyProvider } from '../utils/jwt-rotation'; + +const P = 'primary-secret-long-enough-for-hmac-signing-32!!'; +const Q = 'previous-secret-long-enough-for-hmac-signing-32!'; +const U = 'unknown-secret-long-enough-for-hmac-signing-32!!'; +const O = { audience: 'goodgo-api', issuer: 'goodgo-platform', expiresIn: '15m' } as const; +const D = { sub: 'u1', phone: '0900000000', role: 'BUYER' }; + +describe('verifyWithRotation', () => { + it('succeeds with primary', () => { expect(verifyWithRotation(jwtSign(D, P, O), P, undefined)).toMatchObject(D); }); + it('falls back to previous', () => { expect(verifyWithRotation(jwtSign(D, Q, O), P, Q)).toMatchObject(D); }); + it('null when both fail', () => { expect(verifyWithRotation(jwtSign(D, U, O), P, Q)).toBeNull(); }); + it('null without previous', () => { expect(verifyWithRotation(jwtSign(D, U, O), P, undefined)).toBeNull(); }); + it('null for expired', () => { expect(verifyWithRotation(jwtSign(D, P, { ...O, expiresIn: '-1s' }), P, undefined)).toBeNull(); }); + it('null for wrong audience', () => { expect(verifyWithRotation(jwtSign(D, P, { ...O, audience: 'x' }), P, undefined)).toBeNull(); }); +}); + +describe('makeSecretOrKeyProvider', () => { + const call = (p: ReturnType, t: string) => + new Promise<{ err: Error | null; secret?: string }>((r) => p({}, t, (e, s) => r({ err: e, secret: s }))); + + it('returns primary for primary-signed', async () => { const r = await call(makeSecretOrKeyProvider(P, Q), jwtSign(D, P, O)); expect(r.secret).toBe(P); }); + it('returns previous for previous-signed', async () => { const r = await call(makeSecretOrKeyProvider(P, Q), jwtSign(D, Q, O)); expect(r.secret).toBe(Q); }); + it('returns primary when both fail', async () => { const r = await call(makeSecretOrKeyProvider(P, Q), jwtSign(D, U, O)); expect(r.secret).toBe(P); }); +}); diff --git a/apps/api/src/modules/auth/infrastructure/__tests__/local.strategy.spec.ts b/apps/api/src/modules/auth/infrastructure/__tests__/local.strategy.spec.ts index 8d01bd1..10a89af 100644 --- a/apps/api/src/modules/auth/infrastructure/__tests__/local.strategy.spec.ts +++ b/apps/api/src/modules/auth/infrastructure/__tests__/local.strategy.spec.ts @@ -160,6 +160,8 @@ describe('LocalStrategy', () => { phone: '+84912345678', role: 'BUYER', isMfaRequired: false, + totpEnabled: false, + mfaGraceStartedAt: undefined, }); }); diff --git a/apps/api/src/modules/auth/infrastructure/__tests__/token.service.spec.ts b/apps/api/src/modules/auth/infrastructure/__tests__/token.service.spec.ts index a84c7b9..c956bef 100644 --- a/apps/api/src/modules/auth/infrastructure/__tests__/token.service.spec.ts +++ b/apps/api/src/modules/auth/infrastructure/__tests__/token.service.spec.ts @@ -1,158 +1,61 @@ +import { sign as jwtSign } from 'jsonwebtoken'; import { type IRefreshTokenRepository, type RefreshTokenRecord } from '../../domain/repositories/refresh-token.repository'; import { TokenService } from '../services/token.service'; +const PRIMARY_SECRET = 'primary-secret-that-is-long-enough-for-tests-32chars!'; +const PREVIOUS_SECRET = 'previous-secret-that-is-long-enough-for-tests-32chars!'; +const JWT_SIGN_OPTS = { audience: 'goodgo-api', issuer: 'goodgo-platform', expiresIn: '15m' } as const; + describe('TokenService', () => { let service: TokenService; let mockJwtService: { sign: ReturnType; verify: ReturnType }; let mockRefreshTokenRepo: { [K in keyof IRefreshTokenRepository]: ReturnType }; - const payload = { sub: 'user-1', phone: '0912345678', role: 'BUYER' }; beforeEach(() => { - mockJwtService = { - sign: vi.fn().mockReturnValue('signed-jwt'), - verify: vi.fn(), - }; - mockRefreshTokenRepo = { - create: vi.fn().mockResolvedValue({} as RefreshTokenRecord), - findByToken: vi.fn(), - revokeByFamily: vi.fn().mockResolvedValue(undefined), - revokeAllForUser: vi.fn().mockResolvedValue(undefined), - deleteExpired: vi.fn(), - }; - - service = new TokenService( - mockJwtService as any, - mockRefreshTokenRepo as any, - ); + process.env['JWT_SECRET'] = PRIMARY_SECRET; + delete process.env['JWT_SECRET_PREVIOUS']; + mockJwtService = { sign: vi.fn().mockReturnValue('signed-jwt'), verify: vi.fn() }; + mockRefreshTokenRepo = { create: vi.fn().mockResolvedValue({} as RefreshTokenRecord), findByToken: vi.fn(), revokeByFamily: vi.fn().mockResolvedValue(undefined), revokeAllForUser: vi.fn().mockResolvedValue(undefined), deleteExpired: vi.fn() }; + service = new TokenService(mockJwtService as any, mockRefreshTokenRepo as any); }); describe('generateTokenPair', () => { it('returns access token, refresh token with family prefix, and expiresIn', async () => { const result = await service.generateTokenPair(payload); - expect(result.accessToken).toBe('signed-jwt'); expect(result.refreshToken).toContain('.'); expect(result.expiresIn).toBe(900); - expect(mockJwtService.sign).toHaveBeenCalledWith(payload); - expect(mockRefreshTokenRepo.create).toHaveBeenCalledWith( - expect.objectContaining({ - userId: 'user-1', - revokedAt: null, - }), - ); }); - it('creates refresh token record with 30-day expiry', async () => { await service.generateTokenPair(payload); - - const createCall = mockRefreshTokenRepo.create.mock.calls[0][0]; - const expiresAt = createCall.expiresAt as Date; - const now = new Date(); - const daysDiff = Math.round((expiresAt.getTime() - now.getTime()) / (1000 * 60 * 60 * 24)); + const expiresAt = mockRefreshTokenRepo.create.mock.calls[0][0].expiresAt as Date; + const daysDiff = Math.round((expiresAt.getTime() - Date.now()) / 86400000); expect(daysDiff).toBeGreaterThanOrEqual(29); expect(daysDiff).toBeLessThanOrEqual(31); }); }); describe('rotateRefreshToken', () => { - const makeExistingToken = (overrides?: Partial): RefreshTokenRecord => ({ - id: 'rt-1', - userId: 'user-1', - token: 'hashed-token', - family: 'old-family', - expiresAt: new Date(Date.now() + 86400000), - revokedAt: null, - createdAt: new Date(), - ...overrides, - }); - - it('rotates valid token: revokes old family, creates new token', async () => { - mockRefreshTokenRepo.findByToken.mockResolvedValue(makeExistingToken()); - mockRefreshTokenRepo.create.mockResolvedValue({} as RefreshTokenRecord); - - const result = await service.rotateRefreshToken('old-family.raw-token-hex'); - - expect(result).not.toBeNull(); - expect(result!.userId).toBe('user-1'); - expect(result!.refreshToken).toContain('.'); - expect(mockRefreshTokenRepo.revokeByFamily).toHaveBeenCalledWith('old-family'); - expect(mockRefreshTokenRepo.create).toHaveBeenCalled(); - }); - - it('returns null for malformed token (no dot separator)', async () => { - const result = await service.rotateRefreshToken('no-dot-separator'); - expect(result).toBeNull(); - }); - - it('returns null and revokes family when token not found (reuse attack)', async () => { - mockRefreshTokenRepo.findByToken.mockResolvedValue(null); - - const result = await service.rotateRefreshToken('suspect-family.unknown-token'); - - expect(result).toBeNull(); - expect(mockRefreshTokenRepo.revokeByFamily).toHaveBeenCalledWith('suspect-family'); - }); - - it('returns null and revokes family when token is already revoked', async () => { - mockRefreshTokenRepo.findByToken.mockResolvedValue( - makeExistingToken({ revokedAt: new Date() }), - ); - - const result = await service.rotateRefreshToken('old-family.revoked-token'); - - expect(result).toBeNull(); - expect(mockRefreshTokenRepo.revokeByFamily).toHaveBeenCalled(); - }); - - it('returns null and revokes family when token is expired', async () => { - mockRefreshTokenRepo.findByToken.mockResolvedValue( - makeExistingToken({ expiresAt: new Date(Date.now() - 86400000) }), - ); - - const result = await service.rotateRefreshToken('old-family.expired-token'); - - expect(result).toBeNull(); - expect(mockRefreshTokenRepo.revokeByFamily).toHaveBeenCalled(); - }); - - it('returns null for empty family segment', async () => { - const result = await service.rotateRefreshToken('.some-raw-token'); - expect(result).toBeNull(); - }); - - it('returns null for empty raw token segment', async () => { - const result = await service.rotateRefreshToken('some-family.'); - expect(result).toBeNull(); - }); + const makeTok = (o?: Partial): RefreshTokenRecord => ({ id: 'rt-1', userId: 'user-1', token: 'h', family: 'old-family', expiresAt: new Date(Date.now() + 86400000), revokedAt: null, createdAt: new Date(), ...o }); + it('rotates valid token', async () => { mockRefreshTokenRepo.findByToken.mockResolvedValue(makeTok()); mockRefreshTokenRepo.create.mockResolvedValue({} as RefreshTokenRecord); const r = await service.rotateRefreshToken('old-family.raw'); expect(r).not.toBeNull(); expect(r!.userId).toBe('user-1'); }); + it('null for malformed', async () => { expect(await service.rotateRefreshToken('nodot')).toBeNull(); }); + it('null + revoke when not found', async () => { mockRefreshTokenRepo.findByToken.mockResolvedValue(null); expect(await service.rotateRefreshToken('f.t')).toBeNull(); expect(mockRefreshTokenRepo.revokeByFamily).toHaveBeenCalledWith('f'); }); + it('null when revoked', async () => { mockRefreshTokenRepo.findByToken.mockResolvedValue(makeTok({ revokedAt: new Date() })); expect(await service.rotateRefreshToken('old-family.t')).toBeNull(); }); + it('null when expired', async () => { mockRefreshTokenRepo.findByToken.mockResolvedValue(makeTok({ expiresAt: new Date(Date.now() - 86400000) })); expect(await service.rotateRefreshToken('old-family.t')).toBeNull(); }); + it('null for empty family', async () => { expect(await service.rotateRefreshToken('.raw')).toBeNull(); }); + it('null for empty raw', async () => { expect(await service.rotateRefreshToken('fam.')).toBeNull(); }); }); - describe('generateAccessToken', () => { - it('delegates to jwtService.sign', () => { - const token = service.generateAccessToken(payload); - expect(token).toBe('signed-jwt'); - expect(mockJwtService.sign).toHaveBeenCalledWith(payload); - }); - }); - - describe('revokeAllUserTokens', () => { - it('revokes all tokens for a user', async () => { - await service.revokeAllUserTokens('user-1'); - expect(mockRefreshTokenRepo.revokeAllForUser).toHaveBeenCalledWith('user-1'); - }); - }); + describe('generateAccessToken', () => { it('delegates to jwtService.sign', () => { expect(service.generateAccessToken(payload)).toBe('signed-jwt'); }); }); + describe('revokeAllUserTokens', () => { it('revokes', async () => { await service.revokeAllUserTokens('user-1'); expect(mockRefreshTokenRepo.revokeAllForUser).toHaveBeenCalledWith('user-1'); }); }); describe('verifyAccessToken', () => { - it('returns decoded payload for valid token', () => { - mockJwtService.verify.mockReturnValue(payload); - const result = service.verifyAccessToken('valid-jwt'); - expect(result).toEqual(payload); - }); - - it('returns null for invalid token', () => { - mockJwtService.verify.mockImplementation(() => { throw new Error('invalid'); }); - const result = service.verifyAccessToken('bad-jwt'); - expect(result).toBeNull(); - }); + function svc(p: string, q?: string) { const o = process.env['JWT_SECRET']; const oq = process.env['JWT_SECRET_PREVIOUS']; process.env['JWT_SECRET'] = p; if (q) process.env['JWT_SECRET_PREVIOUS'] = q; else delete process.env['JWT_SECRET_PREVIOUS']; const s = new TokenService(mockJwtService as any, mockRefreshTokenRepo as any); if (o) process.env['JWT_SECRET'] = o; if (oq) process.env['JWT_SECRET_PREVIOUS'] = oq; else delete process.env['JWT_SECRET_PREVIOUS']; return s; } + it('primary succeeds', () => { expect(service.verifyAccessToken(jwtSign(payload, PRIMARY_SECRET, JWT_SIGN_OPTS))).toMatchObject(payload); }); + it('fallback to previous', () => { expect(svc(PRIMARY_SECRET, PREVIOUS_SECRET).verifyAccessToken(jwtSign(payload, PREVIOUS_SECRET, JWT_SIGN_OPTS))).toMatchObject(payload); }); + it('null when both fail', () => { expect(svc(PRIMARY_SECRET, PREVIOUS_SECRET).verifyAccessToken(jwtSign(payload, 'unknown-secret-that-is-long-enough-for-test!!!', JWT_SIGN_OPTS))).toBeNull(); }); + it('null for garbage', () => { expect(service.verifyAccessToken('garbage')).toBeNull(); }); + it('null for expired', () => { expect(service.verifyAccessToken(jwtSign(payload, PRIMARY_SECRET, { ...JWT_SIGN_OPTS, expiresIn: '-1s' }))).toBeNull(); }); }); }); diff --git a/apps/api/src/modules/auth/infrastructure/repositories/prisma-user.repository.ts b/apps/api/src/modules/auth/infrastructure/repositories/prisma-user.repository.ts index 91e591a..35d22c9 100644 --- a/apps/api/src/modules/auth/infrastructure/repositories/prisma-user.repository.ts +++ b/apps/api/src/modules/auth/infrastructure/repositories/prisma-user.repository.ts @@ -123,6 +123,14 @@ export class PrismaUserRepository implements IUserRepository { }); } + async updateMfaGraceStartedAt(userId: string, date: Date): Promise { + await this.prisma.user.update({ where: { id: userId }, data: { mfaGraceStartedAt: date } }); + } + + async updateMfaLastVerifiedAt(userId: string, date: Date): Promise { + await this.prisma.user.update({ where: { id: userId }, data: { mfaLastVerifiedAt: date } }); + } + private toDomain(raw: PrismaUser): UserEntity { const phone = Phone.create(raw.phone).unwrap(); const email = raw.email ? Email.create(raw.email).unwrap() : null; @@ -145,6 +153,8 @@ export class PrismaUserRepository implements IUserRepository { totpEnabled: raw.totpEnabled, totpBackupCodes: raw.totpBackupCodes, totpEnabledAt: raw.totpEnabledAt, + mfaGraceStartedAt: raw.mfaGraceStartedAt, + mfaLastVerifiedAt: raw.mfaLastVerifiedAt, }; return new UserEntity(raw.id, props, raw.createdAt, raw.updatedAt); diff --git a/apps/api/src/modules/auth/infrastructure/services/oauth.service.ts b/apps/api/src/modules/auth/infrastructure/services/oauth.service.ts index d19e482..31c1d53 100644 --- a/apps/api/src/modules/auth/infrastructure/services/oauth.service.ts +++ b/apps/api/src/modules/auth/infrastructure/services/oauth.service.ts @@ -121,10 +121,13 @@ export class OAuthService { kycStatus: 'NONE', kycData: null, isActive: true, + deletedAt: null, totpSecret: null, totpEnabled: false, totpBackupCodes: [], totpEnabledAt: null, + mfaGraceStartedAt: null, + mfaLastVerifiedAt: null, }); await this.userRepo.save(user); diff --git a/apps/api/src/modules/auth/infrastructure/services/token.service.ts b/apps/api/src/modules/auth/infrastructure/services/token.service.ts index 744da7a..13cc91f 100644 --- a/apps/api/src/modules/auth/infrastructure/services/token.service.ts +++ b/apps/api/src/modules/auth/infrastructure/services/token.service.ts @@ -5,11 +5,25 @@ import { REFRESH_TOKEN_REPOSITORY, type IRefreshTokenRepository, } from '../../domain/repositories/refresh-token.repository'; +import { verifyWithRotation } from '../utils/jwt-rotation'; + +/** + * MFA enrolment status carried inside the access-token JWT. + * + * - `none` — role does not require MFA, or user is enrolled and + * has just verified (`requiresMfa === true` flow). + * - `grace` — role requires MFA but the user is inside the + * enforcement grace window. UI nudges enrollment. + * - `enrollment_required`— grace window has expired; backend guards on + * sensitive routes must reject and force enrollment. + */ +export type MfaClaim = 'none' | 'grace' | 'enrollment_required'; export interface JwtPayload { sub: string; phone: string; role: string; + mfa?: MfaClaim; } export interface TokenPair { @@ -26,102 +40,60 @@ export interface RotateResult { @Injectable() export class TokenService { private readonly REFRESH_TOKEN_EXPIRY_DAYS = 30; + private readonly primarySecret: string; + private readonly previousSecret: string | undefined; constructor( private readonly jwtService: JwtService, @Inject(REFRESH_TOKEN_REPOSITORY) private readonly refreshTokenRepo: IRefreshTokenRepository, - ) {} + ) { + const secret = process.env['JWT_SECRET']; + if (!secret) { + throw new Error('JWT_SECRET environment variable is required'); + } + this.primarySecret = secret; + this.previousSecret = process.env['JWT_SECRET_PREVIOUS'] || undefined; + } async generateTokenPair(payload: JwtPayload): Promise { const accessToken = this.jwtService.sign(payload); - const rawRefreshToken = randomBytes(64).toString('hex'); const hashedToken = this.hashToken(rawRefreshToken); const family = randomBytes(16).toString('hex'); - const expiresAt = new Date(); expiresAt.setDate(expiresAt.getDate() + this.REFRESH_TOKEN_EXPIRY_DAYS); - - await this.refreshTokenRepo.create({ - userId: payload.sub, - token: hashedToken, - family, - expiresAt, - revokedAt: null, - }); - - return { - accessToken, - refreshToken: `${family}.${rawRefreshToken}`, - expiresIn: 900, - }; + await this.refreshTokenRepo.create({ userId: payload.sub, token: hashedToken, family, expiresAt, revokedAt: null }); + return { accessToken, refreshToken: `${family}.${rawRefreshToken}`, expiresIn: 900 }; } async rotateRefreshToken(refreshToken: string): Promise { const dotIndex = refreshToken.indexOf('.'); if (dotIndex === -1) return null; - const family = refreshToken.substring(0, dotIndex); const rawToken = refreshToken.substring(dotIndex + 1); if (!family || !rawToken) return null; - const hashedToken = this.hashToken(rawToken); const existing = await this.refreshTokenRepo.findByToken(hashedToken); - - if (!existing) { - // Possible token reuse attack — revoke entire family - await this.refreshTokenRepo.revokeByFamily(family); - return null; - } - - if (existing.revokedAt || existing.expiresAt < new Date()) { - await this.refreshTokenRepo.revokeByFamily(existing.family); - return null; - } - - // Revoke all tokens in this family + if (!existing) { await this.refreshTokenRepo.revokeByFamily(family); return null; } + if (existing.revokedAt || existing.expiresAt < new Date()) { await this.refreshTokenRepo.revokeByFamily(existing.family); return null; } await this.refreshTokenRepo.revokeByFamily(existing.family); - - // Create new token in a new family const newRawToken = randomBytes(64).toString('hex'); const newHashedToken = this.hashToken(newRawToken); const newFamily = randomBytes(16).toString('hex'); - const expiresAt = new Date(); expiresAt.setDate(expiresAt.getDate() + this.REFRESH_TOKEN_EXPIRY_DAYS); - - await this.refreshTokenRepo.create({ - userId: existing.userId, - token: newHashedToken, - family: newFamily, - expiresAt, - revokedAt: null, - }); - - return { - userId: existing.userId, - refreshToken: `${newFamily}.${newRawToken}`, - }; + await this.refreshTokenRepo.create({ userId: existing.userId, token: newHashedToken, family: newFamily, expiresAt, revokedAt: null }); + return { userId: existing.userId, refreshToken: `${newFamily}.${newRawToken}` }; } - generateAccessToken(payload: JwtPayload): string { - return this.jwtService.sign(payload); - } + generateAccessToken(payload: JwtPayload): string { return this.jwtService.sign(payload); } - async revokeAllUserTokens(userId: string): Promise { - await this.refreshTokenRepo.revokeAllForUser(userId); - } + async revokeAllUserTokens(userId: string): Promise { await this.refreshTokenRepo.revokeAllForUser(userId); } verifyAccessToken(token: string): JwtPayload | null { - try { - return this.jwtService.verify(token); - } catch { - return null; - } + return verifyWithRotation(token, this.primarySecret, this.previousSecret); } - private hashToken(token: string): string { - return createHash('sha256').update(token).digest('hex'); - } + private hashToken(token: string): string { return createHash('sha256').update(token).digest('hex'); } } diff --git a/apps/api/src/modules/auth/infrastructure/strategies/jwt.strategy.ts b/apps/api/src/modules/auth/infrastructure/strategies/jwt.strategy.ts index 57009fd..ae8c9c7 100644 --- a/apps/api/src/modules/auth/infrastructure/strategies/jwt.strategy.ts +++ b/apps/api/src/modules/auth/infrastructure/strategies/jwt.strategy.ts @@ -5,6 +5,7 @@ import { ExtractJwt, Strategy } from 'passport-jwt'; // eslint-disable-next-line @typescript-eslint/consistent-type-imports -- NestJS DI requires value imports for emitDecoratorMetadata import { PrismaService, RedisService } from '@modules/shared'; import { type JwtPayload } from '../services/token.service'; +import { makeSecretOrKeyProvider } from '../utils/jwt-rotation'; function extractJwtFromCookieOrHeader(req: Request): string | null { const cookieToken = req.cookies?.['access_token'] as string | undefined; @@ -12,88 +13,33 @@ function extractJwtFromCookieOrHeader(req: Request): string | null { return ExtractJwt.fromAuthHeaderAsBearerToken()(req); } -/** Cached user status — JSON encoded in Redis. */ -interface CachedUserStatus { - isActive: boolean; - deletedAt: string | null; -} +interface CachedUserStatus { isActive: boolean; deletedAt: string | null; } -/** - * Redis key prefix for user status cache. Versioned so that a schema - * change can invalidate all stale entries by bumping the version. - */ export const USER_STATUS_CACHE_PREFIX = 'auth:user_status:v1'; -/** TTL for cached user status (seconds). */ export const USER_STATUS_CACHE_TTL_SECONDS = 60; @Injectable() export class JwtStrategy extends PassportStrategy(Strategy) { - constructor( - private readonly prisma: PrismaService, - private readonly redis: RedisService, - ) { + constructor(private readonly prisma: PrismaService, private readonly redis: RedisService) { const jwtSecret = process.env['JWT_SECRET']; - if (!jwtSecret) { - throw new Error('JWT_SECRET environment variable is required'); - } - - super({ - jwtFromRequest: extractJwtFromCookieOrHeader, - ignoreExpiration: false, - secretOrKey: jwtSecret, - audience: 'goodgo-api', - issuer: 'goodgo-platform', - }); + if (!jwtSecret) throw new Error('JWT_SECRET environment variable is required'); + const previousSecret = process.env['JWT_SECRET_PREVIOUS'] || undefined; + super({ jwtFromRequest: extractJwtFromCookieOrHeader, ignoreExpiration: false, secretOrKeyProvider: makeSecretOrKeyProvider(jwtSecret, previousSecret), audience: 'goodgo-api', issuer: 'goodgo-platform' }); } async validate(payload: JwtPayload): Promise { const status = await this.loadUserStatus(payload.sub); - if (!status || !status.isActive || status.deletedAt !== null) { - throw new UnauthorizedException('User account is inactive or deleted'); - } + if (!status || !status.isActive || status.deletedAt !== null) throw new UnauthorizedException('User account is inactive or deleted'); return { sub: payload.sub, phone: payload.phone, role: payload.role }; } - /** - * Loads user status from Redis cache if present, otherwise from DB and - * populates the cache with a 60 s TTL. Redis failures are non-fatal: - * we fall back to DB so a Redis outage cannot lock out all users. - * - * Returns null only when the user does not exist in the DB. - */ private async loadUserStatus(userId: string): Promise { const cacheKey = `${USER_STATUS_CACHE_PREFIX}:${userId}`; - - if (this.redis.isAvailable()) { - try { - const cached = await this.redis.get(cacheKey); - if (cached !== null) { - return JSON.parse(cached) as CachedUserStatus; - } - } catch { - // Swallow: degrade to DB on Redis read error. - } - } - - const user = await this.prisma.user.findUnique({ - where: { id: userId }, - select: { isActive: true, deletedAt: true }, - }); + if (this.redis.isAvailable()) { try { const cached = await this.redis.get(cacheKey); if (cached !== null) return JSON.parse(cached) as CachedUserStatus; } catch { /* swallow */ } } + const user = await this.prisma.user.findUnique({ where: { id: userId }, select: { isActive: true, deletedAt: true } }); if (!user) return null; - - const status: CachedUserStatus = { - isActive: user.isActive, - deletedAt: user.deletedAt ? user.deletedAt.toISOString() : null, - }; - - if (this.redis.isAvailable()) { - try { - await this.redis.set(cacheKey, JSON.stringify(status), USER_STATUS_CACHE_TTL_SECONDS); - } catch { - // Swallow: cache population is best-effort. - } - } - + const status: CachedUserStatus = { isActive: user.isActive, deletedAt: user.deletedAt ? user.deletedAt.toISOString() : null }; + if (this.redis.isAvailable()) { try { await this.redis.set(cacheKey, JSON.stringify(status), USER_STATUS_CACHE_TTL_SECONDS); } catch { /* swallow */ } } return status; } } diff --git a/apps/api/src/modules/auth/infrastructure/strategies/local.strategy.ts b/apps/api/src/modules/auth/infrastructure/strategies/local.strategy.ts index d222a81..fa1b5b3 100644 --- a/apps/api/src/modules/auth/infrastructure/strategies/local.strategy.ts +++ b/apps/api/src/modules/auth/infrastructure/strategies/local.strategy.ts @@ -9,6 +9,8 @@ export interface LocalStrategyResult { phone: string; role: string; isMfaRequired: boolean; + totpEnabled: boolean; + mfaGraceStartedAt: Date | null; } @Injectable() @@ -56,6 +58,8 @@ export class LocalStrategy extends PassportStrategy(Strategy) { phone: user.phone.value, role: user.role, isMfaRequired: user.totpEnabled, + totpEnabled: user.totpEnabled, + mfaGraceStartedAt: user.mfaGraceStartedAt, }; } catch (error) { if (error instanceof DomainException) throw error; diff --git a/apps/api/src/modules/auth/infrastructure/utils/jwt-rotation.ts b/apps/api/src/modules/auth/infrastructure/utils/jwt-rotation.ts new file mode 100644 index 0000000..f6c67c6 --- /dev/null +++ b/apps/api/src/modules/auth/infrastructure/utils/jwt-rotation.ts @@ -0,0 +1,21 @@ +import { verify as jwtVerify, type JwtPayload as JsonWebTokenPayload } from 'jsonwebtoken'; + +const JWT_VERIFY_OPTIONS = { audience: 'goodgo-api', issuer: 'goodgo-platform' } as const; + +export function verifyWithRotation( + token: string, primarySecret: string, previousSecret: string | undefined, +): T | null { + try { return jwtVerify(token, primarySecret, JWT_VERIFY_OPTIONS) as T; } catch { /* primary failed */ } + if (previousSecret) { try { return jwtVerify(token, previousSecret, JWT_VERIFY_OPTIONS) as T; } catch { /* both failed */ } } + return null; +} + +export function makeSecretOrKeyProvider( + primarySecret: string, previousSecret: string | undefined, +): (request: unknown, rawJwtToken: string, done: (err: Error | null, secret?: string) => void) => void { + return (_request: unknown, rawJwtToken: string, done: (err: Error | null, secret?: string) => void) => { + try { jwtVerify(rawJwtToken, primarySecret, JWT_VERIFY_OPTIONS); return done(null, primarySecret); } catch { /* primary failed */ } + if (previousSecret) { try { jwtVerify(rawJwtToken, previousSecret, JWT_VERIFY_OPTIONS); return done(null, previousSecret); } catch { /* both failed */ } } + return done(null, primarySecret); + }; +} diff --git a/apps/api/src/modules/metrics/infrastructure/__tests__/queue-metrics.collector.spec.ts b/apps/api/src/modules/metrics/infrastructure/__tests__/queue-metrics.collector.spec.ts new file mode 100644 index 0000000..b2595fd --- /dev/null +++ b/apps/api/src/modules/metrics/infrastructure/__tests__/queue-metrics.collector.spec.ts @@ -0,0 +1,93 @@ +import { Counter, Gauge, Registry } from 'prom-client'; +import { describe, expect, it, vi } from 'vitest'; +import { + QueueMetricsCollector, + type QueueLike, +} from '../queue-metrics.collector'; + +function makeMetrics() { + const registry = new Registry(); + const depth = new Gauge({ + name: 'goodgo_queue_depth', + help: 'depth', + labelNames: ['queue', 'state'], + registers: [registry], + }); + const outcomes = new Counter({ + name: 'goodgo_queue_job_outcomes_total', + help: 'outcomes', + labelNames: ['queue', 'outcome'], + registers: [registry], + }); + return { registry, depth, outcomes }; +} + +function makeQueue(name: string, counts: Record): QueueLike { + return { + name, + async getJobCounts(..._types: string[]): Promise> { + return counts; + }, + }; +} + +describe('QueueMetricsCollector', () => { + it('samples each queue and writes labelled gauge values', async () => { + const { depth, outcomes } = makeMetrics(); + const q = makeQueue('report-generation', { waiting: 3, active: 1, completed: 100, failed: 2, delayed: 0 }); + const collector = new QueueMetricsCollector([q], depth, outcomes, { + setInterval: () => 0 as unknown as ReturnType, + clearInterval: () => undefined, + }); + await collector.sampleOnce(); + const v = await depth.get(); + const byState = Object.fromEntries(v.values.map((s) => [s.labels['state'], s.value])); + expect(byState).toMatchObject({ waiting: 3, active: 1, completed: 100, failed: 2, delayed: 0 }); + }); + + it('does not throw when getJobCounts rejects (e.g. Redis down)', async () => { + const { depth, outcomes } = makeMetrics(); + const broken: QueueLike = { + name: 'broken', + async getJobCounts() { + throw new Error('redis down'); + }, + }; + const collector = new QueueMetricsCollector([broken], depth, outcomes, { + setInterval: () => 0 as unknown as ReturnType, + clearInterval: () => undefined, + }); + await expect(collector.sampleOnce()).resolves.toBeUndefined(); + }); + + it('recordJobOutcome increments the outcome counter', async () => { + const { depth, outcomes } = makeMetrics(); + const collector = new QueueMetricsCollector([], depth, outcomes, { + setInterval: () => 0 as unknown as ReturnType, + clearInterval: () => undefined, + }); + collector.recordJobOutcome('report-generation', 'completed'); + collector.recordJobOutcome('report-generation', 'failed'); + collector.recordJobOutcome('report-generation', 'completed'); + const v = await outcomes.get(); + const completed = v.values.find((s) => s.labels['outcome'] === 'completed'); + const failed = v.values.find((s) => s.labels['outcome'] === 'failed'); + expect(completed?.value).toBe(2); + expect(failed?.value).toBe(1); + }); + + it('onModuleInit schedules the timer and onModuleDestroy clears it', () => { + const { depth, outcomes } = makeMetrics(); + const setIntervalSpy = vi.fn(() => 'h' as unknown as ReturnType); + const clearIntervalSpy = vi.fn(); + const collector = new QueueMetricsCollector([], depth, outcomes, { + intervalMs: 1234, + setInterval: setIntervalSpy, + clearInterval: clearIntervalSpy, + }); + collector.onModuleInit(); + expect(setIntervalSpy).toHaveBeenCalledWith(expect.any(Function), 1234); + collector.onModuleDestroy(); + expect(clearIntervalSpy).toHaveBeenCalledWith('h'); + }); +}); diff --git a/apps/api/src/modules/metrics/infrastructure/queue-metrics.collector.ts b/apps/api/src/modules/metrics/infrastructure/queue-metrics.collector.ts new file mode 100644 index 0000000..e67006c --- /dev/null +++ b/apps/api/src/modules/metrics/infrastructure/queue-metrics.collector.ts @@ -0,0 +1,103 @@ +import { Inject, Injectable, Logger, type OnModuleDestroy, type OnModuleInit } from '@nestjs/common'; +import { InjectMetric } from '@willsoto/nestjs-prometheus'; +import type { Queue } from 'bullmq'; +import { Counter, Gauge } from 'prom-client'; +import { QUEUE_DEPTH_GAUGE, QUEUE_JOB_OUTCOMES_TOTAL } from './queue-metrics.constants'; + +/** + * Minimal subset of BullMQ's Queue surface needed by the collector. + * Defined here so unit tests can pass a plain object without a live Redis. + */ +export interface QueueLike { + readonly name: string; + getJobCounts(...types: string[]): Promise>; +} + +export interface QueueMetricsCollectorOptions { + /** Polling interval in ms. Default 5_000. */ + intervalMs?: number; + /** Inject a clock for tests; defaults to setInterval/clearInterval. */ + setInterval?: (fn: () => void, ms: number) => ReturnType; + clearInterval?: (handle: ReturnType) => void; +} + +export const QUEUE_METRICS_COLLECTOR_QUEUES = 'QUEUE_METRICS_COLLECTOR_QUEUES'; +export const QUEUE_METRICS_COLLECTOR_OPTIONS = 'QUEUE_METRICS_COLLECTOR_OPTIONS'; + +/** + * Samples every registered BullMQ queue on a timer and updates the + * `goodgo_queue_depth` gauge. The gauge carries a `state` label so a single + * metric name fans out to waiting / active / completed / failed / delayed. + * + * Job-outcome counters (`goodgo_queue_job_outcomes_total`) are incremented + * from processor hooks rather than by polling so they capture every job, not + * just the ones alive at tick time. + */ +@Injectable() +export class QueueMetricsCollector implements OnModuleInit, OnModuleDestroy { + private readonly logger = new Logger(QueueMetricsCollector.name); + private handle: ReturnType | null = null; + private readonly intervalMs: number; + private readonly setIntervalFn: NonNullable; + private readonly clearIntervalFn: NonNullable; + + constructor( + @Inject(QUEUE_METRICS_COLLECTOR_QUEUES) private readonly queues: ReadonlyArray, + @InjectMetric(QUEUE_DEPTH_GAUGE) private readonly depthGauge: Gauge, + @InjectMetric(QUEUE_JOB_OUTCOMES_TOTAL) private readonly outcomes: Counter, + @Inject(QUEUE_METRICS_COLLECTOR_OPTIONS) options: QueueMetricsCollectorOptions = {}, + ) { + this.intervalMs = options.intervalMs ?? 5_000; + this.setIntervalFn = options.setInterval ?? ((fn, ms) => setInterval(fn, ms)); + this.clearIntervalFn = options.clearInterval ?? ((handle) => clearInterval(handle)); + } + + onModuleInit(): void { + // Kick off an immediate sample so gauges are populated before the first + // timer tick — useful for /metrics scrapes that land in the first 5 s. + void this.sampleOnce(); + this.handle = this.setIntervalFn(() => { + void this.sampleOnce(); + }, this.intervalMs); + } + + onModuleDestroy(): void { + if (this.handle) { + this.clearIntervalFn(this.handle); + this.handle = null; + } + } + + /** + * Increment the outcome counter. Call from a processor's `@OnWorkerEvent` + * completed/failed hook so every job is accounted for, not just the ones + * present during a poll tick. + */ + recordJobOutcome(queueName: string, outcome: 'completed' | 'failed'): void { + this.outcomes.labels(queueName, outcome).inc(1); + } + + /** Exposed for tests. */ + async sampleOnce(): Promise { + for (const queue of this.queues) { + try { + const counts = await queue.getJobCounts('waiting', 'active', 'completed', 'failed', 'delayed'); + for (const [state, count] of Object.entries(counts)) { + this.depthGauge.labels(queue.name, state).set(count); + } + } catch (err) { + // Redis outage or queue not yet ready — log and keep going. The + // gauge retains its last value; Prometheus `rate` / `absent()` + // alerts cover the "stopped updating" case. + this.logger.warn( + `queue-metrics sample failed for ${queue.name}: ${err instanceof Error ? err.message : String(err)}`, + ); + } + } + } +} + +/** Casts a real BullMQ Queue to the minimal surface the collector needs. */ +export function asQueueLike(q: Queue): QueueLike { + return q; +} diff --git a/apps/api/src/modules/metrics/infrastructure/queue-metrics.constants.ts b/apps/api/src/modules/metrics/infrastructure/queue-metrics.constants.ts new file mode 100644 index 0000000..15c3c45 --- /dev/null +++ b/apps/api/src/modules/metrics/infrastructure/queue-metrics.constants.ts @@ -0,0 +1,29 @@ +/** + * Queue metrics — RFC-004 Phase 3 workstream 3a. + * + * Exposes Prometheus gauges for BullMQ queue depth and a counter for job + * outcomes. The collector is intentionally polling-based (not subscribing + * to bullmq events) so a single collector tick covers every queue without + * adding per-queue listener wiring. Polling cadence is small (5 s) — depth + * gauges are coarse-grained and cheap to read via `queue.getJobCounts`. + * + * Adding a new queue means: + * 1. Register it via BullModule.registerQueue (existing pattern). + * 2. Pass its name into QUEUE_METRICS_QUEUE_NAMES (or extend the + * registration helper) so the collector samples it each tick. + */ + +export const QUEUE_DEPTH_GAUGE = 'goodgo_queue_depth'; +export const QUEUE_JOB_OUTCOMES_TOTAL = 'goodgo_queue_job_outcomes_total'; + +/** + * Queues sampled by the metrics collector. Add new BullMQ queue names here + * when they are registered via BullModule.registerQueue. + * + * Keeping this as a constant (rather than scanning the Nest container) keeps + * the collector's set deterministic and trivially testable; the cost is one + * extra line of bookkeeping per queue. + */ +export const QUEUE_METRICS_QUEUE_NAMES: readonly string[] = [ + 'report-generation', +]; diff --git a/apps/api/src/modules/metrics/metrics.module.ts b/apps/api/src/modules/metrics/metrics.module.ts index 4abd71d..6b81601 100644 --- a/apps/api/src/modules/metrics/metrics.module.ts +++ b/apps/api/src/modules/metrics/metrics.module.ts @@ -1,10 +1,24 @@ -import { Module } from '@nestjs/common'; +import { BullModule, getQueueToken } from '@nestjs/bullmq'; +import { Module, type DynamicModule } from '@nestjs/common'; import { makeCounterProvider, makeHistogramProvider, makeGaugeProvider, } from '@willsoto/nestjs-prometheus'; +import type { Queue } from 'bullmq'; import { MetricsService } from './infrastructure/metrics.service'; +import { + QueueMetricsCollector, + QUEUE_METRICS_COLLECTOR_OPTIONS, + QUEUE_METRICS_COLLECTOR_QUEUES, + type QueueLike, + type QueueMetricsCollectorOptions, +} from './infrastructure/queue-metrics.collector'; +import { + QUEUE_DEPTH_GAUGE, + QUEUE_JOB_OUTCOMES_TOTAL, + QUEUE_METRICS_QUEUE_NAMES, +} from './infrastructure/queue-metrics.constants'; import { GOODGO_API_REQUEST_DURATION, GOODGO_LISTINGS_CREATED_TOTAL, @@ -141,4 +155,48 @@ import { HttpMetricsInterceptor } from './presentation/interceptors/http-metrics controllers: [WebVitalsController], exports: [MetricsService, HttpMetricsInterceptor], }) -export class MetricsModule {} +export class MetricsModule { + /** + * Register the queue-metrics collector with a fixed list of BullMQ queue + * names. Each name must already be registered via BullModule.registerQueue + * somewhere in the app (root or feature module). + * + * RFC-004 Phase 3 — workstream 3a. + */ + static withQueueMetrics( + queueNames: readonly string[] = QUEUE_METRICS_QUEUE_NAMES, + options: QueueMetricsCollectorOptions = {}, + ): DynamicModule { + const queueTokens = queueNames.map((name) => getQueueToken(name)); + return { + module: MetricsModule, + imports: [ + // Re-register each queue here so the collector can resolve them via + // BullMQ's standard token even if MetricsModule is imported before + // the feature module that owns the queue. BullMQ deduplicates the + // queue instance under the hood. + ...queueNames.map((name) => BullModule.registerQueue({ name })), + ], + providers: [ + makeGaugeProvider({ + name: QUEUE_DEPTH_GAUGE, + help: 'BullMQ queue depth by state (waiting, active, completed, failed, delayed)', + labelNames: ['queue', 'state'], + }), + makeCounterProvider({ + name: QUEUE_JOB_OUTCOMES_TOTAL, + help: 'BullMQ job outcomes (completed, failed) by queue', + labelNames: ['queue', 'outcome'], + }), + { + provide: QUEUE_METRICS_COLLECTOR_QUEUES, + inject: queueTokens, + useFactory: (...queues: Queue[]): QueueLike[] => queues, + }, + { provide: QUEUE_METRICS_COLLECTOR_OPTIONS, useValue: options }, + QueueMetricsCollector, + ], + exports: [QueueMetricsCollector], + }; + } +} diff --git a/apps/api/src/modules/queues/__tests__/bull-board-auth.middleware.spec.ts b/apps/api/src/modules/queues/__tests__/bull-board-auth.middleware.spec.ts new file mode 100644 index 0000000..d694740 --- /dev/null +++ b/apps/api/src/modules/queues/__tests__/bull-board-auth.middleware.spec.ts @@ -0,0 +1,94 @@ +import { ForbiddenException, UnauthorizedException } from '@nestjs/common'; +import { JwtService } from '@nestjs/jwt'; +import { describe, expect, it, beforeEach, afterEach, vi } from 'vitest'; +import { BullBoardAuthMiddleware } from '../bull-board-auth.middleware'; + +describe('BullBoardAuthMiddleware', () => { + const ORIGINAL_SECRET = process.env['JWT_SECRET']; + let jwtService: JwtService; + let middleware: BullBoardAuthMiddleware; + + beforeEach(() => { + process.env['JWT_SECRET'] = 'test-secret-for-bull-board-mw'; + jwtService = new JwtService({}); + middleware = new BullBoardAuthMiddleware(jwtService); + }); + + afterEach(() => { + if (ORIGINAL_SECRET === undefined) { + delete process.env['JWT_SECRET']; + } else { + process.env['JWT_SECRET'] = ORIGINAL_SECRET; + } + }); + + function makeReq(overrides: Partial<{ cookies: Record; headers: Record }> = {}): any { + return { cookies: overrides.cookies ?? {}, headers: overrides.headers ?? {} }; + } + + function signToken(payload: Record, opts: { audience?: string; issuer?: string } = {}): string { + return jwtService.sign(payload, { + secret: process.env['JWT_SECRET']!, + audience: opts.audience ?? 'goodgo-api', + issuer: opts.issuer ?? 'goodgo-platform', + }); + } + + it('rejects when no token is present', () => { + const next = vi.fn(); + expect(() => middleware.use(makeReq(), {} as any, next)).toThrow(UnauthorizedException); + expect(next).not.toHaveBeenCalled(); + }); + + it('rejects when JWT signature is invalid', () => { + const next = vi.fn(); + const req = makeReq({ headers: { authorization: 'Bearer not-a-valid-token' } }); + expect(() => middleware.use(req, {} as any, next)).toThrow(UnauthorizedException); + }); + + it('rejects when JWT audience does not match', () => { + const token = signToken({ sub: 'u1', role: 'ADMIN' }, { audience: 'wrong-aud' }); + const req = makeReq({ headers: { authorization: 'Bearer ' + token } }); + const next = vi.fn(); + expect(() => middleware.use(req, {} as any, next)).toThrow(UnauthorizedException); + }); + + it('rejects non-ADMIN role with 403', () => { + const token = signToken({ sub: 'u1', role: 'USER' }); + const req = makeReq({ headers: { authorization: 'Bearer ' + token } }); + const next = vi.fn(); + expect(() => middleware.use(req, {} as any, next)).toThrow(ForbiddenException); + }); + + it('accepts ADMIN via Authorization header', () => { + const token = signToken({ sub: 'u1', role: 'ADMIN' }); + const req = makeReq({ headers: { authorization: 'Bearer ' + token } }); + const next = vi.fn(); + middleware.use(req, {} as any, next); + expect(next).toHaveBeenCalledOnce(); + }); + + it('accepts ADMIN via access_token cookie', () => { + const token = signToken({ sub: 'u1', role: 'ADMIN' }); + const req = makeReq({ cookies: { access_token: token } }); + const next = vi.fn(); + middleware.use(req, {} as any, next); + expect(next).toHaveBeenCalledOnce(); + }); + + it('prefers cookie over header', () => { + const cookieToken = signToken({ sub: 'admin', role: 'ADMIN' }); + const headerToken = signToken({ sub: 'user', role: 'USER' }); + const req = makeReq({ cookies: { access_token: cookieToken }, headers: { authorization: 'Bearer ' + headerToken } }); + const next = vi.fn(); + middleware.use(req, {} as any, next); + expect(next).toHaveBeenCalledOnce(); + }); + + it('fails closed when JWT_SECRET unset', () => { + delete process.env['JWT_SECRET']; + const req = makeReq({ headers: { authorization: 'Bearer anything' } }); + const next = vi.fn(); + expect(() => middleware.use(req, {} as any, next)).toThrow(UnauthorizedException); + }); +}); diff --git a/apps/api/src/modules/queues/bull-board-auth.middleware.ts b/apps/api/src/modules/queues/bull-board-auth.middleware.ts new file mode 100644 index 0000000..e15e4cf --- /dev/null +++ b/apps/api/src/modules/queues/bull-board-auth.middleware.ts @@ -0,0 +1,48 @@ +import { Injectable, type NestMiddleware, UnauthorizedException, ForbiddenException } from '@nestjs/common'; +import { JwtService } from '@nestjs/jwt'; +import type { NextFunction, Request, Response } from 'express'; + +@Injectable() +export class BullBoardAuthMiddleware implements NestMiddleware { + constructor(private readonly jwtService: JwtService) {} + + use(req: Request, _res: Response, next: NextFunction): void { + const token = this.extractToken(req); + if (!token) { + throw new UnauthorizedException('Bull Board requires authentication'); + } + + const secret = process.env['JWT_SECRET']; + if (!secret) { + throw new UnauthorizedException('Server mis-configured'); + } + + let payload: { sub?: string; role?: string }; + try { + payload = this.jwtService.verify<{ sub?: string; role?: string }>(token, { + secret, + audience: 'goodgo-api', + issuer: 'goodgo-platform', + }); + } catch { + throw new UnauthorizedException('Invalid or expired token'); + } + + if (payload.role !== 'ADMIN') { + throw new ForbiddenException('Admin role required'); + } + + next(); + } + + private extractToken(req: Request): string | null { + const cookieToken = (req.cookies?.['access_token'] as string | undefined) ?? null; + if (cookieToken) return cookieToken; + + const header = req.headers.authorization; + if (header && header.startsWith('Bearer ')) { + return header.slice('Bearer '.length); + } + return null; + } +} diff --git a/apps/api/src/modules/queues/queues.module.ts b/apps/api/src/modules/queues/queues.module.ts new file mode 100644 index 0000000..ed13ec9 --- /dev/null +++ b/apps/api/src/modules/queues/queues.module.ts @@ -0,0 +1,36 @@ +import { BullModule } from '@nestjs/bullmq'; +import { type MiddlewareConsumer, Module, type NestModule, RequestMethod } from '@nestjs/common'; +import { JwtModule } from '@nestjs/jwt'; +import { BullMQAdapter } from '@bull-board/api/bullMQAdapter'; +import { ExpressAdapter } from '@bull-board/express'; +import { BullBoardModule } from '@bull-board/nestjs'; +import { QUEUE_METRICS_QUEUE_NAMES } from '../metrics/infrastructure/queue-metrics.constants'; +import { BullBoardAuthMiddleware } from './bull-board-auth.middleware'; + +@Module({ + imports: [ + JwtModule.register({}), + BullBoardModule.forRoot({ + route: '/admin/queues', + adapter: ExpressAdapter, + }), + ...QUEUE_METRICS_QUEUE_NAMES.map((name) => BullModule.registerQueue({ name })), + BullBoardModule.forFeature( + ...QUEUE_METRICS_QUEUE_NAMES.map((name) => ({ + name, + adapter: BullMQAdapter, + })), + ), + ], + providers: [BullBoardAuthMiddleware], +}) +export class QueuesModule implements NestModule { + configure(consumer: MiddlewareConsumer): void { + consumer + .apply(BullBoardAuthMiddleware) + .forRoutes( + { path: 'admin/queues', method: RequestMethod.ALL }, + { path: 'admin/queues/(.*)', method: RequestMethod.ALL }, + ); + } +} diff --git a/apps/api/src/modules/shared/infrastructure/__tests__/redis-connection.config.spec.ts b/apps/api/src/modules/shared/infrastructure/__tests__/redis-connection.config.spec.ts new file mode 100644 index 0000000..3dcd439 --- /dev/null +++ b/apps/api/src/modules/shared/infrastructure/__tests__/redis-connection.config.spec.ts @@ -0,0 +1,72 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { describeRedisTopology, getRedisConnection } from '../redis-connection.config'; + +const KEYS = [ + 'REDIS_HOST', + 'REDIS_PORT', + 'REDIS_PASSWORD', + 'REDIS_QUEUE_HOST', + 'REDIS_QUEUE_PORT', + 'REDIS_QUEUE_PASSWORD', +] as const; + +describe('redis-connection.config', () => { + let original: Record; + + beforeEach(() => { + original = Object.fromEntries(KEYS.map((k) => [k, process.env[k]])); + for (const k of KEYS) delete process.env[k]; + }); + + afterEach(() => { + for (const k of KEYS) { + if (original[k] === undefined) delete process.env[k]; + else process.env[k] = original[k]; + } + }); + + it('defaults cache and queue to localhost:6379 when nothing is set', () => { + expect(getRedisConnection('cache')).toEqual({ host: 'localhost', port: 6379, password: undefined }); + expect(getRedisConnection('queue')).toEqual({ host: 'localhost', port: 6379, password: undefined }); + }); + + it('queue falls back to cache vars when queue-specific vars are unset', () => { + process.env['REDIS_HOST'] = 'cache.internal'; + process.env['REDIS_PORT'] = '6380'; + process.env['REDIS_PASSWORD'] = 'pw'; + expect(getRedisConnection('queue')).toEqual({ host: 'cache.internal', port: 6380, password: 'pw' }); + }); + + it('queue vars take precedence when set', () => { + process.env['REDIS_HOST'] = 'cache.internal'; + process.env['REDIS_QUEUE_HOST'] = 'queue.internal'; + process.env['REDIS_QUEUE_PORT'] = '6400'; + process.env['REDIS_QUEUE_PASSWORD'] = 'qpw'; + expect(getRedisConnection('queue')).toEqual({ host: 'queue.internal', port: 6400, password: 'qpw' }); + expect(getRedisConnection('cache').host).toBe('cache.internal'); + }); + + it('describeRedisTopology reports shared=true when cache and queue resolve to the same host/port', () => { + process.env['REDIS_HOST'] = 'one'; + process.env['REDIS_PORT'] = '6379'; + const t = describeRedisTopology(); + expect(t.shared).toBe(true); + expect(t.cache.passwordSet).toBe(false); + }); + + it('describeRedisTopology reports shared=false when queue is split off', () => { + process.env['REDIS_HOST'] = 'one'; + process.env['REDIS_QUEUE_HOST'] = 'two'; + process.env['REDIS_PASSWORD'] = 'pw'; + const t = describeRedisTopology(); + expect(t.shared).toBe(false); + expect(t.queue.host).toBe('two'); + expect(t.cache.passwordSet).toBe(true); + }); + + it('never leaks the password through describeRedisTopology', () => { + process.env['REDIS_PASSWORD'] = 'super-secret'; + const t = describeRedisTopology(); + expect(JSON.stringify(t)).not.toContain('super-secret'); + }); +}); diff --git a/apps/api/src/modules/shared/infrastructure/env-validation.ts b/apps/api/src/modules/shared/infrastructure/env-validation.ts index e25077a..340d678 100644 --- a/apps/api/src/modules/shared/infrastructure/env-validation.ts +++ b/apps/api/src/modules/shared/infrastructure/env-validation.ts @@ -45,6 +45,17 @@ const REQUIRED_WHEN_USED: ReadonlyMap = new Map([ * Known placeholder values that must never be used as real secrets. * Comparison is case-insensitive to catch common variants. */ +/** + * Previous-version secrets used during key rotation. Validated if set but never + * required. Note: JWT_REFRESH_SECRET_PREVIOUS currently has no runtime consumer + * because refresh tokens are opaque random bytes, not JWTs — the variable is + * accepted here for forward-compatibility should the refresh mechanism change. + */ +const OPTIONAL_PREVIOUS_SECRETS: readonly string[] = [ + 'JWT_SECRET_PREVIOUS', + 'JWT_REFRESH_SECRET_PREVIOUS', +]; + const FORBIDDEN_SECRET_VALUES: readonly string[] = [ 'change_me', 'changeme', @@ -127,6 +138,25 @@ export function validateEnv(): void { ); } + // Validate optional previous secrets if they are set (rotation window). + const prevSecretErrors: string[] = []; + for (const key of OPTIONAL_PREVIOUS_SECRETS) { + const value = process.env[key]; + if (value) { + const error = validateJwtSecret(key, value); + if (error) { + prevSecretErrors.push(error); + } + } + } + + if (prevSecretErrors.length > 0) { + throw new Error( + `Insecure previous-secret configuration:\n ${prevSecretErrors.join('\n ')}\n` + + 'Previous secrets must meet the same strength requirements as primary secrets.', + ); + } + if (!isProduction) { return; } diff --git a/apps/api/src/modules/shared/infrastructure/redis-connection.config.ts b/apps/api/src/modules/shared/infrastructure/redis-connection.config.ts new file mode 100644 index 0000000..484d93a --- /dev/null +++ b/apps/api/src/modules/shared/infrastructure/redis-connection.config.ts @@ -0,0 +1,63 @@ +/** + * Redis connection configuration helpers. + * + * RFC-004 Phase 3 — workstream 1: split the Redis connection used by BullMQ + * from the connection used for cache / throttler / websocket adapter. + * + * Contract: + * - Cache / throttler / ws adapter: read REDIS_HOST / REDIS_PORT / REDIS_PASSWORD. + * - Queue (BullMQ): read REDIS_QUEUE_HOST / REDIS_QUEUE_PORT / REDIS_QUEUE_PASSWORD, + * falling back to the cache vars so dev / single-instance deploys keep working + * with a single Redis. + * - In production, split connections are recommended so a hot cache path cannot + * starve queue operations (and vice versa). The two hosts can still point at + * the same server; the split exists so ops can point them elsewhere without a + * code change. + */ + +export type RedisConnectionPurpose = 'cache' | 'queue'; + +export interface RedisConnectionOptions { + host: string; + port: number; + password: string | undefined; +} + +function readCacheConnection(): RedisConnectionOptions { + return { + host: process.env['REDIS_HOST'] ?? 'localhost', + port: Number(process.env['REDIS_PORT'] ?? 6379), + password: process.env['REDIS_PASSWORD'] ?? undefined, + }; +} + +function readQueueConnection(): RedisConnectionOptions { + const cache = readCacheConnection(); + return { + host: process.env['REDIS_QUEUE_HOST'] ?? cache.host, + port: Number(process.env['REDIS_QUEUE_PORT'] ?? cache.port), + password: process.env['REDIS_QUEUE_PASSWORD'] ?? cache.password, + }; +} + +export function getRedisConnection(purpose: RedisConnectionPurpose): RedisConnectionOptions { + return purpose === 'queue' ? readQueueConnection() : readCacheConnection(); +} + +/** + * Returns a loggable summary of how cache vs queue connections are bound. + * Never includes the password — only host, port, and whether a password is set. + */ +export function describeRedisTopology(): { + cache: { host: string; port: number; passwordSet: boolean }; + queue: { host: string; port: number; passwordSet: boolean }; + shared: boolean; +} { + const cache = readCacheConnection(); + const queue = readQueueConnection(); + return { + cache: { host: cache.host, port: cache.port, passwordSet: Boolean(cache.password) }, + queue: { host: queue.host, port: queue.port, passwordSet: Boolean(queue.password) }, + shared: cache.host === queue.host && cache.port === queue.port, + }; +} diff --git a/docs/audits/slo-soak-test-log.md b/docs/audits/slo-soak-test-log.md new file mode 100644 index 0000000..6df6276 --- /dev/null +++ b/docs/audits/slo-soak-test-log.md @@ -0,0 +1,12 @@ +# SLO Staging Soak Test Log — GOO-227 + +**Period**: 2026-04-26 → 2026-05-03 (7 days) +**Config**: `monitoring/prometheus/slo-rules.yml` + +## Endpoints: listings (99.9%), listings/:id (99.9%), payments (99.95%), auth (99.9%), search (99.9%) + +## Daily Log +Fill each day with: Time | Alert | Endpoint | Window | Value | TP/FP | Action + +## Summary (end of soak) +Total alerts: _ | TP: _ | FP: _ | Recommendation: [ ] Prod ready / [ ] More tuning diff --git a/docs/security/SECRET_ROTATION_POLICY.md b/docs/security/SECRET_ROTATION_POLICY.md new file mode 100644 index 0000000..b68119f --- /dev/null +++ b/docs/security/SECRET_ROTATION_POLICY.md @@ -0,0 +1,192 @@ +# Payment Gateway Secret Rotation Policy + +> **Status:** Active — GOO-197 / parent [GOO-102](/GOO/issues/GOO-102) (CLO data-security work). +> **Owner:** Security Engineer + Platform on-call. +> **Last reviewed:** 2026-04-24. + +This document is the canonical policy for rotating all secrets that gate +access to GoodGo's payment gateways and adjacent integrations (OAuth, +storage, webhook signing, JWT). It ships alongside the `SecretProvider` +abstraction in +`apps/api/src/modules/shared/domain/ports/secret-provider.port.ts` and the +env-backed implementation in +`apps/api/src/modules/shared/infrastructure/env-secret-provider.service.ts`. + +--- + +## 1. Why rotate + +A stolen or leaked HMAC key for a payment gateway is the most direct path +to financial fraud against GoodGo. Rotation reduces the **window of abuse** +when a key is exposed (insider misuse, accidental git commit, third-party +breach, log scraping, etc.). It also forces us to verify that every +runtime that relies on the key can still read it — i.e. that we have not +lost the ability to rotate. + +## 2. Scope (rotation-sensitive secrets) + +The following secrets are in scope. Each is registered with the +`SecretProvider` by default (see `DEFAULT_REGISTERED_SECRETS`) and has a +matching entry in `env-validation.ts`. + +| Secret env var | Purpose | Cadence | Owner | +| ------------------------------ | ------------------------------- | -------- | -------------- | +| `JWT_SECRET` | Access-token HMAC | 90 days | Auth | +| `JWT_REFRESH_SECRET` | Refresh-token HMAC | 90 days | Auth | +| `VNPAY_HASH_SECRET` | VNPay request/callback HMAC | 90 days | Payments | +| `MOMO_SECRET_KEY` | MoMo request/callback HMAC | 90 days | Payments | +| `ZALOPAY_KEY1` | ZaloPay order signing | 90 days | Payments | +| `ZALOPAY_KEY2` | ZaloPay callback signing | 90 days | Payments | +| `BANK_TRANSFER_WEBHOOK_SECRET` | Bank-transfer webhook signature | 90 days | Payments | +| `GOOGLE_CLIENT_SECRET` | Google OAuth | 180 days | Auth | +| `ZALO_APP_SECRET` | Zalo OAuth | 180 days | Auth | +| `ZALO_OA_ACCESS_TOKEN` | Zalo Official Account API token | 90 days | Notifications | +| `MINIO_SECRET_KEY` | Object-storage access key | 180 days | Platform | +| `FIELD_ENCRYPTION_KEY` | At-rest PII encryption key | annually | Platform + CLO | + +Secrets **not** in this table (e.g. `DATABASE_URL` password, `REDIS_HOST`) +follow the platform-credential rotation policy and are out of scope here. + +## 3. Cadence and triggers + +- **Routine rotation:** every 90 days for HMAC/signing keys, 180 days for + OAuth client secrets, annually for the field-encryption key (which has + expensive data-rewrap implications). +- **Event-driven rotation (always immediately):** + - any commit accidentally containing a real value of one of the secrets + above (regardless of how briefly); + - departure of any individual with production access to the secret store; + - downstream provider notification that the credential may be exposed; + - confirmed or strongly suspected breach of any system that handled the + secret in plaintext (CI runner, dev laptop, log aggregator, …). + +## 4. Operator workflow (env-backed backend) + +1. **Generate** a new high-entropy value: + + ```bash + openssl rand -base64 48 + ``` + +2. **Stage the dual-key grace period.** Copy the current secret to the + `_PREVIOUS` variable and set the new secret as the primary: + + ```bash + # Example for JWT_SECRET rotation: + JWT_SECRET_PREVIOUS= + JWT_SECRET= + # Same pattern for JWT_REFRESH_SECRET if rotating refresh keys. + ``` + + The auth layer automatically tries the primary key first and falls + back to `_PREVIOUS`, so tokens signed with the old key continue to + validate during the grace period (≤ access-token TTL, typically 15 m). + +3. **Deploy** the change. On boot, every API instance logs: + + ``` + [EnvSecretProvider] Secret versions at boot: VNPAY_HASH_SECRET=2026-04-24, … + ``` + + Verify the version field matches the staged version on every instance. + The raw value **must never** appear in this or any other log line. + +4. **Smoke-test** payment flows for the rotated provider: + - issue one sandbox payment + - confirm callback verification succeeds + - confirm refund signing succeeds + Record the rotation in the security audit log + (`docs/security/secret-rotation-log.md` — append-only). + +5. **Decommission** the old credential in the gateway's merchant portal. + +6. **Remove the previous secret.** After the grace period (at least one + full access-token TTL cycle, typically 15 minutes), remove + `JWT_SECRET_PREVIOUS` (and/or `JWT_REFRESH_SECRET_PREVIOUS`) from the + environment and redeploy. This closes the dual-key window. + +## 5. SecretProvider abstraction (developer workflow) + +All new and existing code that consumes a rotation-sensitive secret MUST +go through the `SecretProvider` port: + +```ts +import { Inject, Injectable } from '@nestjs/common'; +import { SECRET_PROVIDER, type ISecretProvider } from '@modules/shared/domain/ports'; + +@Injectable() +export class VnpayService { + constructor(@Inject(SECRET_PROVIDER) private readonly secrets: ISecretProvider) {} + + async sign(payload: string): Promise { + const { value } = await this.secrets.getSecret('VNPAY_HASH_SECRET'); + // … HMAC with `value`, never store it on `this`, never log it. + } +} +``` + +Rules: + +- **Never** capture the raw value into a service field. Always re-read on + the request path so a rotation takes effect at the next request. +- **Never** include `material.value` in log messages, error messages, or + exception payloads. `material.version` is safe to log. +- **Never** stringify a `SecretMaterial` directly into a response body. +- For bootstrap-only contexts where `await` is awkward, use + `getSecretSync` — but note that a future remote backend may throw + `UnsupportedSyncReadError`. + +## 6. Backends + +- **Short term — `EnvSecretProvider` (current).** Reads from `process.env` + via `ConfigService`. Operationally identical to the pre-existing + `getOrThrow('VNPAY_HASH_SECRET')` calls, but with a stable audit surface + (versions logged, port-based DI). +- **Mid term — `AwsSecretsManagerSecretProvider` / `VaultSecretProvider`.** + Same port. Adds: + - automatic refresh from the remote store + - per-secret IAM / Vault-policy scoping + - native version ids (`AWSCURRENT` / `AWSPREVIOUS` etc.) surfaced as + `material.version` + - `getSecretSync` may throw `UnsupportedSyncReadError`; bootstrap + callers must migrate to `getSecret`. + +Switching backends is a one-line change in `SharedModule` (replace +`EnvSecretProvider` with the new implementation under the +`SECRET_PROVIDER` token). No call sites change. + +## 7. Logging discipline + +- The `EnvSecretProvider` logs only `name=version` pairs at boot. +- The `version` is either an operator-provided `_SECRET_VERSION` env + var, or a 10-char SHA-256 fingerprint of the value (40 bits of entropy; + non-invertible; useful for distinguishing rotations across instances). +- Negative tests in + `apps/api/src/modules/shared/infrastructure/__tests__/env-secret-provider.service.spec.ts` + assert the raw value never appears in logger output, error messages, or + serialized provider state. +- The repo also has a global `pii-masker` and `GlobalExceptionFilter` — + those are defence-in-depth, not the primary control. The primary control + is "never put the value into a string in the first place." + +## 8. Incident response (suspected leak) + +1. Open a P1 incident in `#sec-incident`. Page Security on-call. +2. Rotate the affected secret immediately following §4 — do not wait for + forensic confirmation. +3. Search logs / CI artifacts / git history for the leaked value + fingerprint (NOT the value itself; use `fingerprint()` from + `env-secret-provider.service.ts`). +4. Coordinate with the gateway's anti-fraud team where applicable (VNPay, + MoMo, ZaloPay merchant support). +5. File a post-mortem within 5 business days; update this policy if + process gaps were found. + +## 9. References + +- Source port: `apps/api/src/modules/shared/domain/ports/secret-provider.port.ts` +- Env-backed impl: `apps/api/src/modules/shared/infrastructure/env-secret-provider.service.ts` +- Env validation: `apps/api/src/modules/shared/infrastructure/env-validation.ts` +- Negative tests: `apps/api/src/modules/shared/infrastructure/__tests__/env-secret-provider.service.spec.ts` +- Parent issue: [GOO-102](/GOO/issues/GOO-102) +- This issue: [GOO-197](/GOO/issues/GOO-197) diff --git a/monitoring/alertmanager/alertmanager.yml b/monitoring/alertmanager/alertmanager.yml index 2c14de7..5fcd19f 100644 --- a/monitoring/alertmanager/alertmanager.yml +++ b/monitoring/alertmanager/alertmanager.yml @@ -31,6 +31,17 @@ route: repeat_interval: 4h routes: + # Staging SLO soak — burn-rate alerts to Slack only, no pager + - matchers: + - environment = staging + - slo_type =~ "availability|latency" + receiver: 'slack-sre-staging-soak' + group_by: ['alertname', 'route', 'burn_window'] + group_wait: 15s + group_interval: 5m + repeat_interval: 30m + continue: false + # Critical alerts — immediate notification, shorter repeat - matchers: - severity = critical @@ -77,6 +88,17 @@ receivers: {{ if .Annotations.runbook_url }}*Runbook:* {{ .Annotations.runbook_url }}{{ end }} {{ end }} + - name: 'slack-sre-staging-soak' + slack_configs: + - channel: '#sre-staging-soak' + send_resolved: true + title: 'SOAK {{ .CommonLabels.alertname }}' + text: >- + Route: {{ .CommonLabels.method }} {{ .CommonLabels.route }} + Burn: {{ .CommonLabels.burn_window }} | {{ .CommonLabels.severity }} + {{ range .Alerts }}{{ .Annotations.summary }}{{ end }} + Staging soak — NOT paging. + - name: 'slack-infrastructure' slack_configs: - channel: '#infrastructure' diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml index 3deee0f..968d2f0 100644 --- a/monitoring/prometheus/prometheus.yml +++ b/monitoring/prometheus/prometheus.yml @@ -4,6 +4,7 @@ global: rule_files: - 'alert-rules.yml' + - 'slo-rules.yml' alerting: alertmanagers: diff --git a/monitoring/prometheus/slo-rules.yml b/monitoring/prometheus/slo-rules.yml new file mode 100644 index 0000000..2d93495 --- /dev/null +++ b/monitoring/prometheus/slo-rules.yml @@ -0,0 +1,150 @@ +groups: + - name: slo:availability:recording + interval: 30s + rules: + - record: slo:http_requests:rate5m + expr: sum(rate(http_requests_total{job="goodgo-api"}[5m])) by (route, method) + - record: slo:http_errors:rate5m + expr: sum(rate(http_requests_total{job="goodgo-api", status_code=~"5.."}[5m])) by (route, method) + - record: slo:error_ratio:rate5m + expr: slo:http_errors:rate5m / slo:http_requests:rate5m + - record: slo:http_requests:rate30m + expr: sum(rate(http_requests_total{job="goodgo-api"}[30m])) by (route, method) + - record: slo:http_errors:rate30m + expr: sum(rate(http_requests_total{job="goodgo-api", status_code=~"5.."}[30m])) by (route, method) + - record: slo:error_ratio:rate30m + expr: slo:http_errors:rate30m / slo:http_requests:rate30m + - record: slo:http_requests:rate1h + expr: sum(rate(http_requests_total{job="goodgo-api"}[1h])) by (route, method) + - record: slo:http_errors:rate1h + expr: sum(rate(http_requests_total{job="goodgo-api", status_code=~"5.."}[1h])) by (route, method) + - record: slo:error_ratio:rate1h + expr: slo:http_errors:rate1h / slo:http_requests:rate1h + - record: slo:http_requests:rate6h + expr: sum(rate(http_requests_total{job="goodgo-api"}[6h])) by (route, method) + - record: slo:http_errors:rate6h + expr: sum(rate(http_requests_total{job="goodgo-api", status_code=~"5.."}[6h])) by (route, method) + - record: slo:error_ratio:rate6h + expr: slo:http_errors:rate6h / slo:http_requests:rate6h + - record: slo:http_requests:rate1d + expr: sum(rate(http_requests_total{job="goodgo-api"}[1d])) by (route, method) + - record: slo:http_errors:rate1d + expr: sum(rate(http_requests_total{job="goodgo-api", status_code=~"5.."}[1d])) by (route, method) + - record: slo:error_ratio:rate1d + expr: slo:http_errors:rate1d / slo:http_requests:rate1d + - record: slo:http_requests:rate3d + expr: sum(rate(http_requests_total{job="goodgo-api"}[3d])) by (route, method) + - record: slo:http_errors:rate3d + expr: sum(rate(http_requests_total{job="goodgo-api", status_code=~"5.."}[3d])) by (route, method) + - record: slo:error_ratio:rate3d + expr: slo:http_errors:rate3d / slo:http_requests:rate3d + - name: slo:latency:recording + interval: 30s + rules: + - record: slo:latency_good:rate5m + expr: > + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/listings", le="0.5"}[5m])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/listings/:id", le="0.25"}[5m])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/payments/create", le="1"}[5m])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/auth/login", le="0.5"}[5m])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/search", le="0.5"}[5m])) by (route, method) + - record: slo:latency_total:rate5m + expr: sum(rate(goodgo_api_request_duration_seconds_count{job="goodgo-api", route=~"/api/listings|/api/listings/:id|/api/payments/create|/api/auth/login|/api/search"}[5m])) by (route, method) + - record: slo:latency_good_ratio:rate5m + expr: slo:latency_good:rate5m / slo:latency_total:rate5m + - record: slo:latency_good:rate1h + expr: > + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/listings", le="0.5"}[1h])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/listings/:id", le="0.25"}[1h])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/payments/create", le="1"}[1h])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/auth/login", le="0.5"}[1h])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/search", le="0.5"}[1h])) by (route, method) + - record: slo:latency_total:rate1h + expr: sum(rate(goodgo_api_request_duration_seconds_count{job="goodgo-api", route=~"/api/listings|/api/listings/:id|/api/payments/create|/api/auth/login|/api/search"}[1h])) by (route, method) + - record: slo:latency_good_ratio:rate1h + expr: slo:latency_good:rate1h / slo:latency_total:rate1h + - record: slo:latency_good:rate6h + expr: > + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/listings", le="0.5"}[6h])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/listings/:id", le="0.25"}[6h])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/payments/create", le="1"}[6h])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/auth/login", le="0.5"}[6h])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/search", le="0.5"}[6h])) by (route, method) + - record: slo:latency_total:rate6h + expr: sum(rate(goodgo_api_request_duration_seconds_count{job="goodgo-api", route=~"/api/listings|/api/listings/:id|/api/payments/create|/api/auth/login|/api/search"}[6h])) by (route, method) + - record: slo:latency_good_ratio:rate6h + expr: slo:latency_good:rate6h / slo:latency_total:rate6h + - name: slo:availability:burn_rate_alerts + rules: + - alert: SloAvailFastBurn + expr: > + (slo:error_ratio:rate1h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 0.0144 + and slo:error_ratio:rate5m{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 0.0144) + and slo:http_requests:rate1h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 1 + for: 2m + labels: {severity: critical, team: sre, slo_type: availability, burn_window: fast, slo_target: "99.9", environment: staging} + annotations: + summary: "SLO FAST BURN: {{ $labels.method }} {{ $labels.route }} availability (14.4x)" + description: "Error ratio {{ $value | printf \"%.4f\" }} exceeds 14.4x burn threshold 0.0144." + - alert: SloAvailFastBurnPayments + expr: > + (slo:error_ratio:rate1h{route="/api/payments/create"} > 0.0072 + and slo:error_ratio:rate5m{route="/api/payments/create"} > 0.0072) + and slo:http_requests:rate1h{route="/api/payments/create"} > 1 + for: 2m + labels: {severity: critical, team: sre, slo_type: availability, burn_window: fast, slo_target: "99.95", environment: staging} + annotations: + summary: "SLO FAST BURN: payments availability (14.4x)" + description: "Payments error ratio {{ $value | printf \"%.4f\" }} exceeds threshold 0.0072." + - alert: SloAvailSlowBurn + expr: > + (slo:error_ratio:rate6h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 0.006 + and slo:error_ratio:rate30m{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 0.006) + and slo:http_requests:rate6h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 1 + for: 5m + labels: {severity: warning, team: sre, slo_type: availability, burn_window: slow, slo_target: "99.9", environment: staging} + annotations: + summary: "SLO SLOW BURN: {{ $labels.method }} {{ $labels.route }} availability (6x)" + description: "6h error ratio {{ $value | printf \"%.4f\" }} exceeds 6x threshold 0.006." + - alert: SloAvailSlowBurnPayments + expr: > + (slo:error_ratio:rate6h{route="/api/payments/create"} > 0.003 + and slo:error_ratio:rate30m{route="/api/payments/create"} > 0.003) + and slo:http_requests:rate6h{route="/api/payments/create"} > 1 + for: 5m + labels: {severity: warning, team: sre, slo_type: availability, burn_window: slow, slo_target: "99.95", environment: staging} + annotations: + summary: "SLO SLOW BURN: payments availability (6x)" + description: "Payments 6h error ratio {{ $value | printf \"%.4f\" }} exceeds threshold 0.003." + - name: slo:latency:burn_rate_alerts + rules: + - alert: SloLatencyFastBurn + expr: > + (slo:latency_good_ratio:rate1h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} < 0.9856 + and slo:latency_good_ratio:rate5m{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} < 0.9856) + and slo:latency_total:rate1h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 1 + for: 2m + labels: {severity: critical, team: sre, slo_type: latency, burn_window: fast, environment: staging} + annotations: + summary: "SLO LATENCY FAST BURN: {{ $labels.method }} {{ $labels.route }} (14.4x)" + description: "Good ratio {{ $value | printf \"%.4f\" }} below 0.9856 threshold." + - alert: SloLatencyFastBurnPayments + expr: > + (slo:latency_good_ratio:rate1h{route="/api/payments/create"} < 0.9928 + and slo:latency_good_ratio:rate5m{route="/api/payments/create"} < 0.9928) + and slo:latency_total:rate1h{route="/api/payments/create"} > 1 + for: 2m + labels: {severity: critical, team: sre, slo_type: latency, burn_window: fast, environment: staging} + annotations: + summary: "SLO LATENCY FAST BURN: payments (14.4x)" + description: "Payments good ratio {{ $value | printf \"%.4f\" }} below 0.9928." + - alert: SloLatencySlowBurn + expr: > + (slo:latency_good_ratio:rate6h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} < 0.994 + and slo:latency_good_ratio:rate5m{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} < 0.994) + and slo:latency_total:rate6h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 1 + for: 5m + labels: {severity: warning, team: sre, slo_type: latency, burn_window: slow, environment: staging} + annotations: + summary: "SLO latency slow burn: {{ $labels.method }} {{ $labels.route }} (6x)" + description: "6h good ratio {{ $value | printf \"%.4f\" }} below 0.994." diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 010a0c5..6709479 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -87,6 +87,15 @@ importers: '@aws-sdk/s3-request-presigner': specifier: ^3.1026.0 version: 3.1026.0 + '@bull-board/api': + specifier: ^7.0.0 + version: 7.0.0(@bull-board/ui@7.0.0) + '@bull-board/express': + specifier: ^7.0.0 + version: 7.0.0 + '@bull-board/nestjs': + specifier: ^7.0.0 + version: 7.0.0(@bull-board/api@7.0.0(@bull-board/ui@7.0.0))(@nestjs/bull-shared@11.0.4(@nestjs/common@11.1.18(class-transformer@0.5.1)(class-validator@0.15.1)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/core@11.1.18))(@nestjs/common@11.1.18(class-transformer@0.5.1)(class-validator@0.15.1)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/core@11.1.18)(reflect-metadata@0.2.2)(rxjs@7.8.2) '@goodgo/mcp-servers': specifier: workspace:* version: link:../../libs/mcp-servers @@ -186,6 +195,9 @@ importers: ioredis: specifier: ^5.4.0 version: 5.10.1 + jsonwebtoken: + specifier: ^9.0.3 + version: 9.0.3 nodemailer: specifier: ^8.0.5 version: 8.0.5 @@ -259,6 +271,9 @@ importers: '@types/express': specifier: ^5.0.0 version: 5.0.6 + '@types/jsonwebtoken': + specifier: ^9.0.10 + version: 9.0.10 '@types/node': specifier: ^25.5.2 version: 25.5.2 @@ -420,6 +435,12 @@ importers: specifier: ^4.1.3 version: 4.1.3(@opentelemetry/api@1.9.1)(@types/node@25.5.2)(jsdom@29.0.2(@noble/hashes@2.0.1))(msw@2.13.2(@types/node@25.5.2)(typescript@6.0.2))(vite@7.3.2(@types/node@25.5.2)(jiti@1.21.7)(terser@5.46.1)(tsx@4.21.0)(yaml@2.8.3)) + libs/contracts/events: + devDependencies: + typescript: + specifier: ^5.5.0 + version: 5.9.3 + libs/mcp-servers: dependencies: '@modelcontextprotocol/sdk': @@ -761,6 +782,27 @@ packages: resolution: {integrity: sha512-ctxtJ/eA+t+6q2++vj5j7FYX3nRu311q1wfYH3xjlLOsczhlhxAg2FWNUXhpGvAw3BWo1xBcvOV6/YLc2r5FJw==} hasBin: true + '@bull-board/api@7.0.0': + resolution: {integrity: sha512-ISNspLHVmUWUSq/eLw+wd1FuBBUnqpLbYP2xUNmehpfKhS+NoZWMbBvqjUYVeE/HLfUkRcR1edzMKpl5n9zlSw==} + peerDependencies: + '@bull-board/ui': 7.0.0 + + '@bull-board/express@7.0.0': + resolution: {integrity: sha512-3Tc/EyU5PQMTcTzcafFSrmRDiEbJBEU/EaVQ5OVYcuJ7DZCp5Pkvm0/2VtaCe2uywdtwn0ZaynlSIpB27FKX6A==} + + '@bull-board/nestjs@7.0.0': + resolution: {integrity: sha512-ypXm0eJHIMQzjN+3fjf84cVxugBg/K4Bpo0eYcV4u/AsteR/dnr6e7F79ICRgg1WWoczqmSMl0JhlmykpyhAMg==} + peerDependencies: + '@bull-board/api': ^7.0.0 + '@nestjs/bull-shared': ^10.0.0 || ^11.0.0 + '@nestjs/common': ^9.0.0 || ^10.0.0 || ^11.0.0 + '@nestjs/core': ^9.0.0 || ^10.0.0 || ^11.0.0 + reflect-metadata: ^0.1.13 || ^0.2.0 + rxjs: ^7.8.1 + + '@bull-board/ui@7.0.0': + resolution: {integrity: sha512-AnKeklpDn0iMFgu4ukDU6uTNmw4oudl07G4k2Fh95SknKDrXSiWRV0N1TGUawMqyfG1Yi5P/W/8d7raBq/Uw6w==} + '@colors/colors@1.5.0': resolution: {integrity: sha512-ooWCrlZP11i8GImSjTHYHLkvFDP48nS4+204nGb1RiX/WXYHmJA2III9/e2DWVabCESdW7hBAEzHRqUn9OUVvQ==} engines: {node: '>=0.1.90'} @@ -4403,6 +4445,11 @@ packages: effect@3.20.0: resolution: {integrity: sha512-qMLfDJscrNG8p/aw+IkT9W7fgj50Z4wG5bLBy0Txsxz8iUHjDIkOgO3SV0WZfnQbNG2VJYb0b+rDLMrhM4+Krw==} + ejs@5.0.2: + resolution: {integrity: sha512-IpbUaI/CAW86l3f+T8zN0iggSc0LmMZLcIW5eRVStLVNCoTXkE0YlncbbH50fp8Cl6zHIky0sW2uUbhBqGw0Jw==} + engines: {node: '>=0.12.18'} + hasBin: true + electron-to-chromium@1.5.332: resolution: {integrity: sha512-7OOtytmh/rINMLwaFTbcMVvYXO3AUm029X0LcyfYk0B557RlPkdpTpnH9+htMlfu5dKwOmT0+Zs2Aw+lnn6TeQ==} @@ -6298,6 +6345,9 @@ packages: resolution: {integrity: sha512-1qny3OExCf0UvUV/5wpYKf2YwPcOqXzkwKKSmKHiE6ZMQs5heeE/c8eXK+PNllPvmjgAbfnsbpkGZWy8cBpn9w==} engines: {node: '>=4'} + redis-info@3.1.0: + resolution: {integrity: sha512-ER4L9Sh/vm63DkIE0bkSjxluQlioBiBgf5w1UuldaW/3vPcecdljVDisZhmnCMvsxHNiARTTDDHGg9cGwTfrKg==} + redis-parser@3.0.0: resolution: {integrity: sha512-DJnGAeenTdpMEH6uAJRK/uiyEIH9WVsUmoLwzudwGJUwZPp80PDBWPHXSAGNPwNvIXAbe7MSUB1zQFugFml66A==} engines: {node: '>=4'} @@ -8010,6 +8060,33 @@ snapshots: dependencies: css-tree: 3.2.1 + '@bull-board/api@7.0.0(@bull-board/ui@7.0.0)': + dependencies: + '@bull-board/ui': 7.0.0 + redis-info: 3.1.0 + + '@bull-board/express@7.0.0': + dependencies: + '@bull-board/api': 7.0.0(@bull-board/ui@7.0.0) + '@bull-board/ui': 7.0.0 + ejs: 5.0.2 + express: 5.2.1 + transitivePeerDependencies: + - supports-color + + '@bull-board/nestjs@7.0.0(@bull-board/api@7.0.0(@bull-board/ui@7.0.0))(@nestjs/bull-shared@11.0.4(@nestjs/common@11.1.18(class-transformer@0.5.1)(class-validator@0.15.1)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/core@11.1.18))(@nestjs/common@11.1.18(class-transformer@0.5.1)(class-validator@0.15.1)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/core@11.1.18)(reflect-metadata@0.2.2)(rxjs@7.8.2)': + dependencies: + '@bull-board/api': 7.0.0(@bull-board/ui@7.0.0) + '@nestjs/bull-shared': 11.0.4(@nestjs/common@11.1.18(class-transformer@0.5.1)(class-validator@0.15.1)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/core@11.1.18) + '@nestjs/common': 11.1.18(class-transformer@0.5.1)(class-validator@0.15.1)(reflect-metadata@0.2.2)(rxjs@7.8.2) + '@nestjs/core': 11.1.18(@nestjs/common@11.1.18(class-transformer@0.5.1)(class-validator@0.15.1)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/platform-express@11.1.18)(@nestjs/websockets@11.1.19)(reflect-metadata@0.2.2)(rxjs@7.8.2) + reflect-metadata: 0.2.2 + rxjs: 7.8.2 + + '@bull-board/ui@7.0.0': + dependencies: + '@bull-board/api': 7.0.0(@bull-board/ui@7.0.0) + '@colors/colors@1.5.0': optional: true @@ -11771,6 +11848,8 @@ snapshots: '@standard-schema/spec': 1.1.0 fast-check: 3.23.2 + ejs@5.0.2: {} + electron-to-chromium@1.5.332: {} emoji-regex@10.6.0: {} @@ -13890,6 +13969,10 @@ snapshots: redis-errors@1.2.0: {} + redis-info@3.1.0: + dependencies: + lodash: 4.18.1 + redis-parser@3.0.0: dependencies: redis-errors: 1.2.0 diff --git a/prisma/migrations/20260429000000_add_mfa_grace_columns/migration.sql b/prisma/migrations/20260429000000_add_mfa_grace_columns/migration.sql new file mode 100644 index 0000000..e1dcf09 --- /dev/null +++ b/prisma/migrations/20260429000000_add_mfa_grace_columns/migration.sql @@ -0,0 +1,7 @@ +-- Add MFA grace period + last-verified columns to support +-- enrollment grace window for MFA-required roles (currently ADMIN) +-- and re-auth checks for sensitive admin operations. + +ALTER TABLE "User" + ADD COLUMN "mfaGraceStartedAt" TIMESTAMP(3), + ADD COLUMN "mfaLastVerifiedAt" TIMESTAMP(3); diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 355ac84..d6e4bbd 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -56,10 +56,17 @@ model User { updatedAt DateTime @updatedAt // MFA fields - totpSecret String? // Encrypted TOTP secret - totpEnabled Boolean @default(false) - totpBackupCodes String[] // Bcrypt-hashed backup codes - totpEnabledAt DateTime? + totpSecret String? // Encrypted TOTP secret + totpEnabled Boolean @default(false) + totpBackupCodes String[] // Bcrypt-hashed backup codes + totpEnabledAt DateTime? + /// First login under MFA enforcement when the user had not yet enrolled. + /// Used to compute the remaining grace period before enrollment becomes + /// mandatory for roles in MFA_REQUIRED_ROLES (currently ADMIN). + mfaGraceStartedAt DateTime? + /// Last successful MFA verification (TOTP or backup code). Used by the + /// admin re-auth interceptor for sensitive operations. + mfaLastVerifiedAt DateTime? agent Agent? listings Listing[]