diff --git a/apps/api/package.json b/apps/api/package.json index a87a1b1..7b6e2fc 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -52,6 +52,7 @@ "handlebars": "^4.7.9", "helmet": "^8.1.0", "ioredis": "^5.4.0", + "jsonwebtoken": "^9.0.3", "nodemailer": "^8.0.5", "otplib": "^13.4.0", "passport": "^0.7.0", @@ -78,6 +79,7 @@ "@types/bcrypt": "^6.0.0", "@types/cookie-parser": "^1.4.10", "@types/express": "^5.0.0", + "@types/jsonwebtoken": "^9.0.10", "@types/node": "^25.5.2", "@types/nodemailer": "^8.0.0", "@types/passport-google-oauth20": "^2.0.17", diff --git a/apps/api/src/modules/auth/application/__tests__/login-user.handler.spec.ts b/apps/api/src/modules/auth/application/__tests__/login-user.handler.spec.ts index 18b6319..276fc06 100644 --- a/apps/api/src/modules/auth/application/__tests__/login-user.handler.spec.ts +++ b/apps/api/src/modules/auth/application/__tests__/login-user.handler.spec.ts @@ -5,6 +5,8 @@ describe('LoginUserHandler', () => { let handler: LoginUserHandler; let mockTokenService: { generateTokenPair: ReturnType }; let mockChallengeRepo: { create: ReturnType }; + let mockUserRepo: { updateMfaGraceStartedAt: ReturnType }; + let mockLogger: { error: ReturnType; warn: ReturnType }; const tokenPair = { accessToken: 'access-jwt', @@ -15,22 +17,30 @@ describe('LoginUserHandler', () => { beforeEach(() => { mockTokenService = { generateTokenPair: vi.fn().mockResolvedValue(tokenPair) }; mockChallengeRepo = { create: vi.fn().mockResolvedValue({}) }; - handler = new LoginUserHandler(mockTokenService as any, mockChallengeRepo as any); + mockUserRepo = { updateMfaGraceStartedAt: vi.fn().mockResolvedValue(undefined) }; + mockLogger = { error: vi.fn(), warn: vi.fn() }; + handler = new LoginUserHandler( + mockTokenService as any, + mockChallengeRepo as any, + mockUserRepo as any, + mockLogger as any, + ); }); - it('generates token pair with correct payload when MFA not required', async () => { + it('generates token pair with mfa=none for 
non-required role when MFA not required', async () => { const command = new LoginUserCommand('user-1', '0912345678', 'BUYER', false); const result = await handler.execute(command); - expect(result).toEqual({ requiresMfa: false, tokens: tokenPair }); + expect(result).toEqual({ requiresMfa: false, tokens: tokenPair, mfaGraceRemainingDays: undefined }); expect(mockTokenService.generateTokenPair).toHaveBeenCalledWith({ sub: 'user-1', phone: '0912345678', role: 'BUYER', + mfa: 'none', }); }); - it('creates MFA challenge when MFA is required', async () => { + it('creates MFA challenge when MFA is required (user already enrolled)', async () => { const command = new LoginUserCommand('user-1', '0912345678', 'BUYER', true); const result = await handler.execute(command); @@ -49,7 +59,7 @@ describe('LoginUserHandler', () => { ); }); - it('passes AGENT role correctly', async () => { + it('AGENT role does not require MFA — issues mfa=none claim', async () => { const command = new LoginUserCommand('user-2', '0987654321', 'AGENT'); await handler.execute(command); @@ -57,17 +67,51 @@ describe('LoginUserHandler', () => { sub: 'user-2', phone: '0987654321', role: 'AGENT', + mfa: 'none', }); }); - it('passes ADMIN role correctly', async () => { - const command = new LoginUserCommand('admin-1', '0901234567', 'ADMIN'); - await handler.execute(command); + it('ADMIN without TOTP enters grace period on first login under enforcement', async () => { + const command = new LoginUserCommand( + 'admin-1', + '0901234567', + 'ADMIN', + false, + false, // totpEnabled + null, // mfaGraceStartedAt — first login + ); + const result = await handler.execute(command); + // Grace was started lazily + expect(mockUserRepo.updateMfaGraceStartedAt).toHaveBeenCalledWith('admin-1', expect.any(Date)); + expect(result.mfaGraceRemainingDays).toBe(14); expect(mockTokenService.generateTokenPair).toHaveBeenCalledWith({ sub: 'admin-1', phone: '0901234567', role: 'ADMIN', + mfa: 'grace', + }); + }); + + it('ADMIN past 
grace window receives mfa=enrollment_required claim', async () => { + const longAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000); // 30 days ago + const command = new LoginUserCommand( + 'admin-1', + '0901234567', + 'ADMIN', + false, + false, + longAgo, + ); + const result = await handler.execute(command); + + expect(mockUserRepo.updateMfaGraceStartedAt).not.toHaveBeenCalled(); + expect(result.mfaGraceRemainingDays).toBe(0); + expect(mockTokenService.generateTokenPair).toHaveBeenCalledWith({ + sub: 'admin-1', + phone: '0901234567', + role: 'ADMIN', + mfa: 'enrollment_required', }); }); }); diff --git a/apps/api/src/modules/auth/application/commands/login-user/login-user.command.ts b/apps/api/src/modules/auth/application/commands/login-user/login-user.command.ts index 46422df..0e36eff 100644 --- a/apps/api/src/modules/auth/application/commands/login-user/login-user.command.ts +++ b/apps/api/src/modules/auth/application/commands/login-user/login-user.command.ts @@ -4,5 +4,7 @@ export class LoginUserCommand { public readonly phone: string, public readonly role: string, public readonly isMfaRequired: boolean = false, + public readonly totpEnabled: boolean = false, + public readonly mfaGraceStartedAt: Date | null = null, ) {} } diff --git a/apps/api/src/modules/auth/application/commands/login-user/login-user.handler.ts b/apps/api/src/modules/auth/application/commands/login-user/login-user.handler.ts index bcca12a..349d247 100644 --- a/apps/api/src/modules/auth/application/commands/login-user/login-user.handler.ts +++ b/apps/api/src/modules/auth/application/commands/login-user/login-user.handler.ts @@ -1,12 +1,18 @@ import { Inject, InternalServerErrorException } from '@nestjs/common'; import { CommandHandler, type ICommandHandler } from '@nestjs/cqrs'; +import { type UserRole } from '@prisma/client'; import { createId } from '@paralleldrive/cuid2'; import { LoggerService, DomainException } from '@modules/shared'; +import { MFA_GRACE_PERIOD_DAYS, MFA_REQUIRED_ROLES 
} from '../../../domain/mfa-policy'; import { MFA_CHALLENGE_REPOSITORY, type IMfaChallengeRepository, } from '../../../domain/repositories/mfa-challenge.repository'; -import { TokenService, type TokenPair } from '../../../infrastructure/services/token.service'; +import { + USER_REPOSITORY, + type IUserRepository, +} from '../../../domain/repositories/user.repository'; +import { TokenService, type MfaClaim, type TokenPair } from '../../../infrastructure/services/token.service'; import { LoginUserCommand } from './login-user.command'; const MFA_CHALLENGE_TTL_MINUTES = 5; @@ -15,6 +21,7 @@ export interface LoginResult { requiresMfa: boolean; challengeId?: string; tokens?: TokenPair; + mfaGraceRemainingDays?: number; } @CommandHandler(LoginUserCommand) @@ -23,12 +30,14 @@ export class LoginUserHandler implements ICommandHandler { private readonly tokenService: TokenService, @Inject(MFA_CHALLENGE_REPOSITORY) private readonly challengeRepo: IMfaChallengeRepository, + @Inject(USER_REPOSITORY) + private readonly userRepo: IUserRepository, private readonly logger: LoggerService, ) {} async execute(command: LoginUserCommand): Promise { try { - // If MFA is required, create a challenge instead of tokens + // If MFA is required (user already enrolled), create a challenge if (command.isMfaRequired) { const challengeId = createId(); const expiresAt = new Date(); @@ -50,16 +59,32 @@ export class LoginUserHandler implements ICommandHandler { }; } - // No MFA — issue tokens directly + // Determine MFA claim for non-enrolled users + const roleRequiresMfa = MFA_REQUIRED_ROLES.includes(command.role as UserRole); + + let mfaClaim: MfaClaim = 'none'; + let mfaGraceRemainingDays: number | undefined; + + if (roleRequiresMfa && !command.totpEnabled) { + const result = await this.resolveMfaGraceClaim( + command.userId, + command.mfaGraceStartedAt, + ); + mfaClaim = result.claim; + mfaGraceRemainingDays = result.remainingDays; + } + const tokens = await this.tokenService.generateTokenPair({ 
sub: command.userId, phone: command.phone, role: command.role, + mfa: mfaClaim, }); return { requiresMfa: false, tokens, + mfaGraceRemainingDays, }; } catch (error) { if (error instanceof DomainException) throw error; @@ -71,5 +96,33 @@ export class LoginUserHandler implements ICommandHandler { throw new InternalServerErrorException('Không thể tạo phiên đăng nhập, vui lòng thử lại'); } } -} + /** + * Lazy-initialises mfaGraceStartedAt if the role requires MFA but + * the user hasn't enrolled yet. Returns the appropriate MFA claim + * and the number of grace days remaining (if any), always within 0..MFA_GRACE_PERIOD_DAYS. + */ + private async resolveMfaGraceClaim( + userId: string, + mfaGraceStartedAt: Date | null, + ): Promise<{ claim: MfaClaim; remainingDays?: number }> { + const now = new Date(); + + if (!mfaGraceStartedAt) { + // First login since enforcement — start the grace period + await this.userRepo.updateMfaGraceStartedAt(userId, now); + return { claim: 'grace', remainingDays: MFA_GRACE_PERIOD_DAYS }; + } + + const elapsedMs = now.getTime() - mfaGraceStartedAt.getTime(); + const elapsedDays = elapsedMs / (1000 * 60 * 60 * 24); + const remainingDays = Math.min(MFA_GRACE_PERIOD_DAYS, Math.max(0, Math.ceil(MFA_GRACE_PERIOD_DAYS - elapsedDays))); // upper clamp: a stored timestamp ahead of this server's clock makes elapsedDays negative, which would otherwise report more than the full window + + if (remainingDays > 0) { + return { claim: 'grace', remainingDays }; + } + + // Grace period expired — enrollment is now mandatory + return { claim: 'enrollment_required', remainingDays: 0 }; + } +} diff --git a/apps/api/src/modules/auth/domain/entities/user.entity.ts b/apps/api/src/modules/auth/domain/entities/user.entity.ts index ecd5180..b42b627 100644 --- a/apps/api/src/modules/auth/domain/entities/user.entity.ts +++ b/apps/api/src/modules/auth/domain/entities/user.entity.ts @@ -22,6 +22,8 @@ export interface UserProps { totpEnabled: boolean; totpBackupCodes: string[]; totpEnabledAt: Date | null; + mfaGraceStartedAt: Date | null; + mfaLastVerifiedAt: Date | null; } export class UserEntity extends AggregateRoot { @@ -39,6 +41,8 @@ export class UserEntity extends AggregateRoot { 
private _totpEnabled: boolean; private _totpBackupCodes: string[]; private _totpEnabledAt: Date | null; + private _mfaGraceStartedAt: Date | null; + private _mfaLastVerifiedAt: Date | null; constructor(id: string, props: UserProps, createdAt?: Date, updatedAt?: Date) { super(id, createdAt, updatedAt); @@ -56,6 +60,8 @@ export class UserEntity extends AggregateRoot { this._totpEnabled = props.totpEnabled; this._totpBackupCodes = props.totpBackupCodes; this._totpEnabledAt = props.totpEnabledAt; + this._mfaGraceStartedAt = props.mfaGraceStartedAt; + this._mfaLastVerifiedAt = props.mfaLastVerifiedAt; } get email(): Email | null { return this._email; } @@ -72,6 +78,8 @@ export class UserEntity extends AggregateRoot { get totpEnabled(): boolean { return this._totpEnabled; } get totpBackupCodes(): string[] { return this._totpBackupCodes; } get totpEnabledAt(): Date | null { return this._totpEnabledAt; } + get mfaGraceStartedAt(): Date | null { return this._mfaGraceStartedAt; } + get mfaLastVerifiedAt(): Date | null { return this._mfaLastVerifiedAt; } static createNew( id: string, @@ -96,6 +104,8 @@ export class UserEntity extends AggregateRoot { totpEnabled: false, totpBackupCodes: [], totpEnabledAt: null, + mfaGraceStartedAt: null, + mfaLastVerifiedAt: null, }); user.addDomainEvent(new UserRegisteredEvent(id, phone.value, role)); @@ -133,6 +143,8 @@ export class UserEntity extends AggregateRoot { totpEnabled: false, totpBackupCodes: [], totpEnabledAt: null, + mfaGraceStartedAt: null, + mfaLastVerifiedAt: null, }); user.addDomainEvent(new UserRegisteredEvent(id, phone.value, role)); diff --git a/apps/api/src/modules/auth/domain/mfa-policy.ts b/apps/api/src/modules/auth/domain/mfa-policy.ts new file mode 100644 index 0000000..f515d60 --- /dev/null +++ b/apps/api/src/modules/auth/domain/mfa-policy.ts @@ -0,0 +1,28 @@ +import { UserRole } from '@prisma/client'; + +/** + * MFA enrolment policy — central source of truth for which roles require + * TOTP and how long the grace 
period lasts. + * + * Backed by `User.mfaGraceStartedAt` and `User.mfaLastVerifiedAt` columns. + * + * Policy summary: + * - On first login under enforcement, `mfaGraceStartedAt` is stamped. + * - For `MFA_GRACE_PERIOD_DAYS` after that timestamp, the user keeps full + * access but receives `mfa: 'grace'` in their JWT (UI nudges enrollment). + * - After grace expires, the JWT carries `mfa: 'enrollment_required'` and + * sensitive routes (admin guards) reject until the user enrols. + */ + +/** Roles for which TOTP is mandatory after the grace window expires. */ +export const MFA_REQUIRED_ROLES: ReadonlyArray<UserRole> = ['ADMIN']; + +/** Length of the grace window before MFA enrolment becomes mandatory. */ +export const MFA_GRACE_PERIOD_DAYS = 14; + +/** + * Re-auth window for "step-up" admin operations (e.g. user impersonation, + * mass actions). After this many minutes since `mfaLastVerifiedAt`, the + * admin re-auth interceptor must challenge again. + */ +export const MFA_REAUTH_WINDOW_MINUTES = 15; diff --git a/apps/api/src/modules/auth/domain/repositories/user.repository.ts b/apps/api/src/modules/auth/domain/repositories/user.repository.ts index d916afd..65cc91b 100644 --- a/apps/api/src/modules/auth/domain/repositories/user.repository.ts +++ b/apps/api/src/modules/auth/domain/repositories/user.repository.ts @@ -12,4 +12,6 @@ export interface IUserRepository { updateMfaEnabled(userId: string, enabled: boolean, secret: string, backupCodes: string[]): Promise; updateMfaDisabled(userId: string): Promise; updateBackupCodes(userId: string, backupCodes: string[]): Promise; + updateMfaGraceStartedAt(userId: string, date: Date): Promise; + updateMfaLastVerifiedAt(userId: string, date: Date): Promise; } diff --git a/apps/api/src/modules/auth/infrastructure/__tests__/local.strategy.spec.ts b/apps/api/src/modules/auth/infrastructure/__tests__/local.strategy.spec.ts index 8d01bd1..10a89af 100644 --- a/apps/api/src/modules/auth/infrastructure/__tests__/local.strategy.spec.ts +++ 
b/apps/api/src/modules/auth/infrastructure/__tests__/local.strategy.spec.ts @@ -160,6 +160,8 @@ describe('LocalStrategy', () => { phone: '+84912345678', role: 'BUYER', isMfaRequired: false, + totpEnabled: false, + mfaGraceStartedAt: undefined, }); }); diff --git a/apps/api/src/modules/auth/infrastructure/repositories/prisma-user.repository.ts b/apps/api/src/modules/auth/infrastructure/repositories/prisma-user.repository.ts index 91e591a..35d22c9 100644 --- a/apps/api/src/modules/auth/infrastructure/repositories/prisma-user.repository.ts +++ b/apps/api/src/modules/auth/infrastructure/repositories/prisma-user.repository.ts @@ -123,6 +123,14 @@ export class PrismaUserRepository implements IUserRepository { }); } + async updateMfaGraceStartedAt(userId: string, date: Date): Promise { + await this.prisma.user.update({ where: { id: userId }, data: { mfaGraceStartedAt: date } }); + } + + async updateMfaLastVerifiedAt(userId: string, date: Date): Promise { + await this.prisma.user.update({ where: { id: userId }, data: { mfaLastVerifiedAt: date } }); + } + private toDomain(raw: PrismaUser): UserEntity { const phone = Phone.create(raw.phone).unwrap(); const email = raw.email ? 
Email.create(raw.email).unwrap() : null; @@ -145,6 +153,8 @@ export class PrismaUserRepository implements IUserRepository { totpEnabled: raw.totpEnabled, totpBackupCodes: raw.totpBackupCodes, totpEnabledAt: raw.totpEnabledAt, + mfaGraceStartedAt: raw.mfaGraceStartedAt, + mfaLastVerifiedAt: raw.mfaLastVerifiedAt, }; return new UserEntity(raw.id, props, raw.createdAt, raw.updatedAt); diff --git a/apps/api/src/modules/auth/infrastructure/services/oauth.service.ts b/apps/api/src/modules/auth/infrastructure/services/oauth.service.ts index d19e482..31c1d53 100644 --- a/apps/api/src/modules/auth/infrastructure/services/oauth.service.ts +++ b/apps/api/src/modules/auth/infrastructure/services/oauth.service.ts @@ -121,10 +121,13 @@ export class OAuthService { kycStatus: 'NONE', kycData: null, isActive: true, + deletedAt: null, totpSecret: null, totpEnabled: false, totpBackupCodes: [], totpEnabledAt: null, + mfaGraceStartedAt: null, + mfaLastVerifiedAt: null, }); await this.userRepo.save(user); diff --git a/apps/api/src/modules/auth/infrastructure/services/token.service.ts b/apps/api/src/modules/auth/infrastructure/services/token.service.ts index b43b23f..13cc91f 100644 --- a/apps/api/src/modules/auth/infrastructure/services/token.service.ts +++ b/apps/api/src/modules/auth/infrastructure/services/token.service.ts @@ -7,10 +7,23 @@ import { } from '../../domain/repositories/refresh-token.repository'; import { verifyWithRotation } from '../utils/jwt-rotation'; +/** + * MFA enrolment status carried inside the access-token JWT. + * + * - `none` — role does not require MFA, or user is enrolled and + * has just verified (`requiresMfa === true` flow). + * - `grace` — role requires MFA but the user is inside the + * enforcement grace window. UI nudges enrollment. + * - `enrollment_required`— grace window has expired; backend guards on + * sensitive routes must reject and force enrollment. 
+ */ +export type MfaClaim = 'none' | 'grace' | 'enrollment_required'; + export interface JwtPayload { sub: string; phone: string; role: string; + mfa?: MfaClaim; } export interface TokenPair { diff --git a/apps/api/src/modules/auth/infrastructure/strategies/local.strategy.ts b/apps/api/src/modules/auth/infrastructure/strategies/local.strategy.ts index d222a81..fa1b5b3 100644 --- a/apps/api/src/modules/auth/infrastructure/strategies/local.strategy.ts +++ b/apps/api/src/modules/auth/infrastructure/strategies/local.strategy.ts @@ -9,6 +9,8 @@ export interface LocalStrategyResult { phone: string; role: string; isMfaRequired: boolean; + totpEnabled: boolean; + mfaGraceStartedAt: Date | null; } @Injectable() @@ -56,6 +58,8 @@ export class LocalStrategy extends PassportStrategy(Strategy) { phone: user.phone.value, role: user.role, isMfaRequired: user.totpEnabled, + totpEnabled: user.totpEnabled, + mfaGraceStartedAt: user.mfaGraceStartedAt, }; } catch (error) { if (error instanceof DomainException) throw error; diff --git a/docs/audits/slo-soak-test-log.md b/docs/audits/slo-soak-test-log.md new file mode 100644 index 0000000..6df6276 --- /dev/null +++ b/docs/audits/slo-soak-test-log.md @@ -0,0 +1,12 @@ +# SLO Staging Soak Test Log — GOO-227 + +**Period**: 2026-04-26 → 2026-05-03 (7 days) +**Config**: `monitoring/prometheus/slo-rules.yml` + +## Endpoints: listings (99.9%), listings/:id (99.9%), payments (99.95%), auth (99.9%), search (99.9%) + +## Daily Log +Fill each day with: Time | Alert | Endpoint | Window | Value | TP/FP | Action + +## Summary (end of soak) +Total alerts: _ | TP: _ | FP: _ | Recommendation: [ ] Prod ready / [ ] More tuning diff --git a/monitoring/alertmanager/alertmanager.yml b/monitoring/alertmanager/alertmanager.yml index 2c14de7..5fcd19f 100644 --- a/monitoring/alertmanager/alertmanager.yml +++ b/monitoring/alertmanager/alertmanager.yml @@ -31,6 +31,17 @@ route: repeat_interval: 4h routes: + # Staging SLO soak — burn-rate alerts to Slack only, no 
pager + - matchers: + - environment = staging + - slo_type =~ "availability|latency" + receiver: 'slack-sre-staging-soak' + group_by: ['alertname', 'route', 'method', 'burn_window'] # method is grouped on because the soak receiver template prints .CommonLabels.method, which is only populated when every alert in the group shares it + group_wait: 15s + group_interval: 5m + repeat_interval: 30m + continue: false + # Critical alerts — immediate notification, shorter repeat - matchers: - severity = critical @@ -77,6 +88,17 @@ receivers: {{ if .Annotations.runbook_url }}*Runbook:* {{ .Annotations.runbook_url }}{{ end }} {{ end }} + - name: 'slack-sre-staging-soak' + slack_configs: + - channel: '#sre-staging-soak' + send_resolved: true + title: 'SOAK {{ .CommonLabels.alertname }}' + text: >- + Route: {{ .CommonLabels.method }} {{ .CommonLabels.route }} + Burn: {{ .CommonLabels.burn_window }} | {{ .CommonLabels.severity }} + {{ range .Alerts }}{{ .Annotations.summary }}{{ end }} + Staging soak — NOT paging. + - name: 'slack-infrastructure' slack_configs: - channel: '#infrastructure' diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml index 3deee0f..968d2f0 100644 --- a/monitoring/prometheus/prometheus.yml +++ b/monitoring/prometheus/prometheus.yml @@ -4,6 +4,7 @@ global: rule_files: - 'alert-rules.yml' + - 'slo-rules.yml' alerting: alertmanagers: diff --git a/monitoring/prometheus/slo-rules.yml b/monitoring/prometheus/slo-rules.yml new file mode 100644 index 0000000..2d93495 --- /dev/null +++ b/monitoring/prometheus/slo-rules.yml @@ -0,0 +1,150 @@ +groups: + - name: slo:availability:recording + interval: 30s + rules: + - record: slo:http_requests:rate5m + expr: sum(rate(http_requests_total{job="goodgo-api"}[5m])) by (route, method) + - record: slo:http_errors:rate5m + expr: sum(rate(http_requests_total{job="goodgo-api", status_code=~"5.."}[5m])) by (route, method) + - record: slo:error_ratio:rate5m + expr: slo:http_errors:rate5m / slo:http_requests:rate5m + - record: slo:http_requests:rate30m + expr: sum(rate(http_requests_total{job="goodgo-api"}[30m])) by (route, method) + - record: 
slo:http_errors:rate30m + expr: sum(rate(http_requests_total{job="goodgo-api", status_code=~"5.."}[30m])) by (route, method) + - record: slo:error_ratio:rate30m + expr: slo:http_errors:rate30m / slo:http_requests:rate30m + - record: slo:http_requests:rate1h + expr: sum(rate(http_requests_total{job="goodgo-api"}[1h])) by (route, method) + - record: slo:http_errors:rate1h + expr: sum(rate(http_requests_total{job="goodgo-api", status_code=~"5.."}[1h])) by (route, method) + - record: slo:error_ratio:rate1h + expr: slo:http_errors:rate1h / slo:http_requests:rate1h + - record: slo:http_requests:rate6h + expr: sum(rate(http_requests_total{job="goodgo-api"}[6h])) by (route, method) + - record: slo:http_errors:rate6h + expr: sum(rate(http_requests_total{job="goodgo-api", status_code=~"5.."}[6h])) by (route, method) + - record: slo:error_ratio:rate6h + expr: slo:http_errors:rate6h / slo:http_requests:rate6h + - record: slo:http_requests:rate1d + expr: sum(rate(http_requests_total{job="goodgo-api"}[1d])) by (route, method) + - record: slo:http_errors:rate1d + expr: sum(rate(http_requests_total{job="goodgo-api", status_code=~"5.."}[1d])) by (route, method) + - record: slo:error_ratio:rate1d + expr: slo:http_errors:rate1d / slo:http_requests:rate1d + - record: slo:http_requests:rate3d + expr: sum(rate(http_requests_total{job="goodgo-api"}[3d])) by (route, method) + - record: slo:http_errors:rate3d + expr: sum(rate(http_requests_total{job="goodgo-api", status_code=~"5.."}[3d])) by (route, method) + - record: slo:error_ratio:rate3d + expr: slo:http_errors:rate3d / slo:http_requests:rate3d + - name: slo:latency:recording + interval: 30s + rules: + - record: slo:latency_good:rate5m + expr: > + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/listings", le="0.5"}[5m])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/listings/:id", le="0.25"}[5m])) by (route, method) or + 
sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/payments/create", le="1"}[5m])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/auth/login", le="0.5"}[5m])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/search", le="0.5"}[5m])) by (route, method) + - record: slo:latency_total:rate5m + expr: sum(rate(goodgo_api_request_duration_seconds_count{job="goodgo-api", route=~"/api/listings|/api/listings/:id|/api/payments/create|/api/auth/login|/api/search"}[5m])) by (route, method) + - record: slo:latency_good_ratio:rate5m + expr: slo:latency_good:rate5m / slo:latency_total:rate5m + - record: slo:latency_good:rate1h + expr: > + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/listings", le="0.5"}[1h])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/listings/:id", le="0.25"}[1h])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/payments/create", le="1"}[1h])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/auth/login", le="0.5"}[1h])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/search", le="0.5"}[1h])) by (route, method) + - record: slo:latency_total:rate1h + expr: sum(rate(goodgo_api_request_duration_seconds_count{job="goodgo-api", route=~"/api/listings|/api/listings/:id|/api/payments/create|/api/auth/login|/api/search"}[1h])) by (route, method) + - record: slo:latency_good_ratio:rate1h + expr: slo:latency_good:rate1h / slo:latency_total:rate1h + - record: slo:latency_good:rate6h + expr: > + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/listings", le="0.5"}[6h])) by (route, method) or + 
sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/listings/:id", le="0.25"}[6h])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/payments/create", le="1"}[6h])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/auth/login", le="0.5"}[6h])) by (route, method) or + sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/search", le="0.5"}[6h])) by (route, method) + - record: slo:latency_total:rate6h + expr: sum(rate(goodgo_api_request_duration_seconds_count{job="goodgo-api", route=~"/api/listings|/api/listings/:id|/api/payments/create|/api/auth/login|/api/search"}[6h])) by (route, method) + - record: slo:latency_good_ratio:rate6h + expr: slo:latency_good:rate6h / slo:latency_total:rate6h + - name: slo:availability:burn_rate_alerts + rules: + - alert: SloAvailFastBurn + expr: > + (slo:error_ratio:rate1h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 0.0144 + and slo:error_ratio:rate5m{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 0.0144) + and slo:http_requests:rate1h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 1 + for: 2m + labels: {severity: critical, team: sre, slo_type: availability, burn_window: fast, slo_target: "99.9", environment: staging} + annotations: + summary: "SLO FAST BURN: {{ $labels.method }} {{ $labels.route }} availability (14.4x)" + description: "Error ratio {{ $value | printf \"%.4f\" }} exceeds 14.4x burn threshold 0.0144." 
+ - alert: SloAvailFastBurnPayments + expr: > + (slo:error_ratio:rate1h{route="/api/payments/create"} > 0.0072 + and slo:error_ratio:rate5m{route="/api/payments/create"} > 0.0072) + and slo:http_requests:rate1h{route="/api/payments/create"} > 1 + for: 2m + labels: {severity: critical, team: sre, slo_type: availability, burn_window: fast, slo_target: "99.95", environment: staging} + annotations: + summary: "SLO FAST BURN: payments availability (14.4x)" + description: "Payments error ratio {{ $value | printf \"%.4f\" }} exceeds threshold 0.0072." + - alert: SloAvailSlowBurn + expr: > + (slo:error_ratio:rate6h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 0.006 + and slo:error_ratio:rate30m{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 0.006) + and slo:http_requests:rate6h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 1 + for: 5m + labels: {severity: warning, team: sre, slo_type: availability, burn_window: slow, slo_target: "99.9", environment: staging} + annotations: + summary: "SLO SLOW BURN: {{ $labels.method }} {{ $labels.route }} availability (6x)" + description: "6h error ratio {{ $value | printf \"%.4f\" }} exceeds 6x threshold 0.006." + - alert: SloAvailSlowBurnPayments + expr: > + (slo:error_ratio:rate6h{route="/api/payments/create"} > 0.003 + and slo:error_ratio:rate30m{route="/api/payments/create"} > 0.003) + and slo:http_requests:rate6h{route="/api/payments/create"} > 1 + for: 5m + labels: {severity: warning, team: sre, slo_type: availability, burn_window: slow, slo_target: "99.95", environment: staging} + annotations: + summary: "SLO SLOW BURN: payments availability (6x)" + description: "Payments 6h error ratio {{ $value | printf \"%.4f\" }} exceeds threshold 0.003." 
+ - name: slo:latency:burn_rate_alerts + rules: + - alert: SloLatencyFastBurn + expr: > + (slo:latency_good_ratio:rate1h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} < 0.9856 + and slo:latency_good_ratio:rate5m{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} < 0.9856) + and slo:latency_total:rate1h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 1 + for: 2m + labels: {severity: critical, team: sre, slo_type: latency, burn_window: fast, environment: staging} + annotations: + summary: "SLO LATENCY FAST BURN: {{ $labels.method }} {{ $labels.route }} (14.4x)" + description: "Good ratio {{ $value | printf \"%.4f\" }} below 0.9856 threshold." + - alert: SloLatencyFastBurnPayments + expr: > + (slo:latency_good_ratio:rate1h{route="/api/payments/create"} < 0.9928 + and slo:latency_good_ratio:rate5m{route="/api/payments/create"} < 0.9928) + and slo:latency_total:rate1h{route="/api/payments/create"} > 1 + for: 2m + labels: {severity: critical, team: sre, slo_type: latency, burn_window: fast, environment: staging} + annotations: + summary: "SLO LATENCY FAST BURN: payments (14.4x)" + description: "Payments good ratio {{ $value | printf \"%.4f\" }} below 0.9928." + - alert: SloLatencySlowBurn # 6x burn pairs the 6h long window with a 30m short window, matching SloAvailSlowBurn; the 30m good ratio is computed inline from the same per-route le buckets the recording rules use, because no 30m latency recording rule is defined in this file + expr: > + (slo:latency_good_ratio:rate6h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} < 0.994 + and ((sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route=~"/api/listings|/api/auth/login|/api/search", le="0.5"}[30m])) by (route, method) or sum(rate(goodgo_api_request_duration_seconds_bucket{job="goodgo-api", route="/api/listings/:id", le="0.25"}[30m])) by (route, method)) / sum(rate(goodgo_api_request_duration_seconds_count{job="goodgo-api", route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"}[30m])) by (route, method)) < 0.994) + and slo:latency_total:rate6h{route=~"/api/listings|/api/listings/:id|/api/auth/login|/api/search"} > 1 + for: 5m + labels: {severity: warning, team: sre, slo_type: latency, burn_window: slow, environment: staging} + annotations: + summary: "SLO latency slow burn: {{ $labels.method }} {{ $labels.route }} (6x)" + description: "6h good ratio {{ $value | printf \"%.4f\" }} below 0.994." 
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4db4f63..6709479 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -195,6 +195,9 @@ importers: ioredis: specifier: ^5.4.0 version: 5.10.1 + jsonwebtoken: + specifier: ^9.0.3 + version: 9.0.3 nodemailer: specifier: ^8.0.5 version: 8.0.5 @@ -268,6 +271,9 @@ importers: '@types/express': specifier: ^5.0.0 version: 5.0.6 + '@types/jsonwebtoken': + specifier: ^9.0.10 + version: 9.0.10 '@types/node': specifier: ^25.5.2 version: 25.5.2 diff --git a/prisma/migrations/20260429000000_add_mfa_grace_columns/migration.sql b/prisma/migrations/20260429000000_add_mfa_grace_columns/migration.sql new file mode 100644 index 0000000..e1dcf09 --- /dev/null +++ b/prisma/migrations/20260429000000_add_mfa_grace_columns/migration.sql @@ -0,0 +1,7 @@ +-- Add MFA grace period + last-verified columns to support +-- enrollment grace window for MFA-required roles (currently ADMIN) +-- and re-auth checks for sensitive admin operations. + +ALTER TABLE "User" + ADD COLUMN "mfaGraceStartedAt" TIMESTAMP(3), + ADD COLUMN "mfaLastVerifiedAt" TIMESTAMP(3); diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 64a1c0b..6947989 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -56,10 +56,17 @@ model User { updatedAt DateTime @updatedAt // MFA fields - totpSecret String? // Encrypted TOTP secret - totpEnabled Boolean @default(false) - totpBackupCodes String[] // Bcrypt-hashed backup codes - totpEnabledAt DateTime? + totpSecret String? // Encrypted TOTP secret + totpEnabled Boolean @default(false) + totpBackupCodes String[] // Bcrypt-hashed backup codes + totpEnabledAt DateTime? + /// First login under MFA enforcement when the user had not yet enrolled. + /// Used to compute the remaining grace period before enrollment becomes + /// mandatory for roles in MFA_REQUIRED_ROLES (currently ADMIN). + mfaGraceStartedAt DateTime? + /// Last successful MFA verification (TOTP or backup code). 
Used by the + /// admin re-auth interceptor for sensitive operations. + mfaLastVerifiedAt DateTime? agent Agent? listings Listing[]