feat(auth): add row/size caps + streaming to export-user-data

- Add per-collection row cap (default 10k, env EXPORT_ROW_CAP) via Prisma
  take on all findMany calls
- Add total size cap (default 100MB, env EXPORT_SIZE_CAP_MB); throws
  PayloadTooLargeException (413) when exceeded
- Convert response to Node.js Readable stream piped via NestJS StreamableFile
  to avoid large in-memory buffers
- Export ExportUserDataResult interface (stream + truncated flag) from handler
- Update controller to set Content-Type/Content-Disposition headers and
  return StreamableFile
- Document EXPORT_ROW_CAP and EXPORT_SIZE_CAP_MB env vars in Swagger
- Extend tests: row-cap assertion (take arg), size-cap 413 path, stream assertions

Fixes GOO-223 (M-1 from GOO-200 audit).

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Ho Ngoc Hai
2026-04-24 12:10:54 +07:00
parent b4bb05479e
commit fa3ba88f40
34 changed files with 1494 additions and 45 deletions

View File

@@ -0,0 +1,69 @@
import { isUuidV7 } from './uuid-v7';
/**
 * Current revision of the envelope layout. `validateEnvelope` reports an issue
 * for any envelope whose `schemaVersion` is not strictly equal to this value.
 */
export const EVENT_ENVELOPE_SCHEMA_VERSION = 1;
/**
 * Metadata wrapper around an event body of type `TPayload`.
 *
 * The per-field formats noted below are the ones `validateEnvelope` checks at
 * runtime; the static types alone do not enforce them.
 */
export interface EventEnvelope<TPayload = unknown> {
/** Must equal `EVENT_ENVELOPE_SCHEMA_VERSION`. */
schemaVersion: number;
/** UUIDv7 string identifying this event. */
eventId: string;
/** Lowercase dotted name with at least two segments, e.g. `payment.completed`. */
eventType: string;
/** ISO-8601 timestamp ending in `Z` or a `±HH:MM` offset. */
occurredAt: string;
/** Non-empty name of the emitter. NOTE(review): naming convention is not visible here — confirm. */
producer: string;
/** 32 hexadecimal characters. */
traceId: string;
/** Event-specific body; the key must be present. */
payload: TPayload;
}
/** Exactly 32 hexadecimal characters, matched case-insensitively. */
const TRACE_ID_RE = /^[0-9a-f]{32}$/i;
/**
 * `YYYY-MM-DDTHH:MM:SS`, optionally followed by 1–9 fractional-second digits,
 * then either `Z` or a `±HH:MM` offset.
 *
 * This is a shape check only: every component is matched as plain digits, so
 * out-of-range values such as month `13` or hour `99` still match.
 */
const ISO_8601_RE =
/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d{1,9})?(?:Z|[+-]\d{2}:\d{2})$/;
/** One problem found by `validateEnvelope`. */
export interface EnvelopeValidationIssue {
/** Name of the offending envelope field, or `$` when the envelope itself is not an object. */
path: string;
/** Human-readable description of what was expected. */
message: string;
}
/** Lowercase dotted event name with at least two segments, e.g. `payment.completed`. */
const EVENT_TYPE_RE = /^[a-z][a-z0-9_]*(\.[a-z][a-z0-9_]*)+$/;

/**
 * Range-checks the components of a timestamp that has ALREADY matched
 * `ISO_8601_RE`.
 *
 * The regex only guarantees the fixed `YYYY-MM-DDTHH:MM:SS` layout, so values
 * such as `2026-13-45T99:99:99Z` pass it. This helper relies on that fixed
 * layout to slice the components out by position and rejects anything that
 * does not name a real calendar date, time of day, and UTC offset.
 *
 * Seconds are limited to 0–59: a leap second (`:60`) is rejected because
 * JavaScript `Date` cannot represent it.
 */
function isRealCalendarTimestamp(timestamp: string): boolean {
  const year = Number(timestamp.slice(0, 4));
  const month = Number(timestamp.slice(5, 7));
  const day = Number(timestamp.slice(8, 10));
  const hour = Number(timestamp.slice(11, 13));
  const minute = Number(timestamp.slice(14, 16));
  const second = Number(timestamp.slice(17, 19));

  if (month < 1 || month > 12 || hour > 23 || minute > 59 || second > 59) {
    return false;
  }

  // Day 0 of the following month is the last day of `month`. setUTCFullYear is
  // used instead of Date.UTC because it does not remap years 0–99 to 19xx.
  const lastDayOfMonth = new Date(0);
  lastDayOfMonth.setUTCFullYear(year, month, 0);
  if (day < 1 || day > lastDayOfMonth.getUTCDate()) {
    return false;
  }

  if (timestamp.endsWith('Z')) {
    return true;
  }
  // Otherwise the regex guarantees the string ends in `±HH:MM`.
  const offsetHours = Number(timestamp.slice(-5, -3));
  const offsetMinutes = Number(timestamp.slice(-2));
  return offsetHours <= 23 && offsetMinutes <= 59;
}

/**
 * Checks an arbitrary value against the `EventEnvelope` contract.
 *
 * Every field is checked independently, so a single call reports all problems
 * at once rather than stopping at the first one.
 *
 * @param envelope - Untrusted value, e.g. the result of `JSON.parse`.
 * @returns One issue per failing field, in declaration order; an empty array
 *   means the envelope is valid. A non-object input yields a single issue
 *   with path `$`.
 */
export function validateEnvelope(envelope: unknown): EnvelopeValidationIssue[] {
  if (envelope === null || typeof envelope !== 'object') {
    return [{ path: '$', message: 'envelope must be an object' }];
  }

  const issues: EnvelopeValidationIssue[] = [];
  const e = envelope as Record<string, unknown>;

  const schemaVersion = e['schemaVersion'];
  if (schemaVersion !== EVENT_ENVELOPE_SCHEMA_VERSION) {
    issues.push({
      path: 'schemaVersion',
      message: `expected ${EVENT_ENVELOPE_SCHEMA_VERSION}, got ${String(schemaVersion)}`,
    });
  }

  const eventId = e['eventId'];
  if (typeof eventId !== 'string' || !isUuidV7(eventId)) {
    issues.push({ path: 'eventId', message: 'must be a UUIDv7 string' });
  }

  const eventType = e['eventType'];
  if (typeof eventType !== 'string' || !EVENT_TYPE_RE.test(eventType)) {
    issues.push({
      path: 'eventType',
      // Derived from the pattern itself so the message cannot drift from the check.
      message: `must match /${EVENT_TYPE_RE.source}/`,
    });
  }

  const occurredAt = e['occurredAt'];
  if (
    typeof occurredAt !== 'string' ||
    !ISO_8601_RE.test(occurredAt) ||
    !isRealCalendarTimestamp(occurredAt)
  ) {
    issues.push({ path: 'occurredAt', message: 'must be an ISO-8601 timestamp' });
  }

  const producer = e['producer'];
  if (typeof producer !== 'string' || producer.length === 0) {
    issues.push({ path: 'producer', message: 'must be a non-empty string' });
  }

  const traceId = e['traceId'];
  if (typeof traceId !== 'string' || !TRACE_ID_RE.test(traceId)) {
    issues.push({ path: 'traceId', message: 'must be 32 hex characters' });
  }

  // An explicit `payload: undefined` is treated as missing: JSON serialisation
  // drops undefined properties, so such an envelope would lose the key in
  // transit and then fail this same check on the receiving side.
  if (e['payload'] === undefined) {
    issues.push({ path: 'payload', message: 'is required (use {} for empty)' });
  }

  return issues;
}
/**
 * Throwing counterpart of `validateEnvelope`: narrows `envelope` to
 * `EventEnvelope` when it returns normally.
 *
 * @param envelope - Untrusted value to check.
 * @throws Error listing every validation issue as `path: message`, joined by `; `.
 */
export function assertValidEnvelope(envelope: unknown): asserts envelope is EventEnvelope {
  const problems = validateEnvelope(envelope);
  if (problems.length === 0) {
    return;
  }
  const summary = problems.map(({ path, message }) => `${path}: ${message}`).join('; ');
  throw new Error(`Invalid EventEnvelope — ${summary}`);
}

View File

@@ -0,0 +1,11 @@
/** Event type names recognised by `isKnownEventType`. */
export const KNOWN_EVENT_TYPES = [
  'kyc.verified',
  'listing.approved',
  'payment.completed',
] as const;

/** Union of the string literals listed in `KNOWN_EVENT_TYPES`. */
export type KnownEventType = (typeof KNOWN_EVENT_TYPES)[number];

/** Membership index over `KNOWN_EVENT_TYPES`, built once at module load. */
const knownEventTypeIndex: ReadonlySet<string> = new Set<string>(KNOWN_EVENT_TYPES);

/**
 * Type guard: reports whether `value` is one of `KNOWN_EVENT_TYPES`.
 * The comparison is exact (case-sensitive).
 */
export function isKnownEventType(value: string): value is KnownEventType {
  return knownEventTypeIndex.has(value);
}

View File

@@ -0,0 +1,37 @@
export {
EVENT_ENVELOPE_SCHEMA_VERSION,
type EventEnvelope,
type EnvelopeValidationIssue,
validateEnvelope,
assertValidEnvelope,
} from './envelope';
export { uuidv7, isUuidV7 } from './uuid-v7';
export { KNOWN_EVENT_TYPES, type KnownEventType, isKnownEventType } from './event-types';
/**
 * Body of a completed-payment event.
 * NOTE(review): presumably paired with the `payment.completed` event type — confirm.
 */
export interface PaymentCompletedPayload {
paymentId: string;
orderId: string;
userId: string;
// NOTE(review): carried as a string rather than a number, presumably to avoid
// floating-point rounding; exact format (minor units vs. decimal) is not visible here — confirm.
amount: string;
currency: 'VND' | 'USD';
/** Payment gateway that processed the transaction. */
gateway: 'vnpay' | 'momo' | 'zalopay';
gatewayTransactionId: string;
// NOTE(review): presumably an ISO-8601 timestamp like the envelope's `occurredAt` — confirm.
paidAt: string;
}
/**
 * Body of a listing-approval event.
 * NOTE(review): presumably paired with the `listing.approved` event type — confirm.
 */
export interface ListingApprovedPayload {
listingId: string;
propertyId: string;
agentId: string;
approvedByUserId: string;
// NOTE(review): presumably an ISO-8601 timestamp — confirm.
approvedAt: string;
// Nullable; NOTE(review): `null` presumably means the listing has no expiry — confirm.
expiresAt: string | null;
}
/**
 * Body of a KYC-verification event.
 * NOTE(review): presumably paired with the `kyc.verified` event type — confirm.
 */
export interface KycVerifiedPayload {
userId: string;
verifiedByUserId: string;
/** Verification tier that was granted. */
level: 'basic' | 'enhanced';
// NOTE(review): presumably an ISO-8601 timestamp — confirm.
verifiedAt: string;
// NOTE(review): what these strings reference (storage keys, URLs, IDs) is not visible here — confirm.
documentRefs: string[];
}

View File

@@ -0,0 +1,44 @@
import { randomBytes } from 'node:crypto';
/**
 * UUIDv7 — 48-bit Unix-ms timestamp in the high bits, 74 random bits below.
 *
 * Time-ordered, monotonic enough for our needs (idempotency keys + Stream IDs).
 * No dependency on the `uuid` package, to keep this foundation module
 * dependency-free (the Python side uses its own implementation).
 *
 * Reference: RFC 9562 §5.7.
 *
 * @param now - Unix time in milliseconds. Any fractional part is discarded, so
 *   sub-millisecond clock readings are accepted. Defaults to `Date.now()`.
 * @returns The UUID in canonical lowercase `8-4-4-4-12` hex form.
 * @throws RangeError if `now` is not finite or does not fit the unsigned
 *   48-bit timestamp field (0 … 2^48 − 1). Without this check an out-of-range
 *   value would silently wrap and produce a wrongly ordered ID.
 */
export function uuidv7(now: number = Date.now()): string {
  const maxTimestamp = 2 ** 48 - 1;
  const unixMs = Math.floor(now);
  if (!Number.isFinite(unixMs) || unixMs < 0 || unixMs > maxTimestamp) {
    throw new RangeError(
      `uuidv7: timestamp must be a finite number in [0, ${maxTimestamp}] ms, got ${String(now)}`,
    );
  }

  // Start from 16 random bytes, then overwrite the fixed fields.
  const bytes = randomBytes(16);

  // 48-bit timestamp (big-endian) in bytes 0..5
  bytes.writeUIntBE(unixMs, 0, 6);
  // Version 7 in the high nibble of byte 6
  bytes[6] = ((bytes[6] ?? 0) & 0x0f) | 0x70;
  // RFC 4122 variant (10xx) in the high bits of byte 8
  bytes[8] = ((bytes[8] ?? 0) & 0x3f) | 0x80;

  const hex = bytes.toString('hex');
  return [
    hex.slice(0, 8),
    hex.slice(8, 12),
    hex.slice(12, 16),
    hex.slice(16, 20),
    hex.slice(20, 32),
  ].join('-');
}
/**
 * Canonical `8-4-4-4-12` hex layout with version nibble `7` and a variant
 * nibble of `8`, `9`, `a` or `b`; matched case-insensitively.
 */
const UUID_V7_RE =
  /^[0-9a-f]{8}-[0-9a-f]{4}-7[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;

/**
 * Reports whether `value` is a UUIDv7 in canonical textual form.
 *
 * Only the layout, version and variant are checked; the embedded timestamp is
 * not inspected.
 */
export function isUuidV7(value: string): boolean {
  const hasV7Layout = UUID_V7_RE.test(value);
  return hasV7Layout;
}