import { Injectable } from '@nestjs/common';
import { Cron } from '@nestjs/schedule';
import { PrismaService, LoggerService } from '@modules/shared';

/** Context label attached to every log line emitted by this service. */
const LOG_CONTEXT = 'AvmRetrainCronService';

/** Retraining is skipped when fewer rows than this are exported. */
const MIN_TRAINING_ROWS = 50;

/** Abort the CSV upload request after this many milliseconds. */
const UPLOAD_TIMEOUT_MS = 30_000;

/** Abort the retrain request after this many milliseconds (10 min — training can take a while). */
const RETRAIN_TIMEOUT_MS = 600_000;

/** A CSV field containing any of these characters must be quoted (RFC 4180). */
const CSV_NEEDS_QUOTING = /[",\r\n]/;

/**
 * Render a single value as a CSV field.
 *
 * The value is stringified with `String()`. If the text contains a comma,
 * double quote, CR or LF it is wrapped in double quotes and embedded quotes
 * are doubled, so free-text columns cannot shift the column layout.
 */
function toCsvField(value: unknown): string {
  const text = String(value);
  return CSV_NEEDS_QUOTING.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
}

@Injectable()
export class AvmRetrainCronService {
  private readonly aiServiceUrl: string;
  private readonly aiServiceApiKey: string;

  /**
   * Reads the AI service location and API key from the environment.
   *
   * `AI_SERVICE_URL` falls back to `http://localhost:8000`;
   * `AI_SERVICE_API_KEY` falls back to an empty string, in which case no
   * `X-API-Key` header is sent.
   */
  constructor(
    private readonly prisma: PrismaService,
    private readonly logger: LoggerService,
  ) {
    this.aiServiceUrl = process.env['AI_SERVICE_URL'] ?? 'http://localhost:8000';
    this.aiServiceApiKey = process.env['AI_SERVICE_API_KEY'] ?? '';
  }

  /**
   * Weekly retrain — every Sunday at 3 AM.
   *
   * 1. Export training data from the database
   * 2. Upload it as CSV via POST /avm/v2/upload-training-data
   * 3. Trigger ensemble retraining via POST /avm/v2/train
   * 4. Log results (version, MAPE, sample count)
   *
   * Never throws: any failure is logged and swallowed so the scheduler keeps
   * running. The run is skipped (with a warning) when fewer than
   * `MIN_TRAINING_ROWS` rows are exported.
   */
  @Cron('0 3 * * 0', { name: 'avm-v2-weekly-retrain' })
  async weeklyRetrain(): Promise<void> {
    this.logger.log('Starting weekly AVM v2 retrain...', LOG_CONTEXT);

    try {
      // Step 1: Export training data
      const trainingData = await this.exportTrainingData();

      if (trainingData.length < MIN_TRAINING_ROWS) {
        this.logger.warn(
          `Insufficient training data (${trainingData.length} rows). Skipping retrain.`,
          LOG_CONTEXT,
        );
        return;
      }

      // Step 2: Upload training data to AI service
      await this.uploadTrainingData(trainingData);

      // Step 3: Trigger retraining
      const result = await this.triggerRetrain();

      this.logger.log(
        `AVM v2 retrain completed: version=${result.model_version}, ` +
          `MAPE=${result.metrics?.mape ?? 'N/A'}%, ` +
          `samples=${result.training_samples}`,
        LOG_CONTEXT,
      );
    } catch (err: unknown) {
      // Anything can be thrown; only read `.message` from real Error objects.
      const reason = err instanceof Error ? err.message : String(err);
      this.logger.error(`AVM v2 weekly retrain failed: ${reason}`, undefined, LOG_CONTEXT);
    }
  }

  /**
   * Export property + listing + market data as training rows.
   *
   * Each row maps to the feature columns expected by the Python
   * AVM v2 training pipeline (see avm_v2_service._prepare_training_data).
   *
   * Query behaviour, as written:
   * - Listings are restricted to status ACTIVE / SOLD / RENTED with a price
   *   above 100,000,000, a non-null publish date and a positive area; the
   *   newest 50,000 by publish date are returned.
   * - Market columns come from the `MarketIndex` rows whose `period` equals
   *   the table-wide maximum, matched on district and city; unmatched
   *   listings get the COALESCE defaults.
   * - Several features (developer_reputation, neighborhood_score, the
   *   school/hospital/park/mall distances, flood_zone_risk,
   *   price_momentum_30d and the quality scores) are hard-coded constants.
   * - distance_to_cbd_km is measured to the fixed point
   *   (lon 106.6297, lat 10.8231).
   *
   * NOTE(review): `floor_ratio` multiplies and divides by `areaM2`, so it
   * evaluates to `totalFloors` (or 1.0) rather than a ratio. Left unchanged
   * because the Python pipeline may have been trained against this value —
   * confirm the intended definition before fixing.
   *
   * NOTE(review): if `MarketIndex` can hold more than one row per
   * (district, city) in the latest period, the LEFT JOIN duplicates
   * listings — confirm uniqueness.
   *
   * @returns Rows with every column coerced to a plain `number` or `string`;
   *   `property_type` and `direction` are lower-cased.
   */
  async exportTrainingData(): Promise<TrainingRow[]> {
    const rows = await this.prisma.$queryRaw<RawTrainingRow[]>`
      WITH market AS (
        SELECT
          mi.district,
          mi.city,
          mi."avgPriceM2" AS avg_price_m2,
          mi."totalListings" AS listing_density,
          COALESCE(mi."absorptionRate", 0) AS absorption_rate,
          mi."daysOnMarket" AS dom_avg,
          COALESCE(mi."yoyChange", 0) AS yoy_change
        FROM "MarketIndex" mi
        WHERE mi.period = (
          SELECT MAX(period) FROM "MarketIndex"
        )
      )
      SELECT
        p."propertyType"::text AS property_type,
        p."areaM2" AS area_m2,
        COALESCE(p.bedrooms, 2) AS rooms,
        COALESCE(p.floor, 0) AS floor_level,
        COALESCE(p."totalFloors", p.floors, 0) AS total_floors,
        COALESCE(p.direction::text, 'unknown') AS direction,
        CASE
          WHEN p."totalFloors" > 0 AND p."areaM2" > 0
          THEN (p."totalFloors"::float * p."areaM2") / NULLIF(p."areaM2", 0)
          ELSE 1.0
        END AS floor_ratio,
        CASE
          WHEN p."yearBuilt" IS NOT NULL
          THEN EXTRACT(YEAR FROM NOW())::int - p."yearBuilt"
          ELSE 5
        END AS building_age_years,
        CASE WHEN p.amenities::text ILIKE '%elevator%' THEN 1.0 ELSE 0.0 END AS has_elevator,
        CASE WHEN p.amenities::text ILIKE '%parking%' THEN 1.0 ELSE 0.0 END AS has_parking,
        CASE WHEN p.amenities::text ILIKE '%pool%' THEN 1.0 ELSE 0.0 END AS has_pool,
        CASE WHEN p."legalStatus" IN ('so_do', 'so_hong', 'SO_DO', 'SO_HONG') THEN 1.0 ELSE 0.0 END AS has_legal_paper,
        0.5 AS developer_reputation,
        0.5 AS neighborhood_score,
        COALESCE(
          ST_Distance(
            p.location::geography,
            ST_SetSRID(ST_MakePoint(106.6297, 10.8231), 4326)::geography
          ) / 1000.0,
          10.0
        ) AS distance_to_cbd_km,
        COALESCE(p."metroDistanceM" / 1000.0, 5.0) AS distance_to_metro_km,
        5.0 AS distance_to_school_km,
        3.0 AS distance_to_hospital_km,
        2.0 AS distance_to_park_km,
        4.0 AS distance_to_mall_km,
        0.1 AS flood_zone_risk,
        COALESCE(m.avg_price_m2, 0) AS avg_price_district_3m_vnd_m2,
        COALESCE(m.listing_density, 0) AS listing_density,
        COALESCE(m.absorption_rate, 0) AS absorption_rate,
        COALESCE(m.dom_avg, 30) AS dom_avg,
        0.0 AS price_momentum_30d,
        COALESCE(m.yoy_change, 0) AS yoy_change,
        0.5 AS renovation_score,
        0.5 AS view_quality,
        0.5 AS interior_quality,
        0.3 AS noise_level,
        0.5 AS natural_light,
        EXTRACT(MONTH FROM l."publishedAt")::int AS month,
        p.district AS district,
        l."priceVND"::float AS price_vnd
      FROM "Listing" l
      JOIN "Property" p ON l."propertyId" = p.id
      LEFT JOIN market m ON m.district = p.district AND m.city = p.city
      WHERE l.status IN ('ACTIVE', 'SOLD', 'RENTED')
        AND l."priceVND" > 100000000
        AND l."publishedAt" IS NOT NULL
        AND p."areaM2" > 0
      ORDER BY l."publishedAt" DESC
      LIMIT 50000
    `;

    // Coerce every column explicitly: the driver's runtime value for a raw
    // query column is not guaranteed to be a plain JS number/string.
    return rows.map((r) => ({
      property_type: String(r.property_type).toLowerCase(),
      area_m2: Number(r.area_m2),
      rooms: Number(r.rooms),
      floor_level: Number(r.floor_level),
      total_floors: Number(r.total_floors),
      direction: String(r.direction).toLowerCase(),
      floor_ratio: Number(r.floor_ratio),
      building_age_years: Number(r.building_age_years),
      has_elevator: Number(r.has_elevator),
      has_parking: Number(r.has_parking),
      has_pool: Number(r.has_pool),
      has_legal_paper: Number(r.has_legal_paper),
      developer_reputation: Number(r.developer_reputation),
      neighborhood_score: Number(r.neighborhood_score),
      distance_to_cbd_km: Number(r.distance_to_cbd_km),
      distance_to_metro_km: Number(r.distance_to_metro_km),
      distance_to_school_km: Number(r.distance_to_school_km),
      distance_to_hospital_km: Number(r.distance_to_hospital_km),
      distance_to_park_km: Number(r.distance_to_park_km),
      distance_to_mall_km: Number(r.distance_to_mall_km),
      flood_zone_risk: Number(r.flood_zone_risk),
      avg_price_district_3m_vnd_m2: Number(r.avg_price_district_3m_vnd_m2),
      listing_density: Number(r.listing_density),
      absorption_rate: Number(r.absorption_rate),
      dom_avg: Number(r.dom_avg),
      price_momentum_30d: Number(r.price_momentum_30d),
      yoy_change: Number(r.yoy_change),
      renovation_score: Number(r.renovation_score),
      view_quality: Number(r.view_quality),
      interior_quality: Number(r.interior_quality),
      noise_level: Number(r.noise_level),
      natural_light: Number(r.natural_light),
      month: Number(r.month),
      district: String(r.district),
      price_vnd: Number(r.price_vnd),
    }));
  }

  /**
   * Serialize the rows to CSV and POST them to
   * `/avm/v2/upload-training-data` on the AI service.
   *
   * The header line is taken from the keys of the first row; every data
   * field is CSV-escaped so text columns containing commas, quotes or line
   * breaks stay in their own column.
   *
   * @param rows Training rows to upload; must contain at least one row.
   * @throws Error if `rows` is empty, if the request fails or times out, or
   *   if the service answers with a non-2xx status.
   */
  private async uploadTrainingData(rows: TrainingRow[]): Promise<void> {
    const firstRow = rows[0];
    if (firstRow === undefined) {
      throw new Error('Cannot upload training data: no rows provided');
    }

    const columns = Object.keys(firstRow) as (keyof TrainingRow)[];
    const csvLines = [columns.join(',')];
    for (const row of rows) {
      csvLines.push(columns.map((column) => toCsvField(row[column])).join(','));
    }
    const csv = csvLines.join('\n');

    const response = await fetch(`${this.aiServiceUrl}/avm/v2/upload-training-data`, {
      method: 'POST',
      headers: this.buildHeaders('text/csv'),
      body: csv,
      signal: AbortSignal.timeout(UPLOAD_TIMEOUT_MS),
    });
    await this.ensureOk(response, 'Training data upload failed');

    this.logger.log(`Uploaded ${rows.length} training rows to AI service`, LOG_CONTEXT);
  }

  /**
   * Ask the AI service to retrain via POST `/avm/v2/train` and return its
   * JSON response.
   *
   * The response body is trusted to match `RetrainResult`; it is not
   * validated here.
   *
   * @throws Error if the request fails or times out, or if the service
   *   answers with a non-2xx status.
   */
  private async triggerRetrain(): Promise<RetrainResult> {
    const response = await fetch(`${this.aiServiceUrl}/avm/v2/train`, {
      method: 'POST',
      headers: this.buildHeaders('application/json'),
      body: JSON.stringify({
        optuna_trials: 50,
        test_size: 0.15,
        val_size: 0.15,
      }),
      signal: AbortSignal.timeout(RETRAIN_TIMEOUT_MS),
    });
    await this.ensureOk(response, 'Retrain request failed');

    return (await response.json()) as RetrainResult;
  }

  /** Build request headers, adding `X-API-Key` only when a key is configured. */
  private buildHeaders(contentType: string): Record<string, string> {
    const headers: Record<string, string> = { 'Content-Type': contentType };
    if (this.aiServiceApiKey) {
      headers['X-API-Key'] = this.aiServiceApiKey;
    }
    return headers;
  }

  /**
   * Throw `"<failureLabel> (<status>): <body>"` when the response is not ok.
   * A body that cannot be read is reported as an empty string.
   */
  private async ensureOk(response: Response, failureLabel: string): Promise<void> {
    if (response.ok) {
      return;
    }
    const text = await response.text().catch(() => '');
    throw new Error(`${failureLabel} (${response.status}): ${text}`);
  }
}

/** Column layout of one row returned by the training-data export query. */
interface RawTrainingRow {
  property_type: string;
  area_m2: number;
  rooms: number;
  floor_level: number;
  total_floors: number;
  direction: string;
  floor_ratio: number;
  building_age_years: number;
  has_elevator: number;
  has_parking: number;
  has_pool: number;
  has_legal_paper: number;
  developer_reputation: number;
  neighborhood_score: number;
  distance_to_cbd_km: number;
  distance_to_metro_km: number;
  distance_to_school_km: number;
  distance_to_hospital_km: number;
  distance_to_park_km: number;
  distance_to_mall_km: number;
  flood_zone_risk: number;
  avg_price_district_3m_vnd_m2: number;
  listing_density: number;
  absorption_rate: number;
  dom_avg: number;
  price_momentum_30d: number;
  yoy_change: number;
  renovation_score: number;
  view_quality: number;
  interior_quality: number;
  noise_level: number;
  natural_light: number;
  month: number;
  district: string;
  price_vnd: number;
}

/** Normalized training row; same columns as the raw query row. */
type TrainingRow = RawTrainingRow;

/** Shape this service expects from the POST /avm/v2/train response. */
interface RetrainResult {
  model_version: string;
  metrics: {
    mae: number;
    mape: number;
    rmse: number;
    r2: number;
  };
  training_samples: number;
  validation_samples: number;
  test_samples: number;
  best_params: Record<string, unknown>;
}