feat: implement project development module, transfer management features, and industrial AVM model integration

This commit is contained in:
Ho Ngoc Hai
2026-04-18 20:34:35 +07:00
parent 0f3b4d7b0d
commit 38b9def99a
66 changed files with 9051 additions and 17 deletions

View File

@@ -0,0 +1,297 @@
import { Injectable } from '@nestjs/common';
import { Cron } from '@nestjs/schedule';
import { type PrismaService, type LoggerService } from '@modules/shared';
@Injectable()
export class AvmRetrainCronService {
private readonly aiServiceUrl: string;
private readonly aiServiceApiKey: string;
constructor(
private readonly prisma: PrismaService,
private readonly logger: LoggerService,
) {
this.aiServiceUrl = process.env['AI_SERVICE_URL'] ?? 'http://localhost:8000';
this.aiServiceApiKey = process.env['AI_SERVICE_API_KEY'] ?? '';
}
/**
* Weekly retrain — every Sunday at 3 AM.
*
* 1. Export training data from database to the AI service
* 2. Trigger ensemble retraining via POST /avm/v2/train
* 3. Log results (version, metrics)
*/
@Cron('0 3 * * 0', { name: 'avm-v2-weekly-retrain' })
async weeklyRetrain(): Promise<void> {
this.logger.log('Starting weekly AVM v2 retrain...', 'AvmRetrainCronService');
try {
// Step 1: Export training data
const trainingData = await this.exportTrainingData();
if (trainingData.length < 50) {
this.logger.warn(
`Insufficient training data (${trainingData.length} rows). Skipping retrain.`,
'AvmRetrainCronService',
);
return;
}
// Step 2: Upload training data to AI service
await this.uploadTrainingData(trainingData);
// Step 3: Trigger retraining
const result = await this.triggerRetrain();
this.logger.log(
`AVM v2 retrain completed: version=${result.model_version}, ` +
`MAPE=${result.metrics?.mape ?? 'N/A'}%, ` +
`samples=${result.training_samples}`,
'AvmRetrainCronService',
);
} catch (err) {
this.logger.error(
`AVM v2 weekly retrain failed: ${(err as Error).message}`,
undefined,
'AvmRetrainCronService',
);
}
}
/**
* Export property + listing + market data as training rows.
*
* Each row maps to the feature columns expected by the Python
* AVM v2 training pipeline (see avm_v2_service._prepare_training_data).
*/
async exportTrainingData(): Promise<TrainingRow[]> {
const rows = await this.prisma.$queryRaw<RawTrainingRow[]>`
WITH market AS (
SELECT
mi.district,
mi.city,
mi."avgPriceM2" AS avg_price_m2,
mi."totalListings" AS listing_density,
COALESCE(mi."absorptionRate", 0) AS absorption_rate,
mi."daysOnMarket" AS dom_avg,
COALESCE(mi."yoyChange", 0) AS yoy_change
FROM "MarketIndex" mi
WHERE mi.period = (
SELECT MAX(period) FROM "MarketIndex"
)
)
SELECT
p."propertyType"::text AS property_type,
p."areaM2" AS area_m2,
COALESCE(p.bedrooms, 2) AS rooms,
COALESCE(p.floor, 0) AS floor_level,
COALESCE(p."totalFloors", p.floors, 0) AS total_floors,
COALESCE(p.direction::text, 'unknown') AS direction,
CASE
WHEN p."totalFloors" > 0 AND p."areaM2" > 0
THEN (p."totalFloors"::float * p."areaM2") / NULLIF(p."areaM2", 0)
ELSE 1.0
END AS floor_ratio,
CASE
WHEN p."yearBuilt" IS NOT NULL
THEN EXTRACT(YEAR FROM NOW())::int - p."yearBuilt"
ELSE 5
END AS building_age_years,
CASE WHEN p.amenities::text ILIKE '%elevator%' THEN 1.0 ELSE 0.0 END AS has_elevator,
CASE WHEN p.amenities::text ILIKE '%parking%' THEN 1.0 ELSE 0.0 END AS has_parking,
CASE WHEN p.amenities::text ILIKE '%pool%' THEN 1.0 ELSE 0.0 END AS has_pool,
CASE
WHEN p."legalStatus" IN ('so_do', 'so_hong', 'SO_DO', 'SO_HONG') THEN 1.0
ELSE 0.0
END AS has_legal_paper,
0.5 AS developer_reputation,
0.5 AS neighborhood_score,
COALESCE(
ST_Distance(
p.location::geography,
ST_SetSRID(ST_MakePoint(106.6297, 10.8231), 4326)::geography
) / 1000.0,
10.0
) AS distance_to_cbd_km,
COALESCE(p."metroDistanceM" / 1000.0, 5.0) AS distance_to_metro_km,
5.0 AS distance_to_school_km,
3.0 AS distance_to_hospital_km,
2.0 AS distance_to_park_km,
4.0 AS distance_to_mall_km,
0.1 AS flood_zone_risk,
COALESCE(m.avg_price_m2, 0) AS avg_price_district_3m_vnd_m2,
COALESCE(m.listing_density, 0) AS listing_density,
COALESCE(m.absorption_rate, 0) AS absorption_rate,
COALESCE(m.dom_avg, 30) AS dom_avg,
0.0 AS price_momentum_30d,
COALESCE(m.yoy_change, 0) AS yoy_change,
0.5 AS renovation_score,
0.5 AS view_quality,
0.5 AS interior_quality,
0.3 AS noise_level,
0.5 AS natural_light,
EXTRACT(MONTH FROM l."publishedAt")::int AS month,
p.district AS district,
l."priceVND"::float AS price_vnd
FROM "Listing" l
JOIN "Property" p ON l."propertyId" = p.id
LEFT JOIN market m ON m.district = p.district AND m.city = p.city
WHERE l.status IN ('ACTIVE', 'SOLD', 'RENTED')
AND l."priceVND" > 100000000
AND l."publishedAt" IS NOT NULL
AND p."areaM2" > 0
ORDER BY l."publishedAt" DESC
LIMIT 50000
`;
return rows.map((r) => ({
property_type: String(r.property_type).toLowerCase(),
area_m2: Number(r.area_m2),
rooms: Number(r.rooms),
floor_level: Number(r.floor_level),
total_floors: Number(r.total_floors),
direction: String(r.direction).toLowerCase(),
floor_ratio: Number(r.floor_ratio),
building_age_years: Number(r.building_age_years),
has_elevator: Number(r.has_elevator),
has_parking: Number(r.has_parking),
has_pool: Number(r.has_pool),
has_legal_paper: Number(r.has_legal_paper),
developer_reputation: Number(r.developer_reputation),
neighborhood_score: Number(r.neighborhood_score),
distance_to_cbd_km: Number(r.distance_to_cbd_km),
distance_to_metro_km: Number(r.distance_to_metro_km),
distance_to_school_km: Number(r.distance_to_school_km),
distance_to_hospital_km: Number(r.distance_to_hospital_km),
distance_to_park_km: Number(r.distance_to_park_km),
distance_to_mall_km: Number(r.distance_to_mall_km),
flood_zone_risk: Number(r.flood_zone_risk),
avg_price_district_3m_vnd_m2: Number(r.avg_price_district_3m_vnd_m2),
listing_density: Number(r.listing_density),
absorption_rate: Number(r.absorption_rate),
dom_avg: Number(r.dom_avg),
price_momentum_30d: Number(r.price_momentum_30d),
yoy_change: Number(r.yoy_change),
renovation_score: Number(r.renovation_score),
view_quality: Number(r.view_quality),
interior_quality: Number(r.interior_quality),
noise_level: Number(r.noise_level),
natural_light: Number(r.natural_light),
month: Number(r.month),
district: String(r.district),
price_vnd: Number(r.price_vnd),
}));
}
private async uploadTrainingData(rows: TrainingRow[]): Promise<void> {
const headers = Object.keys(rows[0]!);
const csvLines = [headers.join(',')];
for (const row of rows) {
csvLines.push(headers.map((h) => String(row[h as keyof TrainingRow])).join(','));
}
const csv = csvLines.join('\n');
const url = `${this.aiServiceUrl}/avm/v2/upload-training-data`;
const reqHeaders: Record<string, string> = { 'Content-Type': 'text/csv' };
if (this.aiServiceApiKey) {
reqHeaders['X-API-Key'] = this.aiServiceApiKey;
}
const response = await fetch(url, {
method: 'POST',
headers: reqHeaders,
body: csv,
signal: AbortSignal.timeout(30_000),
});
if (!response.ok) {
const text = await response.text().catch(() => '');
throw new Error(`Training data upload failed (${response.status}): ${text}`);
}
this.logger.log(
`Uploaded ${rows.length} training rows to AI service`,
'AvmRetrainCronService',
);
}
private async triggerRetrain(): Promise<RetrainResult> {
const url = `${this.aiServiceUrl}/avm/v2/train`;
const headers: Record<string, string> = { 'Content-Type': 'application/json' };
if (this.aiServiceApiKey) {
headers['X-API-Key'] = this.aiServiceApiKey;
}
const response = await fetch(url, {
method: 'POST',
headers,
body: JSON.stringify({
optuna_trials: 50,
test_size: 0.15,
val_size: 0.15,
}),
signal: AbortSignal.timeout(600_000), // 10 min — training can take a while
});
if (!response.ok) {
const text = await response.text().catch(() => '');
throw new Error(`Retrain request failed (${response.status}): ${text}`);
}
return response.json() as Promise<RetrainResult>;
}
}
interface RawTrainingRow {
property_type: string;
area_m2: number;
rooms: number;
floor_level: number;
total_floors: number;
direction: string;
floor_ratio: number;
building_age_years: number;
has_elevator: number;
has_parking: number;
has_pool: number;
has_legal_paper: number;
developer_reputation: number;
neighborhood_score: number;
distance_to_cbd_km: number;
distance_to_metro_km: number;
distance_to_school_km: number;
distance_to_hospital_km: number;
distance_to_park_km: number;
distance_to_mall_km: number;
flood_zone_risk: number;
avg_price_district_3m_vnd_m2: number;
listing_density: number;
absorption_rate: number;
dom_avg: number;
price_momentum_30d: number;
yoy_change: number;
renovation_score: number;
view_quality: number;
interior_quality: number;
noise_level: number;
natural_light: number;
month: number;
district: string;
price_vnd: number;
}
interface TrainingRow extends RawTrainingRow {}
interface RetrainResult {
model_version: string;
metrics: {
mae: number;
mape: number;
rmse: number;
r2: number;
};
training_samples: number;
validation_samples: number;
test_samples: number;
best_params: Record<string, unknown>;
}