feat: implement project development module, transfer management features, and industrial AVM model integration
This commit is contained in:
@@ -1,12 +1,13 @@
|
||||
"""Industrial AVM — Rent estimation service for industrial parks.
|
||||
|
||||
Heuristic fallback when trained models are not available.
|
||||
Uses gradient boosting approach similar to residential AVM v2.
|
||||
Preference order: park-level ridge baseline (v1, TEC-2768) → XGBoost → heuristic.
|
||||
Heuristic fallback remains when no trained artifact is on disk.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
import pickle
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
@@ -20,6 +21,21 @@ from app.models.avm_industrial import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
RIDGE_ARTIFACT_NAME = "avm_industrial_park_ridge_v1.pkl"
|
||||
|
||||
# Map API property types to the rent head trained in the ridge baseline.
|
||||
# Land rent is stored as USD/m²/year; others as USD/m²/month — convert where
|
||||
# needed so the response stays in USD/m²/month.
|
||||
_PROPERTY_TO_HEAD: dict[str, str] = {
|
||||
"warehouse": "rbw",
|
||||
"ready_built_warehouse": "rbw",
|
||||
"factory": "rbf",
|
||||
"ready_built_factory": "rbf",
|
||||
"office_in_park": "rbf",
|
||||
"open_yard": "land",
|
||||
"industrial_land": "land",
|
||||
}
|
||||
|
||||
# ── Feature ordering for model input ────────────────────────────
|
||||
INDUSTRIAL_FEATURE_NAMES = [
|
||||
"region_encoded",
|
||||
@@ -169,40 +185,171 @@ def _find_comparables(req: IndustrialAVMRequest) -> list[IndustrialComparable]:
|
||||
class IndustrialAVMService:
|
||||
"""Industrial property rent estimation service.
|
||||
|
||||
Uses gradient boosting when a trained model is available,
|
||||
falls back to heuristic pricing for development/demo.
|
||||
Preference order when a trained artifact is available:
|
||||
1. Ridge v1 (park-level baseline with conformal CIs, TEC-2768)
|
||||
2. XGBoost (legacy, listing-level)
|
||||
3. Multi-factor heuristic (always available)
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
    """Start in heuristic mode, then try to load a trained artifact."""
    # These defaults describe the heuristic backend. _load_model() replaces
    # them when it finds a usable ridge or xgboost artifact.
    self._backend: str = "heuristic"
    self._model_version = "heuristic-v1"
    self._model: Any = None
    self._load_model()
|
||||
|
||||
def _load_model(self) -> None:
    """Attempt to load trained industrial AVM artifacts (ridge first).

    Resolution order:

    1. ``RIDGE_ARTIFACT_NAME`` under ``settings.model_path`` (pickled dict
       whose ``"version"`` must equal ``"ridge-industrial-v1"``).
    2. ``avm_industrial_xgb.json`` under the same directory (xgboost Booster).
    3. Nothing loaded: the instance keeps its heuristic defaults.

    The method never raises; every failure is logged and the next option is
    tried, so the service always ends up with a usable backend.
    """
    try:
        from app.config import settings

        model_path = settings.model_path
    except Exception:
        logger.info("Industrial AVM: config unavailable — using heuristic")
        return

    ridge_path = os.path.join(model_path, RIDGE_ARTIFACT_NAME)
    if os.path.exists(ridge_path):
        try:
            # SECURITY: pickle.load can execute arbitrary code. The model
            # directory must only ever contain artifacts produced by our own
            # training pipeline — never user-supplied files.
            with open(ridge_path, "rb") as f:
                artifact = pickle.load(f)
            if not isinstance(artifact, dict) or artifact.get("version") != "ridge-industrial-v1":
                raise ValueError(f"Unexpected artifact version in {ridge_path}")
            self._model = artifact
            self._model_version = "ridge-industrial-v1"
            self._backend = "ridge"
            logger.info("Loaded industrial AVM ridge artifact from %s", ridge_path)
            return
        except Exception as exc:  # keep service alive on artifact corruption
            logger.warning("Failed to load ridge artifact (%s); falling back", exc)

    try:
        import xgboost as xgb

        xgb_path = os.path.join(model_path, "avm_industrial_xgb.json")
        if os.path.exists(xgb_path):
            booster = xgb.Booster()
            booster.load_model(xgb_path)
            self._model = booster
            self._model_version = "xgb-industrial-v1"
            self._backend = "xgb"
            logger.info("Loaded industrial AVM xgb model from %s", xgb_path)
            return
    except ImportError:
        # xgboost is an optional dependency; its absence is not an error.
        logger.info("Industrial AVM: xgboost not installed — skipping xgb model")
    except Exception as exc:
        # A present-but-broken model file should be visible in the logs
        # instead of being silently ignored.
        logger.warning("Failed to load xgb industrial model (%s); falling back", exc)

    logger.info("No trained industrial AVM model — using heuristic")
|
||||
|
||||
def predict(self, req: IndustrialAVMRequest) -> IndustrialAVMResponse:
    """Predict industrial property rent with the best backend available.

    Dispatches on ``self._backend``: ``"ridge"`` uses ``_predict_ridge`` and
    ``"xgb"`` uses ``_predict_model``. A trained backend is used only when a
    model object is actually loaded; in every other case the request is
    answered by ``_predict_heuristic``.
    """
    # Both trained backends dereference self._model, so a missing model must
    # route to the heuristic rather than fail inside the predictor.
    has_model = self._model is not None
    if self._backend == "ridge" and has_model:
        return self._predict_ridge(req)
    if self._backend == "xgb" and has_model:
        return self._predict_model(req)
    return self._predict_heuristic(req)
|
||||
|
||||
def _featureize_ridge(self, req: IndustrialAVMRequest, spec: dict) -> np.ndarray:
    """Build the exact feature vector used during ridge training.

    Feature ordering must match `spec["feature_cols"]` which is the canonical
    order emitted by the trainer. Sources:
    - numeric fields come straight from the request
    - province FDI comes from the request when it is a non-zero number,
      otherwise from the artifact lookup (fallback to the artifact default)
    - target-industry flags are always 0 at inference (see comment below)

    Args:
        req: Incoming request; read by attribute only.
        spec: The artifact's ``feature_spec`` dict. Keys read here:
            ``province_fdi``, ``default_fdi``, ``top_industries``,
            ``region_order`` and ``feature_cols``.

    Returns:
        1-D float64 array with one entry per name in ``spec["feature_cols"]``.

    Raises:
        KeyError: if ``spec["feature_cols"]`` names a feature that is not
            produced here.
    """
    province = (req.province or "").strip().lower()
    fallback_fdi = spec["province_fdi"].get(province, spec["default_fdi"])

    # Occupancy may arrive as a 0-100 percentage or a 0-1 fraction; values
    # above 1.5 are treated as percentages, then clamped to [0, 1].
    occupancy = float(req.park_occupancy_rate)
    if occupancy > 1.5:
        occupancy = occupancy / 100.0
    occupancy = min(max(occupancy, 0.0), 1.0)

    # A missing (None) or zero request value means "not provided": use the
    # province lookup instead. None is guarded like province/region are.
    fdi_value = float(req.fdi_province_musd or 0.0) or fallback_fdi
    zoning = (req.zoning or "").lower()

    feats: dict[str, float] = {
        "occupancy": occupancy,
        "log_area_ha": math.log1p(max(0.0, float(req.park_area_ha))),
        "park_age_years": float(max(0, int(req.park_age_years))),
        "log_dist_port_km": math.log1p(max(0.0, float(req.distance_to_port_km))),
        "log_dist_airport_km": math.log1p(max(0.0, float(req.distance_to_airport_km))),
        "log_dist_highway_km": math.log1p(max(0.0, float(req.distance_to_highway_km))),
        "logistics_connectivity_score": float(req.logistics_connectivity_score),
        "log_fdi_province": math.log1p(max(0.0, fdi_value)),
        "has_special_zone": float(zoning in {"free_trade_zone", "high_tech"}),
    }
    # Property type flags can proxy certain target-industry signals but the
    # trainer's industry one-hots are park-level. At inference we don't know
    # the park's industry mix, so default to 0 and let the province/region
    # fixed effects carry the signal.
    for ind in spec["top_industries"]:
        feats[f"ind_{ind}"] = 0.0

    # Drop-first one-hot: the first entry of region_order is the baseline and
    # gets no column of its own.
    region = (req.region or "south").lower()
    for r in spec["region_order"][1:]:
        feats[f"region_{r}"] = float(region == r)

    return np.array([feats[c] for c in spec["feature_cols"]], dtype=np.float64)
|
||||
|
||||
def _predict_ridge(self, req: IndustrialAVMRequest) -> IndustrialAVMResponse:
    """Predict using the ridge v1 park-level baseline (conformal CIs).

    Standardizes the request features with the selected head's scaler,
    applies the linear model on the log1p scale, widens the prediction by the
    head's ``q80_log`` to form the range, and maps everything back with
    ``expm1``. The "land" head is divided by 12 so the response is per month.
    """
    bundle = self._model
    features = self._featureize_ridge(req, bundle["feature_spec"])

    # Unknown property types fall back to the ready-built-factory head.
    head_name = _PROPERTY_TO_HEAD.get(req.property_type.lower(), "rbf")
    head = bundle["heads"][head_name]

    centered = features - head["scaler_mean"]
    scale = head["scaler_scale"]
    # Zero-variance columns would divide by zero; scale them by 1 instead.
    x_std = centered / np.where(scale == 0, 1.0, scale)

    weights = head["coefficients"]
    log_pred = float(x_std @ weights + head["intercept"])
    half_width = float(head["q80_log"])

    # Ridge head is trained in natural units (USD/m²/month for rbf/rbw,
    # USD/m²/year for land). Convert to the response contract which always
    # reports monthly USD/m² for the primary estimate.
    divisor = 12.0 if head_name == "land" else 1.0
    rent = math.expm1(log_pred) / divisor
    low = math.expm1(log_pred - half_width) / divisor
    high = math.expm1(log_pred + half_width) / divisor

    comparables = _find_comparables(req)

    # Drivers: top coefficients by absolute standardized contribution,
    # normalized so the reported importances are shares of the total.
    magnitude = np.abs(weights * x_std)
    total = float(np.sum(magnitude)) or 1.0
    drivers = []
    for idx in np.argsort(-magnitude)[:8]:
        if magnitude[idx] > 1e-6:
            drivers.append(
                FeatureImportance(
                    feature=head["feature_cols"][idx],
                    importance=round(float(magnitude[idx] / total), 4),
                )
            )

    rent_floor = max(0.0, rent)  # never report a negative rent
    return IndustrialAVMResponse(
        estimated_rent_usd_m2=round(rent_floor, 2),
        confidence=round(float(head.get("coverage_80_loo", 0.80)), 2),
        rent_range_low_usd_m2=round(max(0.0, low), 2),
        rent_range_high_usd_m2=round(max(0.0, high), 2),
        annual_rent_usd_m2=round(rent_floor * 12, 2),
        total_monthly_rent_usd=round(rent_floor * req.area_m2, 2),
        comparables=comparables,
        drivers=drivers,
        model_version=self._model_version,
    )
|
||||
|
||||
def _predict_model(self, req: IndustrialAVMRequest) -> IndustrialAVMResponse:
|
||||
"""Predict using trained gradient boosting model."""
|
||||
import xgboost as xgb
|
||||
|
||||
422
libs/ai-services/data/industrial/parks.json
Normal file
422
libs/ai-services/data/industrial/parks.json
Normal file
@@ -0,0 +1,422 @@
|
||||
[
|
||||
{
|
||||
"id": "seed-kcn-001",
|
||||
"name": "KCN VSIP Bắc Ninh",
|
||||
"slug": "vsip-bac-ninh",
|
||||
"province": "Bắc Ninh",
|
||||
"region": "north",
|
||||
"status": "operational",
|
||||
"totalAreaHa": 700,
|
||||
"occupancyRate": 92,
|
||||
"establishedYear": 2007,
|
||||
"landRentUsdM2Year": 90,
|
||||
"rbfRentUsdM2Month": 5.5,
|
||||
"rbwRentUsdM2Month": 4.8,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Hải Phòng", "distanceKm": 110},
|
||||
"airport": {"name": "Nội Bài", "distanceKm": 35},
|
||||
"highway": {"name": "QL 1A", "distanceKm": 5}
|
||||
},
|
||||
"incentives": {"specialZone": false},
|
||||
"targetIndustries": ["electronics", "automotive", "precision engineering", "food processing"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-002",
|
||||
"name": "KCN VSIP Bình Dương I",
|
||||
"slug": "vsip-binh-duong-1",
|
||||
"province": "Bình Dương",
|
||||
"region": "south",
|
||||
"status": "full",
|
||||
"totalAreaHa": 500,
|
||||
"occupancyRate": 100,
|
||||
"establishedYear": 1996,
|
||||
"landRentUsdM2Year": 110,
|
||||
"rbfRentUsdM2Month": 6.0,
|
||||
"rbwRentUsdM2Month": 5.2,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Cát Lái", "distanceKm": 25},
|
||||
"airport": {"name": "Tân Sơn Nhất", "distanceKm": 20},
|
||||
"highway": {"name": "ĐL Mỹ Phước - Tân Vạn", "distanceKm": 2}
|
||||
},
|
||||
"incentives": {"specialZone": false},
|
||||
"targetIndustries": ["electronics", "garment", "food processing", "logistics"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-003",
|
||||
"name": "KCN Amata Đồng Nai",
|
||||
"slug": "amata-dong-nai",
|
||||
"province": "Đồng Nai",
|
||||
"region": "south",
|
||||
"status": "operational",
|
||||
"totalAreaHa": 700,
|
||||
"occupancyRate": 88,
|
||||
"establishedYear": 1994,
|
||||
"landRentUsdM2Year": 95,
|
||||
"rbfRentUsdM2Month": 5.0,
|
||||
"rbwRentUsdM2Month": 4.5,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Cát Lái", "distanceKm": 30},
|
||||
"airport": {"name": "Long Thành", "distanceKm": 25},
|
||||
"highway": {"name": "QL 1A", "distanceKm": 2}
|
||||
},
|
||||
"incentives": {"specialZone": false},
|
||||
"targetIndustries": ["automotive", "electronics", "chemicals", "machinery"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-004",
|
||||
"name": "KCN Amata Long An",
|
||||
"slug": "amata-long-an",
|
||||
"province": "Long An",
|
||||
"region": "south",
|
||||
"status": "under_construction",
|
||||
"totalAreaHa": 410,
|
||||
"occupancyRate": 35,
|
||||
"establishedYear": 2020,
|
||||
"landRentUsdM2Year": 75,
|
||||
"rbfRentUsdM2Month": 4.5,
|
||||
"rbwRentUsdM2Month": 3.8,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Cát Lái", "distanceKm": 45},
|
||||
"airport": {"name": "Tân Sơn Nhất", "distanceKm": 35},
|
||||
"highway": {"name": "Vành đai 3 TP.HCM", "distanceKm": 8}
|
||||
},
|
||||
"incentives": {"specialZone": true},
|
||||
"targetIndustries": ["logistics", "food processing", "consumer goods", "light manufacturing"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-005",
|
||||
"name": "KCN Nam Đình Vũ",
|
||||
"slug": "nam-dinh-vu",
|
||||
"province": "Hải Phòng",
|
||||
"region": "north",
|
||||
"status": "operational",
|
||||
"totalAreaHa": 1329,
|
||||
"occupancyRate": 75,
|
||||
"establishedYear": 2014,
|
||||
"landRentUsdM2Year": 80,
|
||||
"rbfRentUsdM2Month": 4.8,
|
||||
"rbwRentUsdM2Month": 4.0,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Đình Vũ", "distanceKm": 2},
|
||||
"airport": {"name": "Cát Bi", "distanceKm": 15},
|
||||
"highway": {"name": "Cao tốc Hà Nội - Hải Phòng", "distanceKm": 10}
|
||||
},
|
||||
"incentives": {"specialZone": true},
|
||||
"targetIndustries": ["petrochemicals", "logistics", "heavy industry", "steel"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-006",
|
||||
"name": "KCN Long Hậu",
|
||||
"slug": "long-hau",
|
||||
"province": "Long An",
|
||||
"region": "south",
|
||||
"status": "operational",
|
||||
"totalAreaHa": 311,
|
||||
"occupancyRate": 85,
|
||||
"establishedYear": 2006,
|
||||
"landRentUsdM2Year": 85,
|
||||
"rbfRentUsdM2Month": 4.5,
|
||||
"rbwRentUsdM2Month": 3.8,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Hiệp Phước", "distanceKm": 5},
|
||||
"airport": {"name": "Tân Sơn Nhất", "distanceKm": 25},
|
||||
"highway": {"name": "Nguyễn Hữu Thọ", "distanceKm": 3}
|
||||
},
|
||||
"incentives": {"specialZone": false},
|
||||
"targetIndustries": ["logistics", "food processing", "garment", "packaging"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-007",
|
||||
"name": "KCN Tân Thuận (EPZ)",
|
||||
"slug": "tan-thuan-epz",
|
||||
"province": "TP. Hồ Chí Minh",
|
||||
"region": "south",
|
||||
"status": "full",
|
||||
"totalAreaHa": 300,
|
||||
"occupancyRate": 100,
|
||||
"establishedYear": 1991,
|
||||
"landRentUsdM2Year": 130,
|
||||
"rbfRentUsdM2Month": 7.0,
|
||||
"rbwRentUsdM2Month": 6.0,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Cát Lái", "distanceKm": 15},
|
||||
"airport": {"name": "Tân Sơn Nhất", "distanceKm": 12},
|
||||
"highway": {"name": "Nguyễn Văn Linh", "distanceKm": 1}
|
||||
},
|
||||
"incentives": {"specialZone": true},
|
||||
"targetIndustries": ["electronics", "precision engineering", "software", "export manufacturing"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-008",
|
||||
"name": "KCN Thăng Long",
|
||||
"slug": "thang-long",
|
||||
"province": "Hà Nội",
|
||||
"region": "north",
|
||||
"status": "full",
|
||||
"totalAreaHa": 274,
|
||||
"occupancyRate": 100,
|
||||
"establishedYear": 1997,
|
||||
"landRentUsdM2Year": 105,
|
||||
"rbfRentUsdM2Month": 6.0,
|
||||
"rbwRentUsdM2Month": 5.0,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Hải Phòng", "distanceKm": 120},
|
||||
"airport": {"name": "Nội Bài", "distanceKm": 16},
|
||||
"highway": {"name": "Nội Bài - Lào Cai", "distanceKm": 5}
|
||||
},
|
||||
"incentives": {"specialZone": false},
|
||||
"targetIndustries": ["electronics", "automotive", "precision mechanics", "IT"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-009",
|
||||
"name": "KCN KTG Industrial Nhơn Trạch",
|
||||
"slug": "ktg-nhon-trach",
|
||||
"province": "Đồng Nai",
|
||||
"region": "south",
|
||||
"status": "operational",
|
||||
"totalAreaHa": 250,
|
||||
"occupancyRate": 78,
|
||||
"establishedYear": 2018,
|
||||
"landRentUsdM2Year": 80,
|
||||
"rbfRentUsdM2Month": 4.8,
|
||||
"rbwRentUsdM2Month": 4.0,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Cát Lái", "distanceKm": 20},
|
||||
"airport": {"name": "Long Thành", "distanceKm": 15},
|
||||
"highway": {"name": "Cao tốc Long Thành - Dầu Giây", "distanceKm": 5}
|
||||
},
|
||||
"incentives": {"specialZone": false},
|
||||
"targetIndustries": ["logistics", "e-commerce fulfillment", "light manufacturing", "food processing"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-010",
|
||||
"name": "KCN Prodezi Nhơn Trạch",
|
||||
"slug": "prodezi-nhon-trach",
|
||||
"province": "Đồng Nai",
|
||||
"region": "south",
|
||||
"status": "operational",
|
||||
"totalAreaHa": 340,
|
||||
"occupancyRate": 70,
|
||||
"establishedYear": 2015,
|
||||
"landRentUsdM2Year": 72,
|
||||
"rbfRentUsdM2Month": 4.2,
|
||||
"rbwRentUsdM2Month": 3.5,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Cát Lái", "distanceKm": 25},
|
||||
"airport": {"name": "Long Thành", "distanceKm": 12},
|
||||
"highway": {"name": "QL 51", "distanceKm": 8}
|
||||
},
|
||||
"incentives": {"specialZone": false},
|
||||
"targetIndustries": ["machinery", "plastics", "packaging", "consumer goods"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-011",
|
||||
"name": "KCN Thăng Long II Hưng Yên",
|
||||
"slug": "thang-long-2-hung-yen",
|
||||
"province": "Hưng Yên",
|
||||
"region": "north",
|
||||
"status": "operational",
|
||||
"totalAreaHa": 345,
|
||||
"occupancyRate": 82,
|
||||
"establishedYear": 2004,
|
||||
"landRentUsdM2Year": 78,
|
||||
"rbfRentUsdM2Month": 4.5,
|
||||
"rbwRentUsdM2Month": 3.8,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Hải Phòng", "distanceKm": 85},
|
||||
"airport": {"name": "Nội Bài", "distanceKm": 50},
|
||||
"highway": {"name": "QL 5", "distanceKm": 3}
|
||||
},
|
||||
"incentives": {"specialZone": false},
|
||||
"targetIndustries": ["electronics", "automotive parts", "precision engineering"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-012",
|
||||
"name": "KCN Yên Phong Bắc Ninh",
|
||||
"slug": "yen-phong-bac-ninh",
|
||||
"province": "Bắc Ninh",
|
||||
"region": "north",
|
||||
"status": "operational",
|
||||
"totalAreaHa": 658,
|
||||
"occupancyRate": 95,
|
||||
"establishedYear": 2008,
|
||||
"landRentUsdM2Year": 85,
|
||||
"rbfRentUsdM2Month": 5.0,
|
||||
"rbwRentUsdM2Month": 4.2,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Hải Phòng", "distanceKm": 100},
|
||||
"airport": {"name": "Nội Bài", "distanceKm": 30},
|
||||
"highway": {"name": "QL 18", "distanceKm": 5}
|
||||
},
|
||||
"incentives": {"specialZone": false},
|
||||
"targetIndustries": ["electronics", "display manufacturing", "semiconductors", "automotive"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-013",
|
||||
"name": "KCN Bà Rịa - Vũng Tàu",
|
||||
"slug": "ba-ria-vung-tau",
|
||||
"province": "Bà Rịa - Vũng Tàu",
|
||||
"region": "south",
|
||||
"status": "operational",
|
||||
"totalAreaHa": 450,
|
||||
"occupancyRate": 72,
|
||||
"establishedYear": 2002,
|
||||
"landRentUsdM2Year": 65,
|
||||
"rbfRentUsdM2Month": 3.8,
|
||||
"rbwRentUsdM2Month": 3.2,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Cái Mép - Thị Vải", "distanceKm": 20},
|
||||
"airport": {"name": "Long Thành", "distanceKm": 50},
|
||||
"highway": {"name": "Cao tốc Biên Hòa - Vũng Tàu", "distanceKm": 5}
|
||||
},
|
||||
"incentives": {"specialZone": true},
|
||||
"targetIndustries": ["oil & gas", "petrochemicals", "heavy industry", "steel", "logistics"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-014",
|
||||
"name": "KCN Becamex Bình Phước",
|
||||
"slug": "becamex-binh-phuoc",
|
||||
"province": "Bình Phước",
|
||||
"region": "south",
|
||||
"status": "under_construction",
|
||||
"totalAreaHa": 4686,
|
||||
"occupancyRate": 25,
|
||||
"establishedYear": 2021,
|
||||
"landRentUsdM2Year": 50,
|
||||
"rbfRentUsdM2Month": 3.5,
|
||||
"rbwRentUsdM2Month": 3.0,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Cát Lái", "distanceKm": 85},
|
||||
"airport": {"name": "Tân Sơn Nhất", "distanceKm": 80},
|
||||
"highway": {"name": "QL 13", "distanceKm": 3}
|
||||
},
|
||||
"incentives": {"specialZone": true},
|
||||
"targetIndustries": ["agriculture processing", "rubber", "wood processing", "light manufacturing"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-015",
|
||||
"name": "KCN Đại An Hải Dương",
|
||||
"slug": "dai-an-hai-duong",
|
||||
"province": "Hải Dương",
|
||||
"region": "north",
|
||||
"status": "operational",
|
||||
"totalAreaHa": 174,
|
||||
"occupancyRate": 90,
|
||||
"establishedYear": 2003,
|
||||
"landRentUsdM2Year": 70,
|
||||
"rbfRentUsdM2Month": 4.2,
|
||||
"rbwRentUsdM2Month": 3.5,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Hải Phòng", "distanceKm": 50},
|
||||
"airport": {"name": "Nội Bài", "distanceKm": 60},
|
||||
"highway": {"name": "QL 5", "distanceKm": 2}
|
||||
},
|
||||
"incentives": {"specialZone": false},
|
||||
"targetIndustries": ["garment", "food processing", "mechanics", "electronics assembly"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-016",
|
||||
"name": "KCN DEEP C Hải Phòng",
|
||||
"slug": "deep-c-hai-phong",
|
||||
"province": "Hải Phòng",
|
||||
"region": "north",
|
||||
"status": "operational",
|
||||
"totalAreaHa": 3000,
|
||||
"occupancyRate": 68,
|
||||
"establishedYear": 1997,
|
||||
"landRentUsdM2Year": 75,
|
||||
"rbfRentUsdM2Month": 4.5,
|
||||
"rbwRentUsdM2Month": 3.8,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Đình Vũ", "distanceKm": 5},
|
||||
"airport": {"name": "Cát Bi", "distanceKm": 12},
|
||||
"highway": {"name": "Cao tốc Hà Nội - Hải Phòng", "distanceKm": 8}
|
||||
},
|
||||
"incentives": {"specialZone": true},
|
||||
"targetIndustries": ["petrochemicals", "LNG", "electronics", "logistics", "renewable energy"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-017",
|
||||
"name": "KCN Mỹ Phước 3 Bình Dương",
|
||||
"slug": "my-phuoc-3-binh-duong",
|
||||
"province": "Bình Dương",
|
||||
"region": "south",
|
||||
"status": "operational",
|
||||
"totalAreaHa": 992,
|
||||
"occupancyRate": 87,
|
||||
"establishedYear": 2006,
|
||||
"landRentUsdM2Year": 82,
|
||||
"rbfRentUsdM2Month": 4.8,
|
||||
"rbwRentUsdM2Month": 4.0,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Cát Lái", "distanceKm": 40},
|
||||
"airport": {"name": "Tân Sơn Nhất", "distanceKm": 35},
|
||||
"highway": {"name": "Mỹ Phước - Tân Vạn", "distanceKm": 1}
|
||||
},
|
||||
"incentives": {"specialZone": false},
|
||||
"targetIndustries": ["furniture", "garment", "food processing", "electronics assembly", "plastics"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-018",
|
||||
"name": "KCN Phú Mỹ 2 BRVT",
|
||||
"slug": "phu-my-2-brvt",
|
||||
"province": "Bà Rịa - Vũng Tàu",
|
||||
"region": "south",
|
||||
"status": "operational",
|
||||
"totalAreaHa": 380,
|
||||
"occupancyRate": 65,
|
||||
"establishedYear": 2007,
|
||||
"landRentUsdM2Year": 55,
|
||||
"rbfRentUsdM2Month": 3.5,
|
||||
"rbwRentUsdM2Month": 3.0,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Cái Mép - Thị Vải", "distanceKm": 10},
|
||||
"airport": {"name": "Long Thành", "distanceKm": 40},
|
||||
"highway": {"name": "QL 51", "distanceKm": 3}
|
||||
},
|
||||
"incentives": {"specialZone": true},
|
||||
"targetIndustries": ["petrochemicals", "steel", "power generation", "port logistics"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-019",
|
||||
"name": "KCN WHA Nghệ An",
|
||||
"slug": "wha-nghe-an",
|
||||
"province": "Nghệ An",
|
||||
"region": "central",
|
||||
"status": "under_construction",
|
||||
"totalAreaHa": 498,
|
||||
"occupancyRate": 15,
|
||||
"establishedYear": 2022,
|
||||
"landRentUsdM2Year": 45,
|
||||
"rbfRentUsdM2Month": 3.0,
|
||||
"rbwRentUsdM2Month": 2.5,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Cửa Lò", "distanceKm": 15},
|
||||
"airport": {"name": "Vinh", "distanceKm": 20},
|
||||
"highway": {"name": "QL 1A", "distanceKm": 5}
|
||||
},
|
||||
"incentives": {"specialZone": true},
|
||||
"targetIndustries": ["electronics assembly", "garment", "food processing", "rubber"]
|
||||
},
|
||||
{
|
||||
"id": "seed-kcn-020",
|
||||
"name": "KCN Chu Lai Quảng Nam",
|
||||
"slug": "chu-lai-quang-nam",
|
||||
"province": "Quảng Nam",
|
||||
"region": "central",
|
||||
"status": "operational",
|
||||
"totalAreaHa": 1550,
|
||||
"occupancyRate": 55,
|
||||
"establishedYear": 2003,
|
||||
"landRentUsdM2Year": 40,
|
||||
"rbfRentUsdM2Month": 2.8,
|
||||
"rbwRentUsdM2Month": 2.2,
|
||||
"connectivity": {
|
||||
"nearestPort": {"name": "Cảng Kỳ Hà", "distanceKm": 5},
|
||||
"airport": {"name": "Chu Lai", "distanceKm": 8},
|
||||
"highway": {"name": "QL 1A", "distanceKm": 3}
|
||||
},
|
||||
"incentives": {"specialZone": true},
|
||||
"targetIndustries": ["automotive", "agriculture machinery", "wood processing", "seafood processing"]
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,188 @@
|
||||
{
|
||||
"version": "ridge-industrial-v1",
|
||||
"trained_at": "2026-04-18T08:19:02.245595+00:00",
|
||||
"n_parks_in_source": 20,
|
||||
"heads": {
|
||||
"land": {
|
||||
"target_column": "landRentUsdM2Year",
|
||||
"n_train": 20,
|
||||
"alpha": 7.847599703514607,
|
||||
"mape_loo": 0.1463,
|
||||
"coverage_80_loo": 0.8,
|
||||
"q80_log": 0.1883,
|
||||
"top_coefficients": [
|
||||
{
|
||||
"feature": "region_central",
|
||||
"coef": -0.0873
|
||||
},
|
||||
{
|
||||
"feature": "log_fdi_province",
|
||||
"coef": 0.0856
|
||||
},
|
||||
{
|
||||
"feature": "occupancy",
|
||||
"coef": 0.0618
|
||||
},
|
||||
{
|
||||
"feature": "ind_electronics",
|
||||
"coef": 0.0502
|
||||
},
|
||||
{
|
||||
"feature": "log_dist_airport_km",
|
||||
"coef": -0.0355
|
||||
},
|
||||
{
|
||||
"feature": "ind_plastics",
|
||||
"coef": -0.0259
|
||||
},
|
||||
{
|
||||
"feature": "ind_garment",
|
||||
"coef": 0.0124
|
||||
},
|
||||
{
|
||||
"feature": "region_north",
|
||||
"coef": -0.0117
|
||||
}
|
||||
],
|
||||
"slices": {
|
||||
"central": {
|
||||
"n": 2,
|
||||
"mape_in_sample": 0.1158,
|
||||
"median_residual_log": -0.1966
|
||||
},
|
||||
"north": {
|
||||
"n": 7,
|
||||
"mape_in_sample": 0.0697,
|
||||
"median_residual_log": -0.0146
|
||||
},
|
||||
"south": {
|
||||
"n": 11,
|
||||
"mape_in_sample": 0.095,
|
||||
"median_residual_log": 0.0298
|
||||
}
|
||||
}
|
||||
},
|
||||
"rbf": {
|
||||
"target_column": "rbfRentUsdM2Month",
|
||||
"n_train": 20,
|
||||
"alpha": 7.847599703514607,
|
||||
"mape_loo": 0.1118,
|
||||
"coverage_80_loo": 0.8,
|
||||
"q80_log": 0.1268,
|
||||
"top_coefficients": [
|
||||
{
|
||||
"feature": "log_fdi_province",
|
||||
"coef": 0.0582
|
||||
},
|
||||
{
|
||||
"feature": "region_central",
|
||||
"coef": -0.0529
|
||||
},
|
||||
{
|
||||
"feature": "ind_electronics",
|
||||
"coef": 0.0348
|
||||
},
|
||||
{
|
||||
"feature": "occupancy",
|
||||
"coef": 0.0318
|
||||
},
|
||||
{
|
||||
"feature": "log_dist_airport_km",
|
||||
"coef": -0.0239
|
||||
},
|
||||
{
|
||||
"feature": "ind_plastics",
|
||||
"coef": -0.0181
|
||||
},
|
||||
{
|
||||
"feature": "log_dist_highway_km",
|
||||
"coef": -0.0106
|
||||
},
|
||||
{
|
||||
"feature": "ind_food",
|
||||
"coef": 0.0065
|
||||
}
|
||||
],
|
||||
"slices": {
|
||||
"central": {
|
||||
"n": 2,
|
||||
"mape_in_sample": 0.089,
|
||||
"median_residual_log": -0.1132
|
||||
},
|
||||
"north": {
|
||||
"n": 7,
|
||||
"mape_in_sample": 0.0601,
|
||||
"median_residual_log": -0.0016
|
||||
},
|
||||
"south": {
|
||||
"n": 11,
|
||||
"mape_in_sample": 0.0758,
|
||||
"median_residual_log": 0.0139
|
||||
}
|
||||
}
|
||||
},
|
||||
"rbw": {
|
||||
"target_column": "rbwRentUsdM2Month",
|
||||
"n_train": 20,
|
||||
"alpha": 7.847599703514607,
|
||||
"mape_loo": 0.1243,
|
||||
"coverage_80_loo": 0.8,
|
||||
"q80_log": 0.1214,
|
||||
"top_coefficients": [
|
||||
{
|
||||
"feature": "log_fdi_province",
|
||||
"coef": 0.0604
|
||||
},
|
||||
{
|
||||
"feature": "region_central",
|
||||
"coef": -0.0562
|
||||
},
|
||||
{
|
||||
"feature": "ind_electronics",
|
||||
"coef": 0.0389
|
||||
},
|
||||
{
|
||||
"feature": "occupancy",
|
||||
"coef": 0.0297
|
||||
},
|
||||
{
|
||||
"feature": "ind_plastics",
|
||||
"coef": -0.0217
|
||||
},
|
||||
{
|
||||
"feature": "log_dist_airport_km",
|
||||
"coef": -0.0196
|
||||
},
|
||||
{
|
||||
"feature": "log_dist_highway_km",
|
||||
"coef": -0.0114
|
||||
},
|
||||
{
|
||||
"feature": "region_north",
|
||||
"coef": -0.0054
|
||||
}
|
||||
],
|
||||
"slices": {
|
||||
"central": {
|
||||
"n": 2,
|
||||
"mape_in_sample": 0.1026,
|
||||
"median_residual_log": -0.1232
|
||||
},
|
||||
"north": {
|
||||
"n": 7,
|
||||
"mape_in_sample": 0.0668,
|
||||
"median_residual_log": -0.0088
|
||||
},
|
||||
"south": {
|
||||
"n": 11,
|
||||
"mape_in_sample": 0.0773,
|
||||
"median_residual_log": 0.0175
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"warnings": [
|
||||
"n_train < 30 per head — LOO metrics are noisy; interpret CIs as wide.",
|
||||
"Targets are log1p-transformed rent; CIs use conformal quantile on log residuals."
|
||||
]
|
||||
}
|
||||
BIN
libs/ai-services/models/avm_industrial_park_ridge_v1.pkl
Normal file
BIN
libs/ai-services/models/avm_industrial_park_ridge_v1.pkl
Normal file
Binary file not shown.
458
libs/ai-services/scripts/train_avm_industrial_park.py
Normal file
458
libs/ai-services/scripts/train_avm_industrial_park.py
Normal file
@@ -0,0 +1,458 @@
|
||||
"""Train the v1 park-level industrial AVM baseline (ridge + monotonic priors).
|
||||
|
||||
Context (TEC-2768 / R5.2.1):
|
||||
The IndustrialPark table ships with ~20 seeded rows carrying three rent
|
||||
heads: land (usd/m²/year), RBF (ready-built factory, usd/m²/month), and
|
||||
RBW (ready-built warehouse, usd/m²/month). No IndustrialListing rows are
|
||||
seeded, so tree-boosted models are not viable at n=20. This script fits a
|
||||
regularized linear baseline on log-rent with sign-constrained coefficients
|
||||
that encode domain monotonicity priors (occupancy ↑ rent, distance ↑ rent
|
||||
↓, etc.). Conformal prediction over LOO residuals gives the 80% CI band.
|
||||
|
||||
Usage:
|
||||
python libs/ai-services/scripts/train_avm_industrial_park.py \
|
||||
--input libs/ai-services/data/industrial/parks.json \
|
||||
--out libs/ai-services/models
|
||||
|
||||
Produces:
|
||||
<out>/avm_industrial_park_ridge_v1.pkl — fitted artifact
|
||||
<out>/avm_industrial_park_ridge_v1.model_card.json — metrics + slices
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import pickle
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
from scipy.optimize import nnls
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
|
||||
# ── Constants ──────────────────────────────────────────────────
ARTIFACT_VERSION = "ridge-industrial-v1"
CURRENT_YEAR = 2026

REGION_ORDER = ["south", "north", "central"]  # drop-first encoding
TOP_INDUSTRIES = ["electronics", "logistics", "automotive", "food", "garment", "plastics"]

# Province → FDI inflow in million USD (trailing 12m, approximate market data).
# Keys MUST be lower-case: consumers normalise the province name with
# ``.strip().lower()`` before the lookup, so a mixed-case key can never match.
PROVINCE_FDI_MUSD: dict[str, float] = {
    "tp. hồ chí minh": 5500,
    "hà nội": 4200,
    "bình dương": 4800,
    "đồng nai": 3200,
    "bắc ninh": 5800,
    "hải phòng": 2800,
    "long an": 1500,
    "bà rịa - vũng tàu": 1800,
    "hải dương": 800,
    "hưng yên": 1200,
    "bình phước": 400,
    "nghệ an": 350,
    "quảng nam": 500,
    "quảng ngãi": 600,
}
DEFAULT_FDI = 500.0

# Feature expected-sign map (+1 rent↑ when feature↑, −1 rent↓ when feature↑,
# 0 = no sign constraint). Region one-hots stay unsigned (fixed effect).
SIGN_PRIORS: dict[str, int] = {
    "occupancy": +1,
    "log_area_ha": +1,
    "park_age_years": -1,
    "log_dist_port_km": -1,
    "log_dist_airport_km": -1,
    "log_dist_highway_km": -1,
    "logistics_connectivity_score": +1,
    "log_fdi_province": +1,
    "has_special_zone": +1,
    "ind_electronics": +1,
    "ind_logistics": +1,
    "ind_automotive": +1,
    "ind_food": 0,
    "ind_garment": 0,
    "ind_plastics": 0,
}
# Features that carry a non-zero sign prior.
MONOTONIC_FEATURES = [f for f, s in SIGN_PRIORS.items() if s != 0]
REGION_FEATURES = [f"region_{r}" for r in REGION_ORDER[1:]]  # drop south
# Canonical column order: signed/unsigned priors first, then region one-hots.
ALL_FEATURES = list(SIGN_PRIORS.keys()) + REGION_FEATURES
|
||||
|
||||
|
||||
# ── Feature engineering ────────────────────────────────────────
|
||||
@dataclass
class FeatureSpec:
    """Feature-engineering settings bundled so a loader can rebuild training features.

    Every mutable default is a fresh copy of the matching module-level
    constant, so editing one spec instance never leaks into the constants or
    into other instances.
    """

    # Ordered column names of the design matrix.
    feature_cols: list[str] = field(default_factory=lambda: [*ALL_FEATURES])
    # Region categories; the first entry is the dropped baseline.
    region_order: list[str] = field(default_factory=lambda: [*REGION_ORDER])
    # Industries that get an `ind_<name>` indicator feature.
    top_industries: list[str] = field(default_factory=lambda: [*TOP_INDUSTRIES])
    # Lower-cased province name → FDI inflow (million USD).
    province_fdi: dict[str, float] = field(default_factory=lambda: {**PROVINCE_FDI_MUSD})
    # FDI value used when a province is not in `province_fdi`.
    default_fdi: float = DEFAULT_FDI
    # Expected coefficient sign per feature (+1, −1, or 0 for unsigned).
    sign_priors: dict[str, int] = field(default_factory=lambda: {**SIGN_PRIORS})
    # Reference year for computing park age.
    current_year: int = CURRENT_YEAR
|
||||
|
||||
|
||||
def _connectivity_distance(conn: dict | None, key: str, default: float) -> float:
|
||||
if not conn or not isinstance(conn, dict):
|
||||
return default
|
||||
node = conn.get(key)
|
||||
if isinstance(node, dict):
|
||||
dist = node.get("distanceKm") or node.get("km")
|
||||
if isinstance(dist, (int, float)) and dist >= 0:
|
||||
return float(dist)
|
||||
return default
|
||||
|
||||
|
||||
def _logistics_score(dist_port: float, dist_airport: float, dist_highway: float) -> float:
    """Blend three distances (km) into one closeness score, rounded to 4 decimals.

    Each distance is capped (port 120, airport 80, highway 20) and mapped
    linearly so that 0 km scores 1 and anything at or beyond the cap scores 0.
    The weighted sum leans on highway access (0.55) over port (0.25) and
    airport (0.20), reflecting the original author's note that trucking
    dominates VN industrial flows.
    """

    def closeness(km: float, cap_km: float) -> float:
        # 1.0 at zero distance, falling linearly to 0.0 at the cap.
        return max(0.0, 1.0 - min(km, cap_km) / cap_km)

    port_part = 0.25 * closeness(dist_port, 120)
    airport_part = 0.20 * closeness(dist_airport, 80)
    highway_part = 0.55 * closeness(dist_highway, 20)
    return round(port_part + airport_part + highway_part, 4)
|
||||
|
||||
|
||||
def _industry_match(industries: list[str], target: str) -> int:
    """Return 1 if `target` is a substring of any lower-cased industry name, else 0.

    Only the industry names are lower-cased; `target` is compared as given.
    A None or empty `industries` yields 0.
    """
    names = industries if industries else []
    normalized = [name.lower() for name in names]
    for name in normalized:
        if target in name:
            return 1
    return 0
|
||||
|
||||
|
||||
def featureize(row: dict, spec: FeatureSpec) -> dict[str, float]:
    """Convert one park record into the flat name → value feature mapping.

    Missing or falsy fields fall back to defaults: occupancy 0, area 0 ha,
    an established year ten years before ``spec.current_year``, distances of
    60/30/5 km (port/airport/highway), ``spec.default_fdi`` and region
    ``"south"``. Keys are emitted in a fixed order: base features, then one
    ``ind_<industry>`` flag per ``spec.top_industries``, then one
    ``region_<r>`` flag per non-baseline region.
    """
    # Occupancy: values above 1.5 are treated as percentages (0-100) and
    # rescaled; the result is clamped to [0, 1].
    raw_occupancy = row.get("occupancyRate") or 0
    occupancy = raw_occupancy / 100.0 if raw_occupancy > 1.5 else raw_occupancy
    occupancy = min(max(occupancy, 0.0), 1.0)

    area_ha = float(row.get("totalAreaHa") or 0.0)

    year_established = row.get("establishedYear") or (spec.current_year - 10)
    park_age = max(0, spec.current_year - int(year_established))

    connectivity = row.get("connectivity") or {}
    dist_port, dist_airport, dist_highway = (
        _connectivity_distance(connectivity, node_key, fallback_km)
        for node_key, fallback_km in (
            ("nearestPort", 60.0),
            ("airport", 30.0),
            ("highway", 5.0),
        )
    )

    province_key = (row.get("province") or "").strip().lower()
    fdi_musd = spec.province_fdi.get(province_key, spec.default_fdi)

    incentives = row.get("incentives") or {}
    industries = row.get("targetIndustries") or []
    region_name = str(row.get("region") or "south").lower()

    feats = {
        "occupancy": occupancy,
        "log_area_ha": math.log1p(area_ha),
        "park_age_years": float(park_age),
        "log_dist_port_km": math.log1p(dist_port),
        "log_dist_airport_km": math.log1p(dist_airport),
        "log_dist_highway_km": math.log1p(dist_highway),
        "logistics_connectivity_score": _logistics_score(dist_port, dist_airport, dist_highway),
        "log_fdi_province": math.log1p(fdi_musd),
        "has_special_zone": 1.0 if incentives.get("specialZone") else 0.0,
    }
    feats.update(
        {f"ind_{ind}": float(_industry_match(industries, ind)) for ind in spec.top_industries}
    )
    # The first region is the dropped baseline, so it gets no indicator column.
    feats.update({f"region_{r}": float(region_name == r) for r in spec.region_order[1:]})
    return feats
|
||||
|
||||
|
||||
def build_feature_matrix(rows: list[dict], spec: FeatureSpec) -> tuple[np.ndarray, list[str]]:
    """Stack the features of `rows` into a float64 design matrix.

    Args:
        rows: Park records, each passed through `featureize`.
        spec: Feature spec; ``spec.feature_cols`` fixes the column order.

    Returns:
        ``(X, cols)`` where ``X`` has shape ``(len(rows), len(cols))`` and
        ``cols`` is ``spec.feature_cols``.

    Raises:
        KeyError: If a name in ``spec.feature_cols`` is not produced by
            `featureize`.
    """
    cols = spec.feature_cols
    feature_dicts = [featureize(r, spec) for r in rows]
    X = np.array([[fd[c] for c in cols] for fd in feature_dicts], dtype=np.float64)
    # np.array([]) is 1-D with shape (0,). Force the documented 2-D shape so an
    # empty input still exposes the right number of columns to callers.
    X = X.reshape(len(feature_dicts), len(cols))
    return X, cols
|
||||
|
||||
|
||||
# ── Sign-constrained ridge ─────────────────────────────────────
|
||||
def fit_ridge_nn(X: np.ndarray, y: np.ndarray, alpha: float, sign_vec: np.ndarray) -> np.ndarray:
    """Fit ``y ≈ X @ β`` with an L2 penalty ``alpha`` and per-feature sign constraints.

    ``sign_vec[j]`` is −1, 0 or +1. A ±1 entry forces β[j] to carry that sign
    (or be zero); a 0 entry leaves β[j] free. The problem is rewritten as
    non-negative least squares on the ridge-augmented system

        minimize ‖[X̃; sqrt(alpha)·I] β̃ − [y; 0]‖²  subject to  β̃ ≥ 0

    where X̃ holds each signed column multiplied by its sign, and each free
    column twice (as +x and −x) so that β[j] = β⁺ − β⁻ is unconstrained.

    Returns:
        Coefficient vector of length ``X.shape[1]`` in the original column
        order and original sign convention.
    """
    n_features = X.shape[1]

    # Build the non-negative design: one or two columns per original feature,
    # remembering which feature each came from and the sign to undo later.
    design_cols: list[np.ndarray] = []
    origin: list[tuple[int, int]] = []  # (original column index, +1 or −1)
    for j in range(n_features):
        prior = sign_vec[j]
        if prior == 0:
            design_cols.extend([X[:, j], -X[:, j]])
            origin.extend([(j, +1), (j, -1)])
            continue
        # Flip so the expected sign becomes "+", which NNLS can enforce.
        design_cols.append(prior * X[:, j])
        origin.append((j, int(prior)))
    design = np.stack(design_cols, axis=1)

    # Ridge via data augmentation: extra rows sqrt(alpha)·I with zero targets.
    n_expanded = design.shape[1]
    ridge_rows = math.sqrt(alpha) * np.eye(n_expanded)
    A = np.vstack([design, ridge_rows])
    b = np.concatenate([y, np.zeros(n_expanded)])

    weights, _ = nnls(A, b, maxiter=5 * n_expanded)

    # Map the non-negative weights back onto the original features.
    beta = np.zeros(n_features)
    for weight, (j, direction) in zip(weights, origin):
        signed_weight = direction * weight
        if sign_vec[j] == 0:
            # Two columns feed this feature: accumulate β⁺ − β⁻.
            beta[j] += signed_weight
        else:
            # Single flipped column: undo the flip.
            beta[j] = signed_weight
    return beta
|
||||
|
||||
|
||||
# ── Model selection + conformal CI ─────────────────────────────
|
||||
def _pred(X: np.ndarray, beta: np.ndarray, intercept: float) -> np.ndarray:
    """Return the linear prediction ``X @ beta + intercept`` for each row of X."""
    linear_part = np.matmul(X, beta)
    return linear_part + intercept
|
||||
|
||||
|
||||
def loo_cv_mape(
    X: np.ndarray,
    y_log: np.ndarray,
    alpha: float,
    sign_vec: np.ndarray,
    scaler: StandardScaler,
) -> tuple[float, np.ndarray]:
    """Leave-one-out evaluation of the sign-constrained ridge at one `alpha`.

    For each row, the scaler and the model are refit on the remaining rows
    (the intercept is the mean of the training targets), then the held-out
    row is predicted. Note that `scaler` is refit in place on every fold.

    Returns:
        ``(mape, residuals_log)``: MAPE on the original rent scale
        (targets mapped back with ``expm1``) and the per-row held-out
        residuals ``y_log − ŷ_log`` in log space.
    """
    n_obs = X.shape[0]
    held_out_resid = np.zeros(n_obs)
    held_out_rent = np.zeros(n_obs)
    row_ids = np.arange(n_obs)

    for left_out in range(n_obs):
        keep = row_ids != left_out
        Z_train = scaler.fit_transform(X[keep])
        y_fold = y_log[keep]
        fold_intercept = float(np.mean(y_fold))
        coef = fit_ridge_nn(Z_train, y_fold - fold_intercept, alpha, sign_vec)

        # Scale the held-out row with the statistics of this fold only.
        z_test = scaler.transform(X[left_out : left_out + 1])
        pred_log = float(_pred(z_test, coef, fold_intercept)[0])
        held_out_resid[left_out] = y_log[left_out] - pred_log
        held_out_rent[left_out] = math.expm1(pred_log)

    actual_rent = np.expm1(y_log)
    # The 1e-6 floor guards against division by a zero rent.
    relative_error = np.abs(held_out_rent - actual_rent) / np.maximum(actual_rent, 1e-6)
    return float(np.mean(relative_error)), held_out_resid
|
||||
|
||||
|
||||
def conformal_coverage(residuals_log: np.ndarray, q: float) -> float:
    """Return the fraction of residuals whose absolute value is at most `q`."""
    inside_band = np.abs(residuals_log) <= q
    return float(np.mean(inside_band))
|
||||
|
||||
|
||||
# ── Training pipeline ──────────────────────────────────────────
|
||||
def train_head(
    rows: list[dict],
    target_key: str,
    spec: FeatureSpec,
) -> dict[str, Any]:
    """Fit one rent head and return a serializable head dict.

    Rows whose `target_key` is missing or None are dropped. Targets are
    modelled as ``log1p(rent)``. The ridge penalty is chosen from a fixed
    log-spaced grid by leave-one-out MAPE, then the model is refit on all
    valid rows with a scaler fitted on those same rows.

    Args:
        rows: Park records.
        target_key: Name of the rent field to predict.
        spec: Feature spec (columns, sign priors, lookup tables).

    Returns:
        Dict with keys ``coefficients``, ``intercept``, ``scaler``, ``alpha``,
        ``q80_log``, ``feature_cols``, ``sign_vec``, ``n_train``, ``mape_loo``,
        ``coverage_80_loo`` and ``slices`` (per-region metrics).

    Raises:
        ValueError: If fewer than 8 rows have a target, or if any target is
            negative or non-finite.
    """
    valid = [r for r in rows if r.get(target_key) is not None]
    if len(valid) < 8:
        raise ValueError(f"Head '{target_key}': only {len(valid)} non-null rows — too few to train.")

    X, cols = build_feature_matrix(valid, spec)
    y_raw = np.array([r[target_key] for r in valid], dtype=np.float64)
    # log1p silently yields NaN/-inf for bad rents and would corrupt the fit,
    # so reject them up front with a clear message.
    if not np.all(np.isfinite(y_raw)) or np.any(y_raw < 0):
        raise ValueError(f"Head '{target_key}': targets must be finite, non-negative rents.")
    y_log = np.log1p(y_raw)

    # Columns without a prior (the region one-hots) are left unsigned.
    sign_vec = np.array([spec.sign_priors.get(c, 0) for c in cols], dtype=np.int8)

    # Scaler for the final model; LOO refits its own scaler on every fold.
    scaler_final = StandardScaler()
    scaler_final.fit(X)

    # Pick the alpha with the lowest LOO MAPE; ties keep the smaller alpha.
    alphas = np.logspace(-2, 3, 20)
    best = None
    for a in alphas:
        mape, res = loo_cv_mape(X, y_log, a, sign_vec, StandardScaler())
        if best is None or mape < best["mape"]:
            best = {"alpha": a, "mape": mape, "residuals_log": res}
    assert best is not None  # the alpha grid is never empty

    # Refit on the full set with the chosen alpha.
    X_std = scaler_final.transform(X)
    intercept = float(np.mean(y_log))
    beta = fit_ridge_nn(X_std, y_log - intercept, best["alpha"], sign_vec)

    # NOTE(review): q80 and the coverage below are computed from the same LOO
    # residuals, so coverage is ≈0.80 by construction rather than an
    # independent validation of the interval.
    q80 = float(np.quantile(np.abs(best["residuals_log"]), 0.80))
    coverage = conformal_coverage(best["residuals_log"], q80)

    # Per-region slice metrics. Regions are normalised exactly as in
    # featureize() (None/empty → "south", lower-cased) so that slices line up
    # with the region features and np.unique never has to sort None with str.
    slices: dict[str, dict[str, float]] = {}
    regions = np.array([str(r.get("region") or "south").lower() for r in valid])
    preds_rent = np.expm1(X_std @ beta + intercept)
    y_rent = np.expm1(y_log)
    for region in np.unique(regions):
        idx = np.where(regions == region)[0]
        mape_slice = float(
            np.mean(np.abs(preds_rent[idx] - y_rent[idx]) / np.maximum(y_rent[idx], 1e-6))
        )
        # Use a plain str key so the dict serializes without numpy types.
        slices[str(region)] = {
            "n": int(idx.size),
            "mape_in_sample": round(mape_slice, 4),
            "median_residual_log": round(float(np.median(best["residuals_log"][idx])), 4),
        }

    return {
        "coefficients": beta,
        "intercept": intercept,
        "scaler": scaler_final,
        "alpha": float(best["alpha"]),
        "q80_log": q80,
        "feature_cols": cols,
        "sign_vec": sign_vec,
        "n_train": len(valid),
        "mape_loo": round(float(best["mape"]), 4),
        "coverage_80_loo": round(coverage, 4),
        "slices": slices,
    }
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: train every rent head, then write the artifact and model card.

    Args:
        argv: Command-line arguments; None lets argparse read ``sys.argv``.
            ``--input`` is the parks JSON file and ``--out`` the output
            directory; both default to paths under the parent of this
            file's directory.

    Returns:
        0 on success.

    Raises:
        ValueError: If the input JSON is not a list of objects, or if a head
            cannot be trained (propagated from `train_head`).
    """
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input",
        default=os.path.join(project_root, "data/industrial/parks.json"),
    )
    parser.add_argument(
        "--out",
        default=os.path.join(project_root, "models"),
    )
    args = parser.parse_args(argv)

    with open(args.input, "r", encoding="utf-8") as f:
        rows: list[dict] = json.load(f)
    # Fail early with a clear message instead of an AttributeError deep inside
    # the training code when the file has an unexpected top-level shape.
    if not isinstance(rows, list) or not all(isinstance(r, dict) for r in rows):
        raise ValueError(f"{args.input}: expected a JSON array of park objects.")

    spec = FeatureSpec()

    # Head name → source column holding that head's rent target.
    head_specs = {
        "land": "landRentUsdM2Year",
        "rbf": "rbfRentUsdM2Month",
        "rbw": "rbwRentUsdM2Month",
    }
    heads: dict[str, dict[str, Any]] = {}
    card_heads: dict[str, dict[str, Any]] = {}
    for head_name, target_key in head_specs.items():
        print(f"→ Training head '{head_name}' on target '{target_key}'...")
        head = train_head(rows, target_key, spec)
        heads[head_name] = head
        card_heads[head_name] = {
            "target_column": target_key,
            "n_train": head["n_train"],
            "alpha": head["alpha"],
            "mape_loo": head["mape_loo"],
            "coverage_80_loo": head["coverage_80_loo"],
            "q80_log": round(head["q80_log"], 4),
            "top_coefficients": _top_coefs(head),
            "slices": head["slices"],
        }
        print(
            f"   α={head['alpha']:.4g}  MAPE_LOO={head['mape_loo']:.3f}"
            f"  coverage_80={head['coverage_80_loo']:.3f}  n={head['n_train']}"
        )

    os.makedirs(args.out, exist_ok=True)
    pkl_path = os.path.join(args.out, "avm_industrial_park_ridge_v1.pkl")
    card_path = os.path.join(args.out, "avm_industrial_park_ridge_v1.model_card.json")

    # Serialize to a plain-dict artifact — no trainer class references — so the
    # API loader can unpickle without importing this training module. The
    # scaler is therefore stored as its mean/scale arrays, not as an object.
    artifact = {
        "version": ARTIFACT_VERSION,
        "feature_spec": {
            "feature_cols": spec.feature_cols,
            "region_order": spec.region_order,
            "top_industries": spec.top_industries,
            "province_fdi": spec.province_fdi,
            "default_fdi": spec.default_fdi,
            "sign_priors": spec.sign_priors,
            "current_year": spec.current_year,
        },
        "heads": {
            name: {
                "coefficients": np.asarray(head["coefficients"], dtype=np.float64),
                "intercept": float(head["intercept"]),
                "scaler_mean": np.asarray(head["scaler"].mean_, dtype=np.float64),
                "scaler_scale": np.asarray(head["scaler"].scale_, dtype=np.float64),
                "alpha": head["alpha"],
                "q80_log": head["q80_log"],
                "feature_cols": head["feature_cols"],
                "n_train": head["n_train"],
                "mape_loo": head["mape_loo"],
                "coverage_80_loo": head["coverage_80_loo"],
            }
            for name, head in heads.items()
        },
        "trained_at": datetime.now(timezone.utc).isoformat(),
    }
    with open(pkl_path, "wb") as f:
        pickle.dump(artifact, f)

    card_warnings = [
        "Targets are log1p-transformed rent; CIs use conformal quantile on log residuals.",
    ]
    # Only claim a small sample when that is actually true for some head;
    # the caveat used to be written unconditionally.
    if any(h["n_train"] < 30 for h in heads.values()):
        card_warnings.insert(
            0,
            "n_train < 30 per head — LOO metrics are noisy; interpret CIs as wide.",
        )

    card = {
        "version": ARTIFACT_VERSION,
        "trained_at": artifact["trained_at"],
        "n_parks_in_source": len(rows),
        "heads": card_heads,
        "warnings": card_warnings,
    }
    with open(card_path, "w", encoding="utf-8") as f:
        json.dump(card, f, ensure_ascii=False, indent=2)

    print(f"\n✓ Wrote artifact → {pkl_path}")
    print(f"✓ Wrote model card → {card_path}")
    return 0
|
||||
|
||||
|
||||
def _top_coefs(head: dict[str, Any], k: int = 8) -> list[dict[str, float]]:
    """List up to `k` coefficients with the largest magnitude, biggest first.

    Reads ``head["coefficients"]`` and ``head["feature_cols"]``. Coefficients
    with magnitude at or below 1e-6 are omitted; values are rounded to 4
    decimals.
    """
    beta = head["coefficients"]
    cols = head["feature_cols"]
    by_magnitude = np.argsort(-np.abs(beta))[:k]

    top: list[dict[str, float]] = []
    for i in by_magnitude:
        if abs(beta[i]) > 1e-6:
            top.append({"feature": cols[i], "coef": round(float(beta[i]), 4)})
    return top
|
||||
|
||||
|
||||
# Run the trainer when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
|
||||
@@ -1,11 +1,19 @@
|
||||
"""Tests for industrial AVM rent estimation endpoint."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from app.main import app
|
||||
from app.models.avm_industrial import IndustrialAVMRequest
|
||||
|
||||
# Shared HTTP client for the endpoint tests in this module.
client = TestClient(app)

# Two directory levels above this test file.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Directory and file where the trained ridge artifact is looked for.
RIDGE_MODEL_DIR = REPO_ROOT.joinpath("models")
RIDGE_ARTIFACT = RIDGE_MODEL_DIR.joinpath("avm_industrial_park_ridge_v1.pkl")
|
||||
|
||||
# ── Minimal valid request payload ───────────────────────────────
|
||||
|
||||
_PREDICT_PAYLOAD = {
|
||||
@@ -178,3 +186,99 @@ def test_predict_industrial_invalid_occupancy():
|
||||
json={**_PREDICT_PAYLOAD, "park_occupancy_rate": 1.5},
|
||||
)
|
||||
assert resp.status_code == 422
|
||||
|
||||
|
||||
# ── Ridge v1 artifact tests (TEC-2768) ───────────────────────────────
|
||||
|
||||
# Baseline request shared by the ridge tests; individual tests override
# single fields via `model_copy(update=...)`.
_RIDGE_REQ_FIELDS = {
    # Park-level attributes
    "province": "Bình Dương",
    "region": "south",
    "park_occupancy_rate": 0.85,
    "park_area_ha": 500,
    "park_age_years": 10,
    "distance_to_port_km": 25,
    "distance_to_airport_km": 20,
    "distance_to_highway_km": 2,
    # Unit-level attributes
    "property_type": "ready_built_factory",
    "area_m2": 5000,
    "ceiling_height_m": 10,
    "floor_load_ton_m2": 3.0,
    "power_capacity_kva": 1500,
    "building_coverage": 0.55,
    "loading_docks": 4,
    "zoning": "general_industrial",
    # Market context
    "industry_demand_index": 0.7,
    "fdi_province_musd": 4800,
    "labor_cost_province_vnd": 8_500_000,
    "logistics_connectivity_score": 0.85,
}
_RIDGE_REQ = IndustrialAVMRequest(**_RIDGE_REQ_FIELDS)
|
||||
|
||||
|
||||
def _fresh_service_with_model_dir(model_dir: Path):
    """Return a new `IndustrialAVMService` constructed while `model_dir` is the model path.

    ``settings.model_path`` is swapped only for the duration of the
    constructor call and is always restored, even if construction raises.
    A fresh instance is needed because (per the original author's note) the
    shared `industrial_avm_service` singleton picks its backend at import
    time.
    """
    from app.config import settings
    from app.services.avm_industrial_service import IndustrialAVMService

    saved_model_path = settings.model_path
    settings.model_path = str(model_dir)
    try:
        service = IndustrialAVMService()
    finally:
        settings.model_path = saved_model_path
    return service
|
||||
|
||||
|
||||
@pytest.mark.skipif(not RIDGE_ARTIFACT.exists(), reason="ridge artifact not built")
def test_predict_uses_ridge_when_artifact_present():
    """With the ridge artifact on disk, the service selects and reports the ridge backend."""
    service = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)
    assert service._backend == "ridge"
    assert service._model_version == "ridge-industrial-v1"

    result = service.predict(_RIDGE_REQ)
    assert result.model_version == "ridge-industrial-v1"

    estimate = result.estimated_rent_usd_m2
    band_low = result.rent_range_low_usd_m2
    band_high = result.rent_range_high_usd_m2
    assert estimate > 0
    # The point estimate must lie inside the reported band ...
    assert band_low <= estimate <= band_high
    # ... and the band must have strictly positive width.
    assert band_high > band_low
    # Acceptance threshold on the reported confidence.
    assert result.confidence >= 0.75
|
||||
|
||||
|
||||
def test_predict_falls_back_to_heuristic_when_artifact_absent(tmp_path: Path):
    """An empty model directory must leave the service on the heuristic backend."""
    # tmp_path is a fresh, empty directory, so no artifact can be found in it.
    service = _fresh_service_with_model_dir(tmp_path)
    assert service._backend == "heuristic"

    result = service.predict(_RIDGE_REQ)
    assert result.model_version == "heuristic-v1"
    assert result.estimated_rent_usd_m2 > 0
|
||||
|
||||
|
||||
@pytest.mark.skipif(not RIDGE_ARTIFACT.exists(), reason="ridge artifact not built")
def test_ridge_monotonic_occupancy():
    """Raising park occupancy must never lower the ridge rent estimate."""
    service = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)

    def rent_at(occupancy: float) -> float:
        request = _RIDGE_REQ.model_copy(update={"park_occupancy_rate": occupancy})
        return service.predict(request).estimated_rent_usd_m2

    rent_low_occupancy = rent_at(0.30)
    rent_high_occupancy = rent_at(0.95)
    assert rent_high_occupancy >= rent_low_occupancy
|
||||
|
||||
|
||||
@pytest.mark.skipif(not RIDGE_ARTIFACT.exists(), reason="ridge artifact not built")
def test_ridge_land_head_conversion():
    """For industrial_land, annual and monthly rents must agree up to a factor of 12."""
    service = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)
    land_request = _RIDGE_REQ.model_copy(update={"property_type": "industrial_land"})
    result = service.predict(land_request)

    monthly = result.estimated_rent_usd_m2
    assert monthly > 0
    # annual ≈ 12 × monthly, allowing for rounding in the response.
    gap = abs(result.annual_rent_usd_m2 - monthly * 12)
    assert gap < 0.5
|
||||
|
||||
|
||||
@pytest.mark.skipif(not RIDGE_ARTIFACT.exists(), reason="ridge artifact not built")
def test_ridge_warehouse_head_different_from_factory():
    """Factory and warehouse requests must not produce the same estimate."""
    service = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)

    def rent_for(property_type: str) -> float:
        request = _RIDGE_REQ.model_copy(update={"property_type": property_type})
        return service.predict(request).estimated_rent_usd_m2

    factory_rent = rent_for("ready_built_factory")
    warehouse_rent = rent_for("warehouse")
    # Only inequality is asserted: identical numbers would suggest both
    # property types were routed to the same head. The original note says
    # training data shows RBF > RBW, but that ordering is not enforced here.
    assert factory_rent != warehouse_rent
|
||||
|
||||
504
libs/mcp-servers/src/__tests__/mcp-integration.test.ts
Normal file
504
libs/mcp-servers/src/__tests__/mcp-integration.test.ts
Normal file
@@ -0,0 +1,504 @@
|
||||
/**
|
||||
* Integration test: verifies all MCP servers register correctly in McpRegistryService
|
||||
* and each tool is callable with valid response schemas.
|
||||
*
|
||||
* External HTTP calls (AI service, NestJS API) are mocked via globalThis.fetch.
|
||||
* Typesense is mocked at the client level.
|
||||
*/
|
||||
import type { Client as TypesenseClient } from 'typesense';
|
||||
import { describe, it, expect, vi, beforeAll, afterAll } from 'vitest';
|
||||
import { createIndustrialParksServer } from '../industrial-parks/industrial-parks.server';
|
||||
import { createMarketAnalyticsServer } from '../market-analytics/market-analytics.server';
|
||||
import { createPropertySearchServer } from '../property-search/property-search.server';
|
||||
import { createReportsServer } from '../reports/reports.server';
|
||||
import { createValuationServer } from '../valuation/valuation.server';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Shape of the value the tool handlers in these tests resolve to. */
type ToolResult = {
  /** Content parts; the tests below expect exactly one part of type 'text'. */
  content: Array<{ type: string; text: string }>;
  /** Set when the tool reports a failure. */
  isError?: boolean;
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mocks — Typesense client
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Build a minimal stand-in for a Typesense client.
 *
 * `collections()` returns an object whose `documents()` returns `{ search }`;
 * every `search` call resolves to the same canned response that wraps each
 * entry of `defaultHits` as `{ document }`. The `search` mock is also exposed
 * as `_search` so tests can inspect its calls.
 */
function createMockTypesenseClient(defaultHits: unknown[] = []) {
  const hits = defaultHits.map((document) => ({ document }));
  const search = vi.fn().mockResolvedValue({
    hits,
    found: defaultHits.length,
    search_time_ms: 2,
  });
  const documents = vi.fn().mockReturnValue({ search });
  const collections = vi.fn().mockReturnValue({ documents });

  return { collections, _search: search };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mocks — fetch responses for each backend
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Canned JSON bodies keyed by URL path fragment. Entries are matched in
 * insertion order by substring, so keep more specific paths first if any
 * fragment ever becomes a prefix of another.
 */
const MOCK_RESPONSES: Record<string, unknown> = {
  // AI service — location analysis (snake_case, as the backend returns it)
  '/industrial/analyze-location': {
    overall_score: 8.2,
    connectivity: {
      nearest_port: { name: 'Cảng Cát Lái', distanceKm: 22 },
      nearest_airport: { name: 'Tân Sơn Nhất', distanceKm: 28 },
      nearest_highway: { name: 'QL1A', distanceKm: 1.5 },
    },
    infrastructure: {
      power_availability: '110kV on-site',
      water_supply: 'Municipal',
      wastewater_treatment: 'Central WWTP',
      telecom: 'Fiber optic',
    },
    labor_market: {
      worker_pool_radius_30km: 450000,
      average_wage_usd: 290,
      nearby_universities: ['ĐH Bình Dương'],
    },
    incentives: ['CIT exemption 4 years'],
    risks: ['Flooding risk'],
  },

  // AI service — rent estimate
  '/industrial/estimate-rent': {
    estimated_rent_usd_m2: 4.5,
    pricing_unit: 'USD/m²/month',
    total_monthly_usd: 45000,
    total_lease_usd: 5400000,
    management_fee_usd_m2: 0.6,
    deposit_months: 3,
    market_comparison: {
      province_low: 3.0,
      province_high: 7.0,
      province_avg: 4.8,
    },
    breakdown: [
      { item: 'Base rent', amount: 38000 },
      { item: 'Management fee', amount: 6000 },
    ],
  },

  // NestJS API — report generation
  '/reports/generate': {
    report_id: 'rpt-int-001',
    report_type: 'market_overview',
    title: 'Báo cáo thị trường Q7',
    location: 'Quận 7, Hồ Chí Minh',
    generated_at: '2026-04-16T10:00:00Z',
    summary: 'Thị trường ổn định',
    sections: [{ title: 'Tổng quan', content: '...', charts: [] }],
    key_metrics: { avgPriceVND: 4_500_000_000 },
  },

  // NestJS API — macro data
  '/reports/macro-data': {
    province: 'Bình Dương',
    data: {
      gdp: [{ year: 2024, value: 20.1, unit: 'billion USD', yoy_change: 8.6 }],
    },
    highlights: ['GDP above national average'],
  },
};
|
||||
|
||||
/**
 * Resolve a URL to a fake `Response`.
 *
 * The first `MOCK_RESPONSES` key contained in `url` wins and yields a 200
 * response exposing `json()` and `text()`. Any other URL yields a 404 that
 * only exposes `text()`.
 */
function mockFetchForUrl(url: string): Response {
  const matchedPath = Object.keys(MOCK_RESPONSES).find((path) => url.includes(path));

  if (matchedPath === undefined) {
    return {
      ok: false,
      status: 404,
      text: async () => 'Not found',
    } as unknown as Response;
  }

  const body = MOCK_RESPONSES[matchedPath];
  return {
    ok: true,
    status: 200,
    json: async () => body,
    text: async () => JSON.stringify(body),
  } as unknown as Response;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Industrial park sample document (for Typesense search results)
|
||||
// ---------------------------------------------------------------------------
|
||||
/** Single park document handed to the mock Typesense client as its only hit. */
const SAMPLE_PARK = {
  // Identity
  parkId: 'park-int-001',
  name: 'KCN VSIP II-A',
  nameEn: 'VSIP II-A Industrial Park',
  developer: 'VSIP Group',
  // Location and status
  province: 'Bình Dương',
  region: 'south',
  status: 'operational',
  // Size and occupancy
  totalAreaHa: 345,
  remainingAreaHa: 62,
  occupancyRate: 82,
  // Rents (units as encoded in the field names)
  landRentUsdM2Year: 90,
  rbfRentUsdM2Month: 4.8,
  rbwRentUsdM2Month: 3.5,
  // Tenancy
  targetIndustries: ['electronics', 'automotive'],
  tenantCount: 85,
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper: extract tool handler from McpServer internal state
|
||||
// ---------------------------------------------------------------------------
|
||||
/**
 * Look up a registered tool's handler by name.
 *
 * Reads the server's `_registeredTools` map directly, so it depends on that
 * internal field and on each entry exposing `handler`.
 *
 * @throws Error listing the available tool names when `name` is not registered.
 */
function getToolHandler(
  server: unknown,
  name: string,
): (params: unknown) => Promise<ToolResult> {
  type Handler = (p: unknown) => Promise<ToolResult>;
  const { _registeredTools: registry } = server as {
    _registeredTools: Record<string, { handler: Handler }>;
  };

  const registered = registry[name];
  if (registered) {
    return registered.handler;
  }
  throw new Error(`Tool "${name}" not registered. Available: ${Object.keys(registry).join(', ')}`);
}
|
||||
|
||||
/**
 * Assert that a tool result holds exactly one text part and return its
 * JSON-decoded payload.
 */
function parseToolResult(result: ToolResult): Record<string, unknown> {
  const { content } = result;
  expect(content).toHaveLength(1);

  const [onlyPart] = content;
  expect(onlyPart.type).toBe('text');

  return JSON.parse(onlyPart.text) as Record<string, unknown>;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Integration tests
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('MCP Integration: all servers and tools end-to-end', () => {
|
||||
const typesenseClient = createMockTypesenseClient([SAMPLE_PARK]);
|
||||
|
||||
let industrialServer: ReturnType<typeof createIndustrialParksServer>;
|
||||
let reportsServer: ReturnType<typeof createReportsServer>;
|
||||
|
||||
const fetchSpy = vi.spyOn(globalThis, 'fetch');
|
||||
|
||||
beforeAll(() => {
|
||||
fetchSpy.mockImplementation(async (input: string | URL | Request) => {
|
||||
const url = typeof input === 'string' ? input : input instanceof URL ? input.toString() : input.url;
|
||||
return mockFetchForUrl(url);
|
||||
});
|
||||
|
||||
industrialServer = createIndustrialParksServer({
|
||||
typesenseClient: typesenseClient as unknown as TypesenseClient,
|
||||
collectionName: 'industrial_parks',
|
||||
aiServiceBaseUrl: 'http://ai-service:8000',
|
||||
});
|
||||
|
||||
reportsServer = createReportsServer({
|
||||
apiBaseUrl: 'http://api:3001/api/v1',
|
||||
});
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
fetchSpy.mockRestore();
|
||||
});
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 1. Server factory tests — all 5 factories produce valid McpServer instances
|
||||
// -----------------------------------------------------------------------
|
||||
describe('server factories', () => {
|
||||
it('creates all 5 server instances without errors', () => {
|
||||
expect(industrialServer).toBeDefined();
|
||||
expect(reportsServer).toBeDefined();
|
||||
|
||||
const propertySearch = createPropertySearchServer({
|
||||
typesenseClient: typesenseClient as unknown as TypesenseClient,
|
||||
collectionName: 'listings',
|
||||
});
|
||||
expect(propertySearch).toBeDefined();
|
||||
|
||||
const marketAnalytics = createMarketAnalyticsServer({
|
||||
typesenseClient: typesenseClient as unknown as TypesenseClient,
|
||||
collectionName: 'listings',
|
||||
});
|
||||
expect(marketAnalytics).toBeDefined();
|
||||
|
||||
const valuation = createValuationServer({
|
||||
aiServiceBaseUrl: 'http://ai-service:8000',
|
||||
});
|
||||
expect(valuation).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 2. Industrial parks server — 3 tools
|
||||
// -----------------------------------------------------------------------
|
||||
describe('industrial-parks server', () => {
|
||||
it('search_industrial_parks: returns structured results from Typesense', async () => {
|
||||
const handler = getToolHandler(industrialServer, 'search_industrial_parks');
|
||||
const result = await handler({
|
||||
query: 'VSIP Bình Dương',
|
||||
page: 1,
|
||||
perPage: 20,
|
||||
});
|
||||
|
||||
expect(result.isError).toBeFalsy();
|
||||
const data = parseToolResult(result);
|
||||
|
||||
// Schema validation
|
||||
expect(data).toHaveProperty('totalFound');
|
||||
expect(data).toHaveProperty('page');
|
||||
expect(data).toHaveProperty('perPage');
|
||||
expect(data).toHaveProperty('searchTimeMs');
|
||||
expect(data).toHaveProperty('results');
|
||||
expect(typeof data.totalFound).toBe('number');
|
||||
|
||||
const results = data.results as Record<string, unknown>[];
|
||||
expect(results.length).toBeGreaterThan(0);
|
||||
|
||||
// Validate result item schema
|
||||
const item = results[0];
|
||||
expect(item).toHaveProperty('parkId');
|
||||
expect(item).toHaveProperty('name');
|
||||
expect(item).toHaveProperty('developer');
|
||||
expect(item).toHaveProperty('province');
|
||||
expect(item).toHaveProperty('region');
|
||||
expect(item).toHaveProperty('status');
|
||||
expect(item).toHaveProperty('totalAreaHa');
|
||||
expect(item).toHaveProperty('remainingAreaHa');
|
||||
expect(item).toHaveProperty('occupancyRate');
|
||||
expect(item).toHaveProperty('landRentUsdM2Year');
|
||||
expect(item).toHaveProperty('targetIndustries');
|
||||
expect(item).toHaveProperty('tenantCount');
|
||||
});
|
||||
|
||||
it('analyze_industrial_location: calls AI service and returns analysis schema', async () => {
|
||||
const handler = getToolHandler(industrialServer, 'analyze_industrial_location');
|
||||
const result = await handler({
|
||||
latitude: 11.05,
|
||||
longitude: 106.65,
|
||||
targetIndustry: 'electronics',
|
||||
});
|
||||
|
||||
expect(result.isError).toBeFalsy();
|
||||
const data = parseToolResult(result);
|
||||
|
||||
// Schema validation
|
||||
expect(data).toHaveProperty('overallScore');
|
||||
expect(data).toHaveProperty('connectivity');
|
||||
expect(data).toHaveProperty('infrastructure');
|
||||
expect(data).toHaveProperty('laborMarket');
|
||||
expect(data).toHaveProperty('incentives');
|
||||
expect(data).toHaveProperty('risks');
|
||||
expect(typeof data.overallScore).toBe('number');
|
||||
|
||||
const connectivity = data.connectivity as Record<string, unknown>;
|
||||
expect(connectivity).toHaveProperty('nearestPort');
|
||||
expect(connectivity).toHaveProperty('nearestAirport');
|
||||
|
||||
// Verify correct URL was called
|
||||
expect(fetchSpy).toHaveBeenCalledWith(
|
||||
'http://ai-service:8000/industrial/analyze-location',
|
||||
expect.objectContaining({ method: 'POST' }),
|
||||
);
|
||||
});
|
||||
|
||||
it('estimate_industrial_rent: calls AI service and returns rent estimate schema', async () => {
  // Look up the registered tool and request an estimate for a sample lease.
  const estimateRent = getToolHandler(industrialServer, 'estimate_industrial_rent');
  const response = await estimateRent({
    province: 'Bình Dương',
    propertyType: 'ready_built_factory',
    areaM2: 10000,
    leaseDurationYears: 10,
  });

  expect(response.isError).toBeFalsy();
  const estimate = parseToolResult(response);

  // Schema validation: every top-level field of the estimate must be present.
  const topLevelKeys = [
    'estimatedRentUsdM2',
    'pricingUnit',
    'totalMonthlyUsd',
    'totalLeaseUsd',
    'managementFeeUsdM2',
    'depositMonths',
    'marketComparison',
    'breakdown',
    'input',
  ];
  for (const key of topLevelKeys) {
    expect(estimate).toHaveProperty(key);
  }
  expect(typeof estimate.estimatedRentUsdM2).toBe('number');

  // The market comparison must carry the province low / high / average figures.
  const comparison = estimate.marketComparison as Record<string, unknown>;
  for (const key of ['provinceLow', 'provinceHigh', 'provinceAvg']) {
    expect(comparison).toHaveProperty(key);
  }

  // Verify the request went to the expected endpoint as a POST.
  const expectedUrl = 'http://ai-service:8000/industrial/estimate-rent';
  expect(fetchSpy).toHaveBeenCalledWith(
    expectedUrl,
    expect.objectContaining({ method: 'POST' }),
  );
});
|
||||
});
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 3. Reports server — 2 tools
|
||||
// -----------------------------------------------------------------------
|
||||
describe('reports server', () => {
  it('generate_report: calls NestJS API and returns report schema', async () => {
    // Look up the registered tool and request a market overview report.
    const generateReport = getToolHandler(reportsServer, 'generate_report');
    const response = await generateReport({
      reportType: 'market_overview',
      location: 'Quận 7, Hồ Chí Minh',
      period: '1y',
      includeForecasts: false,
      includeMacro: false,
      language: 'vi',
    });

    expect(response.isError).toBeFalsy();
    const report = parseToolResult(response);

    // Schema validation: every top-level field of the report must be present.
    const topLevelKeys = [
      'reportId',
      'reportType',
      'title',
      'location',
      'generatedAt',
      'summary',
      'sections',
      'keyMetrics',
    ];
    for (const key of topLevelKeys) {
      expect(report).toHaveProperty(key);
    }
    expect(typeof report.reportId).toBe('string');
    expect(Array.isArray(report.sections)).toBe(true);

    // Verify correct URL was called (NestJS API, not AI service).
    const expectedUrl = 'http://api:3001/api/v1/reports/generate';
    expect(fetchSpy).toHaveBeenCalledWith(
      expectedUrl,
      expect.objectContaining({ method: 'POST' }),
    );
  });

  it('get_macro_data: calls NestJS API with GET and returns macro data schema', async () => {
    // Look up the registered tool and ask for a single GDP year.
    const getMacroData = getToolHandler(reportsServer, 'get_macro_data');
    const response = await getMacroData({
      province: 'Bình Dương',
      categories: ['gdp'],
      fromYear: 2024,
      toYear: 2024,
    });

    expect(response.isError).toBeFalsy();
    const macro = parseToolResult(response);

    // Schema validation: top-level envelope.
    for (const key of ['province', 'period', 'data', 'highlights']) {
      expect(macro).toHaveProperty(key);
    }
    expect(macro.province).toBe('Bình Dương');

    // The returned period must echo the requested year range.
    const range = macro.period as Record<string, number>;
    expect(range.from).toBe(2024);
    expect(range.to).toBe(2024);

    // Exactly one GDP data point is expected for the single requested year.
    const series = macro.data as Record<string, unknown[]>;
    expect(series).toHaveProperty('gdp');
    expect(series.gdp).toHaveLength(1);

    const firstGdpPoint = series.gdp[0] as Record<string, unknown>;
    for (const key of ['year', 'value', 'unit', 'yoyChange']) {
      expect(firstGdpPoint).toHaveProperty(key);
    }

    // Verify it used GET (not POST).
    const isMacroRequest = (url: string): boolean => url.includes('/reports/macro-data');
    const macroCall = fetchSpy.mock.calls.find((call) => isMacroRequest(call[0] as string));
    expect(macroCall).toBeDefined();
    const requestInit = macroCall![1] as RequestInit;
    expect(requestInit.method).toBe('GET');
  });
});
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 4. Env var routing: industrial tools → AI_SERVICE_URL, reports → API_BASE_URL
|
||||
// -----------------------------------------------------------------------
|
||||
describe('env var routing', () => {
  /** Return the first recorded fetch call whose URL contains `pathFragment`. */
  const findFetchCall = (pathFragment: string) =>
    fetchSpy.mock.calls.find((call) => (call[0] as string).includes(pathFragment));

  it('industrial tools call aiServiceBaseUrl (AI_SERVICE_URL)', async () => {
    // Invoke the tools here instead of relying on calls recorded by earlier
    // tests, so this test also passes when run alone, filtered, or reordered.
    // NOTE(review): assumes the shared fetch mock answers repeated calls to
    // these endpoints — confirm against the suite setup.
    await getToolHandler(industrialServer, 'analyze_industrial_location')({
      latitude: 11.05,
      longitude: 106.65,
      targetIndustry: 'electronics',
    });
    await getToolHandler(industrialServer, 'estimate_industrial_rent')({
      province: 'Bình Dương',
      propertyType: 'ready_built_factory',
      areaM2: 10000,
      leaseDurationYears: 10,
    });

    const analyzeCall = findFetchCall('/industrial/analyze-location');
    expect(analyzeCall).toBeDefined();
    expect((analyzeCall![0] as string).startsWith('http://ai-service:8000')).toBe(true);

    const rentCall = findFetchCall('/industrial/estimate-rent');
    expect(rentCall).toBeDefined();
    expect((rentCall![0] as string).startsWith('http://ai-service:8000')).toBe(true);
  });

  it('report tools call apiBaseUrl (API_BASE_URL)', async () => {
    // Same reasoning as above: trigger the requests inside the test so the
    // assertions do not depend on test execution order.
    await getToolHandler(reportsServer, 'generate_report')({
      reportType: 'market_overview',
      location: 'Quận 7, Hồ Chí Minh',
      period: '1y',
      includeForecasts: false,
      includeMacro: false,
      language: 'vi',
    });
    await getToolHandler(reportsServer, 'get_macro_data')({
      province: 'Bình Dương',
      categories: ['gdp'],
      fromYear: 2024,
      toYear: 2024,
    });

    const reportCall = findFetchCall('/reports/generate');
    expect(reportCall).toBeDefined();
    expect((reportCall![0] as string).startsWith('http://api:3001')).toBe(true);

    const macroCall = findFetchCall('/reports/macro-data');
    expect(macroCall).toBeDefined();
    expect((macroCall![0] as string).startsWith('http://api:3001')).toBe(true);
  });
});
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// 5. Registry simulation — verify all servers can be registered
|
||||
// -----------------------------------------------------------------------
|
||||
describe('registry integration', () => {
  it('McpRegistryService registers industrial-parks and reports servers', async () => {
    // Simulate what McpRegistryService.onModuleInit does: create each server
    // and store it under its registry name.
    const sharedTypesense = typesenseClient as unknown as TypesenseClient;
    const registrations: Array<[string, unknown]> = [
      [
        'property-search',
        createPropertySearchServer({
          typesenseClient: sharedTypesense,
          collectionName: 'listings',
        }),
      ],
      [
        'market-analytics',
        createMarketAnalyticsServer({
          typesenseClient: sharedTypesense,
          collectionName: 'listings',
        }),
      ],
      ['valuation', createValuationServer({ aiServiceBaseUrl: 'http://ai-service:8000' })],
      [
        'industrial-parks',
        createIndustrialParksServer({
          typesenseClient: sharedTypesense,
          collectionName: 'industrial_parks',
          aiServiceBaseUrl: 'http://ai-service:8000',
        }),
      ],
      ['reports', createReportsServer({ apiBaseUrl: 'http://api:3001/api/v1' })],
    ];
    const servers = new Map<string, unknown>(registrations);

    // All 5 servers should be registered under distinct names.
    expect(servers.size).toBe(5);
    const registeredNames = Array.from(servers.keys()).sort();
    expect(registeredNames).toEqual([
      'industrial-parks',
      'market-analytics',
      'property-search',
      'reports',
      'valuation',
    ]);

    // Each factory must have returned a defined value (no deeper type check here).
    servers.forEach((server, name) => {
      expect(server, `Server "${name}" should be defined`).toBeDefined();
    });
  });
});
|
||||
});
|
||||
Reference in New Issue
Block a user