Files
goodgo-platform/libs/ai-services/tests/test_avm_industrial.py

285 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests for industrial AVM rent estimation endpoint."""
from pathlib import Path
import pytest
from fastapi.testclient import TestClient
from app.main import app
from app.models.avm_industrial import IndustrialAVMRequest
# Module-level test client shared by every HTTP-level test in this file.
client = TestClient(app)
# Directory two levels above this file, i.e. the parent of the directory that
# holds this test module (not necessarily the VCS root, despite the name).
REPO_ROOT = Path(__file__).resolve().parent.parent
# Location where the ridge artifact is looked up by the ridge tests below.
RIDGE_MODEL_DIR = REPO_ROOT.joinpath("models")
RIDGE_ARTIFACT = RIDGE_MODEL_DIR.joinpath("avm_industrial_park_ridge_v1.pkl")
# ── Baseline JSON body for POST /avm/industrial/predict ─────────
# Individual tests copy this dict and override one field at a time.
_PREDICT_PAYLOAD = {
    # Location
    "province": "Bình Dương",
    "region": "south",
    # Park-level attributes
    "park_occupancy_rate": 0.85,
    "park_area_ha": 500,
    "park_age_years": 10,
    # Distances to infrastructure
    "distance_to_port_km": 60,
    "distance_to_airport_km": 30,
    "distance_to_highway_km": 5,
    # Unit-level attributes
    "property_type": "factory",
    "area_m2": 5000,
    "ceiling_height_m": 10,
    "floor_load_ton_m2": 3.0,
    "power_capacity_kva": 1000,
}
def test_predict_industrial_heuristic():
    """POST the baseline payload and sanity-check every top-level figure.

    The response is expected to come from the heuristic fallback, identified
    by ``model_version == "heuristic-v1"``.
    """
    response = client.post("/avm/industrial/predict", json=_PREDICT_PAYLOAD)
    assert response.status_code == 200

    body = response.json()
    assert body["estimated_rent_usd_m2"] > 0
    assert 0 <= body["confidence"] <= 1
    # The point estimate must lie strictly inside the reported range.
    assert body["rent_range_low_usd_m2"] < body["estimated_rent_usd_m2"]
    assert body["rent_range_high_usd_m2"] > body["estimated_rent_usd_m2"]
    assert body["annual_rent_usd_m2"] > 0
    assert body["total_monthly_rent_usd"] > 0
    assert body["model_version"] == "heuristic-v1"
def test_predict_industrial_returns_comparables():
    """Heuristic should return comparable industrial properties.

    Every comparable must carry a non-empty park name, a positive rent and a
    similarity score within [0, 1].
    """
    resp = client.post("/avm/industrial/predict", json=_PREDICT_PAYLOAD)
    # Check the status first so an error response fails here, with its body
    # in the message, instead of as a KeyError on "comparables" below.
    assert resp.status_code == 200, resp.text
    data = resp.json()
    comps = data["comparables"]
    assert len(comps) > 0
    for c in comps:
        assert c["park_name"]
        assert c["rent_usd_m2"] > 0
        assert 0 <= c["similarity_score"] <= 1
def test_predict_industrial_returns_drivers():
    """Heuristic should return feature importance drivers.

    At least one driver is required and each importance must lie in [0, 1].
    """
    resp = client.post("/avm/industrial/predict", json=_PREDICT_PAYLOAD)
    # Check the status first so an error response fails here, with its body
    # in the message, instead of as a KeyError on "drivers" below.
    assert resp.status_code == 200, resp.text
    data = resp.json()
    drivers = data["drivers"]
    assert len(drivers) > 0
    assert all(0 <= d["importance"] <= 1 for d in drivers)
def test_predict_industrial_ready_built_premium():
    """A ready-built factory must be estimated above the baseline factory."""
    endpoint = "/avm/industrial/predict"
    baseline = client.post(endpoint, json=_PREDICT_PAYLOAD).json()

    # Same request, only the property type differs.
    rbf_body = dict(_PREDICT_PAYLOAD, property_type="ready_built_factory")
    rbf = client.post(endpoint, json=rbf_body).json()

    assert rbf["estimated_rent_usd_m2"] > baseline["estimated_rent_usd_m2"]
def test_predict_industrial_open_yard_discount():
    """An open yard must be estimated below the baseline factory."""
    endpoint = "/avm/industrial/predict"
    baseline = client.post(endpoint, json=_PREDICT_PAYLOAD).json()

    # Same request, only the property type differs.
    yard_body = dict(_PREDICT_PAYLOAD, property_type="open_yard")
    open_yard = client.post(endpoint, json=yard_body).json()

    assert open_yard["estimated_rent_usd_m2"] < baseline["estimated_rent_usd_m2"]
def test_predict_industrial_high_occupancy_premium():
    """Raising park occupancy from 0.50 to 0.95 must raise the estimate."""
    endpoint = "/avm/industrial/predict"

    sparse_body = dict(_PREDICT_PAYLOAD, park_occupancy_rate=0.50)
    sparse = client.post(endpoint, json=sparse_body).json()

    busy_body = dict(_PREDICT_PAYLOAD, park_occupancy_rate=0.95)
    busy = client.post(endpoint, json=busy_body).json()

    assert busy["estimated_rent_usd_m2"] > sparse["estimated_rent_usd_m2"]
def test_predict_industrial_annual_rent():
    """The annual figure must equal the monthly estimate × 12, rounded to 2 dp."""
    body = client.post("/avm/industrial/predict", json=_PREDICT_PAYLOAD).json()
    twelve_months = round(body["estimated_rent_usd_m2"] * 12, 2)
    assert body["annual_rent_usd_m2"] == twelve_months
def test_predict_industrial_total_rent():
    """Total monthly rent must equal rent per m² × requested area (within 1 USD)."""
    body = client.post("/avm/industrial/predict", json=_PREDICT_PAYLOAD).json()
    rent_times_area = body["estimated_rent_usd_m2"] * _PREDICT_PAYLOAD["area_m2"]
    # A 1 USD tolerance is allowed on the total.
    deviation = abs(body["total_monthly_rent_usd"] - rent_times_area)
    assert deviation < 1.0
def test_predict_industrial_free_trade_zone_premium():
    """``free_trade_zone`` zoning must be estimated above ``general_industrial``."""
    endpoint = "/avm/industrial/predict"

    general_body = dict(_PREDICT_PAYLOAD, zoning="general_industrial")
    general_zone = client.post(endpoint, json=general_body).json()

    ftz_body = dict(_PREDICT_PAYLOAD, zoning="free_trade_zone")
    free_trade = client.post(endpoint, json=ftz_body).json()

    assert free_trade["estimated_rent_usd_m2"] > general_zone["estimated_rent_usd_m2"]
def test_predict_industrial_high_tech_zone_premium():
    """``high_tech`` zoning must be estimated above ``general_industrial``."""
    endpoint = "/avm/industrial/predict"

    general_body = dict(_PREDICT_PAYLOAD, zoning="general_industrial")
    general_zone = client.post(endpoint, json=general_body).json()

    high_tech_body = dict(_PREDICT_PAYLOAD, zoning="high_tech")
    high_tech = client.post(endpoint, json=high_tech_body).json()

    assert high_tech["estimated_rent_usd_m2"] > general_zone["estimated_rent_usd_m2"]
def test_predict_industrial_loading_docks_premium():
    """Six loading docks must be estimated above zero loading docks."""
    endpoint = "/avm/industrial/predict"

    dockless_body = dict(_PREDICT_PAYLOAD, loading_docks=0)
    dockless = client.post(endpoint, json=dockless_body).json()

    six_dock_body = dict(_PREDICT_PAYLOAD, loading_docks=6)
    six_docks = client.post(endpoint, json=six_dock_body).json()

    assert six_docks["estimated_rent_usd_m2"] > dockless["estimated_rent_usd_m2"]
def test_predict_industrial_building_coverage_premium():
    """Building coverage of 0.7 must be estimated above coverage of 0.3."""
    endpoint = "/avm/industrial/predict"

    sparse_body = dict(_PREDICT_PAYLOAD, building_coverage=0.3)
    sparse = client.post(endpoint, json=sparse_body).json()

    dense_body = dict(_PREDICT_PAYLOAD, building_coverage=0.7)
    dense = client.post(endpoint, json=dense_body).json()

    assert dense["estimated_rent_usd_m2"] > sparse["estimated_rent_usd_m2"]
def test_predict_industrial_validation_error():
    """A body carrying only ``area_m2`` must be rejected with HTTP 422."""
    incomplete_body = {"area_m2": 5000}
    response = client.post("/avm/industrial/predict", json=incomplete_body)
    assert response.status_code == 422
def test_predict_industrial_invalid_occupancy():
    """An occupancy rate of 1.5 (outside 0-1) must be rejected with HTTP 422."""
    out_of_range_body = dict(_PREDICT_PAYLOAD, park_occupancy_rate=1.5)
    response = client.post("/avm/industrial/predict", json=out_of_range_body)
    assert response.status_code == 422
# ── Ridge v1 artifact tests (TEC-2768) ───────────────────────────────
# Baseline request object for the service-level tests below; individual tests
# derive variants from it with ``model_copy(update=...)``.
_RIDGE_REQ = IndustrialAVMRequest(
    # Location
    province="Bình Dương",
    region="south",
    # Park-level attributes
    park_occupancy_rate=0.85,
    park_area_ha=500,
    park_age_years=10,
    # Distances to infrastructure
    distance_to_port_km=25,
    distance_to_airport_km=20,
    distance_to_highway_km=2,
    # Unit-level attributes
    property_type="ready_built_factory",
    area_m2=5000,
    ceiling_height_m=10,
    floor_load_ton_m2=3.0,
    power_capacity_kva=1500,
    building_coverage=0.55,
    loading_docks=4,
    zoning="general_industrial",
    # Province / market indicators
    industry_demand_index=0.7,
    fdi_province_musd=4800,
    labor_cost_province_vnd=8_500_000,
    logistics_connectivity_score=0.85,
)
def _fresh_service_with_model_dir(model_dir: Path):
    """Construct a new ``IndustrialAVMService`` with ``settings.model_path`` set to *model_dir*.

    The shared ``industrial_avm_service`` is a module-level singleton whose
    backend is decided at import time, so tests that need a particular
    backend build their own instance through this helper.

    ``settings.model_path`` is overridden only while the constructor runs and
    is restored afterwards, including when construction raises.

    Args:
        model_dir: Directory to expose to the service as its model path.

    Returns:
        The newly constructed service instance.
    """
    from app.config import settings
    from app.services.avm_industrial_service import IndustrialAVMService

    saved_model_path = settings.model_path
    settings.model_path = str(model_dir)
    try:
        service = IndustrialAVMService()
    finally:
        # Always undo the override so later tests see the original setting.
        settings.model_path = saved_model_path
    return service
@pytest.mark.skipif(not RIDGE_ARTIFACT.exists(), reason="ridge artifact not built")
def test_predict_uses_ridge_when_artifact_present():
    """With the ridge artifact on disk the service must select and use the ridge backend."""
    service = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)
    assert service._backend == "ridge"
    assert service._model_version == "ridge-industrial-v1"

    prediction = service.predict(_RIDGE_REQ)
    assert prediction.model_version == "ridge-industrial-v1"
    assert prediction.estimated_rent_usd_m2 > 0
    # The estimate must be bracketed by the reported range.
    assert prediction.rent_range_low_usd_m2 <= prediction.estimated_rent_usd_m2
    assert prediction.rent_range_high_usd_m2 >= prediction.estimated_rent_usd_m2
    # The conformal band may not collapse to a single point.
    assert prediction.rent_range_high_usd_m2 > prediction.rent_range_low_usd_m2
    # Confidence should match the stored LOO coverage (>= 0.75 acceptance).
    assert prediction.confidence >= 0.75
def test_predict_falls_back_to_heuristic_when_artifact_absent(tmp_path: Path):
    """With an empty model directory the service must use the heuristic backend."""
    # pytest's tmp_path is a fresh, empty directory, so no artifact can be found.
    service = _fresh_service_with_model_dir(tmp_path)
    assert service._backend == "heuristic"

    prediction = service.predict(_RIDGE_REQ)
    assert prediction.model_version == "heuristic-v1"
    assert prediction.estimated_rent_usd_m2 > 0
@pytest.mark.skipif(not RIDGE_ARTIFACT.exists(), reason="ridge artifact not built")
def test_ridge_monotonic_occupancy():
    """Ridge estimate at 0.95 occupancy must be no lower than at 0.30 occupancy."""
    service = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)

    sparse_request = _RIDGE_REQ.model_copy(update={"park_occupancy_rate": 0.30})
    sparse = service.predict(sparse_request)

    busy_request = _RIDGE_REQ.model_copy(update={"park_occupancy_rate": 0.95})
    busy = service.predict(busy_request)

    # Non-strict on purpose: equal estimates are accepted.
    assert busy.estimated_rent_usd_m2 >= sparse.estimated_rent_usd_m2
@pytest.mark.skipif(not RIDGE_ARTIFACT.exists(), reason="ridge artifact not built")
def test_ridge_land_head_conversion():
    """industrial_land requests must convert annual → monthly USD/m²."""
    service = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)
    land_request = _RIDGE_REQ.model_copy(update={"property_type": "industrial_land"})
    prediction = service.predict(land_request)

    assert prediction.estimated_rent_usd_m2 > 0
    # The annual figure should be 12 × the monthly one; 0.5 absorbs rounding.
    gap = abs(prediction.annual_rent_usd_m2 - prediction.estimated_rent_usd_m2 * 12)
    assert gap < 0.5
@pytest.mark.skipif(not RIDGE_ARTIFACT.exists(), reason="ridge artifact not built")
def test_ridge_warehouse_head_different_from_factory():
    """Warehouse and factory requests must route to different ridge heads."""
    service = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)

    factory_request = _RIDGE_REQ.model_copy(update={"property_type": "ready_built_factory"})
    factory = service.predict(factory_request)

    warehouse_request = _RIDGE_REQ.model_copy(update={"property_type": "warehouse"})
    warehouse = service.predict(warehouse_request)

    # Training data is said to show RBF > RBW rents; this check only requires
    # the two estimates to differ and does not assert the direction.
    assert factory.estimated_rent_usd_m2 != warehouse.estimated_rent_usd_m2