"""Tests for industrial AVM rent estimation endpoint."""

from pathlib import Path

import pytest
from fastapi.testclient import TestClient

from app.main import app
from app.models.avm_industrial import IndustrialAVMRequest

client = TestClient(app)

REPO_ROOT = Path(__file__).resolve().parent.parent
RIDGE_MODEL_DIR = REPO_ROOT / "models"
RIDGE_ARTIFACT = RIDGE_MODEL_DIR / "avm_industrial_park_ridge_v1.pkl"

# Route exercised by every HTTP-level test in this module.
_PREDICT_URL = "/avm/industrial/predict"

# Skip marker shared by the tests that need the ridge artifact on disk.
_requires_ridge_artifact = pytest.mark.skipif(
    not RIDGE_ARTIFACT.exists(), reason="ridge artifact not built"
)

# ── Minimal valid request payload ───────────────────────────────
_PREDICT_PAYLOAD = {
    "province": "Bình Dương",
    "region": "south",
    "park_occupancy_rate": 0.85,
    "park_area_ha": 500,
    "park_age_years": 10,
    "distance_to_port_km": 60,
    "distance_to_airport_km": 30,
    "distance_to_highway_km": 5,
    "property_type": "factory",
    "area_m2": 5000,
    "ceiling_height_m": 10,
    "floor_load_ton_m2": 3.0,
    "power_capacity_kva": 1000,
}


def _predict(**overrides) -> dict:
    """POST the baseline payload, with `overrides` applied, and return the JSON body.

    The HTTP status is asserted first so that a rejected or failed request is
    reported as a status mismatch (with the response text) rather than as a
    `KeyError` when the caller indexes into an error body.
    """
    resp = client.post(_PREDICT_URL, json={**_PREDICT_PAYLOAD, **overrides})
    assert resp.status_code == 200, resp.text
    return resp.json()


def test_predict_industrial_heuristic():
    """Predict using heuristic fallback (no trained model)."""
    data = _predict()
    assert data["estimated_rent_usd_m2"] > 0
    assert 0 <= data["confidence"] <= 1
    assert data["rent_range_low_usd_m2"] < data["estimated_rent_usd_m2"]
    assert data["rent_range_high_usd_m2"] > data["estimated_rent_usd_m2"]
    assert data["annual_rent_usd_m2"] > 0
    assert data["total_monthly_rent_usd"] > 0
    assert data["model_version"] == "heuristic-v1"


def test_predict_industrial_returns_comparables():
    """Heuristic should return comparable industrial properties."""
    comps = _predict()["comparables"]
    assert len(comps) > 0
    for comp in comps:
        assert comp["park_name"]
        assert comp["rent_usd_m2"] > 0
        assert 0 <= comp["similarity_score"] <= 1


def test_predict_industrial_returns_drivers():
    """Heuristic should return feature importance drivers."""
    drivers = _predict()["drivers"]
    assert len(drivers) > 0
    assert all(0 <= d["importance"] <= 1 for d in drivers)


def test_predict_industrial_ready_built_premium():
    """Ready-built factories should be priced higher than standard."""
    standard = _predict()
    ready_built = _predict(property_type="ready_built_factory")
    assert ready_built["estimated_rent_usd_m2"] > standard["estimated_rent_usd_m2"]


def test_predict_industrial_open_yard_discount():
    """Open yards should be cheaper than factories."""
    factory = _predict()
    yard = _predict(property_type="open_yard")
    assert yard["estimated_rent_usd_m2"] < factory["estimated_rent_usd_m2"]


def test_predict_industrial_high_occupancy_premium():
    """Higher park occupancy should increase rent."""
    low = _predict(park_occupancy_rate=0.50)
    high = _predict(park_occupancy_rate=0.95)
    assert high["estimated_rent_usd_m2"] > low["estimated_rent_usd_m2"]


def test_predict_industrial_annual_rent():
    """Annual rent should be 12x monthly rent."""
    data = _predict()
    expected_annual = round(data["estimated_rent_usd_m2"] * 12, 2)
    assert data["annual_rent_usd_m2"] == expected_annual


def test_predict_industrial_total_rent():
    """Total monthly rent should be rent/m² × area."""
    data = _predict()
    expected_total = data["estimated_rent_usd_m2"] * _PREDICT_PAYLOAD["area_m2"]
    # Allow for rounding of the per-m² figure in the response.
    assert abs(data["total_monthly_rent_usd"] - expected_total) < 1.0


def test_predict_industrial_free_trade_zone_premium():
    """Free-trade-zone zoning should command higher rent than general_industrial."""
    general = _predict(zoning="general_industrial")
    ftz = _predict(zoning="free_trade_zone")
    assert ftz["estimated_rent_usd_m2"] > general["estimated_rent_usd_m2"]


def test_predict_industrial_high_tech_zone_premium():
    """High-tech zoning should command higher rent than general_industrial."""
    general = _predict(zoning="general_industrial")
    high_tech = _predict(zoning="high_tech")
    assert high_tech["estimated_rent_usd_m2"] > general["estimated_rent_usd_m2"]


def test_predict_industrial_loading_docks_premium():
    """More loading docks should increase rent."""
    no_docks = _predict(loading_docks=0)
    many_docks = _predict(loading_docks=6)
    assert many_docks["estimated_rent_usd_m2"] > no_docks["estimated_rent_usd_m2"]


def test_predict_industrial_building_coverage_premium():
    """Higher building coverage should increase rent."""
    low_cov = _predict(building_coverage=0.3)
    high_cov = _predict(building_coverage=0.7)
    assert high_cov["estimated_rent_usd_m2"] > low_cov["estimated_rent_usd_m2"]


def test_predict_industrial_validation_error():
    """Missing required fields should return 422."""
    # Posted directly (not via _predict) because a non-200 status is expected.
    resp = client.post(_PREDICT_URL, json={"area_m2": 5000})
    assert resp.status_code == 422


def test_predict_industrial_invalid_occupancy():
    """Occupancy rate outside 0-1 should be rejected."""
    resp = client.post(
        _PREDICT_URL,
        json={**_PREDICT_PAYLOAD, "park_occupancy_rate": 1.5},
    )
    assert resp.status_code == 422


# ── Ridge v1 artifact tests (TEC-2768) ───────────────────────────────
_RIDGE_REQ = IndustrialAVMRequest(
    province="Bình Dương",
    region="south",
    park_occupancy_rate=0.85,
    park_area_ha=500,
    park_age_years=10,
    distance_to_port_km=25,
    distance_to_airport_km=20,
    distance_to_highway_km=2,
    property_type="ready_built_factory",
    area_m2=5000,
    ceiling_height_m=10,
    floor_load_ton_m2=3.0,
    power_capacity_kva=1500,
    building_coverage=0.55,
    loading_docks=4,
    zoning="general_industrial",
    industry_demand_index=0.7,
    fdi_province_musd=4800,
    labor_cost_province_vnd=8_500_000,
    logistics_connectivity_score=0.85,
)


def _fresh_service_with_model_dir(model_dir: Path):
    """Build a fresh service instance pointed at `model_dir`.

    Needed because `industrial_avm_service` is a module-level singleton
    whose backend is decided at import time.

    `settings.model_path` is overridden only while the service is being
    constructed and is restored in the `finally` clause, so the override
    does not leak into other tests even if construction raises.
    """
    # Imported lazily, as in the original, so the override is applied right
    # before the service class is instantiated.
    from app.config import settings
    from app.services.avm_industrial_service import IndustrialAVMService

    original = settings.model_path
    settings.model_path = str(model_dir)
    try:
        return IndustrialAVMService()
    finally:
        settings.model_path = original


@_requires_ridge_artifact
def test_predict_uses_ridge_when_artifact_present():
    """A service built against the real model dir should use the ridge backend."""
    svc = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)
    assert svc._backend == "ridge"
    assert svc._model_version == "ridge-industrial-v1"

    resp = svc.predict(_RIDGE_REQ)
    assert resp.model_version == "ridge-industrial-v1"
    assert resp.estimated_rent_usd_m2 > 0
    assert resp.rent_range_low_usd_m2 <= resp.estimated_rent_usd_m2
    assert resp.rent_range_high_usd_m2 >= resp.estimated_rent_usd_m2
    # Conformal band must have strictly positive width.
    assert resp.rent_range_high_usd_m2 > resp.rent_range_low_usd_m2
    # Confidence should match the stored LOO coverage (≥ 0.75 acceptance).
    assert resp.confidence >= 0.75


def test_predict_falls_back_to_heuristic_when_artifact_absent(tmp_path: Path):
    """A service built against an empty model dir should use the heuristic backend."""
    svc = _fresh_service_with_model_dir(tmp_path)  # empty dir → no artifacts
    assert svc._backend == "heuristic"

    resp = svc.predict(_RIDGE_REQ)
    assert resp.model_version == "heuristic-v1"
    assert resp.estimated_rent_usd_m2 > 0


@_requires_ridge_artifact
def test_ridge_monotonic_occupancy():
    """Raising park occupancy must not lower the ridge rent estimate."""
    svc = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)
    low = svc.predict(_RIDGE_REQ.model_copy(update={"park_occupancy_rate": 0.30}))
    high = svc.predict(_RIDGE_REQ.model_copy(update={"park_occupancy_rate": 0.95}))
    assert high.estimated_rent_usd_m2 >= low.estimated_rent_usd_m2


@_requires_ridge_artifact
def test_ridge_land_head_conversion():
    """industrial_land requests must convert annual → monthly USD/m²."""
    svc = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)
    resp = svc.predict(_RIDGE_REQ.model_copy(update={"property_type": "industrial_land"}))
    # annual_rent_usd_m2 ≈ 12 × estimated_rent_usd_m2 (with rounding tolerance)
    assert resp.estimated_rent_usd_m2 > 0
    assert abs(resp.annual_rent_usd_m2 - resp.estimated_rent_usd_m2 * 12) < 0.5


@_requires_ridge_artifact
def test_ridge_warehouse_head_different_from_factory():
    """Warehouse and factory requests must route to different ridge heads."""
    svc = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)
    rbf = svc.predict(_RIDGE_REQ.model_copy(update={"property_type": "ready_built_factory"}))
    rbw = svc.predict(_RIDGE_REQ.model_copy(update={"property_type": "warehouse"}))
    # Training data consistently shows RBF > RBW rents, but this check only
    # requires the two estimates to differ; it does not enforce the ordering.
    assert rbf.estimated_rent_usd_m2 != rbw.estimated_rent_usd_m2