# goodgo-platform/libs/ai-services/tests/test_avm_industrial.py

"""Tests for industrial AVM rent estimation endpoint."""

from pathlib import Path

import pytest
from fastapi.testclient import TestClient

from app.main import app
from app.models.avm_industrial import IndustrialAVMRequest

# Module-wide TestClient bound to the FastAPI app; the endpoint tests below
# all post through this single instance.
client = TestClient(app)

# Two levels above this file: the directory that contains this test file's
# own directory.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Directory expected to hold trained model artifacts.
RIDGE_MODEL_DIR = REPO_ROOT.joinpath("models")
# Pickled ridge model; tests that need it are skipped when it does not exist.
RIDGE_ARTIFACT = RIDGE_MODEL_DIR.joinpath("avm_industrial_park_ridge_v1.pkl")

# ── Minimal valid request payload ───────────────────────────────
# Baseline JSON body posted to /avm/industrial/predict. Individual tests
# derive variants from it by overriding or adding single keys (for example
# property_type, park_occupancy_rate, zoning, loading_docks,
# building_coverage). The last three are absent here — presumably optional
# on the request model; verify against IndustrialAVMRequest.

_PREDICT_PAYLOAD = {
    "province": "Bình Dương",
    "region": "south",
    "park_occupancy_rate": 0.85,
    "park_area_ha": 500,
    "park_age_years": 10,
    "distance_to_port_km": 60,
    "distance_to_airport_km": 30,
    "distance_to_highway_km": 5,
    "property_type": "factory",
    "area_m2": 5000,
    "ceiling_height_m": 10,
    "floor_load_ton_m2": 3.0,
    "power_capacity_kva": 1000,
}


def test_predict_industrial_heuristic():
    """Endpoint answers with a coherent estimate from the heuristic backend.

    Checks the HTTP status, that the point estimate is positive and lies
    strictly inside its low/high range, that the derived rent figures are
    positive, and that the reported model version is the heuristic one.
    """
    response = client.post("/avm/industrial/predict", json=_PREDICT_PAYLOAD)
    assert response.status_code == 200

    body = response.json()
    estimate = body["estimated_rent_usd_m2"]

    assert estimate > 0
    assert 0 <= body["confidence"] <= 1
    # The range must bracket the point estimate strictly on both sides.
    assert body["rent_range_low_usd_m2"] < estimate < body["rent_range_high_usd_m2"]
    assert body["annual_rent_usd_m2"] > 0
    assert body["total_monthly_rent_usd"] > 0
    assert body["model_version"] == "heuristic-v1"


def test_predict_industrial_returns_comparables():
    """Heuristic should return comparable industrial properties.

    The response must contain at least one comparable, and every comparable
    must carry a non-empty park name, a positive rent and a similarity score
    within [0, 1].
    """
    resp = client.post("/avm/industrial/predict", json=_PREDICT_PAYLOAD)
    # Fail on the status code (with the body for context) instead of on a
    # KeyError raised while indexing into an error payload.
    assert resp.status_code == 200, resp.text
    data = resp.json()

    comps = data["comparables"]
    assert len(comps) > 0
    for c in comps:
        assert c["park_name"]
        assert c["rent_usd_m2"] > 0
        assert 0 <= c["similarity_score"] <= 1


def test_predict_industrial_returns_drivers():
    """Heuristic should return feature importance drivers.

    The response must contain at least one driver, and every driver's
    importance must lie within [0, 1].
    """
    resp = client.post("/avm/industrial/predict", json=_PREDICT_PAYLOAD)
    # Fail on the status code (with the body for context) instead of on a
    # KeyError raised while indexing into an error payload.
    assert resp.status_code == 200, resp.text
    data = resp.json()

    drivers = data["drivers"]
    assert len(drivers) > 0
    assert all(0 <= d["importance"] <= 1 for d in drivers)


def test_predict_industrial_ready_built_premium():
    """A ready-built factory must be estimated above the baseline factory."""
    endpoint = "/avm/industrial/predict"

    baseline = client.post(endpoint, json=_PREDICT_PAYLOAD).json()
    # Same request, only the property type differs.
    upgraded_request = dict(_PREDICT_PAYLOAD, property_type="ready_built_factory")
    upgraded = client.post(endpoint, json=upgraded_request).json()

    assert baseline["estimated_rent_usd_m2"] < upgraded["estimated_rent_usd_m2"]


def test_predict_industrial_open_yard_discount():
    """An open yard must be estimated below the baseline factory."""
    endpoint = "/avm/industrial/predict"

    factory_result = client.post(endpoint, json=_PREDICT_PAYLOAD).json()
    # Same request, only the property type differs.
    yard_request = dict(_PREDICT_PAYLOAD, property_type="open_yard")
    yard_result = client.post(endpoint, json=yard_request).json()

    assert factory_result["estimated_rent_usd_m2"] > yard_result["estimated_rent_usd_m2"]


def test_predict_industrial_high_occupancy_premium():
    """Estimate must rise when park occupancy goes from 0.50 to 0.95."""
    endpoint = "/avm/industrial/predict"
    sparse_request = dict(_PREDICT_PAYLOAD, park_occupancy_rate=0.50)
    busy_request = dict(_PREDICT_PAYLOAD, park_occupancy_rate=0.95)

    sparse = client.post(endpoint, json=sparse_request).json()
    busy = client.post(endpoint, json=busy_request).json()

    assert sparse["estimated_rent_usd_m2"] < busy["estimated_rent_usd_m2"]


def test_predict_industrial_annual_rent():
    """Annual rent equals the monthly estimate times 12, rounded to 2 places."""
    body = client.post("/avm/industrial/predict", json=_PREDICT_PAYLOAD).json()
    monthly = body["estimated_rent_usd_m2"]
    # Exact comparison against the rounded product, as the endpoint reports it.
    assert body["annual_rent_usd_m2"] == round(monthly * 12, 2)


def test_predict_industrial_total_rent():
    """Total monthly rent matches rent per m² times the requested area."""
    body = client.post("/avm/industrial/predict", json=_PREDICT_PAYLOAD).json()
    projected_total = body["estimated_rent_usd_m2"] * _PREDICT_PAYLOAD["area_m2"]
    # Allow less than one USD of drift between reported and recomputed totals.
    drift = body["total_monthly_rent_usd"] - projected_total
    assert abs(drift) < 1.0


def test_predict_industrial_free_trade_zone_premium():
    """free_trade_zone zoning must be estimated above general_industrial."""
    endpoint = "/avm/industrial/predict"
    general_request = dict(_PREDICT_PAYLOAD, zoning="general_industrial")
    ftz_request = dict(_PREDICT_PAYLOAD, zoning="free_trade_zone")

    general_result = client.post(endpoint, json=general_request).json()
    ftz_result = client.post(endpoint, json=ftz_request).json()

    assert general_result["estimated_rent_usd_m2"] < ftz_result["estimated_rent_usd_m2"]


def test_predict_industrial_high_tech_zone_premium():
    """high_tech zoning must be estimated above general_industrial."""
    endpoint = "/avm/industrial/predict"
    general_request = dict(_PREDICT_PAYLOAD, zoning="general_industrial")
    high_tech_request = dict(_PREDICT_PAYLOAD, zoning="high_tech")

    general_result = client.post(endpoint, json=general_request).json()
    high_tech_result = client.post(endpoint, json=high_tech_request).json()

    assert general_result["estimated_rent_usd_m2"] < high_tech_result["estimated_rent_usd_m2"]


def test_predict_industrial_loading_docks_premium():
    """Six loading docks must be estimated above zero loading docks."""
    endpoint = "/avm/industrial/predict"
    dockless_request = dict(_PREDICT_PAYLOAD, loading_docks=0)
    docked_request = dict(_PREDICT_PAYLOAD, loading_docks=6)

    dockless = client.post(endpoint, json=dockless_request).json()
    docked = client.post(endpoint, json=docked_request).json()

    assert dockless["estimated_rent_usd_m2"] < docked["estimated_rent_usd_m2"]


def test_predict_industrial_building_coverage_premium():
    """Building coverage of 0.7 must be estimated above coverage of 0.3."""
    endpoint = "/avm/industrial/predict"
    sparse_request = dict(_PREDICT_PAYLOAD, building_coverage=0.3)
    dense_request = dict(_PREDICT_PAYLOAD, building_coverage=0.7)

    sparse = client.post(endpoint, json=sparse_request).json()
    dense = client.post(endpoint, json=dense_request).json()

    assert sparse["estimated_rent_usd_m2"] < dense["estimated_rent_usd_m2"]


def test_predict_industrial_validation_error():
    """A body carrying only area_m2 must be rejected with HTTP 422."""
    incomplete_body = {"area_m2": 5000}
    response = client.post("/avm/industrial/predict", json=incomplete_body)
    assert response.status_code == 422


def test_predict_industrial_invalid_occupancy():
    """An occupancy rate of 1.5 (outside 0-1) must be rejected with HTTP 422."""
    out_of_range_body = dict(_PREDICT_PAYLOAD, park_occupancy_rate=1.5)
    response = client.post("/avm/industrial/predict", json=out_of_range_body)
    assert response.status_code == 422


# ── Ridge v1 artifact tests (TEC-2768) ───────────────────────────────
# Shared request model for the service-level tests below. These tests call
# the service's predict() directly rather than going through the HTTP client,
# and derive variants with model_copy(update=...).

_RIDGE_REQ = IndustrialAVMRequest(
    province="Bình Dương",
    region="south",
    park_occupancy_rate=0.85,
    park_area_ha=500,
    park_age_years=10,
    distance_to_port_km=25,
    distance_to_airport_km=20,
    distance_to_highway_km=2,
    property_type="ready_built_factory",
    area_m2=5000,
    ceiling_height_m=10,
    floor_load_ton_m2=3.0,
    power_capacity_kva=1500,
    building_coverage=0.55,
    loading_docks=4,
    zoning="general_industrial",
    industry_demand_index=0.7,
    fdi_province_musd=4800,
    labor_cost_province_vnd=8_500_000,
    logistics_connectivity_score=0.85,
)


def _fresh_service_with_model_dir(model_dir: Path):
    """Return a new IndustrialAVMService constructed against `model_dir`.

    `settings.model_path` is pointed at `model_dir` only while the service is
    being constructed and is restored afterwards, even if construction
    raises. A fresh instance is required because the shared
    `industrial_avm_service` is a module-level singleton whose backend is
    decided at import time.
    """
    from app.config import settings
    from app.services.avm_industrial_service import IndustrialAVMService

    saved_model_path = settings.model_path
    settings.model_path = str(model_dir)
    try:
        service = IndustrialAVMService()
    finally:
        # Always undo the override so other tests see the original setting.
        settings.model_path = saved_model_path
    return service


@pytest.mark.skipif(not RIDGE_ARTIFACT.exists(), reason="ridge artifact not built")
def test_predict_uses_ridge_when_artifact_present():
    """With the artifact on disk the service selects and reports the ridge backend."""
    service = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)
    assert service._backend == "ridge"
    assert service._model_version == "ridge-industrial-v1"

    result = service.predict(_RIDGE_REQ)
    assert result.model_version == "ridge-industrial-v1"

    estimate = result.estimated_rent_usd_m2
    low_bound = result.rent_range_low_usd_m2
    high_bound = result.rent_range_high_usd_m2
    assert estimate > 0
    # The band may touch the estimate on either side ...
    assert low_bound <= estimate <= high_bound
    # ... but the conformal band itself must have strictly positive width.
    assert high_bound > low_bound
    # Confidence must clear the 0.75 acceptance threshold.
    assert result.confidence >= 0.75


def test_predict_falls_back_to_heuristic_when_artifact_absent(tmp_path: Path):
    """With an empty model directory the service uses the heuristic backend."""
    # tmp_path is a fresh, empty directory, so no artifact can be found in it.
    service = _fresh_service_with_model_dir(tmp_path)
    assert service._backend == "heuristic"

    result = service.predict(_RIDGE_REQ)
    assert result.model_version == "heuristic-v1"
    assert result.estimated_rent_usd_m2 > 0


@pytest.mark.skipif(not RIDGE_ARTIFACT.exists(), reason="ridge artifact not built")
def test_ridge_monotonic_occupancy():
    """Ridge estimate at 0.95 occupancy must not fall below the one at 0.30."""
    service = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)

    sparse_request = _RIDGE_REQ.model_copy(update={"park_occupancy_rate": 0.30})
    sparse = service.predict(sparse_request)
    busy_request = _RIDGE_REQ.model_copy(update={"park_occupancy_rate": 0.95})
    busy = service.predict(busy_request)

    # Non-strict: equal estimates are accepted.
    assert busy.estimated_rent_usd_m2 >= sparse.estimated_rent_usd_m2


@pytest.mark.skipif(not RIDGE_ARTIFACT.exists(), reason="ridge artifact not built")
def test_ridge_land_head_conversion():
    """industrial_land requests must convert annual → monthly USD/m²."""
    service = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)
    land_request = _RIDGE_REQ.model_copy(update={"property_type": "industrial_land"})
    result = service.predict(land_request)

    assert result.estimated_rent_usd_m2 > 0
    # Annual figure must be about 12 × the monthly one; 0.5 absorbs rounding.
    gap = result.annual_rent_usd_m2 - result.estimated_rent_usd_m2 * 12
    assert abs(gap) < 0.5


@pytest.mark.skipif(not RIDGE_ARTIFACT.exists(), reason="ridge artifact not built")
def test_ridge_warehouse_head_different_from_factory():
    """Warehouse and factory requests must route to different ridge heads."""
    service = _fresh_service_with_model_dir(RIDGE_MODEL_DIR)

    factory_request = _RIDGE_REQ.model_copy(update={"property_type": "ready_built_factory"})
    factory = service.predict(factory_request)
    warehouse_request = _RIDGE_REQ.model_copy(update={"property_type": "warehouse"})
    warehouse = service.predict(warehouse_request)

    # Training data consistently shows RBF > RBW rents, but only a difference
    # between the two estimates is asserted here, not its direction.
    assert factory.estimated_rent_usd_m2 != warehouse.estimated_rent_usd_m2