feat(ai-services): AVM v2 residential — expanded features, training pipeline, model versioning
Add neighborhood_score, developer_reputation, floor_level, direction premiums to the multi-model ensemble. Implement real Optuna-based training pipeline for XGBoost/LightGBM/CatBoost with grouped train/val/test splits. Add file-based model registry with rollback and list-versions endpoints. 23 Python tests covering all new features. Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
@@ -65,9 +65,10 @@ def test_predict_v2_returns_drivers():
|
||||
|
||||
|
||||
def test_predict_v2_with_full_features():
|
||||
"""Predict with all features populated."""
|
||||
"""Predict with all features populated (including new v2 features)."""
|
||||
payload = {
|
||||
**_PREDICT_PAYLOAD,
|
||||
"neighborhood_score": 0.8,
|
||||
"distance_to_cbd_km": 5.0,
|
||||
"distance_to_metro_km": 0.8,
|
||||
"distance_to_school_km": 0.5,
|
||||
@@ -75,11 +76,15 @@ def test_predict_v2_with_full_features():
|
||||
"distance_to_park_km": 0.3,
|
||||
"distance_to_mall_km": 1.0,
|
||||
"flood_zone_risk": 0.1,
|
||||
"floor_level": 12,
|
||||
"total_floors": 25,
|
||||
"direction": "southeast",
|
||||
"floor_ratio": 1.2,
|
||||
"building_age_years": 5,
|
||||
"has_elevator": True,
|
||||
"has_parking": True,
|
||||
"has_pool": False,
|
||||
"developer_reputation": 0.9,
|
||||
"avg_price_district_3m_vnd_m2": 85_000_000,
|
||||
"listing_density": 12.5,
|
||||
"absorption_rate": 0.3,
|
||||
@@ -149,8 +154,93 @@ def test_predict_v2_invalid_area():
|
||||
assert resp.status_code == 422
|
||||
|
||||
|
||||
def test_train_v2_scaffold():
|
||||
"""Training endpoint should return scaffold response."""
|
||||
# ── New v2 features: neighborhood, floor, direction, developer ──
|
||||
|
||||
|
||||
def test_predict_v2_neighborhood_premium():
    """High neighborhood score should increase price.

    Posts two predictions that differ only in ``neighborhood_score``
    (0.2 vs 0.9) and expects the higher score to produce a strictly
    higher ``estimated_price_vnd``.
    """
    low_resp = client.post(
        "/avm/v2/predict",
        json={**_PREDICT_PAYLOAD, "neighborhood_score": 0.2},
    )
    high_resp = client.post(
        "/avm/v2/predict",
        json={**_PREDICT_PAYLOAD, "neighborhood_score": 0.9},
    )

    # Check the HTTP status first so a rejected payload is reported as an
    # HTTP failure rather than as a KeyError on the missing price field.
    assert low_resp.status_code == 200
    assert high_resp.status_code == 200

    low_nb = low_resp.json()
    high_nb = high_resp.json()
    assert high_nb["estimated_price_vnd"] > low_nb["estimated_price_vnd"]
|
||||
|
||||
|
||||
def test_predict_v2_floor_level_premium():
    """Higher floor apartments should command a premium.

    Compares floor 2 against floor 20 in the same 25-floor building and
    expects the higher floor to have a strictly higher
    ``estimated_price_vnd``.
    """
    ground_resp = client.post(
        "/avm/v2/predict",
        json={**_PREDICT_PAYLOAD, "floor_level": 2, "total_floors": 25},
    )
    high_resp = client.post(
        "/avm/v2/predict",
        json={**_PREDICT_PAYLOAD, "floor_level": 20, "total_floors": 25},
    )

    # Check the HTTP status first so a rejected payload is reported as an
    # HTTP failure rather than as a KeyError on the missing price field.
    assert ground_resp.status_code == 200
    assert high_resp.status_code == 200

    ground = ground_resp.json()
    high = high_resp.json()
    assert high["estimated_price_vnd"] > ground["estimated_price_vnd"]
|
||||
|
||||
|
||||
def test_predict_v2_direction_premium():
    """South-facing properties should be priced higher than north-facing.

    Posts two predictions that differ only in ``direction`` ("south" vs
    "north") and compares their ``estimated_price_vnd``.
    """
    south_resp = client.post(
        "/avm/v2/predict",
        json={**_PREDICT_PAYLOAD, "direction": "south"},
    )
    north_resp = client.post(
        "/avm/v2/predict",
        json={**_PREDICT_PAYLOAD, "direction": "north"},
    )

    # Check the HTTP status first so a rejected payload is reported as an
    # HTTP failure rather than as a KeyError on the missing price field.
    assert south_resp.status_code == 200
    assert north_resp.status_code == 200

    south = south_resp.json()
    north = north_resp.json()
    assert south["estimated_price_vnd"] > north["estimated_price_vnd"]
|
||||
|
||||
|
||||
def test_predict_v2_developer_reputation():
    """Properties from reputable developers should be valued higher.

    Posts two predictions that differ only in ``developer_reputation``
    (0.2 vs 0.9) and expects the higher reputation to produce a strictly
    higher ``estimated_price_vnd``.
    """
    low_resp = client.post(
        "/avm/v2/predict",
        json={**_PREDICT_PAYLOAD, "developer_reputation": 0.2},
    )
    high_resp = client.post(
        "/avm/v2/predict",
        json={**_PREDICT_PAYLOAD, "developer_reputation": 0.9},
    )

    # Check the HTTP status first so a rejected payload is reported as an
    # HTTP failure rather than as a KeyError on the missing price field.
    assert low_resp.status_code == 200
    assert high_resp.status_code == 200

    low_rep = low_resp.json()
    high_rep = high_resp.json()
    assert high_rep["estimated_price_vnd"] > low_rep["estimated_price_vnd"]
|
||||
|
||||
|
||||
def test_predict_v2_direction_defaults_unknown():
    """Unknown direction should not affect price (neutral).

    Sending ``direction="unknown"`` explicitly must give exactly the same
    ``estimated_price_vnd`` as sending the base payload unchanged.
    """
    explicit_resp = client.post(
        "/avm/v2/predict",
        json={**_PREDICT_PAYLOAD, "direction": "unknown"},
    )
    default_resp = client.post("/avm/v2/predict", json=_PREDICT_PAYLOAD)

    # Check the HTTP status first so a rejected payload is reported as an
    # HTTP failure rather than as a KeyError on the missing price field.
    assert explicit_resp.status_code == 200
    assert default_resp.status_code == 200

    explicit = explicit_resp.json()
    default = default_resp.json()
    assert explicit["estimated_price_vnd"] == default["estimated_price_vnd"]
|
||||
|
||||
|
||||
def test_predict_v2_drivers_include_new_features():
    """Drivers should include all four new v2 features.

    The ``drivers`` list of the prediction response must contain entries
    whose ``feature`` is ``neighborhood_score``, ``direction_encoded``,
    ``floor_level`` and ``developer_reputation``.
    """
    resp = client.post("/avm/v2/predict", json=_PREDICT_PAYLOAD)
    # Check the HTTP status first so a rejected payload is reported as an
    # HTTP failure rather than as a KeyError on the missing "drivers" field.
    assert resp.status_code == 200

    data = resp.json()
    driver_names = {d["feature"] for d in data["drivers"]}

    expected = {
        "neighborhood_score",
        "direction_encoded",
        "floor_level",
        "developer_reputation",
    }
    # Report every absent driver at once instead of stopping at the first.
    missing = expected - driver_names
    assert not missing, f"missing drivers: {sorted(missing)}"
|
||||
|
||||
|
||||
# ── Training & model info ───────────────────────────────────────
|
||||
|
||||
|
||||
def test_train_v2_no_data():
|
||||
"""Training without data returns scaffold with zero metrics."""
|
||||
resp = client.post(
|
||||
"/avm/v2/train",
|
||||
json={"optuna_trials": 10},
|
||||
@@ -159,10 +249,7 @@ def test_train_v2_scaffold():
|
||||
data = resp.json()
|
||||
assert "model_version" in data
|
||||
assert "ensemble-v2-" in data["model_version"]
|
||||
assert data["metrics"]["mae"] == 0.0 # scaffold returns zeros
|
||||
assert "xgboost" in data["best_params"]
|
||||
assert "lightgbm" in data["best_params"]
|
||||
assert "catboost" in data["best_params"]
|
||||
assert data["training_samples"] == 0
|
||||
|
||||
|
||||
def test_model_info_v2():
|
||||
@@ -174,6 +261,26 @@ def test_model_info_v2():
|
||||
assert data["is_active"] is True
|
||||
|
||||
|
||||
# ── Model versioning ────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_list_versions():
    """The versions endpoint answers 200 with a JSON list body."""
    response = client.get("/avm/v2/versions")
    assert response.status_code == 200

    versions = response.json()
    assert isinstance(versions, list)
|
||||
|
||||
|
||||
def test_rollback_not_found():
    """Rolling back to a version that does not exist yields HTTP 404."""
    rollback_request = {"target_version": "nonexistent-version-xyz"}
    response = client.post("/avm/v2/rollback", json=rollback_request)
    assert response.status_code == 404
|
||||
|
||||
|
||||
# ── A/B comparison tests ─────────────────────────────────────
|
||||
|
||||
_COMPARE_PAYLOAD = {
|
||||
@@ -227,12 +334,17 @@ def test_compare_v1_with_v2_features():
|
||||
"""Compare endpoint passes v2-specific features correctly."""
|
||||
payload = {
|
||||
**_COMPARE_PAYLOAD,
|
||||
"neighborhood_score": 0.8,
|
||||
"distance_to_cbd_km": 5.0,
|
||||
"distance_to_metro_km": 0.8,
|
||||
"flood_zone_risk": 0.1,
|
||||
"building_age_years": 3,
|
||||
"floor_level": 15,
|
||||
"total_floors": 30,
|
||||
"direction": "southeast",
|
||||
"has_elevator": True,
|
||||
"has_parking": True,
|
||||
"developer_reputation": 0.85,
|
||||
"renovation_score": 0.9,
|
||||
"view_quality": 0.8,
|
||||
"interior_quality": 0.85,
|
||||
|
||||
Reference in New Issue
Block a user