feat(ai-services): complete AVM v2 ensemble — upload endpoint, per-district metrics, A/B routing

- Add POST /avm/v2/upload-training-data so AvmRetrainCronService can push
  CSV rows before triggering retraining (was called but missing)
- Add per-district MAE/MAPE/RMSE/R² to _evaluate_ensemble output;
  district_metrics are now returned in AVMv2TrainResponse and stored
  separately from global metrics in the model registry
- Add predict_with_ab() that applies the active model's ab_test_traffic_pct
  for deterministic per-property cohort assignment (v2 vs heuristic baseline)
- Add POST /avm/v2/ab-config to set traffic_pct on the active registry entry
- Add AVMv2ABConfigRequest schema
- Expand test suite: 24 → 28 tests covering upload, A/B config, and new
  validation paths; all green

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Ho Ngoc Hai
2026-04-21 04:39:57 +07:00
parent 9cefd439db
commit 66f952a4a8
4 changed files with 224 additions and 8 deletions

View File

@@ -377,3 +377,68 @@ def test_compare_v1_with_v2_features():
# v2 should capture these extra features
assert data["v2"]["estimated_price_vnd"] > 0
assert data["v2"]["model_version"] is not None
# ── Upload training data ────────────────────────────────────────
_CSV_HEADER = (
"property_type,area_m2,rooms,floor_level,total_floors,direction,floor_ratio,"
"building_age_years,has_elevator,has_parking,has_pool,has_legal_paper,"
"developer_reputation,neighborhood_score,distance_to_cbd_km,distance_to_metro_km,"
"distance_to_school_km,distance_to_hospital_km,distance_to_park_km,distance_to_mall_km,"
"flood_zone_risk,avg_price_district_3m_vnd_m2,listing_density,absorption_rate,"
"dom_avg,price_momentum_30d,yoy_change,renovation_score,view_quality,interior_quality,"
"noise_level,natural_light,month,district,price_vnd"
)
_CSV_ROW = (
"apartment,80,2,5,20,south,1.0,3,1,1,0,1,0.8,0.7,5,1,0.5,2,1,3,"
"0.1,85000000,10,0.3,30,0.01,0.05,0.8,0.7,0.75,0.3,0.8,3,Cầu Giấy,7000000000"
)
def test_upload_training_data_ok(tmp_path):
"""Upload endpoint accepts valid CSV and returns row count."""
from unittest.mock import patch
from app import config as cfg
with patch.object(cfg.settings, "model_path", str(tmp_path)):
csv_body = f"{_CSV_HEADER}\n{_CSV_ROW}\n"
resp = client.post(
"/avm/v2/upload-training-data",
content=csv_body,
headers={"Content-Type": "text/csv"},
)
assert resp.status_code == 200
data = resp.json()
assert data["rows_received"] == 1
def test_upload_training_data_missing_price_vnd():
"""Upload endpoint rejects CSV without price_vnd column."""
bad_csv = "property_type,area_m2\napartment,80\n"
resp = client.post(
"/avm/v2/upload-training-data",
content=bad_csv,
headers={"Content-Type": "text/csv"},
)
assert resp.status_code == 400
assert "price_vnd" in resp.json()["detail"]
def test_upload_training_data_empty_body():
"""Upload endpoint rejects empty body."""
resp = client.post(
"/avm/v2/upload-training-data",
content=b"",
headers={"Content-Type": "text/csv"},
)
assert resp.status_code == 400
# ── A/B config endpoint ─────────────────────────────────────────
def test_ab_config_no_registry():
"""AB config endpoint returns 404 when no model is registered (heuristic-only run)."""
resp = client.post("/avm/v2/ab-config", json={"traffic_pct": 0.10})
# Fresh test env has no registry → 404
assert resp.status_code == 404