feat(ai-services): add AVM v2 A/B comparison endpoint and tests
Add a POST /avm/v2/compare-v1 endpoint that runs both the v1 (single-model) and v2 (ensemble) AVM predictions on the same property and returns a side-by-side comparison with the price difference, the confidence delta, and a recommendation on which model to prefer.

- ABComparisonRequest/Response schemas in avm_v2 models
- compare_v1() method in AVMv2EnsembleService
- 4 new integration tests for the comparison endpoint
- All 47 Python tests pass

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
@@ -183,3 +183,64 @@ class AVMv2ModelInfo(BaseModel):
|
|||||||
metrics: dict
|
metrics: dict
|
||||||
is_active: bool = Field(True)
|
is_active: bool = Field(True)
|
||||||
ab_test_traffic_pct: float = Field(0.0, ge=0, le=1)
|
ab_test_traffic_pct: float = Field(0.0, ge=0, le=1)
|
||||||
|
|
||||||
|
|
||||||
|
class AVMv1Summary(BaseModel):
    """Trimmed-down view of a v1 prediction, used inside an A/B comparison.

    Every field is required. The comparison service fills them by copying
    the same-named attributes from the v1 prediction result.
    """

    # Point estimate and the model's confidence in it.
    estimated_price_vnd: float
    confidence: float

    # Unit price, named as the v1 result names it (no ``_vnd`` suffix).
    price_per_m2: float

    # Lower and upper bounds of the v1 price range.
    price_range_low: float
    price_range_high: float
|
||||||
|
|
||||||
|
|
||||||
|
class AVMv2Summary(BaseModel):
    """Trimmed-down view of a v2 prediction, used inside an A/B comparison.

    Every field is required. The comparison service fills them by copying
    the same-named attributes from the v2 prediction result.
    """

    # Point estimate and the model's confidence in it.
    estimated_price_vnd: float
    confidence: float

    # Unit price and price range, using the v2 ``_vnd``-suffixed names.
    price_per_m2_vnd: float
    price_range_low_vnd: float
    price_range_high_vnd: float

    # Provenance of the prediction as reported by the v2 result.
    model_version: str
    ensemble_method: str
|
||||||
|
|
||||||
|
|
||||||
|
class ABComparisonRequest(BaseModel):
    """Input payload for the v1-vs-v2 A/B comparison.

    One request carries the fields for both models. ``district``, ``city``,
    ``property_type`` and ``area_m2`` are required; everything else has a
    default so callers may send only what they know.
    """

    # --- Required property identity -------------------------------------
    district: str = Field(..., min_length=1)
    city: str = Field(..., min_length=1)
    property_type: str = Field(...)
    area_m2: float = Field(..., gt=0)

    # --- Optional basic attributes --------------------------------------
    # ``rooms`` and ``bedrooms`` back each other up: the comparison service
    # falls back to the other one when the preferred field is 0.
    rooms: int = Field(default=0, ge=0)
    bedrooms: int = Field(default=0, ge=0, description="Alias for rooms, used by v1")
    # ``floors`` and ``frontage`` are forwarded to the v1 request only.
    floors: int = Field(default=0, ge=0)
    frontage: float = Field(default=0.0, ge=0)
    has_legal_paper: bool = Field(default=True)

    # --- v2-specific features (optional, defaults applied) ---------------
    distance_to_cbd_km: float = Field(default=0.0, ge=0)
    distance_to_metro_km: float = Field(default=0.0, ge=0)
    flood_zone_risk: float = Field(default=0.0, ge=0, le=1)
    building_age_years: int = Field(default=0, ge=0)
    has_elevator: bool = Field(default=False)
    has_parking: bool = Field(default=False)
    has_pool: bool = Field(default=False)

    # Scores constrained to the closed interval [0, 1]; 0.5 is the default.
    renovation_score: float = Field(default=0.5, ge=0, le=1)
    view_quality: float = Field(default=0.5, ge=0, le=1)
    interior_quality: float = Field(default=0.5, ge=0, le=1)

    # Calendar features. NOTE(review): ``month`` and ``quarter`` are
    # validated independently; nothing here checks that they agree.
    month: int = Field(default=1, ge=1, le=12)
    quarter: int = Field(default=1, ge=1, le=4)
    is_year_end: bool = Field(default=False)
|
||||||
|
|
||||||
|
|
||||||
|
class ABComparisonResponse(BaseModel):
    """Result of an A/B comparison: both summaries plus derived deltas.

    All deltas are expressed as "v2 minus v1", so a positive value means
    the v2 figure is the larger one.
    """

    # Per-model summaries.
    v1: AVMv1Summary
    v2: AVMv2Summary

    # Derived comparison figures.
    price_diff_vnd: float = Field(
        ...,
        description="v2 - v1 price difference",
    )
    price_diff_pct: float = Field(
        ...,
        description="Percentage difference ((v2-v1)/v1 * 100)",
    )
    confidence_diff: float = Field(
        ...,
        description="v2 - v1 confidence difference",
    )

    # Human-readable verdict produced by the comparison service.
    recommendation: str = Field(
        ...,
        description="Which model to prefer and why",
    )
|
||||||
|
|||||||
@@ -3,6 +3,8 @@
|
|||||||
from fastapi import APIRouter
|
from fastapi import APIRouter
|
||||||
|
|
||||||
from app.models.avm_v2 import (
|
from app.models.avm_v2 import (
|
||||||
|
ABComparisonRequest,
|
||||||
|
ABComparisonResponse,
|
||||||
AVMv2ModelInfo,
|
AVMv2ModelInfo,
|
||||||
AVMv2PredictRequest,
|
AVMv2PredictRequest,
|
||||||
AVMv2PredictResponse,
|
AVMv2PredictResponse,
|
||||||
@@ -33,6 +35,16 @@ def train_v2(req: AVMv2TrainRequest) -> AVMv2TrainResponse:
|
|||||||
return avm_v2_service.train(req)
|
return avm_v2_service.train(req)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/compare-v1", response_model=ABComparisonResponse)
def compare_v1(req: ABComparisonRequest) -> ABComparisonResponse:
    """Run the v1 and v2 AVM models on one property and compare them.

    The handler is a thin pass-through: all work is delegated to
    ``avm_v2_service.compare_v1``.

    Args:
        req: Property description shared by both models.

    Returns:
        Both predictions side by side, together with the price difference,
        the confidence delta and a recommendation on which model to prefer.
    """
    comparison = avm_v2_service.compare_v1(req)
    return comparison
|
||||||
|
|
||||||
|
|
||||||
@router.get("/model-info", response_model=AVMv2ModelInfo)
|
@router.get("/model-info", response_model=AVMv2ModelInfo)
|
||||||
def model_info_v2() -> AVMv2ModelInfo:
|
def model_info_v2() -> AVMv2ModelInfo:
|
||||||
"""Get current active ensemble model information."""
|
"""Get current active ensemble model information."""
|
||||||
|
|||||||
@@ -12,12 +12,17 @@ from typing import Any
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
from app.models.avm import AVMPredictRequest
|
||||||
from app.models.avm_v2 import (
|
from app.models.avm_v2 import (
|
||||||
|
ABComparisonRequest,
|
||||||
|
ABComparisonResponse,
|
||||||
|
AVMv1Summary,
|
||||||
AVMv2Comparable,
|
AVMv2Comparable,
|
||||||
AVMv2FeatureImportance,
|
AVMv2FeatureImportance,
|
||||||
AVMv2ModelInfo,
|
AVMv2ModelInfo,
|
||||||
AVMv2PredictRequest,
|
AVMv2PredictRequest,
|
||||||
AVMv2PredictResponse,
|
AVMv2PredictResponse,
|
||||||
|
AVMv2Summary,
|
||||||
AVMv2TrainRequest,
|
AVMv2TrainRequest,
|
||||||
AVMv2TrainResponse,
|
AVMv2TrainResponse,
|
||||||
ModelPrediction,
|
ModelPrediction,
|
||||||
@@ -530,6 +535,91 @@ class AVMv2EnsembleService:
|
|||||||
ab_test_traffic_pct=0.0,
|
ab_test_traffic_pct=0.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ── A/B comparison ─────────────────────────────────────────
|
||||||
|
|
||||||
|
def compare_v1(self, req: ABComparisonRequest) -> ABComparisonResponse:
    """Predict with both v1 and v2 for one property and report the deltas.

    Args:
        req: Combined request carrying the v1 fields and the v2 features.

    Returns:
        An ``ABComparisonResponse`` with both model summaries, the price
        difference (rounded to the nearest 1,000), the percentage
        difference (2 decimals), the confidence difference (4 decimals)
        and a recommendation string. The recommendation is decided on the
        unrounded values.
    """
    # NOTE(review): imported at call time rather than at module level,
    # presumably to avoid a circular import; confirm before hoisting.
    from app.services.avm_service import avm_service

    # ── v1 prediction ──────────────────────────────────────────
    # v1 prefers ``bedrooms`` and falls back to ``rooms`` when it is 0.
    legacy_request = AVMPredictRequest(
        area=req.area_m2,
        district=req.district,
        city=req.city,
        property_type=req.property_type,
        bedrooms=req.bedrooms or req.rooms,
        floors=req.floors,
        frontage=req.frontage,
        has_legal_paper=req.has_legal_paper,
    )
    legacy = avm_service.predict(legacy_request)

    # ── v2 prediction ──────────────────────────────────────────
    # v2 prefers ``rooms`` and falls back to ``bedrooms`` when it is 0.
    ensemble_request = AVMv2PredictRequest(
        district=req.district,
        city=req.city,
        property_type=req.property_type,
        area_m2=req.area_m2,
        rooms=req.rooms or req.bedrooms,
        has_legal_paper=req.has_legal_paper,
        distance_to_cbd_km=req.distance_to_cbd_km,
        distance_to_metro_km=req.distance_to_metro_km,
        flood_zone_risk=req.flood_zone_risk,
        building_age_years=req.building_age_years,
        has_elevator=req.has_elevator,
        has_parking=req.has_parking,
        has_pool=req.has_pool,
        renovation_score=req.renovation_score,
        view_quality=req.view_quality,
        interior_quality=req.interior_quality,
        month=req.month,
        quarter=req.quarter,
        is_year_end=req.is_year_end,
    )
    ensemble = self.predict(ensemble_request)

    # ── Deltas (always v2 minus v1) ────────────────────────────
    legacy_price = legacy.estimated_price_vnd
    legacy_conf = legacy.confidence
    ensemble_conf = ensemble.confidence

    price_diff = ensemble.estimated_price_vnd - legacy_price
    if legacy_price > 0:
        price_diff_pct = price_diff / legacy_price * 100
    else:
        # No meaningful percentage against a non-positive baseline.
        price_diff_pct = 0.0
    confidence_diff = ensemble_conf - legacy_conf

    # ── Recommendation ─────────────────────────────────────────
    # A confidence lead of more than 0.05 decides first; otherwise the
    # size of the price gap decides.
    if ensemble_conf > legacy_conf + 0.05:
        verdict = "v2 — higher confidence from ensemble model agreement"
    elif legacy_conf > ensemble_conf + 0.05:
        verdict = "v1 — higher confidence, v2 models may disagree on this property"
    else:
        verdict = (
            "Both models agree (< 5% price difference)"
            if abs(price_diff_pct) < 5
            else "v2 — richer feature set captures more market factors"
        )

    # ── Assemble the response ──────────────────────────────────
    v1_summary = AVMv1Summary(
        estimated_price_vnd=legacy.estimated_price_vnd,
        confidence=legacy.confidence,
        price_per_m2=legacy.price_per_m2,
        price_range_low=legacy.price_range_low,
        price_range_high=legacy.price_range_high,
    )
    v2_summary = AVMv2Summary(
        estimated_price_vnd=ensemble.estimated_price_vnd,
        confidence=ensemble.confidence,
        price_per_m2_vnd=ensemble.price_per_m2_vnd,
        price_range_low_vnd=ensemble.price_range_low_vnd,
        price_range_high_vnd=ensemble.price_range_high_vnd,
        model_version=ensemble.model_version,
        ensemble_method=ensemble.ensemble_method,
    )
    return ABComparisonResponse(
        v1=v1_summary,
        v2=v2_summary,
        price_diff_vnd=round(price_diff, -3),
        price_diff_pct=round(price_diff_pct, 2),
        confidence_diff=round(confidence_diff, 4),
        recommendation=verdict,
    )
|
||||||
|
|
||||||
|
|
||||||
# Module-level singleton
|
# Module-level singleton
|
||||||
avm_v2_service = AVMv2EnsembleService()
|
avm_v2_service = AVMv2EnsembleService()
|
||||||
|
|||||||
@@ -172,3 +172,75 @@ def test_model_info_v2():
|
|||||||
data = resp.json()
|
data = resp.json()
|
||||||
assert "model_version" in data
|
assert "model_version" in data
|
||||||
assert data["is_active"] is True
|
assert data["is_active"] is True
|
||||||
|
|
||||||
|
|
||||||
|
# ── A/B comparison tests ─────────────────────────────────────
|
||||||
|
|
||||||
|
# Minimal valid body for POST /avm/v2/compare-v1, shared by the comparison
# tests: the four required fields plus a room count and calendar features.
# Every omitted field falls back to the request model's default.
_COMPARE_PAYLOAD = {
    # Required fields
    "district": "Cầu Giấy",
    "city": "Hà Nội",
    "property_type": "apartment",
    "area_m2": 80.0,
    # Optional fields
    "rooms": 2,
    "month": 3,
    "quarter": 1,
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_compare_v1_returns_both_models():
    """The comparison response carries a sane prediction for each model."""
    response = client.post("/avm/v2/compare-v1", json=_COMPARE_PAYLOAD)
    assert response.status_code == 200

    body = response.json()
    versions = ("v1", "v2")

    # Both summaries must be present...
    for version in versions:
        assert version in body
    # ...with a positive price estimate...
    for version in versions:
        assert body[version]["estimated_price_vnd"] > 0
    # ...and a confidence inside [0, 1].
    for version in versions:
        assert 0 <= body[version]["confidence"] <= 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_compare_v1_returns_diffs():
    """Compare endpoint computes price and confidence differences."""
    resp = client.post("/avm/v2/compare-v1", json=_COMPARE_PAYLOAD)
    # Check the status first so a failed request is reported as such
    # instead of surfacing as a KeyError on the body lookups below.
    assert resp.status_code == 200
    data = resp.json()

    # The reported price diff is rounded by the service, hence the tolerance.
    expected_diff = data["v2"]["estimated_price_vnd"] - data["v1"]["estimated_price_vnd"]
    assert abs(data["price_diff_vnd"] - expected_diff) < 10_000  # rounding tolerance

    assert "price_diff_pct" in data
    assert isinstance(data["price_diff_pct"], float)

    # The confidence diff is "v2 minus v1", rounded to 4 decimals by the
    # service, so it can deviate from the raw difference by at most 5e-5.
    assert "confidence_diff" in data
    expected_conf_diff = data["v2"]["confidence"] - data["v1"]["confidence"]
    assert abs(data["confidence_diff"] - expected_conf_diff) < 1e-4
|
||||||
|
|
||||||
|
|
||||||
|
def test_compare_v1_returns_recommendation():
    """Compare endpoint provides a non-empty recommendation string."""
    resp = client.post("/avm/v2/compare-v1", json=_COMPARE_PAYLOAD)
    # Check the status first so a failed request is reported as such
    # instead of surfacing as a confusing assertion on the error body.
    assert resp.status_code == 200
    data = resp.json()

    assert "recommendation" in data
    assert len(data["recommendation"]) > 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_compare_v1_with_v2_features():
    """The endpoint accepts a request enriched with v2-only features."""
    v2_only_features = {
        "distance_to_cbd_km": 5.0,
        "distance_to_metro_km": 0.8,
        "flood_zone_risk": 0.1,
        "building_age_years": 3,
        "has_elevator": True,
        "has_parking": True,
        "renovation_score": 0.9,
        "view_quality": 0.8,
        "interior_quality": 0.85,
    }
    # Base payload first, then the extras, so key order matches a literal merge.
    enriched_payload = {**_COMPARE_PAYLOAD, **v2_only_features}

    response = client.post("/avm/v2/compare-v1", json=enriched_payload)
    assert response.status_code == 200

    v2_summary = response.json()["v2"]
    # The v2 side must still produce a usable, versioned prediction.
    assert v2_summary["estimated_price_vnd"] > 0
    assert v2_summary["model_version"] is not None
|
||||||
|
|||||||
Reference in New Issue
Block a user