From a6e53e3d068c6ef9cb7afa43e91b4556f03c1e38 Mon Sep 17 00:00:00 2001 From: Ho Ngoc Hai Date: Thu, 16 Apr 2026 17:35:30 +0700 Subject: [PATCH] feat(ai-services): add AVM v2 A/B comparison endpoint and tests Add POST /avm/v2/compare-v1 endpoint that runs both v1 (single-model) and v2 (ensemble) AVM predictions on the same property and returns a side-by-side comparison with price diff, confidence delta, and a recommendation on which model to prefer. - ABComparisonRequest/Response schemas in avm_v2 models - compare_v1() method in AVMv2EnsembleService - 4 new integration tests for the comparison endpoint - All 47 Python tests pass Co-Authored-By: Paperclip --- libs/ai-services/app/models/avm_v2.py | 61 +++++++++++++ libs/ai-services/app/routers/avm_v2.py | 12 +++ .../app/services/avm_v2_service.py | 90 +++++++++++++++++++ libs/ai-services/tests/test_avm_v2.py | 72 +++++++++++++++ 4 files changed, 235 insertions(+) diff --git a/libs/ai-services/app/models/avm_v2.py b/libs/ai-services/app/models/avm_v2.py index 3ac98bc..735c677 100644 --- a/libs/ai-services/app/models/avm_v2.py +++ b/libs/ai-services/app/models/avm_v2.py @@ -183,3 +183,64 @@ class AVMv2ModelInfo(BaseModel): metrics: dict is_active: bool = Field(True) ab_test_traffic_pct: float = Field(0.0, ge=0, le=1) + + +class AVMv1Summary(BaseModel): + """Compact summary of a v1 prediction for comparison.""" + + estimated_price_vnd: float + confidence: float + price_per_m2: float + price_range_low: float + price_range_high: float + + +class AVMv2Summary(BaseModel): + """Compact summary of a v2 prediction for comparison.""" + + estimated_price_vnd: float + confidence: float + price_per_m2_vnd: float + price_range_low_vnd: float + price_range_high_vnd: float + model_version: str + ensemble_method: str + + +class ABComparisonRequest(BaseModel): + """Request for A/B comparison between v1 and v2.""" + + district: str = Field(..., min_length=1) + city: str = Field(..., min_length=1) + property_type: str = Field(...) + area_m2: float = Field(..., gt=0) + rooms: int = Field(0, ge=0) + bedrooms: int = Field(0, ge=0, description="Alias for rooms, used by v1") + floors: int = Field(0, ge=0) + frontage: float = Field(0.0, ge=0) + has_legal_paper: bool = Field(True) + # v2-specific features (optional, defaults applied) + distance_to_cbd_km: float = Field(0.0, ge=0) + distance_to_metro_km: float = Field(0.0, ge=0) + flood_zone_risk: float = Field(0.0, ge=0, le=1) + building_age_years: int = Field(0, ge=0) + has_elevator: bool = Field(False) + has_parking: bool = Field(False) + has_pool: bool = Field(False) + renovation_score: float = Field(0.5, ge=0, le=1) + view_quality: float = Field(0.5, ge=0, le=1) + interior_quality: float = Field(0.5, ge=0, le=1) + month: int = Field(1, ge=1, le=12) + quarter: int = Field(1, ge=1, le=4) + is_year_end: bool = Field(False) + + +class ABComparisonResponse(BaseModel): + """Side-by-side A/B comparison of v1 vs v2 predictions.""" + + v1: AVMv1Summary + v2: AVMv2Summary + price_diff_vnd: float = Field(..., description="v2 - v1 price difference") + price_diff_pct: float = Field(..., description="Percentage difference ((v2-v1)/v1 * 100)") + confidence_diff: float = Field(..., description="v2 - v1 confidence difference") + recommendation: str = Field(..., description="Which model to prefer and why") diff --git a/libs/ai-services/app/routers/avm_v2.py b/libs/ai-services/app/routers/avm_v2.py index fa9b482..584714b 100644 --- a/libs/ai-services/app/routers/avm_v2.py +++ b/libs/ai-services/app/routers/avm_v2.py @@ -3,6 +3,8 @@ from fastapi import APIRouter from app.models.avm_v2 import ( + ABComparisonRequest, + ABComparisonResponse, AVMv2ModelInfo, AVMv2PredictRequest, AVMv2PredictResponse, @@ -33,6 +35,16 @@ def train_v2(req: AVMv2TrainRequest) -> AVMv2TrainResponse: return avm_v2_service.train(req) +@router.post("/compare-v1", response_model=ABComparisonResponse) +def compare_v1(req: ABComparisonRequest) -> ABComparisonResponse: + """Compare v1 (single-model) vs v2 (ensemble) predictions side by side. + + Runs both models on the same property and returns price difference, + confidence delta, and a recommendation on which to prefer. + """ + return avm_v2_service.compare_v1(req) + + @router.get("/model-info", response_model=AVMv2ModelInfo) def model_info_v2() -> AVMv2ModelInfo: """Get current active ensemble model information.""" diff --git a/libs/ai-services/app/services/avm_v2_service.py b/libs/ai-services/app/services/avm_v2_service.py index a0efecd..e1d6e87 100644 --- a/libs/ai-services/app/services/avm_v2_service.py +++ b/libs/ai-services/app/services/avm_v2_service.py @@ -12,12 +12,17 @@ from typing import Any import numpy as np +from app.models.avm import AVMPredictRequest from app.models.avm_v2 import ( + ABComparisonRequest, + ABComparisonResponse, + AVMv1Summary, AVMv2Comparable, AVMv2FeatureImportance, AVMv2ModelInfo, AVMv2PredictRequest, AVMv2PredictResponse, + AVMv2Summary, AVMv2TrainRequest, AVMv2TrainResponse, ModelPrediction, @@ -530,6 +535,91 @@ class AVMv2EnsembleService: ab_test_traffic_pct=0.0, ) + # ── A/B comparison ───────────────────────────────────────── + + def compare_v1(self, req: ABComparisonRequest) -> ABComparisonResponse: + """Compare v1 and v2 predictions on the same property.""" + from app.services.avm_service import avm_service + + # Build v1 request + v1_req = AVMPredictRequest( + area=req.area_m2, + district=req.district, + city=req.city, + property_type=req.property_type, + bedrooms=req.bedrooms or req.rooms, + floors=req.floors, + frontage=req.frontage, + has_legal_paper=req.has_legal_paper, + ) + v1_result = avm_service.predict(v1_req) + + # Build v2 request + v2_req = AVMv2PredictRequest( + district=req.district, + city=req.city, + property_type=req.property_type, + area_m2=req.area_m2, + rooms=req.rooms or req.bedrooms, + has_legal_paper=req.has_legal_paper, + distance_to_cbd_km=req.distance_to_cbd_km, + distance_to_metro_km=req.distance_to_metro_km, + flood_zone_risk=req.flood_zone_risk, + building_age_years=req.building_age_years, + has_elevator=req.has_elevator, + has_parking=req.has_parking, + has_pool=req.has_pool, + renovation_score=req.renovation_score, + view_quality=req.view_quality, + interior_quality=req.interior_quality, + month=req.month, + quarter=req.quarter, + is_year_end=req.is_year_end, + ) + v2_result = self.predict(v2_req) + + # Compute diffs + price_diff = v2_result.estimated_price_vnd - v1_result.estimated_price_vnd + price_diff_pct = ( + (price_diff / v1_result.estimated_price_vnd * 100) + if v1_result.estimated_price_vnd > 0 + else 0.0 + ) + confidence_diff = v2_result.confidence - v1_result.confidence + + # Recommendation logic + if v2_result.confidence > v1_result.confidence + 0.05: + recommendation = "v2 — higher confidence from ensemble model agreement" + elif v1_result.confidence > v2_result.confidence + 0.05: + recommendation = "v1 — higher confidence, v2 models may disagree on this property" + elif abs(price_diff_pct) < 5: + recommendation = "Both models agree (< 5% price difference)" + else: + recommendation = "v2 — richer feature set captures more market factors" + + return ABComparisonResponse( + v1=AVMv1Summary( + estimated_price_vnd=v1_result.estimated_price_vnd, + confidence=v1_result.confidence, + price_per_m2=v1_result.price_per_m2, + price_range_low=v1_result.price_range_low, + price_range_high=v1_result.price_range_high, + ), + v2=AVMv2Summary( + estimated_price_vnd=v2_result.estimated_price_vnd, + confidence=v2_result.confidence, + price_per_m2_vnd=v2_result.price_per_m2_vnd, + price_range_low_vnd=v2_result.price_range_low_vnd, + price_range_high_vnd=v2_result.price_range_high_vnd, + model_version=v2_result.model_version, + ensemble_method=v2_result.ensemble_method, + ), + price_diff_vnd=round(price_diff, -3), + price_diff_pct=round(price_diff_pct, 2), + confidence_diff=round(confidence_diff, 4), + recommendation=recommendation, + ) + # Module-level singleton avm_v2_service = AVMv2EnsembleService() diff --git a/libs/ai-services/tests/test_avm_v2.py b/libs/ai-services/tests/test_avm_v2.py index a6d1001..5bc5acf 100644 --- a/libs/ai-services/tests/test_avm_v2.py +++ b/libs/ai-services/tests/test_avm_v2.py @@ -172,3 +172,75 @@ def test_model_info_v2(): data = resp.json() assert "model_version" in data assert data["is_active"] is True + + +# ── A/B comparison tests ───────────────────────────────────── + +_COMPARE_PAYLOAD = { + "district": "Cầu Giấy", + "city": "Hà Nội", + "property_type": "apartment", + "area_m2": 80.0, + "rooms": 2, + "month": 3, + "quarter": 1, +} + + +def test_compare_v1_returns_both_models(): + """Compare endpoint returns v1 and v2 predictions.""" + resp = client.post("/avm/v2/compare-v1", json=_COMPARE_PAYLOAD) + assert resp.status_code == 200 + data = resp.json() + + assert "v1" in data + assert "v2" in data + assert data["v1"]["estimated_price_vnd"] > 0 + assert data["v2"]["estimated_price_vnd"] > 0 + assert 0 <= data["v1"]["confidence"] <= 1 + assert 0 <= data["v2"]["confidence"] <= 1 + + +def test_compare_v1_returns_diffs(): + """Compare endpoint computes price and confidence differences.""" + resp = client.post("/avm/v2/compare-v1", json=_COMPARE_PAYLOAD) + data = resp.json() + + expected_diff = data["v2"]["estimated_price_vnd"] - data["v1"]["estimated_price_vnd"] + assert abs(data["price_diff_vnd"] - expected_diff) < 10_000 # rounding tolerance + + assert "price_diff_pct" in data + assert isinstance(data["price_diff_pct"], float) + assert "confidence_diff" in data + + +def test_compare_v1_returns_recommendation(): + """Compare endpoint provides a recommendation string.""" + resp = client.post("/avm/v2/compare-v1", json=_COMPARE_PAYLOAD) + data = resp.json() + + assert "recommendation" in data + assert len(data["recommendation"]) > 0 + + +def test_compare_v1_with_v2_features(): + """Compare endpoint passes v2-specific features correctly.""" + payload = { + **_COMPARE_PAYLOAD, + "distance_to_cbd_km": 5.0, + "distance_to_metro_km": 0.8, + "flood_zone_risk": 0.1, + "building_age_years": 3, + "has_elevator": True, + "has_parking": True, + "renovation_score": 0.9, + "view_quality": 0.8, + "interior_quality": 0.85, + } + resp = client.post("/avm/v2/compare-v1", json=payload) + assert resp.status_code == 200 + data = resp.json() + + # v2 should capture these extra features + assert data["v2"]["estimated_price_vnd"] > 0 + assert data["v2"]["model_version"] is not None