feat(ai-services): AVM v2 residential — expanded features, training pipeline, model versioning

Add neighborhood_score, developer_reputation, floor_level, and direction premiums
to the multi-model ensemble. Implement a real Optuna-based training pipeline
for XGBoost/LightGBM/CatBoost with grouped train/val/test splits. Add a
file-based model registry with rollback and list-versions endpoints.
Add 23 Python tests covering all new features.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Ho Ngoc Hai
2026-04-16 17:55:03 +07:00
parent 6cf2c23170
commit 9eaec46a37
4 changed files with 743 additions and 56 deletions

View File

@@ -29,10 +29,28 @@ class AVMv2PredictRequest(BaseModel):
0.0, ge=0, le=1, description="Flood zone risk score (0=safe, 1=high risk)"
)
# ── Neighborhood features ─────────────────────────────
neighborhood_score: float = Field(
0.5, ge=0, le=1,
description="Overall neighborhood quality score (0-1, aggregated from safety, amenities, walkability)",
)
# ── Physical features ──────────────────────────────────
property_type: str = Field(..., description="e.g. apartment, house, villa, land")
area_m2: float = Field(..., gt=0, description="Property area in m²")
rooms: int = Field(0, ge=0, description="Total rooms (bedrooms)")
floor_level: int = Field(
0, ge=0,
description="Floor level (0=ground or N/A, relevant for apartments/penthouses)",
)
total_floors: int = Field(
0, ge=0,
description="Total floors in the building (0=N/A)",
)
direction: str = Field(
"unknown",
description="Facing direction: north, south, east, west, northeast, northwest, southeast, southwest, unknown",
)
floor_ratio: float = Field(
1.0, gt=0, description="Total floor area / land area ratio"
)
@@ -41,6 +59,10 @@ class AVMv2PredictRequest(BaseModel):
has_parking: bool = Field(False, description="Property has dedicated parking")
has_pool: bool = Field(False, description="Property has swimming pool")
has_legal_paper: bool = Field(True, description="Has sổ đỏ/sổ hồng")
developer_reputation: float = Field(
0.5, ge=0, le=1,
description="Project developer reputation score (0-1, based on past projects, delivery record)",
)
# ── Market features ────────────────────────────────────
avg_price_district_3m_vnd_m2: float = Field(
@@ -185,6 +207,12 @@ class AVMv2ModelInfo(BaseModel):
ab_test_traffic_pct: float = Field(0.0, ge=0, le=1)
class AVMv2RollbackRequest(BaseModel):
    """Request to rollback to a specific model version."""

    # Required field (Ellipsis default means "no default" in Pydantic);
    # min_length=1 rejects an empty string so a rollback always names a version.
    target_version: str = Field(
        ...,
        min_length=1,
        description="Model version to roll back to",
    )
class AVMv1Summary(BaseModel):
"""Compact summary of a v1 prediction for comparison."""
@@ -220,13 +248,18 @@ class ABComparisonRequest(BaseModel):
frontage: float = Field(0.0, ge=0)
has_legal_paper: bool = Field(True)
# v2-specific features (optional, defaults applied)
neighborhood_score: float = Field(0.5, ge=0, le=1)
distance_to_cbd_km: float = Field(0.0, ge=0)
distance_to_metro_km: float = Field(0.0, ge=0)
flood_zone_risk: float = Field(0.0, ge=0, le=1)
building_age_years: int = Field(0, ge=0)
floor_level: int = Field(0, ge=0)
total_floors: int = Field(0, ge=0)
direction: str = Field("unknown")
has_elevator: bool = Field(False)
has_parking: bool = Field(False)
has_pool: bool = Field(False)
developer_reputation: float = Field(0.5, ge=0, le=1)
renovation_score: float = Field(0.5, ge=0, le=1)
view_quality: float = Field(0.5, ge=0, le=1)
interior_quality: float = Field(0.5, ge=0, le=1)