feat(ai-services): add Python FastAPI AI/ML services container

Create libs/ai-services/ with FastAPI app providing:
- POST /avm/predict — XGBoost-backed property price prediction (heuristic fallback)
- POST /avm/extract-features — Vietnamese NLP feature extraction from listing text
- POST /moderation/check — content moderation with rule-based flagging
- GET /health — health check endpoint

Includes Dockerfile (Python 3.12), docker-compose integration, Pydantic models,
and 9 passing tests covering all endpoints.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Ho Ngoc Hai
2026-04-08 03:08:39 +07:00
parent 4ef54027d6
commit b392bc3570
20 changed files with 730 additions and 0 deletions

View File

View File

@@ -0,0 +1,48 @@
from pydantic import BaseModel, Field
class AVMPredictRequest(BaseModel):
    """Property attributes supplied with a price-prediction request.

    ``area``, ``district``, ``city`` and ``property_type`` are required; all
    other fields fall back to the defaults declared below. ``area`` must be
    strictly positive, and the counts and width measurements must be
    non-negative.
    """

    # --- Required descriptors -------------------------------------------
    area: float = Field(..., gt=0, description="Property area in m²")
    district: str = Field(..., min_length=1, description="District name")
    city: str = Field(..., min_length=1, description="City name")
    # NOTE(review): unlike district/city there is no min_length here, so an
    # empty string passes validation.
    property_type: str = Field(
        ...,
        description="e.g. apartment, house, land",
    )

    # --- Optional counts (0 when omitted) -------------------------------
    bedrooms: int = Field(default=0, ge=0)
    bathrooms: int = Field(default=0, ge=0)
    floors: int = Field(default=0, ge=0)

    # --- Optional measurements (0.0 when omitted) -----------------------
    frontage: float = Field(
        default=0.0,
        ge=0,
        description="Frontage width in meters",
    )
    road_width: float = Field(
        default=0.0,
        ge=0,
        description="Adjacent road width in meters",
    )

    # --- Miscellaneous --------------------------------------------------
    # NOTE(review): no range constraint is applied to the year.
    year_built: int | None = Field(
        default=None,
        description="Year the property was built",
    )
    # Defaults to True, i.e. legal paperwork is assumed unless stated.
    has_legal_paper: bool = Field(
        default=True,
        description="Whether property has sổ đỏ/sổ hồng",
    )
class AVMPredictResponse(BaseModel):
    """Price estimate produced for a prediction request.

    Every field is required. Only ``confidence`` is range-checked (it must lie
    in ``[0, 1]``); the monetary fields are unconstrained floats.
    """

    estimated_price_vnd: float = Field(
        ...,
        description="Estimated price in VND",
    )
    confidence: float = Field(
        ...,
        ge=0,
        le=1,
        description="Prediction confidence score",
    )
    price_per_m2: float = Field(..., description="Price per m² in VND")

    # Lower and upper bounds around the estimate.
    price_range_low: float = Field(
        ...,
        description="Lower bound estimate in VND",
    )
    price_range_high: float = Field(
        ...,
        description="Upper bound estimate in VND",
    )
class FeatureExtractRequest(BaseModel):
    """Listing text from which property features are to be extracted."""

    # Required; must contain at least one character.
    text: str = Field(
        ...,
        min_length=1,
        description="Vietnamese property listing text",
    )
class ExtractedFeatures(BaseModel):
    """Property features pulled out of a listing text.

    Every field is optional and defaults to ``None``, so an instance can be
    built with only the features that were actually found.
    """

    # Size and location
    area: float | None = None
    district: str | None = None
    city: str | None = None

    # Property classification and room/floor counts
    property_type: str | None = None
    bedrooms: int | None = None
    bathrooms: int | None = None
    floors: int | None = None

    # Width measurements
    frontage: float | None = None
    road_width: float | None = None

    # Remaining details; units of price_mentioned are not specified here.
    price_mentioned: float | None = None
    has_legal_paper: bool | None = None
    address_raw: str | None = None
class FeatureExtractResponse(BaseModel):
    """Result of feature extraction: structured features plus raw NLP output.

    ``features`` is required; ``tokens`` and ``entities`` each default to a
    fresh empty list.
    """

    features: ExtractedFeatures
    tokens: list[str] = Field(
        default_factory=list,
        description="Tokenized words",
    )
    # Entity dictionaries are untyped; their keys are not defined in this model.
    entities: list[dict] = Field(
        default_factory=list,
        description="Named entities found",
    )

View File

@@ -0,0 +1,20 @@
from pydantic import BaseModel, Field
class ModerationRequest(BaseModel):
    """Text submitted for moderation, with the context it appears in."""

    # Required; must contain at least one character.
    text: str = Field(
        ...,
        min_length=1,
        description="Text content to moderate",
    )
    # Free-form string defaulting to "listing"; the values named in the
    # description are not enforced by validation.
    context: str = Field(
        default="listing",
        description="Context: listing, comment, profile",
    )
class ModerationFlag(BaseModel):
    """A single moderation finding. All four fields are required."""

    category: str
    # Plain string: the levels listed in the description are documentation
    # only and are not enforced by validation.
    severity: str = Field(
        ...,
        description="low, medium, high",
    )
    matched_text: str
    reason: str
class ModerationResponse(BaseModel):
    """Outcome of a moderation check.

    ``is_flagged`` and ``score`` are required; ``score`` must lie in
    ``[0, 1]``. ``flags`` defaults to a fresh empty list and ``cleaned_text``
    to ``None``.
    """

    is_flagged: bool
    score: float = Field(
        ...,
        ge=0,
        le=1,
        description="Overall risk score",
    )
    flags: list[ModerationFlag] = Field(default_factory=list)
    cleaned_text: str | None = Field(
        default=None,
        description="Text with flagged content redacted",
    )