feat(ai-services): add Python FastAPI AI/ML services container

Create libs/ai-services/ with a FastAPI app providing:
- POST /avm/predict — XGBoost-backed property price prediction (heuristic fallback)
- POST /avm/extract-features — Vietnamese NLP feature extraction from listing text
- POST /moderation/check — content moderation with rule-based flagging
- GET /health — health check endpoint

Includes Dockerfile (Python 3.12), docker-compose integration, Pydantic models,
and 9 passing tests covering all endpoints.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Ho Ngoc Hai
2026-04-08 03:08:39 +07:00
parent 4ef54027d6
commit b392bc3570
20 changed files with 730 additions and 0 deletions

View File

View File

@@ -0,0 +1,59 @@
from fastapi.testclient import TestClient
from app.main import app
# Module-level TestClient wrapping the FastAPI app; the tests in this module issue requests through it.
client = TestClient(app)
def test_predict_heuristic():
    """POST /avm/predict with a complete apartment payload returns a sane estimate.

    Verifies the 200 status, positive price figures, a confidence within
    [0, 1], and that the reported low/high range strictly brackets the estimate.
    """
    # NOTE(review): the test name suggests the heuristic (non-model) path is
    # exercised; nothing in this test forces that path — confirm in the service.
    payload = {
        "area": 80.0,
        "district": "Cầu Giấy",
        "city": "Hà Nội",
        "property_type": "apartment",
        "bedrooms": 2,
        "bathrooms": 2,
        "floors": 1,
        "frontage": 0,
        "road_width": 0,
        "has_legal_paper": True,
    }

    response = client.post("/avm/predict", json=payload)
    assert response.status_code == 200

    body = response.json()
    estimate = body["estimated_price_vnd"]
    assert estimate > 0
    assert 0 <= body["confidence"] <= 1
    assert body["price_per_m2"] > 0
    # The range must strictly surround the point estimate.
    assert body["price_range_low"] < estimate
    assert body["price_range_high"] > estimate
def test_predict_validation_error():
    """A payload with a negative area and an empty district is rejected with HTTP 422."""
    invalid_payload = {
        "area": -10,
        "district": "",
        "city": "HN",
        "property_type": "house",
    }
    response = client.post("/avm/predict", json=invalid_payload)
    assert response.status_code == 422
def test_extract_features():
    """POST /avm/extract-features parses the structured fields out of a rich listing text.

    The listing text mentions area, bedroom and bathroom counts, a price and
    legal-paper wording; each is expected back in the ``features`` object.
    """
    listing_text = "Bán căn hộ chung cư 80m2 3 phòng ngủ 2 WC tầng 10 giá 3.5 tỷ sổ đỏ chính chủ"

    response = client.post("/avm/extract-features", json={"text": listing_text})
    assert response.status_code == 200

    extracted = response.json()["features"]
    assert extracted["area"] == 80.0
    assert extracted["bedrooms"] == 3
    assert extracted["bathrooms"] == 2
    assert extracted["property_type"] == "apartment"
    assert extracted["has_legal_paper"] is True
    # "3.5 tỷ" in the text is expected to come back as 3.5 billion.
    assert extracted["price_mentioned"] == 3_500_000_000
def test_extract_features_minimal():
    """A very short listing text still yields a ``house`` property type."""
    request_body = {"text": "Bán nhà riêng"}
    response = client.post("/avm/extract-features", json=request_body)
    assert response.status_code == 200

    features = response.json()["features"]
    assert features["property_type"] == "house"

View File

@@ -0,0 +1,12 @@
from fastapi.testclient import TestClient
from app.main import app
# Module-level TestClient wrapping the FastAPI app; the health test issues its request through it.
client = TestClient(app)
def test_health():
    """GET /health answers 200 with a JSON body whose ``status`` is ``"ok"``."""
    response = client.get("/health")
    assert response.status_code == 200

    payload = response.json()
    assert payload["status"] == "ok"

View File

@@ -0,0 +1,50 @@
from fastapi.testclient import TestClient
from app.main import app
# Module-level TestClient wrapping the FastAPI app; the moderation tests issue requests through it.
client = TestClient(app)
def test_clean_text():
    """An innocuous listing text is not flagged and scores exactly 0.0."""
    request_body = {"text": "Bán căn hộ đẹp tại quận 1", "context": "listing"}

    response = client.post("/moderation/check", json=request_body)
    assert response.status_code == 200

    verdict = response.json()
    assert verdict["is_flagged"] is False
    assert verdict["score"] == 0.0
def test_phone_number_flagged():
    """A listing containing a phone number is flagged as contact info and redacted.

    Expects ``is_flagged`` to be true, at least one flag in the
    ``contact_info`` category, and a ``[REDACTED]`` marker in ``cleaned_text``.
    """
    request_body = {"text": "Liên hệ 0912345678 để xem nhà", "context": "listing"}

    response = client.post("/moderation/check", json=request_body)
    assert response.status_code == 200

    verdict = response.json()
    assert verdict["is_flagged"] is True
    assert any(flag["category"] == "contact_info" for flag in verdict["flags"])
    assert "[REDACTED]" in verdict["cleaned_text"]
def test_scam_language_flagged():
    """A comment containing scam wording is flagged with a ``profanity`` category."""
    # NOTE(review): scam wording is asserted under the "profanity" category —
    # confirm this matches the moderation service's intended taxonomy.
    request_body = {"text": "Cảnh báo lừa đảo từ chủ nhà", "context": "comment"}

    response = client.post("/moderation/check", json=request_body)
    assert response.status_code == 200

    verdict = response.json()
    assert verdict["is_flagged"] is True
    assert any(flag["category"] == "profanity" for flag in verdict["flags"])
def test_prohibited_property():
    """A listing offering protective-forest land is flagged as prohibited content."""
    request_body = {"text": "Bán lô đất rừng phòng hộ 500m2", "context": "listing"}

    response = client.post("/moderation/check", json=request_body)
    assert response.status_code == 200

    verdict = response.json()
    assert verdict["is_flagged"] is True
    assert any(flag["category"] == "prohibited_content" for flag in verdict["flags"])