feat(ai-services): add Python FastAPI AI/ML services container

Create libs/ai-services/ with FastAPI app providing:

- POST /avm/predict — XGBoost-backed property price prediction (heuristic fallback)
- POST /avm/extract-features — Vietnamese NLP feature extraction from listing text
- POST /moderation/check — content moderation with rule-based flagging
- GET /health — health check endpoint

Includes Dockerfile (Python 3.12), docker-compose integration, Pydantic models, and 9 passing tests covering all endpoints.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-08 03:08:39 +07:00
parent 4ef54027d6
commit b392bc3570
20 changed files with 730 additions and 0 deletions
--- a/libs/ai-services/tests/test_moderation.py
+++ b/libs/ai-services/tests/test_moderation.py
@@ -0,0 +1,50 @@
+from fastapi.testclient import TestClient
+
+from app.main import app
+
+client = TestClient(app)  # single client instance shared by every test in this module
+
+
+def test_clean_text():
+    """An ordinary listing description comes back unflagged with score 0.0."""
+    payload = {"text": "Bán căn hộ đẹp tại quận 1", "context": "listing"}
+    response = client.post("/moderation/check", json=payload)
+    assert response.status_code == 200
+
+    body = response.json()
+    assert body["is_flagged"] is False
+    assert body["score"] == 0.0
+
+
+def test_phone_number_flagged():
+    """A phone number yields a contact_info flag and a [REDACTED] marker."""
+    payload = {"text": "Liên hệ 0912345678 để xem nhà", "context": "listing"}
+    response = client.post("/moderation/check", json=payload)
+    assert response.status_code == 200
+
+    body = response.json()
+    assert body["is_flagged"] is True
+    assert "contact_info" in (flag["category"] for flag in body["flags"])
+    assert "[REDACTED]" in body["cleaned_text"]
+
+
+def test_scam_language_flagged():
+    """Scam wording is flagged as "profanity" (NOTE(review): confirm category)."""
+    payload = {"text": "Cảnh báo lừa đảo từ chủ nhà", "context": "comment"}
+    response = client.post("/moderation/check", json=payload)
+    assert response.status_code == 200
+
+    body = response.json()
+    assert body["is_flagged"] is True
+    assert "profanity" in (flag["category"] for flag in body["flags"])
+
+
+def test_prohibited_property():
+    """A listing for protected forest land gets a prohibited_content flag."""
+    payload = {"text": "Bán lô đất rừng phòng hộ 500m2", "context": "listing"}
+    response = client.post("/moderation/check", json=payload)
+    assert response.status_code == 200
+
+    body = response.json()
+    assert body["is_flagged"] is True
+    assert "prohibited_content" in (flag["category"] for flag in body["flags"])