feat(ai-services): add Python FastAPI AI/ML services container
Create libs/ai-services/ with a FastAPI app providing:
- POST /avm/predict — XGBoost-backed property price prediction (heuristic fallback)
- POST /avm/extract-features — Vietnamese NLP feature extraction from listing text
- POST /moderation/check — content moderation with rule-based flagging
- GET /health — health check endpoint

Includes a Dockerfile (Python 3.12), docker-compose integration, Pydantic models, and 9 passing tests covering all endpoints.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
96
libs/ai-services/app/services/moderation_service.py
Normal file
96
libs/ai-services/app/services/moderation_service.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import re
|
||||
|
||||
from app.models.moderation import ModerationFlag, ModerationRequest, ModerationResponse
|
||||
|
||||
# Moderation rule table. Each entry groups compiled regex patterns under one
# category; every match of any pattern produces a flag carrying the entry's
# category, severity and reason.
_RULES: list[dict] = [
    {
        "category": "contact_info",
        "severity": "medium",
        "patterns": [
            # Vietnamese phone numbers: a leading 0 plus 9-10 digits. The digit
            # lookarounds keep the pattern from firing inside a longer digit
            # run, e.g. a listing price such as 10000000000.
            re.compile(r"(?<!\d)0\d{9,10}(?!\d)"),
            re.compile(r"\b[\w.+-]+@[\w-]+\.[\w.]+\b"),  # Email
            # Messaging-app name followed by a number
            re.compile(r"(?:zalo|viber|telegram|whatsapp)\s*[:\-]?\s*\d+", re.IGNORECASE),
        ],
        "reason": "Contact information detected — may bypass platform messaging",
    },
    {
        "category": "spam",
        "severity": "low",
        "patterns": [
            re.compile(r"(.)\1{5,}"),  # Same character six or more times in a row
            re.compile(r"(!!!|\.\.\.){3,}"),  # Excessive punctuation
            # "click here"-style calls to action (English and Vietnamese)
            re.compile(r"(?:click|nhấn|bấm)\s+(?:here|vào đây|link)", re.IGNORECASE),
        ],
        "reason": "Spam-like content pattern",
    },
    {
        "category": "profanity",
        "severity": "high",
        "patterns": [
            re.compile(
                r"\b(?:lừa đảo|scam|fake|giả mạo)\b",
                re.IGNORECASE,
            ),
        ],
        "reason": "Potentially harmful or fraudulent language",
    },
    {
        "category": "prohibited_content",
        "severity": "high",
        "patterns": [
            re.compile(
                r"\b(?:đất rừng phòng hộ|đất quốc phòng|đất tranh chấp)\b",
                re.IGNORECASE,
            ),
        ],
        "reason": "Listing references prohibited property types",
    },
]
|
||||
|
||||
|
||||
class ModerationService:
    """Rule-based content moderation driven by the module-level ``_RULES`` table."""

    def check(self, req: ModerationRequest) -> ModerationResponse:
        """Scan ``req.text`` against every rule and build the moderation result.

        One flag is emitted per regex match. When nothing matches, the response
        is unflagged with a score of 0.0 and the text returned unchanged.
        Otherwise the score blends the highest severity weight (70%) with the
        mean severity weight over all flags (30%), rounded to 3 decimals, and
        every matched region is replaced by ``[REDACTED]`` in ``cleaned_text``.

        Args:
            req: Request whose ``text`` attribute is the content to moderate.

        Returns:
            A ``ModerationResponse`` with ``is_flagged``, ``score``, ``flags``
            and ``cleaned_text`` populated.
        """
        text = req.text
        flags: list[ModerationFlag] = []
        # Character spans of the matches, kept so redaction can target the
        # exact regions the rules matched.
        spans: list[tuple[int, int]] = []

        for rule in _RULES:
            for pattern in rule["patterns"]:
                for match in pattern.finditer(text):
                    start, end = match.span()
                    if start == end:
                        # A zero-length match carries nothing to flag or redact.
                        continue
                    flags.append(
                        ModerationFlag(
                            category=rule["category"],
                            severity=rule["severity"],
                            matched_text=match.group(),
                            reason=rule["reason"],
                        )
                    )
                    spans.append((start, end))

        if not flags:
            return ModerationResponse(
                is_flagged=False,
                score=0.0,
                flags=[],
                cleaned_text=text,
            )

        # Aggregate score: dominated by the worst flag, nudged by the average.
        severity_weights = {"low": 0.2, "medium": 0.5, "high": 0.9}
        weights = [severity_weights.get(f.severity, 0.5) for f in flags]
        max_score = max(weights)
        avg_score = sum(weights) / len(weights)
        score = round(min(1.0, max_score * 0.7 + avg_score * 0.3), 3)

        return ModerationResponse(
            is_flagged=True,
            score=score,
            flags=flags,
            cleaned_text=self._redact(text, spans),
        )

    @staticmethod
    def _redact(text: str, spans: list[tuple[int, int]]) -> str:
        """Replace each ``(start, end)`` span of *text* with ``[REDACTED]``.

        Redacting by position, rather than ``str.replace`` on the matched
        substring, leaves untouched any identical text that the rules did not
        match, and makes the result independent of rule order. Overlapping
        spans collapse into a single marker; adjacent spans each get their own.
        """
        pieces: list[str] = []
        cursor = 0  # end of the region already emitted or redacted
        for start, end in sorted(spans):
            if end <= cursor:
                # Entirely inside a region that is already redacted.
                continue
            if start < cursor:
                # Overlaps the previous region: extend it, no second marker.
                cursor = end
                continue
            pieces.append(text[cursor:start])
            pieces.append("[REDACTED]")
            cursor = end
        pieces.append(text[cursor:])
        return "".join(pieces)
|
||||
|
||||
|
||||
# Module-level ModerationService instance, created once when this module is imported.
moderation_service = ModerationService()
|
||||
Reference in New Issue
Block a user