feat(ai-services): add Python FastAPI AI/ML services container

Create libs/ai-services/ with a FastAPI app providing:

- POST /avm/predict — XGBoost-backed property price prediction (heuristic fallback)
- POST /avm/extract-features — Vietnamese NLP feature extraction from listing text
- POST /moderation/check — content moderation with rule-based flagging
- GET /health — health check endpoint

Includes Dockerfile (Python 3.12), docker-compose integration, Pydantic models, and 9 passing tests covering all endpoints.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-08 03:08:39 +07:00
parent 4ef54027d6
commit b392bc3570
20 changed files with 730 additions and 0 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -81,6 +81,76 @@ services:
    networks:
      - goodgo-net
  # AI/ML FastAPI service, built locally from ./libs/ai-services.
  ai-services:
    build:
      context: ./libs/ai-services
      dockerfile: Dockerfile
    container_name: goodgo-ai-services
    restart: unless-stopped
    ports:
      # Host port is configurable via AI_SERVICES_PORT; the container port is 8000.
      - '${AI_SERVICES_PORT:-8000}:8000'
    environment:
      # Forwarded from the host environment, falling back to the defaults shown.
      AI_DEBUG: ${AI_DEBUG:-false}
      AI_LOG_LEVEL: ${AI_LOG_LEVEL:-info}
    healthcheck:
      # Requests /health from inside the container with httpx; an error status
      # raises and makes the probe command exit non-zero.
      test: ['CMD', 'python', '-c', 'import httpx; httpx.get("http://localhost:8000/health").raise_for_status()']
      interval: 30s
      timeout: 5s
      retries: 5
      start_period: 30s
    networks:
      - goodgo-net
  # Prometheus metrics server (pinned image tag), published on PROMETHEUS_PORT
  # (default 9090).
  prometheus:
    image: prom/prometheus:v2.51.0
    container_name: goodgo-prometheus
    restart: unless-stopped
    ports:
      - '${PROMETHEUS_PORT:-9090}:9090'
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      # Keep 15 days of TSDB data.
      - '--storage.tsdb.retention.time=15d'
      # NOTE(review): this enables Prometheus' HTTP lifecycle API on the
      # published port — confirm that is intended outside local development.
      - '--web.enable-lifecycle'
    volumes:
      # Scrape configuration is mounted read-only from the repository.
      - ./monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      # Named volume so collected metrics survive container recreation.
      - prometheus_data:/prometheus
    extra_hosts:
      # Makes host.docker.internal resolve to the Docker host gateway.
      - 'host.docker.internal:host-gateway'
    healthcheck:
      test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:9090/-/healthy']
      interval: 15s
      timeout: 5s
      retries: 3
      start_period: 10s
    networks:
      - goodgo-net
  # Grafana dashboards; container port 3000 is published on GRAFANA_PORT
  # (default 3002).
  grafana:
    image: grafana/grafana:10.4.1
    container_name: goodgo-grafana
    restart: unless-stopped
    ports:
      - '${GRAFANA_PORT:-3002}:3000'
    environment:
      # NOTE(review): falls back to admin/admin when GRAFANA_ADMIN_USER /
      # GRAFANA_ADMIN_PASSWORD are unset — override these outside local development.
      GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin}
      GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-admin}
      # Quoted so the value is passed as the string 'false', not a YAML boolean.
      GF_USERS_ALLOW_SIGN_UP: 'false'
    volumes:
      # Provisioning files and dashboards are mounted read-only from the repository.
      - ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
      - ./monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro
      # Named volume for Grafana's own state.
      - grafana_data:/var/lib/grafana
    depends_on:
      # Start only after the prometheus healthcheck reports healthy.
      prometheus:
        condition: service_healthy
    healthcheck:
      test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3000/api/health']
      interval: 15s
      timeout: 5s
      retries: 3
      start_period: 15s
    networks:
      - goodgo-net
 volumes:
  pgdata:
    driver: local
@@ -90,6 +160,10 @@ volumes:
    driver: local
  minio_data:
    driver: local
  prometheus_data:
    driver: local
  grafana_data:
    driver: local
 networks:
  goodgo-net:
--- a/libs/ai-services/.gitignore
+++ b/libs/ai-services/.gitignore
@@ -0,0 +1,5 @@
 __pycache__/
 *.pyc
 *.egg-info/
 .pytest_cache/
 dist/
--- a/libs/ai-services/Dockerfile
+++ b/libs/ai-services/Dockerfile
@@ -0,0 +1,31 @@
 FROM python:3.12-slim
 # Unbuffered stdout/stderr so logs reach `docker logs` immediately, and no
 # .pyc files written at import time inside the container.
 ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1
 WORKDIR /app
 # Install system deps for underthesea / numpy
 RUN apt-get update && \
    apt-get install -y --no-install-recommends gcc g++ && \
    rm -rf /var/lib/apt/lists/*
 COPY pyproject.toml .
 # Prefer the dependency list declared in pyproject.toml; if that install fails,
 # fall back to the explicit list below. stderr is intentionally NOT discarded
 # so the build log shows why the fallback was taken.
 RUN pip install --no-cache-dir . || pip install --no-cache-dir \
    "fastapi>=0.115.0" \
    "uvicorn[standard]>=0.32.0" \
    "xgboost>=2.1.0" \
    "numpy>=1.26.0" \
    "underthesea>=6.8.0" \
    "pydantic>=2.9.0" \
    "pydantic-settings>=2.5.0" \
    "httpx>=0.27.0"
 COPY app/ ./app/
 # Pre-download underthesea models at build time. Best effort: a failure is
 # reported in the build log but does not fail the image build.
 RUN python -c "from underthesea import word_tokenize; word_tokenize('test')" || \
    echo "WARNING: underthesea warm-up failed; continuing without it" >&2
 EXPOSE 8000
 # Probe the /health endpoint from inside the container.
 HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
    CMD python -c "import httpx; httpx.get('http://localhost:8000/health').raise_for_status()"
 CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
--- a/libs/ai-services/app/init.py
+++ b/libs/ai-services/app/init.py
--- a/libs/ai-services/app/config.py
+++ b/libs/ai-services/app/config.py
@@ -0,0 +1,13 @@
 from pydantic_settings import BaseSettings
 class Settings(BaseSettings):
    """Runtime configuration for the AI services, read from ``AI_``-prefixed env vars.

    For example ``AI_DEBUG`` populates ``debug`` and ``AI_MODEL_PATH`` populates
    ``model_path``; unset variables keep the defaults declared below.
    """

    app_name: str = "Goodgo AI Services"
    debug: bool = False
    # Directory that holds trained model files.
    model_path: str = "/app/models"
    log_level: str = "info"
    model_config = {
        "env_prefix": "AI_",
        # ``model_path`` starts with pydantic's protected ``model_`` prefix, which
        # triggers a namespace-conflict warning on pydantic releases that protect
        # the whole prefix. Restrict the protected namespace to ``settings_``.
        "protected_namespaces": ("settings_",),
    }
 # Shared settings instance, built once when this module is imported.
 settings = Settings()
--- a/libs/ai-services/app/main.py
+++ b/libs/ai-services/app/main.py
@@ -0,0 +1,28 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from app.config import settings
 from app.routers import avm, moderation
 # Application object; the title comes from settings and the interactive docs
 # are served at /docs and /redoc.
 app = FastAPI(
    title=settings.app_name,
    version="0.1.0",
    docs_url="/docs",
    redoc_url="/redoc",
 )
 # CORS: any origin, method and header is accepted. Credentials stay disabled
 # because FastAPI documents that allow_origins / allow_methods / allow_headers
 # must not be ["*"] when allow_credentials is True — enabling both would let
 # any site make credentialed cross-origin requests.
 app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
 )
 # Mount the feature routers (/avm/* and /moderation/*).
 app.include_router(avm.router)
 app.include_router(moderation.router)
@app.get("/health")
def health() -> dict:
    """Health endpoint: report a static ``ok`` status and the configured service name."""
    payload = {"status": "ok", "service": settings.app_name}
    return payload
--- a/libs/ai-services/app/models/init.py
+++ b/libs/ai-services/app/models/init.py
--- a/libs/ai-services/app/models/avm.py
+++ b/libs/ai-services/app/models/avm.py
@@ -0,0 +1,48 @@
 from pydantic import BaseModel, Field
 class AVMPredictRequest(BaseModel):
    """Input payload for an AVM price prediction.

    ``area``, ``district``, ``city`` and ``property_type`` are required; every
    other field falls back to the default declared below.
    """
    area: float = Field(..., gt=0, description="Property area in m²")
    district: str = Field(..., min_length=1, description="District name")
    city: str = Field(..., min_length=1, description="City name")
    property_type: str = Field(..., description="e.g. apartment, house, land")
    bedrooms: int = Field(0, ge=0)
    bathrooms: int = Field(0, ge=0)
    floors: int = Field(0, ge=0)
    frontage: float = Field(0.0, ge=0, description="Frontage width in meters")
    road_width: float = Field(0.0, ge=0, description="Adjacent road width in meters")
    year_built: int | None = Field(None, description="Year the property was built")
    # Defaults to True, i.e. the property is assumed to have legal papers.
    has_legal_paper: bool = Field(True, description="Whether property has sổ đỏ/sổ hồng")
 class AVMPredictResponse(BaseModel):
    """Result of an AVM price prediction; all monetary values are in VND."""
    estimated_price_vnd: float = Field(..., description="Estimated price in VND")
    # Validated to lie in the closed interval [0, 1].
    confidence: float = Field(..., ge=0, le=1, description="Prediction confidence score")
    price_per_m2: float = Field(..., description="Price per m² in VND")
    price_range_low: float = Field(..., description="Lower bound estimate in VND")
    price_range_high: float = Field(..., description="Upper bound estimate in VND")
 class FeatureExtractRequest(BaseModel):
    """Input payload for feature extraction: a single non-empty listing text."""
    text: str = Field(..., min_length=1, description="Vietnamese property listing text")
 class ExtractedFeatures(BaseModel):
    """Structured features pulled out of a listing text.

    Every field is optional and defaults to ``None``.
    """
    area: float | None = None
    district: str | None = None
    city: str | None = None
    property_type: str | None = None
    bedrooms: int | None = None
    bathrooms: int | None = None
    floors: int | None = None
    frontage: float | None = None
    road_width: float | None = None
    price_mentioned: float | None = None
    has_legal_paper: bool | None = None
    address_raw: str | None = None
 class FeatureExtractResponse(BaseModel):
    """Feature-extraction result: structured features plus tokens and named entities."""
    features: ExtractedFeatures
    # Both lists default to empty when not supplied.
    tokens: list[str] = Field(default_factory=list, description="Tokenized words")
    entities: list[dict] = Field(default_factory=list, description="Named entities found")
--- a/libs/ai-services/app/models/moderation.py
+++ b/libs/ai-services/app/models/moderation.py
@@ -0,0 +1,20 @@
 from pydantic import BaseModel, Field
 class ModerationRequest(BaseModel):
    """Input payload for a moderation check: non-empty text plus an optional context label."""
    text: str = Field(..., min_length=1, description="Text content to moderate")
    # Free-form string; defaults to "listing" when omitted.
    context: str = Field("listing", description="Context: listing, comment, profile")
 class ModerationFlag(BaseModel):
    """A single moderation finding: which rule category fired, on what text, and why."""
    category: str
    severity: str = Field(..., description="low, medium, high")
    matched_text: str
    reason: str
 class ModerationResponse(BaseModel):
    """Outcome of a moderation check: overall verdict, risk score and individual flags."""
    is_flagged: bool
    # Validated to lie in the closed interval [0, 1].
    score: float = Field(..., ge=0, le=1, description="Overall risk score")
    flags: list[ModerationFlag] = Field(default_factory=list)
    cleaned_text: str | None = Field(None, description="Text with flagged content redacted")
--- a/libs/ai-services/app/routers/init.py
+++ b/libs/ai-services/app/routers/init.py
--- a/libs/ai-services/app/routers/avm.py
+++ b/libs/ai-services/app/routers/avm.py
@@ -0,0 +1,23 @@
 from fastapi import APIRouter
 from app.models.avm import (
    AVMPredictRequest,
    AVMPredictResponse,
    FeatureExtractRequest,
    FeatureExtractResponse,
 )
 from app.services.avm_service import avm_service, feature_extract_service
 # All AVM endpoints are served under the /avm prefix.
 router = APIRouter(tags=["AVM"], prefix="/avm")
 @router.post("/predict", response_model=AVMPredictResponse)
 def predict(req: AVMPredictRequest) -> AVMPredictResponse:
    """Return a price estimate for the property described by ``req``."""
    prediction = avm_service.predict(req)
    return prediction
 @router.post("/extract-features", response_model=FeatureExtractResponse)
 def extract_features(req: FeatureExtractRequest) -> FeatureExtractResponse:
    """Return the real-estate features found in the Vietnamese listing text of ``req``."""
    extraction = feature_extract_service.extract(req)
    return extraction
--- a/libs/ai-services/app/routers/moderation.py
+++ b/libs/ai-services/app/routers/moderation.py
@@ -0,0 +1,12 @@
 from fastapi import APIRouter
 from app.models.moderation import ModerationRequest, ModerationResponse
 from app.services.moderation_service import moderation_service
 # All moderation endpoints are served under the /moderation prefix.
 router = APIRouter(tags=["Moderation"], prefix="/moderation")
 @router.post("/check", response_model=ModerationResponse)
 def check(req: ModerationRequest) -> ModerationResponse:
    """Run the moderation rules against the text in ``req`` and return the verdict."""
    verdict = moderation_service.check(req)
    return verdict
--- a/libs/ai-services/app/services/init.py
+++ b/libs/ai-services/app/services/init.py
--- a/libs/ai-services/app/services/avm_service.py
+++ b/libs/ai-services/app/services/avm_service.py
@@ -0,0 +1,229 @@
 import logging
 import re
 import numpy as np
 from app.models.avm import (
    AVMPredictRequest,
    AVMPredictResponse,
    ExtractedFeatures,
    FeatureExtractRequest,
    FeatureExtractResponse,
 )
 logger = logging.getLogger(__name__)
 # Integer code assigned to each supported property type; the code is the
 # position of the label in the tuple below.
 PROPERTY_TYPE_MAP = {
    label: code
    for code, label in enumerate(
        ("apartment", "house", "townhouse", "villa", "land", "shophouse")
    )
 }
 # Baseline price per city in millions of VND per m²; keys are lower-case.
 CITY_BASELINE = dict(
    [
        ("hà nội", 85.0),
        ("hồ chí minh", 90.0),
        ("đà nẵng", 45.0),
        ("hải phòng", 35.0),
        ("cần thơ", 25.0),
    ]
 )
 # Baseline applied to any city that is missing from CITY_BASELINE.
 DEFAULT_BASELINE = 30.0
 class AVMService:
    """Automated Valuation Model service.

    Uses XGBoost when a trained model is available,
    falls back to heuristic pricing for development/demo.
    """

    # Heuristic price multiplier per property type; unknown types use 1.0.
    _TYPE_MULTIPLIER = {
        "apartment": 0.9,
        "house": 1.0,
        "townhouse": 1.1,
        "villa": 1.4,
        "land": 0.7,
        "shophouse": 1.3,
    }

    def __init__(self) -> None:
        self._model = None
        self._load_model()

    @staticmethod
    def _normalize_key(value: str) -> str:
        """Return ``value`` in a canonical form suitable for table lookups.

        The text is NFC-normalised (Vietnamese input often arrives with
        decomposed diacritics, which would never equal the pre-composed table
        keys), lower-cased, and has its whitespace trimmed and collapsed.
        """
        import unicodedata

        return " ".join(unicodedata.normalize("NFC", value).lower().split())

    def _load_model(self) -> None:
        """Try to load the trained booster; leave ``self._model`` as None on failure.

        A missing model file is an expected situation and is logged at INFO.
        Any other problem (xgboost not importable, unreadable or corrupt file)
        is logged at WARNING with the traceback so it is not mistaken for
        "no model deployed".
        """
        self._model = None
        try:
            import os

            import xgboost as xgb
            from app.config import settings

            model_file = f"{settings.model_path}/avm_model.json"
            if not os.path.isfile(model_file):
                logger.info("No trained AVM model found — using heuristic fallback")
                return
            booster = xgb.Booster()
            booster.load_model(model_file)
        except Exception:
            # Best effort by design: the service must still start without a model.
            logger.warning(
                "Could not load the AVM model — using heuristic fallback",
                exc_info=True,
            )
            return
        self._model = booster
        logger.info("Loaded XGBoost AVM model from %s", model_file)

    def predict(self, req: AVMPredictRequest) -> AVMPredictResponse:
        """Estimate a price with the trained model when loaded, else heuristically."""
        if self._model is not None:
            return self._predict_xgboost(req)
        return self._predict_heuristic(req)

    def _predict_xgboost(self, req: AVMPredictRequest) -> AVMPredictResponse:
        """Predict with the loaded booster.

        The model output is exponentiated, i.e. it is treated as a log-price.
        If that yields a non-finite number the heuristic estimate is returned
        instead, because such a value cannot be serialised in the JSON response.
        """
        import xgboost as xgb

        features = np.array(
            [[
                req.area,
                # Unknown property types are encoded as 1 ("house").
                PROPERTY_TYPE_MAP.get(self._normalize_key(req.property_type), 1),
                req.bedrooms,
                req.bathrooms,
                req.floors,
                req.frontage,
                req.road_width,
                # A missing build year is replaced by 2020.
                req.year_built or 2020,
                1.0 if req.has_legal_paper else 0.0,
            ]]
        )
        pred_log = self._model.predict(xgb.DMatrix(features))[0]
        estimated = float(np.exp(pred_log))
        if not np.isfinite(estimated):
            logger.warning("AVM model produced a non-finite estimate — using heuristic fallback")
            return self._predict_heuristic(req)
        return AVMPredictResponse(
            estimated_price_vnd=estimated,
            confidence=0.82,
            price_per_m2=estimated / req.area,
            price_range_low=estimated * 0.85,
            price_range_high=estimated * 1.15,
        )

    def _predict_heuristic(self, req: AVMPredictRequest) -> AVMPredictResponse:
        """Estimate a price from the city baseline and a few multiplicative adjustments.

        All returned amounts are rounded to the nearest 1,000 VND.
        """
        base = CITY_BASELINE.get(self._normalize_key(req.city), DEFAULT_BASELINE)
        type_mult = self._TYPE_MULTIPLIER.get(self._normalize_key(req.property_type), 1.0)

        # Adjustments: +2% per bedroom, +15% per 10 m of frontage, and a 30%
        # discount when the property has no legal papers.
        bedroom_adj = 1.0 + req.bedrooms * 0.02
        frontage_adj = (1.0 + (req.frontage / 10.0) * 0.15) if req.frontage > 0 else 1.0
        legal_adj = 1.0 if req.has_legal_paper else 0.7

        # The baseline is expressed in millions of VND per m².
        price_per_m2 = base * type_mult * bedroom_adj * frontage_adj * legal_adj * 1_000_000
        estimated = price_per_m2 * req.area
        return AVMPredictResponse(
            estimated_price_vnd=round(estimated, -3),
            confidence=0.65,
            price_per_m2=round(price_per_m2, -3),
            price_range_low=round(estimated * 0.75, -3),
            price_range_high=round(estimated * 1.25, -3),
        )
 class FeatureExtractService:
    """Extract real-estate features from Vietnamese listing text."""

    # All patterns are case-insensitive; numbers accept "." or "," as the decimal mark.
    _AREA_PATTERN = re.compile(r"(\d+(?:[.,]\d+)?)\s*(?:m2|m²|mét vuông)", re.IGNORECASE)
    _BEDROOM_PATTERN = re.compile(r"(\d+)\s*(?:phòng ngủ|pn)", re.IGNORECASE)
    _BATHROOM_PATTERN = re.compile(r"(\d+)\s*(?:phòng tắm|wc|toilet)", re.IGNORECASE)
    _FLOOR_PATTERN = re.compile(r"(\d+)\s*(?:tầng|lầu)", re.IGNORECASE)
    _FRONTAGE_PATTERN = re.compile(r"(?:mặt tiền|ngang)\s*(\d+(?:[.,]\d+)?)\s*m", re.IGNORECASE)
    _ROAD_WIDTH_PATTERN = re.compile(r"(?:đường|hẻm)\s*(\d+(?:[.,]\d+)?)\s*m", re.IGNORECASE)
    # Group 1 is the amount, group 2 the unit ("tỷ"/"tỉ" = billion, "triệu" = million).
    _PRICE_PATTERN = re.compile(
        r"(\d+(?:[.,]\d+)?)\s*(tỷ|tỉ|triệu)", re.IGNORECASE
    )
    _BILLION_UNITS = ("tỷ", "tỉ")
    _LEGAL_KEYWORDS = ["sổ đỏ", "sổ hồng", "chính chủ", "pháp lý rõ ràng"]
    # Checked in declaration order; the first keyword found in the text wins.
    _PROPERTY_TYPES = {
        "căn hộ": "apartment",
        "chung cư": "apartment",
        "nhà phố": "townhouse",
        "nhà riêng": "house",
        "biệt thự": "villa",
        "đất": "land",
        "đất nền": "land",
        "shophouse": "shophouse",
    }

    @staticmethod
    def _to_float(raw: str) -> float:
        """Parse a number that may use a comma as its decimal separator."""
        return float(raw.replace(",", "."))

    @staticmethod
    def _run_nlp(text: str) -> tuple[list[str], list[dict]]:
        """Tokenise ``text`` and collect named entities with underthesea.

        Best effort: when underthesea is missing, or one of its calls raises,
        the problem is logged and a degraded result is returned (whitespace
        tokens and/or no entities) instead of failing the whole request.
        """
        try:
            from underthesea import ner, word_tokenize
        except ImportError:
            logger.warning("underthesea not available — skipping NLP tokenization")
            return text.split(), []

        try:
            tokens = list(word_tokenize(text))
        except Exception:
            logger.exception("underthesea word_tokenize failed — falling back to whitespace split")
            tokens = text.split()

        entities: list[dict] = []
        try:
            for chunk in ner(text):
                # Each chunk is indexed as (text, ..., ..., label); "O" marks a
                # token outside any entity.
                if len(chunk) >= 4 and chunk[3] != "O":
                    entities.append({"text": chunk[0], "label": chunk[3]})
        except Exception:
            logger.exception("underthesea NER failed — returning no entities")
            entities = []
        return tokens, entities

    def extract(self, req: FeatureExtractRequest) -> FeatureExtractResponse:
        """Extract structured features, tokens and entities from ``req.text``.

        The text is NFC-normalised first so that input with decomposed
        Vietnamese diacritics still matches the pre-composed keywords used by
        the patterns. Only the first occurrence of each numeric feature is used.
        """
        import unicodedata

        text = unicodedata.normalize("NFC", req.text)
        text_lower = text.lower()
        features = ExtractedFeatures()

        float_fields = (
            ("area", self._AREA_PATTERN),
            ("frontage", self._FRONTAGE_PATTERN),
            ("road_width", self._ROAD_WIDTH_PATTERN),
        )
        for field_name, pattern in float_fields:
            found = pattern.search(text)
            if found:
                setattr(features, field_name, self._to_float(found.group(1)))

        int_fields = (
            ("bedrooms", self._BEDROOM_PATTERN),
            ("bathrooms", self._BATHROOM_PATTERN),
            ("floors", self._FLOOR_PATTERN),
        )
        for field_name, pattern in int_fields:
            found = pattern.search(text)
            if found:
                setattr(features, field_name, int(found.group(1)))

        # Price: scale the amount by the captured unit.
        found = self._PRICE_PATTERN.search(text)
        if found:
            unit = found.group(2).lower()
            scale = 1_000_000_000 if unit in self._BILLION_UNITS else 1_000_000
            features.price_mentioned = self._to_float(found.group(1)) * scale

        # Legal: always a bool — False simply means no keyword was present.
        features.has_legal_paper = any(kw in text_lower for kw in self._LEGAL_KEYWORDS)

        # Property type
        for vn_type, en_type in self._PROPERTY_TYPES.items():
            if vn_type in text_lower:
                features.property_type = en_type
                break

        tokens, entities = self._run_nlp(text)
        return FeatureExtractResponse(
            features=features,
            tokens=tokens,
            entities=entities,
        )
 # Module-level instances imported by the AVM router. Constructing AVMService
 # attempts to load the trained model as soon as this module is imported.
 avm_service = AVMService()
 feature_extract_service = FeatureExtractService()
--- a/libs/ai-services/app/services/moderation_service.py
+++ b/libs/ai-services/app/services/moderation_service.py
@@ -0,0 +1,96 @@
 import re
 from app.models.moderation import ModerationFlag, ModerationRequest, ModerationResponse
 # Blocklist categories with patterns and severity
 _RULES: list[dict] = [
    {
        "category": "contact_info",
        "severity": "medium",
        "patterns": [
            re.compile(r"0\d{9,10}"),  # Vietnamese phone numbers
            re.compile(r"\b[\w.+-]+@[\w-]+\.[\w.]+\b"),  # Email
            re.compile(r"(?:zalo|viber|telegram|whatsapp)\s*[:\-]?\s*\d+", re.IGNORECASE),
        ],
        "reason": "Contact information detected — may bypass platform messaging",
    },
    {
        "category": "spam",
        "severity": "low",
        "patterns": [
            re.compile(r"(.)\1{5,}"),  # Repeated characters
            re.compile(r"(!!!|\.\.\.){3,}"),  # Excessive punctuation
            re.compile(r"(?:click|nhấn|bấm)\s+(?:here|vào đây|link)", re.IGNORECASE),
        ],
        "reason": "Spam-like content pattern",
    },
    {
        "category": "profanity",
        "severity": "high",
        "patterns": [
            re.compile(
                r"\b(?:lừa đảo|scam|fake|giả mạo)\b",
                re.IGNORECASE,
            ),
        ],
        "reason": "Potentially harmful or fraudulent language",
    },
    {
        "category": "prohibited_content",
        "severity": "high",
        "patterns": [
            re.compile(
                r"\b(?:đất rừng phòng hộ|đất quốc phòng|đất tranh chấp)\b",
                re.IGNORECASE,
            ),
        ],
        "reason": "Listing references prohibited property types",
    },
 ]
 class ModerationService:
    """Rule-based content moderation driven by the module-level ``_RULES`` table."""

    _REDACTION = "[REDACTED]"
    # Weight of each severity level in the aggregate score; unknown levels use 0.5.
    _SEVERITY_WEIGHTS = {"low": 0.2, "medium": 0.5, "high": 0.9}

    @classmethod
    def _redact(cls, text: str, spans: list[tuple[int, int]]) -> str:
        """Replace every matched span of ``text`` with the redaction marker.

        Overlapping spans are merged first, so text flagged by several rules
        is replaced once and no fragment of a longer match is left behind.
        """
        merged: list[list[int]] = []
        for start, end in sorted(spans):
            if merged and start < merged[-1][1]:
                merged[-1][1] = max(merged[-1][1], end)
            else:
                merged.append([start, end])

        pieces: list[str] = []
        cursor = 0
        for start, end in merged:
            pieces.append(text[cursor:start])
            pieces.append(cls._REDACTION)
            cursor = end
        pieces.append(text[cursor:])
        return "".join(pieces)

    def check(self, req: ModerationRequest) -> ModerationResponse:
        """Run every rule against ``req.text`` and build the moderation verdict.

        The text is NFC-normalised first so that input with decomposed
        Vietnamese diacritics cannot slip past the pre-composed keyword
        patterns; ``matched_text`` and ``cleaned_text`` refer to that
        normalised text. The score blends the highest severity weight (70%)
        with the average weight over all flags (30%), capped at 1.0.
        """
        import unicodedata

        text = unicodedata.normalize("NFC", req.text)
        flags: list[ModerationFlag] = []
        spans: list[tuple[int, int]] = []
        for rule in _RULES:
            for pattern in rule["patterns"]:
                for match in pattern.finditer(text):
                    flags.append(
                        ModerationFlag(
                            category=rule["category"],
                            severity=rule["severity"],
                            matched_text=match.group(),
                            reason=rule["reason"],
                        )
                    )
                    spans.append(match.span())

        if not flags:
            return ModerationResponse(
                is_flagged=False,
                score=0.0,
                flags=[],
                cleaned_text=text,
            )

        # Compute aggregate score
        weights = [self._SEVERITY_WEIGHTS.get(flag.severity, 0.5) for flag in flags]
        max_score = max(weights)
        avg_score = sum(weights) / len(weights)
        score = round(min(1.0, max_score * 0.7 + avg_score * 0.3), 3)

        return ModerationResponse(
            is_flagged=True,
            score=score,
            flags=flags,
            cleaned_text=self._redact(text, spans),
        )
 # Module-level instance imported by the moderation router.
 moderation_service = ModerationService()
--- a/libs/ai-services/pyproject.toml
+++ b/libs/ai-services/pyproject.toml
@@ -0,0 +1,30 @@
 [project]
 name = "goodgo-ai-services"
 version = "0.1.0"
 description = "AI/ML services for Goodgo Platform — AVM, feature extraction, moderation"
 requires-python = ">=3.12"
 dependencies = [
    "fastapi>=0.115.0",
    "uvicorn[standard]>=0.32.0",
    "xgboost>=2.1.0",
    "numpy>=1.26.0",
    "underthesea>=6.8.0",
    "pydantic>=2.9.0",
    "pydantic-settings>=2.5.0",
    "httpx>=0.27.0",
 ]
 [project.optional-dependencies]
 dev = [
    "pytest>=8.3.0",
    "pytest-asyncio>=0.24.0",
    "httpx>=0.27.0",
 ]
 [build-system]
 requires = ["setuptools>=75.0"]
 # "setuptools.build_meta" is the PEP 517 backend shipped by setuptools; the
 # previous value did not name an importable backend, so `pip install .` failed.
 build-backend = "setuptools.build_meta"
 # Package only the application code, so additional top-level directories never
 # break setuptools' automatic package discovery.
 [tool.setuptools.packages.find]
 include = ["app*"]
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 asyncio_mode = "auto"
--- a/libs/ai-services/tests/init.py
+++ b/libs/ai-services/tests/init.py
--- a/libs/ai-services/tests/test_avm.py
+++ b/libs/ai-services/tests/test_avm.py
@@ -0,0 +1,59 @@
 from fastapi.testclient import TestClient
 from app.main import app
 client = TestClient(app)
 def test_predict_heuristic():
    """A valid apartment payload yields a positive estimate bracketed by its price range."""
    payload = {
        "area": 80.0,
        "district": "Cầu Giấy",
        "city": "Hà Nội",
        "property_type": "apartment",
        "bedrooms": 2,
        "bathrooms": 2,
        "floors": 1,
        "frontage": 0,
        "road_width": 0,
        "has_legal_paper": True,
    }
    response = client.post("/avm/predict", json=payload)
    assert response.status_code == 200
    body = response.json()
    estimate = body["estimated_price_vnd"]
    assert estimate > 0
    assert 0 <= body["confidence"] <= 1
    assert body["price_per_m2"] > 0
    assert body["price_range_low"] < estimate
    assert body["price_range_high"] > estimate
 def test_predict_validation_error():
    """A negative area together with an empty district is rejected with HTTP 422."""
    invalid_payload = {"area": -10, "district": "", "city": "HN", "property_type": "house"}
    response = client.post("/avm/predict", json=invalid_payload)
    assert response.status_code == 422
 def test_extract_features():
    """A rich listing text yields the expected area, rooms, type, legal flag and price."""
    listing = "Bán căn hộ chung cư 80m2 3 phòng ngủ 2 WC tầng 10 giá 3.5 tỷ sổ đỏ chính chủ"
    response = client.post("/avm/extract-features", json={"text": listing})
    assert response.status_code == 200
    extracted = response.json()["features"]
    assert extracted["area"] == 80.0
    assert extracted["bedrooms"] == 3
    assert extracted["bathrooms"] == 2
    assert extracted["property_type"] == "apartment"
    assert extracted["has_legal_paper"] is True
    assert extracted["price_mentioned"] == 3_500_000_000
 def test_extract_features_minimal():
    """A text that only names the property type is still classified as a house."""
    response = client.post("/avm/extract-features", json={"text": "Bán nhà riêng"})
    assert response.status_code == 200
    body = response.json()
    assert body["features"]["property_type"] == "house"
--- a/libs/ai-services/tests/test_health.py
+++ b/libs/ai-services/tests/test_health.py
@@ -0,0 +1,12 @@
 from fastapi.testclient import TestClient
 from app.main import app
 client = TestClient(app)
 def test_health():
    """GET /health answers 200 with status "ok"."""
    response = client.get("/health")
    assert response.status_code == 200
    body = response.json()
    assert body["status"] == "ok"
--- a/libs/ai-services/tests/test_moderation.py
+++ b/libs/ai-services/tests/test_moderation.py
@@ -0,0 +1,50 @@
 from fastapi.testclient import TestClient
 from app.main import app
 client = TestClient(app)
 def test_clean_text():
    """Harmless listing text is not flagged and scores 0.0."""
    payload = {"text": "Bán căn hộ đẹp tại quận 1", "context": "listing"}
    response = client.post("/moderation/check", json=payload)
    assert response.status_code == 200
    verdict = response.json()
    assert verdict["is_flagged"] is False
    assert verdict["score"] == 0.0
 def test_phone_number_flagged():
    """A phone number in the text is flagged as contact info and redacted."""
    payload = {"text": "Liên hệ 0912345678 để xem nhà", "context": "listing"}
    response = client.post("/moderation/check", json=payload)
    assert response.status_code == 200
    verdict = response.json()
    assert verdict["is_flagged"] is True
    categories = [flag["category"] for flag in verdict["flags"]]
    assert "contact_info" in categories
    assert "[REDACTED]" in verdict["cleaned_text"]
 def test_scam_language_flagged():
    """Scam-related wording is flagged under the "profanity" category."""
    payload = {"text": "Cảnh báo lừa đảo từ chủ nhà", "context": "comment"}
    response = client.post("/moderation/check", json=payload)
    assert response.status_code == 200
    verdict = response.json()
    assert verdict["is_flagged"] is True
    categories = [flag["category"] for flag in verdict["flags"]]
    assert "profanity" in categories
 def test_prohibited_property():
    """A listing for protection-forest land is flagged as prohibited content."""
    payload = {"text": "Bán lô đất rừng phòng hộ 500m2", "context": "listing"}
    response = client.post("/moderation/check", json=payload)
    assert response.status_code == 200
    verdict = response.json()
    assert verdict["is_flagged"] is True
    categories = [flag["category"] for flag in verdict["flags"]]
    assert "prohibited_content" in categories