From b392bc3570c7d1354e0970250e7298768f3fd403 Mon Sep 17 00:00:00 2001 From: Ho Ngoc Hai Date: Wed, 8 Apr 2026 03:08:39 +0700 Subject: [PATCH] feat(ai-services): add Python FastAPI AI/ML services container MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create libs/ai-services/ with FastAPI app providing: - POST /avm/predict — XGBoost-backed property price prediction (heuristic fallback) - POST /avm/extract-features — Vietnamese NLP feature extraction from listing text - POST /moderation/check — content moderation with rule-based flagging - GET /health — health check endpoint Includes Dockerfile (Python 3.12), docker-compose integration, Pydantic models, and 9 passing tests covering all endpoints. Co-Authored-By: Paperclip --- docker-compose.yml | 74 ++++++ libs/ai-services/.gitignore | 5 + libs/ai-services/Dockerfile | 31 +++ libs/ai-services/app/__init__.py | 0 libs/ai-services/app/config.py | 13 + libs/ai-services/app/main.py | 28 +++ libs/ai-services/app/models/__init__.py | 0 libs/ai-services/app/models/avm.py | 48 ++++ libs/ai-services/app/models/moderation.py | 20 ++ libs/ai-services/app/routers/__init__.py | 0 libs/ai-services/app/routers/avm.py | 23 ++ libs/ai-services/app/routers/moderation.py | 12 + libs/ai-services/app/services/__init__.py | 0 libs/ai-services/app/services/avm_service.py | 229 ++++++++++++++++++ .../app/services/moderation_service.py | 96 ++++++++ libs/ai-services/pyproject.toml | 30 +++ libs/ai-services/tests/__init__.py | 0 libs/ai-services/tests/test_avm.py | 59 +++++ libs/ai-services/tests/test_health.py | 12 + libs/ai-services/tests/test_moderation.py | 50 ++++ 20 files changed, 730 insertions(+) create mode 100644 libs/ai-services/.gitignore create mode 100644 libs/ai-services/Dockerfile create mode 100644 libs/ai-services/app/__init__.py create mode 100644 libs/ai-services/app/config.py create mode 100644 libs/ai-services/app/main.py create mode 100644 
libs/ai-services/app/models/__init__.py create mode 100644 libs/ai-services/app/models/avm.py create mode 100644 libs/ai-services/app/models/moderation.py create mode 100644 libs/ai-services/app/routers/__init__.py create mode 100644 libs/ai-services/app/routers/avm.py create mode 100644 libs/ai-services/app/routers/moderation.py create mode 100644 libs/ai-services/app/services/__init__.py create mode 100644 libs/ai-services/app/services/avm_service.py create mode 100644 libs/ai-services/app/services/moderation_service.py create mode 100644 libs/ai-services/pyproject.toml create mode 100644 libs/ai-services/tests/__init__.py create mode 100644 libs/ai-services/tests/test_avm.py create mode 100644 libs/ai-services/tests/test_health.py create mode 100644 libs/ai-services/tests/test_moderation.py diff --git a/docker-compose.yml b/docker-compose.yml index 2e3f3b5..864a6d0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -81,6 +81,76 @@ services: networks: - goodgo-net + ai-services: + build: + context: ./libs/ai-services + dockerfile: Dockerfile + container_name: goodgo-ai-services + restart: unless-stopped + ports: + - '${AI_SERVICES_PORT:-8000}:8000' + environment: + AI_DEBUG: ${AI_DEBUG:-false} + AI_LOG_LEVEL: ${AI_LOG_LEVEL:-info} + healthcheck: + test: ['CMD', 'python', '-c', 'import httpx; httpx.get("http://localhost:8000/health").raise_for_status()'] + interval: 30s + timeout: 5s + retries: 5 + start_period: 30s + networks: + - goodgo-net + + prometheus: + image: prom/prometheus:v2.51.0 + container_name: goodgo-prometheus + restart: unless-stopped + ports: + - '${PROMETHEUS_PORT:-9090}:9090' + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.retention.time=15d' + - '--web.enable-lifecycle' + volumes: + - ./monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus_data:/prometheus + extra_hosts: + - 'host.docker.internal:host-gateway' + healthcheck: + test: ['CMD', 'wget', '--spider', '-q', 
'http://localhost:9090/-/healthy'] + interval: 15s + timeout: 5s + retries: 3 + start_period: 10s + networks: + - goodgo-net + + grafana: + image: grafana/grafana:10.4.1 + container_name: goodgo-grafana + restart: unless-stopped + ports: + - '${GRAFANA_PORT:-3002}:3000' + environment: + GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin} + GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:-admin} + GF_USERS_ALLOW_SIGN_UP: 'false' + volumes: + - ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro + - ./monitoring/grafana/dashboards:/var/lib/grafana/dashboards:ro + - grafana_data:/var/lib/grafana + depends_on: + prometheus: + condition: service_healthy + healthcheck: + test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3000/api/health'] + interval: 15s + timeout: 5s + retries: 3 + start_period: 15s + networks: + - goodgo-net + volumes: pgdata: driver: local @@ -90,6 +160,10 @@ volumes: driver: local minio_data: driver: local + prometheus_data: + driver: local + grafana_data: + driver: local networks: goodgo-net: diff --git a/libs/ai-services/.gitignore b/libs/ai-services/.gitignore new file mode 100644 index 0000000..825ec0f --- /dev/null +++ b/libs/ai-services/.gitignore @@ -0,0 +1,5 @@ +__pycache__/ +*.pyc +*.egg-info/ +.pytest_cache/ +dist/ diff --git a/libs/ai-services/Dockerfile b/libs/ai-services/Dockerfile new file mode 100644 index 0000000..8096154 --- /dev/null +++ b/libs/ai-services/Dockerfile @@ -0,0 +1,31 @@ +FROM python:3.12-slim + +WORKDIR /app + +# Install system deps for underthesea / numpy +RUN apt-get update && \ + apt-get install -y --no-install-recommends gcc g++ && \ + rm -rf /var/lib/apt/lists/* + +COPY pyproject.toml . +RUN pip install --no-cache-dir . 
"""FastAPI application entry point for the Goodgo AI services container."""

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from app.config import settings
from app.routers import avm, moderation

app = FastAPI(
    title=settings.app_name,
    version="0.1.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# Every origin is allowed, so credentialed (cookie / Authorization) cross-origin
# requests are switched off: allowing both at once would let any website issue
# authenticated requests on a user's behalf. Replace the wildcard with an
# explicit origin list before turning credentials back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(avm.router)
app.include_router(moderation.router)


@app.get("/health")
def health() -> dict[str, str]:
    """Return a static liveness payload naming this service."""
    return {"status": "ok", "service": settings.app_name}
from pydantic import BaseModel, Field


# Request body of the moderation check endpoint.
class ModerationRequest(BaseModel):
    # Must contain at least one character.
    text: str = Field(min_length=1, description="Text content to moderate")
    # Free-form string; "listing" is used when the caller omits it.
    context: str = Field(
        default="listing",
        description="Context: listing, comment, profile",
    )


# One rule hit: which category fired, how severe it is, and the text that matched.
class ModerationFlag(BaseModel):
    category: str
    severity: str = Field(description="low, medium, high")
    matched_text: str
    reason: str


# Aggregate moderation verdict returned to the caller.
class ModerationResponse(BaseModel):
    is_flagged: bool
    # Constrained to the closed interval [0, 1].
    score: float = Field(ge=0, le=1, description="Overall risk score")
    flags: list[ModerationFlag] = Field(default_factory=list)
    cleaned_text: str | None = Field(
        default=None,
        description="Text with flagged content redacted",
    )
"""Automated valuation (AVM) and listing feature-extraction services."""

import logging
import re
from pathlib import Path

import numpy as np

from app.models.avm import (
    AVMPredictRequest,
    AVMPredictResponse,
    ExtractedFeatures,
    FeatureExtractRequest,
    FeatureExtractResponse,
)

logger = logging.getLogger(__name__)

# Property type encoding for the model
PROPERTY_TYPE_MAP = {
    "apartment": 0,
    "house": 1,
    "townhouse": 2,
    "villa": 3,
    "land": 4,
    "shophouse": 5,
}

# City-level price multiplier (baseline: millions VND/m²)
CITY_BASELINE = {
    "hà nội": 85.0,
    "hồ chí minh": 90.0,
    "đà nẵng": 45.0,
    "hải phòng": 35.0,
    "cần thơ": 25.0,
}
DEFAULT_BASELINE = 30.0

# Heuristic price multiplier per property type; unknown types use 1.0.
_HEURISTIC_TYPE_MULTIPLIER = {
    "apartment": 0.9,
    "house": 1.0,
    "townhouse": 1.1,
    "villa": 1.4,
    "land": 0.7,
    "shophouse": 1.3,
}

# Value fed to the model when the request carries no construction year.
_DEFAULT_YEAR_BUILT = 2020


def _normalize(value: str) -> str:
    """Return *value* trimmed and lower-cased for dictionary lookups."""
    return value.strip().lower()


def _parse_decimal(raw: str) -> float:
    """Parse a number that may use a comma as its decimal separator."""
    return float(raw.replace(",", "."))


class AVMService:
    """Automated Valuation Model service.

    Uses XGBoost when a trained model is available,
    falls back to heuristic pricing for development/demo.
    """

    def __init__(self) -> None:
        self._model = None
        self._load_model()

    def _load_model(self) -> None:
        """Load ``avm_model.json`` from the configured model directory.

        A missing file is the expected development setup and is logged at
        info level. A file that exists but cannot be loaded (corrupt model,
        broken xgboost install) is logged with its traceback so the failure
        is not mistaken for "no model deployed". In both cases ``_model``
        stays ``None`` and predictions use the heuristic.
        """
        from app.config import settings

        model_file = Path(settings.model_path) / "avm_model.json"
        if not model_file.is_file():
            logger.info("No trained AVM model found — using heuristic fallback")
            return

        try:
            import xgboost as xgb

            booster = xgb.Booster()
            booster.load_model(str(model_file))
        except Exception:
            # Boundary around a third-party loader: keep the service up, but
            # make the real cause visible in the logs.
            logger.exception(
                "Failed to load AVM model from %s — using heuristic fallback",
                model_file,
            )
            return

        self._model = booster
        logger.info("Loaded XGBoost AVM model from %s", model_file)

    def predict(self, req: AVMPredictRequest) -> AVMPredictResponse:
        """Estimate a price with the loaded model, or the heuristic if none."""
        if self._model is not None:
            return self._predict_xgboost(req)
        return self._predict_heuristic(req)

    def _predict_xgboost(self, req: AVMPredictRequest) -> AVMPredictResponse:
        """Estimate a price from the XGBoost booster.

        The booster output is exponentiated, i.e. treated as a log-price.
        If that yields a non-finite or non-positive value, the heuristic
        estimate is returned instead, because such a value cannot be
        serialised as JSON.
        """
        import xgboost as xgb

        # Column order is part of the model contract — do not reorder.
        features = np.array(
            [[
                req.area,
                PROPERTY_TYPE_MAP.get(_normalize(req.property_type), 1),
                req.bedrooms,
                req.bathrooms,
                req.floors,
                req.frontage,
                req.road_width,
                req.year_built or _DEFAULT_YEAR_BUILT,
                1.0 if req.has_legal_paper else 0.0,
            ]]
        )
        pred_log = self._model.predict(xgb.DMatrix(features))[0]
        with np.errstate(over="ignore"):
            estimated = float(np.exp(pred_log))

        if not np.isfinite(estimated) or estimated <= 0.0:
            logger.warning(
                "AVM model produced an unusable price (%r) — using heuristic fallback",
                estimated,
            )
            return self._predict_heuristic(req)

        return AVMPredictResponse(
            estimated_price_vnd=estimated,
            confidence=0.82,
            price_per_m2=estimated / req.area,
            price_range_low=estimated * 0.85,
            price_range_high=estimated * 1.15,
        )

    def _predict_heuristic(self, req: AVMPredictRequest) -> AVMPredictResponse:
        """Estimate a price from city baseline and simple multipliers.

        All monetary outputs are rounded to the nearest thousand VND.
        """
        base = CITY_BASELINE.get(_normalize(req.city), DEFAULT_BASELINE)
        type_mult = _HEURISTIC_TYPE_MULTIPLIER.get(_normalize(req.property_type), 1.0)

        # Adjustments
        bedroom_adj = 1.0 + req.bedrooms * 0.02
        if req.frontage > 0:
            frontage_adj = 1.0 + (req.frontage / 10.0) * 0.15
        else:
            frontage_adj = 1.0
        legal_adj = 1.0 if req.has_legal_paper else 0.7

        # Baselines are in millions of VND per m², hence the final factor.
        price_per_m2 = base * type_mult * bedroom_adj * frontage_adj * legal_adj * 1_000_000
        estimated = price_per_m2 * req.area

        return AVMPredictResponse(
            estimated_price_vnd=round(estimated, -3),
            confidence=0.65,
            price_per_m2=round(price_per_m2, -3),
            price_range_low=round(estimated * 0.75, -3),
            price_range_high=round(estimated * 1.25, -3),
        )


class FeatureExtractService:
    """Extract real-estate features from Vietnamese listing text."""

    _AREA_PATTERN = re.compile(r"(\d+(?:[.,]\d+)?)\s*(?:m2|m²|mét vuông)", re.IGNORECASE)
    _BEDROOM_PATTERN = re.compile(r"(\d+)\s*(?:phòng ngủ|pn|PN)", re.IGNORECASE)
    _BATHROOM_PATTERN = re.compile(r"(\d+)\s*(?:phòng tắm|wc|WC|toilet)", re.IGNORECASE)
    _FLOOR_PATTERN = re.compile(r"(\d+)\s*(?:tầng|lầu)", re.IGNORECASE)
    _FRONTAGE_PATTERN = re.compile(r"(?:mặt tiền|ngang)\s*(\d+(?:[.,]\d+)?)\s*m", re.IGNORECASE)
    _ROAD_WIDTH_PATTERN = re.compile(r"(?:đường|hẻm)\s*(\d+(?:[.,]\d+)?)\s*m", re.IGNORECASE)
    # Group 2 is set only for the billion units (tỷ / tỉ); "triệu" leaves it
    # empty. Capturing the unit avoids re-slicing the source text to find it.
    _PRICE_PATTERN = re.compile(
        r"(\d+(?:[.,]\d+)?)\s*(?:(tỷ|tỉ)|triệu)", re.IGNORECASE
    )
    _LEGAL_KEYWORDS = ["sổ đỏ", "sổ hồng", "chính chủ", "pháp lý rõ ràng"]

    _PROPERTY_TYPES = {
        "căn hộ": "apartment",
        "chung cư": "apartment",
        "nhà phố": "townhouse",
        "nhà riêng": "house",
        "biệt thự": "villa",
        "đất": "land",
        "đất nền": "land",
        "shophouse": "shophouse",
    }

    # (feature attribute, pattern, converter) for every single-number feature.
    _NUMERIC_FIELDS = (
        ("area", _AREA_PATTERN, _parse_decimal),
        ("bedrooms", _BEDROOM_PATTERN, int),
        ("bathrooms", _BATHROOM_PATTERN, int),
        ("floors", _FLOOR_PATTERN, int),
        ("frontage", _FRONTAGE_PATTERN, _parse_decimal),
        ("road_width", _ROAD_WIDTH_PATTERN, _parse_decimal),
    )

    def extract(self, req: FeatureExtractRequest) -> FeatureExtractResponse:
        """Pull structured features, tokens and named entities out of a listing.

        Regex features use the first match of each pattern. Tokenisation and
        NER are best-effort: if underthesea is missing or fails, the regex
        features are still returned with whitespace-split tokens and no
        entities.
        """
        text = req.text
        text_lower = text.lower()
        features = ExtractedFeatures()

        # Only assign on a hit so unmatched fields stay at their defaults.
        for attr, pattern, convert in self._NUMERIC_FIELDS:
            hit = pattern.search(text)
            if hit:
                setattr(features, attr, convert(hit.group(1)))

        # Price
        hit = self._PRICE_PATTERN.search(text)
        if hit:
            multiplier = 1_000_000_000 if hit.group(2) else 1_000_000
            features.price_mentioned = _parse_decimal(hit.group(1)) * multiplier

        # Legal — NOTE: this is False (not None) when no keyword is present.
        features.has_legal_paper = any(kw in text_lower for kw in self._LEGAL_KEYWORDS)

        # Property type — first keyword, in declaration order, found in the text.
        for vn_type, en_type in self._PROPERTY_TYPES.items():
            if vn_type in text_lower:
                features.property_type = en_type
                break

        tokens, entities = self._run_nlp(text)
        return FeatureExtractResponse(
            features=features,
            tokens=tokens,
            entities=entities,
        )

    @staticmethod
    def _run_nlp(text: str) -> tuple[list[str], list[dict]]:
        """Tokenise *text* and collect named entities via underthesea.

        Returns ``(tokens, entities)``. Falls back to ``text.split()`` and an
        empty entity list when underthesea cannot be imported or raises.
        """
        try:
            from underthesea import ner, word_tokenize
        except ImportError:
            logger.warning("underthesea not available — skipping NLP tokenization")
            return text.split(), []

        try:
            tokens = word_tokenize(text)
            # Keep chunks with at least four fields whose fourth field (used
            # as the entity label) is not the "O" (outside) tag.
            entities = [
                {"text": chunk[0], "label": chunk[3]}
                for chunk in ner(text)
                if len(chunk) >= 4 and chunk[3] != "O"
            ]
        except Exception:
            # Runtime failures inside the NLP library must not discard the
            # regex features already extracted; log and degrade.
            logger.exception("underthesea failed — falling back to whitespace tokens")
            return text.split(), []

        return tokens, entities


avm_service = AVMService()
feature_extract_service = FeatureExtractService()
class ModerationService:
    """Rule-based content moderation driven by the module-level ``_RULES`` table."""

    # Weight of each severity label in the aggregate score; labels not listed
    # here fall back to ``_DEFAULT_WEIGHT``.
    _SEVERITY_WEIGHTS = {"low": 0.2, "medium": 0.5, "high": 0.9}
    _DEFAULT_WEIGHT = 0.5
    _PLACEHOLDER = "[REDACTED]"

    def check(self, req: ModerationRequest) -> ModerationResponse:
        """Run every rule pattern over ``req.text`` and build the verdict.

        Returns an unflagged response (score 0.0, text unchanged) when no
        pattern matches. Otherwise returns one flag per match, a score that
        blends the worst and the average severity weight, and the text with
        every matched region replaced by ``[REDACTED]``.
        """
        text = req.text
        flags: list[ModerationFlag] = []
        spans: list[tuple[int, int]] = []

        for rule in _RULES:
            for pattern in rule["patterns"]:
                for match in pattern.finditer(text):
                    flags.append(
                        ModerationFlag(
                            category=rule["category"],
                            severity=rule["severity"],
                            matched_text=match.group(),
                            reason=rule["reason"],
                        )
                    )
                    spans.append(match.span())

        if not flags:
            return ModerationResponse(
                is_flagged=False,
                score=0.0,
                flags=[],
                cleaned_text=text,
            )

        # Compute aggregate score: 70% worst offence, 30% average offence.
        weights = [
            self._SEVERITY_WEIGHTS.get(flag.severity, self._DEFAULT_WEIGHT)
            for flag in flags
        ]
        max_score = max(weights)
        avg_score = sum(weights) / len(weights)
        score = round(min(1.0, max_score * 0.7 + avg_score * 0.3), 3)

        return ModerationResponse(
            is_flagged=True,
            score=score,
            flags=flags,
            cleaned_text=self._redact(text, spans),
        )

    @classmethod
    def _redact(cls, text: str, spans: list[tuple[int, int]]) -> str:
        """Replace the given ``(start, end)`` regions of *text* with a placeholder.

        Working on match positions instead of ``str.replace`` on the matched
        strings means only text a pattern actually matched is removed, and
        overlapping or touching matches collapse into a single placeholder
        rather than leaving fragments of the longer match behind.
        """
        merged: list[list[int]] = []
        for start, end in sorted(spans):
            if start == end:
                continue  # nothing to hide for an empty match
            if merged and start <= merged[-1][1]:
                merged[-1][1] = max(merged[-1][1], end)
            else:
                merged.append([start, end])

        pieces: list[str] = []
        cursor = 0
        for start, end in merged:
            pieces.append(text[cursor:start])
            pieces.append(cls._PLACEHOLDER)
            cursor = end
        pieces.append(text[cursor:])
        return "".join(pieces)


moderation_service = ModerationService()
from fastapi.testclient import TestClient

from app.main import app

client = TestClient(app)


def _moderate(text: str, context: str) -> dict:
    # POST to the moderation endpoint, require HTTP 200, return the JSON body.
    response = client.post(
        "/moderation/check",
        json={"text": text, "context": context},
    )
    assert response.status_code == 200
    return response.json()


def _flagged_categories(body: dict) -> set:
    # Distinct rule categories present in a moderation response.
    return {flag["category"] for flag in body["flags"]}


def test_clean_text():
    # Harmless listing text must come back unflagged with a zero score.
    body = _moderate("Bán căn hộ đẹp tại quận 1", "listing")
    assert body["is_flagged"] is False
    assert body["score"] == 0.0


def test_phone_number_flagged():
    # A phone number is reported as contact info and redacted from the text.
    body = _moderate("Liên hệ 0912345678 để xem nhà", "listing")
    assert body["is_flagged"] is True
    assert "contact_info" in _flagged_categories(body)
    assert "[REDACTED]" in body["cleaned_text"]


def test_scam_language_flagged():
    # Fraud-related wording is reported under the "profanity" category.
    body = _moderate("Cảnh báo lừa đảo từ chủ nhà", "comment")
    assert body["is_flagged"] is True
    assert "profanity" in _flagged_categories(body)


def test_prohibited_property():
    # Listings mentioning a prohibited land type are flagged.
    body = _moderate("Bán lô đất rừng phòng hộ 500m2", "listing")
    assert body["is_flagged"] is True
    assert "prohibited_content" in _flagged_categories(body)