feat(ai-services): add Vietnamese NLP pipeline for property description analysis

Implement auto-tagging (amenities, location features, condition/legal),
content quality scoring with moderation integration, and FastAPI endpoints
for single and batch text analysis. Uses underthesea for Vietnamese
tokenization and POS tagging when available, with a regex fallback.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Ho Ngoc Hai
2026-04-08 22:42:31 +07:00
parent 944d6262e7
commit ee3ae2e81d
5 changed files with 431 additions and 1 deletions

View File

@@ -0,0 +1,27 @@
from fastapi import APIRouter
from app.models.nlp import (
BatchAnalyzeRequest,
BatchAnalyzeResponse,
NLPAnalyzeRequest,
NLPAnalyzeResponse,
)
from app.services.nlp_service import nlp_service
# Router for the NLP endpoints: every route registered on it is mounted under
# the "/nlp" path prefix and tagged "NLP".
router = APIRouter(prefix="/nlp", tags=["NLP"])
@router.post("/analyze", response_model=NLPAnalyzeResponse)
def analyze(req: NLPAnalyzeRequest) -> NLPAnalyzeResponse:
    """Analyze Vietnamese property description: auto-tag, quality score, tokenize."""
    # Thin HTTP adapter: the request model is handed to the shared NLP service
    # unchanged, and whatever the service returns becomes the response body.
    analysis = nlp_service.analyze(req)
    return analysis
@router.post("/batch-analyze", response_model=BatchAnalyzeResponse)
def batch_analyze(req: BatchAnalyzeRequest) -> BatchAnalyzeResponse:
    """Batch analyze multiple property descriptions."""
    # Texts are processed one at a time, in the order they appear in the
    # request; each is wrapped in its own single-text request that carries the
    # batch-level moderation flag.
    analyses = []
    for description in req.texts:
        single_request = NLPAnalyzeRequest(
            text=description,
            include_moderation=req.include_moderation,
        )
        analyses.append(nlp_service.analyze(single_request))
    return BatchAnalyzeResponse(results=analyses)