# goodgo-platform/libs/ai-services/tests/test_nlp.py

from fastapi.testclient import TestClient

from app.main import app

# Shared HTTP test client bound to the application under test; the tests in
# this module send their requests through it.
# NOTE(review): the client is created without a ``with`` block; per the
# FastAPI/Starlette testing docs, startup/lifespan handlers only run inside
# the context manager — confirm the app does not depend on them.
client = TestClient(app)

# Vietnamese apartment listing reused by several tests in this module. The
# text mentions area, district, room counts, amenities (pool, gym, 24/7
# security), a nearby school and supermarket, legal status and price.
SAMPLE_LISTING = (
    "Bán căn hộ chung cư cao cấp 85m² tại quận 7, 2 phòng ngủ, 2 WC, "
    "3 tầng, nội thất đầy đủ. Có hồ bơi, phòng gym, bảo vệ 24/7. "
    "Gần trường học và siêu thị. Sổ hồng chính chủ. Giá 3.5 tỷ."
)


def test_analyze_returns_tags():
    """Check that analysing the sample listing yields the expected tags.

    Posts ``SAMPLE_LISTING`` to ``/nlp/analyze`` and asserts that every
    expected amenity, location and condition/legal tag appears among the
    ``tag`` values of the response's ``tags`` list.
    """
    response = client.post("/nlp/analyze", json={"text": SAMPLE_LISTING})
    assert response.status_code == 200

    found = [entry["tag"] for entry in response.json()["tags"]]

    expected = (
        # Amenities
        "hồ bơi",
        "phòng gym",
        "bảo vệ 24/7",
        # Location
        "gần trường học",
        "gần siêu thị",
        # Condition / legal
        "sổ hồng",
        "chính chủ",
    )
    for tag in expected:
        assert tag in found


def test_analyze_quality_scores():
    """Check the quality scores returned for the sample listing.

    Each of ``overall``, ``completeness``, ``readability`` and
    ``information_density`` must lie in the half-open interval (0, 1], and
    ``moderation_score`` must be present and equal to ``0.0``.
    """
    response = client.post("/nlp/analyze", json={"text": SAMPLE_LISTING})
    assert response.status_code == 200
    scores = response.json()["quality"]

    bounded_metrics = (
        "overall",
        "completeness",
        "readability",
        "information_density",
    )
    for metric in bounded_metrics:
        assert 0 < scores[metric] <= 1

    moderation = scores["moderation_score"]
    assert moderation is not None
    assert moderation == 0.0  # clean listing


def test_analyze_completeness_low_for_sparse_text():
    """Check that a very short listing gets a completeness score below 0.3."""
    payload = {"text": "Bán nhà đẹp giá tốt"}
    response = client.post("/nlp/analyze", json=payload)
    assert response.status_code == 200

    completeness = response.json()["quality"]["completeness"]
    assert completeness < 0.3


def test_analyze_tokens_present():
    """Check that the response carries non-empty ``tokens`` and ``sentences``."""
    response = client.post("/nlp/analyze", json={"text": SAMPLE_LISTING})
    assert response.status_code == 200

    body = response.json()
    for field in ("tokens", "sentences"):
        assert len(body[field]) > 0


def test_analyze_no_moderation():
    """Check that ``moderation_score`` is ``None`` when moderation is disabled.

    The request sets ``include_moderation`` to ``False``.
    """
    payload = {"text": SAMPLE_LISTING, "include_moderation": False}
    response = client.post("/nlp/analyze", json=payload)
    assert response.status_code == 200

    moderation = response.json()["quality"]["moderation_score"]
    assert moderation is None


def test_analyze_flagged_content_reduces_quality():
    """Check that a listing containing a phone number gets a positive moderation score.

    NOTE(review): despite the test name, only ``moderation_score > 0`` is
    asserted here; no other quality score is compared against a clean
    listing.
    """
    listing_with_phone = (
        "Bán căn hộ 80m² 2 phòng ngủ quận 1. Liên hệ 0912345678. "
        "Sổ hồng chính chủ. Giá 5 tỷ."
    )
    response = client.post("/nlp/analyze", json={"text": listing_with_phone})
    assert response.status_code == 200

    moderation = response.json()["quality"]["moderation_score"]
    assert moderation > 0  # phone number flagged


def test_batch_analyze():
    """Check that ``/nlp/batch-analyze`` returns one result per input text.

    Sends two listings with moderation enabled, then asserts that the first
    result carries the ``hồ bơi`` tag and the second the ``mặt tiền đường``
    tag.
    """
    listings = [
        "Bán căn hộ 60m² có hồ bơi gần trường học. Sổ đỏ. 2 tỷ.",
        "Bán đất nền 200m² mặt tiền đường lớn. Pháp lý rõ ràng.",
    ]
    response = client.post(
        "/nlp/batch-analyze",
        json={"texts": listings, "include_moderation": True},
    )
    assert response.status_code == 200

    results = response.json()["results"]
    assert len(results) == 2

    first, second = results
    assert any(item["tag"] == "hồ bơi" for item in first["tags"])
    assert any(item["tag"] == "mặt tiền đường" for item in second["tags"])


def test_analyze_location_tags():
    """Check that location phrases in the text come back as tags.

    The listing mentions a riverside position, a nearby metro line and the
    city centre; each corresponding tag must appear in the response.
    """
    listing = "Căn hộ ven sông Sài Gòn, gần metro số 1, trung tâm thành phố."
    response = client.post("/nlp/analyze", json={"text": listing})
    assert response.status_code == 200

    found = [entry["tag"] for entry in response.json()["tags"]]
    for tag in ("ven sông", "gần metro", "trung tâm thành phố"):
        assert tag in found


def test_analyze_condition_tags():
    """Check that property-condition phrases in the text come back as tags.

    Only the ``mới xây`` and ``hoàn thiện cơ bản`` tags are asserted; the
    remaining phrases in the input text are not checked.
    """
    listing = "Nhà mới xây, hoàn thiện cơ bản, 3 tầng, đang thi công sắp bàn giao."
    response = client.post("/nlp/analyze", json={"text": listing})
    assert response.status_code == 200

    found = [entry["tag"] for entry in response.json()["tags"]]
    for tag in ("mới xây", "hoàn thiện cơ bản"):
        assert tag in found