feat(listings): implement listing duplicate detection service
Add DuplicateDetector domain service that flags potential duplicate listings using PostGIS ST_DWithin geo-proximity (100m radius by default) combined with trigram-based title similarity (≥70% threshold by default). Detection runs during CreateListing but never blocks creation — warnings are returned in the response for seller/admin review. Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
@@ -0,0 +1,112 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { type PropertyType } from '@prisma/client';
|
||||
import { type PrismaService } from '@modules/shared/infrastructure/prisma.service';
|
||||
import {
|
||||
type DuplicateCandidate,
|
||||
type DuplicateCheckParams,
|
||||
type IDuplicateDetector,
|
||||
} from '../../domain/services/duplicate-detector';
|
||||
|
||||
/**
 * One row of the raw geo-proximity query issued by
 * `PrismaDuplicateDetector.findDuplicates`.
 *
 * Field names are snake_case because they mirror the SQL column aliases of
 * that query rather than the camelCase domain model.
 */
interface NearbyRow {
  /** `Listing.id` of the listing joined to the nearby property. */
  listing_id: string;
  /** `Property.id` of the nearby property. */
  property_id: string;
  /** Property title exactly as stored (not normalized). */
  title: string;
  /** Property address as stored. */
  address: string;
  /** Property district as stored. */
  district: string;
  /** Property type; the query only returns rows matching the probed type. */
  property_type: PropertyType;
  /** Result of `ST_Distance` between the property and the probed point, both cast to geography. */
  distance_meters: number;
}
|
||||
|
||||
/**
 * Prisma/PostGIS-backed implementation of `IDuplicateDetector`.
 *
 * Detection is two-phase:
 *  1. a raw SQL query selects listings of the same property type whose
 *     property lies within a radius of the probed coordinates;
 *  2. the titles of those rows are compared with the probed title in the
 *     application layer using trigram similarity.
 *
 * NOTE(review): `PrismaService` is imported with `import { type ... }` in this
 * file. Reflection-based Nest constructor injection normally needs a runtime
 * class reference — confirm this provider is wired through an explicit
 * factory/token, or make the import a value import.
 */
@Injectable()
export class PrismaDuplicateDetector implements IDuplicateDetector {
  constructor(private readonly prisma: PrismaService) {}

  /**
   * Find listings that look like duplicates of the described property.
   *
   * @param params Probe description. Reads `latitude`, `longitude`, `title`,
   *   `propertyType` and `excludePropertyId`; `radiusMeters` defaults to 100
   *   and `minTitleSimilarity` defaults to 0.7 when not provided.
   * @returns Candidates whose title similarity is at least the threshold, in
   *   ascending distance order. At most 20 nearby rows are inspected, and that
   *   limit is applied before the title filter.
   */
  async findDuplicates(params: DuplicateCheckParams): Promise<DuplicateCandidate[]> {
    const radiusMeters = params.radiusMeters ?? 100;
    const minSimilarity = params.minTitleSimilarity ?? 0.7;

    // Step 1: Find nearby properties using PostGIS ST_DWithin.
    // NOTE(review): if `excludePropertyId` can ever be null/undefined, the
    // `p."id" != NULL` comparison matches no rows at all — confirm the
    // parameter is always set by callers.
    const nearbyRows = await this.prisma.$queryRaw<NearbyRow[]>`
      SELECT
        l."id" AS listing_id,
        p."id" AS property_id,
        p."title",
        p."address",
        p."district",
        p."propertyType" AS property_type,
        ST_Distance(
          p."location"::geography,
          ST_SetSRID(ST_MakePoint(${params.longitude}, ${params.latitude}), 4326)::geography
        ) AS distance_meters
      FROM "Property" p
      INNER JOIN "Listing" l ON l."propertyId" = p."id"
      WHERE p."id" != ${params.excludePropertyId}
        AND p."propertyType" = ${params.propertyType}::"PropertyType"
        AND l."status" NOT IN ('SOLD', 'RENTED', 'EXPIRED', 'REJECTED', 'CANCELLED')
        AND ST_DWithin(
          p."location"::geography,
          ST_SetSRID(ST_MakePoint(${params.longitude}, ${params.latitude}), 4326)::geography,
          ${radiusMeters}
        )
      ORDER BY distance_meters ASC
      LIMIT 20
    `;

    // Step 2: Compute title similarity in the application layer
    // (avoids a pg_trgm dependency in the database).
    const normalizedInput = normalizeTitle(params.title);
    const scored = nearbyRows.map((row) => ({
      row,
      similarity: trigramSimilarity(normalizedInput, normalizeTitle(row.title)),
    }));

    // The threshold is applied to the unrounded score: rounding first would
    // let a raw 0.696 become 0.70 and slip past a 0.7 cut-off.
    const candidates = scored
      .filter(({ similarity }) => similarity >= minSimilarity)
      .map(({ row, similarity }) => ({
        listingId: row.listing_id,
        propertyId: row.property_id,
        title: row.title,
        address: row.address,
        district: row.district,
        distanceMeters: Number(row.distance_meters),
        // Rounded to two decimals for presentation only.
        titleSimilarity: Math.round(similarity * 100) / 100,
        propertyType: row.property_type,
      }));

    return candidates;
  }
}
|
||||
|
||||
/**
 * Canonicalize a listing title for comparison.
 *
 * Steps, in order: compose to Unicode NFC, lowercase, drop every character
 * that is not a letter, number or whitespace, collapse whitespace runs to a
 * single space, and trim.
 *
 * NFC composition must come first. In decomposed (NFD) text the Vietnamese
 * tone/diacritic marks are separate combining characters, which the
 * letter/number filter would strip, so the same title typed in NFD and NFC
 * would otherwise normalize to different strings.
 *
 * @param title Raw title text.
 * @returns The normalized title; may be empty.
 */
function normalizeTitle(title: string): string {
  return title
    .normalize('NFC')
    .toLowerCase()
    .replace(/[^\p{L}\p{N}\s]/gu, '')
    .replace(/\s+/g, ' ')
    .trim();
}
|
||||
|
||||
/**
 * Trigram-based similarity score in the range 0–1.
 *
 * Computes the Jaccard index (|A ∩ B| / |A ∪ B|) of the trigram sets that
 * `extractTrigrams` produces for each input. This is comparable in spirit to
 * pg_trgm's `similarity()` but is not guaranteed to give the same number,
 * because the trigram extraction rules differ.
 *
 * @param a First string (expected to be already normalized).
 * @param b Second string (expected to be already normalized).
 * @returns 1 for identical strings (including two empty strings); 0 when
 *   either string is shorter than three characters and they differ;
 *   otherwise the Jaccard index of the two trigram sets.
 */
function trigramSimilarity(a: string, b: string): number {
  if (a === b) return 1;

  // Very short strings only count as similar when identical, and that case
  // has already returned above.
  if (a.length < 3 || b.length < 3) return 0;

  const trigramsA = extractTrigrams(a);
  const trigramsB = extractTrigrams(b);

  let shared = 0;
  for (const tri of trigramsA) {
    if (trigramsB.has(tri)) shared++;
  }

  const union = trigramsA.size + trigramsB.size - shared;
  return union === 0 ? 0 : shared / union;
}
|
||||
|
||||
/**
 * Collect the distinct three-character windows of `s`.
 *
 * The string is padded with one space on each side first, so the first and
 * last characters also take part in boundary trigrams. An empty input yields
 * an empty set because the padded string is shorter than one window.
 *
 * @param s Text to split into trigrams.
 * @returns The set of distinct trigrams, in first-occurrence order.
 */
function extractTrigrams(s: string): Set<string> {
  // Array.from splits by code point, so a character outside the BMP is never
  // cut into two lone surrogates. For BMP-only text this matches code-unit slicing.
  const chars = Array.from(` ${s} `);
  const trigrams = new Set<string>();
  for (let start = 0; start + 3 <= chars.length; start++) {
    trigrams.add(chars.slice(start, start + 3).join(''));
  }
  return trigrams;
}
|
||||
Reference in New Issue
Block a user