feat(search): configure Typesense for Vietnamese diacritic search

Add normalized (ASCII-only) fields to Typesense schema and indexer so
users can search without diacritics (e.g. "can ho" finds "căn hộ").
Create synonym collection for HCMC district abbreviations and common
property-type aliases. Enable num_typos:2 for fuzzy matching.

- Add 7 normalized fields (title, description, address, ward, district,
  city, projectName) using Address.normalize() at index time
- Search now queries both the original Vietnamese fields and their normalized counterparts
- Upsert 28 Vietnamese synonym rules on collection init
- Normalize the user query to ASCII and use it alongside the original query for dual matching
- Update tests for new fields and synonym upsert behavior

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Ho Ngoc Hai
2026-04-23 00:41:14 +07:00
parent 36a9b00cf1
commit 7a854373b3
6 changed files with 195 additions and 3 deletions

View File

@@ -1,4 +1,5 @@
import { type ListingDocument } from '../../domain/repositories/search.repository';
import { Address } from '@modules/listings/domain/value-objects/address.vo';
export interface RawListingRow {
listingId: string;
@@ -64,5 +65,14 @@ export function mapRowToListingDocument(row: RawListingRow): ListingDocument {
legalStatus: row.legalStatus ?? null,
amenities: Array.isArray(row.amenities) ? (row.amenities as string[]) : [],
isFeatured: row.featuredUntil && new Date(row.featuredUntil) > new Date() ? 1 : 0,
// Vietnamese diacritic-normalized fields
titleNormalized: Address.normalize(row.title),
descriptionNormalized: Address.normalize(row.description),
addressNormalized: Address.normalize(row.address),
wardNormalized: Address.normalize(row.ward),
districtNormalized: Address.normalize(row.district),
cityNormalized: Address.normalize(row.city),
projectNameNormalized: row.projectName ? Address.normalize(row.projectName) : null,
};
}