it('normalizes Vietnamese diacritics in indexed fields', async () => {
  // Property text that mixes tone marks, the Đ letter and upper-case characters.
  const accentedProperty = {
    ...mockListing.property,
    title: 'Căn hộ cao cấp',
    description: 'Biệt thự đẹp',
    address: '123 Đường Nguyễn Huệ',
    ward: 'Phường Bến Nghé',
    district: 'Quận 1',
    city: 'Hồ Chí Minh',
    projectName: 'Vinhomes Bason',
  };
  const accentedListing = { ...mockListing, property: accentedProperty };

  mockPrisma.listing.findUnique.mockResolvedValue(accentedListing);
  mockPrisma.$queryRaw.mockResolvedValue([{ lat: 10.776, lng: 106.700 }]);

  const doc = await service.fetchListingDocumentById('listing-1');

  // Each normalized field must hold the lower-case, accent-free form of its source text.
  const expectedAscii = [
    ['titleNormalized', 'can ho cao cap'],
    ['descriptionNormalized', 'biet thu dep'],
    ['addressNormalized', '123 duong nguyen hue'],
    ['wardNormalized', 'phuong ben nghe'],
    ['districtNormalized', 'quan 1'],
    ['cityNormalized', 'ho chi minh'],
    ['projectNameNormalized', 'vinhomes bason'],
  ] as const;

  for (const [field, ascii] of expectedAscii) {
    expect(doc![field]).toBe(ascii);
  }
});
it('search queries both original and normalized fields', async () => {
  // An empty result set is enough: only the outgoing Typesense request is inspected.
  documentOps.search.mockResolvedValue({ hits: [], found: 0, search_time_ms: 1 });

  const accentedSearch: SearchParams = { query: 'căn hộ', page: 1, perPage: 20 };
  await repo.search(accentedSearch);

  const [request] = documentOps.search.mock.calls[0]!;
  const { query_by: queryBy, num_typos: numTypos, q: sentQuery } = request;

  for (const field of ['titleNormalized', 'addressNormalized']) {
    expect(queryBy).toContain(field);
  }
  expect(numTypos).toBe('2');
  // The query sent to Typesense carries the accented text and its ASCII form.
  for (const form of ['căn hộ', 'can ho']) {
    expect(sentQuery).toContain(form);
  }
});

it('ensureCollection upserts Vietnamese synonyms', async () => {
  const upsertSpy = vi.fn().mockResolvedValue({});
  collectionOps.synonyms.mockReturnValue({ upsert: upsertSpy });
  // The collection already exists, so only the synonym step is under test here.
  collectionOps.retrieve.mockResolvedValue({ name: 'listings' });

  await repo.ensureCollection();

  // Spot-check one rule rather than pinning the whole synonym table.
  const hcmRule = expect.objectContaining({
    synonyms: expect.arrayContaining(['hcm', 'ho chi minh']),
  });
  expect(upsertSpy).toHaveBeenCalled();
  expect(upsertSpy).toHaveBeenCalledWith('hcm', hcmRule);
});
+1,7 @@ import { Inject, Injectable } from '@nestjs/common'; import { Prisma } from '@prisma/client'; import { LoggerService, PrismaService } from '@modules/shared'; +import { Address } from '@modules/listings/domain/value-objects/address.vo'; import { SEARCH_REPOSITORY, type ISearchRepository, @@ -124,6 +125,15 @@ export class ListingIndexerService { isFeatured: l.featuredUntil && l.featuredUntil > new Date() ? featuredTierWeight(l.featuredPackage as string | null) : 0, + + // Vietnamese diacritic-normalized fields + titleNormalized: Address.normalize(p.title), + descriptionNormalized: Address.normalize(p.description), + addressNormalized: Address.normalize(p.address), + wardNormalized: Address.normalize(p.ward), + districtNormalized: Address.normalize(p.district), + cityNormalized: Address.normalize(p.city), + projectNameNormalized: p.projectName ? Address.normalize(p.projectName) : null, }; }); } @@ -176,6 +186,15 @@ export class ListingIndexerService { isFeatured: listing.featuredUntil && listing.featuredUntil > new Date() ? featuredTierWeight(listing.featuredPackage as string | null) : 0, + + // Vietnamese diacritic-normalized fields + titleNormalized: Address.normalize(p.title), + descriptionNormalized: Address.normalize(p.description), + addressNormalized: Address.normalize(p.address), + wardNormalized: Address.normalize(p.ward), + districtNormalized: Address.normalize(p.district), + cityNormalized: Address.normalize(p.city), + projectNameNormalized: p.projectName ? 
Address.normalize(p.projectName) : null, }; } diff --git a/apps/api/src/modules/search/infrastructure/services/search-result-mapper.ts b/apps/api/src/modules/search/infrastructure/services/search-result-mapper.ts index f0ce8ba..2ac8c8b 100644 --- a/apps/api/src/modules/search/infrastructure/services/search-result-mapper.ts +++ b/apps/api/src/modules/search/infrastructure/services/search-result-mapper.ts @@ -1,4 +1,5 @@ import { type ListingDocument } from '../../domain/repositories/search.repository'; +import { Address } from '@modules/listings/domain/value-objects/address.vo'; export interface RawListingRow { listingId: string; @@ -64,5 +65,14 @@ export function mapRowToListingDocument(row: RawListingRow): ListingDocument { legalStatus: row.legalStatus ?? null, amenities: Array.isArray(row.amenities) ? (row.amenities as string[]) : [], isFeatured: row.featuredUntil && new Date(row.featuredUntil) > new Date() ? 1 : 0, + + // Vietnamese diacritic-normalized fields + titleNormalized: Address.normalize(row.title), + descriptionNormalized: Address.normalize(row.description), + addressNormalized: Address.normalize(row.address), + wardNormalized: Address.normalize(row.ward), + districtNormalized: Address.normalize(row.district), + cityNormalized: Address.normalize(row.city), + projectNameNormalized: row.projectName ? 
Address.normalize(row.projectName) : null, }; } diff --git a/apps/api/src/modules/search/infrastructure/services/typesense-search.repository.ts b/apps/api/src/modules/search/infrastructure/services/typesense-search.repository.ts index cb65e71..82d203f 100644 --- a/apps/api/src/modules/search/infrastructure/services/typesense-search.repository.ts +++ b/apps/api/src/modules/search/infrastructure/services/typesense-search.repository.ts @@ -2,6 +2,7 @@ import { Injectable } from '@nestjs/common'; import { type Client as TypesenseClient } from 'typesense'; import { type CollectionCreateSchema } from 'typesense/lib/Typesense/Collections'; import { LoggerService } from '@modules/shared'; +import { Address } from '@modules/listings/domain/value-objects/address.vo'; import { type ISearchRepository, type ListingDocument, @@ -12,6 +13,41 @@ import { TypesenseClientService } from './typesense-client.service'; const COLLECTION_NAME = 'listings'; +/** + * Vietnamese district abbreviation synonyms — maps common shortened forms + * to their full diacritic variants so users can search either way. 
/**
 * Synonym rules upserted into the listings collection.
 *
 * The table covers three groups, not only districts:
 *   1. Ho Chi Minh City districts (numbered "q1".."q12" and named ones),
 *   2. property-type terms (apartment, townhouse, villa, land plot),
 *   3. city aliases for Ho Chi Minh City.
 *
 * Every rule lists the accented spelling next to its ASCII spelling so either
 * form of a query reaches the same documents. `id` is used as the synonym
 * rule id when the rule is upserted; `synonyms` is sent as the rule payload.
 *
 * Two-letter abbreviations that carry another common meaning in listing text
 * are deliberately left out, because a synonym would rewrite the unrelated
 * meaning into a district name:
 *   - "tp" (also "thành phố", and the first token of "tp hcm" below),
 *   - "pn" (also "phòng ngủ", as in "2 pn"),
 *   - "bt" (also "biệt thự").
 */
const VIETNAMESE_SYNONYMS: ReadonlyArray<{ id: string; synonyms: string[] }> = [
  // --- Numbered districts ---
  { id: 'q1', synonyms: ['q1', 'quan 1', 'quận 1', 'q.1'] },
  // NOTE(review): this rule makes "thu duc" equivalent to District 2 only.
  // Confirm this is the intended scope for "thu duc" queries.
  { id: 'q2', synonyms: ['q2', 'quan 2', 'quận 2', 'q.2', 'thu duc', 'thủ đức'] },
  { id: 'q3', synonyms: ['q3', 'quan 3', 'quận 3', 'q.3'] },
  { id: 'q4', synonyms: ['q4', 'quan 4', 'quận 4', 'q.4'] },
  { id: 'q5', synonyms: ['q5', 'quan 5', 'quận 5', 'q.5'] },
  { id: 'q6', synonyms: ['q6', 'quan 6', 'quận 6', 'q.6'] },
  { id: 'q7', synonyms: ['q7', 'quan 7', 'quận 7', 'q.7'] },
  { id: 'q8', synonyms: ['q8', 'quan 8', 'quận 8', 'q.8'] },
  { id: 'q9', synonyms: ['q9', 'quan 9', 'quận 9', 'q.9'] },
  { id: 'q10', synonyms: ['q10', 'quan 10', 'quận 10', 'q.10'] },
  { id: 'q11', synonyms: ['q11', 'quan 11', 'quận 11', 'q.11'] },
  { id: 'q12', synonyms: ['q12', 'quan 12', 'quận 12', 'q.12'] },

  // --- Named districts ---
  { id: 'binh-thanh', synonyms: ['binh thanh', 'bình thạnh'] },
  { id: 'tan-binh', synonyms: ['tan binh', 'tân bình', 'tb'] },
  { id: 'tan-phu', synonyms: ['tan phu', 'tân phú'] },
  { id: 'phu-nhuan', synonyms: ['phu nhuan', 'phú nhuận'] },
  { id: 'go-vap', synonyms: ['go vap', 'gò vấp', 'gv'] },
  { id: 'binh-tan', synonyms: ['binh tan', 'bình tân'] },
  { id: 'nha-be', synonyms: ['nha be', 'nhà bè'] },
  { id: 'can-gio', synonyms: ['can gio', 'cần giờ'] },
  { id: 'cu-chi', synonyms: ['cu chi', 'củ chi'] },
  { id: 'hoc-mon', synonyms: ['hoc mon', 'hóc môn'] },
  { id: 'binh-chanh', synonyms: ['binh chanh', 'bình chánh'] },

  // --- Property types ---
  { id: 'can-ho', synonyms: ['can ho', 'căn hộ', 'chung cu', 'chung cư'] },
  { id: 'nha-pho', synonyms: ['nha pho', 'nhà phố'] },
  { id: 'biet-thu', synonyms: ['biet thu', 'biệt thự'] },
  { id: 'dat-nen', synonyms: ['dat nen', 'đất nền'] },

  // --- City aliases ---
  { id: 'hcm', synonyms: ['hcm', 'ho chi minh', 'hồ chí minh', 'tp hcm', 'tphcm', 'sai gon', 'sài gòn'] },
];
/**
 * Upserts every rule in VIETNAMESE_SYNONYMS into the listings collection.
 *
 * Each rule is upserted independently, so one failing rule no longer stops
 * the remaining rules from being written. The upserts run concurrently; the
 * rule table is a small fixed list.
 *
 * Best-effort by design: this method never throws. Rules that could not be
 * written are reported in a single warning that names each failed rule id.
 *
 * @returns a promise that resolves once every upsert has settled.
 */
async ensureSynonyms(): Promise<void> {
  const outcomes = await Promise.all(
    VIETNAMESE_SYNONYMS.map(async ({ id, synonyms }): Promise<string | null> => {
      try {
        // Client access stays inside the try so a failure to obtain it is
        // reported like any other upsert failure instead of escaping.
        await this.client
          .collections(COLLECTION_NAME)
          .synonyms()
          .upsert(id, { synonyms });
        return null;
      } catch (err: unknown) {
        return `${id}: ${err instanceof Error ? err.message : String(err)}`;
      }
    }),
  );

  const failures = outcomes.filter((outcome): outcome is string => outcome !== null);
  const succeeded = outcomes.length - failures.length;

  if (succeeded > 0) {
    this.logger.log(
      `Upserted ${succeeded} Vietnamese synonym rules`,
      'TypesenseSearch',
    );
  }
  if (failures.length > 0) {
    this.logger.warn(
      `Failed to upsert ${failures.length} of ${outcomes.length} synonym rules: ${failures.join('; ')}`,
      'TypesenseSearch',
    );
  }
}
`${filterBy} && ${geoFilter}` : geoFilter; } + const rawQuery = params.query || '*'; + // For non-wildcard queries, also search the normalized (ASCII) form + // so "can ho" matches "căn hộ" via the normalized fields. + const normalizedQuery = rawQuery !== '*' ? Address.normalize(rawQuery) : rawQuery; + const effectiveQuery = rawQuery !== '*' && normalizedQuery !== rawQuery + ? `${rawQuery} ${normalizedQuery}` + : rawQuery; + const searchParams = { - q: params.query || '*', - query_by: 'title,description,address,district,city,projectName', - query_by_weights: '5,3,2,2,1,2', + q: effectiveQuery, + query_by: 'title,description,address,district,city,projectName,titleNormalized,descriptionNormalized,addressNormalized,districtNormalized,cityNormalized,projectNameNormalized', + query_by_weights: '5,3,2,2,1,2,5,3,2,2,1,2', filter_by: filterBy, sort_by: this.buildSortBy(params), page, per_page: perPage, + num_typos: '2', highlight_full_fields: 'title,description', highlight_start_tag: '', highlight_end_tag: '',