/** * Sync OSM points-of-interest into the `Poi` table. * * Usage: * NODE_OPTIONS="-r dotenv/config" DOTENV_CONFIG_PATH=.env \ * pnpm tsx scripts/sync-osm-poi.ts \ * [--category=school,hospital,...|all] [--chunk=NAME] [--dry-run] * * What it does: * 1. For each requested category, queries Overpass for the matching * node/way/relation across the 4 Vietnam chunks. * 2. Filters out non-Vietnam centroids (cross-border bleed) and rows * without any Latin/Vietnamese letters in the name. * 3. Resolves provinceCode/districtCode/wardCode via PostGIS lookup * against `vn_provinces` / `vn_districts` / `vn_wards` (assumes * Phase 0 boundary sync ran first). * 4. Upserts on `osmId`, honouring `osmLocked` + `lockedFields`. */ import 'dotenv/config'; import area from '@turf/area'; import centroid from '@turf/centroid'; import { createId } from '@paralleldrive/cuid2'; import { PrismaPg } from '@prisma/adapter-pg'; import { type Prisma, PrismaClient } from '@prisma/client'; import type { Feature, MultiPolygon, Polygon, Point } from 'geojson'; import osmtogeojson from 'osmtogeojson'; import pg from 'pg'; import { isPointInVietnam } from './data/vn-country-polygon'; const pool = new pg.Pool({ connectionString: process.env['DATABASE_URL'] }); const adapter = new PrismaPg(pool); const prisma = new PrismaClient({ adapter }); const OVERPASS_URL = process.env['OVERPASS_URL'] ?? 'https://overpass-api.de/api/interpreter'; interface BBox { south: number; west: number; north: number; east: number; } const CHUNKS: Record = { north: { south: 19.0, west: 102.0, north: 23.5, east: 110.0 }, northCentral: { south: 16.5, west: 102.0, north: 19.0, east: 110.0 }, southCentral: { south: 13.0, west: 102.0, north: 16.5, east: 110.0 }, south: { south: 8.0, west: 102.0, north: 13.0, east: 110.0 }, }; type PoiCategoryKey = | 'SCHOOL_PRIMARY' | 'SCHOOL_SECONDARY' | 'UNIVERSITY' | 'HOSPITAL' | 'CLINIC' | 'PHARMACY' | 'MARKET' | 'SUPERMARKET' | 'MALL' | 'CONVENIENCE' | 'BANK' | 'ATM' | 'PARK' | 'GAS_STATION' | 'POLICE' | 'POST_OFFICE' | 'METRO_STATION' | 'RAILWAY_STATION' | 'BUS_STATION' | 'AIRPORT'; /** * For each category, the Overpass selector. We query node/way/relation * to catch both single points and named building polygons. */ const CATEGORY_QUERIES: Record = { // ── Education ───────────────────────────────────────────────────────── SCHOOL_PRIMARY: '["amenity"="school"]["isced:level"~"^(primary|0|1)$"]', SCHOOL_SECONDARY: '["amenity"="school"]["isced:level"~"^(secondary|2|3)$"]', UNIVERSITY: '["amenity"~"^(university|college)$"]', // ── Health ──────────────────────────────────────────────────────────── HOSPITAL: '["amenity"="hospital"]', CLINIC: '["amenity"="clinic"]', PHARMACY: '["amenity"="pharmacy"]', // ── Commerce ────────────────────────────────────────────────────────── MARKET: '["amenity"="marketplace"]', SUPERMARKET: '["shop"="supermarket"]', MALL: '["shop"="mall"]', CONVENIENCE: '["shop"="convenience"]', // ── Finance ─────────────────────────────────────────────────────────── BANK: '["amenity"="bank"]', ATM: '["amenity"="atm"]', // ── Recreation / Services ──────────────────────────────────────────── PARK: '["leisure"="park"]', GAS_STATION: '["amenity"="fuel"]', POLICE: '["amenity"="police"]', POST_OFFICE: '["amenity"="post_office"]', // ── Transport (stations / airports — lines live in TransportLine) ──── METRO_STATION: '["railway"="station"]["station"="subway"]', RAILWAY_STATION: '["railway"="station"]["station"!="subway"]', BUS_STATION: '["amenity"="bus_station"]', AIRPORT: '["aeroway"="aerodrome"]["aerodrome:type"~"international|public"]', }; const ALL_CATEGORIES: PoiCategoryKey[] = Object.keys(CATEGORY_QUERIES) as PoiCategoryKey[]; // ─── CLI ─────────────────────────────────────────────────────────────────── const argv = process.argv.slice(2); const dryRun = argv.includes('--dry-run'); const chunkArg = argv.find((a) => a.startsWith('--chunk='))?.slice('--chunk='.length); const categoryArg = argv.find((a) => a.startsWith('--category='))?.slice('--category='.length) ?? 'all'; const wantedCategories: PoiCategoryKey[] = categoryArg === 'all' ? ALL_CATEGORIES : (categoryArg .split(',') .map((s) => s.trim().toUpperCase()) .filter((s): s is PoiCategoryKey => ALL_CATEGORIES.includes(s as PoiCategoryKey)) as PoiCategoryKey[]); if (wantedCategories.length === 0) { console.error(`No valid categories. Available: ${ALL_CATEGORIES.join(', ')}`); process.exit(1); } interface OverpassResult { elements: unknown[]; } async function fetchChunk( category: PoiCategoryKey, chunkName: string, bbox: BBox, ): Promise { const sel = CATEGORY_QUERIES[category]; const query = ` [out:json][timeout:180]; ( node${sel}(${bbox.south},${bbox.west},${bbox.north},${bbox.east}); way${sel}(${bbox.south},${bbox.west},${bbox.north},${bbox.east}); relation${sel}(${bbox.south},${bbox.west},${bbox.north},${bbox.east}); ); out body geom; `; const t0 = Date.now(); console.log(` → ${category} ${chunkName}…`); const res = await fetch(OVERPASS_URL, { method: 'POST', headers: { 'Content-Type': 'application/x-www-form-urlencoded', 'User-Agent': 'goodgo-osm-poi-sync/1.0 (https://goodgo.vn)', }, body: 'data=' + encodeURIComponent(query), }); if (!res.ok) { const body = await res.text(); throw new Error(`Overpass ${res.status}: ${body.slice(0, 200)}`); } const json = (await res.json()) as OverpassResult; console.log( ` ← ${category} ${chunkName}: ${json.elements?.length ?? 0} elements in ${( (Date.now() - t0) / 1000 ).toFixed(1)}s`, ); return json; } interface ParsedPoi { category: PoiCategoryKey; osmId: bigint; osmType: 'NODE' | 'WAY' | 'RELATION'; name: string; nameEn: string | null; centroid: { lng: number; lat: number }; address: string | null; tags: Record; } function parseFeature( feat: Feature, category: PoiCategoryKey, ): ParsedPoi | null { const featAny = feat as unknown as { id?: unknown }; const idStr = String(featAny.id ?? ''); const slashIdx = idStr.indexOf('/'); if (slashIdx < 0) return null; const typeStr = idStr.slice(0, slashIdx).toUpperCase(); if (typeStr !== 'NODE' && typeStr !== 'WAY' && typeStr !== 'RELATION') return null; const osmType = typeStr as 'NODE' | 'WAY' | 'RELATION'; const osmId = BigInt(idStr.slice(slashIdx + 1)); const propsRaw = (feat.properties ?? {}) as Record; const tagsRaw = propsRaw['tags']; const tags: Record = tagsRaw && typeof tagsRaw === 'object' ? (tagsRaw as Record) : (propsRaw as Record); const name = tags['name:vi'] ?? tags['name'] ?? null; // Skip purely unnamed POIs (very common for shop=convenience etc.) if (!name) return null; // Skip rows without Latin/Vietnamese letters (cross-border bleed). if (!/[A-Za-zÀ-ỹ]/.test(name)) return null; let cLng: number; let cLat: number; if (feat.geometry.type === 'Point') { [cLng, cLat] = feat.geometry.coordinates; } else { const c = centroid(feat as Feature); [cLng, cLat] = c.geometry.coordinates; } if (!isPointInVietnam(cLng, cLat)) return null; const address = tags['addr:full'] ?? [tags['addr:housenumber'], tags['addr:street']].filter(Boolean).join(' ') ?? null; return { category, osmId, osmType, name, nameEn: tags['name:en'] ?? null, centroid: { lng: cLng, lat: cLat }, address: address || null, tags, }; } interface UpsertStats { inserted: number; updated: number; locked: number; skipped: number; } async function upsertPoi(parsed: ParsedPoi, stats: UpsertStats): Promise { const existing = await prisma.poi.findUnique({ where: { osmId: parsed.osmId }, select: { id: true, osmLocked: true, lockedFields: true }, }); if (existing?.osmLocked) { stats.locked++; return; } // Resolve admin codes from the polygon tables. const admin = await prisma.$queryRawUnsafe< { provinceCode: string | null; districtCode: string | null; wardCode: string | null }[] >( `WITH p AS ( SELECT code FROM "vn_provinces" WHERE geometry IS NOT NULL AND ST_Contains(geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326)) LIMIT 1 ), d AS ( SELECT d.code FROM "vn_districts" d JOIN p ON p.code = d."provinceCode" WHERE d.geometry IS NOT NULL AND ST_Contains(d.geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326)) LIMIT 1 ) SELECT (SELECT code FROM p) AS "provinceCode", (SELECT code FROM d) AS "districtCode", (SELECT w.code FROM "vn_wards" w JOIN d ON d.code = w."districtCode" WHERE w.geometry IS NOT NULL AND ST_Contains(w.geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326)) LIMIT 1) AS "wardCode"`, parsed.centroid.lng, parsed.centroid.lat, ); const provinceCode = admin[0]?.provinceCode ?? null; const districtCode = admin[0]?.districtCode ?? null; const wardCode = admin[0]?.wardCode ?? null; if (!existing) { const cuid = createId(); await prisma.$executeRawUnsafe( ` INSERT INTO "Poi" ( id, category, name, "nameEn", location, address, "provinceCode", "districtCode", "wardCode", "osmId", "osmType", "osmTags", "dataSource", "isPublic", "lastSyncedAt", "createdAt", "updatedAt" ) VALUES ( $1, $2::"PoiCategory", $3, $4, ST_SetSRID(ST_MakePoint($5, $6), 4326), $7, $8, $9, $10, $11::bigint, $12::"OsmType", $13::jsonb, 'OSM'::"OsmDataSource", true, NOW(), NOW(), NOW() ) `, cuid, parsed.category, parsed.name, parsed.nameEn, parsed.centroid.lng, parsed.centroid.lat, parsed.address, provinceCode, districtCode, wardCode, parsed.osmId.toString(), parsed.osmType, JSON.stringify(parsed.tags), ); stats.inserted++; } else { // Update — respect lockedFields list. const locked = new Set(existing.lockedFields ?? []); const data: Prisma.PoiUpdateInput = { lastSyncedAt: new Date(), osmTags: JSON.stringify(parsed.tags) as unknown as Prisma.InputJsonValue, }; if (!locked.has('name')) data.name = parsed.name; if (!locked.has('nameEn')) data.nameEn = parsed.nameEn; if (!locked.has('address')) data.address = parsed.address; if (!locked.has('provinceCode')) data.provinceCode = provinceCode; if (!locked.has('districtCode')) data.districtCode = districtCode; if (!locked.has('wardCode')) data.wardCode = wardCode; await prisma.poi.update({ where: { id: existing.id }, data }); // Location update via raw SQL (Prisma can't write `Unsupported` columns). if (!locked.has('location')) { await prisma.$executeRawUnsafe( `UPDATE "Poi" SET location = ST_SetSRID(ST_MakePoint($1, $2), 4326) WHERE id = $3`, parsed.centroid.lng, parsed.centroid.lat, existing.id, ); } stats.updated++; } } async function processCategoryChunk( category: PoiCategoryKey, chunkName: string, bbox: BBox, stats: UpsertStats, ): Promise { const result = await fetchChunk(category, chunkName, bbox); const fc = osmtogeojson(result, { flatProperties: false }); const features = (fc.features as Feature[]).filter( (f) => f.geometry?.type === 'Point' || f.geometry?.type === 'Polygon' || f.geometry?.type === 'MultiPolygon', ); for (const feat of features) { const parsed = parseFeature(feat, category); if (!parsed) continue; if (dryRun) { stats.inserted++; continue; } try { await upsertPoi(parsed, stats); } catch (err) { console.error( ` ✗ ${category} ${parsed.name}: ${err instanceof Error ? err.message : err}`, ); stats.skipped++; } } } async function main(): Promise { console.log(`📍 OSM POI sync: categories=${wantedCategories.join(',')} dryRun=${dryRun}`); const chunks = chunkArg ? { [chunkArg]: CHUNKS[chunkArg]! } : CHUNKS; const totals: Record = {}; for (const cat of wantedCategories) { console.log(`\n=== ${cat} ===`); const s: UpsertStats = { inserted: 0, updated: 0, locked: 0, skipped: 0 }; for (const [name, bbox] of Object.entries(chunks)) { try { await processCategoryChunk(cat, name, bbox, s); } catch (err) { console.error(` ✗ chunk ${name} (${cat}) failed:`, err); } } totals[cat] = s; console.log( ` ✓ ${cat}: inserted=${s.inserted} updated=${s.updated} locked=${s.locked} skipped=${s.skipped}`, ); } console.log('\n📊 Totals:'); for (const cat of wantedCategories) { const s = totals[cat]!; console.log( ` ${cat.padEnd(20)} inserted=${s.inserted} updated=${s.updated} locked=${s.locked} skipped=${s.skipped}`, ); } } main() .catch((err) => { console.error(err); process.exitCode = 1; }) .finally(async () => { await prisma.$disconnect(); await pool.end(); });