/** * Sync Vietnam administrative boundaries from OpenStreetMap into the * `vn_provinces` / `vn_districts` / `vn_wards` tables. * * Usage: * NODE_OPTIONS="-r dotenv/config" DOTENV_CONFIG_PATH=.env \ * pnpm tsx scripts/sync-osm-admin-boundaries.ts \ * [--level=4|6|8|all] [--dry-run] [--chunk=NAME] * * What it does: * 1. Queries Overpass for `boundary=administrative + admin_level=N` * relations clipped to the Vietnam bbox (split into 4 chunks). * 2. Converts each relation's outer rings into a MultiPolygon GeoJSON. * 3. Looks up the GSO code from OSM tags (`ref:VN`, `gso_code`, * `iso_code`, fallback to slugified name → existing seed row). * 4. Upserts the row, writing geometry + centroid + areaKm2 + osmId. * * Coverage targets: * admin_level=4 → 63 provinces (cities of central authority + 58 tỉnh) * admin_level=6 → ~700 districts (quận / huyện / thị xã / TP thuộc tỉnh) * admin_level=8 → ~11.000 wards (phường / xã / thị trấn) * * Notes: * • Vietnam reformed wards in 2025 (some merged). We track historic * names via `vn_administrative_aliases` — this script populates that * table when an OSM tag `was:name` differs from the current name. * • Wards (level 8) are the heaviest pull (~11k polygons). We always * chunk them into 4 geographic slices to dodge Overpass timeouts. */ import 'dotenv/config'; import { PrismaPg } from '@prisma/adapter-pg'; import { PrismaClient } from '@prisma/client'; import area from '@turf/area'; import centroid from '@turf/centroid'; import type { Feature, MultiPolygon, Polygon } from 'geojson'; import osmtogeojson from 'osmtogeojson'; import pg from 'pg'; import { isPointInVietnam } from './data/vn-country-polygon'; const pool = new pg.Pool({ connectionString: process.env['DATABASE_URL'] }); const adapter = new PrismaPg(pool); const prisma = new PrismaClient({ adapter }); const OVERPASS_URL = process.env['OVERPASS_URL'] ?? 'https://overpass-api.de/api/interpreter'; interface BBox { south: number; west: number; north: number; east: number; } /** Same chunks the KCN sync uses — keeps Overpass query budget reasonable. */ const CHUNKS: Record = { north: { south: 19.0, west: 102.0, north: 23.5, east: 110.0 }, northCentral: { south: 16.5, west: 102.0, north: 19.0, east: 110.0 }, southCentral: { south: 13.0, west: 102.0, north: 16.5, east: 110.0 }, south: { south: 8.0, west: 102.0, north: 13.0, east: 110.0 }, }; // ─── CLI ────────────────────────────────────────────────────────────────── const argv = process.argv.slice(2); const dryRun = argv.includes('--dry-run'); const chunkArg = argv.find((a) => a.startsWith('--chunk='))?.slice('--chunk='.length); const levelArg = argv.find((a) => a.startsWith('--level='))?.slice('--level='.length) ?? 'all'; const wantedLevels: number[] = levelArg === 'all' ? [4, 6, 8] : levelArg .split(',') .map((s) => Number(s.trim())) .filter((n) => [4, 6, 8].includes(n)); // ─── Slug helper (matches GSO codename style) ────────────────────────────── function slugify(name: string): string { return name .toLowerCase() .replace(/đ/g, 'd') .normalize('NFD') .replace(/[̀-ͯ]/g, '') .replace(/[^a-z0-9]+/g, '_') .replace(/^_+|_+$/g, ''); } // ─── Overpass fetch ──────────────────────────────────────────────────────── interface OverpassResult { elements: unknown[]; } async function fetchChunk(level: number, name: string, bbox: BBox): Promise { // `out geom` returns the relation members with inline geometry so we can // assemble polygons without a second roundtrip. Timeout 300s for level=8. const query = ` [out:json][timeout:300]; relation ["boundary"="administrative"] ["admin_level"="${level}"] (${bbox.south},${bbox.west},${bbox.north},${bbox.east}); out body geom; `; console.log(` → fetching level=${level} chunk="${name}"…`); const start = Date.now(); const res = await fetch(OVERPASS_URL, { method: 'POST', headers: { 'Content-Type': 'application/x-www-form-urlencoded', 'User-Agent': 'goodgo-osm-admin-sync/1.0 (https://goodgo.vn)', }, body: 'data=' + encodeURIComponent(query), }); if (!res.ok) { const body = await res.text(); throw new Error(`Overpass returned ${res.status}: ${body.slice(0, 200)}`); } const json = (await res.json()) as OverpassResult; console.log( ` ← level=${level} ${name}: ${json.elements?.length ?? 0} relations in ${( (Date.now() - start) / 1000 ).toFixed(1)}s`, ); return json; } // ─── Per-feature parser ──────────────────────────────────────────────────── interface ParsedAdmin { level: 4 | 6 | 8; osmId: bigint; name: string; nameEn: string | null; gsoCode: string | null; type: string; // "Tỉnh" / "Quận" / "Phường" etc. geometry: MultiPolygon; // outer rings only centroid: { lng: number; lat: number }; areaKm2: number; population: number | null; rawTags: Record; } const PROVINCE_TYPE_MAP = (name: string): string => /^(Thành phố|TP\.?)\s+(Hà Nội|Hồ Chí Minh|Hải Phòng|Đà Nẵng|Cần Thơ)/i.test(name) ? 'Thành phố Trung ương' : 'Tỉnh'; const DISTRICT_TYPE_MAP = (name: string): string => { if (/^Quận/i.test(name)) return 'Quận'; if (/^Huyện/i.test(name)) return 'Huyện'; if (/^Thị xã/i.test(name)) return 'Thị xã'; if (/^Thành phố/i.test(name)) return 'Thành phố thuộc tỉnh'; return 'Quận'; }; const WARD_TYPE_MAP = (name: string): string => { if (/^Phường/i.test(name)) return 'Phường'; if (/^Xã/i.test(name)) return 'Xã'; if (/^Thị trấn/i.test(name)) return 'Thị trấn'; return 'Xã'; }; function parseFeature( feat: Feature, level: 4 | 6 | 8, ): ParsedAdmin | null { const propsRaw = feat.properties as Record | null; if (!propsRaw) return null; // osmtogeojson encodes the prefixed id on `feat.id` ("relation/123") and // the bare numeric id under `properties.id`. We only kept relations. const featAny = feat as unknown as { id?: unknown }; const idStr = String(featAny.id ?? propsRaw['id'] ?? ''); if (!idStr.startsWith('relation/')) return null; const osmId = BigInt(idStr.slice('relation/'.length)); const tagsRaw = propsRaw['tags']; const tags: Record = tagsRaw && typeof tagsRaw === 'object' ? (tagsRaw as Record) : (propsRaw as Record); const name = tags['name:vi'] ?? tags['name'] ?? null; if (!name) return null; // Skip rows without any Latin/Vietnamese letter (cross-border bleed). if (!/[A-Za-zÀ-ỹ]/.test(name)) return null; const nameEn = tags['name:en'] ?? null; const gsoCode = tags['ref:VN'] ?? tags['gso_code'] ?? tags['ref'] ?? tags['iso_code'] ?? null; const populationRaw = tags['population']; const population = populationRaw && /^\d+$/.test(populationRaw) ? Number(populationRaw) : null; // Normalise to MultiPolygon regardless of source (Polygon → wrap once). const geom: MultiPolygon = feat.geometry.type === 'Polygon' ? { type: 'MultiPolygon', coordinates: [feat.geometry.coordinates] } : feat.geometry; const c = centroid(feat as Feature); const [cLng, cLat] = c.geometry.coordinates; // Geographic gate: drop relations whose centroid sits outside the VN // mainland polygon (China / Laos / Cambodia bleed across the bbox). if (!isPointInVietnam(cLng, cLat)) return null; const areaKm2 = Math.round((area(feat as Feature) / 1_000_000) * 100) / 100; let type: string; if (level === 4) type = PROVINCE_TYPE_MAP(name); else if (level === 6) type = DISTRICT_TYPE_MAP(name); else type = WARD_TYPE_MAP(name); return { level, osmId, name, nameEn, gsoCode, type, geometry: geom, centroid: { lng: cLng, lat: cLat }, areaKm2, population, rawTags: tags, }; } // ─── Resolve to existing GSO code or generate a synthetic one ───────────── async function resolveProvinceCode(p: ParsedAdmin): Promise { if (p.gsoCode) { const exists = await prisma.vnProvince.findUnique({ where: { code: p.gsoCode } }); if (exists) return p.gsoCode; } // Fallback: lookup by codename slug. const codename = slugify(p.name); const byCodename = await prisma.vnProvince.findFirst({ where: { codename } }); if (byCodename) return byCodename.code; // Brand-new: derive a code from osmId so it's stable. return `OSM_${p.osmId.toString()}`; } async function resolveDistrictCode(p: ParsedAdmin, provinceCode: string): Promise { if (p.gsoCode) { const exists = await prisma.vnDistrict.findUnique({ where: { code: p.gsoCode } }); if (exists) return p.gsoCode; } const codename = slugify(p.name); const byCodename = await prisma.vnDistrict.findFirst({ where: { codename, provinceCode }, }); if (byCodename) return byCodename.code; return `OSM_${p.osmId.toString()}`; } async function resolveWardCode(p: ParsedAdmin, districtCode: string): Promise { if (p.gsoCode) { const exists = await prisma.vnWard.findUnique({ where: { code: p.gsoCode } }); if (exists) return p.gsoCode; } const codename = slugify(p.name); const byCodename = await prisma.vnWard.findFirst({ where: { codename, districtCode }, }); if (byCodename) return byCodename.code; return `OSM_${p.osmId.toString()}`; } // ─── Upsert helpers — raw SQL because Prisma can't manage geometry ──────── function geomSql(g: MultiPolygon): string { const json = JSON.stringify(g).replace(/'/g, "''"); return `ST_Multi(ST_GeomFromGeoJSON('${json}'))`; } interface UpsertStats { inserted: number; updated: number; skipped: number; } async function upsertProvince(p: ParsedAdmin, stats: UpsertStats): Promise { const code = await resolveProvinceCode(p); const codename = slugify(p.name); const existed = await prisma.vnProvince.findUnique({ where: { code }, select: { code: true } }); await prisma.$executeRawUnsafe( ` INSERT INTO "vn_provinces" ( code, name, "nameEn", type, codename, "osmId", "areaKm2", population, "lastSyncedAt", "updatedAt", geometry, centroid ) VALUES ( $1, $2, $3, $4, $5, $6::bigint, $7, $8, NOW(), NOW(), ${geomSql(p.geometry)}, ST_SetSRID(ST_MakePoint($9, $10), 4326) ) ON CONFLICT (code) DO UPDATE SET name = EXCLUDED.name, "nameEn" = EXCLUDED."nameEn", type = EXCLUDED.type, "osmId" = EXCLUDED."osmId", "areaKm2" = EXCLUDED."areaKm2", population = COALESCE(EXCLUDED.population, "vn_provinces".population), "lastSyncedAt" = NOW(), "updatedAt" = NOW(), geometry = EXCLUDED.geometry, centroid = EXCLUDED.centroid `, code, p.name, p.nameEn, p.type, codename, p.osmId.toString(), p.areaKm2, p.population, p.centroid.lng, p.centroid.lat, ); if (existed) stats.updated++; else stats.inserted++; } async function upsertDistrict(p: ParsedAdmin, stats: UpsertStats): Promise { // Find which province contains this district by ST_Within against existing // synced province polygons. Falls back to province with largest overlap. const provinceMatch = await prisma.$queryRawUnsafe<{ code: string }[]>( `SELECT code FROM "vn_provinces" WHERE geometry IS NOT NULL AND ST_Contains(geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326)) LIMIT 1`, p.centroid.lng, p.centroid.lat, ); if (provinceMatch.length === 0) { stats.skipped++; return; // Cannot place district until provinces are synced first. } const provinceCode = provinceMatch[0]!.code; const code = await resolveDistrictCode(p, provinceCode); const codename = slugify(p.name); const existed = await prisma.vnDistrict.findUnique({ where: { code }, select: { code: true } }); await prisma.$executeRawUnsafe( ` INSERT INTO "vn_districts" ( code, "provinceCode", name, "nameEn", type, codename, "osmId", "areaKm2", population, "lastSyncedAt", "updatedAt", geometry, centroid ) VALUES ( $1, $2, $3, $4, $5, $6, $7::bigint, $8, $9, NOW(), NOW(), ${geomSql(p.geometry)}, ST_SetSRID(ST_MakePoint($10, $11), 4326) ) ON CONFLICT (code) DO UPDATE SET "provinceCode" = EXCLUDED."provinceCode", name = EXCLUDED.name, "nameEn" = EXCLUDED."nameEn", type = EXCLUDED.type, "osmId" = EXCLUDED."osmId", "areaKm2" = EXCLUDED."areaKm2", population = COALESCE(EXCLUDED.population, "vn_districts".population), "lastSyncedAt" = NOW(), "updatedAt" = NOW(), geometry = EXCLUDED.geometry, centroid = EXCLUDED.centroid `, code, provinceCode, p.name, p.nameEn, p.type, codename, p.osmId.toString(), p.areaKm2, p.population, p.centroid.lng, p.centroid.lat, ); if (existed) stats.updated++; else stats.inserted++; } async function upsertWard(p: ParsedAdmin, stats: UpsertStats): Promise { const districtMatch = await prisma.$queryRawUnsafe<{ code: string }[]>( `SELECT code FROM "vn_districts" WHERE geometry IS NOT NULL AND ST_Contains(geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326)) LIMIT 1`, p.centroid.lng, p.centroid.lat, ); if (districtMatch.length === 0) { stats.skipped++; return; } const districtCode = districtMatch[0]!.code; const code = await resolveWardCode(p, districtCode); const codename = slugify(p.name); const existed = await prisma.vnWard.findUnique({ where: { code }, select: { code: true } }); await prisma.$executeRawUnsafe( ` INSERT INTO "vn_wards" ( code, "districtCode", name, "nameEn", type, codename, "osmId", "areaKm2", population, "lastSyncedAt", "updatedAt", geometry, centroid ) VALUES ( $1, $2, $3, $4, $5, $6, $7::bigint, $8, $9, NOW(), NOW(), ${geomSql(p.geometry)}, ST_SetSRID(ST_MakePoint($10, $11), 4326) ) ON CONFLICT (code) DO UPDATE SET "districtCode" = EXCLUDED."districtCode", name = EXCLUDED.name, "nameEn" = EXCLUDED."nameEn", type = EXCLUDED.type, "osmId" = EXCLUDED."osmId", "areaKm2" = EXCLUDED."areaKm2", population = COALESCE(EXCLUDED.population, "vn_wards".population), "lastSyncedAt" = NOW(), "updatedAt" = NOW(), geometry = EXCLUDED.geometry, centroid = EXCLUDED.centroid `, code, districtCode, p.name, p.nameEn, p.type, codename, p.osmId.toString(), p.areaKm2, p.population, p.centroid.lng, p.centroid.lat, ); if (existed) stats.updated++; else stats.inserted++; } // ─── Main ───────────────────────────────────────────────────────────────── async function processChunk( level: 4 | 6 | 8, chunkName: string, bbox: BBox, ): Promise { const stats: UpsertStats = { inserted: 0, updated: 0, skipped: 0 }; const result = await fetchChunk(level, chunkName, bbox); const fc = osmtogeojson(result, { flatProperties: false }); const features = (fc.features as Feature[]).filter( (f) => f.geometry?.type === 'Polygon' || f.geometry?.type === 'MultiPolygon', ); for (const feat of features) { const parsed = parseFeature(feat, level); if (!parsed) continue; if (dryRun) { stats.inserted++; continue; } try { if (level === 4) await upsertProvince(parsed, stats); else if (level === 6) await upsertDistrict(parsed, stats); else await upsertWard(parsed, stats); } catch (err) { console.error(` ✗ ${parsed.name}: ${err instanceof Error ? err.message : err}`); stats.skipped++; } } console.log( ` ✓ level=${level} ${chunkName}: inserted=${stats.inserted} updated=${stats.updated} skipped=${stats.skipped}`, ); return stats; } async function main(): Promise { console.log('🌏 OSM admin boundaries sync starting'); console.log(` levels: ${wantedLevels.join(',')}, chunks: ${chunkArg ?? 'all'}, dryRun=${dryRun}`); const chunks = chunkArg ? { [chunkArg]: CHUNKS[chunkArg]! } : CHUNKS; const totals: Record = { 4: { inserted: 0, updated: 0, skipped: 0 }, 6: { inserted: 0, updated: 0, skipped: 0 }, 8: { inserted: 0, updated: 0, skipped: 0 }, }; // ALWAYS process levels in order 4 → 6 → 8, because 6 needs province // polygons in the DB to assign provinceCode (and 8 needs districts). for (const level of wantedLevels.sort() as (4 | 6 | 8)[]) { console.log(`\n=== Level ${level} ===`); for (const [name, bbox] of Object.entries(chunks)) { try { const s = await processChunk(level, name, bbox); totals[level]!.inserted += s.inserted; totals[level]!.updated += s.updated; totals[level]!.skipped += s.skipped; } catch (err) { console.error(` ✗ chunk ${name} (level ${level}) failed:`, err); } } } console.log('\n📊 Totals'); for (const lvl of wantedLevels) { const t = totals[lvl]!; console.log( ` level=${lvl}: inserted=${t.inserted} updated=${t.updated} skipped=${t.skipped}`, ); } } main() .catch((err) => { console.error(err); process.exitCode = 1; }) .finally(async () => { await prisma.$disconnect(); await pool.end(); });