/**
 * OSM → goodgo Industrial Park bulk sync (PR 2/4 of OSM-sync project).
 *
 * Usage:
 *   NODE_OPTIONS="-r dotenv/config" DOTENV_CONFIG_PATH=.env \
 *     pnpm tsx scripts/sync-osm-industrial-parks.ts [--dry-run] [--chunk=north]
 *
 * Flags:
 *   --dry-run      Fetch from Overpass + show counts, don't write to DB.
 *   --chunk=NAME   Process only one chunk (north|northCentral|southCentral|south).
 *                  Default: process all four.
 *
 * Strategy:
 *   • Vietnam is split into 4 horizontal bbox slices (Overpass times out
 *     on the whole country at once).
 *   • For each chunk we ask Overpass for `landuse=industrial` ways +
 *     relations + nodes within the bbox, with inline geometry.
 *   • osmtogeojson normalises the OSM JSON into a GeoJSON FeatureCollection.
 *   • For each Feature we upsert a row keyed on `osmId`. The schema
 *     guarantees uniqueness via the constraint added in PR 1.
 *   • PostGIS columns (`location` Point + `boundary` MultiPolygon) are
 *     written via raw SQL because Prisma cannot manage Geometry types.
 *   • Nodes get `location` only — no boundary.
 *   • Conflict resolution: respect `osmLocked` and `lockedFields` from PR 1.
 */

import 'dotenv/config';

import { createId } from '@paralleldrive/cuid2';
import { PrismaPg } from '@prisma/adapter-pg';
import { type Prisma, PrismaClient } from '@prisma/client';
import area from '@turf/area';
import centroid from '@turf/centroid';
import type { Feature, MultiPolygon, Polygon, Point } from 'geojson';
import osmtogeojson from 'osmtogeojson';
import pg from 'pg';

import { isPointInVietnam } from './data/vn-country-polygon';
import { nearestProvince } from './data/vn-province-centroids';

/** Generates a fresh cuid2 primary key for a new row. */
const generateCuid = (): Promise<string> => Promise.resolve(createId());

// ─── Setup ────────────────────────────────────────────────────────────────

const pool = new pg.Pool({ connectionString: process.env['DATABASE_URL'] });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });

const OVERPASS_URL =
  process.env['OVERPASS_URL'] ?? 'https://overpass-api.de/api/interpreter';

/** Bounding box in degrees, in the (south, west, north, east) order Overpass expects. */
interface BBox {
  south: number;
  west: number;
  north: number;
  east: number;
}

/** Vietnam split horizontally — Overpass timeouts at country scale. */
const CHUNKS: Record<string, BBox> = {
  north: { south: 19.0, west: 102.0, north: 23.5, east: 110.0 },
  northCentral: { south: 16.5, west: 102.0, north: 19.0, east: 110.0 },
  southCentral: { south: 13.0, west: 102.0, north: 16.5, east: 110.0 },
  south: { south: 8.0, west: 102.0, north: 13.0, east: 110.0 },
};

// ─── CLI flags ────────────────────────────────────────────────────────────

const argv = process.argv.slice(2);
const dryRun = argv.includes('--dry-run');
const chunkArg = argv
  .find((a) => a.startsWith('--chunk='))
  ?.slice('--chunk='.length);

// ─── Province / region heuristic from centroid ────────────────────────────

/**
 * Approximate centroid → region using latitude bands. Good enough as a
 * fallback when the OSM tags lack `addr:state`; admin can correct later.
 *
 * @param lat Latitude of the site centroid in degrees.
 * @returns `NORTH` for lat ≥ 19, `CENTRAL` for 13 ≤ lat < 19, else `SOUTH`.
 */
function guessRegion(lat: number): 'NORTH' | 'CENTRAL' | 'SOUTH' {
  if (lat >= 19) return 'NORTH';
  if (lat >= 13) return 'CENTRAL';
  return 'SOUTH';
}

// ─── Helpers ──────────────────────────────────────────────────────────────

/**
 * Builds a URL-safe slug of the form `<ascii-name>-osm-<osmId>`.
 *
 * @param name  Display name (may contain Vietnamese diacritics).
 * @param osmId OSM element id, appended to the slug.
 * @returns The slug; the name part is capped at 60 characters and falls
 *          back to `kcn` when nothing ASCII-alphanumeric survives.
 */
function slugify(name: string, osmId: string): string {
  const base = name
    .toLowerCase()
    // `đ` has no NFD decomposition, so map it by hand before normalising.
    .replace(/đ/g, 'd')
    // Strip Vietnamese diacritics: decompose, then drop combining marks.
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '')
    .slice(0, 60);
  return `${base || 'kcn'}-osm-${osmId}`;
}

/** Minimal shape of the Overpass JSON response that this script relies on. */
interface OverpassResult {
  elements: unknown[];
}

/**
 * Fetches every `landuse=industrial` way, relation and node inside `bbox`
 * from the Overpass API, with inline geometry.
 *
 * @param name Chunk label, used for logging only.
 * @param bbox Bounding box to query.
 * @returns The raw Overpass JSON payload.
 * @throws Error when Overpass answers with a non-2xx status.
 */
async function fetchChunk(name: string, bbox: BBox): Promise<OverpassResult> {
  const box = `${bbox.south},${bbox.west},${bbox.north},${bbox.east}`;
  // NOTE(review): `out body` is documented as omitting element metadata such
  // as `version`; if so `osmVersion` is always null downstream — confirm, and
  // consider `out meta geom` if version tracking is wanted.
  const query = `
    [out:json][timeout:180];
    (
      way["landuse"="industrial"](${box});
      relation["landuse"="industrial"](${box});
      node["landuse"="industrial"](${box});
    );
    out body geom;
  `;

  console.log(` → fetching chunk "${name}" from Overpass…`);
  const start = Date.now();

  // Overpass expects the query in a `data=` body field with form-encoding.
  const res = await fetch(OVERPASS_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/x-www-form-urlencoded',
      'User-Agent': 'goodgo-osm-sync/1.0 (https://goodgo.vn)',
    },
    body: 'data=' + encodeURIComponent(query),
  });

  if (!res.ok) {
    const body = await res.text();
    throw new Error(`Overpass returned ${res.status}: ${body.slice(0, 200)}`);
  }

  const json = (await res.json()) as OverpassResult;
  console.log(
    ` ← ${name}: ${json.elements?.length ?? 0} elements in ${(
      (Date.now() - start) /
      1000
    ).toFixed(1)}s`,
  );
  return json;
}

// ─── Per-feature upsert ──────────────────────────────────────────────────

/** One OSM feature reduced to the fields written to `IndustrialPark`. */
interface ParsedFeature {
  osmId: bigint;
  osmType: 'NODE' | 'WAY' | 'RELATION';
  osmVersion: number | null;
  name: string;
  nameEn: string | null;
  developer: string;
  operator: string | null;
  tags: Record<string, string>;
  centroid: { lng: number; lat: number };
  /** GeoJSON MultiPolygon as an unparsed JSON string for ST_GeomFromGeoJSON. */
  boundaryGeoJson: string | null;
  totalAreaHa: number;
  province: string;
  district: string;
  address: string;
}

/** OSM tag keys whose Vietnamese-province values we recognise. */
const VN_PROVINCE_HINTS = [
  'addr:province',
  'addr:state',
  'addr:region',
  'is_in:province',
];

/**
 * Converts one GeoJSON feature produced by osmtogeojson into a
 * `ParsedFeature`, or returns `null` when the feature should be ignored.
 *
 * A feature is ignored when it has no properties, no `type/id` identifier
 * with a numeric id, no name, a name without any Latin/Vietnamese letter,
 * a geometry other than Point / Polygon / MultiPolygon, or a centroid
 * outside the Vietnam polygon.
 *
 * @param feat Feature from the osmtogeojson FeatureCollection.
 * @returns The parsed record, or `null` to skip the feature.
 */
function parseFeature(feat: Feature): ParsedFeature | null {
  const propsRaw = feat.properties as Record<string, unknown> | null;
  if (!propsRaw) return null;

  // osmtogeojson encodes id as "way/123", "relation/456", "node/789".
  const idStr = String(propsRaw['id'] ?? '');
  const slashIdx = idStr.indexOf('/');
  if (slashIdx < 0) return null;

  const typeRaw = idStr.slice(0, slashIdx).toUpperCase();
  if (typeRaw !== 'NODE' && typeRaw !== 'WAY' && typeRaw !== 'RELATION') {
    return null;
  }
  const osmType: ParsedFeature['osmType'] = typeRaw;

  // BigInt() throws on non-numeric input; treat such ids as malformed
  // instead of letting one bad feature abort the whole run.
  const idDigits = idStr.slice(slashIdx + 1);
  if (!/^\d+$/.test(idDigits)) return null;
  const osmId = BigInt(idDigits);

  // Tags live under `properties.tags` in the structured layout and directly
  // on `properties` in the flat layout — accept both.
  const tagsRaw = propsRaw['tags'];
  const tags: Record<string, string> =
    tagsRaw && typeof tagsRaw === 'object'
      ? (tagsRaw as Record<string, string>)
      : (propsRaw as Record<string, string>);

  const name = tags['name:vi'] ?? tags['name'] ?? null;
  // Skip purely unnamed industrial polygons — too noisy for our catalog.
  if (!name) return null;
  // Skip rows whose names contain zero Latin / Vietnamese letters. This
  // catches polygons that bleed across the northern border (Quảng Ninh /
  // Lạng Sơn bbox edges) and have only CJK names — those are Chinese
  // industrial sites, not VN KCN.
  if (!/[A-Za-zÀ-ỹ]/.test(name)) return null;

  const operator = tags['operator'] ?? null;
  const developer = operator ?? tags['operator:wikidata'] ?? 'Chưa xác định';

  // Derive centroid + (optional) boundary GeoJSON.
  const geometry: Feature['geometry'] | null = feat.geometry;
  if (!geometry) return null;

  let cLng: number;
  let cLat: number;
  let boundaryGeoJson: string | null = null;
  let totalAreaHa = 0;

  if (geometry.type === 'Point') {
    [cLng, cLat] = geometry.coordinates;
  } else if (geometry.type === 'Polygon' || geometry.type === 'MultiPolygon') {
    const areal = feat as Feature<Polygon | MultiPolygon>;
    [cLng, cLat] = centroid(areal).geometry.coordinates;

    // Normalise to MultiPolygon for storage in the boundary column.
    const multi: MultiPolygon =
      geometry.type === 'Polygon'
        ? { type: 'MultiPolygon', coordinates: [geometry.coordinates] }
        : geometry;
    boundaryGeoJson = JSON.stringify(multi);

    // turf/area returns m² — convert to ha (1 ha = 10 000 m²), 2 decimals.
    totalAreaHa = Math.round((area(areal) / 10000) * 100) / 100;
  } else {
    // LineString etc. (e.g. an unclosed way) cannot be stored in the
    // MultiPolygon `boundary` column — skip rather than write bad geometry.
    return null;
  }

  // Geographic gate: drop sites whose centroid falls outside the Vietnam
  // mainland polygon (Laos / Thailand / Cambodia / southern China bleed).
  if (!isPointInVietnam(cLng, cLat)) return null;

  // Province resolution: prefer explicit OSM tags, then fall back to a
  // nearest-centroid lookup against our 63-province table. The actual DB
  // upsert step (`upsertFeature`) replaces this with a precise PostGIS
  // ST_Contains lookup against `vn_provinces.geometry` once those polygons
  // are synced — this is just the bootstrap value used when the polygon
  // table is empty.
  const province =
    VN_PROVINCE_HINTS.map((k) => tags[k]).find(Boolean) ??
    tags['addr:city'] ??
    nearestProvince(cLat, cLng);
  const district = tags['addr:district'] ?? tags['addr:suburb'] ?? '';
  // `join` always yields a string ('' when both parts are missing).
  const address =
    tags['addr:full'] ??
    [tags['addr:housenumber'], tags['addr:street']].filter(Boolean).join(' ');

  const versionRaw = propsRaw['version'];
  const osmVersion = typeof versionRaw === 'number' ? versionRaw : null;

  return {
    osmId,
    osmType,
    osmVersion,
    name,
    nameEn: tags['name:en'] ?? null,
    developer,
    operator,
    tags,
    centroid: { lng: cLng, lat: cLat },
    boundaryGeoJson,
    totalAreaHa,
    province,
    district,
    address,
  };
}

/** Running counters for one sync run. `skipped` counts failed upserts. */
interface UpsertStats {
  inserted: number;
  updated: number;
  skipped: number;
  locked: number;
}

/** Administrative placement finally written for a park. */
interface ResolvedPlacement {
  province: string;
  district: string;
  region: ReturnType<typeof guessRegion>;
}

/**
 * Looks up the province and district whose stored polygons contain the
 * given point. Either name is `null` when no polygon matches.
 */
async function resolveAdminArea(
  lng: number,
  lat: number,
): Promise<{ provinceName: string | null; districtName: string | null }> {
  const rows = await prisma.$queryRawUnsafe<
    { provinceName: string | null; districtName: string | null }[]
  >(
    `WITH p AS (
       SELECT code, name
       FROM "vn_provinces"
       WHERE geometry IS NOT NULL
         AND ST_Contains(geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
       LIMIT 1
     )
     SELECT
       (SELECT name FROM p) AS "provinceName",
       (SELECT d.name
        FROM "vn_districts" d
        JOIN p ON p.code = d."provinceCode"
        WHERE d.geometry IS NOT NULL
          AND ST_Contains(d.geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
        LIMIT 1) AS "districtName"`,
    lng,
    lat,
  );
  return rows[0] ?? { provinceName: null, districtName: null };
}

/**
 * Inserts a brand-new park row via raw SQL — Prisma's create() can't satisfy
 * the `location` PostGIS Geometry NOT NULL column (it's `Unsupported`).
 * Every value, including the boundary GeoJSON, is passed as a bound
 * parameter; only the ST_* expressions are inlined in the statement text.
 */
async function insertPark(
  parsed: ParsedFeature,
  placement: ResolvedPlacement,
): Promise<void> {
  const cuid = await generateCuid();
  const slug = slugify(parsed.name, parsed.osmId.toString());
  const tagsJson = JSON.stringify(parsed.tags);

  await prisma.$executeRawUnsafe(
    `
    INSERT INTO "IndustrialPark" (
      id, name, "nameEn", slug, developer, operator, status,
      location, address, district, province, region,
      "totalAreaHa", "leasableAreaHa", "occupancyRate", "remainingAreaHa",
      "tenantCount", "targetIndustries",
      "osmId", "osmType", "osmVersion", "osmTags",
      "dataSource", "isPublic", "lastSyncedAt",
      "createdAt", "updatedAt", boundary
    ) VALUES (
      $1, $2, $3, $4, $5, $6, $7::"IndustrialParkStatus",
      ST_SetSRID(ST_MakePoint($8, $9), 4326), $10, $11, $12, $13::"VietnamRegion",
      $14, $15, $16, $17,
      $18, $19::text[],
      $20::bigint, $21::"IndustrialParkOsmType", $22, $23::jsonb,
      $24::"IndustrialParkDataSource", $25, $26,
      NOW(), NOW(),
      CASE WHEN $27::text IS NULL THEN NULL
           ELSE ST_Multi(ST_GeomFromGeoJSON($27::text)) END
    )
    `,
    cuid,
    parsed.name,
    parsed.nameEn,
    slug,
    parsed.developer,
    parsed.operator,
    'OPERATIONAL',
    parsed.centroid.lng,
    parsed.centroid.lat,
    parsed.address,
    placement.district,
    placement.province,
    placement.region,
    parsed.totalAreaHa, // totalAreaHa
    parsed.totalAreaHa, // leasableAreaHa — bootstrapped from the OSM area
    0, // occupancyRate
    parsed.totalAreaHa, // remainingAreaHa
    0, // tenantCount
    [], // targetIndustries
    parsed.osmId.toString(),
    parsed.osmType,
    parsed.osmVersion,
    tagsJson,
    'OSM',
    false,
    new Date(),
    parsed.boundaryGeoJson, // null for nodes → boundary stays NULL
  );
}

/**
 * Inserts or updates the `IndustrialPark` row keyed on `parsed.osmId`.
 *
 * Rows with `osmLocked = true` are left untouched (counted in
 * `stats.locked`). On update, columns named in `lockedFields` keep their
 * current value. `parsed` is not modified.
 *
 * @param parsed Feature to persist.
 * @param stats  Counters, mutated in place (`inserted` / `updated` / `locked`).
 * @throws Whatever the database layer throws; the caller counts it as an error.
 */
async function upsertFeature(
  parsed: ParsedFeature,
  stats: UpsertStats,
): Promise<void> {
  // Cheap pre-flight: if a row already exists with osmLocked=true, skip.
  const existing = await prisma.industrialPark.findUnique({
    where: { osmId: parsed.osmId },
    select: { id: true, osmLocked: true, lockedFields: true, dataSource: true },
  });
  if (existing?.osmLocked) {
    stats.locked += 1;
    return;
  }

  // Override the heuristic province with a precise PostGIS lookup against
  // the OSM-sourced admin polygons (when synced). Falls back to the
  // nearest-centroid value already on `parsed.province` if the polygon
  // table doesn't yet cover that area. A district taken from OSM tags wins
  // over the polygon lookup.
  const admin = await resolveAdminArea(
    parsed.centroid.lng,
    parsed.centroid.lat,
  );
  const placement: ResolvedPlacement = {
    province: admin.provinceName ?? parsed.province,
    district: parsed.district || (admin.districtName ?? ''),
    region: guessRegion(parsed.centroid.lat),
  };

  if (!existing) {
    await insertPark(parsed, placement);
    stats.inserted += 1;
    return;
  }

  // UPDATE path. Honour `lockedFields` — preserve listed columns.
  const locked = new Set(existing.lockedFields);
  const data: Prisma.IndustrialParkUpdateInput = {
    osmTags: parsed.tags as Prisma.InputJsonValue,
    osmVersion: parsed.osmVersion,
    lastSyncedAt: new Date(),
  };
  if (!locked.has('name')) data.name = parsed.name;
  if (!locked.has('nameEn')) data.nameEn = parsed.nameEn;
  if (!locked.has('operator')) data.operator = parsed.operator;
  if (!locked.has('developer')) data.developer = parsed.developer;
  if (!locked.has('address')) data.address = parsed.address;
  if (!locked.has('district')) data.district = placement.district;
  if (!locked.has('province')) data.province = placement.province;
  if (!locked.has('region')) data.region = placement.region;
  // A zero area means "node, no polygon" — don't wipe a known area with it.
  if (!locked.has('totalAreaHa') && parsed.totalAreaHa > 0) {
    data.totalAreaHa = parsed.totalAreaHa;
  }

  await prisma.industrialPark.update({
    where: { osmId: parsed.osmId },
    data,
  });

  // Refresh the geometry columns, which Prisma's typed update cannot touch.
  // Without a boundary (nodes) only `location` is rewritten.
  if (parsed.boundaryGeoJson != null) {
    await prisma.$executeRaw`
      UPDATE "IndustrialPark"
      SET location = ST_SetSRID(ST_MakePoint(${parsed.centroid.lng}, ${parsed.centroid.lat}), 4326),
          boundary = ST_Multi(ST_GeomFromGeoJSON(${parsed.boundaryGeoJson}))
      WHERE "osmId" = ${parsed.osmId}
    `;
  } else {
    await prisma.$executeRaw`
      UPDATE "IndustrialPark"
      SET location = ST_SetSRID(ST_MakePoint(${parsed.centroid.lng}, ${parsed.centroid.lat}), 4326)
      WHERE "osmId" = ${parsed.osmId}
    `;
  }

  // Count only once both writes succeeded, so a failed geometry refresh is
  // reported as an error by the caller and not also as an update.
  stats.updated += 1;
}

// ─── Main ─────────────────────────────────────────────────────────────────

/**
 * Runs the sync: for each selected chunk, fetch from Overpass, convert to
 * GeoJSON, parse, then (unless `--dry-run`) upsert every parsed feature.
 * Per-feature failures are logged and counted; a failed chunk fetch rejects.
 */
async function main(): Promise<void> {
  const chunkNames = chunkArg ? [chunkArg] : Object.keys(CHUNKS);

  const totalStats: UpsertStats = {
    inserted: 0,
    updated: 0,
    skipped: 0,
    locked: 0,
  };
  let parseSkipped = 0;

  console.log(
    `🌍 OSM industrial-park sync — chunks: ${chunkNames.join(', ')} ` +
      `(dry-run: ${dryRun})\n`,
  );

  for (const chunkName of chunkNames) {
    // Own-property lookup so names like "constructor" don't resolve through
    // the prototype chain to a non-bbox value.
    const bbox = Object.prototype.hasOwnProperty.call(CHUNKS, chunkName)
      ? CHUNKS[chunkName]
      : undefined;
    if (!bbox) {
      console.warn(` ⚠️ unknown chunk "${chunkName}" — skipping`);
      continue;
    }

    const overpass = await fetchChunk(chunkName, bbox);

    // osmtogeojson is loosely typed; it accepts the raw Overpass JSON.
    const fc = osmtogeojson(overpass) as unknown as { features: Feature[] };

    const parsed: ParsedFeature[] = [];
    for (const feature of fc.features) {
      let candidate: ParsedFeature | null = null;
      try {
        candidate = parseFeature(feature);
      } catch (err) {
        // Degenerate geometry etc. — skip this feature, keep the run alive.
        console.warn(
          ` ⚠️ parse failed for feature ${String(feature.id ?? '?')}: ` +
            `${err instanceof Error ? err.message : String(err)}`,
        );
      }
      if (candidate) parsed.push(candidate);
      else parseSkipped += 1;
    }

    console.log(
      ` ▸ ${chunkName}: ${parsed.length} named industrial parks ` +
        `(skipped ${fc.features.length - parsed.length} unnamed/malformed)`,
    );

    if (dryRun) {
      console.log(
        ` ▸ first 3 names: ${parsed
          .slice(0, 3)
          .map((p) => p.name)
          .join(', ')}`,
      );
      continue;
    }

    for (const p of parsed) {
      try {
        await upsertFeature(p, totalStats);
      } catch (err) {
        totalStats.skipped += 1;
        console.warn(
          ` ⚠️ upsert failed for ${p.osmType.toLowerCase()}/${p.osmId}: ` +
            `${err instanceof Error ? err.message : String(err)}`,
        );
      }
    }
  }

  console.log('\n✓ Sync complete.');
  console.log(
    ` inserted: ${totalStats.inserted} ` +
      `· updated: ${totalStats.updated} ` +
      `· locked-skipped: ${totalStats.locked} ` +
      `· errors: ${totalStats.skipped} ` +
      `· parse-skipped: ${parseSkipped}`,
  );
}

main()
  .catch((err: unknown) => {
    console.error('❌ Sync failed:', err);
    // Set the exit code instead of calling process.exit(): exiting here
    // would terminate before the cleanup in `finally` gets to run.
    process.exitCode = 1;
  })
  .finally(async () => {
    await prisma.$disconnect();
    await pool.end();
  });