/**
 * Prune `IndustrialPark` rows whose centroid is outside the Vietnam
 * mainland polygon. Catches the cross-border bleed (Laos, Thailand,
 * Cambodia) that the Overpass bbox sync inevitably picks up.
 *
 * Usage:
 *   NODE_OPTIONS="-r dotenv/config" DOTENV_CONFIG_PATH=.env \
 *     pnpm tsx scripts/prune-non-vietnam-osm.ts [--dry-run]
 *
 * Strategy:
 *   1. Build a PostGIS polygon from `VN_COUNTRY_POLYGON_GEOJSON`.
 *   2. SELECT rows where `NOT ST_Within(location, polygon)`, scoped to
 *      OSM-sourced rows (we never want to delete a manually-curated
 *      row even if its centroid is wonky).
 *   3. DELETE in one statement (cascade removes any IndustrialListing
 *      rows attached to those parks).
 *
 * Safe to re-run: idempotent.
 */
import 'dotenv/config';

import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';
import pg from 'pg';

import { VN_COUNTRY_POLYGON_GEOJSON } from './data/vn-country-polygon';

const pool = new pg.Pool({ connectionString: process.env['DATABASE_URL'] });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });

/** True when `--dry-run` is on the command line: preview only, no DELETE is issued. */
const dryRun = process.argv.includes('--dry-run');

/**
 * One preview row from the "outside the polygon" SELECT.
 *
 * Raw-query results are not validated against this type at runtime, so the
 * text columns are declared nullable and handled defensively when printed.
 */
interface OutsideRow {
  id: string;
  name: string | null;
  province: string | null;
  lat: number;
  lng: number;
  ha: number;
}

/**
 * WHERE predicate shared by the preview SELECT and the DELETE so both always
 * target the same rows. `$1` is the country polygon as GeoJSON text; it is
 * bound as a query parameter rather than spliced into the SQL string, so no
 * manual quote escaping is needed.
 */
const OUTSIDE_VN_PREDICATE = `"dataSource" IN ('OSM', 'OSM_PROMOTED')
        AND NOT ST_Within(
          location::geometry,
          ST_SetSRID(ST_GeomFromGeoJSON($1::text), 4326)
        )`;

/**
 * Render one preview line: name (50 columns), province (16 columns), area and
 * rounded coordinates. A NULL name/province prints as blank padding instead
 * of throwing.
 */
function formatPreviewRow(row: OutsideRow): string {
  const name = (row.name ?? '').slice(0, 50).padEnd(50);
  const province = (row.province ?? '').slice(0, 16).padEnd(16);
  return ` ${name} ${province} ${row.ha} ha (${row.lat}, ${row.lng})`;
}

/**
 * List OSM-sourced parks located outside the Vietnam polygon, print the
 * largest ones for operator review and, unless `--dry-run` was given,
 * delete every matching row.
 *
 * NOTE(review): the leading glyphs in the log strings below look like
 * mis-decoded UTF-8 (emoji / check marks). They are left untouched here;
 * confirm the file's encoding before changing them.
 */
async function main(): Promise<void> {
  const outsideRows = await prisma.$queryRawUnsafe<OutsideRow[]>(
    `SELECT id, name, province,
            ROUND(ST_Y(location::geometry)::numeric, 3)::float AS lat,
            ROUND(ST_X(location::geometry)::numeric, 3)::float AS lng,
            COALESCE("totalAreaHa", 0) AS ha
       FROM "IndustrialPark"
      WHERE ${OUTSIDE_VN_PREDICATE}
      ORDER BY ha DESC NULLS LAST`,
    VN_COUNTRY_POLYGON_GEOJSON,
  );

  console.log(`πŸ“ Found ${outsideRows.length} OSM rows OUTSIDE the VN polygon.`);
  if (outsideRows.length === 0) {
    console.log('βœ“ Catalog is clean.');
    return;
  }

  // Show the top 15 by area so the operator can sanity-check before deleting.
  console.log(' Top 15 by area (will be deleted):');
  for (const row of outsideRows.slice(0, 15)) {
    console.log(formatPreviewRow(row));
  }

  if (dryRun) {
    console.log('πŸ’‘ --dry-run: no writes performed.');
    return;
  }

  console.log(`\nπŸ—‘ Deleting ${outsideRows.length} rows…`);
  // The DELETE re-evaluates the predicate rather than reusing the previewed
  // ids, so the reported count reflects the table state at delete time.
  const result = await prisma.$executeRawUnsafe(
    `DELETE FROM "IndustrialPark"
      WHERE ${OUTSIDE_VN_PREDICATE}`,
    VN_COUNTRY_POLYGON_GEOJSON,
  );
  console.log(`βœ“ Removed ${result} rows.`);
}

main()
  .catch((err: unknown) => {
    console.error(err);
    // Set the exit code instead of calling process.exit() so the cleanup
    // in `finally` still runs.
    process.exitCode = 1;
  })
  .finally(async () => {
    await prisma.$disconnect();
    await pool.end();
  });