/**
 * One-shot backfill for OSM-imported `IndustrialPark` rows whose
 * `province` is "Chưa xác định" (the placeholder we wrote when the OSM
 * tags lacked any addr:* hints).
 *
 * Usage:
 *   NODE_OPTIONS="-r dotenv/config" DOTENV_CONFIG_PATH=.env \
 *     pnpm tsx scripts/backfill-osm-provinces.ts [--dry-run]
 *
 * What it does:
 *   1. Selects every row where dataSource = 'OSM' AND province =
 *      'Chưa xác định'.
 *   2. Reads the centroid via ST_X / ST_Y from the `location` Point.
 *   3. Looks up the nearest province from VN_PROVINCE_CENTROIDS.
 *   4. Updates the row in batches.
 *
 * Safe to re-run: only rows that still carry the placeholder are selected,
 * and the UPDATE itself re-checks the placeholder, so a province filled in
 * by someone else between the SELECT and the UPDATE is never overwritten.
 */
import 'dotenv/config';

import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';
import pg from 'pg';

import { nearestProvince } from './data/vn-province-centroids';

/** Placeholder province value that marks a row as needing a backfill. */
const PLACEHOLDER_PROVINCE = 'Chưa xác định';

/** Maximum number of ids per UPDATE, to avoid huge IN-lists. */
const UPDATE_BATCH_SIZE = 500;

const pool = new pg.Pool({ connectionString: process.env['DATABASE_URL'] });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });

const dryRun = process.argv.includes('--dry-run');

/** One candidate row as returned by the raw SELECT in `main`. */
interface Row {
  id: string;
  /** ST_Y of `location`; null when the database returns NULL for it. */
  lat: number | null;
  /** ST_X of `location`; null when the database returns NULL for it. */
  lng: number | null;
}

/**
 * Groups row ids by the province inferred from each row's coordinates.
 *
 * Rows whose coordinates are missing or not finite numbers cannot be
 * matched to a province; they are left out of the groups and counted in
 * `skipped` instead of being passed to `nearestProvince`.
 */
function groupIdsByProvince(rows: Row[]): {
  groups: Map<string, string[]>;
  skipped: number;
} {
  const groups = new Map<string, string[]>();
  let skipped = 0;

  for (const { id, lat, lng } of rows) {
    if (
      typeof lat !== 'number' ||
      typeof lng !== 'number' ||
      !Number.isFinite(lat) ||
      !Number.isFinite(lng)
    ) {
      skipped += 1;
      continue;
    }

    const province = nearestProvince(lat, lng);
    const ids = groups.get(province);
    if (ids) {
      ids.push(id);
    } else {
      groups.set(province, [id]);
    }
  }

  return { groups, skipped };
}

/**
 * Finds placeholder-province OSM rows, prints the inferred distribution,
 * and (unless `--dry-run` was passed) writes the inferred provinces back.
 */
async function main(): Promise<void> {
  console.log(`🔍 Finding OSM rows with province="${PLACEHOLDER_PROVINCE}"…`);

  // Tagged-template form: the placeholder is sent as a bound parameter.
  const rows = await prisma.$queryRaw<Row[]>`
    SELECT id,
           ST_Y(location::geometry) AS lat,
           ST_X(location::geometry) AS lng
    FROM "IndustrialPark"
    WHERE "dataSource"::text = 'OSM'
      AND province = ${PLACEHOLDER_PROVINCE}`;

  console.log(` → ${rows.length} rows need a province.`);
  if (rows.length === 0) {
    console.log('✓ Nothing to do.');
    return;
  }

  const { groups: updates, skipped } = groupIdsByProvince(rows);
  if (skipped > 0) {
    console.warn(` ⚠ Skipped ${skipped} rows without usable coordinates.`);
  }

  // Sort by impact for the dry-run preview.
  const summary = Array.from(updates.entries()).sort(
    (a, b) => b[1].length - a[1].length,
  );
  console.log(' → Distribution by inferred province:');
  for (const [province, ids] of summary) {
    console.log(` ${province.padEnd(24)} ${ids.length}`);
  }

  if (dryRun) {
    console.log('💡 --dry-run: no writes performed.');
    return;
  }

  let totalUpdated = 0;
  for (const [province, ids] of updates) {
    for (let i = 0; i < ids.length; i += UPDATE_BATCH_SIZE) {
      const batch = ids.slice(i, i + UPDATE_BATCH_SIZE);
      const result = await prisma.industrialPark.updateMany({
        // Re-check the placeholder so a row that was filled in after the
        // SELECT above is left untouched.
        where: { id: { in: batch }, province: PLACEHOLDER_PROVINCE },
        data: { province },
      });
      totalUpdated += result.count;
    }
  }

  console.log(`✓ Updated ${totalUpdated} rows.`);
}

main()
  .catch((err) => {
    console.error(err);
    process.exitCode = 1;
  })
  .finally(async () => {
    // Always attempt to close the pool, even if the Prisma disconnect throws.
    try {
      await prisma.$disconnect();
    } finally {
      await pool.end();
    }
  });