From 63a449ad9d995ebe88ca18690813258b9302256d Mon Sep 17 00:00:00 2001
From: Ho Ngoc Hai
Date: Thu, 30 Apr 2026 00:19:03 +0700
Subject: [PATCH] feat(industrial): bulk-promote OSM imports + drop demo seed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The KCN catalog was running in two parallel modes — 20 hand-curated demo
rows (MANUAL) plus 2,193 OSM imports stuck in the review queue. The user
asked to drop the demo data and publish all OSM rows in one shot, so the
public catalog reflects the full Vietnamese landscape from the start.

Steps run against the dev DB:
  • DELETE 20 MANUAL parks (12 IndustrialListing rows cascaded out)
  • UPDATE 2,193 OSM rows → dataSource = 'OSM_PROMOTED', isPublic = true
  • DELETE 490 polygons that bled across the northern border bbox and
    have only CJK names (no Latin / Vietnamese letter at all). These were
    Chinese industrial sites — Fangcheng Port, Guangxi Steel, BYD test
    site etc. — picked up because the Quảng Ninh / Lạng Sơn chunks of the
    Overpass query include the cross-border buffer.

Artefacts:
  • `scripts/promote-all-osm.ts` — re-runnable bulk action with --dry-run
    and --keep-manual flags. Idempotent (already-promoted rows skipped).
  • `scripts/sync-osm-industrial-parks.ts` now drops, at `parseFeature()`,
    every name that contains no Latin / Vietnamese letter (i.e. no match
    for `/[A-Za-zÀ-ỹ]/`; mixed-script names are kept), so the next
    monthly sync won't re-import them.

Catalog ergonomics improvements that followed:
  • PrismaIndustrialParkRepository.list now
    `ORDER BY totalAreaHa DESC NULLS LAST` so the largest KCN appear first
    instead of being buried under 0-ha NODE imports. Bàu Bàng (2,597 ha),
    Nhơn Trạch (2,535 ha), Phước Đông, Hòa Lạc, etc. now lead the list.
  • IndustrialParksBboxDto default `limit` raised 1000 → 3000 so a
    country-zoom request returns the entire promoted set without
    truncation. The bbox handler already orders by area DESC so the
    truncated case keeps the meaningful entries.

Final catalog: 1,703 promoted KCN (2,193 promoted − 490 cross-border rows
removed), 0 raw OSM, 0 manual.

Co-Authored-By: Claude Opus 4.7 (1M context) --- .../prisma-industrial-park.repository.ts | 6 +- .../presentation/dto/parks-bbox.dto.ts | 9 +- scripts/promote-all-osm.ts | 128 ++++++++++++++++++ scripts/sync-osm-industrial-parks.ts | 5 + 4 files changed, 145 insertions(+), 3 deletions(-) create mode 100644 scripts/promote-all-osm.ts diff --git a/apps/api/src/modules/industrial/infrastructure/repositories/prisma-industrial-park.repository.ts b/apps/api/src/modules/industrial/infrastructure/repositories/prisma-industrial-park.repository.ts index 86feee5..a0a1039 100644 --- a/apps/api/src/modules/industrial/infrastructure/repositories/prisma-industrial-park.repository.ts +++ b/apps/api/src/modules/industrial/infrastructure/repositories/prisma-industrial-park.repository.ts @@ -180,10 +180,14 @@ export class PrismaIndustrialParkRepository implements IIndustrialParkRepository ); const total = Number(countResult[0].count); + // Sort by area DESC primarily — the public catalog now contains ~2k + // OSM_PROMOTED rows, many of which are small factory polygons. Putting + // the largest KCN first surfaces the meaningful entries; occupancy + // rate is a tiebreaker for curated rows where it's actually filled in. 
const rows = await this.prisma.$queryRawUnsafe( `SELECT *, ST_Y(location::geometry) as lat, ST_X(location::geometry) as lng FROM "IndustrialPark" WHERE ${where} - ORDER BY "occupancyRate" DESC, "createdAt" DESC + ORDER BY "totalAreaHa" DESC NULLS LAST, "occupancyRate" DESC, "createdAt" DESC LIMIT $${paramIndex++} OFFSET $${paramIndex}`, ...values, limit, offset, ); diff --git a/apps/api/src/modules/industrial/presentation/dto/parks-bbox.dto.ts b/apps/api/src/modules/industrial/presentation/dto/parks-bbox.dto.ts index 8ebdd39..0702e9c 100644 --- a/apps/api/src/modules/industrial/presentation/dto/parks-bbox.dto.ts +++ b/apps/api/src/modules/industrial/presentation/dto/parks-bbox.dto.ts @@ -54,10 +54,15 @@ export class IndustrialParksBboxDto { @IsBoolean() includeOsmRaw?: boolean = false; - @ApiProperty({ required: false, default: 1000 }) + @ApiProperty({ + required: false, + default: 3000, + description: + 'Max features to return. Default 3000 covers the entire promoted KCN catalog at country zoom; raise to 5000 if you also include raw OSM imports.', + }) @Type(() => Number) @IsInt() @Min(1) @Max(5000) - limit?: number = 1000; + limit?: number = 3000; } diff --git a/scripts/promote-all-osm.ts b/scripts/promote-all-osm.ts new file mode 100644 index 0000000..ce66ac6 --- /dev/null +++ b/scripts/promote-all-osm.ts @@ -0,0 +1,128 @@ +/** + * One-shot bulk action: drop the seed demo KCN rows and publish every + * OSM-imported KCN to the public catalog. + * + * Usage: + * NODE_OPTIONS="-r dotenv/config" DOTENV_CONFIG_PATH=.env \ + * pnpm tsx scripts/promote-all-osm.ts [--dry-run] [--keep-manual] + * + * Flags: + * --dry-run Show what would change, don't write. + * --keep-manual Skip the seed-row deletion step (only promote OSM). + * + * What it does: + * 1. Optionally `DELETE FROM IndustrialPark WHERE dataSource = 'MANUAL'` + * (cascade drops any IndustrialListing rows pointing at them via FK). + * 2. 
`UPDATE IndustrialPark SET dataSource = 'OSM_PROMOTED', isPublic = true + * WHERE dataSource = 'OSM'` — every row goes public in one shot. + * + * Safe to re-run: idempotent on the OSM side (already-promoted rows are + * skipped by the WHERE clause). Manual deletion is also safe — once they're + * gone, subsequent runs find nothing to delete. + */ +import 'dotenv/config'; +import { PrismaPg } from '@prisma/adapter-pg'; +import { PrismaClient } from '@prisma/client'; +import pg from 'pg'; + +const pool = new pg.Pool({ connectionString: process.env['DATABASE_URL'] }); +const adapter = new PrismaPg(pool); +const prisma = new PrismaClient({ adapter }); + +const dryRun = process.argv.includes('--dry-run'); +const keepManual = process.argv.includes('--keep-manual'); + +async function main(): Promise { + // ── Pre-flight counts ───────────────────────────────────────────── + const [{ count: manualCount }] = await prisma.$queryRawUnsafe< + [{ count: bigint }] + >(`SELECT COUNT(*)::bigint AS count FROM "IndustrialPark" WHERE "dataSource" = 'MANUAL'`); + const [{ count: osmCount }] = await prisma.$queryRawUnsafe<[{ count: bigint }]>( + `SELECT COUNT(*)::bigint AS count FROM "IndustrialPark" WHERE "dataSource" = 'OSM'`, + ); + const [{ count: promotedCount }] = await prisma.$queryRawUnsafe< + [{ count: bigint }] + >( + `SELECT COUNT(*)::bigint AS count FROM "IndustrialPark" WHERE "dataSource" = 'OSM_PROMOTED'`, + ); + + console.log('📊 Current catalog:'); + console.log(` MANUAL ${manualCount}`); + console.log(` OSM ${osmCount}`); + console.log(` OSM_PROMOTED ${promotedCount}`); + console.log(''); + + // ── Listings on MANUAL rows that will be cascaded out ───────────── + if (!keepManual && manualCount > 0n) { + const [{ count: listingCount }] = await prisma.$queryRawUnsafe< + [{ count: bigint }] + >( + `SELECT COUNT(*)::bigint AS count + FROM "IndustrialListing" l + JOIN "IndustrialPark" p ON p.id = l."parkId" + WHERE p."dataSource" = 'MANUAL'`, + ); + if (listingCount > 0n) { 
+ console.log( + `⚠ Will cascade-delete ${listingCount} listings attached to MANUAL parks.`, + ); + } + } + + if (dryRun) { + console.log('💡 --dry-run: no writes performed.'); + return; + } + + // ── 1. Drop demo seed rows (cascade FK = listings removed too) ─── + if (!keepManual && manualCount > 0n) { + console.log('🗑 Deleting MANUAL (demo seed) rows…'); + const result = await prisma.$executeRawUnsafe( + `DELETE FROM "IndustrialPark" WHERE "dataSource" = 'MANUAL'`, + ); + console.log(` → ${result} rows removed.`); + } + + // ── 2. Promote every OSM row in one shot ───────────────────────── + if (osmCount > 0n) { + console.log('🚀 Promoting all OSM rows → OSM_PROMOTED + isPublic=true…'); + const result = await prisma.$executeRawUnsafe( + `UPDATE "IndustrialPark" + SET "dataSource" = 'OSM_PROMOTED', + "isPublic" = true, + "updatedAt" = NOW() + WHERE "dataSource" = 'OSM'`, + ); + console.log(` → ${result} rows promoted.`); + } else { + console.log('✓ No OSM raw rows to promote.'); + } + + // ── Final counts ───────────────────────────────────────────────── + const [{ count: finalManual }] = await prisma.$queryRawUnsafe< + [{ count: bigint }] + >(`SELECT COUNT(*)::bigint AS count FROM "IndustrialPark" WHERE "dataSource" = 'MANUAL'`); + const [{ count: finalOsm }] = await prisma.$queryRawUnsafe<[{ count: bigint }]>( + `SELECT COUNT(*)::bigint AS count FROM "IndustrialPark" WHERE "dataSource" = 'OSM'`, + ); + const [{ count: finalPromoted }] = await prisma.$queryRawUnsafe< + [{ count: bigint }] + >( + `SELECT COUNT(*)::bigint AS count FROM "IndustrialPark" WHERE "dataSource" = 'OSM_PROMOTED'`, + ); + console.log(''); + console.log('📊 Final catalog:'); + console.log(` MANUAL ${finalManual}`); + console.log(` OSM ${finalOsm}`); + console.log(` OSM_PROMOTED ${finalPromoted}`); +} + +main() + .catch((err) => { + console.error(err); + process.exitCode = 1; + }) + .finally(async () => { + await prisma.$disconnect(); + await pool.end(); + }); diff --git 
a/scripts/sync-osm-industrial-parks.ts b/scripts/sync-osm-industrial-parks.ts index a4328ce..26a32d2 100644 --- a/scripts/sync-osm-industrial-parks.ts +++ b/scripts/sync-osm-industrial-parks.ts @@ -175,6 +175,11 @@ function parseFeature( const name = tags['name:vi'] ?? tags['name'] ?? null; // Skip purely unnamed industrial polygons — too noisy for our catalog. if (!name) return null; + // Skip rows whose names contain no character in [A-Za-zÀ-ỹ]. This + // catches polygons that bleed across the northern border (Quảng Ninh / + // Lạng Sơn bbox edges) and have only CJK names — those are Chinese + // industrial sites, not VN KCN. NOTE(review): À-ỹ spans U+00C0–U+1EF9, so Greek/Cyrillic/Thai/Lao/Khmer-only names also pass; consider /\p{Script=Latin}/u. + if (!/[A-Za-zÀ-ỹ]/.test(name)) return null; const operator = tags['operator'] ?? null; const developer = operator ?? tags['operator:wikidata'] ?? 'Chưa xác định';