Files
goodgo-platform/scripts/backfill-admin-codes.ts
Ho Ngoc Hai fba536406d feat(osm): foundation — admin boundaries, POI catalog, sync orchestrator
This is the Phase 0 + Phase 1 + Phase 4 foundation of the full OSM
integration plan. It backfills three things the rest of the platform
has been faking with hardcoded tables, and gives admins one dashboard
for every OSM-sourced layer.

Phase 0 — Vietnam administrative boundaries
* New columns on vn_provinces / vn_districts / vn_wards: PostGIS
  geometry (MultiPolygon), centroid (Point), areaKm2, osmId, population,
  lastSyncedAt + GIST indexes on geometry/centroid.
* `scripts/sync-osm-admin-boundaries.ts` pulls
  `boundary=administrative + admin_level=4|6|8` from Overpass per chunk,
  filters to mainland VN via the existing country polygon, resolves the
  GSO code (or generates `OSM_<id>`), and upserts via raw SQL because
  Prisma can't manage PostGIS columns.
* `GeoLookupService` (shared module) replaces the old
  `nearestProvince()` heuristic — `lookup(lng,lat)` returns
  province/district/ward via `ST_Contains` on the GIST-indexed polygons.
* The KCN sync now resolves province/district from the polygon table
  and falls back to the centroid heuristic only when polygons aren't
  loaded yet.
* `scripts/backfill-admin-codes.ts` rewrites province/district/ward on
  IndustrialPark, ProjectDevelopment and Property using the new lookup.

Phase 1 — POI catalog (15 categories, schema only here)
* New `Poi` table with `PoiCategory` enum, OSM provenance columns,
  GIST index on `location`. New `TransportLine` for metro/highway
  multilinestrings.
* `scripts/sync-osm-poi.ts` queries Overpass per category × chunk,
  resolves province/district codes from the boundary polygons, and upserts
  while honouring `osmLocked` / `lockedFields`, the same way the KCN sync does.
* New NestJS `PoiModule` exposes:
    GET /poi/by-bbox    — GeoJSON for map overlays
    GET /poi/nearby     — sidebar "tiện ích xung quanh" (HMAC distance ranks)
    GET /poi/coverage   — admin per-category counts
* New web component `<NearbyPoiSidebar />` ready to drop into listing /
  project / KCN detail pages.

Phase 4 — Sync orchestrator + admin dashboard
* New `OsmSyncRun` audit table tracks every sync invocation
  (RUNNING / SUCCESS / PARTIAL / FAILED + row stats + error message).
* `OsmSyncService` spawns the right tsx script for any (layer, category,
  chunk) tuple, parses stats out of stdout, updates the run row.
* `OsmSyncCronService` schedules:
    Daily 02:00  → POI category rotation (1/day, 20-day cycle)
    Mon  02:30  → admin-boundaries provinces
    Wed  02:30  → admin-boundaries districts
    Sat  02:30  → admin-boundaries wards
    1st of month 03:00 → industrial-parks (per chunk)
  All gated by `OSM_SYNC_ENABLED=true`.
* New admin endpoints under `/admin/osm/*` (layers / coverage / runs /
  trigger), guarded by JWT + ADMIN role.
* New `/admin/osm` Next.js page: stat cards, coverage table with
  per-row "Sync now", recent runs list with auto-refresh every 15s.

Run on dev so far: 33 provinces + 1100+ districts (still finishing) +
305 hospitals POI imported.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 12:01:19 +07:00

217 lines
7.0 KiB
TypeScript

/**
 * Backfill the human-readable `province` / `district` / `ward` text columns
 * (`city` on ProjectDevelopment) on every geo-bearing entity, using the
 * freshly synced `vn_provinces` / `vn_districts` / `vn_wards` polygons.
 * The lookup also resolves `provinceCode` / `districtCode` / `wardCode`,
 * but the passes in this script only write the matched names back.
 *
 * Tables processed:
 * - IndustrialPark (PostGIS point) — `--table=industrial`
 * - ProjectDevelopment (PostGIS point) — `--table=project`
 * - Property (PostGIS point — most listings live here; province only) — `--table=property`
 * - Listing has no separate pass: it uses Property.location internally (joined).
 *
 * Usage:
 * NODE_OPTIONS="-r dotenv/config" DOTENV_CONFIG_PATH=.env \
 * pnpm tsx scripts/backfill-admin-codes.ts [--dry-run] [--table=industrial|project|property]
 *
 * Strategy:
 * For each entity with a `location` Point we ST_Contains against the
 * province/district/ward polygons and write the matched names back
 * into the row. Only rows where the resolved value DIFFERS from the
 * existing one are touched, so re-runs are cheap.
 */
import 'dotenv/config';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';
import pg from 'pg';
// ── CLI flags ───────────────────────────────────────────────────────────
// `--dry-run` makes every pass count the rows it would change without
// issuing any UPDATE.
const dryRun = process.argv.includes('--dry-run');

// `--table=NAME` restricts the run to a single pass; when the flag is
// absent `tableArg` is undefined.
const TABLE_FLAG = '--table=';
const tableArg = process.argv
  .find((arg) => arg.startsWith(TABLE_FLAG))
  ?.slice(TABLE_FLAG.length);

// ── Database ────────────────────────────────────────────────────────────
// One pg pool, handed to Prisma through the pg driver adapter. Both are
// released by the entry-point code at the bottom of the file.
const pool = new pg.Pool({ connectionString: process.env['DATABASE_URL'] });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
/**
 * Result of resolving one point against the admin-boundary tables.
 * Each level carries a code and a name; both are `null` when no polygon
 * at that level matched the point.
 */
interface AdminMatch {
  /** `code` of the matched `vn_provinces` row. */
  provinceCode: string | null;
  /** `name` of the matched `vn_provinces` row. */
  provinceName: string | null;

  /** `code` of the matched `vn_districts` row. */
  districtCode: string | null;
  /** `name` of the matched `vn_districts` row. */
  districtName: string | null;

  /** `code` of the matched `vn_wards` row. */
  wardCode: string | null;
  /** `name` of the matched `vn_wards` row. */
  wardName: string | null;
}
/**
 * Finds the province, district and ward whose polygons contain a point.
 *
 * One SQL statement does the whole lookup. The point is built as
 * SRID 4326 from `(lng, lat)`. The district is searched only among
 * districts of the matched province, and the ward only among wards of the
 * matched district, so a deeper level can only be set when its parent
 * matched. At most one row per level is taken (`LIMIT 1`).
 *
 * @param lng Longitude, passed as the X coordinate of the point.
 * @param lat Latitude, passed as the Y coordinate of the point.
 * @returns The matched code/name per level; fields of levels that did not
 *          match are `null` (all six are `null` when no province polygon
 *          contains the point).
 */
async function resolve(lng: number, lat: number): Promise<AdminMatch> {
  // Used only if the query yields no row at all.
  const noMatch: AdminMatch = {
    provinceCode: null,
    provinceName: null,
    districtCode: null,
    districtName: null,
    wardCode: null,
    wardName: null,
  };

  // $1 = lng, $2 = lat. The statement text is kept verbatim.
  const lookupSql = `WITH p AS (
SELECT code, name FROM "vn_provinces"
WHERE geometry IS NOT NULL
AND ST_Contains(geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
LIMIT 1
),
d AS (
SELECT d.code, d.name
FROM "vn_districts" d
JOIN p ON p.code = d."provinceCode"
WHERE d.geometry IS NOT NULL
AND ST_Contains(d.geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
LIMIT 1
),
w AS (
SELECT w.code, w.name
FROM "vn_wards" w
JOIN d ON d.code = w."districtCode"
WHERE w.geometry IS NOT NULL
AND ST_Contains(w.geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
LIMIT 1
)
SELECT
(SELECT code FROM p) AS "provinceCode",
(SELECT name FROM p) AS "provinceName",
(SELECT code FROM d) AS "districtCode",
(SELECT name FROM d) AS "districtName",
(SELECT code FROM w) AS "wardCode",
(SELECT name FROM w) AS "wardName"`;

  const [firstRow] = await prisma.$queryRawUnsafe<AdminMatch[]>(lookupSql, lng, lat);
  return firstRow ?? noMatch;
}
/**
 * Rewrites `province` / `district` on IndustrialPark rows from the admin
 * polygons that contain each park's `location` point.
 *
 * A row is touched only when the resolved province differs from the stored
 * one, or a district was resolved and differs from the stored one. Rows
 * whose point falls inside no province polygon are left alone. When no
 * district polygon matched, the existing `district` value is kept
 * (`COALESCE`). In dry-run mode nothing is written; rows are only counted.
 */
async function backfillIndustrialPark(): Promise<void> {
  console.log('🏭 IndustrialPark…');
  const rows = await prisma.$queryRawUnsafe<
    { id: string; lat: number; lng: number; province: string | null; district: string | null }[]
  >(
    // Rows without a location cannot be resolved, so skip them up front
    // (same filter the Property pass uses).
    `SELECT id, ST_Y(location::geometry) AS lat, ST_X(location::geometry) AS lng, province, district
     FROM "IndustrialPark"
     WHERE location IS NOT NULL`,
  );
  let updated = 0;
  for (const r of rows) {
    const m = await resolve(r.lng, r.lat);
    if (!m.provinceName) continue; // outside VN polygon
    // Compare every column the UPDATE writes; checking the province alone
    // would leave a stale district in place forever.
    const sameProvince = m.provinceName === r.province;
    const sameDistrict = !m.districtName || m.districtName === r.district;
    if (sameProvince && sameDistrict) continue;
    if (!dryRun) {
      await prisma.$executeRawUnsafe(
        `UPDATE "IndustrialPark" SET province = $2, district = COALESCE($3, district) WHERE id = $1`,
        r.id,
        m.provinceName,
        m.districtName,
      );
    }
    updated++;
  }
  // Say "would update" only when nothing was actually written.
  console.log(` ${updated}/${rows.length} rows ${dryRun ? 'would update' : 'updated'}.`);
}
/**
 * Rewrites `city` / `district` / `ward` on ProjectDevelopment rows from the
 * admin polygons that contain each project's `location` point.
 *
 * The resolved province name is stored in `city`. A row is touched only
 * when at least one resolved level differs from the stored value; a level
 * that did not resolve counts as unchanged and its column is kept
 * (`COALESCE`). Rows whose point falls inside no province polygon are left
 * alone. In dry-run mode nothing is written; rows are only counted.
 */
async function backfillProjectDevelopment(): Promise<void> {
  console.log('🏗️ ProjectDevelopment…');
  const rows = await prisma.$queryRawUnsafe<
    { id: string; lat: number; lng: number; city: string; district: string; ward: string }[]
  >(
    // Rows without a location cannot be resolved, so skip them up front
    // (same filter the Property pass uses).
    `SELECT id, ST_Y(location::geometry) AS lat, ST_X(location::geometry) AS lng, city, district, ward
     FROM "ProjectDevelopment"
     WHERE location IS NOT NULL`,
  );
  let updated = 0;
  for (const r of rows) {
    const m = await resolve(r.lng, r.lat);
    if (!m.provinceName) continue; // outside VN polygon
    const sameCity = m.provinceName === r.city;
    const sameDistrict = !m.districtName || m.districtName === r.district;
    const sameWard = !m.wardName || m.wardName === r.ward;
    if (sameCity && sameDistrict && sameWard) continue;
    if (!dryRun) {
      await prisma.$executeRawUnsafe(
        `UPDATE "ProjectDevelopment"
         SET city = $2,
             district = COALESCE($3, district),
             ward = COALESCE($4, ward)
         WHERE id = $1`,
        r.id,
        m.provinceName,
        m.districtName,
        m.wardName,
      );
    }
    updated++;
  }
  // Say "would update" only when nothing was actually written.
  console.log(` ${updated}/${rows.length} rows ${dryRun ? 'would update' : 'updated'}.`);
}
/**
 * Rewrites `province` on Property rows from the province polygon that
 * contains each property's `location` point.
 *
 * The pass first checks that a `province` column exists on `Property` and
 * skips itself otherwise. Only rows with a non-null location are read, and
 * a row is touched only when the resolved province name differs from the
 * stored one. In dry-run mode nothing is written; rows are only counted.
 */
async function backfillProperty(): Promise<void> {
  console.log('🏠 Property…');
  // Property has Vietnamese province / district / ward text columns; check schema.
  // Restrict the probe to schemas on the search path so that a same-named
  // table in an unrelated schema cannot make the check pass.
  const colsExist = await prisma.$queryRawUnsafe<{ count: bigint }[]>(
    `SELECT COUNT(*)::bigint AS count
     FROM information_schema.columns
     WHERE table_name = 'Property'
       AND column_name = 'province'
       AND table_schema::text = ANY (current_schemas(false)::text[])`,
  );
  if (Number(colsExist[0]?.count ?? 0n) === 0) {
    console.log(' (no province column on Property — skipping)');
    return;
  }
  const rows = await prisma.$queryRawUnsafe<
    { id: string; lat: number; lng: number; province: string | null }[]
  >(
    `SELECT id, ST_Y(location::geometry) AS lat, ST_X(location::geometry) AS lng, province
     FROM "Property"
     WHERE location IS NOT NULL`,
  );
  let updated = 0;
  for (const r of rows) {
    const m = await resolve(r.lng, r.lat);
    if (!m.provinceName) continue; // outside VN polygon
    if (m.provinceName === r.province) continue;
    if (!dryRun) {
      await prisma.$executeRawUnsafe(
        `UPDATE "Property" SET province = $2 WHERE id = $1`,
        r.id,
        m.provinceName,
      );
    }
    updated++;
  }
  // Say "would update" only when nothing was actually written.
  console.log(` ${updated}/${rows.length} rows ${dryRun ? 'would update' : 'updated'}.`);
}
/**
 * Runs the backfill passes in a fixed order: industrial → project →
 * property. Without `--table` (or with an empty value) every pass runs;
 * with `--table=NAME` only the named pass runs.
 *
 * @throws Error when `--table` names a pass that does not exist, so a typo
 *         fails loudly instead of completing "successfully" with no work.
 */
async function main(): Promise<void> {
  console.log(`🌍 Admin-code backfill (dryRun=${dryRun})`);

  // Insertion order of the Map is the execution order.
  const passes = new Map<string, () => Promise<void>>([
    ['industrial', backfillIndustrialPark],
    ['project', backfillProjectDevelopment],
    ['property', backfillProperty],
  ]);

  if (tableArg && !passes.has(tableArg)) {
    throw new Error(
      `Unknown --table value "${tableArg}". Expected one of: ${[...passes.keys()].join(', ')}`,
    );
  }

  for (const [name, run] of passes) {
    if (!tableArg || tableArg === name) await run();
  }
}
// Script entry point: run the backfill, report any failure through a
// non-zero exit code, and always release the Prisma client and the pg pool
// so the process can terminate.
void (async () => {
  try {
    await main();
  } catch (err) {
    console.error(err);
    process.exitCode = 1;
  } finally {
    await prisma.$disconnect();
    await pool.end();
  }
})();