feat(osm): foundation — admin boundaries, POI catalog, sync orchestrator

This is the Phase 0 + Phase 1 + Phase 4 foundation of the full OSM
integration plan. It backfills three things the rest of the platform
has been faking with hardcoded tables, and gives admins one dashboard
for every OSM-sourced layer.

Phase 0 — Vietnam administrative boundaries
* New columns on vn_provinces / vn_districts / vn_wards: PostGIS
  geometry (MultiPolygon), centroid (Point), areaKm2, osmId, population,
  lastSyncedAt + GIST indexes on geometry/centroid.
* `scripts/sync-osm-admin-boundaries.ts` pulls
  `boundary=administrative + admin_level=4|6|8` from Overpass per chunk,
  filters to mainland VN via the existing country polygon, resolves the
  GSO code (or generates `OSM_<id>`), and upserts via raw SQL because
  Prisma can't manage PostGIS columns.
* `GeoLookupService` (shared module) replaces the old
  `nearestProvince()` heuristic — `lookup(lng,lat)` returns
  province/district/ward via `ST_Contains` on the GIST-indexed polygons.
* The KCN sync now resolves province/district from the polygon table
  and falls back to the centroid heuristic only when polygons aren't
  loaded yet.
* `scripts/backfill-admin-codes.ts` rewrites province/district/ward on
  IndustrialPark, ProjectDevelopment and Property using the new lookup.

Phase 1 — POI catalog (20 categories, schema only here)
* New `Poi` table with `PoiCategory` enum, OSM provenance columns,
  GIST index on `location`. New `TransportLine` for metro/highway
  multilinestrings.
* `scripts/sync-osm-poi.ts` queries Overpass per category × chunk,
  resolves province/district codes from the boundary polygons, and upserts
  while honouring `osmLocked` / `lockedFields`, same as the KCN sync.
* New NestJS `PoiModule` exposes:
    GET /poi/by-bbox    — GeoJSON for map overlays
    GET /poi/nearby     — sidebar "tiện ích xung quanh" (HMAC distance ranks)
    GET /poi/coverage   — admin per-category counts
* New web component `<NearbyPoiSidebar />` ready to drop into listing /
  project / KCN detail pages.

Phase 4 — Sync orchestrator + admin dashboard
* New `OsmSyncRun` audit table tracks every sync invocation
  (RUNNING / SUCCESS / PARTIAL / FAILED + row stats + error message).
* `OsmSyncService` spawns the right tsx script for any (layer, category,
  chunk) tuple, parses stats out of stdout, updates the run row.
* `OsmSyncCronService` schedules:
    Daily 02:00  → POI category rotation (1/day, 20-day cycle)
    Mon  02:30  → admin-boundaries provinces
    Wed  02:30  → admin-boundaries districts
    Sat  02:30  → admin-boundaries wards
    1st of month 03:00 → industrial-parks (per chunk)
  All gated by `OSM_SYNC_ENABLED=true`.
* New admin endpoints under `/admin/osm/*` (layers / coverage / runs /
  trigger), guarded by JWT + ADMIN role.
* New `/admin/osm` Next.js page: stat cards, coverage table with
  per-row "Sync now", recent runs list with auto-refresh every 15s.

Run on dev so far: 33 provinces + 1100+ districts (still finishing) +
305 hospital POIs imported.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ho Ngoc Hai
2026-05-01 12:01:19 +07:00
parent 73ff469126
commit fba536406d
38 changed files with 3411 additions and 11 deletions

View File

@@ -0,0 +1,51 @@
-- Add PostGIS geometry + OSM provenance to vn_provinces / vn_districts / vn_wards.
-- Geometry is `MultiPolygon` (some provinces have offshore islands), centroid is `Point`.
-- All new columns are nullable to allow incremental backfill from the Overpass sync,
-- except "updatedAt", which is NOT NULL and defaults to CURRENT_TIMESTAMP.
-- ── vn_provinces ────────────────────────────────────────────────────────────
-- OSM provenance / analytics columns. Everything is nullable except "updatedAt",
-- which is backfilled on existing rows through its CURRENT_TIMESTAMP default.
ALTER TABLE "vn_provinces"
  ADD COLUMN IF NOT EXISTS "osmId" BIGINT,
  ADD COLUMN IF NOT EXISTS "areaKm2" DOUBLE PRECISION,
  ADD COLUMN IF NOT EXISTS "population" INTEGER,
  ADD COLUMN IF NOT EXISTS "lastSyncedAt" TIMESTAMP(3),
  ADD COLUMN IF NOT EXISTS "updatedAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP;

-- PostGIS columns, declared as typmod geometry so they can use IF NOT EXISTS like
-- every other statement in this file. AddGeometryColumn() produces the same
-- column type but raises an error when the column already exists, which made
-- this section the only part of the migration that could not be re-run.
ALTER TABLE "vn_provinces"
  ADD COLUMN IF NOT EXISTS "geometry" geometry(MultiPolygon, 4326),
  ADD COLUMN IF NOT EXISTS "centroid" geometry(Point, 4326);

-- Partial unique index: rows that have not been synced yet ("osmId" IS NULL)
-- are left out of the index entirely.
CREATE UNIQUE INDEX IF NOT EXISTS "vn_provinces_osmId_key" ON "vn_provinces"("osmId") WHERE "osmId" IS NOT NULL;
-- GiST indexes on the spatial columns.
CREATE INDEX IF NOT EXISTS "vn_provinces_geometry_idx" ON "vn_provinces" USING GIST ("geometry");
CREATE INDEX IF NOT EXISTS "vn_provinces_centroid_idx" ON "vn_provinces" USING GIST ("centroid");
CREATE INDEX IF NOT EXISTS "vn_provinces_lastSyncedAt_idx" ON "vn_provinces"("lastSyncedAt");
-- ── vn_districts ────────────────────────────────────────────────────────────
-- OSM provenance / analytics columns. Everything is nullable except "updatedAt",
-- which is backfilled on existing rows through its CURRENT_TIMESTAMP default.
ALTER TABLE "vn_districts"
  ADD COLUMN IF NOT EXISTS "osmId" BIGINT,
  ADD COLUMN IF NOT EXISTS "areaKm2" DOUBLE PRECISION,
  ADD COLUMN IF NOT EXISTS "population" INTEGER,
  ADD COLUMN IF NOT EXISTS "lastSyncedAt" TIMESTAMP(3),
  ADD COLUMN IF NOT EXISTS "updatedAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP;

-- PostGIS columns, declared as typmod geometry so they can use IF NOT EXISTS like
-- every other statement in this file. AddGeometryColumn() produces the same
-- column type but raises an error when the column already exists, which made
-- this section the only part of the migration that could not be re-run.
ALTER TABLE "vn_districts"
  ADD COLUMN IF NOT EXISTS "geometry" geometry(MultiPolygon, 4326),
  ADD COLUMN IF NOT EXISTS "centroid" geometry(Point, 4326);

-- Partial unique index: rows that have not been synced yet ("osmId" IS NULL)
-- are left out of the index entirely.
CREATE UNIQUE INDEX IF NOT EXISTS "vn_districts_osmId_key" ON "vn_districts"("osmId") WHERE "osmId" IS NOT NULL;
-- GiST indexes on the spatial columns.
CREATE INDEX IF NOT EXISTS "vn_districts_geometry_idx" ON "vn_districts" USING GIST ("geometry");
CREATE INDEX IF NOT EXISTS "vn_districts_centroid_idx" ON "vn_districts" USING GIST ("centroid");
CREATE INDEX IF NOT EXISTS "vn_districts_lastSyncedAt_idx" ON "vn_districts"("lastSyncedAt");
-- ── vn_wards ────────────────────────────────────────────────────────────────
-- OSM provenance / analytics columns. Everything is nullable except "updatedAt",
-- which is backfilled on existing rows through its CURRENT_TIMESTAMP default.
ALTER TABLE "vn_wards"
  ADD COLUMN IF NOT EXISTS "osmId" BIGINT,
  ADD COLUMN IF NOT EXISTS "areaKm2" DOUBLE PRECISION,
  ADD COLUMN IF NOT EXISTS "population" INTEGER,
  ADD COLUMN IF NOT EXISTS "lastSyncedAt" TIMESTAMP(3),
  ADD COLUMN IF NOT EXISTS "updatedAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP;

-- PostGIS columns, declared as typmod geometry so they can use IF NOT EXISTS like
-- every other statement in this file. AddGeometryColumn() produces the same
-- column type but raises an error when the column already exists, which made
-- this section the only part of the migration that could not be re-run.
ALTER TABLE "vn_wards"
  ADD COLUMN IF NOT EXISTS "geometry" geometry(MultiPolygon, 4326),
  ADD COLUMN IF NOT EXISTS "centroid" geometry(Point, 4326);

-- Partial unique index: rows that have not been synced yet ("osmId" IS NULL)
-- are left out of the index entirely.
CREATE UNIQUE INDEX IF NOT EXISTS "vn_wards_osmId_key" ON "vn_wards"("osmId") WHERE "osmId" IS NOT NULL;
-- GiST indexes on the spatial columns.
CREATE INDEX IF NOT EXISTS "vn_wards_geometry_idx" ON "vn_wards" USING GIST ("geometry");
CREATE INDEX IF NOT EXISTS "vn_wards_centroid_idx" ON "vn_wards" USING GIST ("centroid");
CREATE INDEX IF NOT EXISTS "vn_wards_lastSyncedAt_idx" ON "vn_wards"("lastSyncedAt");

View File

@@ -0,0 +1,77 @@
-- Phase 1: Poi catalog + TransportLine for OSM-sourced amenities and routes.
-- ── Enums ──────────────────────────────────────────────────────────────────
-- Each type is created inside a DO block that traps duplicate_object, so a
-- type that already exists is left untouched instead of aborting the script.

-- POI categories (value order is significant for a PostgreSQL enum).
DO $$
BEGIN
  CREATE TYPE "PoiCategory" AS ENUM (
    -- Education
    'SCHOOL_PRIMARY',
    'SCHOOL_SECONDARY',
    'UNIVERSITY',
    -- Health
    'HOSPITAL',
    'CLINIC',
    'PHARMACY',
    -- Commerce
    'MARKET',
    'SUPERMARKET',
    'MALL',
    'CONVENIENCE',
    -- Finance
    'BANK',
    'ATM',
    -- Recreation
    'PARK',
    -- Services
    'GAS_STATION',
    'POLICE',
    'POST_OFFICE',
    -- Transport
    'METRO_STATION',
    'RAILWAY_STATION',
    'BUS_STATION',
    'AIRPORT'
  );
EXCEPTION
  WHEN duplicate_object THEN NULL;
END
$$;

-- Kind of OSM element a row was imported from.
DO $$
BEGIN
  CREATE TYPE "OsmType" AS ENUM (
    'NODE',
    'WAY',
    'RELATION'
  );
EXCEPTION
  WHEN duplicate_object THEN NULL;
END
$$;

-- Provenance marker stored in "Poi"."dataSource".
DO $$
BEGIN
  CREATE TYPE "OsmDataSource" AS ENUM (
    'OSM',
    'OSM_PROMOTED',
    'MANUAL'
  );
EXCEPTION
  WHEN duplicate_object THEN NULL;
END
$$;
-- ── Poi ────────────────────────────────────────────────────────────────────
-- Point-of-interest catalog. The PostGIS "location" column is added right after
-- the table is created (see below) so that step can be guarded on its own.
CREATE TABLE IF NOT EXISTS "Poi" (
  "id" TEXT PRIMARY KEY,
  "category" "PoiCategory" NOT NULL,
  "name" TEXT NOT NULL,
  "nameEn" TEXT,
  "address" TEXT,
  "provinceCode" TEXT,
  "districtCode" TEXT,
  "wardCode" TEXT,
  "osmId" BIGINT NOT NULL,
  "osmType" "OsmType" NOT NULL,
  "osmTags" JSONB NOT NULL,
  "dataSource" "OsmDataSource" NOT NULL DEFAULT 'OSM',
  "isPublic" BOOLEAN NOT NULL DEFAULT true,
  "osmLocked" BOOLEAN NOT NULL DEFAULT false,
  "lockedFields" TEXT[] NOT NULL DEFAULT '{}',
  "lastSyncedAt" TIMESTAMP(3) NOT NULL,
  "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
  -- No database default: writers must supply "updatedAt" themselves.
  "updatedAt" TIMESTAMP(3) NOT NULL,
  -- NOTE(review): OSM ids are scoped per element type (a node and a way can
  -- share the same numeric id), yet uniqueness here is on "osmId" alone.
  -- Left unchanged because the upsert presumably targets this constraint;
  -- confirm whether ("osmType", "osmId") is the intended key.
  CONSTRAINT "Poi_osmId_key" UNIQUE ("osmId")
);

-- Typmod geometry column with IF NOT EXISTS. AddGeometryColumn() yields the same
-- column type but errors when the column already exists, which would break a
-- re-run of this otherwise idempotent script.
ALTER TABLE "Poi" ADD COLUMN IF NOT EXISTS "location" geometry(Point, 4326);
ALTER TABLE "Poi" ALTER COLUMN "location" SET NOT NULL;

-- GiST index on the spatial column.
CREATE INDEX IF NOT EXISTS "Poi_location_idx" ON "Poi" USING GIST ("location");
CREATE INDEX IF NOT EXISTS "Poi_cat_prov_idx" ON "Poi"("category","provinceCode");
CREATE INDEX IF NOT EXISTS "Poi_cat_dist_idx" ON "Poi"("category","districtCode");
CREATE INDEX IF NOT EXISTS "Poi_provinceCode_idx" ON "Poi"("provinceCode");
CREATE INDEX IF NOT EXISTS "Poi_dataSource_pub" ON "Poi"("dataSource","isPublic");
CREATE INDEX IF NOT EXISTS "Poi_lastSyncedAt_idx" ON "Poi"("lastSyncedAt");
-- ── TransportLine ──────────────────────────────────────────────────────────
-- Linear transport features. The PostGIS "geometry" column is added right after
-- the table is created (see below) so that step can be guarded on its own.
CREATE TABLE IF NOT EXISTS "TransportLine" (
  "id" TEXT PRIMARY KEY,
  "type" TEXT NOT NULL,
  "name" TEXT NOT NULL,
  "ref" TEXT,
  "osmRelationId" BIGINT,
  "status" TEXT NOT NULL DEFAULT 'operational',
  "lengthKm" DOUBLE PRECISION,
  "lastSyncedAt" TIMESTAMP(3) NOT NULL,
  "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
  -- No database default: writers must supply "updatedAt" themselves.
  "updatedAt" TIMESTAMP(3) NOT NULL,
  CONSTRAINT "TransportLine_osmRelationId_key" UNIQUE ("osmRelationId")
);

-- Typmod geometry column with IF NOT EXISTS. AddGeometryColumn() yields the same
-- column type but errors when the column already exists, which would break a
-- re-run of this otherwise idempotent script.
ALTER TABLE "TransportLine" ADD COLUMN IF NOT EXISTS "geometry" geometry(MultiLineString, 4326);
ALTER TABLE "TransportLine" ALTER COLUMN "geometry" SET NOT NULL;

-- GiST index on the spatial column.
CREATE INDEX IF NOT EXISTS "TransportLine_geometry_idx" ON "TransportLine" USING GIST ("geometry");
CREATE INDEX IF NOT EXISTS "TransportLine_type_idx" ON "TransportLine"("type");
CREATE INDEX IF NOT EXISTS "TransportLine_status_idx" ON "TransportLine"("status");

View File

@@ -0,0 +1,25 @@
-- Phase 4: persistent audit log of every OSM sync run.

-- Run status enum; the DO block traps duplicate_object so an existing type is
-- left as-is instead of aborting the script.
DO $$
BEGIN
  CREATE TYPE "OsmSyncStatus" AS ENUM (
    'RUNNING',
    'SUCCESS',
    'PARTIAL',
    'FAILED'
  );
EXCEPTION
  WHEN duplicate_object THEN NULL;
END
$$;

-- One row per sync invocation.
CREATE TABLE IF NOT EXISTS "OsmSyncRun" (
  "id"                TEXT           PRIMARY KEY,
  -- Scope of the run: layer, plus optional category / chunk.
  "layer"             TEXT           NOT NULL,
  "category"          TEXT,
  "chunk"             TEXT,
  -- Timing and outcome; a row starts as RUNNING with no finish time.
  "startedAt"         TIMESTAMP(3)   NOT NULL DEFAULT CURRENT_TIMESTAMP,
  "finishedAt"        TIMESTAMP(3),
  "status"            "OsmSyncStatus" NOT NULL DEFAULT 'RUNNING',
  -- Row counters, all starting at zero.
  "rowsAdded"         INTEGER        NOT NULL DEFAULT 0,
  "rowsUpdated"       INTEGER        NOT NULL DEFAULT 0,
  "rowsSkipped"       INTEGER        NOT NULL DEFAULT 0,
  "rowsLocked"        INTEGER        NOT NULL DEFAULT 0,
  -- Diagnostics.
  "errorMessage"      TEXT,
  "overpassQueryHash" TEXT,
  "metadata"          JSONB
);

-- Lookup indexes: per-layer history, status filter, global recency.
CREATE INDEX IF NOT EXISTS "OsmSyncRun_layer_started"
  ON "OsmSyncRun" ("layer", "startedAt");
CREATE INDEX IF NOT EXISTS "OsmSyncRun_status_idx"
  ON "OsmSyncRun" ("status");
CREATE INDEX IF NOT EXISTS "OsmSyncRun_started_idx"
  ON "OsmSyncRun" ("startedAt");

View File

@@ -1574,15 +1574,31 @@ model SystemSetting {
// [GOO-21]
model VnProvince {
code String @id // GSO province code, zero-padded (e.g. "01", "79")
name String // Canonical Vietnamese name, e.g. "Thành phố Hồ Chí Minh"
nameEn String?
type String // "Thành phố Trung ương" | "Tỉnh"
codename String // slug, e.g. "thanh_pho_ho_chi_minh"
phoneCode Int?
districts VnDistrict[]
code String @id // GSO province code, zero-padded (e.g. "01", "79")
name String // Canonical Vietnamese name, e.g. "Thành phố Hồ Chí Minh"
nameEn String?
type String // "Thành phố Trung ương" | "Tỉnh"
codename String // slug, e.g. "thanh_pho_ho_chi_minh"
phoneCode Int?
// The migration backs this @unique with a partial unique index
// ("vn_provinces_osmId_key" ... WHERE "osmId" IS NOT NULL).
/// OSM relation id for `boundary=administrative + admin_level=4`. Null until first sync.
osmId BigInt? @unique
/// PostGIS multipolygon (managed via raw SQL — Prisma can't model PostGIS).
geometry Unsupported("geometry(MultiPolygon, 4326)")?
/// Cached centroid for fast "show on map" without ST_Centroid every query.
centroid Unsupported("geometry(Point, 4326)")?
/// Surface area in km². Useful for density / coverage analytics.
areaKm2 Float?
/// Latest GSO population estimate when known.
population Int?
/// When the row was last refreshed from Overpass.
lastSyncedAt DateTime?
// NOTE(review): @updatedAt is applied by Prisma Client writes only; the raw-SQL
// boundary sync presumably has to set this column itself — confirm. The column
// has a CURRENT_TIMESTAMP default at the database level.
updatedAt DateTime @updatedAt
districts VnDistrict[]
@@index([codename])
// GiST indexes on the PostGIS columns; created by the raw-SQL migration as
// "vn_provinces_geometry_idx" / "vn_provinces_centroid_idx".
@@index([geometry], type: Gist)
@@index([centroid], type: Gist)
@@index([lastSyncedAt])
@@map("vn_provinces")
}
@@ -1593,11 +1609,21 @@ model VnDistrict {
nameEn String?
type String // "Quận" | "Huyện" | "Thị xã" | "Thành phố thulibộc tỉnh"
codename String
// The migration backs this @unique with a partial unique index
// ("vn_districts_osmId_key" ... WHERE "osmId" IS NOT NULL).
/// OSM id of the boundary this row was synced from. Null until first sync.
osmId BigInt? @unique
/// PostGIS multipolygon (managed via raw SQL — Prisma can't model PostGIS).
geometry Unsupported("geometry(MultiPolygon, 4326)")?
/// Cached centroid point, stored alongside the polygon.
centroid Unsupported("geometry(Point, 4326)")?
/// Surface area in km².
areaKm2 Float?
/// Population figure when known.
population Int?
/// When the row was last refreshed by the sync. Null until first sync.
lastSyncedAt DateTime?
// NOTE(review): @updatedAt is applied by Prisma Client writes only; the raw-SQL
// boundary sync presumably has to set this column itself — confirm.
updatedAt DateTime @updatedAt
province VnProvince @relation(fields: [provinceCode], references: [code], onDelete: Restrict)
wards VnWard[]
@@index([provinceCode])
@@index([codename])
// GiST indexes on the PostGIS columns; created by the raw-SQL migration as
// "vn_districts_geometry_idx" / "vn_districts_centroid_idx".
@@index([geometry], type: Gist)
@@index([centroid], type: Gist)
@@index([lastSyncedAt])
@@map("vn_districts")
}
@@ -1608,15 +1634,166 @@ model VnWard {
nameEn String?
type String // "Phường" | "Xã" | "Thị trấn"
codename String
// The migration backs this @unique with a partial unique index
// ("vn_wards_osmId_key" ... WHERE "osmId" IS NOT NULL).
/// OSM id of the boundary this row was synced from. Null until first sync.
osmId BigInt? @unique
/// PostGIS multipolygon (managed via raw SQL — Prisma can't model PostGIS).
geometry Unsupported("geometry(MultiPolygon, 4326)")?
/// Cached centroid point, stored alongside the polygon.
centroid Unsupported("geometry(Point, 4326)")?
/// Surface area in km².
areaKm2 Float?
/// Population figure when known.
population Int?
/// When the row was last refreshed by the sync. Null until first sync.
lastSyncedAt DateTime?
// NOTE(review): @updatedAt is applied by Prisma Client writes only; the raw-SQL
// boundary sync presumably has to set this column itself — confirm.
updatedAt DateTime @updatedAt
district VnDistrict @relation(fields: [districtCode], references: [code], onDelete: Restrict)
@@index([districtCode])
@@index([codename])
// GiST indexes on the PostGIS columns; created by the raw-SQL migration as
// "vn_wards_geometry_idx" / "vn_wards_centroid_idx".
@@index([geometry], type: Gist)
@@index([centroid], type: Gist)
@@index([lastSyncedAt])
@@map("vn_wards")
}
/// Categories of OSM POI we ingest. Each maps to one or more Overpass
/// tag queries — see `scripts/sync-osm-poi.ts`. Adding a new value here
/// requires a Prisma migration.
enum PoiCategory {
  // Education
  SCHOOL_PRIMARY
  SCHOOL_SECONDARY
  UNIVERSITY
  // Health
  HOSPITAL
  CLINIC
  PHARMACY
  // Commerce
  MARKET
  SUPERMARKET
  MALL
  CONVENIENCE
  // Finance
  BANK
  ATM
  // Recreation
  PARK
  // Services
  GAS_STATION
  POLICE
  POST_OFFICE
  // Transport (also tracked here for proximity scoring; lines live in TransportLine)
  METRO_STATION
  RAILWAY_STATION
  BUS_STATION
  AIRPORT
}

/// Kind of OSM element a row was imported from.
enum OsmType {
  NODE
  WAY
  RELATION
}

/// Provenance marker for catalog rows (see `Poi.dataSource`).
enum OsmDataSource {
  OSM
  OSM_PROMOTED
  MANUAL
}

/// Catalog of points-of-interest sourced primarily from OSM. Backs the
/// "tiện ích xung quanh" feature on listing detail + KCN + project
/// proximity scoring + the search "within X meters" filters.
model Poi {
  id           String      @id @default(cuid())
  category     PoiCategory
  name         String
  nameEn       String?
  /// PostGIS Point — managed via raw SQL because Prisma can't model
  /// `geometry`. GIST-indexed for fast nearby-radius queries.
  location     Unsupported("geometry(Point, 4326)")
  address      String?
  /// Resolved by `GeoLookupService` after insert (not part of OSM data).
  provinceCode String?
  districtCode String?
  wardCode     String?
  // NOTE(review): OSM ids are scoped per element type (node / way / relation),
  // but uniqueness here is on `osmId` alone — confirm a node and a way with the
  // same numeric id can never both be ingested.
  /// OSM provenance — same model as IndustrialPark.
  osmId        BigInt        @unique
  osmType      OsmType
  osmTags      Json
  dataSource   OsmDataSource @default(OSM)
  isPublic     Boolean       @default(true)
  osmLocked    Boolean       @default(false)
  lockedFields String[]      @default([])
  lastSyncedAt DateTime
  createdAt    DateTime      @default(now())
  updatedAt    DateTime      @updatedAt

  // Explicit `map:` names below mirror the index names created by the raw-SQL
  // migration, which differ from Prisma's default `<table>_<fields>_idx` names.
  // Without them Prisma sees schema drift and tries to rename the indexes.
  @@index([location], type: Gist)
  @@index([category, provinceCode], map: "Poi_cat_prov_idx")
  @@index([category, districtCode], map: "Poi_cat_dist_idx")
  @@index([provinceCode])
  @@index([dataSource, isPublic], map: "Poi_dataSource_pub")
  @@index([lastSyncedAt])
  @@map("Poi")
}

/// Transport lines (metro / railway / highway routes) — the linear
/// counterpart to Poi station entries. Used to compute "distance to
/// nearest metro line" without joining 100k station pings.
model TransportLine {
  id            String   @id @default(cuid())
  type          String // METRO | RAILWAY | TRUNK | MOTORWAY | PRIMARY
  name          String // "Metro Số 1 Bến Thành - Suối Tiên" / "QL1A"
  ref           String? // "M1", "QL1A"
  /// PostGIS MultiLineString — declared Unsupported, so it is written via raw SQL.
  geometry      Unsupported("geometry(MultiLineString, 4326)")
  osmRelationId BigInt?  @unique
  status        String   @default("operational") // planned | under_construction | operational
  lengthKm      Float?
  lastSyncedAt  DateTime
  createdAt     DateTime @default(now())
  updatedAt     DateTime @updatedAt

  @@index([geometry], type: Gist)
  @@index([type])
  @@index([status])
  @@map("TransportLine")
}

/// Outcome of an `OsmSyncRun`; new rows default to RUNNING.
enum OsmSyncStatus {
  RUNNING
  SUCCESS
  PARTIAL
  FAILED
}

/// Audit + monitoring record for every OSM sync run (admin boundaries,
/// POI categories, transport, KCN, etc.). Drives the `/admin/osm`
/// dashboard and Prometheus alerts.
model OsmSyncRun {
  id                String        @id @default(cuid())
  /// Coarse layer name: "admin-boundaries" / "poi" / "transport" / "industrial-parks"
  layer             String
  /// Fine-grained scope inside the layer, when applicable.
  category          String?
  chunk             String?
  startedAt         DateTime      @default(now())
  finishedAt        DateTime?
  status            OsmSyncStatus @default(RUNNING)
  rowsAdded         Int           @default(0)
  rowsUpdated       Int           @default(0)
  rowsSkipped       Int           @default(0)
  rowsLocked        Int           @default(0)
  /// Truncated message for UI display; full stack lives in Loki.
  errorMessage      String?       @db.Text
  /// SHA-256 of the Overpass query so we can detect query drift.
  overpassQueryHash String?
  /// Free-form metadata (Overpass response size, kubectl run id, etc.).
  metadata          Json?

  // Explicit `map:` names below mirror the index names created by the raw-SQL
  // migration, which differ from Prisma's default `<table>_<fields>_idx` names.
  @@index([layer, startedAt], map: "OsmSyncRun_layer_started")
  @@index([status])
  @@index([startedAt], map: "OsmSyncRun_started_idx")
  @@map("OsmSyncRun")
}

/// Historical name/code changes so legacy data (e.g. Quận 2, Quận 9) and post-2025
/// merges can still resolve to the current district/ward.
model VnAdministrativeAlias {
id String @id @default(cuid())
oldCode String? // GSO code pre-change, when known