feat(osm): foundation — admin boundaries, POI catalog, sync orchestrator

This is the Phase 0 + Phase 1 + Phase 4 foundation of the full OSM
integration plan. It backfills three things the rest of the platform
has been faking with hardcoded tables, and gives admins one dashboard
for every OSM-sourced layer.

Phase 0 — Vietnam administrative boundaries
* New columns on vn_provinces / vn_districts / vn_wards: PostGIS
  geometry (MultiPolygon), centroid (Point), areaKm2, osmId, population,
  lastSyncedAt + GIST indexes on geometry/centroid.
* `scripts/sync-osm-admin-boundaries.ts` pulls
  `boundary=administrative + admin_level=4|6|8` from Overpass per chunk,
  filters to mainland VN via the existing country polygon, resolves the
  GSO code (or generates `OSM_<id>`), and upserts via raw SQL because
  Prisma can't manage PostGIS columns.
* `GeoLookupService` (shared module) replaces the old
  `nearestProvince()` heuristic — `lookup(lng,lat)` returns
  province/district/ward via `ST_Contains` on the GIST-indexed polygons.
* The KCN sync now resolves province/district from the polygon table
  and falls back to the centroid heuristic only when polygons aren't
  loaded yet.
* `scripts/backfill-admin-codes.ts` rewrites province/district/ward on
  IndustrialPark, ProjectDevelopment and Property using the new lookup.

Phase 1 — POI catalog (20 categories, schema only here)
* New `Poi` table with `PoiCategory` enum, OSM provenance columns,
  GIST index on `location`. New `TransportLine` for metro/highway
  multilinestrings.
* `scripts/sync-osm-poi.ts` queries Overpass per category × chunk,
  resolves province/district codes from the boundary polygons, and
  upserts while honouring `osmLocked` / `lockedFields`, same as the KCN sync.
* New NestJS `PoiModule` exposes:
    GET /poi/by-bbox    — GeoJSON for map overlays
    GET /poi/nearby     — sidebar "tiện ích xung quanh" (HMAC distance ranks)
    GET /poi/coverage   — admin per-category counts
* New web component `<NearbyPoiSidebar />` ready to drop into listing /
  project / KCN detail pages.

Phase 4 — Sync orchestrator + admin dashboard
* New `OsmSyncRun` audit table tracks every sync invocation
  (RUNNING / SUCCESS / PARTIAL / FAILED + row stats + error message).
* `OsmSyncService` spawns the right tsx script for any (layer, category,
  chunk) tuple, parses stats out of stdout, updates the run row.
* `OsmSyncCronService` schedules:
    Daily 02:00  → POI category rotation (1/day, 20-day cycle)
    Mon  02:30  → admin-boundaries provinces
    Wed  02:30  → admin-boundaries districts
    Sat  02:30  → admin-boundaries wards
    1st of month 03:00 → industrial-parks (per chunk)
  All gated by `OSM_SYNC_ENABLED=true`.
* New admin endpoints under `/admin/osm/*` (layers / coverage / runs /
  trigger), guarded by JWT + ADMIN role.
* New `/admin/osm` Next.js page: stat cards, coverage table with
  per-row "Sync now", recent runs list with auto-refresh every 15s.

Dev run so far: 33 provinces, 1100+ districts (sync still in progress)
and 305 hospital POIs imported.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ho Ngoc Hai
2026-05-01 12:01:19 +07:00
parent 73ff469126
commit fba536406d
38 changed files with 3411 additions and 11 deletions

View File

@@ -1574,15 +1574,31 @@ model SystemSetting {
// [GOO-21]
/// Vietnamese province, keyed by its GSO code. Parent of `VnDistrict`.
/// The boundary polygon and centroid are PostGIS columns that Prisma cannot
/// model, so they are declared `Unsupported` and written via raw SQL.
model VnProvince {
code String @id // GSO province code, zero-padded (e.g. "01", "79")
name String // Canonical Vietnamese name, e.g. "Thành phố Hồ Chí Minh"
nameEn String?
type String // "Thành phố Trung ương" | "Tỉnh"
codename String // slug, e.g. "thanh_pho_ho_chi_minh"
phoneCode Int?
districts VnDistrict[]
code String @id // GSO province code, zero-padded (e.g. "01", "79")
name String // Canonical Vietnamese name, e.g. "Thành phố Hồ Chí Minh"
nameEn String?
type String // "Thành phố Trung ương" | "Tỉnh"
codename String // slug, e.g. "thanh_pho_ho_chi_minh"
phoneCode Int?
/// OSM relation id for `boundary=administrative + admin_level=4`. Null until first sync.
osmId BigInt? @unique
/// PostGIS multipolygon (managed via raw SQL — Prisma can't model PostGIS).
geometry Unsupported("geometry(MultiPolygon, 4326)")?
/// Cached centroid for fast "show on map" without ST_Centroid every query.
centroid Unsupported("geometry(Point, 4326)")?
/// Surface area in km². Useful for density / coverage analytics.
areaKm2 Float?
/// Latest GSO population estimate when known.
population Int?
/// When the row was last refreshed from Overpass.
lastSyncedAt DateTime?
// NOTE(review): `@updatedAt` is applied by Prisma Client, not by the database,
// and this required column has no database default. Raw SQL writes therefore
// have to set it explicitly, and a required column without a default cannot be
// added to a non-empty table. Consider `@default(now()) @updatedAt` — TODO
// confirm against the migration and the sync script. The same applies to the
// `updatedAt` columns on vn_districts and vn_wards.
updatedAt DateTime @updatedAt
districts VnDistrict[]
@@index([codename])
// GiST indexes on the two PostGIS columns.
@@index([geometry], type: Gist)
@@index([centroid], type: Gist)
@@index([lastSyncedAt])
@@map("vn_provinces")
}
@@ -1593,11 +1609,21 @@ model VnDistrict {
nameEn String?
type String // "Quận" | "Huyện" | "Thị xã" | "Thành phố thuộc tỉnh"
codename String
osmId BigInt? @unique
geometry Unsupported("geometry(MultiPolygon, 4326)")?
centroid Unsupported("geometry(Point, 4326)")?
areaKm2 Float?
population Int?
lastSyncedAt DateTime?
updatedAt DateTime @updatedAt
province VnProvince @relation(fields: [provinceCode], references: [code], onDelete: Restrict)
wards VnWard[]
@@index([provinceCode])
@@index([codename])
@@index([geometry], type: Gist)
@@index([centroid], type: Gist)
@@index([lastSyncedAt])
@@map("vn_districts")
}
@@ -1608,15 +1634,166 @@ model VnWard {
nameEn String?
type String // "Phường" | "Xã" | "Thị trấn"
codename String
osmId BigInt? @unique
geometry Unsupported("geometry(MultiPolygon, 4326)")?
centroid Unsupported("geometry(Point, 4326)")?
areaKm2 Float?
population Int?
lastSyncedAt DateTime?
updatedAt DateTime @updatedAt
district VnDistrict @relation(fields: [districtCode], references: [code], onDelete: Restrict)
@@index([districtCode])
@@index([codename])
@@index([geometry], type: Gist)
@@index([centroid], type: Gist)
@@index([lastSyncedAt])
@@map("vn_wards")
}
// VnAdministrativeAlias (declared further down): historical name/code changes so
// legacy data (e.g. Quận 2, Quận 9) and post-2025 merges can still resolve to the
// current district/ward.
/// Categories of OSM POI we ingest. Each maps to one or more Overpass
/// tag queries — see `scripts/sync-osm-poi.ts`. Adding a new value here
/// requires a Prisma migration.
/// Used as the type of `Poi.category`.
enum PoiCategory {
// Education
SCHOOL_PRIMARY
SCHOOL_SECONDARY
UNIVERSITY
// Health
HOSPITAL
CLINIC
PHARMACY
// Commerce
MARKET
SUPERMARKET
MALL
CONVENIENCE
// Finance
BANK
ATM
// Recreation
PARK
// Services
GAS_STATION
POLICE
POST_OFFICE
// Transport (also tracked here for proximity scoring; lines live in TransportLine)
// These four are point features (stations / airports) only.
METRO_STATION
RAILWAY_STATION
BUS_STATION
AIRPORT
}
/// The three OpenStreetMap element kinds. Stored in `Poi.osmType` next to
/// `Poi.osmId` to record which kind of element a row came from.
enum OsmType {
NODE
WAY
RELATION
}
/// Provenance flag for OSM-backed rows; `Poi.dataSource` defaults to `OSM`.
/// NOTE(review): the meaning of each value is not spelled out in this schema.
/// Presumably OSM = imported as-is, OSM_PROMOTED = an OSM row taken over by
/// staff, MANUAL = entered by hand — confirm against the IndustrialPark
/// provenance model.
enum OsmDataSource {
OSM
OSM_PROMOTED
MANUAL
}
/// Catalog of points-of-interest sourced primarily from OSM. Backs the
/// "tiện ích xung quanh" feature on listing detail + KCN + project
/// proximity scoring + the search "within X meters" filters.
///
/// `location` is a required `Unsupported` column, so Prisma Client does not
/// generate create / update / upsert for this model: rows are written with
/// raw SQL, where client-side helpers (`cuid()`, `@updatedAt`) never run.
model Poi {
// `cuid()` is produced by Prisma Client only — raw SQL inserts must supply an id.
id String @id @default(cuid())
category PoiCategory
name String
nameEn String?
/// PostGIS Point — managed via raw SQL because Prisma can't model
/// `geometry`. GIST-indexed for fast nearby-radius queries.
location Unsupported("geometry(Point, 4326)")
address String?
/// Resolved by `GeoLookupService` after insert (not part of OSM data).
provinceCode String?
districtCode String?
wardCode String?
// NOTE(review): OSM ids are only unique within one element type (a node and
// a way can carry the same number), so uniqueness on `osmId` alone can
// collide across `osmType`. `@@unique([osmType, osmId])` would be the safe
// key, but it changes the upsert conflict target and the generated
// unique-where input, so it is left unchanged here — confirm with the sync script.
/// OSM provenance — same model as IndustrialPark.
osmId BigInt @unique
osmType OsmType
osmTags Json
dataSource OsmDataSource @default(OSM)
isPublic Boolean @default(true)
osmLocked Boolean @default(false)
lockedFields String[] @default([])
lastSyncedAt DateTime
createdAt DateTime @default(now())
// `@default(now())` gives the column a database-level default so raw SQL
// inserts that omit it still succeed; `@updatedAt` on its own only acts
// inside Prisma Client. Raw SQL updates must still bump this column themselves.
updatedAt DateTime @default(now()) @updatedAt
// GiST index backing the nearby-radius / bbox queries on `location`.
@@index([location], type: Gist)
@@index([category, provinceCode])
@@index([category, districtCode])
@@index([provinceCode])
@@index([dataSource, isPublic])
@@index([lastSyncedAt])
@@map("Poi")
}
/// Transport lines (metro / railway / highway routes) — the linear
/// counterpart to Poi station entries. Used to compute "distance to
/// nearest metro line" without joining 100k station pings.
///
/// `geometry` is a required `Unsupported` column, so Prisma Client does not
/// generate create / update / upsert for this model: rows are written with
/// raw SQL, where client-side helpers (`cuid()`, `@updatedAt`) never run.
model TransportLine {
// `cuid()` is produced by Prisma Client only — raw SQL inserts must supply an id.
id String @id @default(cuid())
type String // METRO | RAILWAY | TRUNK | MOTORWAY | PRIMARY
name String // "Metro Số 1 Bến Thành - Suối Tiên" / "QL1A"
ref String? // "M1", "QL1A"
geometry Unsupported("geometry(MultiLineString, 4326)")
osmRelationId BigInt? @unique
status String @default("operational") // planned | under_construction | operational
lengthKm Float?
lastSyncedAt DateTime
createdAt DateTime @default(now())
// `@default(now())` gives the column a database-level default so raw SQL
// inserts that omit it still succeed; `@updatedAt` on its own only acts
// inside Prisma Client. Raw SQL updates must still bump this column themselves.
updatedAt DateTime @default(now()) @updatedAt
// GiST index backing distance-to-line queries on `geometry`.
@@index([geometry], type: Gist)
@@index([type])
@@index([status])
@@map("TransportLine")
}
/// State of an `OsmSyncRun`. New rows start as `RUNNING` (the column default
/// on `OsmSyncRun.status`).
/// NOTE(review): SUCCESS / PARTIAL / FAILED are presumably terminal outcomes,
/// with PARTIAL meaning some rows or chunks failed while others were written —
/// confirm in OsmSyncService.
enum OsmSyncStatus {
RUNNING
SUCCESS
PARTIAL
FAILED
}
/// Audit + monitoring record for every OSM sync run (admin boundaries,
/// POI categories, transport, KCN, etc.). Drives the `/admin/osm`
/// dashboard and Prometheus alerts.
model OsmSyncRun {
id String @id @default(cuid())
/// Coarse layer name: "admin-boundaries" / "poi" / "transport" / "industrial-parks"
layer String
/// Fine-grained scope inside the layer, when applicable.
category String?
chunk String?
// Set by the database at insert time.
startedAt DateTime @default(now())
// Nullable — presumably stays null while the run is still RUNNING (TODO confirm).
finishedAt DateTime?
status OsmSyncStatus @default(RUNNING)
// Row counters, all starting at 0.
// NOTE(review): `rowsLocked` presumably counts rows left untouched because of
// `osmLocked` / `lockedFields` — confirm in the sync scripts.
rowsAdded Int @default(0)
rowsUpdated Int @default(0)
rowsSkipped Int @default(0)
rowsLocked Int @default(0)
/// Truncated message for UI display; full stack lives in Loki.
errorMessage String? @db.Text
/// SHA-256 of the Overpass query so we can detect query drift.
overpassQueryHash String?
/// Free-form metadata (Overpass response size, kubectl run id, etc.).
metadata Json?
// Indexes for per-layer history, status filtering and the recent-runs list.
@@index([layer, startedAt])
@@index([status])
@@index([startedAt])
@@map("OsmSyncRun")
}
model VnAdministrativeAlias {
id String @id @default(cuid())
oldCode String? // GSO code pre-change, when known