feat(osm): foundation — admin boundaries, POI catalog, sync orchestrator

This is the Phase 0 + Phase 1 + Phase 4 foundation of the full OSM
integration plan. It backfills three things the rest of the platform
has been faking with hardcoded tables, and gives admins one dashboard
for every OSM-sourced layer.

Phase 0 — Vietnam administrative boundaries
* New columns on vn_provinces / vn_districts / vn_wards: PostGIS
  geometry (MultiPolygon), centroid (Point), areaKm2, osmId, population,
  lastSyncedAt + GIST indexes on geometry/centroid.
* `scripts/sync-osm-admin-boundaries.ts` pulls
  `boundary=administrative + admin_level=4|6|8` from Overpass per chunk,
  filters to mainland VN via the existing country polygon, resolves the
  GSO code (or generates `OSM_<id>`), and upserts via raw SQL because
  Prisma can't manage PostGIS columns.
* `GeoLookupService` (shared module) replaces the old
  `nearestProvince()` heuristic — `lookup(lng,lat)` returns
  province/district/ward via `ST_Contains` on the GIST-indexed polygons.
* The KCN sync now resolves province/district from the polygon table
  and falls back to the centroid heuristic only when polygons aren't
  loaded yet.
* `scripts/backfill-admin-codes.ts` rewrites province/district/ward on
  IndustrialPark, ProjectDevelopment and Property using the new lookup.

Phase 1 — POI catalog (15 categories, schema only here)
* New `Poi` table with `PoiCategory` enum, OSM provenance columns,
  GIST index on `location`. New `TransportLine` for metro/highway
  multilinestrings.
* `scripts/sync-osm-poi.ts` queries Overpass per category × chunk,
  resolves province/district codes from the boundary polygons, and
  upserts while honouring `osmLocked` / `lockedFields`, the same way the
  KCN sync does.
* New NestJS `PoiModule` exposes:
    GET /poi/by-bbox    — GeoJSON for map overlays
    GET /poi/nearby     — sidebar "tiện ích xung quanh" (HMAC distance ranks)
    GET /poi/coverage   — admin per-category counts
* New web component `<NearbyPoiSidebar />` ready to drop into listing /
  project / KCN detail pages.

Phase 4 — Sync orchestrator + admin dashboard
* New `OsmSyncRun` audit table tracks every sync invocation
  (RUNNING / SUCCESS / PARTIAL / FAILED + row stats + error message).
* `OsmSyncService` spawns the right tsx script for any (layer, category,
  chunk) tuple, parses stats out of stdout, updates the run row.
* `OsmSyncCronService` schedules:
    Daily 02:00  → POI category rotation (1/day, 20-day cycle)
    Mon  02:30  → admin-boundaries provinces
    Wed  02:30  → admin-boundaries districts
    Sat  02:30  → admin-boundaries wards
    1st of month 03:00 → industrial-parks (per chunk)
  All gated by `OSM_SYNC_ENABLED=true`.
* New admin endpoints under `/admin/osm/*` (layers / coverage / runs /
  trigger), guarded by JWT + ADMIN role.
* New `/admin/osm` Next.js page: stat cards, coverage table with
  per-row "Sync now", recent runs list with auto-refresh every 15s.

Run on dev so far: 33 provinces + 1100+ districts (still finishing) +
305 hospitals POI imported.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ho Ngoc Hai
2026-05-01 12:01:19 +07:00
parent 73ff469126
commit fba536406d
38 changed files with 3411 additions and 11 deletions

View File

@@ -0,0 +1,216 @@
/**
* Backfill `provinceCode` / `districtCode` / `wardCode` (and the human
* `province` / `district` / `ward` text columns where present) on every
* geo-bearing entity, using the freshly synced
* `vn_provinces` / `vn_districts` / `vn_wards` polygons.
*
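* Tables processed (what this script actually rewrites):
* - IndustrialPark (PostGIS point) — `province` / `district` name columns
* - ProjectDevelopment (PostGIS point) — `city` / `district` / `ward` name columns
* - Listing (uses Property.location internally — joined); there is no
*   separate Listing pass in this script
* - Property (PostGIS point — most listings live here) — `province` name column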
*
* Usage:
* NODE_OPTIONS="-r dotenv/config" DOTENV_CONFIG_PATH=.env \
* pnpm tsx scripts/backfill-admin-codes.ts [--dry-run] [--table=industrial|project|property]
*
* Strategy:
* For each entity with a `location` Point we ST_Contains against the
* province/district/ward polygons and write the matched code+name back
* into the row. Only rows where the resolved value DIFFERS from the
* existing one are touched, so re-runs are cheap.
*/
import 'dotenv/config';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';
import pg from 'pg';
// Raw pg pool handed to Prisma through the driver adapter; it is closed
// explicitly at the end of the script.
const pool = new pg.Pool({ connectionString: process.env['DATABASE_URL'] });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
// CLI flags: `--dry-run` skips the UPDATE statements, `--table=NAME` limits
// the run to a single table.
const dryRun = process.argv.includes('--dry-run');
const tableArg = process.argv.find((a) => a.startsWith('--table='))?.slice('--table='.length);
/**
 * Result of resolving one point against the admin polygon tables.
 * A field is null when that level produced no match.
 */
interface AdminMatch {
// `code` column of the matched `vn_provinces` row.
provinceCode: string | null;
// `name` column of the matched `vn_provinces` row.
provinceName: string | null;
// `code` column of the matched `vn_districts` row.
districtCode: string | null;
// `name` column of the matched `vn_districts` row.
districtName: string | null;
// `code` column of the matched `vn_wards` row.
wardCode: string | null;
// `name` column of the matched `vn_wards` row.
wardName: string | null;
}
/**
 * Resolve the province, district and ward whose polygons contain a point.
 *
 * One SQL statement chains three CTEs: the province containing the point,
 * then the district of that province containing it, then the ward of that
 * district containing it. A level is therefore only reported when its
 * parent level matched.
 *
 * @param lng Longitude of the point (the SQL builds an SRID 4326 point).
 * @param lat Latitude of the point.
 * @returns The matched codes and names. Every field is null when no
 *   province polygon contains the point (likely outside VN or polygons not
 *   synced for that area) or when a coordinate is missing.
 */
async function resolve(lng: number, lat: number): Promise<AdminMatch> {
  const noMatch: AdminMatch = {
    provinceCode: null,
    provinceName: null,
    districtCode: null,
    districtName: null,
    wardCode: null,
    wardName: null,
  };
  // A row with a NULL location arrives here with null coordinates at runtime
  // despite the declared type. The query could only return "no match" for
  // it, so skip the database round trip.
  if (lng == null || lat == null || Number.isNaN(lng) || Number.isNaN(lat)) {
    return noMatch;
  }
  const rows = await prisma.$queryRawUnsafe<AdminMatch[]>(
    `WITH p AS (
SELECT code, name FROM "vn_provinces"
WHERE geometry IS NOT NULL
AND ST_Contains(geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
LIMIT 1
),
d AS (
SELECT d.code, d.name
FROM "vn_districts" d
JOIN p ON p.code = d."provinceCode"
WHERE d.geometry IS NOT NULL
AND ST_Contains(d.geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
LIMIT 1
),
w AS (
SELECT w.code, w.name
FROM "vn_wards" w
JOIN d ON d.code = w."districtCode"
WHERE w.geometry IS NOT NULL
AND ST_Contains(w.geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
LIMIT 1
)
SELECT
(SELECT code FROM p) AS "provinceCode",
(SELECT name FROM p) AS "provinceName",
(SELECT code FROM d) AS "districtCode",
(SELECT name FROM d) AS "districtName",
(SELECT code FROM w) AS "wardCode",
(SELECT name FROM w) AS "wardName"`,
    lng,
    lat,
  );
  return rows[0] ?? noMatch;
}
/**
 * Rewrite `province` / `district` on every IndustrialPark row from the
 * admin polygons.
 *
 * A row is touched only when the resolved province differs from the stored
 * one, or a district was resolved and differs from the stored one. Rows
 * without a location, or whose point falls in no province polygon, are left
 * alone. In dry-run mode nothing is written and the count is reported as
 * "would update".
 */
async function backfillIndustrialPark(): Promise<void> {
  console.log('🏭 IndustrialPark…');
  const rows = await prisma.$queryRawUnsafe<
    { id: string; lat: number; lng: number; province: string | null; district: string | null }[]
  >(
    `SELECT id, ST_Y(location::geometry) AS lat, ST_X(location::geometry) AS lng, province, district
FROM "IndustrialPark"
WHERE location IS NOT NULL`,
  );
  let changed = 0;
  for (const r of rows) {
    const m = await resolve(r.lng, r.lat);
    if (!m.provinceName) continue; // outside VN polygon
    const sameProvince = m.provinceName === r.province;
    // An unresolved district never overwrites the stored value (COALESCE
    // below), so it counts as "same" here.
    const sameDistrict = !m.districtName || m.districtName === r.district;
    if (sameProvince && sameDistrict) continue;
    if (!dryRun) {
      await prisma.$executeRawUnsafe(
        `UPDATE "IndustrialPark" SET province = $2, district = COALESCE($3, district) WHERE id = $1`,
        r.id,
        m.provinceName,
        m.districtName,
      );
    }
    changed++;
  }
  console.log(` ${changed}/${rows.length} rows ${dryRun ? 'would update' : 'updated'}.`);
}
/**
 * Rewrite `city` / `district` / `ward` on every ProjectDevelopment row from
 * the admin polygons.
 *
 * `city` receives the resolved province name. A row is touched only when at
 * least one resolved value differs from the stored one; unresolved district
 * or ward values never overwrite what is stored. Rows without a location,
 * or outside every province polygon, are left alone. In dry-run mode
 * nothing is written and the count is reported as "would update".
 */
async function backfillProjectDevelopment(): Promise<void> {
  console.log('🏗️ ProjectDevelopment…');
  const rows = await prisma.$queryRawUnsafe<
    { id: string; lat: number; lng: number; city: string; district: string; ward: string }[]
  >(
    `SELECT id, ST_Y(location::geometry) AS lat, ST_X(location::geometry) AS lng, city, district, ward
FROM "ProjectDevelopment"
WHERE location IS NOT NULL`,
  );
  let changed = 0;
  for (const r of rows) {
    const m = await resolve(r.lng, r.lat);
    if (!m.provinceName) continue;
    const sameCity = m.provinceName === r.city;
    const sameDistrict = !m.districtName || m.districtName === r.district;
    const sameWard = !m.wardName || m.wardName === r.ward;
    if (sameCity && sameDistrict && sameWard) continue;
    if (!dryRun) {
      await prisma.$executeRawUnsafe(
        `UPDATE "ProjectDevelopment"
SET city = $2,
district = COALESCE($3, district),
ward = COALESCE($4, ward)
WHERE id = $1`,
        r.id,
        m.provinceName,
        m.districtName,
        m.wardName,
      );
    }
    changed++;
  }
  console.log(` ${changed}/${rows.length} rows ${dryRun ? 'would update' : 'updated'}.`);
}
/**
 * Rewrite `province` on every Property row that has a location, using the
 * admin polygons.
 *
 * The pass is skipped when `information_schema` reports no `province`
 * column on a table named `Property`. Rows whose resolved province equals
 * the stored one, or whose point falls in no province polygon, are left
 * alone. In dry-run mode nothing is written and the count is reported as
 * "would update".
 */
async function backfillProperty(): Promise<void> {
  console.log('🏠 Property…');
  // The province column is probed first so the pass can be skipped on a
  // schema that does not have it.
  const colsExist = await prisma.$queryRawUnsafe<{ count: bigint }[]>(
    `SELECT COUNT(*)::bigint AS count
FROM information_schema.columns
WHERE table_name = 'Property' AND column_name = 'province'`,
  );
  if (Number(colsExist[0]?.count ?? 0n) === 0) {
    console.log(' (no province column on Property — skipping)');
    return;
  }
  const rows = await prisma.$queryRawUnsafe<
    { id: string; lat: number; lng: number; province: string | null }[]
  >(
    `SELECT id, ST_Y(location::geometry) AS lat, ST_X(location::geometry) AS lng, province
FROM "Property"
WHERE location IS NOT NULL`,
  );
  let changed = 0;
  for (const r of rows) {
    const m = await resolve(r.lng, r.lat);
    if (!m.provinceName) continue;
    if (m.provinceName === r.province) continue;
    if (!dryRun) {
      await prisma.$executeRawUnsafe(
        `UPDATE "Property" SET province = $2 WHERE id = $1`,
        r.id,
        m.provinceName,
      );
    }
    changed++;
  }
  console.log(` ${changed}/${rows.length} rows ${dryRun ? 'would update' : 'updated'}.`);
}
/**
 * Run the backfill passes selected by `--table` (all of them when the flag
 * is absent or empty), in the order industrial → project → property.
 *
 * @throws Error when `--table` names a table this script does not know.
 *   Previously such a value ran nothing and exited successfully.
 */
async function main(): Promise<void> {
  console.log(`🌍 Admin-code backfill (dryRun=${dryRun})`);
  const passes = new Map<string, () => Promise<void>>([
    ['industrial', backfillIndustrialPark],
    ['project', backfillProjectDevelopment],
    ['property', backfillProperty],
  ]);
  if (tableArg && !passes.has(tableArg)) {
    throw new Error(
      `Unknown --table=${tableArg}. Available: ${[...passes.keys()].join(', ')}`,
    );
  }
  for (const [tableName, runPass] of passes) {
    if (!tableArg || tableArg === tableName) await runPass();
  }
}
// Script entry point: run main(), report any failure through the exit code,
// and always release the Prisma client and the pg pool afterwards.
void (async (): Promise<void> => {
  try {
    await main();
  } catch (err) {
    console.error(err);
    process.exitCode = 1;
  } finally {
    await prisma.$disconnect();
    await pool.end();
  }
})();

View File

@@ -0,0 +1,519 @@
/**
* Sync Vietnam administrative boundaries from OpenStreetMap into the
* `vn_provinces` / `vn_districts` / `vn_wards` tables.
*
* Usage:
* NODE_OPTIONS="-r dotenv/config" DOTENV_CONFIG_PATH=.env \
* pnpm tsx scripts/sync-osm-admin-boundaries.ts \
* [--level=4|6|8|all] [--dry-run] [--chunk=NAME]
*
* What it does:
* 1. Queries Overpass for `boundary=administrative + admin_level=N`
* relations clipped to the Vietnam bbox (split into 4 chunks).
* 2. Converts each relation's outer rings into a MultiPolygon GeoJSON.
* 3. Looks up the GSO code from OSM tags (`ref:VN`, `gso_code`, `ref`,
* `iso_code`, in that order), falling back to the slugified name →
* existing seed row, and finally to a synthetic `OSM_<relation id>` code.
* 4. Upserts the row, writing geometry + centroid + areaKm2 + osmId.
*
* Coverage targets:
* admin_level=4 → 63 provinces (5 cities of central authority + 58 tỉnh)
* admin_level=6 → ~700 districts (quận / huyện / thị xã / TP thuộc tỉnh)
* admin_level=8 → ~11.000 wards (phường / xã / thị trấn)
*
* Notes:
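* • Vietnam reformed wards in 2025 (some merged). Historic names are
* meant to be tracked via `vn_administrative_aliases`, populated when an
* OSM tag `was:name` differs from the current name.
* NOTE(review): no write to `vn_administrative_aliases` is visible in
* this script — confirm where that happens.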
* • Wards (level 8) are the heaviest pull (~11k polygons). We always
* chunk them into 4 geographic slices to dodge Overpass timeouts.
*/
import 'dotenv/config';
import area from '@turf/area';
import centroid from '@turf/centroid';
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '@prisma/client';
import type { Feature, MultiPolygon, Polygon } from 'geojson';
import osmtogeojson from 'osmtogeojson';
import pg from 'pg';
import { isPointInVietnam } from './data/vn-country-polygon';
// Raw pg pool handed to Prisma through the driver adapter; it is closed
// explicitly at the end of the script.
const pool = new pg.Pool({ connectionString: process.env['DATABASE_URL'] });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
// Overpass endpoint; overridable through the OVERPASS_URL environment variable.
const OVERPASS_URL =
process.env['OVERPASS_URL'] ?? 'https://overpass-api.de/api/interpreter';
/**
 * Bounding box in degrees. The fields are interpolated into the Overpass
 * query in (south, west, north, east) order.
 */
interface BBox {
south: number;
west: number;
north: number;
east: number;
}
/**
 * Same chunks the KCN sync uses — keeps Overpass query budget reasonable.
 * Four latitude bands sharing the same west/east limits; the key is the
 * name accepted by `--chunk=NAME`.
 */
const CHUNKS: Record<string, BBox> = {
north: { south: 19.0, west: 102.0, north: 23.5, east: 110.0 },
northCentral: { south: 16.5, west: 102.0, north: 19.0, east: 110.0 },
southCentral: { south: 13.0, west: 102.0, north: 16.5, east: 110.0 },
south: { south: 8.0, west: 102.0, north: 13.0, east: 110.0 },
};
// ─── CLI ──────────────────────────────────────────────────────────────────
// Flags: --dry-run, --chunk=NAME, --level=4|6|8|all (comma-separated list
// accepted). Unknown level values are dropped; duplicates are collapsed so a
// level is never synced twice in one run.
const argv = process.argv.slice(2);
const dryRun = argv.includes('--dry-run');
const chunkArg = argv.find((a) => a.startsWith('--chunk='))?.slice('--chunk='.length);
const levelArg = argv.find((a) => a.startsWith('--level='))?.slice('--level='.length) ?? 'all';
const wantedLevels: number[] =
  levelArg === 'all'
    ? [4, 6, 8]
    : [...new Set(levelArg.split(',').map((s) => Number(s.trim())))].filter((n) =>
        [4, 6, 8].includes(n),
      );
// Without this check a typo such as --level=5 would run nothing and still
// exit successfully.
if (wantedLevels.length === 0) {
  console.error(`No valid levels in --level=${levelArg}. Available: 4, 6, 8, all`);
  process.exit(1);
}
// ─── Slug helper (matches GSO codename style) ──────────────────────────────
/**
 * Turn a Vietnamese place name into a lowercase ASCII slug joined by `_`.
 *
 * @param name Display name, e.g. "Thành phố Hồ Chí Minh".
 * @returns The slug, e.g. "thanh_pho_ho_chi_minh"; empty when the name has
 *   no ASCII letter or digit left after stripping.
 */
function slugify(name: string): string {
  return (
    name
      .toLowerCase()
      // "đ" has no canonical decomposition, so NFD alone would not map it to "d".
      .replace(/đ/g, 'd')
      .normalize('NFD')
      // Strip the combining diacritical marks block. Written with escapes
      // because raw combining characters in a regex literal are invisible
      // and easily damaged by editors or re-encoding.
      .replace(/[\u0300-\u036f]/g, '')
      .replace(/[^a-z0-9]+/g, '_')
      .replace(/^_+|_+$/g, '')
  );
}
// ─── Overpass fetch ────────────────────────────────────────────────────────
/** Parsed JSON body of an Overpass response; only `elements` is read here. */
interface OverpassResult {
elements: unknown[];
}
/**
 * Fetch all `boundary=administrative` relations of one admin level inside
 * a bounding box from Overpass.
 *
 * @param level OSM `admin_level` value to query.
 * @param name Chunk name, used for logging only.
 * @param bbox Area to query.
 * @returns The parsed Overpass JSON response.
 * @throws Error when Overpass answers with a non-2xx status, or answers 200
 *   but reports a runtime problem (timeout, out of memory) in `remark`. In
 *   the latter case `elements` is incomplete and must not be treated as a
 *   successful pull.
 */
async function fetchChunk(level: number, name: string, bbox: BBox): Promise<OverpassResult> {
  // `out geom` returns the relation members with inline geometry so we can
  // assemble polygons without a second roundtrip. Timeout 300s for level=8.
  const query = `
[out:json][timeout:300];
relation
["boundary"="administrative"]
["admin_level"="${level}"]
(${bbox.south},${bbox.west},${bbox.north},${bbox.east});
out body geom;
`;
  console.log(` → fetching level=${level} chunk="${name}"…`);
  const start = Date.now();
  const res = await fetch(OVERPASS_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/x-www-form-urlencoded',
      'User-Agent': 'goodgo-osm-admin-sync/1.0 (https://goodgo.vn)',
    },
    body: 'data=' + encodeURIComponent(query),
  });
  if (!res.ok) {
    const body = await res.text();
    throw new Error(`Overpass returned ${res.status}: ${body.slice(0, 200)}`);
  }
  const json = (await res.json()) as OverpassResult & { remark?: unknown };
  // Overpass signals server-side aborts with HTTP 200 plus a `remark` string.
  if (
    typeof json.remark === 'string' &&
    /runtime error|timed out|out of memory/i.test(json.remark)
  ) {
    throw new Error(`Overpass reported a runtime problem: ${json.remark.slice(0, 200)}`);
  }
  const elapsedSeconds = ((Date.now() - start) / 1000).toFixed(1);
  console.log(
    ` ← level=${level} ${name}: ${json.elements?.length ?? 0} relations in ${elapsedSeconds}s`,
  );
  return json;
}
// ─── Per-feature parser ────────────────────────────────────────────────────
/** One administrative relation after parsing, ready to be upserted. */
interface ParsedAdmin {
// OSM admin_level the relation was queried with (4 province, 6 district, 8 ward).
level: 4 | 6 | 8;
// Numeric OSM relation id.
osmId: bigint;
// `name:vi` tag when present, otherwise `name`.
name: string;
// `name:en` tag, when present.
nameEn: string | null;
// First of the `ref:VN` / `gso_code` / `ref` / `iso_code` tags that is set.
gsoCode: string | null;
type: string; // "Tỉnh" / "Quận" / "Phường" etc.
// Feature geometry normalised to MultiPolygon (a Polygon is wrapped once).
// NOTE(review): an earlier comment said "outer rings only", but the parser
// does not strip inner rings — confirm what osmtogeojson emits.
geometry: MultiPolygon;
// Centroid computed with @turf/centroid.
centroid: { lng: number; lat: number };
// Area in km², rounded to two decimals.
areaKm2: number;
// `population` tag when it is a plain integer, otherwise null.
population: number | null;
// The tag map the values above were read from.
rawTags: Record<string, string>;
}
/**
 * Classify a level-4 name as a centrally governed city or a province.
 *
 * The "Thành phố" / "TP." prefix is optional, so a bare city name such as
 * "Hà Nội" is recognised too. The name is normalised to NFC first so that
 * decomposed input still matches the precomposed pattern.
 *
 * @param name Province-level display name.
 * @returns "Thành phố Trung ương" for the five listed cities, else "Tỉnh".
 */
const PROVINCE_TYPE_MAP = (name: string): string =>
  /^(?:(?:Thành phố|TP\.?)\s*)?(?:Hà Nội|Hồ Chí Minh|Hải Phòng|Đà Nẵng|Cần Thơ)/i.test(
    name.normalize('NFC').trim(),
  )
    ? 'Thành phố Trung ương'
    : 'Tỉnh';
/**
 * Derive the district type label from the name's leading words.
 * Prefixes are tried in order, case-insensitively; a name with none of
 * them is labelled "Quận".
 */
const DISTRICT_TYPE_MAP = (name: string): string => {
  const prefixRules: ReadonlyArray<readonly [RegExp, string]> = [
    [/^Quận/i, 'Quận'],
    [/^Huyện/i, 'Huyện'],
    [/^Thị xã/i, 'Thị xã'],
    [/^Thành phố/i, 'Thành phố thuộc tỉnh'],
  ];
  const matched = prefixRules.find(([pattern]) => pattern.test(name));
  return matched ? matched[1] : 'Quận';
};
/**
 * Derive the ward type label from the name's leading words.
 * Prefixes are tried in order, case-insensitively; a name with none of
 * them is labelled "Xã".
 */
const WARD_TYPE_MAP = (name: string): string => {
  for (const [pattern, label] of [
    [/^Phường/i, 'Phường'],
    [/^Xã/i, 'Xã'],
    [/^Thị trấn/i, 'Thị trấn'],
  ] as const) {
    if (pattern.test(name)) return label;
  }
  return 'Xã';
};
/**
 * Convert one osmtogeojson feature into a `ParsedAdmin`, or reject it.
 *
 * A feature is rejected (null) when it has no properties, its id is not
 * `relation/<digits>`, it has no `name:vi` / `name` tag, the name contains
 * no Latin/Vietnamese letter, or its centroid lies outside Vietnam
 * according to `isPointInVietnam`.
 *
 * @param feat Polygon or MultiPolygon feature produced by osmtogeojson.
 * @param level Admin level the feature was queried with; selects the type
 *   classifier.
 * @returns The parsed row, or null when the feature should be ignored.
 */
function parseFeature(
  feat: Feature<Polygon | MultiPolygon>,
  level: 4 | 6 | 8,
): ParsedAdmin | null {
  const propsRaw = feat.properties as Record<string, unknown> | null;
  if (!propsRaw) return null;
  // osmtogeojson encodes the prefixed id on `feat.id` ("relation/123") and
  // the bare numeric id under `properties.id`. We only kept relations.
  const featAny = feat as unknown as { id?: unknown };
  const idStr = String(featAny.id ?? propsRaw['id'] ?? '');
  if (!idStr.startsWith('relation/')) return null;
  const idDigits = idStr.slice('relation/'.length);
  // BigInt() throws on anything that is not an integer literal. This
  // function is called outside the per-feature try/catch, so a malformed
  // id would otherwise abort the whole chunk.
  if (!/^\d+$/.test(idDigits)) return null;
  const osmId = BigInt(idDigits);
  const tagsRaw = propsRaw['tags'];
  // With nested properties the tags live under `tags`; otherwise treat the
  // properties object itself as the tag map.
  const tags: Record<string, string> =
    tagsRaw && typeof tagsRaw === 'object'
      ? (tagsRaw as Record<string, string>)
      : (propsRaw as Record<string, string>);
  const name = tags['name:vi'] ?? tags['name'] ?? null;
  if (!name) return null;
  // Skip rows without any Latin/Vietnamese letter (cross-border bleed).
  if (!/[A-Za-zÀ-ỹ]/.test(name)) return null;
  const nameEn = tags['name:en'] ?? null;
  const gsoCode =
    tags['ref:VN'] ?? tags['gso_code'] ?? tags['ref'] ?? tags['iso_code'] ?? null;
  const populationRaw = tags['population'];
  const population = populationRaw && /^\d+$/.test(populationRaw) ? Number(populationRaw) : null;
  // Normalise to MultiPolygon regardless of source (Polygon → wrap once).
  const geom: MultiPolygon =
    feat.geometry.type === 'Polygon'
      ? { type: 'MultiPolygon', coordinates: [feat.geometry.coordinates] }
      : feat.geometry;
  const c = centroid(feat as Feature);
  const [cLng, cLat] = c.geometry.coordinates;
  // Geographic gate: drop relations whose centroid sits outside the VN
  // mainland polygon (China / Laos / Cambodia bleed across the bbox).
  if (!isPointInVietnam(cLng, cLat)) return null;
  // @turf/area returns m²; convert to km² rounded to two decimals.
  const areaKm2 = Math.round((area(feat as Feature) / 1_000_000) * 100) / 100;
  let type: string;
  if (level === 4) type = PROVINCE_TYPE_MAP(name);
  else if (level === 6) type = DISTRICT_TYPE_MAP(name);
  else type = WARD_TYPE_MAP(name);
  return {
    level,
    osmId,
    name,
    nameEn,
    gsoCode,
    type,
    geometry: geom,
    centroid: { lng: cLng, lat: cLat },
    areaKm2,
    population,
    rawTags: tags,
  };
}
// ─── Resolve to existing GSO code or generate a synthetic one ─────────────
/**
 * Pick the `vn_provinces.code` to upsert a parsed province under.
 *
 * Preference order: the tagged code when a row with that code already
 * exists, then the code of an existing row with the same codename slug,
 * then a synthetic `OSM_<relation id>` code.
 */
async function resolveProvinceCode(p: ParsedAdmin): Promise<string> {
  const taggedCode = p.gsoCode;
  if (taggedCode) {
    const seeded = await prisma.vnProvince.findUnique({ where: { code: taggedCode } });
    if (seeded) return taggedCode;
  }
  // No usable tag: try the seed row carrying the same slug.
  const slugRow = await prisma.vnProvince.findFirst({
    where: { codename: slugify(p.name) },
  });
  // Unknown province: the relation id keeps the generated code stable across runs.
  return slugRow ? slugRow.code : `OSM_${p.osmId.toString()}`;
}
/**
 * Pick the `vn_districts.code` to upsert a parsed district under.
 *
 * Preference order: the tagged code when a row with that code already
 * exists, then the code of an existing row with the same codename slug
 * inside `provinceCode`, then a synthetic `OSM_<relation id>` code.
 */
async function resolveDistrictCode(p: ParsedAdmin, provinceCode: string): Promise<string> {
  const taggedCode = p.gsoCode;
  if (taggedCode) {
    const seeded = await prisma.vnDistrict.findUnique({ where: { code: taggedCode } });
    if (seeded) return taggedCode;
  }
  const slugRow = await prisma.vnDistrict.findFirst({
    where: { codename: slugify(p.name), provinceCode },
  });
  return slugRow ? slugRow.code : `OSM_${p.osmId.toString()}`;
}
/**
 * Pick the `vn_wards.code` to upsert a parsed ward under.
 *
 * Preference order: the tagged code when a row with that code already
 * exists, then the code of an existing row with the same codename slug
 * inside `districtCode`, then a synthetic `OSM_<relation id>` code.
 */
async function resolveWardCode(p: ParsedAdmin, districtCode: string): Promise<string> {
  const taggedCode = p.gsoCode;
  if (taggedCode) {
    const seeded = await prisma.vnWard.findUnique({ where: { code: taggedCode } });
    if (seeded) return taggedCode;
  }
  const slugRow = await prisma.vnWard.findFirst({
    where: { codename: slugify(p.name), districtCode },
  });
  return slugRow ? slugRow.code : `OSM_${p.osmId.toString()}`;
}
// ─── Upsert helpers — raw SQL because Prisma can't manage geometry ────────
/**
 * Build the SQL expression that turns a GeoJSON geometry into a PostGIS
 * multi-geometry. The JSON is embedded as a string literal with single
 * quotes doubled.
 */
function geomSql(g: MultiPolygon): string {
  const literalBody = JSON.stringify(g).split("'").join("''");
  return "ST_Multi(ST_GeomFromGeoJSON('" + literalBody + "'))";
}
/** Running row counters for one chunk or one level. */
interface UpsertStats {
// Rows whose code did not exist before the upsert (every parsed row in dry-run mode).
inserted: number;
// Rows whose code already existed before the upsert.
updated: number;
// Rows with no containing parent polygon, or whose upsert threw.
skipped: number;
}
/**
 * Insert or refresh one province row, including geometry and centroid.
 *
 * On conflict the existing row keeps its codename, and keeps its population
 * when the parsed one is null. `stats.updated` is bumped when the code
 * existed before the statement ran, `stats.inserted` otherwise.
 */
async function upsertProvince(p: ParsedAdmin, stats: UpsertStats): Promise<void> {
  const provinceCode = await resolveProvinceCode(p);
  const priorRow = await prisma.vnProvince.findUnique({
    where: { code: provinceCode },
    select: { code: true },
  });
  // Positional parameters $1..$10, in the order the statement expects them.
  const params = [
    provinceCode,
    p.name,
    p.nameEn,
    p.type,
    slugify(p.name),
    p.osmId.toString(),
    p.areaKm2,
    p.population,
    p.centroid.lng,
    p.centroid.lat,
  ];
  const statement = `
INSERT INTO "vn_provinces" (
code, name, "nameEn", type, codename, "osmId",
"areaKm2", population, "lastSyncedAt", "updatedAt", geometry, centroid
) VALUES (
$1, $2, $3, $4, $5, $6::bigint,
$7, $8, NOW(), NOW(),
${geomSql(p.geometry)},
ST_SetSRID(ST_MakePoint($9, $10), 4326)
)
ON CONFLICT (code) DO UPDATE SET
name = EXCLUDED.name,
"nameEn" = EXCLUDED."nameEn",
type = EXCLUDED.type,
"osmId" = EXCLUDED."osmId",
"areaKm2" = EXCLUDED."areaKm2",
population = COALESCE(EXCLUDED.population, "vn_provinces".population),
"lastSyncedAt" = NOW(),
"updatedAt" = NOW(),
geometry = EXCLUDED.geometry,
centroid = EXCLUDED.centroid
`;
  await prisma.$executeRawUnsafe(statement, ...params);
  if (priorRow) {
    stats.updated += 1;
  } else {
    stats.inserted += 1;
  }
}
/**
 * Insert or refresh one district row under the province that contains it.
 *
 * The parent is the synced province whose polygon contains the district
 * centroid (ST_Contains). There is no other fallback: when no province
 * polygon contains the centroid the district is counted as skipped, so
 * provinces must be synced first.
 */
async function upsertDistrict(p: ParsedAdmin, stats: UpsertStats): Promise<void> {
  const parents = await prisma.$queryRawUnsafe<{ code: string }[]>(
    `SELECT code FROM "vn_provinces"
WHERE geometry IS NOT NULL
AND ST_Contains(geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
LIMIT 1`,
    p.centroid.lng,
    p.centroid.lat,
  );
  const [parent] = parents;
  if (!parent) {
    // Cannot place district until provinces are synced first.
    stats.skipped += 1;
    return;
  }
  const provinceCode = parent.code;
  const districtCode = await resolveDistrictCode(p, provinceCode);
  const priorRow = await prisma.vnDistrict.findUnique({
    where: { code: districtCode },
    select: { code: true },
  });
  // Positional parameters $1..$11, in the order the statement expects them.
  const params = [
    districtCode,
    provinceCode,
    p.name,
    p.nameEn,
    p.type,
    slugify(p.name),
    p.osmId.toString(),
    p.areaKm2,
    p.population,
    p.centroid.lng,
    p.centroid.lat,
  ];
  const statement = `
INSERT INTO "vn_districts" (
code, "provinceCode", name, "nameEn", type, codename, "osmId",
"areaKm2", population, "lastSyncedAt", "updatedAt", geometry, centroid
) VALUES (
$1, $2, $3, $4, $5, $6, $7::bigint,
$8, $9, NOW(), NOW(),
${geomSql(p.geometry)},
ST_SetSRID(ST_MakePoint($10, $11), 4326)
)
ON CONFLICT (code) DO UPDATE SET
"provinceCode" = EXCLUDED."provinceCode",
name = EXCLUDED.name,
"nameEn" = EXCLUDED."nameEn",
type = EXCLUDED.type,
"osmId" = EXCLUDED."osmId",
"areaKm2" = EXCLUDED."areaKm2",
population = COALESCE(EXCLUDED.population, "vn_districts".population),
"lastSyncedAt" = NOW(),
"updatedAt" = NOW(),
geometry = EXCLUDED.geometry,
centroid = EXCLUDED.centroid
`;
  await prisma.$executeRawUnsafe(statement, ...params);
  if (priorRow) {
    stats.updated += 1;
  } else {
    stats.inserted += 1;
  }
}
/**
 * Insert or refresh one ward row under the district that contains it.
 *
 * The parent is the synced district whose polygon contains the ward
 * centroid (ST_Contains). When no district polygon contains the centroid
 * the ward is counted as skipped, so districts must be synced first.
 */
async function upsertWard(p: ParsedAdmin, stats: UpsertStats): Promise<void> {
  const parents = await prisma.$queryRawUnsafe<{ code: string }[]>(
    `SELECT code FROM "vn_districts"
WHERE geometry IS NOT NULL
AND ST_Contains(geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
LIMIT 1`,
    p.centroid.lng,
    p.centroid.lat,
  );
  const [parent] = parents;
  if (!parent) {
    stats.skipped += 1;
    return;
  }
  const districtCode = parent.code;
  const wardCode = await resolveWardCode(p, districtCode);
  const priorRow = await prisma.vnWard.findUnique({
    where: { code: wardCode },
    select: { code: true },
  });
  // Positional parameters $1..$11, in the order the statement expects them.
  const params = [
    wardCode,
    districtCode,
    p.name,
    p.nameEn,
    p.type,
    slugify(p.name),
    p.osmId.toString(),
    p.areaKm2,
    p.population,
    p.centroid.lng,
    p.centroid.lat,
  ];
  const statement = `
INSERT INTO "vn_wards" (
code, "districtCode", name, "nameEn", type, codename, "osmId",
"areaKm2", population, "lastSyncedAt", "updatedAt", geometry, centroid
) VALUES (
$1, $2, $3, $4, $5, $6, $7::bigint,
$8, $9, NOW(), NOW(),
${geomSql(p.geometry)},
ST_SetSRID(ST_MakePoint($10, $11), 4326)
)
ON CONFLICT (code) DO UPDATE SET
"districtCode" = EXCLUDED."districtCode",
name = EXCLUDED.name,
"nameEn" = EXCLUDED."nameEn",
type = EXCLUDED.type,
"osmId" = EXCLUDED."osmId",
"areaKm2" = EXCLUDED."areaKm2",
population = COALESCE(EXCLUDED.population, "vn_wards".population),
"lastSyncedAt" = NOW(),
"updatedAt" = NOW(),
geometry = EXCLUDED.geometry,
centroid = EXCLUDED.centroid
`;
  await prisma.$executeRawUnsafe(statement, ...params);
  if (priorRow) {
    stats.updated += 1;
  } else {
    stats.inserted += 1;
  }
}
// ─── Main ─────────────────────────────────────────────────────────────────
/**
 * Fetch one (level, chunk) pair from Overpass and upsert every usable
 * polygon feature.
 *
 * In dry-run mode nothing is written and each parsed feature is counted as
 * inserted. An upsert that throws is logged with the feature name and
 * counted as skipped; the remaining features are still processed.
 *
 * @returns Counters for this chunk.
 */
async function processChunk(
  level: 4 | 6 | 8,
  chunkName: string,
  bbox: BBox,
): Promise<UpsertStats> {
  const stats: UpsertStats = { inserted: 0, updated: 0, skipped: 0 };
  const overpass = await fetchChunk(level, chunkName, bbox);
  const collection = osmtogeojson(overpass, { flatProperties: false });
  // Keep only areal features; anything else cannot become a boundary polygon.
  const polygonal = (collection.features as Feature<Polygon | MultiPolygon>[]).filter((f) => {
    const kind = f.geometry?.type;
    return kind === 'Polygon' || kind === 'MultiPolygon';
  });
  const upsertForLevel =
    level === 4 ? upsertProvince : level === 6 ? upsertDistrict : upsertWard;
  for (const feature of polygonal) {
    const parsed = parseFeature(feature, level);
    if (parsed === null) continue;
    if (dryRun) {
      stats.inserted += 1;
      continue;
    }
    try {
      await upsertForLevel(parsed, stats);
    } catch (err) {
      console.error(`${parsed.name}: ${err instanceof Error ? err.message : err}`);
      stats.skipped += 1;
    }
  }
  console.log(
    ` ✓ level=${level} ${chunkName}: inserted=${stats.inserted} updated=${stats.updated} skipped=${stats.skipped}`,
  );
  return stats;
}
/**
 * Sync every requested admin level over the selected chunk(s) and print
 * per-level totals.
 *
 * A chunk that fails is logged and skipped; the remaining chunks and levels
 * still run.
 *
 * @throws Error when `--chunk` names a chunk that does not exist.
 *   Previously the undefined bounding box crashed inside the per-chunk
 *   catch and the run ended successfully without syncing anything.
 */
async function main(): Promise<void> {
  console.log('🌏 OSM admin boundaries sync starting');
  console.log(` levels: ${wantedLevels.join(',')}, chunks: ${chunkArg ?? 'all'}, dryRun=${dryRun}`);
  let chunks: Record<string, BBox> = CHUNKS;
  if (chunkArg) {
    // Own-property check so names like "constructor" are rejected too.
    const selected = Object.prototype.hasOwnProperty.call(CHUNKS, chunkArg)
      ? CHUNKS[chunkArg]
      : undefined;
    if (!selected) {
      throw new Error(
        `Unknown --chunk=${chunkArg}. Available: ${Object.keys(CHUNKS).join(', ')}`,
      );
    }
    chunks = { [chunkArg]: selected };
  }
  const totals: Record<number, UpsertStats> = {
    4: { inserted: 0, updated: 0, skipped: 0 },
    6: { inserted: 0, updated: 0, skipped: 0 },
    8: { inserted: 0, updated: 0, skipped: 0 },
  };
  // ALWAYS process levels in order 4 → 6 → 8, because 6 needs province
  // polygons in the DB to assign provinceCode (and 8 needs districts).
  // Sort a copy numerically: a bare sort() compares as strings and would
  // reorder the shared module-level array in place.
  const orderedLevels = [...wantedLevels].sort((a, b) => a - b) as (4 | 6 | 8)[];
  for (const level of orderedLevels) {
    console.log(`\n=== Level ${level} ===`);
    for (const [name, bbox] of Object.entries(chunks)) {
      try {
        const s = await processChunk(level, name, bbox);
        totals[level]!.inserted += s.inserted;
        totals[level]!.updated += s.updated;
        totals[level]!.skipped += s.skipped;
      } catch (err) {
        console.error(` ✗ chunk ${name} (level ${level}) failed:`, err);
      }
    }
  }
  console.log('\n📊 Totals');
  for (const lvl of orderedLevels) {
    const t = totals[lvl]!;
    console.log(
      ` level=${lvl}: inserted=${t.inserted} updated=${t.updated} skipped=${t.skipped}`,
    );
  }
}
// Script entry point: run main(), report any failure through the exit code,
// and always release the Prisma client and the pg pool afterwards.
void (async (): Promise<void> => {
  try {
    await main();
  } catch (err) {
    console.error(err);
    process.exitCode = 1;
  } finally {
    await prisma.$disconnect();
    await pool.end();
  }
})();

View File

@@ -215,9 +215,11 @@ function parseFeature(
if (!isPointInVietnam(cLng, cLat)) return null;
// Province resolution: prefer explicit OSM tags, then fall back to a
// nearest-centroid lookup against our 63-province table. The fallback
// catches the (very common) case where Vietnamese landuse polygons have
// no addr:* tags at all.
// nearest-centroid lookup against our 63-province table. The actual DB
// upsert step (`upsertFeature`) replaces this with a precise PostGIS
// ST_Contains lookup against `vn_provinces.geometry` once those polygons
// are synced — this is just the bootstrap value used when the polygon
// table is empty.
const province =
VN_PROVINCE_HINTS.map((k) => tags[k]).find(Boolean) ??
tags['addr:city'] ??
@@ -271,6 +273,33 @@ async function upsertFeature(
return;
}
// Override the heuristic province with a precise PostGIS lookup against
// the OSM-sourced admin polygons (when synced). Falls back to the
// nearest-centroid value already on `parsed.province` if the polygon
// table doesn't yet cover that area.
const adminMatch = await prisma.$queryRawUnsafe<
{ provinceName: string | null; districtName: string | null }[]
>(
`WITH p AS (
SELECT code, name FROM "vn_provinces"
WHERE geometry IS NOT NULL
AND ST_Contains(geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
LIMIT 1
)
SELECT
(SELECT name FROM p) AS "provinceName",
(SELECT d.name FROM "vn_districts" d JOIN p ON p.code = d."provinceCode"
WHERE d.geometry IS NOT NULL
AND ST_Contains(d.geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
LIMIT 1) AS "districtName"`,
parsed.centroid.lng,
parsed.centroid.lat,
);
const resolvedProvince = adminMatch[0]?.provinceName ?? parsed.province;
const resolvedDistrict = adminMatch[0]?.districtName ?? parsed.district;
parsed.province = resolvedProvince;
if (!parsed.district) parsed.district = resolvedDistrict ?? '';
const region = guessRegion(parsed.centroid.lat);
const slug = slugify(parsed.name, parsed.osmId.toString());

400
scripts/sync-osm-poi.ts Normal file
View File

@@ -0,0 +1,400 @@
/**
* Sync OSM points-of-interest into the `Poi` table.
*
* Usage:
* NODE_OPTIONS="-r dotenv/config" DOTENV_CONFIG_PATH=.env \
* pnpm tsx scripts/sync-osm-poi.ts \
* [--category=school,hospital,...|all] [--chunk=NAME] [--dry-run]
*
* What it does:
* 1. For each requested category, queries Overpass for the matching
* node/way/relation across the 4 Vietnam chunks.
* 2. Filters out non-Vietnam centroids (cross-border bleed) and rows
* without any Latin/Vietnamese letters in the name.
* 3. Resolves provinceCode/districtCode/wardCode via PostGIS lookup
* against `vn_provinces` / `vn_districts` / `vn_wards` (assumes
* Phase 0 boundary sync ran first).
* 4. Upserts on `osmId`, honouring `osmLocked` + `lockedFields`.
*/
import 'dotenv/config';
import area from '@turf/area';
import centroid from '@turf/centroid';
import { createId } from '@paralleldrive/cuid2';
import { PrismaPg } from '@prisma/adapter-pg';
import { type Prisma, PrismaClient } from '@prisma/client';
import type { Feature, MultiPolygon, Polygon, Point } from 'geojson';
import osmtogeojson from 'osmtogeojson';
import pg from 'pg';
import { isPointInVietnam } from './data/vn-country-polygon';
// Raw pg pool handed to Prisma through the driver adapter.
const pool = new pg.Pool({ connectionString: process.env['DATABASE_URL'] });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
// Overpass endpoint; overridable through the OVERPASS_URL environment variable.
const OVERPASS_URL =
process.env['OVERPASS_URL'] ?? 'https://overpass-api.de/api/interpreter';
/**
 * Bounding box in degrees. The fields are interpolated into the Overpass
 * query in (south, west, north, east) order.
 */
interface BBox {
south: number;
west: number;
north: number;
east: number;
}
/**
 * Four latitude bands sharing the same west/east limits, queried one at a
 * time. NOTE(review): the keys are presumably the names accepted by
 * `--chunk=NAME` — confirm against the part of the script that reads it.
 */
const CHUNKS: Record<string, BBox> = {
north: { south: 19.0, west: 102.0, north: 23.5, east: 110.0 },
northCentral: { south: 16.5, west: 102.0, north: 19.0, east: 110.0 },
southCentral: { south: 13.0, west: 102.0, north: 16.5, east: 110.0 },
south: { south: 8.0, west: 102.0, north: 13.0, east: 110.0 },
};
/**
 * Category names this script can sync; each one is a key of
 * `CATEGORY_QUERIES`.
 * NOTE(review): presumably mirrors the `PoiCategory` enum of the `Poi`
 * table — confirm against the Prisma schema.
 */
type PoiCategoryKey =
| 'SCHOOL_PRIMARY' | 'SCHOOL_SECONDARY' | 'UNIVERSITY'
| 'HOSPITAL' | 'CLINIC' | 'PHARMACY'
| 'MARKET' | 'SUPERMARKET' | 'MALL' | 'CONVENIENCE'
| 'BANK' | 'ATM'
| 'PARK'
| 'GAS_STATION' | 'POLICE' | 'POST_OFFICE'
| 'METRO_STATION' | 'RAILWAY_STATION' | 'BUS_STATION' | 'AIRPORT';
/**
 * For each category, the Overpass tag selector. The same selector is
 * appended to a node, a way and a relation statement, to catch both single
 * points and named building polygons.
 */
const CATEGORY_QUERIES: Record<PoiCategoryKey, string> = {
// ── Education ─────────────────────────────────────────────────────────
// Schools are split by the `isced:level` tag; a school without that tag
// matches neither selector.
SCHOOL_PRIMARY: '["amenity"="school"]["isced:level"~"^(primary|0|1)$"]',
SCHOOL_SECONDARY: '["amenity"="school"]["isced:level"~"^(secondary|2|3)$"]',
UNIVERSITY: '["amenity"~"^(university|college)$"]',
// ── Health ────────────────────────────────────────────────────────────
HOSPITAL: '["amenity"="hospital"]',
CLINIC: '["amenity"="clinic"]',
PHARMACY: '["amenity"="pharmacy"]',
// ── Commerce ──────────────────────────────────────────────────────────
MARKET: '["amenity"="marketplace"]',
SUPERMARKET: '["shop"="supermarket"]',
MALL: '["shop"="mall"]',
CONVENIENCE: '["shop"="convenience"]',
// ── Finance ───────────────────────────────────────────────────────────
BANK: '["amenity"="bank"]',
ATM: '["amenity"="atm"]',
// ── Recreation / Services ────────────────────────────────────────────
PARK: '["leisure"="park"]',
GAS_STATION: '["amenity"="fuel"]',
POLICE: '["amenity"="police"]',
POST_OFFICE: '["amenity"="post_office"]',
// ── Transport (stations / airports — lines live in TransportLine) ────
// Railway stations are split on `station=subway`.
METRO_STATION: '["railway"="station"]["station"="subway"]',
RAILWAY_STATION: '["railway"="station"]["station"!="subway"]',
BUS_STATION: '["amenity"="bus_station"]',
AIRPORT: '["aeroway"="aerodrome"]["aerodrome:type"~"international|public"]',
};
// Every category the script knows, in `CATEGORY_QUERIES` declaration order.
const ALL_CATEGORIES = Object.keys(CATEGORY_QUERIES) as PoiCategoryKey[];
// ─── CLI ───────────────────────────────────────────────────────────────────
// Flags: --dry-run, --chunk=NAME, --category=a,b,...|all (case-insensitive;
// unknown names are dropped). The script exits with status 1 when no valid
// category remains.
const argv = process.argv.slice(2);
const dryRun = argv.includes('--dry-run');
const chunkArg = argv.find((a) => a.startsWith('--chunk='))?.slice('--chunk='.length);
const categoryArg =
  argv.find((a) => a.startsWith('--category='))?.slice('--category='.length) ?? 'all';
const wantedCategories: PoiCategoryKey[] =
  categoryArg === 'all'
    ? ALL_CATEGORIES
    : categoryArg.split(',').flatMap((raw) => {
        const key = raw.trim().toUpperCase();
        return ALL_CATEGORIES.includes(key as PoiCategoryKey) ? [key as PoiCategoryKey] : [];
      });
if (wantedCategories.length === 0) {
  console.error(`No valid categories. Available: ${ALL_CATEGORIES.join(', ')}`);
  process.exit(1);
}
/** Parsed JSON body of an Overpass response; only `elements` is read here. */
interface OverpassResult {
elements: unknown[];
}
/**
 * Queries Overpass for every node, way and relation that matches the
 * selector of `category` inside `bbox`.
 *
 * @param category  Key into CATEGORY_QUERIES that selects the tag filter.
 * @param chunkName Label of the bounding box, used for log output only.
 * @param bbox      Bounding box, sent to Overpass as (south, west, north, east).
 * @returns The parsed response; `elements` is always an array.
 * @throws Error when Overpass answers with a non-2xx status. The message
 *         carries the status and the first 200 characters of the body.
 */
async function fetchChunk(
  category: PoiCategoryKey,
  chunkName: string,
  bbox: BBox,
): Promise<OverpassResult> {
  const sel = CATEGORY_QUERIES[category];
  const area = `(${bbox.south},${bbox.west},${bbox.north},${bbox.east})`;
  // Assembled line by line so the payload does not depend on source indentation.
  const query = [
    '',
    '[out:json][timeout:180];',
    '(',
    `node${sel}${area};`,
    `way${sel}${area};`,
    `relation${sel}${area};`,
    ');',
    'out body geom;',
    '',
  ].join('\n');

  const t0 = Date.now();
  console.log(`${category} ${chunkName}`);
  const res = await fetch(OVERPASS_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/x-www-form-urlencoded',
      'User-Agent': 'goodgo-osm-poi-sync/1.0 (https://goodgo.vn)',
    },
    body: 'data=' + encodeURIComponent(query),
  });
  if (!res.ok) {
    const body = await res.text();
    throw new Error(`Overpass ${res.status}: ${body.slice(0, 200)}`);
  }

  const json = (await res.json()) as OverpassResult & { remark?: unknown };

  // Overpass reports query timeouts and memory exhaustion with HTTP 200 plus a
  // `remark` string, so the elements may be truncated. Surface it instead of
  // treating the chunk as a clean success.
  if (typeof json.remark === 'string' && json.remark.length > 0) {
    console.warn(`${category} ${chunkName}: Overpass remark: ${json.remark}`);
  }

  // Guarantee the declared return shape even if the body lacks `elements`.
  const elements = Array.isArray(json.elements) ? json.elements : [];
  const elapsedSeconds = ((Date.now() - t0) / 1000).toFixed(1);
  console.log(`${category} ${chunkName}: ${elements.length} elements in ${elapsedSeconds}s`);
  return { ...json, elements };
}
/** Normalised POI produced by parseFeature and consumed by upsertPoi. */
interface ParsedPoi {
// Category the feature was fetched under; passed through unchanged.
category: PoiCategoryKey;
// Numeric part of the feature id (the text after the slash).
osmId: bigint;
// Element type taken from the feature id prefix, upper-cased.
osmType: 'NODE' | 'WAY' | 'RELATION';
// `name:vi` tag, falling back to `name`; unnamed features are never emitted.
name: string;
// `name:en` tag, or null when absent.
nameEn: string | null;
// The point itself for Point geometries, otherwise the computed centroid.
centroid: { lng: number; lat: number };
// `addr:full`, else house number + street joined by a space; null when empty.
address: string | null;
// Tag map read from the feature's properties.
tags: Record<string, string>;
}
/**
 * Converts one GeoJSON feature produced by osmtogeojson into a ParsedPoi.
 *
 * Returns null (never throws) when the feature should be ignored:
 *   - its id is not of the form `node|way|relation/<decimal digits>`,
 *   - it has no usable name,
 *   - the name has no Latin/Vietnamese letter (cross-border bleed),
 *   - its representative point falls outside Vietnam.
 *
 * @param feat     Feature with Point, Polygon or MultiPolygon geometry.
 * @param category Category the feature was fetched under; copied to the result.
 */
function parseFeature(
  feat: Feature<Polygon | MultiPolygon | Point>,
  category: PoiCategoryKey,
): ParsedPoi | null {
  const idStr = String((feat as unknown as { id?: unknown }).id ?? '');
  // Validate before BigInt(): a non-numeric suffix would throw SyntaxError and,
  // because the caller parses outside its try/catch, abort the whole chunk.
  // An empty suffix would silently become osmId 0.
  const idMatch = /^(node|way|relation)\/(\d+)$/i.exec(idStr);
  if (!idMatch) return null;
  const [, rawType = '', rawId = ''] = idMatch;
  const osmType = rawType.toUpperCase() as 'NODE' | 'WAY' | 'RELATION';
  const osmId = BigInt(rawId);

  // With flatProperties=false the tags live under properties.tags; otherwise
  // the properties object itself is the tag map.
  const propsRaw = (feat.properties ?? {}) as Record<string, unknown>;
  const tagsRaw = propsRaw['tags'];
  const tags: Record<string, string> =
    tagsRaw && typeof tagsRaw === 'object'
      ? (tagsRaw as Record<string, string>)
      : (propsRaw as Record<string, string>);

  // `||` rather than `??` so an empty `name:vi` still falls back to `name`.
  const name = tags['name:vi'] || tags['name'] || null;
  // Skip purely unnamed POIs (very common for shop=convenience etc.)
  if (!name) return null;
  // Skip rows without Latin/Vietnamese letters (cross-border bleed).
  if (!/[A-Za-zÀ-ỹ]/.test(name)) return null;

  let cLng: number;
  let cLat: number;
  if (feat.geometry.type === 'Point') {
    [cLng, cLat] = feat.geometry.coordinates;
  } else {
    // Areal features are reduced to a single representative point.
    const c = centroid(feat as Feature);
    [cLng, cLat] = c.geometry.coordinates;
  }
  if (!isPointInVietnam(cLng, cLat)) return null;

  // Prefer the free-form address; otherwise assemble "<housenumber> <street>"
  // from whichever parts exist. An empty result is stored as null.
  const streetAddress = [tags['addr:housenumber'], tags['addr:street']]
    .filter(Boolean)
    .join(' ');
  const address = tags['addr:full'] || streetAddress || null;

  return {
    category,
    osmId,
    osmType,
    name,
    nameEn: tags['name:en'] ?? null,
    centroid: { lng: cLng, lat: cLat },
    address,
    tags,
  };
}
/** Per-category counters accumulated while processing chunks. */
interface UpsertStats {
// Rows newly inserted; in dry-run mode, every feature that parsed successfully.
inserted: number;
// Existing rows that were updated.
updated: number;
// Existing rows left untouched because their osmLocked flag is set.
locked: number;
// Features whose upsert threw; the error is logged and the run continues.
skipped: number;
}
/**
 * Inserts or updates the `Poi` row identified by `parsed.osmId`.
 *
 * - A row whose `osmLocked` flag is set is left untouched (counted as locked).
 * - Province/district/ward codes are resolved by point-in-polygon lookup
 *   against the vn_* boundary tables; each is null when no polygon matches.
 * - On update, columns named in the row's `lockedFields` keep their current
 *   value. `osmTags` and `lastSyncedAt` are always refreshed.
 *
 * NOTE(review): the lookup is by `osmId` alone. OSM ids are only unique per
 * element type, so confirm the schema's uniqueness constraint is what you
 * want for a node and a way sharing a number.
 *
 * @param parsed Normalised POI to persist.
 * @param stats  Counters mutated in place (inserted / updated / locked).
 */
async function upsertPoi(parsed: ParsedPoi, stats: UpsertStats): Promise<void> {
  const existing = await prisma.poi.findUnique({
    where: { osmId: parsed.osmId },
    select: { id: true, osmLocked: true, lockedFields: true },
  });
  if (existing?.osmLocked) {
    stats.locked++;
    return;
  }

  // Resolve admin codes from the polygon tables. District is searched only
  // within the matched province, and ward only within the matched district.
  const admin = await prisma.$queryRawUnsafe<
    { provinceCode: string | null; districtCode: string | null; wardCode: string | null }[]
  >(
    `WITH p AS (
       SELECT code FROM "vn_provinces"
       WHERE geometry IS NOT NULL
         AND ST_Contains(geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
       LIMIT 1
     ),
     d AS (
       SELECT d.code
       FROM "vn_districts" d JOIN p ON p.code = d."provinceCode"
       WHERE d.geometry IS NOT NULL
         AND ST_Contains(d.geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
       LIMIT 1
     )
     SELECT
       (SELECT code FROM p) AS "provinceCode",
       (SELECT code FROM d) AS "districtCode",
       (SELECT w.code FROM "vn_wards" w JOIN d ON d.code = w."districtCode"
          WHERE w.geometry IS NOT NULL
            AND ST_Contains(w.geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
          LIMIT 1) AS "wardCode"`,
    parsed.centroid.lng,
    parsed.centroid.lat,
  );
  const provinceCode = admin[0]?.provinceCode ?? null;
  const districtCode = admin[0]?.districtCode ?? null;
  const wardCode = admin[0]?.wardCode ?? null;

  if (!existing) {
    // Raw SQL because the PostGIS `location` column cannot be written through
    // the Prisma client. The tag map is sent as text and cast to jsonb.
    const cuid = createId();
    await prisma.$executeRawUnsafe(
      `
      INSERT INTO "Poi" (
        id, category, name, "nameEn", location, address,
        "provinceCode", "districtCode", "wardCode",
        "osmId", "osmType", "osmTags",
        "dataSource", "isPublic", "lastSyncedAt", "createdAt", "updatedAt"
      ) VALUES (
        $1, $2::"PoiCategory", $3, $4,
        ST_SetSRID(ST_MakePoint($5, $6), 4326), $7,
        $8, $9, $10,
        $11::bigint, $12::"OsmType", $13::jsonb,
        'OSM'::"OsmDataSource", true, NOW(), NOW(), NOW()
      )
      `,
      cuid,
      parsed.category,
      parsed.name,
      parsed.nameEn,
      parsed.centroid.lng,
      parsed.centroid.lat,
      parsed.address,
      provinceCode,
      districtCode,
      wardCode,
      parsed.osmId.toString(),
      parsed.osmType,
      JSON.stringify(parsed.tags),
    );
    stats.inserted++;
    return;
  }

  // Update — respect lockedFields list.
  const locked = new Set(existing.lockedFields ?? []);
  const data: Prisma.PoiUpdateInput = {
    lastSyncedAt: new Date(),
    // Pass the object itself: Prisma serialises Json inputs, so a
    // pre-stringified value would be stored as a JSON *string* scalar and
    // diverge from the object the insert path writes via `::jsonb`.
    osmTags: parsed.tags,
  };
  if (!locked.has('name')) data.name = parsed.name;
  if (!locked.has('nameEn')) data.nameEn = parsed.nameEn;
  if (!locked.has('address')) data.address = parsed.address;
  if (!locked.has('provinceCode')) data.provinceCode = provinceCode;
  if (!locked.has('districtCode')) data.districtCode = districtCode;
  if (!locked.has('wardCode')) data.wardCode = wardCode;
  await prisma.poi.update({ where: { id: existing.id }, data });

  // Location update via raw SQL (Prisma can't write `Unsupported` columns).
  if (!locked.has('location')) {
    await prisma.$executeRawUnsafe(
      `UPDATE "Poi" SET location = ST_SetSRID(ST_MakePoint($1, $2), 4326) WHERE id = $3`,
      parsed.centroid.lng,
      parsed.centroid.lat,
      existing.id,
    );
  }
  stats.updated++;
}
/**
 * Handles one category × chunk pair: downloads the Overpass data, converts it
 * to GeoJSON and upserts every feature that parses into a POI.
 *
 * Features with a geometry other than Point, Polygon or MultiPolygon are
 * ignored. In dry-run mode nothing is written and each parsed feature is
 * counted as inserted. An upsert failure is logged, counted as skipped and
 * does not stop the loop.
 *
 * @param category  Category being synced.
 * @param chunkName Label of the bounding box, forwarded for logging.
 * @param bbox      Bounding box to query.
 * @param stats     Counters mutated in place.
 */
async function processCategoryChunk(
  category: PoiCategoryKey,
  chunkName: string,
  bbox: BBox,
  stats: UpsertStats,
): Promise<void> {
  const overpassData = await fetchChunk(category, chunkName, bbox);
  const collection = osmtogeojson(overpassData, { flatProperties: false });

  for (const candidate of collection.features) {
    // Guard: only point and areal geometries are turned into POIs.
    const geometryType = candidate.geometry?.type;
    if (
      geometryType !== 'Point' &&
      geometryType !== 'Polygon' &&
      geometryType !== 'MultiPolygon'
    ) {
      continue;
    }

    const poi = parseFeature(candidate as Feature<Polygon | MultiPolygon | Point>, category);
    if (poi === null) {
      continue;
    }

    if (dryRun) {
      stats.inserted += 1;
      continue;
    }

    try {
      await upsertPoi(poi, stats);
    } catch (err) {
      const reason = err instanceof Error ? err.message : err;
      console.error(`${category} ${poi.name}: ${reason}`);
      stats.skipped += 1;
    }
  }
}
/**
 * Runs the sync: for every wanted category, processes each chunk in turn and
 * prints per-category counters followed by a totals table.
 *
 * A failing chunk is logged and does not stop the remaining chunks.
 *
 * @throws Error when `--chunk=<name>` names a chunk that is not in CHUNKS.
 */
async function main(): Promise<void> {
  console.log(`📍 OSM POI sync: categories=${wantedCategories.join(',')} dryRun=${dryRun}`);

  // Validate --chunk up front. An unknown name used to yield an undefined bbox
  // and a TypeError per category, each reported as a generic chunk failure.
  let chunks: Record<string, BBox> = CHUNKS;
  if (chunkArg) {
    const selected = Object.prototype.hasOwnProperty.call(CHUNKS, chunkArg)
      ? CHUNKS[chunkArg]
      : undefined;
    if (!selected) {
      throw new Error(
        `Unknown chunk "${chunkArg}". Available: ${Object.keys(CHUNKS).join(', ')}`,
      );
    }
    chunks = { [chunkArg]: selected };
  }

  const totals: Record<string, UpsertStats> = {};
  for (const cat of wantedCategories) {
    console.log(`\n=== ${cat} ===`);
    const s: UpsertStats = { inserted: 0, updated: 0, locked: 0, skipped: 0 };
    for (const [name, bbox] of Object.entries(chunks)) {
      try {
        await processCategoryChunk(cat, name, bbox, s);
      } catch (err) {
        // Best effort: one failed chunk must not abort the rest of the run.
        console.error(`  ✗ chunk ${name} (${cat}) failed:`, err);
      }
    }
    totals[cat] = s;
    console.log(
      `${cat}: inserted=${s.inserted} updated=${s.updated} locked=${s.locked} skipped=${s.skipped}`,
    );
  }

  console.log('\n📊 Totals:');
  for (const cat of wantedCategories) {
    const s = totals[cat]!;
    console.log(
      `  ${cat.padEnd(20)} inserted=${s.inserted} updated=${s.updated} locked=${s.locked} skipped=${s.skipped}`,
    );
  }
}
// Script entry point: run the sync, report any failure through the exit code,
// and always release the Prisma client and the connection pool afterwards.
void (async (): Promise<void> => {
  try {
    await main();
  } catch (err) {
    console.error(err);
    process.exitCode = 1;
  } finally {
    await prisma.$disconnect();
    await pool.end();
  }
})();