feat(osm): foundation — admin boundaries, POI catalog, sync orchestrator
This is the Phase 0 + Phase 1 + Phase 4 foundation of the full OSM
integration plan. It backfills three things the rest of the platform
has been faking with hardcoded tables, and gives admins one dashboard
for every OSM-sourced layer.
Phase 0 — Vietnam administrative boundaries
* New columns on vn_provinces / vn_districts / vn_wards: PostGIS
geometry (MultiPolygon), centroid (Point), areaKm2, osmId, population,
lastSyncedAt + GIST indexes on geometry/centroid.
* `scripts/sync-osm-admin-boundaries.ts` pulls
`boundary=administrative + admin_level=4|6|8` from Overpass per chunk,
filters to mainland VN via the existing country polygon, resolves the
GSO code (or generates `OSM_<id>`), and upserts via raw SQL because
Prisma can't manage PostGIS columns.
* `GeoLookupService` (shared module) replaces the old
`nearestProvince()` heuristic — `lookup(lng,lat)` returns
province/district/ward via `ST_Contains` on the GIST-indexed polygons.
* The KCN sync now resolves province/district from the polygon table
and falls back to the centroid heuristic only when polygons aren't
loaded yet.
* `scripts/backfill-admin-codes.ts` rewrites province/district/ward on
IndustrialPark, ProjectDevelopment and Property using the new lookup.
Phase 1 — POI catalog (15 categories, schema only here)
* New `Poi` table with `PoiCategory` enum, OSM provenance columns,
GIST index on `location`. New `TransportLine` for metro/highway
multilinestrings.
* `scripts/sync-osm-poi.ts` queries Overpass per category × chunk,
resolves province/district codes from the boundary polygons, upserts
with `osmLocked` / `lockedFields` honour same as KCN.
* New NestJS `PoiModule` exposes:
GET /poi/by-bbox — GeoJSON for map overlays
GET /poi/nearby — sidebar "tiện ích xung quanh" (HMAC distance ranks)
GET /poi/coverage — admin per-category counts
* New web component `<NearbyPoiSidebar />` ready to drop into listing /
project / KCN detail pages.
Phase 4 — Sync orchestrator + admin dashboard
* New `OsmSyncRun` audit table tracks every sync invocation
(RUNNING / SUCCESS / PARTIAL / FAILED + row stats + error message).
* `OsmSyncService` spawns the right tsx script for any (layer, category,
chunk) tuple, parses stats out of stdout, updates the run row.
* `OsmSyncCronService` schedules:
Daily 02:00 → POI category rotation (1/day, 20-day cycle)
Mon 02:30 → admin-boundaries provinces
Wed 02:30 → admin-boundaries districts
Sat 02:30 → admin-boundaries wards
1st of month 03:00 → industrial-parks (per chunk)
All gated by `OSM_SYNC_ENABLED=true`.
* New admin endpoints under `/admin/osm/*` (layers / coverage / runs /
trigger), guarded by JWT + ADMIN role.
* New `/admin/osm` Next.js page: stat cards, coverage table with
per-row "Sync now", recent runs list with auto-refresh every 15s.
Run on dev so far: 33 provinces + 1100+ districts (still finishing) +
305 hospitals POI imported.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
400
scripts/sync-osm-poi.ts
Normal file
400
scripts/sync-osm-poi.ts
Normal file
@@ -0,0 +1,400 @@
|
||||
/**
|
||||
* Sync OSM points-of-interest into the `Poi` table.
|
||||
*
|
||||
* Usage:
|
||||
* NODE_OPTIONS="-r dotenv/config" DOTENV_CONFIG_PATH=.env \
|
||||
* pnpm tsx scripts/sync-osm-poi.ts \
|
||||
* [--category=school,hospital,...|all] [--chunk=NAME] [--dry-run]
|
||||
*
|
||||
* What it does:
|
||||
* 1. For each requested category, queries Overpass for the matching
|
||||
* node/way/relation across the 4 Vietnam chunks.
|
||||
* 2. Filters out non-Vietnam centroids (cross-border bleed) and rows
|
||||
* without any Latin/Vietnamese letters in the name.
|
||||
* 3. Resolves provinceCode/districtCode/wardCode via PostGIS lookup
|
||||
* against `vn_provinces` / `vn_districts` / `vn_wards` (assumes
|
||||
* Phase 0 boundary sync ran first).
|
||||
* 4. Upserts on `osmId`, honouring `osmLocked` + `lockedFields`.
|
||||
*/
|
||||
import 'dotenv/config';
|
||||
import area from '@turf/area';
|
||||
import centroid from '@turf/centroid';
|
||||
import { createId } from '@paralleldrive/cuid2';
|
||||
import { PrismaPg } from '@prisma/adapter-pg';
|
||||
import { type Prisma, PrismaClient } from '@prisma/client';
|
||||
import type { Feature, MultiPolygon, Polygon, Point } from 'geojson';
|
||||
import osmtogeojson from 'osmtogeojson';
|
||||
import pg from 'pg';
|
||||
import { isPointInVietnam } from './data/vn-country-polygon';
|
||||
|
||||
const pool = new pg.Pool({ connectionString: process.env['DATABASE_URL'] });
|
||||
const adapter = new PrismaPg(pool);
|
||||
const prisma = new PrismaClient({ adapter });
|
||||
|
||||
const OVERPASS_URL =
|
||||
process.env['OVERPASS_URL'] ?? 'https://overpass-api.de/api/interpreter';
|
||||
|
||||
interface BBox {
|
||||
south: number;
|
||||
west: number;
|
||||
north: number;
|
||||
east: number;
|
||||
}
|
||||
|
||||
const CHUNKS: Record<string, BBox> = {
|
||||
north: { south: 19.0, west: 102.0, north: 23.5, east: 110.0 },
|
||||
northCentral: { south: 16.5, west: 102.0, north: 19.0, east: 110.0 },
|
||||
southCentral: { south: 13.0, west: 102.0, north: 16.5, east: 110.0 },
|
||||
south: { south: 8.0, west: 102.0, north: 13.0, east: 110.0 },
|
||||
};
|
||||
|
||||
type PoiCategoryKey =
|
||||
| 'SCHOOL_PRIMARY' | 'SCHOOL_SECONDARY' | 'UNIVERSITY'
|
||||
| 'HOSPITAL' | 'CLINIC' | 'PHARMACY'
|
||||
| 'MARKET' | 'SUPERMARKET' | 'MALL' | 'CONVENIENCE'
|
||||
| 'BANK' | 'ATM'
|
||||
| 'PARK'
|
||||
| 'GAS_STATION' | 'POLICE' | 'POST_OFFICE'
|
||||
| 'METRO_STATION' | 'RAILWAY_STATION' | 'BUS_STATION' | 'AIRPORT';
|
||||
|
||||
/**
|
||||
* For each category, the Overpass selector. We query node/way/relation
|
||||
* to catch both single points and named building polygons.
|
||||
*/
|
||||
const CATEGORY_QUERIES: Record<PoiCategoryKey, string> = {
|
||||
// ── Education ─────────────────────────────────────────────────────────
|
||||
SCHOOL_PRIMARY: '["amenity"="school"]["isced:level"~"^(primary|0|1)$"]',
|
||||
SCHOOL_SECONDARY: '["amenity"="school"]["isced:level"~"^(secondary|2|3)$"]',
|
||||
UNIVERSITY: '["amenity"~"^(university|college)$"]',
|
||||
// ── Health ────────────────────────────────────────────────────────────
|
||||
HOSPITAL: '["amenity"="hospital"]',
|
||||
CLINIC: '["amenity"="clinic"]',
|
||||
PHARMACY: '["amenity"="pharmacy"]',
|
||||
// ── Commerce ──────────────────────────────────────────────────────────
|
||||
MARKET: '["amenity"="marketplace"]',
|
||||
SUPERMARKET: '["shop"="supermarket"]',
|
||||
MALL: '["shop"="mall"]',
|
||||
CONVENIENCE: '["shop"="convenience"]',
|
||||
// ── Finance ───────────────────────────────────────────────────────────
|
||||
BANK: '["amenity"="bank"]',
|
||||
ATM: '["amenity"="atm"]',
|
||||
// ── Recreation / Services ────────────────────────────────────────────
|
||||
PARK: '["leisure"="park"]',
|
||||
GAS_STATION: '["amenity"="fuel"]',
|
||||
POLICE: '["amenity"="police"]',
|
||||
POST_OFFICE: '["amenity"="post_office"]',
|
||||
// ── Transport (stations / airports — lines live in TransportLine) ────
|
||||
METRO_STATION: '["railway"="station"]["station"="subway"]',
|
||||
RAILWAY_STATION: '["railway"="station"]["station"!="subway"]',
|
||||
BUS_STATION: '["amenity"="bus_station"]',
|
||||
AIRPORT: '["aeroway"="aerodrome"]["aerodrome:type"~"international|public"]',
|
||||
};
|
||||
|
||||
const ALL_CATEGORIES: PoiCategoryKey[] = Object.keys(CATEGORY_QUERIES) as PoiCategoryKey[];
|
||||
|
||||
// ─── CLI ───────────────────────────────────────────────────────────────────
|
||||
const argv = process.argv.slice(2);
|
||||
const dryRun = argv.includes('--dry-run');
|
||||
const chunkArg = argv.find((a) => a.startsWith('--chunk='))?.slice('--chunk='.length);
|
||||
const categoryArg = argv.find((a) => a.startsWith('--category='))?.slice('--category='.length) ?? 'all';
|
||||
const wantedCategories: PoiCategoryKey[] =
|
||||
categoryArg === 'all'
|
||||
? ALL_CATEGORIES
|
||||
: (categoryArg
|
||||
.split(',')
|
||||
.map((s) => s.trim().toUpperCase())
|
||||
.filter((s): s is PoiCategoryKey => ALL_CATEGORIES.includes(s as PoiCategoryKey)) as PoiCategoryKey[]);
|
||||
|
||||
if (wantedCategories.length === 0) {
|
||||
console.error(`No valid categories. Available: ${ALL_CATEGORIES.join(', ')}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
interface OverpassResult {
|
||||
elements: unknown[];
|
||||
}
|
||||
|
||||
async function fetchChunk(
|
||||
category: PoiCategoryKey,
|
||||
chunkName: string,
|
||||
bbox: BBox,
|
||||
): Promise<OverpassResult> {
|
||||
const sel = CATEGORY_QUERIES[category];
|
||||
const query = `
|
||||
[out:json][timeout:180];
|
||||
(
|
||||
node${sel}(${bbox.south},${bbox.west},${bbox.north},${bbox.east});
|
||||
way${sel}(${bbox.south},${bbox.west},${bbox.north},${bbox.east});
|
||||
relation${sel}(${bbox.south},${bbox.west},${bbox.north},${bbox.east});
|
||||
);
|
||||
out body geom;
|
||||
`;
|
||||
const t0 = Date.now();
|
||||
console.log(` → ${category} ${chunkName}…`);
|
||||
const res = await fetch(OVERPASS_URL, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'User-Agent': 'goodgo-osm-poi-sync/1.0 (https://goodgo.vn)',
|
||||
},
|
||||
body: 'data=' + encodeURIComponent(query),
|
||||
});
|
||||
if (!res.ok) {
|
||||
const body = await res.text();
|
||||
throw new Error(`Overpass ${res.status}: ${body.slice(0, 200)}`);
|
||||
}
|
||||
const json = (await res.json()) as OverpassResult;
|
||||
console.log(
|
||||
` ← ${category} ${chunkName}: ${json.elements?.length ?? 0} elements in ${(
|
||||
(Date.now() - t0) /
|
||||
1000
|
||||
).toFixed(1)}s`,
|
||||
);
|
||||
return json;
|
||||
}
|
||||
|
||||
interface ParsedPoi {
|
||||
category: PoiCategoryKey;
|
||||
osmId: bigint;
|
||||
osmType: 'NODE' | 'WAY' | 'RELATION';
|
||||
name: string;
|
||||
nameEn: string | null;
|
||||
centroid: { lng: number; lat: number };
|
||||
address: string | null;
|
||||
tags: Record<string, string>;
|
||||
}
|
||||
|
||||
function parseFeature(
|
||||
feat: Feature<Polygon | MultiPolygon | Point>,
|
||||
category: PoiCategoryKey,
|
||||
): ParsedPoi | null {
|
||||
const featAny = feat as unknown as { id?: unknown };
|
||||
const idStr = String(featAny.id ?? '');
|
||||
const slashIdx = idStr.indexOf('/');
|
||||
if (slashIdx < 0) return null;
|
||||
const typeStr = idStr.slice(0, slashIdx).toUpperCase();
|
||||
if (typeStr !== 'NODE' && typeStr !== 'WAY' && typeStr !== 'RELATION') return null;
|
||||
const osmType = typeStr as 'NODE' | 'WAY' | 'RELATION';
|
||||
const osmId = BigInt(idStr.slice(slashIdx + 1));
|
||||
|
||||
const propsRaw = (feat.properties ?? {}) as Record<string, unknown>;
|
||||
const tagsRaw = propsRaw['tags'];
|
||||
const tags: Record<string, string> =
|
||||
tagsRaw && typeof tagsRaw === 'object'
|
||||
? (tagsRaw as Record<string, string>)
|
||||
: (propsRaw as Record<string, string>);
|
||||
|
||||
const name = tags['name:vi'] ?? tags['name'] ?? null;
|
||||
// Skip purely unnamed POIs (very common for shop=convenience etc.)
|
||||
if (!name) return null;
|
||||
// Skip rows without Latin/Vietnamese letters (cross-border bleed).
|
||||
if (!/[A-Za-zÀ-ỹ]/.test(name)) return null;
|
||||
|
||||
let cLng: number;
|
||||
let cLat: number;
|
||||
if (feat.geometry.type === 'Point') {
|
||||
[cLng, cLat] = feat.geometry.coordinates;
|
||||
} else {
|
||||
const c = centroid(feat as Feature);
|
||||
[cLng, cLat] = c.geometry.coordinates;
|
||||
}
|
||||
if (!isPointInVietnam(cLng, cLat)) return null;
|
||||
|
||||
const address =
|
||||
tags['addr:full'] ??
|
||||
[tags['addr:housenumber'], tags['addr:street']].filter(Boolean).join(' ') ??
|
||||
null;
|
||||
|
||||
return {
|
||||
category,
|
||||
osmId,
|
||||
osmType,
|
||||
name,
|
||||
nameEn: tags['name:en'] ?? null,
|
||||
centroid: { lng: cLng, lat: cLat },
|
||||
address: address || null,
|
||||
tags,
|
||||
};
|
||||
}
|
||||
|
||||
interface UpsertStats {
|
||||
inserted: number;
|
||||
updated: number;
|
||||
locked: number;
|
||||
skipped: number;
|
||||
}
|
||||
|
||||
async function upsertPoi(parsed: ParsedPoi, stats: UpsertStats): Promise<void> {
|
||||
const existing = await prisma.poi.findUnique({
|
||||
where: { osmId: parsed.osmId },
|
||||
select: { id: true, osmLocked: true, lockedFields: true },
|
||||
});
|
||||
if (existing?.osmLocked) {
|
||||
stats.locked++;
|
||||
return;
|
||||
}
|
||||
|
||||
// Resolve admin codes from the polygon tables.
|
||||
const admin = await prisma.$queryRawUnsafe<
|
||||
{ provinceCode: string | null; districtCode: string | null; wardCode: string | null }[]
|
||||
>(
|
||||
`WITH p AS (
|
||||
SELECT code FROM "vn_provinces"
|
||||
WHERE geometry IS NOT NULL
|
||||
AND ST_Contains(geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
|
||||
LIMIT 1
|
||||
),
|
||||
d AS (
|
||||
SELECT d.code
|
||||
FROM "vn_districts" d JOIN p ON p.code = d."provinceCode"
|
||||
WHERE d.geometry IS NOT NULL
|
||||
AND ST_Contains(d.geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
|
||||
LIMIT 1
|
||||
)
|
||||
SELECT
|
||||
(SELECT code FROM p) AS "provinceCode",
|
||||
(SELECT code FROM d) AS "districtCode",
|
||||
(SELECT w.code FROM "vn_wards" w JOIN d ON d.code = w."districtCode"
|
||||
WHERE w.geometry IS NOT NULL
|
||||
AND ST_Contains(w.geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
|
||||
LIMIT 1) AS "wardCode"`,
|
||||
parsed.centroid.lng,
|
||||
parsed.centroid.lat,
|
||||
);
|
||||
const provinceCode = admin[0]?.provinceCode ?? null;
|
||||
const districtCode = admin[0]?.districtCode ?? null;
|
||||
const wardCode = admin[0]?.wardCode ?? null;
|
||||
|
||||
if (!existing) {
|
||||
const cuid = createId();
|
||||
await prisma.$executeRawUnsafe(
|
||||
`
|
||||
INSERT INTO "Poi" (
|
||||
id, category, name, "nameEn", location, address,
|
||||
"provinceCode", "districtCode", "wardCode",
|
||||
"osmId", "osmType", "osmTags",
|
||||
"dataSource", "isPublic", "lastSyncedAt", "createdAt", "updatedAt"
|
||||
) VALUES (
|
||||
$1, $2::"PoiCategory", $3, $4,
|
||||
ST_SetSRID(ST_MakePoint($5, $6), 4326), $7,
|
||||
$8, $9, $10,
|
||||
$11::bigint, $12::"OsmType", $13::jsonb,
|
||||
'OSM'::"OsmDataSource", true, NOW(), NOW(), NOW()
|
||||
)
|
||||
`,
|
||||
cuid,
|
||||
parsed.category,
|
||||
parsed.name,
|
||||
parsed.nameEn,
|
||||
parsed.centroid.lng,
|
||||
parsed.centroid.lat,
|
||||
parsed.address,
|
||||
provinceCode,
|
||||
districtCode,
|
||||
wardCode,
|
||||
parsed.osmId.toString(),
|
||||
parsed.osmType,
|
||||
JSON.stringify(parsed.tags),
|
||||
);
|
||||
stats.inserted++;
|
||||
} else {
|
||||
// Update — respect lockedFields list.
|
||||
const locked = new Set(existing.lockedFields ?? []);
|
||||
const data: Prisma.PoiUpdateInput = {
|
||||
lastSyncedAt: new Date(),
|
||||
osmTags: JSON.stringify(parsed.tags) as unknown as Prisma.InputJsonValue,
|
||||
};
|
||||
if (!locked.has('name')) data.name = parsed.name;
|
||||
if (!locked.has('nameEn')) data.nameEn = parsed.nameEn;
|
||||
if (!locked.has('address')) data.address = parsed.address;
|
||||
if (!locked.has('provinceCode')) data.provinceCode = provinceCode;
|
||||
if (!locked.has('districtCode')) data.districtCode = districtCode;
|
||||
if (!locked.has('wardCode')) data.wardCode = wardCode;
|
||||
await prisma.poi.update({ where: { id: existing.id }, data });
|
||||
// Location update via raw SQL (Prisma can't write `Unsupported` columns).
|
||||
if (!locked.has('location')) {
|
||||
await prisma.$executeRawUnsafe(
|
||||
`UPDATE "Poi" SET location = ST_SetSRID(ST_MakePoint($1, $2), 4326) WHERE id = $3`,
|
||||
parsed.centroid.lng,
|
||||
parsed.centroid.lat,
|
||||
existing.id,
|
||||
);
|
||||
}
|
||||
stats.updated++;
|
||||
}
|
||||
}
|
||||
|
||||
async function processCategoryChunk(
|
||||
category: PoiCategoryKey,
|
||||
chunkName: string,
|
||||
bbox: BBox,
|
||||
stats: UpsertStats,
|
||||
): Promise<void> {
|
||||
const result = await fetchChunk(category, chunkName, bbox);
|
||||
const fc = osmtogeojson(result, { flatProperties: false });
|
||||
const features = (fc.features as Feature<Polygon | MultiPolygon | Point>[]).filter(
|
||||
(f) =>
|
||||
f.geometry?.type === 'Point' ||
|
||||
f.geometry?.type === 'Polygon' ||
|
||||
f.geometry?.type === 'MultiPolygon',
|
||||
);
|
||||
for (const feat of features) {
|
||||
const parsed = parseFeature(feat, category);
|
||||
if (!parsed) continue;
|
||||
if (dryRun) {
|
||||
stats.inserted++;
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
await upsertPoi(parsed, stats);
|
||||
} catch (err) {
|
||||
console.error(
|
||||
` ✗ ${category} ${parsed.name}: ${err instanceof Error ? err.message : err}`,
|
||||
);
|
||||
stats.skipped++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async function main(): Promise<void> {
|
||||
console.log(`📍 OSM POI sync: categories=${wantedCategories.join(',')} dryRun=${dryRun}`);
|
||||
|
||||
const chunks = chunkArg
|
||||
? { [chunkArg]: CHUNKS[chunkArg]! }
|
||||
: CHUNKS;
|
||||
|
||||
const totals: Record<string, UpsertStats> = {};
|
||||
for (const cat of wantedCategories) {
|
||||
console.log(`\n=== ${cat} ===`);
|
||||
const s: UpsertStats = { inserted: 0, updated: 0, locked: 0, skipped: 0 };
|
||||
for (const [name, bbox] of Object.entries(chunks)) {
|
||||
try {
|
||||
await processCategoryChunk(cat, name, bbox, s);
|
||||
} catch (err) {
|
||||
console.error(` ✗ chunk ${name} (${cat}) failed:`, err);
|
||||
}
|
||||
}
|
||||
totals[cat] = s;
|
||||
console.log(
|
||||
` ✓ ${cat}: inserted=${s.inserted} updated=${s.updated} locked=${s.locked} skipped=${s.skipped}`,
|
||||
);
|
||||
}
|
||||
|
||||
console.log('\n📊 Totals:');
|
||||
for (const cat of wantedCategories) {
|
||||
const s = totals[cat]!;
|
||||
console.log(
|
||||
` ${cat.padEnd(20)} inserted=${s.inserted} updated=${s.updated} locked=${s.locked} skipped=${s.skipped}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
main()
|
||||
.catch((err) => {
|
||||
console.error(err);
|
||||
process.exitCode = 1;
|
||||
})
|
||||
.finally(async () => {
|
||||
await prisma.$disconnect();
|
||||
await pool.end();
|
||||
});
|
||||
Reference in New Issue
Block a user