Files
goodgo-platform/scripts/sync-osm-poi.ts
2026-05-04 17:27:08 +07:00

406 lines
14 KiB
TypeScript

/**
* Sync OSM points-of-interest into the `Poi` table.
*
* Usage:
* NODE_OPTIONS="-r dotenv/config" DOTENV_CONFIG_PATH=.env \
* pnpm tsx scripts/sync-osm-poi.ts \
* [--category=school,hospital,...|all] [--chunk=NAME] [--dry-run]
*
* What it does:
* 1. For each requested category, queries Overpass for the matching
* node/way/relation across the 4 Vietnam chunks.
* 2. Filters out non-Vietnam centroids (cross-border bleed) and rows
* without any Latin/Vietnamese letters in the name.
* 3. Resolves provinceCode/districtCode/wardCode via PostGIS lookup
* against `vn_provinces` / `vn_districts` / `vn_wards` (assumes
* Phase 0 boundary sync ran first).
* 4. Upserts on `osmId`, honouring `osmLocked` + `lockedFields`.
*/
import 'dotenv/config';
import { createId } from '@paralleldrive/cuid2';
import { PrismaPg } from '@prisma/adapter-pg';
import { type Prisma, PrismaClient } from '@prisma/client';
import centroid from '@turf/centroid';
import type { Feature, MultiPolygon, Polygon, Point } from 'geojson';
import osmtogeojson from 'osmtogeojson';
import pg from 'pg';
import { isPointInVietnam } from './data/vn-country-polygon';
const pool = new pg.Pool({ connectionString: process.env['DATABASE_URL'] });
const adapter = new PrismaPg(pool);
const prisma = new PrismaClient({ adapter });
const OVERPASS_URL =
process.env['OVERPASS_URL'] ?? 'https://overpass-api.de/api/interpreter';
interface BBox {
south: number;
west: number;
north: number;
east: number;
}
const CHUNKS: Record<string, BBox> = {
north: { south: 19.0, west: 102.0, north: 23.5, east: 110.0 },
northCentral: { south: 16.5, west: 102.0, north: 19.0, east: 110.0 },
southCentral: { south: 13.0, west: 102.0, north: 16.5, east: 110.0 },
south: { south: 8.0, west: 102.0, north: 13.0, east: 110.0 },
};
type PoiCategoryKey =
| 'SCHOOL_PRIMARY' | 'SCHOOL_SECONDARY' | 'UNIVERSITY'
| 'HOSPITAL' | 'CLINIC' | 'PHARMACY'
| 'MARKET' | 'SUPERMARKET' | 'MALL' | 'CONVENIENCE'
| 'BANK' | 'ATM'
| 'PARK'
| 'GAS_STATION' | 'POLICE' | 'POST_OFFICE'
| 'METRO_STATION' | 'RAILWAY_STATION' | 'BUS_STATION' | 'AIRPORT';
/**
* For each category, the Overpass selector. We query node/way/relation
* to catch both single points and named building polygons.
*/
const CATEGORY_QUERIES: Record<PoiCategoryKey, string> = {
// ── Education ─────────────────────────────────────────────────────────
// OSM Vietnam rarely tags `isced:level`, so we accept everything tagged
// `amenity=school` and let `school:type=primary|secondary` (when
// present) drive the post-import categorisation. The two SCHOOL_*
// selectors here intentionally overlap on plain `amenity=school` —
// we de-duplicate on `osmId` UNIQUE so it's a no-op on the second
// pass.
SCHOOL_PRIMARY: '["amenity"="school"]',
SCHOOL_SECONDARY: '["amenity"="school"]["school:type"~"secondary|gymnasium|high"]',
UNIVERSITY: '["amenity"~"^(university|college)$"]',
// ── Health ────────────────────────────────────────────────────────────
HOSPITAL: '["amenity"="hospital"]',
CLINIC: '["amenity"="clinic"]',
PHARMACY: '["amenity"="pharmacy"]',
// ── Commerce ──────────────────────────────────────────────────────────
MARKET: '["amenity"="marketplace"]',
SUPERMARKET: '["shop"="supermarket"]',
MALL: '["shop"="mall"]',
CONVENIENCE: '["shop"="convenience"]',
// ── Finance ───────────────────────────────────────────────────────────
BANK: '["amenity"="bank"]',
ATM: '["amenity"="atm"]',
// ── Recreation / Services ────────────────────────────────────────────
PARK: '["leisure"="park"]',
GAS_STATION: '["amenity"="fuel"]',
POLICE: '["amenity"="police"]',
POST_OFFICE: '["amenity"="post_office"]',
// ── Transport (stations / airports — lines live in TransportLine) ────
METRO_STATION: '["railway"="station"]["station"="subway"]',
RAILWAY_STATION: '["railway"="station"]["station"!="subway"]',
BUS_STATION: '["amenity"="bus_station"]',
AIRPORT: '["aeroway"="aerodrome"]["aerodrome:type"~"international|public"]',
};
const ALL_CATEGORIES: PoiCategoryKey[] = Object.keys(CATEGORY_QUERIES) as PoiCategoryKey[];
// ─── CLI ───────────────────────────────────────────────────────────────────
const argv = process.argv.slice(2);
const dryRun = argv.includes('--dry-run');
const chunkArg = argv.find((a) => a.startsWith('--chunk='))?.slice('--chunk='.length);
const categoryArg = argv.find((a) => a.startsWith('--category='))?.slice('--category='.length) ?? 'all';
const wantedCategories: PoiCategoryKey[] =
categoryArg === 'all'
? ALL_CATEGORIES
: (categoryArg
.split(',')
.map((s) => s.trim().toUpperCase())
.filter((s): s is PoiCategoryKey => ALL_CATEGORIES.includes(s as PoiCategoryKey)) as PoiCategoryKey[]);
if (wantedCategories.length === 0) {
console.error(`No valid categories. Available: ${ALL_CATEGORIES.join(', ')}`);
process.exit(1);
}
interface OverpassResult {
elements: unknown[];
}
async function fetchChunk(
category: PoiCategoryKey,
chunkName: string,
bbox: BBox,
): Promise<OverpassResult> {
const sel = CATEGORY_QUERIES[category];
const query = `
[out:json][timeout:180];
(
node${sel}(${bbox.south},${bbox.west},${bbox.north},${bbox.east});
way${sel}(${bbox.south},${bbox.west},${bbox.north},${bbox.east});
relation${sel}(${bbox.south},${bbox.west},${bbox.north},${bbox.east});
);
out body geom;
`;
const t0 = Date.now();
console.log(`${category} ${chunkName}`);
const res = await fetch(OVERPASS_URL, {
method: 'POST',
headers: {
'Content-Type': 'application/x-www-form-urlencoded',
'User-Agent': 'goodgo-osm-poi-sync/1.0 (https://goodgo.vn)',
},
body: 'data=' + encodeURIComponent(query),
});
if (!res.ok) {
const body = await res.text();
throw new Error(`Overpass ${res.status}: ${body.slice(0, 200)}`);
}
const json = (await res.json()) as OverpassResult;
console.log(
`${category} ${chunkName}: ${json.elements?.length ?? 0} elements in ${(
(Date.now() - t0) /
1000
).toFixed(1)}s`,
);
return json;
}
interface ParsedPoi {
category: PoiCategoryKey;
osmId: bigint;
osmType: 'NODE' | 'WAY' | 'RELATION';
name: string;
nameEn: string | null;
centroid: { lng: number; lat: number };
address: string | null;
tags: Record<string, string>;
}
function parseFeature(
feat: Feature<Polygon | MultiPolygon | Point>,
category: PoiCategoryKey,
): ParsedPoi | null {
const featAny = feat as unknown as { id?: unknown };
const idStr = String(featAny.id ?? '');
const slashIdx = idStr.indexOf('/');
if (slashIdx < 0) return null;
const typeStr = idStr.slice(0, slashIdx).toUpperCase();
if (typeStr !== 'NODE' && typeStr !== 'WAY' && typeStr !== 'RELATION') return null;
const osmType = typeStr as 'NODE' | 'WAY' | 'RELATION';
const osmId = BigInt(idStr.slice(slashIdx + 1));
const propsRaw = (feat.properties ?? {}) as Record<string, unknown>;
const tagsRaw = propsRaw['tags'];
const tags: Record<string, string> =
tagsRaw && typeof tagsRaw === 'object'
? (tagsRaw as Record<string, string>)
: (propsRaw as Record<string, string>);
const name = tags['name:vi'] ?? tags['name'] ?? null;
// Skip purely unnamed POIs (very common for shop=convenience etc.)
if (!name) return null;
// Skip rows without Latin/Vietnamese letters (cross-border bleed).
if (!/[A-Za-zÀ-ỹ]/.test(name)) return null;
let cLng: number;
let cLat: number;
if (feat.geometry.type === 'Point') {
[cLng, cLat] = feat.geometry.coordinates;
} else {
const c = centroid(feat as Feature);
[cLng, cLat] = c.geometry.coordinates;
}
if (!isPointInVietnam(cLng, cLat)) return null;
const address =
tags['addr:full'] ??
[tags['addr:housenumber'], tags['addr:street']].filter(Boolean).join(' ') ??
null;
return {
category,
osmId,
osmType,
name,
nameEn: tags['name:en'] ?? null,
centroid: { lng: cLng, lat: cLat },
address: address || null,
tags,
};
}
interface UpsertStats {
inserted: number;
updated: number;
locked: number;
skipped: number;
}
async function upsertPoi(parsed: ParsedPoi, stats: UpsertStats): Promise<void> {
const existing = await prisma.poi.findUnique({
where: { osmId: parsed.osmId },
select: { id: true, osmLocked: true, lockedFields: true },
});
if (existing?.osmLocked) {
stats.locked++;
return;
}
// Resolve admin codes from the polygon tables.
const admin = await prisma.$queryRawUnsafe<
{ provinceCode: string | null; districtCode: string | null; wardCode: string | null }[]
>(
`WITH p AS (
SELECT code FROM "vn_provinces"
WHERE geometry IS NOT NULL
AND ST_Contains(geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
LIMIT 1
),
d AS (
SELECT d.code
FROM "vn_districts" d JOIN p ON p.code = d."provinceCode"
WHERE d.geometry IS NOT NULL
AND ST_Contains(d.geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
LIMIT 1
)
SELECT
(SELECT code FROM p) AS "provinceCode",
(SELECT code FROM d) AS "districtCode",
(SELECT w.code FROM "vn_wards" w JOIN d ON d.code = w."districtCode"
WHERE w.geometry IS NOT NULL
AND ST_Contains(w.geometry, ST_SetSRID(ST_MakePoint($1, $2), 4326))
LIMIT 1) AS "wardCode"`,
parsed.centroid.lng,
parsed.centroid.lat,
);
const provinceCode = admin[0]?.provinceCode ?? null;
const districtCode = admin[0]?.districtCode ?? null;
const wardCode = admin[0]?.wardCode ?? null;
if (!existing) {
const cuid = createId();
await prisma.$executeRawUnsafe(
`
INSERT INTO "Poi" (
id, category, name, "nameEn", location, address,
"provinceCode", "districtCode", "wardCode",
"osmId", "osmType", "osmTags",
"dataSource", "isPublic", "lastSyncedAt", "createdAt", "updatedAt"
) VALUES (
$1, $2::"PoiCategory", $3, $4,
ST_SetSRID(ST_MakePoint($5, $6), 4326), $7,
$8, $9, $10,
$11::bigint, $12::"OsmType", $13::jsonb,
'OSM'::"OsmDataSource", true, NOW(), NOW(), NOW()
)
`,
cuid,
parsed.category,
parsed.name,
parsed.nameEn,
parsed.centroid.lng,
parsed.centroid.lat,
parsed.address,
provinceCode,
districtCode,
wardCode,
parsed.osmId.toString(),
parsed.osmType,
JSON.stringify(parsed.tags),
);
stats.inserted++;
} else {
// Update — respect lockedFields list.
const locked = new Set(existing.lockedFields ?? []);
const data: Prisma.PoiUpdateInput = {
lastSyncedAt: new Date(),
osmTags: JSON.stringify(parsed.tags) as unknown as Prisma.InputJsonValue,
};
if (!locked.has('name')) data.name = parsed.name;
if (!locked.has('nameEn')) data.nameEn = parsed.nameEn;
if (!locked.has('address')) data.address = parsed.address;
if (!locked.has('provinceCode')) data.provinceCode = provinceCode;
if (!locked.has('districtCode')) data.districtCode = districtCode;
if (!locked.has('wardCode')) data.wardCode = wardCode;
await prisma.poi.update({ where: { id: existing.id }, data });
// Location update via raw SQL (Prisma can't write `Unsupported` columns).
if (!locked.has('location')) {
await prisma.$executeRawUnsafe(
`UPDATE "Poi" SET location = ST_SetSRID(ST_MakePoint($1, $2), 4326) WHERE id = $3`,
parsed.centroid.lng,
parsed.centroid.lat,
existing.id,
);
}
stats.updated++;
}
}
async function processCategoryChunk(
category: PoiCategoryKey,
chunkName: string,
bbox: BBox,
stats: UpsertStats,
): Promise<void> {
const result = await fetchChunk(category, chunkName, bbox);
const fc = osmtogeojson(result, { flatProperties: false });
const features = (fc.features as Feature<Polygon | MultiPolygon | Point>[]).filter(
(f) =>
f.geometry?.type === 'Point' ||
f.geometry?.type === 'Polygon' ||
f.geometry?.type === 'MultiPolygon',
);
for (const feat of features) {
const parsed = parseFeature(feat, category);
if (!parsed) continue;
if (dryRun) {
stats.inserted++;
continue;
}
try {
await upsertPoi(parsed, stats);
} catch (err) {
console.error(
`${category} ${parsed.name}: ${err instanceof Error ? err.message : err}`,
);
stats.skipped++;
}
}
}
async function main(): Promise<void> {
console.log(`📍 OSM POI sync: categories=${wantedCategories.join(',')} dryRun=${dryRun}`);
const chunks = chunkArg
? { [chunkArg]: CHUNKS[chunkArg]! }
: CHUNKS;
const totals: Record<string, UpsertStats> = {};
for (const cat of wantedCategories) {
console.log(`\n=== ${cat} ===`);
const s: UpsertStats = { inserted: 0, updated: 0, locked: 0, skipped: 0 };
for (const [name, bbox] of Object.entries(chunks)) {
try {
await processCategoryChunk(cat, name, bbox, s);
} catch (err) {
console.error(` ✗ chunk ${name} (${cat}) failed:`, err);
}
}
totals[cat] = s;
console.log(
`${cat}: inserted=${s.inserted} updated=${s.updated} locked=${s.locked} skipped=${s.skipped}`,
);
}
console.log('\n📊 Totals:');
for (const cat of wantedCategories) {
const s = totals[cat]!;
console.log(
` ${cat.padEnd(20)} inserted=${s.inserted} updated=${s.updated} locked=${s.locked} skipped=${s.skipped}`,
);
}
}
main()
.catch((err) => {
console.error(err);
process.exitCode = 1;
})
.finally(async () => {
await prisma.$disconnect();
await pool.end();
});