From 99f305f6baac8d9ad24bafc593c18ed35b4f182f Mon Sep 17 00:00:00 2001 From: Ho Ngoc Hai Date: Wed, 29 Apr 2026 18:27:45 +0700 Subject: [PATCH] feat(industrial): add OSM provenance + sync state to IndustrialPark (PR 1/4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First PR of the OSM-sync project. Adds the schema scaffolding so the follow-up bulk-import PR can write OSM-sourced rows alongside the 50 hand-curated industrial parks already in the table without disturbing public list/detail/map flows or the IndustrialListing FK relationship. New enums: - IndustrialParkOsmType: NODE | WAY | RELATION - IndustrialParkDataSource: MANUAL existing curated rows (default for the 50 backfilled) OSM raw OSM import, hidden from public until promoted OSM_PROMOTED admin-reviewed OSM row visible on the public list New columns on IndustrialPark: - dataSource — drives public visibility + sync policy - isPublic — true for MANUAL, false for raw OSM - osmType, osmId — link to OSM entity (osmId UNIQUE) - osmVersion, osmTags — incremental sync state + raw tag bag (JSONB) - boundary — PostGIS MultiPolygon for park outline (Point centroid stays in `location` for low-zoom render) - osmLocked — admin freeze flag; sync skips this row entirely - lockedFields — per-field freeze list; sync preserves listed cols - lastSyncedAt — last reconcile pass timestamp New indexes: - osmId — sync upsert lookup - (dataSource, isPublic) — public list filter - boundary GiST — viewport / bbox spatial queries - lastSyncedAt — cron staleness scan Backfill behaviour: existing 50 rows automatically get dataSource=MANUAL, isPublic=true via column defaults — no breaking change for current consumers (frontend list, detail, map, admin moderation, IndustrialListing FK). Manually written migration SQL because Prisma cannot manage the PostGIS Geometry type — `boundary` is added via AddGeometryColumn(). 
Next PRs: - PR 2: bulk-import script (Overpass + osmium fallback) - PR 3: bbox spatial API + frontend Mapbox layer (cluster + outlines) - PR 4: monthly sync cron + admin diff/promote UI Co-Authored-By: Claude Opus 4.7 (1M context) --- .../migration.sql | 57 ++++++++++++++++++ prisma/schema.prisma | 59 +++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 prisma/migrations/20260429020000_add_osm_provenance_to_industrial_parks/migration.sql diff --git a/prisma/migrations/20260429020000_add_osm_provenance_to_industrial_parks/migration.sql b/prisma/migrations/20260429020000_add_osm_provenance_to_industrial_parks/migration.sql new file mode 100644 index 0000000..612c285 --- /dev/null +++ b/prisma/migrations/20260429020000_add_osm_provenance_to_industrial_parks/migration.sql @@ -0,0 +1,57 @@ +-- ───────────────────────────────────────────────────────────────────────── +-- PR 1 of OSM-sync project: add provenance fields to IndustrialPark so the +-- next PR can bulk-import all `landuse=industrial` features from OSM +-- without disturbing the 50 hand-curated rows already in the table. +-- +-- Design notes: +-- • `dataSource` distinguishes MANUAL (existing seeded rows) from OSM +-- (raw imports) and OSM_PROMOTED (admin-reviewed). Public list filters +-- to MANUAL + OSM_PROMOTED via `isPublic`. +-- • `boundary` is a PostGIS MultiPolygon — most KCN OSM entities are +-- `way` polygons; some are `relation` multipolygons. Both fit here. +-- • `osmId` is unique. OSM ids are per-type but collisions across types +-- are improbable given our query restricts to `landuse=industrial`. +-- • `lockedFields` + `osmLocked` give admins escape hatches when OSM +-- data is noisier than the curated value. +-- ───────────────────────────────────────────────────────────────────────── + +-- Enums first. CREATE TYPE has no IF NOT EXISTS form and these statements are not wrapped in a DO block, so this script is not re-runnable and must be applied exactly once. 
+CREATE TYPE "IndustrialParkOsmType" AS ENUM ('NODE', 'WAY', 'RELATION'); +CREATE TYPE "IndustrialParkDataSource" AS ENUM ('MANUAL', 'OSM', 'OSM_PROMOTED'); + +-- Add the scalar columns in a single ALTER TABLE. Existing rows are backfilled by the column +-- defaults ('MANUAL', true, false, empty array); the columns declared without a default stay NULL. +ALTER TABLE "IndustrialPark" + ADD COLUMN "dataSource" "IndustrialParkDataSource" NOT NULL DEFAULT 'MANUAL', + ADD COLUMN "isPublic" BOOLEAN NOT NULL DEFAULT true, + ADD COLUMN "osmType" "IndustrialParkOsmType", + ADD COLUMN "osmId" BIGINT, + ADD COLUMN "osmVersion" INTEGER, + ADD COLUMN "osmTags" JSONB, + ADD COLUMN "osmLocked" BOOLEAN NOT NULL DEFAULT false, + ADD COLUMN "lockedFields" TEXT[] NOT NULL DEFAULT ARRAY[]::TEXT[], + ADD COLUMN "lastSyncedAt" TIMESTAMP(3); + +-- PostGIS MultiPolygon column (SRID 4326, 2 dimensions). Prisma uses `Unsupported(...)` for this so +-- it never tries to manage it; we keep ownership in raw SQL. NOTE(review): the schema name is hard-coded to 'public' — confirm the table never lives elsewhere. +SELECT AddGeometryColumn('public', 'IndustrialPark', 'boundary', 4326, 'MULTIPOLYGON', 2); + +-- Unique index on osmId (NULLs allowed → uniqueness is enforced among non-null values only). 
+CREATE UNIQUE INDEX "IndustrialPark_osmId_key" ON "IndustrialPark"("osmId");
+
+-- Indexes for the new access patterns:
+-- • osmId lookup during sync upsert — served by the unique index above
+-- • public-list filter (dataSource + isPublic)
+-- • spatial bbox queries (boundary GiST)
+-- • find rows that haven't been synced recently (cron)
+-- No separate plain index on "osmId": it would duplicate the unique index,
+-- costing extra writes and storage without speeding up any lookup.
+
+CREATE INDEX "IndustrialPark_dataSource_isPublic_idx"
+  ON "IndustrialPark"("dataSource", "isPublic");
+
+CREATE INDEX "IndustrialPark_boundary_idx"
+  ON "IndustrialPark" USING GIST ("boundary");
+
+CREATE INDEX "IndustrialPark_lastSyncedAt_idx"
+  ON "IndustrialPark"("lastSyncedAt");
diff --git a/prisma/schema.prisma b/prisma/schema.prisma
index d6e4bbd..286b3fb 100644
--- a/prisma/schema.prisma
+++ b/prisma/schema.prisma
@@ -1083,6 +1083,27 @@ enum IndustrialParkStatus {
   FULL
 }
 
+/// OSM element type — way/relation are most common for industrial parks
+/// (polygon boundaries), node only used when the park has no traced area.
+enum IndustrialParkOsmType {
+  NODE
+  WAY
+  RELATION
+}
+
+/// Provenance of an IndustrialPark row. Used to filter what's shown on the
+/// public KCN list (only MANUAL + OSM_PROMOTED) versus the admin queue
+/// (everything, including raw OSM imports).
+enum IndustrialParkDataSource {
+  /// Human-curated by goodgo team; full business data (rents, fees, media).
+  MANUAL
+  /// Imported from OpenStreetMap, not yet vetted. Hidden from public list.
+  OSM
+  /// Imported from OSM and reviewed by an admin who promoted it to the
+  /// public catalog. Geometry/name still tracked against OSM via osmId.
+  OSM_PROMOTED
+}
+
 enum IndustrialPropertyType {
   INDUSTRIAL_LAND
   READY_BUILT_FACTORY
@@ -1150,6 +1171,39 @@ model IndustrialPark {
   /// Optional owning operator user (role=PARK_OPERATOR). NULL for parks not
   /// yet assigned to an operator account — admin still manages those.
   ownerId String?
+
+  // ─── OSM provenance & sync state ─────────────────────────────────────────
+  /// Marker for where this row came from. Drives public visibility +
+  /// conflict-resolution policy during OSM sync.
+  dataSource IndustrialParkDataSource @default(MANUAL)
+  /// Hidden from the public list when false. The column default is true (MANUAL
+  /// rows); OSM imports must set false explicitly until an admin promotes them.
+  isPublic Boolean @default(true)
+  /// OpenStreetMap entity that this row mirrors (NULL for purely manual rows).
+  /// `osmId` alone is unique even though OSM ids are only unique per element
+  /// type; a row whose id clashes across types would violate this constraint.
+  osmType IndustrialParkOsmType?
+  osmId BigInt? @unique
+  /// OSM `version` tag. Used during incremental sync to detect remote edits.
+  osmVersion Int?
+  /// Full OSM tag bag, kept as JSONB for flexibility (we don't model every
+  /// possible tag — operator, website, addr:*, source, etc.).
+  osmTags Json?
+  /// Polygon outline of the park as a MultiPolygon. NULL when the OSM entity
+  /// is a node (no traced area) or when sourced from a manual seed without
+  /// boundary tracing. `location` (Point) remains the centroid for low-zoom
+  /// rendering.
+  boundary Unsupported("geometry(MultiPolygon, 4326)")?
+  /// When true the OSM sync cron skips this row entirely (admin froze it).
+  /// Useful for parks where OSM tag noise would overwrite curated data.
+  osmLocked Boolean @default(false)
+  /// Per-field lock list. Even when `osmLocked = false`, the sync cron
+  /// preserves any column whose name appears here. Lets admins fix one
+  /// field (e.g. `name`) without freezing the whole row.
+  lockedFields String[] @default([])
+  /// Last successful Overpass/PBF reconcile pass; NULL means never synced.
+  lastSyncedAt DateTime?
+
   createdAt DateTime @default(now())
   updatedAt DateTime @updatedAt
 
@@ -1167,6 +1221,11 @@ model IndustrialPark {
   @@index([region, province, status])
   @@index([createdAt])
   @@index([ownerId])
+  // OSM sync access patterns. No @@index([osmId]): the field's @unique
+  // already creates the index used for sync upsert lookups.
+  @@index([dataSource, isPublic])
+  @@index([boundary], type: Gist)
+  @@index([lastSyncedAt])
 }
 
 model IndustrialListing {