aboutsummaryrefslogtreecommitdiffstats
path: root/webmap-publish
diff options
context:
space:
mode:
authorGuilhem Moulin <guilhem@fripost.org>2024-09-21 04:00:05 +0200
committerGuilhem Moulin <guilhem@fripost.org>2024-09-25 21:38:46 +0200
commit7cda119879cf48ba72ba34522fa9cdf9ef6d9b49 (patch)
treef631a3830b2de370ce0017a449b8ac1077f32ca9 /webmap-publish
parent54db31b0df41e397438d860ec8014b7100f72eb2 (diff)
Add `webmap-publish` script to export layers to Mapbox Vector Tiles.
Diffstat (limited to 'webmap-publish')
-rwxr-xr-xwebmap-publish760
1 files changed, 760 insertions, 0 deletions
diff --git a/webmap-publish b/webmap-publish
new file mode 100755
index 0000000..36bed88
--- /dev/null
+++ b/webmap-publish
@@ -0,0 +1,760 @@
+#!/usr/bin/python3
+
+#----------------------------------------------------------------------
+# Backend utilities for the Klimatanalys Norr project (create MVT tiles)
+# Copyright © 2024 Guilhem Moulin <info@guilhem.se>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#----------------------------------------------------------------------
+
+from os import O_RDONLY, O_WRONLY, O_CREAT, O_EXCL, O_TRUNC, O_CLOEXEC, O_DIRECTORY
+import os
+import errno
+from fcntl import flock, LOCK_EX, LOCK_UN
+from string import hexdigits
+import json
+import logging
+import math
+import random
+import argparse
+import tempfile
+from pathlib import Path
+import numpy
+
+from osgeo import gdal, ogr, osr
+from osgeo.gdalconst import (
+ OF_VECTOR as GDAL_OF_VECTOR,
+ OF_READONLY as GDAL_OF_READONLY,
+ OF_UPDATE as GDAL_OF_UPDATE,
+ OF_VERBOSE_ERROR as GDAL_OF_VERBOSE_ERROR,
+)
+gdal.UseExceptions()
+
+import common
+from common import (
+ format_bytes,
+ gdalSetOpenExArgs,
+ gdalVersionMin,
+ gdalGetMetadataItem,
+ escapeIdentifier
+)
+
+# Open and return source dataset.
+def openSourceDS(def_dict):
+ # merge 'open-options-publish' into 'open-options'
+ opts2 = def_dict.get('open-options-publish', None)
+ if opts2 is not None:
+ if 'open-options' not in def_dict:
+ def_dict['open-options'] = {}
+ def_dict['open-options'] |= opts2
+ kwargs, _ = gdalSetOpenExArgs(def_dict, flags=GDAL_OF_READONLY|GDAL_OF_VERBOSE_ERROR)
+
+ path = def_dict['path']
+ logging.debug('OpenEx(%s, %s)', path, str(kwargs))
+ return gdal.OpenEx(path, **kwargs)
+
+# Choose a random subdirectory under the given base directory and create
+# a new MVT dataset there.
+# The old MVT dataset (if any) remains, which is intentional we don't
+# want to disrupt browser caches and tile → attribute mapping.
+def createMVT(basedir, options=None):
+ drv = gdal.GetDriverByName('MVT')
+ if drv is None:
+ raise Exception('Unknown GDAL driver for format "MVT"')
+
+ # choose an available name at random from 00 to FF
+ weights = [1] * 256
+ for path in basedir.iterdir():
+ filename = path.name
+ if len(filename) == 2 and filename[0] in hexdigits and filename[1] in hexdigits:
+ n = int(filename, 16)
+ weights[n] = 0
+ if all([w == 0 for w in weights]):
+ raise Exception(f'No name available in {args.destdir} (forgot to cleanup?)')
+
+ name = random.choices(range(256), weights=weights, k=1)[0]
+ name = f'{name:0>2x}'
+ path = str(basedir.joinpath(name))
+
+ # merge options from the config file
+ defaults_map = {
+ 'min-zoom': 'MINZOOM',
+ 'max-zoom': 'MAXZOOM',
+ 'max-size': 'MAX_SIZE',
+ 'max-features': 'MAX_FEATURES',
+ 'simplification': 'SIMPLIFICATION',
+ 'simplification-max-zoom': 'SIMPLIFICATION_MAX_ZOOM',
+ 'tiling-scheme': 'TILING_SCHEME',
+ 'extent': 'EXTENT',
+ }
+ defaults_options = common.config.get('vector-tiles', None)
+ if defaults_options is not None:
+ for k, v in defaults_options.items():
+ if k not in defaults_map:
+ logging.warning('No default map for key "%s", ignoring', k)
+ continue
+ if options is None:
+ options = {}
+ k = defaults_map[k]
+ if k not in options:
+ if isinstance(v, list):
+ v = ','.join(map(str, v))
+ options[k] = v
+
+ kwargs = { 'eType': gdal.GDT_Unknown }
+ if options is not None:
+ kwargs['options'] = opts = []
+ for k, v in options.items():
+ opts.append(k + '=' + str(v))
+
+ logging.debug('Create(%s, %s, eType=%s%s)', drv.ShortName, path, kwargs['eType'],
+ ', options=' + str(kwargs['options']) if 'options' in kwargs else '')
+ ds = drv.Create(path, 0, 0, 0, **kwargs)
+ return ds, name
+
+# Filter a source layer.
+def getSourceLayer(ds_src, layerdef, extent=None):
+ layername = layerdef['__layername-src__']
+ lyr_src = ds_src.GetLayerByName(layername)
+ if lyr_src is None:
+ raise Exception(f'Source dataset has no layer named "{layername}"')
+
+ count0 = -1
+ if lyr_src.TestCapability(ogr.OLCFastFeatureCount):
+ count0 = lyr_src.GetFeatureCount(force=0)
+
+ srs_src = lyr_src.GetSpatialRef()
+ if srs_src is None:
+ raise Exception('Source layer "{}" has no SRS'.format(layername))
+ elif not srs_src.IsProjected():
+ raise Exception('Source layer "{}" SRS is not projected: {}'.format(
+ layername, srs_src.ExportToPrettyWkt()))
+ # "the value to multiply by linear distances to transform them to meters"
+ linearUnits = srs_src.GetLinearUnits()
+ logging.debug('Source layer "%s" SRS ("%s") linear units: %f', layername,
+ srs_src.GetName(), linearUnits)
+
+ # get geometry field of the source layer
+ defn = lyr_src.GetLayerDefn()
+ geomFieldCount = defn.GetGeomFieldCount()
+ if geomFieldCount != 1:
+ logging.warning('Source layer "%s" has %d != 1 geometry fields', lyr_src.GetName(), geomFieldCount)
+ geomField = defn.GetGeomFieldDefn(0)
+ geomType = geomField.GetType()
+ geomFieldName_esc = escapeIdentifier(geomField.GetName())
+
+ # transform extent to source SRS
+ if extent is None or extent.GetSpatialReference().IsSame(srs_src):
+ extent_src = extent
+ else:
+ extent_src = extent.Clone()
+ if extent_src.TransformTo(srs_src) != ogr.OGRERR_NONE:
+ raise Exception('Could not transform extent {} to {}'.format(
+ extent.ExportToWkt(), srs_src.GetName()))
+
+
+ reserved_fields = set(map(lambda x: x.lower(), [
+ 'geom',
+ 'GeomArea',
+ 'GeomPerimeter',
+ 'GeomLength',
+ ]))
+
+ # build SQL query
+ drv = ds_src.GetDriver()
+ columns = []
+ for i in range(defn.GetFieldCount()):
+ fld = defn.GetFieldDefn(i)
+ fldType = fld.GetType()
+ if fld.GetName().lower() in reserved_fields:
+ raise Exception(f'Invalid column name "{fld.GetName()}"')
+ fldName_esc = escapeIdentifier(fld.GetName())
+ column = 'm.' + fldName_esc
+ # for date/time/datetime fields, we let the RDBMS engine do the formatting if possible
+ if fldType == ogr.OFTDate:
+ if drv.ShortName == 'PostgreSQL':
+ column = 'to_char(' + column + ', \'YYYY-MM-DD\') AS '
+ elif drv.ShortName == 'GPKG':
+ column = 'strftime(\'%F\', ' + column + ') AS '
+ elif fldType == ogr.OFTTime:
+ if drv.ShortName == 'PostgreSQL':
+ column = 'to_char(' + column + ', \'HH24:MI:SSOF\') AS '
+ elif drv.ShortName == 'GPKG':
+ # XXX SQLite's strftime() lacks support for time zones
+ column = 'strftime(\'%T\', ' + column + ') AS '
+ elif fldType == ogr.OFTDateTime:
+ if drv.ShortName == 'PostgreSQL':
+ column = 'to_char(' + column + ', \'YYYY-MM-DD"T"HH24:MI:SSOF\') AS '
+ elif drv.ShortName == 'GPKG':
+ # XXX SQLite's strftime() lacks support for time zones
+ column = 'strftime(\'%FT%T\', ' + column + ') AS '
+ if column.endswith(' AS '):
+ column += fldName_esc
+ columns.append(column)
+
+ # add columns with computed info about the geometry (before clipping and rounded to 0.01m²/0.01m)
+ if ogr.GT_IsSubClassOf(geomType, ogr.wkbSurface) or ogr.GT_IsSubClassOf(geomType, ogr.wkbMultiSurface):
+ columns.append('ROUND(CAST(ST_Area(m.' + geomFieldName_esc + ') * '
+ + str(linearUnits) + '*' + str(linearUnits) + ' AS numeric), 2) AS "GeomArea"')
+ columns.append('ROUND(CAST(ST_Perimeter(m.' + geomFieldName_esc + ') * '
+ + str(linearUnits) + ' AS numeric), 2) AS "GeomPerimeter"')
+ elif ogr.GT_IsSubClassOf(geomType, ogr.wkbCurve) or ogr.GT_IsSubClassOf(geomType, ogr.wkbMultiCurve):
+ columns.append('ROUND(CAST(ST_Length(m.' + geomFieldName_esc + ') * '
+ + str(linearUnits) + ' AS numeric), 2) AS "GeomLength"')
+ columns.append('m.' + geomFieldName_esc)
+
+ query = 'SELECT ' + ', '.join(columns) + ' FROM ' + escapeIdentifier(lyr_src.GetName()) + ' m'
+
+ # add WHERE clauses and spatial filter; for GPKG/SQlite3, the query
+ # is too complex and the driver can't use ExecuteSQL()'s spatialFilter
+ # option so we add the ST_Intersects() condition
+ if drv.ShortName == 'PostgreSQL':
+ spatialFilter = extent_src
+ else:
+ spatialFilter = None
+
+ cond = layerdef.get('where', None)
+ if (extent_src is not None and spatialFilter is None) or cond is not None:
+ query += ' WHERE '
+ if extent_src is not None and spatialFilter is None:
+ query += 'ST_Intersects(m.' + geomFieldName_esc + ', \'' + extent_src.ExportToWkt() + '\')'
+ if cond is not None:
+ query += ' AND '
+ if cond is not None:
+ query += '(' + cond.strip() + ')'
+
+ logging.debug('SQL query: %s', query)
+ lyr_src = ds_src.ExecuteSQL(query, spatialFilter=spatialFilter)
+
+ count1 = -1
+ if lyr_src.TestCapability(ogr.OLCFastFeatureCount):
+ count1 = lyr_src.GetFeatureCount(force=0)
+ if count0 >= 0 and count1 >= 0:
+ logging.debug('Source layer "%s" has %d features, of which %d are to be exported',
+ layername, count0, count1)
+ return lyr_src
+
+# Export a layer from the source dataset, returning the next FID availble.
+# TODO bump metadata_maxsize util the compressed version doesn't exceed 64k
+def exportLayer(lyr_dst, lyr_src, srs=None, clip=None, layername=None,
+ coordinate_precision=None, nextfid=0,
+ # rotate after the feature which bring the file to the 256kiB mark
+ metadata_maxsize=256*1024, metadata=None):
+ if srs is None:
+ raise Exception('Missing target SRS')
+ if clip is not None:
+ clip_srs = clip.GetSpatialReference()
+ if not clip_srs.IsSame(srs):
+ raise Exception('Clipping geometry {} has SRS {},\nexpected {}'.format(
+ clip.ExportToWkt(), clip_srs.ExportToPrettyWkt(), srs.ExportToPrettyWkt()))
+ if metadata is None:
+ metadata_fp = None
+ else:
+ metadata_fp = metadata.get('fp', None)
+ exportToJson_options = []
+ if coordinate_precision is not None:
+ exportToJson_options = ['COORDINATE_PRECISION=' + str(coordinate_precision)]
+ metadata_size = metadata.get('cursize', 0)
+
+ srs_src = lyr_src.GetLayerDefn().GetGeomFieldDefn(0).GetSpatialRef()
+ if srs.IsSame(srs_src):
+ logging.debug('Both source and destination have the same SRS (%s), skipping coordinate transformation',
+ srs.GetName())
+ ct = None
+ else:
+ # TODO Use GetSupportedSRSList() and SetActiveSRS() with GDAL ≥3.7.0
+ # when possible, see apps/ogr2ogr_lib.cpp
+ logging.debug('Creating transforming from source SRS (%s) to destination SRS (%s)',
+ srs_src.GetName(), srs.GetName())
+ ct = osr.CoordinateTransformation(srs_src, srs)
+ if ct is None:
+ raise Exception(f'Could not create transformation from source SRS ({srs_src.GetName()}) '
+ + f'to destination SRS ({srs.GetName()})')
+
+ # extract metadata index and filename from nextfid (an exception is
+ # raised if nextfid doesn't fit in 4 bytes)
+ bstr = nextfid.to_bytes(4, byteorder='big')
+ nextfid_idx = int.from_bytes(bstr[-2:], byteorder='big')
+ nextfid_p = int.from_bytes(bstr[:2], byteorder='big')
+ metadata_maxlen = 1 << (8*2)
+
+ defn_dst = lyr_dst.GetLayerDefn()
+
+ # we roll our own loop rather than using ogr2ogr/GDALVectorTranslate()
+ # since we want to do a custom JSON serialization at the same time,
+ # in order to avoid duplicating properties across tiles or clipping
+ # overlays at tile boundaries
+
+ count = 0
+ feature = lyr_src.GetNextFeature()
+ while feature is not None:
+ geom = feature.GetGeometryRef().Clone()
+ if ct is not None and geom.Transform(ct) != ogr.OGRERR_NONE:
+ raise Exception('Could not apply coordinate transformation')
+
+ if metadata is not None:
+ # serialize the feature to JSON and save it
+ # TODO there is a lot of room for improvement here
+ # - geometries could be exported to WKB or FlatGeoBuf rather than GeoJSON
+ # - columns (schema) could be saved in layers.json only, so
+ # the object would be an array of length n+1 where the
+ # geometry would be at index 0 or n
+ # - data could be serialized to binary, cf. BJSON or GeoBuf
+ feature.SetGeometry(geom)
+ obj = feature.ExportToJson(as_object=True, options=exportToJson_options)
+ #props = { key:value for key, value in obj['properties'].items() if value is not None }
+ props = obj['properties']
+ props['geom'] = obj['geometry']
+
+ bstr = json.dumps(props, ensure_ascii=False, separators=(',',':')).encode('utf-8')
+
+ #if nextfid_idx >= metadata_maxlen or nextfid_p * metadata_maxlen + nextfid_idx != nextfid:
+ # raise Exception(f'impossible: {nextfid_p}*{metadata_maxlen} + {nextfid_idx} != {nextfid}')
+
+ if metadata_fp is None:
+ metadata_fp = openNewMetadaFile(metadata, nextfid_p)
+ metadata_size = 0
+ else:
+ metadata_fp.write(b',')
+
+ metadata_size += 1 + metadata_fp.write(bstr)
+
+ # clip the feature and export it (without fields to MVT)
+ geom2 = geom if clip is None else geom.Intersection(clip)
+ feature2 = ogr.Feature(defn_dst)
+ feature2.SetGeometryDirectly(geom2)
+ feature2.SetFID(nextfid)
+
+ if lyr_dst.CreateFeature(feature2) != ogr.OGRERR_NONE:
+ raise Exception(f'Could not transfer source feature #{feature.GetFID()}')
+
+ nextfid += 1
+ if metadata is not None:
+ nextfid_idx += 1
+ if metadata_size > metadata_maxsize or nextfid_idx >= metadata_maxlen:
+ closeMetadataFile(metadata)
+ metadata_fp = metadata_size = None
+ nextfid_p += 1
+ nextfid_idx = 0
+ nextfid = nextfid_p * metadata_maxlen
+ # will open a new while when processiong the next feature
+
+ count += 1
+ feature = lyr_src.GetNextFeature()
+
+ logging.info('Exported %d features to layer "%s" from "%s"', count,
+ lyr_dst.GetName(), layername)
+
+ if metadata is not None:
+ # save size of the metadata file currently open for proper accounting
+ metadata['cursize'] = 0 if metadata_fp is None else metadata_size
+ return nextfid, count
+
+# Open a new metadata file.
+def openNewMetadaFile(metadata, nextfid_p):
+ name = nextfid_p.to_bytes(length=2, byteorder='big').hex()
+ name = Path(metadata['tempdir']).joinpath(name).with_suffix('.json')
+ logging.debug('open(%s)', str(name))
+
+ fd = os.open(name, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, mode=0o0644,
+ dir_fd=metadata.get('dirfd', None))
+ metadata['fp'] = fp = os.fdopen(fd, mode='wb', closefd=True)
+ fp.write(b'[')
+ return fp
+
+# Close a metadata file.
+def closeMetadataFile(metadata):
+ fp = metadata.pop('fp')
+ fp.write(b']')
+ logging.debug('close(%d)', fp.fileno())
+ fp.close()
+
+# Wrapper around os.write() that loops until the entire data is written.
+def write_all(fd, buf):
+ offset = 0
+ while offset < len(buf):
+ try:
+ n = os.write(fd, buf[offset:])
+ except IOError as e:
+ if e.errno != errno.EAGAIN:
+ raise e
+ #logging.debug('Got EAGAIN while trying to write')
+ else:
+ #logging.debug('Wrote %d bytes of %d', n, len(buf) - offset)
+ offset += n
+ return offset
+
+# Compress a file to Brotli.
+# TODO increase chunksize to see if helps with performances
+def compress_brotli(path, suffix='.br', dir_fd=None, chunksize=65536,
+ oFlags=0, **kwargs):
+ compressor = brotli.Compressor(**kwargs)
+ fd_in = os.open(path, O_RDONLY|O_CLOEXEC, dir_fd=dir_fd)
+ size_in = size_out = 0
+ try:
+ # hard-code the mode to save an fstat(2) call
+ fd_out = os.open(path + suffix, O_WRONLY|O_CREAT|O_CLOEXEC|oFlags,
+ mode=0o0644, dir_fd=dir_fd)
+ try:
+ buf_in = os.read(fd_in, chunksize)
+ while len(buf_in) > 0:
+ #logging.debug('Read %d bytes of %d', len(buf_in), chunksize)
+ size_in += len(buf_in)
+ size_out += write_all(fd_out, compressor.process(buf_in))
+ buf_in = os.read(fd_in, chunksize)
+ size_out += write_all(fd_out, compressor.finish())
+ finally:
+ os.close(fd_out)
+ finally:
+ os.close(fd_in)
+ return size_in, size_out
+
+
+if __name__ == '__main__':
+ common.init_logger(app=os.path.basename(__file__), level=logging.INFO)
+
+ parser = argparse.ArgumentParser(description='Export GIS layers to Mapbox Vector Tiles (MVT).')
+ parser.add_argument('--debug', action='count', default=0,
+ help=argparse.SUPPRESS)
+ parser.add_argument('--destdir', default=os.curdir,
+ help=f'destination directory for generated files (default: {os.curdir})')
+ parser.add_argument('--lockfile', default=None,
+ help=f'obtain an exclusive lock while exporting features to MVT')
+ parser.add_argument('--name', default=None,
+ help=f'name to use in the layer map (default: last component of DESTDIR)')
+ parser.add_argument('--webroot', default=None,
+ help=f'path to the web root')
+ parser.add_argument('--metadata', default=None,
+ help=f'path to the JSON metadata file (default: DESTDIR/../metadata.json)')
+ parser.add_argument('--metadata-lockfile', default=None,
+ help=f'path to the JSON metadata lock file (default: DESTDIR/../.metadata.json.lck)')
+ parser.add_argument('--compress', action=argparse.BooleanOptionalAction, default=False,
+ help='whether to compress tiles (default: False)')
+ parser.add_argument('groupname', nargs='*', help='group layer name(s) to process')
+ args = parser.parse_args()
+
+ if args.debug > 0:
+ logging.getLogger().setLevel(logging.DEBUG)
+ if args.debug > 1:
+ from http.client import HTTPConnection
+ HTTPConnection.debuglevel = 1
+ requests_log = logging.getLogger('urllib3')
+ requests_log.setLevel(logging.DEBUG)
+ requests_log.propagate = True
+
+ if args.debug > 0:
+ logging.getLogger().setLevel(logging.DEBUG)
+ if args.debug > 1:
+ gdal.ConfigurePythonLogging(enable_debug=True)
+
+ if args.compress:
+ # fail early if the module is missing
+ import brotli
+
+ destdirname = args.name if args.name is not None else Path(args.destdir).name
+ if not destdirname:
+ logging.error('Could not infer --name value')
+ exit(1)
+
+ common.load_config(groupnames=None if args.groupname == [] else args.groupname)
+
+ # validate configuration
+ if 'dataset' not in common.config:
+ raise Exception('Configuration does not specify source dataset')
+
+ export_layers = {}
+ for layername, layerdef in common.config.get('layers', {}).items():
+ exportdef = layerdef.get('publish', None)
+ if exportdef is None:
+ raise Exception(f'Layer "{layername}" has no publication definition')
+ if isinstance(exportdef, str):
+ exportdef = { exportdef:{} }
+ elif isinstance(exportdef, list):
+ exportdef = { l:{} for l in exportdef }
+ for export_layername, export_layerdef in exportdef.items():
+ if export_layername not in export_layers:
+ export_layers[export_layername] = []
+ export_layers[export_layername].append(export_layerdef)
+ export_layerdef['__layername-src__'] = layername
+
+ if args.destdir != os.curdir:
+ try:
+ os.mkdir(args.destdir, mode=0o0755)
+ except FileExistsError:
+ pass
+
+ # open source DS
+ ds = openSourceDS(common.config['dataset'])
+
+ # get configured Spatial Reference System and extent; force traditional GIS order
+ # on the target SRS since it's what MVT and JSON export are expecting
+ srs = common.getSRS(common.config.get('SRS', None))
+ srs.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)
+ extent, extent_simple = common.getExtent(common.config.get('extent', None), srs=srs)
+
+ if srs.IsProjected():
+ # will export with centimeter precision
+ linearUnits = srs.GetLinearUnits()
+ coordinate_precision = math.ceil(math.log10(linearUnits) + 2)
+ if coordinate_precision < 0:
+ coordinate_precision = 0
+ logging.debug('Coordinate precision: %d', coordinate_precision)
+
+ if args.lockfile is not None:
+ lock_fd = os.open(args.lockfile, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
+ logging.debug('flock("%s", LOCK_EX)', args.lockfile)
+ flock(lock_fd, LOCK_EX)
+
+ # always lock destdir (until the program exits) to avoid concurrent
+ # generations of the same tileset
+ destdir_fd = os.open(args.destdir, O_RDONLY|O_CLOEXEC|O_DIRECTORY)
+ logging.debug('flock("%s", LOCK_EX)', args.destdir)
+ flock(destdir_fd, LOCK_EX)
+
+ nFeatures = 0
+ tile_extension = '.pbf'
+ basedir = Path(f'/proc/self/fd/{destdir_fd}')
+ with tempfile.TemporaryDirectory() as tempdir:
+ # use a temporary directory for TEMPORARY_DB
+ logging.debug('Using "%s" as temporary directory', tempdir)
+
+ ds_dst, dirname = createMVT(basedir, options={
+ 'FORMAT': 'DIRECTORY',
+ 'COMPRESS': 'NO',
+ 'TYPE': 'overlay',
+ 'BUFFER': 32,
+ 'TILE_EXTENSION': tile_extension.removeprefix('.'),
+ 'TEMPORARY_DB': Path(tempdir).joinpath('tmp-mvt.db'),
+ })
+
+ # use a temporary directory for the metadata to avoid messing
+ # around with he MVT directory while building is in progress
+ metadata = { 'dirfd': destdir_fd }
+ metadata['tempdir'] = Path(tempfile.mkdtemp(prefix='.' + dirname + '-attr.',
+ dir=basedir)).relative_to(basedir)
+ try:
+ logging.debug('Using "%s" as temporary metadata directory', metadata['tempdir'])
+ os.chmod(metadata['tempdir'], 0o0755, dir_fd=metadata.get('dirfd', None),
+ follow_symlinks=False)
+ nextfid = 0
+ for layername, layerdefs in export_layers.items():
+ # create destination layer
+ lyr_dst = ds_dst.CreateLayer(layername, srs=srs, geom_type=ogr.wkbUnknown)
+ if lyr_dst is None:
+ raise Exception(f'Could not create destination layer "{layername}"')
+
+ for layerdef in layerdefs:
+ lyr_src = getSourceLayer(ds, layerdef, extent=extent)
+ try:
+ # use the non-dense geometry (a rectangle) for clipping as it's faster
+ # and there is no need to reproject (the SRSs match)
+ nextfid, count0 = exportLayer(lyr_dst, lyr_src,
+ layername=layerdef['__layername-src__'],
+ srs=srs, clip=extent_simple,
+ coordinate_precision=coordinate_precision,
+ nextfid=nextfid, metadata=metadata)
+ nFeatures += count0
+ finally:
+ ds.ReleaseResultSet(lyr_src)
+ lyr_dst = None
+
+ # close datasets
+ ds = None
+ ds_dst = None
+
+ # move metadata to the MVT structure (now that it has been closed)
+ logging.debug('rename(%s/%s, %s/%s/attr)',
+ args.destdir, metadata['tempdir'],
+ args.destdir, dirname)
+ os.rename(metadata['tempdir'],
+ Path(dirname).joinpath('attr'),
+ src_dir_fd=metadata.get('dirfd', None),
+ dst_dir_fd=destdir_fd)
+ metadata.pop('tempdir', None)
+
+ except Exception as e:
+ # cleanup
+ for root, dirnames, filenames, rootfd in os.fwalk(top=metadata['tempdir'],
+ dir_fd=metadata.get('dirfd', None), topdown=False,
+ follow_symlinks=False):
+ for filename in filenames:
+ #logging.debug('unlink(%s/%s)', root, filename)
+ os.unlink(filename, dir_fd=rootfd)
+ for dirname in dirnames:
+ #logging.debug('rmdir(%s/%s)', root, dirname)
+ os.rmdir(dirname, dir_fd=rootfd)
+ #logging.debug('rmdir(%s/%s)', args.destdir, metadata['tempdir'])
+ os.rmdir(metadata['tempdir'], dir_fd=metadata.get('dirfd', None))
+ raise e
+
+ finally:
+ if 'fp' in metadata:
+ closeMetadataFile(metadata)
+
+ srs = None
+ extent = None
+ extent_simple = None
+
+ try:
+ # OpenLayers doesn't make use of that file so delete it
+ os.unlink(Path(dirname).joinpath('metadata.json'), dir_fd=destdir_fd)
+ except FileNotFoundError:
+ pass
+
+ logging.info('Done creating tiles and attribute files (total %d features)', nFeatures)
+
+ if args.lockfile is not None:
+ # release the shared lock, the remaining shouldn't be as demanding
+ logging.debug('flock("%s", LOCK_UN)', args.lockfile)
+ flock(lock_fd, LOCK_UN)
+
+
+ stats_tiles = []
+ stats_metadata = []
+ if args.compress:
+ compress_mode = brotli.MODE_GENERIC
+
+ # collect stats and optionally compress
+ # TODO: optimize traversal, cf.
+ # https://r3n.hashnode.dev/tech-chronicles-conquer-memory-monsters-with-pythons-yield-in-large-directory-processing
+ # TODO: benchmark brotli quality to trade-off compression ratio vs. execution time
+ # (also, tile and attribute files can have different levels if needs be)
+ for root, _, filenames, rootfd in os.fwalk(top=dirname, dir_fd=destdir_fd,
+ follow_symlinks=False):
+ for filename in filenames:
+ lfilename = filename.lower()
+ if lfilename.endswith(tile_extension):
+ arr = stats_tiles
+ elif lfilename.endswith('.json'):
+ arr = stats_metadata
+ else:
+ continue
+
+ if not args.compress:
+ st = os.stat(filename, dir_fd=rootfd)
+ arr.append(st.st_size)
+ else:
+ #logging.debug("Compressing %s/%s", root, filename)
+ szi, szo = compress_brotli(filename, dir_fd=rootfd,
+ oFlags=O_EXCL, mode=compress_mode, quality=5)
+ arr.append((szi, szo))
+
+ # display stats
+ if len(stats_tiles) > 0:
+ stats_tiles = numpy.array(stats_tiles, dtype=numpy.int64, copy=False)
+
+ arr = stats_tiles[:,0] if args.compress else stats_tiles
+ tot = numpy.sum(arr)
+ logging.info('Tile count: %d [min=%s, max=%s, sum=%s, avg=%s]',
+ arr.size,
+ format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
+ format_bytes(tot), format_bytes(round(tot/arr.size)))
+
+ if args.compress:
+ arr = stats_tiles[:,1]
+ ztot = numpy.sum(arr)
+ logging.info('After compression: min=%s, max=%s, sum=%s, avg=%s (ratio %.1f%%)',
+ format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
+ format_bytes(ztot), format_bytes(round(ztot/arr.size)),
+ 100. * (1. - ztot/tot))
+
+ if len(stats_metadata) > 0:
+ stats_metadata = numpy.array(stats_metadata, dtype=numpy.int64, copy=False)
+
+ arr = stats_metadata[:,0] if args.compress else stats_metadata
+ tot = numpy.sum(arr)
+ logging.info('Metadata file count: %d [min=%s, max=%s, sum=%s, avg=%s]',
+ arr.size,
+ format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
+ format_bytes(tot), format_bytes(round(tot/arr.size)))
+
+ if args.compress:
+ arr = stats_metadata[:,1]
+ ztot = numpy.sum(arr)
+ logging.info('After compression: min=%s, max=%s, sum=%s, avg=%s (ratio %.1f%%)',
+ format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
+ format_bytes(ztot), format_bytes(round(ztot/arr.size)),
+ 100. * (1. - ztot/tot))
+
+
+ # update metadata.json
+ if args.metadata is None:
+ mapfile = Path(args.destdir).parent.joinpath('metadata.json')
+ else:
+ mapfile = Path(args.metadata)
+
+ # first, place an exclusive lock (it is important that all
+ # invocations of the program with the same --metadata file, even on
+ # different layer groups, share the same lock file to avoid
+ # concurrent updates of the medata.json)
+ if args.metadata_lockfile is None:
+ mapfile_lock = mapfile.with_name('.' + mapfile.name + '.lck')
+ else:
+ mapfile_lock = Path(args.metadata_lockfile)
+ mapfile_lock_fd = os.open(mapfile_lock, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
+ logging.debug('flock("%s", LOCK_EX)', mapfile_lock)
+ flock(mapfile_lock_fd, LOCK_EX)
+
+ # load the file
+ try:
+ with open(mapfile, mode='rb') as fp:
+ mapobj = json.load(fp)
+ except FileNotFoundError:
+ mapobj = {}
+
+ # remove the compressed version as we are prepare to update the
+ # uncompressed one
+ mapfile_compressed = mapfile.with_name(mapfile.name + '.br')
+ try:
+ logging.debug('unlink(%s)', mapfile_compressed)
+ os.unlink(mapfile_compressed)
+ except FileNotFoundError:
+ pass
+
+ k = 'layermap'
+ if k not in mapobj:
+ layermap = mapobj[k] = {}
+ else:
+ layermap = mapobj[k]
+ destpath = Path(args.destdir)
+ if args.webroot is not None:
+ destpath = destpath.relative_to(args.webroot)
+ destpath = '/' + destpath.as_posix().removeprefix('/') + '/' + dirname
+ layermap[destdirname] = destpath
+
+ # dump the new object to the temporary file
+ bstr = json.dumps(mapobj, ensure_ascii=False, separators=(',',':')).encode('utf-8')
+ mapfile_tmp = mapfile.with_stem('.' + mapfile.stem + '.new')
+ fd = os.open(mapfile_tmp, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
+ with os.fdopen(fd, mode='wb', closefd=True) as fp:
+ fp.write(bstr)
+
+ # compress the temporary file
+ if args.compress:
+ mapfile_tmp_compressed = mapfile_tmp.with_name(mapfile_tmp.name + '.br')
+ compressor = brotli.Compressor()
+ fd = os.open(mapfile_tmp_compressed, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
+ try:
+ write_all(fd, compressor.process(bstr))
+ write_all(fd, compressor.finish())
+ finally:
+ os.close(fd)
+
+ # rename temporary files to production (starting with the uncompressed one)
+ logging.debug('rename(%s, %s)', mapfile_tmp, mapfile)
+ os.rename(mapfile_tmp, mapfile)
+
+ if args.compress:
+ logging.debug('rename(%s, %s)', mapfile_tmp_compressed, mapfile_compressed)
+ os.rename(mapfile_tmp_compressed, mapfile_compressed)