From 7cda119879cf48ba72ba34522fa9cdf9ef6d9b49 Mon Sep 17 00:00:00 2001
From: Guilhem Moulin
Date: Sat, 21 Sep 2024 04:00:05 +0200
Subject: Add `webmap-publish` script to export layers to Mapbox Vector Tiles.

---
 webmap-publish | 760 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 760 insertions(+)
 create mode 100755 webmap-publish

diff --git a/webmap-publish b/webmap-publish
new file mode 100755
index 0000000..36bed88
--- /dev/null
+++ b/webmap-publish
@@ -0,0 +1,760 @@
+#!/usr/bin/python3
+
+#----------------------------------------------------------------------
+# Backend utilities for the Klimatanalys Norr project (create MVT tiles)
+# Copyright © 2024 Guilhem Moulin
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#----------------------------------------------------------------------
+
+from os import O_RDONLY, O_WRONLY, O_CREAT, O_EXCL, O_TRUNC, O_CLOEXEC, O_DIRECTORY
+import os
+import errno
+from fcntl import flock, LOCK_EX, LOCK_UN
+from string import hexdigits
+import json
+import logging
+import math
+import random
+import argparse
+import tempfile
+from pathlib import Path
+import numpy
+
+from osgeo import gdal, ogr, osr
+from osgeo.gdalconst import (
+    OF_VECTOR as GDAL_OF_VECTOR,
+    OF_READONLY as GDAL_OF_READONLY,
+    OF_UPDATE as GDAL_OF_UPDATE,
+    OF_VERBOSE_ERROR as GDAL_OF_VERBOSE_ERROR,
+)
+gdal.UseExceptions()
+
+import common
+from common import (
+    format_bytes,
+    gdalSetOpenExArgs,
+    gdalVersionMin,
+    gdalGetMetadataItem,
+    escapeIdentifier
+)
+
+# Open and return source dataset.
+def openSourceDS(def_dict):
+    # merge 'open-options-publish' into 'open-options'
+    opts2 = def_dict.get('open-options-publish', None)
+    if opts2 is not None:
+        if 'open-options' not in def_dict:
+            def_dict['open-options'] = {}
+        def_dict['open-options'] |= opts2
+    kwargs, _ = gdalSetOpenExArgs(def_dict, flags=GDAL_OF_READONLY|GDAL_OF_VERBOSE_ERROR)
+
+    path = def_dict['path']
+    logging.debug('OpenEx(%s, %s)', path, str(kwargs))
+    return gdal.OpenEx(path, **kwargs)
+
+# Choose a random subdirectory under the given base directory and create
+# a new MVT dataset there.
+# The old MVT dataset (if any) remains, which is intentional: we don't
+# want to disrupt browser caches and the tile → attribute mapping.
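+# For instance, after two runs the base directory may contain two sibling
+# tilesets (names are random, these are hypothetical):
+#   DESTDIR/a2/…   previous tileset, still served to clients with cached pages
+#   DESTDIR/3f/…   tileset being created by the current run
+# until the old one is garbage-collected out of band.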
+def createMVT(basedir, options=None):
+    drv = gdal.GetDriverByName('MVT')
+    if drv is None:
+        raise Exception('Unknown GDAL driver for format "MVT"')
+
+    # choose an available name at random from 00 to FF
+    weights = [1] * 256
+    for path in basedir.iterdir():
+        filename = path.name
+        if len(filename) == 2 and filename[0] in hexdigits and filename[1] in hexdigits:
+            n = int(filename, 16)
+            weights[n] = 0
+    if all(w == 0 for w in weights):
+        raise Exception(f'No name available in {basedir} (forgot to clean up?)')
+
+    name = random.choices(range(256), weights=weights, k=1)[0]
+    name = f'{name:0>2x}'
+    path = str(basedir.joinpath(name))
+
+    # merge options from the config file
+    defaults_map = {
+        'min-zoom': 'MINZOOM',
+        'max-zoom': 'MAXZOOM',
+        'max-size': 'MAX_SIZE',
+        'max-features': 'MAX_FEATURES',
+        'simplification': 'SIMPLIFICATION',
+        'simplification-max-zoom': 'SIMPLIFICATION_MAX_ZOOM',
+        'tiling-scheme': 'TILING_SCHEME',
+        'extent': 'EXTENT',
+    }
+    defaults_options = common.config.get('vector-tiles', None)
+    if defaults_options is not None:
+        for k, v in defaults_options.items():
+            if k not in defaults_map:
+                logging.warning('No default map for key "%s", ignoring', k)
+                continue
+            if options is None:
+                options = {}
+            k = defaults_map[k]
+            if k not in options:
+                if isinstance(v, list):
+                    v = ','.join(map(str, v))
+                options[k] = v
+
+    kwargs = { 'eType': gdal.GDT_Unknown }
+    if options is not None:
+        kwargs['options'] = opts = []
+        for k, v in options.items():
+            opts.append(k + '=' + str(v))
+
+    logging.debug('Create(%s, %s, eType=%s%s)', drv.ShortName, path, kwargs['eType'],
+        ', options=' + str(kwargs['options']) if 'options' in kwargs else '')
+    ds = drv.Create(path, 0, 0, 0, **kwargs)
+    return ds, name
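+
+# Illustration of the default-merging above: assuming the configuration file
+# carries a 'vector-tiles' section such as (hypothetical values)
+#     vector-tiles:
+#       min-zoom: 0
+#       max-zoom: 12
+#       extent: [110720, 6927136, 1159296, 7975712]
+# createMVT() turns it into Create() options
+#     ['MINZOOM=0', 'MAXZOOM=12', 'EXTENT=110720,6927136,1159296,7975712']
+# with explicitly passed options taking precedence over these defaults.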
+
+# Filter a source layer.
+def getSourceLayer(ds_src, layerdef, extent=None):
+    layername = layerdef['__layername-src__']
+    lyr_src = ds_src.GetLayerByName(layername)
+    if lyr_src is None:
+        raise Exception(f'Source dataset has no layer named "{layername}"')
+
+    count0 = -1
+    if lyr_src.TestCapability(ogr.OLCFastFeatureCount):
+        count0 = lyr_src.GetFeatureCount(force=0)
+
+    srs_src = lyr_src.GetSpatialRef()
+    if srs_src is None:
+        raise Exception('Source layer "{}" has no SRS'.format(layername))
+    elif not srs_src.IsProjected():
+        raise Exception('Source layer "{}" SRS is not projected: {}'.format(
+            layername, srs_src.ExportToPrettyWkt()))
+    # "the value to multiply by linear distances to transform them to meters"
+    linearUnits = srs_src.GetLinearUnits()
+    logging.debug('Source layer "%s" SRS ("%s") linear units: %f', layername,
+        srs_src.GetName(), linearUnits)
+
+    # get geometry field of the source layer
+    defn = lyr_src.GetLayerDefn()
+    geomFieldCount = defn.GetGeomFieldCount()
+    if geomFieldCount != 1:
+        logging.warning('Source layer "%s" has %d != 1 geometry fields', lyr_src.GetName(), geomFieldCount)
+    geomField = defn.GetGeomFieldDefn(0)
+    geomType = geomField.GetType()
+    geomFieldName_esc = escapeIdentifier(geomField.GetName())
+
+    # transform extent to source SRS
+    if extent is None or extent.GetSpatialReference().IsSame(srs_src):
+        extent_src = extent
+    else:
+        extent_src = extent.Clone()
+        if extent_src.TransformTo(srs_src) != ogr.OGRERR_NONE:
+            raise Exception('Could not transform extent {} to {}'.format(
+                extent.ExportToWkt(), srs_src.GetName()))
+
+    reserved_fields = set(map(lambda x: x.lower(), [
+        'geom',
+        'GeomArea',
+        'GeomPerimeter',
+        'GeomLength',
+    ]))
+
+    # build SQL query
+    drv = ds_src.GetDriver()
+    columns = []
+    for i in range(defn.GetFieldCount()):
+        fld = defn.GetFieldDefn(i)
+        fldType = fld.GetType()
+        if fld.GetName().lower() in reserved_fields:
+            raise Exception(f'Invalid column name "{fld.GetName()}"')
+        fldName_esc = escapeIdentifier(fld.GetName())
+        column = 'm.' + fldName_esc
+        # for date/time/datetime fields, we let the RDBMS engine do the formatting if possible
+        if fldType == ogr.OFTDate:
+            if drv.ShortName == 'PostgreSQL':
+                column = 'to_char(' + column + ', \'YYYY-MM-DD\') AS '
+            elif drv.ShortName == 'GPKG':
+                column = 'strftime(\'%F\', ' + column + ') AS '
+        elif fldType == ogr.OFTTime:
+            if drv.ShortName == 'PostgreSQL':
+                column = 'to_char(' + column + ', \'HH24:MI:SSOF\') AS '
+            elif drv.ShortName == 'GPKG':
+                # XXX SQLite's strftime() lacks support for time zones
+                column = 'strftime(\'%T\', ' + column + ') AS '
+        elif fldType == ogr.OFTDateTime:
+            if drv.ShortName == 'PostgreSQL':
+                column = 'to_char(' + column + ', \'YYYY-MM-DD"T"HH24:MI:SSOF\') AS '
+            elif drv.ShortName == 'GPKG':
+                # XXX SQLite's strftime() lacks support for time zones
+                column = 'strftime(\'%FT%T\', ' + column + ') AS '
+        if column.endswith(' AS '):
+            column += fldName_esc
+        columns.append(column)
+
+    # add columns with computed info about the geometry (before clipping and rounded to 0.01m²/0.01m)
+    if ogr.GT_IsSubClassOf(geomType, ogr.wkbSurface) or ogr.GT_IsSubClassOf(geomType, ogr.wkbMultiSurface):
+        columns.append('ROUND(CAST(ST_Area(m.' + geomFieldName_esc + ') * '
+            + str(linearUnits) + '*' + str(linearUnits) + ' AS numeric), 2) AS "GeomArea"')
+        columns.append('ROUND(CAST(ST_Perimeter(m.' + geomFieldName_esc + ') * '
+            + str(linearUnits) + ' AS numeric), 2) AS "GeomPerimeter"')
+    elif ogr.GT_IsSubClassOf(geomType, ogr.wkbCurve) or ogr.GT_IsSubClassOf(geomType, ogr.wkbMultiCurve):
+        columns.append('ROUND(CAST(ST_Length(m.' + geomFieldName_esc + ') * '
+            + str(linearUnits) + ' AS numeric), 2) AS "GeomLength"')
+    columns.append('m.' + geomFieldName_esc)
+
+    query = 'SELECT ' + ', '.join(columns) + ' FROM ' + escapeIdentifier(lyr_src.GetName()) + ' m'
+
+    # add WHERE clauses and spatial filter; for GPKG/SQLite3, the query
+    # is too complex and the driver can't use ExecuteSQL()'s spatialFilter
+    # option, so we add an ST_Intersects() condition instead
+    if drv.ShortName == 'PostgreSQL':
+        spatialFilter = extent_src
+    else:
+        spatialFilter = None
+
+    cond = layerdef.get('where', None)
+    if (extent_src is not None and spatialFilter is None) or cond is not None:
+        query += ' WHERE '
+        if extent_src is not None and spatialFilter is None:
+            query += 'ST_Intersects(m.' + geomFieldName_esc + ', \'' + extent_src.ExportToWkt() + '\')'
+            if cond is not None:
+                query += ' AND '
+        if cond is not None:
+            query += '(' + cond.strip() + ')'
+
+    logging.debug('SQL query: %s', query)
+    lyr_src = ds_src.ExecuteSQL(query, spatialFilter=spatialFilter)
+
+    count1 = -1
+    if lyr_src.TestCapability(ogr.OLCFastFeatureCount):
+        count1 = lyr_src.GetFeatureCount(force=0)
+    if count0 >= 0 and count1 >= 0:
+        logging.debug('Source layer "%s" has %d features, of which %d are to be exported',
+            layername, count0, count1)
+    return lyr_src
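+
+# For a PostgreSQL source with polygon geometries, the query built above looks
+# roughly like this (illustrative layer and column names):
+#   SELECT m."id", …,
+#          ROUND(CAST(ST_Area(m."geom") * 1.0*1.0 AS numeric), 2) AS "GeomArea",
+#          ROUND(CAST(ST_Perimeter(m."geom") * 1.0 AS numeric), 2) AS "GeomPerimeter",
+#          m."geom"
+#   FROM "mylayer" m WHERE ("status" = 'active')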
+
+# Export a layer from the source dataset, returning the next available FID.
+# TODO bump metadata_maxsize until the compressed version doesn't exceed 64k
+def exportLayer(lyr_dst, lyr_src, srs=None, clip=None, layername=None,
+        coordinate_precision=None, nextfid=0,
+        # rotate after the feature which brings the file to the 256kiB mark
+        metadata_maxsize=256*1024, metadata=None):
+    if srs is None:
+        raise Exception('Missing target SRS')
+    if clip is not None:
+        clip_srs = clip.GetSpatialReference()
+        if not clip_srs.IsSame(srs):
+            raise Exception('Clipping geometry {} has SRS {},\nexpected {}'.format(
+                clip.ExportToWkt(), clip_srs.ExportToPrettyWkt(), srs.ExportToPrettyWkt()))
+    if metadata is None:
+        metadata_fp = None
+    else:
+        metadata_fp = metadata.get('fp', None)
+        exportToJson_options = []
+        if coordinate_precision is not None:
+            exportToJson_options = ['COORDINATE_PRECISION=' + str(coordinate_precision)]
+        metadata_size = metadata.get('cursize', 0)
+
+    srs_src = lyr_src.GetLayerDefn().GetGeomFieldDefn(0).GetSpatialRef()
+    if srs.IsSame(srs_src):
+        logging.debug('Both source and destination have the same SRS (%s), skipping coordinate transformation',
+            srs.GetName())
+        ct = None
+    else:
+        # TODO Use GetSupportedSRSList() and SetActiveSRS() with GDAL ≥3.7.0
+        # when possible, see apps/ogr2ogr_lib.cpp
+        logging.debug('Creating transformation from source SRS (%s) to destination SRS (%s)',
+            srs_src.GetName(), srs.GetName())
+        ct = osr.CoordinateTransformation(srs_src, srs)
+        if ct is None:
+            raise Exception(f'Could not create transformation from source SRS ({srs_src.GetName()}) '
+                + f'to destination SRS ({srs.GetName()})')
+
+    # extract metadata index and filename from nextfid (an exception is
+    # raised if nextfid doesn't fit in 4 bytes)
+    bstr = nextfid.to_bytes(4, byteorder='big')
+    nextfid_idx = int.from_bytes(bstr[-2:], byteorder='big')
+    nextfid_p = int.from_bytes(bstr[:2], byteorder='big')
+    metadata_maxlen = 1 << (8*2)
+
+    defn_dst = lyr_dst.GetLayerDefn()
+
+    # we roll our own loop rather than using ogr2ogr/GDALVectorTranslate()
+    # since we want to do a custom JSON serialization at the same time,
+    # in order to avoid duplicating properties across tiles or clipping
+    # overlays at tile boundaries
+    count = 0
+    feature = lyr_src.GetNextFeature()
+    while feature is not None:
+        geom = feature.GetGeometryRef().Clone()
+        if ct is not None and geom.Transform(ct) != ogr.OGRERR_NONE:
+            raise Exception('Could not apply coordinate transformation')
+
+        if metadata is not None:
+            # serialize the feature to JSON and save it
+            # TODO there is a lot of room for improvement here
+            # - geometries could be exported to WKB or FlatGeoBuf rather than GeoJSON
+            # - columns (schema) could be saved in layers.json only, so
+            #   the object would be an array of length n+1 where the
+            #   geometry would be at index 0 or n
+            # - data could be serialized to binary, cf. BJSON or GeoBuf
+            feature.SetGeometry(geom)
+            obj = feature.ExportToJson(as_object=True, options=exportToJson_options)
+            #props = { key:value for key, value in obj['properties'].items() if value is not None }
+            props = obj['properties']
+            props['geom'] = obj['geometry']
+
+            bstr = json.dumps(props, ensure_ascii=False, separators=(',',':')).encode('utf-8')
+
+            #if nextfid_idx >= metadata_maxlen or nextfid_p * metadata_maxlen + nextfid_idx != nextfid:
+            #    raise Exception(f'impossible: {nextfid_p}*{metadata_maxlen} + {nextfid_idx} != {nextfid}')
+
+            if metadata_fp is None:
+                metadata_fp = openNewMetadataFile(metadata, nextfid_p)
+                metadata_size = 0
+            else:
+                metadata_fp.write(b',')
+                metadata_size += 1
+
+            # account for the record size (the rotation threshold is in bytes)
+            metadata_size += len(bstr)
+            metadata_fp.write(bstr)
+
+        # clip the feature and export it (without fields) to MVT
+        geom2 = geom if clip is None else geom.Intersection(clip)
+        feature2 = ogr.Feature(defn_dst)
+        feature2.SetGeometryDirectly(geom2)
+        feature2.SetFID(nextfid)
+
+        if lyr_dst.CreateFeature(feature2) != ogr.OGRERR_NONE:
+            raise Exception(f'Could not transfer source feature #{feature.GetFID()}')
+
+        nextfid += 1
+        if metadata is not None:
+            nextfid_idx += 1
+            if metadata_size > metadata_maxsize or nextfid_idx >= metadata_maxlen:
+                closeMetadataFile(metadata)
+                metadata_fp = metadata_size = None
+                nextfid_p += 1
+                nextfid_idx = 0
+                nextfid = nextfid_p * metadata_maxlen
+                # will open a new file when processing the next feature
+
+        count += 1
+        feature = lyr_src.GetNextFeature()
+
+    logging.info('Exported %d features to layer "%s" from "%s"', count,
+        lyr_dst.GetName(), layername)
+
+    if metadata is not None:
+        # save size of the metadata file currently open for proper accounting
+        metadata['cursize'] = 0 if metadata_fp is None else metadata_size
+    return nextfid, count
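+
+# Worked example of the FID ↔ attribute-file mapping implemented above
+# (values are illustrative): nextfid = 0x00030010 splits into the two high
+# bytes 0x0003 (attribute file "0003.json") and the two low bytes 0x0010
+# (index 16 in that file's JSON array), so clients can locate a feature's
+# attributes from the FID embedded in the tile alone.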
+
+# Open a new metadata file.
+def openNewMetadataFile(metadata, nextfid_p):
+    name = nextfid_p.to_bytes(length=2, byteorder='big').hex()
+    name = Path(metadata['tempdir']).joinpath(name).with_suffix('.json')
+    logging.debug('open(%s)', str(name))
+
+    fd = os.open(name, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, mode=0o0644,
+        dir_fd=metadata.get('dirfd', None))
+    metadata['fp'] = fp = os.fdopen(fd, mode='wb', closefd=True)
+    fp.write(b'[')
+    return fp
+
+# Close a metadata file.
+def closeMetadataFile(metadata):
+    fp = metadata.pop('fp')
+    fp.write(b']')
+    logging.debug('close(%d)', fp.fileno())
+    fp.close()
+
+# Wrapper around os.write() that loops until the entire data is written.
+def write_all(fd, buf):
+    offset = 0
+    while offset < len(buf):
+        try:
+            n = os.write(fd, buf[offset:])
+        except IOError as e:
+            if e.errno != errno.EAGAIN:
+                raise e
+            #logging.debug('Got EAGAIN while trying to write')
+        else:
+            #logging.debug('Wrote %d bytes of %d', n, len(buf) - offset)
+            offset += n
+    return offset
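+
+# Note that os.write() may write fewer bytes than requested (and raises
+# EAGAIN on a non-blocking descriptor), hence the loop above instead of a
+# single call.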
+
+# Compress a file to Brotli.
+# TODO increase chunksize to see if it helps with performance
+def compress_brotli(path, suffix='.br', dir_fd=None, chunksize=65536,
+        oFlags=0, **kwargs):
+    compressor = brotli.Compressor(**kwargs)
+    fd_in = os.open(path, O_RDONLY|O_CLOEXEC, dir_fd=dir_fd)
+    size_in = size_out = 0
+    try:
+        # hard-code the mode to save an fstat(2) call
+        fd_out = os.open(path + suffix, O_WRONLY|O_CREAT|O_CLOEXEC|oFlags,
+            mode=0o0644, dir_fd=dir_fd)
+        try:
+            buf_in = os.read(fd_in, chunksize)
+            while len(buf_in) > 0:
+                #logging.debug('Read %d bytes of %d', len(buf_in), chunksize)
+                size_in += len(buf_in)
+                size_out += write_all(fd_out, compressor.process(buf_in))
+                buf_in = os.read(fd_in, chunksize)
+            size_out += write_all(fd_out, compressor.finish())
+        finally:
+            os.close(fd_out)
+    finally:
+        os.close(fd_in)
+    return size_in, size_out
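+
+# Example invocation (hypothetical paths and group name):
+#   webmap-publish --destdir=/var/www/webmap/tiles/mygroup \
+#       --webroot=/var/www/webmap --compress mygroup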
+
+
+if __name__ == '__main__':
+    common.init_logger(app=os.path.basename(__file__), level=logging.INFO)
+
+    parser = argparse.ArgumentParser(description='Export GIS layers to Mapbox Vector Tiles (MVT).')
+    parser.add_argument('--debug', action='count', default=0,
+        help=argparse.SUPPRESS)
+    parser.add_argument('--destdir', default=os.curdir,
+        help=f'destination directory for generated files (default: {os.curdir})')
+    parser.add_argument('--lockfile', default=None,
+        help='obtain an exclusive lock while exporting features to MVT')
+    parser.add_argument('--name', default=None,
+        help='name to use in the layer map (default: last component of DESTDIR)')
+    parser.add_argument('--webroot', default=None,
+        help='path to the web root')
+    parser.add_argument('--metadata', default=None,
+        help='path to the JSON metadata file (default: DESTDIR/../metadata.json)')
+    parser.add_argument('--metadata-lockfile', default=None,
+        help='path to the JSON metadata lock file (default: DESTDIR/../.metadata.json.lck)')
+    parser.add_argument('--compress', action=argparse.BooleanOptionalAction, default=False,
+        help='whether to compress tiles (default: False)')
+    parser.add_argument('groupname', nargs='*', help='group layer name(s) to process')
+    args = parser.parse_args()
+
+    if args.debug > 0:
+        logging.getLogger().setLevel(logging.DEBUG)
+    if args.debug > 1:
+        from http.client import HTTPConnection
+        HTTPConnection.debuglevel = 1
+        requests_log = logging.getLogger('urllib3')
+        requests_log.setLevel(logging.DEBUG)
+        requests_log.propagate = True
+        gdal.ConfigurePythonLogging(enable_debug=True)
+
+    if args.compress:
+        # fail early if the module is missing
+        import brotli
+
+    destdirname = args.name if args.name is not None else Path(args.destdir).name
+    if not destdirname:
+        logging.error('Could not infer --name value')
+        exit(1)
+
+    common.load_config(groupnames=None if args.groupname == [] else args.groupname)
+
+    # validate configuration
+    if 'dataset' not in common.config:
+        raise Exception('Configuration does not specify source dataset')
+
+    export_layers = {}
+    for layername, layerdef in common.config.get('layers', {}).items():
+        exportdef = layerdef.get('publish', None)
+        if exportdef is None:
+            raise Exception(f'Layer "{layername}" has no publication definition')
+        if isinstance(exportdef, str):
+            exportdef = { exportdef:{} }
+        elif isinstance(exportdef, list):
+            exportdef = { l:{} for l in exportdef }
+        for export_layername, export_layerdef in exportdef.items():
+            if export_layername not in export_layers:
+                export_layers[export_layername] = []
+            export_layers[export_layername].append(export_layerdef)
+            export_layerdef['__layername-src__'] = layername
+
+    if args.destdir != os.curdir:
+        try:
+            os.mkdir(args.destdir, mode=0o0755)
+        except FileExistsError:
+            pass
+
+    # open source DS
+    ds = openSourceDS(common.config['dataset'])
+
+    # get configured Spatial Reference System and extent; force traditional GIS order
+    # on the target SRS since it's what MVT and JSON export are expecting
+    srs = common.getSRS(common.config.get('SRS', None))
+    srs.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)
+    extent, extent_simple = common.getExtent(common.config.get('extent', None), srs=srs)
+
+    if srs.IsProjected():
+        # will export with centimeter precision
+        linearUnits = srs.GetLinearUnits()
+        coordinate_precision = math.ceil(math.log10(linearUnits) + 2)
+        if coordinate_precision < 0:
+            coordinate_precision = 0
+        logging.debug('Coordinate precision: %d', coordinate_precision)
+    else:
+        # fall back to GDAL's default coordinate precision
+        coordinate_precision = None
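+
+    # E.g. for a metre-based projected SRS (linearUnits = 1.0) this yields
+    # ceil(log10(1.0) + 2) = 2 decimal places, i.e. centimetres.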
+
+    if args.lockfile is not None:
+        lock_fd = os.open(args.lockfile, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
+        logging.debug('flock("%s", LOCK_EX)', args.lockfile)
+        flock(lock_fd, LOCK_EX)
+
+    # always lock destdir (until the program exits) to avoid concurrent
+    # generations of the same tileset
+    destdir_fd = os.open(args.destdir, O_RDONLY|O_CLOEXEC|O_DIRECTORY)
+    logging.debug('flock("%s", LOCK_EX)', args.destdir)
+    flock(destdir_fd, LOCK_EX)
+
+    nFeatures = 0
+    tile_extension = '.pbf'
+    basedir = Path(f'/proc/self/fd/{destdir_fd}')
+    with tempfile.TemporaryDirectory() as tempdir:
+        # use a temporary directory for TEMPORARY_DB
+        logging.debug('Using "%s" as temporary directory', tempdir)
+
+        ds_dst, dirname = createMVT(basedir, options={
+            'FORMAT': 'DIRECTORY',
+            'COMPRESS': 'NO',
+            'TYPE': 'overlay',
+            'BUFFER': 32,
+            'TILE_EXTENSION': tile_extension.removeprefix('.'),
+            'TEMPORARY_DB': Path(tempdir).joinpath('tmp-mvt.db'),
+        })
+
+        # use a temporary directory for the metadata to avoid messing
+        # around with the MVT directory while building is in progress
+        metadata = { 'dirfd': destdir_fd }
+        metadata['tempdir'] = Path(tempfile.mkdtemp(prefix='.' + dirname + '-attr.',
+            dir=basedir)).relative_to(basedir)
+        try:
+            logging.debug('Using "%s" as temporary metadata directory', metadata['tempdir'])
+            os.chmod(metadata['tempdir'], 0o0755, dir_fd=metadata.get('dirfd', None),
+                follow_symlinks=False)
+            nextfid = 0
+            for layername, layerdefs in export_layers.items():
+                # create destination layer
+                lyr_dst = ds_dst.CreateLayer(layername, srs=srs, geom_type=ogr.wkbUnknown)
+                if lyr_dst is None:
+                    raise Exception(f'Could not create destination layer "{layername}"')
+
+                for layerdef in layerdefs:
+                    lyr_src = getSourceLayer(ds, layerdef, extent=extent)
+                    try:
+                        # use the non-dense geometry (a rectangle) for clipping as it's faster
+                        # and there is no need to reproject (the SRSs match)
+                        nextfid, count0 = exportLayer(lyr_dst, lyr_src,
+                            layername=layerdef['__layername-src__'],
+                            srs=srs, clip=extent_simple,
+                            coordinate_precision=coordinate_precision,
+                            nextfid=nextfid, metadata=metadata)
+                        nFeatures += count0
+                    finally:
+                        ds.ReleaseResultSet(lyr_src)
+                lyr_dst = None
+
+            # close datasets
+            ds = None
+            ds_dst = None
+
+            # move metadata to the MVT structure (now that it has been closed)
+            logging.debug('rename(%s/%s, %s/%s/attr)',
+                args.destdir, metadata['tempdir'],
+                args.destdir, dirname)
+            os.rename(metadata['tempdir'],
+                Path(dirname).joinpath('attr'),
+                src_dir_fd=metadata.get('dirfd', None),
+                dst_dir_fd=destdir_fd)
+            metadata.pop('tempdir', None)
+
+        except Exception as e:
+            # cleanup
+            for root, dirnames, filenames, rootfd in os.fwalk(top=metadata['tempdir'],
+                    dir_fd=metadata.get('dirfd', None), topdown=False,
+                    follow_symlinks=False):
+                for filename in filenames:
+                    #logging.debug('unlink(%s/%s)', root, filename)
+                    os.unlink(filename, dir_fd=rootfd)
+                for dirname in dirnames:
+                    #logging.debug('rmdir(%s/%s)', root, dirname)
+                    os.rmdir(dirname, dir_fd=rootfd)
+            #logging.debug('rmdir(%s/%s)', args.destdir, metadata['tempdir'])
+            os.rmdir(metadata['tempdir'], dir_fd=metadata.get('dirfd', None))
+            raise e
+
+        finally:
+            if 'fp' in metadata:
+                closeMetadataFile(metadata)
+
+    srs = None
+    extent = None
+    extent_simple = None
+
+    try:
+        # OpenLayers doesn't make use of that file so delete it
+        os.unlink(Path(dirname).joinpath('metadata.json'), dir_fd=destdir_fd)
+    except FileNotFoundError:
+        pass
+
+    logging.info('Done creating tiles and attribute files (total %d features)', nFeatures)
+
+    if args.lockfile is not None:
+        # release the exclusive lock, the remaining work shouldn't be as demanding
+        logging.debug('flock("%s", LOCK_UN)', args.lockfile)
+        flock(lock_fd, LOCK_UN)
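+
+    # At this point the published tree looks like ("3f" is an illustrative
+    # random name):
+    #   DESTDIR/3f/{z}/{x}/{y}.pbf    vector tiles
+    #   DESTDIR/3f/attr/0000.json, …  per-feature attributes, indexed by FID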
+
+    stats_tiles = []
+    stats_metadata = []
+    if args.compress:
+        compress_mode = brotli.MODE_GENERIC
+
+    # collect stats and optionally compress
+    # TODO: optimize traversal, cf.
+    # https://r3n.hashnode.dev/tech-chronicles-conquer-memory-monsters-with-pythons-yield-in-large-directory-processing
+    # TODO: benchmark brotli quality to trade off compression ratio vs. execution
+    # time (also, tile and attribute files can have different levels if need be)
+    for root, _, filenames, rootfd in os.fwalk(top=dirname, dir_fd=destdir_fd,
+            follow_symlinks=False):
+        for filename in filenames:
+            lfilename = filename.lower()
+            if lfilename.endswith(tile_extension):
+                arr = stats_tiles
+            elif lfilename.endswith('.json'):
+                arr = stats_metadata
+            else:
+                continue
+
+            if not args.compress:
+                st = os.stat(filename, dir_fd=rootfd)
+                arr.append(st.st_size)
+            else:
+                #logging.debug("Compressing %s/%s", root, filename)
+                szi, szo = compress_brotli(filename, dir_fd=rootfd,
+                    oFlags=O_EXCL, mode=compress_mode, quality=5)
+                arr.append((szi, szo))
+
+    # display stats
+    if len(stats_tiles) > 0:
+        stats_tiles = numpy.asarray(stats_tiles, dtype=numpy.int64)
+
+        arr = stats_tiles[:,0] if args.compress else stats_tiles
+        tot = numpy.sum(arr)
+        logging.info('Tile count: %d [min=%s, max=%s, sum=%s, avg=%s]',
+            arr.size,
+            format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
+            format_bytes(tot), format_bytes(round(tot/arr.size)))
+
+        if args.compress:
+            arr = stats_tiles[:,1]
+            ztot = numpy.sum(arr)
+            logging.info('After compression: min=%s, max=%s, sum=%s, avg=%s (ratio %.1f%%)',
+                format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
+                format_bytes(ztot), format_bytes(round(ztot/arr.size)),
+                100. * (1. - ztot/tot))
+
+    if len(stats_metadata) > 0:
+        stats_metadata = numpy.asarray(stats_metadata, dtype=numpy.int64)
+
+        arr = stats_metadata[:,0] if args.compress else stats_metadata
+        tot = numpy.sum(arr)
+        logging.info('Metadata file count: %d [min=%s, max=%s, sum=%s, avg=%s]',
+            arr.size,
+            format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
+            format_bytes(tot), format_bytes(round(tot/arr.size)))
+
+        if args.compress:
+            arr = stats_metadata[:,1]
+            ztot = numpy.sum(arr)
+            logging.info('After compression: min=%s, max=%s, sum=%s, avg=%s (ratio %.1f%%)',
+                format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
+                format_bytes(ztot), format_bytes(round(ztot/arr.size)),
+                100. * (1. - ztot/tot))
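+
+    # The layer map updated below is a small JSON object mapping tileset names
+    # to their URL paths, e.g. (illustrative):
+    #   {"layermap":{"mygroup":"/tiles/mygroup/3f"}}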
+
+    # update metadata.json
+    if args.metadata is None:
+        mapfile = Path(args.destdir).parent.joinpath('metadata.json')
+    else:
+        mapfile = Path(args.metadata)
+
+    # first, place an exclusive lock (it is important that all
+    # invocations of the program with the same --metadata file, even on
+    # different layer groups, share the same lock file to avoid
+    # concurrent updates of the metadata.json)
+    if args.metadata_lockfile is None:
+        mapfile_lock = mapfile.with_name('.' + mapfile.name + '.lck')
+    else:
+        mapfile_lock = Path(args.metadata_lockfile)
+    mapfile_lock_fd = os.open(mapfile_lock, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
+    logging.debug('flock("%s", LOCK_EX)', mapfile_lock)
+    flock(mapfile_lock_fd, LOCK_EX)
+
+    # load the file
+    try:
+        with open(mapfile, mode='rb') as fp:
+            mapobj = json.load(fp)
+    except FileNotFoundError:
+        mapobj = {}
+
+    # remove the compressed version as we prepare to update the
+    # uncompressed one
+    mapfile_compressed = mapfile.with_name(mapfile.name + '.br')
+    try:
+        logging.debug('unlink(%s)', mapfile_compressed)
+        os.unlink(mapfile_compressed)
+    except FileNotFoundError:
+        pass
+
+    k = 'layermap'
+    if k not in mapobj:
+        layermap = mapobj[k] = {}
+    else:
+        layermap = mapobj[k]
+    destpath = Path(args.destdir)
+    if args.webroot is not None:
+        destpath = destpath.relative_to(args.webroot)
+    destpath = '/' + destpath.as_posix().removeprefix('/') + '/' + dirname
+    layermap[destdirname] = destpath
+
+    # dump the new object to the temporary file
+    bstr = json.dumps(mapobj, ensure_ascii=False, separators=(',',':')).encode('utf-8')
+    mapfile_tmp = mapfile.with_stem('.' + mapfile.stem + '.new')
+    fd = os.open(mapfile_tmp, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
+    with os.fdopen(fd, mode='wb', closefd=True) as fp:
+        fp.write(bstr)
+
+    # compress the temporary file
+    if args.compress:
+        mapfile_tmp_compressed = mapfile_tmp.with_name(mapfile_tmp.name + '.br')
+        compressor = brotli.Compressor()
+        fd = os.open(mapfile_tmp_compressed, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
+        try:
+            write_all(fd, compressor.process(bstr))
+            write_all(fd, compressor.finish())
+        finally:
+            os.close(fd)
+
+    # rename temporary files to production (starting with the uncompressed one)
+    logging.debug('rename(%s, %s)', mapfile_tmp, mapfile)
+    os.rename(mapfile_tmp, mapfile)
+
+    if args.compress:
+        logging.debug('rename(%s, %s)', mapfile_tmp_compressed, mapfile_compressed)
+        os.rename(mapfile_tmp_compressed, mapfile_compressed)
--
cgit v1.2.3