#!/usr/bin/python3

#----------------------------------------------------------------------
# Backend utilities for the Klimatanalys Norr project (create MVT tiles)
# Copyright © 2024 Guilhem Moulin
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#----------------------------------------------------------------------

from os import O_RDONLY, O_WRONLY, O_CREAT, O_EXCL, O_TRUNC, O_CLOEXEC, O_DIRECTORY
import os
import errno
from fcntl import flock, LOCK_EX, LOCK_UN
from string import hexdigits
import json
import logging
import math
import random
import argparse
import tempfile
from pathlib import Path

import numpy

from osgeo import gdal, ogr, osr
from osgeo.gdalconst import (
    OF_VECTOR as GDAL_OF_VECTOR,
    OF_READONLY as GDAL_OF_READONLY,
    OF_UPDATE as GDAL_OF_UPDATE,
    OF_VERBOSE_ERROR as GDAL_OF_VERBOSE_ERROR,
)
gdal.UseExceptions()

import common
from common import (
    format_bytes,
    gdalSetOpenExArgs,
    gdalVersionMin,
    gdalGetMetadataItem,
    escapeIdentifier,
)

# Open and return the source dataset.
def openSourceDS(def_dict):
    # merge 'open-options-publish' into 'open-options'
    opts2 = def_dict.get('open-options-publish', None)
    if opts2 is not None:
        if 'open-options' not in def_dict:
            def_dict['open-options'] = {}
        def_dict['open-options'] |= opts2
    kwargs, _ = gdalSetOpenExArgs(def_dict, flags=GDAL_OF_READONLY|GDAL_OF_VERBOSE_ERROR)

    path = def_dict['path']
    logging.debug('OpenEx(%s, %s)', path, str(kwargs))
    return gdal.OpenEx(path, **kwargs)

# Choose a random subdirectory under the given base directory and create
# a new MVT dataset there.
# The old MVT dataset (if any) remains; this is intentional, as we don't
# want to disrupt browser caches or the tile → attribute mapping.
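# (Illustrative example, not from the original sources: if basedir already
# contains subdirectories "00" and "a3", those two names get weight 0 below
# and the new dataset lands under one of the 254 remaining names, say "7f".)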
def createMVT(basedir, options=None):
    drv = gdal.GetDriverByName('MVT')
    if drv is None:
        raise Exception('Unknown GDAL driver for format "MVT"')

    # choose an available name at random from 00 to FF
    weights = [1] * 256
    for path in basedir.iterdir():
        filename = path.name
        if len(filename) == 2 and filename[0] in hexdigits and filename[1] in hexdigits:
            n = int(filename, 16)
            weights[n] = 0
    if all(w == 0 for w in weights):
        raise Exception(f'No name available in {basedir} (forgot to cleanup?)')
    name = random.choices(range(256), weights=weights, k=1)[0]
    name = f'{name:0>2x}'
    path = str(basedir.joinpath(name))

    # merge options from the config file
    defaults_map = {
        'min-zoom': 'MINZOOM',
        'max-zoom': 'MAXZOOM',
        'max-size': 'MAX_SIZE',
        'max-features': 'MAX_FEATURES',
        'simplification': 'SIMPLIFICATION',
        'simplification-max-zoom': 'SIMPLIFICATION_MAX_ZOOM',
        'tiling-scheme': 'TILING_SCHEME',
        'extent': 'EXTENT',
    }
    defaults_options = common.config.get('vector-tiles', None)
    if defaults_options is not None:
        for k, v in defaults_options.items():
            if k not in defaults_map:
                logging.warning('No default map for key "%s", ignoring', k)
                continue
            if options is None:
                options = {}
            k = defaults_map[k]
            if k not in options:
                if isinstance(v, list):
                    v = ','.join(map(str, v))
                options[k] = v

    kwargs = { 'eType': gdal.GDT_Unknown }
    if options is not None:
        kwargs['options'] = opts = []
        for k, v in options.items():
            opts.append(k + '=' + str(v))

    logging.debug('Create(%s, %s, eType=%s%s)', drv.ShortName, path, kwargs['eType'],
        ', options=' + str(kwargs['options']) if 'options' in kwargs else '')
    ds = drv.Create(path, 0, 0, 0, **kwargs)
    return ds, name

# Filter a source layer.
def getSourceLayer(ds_src, layerdef, extent=None):
    layername = layerdef['__layername-src__']
    lyr_src = ds_src.GetLayerByName(layername)
    if lyr_src is None:
        raise Exception(f'Source dataset has no layer named "{layername}"')

    count0 = -1
    if lyr_src.TestCapability(ogr.OLCFastFeatureCount):
        count0 = lyr_src.GetFeatureCount(force=0)

    srs_src = lyr_src.GetSpatialRef()
    if srs_src is None:
        raise Exception('Source layer "{}" has no SRS'.format(layername))
    elif not srs_src.IsProjected():
        raise Exception('Source layer "{}" SRS is not projected: {}'.format(
            layername, srs_src.ExportToPrettyWkt()))
    # "the value to multiply by linear distances to transform them to meters"
    linearUnits = srs_src.GetLinearUnits()
    logging.debug('Source layer "%s" SRS ("%s") linear units: %f', layername,
        srs_src.GetName(), linearUnits)

    # get the geometry field of the source layer
    defn = lyr_src.GetLayerDefn()
    geomFieldCount = defn.GetGeomFieldCount()
    if geomFieldCount != 1:
        logging.warning('Source layer "%s" has %d != 1 geometry fields',
            lyr_src.GetName(), geomFieldCount)
    geomField = defn.GetGeomFieldDefn(0)
    geomType = geomField.GetType()
    geomFieldName_esc = escapeIdentifier(geomField.GetName())

    # transform the extent to the source SRS
    if extent is None or extent.GetSpatialReference().IsSame(srs_src):
        extent_src = extent
    else:
        extent_src = extent.Clone()
        if extent_src.TransformTo(srs_src) != ogr.OGRERR_NONE:
            raise Exception('Could not transform extent {} to {}'.format(
                extent.ExportToWkt(), srs_src.GetName()))

    reserved_fields = set(map(lambda x: x.lower(), [
        'geom',
        'GeomArea',
        'GeomPerimeter',
        'GeomLength',
    ]))

    # build the SQL query
    drv = ds_src.GetDriver()
    columns = []
    for i in range(defn.GetFieldCount()):
        fld = defn.GetFieldDefn(i)
        fldType = fld.GetType()
        if fld.GetName().lower() in reserved_fields:
            raise Exception(f'Invalid column name "{fld.GetName()}"')
        fldName_esc = escapeIdentifier(fld.GetName())
        column = 'm.' + fldName_esc

        # for date/time/datetime fields, let the RDBMS engine do the formatting when possible
        if fldType == ogr.OFTDate:
            if drv.ShortName == 'PostgreSQL':
                column = 'to_char(' + column + ', \'YYYY-MM-DD\') AS '
            elif drv.ShortName == 'GPKG':
                column = 'strftime(\'%F\', ' + column + ') AS '
        elif fldType == ogr.OFTTime:
            if drv.ShortName == 'PostgreSQL':
                column = 'to_char(' + column + ', \'HH24:MI:SSOF\') AS '
            elif drv.ShortName == 'GPKG':
                # XXX SQLite's strftime() lacks support for time zones
                column = 'strftime(\'%T\', ' + column + ') AS '
        elif fldType == ogr.OFTDateTime:
            if drv.ShortName == 'PostgreSQL':
                column = 'to_char(' + column + ', \'YYYY-MM-DD"T"HH24:MI:SSOF\') AS '
            elif drv.ShortName == 'GPKG':
                # XXX SQLite's strftime() lacks support for time zones
                column = 'strftime(\'%FT%T\', ' + column + ') AS '
        if column.endswith(' AS '):
            column += fldName_esc
        columns.append(column)

    # add columns with computed info about the geometry (before clipping, rounded to 0.01 m²/0.01 m)
    if ogr.GT_IsSubClassOf(geomType, ogr.wkbSurface) or ogr.GT_IsSubClassOf(geomType, ogr.wkbMultiSurface):
        columns.append('ROUND(CAST(ST_Area(m.' + geomFieldName_esc + ') * '
            + str(linearUnits) + '*' + str(linearUnits) + ' AS numeric), 2) AS "GeomArea"')
        columns.append('ROUND(CAST(ST_Perimeter(m.' + geomFieldName_esc + ') * '
            + str(linearUnits) + ' AS numeric), 2) AS "GeomPerimeter"')
    elif ogr.GT_IsSubClassOf(geomType, ogr.wkbCurve) or ogr.GT_IsSubClassOf(geomType, ogr.wkbMultiCurve):
        columns.append('ROUND(CAST(ST_Length(m.' + geomFieldName_esc + ') * '
            + str(linearUnits) + ' AS numeric), 2) AS "GeomLength"')

    columns.append('m.' + geomFieldName_esc)
    query = 'SELECT ' + ', '.join(columns) + ' FROM ' + escapeIdentifier(lyr_src.GetName()) + ' m'

    # add WHERE clauses and spatial filter; for GPKG/SQLite3 the query is
    # too complex and the driver can't use ExecuteSQL()'s spatialFilter
    # option, so we add an ST_Intersects() condition instead
    if drv.ShortName == 'PostgreSQL':
        spatialFilter = extent_src
    else:
        spatialFilter = None

    cond = layerdef.get('where', None)
    if (extent_src is not None and spatialFilter is None) or cond is not None:
        query += ' WHERE '
        if extent_src is not None and spatialFilter is None:
            query += 'ST_Intersects(m.' + geomFieldName_esc + ', \'' + extent_src.ExportToWkt() + '\')'
            if cond is not None:
                query += ' AND '
        if cond is not None:
            query += '(' + cond.strip() + ')'

    logging.debug('SQL query: %s', query)
    lyr_src = ds_src.ExecuteSQL(query, spatialFilter=spatialFilter)

    count1 = -1
    if lyr_src.TestCapability(ogr.OLCFastFeatureCount):
        count1 = lyr_src.GetFeatureCount(force=0)
    if count0 >= 0 and count1 >= 0:
        logging.debug('Source layer "%s" has %d features, of which %d are to be exported',
            layername, count0, count1)

    return lyr_src

# Export a layer from the source dataset, returning the next available FID.
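# (Note, added for clarity: nextfid packs the attribute-file number in its
# high 16 bits and the index within that file's JSON array in its low 16 bits,
# e.g. nextfid 0x00012345 maps to attribute file "0001.json", array index 0x2345.)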
# TODO bump metadata_maxsize until the compressed version doesn't exceed 64k
def exportLayer(lyr_dst, lyr_src, srs=None, clip=None, layername=None,
        coordinate_precision=None, nextfid=0,
        # rotate after the feature which brings the file to the 256kiB mark
        metadata_maxsize=256*1024, metadata=None):
    if srs is None:
        raise Exception('Missing target SRS')
    if clip is not None:
        clip_srs = clip.GetSpatialReference()
        if not clip_srs.IsSame(srs):
            raise Exception('Clipping geometry {} has SRS {},\nexpected {}'.format(
                clip.ExportToWkt(), clip_srs.ExportToPrettyWkt(), srs.ExportToPrettyWkt()))
    if metadata is None:
        metadata_fp = None
    else:
        metadata_fp = metadata.get('fp', None)
        exportToJson_options = []
        if coordinate_precision is not None:
            exportToJson_options = ['COORDINATE_PRECISION=' + str(coordinate_precision)]
        metadata_size = metadata.get('cursize', 0)

    srs_src = lyr_src.GetLayerDefn().GetGeomFieldDefn(0).GetSpatialRef()
    if srs.IsSame(srs_src):
        logging.debug('Both source and destination have the same SRS (%s), '
            'skipping coordinate transformation', srs.GetName())
        ct = None
    else:
        # TODO Use GetSupportedSRSList() and SetActiveSRS() with GDAL ≥3.7.0
        # when possible, see apps/ogr2ogr_lib.cpp
        logging.debug('Creating transformation from source SRS (%s) to destination SRS (%s)',
            srs_src.GetName(), srs.GetName())
        ct = osr.CoordinateTransformation(srs_src, srs)
        if ct is None:
            raise Exception(f'Could not create transformation from source SRS ({srs_src.GetName()}) '
                + f'to destination SRS ({srs.GetName()})')

    # extract the metadata index and filename from nextfid (an exception is
    # raised if nextfid doesn't fit in 4 bytes)
    bstr = nextfid.to_bytes(4, byteorder='big')
    nextfid_idx = int.from_bytes(bstr[-2:], byteorder='big')
    nextfid_p = int.from_bytes(bstr[:2], byteorder='big')
    metadata_maxlen = 1 << (8*2)

    defn_dst = lyr_dst.GetLayerDefn()
    # we roll our own loop rather than using ogr2ogr/GDALVectorTranslate()
    # since we want to do a custom JSON serialization at the same time,
    # in order to avoid duplicating properties across tiles or clipping
    # overlays at tile boundaries
    count = 0
    feature = lyr_src.GetNextFeature()
    while feature is not None:
        geom = feature.GetGeometryRef().Clone()
        if ct is not None and geom.Transform(ct) != ogr.OGRERR_NONE:
            raise Exception('Could not apply coordinate transformation')

        if metadata is not None:
            # serialize the feature to JSON and save it
            # TODO there is a lot of room for improvement here
            #  - geometries could be exported to WKB or FlatGeoBuf rather than GeoJSON
            #  - columns (schema) could be saved in layers.json only, so
            #    the object would be an array of length n+1 where the
            #    geometry would be at index 0 or n
            #  - data could be serialized to binary, cf. BJSON or GeoBuf
            feature.SetGeometry(geom)
            obj = feature.ExportToJson(as_object=True, options=exportToJson_options)
            #props = { key:value for key, value in obj['properties'].items() if value is not None }
            props = obj['properties']
            props['geom'] = obj['geometry']
            bstr = json.dumps(props, ensure_ascii=False, separators=(',',':')).encode('utf-8')

            #if nextfid_idx >= metadata_maxlen or nextfid_p * metadata_maxlen + nextfid_idx != nextfid:
            #    raise Exception(f'impossible: {nextfid_p}*{metadata_maxlen} + {nextfid_idx} != {nextfid}')
            if metadata_fp is None:
                metadata_fp = openNewMetadataFile(metadata, nextfid_p)
                metadata_size = 0
            else:
                metadata_fp.write(b',')
                metadata_size += 1
            metadata_size += metadata_fp.write(bstr)

        # clip the feature and export it (without fields) to MVT
        geom2 = geom if clip is None else geom.Intersection(clip)
        feature2 = ogr.Feature(defn_dst)
        feature2.SetGeometryDirectly(geom2)
        feature2.SetFID(nextfid)
        if lyr_dst.CreateFeature(feature2) != ogr.OGRERR_NONE:
            raise Exception(f'Could not transfer source feature #{feature.GetFID()}')
        nextfid += 1

        if metadata is not None:
            nextfid_idx += 1
            if metadata_size > metadata_maxsize or nextfid_idx >= metadata_maxlen:
                closeMetadataFile(metadata)
                metadata_fp = metadata_size = None
                nextfid_p += 1
                nextfid_idx = 0
                nextfid = nextfid_p * metadata_maxlen
                # a new file will be opened when processing the next feature

        count += 1
        feature = lyr_src.GetNextFeature()

    logging.info('Exported %d features to layer "%s" from "%s"', count,
        lyr_dst.GetName(), layername)

    if metadata is not None:
        # save the size of the metadata file currently open for proper accounting
        metadata['cursize'] = 0 if metadata_fp is None else metadata_size
    return nextfid, count

# Open a new metadata file.
def openNewMetadataFile(metadata, nextfid_p):
    name = nextfid_p.to_bytes(length=2, byteorder='big').hex()
    name = Path(metadata['tempdir']).joinpath(name).with_suffix('.json')
    logging.debug('open(%s)', str(name))
    fd = os.open(name, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, mode=0o0644,
        dir_fd=metadata.get('dirfd', None))
    metadata['fp'] = fp = os.fdopen(fd, mode='wb', closefd=True)
    fp.write(b'[')
    return fp

# Close a metadata file.
def closeMetadataFile(metadata):
    fp = metadata.pop('fp')
    fp.write(b']')
    logging.debug('close(%d)', fp.fileno())
    fp.close()

# Wrapper around os.write() that loops until the entire buffer is written.
def write_all(fd, buf):
    offset = 0
    while offset < len(buf):
        try:
            n = os.write(fd, buf[offset:])
        except IOError as e:
            if e.errno != errno.EAGAIN:
                raise e
            #logging.debug('Got EAGAIN while trying to write')
        else:
            #logging.debug('Wrote %d bytes of %d', n, len(buf) - offset)
            offset += n
    return offset

# Compress a file to Brotli.
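# (Illustrative usage, not from the original sources — the tile path is hypothetical:
#   size_in, size_out = compress_brotli('0/0/0.pbf', dir_fd=destdir_fd, oFlags=O_EXCL, quality=5)
# writes "0/0/0.pbf.br" next to the input and returns the uncompressed and
# compressed byte counts.)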
# TODO increase chunksize to see if it helps with performance
def compress_brotli(path, suffix='.br', dir_fd=None, chunksize=65536, oFlags=0, **kwargs):
    compressor = brotli.Compressor(**kwargs)
    fd_in = os.open(path, O_RDONLY|O_CLOEXEC, dir_fd=dir_fd)

    size_in = size_out = 0
    try:
        # hard-code the mode to save an fstat(2) call
        fd_out = os.open(path + suffix, O_WRONLY|O_CREAT|O_CLOEXEC|oFlags, mode=0o0644,
            dir_fd=dir_fd)
        try:
            buf_in = os.read(fd_in, chunksize)
            while len(buf_in) > 0:
                #logging.debug('Read %d bytes of %d', len(buf_in), chunksize)
                size_in += len(buf_in)
                size_out += write_all(fd_out, compressor.process(buf_in))
                buf_in = os.read(fd_in, chunksize)
            size_out += write_all(fd_out, compressor.finish())
        finally:
            os.close(fd_out)
    finally:
        os.close(fd_in)

    return size_in, size_out


if __name__ == '__main__':
    common.init_logger(app=os.path.basename(__file__), level=logging.INFO)

    parser = argparse.ArgumentParser(description='Export GIS layers to Mapbox Vector Tiles (MVT).')
    parser.add_argument('--debug', action='count', default=0,
        help=argparse.SUPPRESS)
    parser.add_argument('--destdir', default=os.curdir,
        help=f'destination directory for generated files (default: {os.curdir})')
    parser.add_argument('--lockfile', default=None,
        help='obtain an exclusive lock while exporting features to MVT')
    parser.add_argument('--name', default=None,
        help='name to use in the layer map (default: last component of DESTDIR)')
    parser.add_argument('--webroot', default=None,
        help='path to the web root')
    parser.add_argument('--metadata', default=None,
        help='path to the JSON metadata file (default: DESTDIR/../metadata.json)')
    parser.add_argument('--metadata-lockfile', default=None,
        help='path to the JSON metadata lock file (default: DESTDIR/../.metadata.json.lck)')
    parser.add_argument('--compress', action=argparse.BooleanOptionalAction, default=False,
        help='whether to compress tiles (default: False)')
    parser.add_argument('groupname', nargs='*', help='group layer name(s) to process')
    args = parser.parse_args()

    if args.debug > 0:
        logging.getLogger().setLevel(logging.DEBUG)
    if args.debug > 1:
        from http.client import HTTPConnection
        HTTPConnection.debuglevel = 1
        requests_log = logging.getLogger('urllib3')
        requests_log.setLevel(logging.DEBUG)
        requests_log.propagate = True
        gdal.ConfigurePythonLogging(enable_debug=True)

    if args.compress:
        # fail early if the module is missing
        import brotli

    destdirname = args.name if args.name is not None else Path(args.destdir).name
    if not destdirname:
        logging.error('Could not infer --name value')
        exit(1)

    common.load_config(groupnames=None if args.groupname == [] else args.groupname)

    # validate configuration
    if 'dataset' not in common.config:
        raise Exception('Configuration does not specify source dataset')

    export_layers = {}
    for layername, layerdef in common.config.get('layers', {}).items():
        exportdef = layerdef.get('publish', None)
        if exportdef is None:
            raise Exception(f'Layer "{layername}" has no publication definition')
        if isinstance(exportdef, str):
            exportdef = { exportdef:{} }
        elif isinstance(exportdef, list):
            exportdef = { l:{} for l in exportdef }
        for export_layername, export_layerdef in exportdef.items():
            if export_layername not in export_layers:
                export_layers[export_layername] = []
            export_layers[export_layername].append(export_layerdef)
            export_layerdef['__layername-src__'] = layername

    if args.destdir != os.curdir:
        try:
            os.mkdir(args.destdir, mode=0o0755)
        except FileExistsError:
            pass

    # open the source dataset
    ds = openSourceDS(common.config['dataset'])

    # get the configured Spatial Reference System and extent; force traditional
    # GIS order on the target SRS since that's what the MVT and JSON exports expect
    srs = common.getSRS(common.config.get('SRS', None))
    srs.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)
    extent, extent_simple = common.getExtent(common.config.get('extent', None), srs=srs)

    coordinate_precision = None
    if srs.IsProjected():
        # export with centimeter precision
        linearUnits = srs.GetLinearUnits()
        coordinate_precision = math.ceil(math.log10(linearUnits) + 2)
        if coordinate_precision < 0:
            coordinate_precision = 0
        logging.debug('Coordinate precision: %d', coordinate_precision)

    if args.lockfile is not None:
        lock_fd = os.open(args.lockfile, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
        logging.debug('flock("%s", LOCK_EX)', args.lockfile)
        flock(lock_fd, LOCK_EX)

    # always lock destdir (until the program exits) to avoid concurrent
    # generations of the same tileset
    destdir_fd = os.open(args.destdir, O_RDONLY|O_CLOEXEC|O_DIRECTORY)
    logging.debug('flock("%s", LOCK_EX)', args.destdir)
    flock(destdir_fd, LOCK_EX)

    nFeatures = 0
    tile_extension = '.pbf'
    basedir = Path(f'/proc/self/fd/{destdir_fd}')
    with tempfile.TemporaryDirectory() as tempdir:
        # use a temporary directory for TEMPORARY_DB
        logging.debug('Using "%s" as temporary directory', tempdir)

        ds_dst, dirname = createMVT(basedir, options={
            'FORMAT': 'DIRECTORY',
            'COMPRESS': 'NO',
            'TYPE': 'overlay',
            'BUFFER': 32,
            'TILE_EXTENSION': tile_extension.removeprefix('.'),
            'TEMPORARY_DB': Path(tempdir).joinpath('tmp-mvt.db'),
        })

        # use a temporary directory for the metadata to avoid messing
        # around with the MVT directory while building is in progress
        metadata = { 'dirfd': destdir_fd }
        metadata['tempdir'] = Path(tempfile.mkdtemp(prefix='.' + dirname + '-attr.',
            dir=basedir)).relative_to(basedir)
        try:
            logging.debug('Using "%s" as temporary metadata directory', metadata['tempdir'])
            os.chmod(metadata['tempdir'], 0o0755, dir_fd=metadata.get('dirfd', None),
                follow_symlinks=False)

            nextfid = 0
            for layername, layerdefs in export_layers.items():
                # create the destination layer
                lyr_dst = ds_dst.CreateLayer(layername, srs=srs, geom_type=ogr.wkbUnknown)
                if lyr_dst is None:
                    raise Exception(f'Could not create destination layer "{layername}"')

                for layerdef in layerdefs:
                    lyr_src = getSourceLayer(ds, layerdef, extent=extent)
                    try:
                        # use the non-dense geometry (a rectangle) for clipping as it's faster
                        # and there is no need to reproject (the SRSs match)
                        nextfid, count0 = exportLayer(lyr_dst, lyr_src,
                            layername=layerdef['__layername-src__'],
                            srs=srs, clip=extent_simple,
                            coordinate_precision=coordinate_precision,
                            nextfid=nextfid, metadata=metadata)
                        nFeatures += count0
                    finally:
                        ds.ReleaseResultSet(lyr_src)
                lyr_dst = None

            # close datasets
            ds = None
            ds_dst = None

            # move the metadata to the MVT structure (now that it has been closed)
            logging.debug('rename(%s/%s, %s/%s/attr)', args.destdir, metadata['tempdir'],
                args.destdir, dirname)
            os.rename(metadata['tempdir'], Path(dirname).joinpath('attr'),
                src_dir_fd=metadata.get('dirfd', None), dst_dir_fd=destdir_fd)
            metadata.pop('tempdir', None)

        except Exception as e:
            # cleanup
            for root, dirnames, filenames, rootfd in os.fwalk(top=metadata['tempdir'],
                    dir_fd=metadata.get('dirfd', None), topdown=False, follow_symlinks=False):
                for filename in filenames:
                    #logging.debug('unlink(%s/%s)', root, filename)
                    os.unlink(filename, dir_fd=rootfd)
                for dirname in dirnames:
                    #logging.debug('rmdir(%s/%s)', root, dirname)
                    os.rmdir(dirname, dir_fd=rootfd)
            #logging.debug('rmdir(%s/%s)', args.destdir, metadata['tempdir'])
            os.rmdir(metadata['tempdir'], dir_fd=metadata.get('dirfd', None))
            raise e

        finally:
            if 'fp' in metadata:
                closeMetadataFile(metadata)

    srs = None
    extent = None
    extent_simple = None

    try:
        # OpenLayers doesn't make use of that file so delete it
        os.unlink(Path(dirname).joinpath('metadata.json'), dir_fd=destdir_fd)
    except FileNotFoundError:
        pass

    logging.info('Done creating tiles and attribute files (total %d features)', nFeatures)

    if args.lockfile is not None:
        # release the exclusive lock; the remaining work shouldn't be as demanding
        logging.debug('flock("%s", LOCK_UN)', args.lockfile)
        flock(lock_fd, LOCK_UN)

    stats_tiles = []
    stats_metadata = []
    if args.compress:
        compress_mode = brotli.MODE_GENERIC

    # collect stats and optionally compress
    # TODO: optimize traversal, cf.
    # https://r3n.hashnode.dev/tech-chronicles-conquer-memory-monsters-with-pythons-yield-in-large-directory-processing
    # TODO: benchmark brotli quality to trade off compression ratio vs. execution time
    # (also, tile and attribute files can have different levels if need be)
    for root, _, filenames, rootfd in os.fwalk(top=dirname, dir_fd=destdir_fd,
            follow_symlinks=False):
        for filename in filenames:
            lfilename = filename.lower()
            if lfilename.endswith(tile_extension):
                arr = stats_tiles
            elif lfilename.endswith('.json'):
                arr = stats_metadata
            else:
                continue
            if not args.compress:
                st = os.stat(filename, dir_fd=rootfd)
                arr.append(st.st_size)
            else:
                #logging.debug("Compressing %s/%s", root, filename)
                szi, szo = compress_brotli(filename, dir_fd=rootfd, oFlags=O_EXCL,
                    mode=compress_mode, quality=5)
                arr.append((szi, szo))

    # display stats
    if len(stats_tiles) > 0:
        stats_tiles = numpy.array(stats_tiles, dtype=numpy.int64, copy=False)
        arr = stats_tiles[:,0] if args.compress else stats_tiles
        tot = numpy.sum(arr)
        logging.info('Tile count: %d [min=%s, max=%s, sum=%s, avg=%s]',
            arr.size, format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
            format_bytes(tot), format_bytes(round(tot/arr.size)))
        if args.compress:
            arr = stats_tiles[:,1]
            ztot = numpy.sum(arr)
            logging.info('After compression: min=%s, max=%s, sum=%s, avg=%s (ratio %.1f%%)',
                format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
                format_bytes(ztot), format_bytes(round(ztot/arr.size)),
                100. * (1. - ztot/tot))

    if len(stats_metadata) > 0:
        stats_metadata = numpy.array(stats_metadata, dtype=numpy.int64, copy=False)
        arr = stats_metadata[:,0] if args.compress else stats_metadata
        tot = numpy.sum(arr)
        logging.info('Metadata file count: %d [min=%s, max=%s, sum=%s, avg=%s]',
            arr.size, format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
            format_bytes(tot), format_bytes(round(tot/arr.size)))
        if args.compress:
            arr = stats_metadata[:,1]
            ztot = numpy.sum(arr)
            logging.info('After compression: min=%s, max=%s, sum=%s, avg=%s (ratio %.1f%%)',
                format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
                format_bytes(ztot), format_bytes(round(ztot/arr.size)),
                100. * (1. - ztot/tot))

    # update metadata.json
    if args.metadata is None:
        mapfile = Path(args.destdir).parent.joinpath('metadata.json')
    else:
        mapfile = Path(args.metadata)

    # first, place an exclusive lock (it is important that all
    # invocations of the program with the same --metadata file, even on
    # different layer groups, share the same lock file to avoid
    # concurrent updates of metadata.json)
    if args.metadata_lockfile is None:
        mapfile_lock = mapfile.with_name('.' + mapfile.name + '.lck')
    else:
        mapfile_lock = Path(args.metadata_lockfile)
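    # (Illustrative, hypothetical paths: with --metadata www/metadata.json and no
    # --metadata-lockfile, the lock file resolves to www/.metadata.json.lck.)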
    mapfile_lock_fd = os.open(mapfile_lock, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
    logging.debug('flock("%s", LOCK_EX)', mapfile_lock)
    flock(mapfile_lock_fd, LOCK_EX)

    # load the file
    try:
        with open(mapfile, mode='rb') as fp:
            mapobj = json.load(fp)
    except FileNotFoundError:
        mapobj = {}

    # remove the compressed version as we prepare to update the uncompressed one
    mapfile_compressed = mapfile.with_name(mapfile.name + '.br')
    try:
        logging.debug('unlink(%s)', mapfile_compressed)
        os.unlink(mapfile_compressed)
    except FileNotFoundError:
        pass

    k = 'layermap'
    if k not in mapobj:
        layermap = mapobj[k] = {}
    else:
        layermap = mapobj[k]

    destpath = Path(args.destdir)
    if args.webroot is not None:
        destpath = destpath.relative_to(args.webroot)
    destpath = '/' + destpath.as_posix().removeprefix('/') + '/' + dirname
    layermap[destdirname] = destpath

    # dump the new object to a temporary file
    bstr = json.dumps(mapobj, ensure_ascii=False, separators=(',',':')).encode('utf-8')
    mapfile_tmp = mapfile.with_stem('.' + mapfile.stem + '.new')
    fd = os.open(mapfile_tmp, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
    with os.fdopen(fd, mode='wb', closefd=True) as fp:
        fp.write(bstr)

    # compress the temporary file
    if args.compress:
        mapfile_tmp_compressed = mapfile_tmp.with_name(mapfile_tmp.name + '.br')
        compressor = brotli.Compressor()
        fd = os.open(mapfile_tmp_compressed, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
        try:
            write_all(fd, compressor.process(bstr))
            write_all(fd, compressor.finish())
        finally:
            os.close(fd)

    # rename the temporary files to production (starting with the uncompressed one)
    logging.debug('rename(%s, %s)', mapfile_tmp, mapfile)
    os.rename(mapfile_tmp, mapfile)

    if args.compress:
        logging.debug('rename(%s, %s)', mapfile_tmp_compressed, mapfile_compressed)
        os.rename(mapfile_tmp_compressed, mapfile_compressed)