From 7cda119879cf48ba72ba34522fa9cdf9ef6d9b49 Mon Sep 17 00:00:00 2001
From: Guilhem Moulin
Date: Sat, 21 Sep 2024 04:00:05 +0200
Subject: Add `webmap-publish` script to export layers to Mapbox Vector Tiles.

---
 common.py      | 113 ++++++++-
 config.yml     |  78 ++++++
 webmap-import  | 114 +--------
 webmap-publish | 760 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 960 insertions(+), 105 deletions(-)
 create mode 100755 webmap-publish

diff --git a/common.py b/common.py
index 79b5de8..1635195 100644
--- a/common.py
+++ b/common.py
@@ -65,7 +65,7 @@ def load_config(path=None, groupnames=None):
         for name, layerdefs in layers.items():
             if isinstance(layerdefs, dict) and 'sources' not in layerdefs:
                 layers[name] = { 'sources': [layerdefs] }
-                for k in ['description', 'create']:
+                for k in ['description', 'create', 'publish']:
                     if k in layerdefs:
                         layers[name][k] = layerdefs.pop(k)
                 layerdefs = layers[name]
@@ -190,7 +190,7 @@ def format_time(s):
 
 # Return a boolean indicating whether the installer GDAL version is
 # greater than or equal to the provider (maj, min, rev) triplet.
-def gdal_version_min(maj=0, min=0, rev=0):
+def gdalVersionMin(maj=0, min=0, rev=0):
     if maj < 1 or (maj == 1 and min < 10):
         # GDAL_VERSION_NUM() macro was changed in 1.10. That version
         # was released in 2013 so we blindly assume the installer
@@ -198,11 +198,120 @@ def gdal_version_min(maj=0, min=0, rev=0):
         return True
 
     from osgeo import gdal
+    gdal.UseExceptions()
+
     version_cur = int(gdal.VersionInfo());
     # cf. GDAL_COMPUTE_VERSION(maj,min,rev) in gcore/gdal_version.h.in
     version_min = maj*1000000 + min*10000 + rev*100
     return version_min <= version_cur
 
+# Wrapper around gdal.MajorObject.GetMetadataItem(name)
+def gdalGetMetadataItem(o, k):
+    v = o.GetMetadataItem(k)
+    if v is not None and isinstance(v, str):
+        return v.upper() == 'YES'
+    else:
+        return False
+
+# Escape the given identifier, cf.
+# swig/python/gdal-utils/osgeo_utils/samples/validate_gpkg.py:_esc_id()
+def escapeIdentifier(identifier):
+    if '\x00' in identifier:
+        raise Exception(f'Invalid identifier "{identifier}"')
+    # SQL:1999 delimited identifier
+    return '"' + identifier.replace('"', '""') + '"'
+
+# Return a pair (kwargs, driver) to use with gdal.OpenEx()
+def gdalSetOpenExArgs(option_dict, flags=0):
+    from osgeo import gdal
+    gdal.UseExceptions()
+
+    kwargs = { 'nOpenFlags': gdal.OF_VECTOR | flags }
+
+    fmt = option_dict.get('format', None)
+    if fmt is None:
+        drv = None
+    else:
+        drv = gdal.GetDriverByName(fmt)
+        if drv is None:
+            raise Exception(f'Unknown driver name "{fmt}"')
+        elif not gdalGetMetadataItem(drv, gdal.DCAP_VECTOR):
+            raise Exception(f'Driver "{drv.ShortName}" has no vector capabilities')
+        kwargs['allowed_drivers'] = [ drv.ShortName ]
+
+    oo = option_dict.get('open-options', None)
+    if oo is not None:
+        kwargs['open_options'] = [ k + '=' + str(v) for k, v in oo.items() ]
+    return kwargs, drv
+
+# Return the decoded Spatial Reference System
+def getSRS(srs_str):
+    if srs_str is None:
+        return
+
+    from osgeo import osr
+    osr.UseExceptions()
+
+    srs = osr.SpatialReference()
+    if srs_str.startswith('EPSG:'):
+        code = int(srs_str.removeprefix('EPSG:'))
+        srs.ImportFromEPSG(code)
+    else:
+        raise Exception(f'Unknown SRS {srs_str}')
+    logging.debug('Default SRS: "%s" (%s)', srs.ExportToProj4(), srs.GetName())
+    return srs
+
+# Convert extent [minX, minY, maxX, maxY] into a polygon and assign the
+# given SRS.  Return a pair with the densified and non-densified extent.
+# Like apps/ogr2ogr_lib.cpp, the former is obtained by segmentizing the
+# polygon to make sure it is sufficiently densified when transforming to
+# source layer SRS for spatial filtering.
+def getExtent(extent, srs=None):
+    if extent is None:
+        return None, None
+
+    if not (isinstance(extent, list) or isinstance(extent, tuple)) or len(extent) != 4:
+        raise Exception(f'Invalid extent {extent}')
+    elif srs is None:
+        raise Exception('Configured extent but no SRS')
+
+    logging.debug('Configured extent in %s: %s',
+        srs.GetName(), ', '.join(map(str, extent)))
+
+    from osgeo import ogr, osr
+    ogr.UseExceptions()
+
+    ring = ogr.Geometry(ogr.wkbLinearRing)
+    ring.AddPoint_2D(extent[0], extent[1])
+    ring.AddPoint_2D(extent[2], extent[1])
+    ring.AddPoint_2D(extent[2], extent[3])
+    ring.AddPoint_2D(extent[0], extent[3])
+    ring.AddPoint_2D(extent[0], extent[1])
+
+    polygon = ogr.Geometry(ogr.wkbPolygon)
+    polygon.AddGeometry(ring)
+
+    # we expressed extent as minX, minY, maxX, maxY (easting/northing
+    # ordered, i.e., in traditional GIS order)
+    srs2 = srs.Clone()
+    srs2.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)
+    polygon.AssignSpatialReference(srs2)
+    if not srs2.IsSame(srs):
+        polygon.TransformTo(srs)
+
+    # densify the rectangle to avoid issues when reprojecting to the
+    # source layer SRS, cf. apps/ogr2ogr_lib.cpp:ApplySpatialFilter()
+    polygon_dense = polygon.Clone()
+    segment_distance_metre = 10 * 1000
+    if srs.IsGeographic():
+        dfMaxLength = segment_distance_metre / math.radians(srs.GetSemiMajor())
+        polygon_dense.Segmentize(dfMaxLength)
+    elif srs.IsProjected():
+        dfMaxLength = segment_distance_metre / srs.GetLinearUnits()
+        polygon_dense.Segmentize(dfMaxLength)
+
+    return polygon_dense, polygon
+
 
 ######
 # The function definitions below are taken from cpython's source code
diff --git a/config.yml b/config.yml
index b23496b..ec5ec50 100644
--- a/config.yml
+++ b/config.yml
@@ -22,6 +22,29 @@ extent:
   - 1159296
   - 7975712
 
+# Export options for MVT publishing, see
+# https://gdal.org/en/latest/drivers/vector/mvt.html
+vector-tiles:
+  min-zoom: 0 # res. 1024m/px
+  max-zoom: 9 # res. 2m/px
+  max-size: 4194304
+  max-features: 262144
+  tiling-scheme:
+    # Custom tiling scheme, cf. https://gdal.org/en/latest/drivers/vector/mvt.html#dataset-creation-options
+    # Compared to Lantmäteriet's scheme we start 2 levels deeper (resolution 1024m per pixel)
+    # and with 1024×1024 tiles
+    - 'EPSG:3006'
+    # upper-left corner of the upper-left tile (0,0) in the CRS
+    - 110720
+    - 7975712
+    # dimension of the tile at zoom level 0 (tile size * resolution)
+    - 1048576
+  # simplification factor, cf.
+  # https://gis.stackexchange.com/questions/401867/gdal-mvt-writer-simplification-parameter
+  simplification: 4.
+  simplification-max-zoom: 2.
+
 
 # Take User-Agent value from Tor Browser 13.0.15 (based on Mozilla Firefox 115.11.0esr)
 User-Agent: 'Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/115.0'
@@ -68,6 +91,11 @@ dataset:
       USER: webmap_import
       DBNAME: webmap
 
+  # Optional driver-specific dataset open options which override the
+  # above 'open-options' settings when publishing.
+  open-options-publish:
+    USER: webmap_guest
+
   # Optional dictionary of default layer creation options, cf.
 # https://gdal.org/drivers/vector/pg.html#layer-creation-options or
 # https://gdal.org/drivers/vector/gpkg.html#layer-creation-options
@@ -528,6 +556,7 @@ layers:
       - ArendeStatus
       - AvvHa
      - AvverkningsanmalanKlass
+    publish: clearcut_appl
 
  'sks:UtfordAvverk':
    # https://geodpags.skogsstyrelsen.se/geodataport/feeds/UtfordAvverk.xml
@@ -634,6 +663,7 @@ layers:
          Beteckn:
            - replace: 'Visas ej'
              with: null
+    publish: clearcut_comp
 
  'st:betesomraden':
    source:
@@ -826,6 +856,35 @@ layers:
          - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
            with: '{0}-{1}-{2}'
            type: regex
+    publish:
+      station_completed:
+        minzoom: 4
+        where: |
+          "Raderat" IS FALSE AND "Statuskod" = 4
+      station_processed:
+        minzoom: 4
+        where: |
+          "Raderat" IS FALSE AND "Statuskod" = 1
+      station_approved:
+        minzoom: 4
+        where: |
+          "Raderat" IS FALSE AND "Statuskod" = 3
+      station_revoked:
+        minzoom: 4
+        where: |
+          "Raderat" IS FALSE AND ("Statuskod" = 6 OR "Statuskod" = 9)
+      station_rejected:
+        minzoom: 4
+        where: |
+          "Raderat" IS FALSE AND "Statuskod" = 7
+      station_dismounted:
+        minzoom: 4
+        where: |
+          "Raderat" IS FALSE AND "Statuskod" = 5
+      station_appealed:
+        minzoom: 4
+        where: |
+          "Raderat" IS FALSE AND "Statuskod" = 8
 
  'vbk:projekteringsomraden':
    description: Vindbrukskollen landbaserade projekteringsområden (Länsstyrelsen)
@@ -977,6 +1036,13 @@ layers:
              with: '1'
            - replace: 'No'
              with: '0'
+    publish:
+      area_current:
+        where: |
+          "Raderat" IS FALSE AND "EjAktuell" IS FALSE
+      area_notcurrent:
+        where: |
+          "Raderat" IS FALSE AND "EjAktuell" IS NOT FALSE
 
  'vbk:havsbaserad_vindkraft':
    description: Vindbrukskollen havsbaserad vindkraft (Länsstyrelsen)
@@ -1193,6 +1259,10 @@ layers:
              with: '1'
            - replace: 'No'
              with: '0'
+    publish:
+      area_offshore:
+        where: |
+          "Raderat" IS FALSE
 
 # The list of layers available on the WMS server can be found at
 # https://maps3.sgu.se/geoserver/wms?SERVICE=WMS&VERSION=1.11&REQUEST=GetCapabilities
@@ -1249,6 +1319,7 @@ layers:
          - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
            with: '{0}-{1}-{2}'
            type: regex
+    publish: mrr
 
  'mrr:bearbetningskoncessioner_approved':
    description: Bearbetningskoncessioner, beviljade (SGU)
@@ -1311,6 +1382,7 @@ layers:
          - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
            with: '{0}-{1}-{2}'
            type: regex
+    publish: mrr
 
  'mrr:markanvisningar':
    description: Markanvisning till koncession (SGU)
@@ -1357,6 +1429,7 @@ layers:
          - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
            with: '{0}-{1}-{2}'
            type: regex
+    publish: mrr
 
  'mrr:mineral_applied':
    description: Undersökningstillstånd, metallar och mineral, ansökta (SGU)
@@ -1411,6 +1484,7 @@ layers:
          - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
            with: '{0}-{1}-{2}'
            type: regex
+    publish: mrr
 
  'mrr:mineral_approved':
    description: Undersökningstillstånd, metallar och mineral, beviljade (SGU)
@@ -1479,6 +1553,7 @@ layers:
          - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
            with: '{0}-{1}-{2}'
            type: regex
+    publish: mrr
 
 # 'mrr:mineral_expired':
 #   source:
@@ -1560,6 +1635,7 @@ layers:
          - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
            with: '{0}-{1}-{2}'
            type: regex
+    publish: mrr
 
  'mrr:olja_gas_diamant_approved':
    description: Undersökningstillstånd, olja, gas och diamant, beviljade (SGU)
@@ -1628,6 +1704,7 @@ layers:
          - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
            with: '{0}-{1}-{2}'
            type: regex
+    publish: mrr
 
  'mrr:torvkoncessioner':
    description: Torvkoncessioner (SGU)
@@ -1692,3 +1769,4 @@ layers:
          - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
            with: '{0}-{1}-{2}'
            type: regex
+    publish: mrr
diff --git a/webmap-import b/webmap-import
index bfee9dc..74b6ed5 100755
--- a/webmap-import
+++ b/webmap-import
@@ -38,7 +38,6 @@ from osgeo.gdalconst import (
     OF_VERBOSE_ERROR as GDAL_OF_VERBOSE_ERROR,
     CE_None as GDAL_CE_None,
     DCAP_CREATE as GDAL_DCAP_CREATE,
-    DCAP_VECTOR as GDAL_DCAP_VECTOR,
     DCAP_DEFAULT_FIELDS as GDAL_DCAP_DEFAULT_FIELDS,
     DCAP_NOTNULL_FIELDS as GDAL_DCAP_NOTNULL_FIELDS,
     DCAP_UNIQUE_FIELDS as GDAL_DCAP_UNIQUE_FIELDS,
@@ -47,40 +46,13 @@ import osgeo.gdalconst as gdalconst
 gdal.UseExceptions()
 
 import common
-
-# Wrapper around gdal.MajorObject.GetMetadataItem(name)
-def getMetadataItem(o, k):
-    v = o.GetMetadataItem(k)
-    if v is not None and isinstance(v, str):
-        return v.upper() == 'YES'
-    else:
-        return False
-
-# Return kwargs and driver for OpenEx()
-def setOpenExArgs(option_dict, flags=0):
-    kwargs = { 'nOpenFlags': GDAL_OF_VECTOR | flags }
-
-    fmt = option_dict.get('format', None)
-    if fmt is None:
-        drv = None
-    else:
-        drv = gdal.GetDriverByName(fmt)
-        if drv is None:
-            raise Exception(f'Unknown driver name "{fmt}"')
-        elif not getMetadataItem(drv, GDAL_DCAP_VECTOR):
-            raise Exception(f'Driver "{drv.ShortName}" has no vector capabilities')
-        kwargs['allowed_drivers'] = [ drv.ShortName ]
-
-    oo = option_dict.get('open-options', None)
-    if oo is not None:
-        kwargs['open_options'] = [ k + '=' + str(v) for k, v in oo.items() ]
-    return kwargs, drv
+from common import gdalSetOpenExArgs, gdalGetMetadataItem, gdalVersionMin, escapeIdentifier
 
 # Open and return the output DS. It is created if create=False or
 # create-options is a non-empty dictionary.
 def openOutputDS(def_dict):
     path = def_dict['path']
-    kwargs, drv = setOpenExArgs(def_dict, flags=GDAL_OF_UPDATE|GDAL_OF_VERBOSE_ERROR)
+    kwargs, drv = gdalSetOpenExArgs(def_dict, flags=GDAL_OF_UPDATE|GDAL_OF_VERBOSE_ERROR)
     try:
         logging.debug('OpenEx(%s, %s)', path, str(kwargs))
         return gdal.OpenEx(path, **kwargs)
@@ -111,7 +83,7 @@ def openOutputDS(def_dict):
         if not def_dict.get('create', False) and dsco is None:
             # not configured for creation
             raise e
-        if drv is None or not getMetadataItem(drv, GDAL_DCAP_CREATE):
+        if drv is None or not gdalGetMetadataItem(drv, GDAL_DCAP_CREATE):
             # not capable of creation
             raise e
 
@@ -349,13 +321,13 @@ def setFieldIf(cond, attrName, val, data, fldName, drvName, log=logging.warning)
 # constraints.)
 def validateSchema(layers, drvo=None, lco_defaults=None):
     # Cf. https://github.com/OSGeo/gdal/blob/master/NEWS.md
-    if common.gdal_version_min(maj=3, min=7):
+    if gdalVersionMin(maj=3, min=7):
         # list of capability flags supported by the CreateField() API
         drvoFieldDefnFlags = drvo.GetMetadataItem(gdalconst.DMD_CREATION_FIELD_DEFN_FLAGS)
         drvoFieldDefnFlags = drvoFieldDefnFlags.split(' ') if drvoFieldDefnFlags is not None else []
         drvoSupportsFieldComment = 'Comment' in drvoFieldDefnFlags
         # GetTZFlag()/SetTZFlag() and OGR_TZFLAG_* constants added in 3.8.0
-        hasTZFlagSupport = common.gdal_version_min(maj=3, min=8)
+        hasTZFlagSupport = gdalVersionMin(maj=3, min=8)
     else:
         # list of flags supported by the OGRLayer::AlterFieldDefn() API
         drvoFieldDefnFlags = drvo.GetMetadataItem(gdalconst.DMD_ALTER_FIELD_DEFN_FLAGS)
@@ -366,9 +338,9 @@ def validateSchema(layers, drvo=None, lco_defaults=None):
 
     # cache driver capabilities
     drvoSupportsFieldWidthPrecision = 'WidthPrecision' in drvoFieldDefnFlags
-    drvoSupportsFieldNullable = 'Nullable' in drvoFieldDefnFlags and getMetadataItem(drvo, GDAL_DCAP_NOTNULL_FIELDS)
-    drvoSupportsFieldUnique = 'Unique' in drvoFieldDefnFlags and getMetadataItem(drvo, GDAL_DCAP_UNIQUE_FIELDS)
-    drvoSupportsFieldDefault = 'Default' in drvoFieldDefnFlags and getMetadataItem(drvo, GDAL_DCAP_DEFAULT_FIELDS)
+    drvoSupportsFieldNullable = 'Nullable' in drvoFieldDefnFlags and gdalGetMetadataItem(drvo, GDAL_DCAP_NOTNULL_FIELDS)
+    drvoSupportsFieldUnique = 'Unique' in drvoFieldDefnFlags and gdalGetMetadataItem(drvo, GDAL_DCAP_UNIQUE_FIELDS)
+    drvoSupportsFieldDefault = 'Default' in drvoFieldDefnFlags and gdalGetMetadataItem(drvo, GDAL_DCAP_DEFAULT_FIELDS)
     drvoSupportsFieldAlternativeName = 'AlternativeName' in drvoFieldDefnFlags
 
     for layername, layerdef in layers.items():
@@ -450,62 +422,6 @@ def validateSchema(layers, drvo=None, lco_defaults=None):
             fields[idx] = fld_def2
 
-
-# Return the decoded Spatial Reference System
-def getSRS(srs_str):
-    if srs_str is None:
-        return
-    srs = osr.SpatialReference()
-    if srs_str.startswith('EPSG:'):
-        code = int(srs_str.removeprefix('EPSG:'))
-        srs.ImportFromEPSG(code)
-    else:
-        raise Exception(f'Unknown SRS {srs_str}')
-    logging.debug('Default SRS: "%s" (%s)', srs.ExportToProj4(), srs.GetName())
-    return srs
-
-# Convert extent [minX, minY, maxX, maxY] into a polygon and assign the
-# given SRS. Like apps/ogr2ogr_lib.cpp, we segmentize the polygon to
-# make sure it is sufficiently densified when transforming to source
-# layer SRS for spatial filtering.
-def getExtent(extent, srs=None):
-    if extent is None:
-        return
-
-    if not (isinstance(extent, list) or isinstance(extent, tuple)) or len(extent) != 4:
-        raise Exception(f'Invalid extent {extent}')
-    elif srs is None:
-        raise Exception('Configured extent but no SRS')
-
-    logging.debug('Configured extent in %s: %s',
-        srs.GetName(), ', '.join(map(str, extent)))
-
-    ring = ogr.Geometry(ogr.wkbLinearRing)
-    ring.AddPoint_2D(extent[0], extent[1])
-    ring.AddPoint_2D(extent[2], extent[1])
-    ring.AddPoint_2D(extent[2], extent[3])
-    ring.AddPoint_2D(extent[0], extent[3])
-    ring.AddPoint_2D(extent[0], extent[1])
-
-    polygon = ogr.Geometry(ogr.wkbPolygon)
-    polygon.AddGeometry(ring)
-
-    # we expressed extent as minX, minY, maxX, maxY (easting/northing
-    # ordered, i.e., in traditional GIS order)
-    srs2 = srs.Clone()
-    srs2.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)
-    polygon.AssignSpatialReference(srs2)
-    polygon.TransformTo(srs)
-
-    segment_distance_metre = 10 * 1000
-    if srs.IsGeographic():
-        dfMaxLength = segment_distance_metre / math.radians(srs.GetSemiMajor())
-        polygon.Segmentize(dfMaxLength)
-    elif srs.IsProjected():
-        dfMaxLength = segment_distance_metre / srs.GetLinearUnits()
-        polygon.Segmentize(dfMaxLength)
-
-    return polygon
-
 # Validate the output layer against the provided SRS and creation options
 def validateOutputLayer(lyr, srs=None, options=None):
     ok = True
@@ -799,14 +715,6 @@ def setOutputFieldMap(defn, sources):
                     rule['replace'] = re.compile(rule['replace'])
                 rules[idx] = ( rule['replace'], rule['with'] )
 
-# Escape the given identifier, cf.
-# swig/python/gdal-utils/osgeo_utils/samples/validate_gpkg.py:_esc_id()
-def escapeIdentifier(identifier):
-    if '\x00' in identifier:
-        raise Exception(f'Invalid identifier "{identifier}"')
-    # SQL:1999 delimited identifier
-    return '"' + identifier.replace('"', '""') + '"'
-
 # Clear the given layer (wipe all its features)
 def clearLayer(ds, lyr):
     n = -1
@@ -938,7 +846,7 @@ def setFieldMapValue(fld, idx, val):
 # while we want a single transaction for the entire desination layer,
 # including truncation, source imports, and metadata changes.
 def importSource2(lyr_dst, path, args={}, basedir=None, extent=None):
-    kwargs, _ = setOpenExArgs(args, flags=GDAL_OF_READONLY|GDAL_OF_VERBOSE_ERROR)
+    kwargs, _ = gdalSetOpenExArgs(args, flags=GDAL_OF_READONLY|GDAL_OF_VERBOSE_ERROR)
     path2 = path if basedir is None else str(basedir.joinpath(path))
 
     logging.debug('OpenEx(%s, %s)', path2, str(kwargs))
@@ -1245,8 +1153,8 @@ if __name__ == '__main__':
             lco_defaults=common.config['dataset'].get('create-layer-options', None))
 
     # get configured Spatial Reference System and extent
-    srs = getSRS(common.config.get('SRS', None))
-    extent = getExtent(common.config.get('extent', None), srs=srs)
+    srs = common.getSRS(common.config.get('SRS', None))
+    extent = common.getExtent(common.config.get('extent', None), srs=srs)[0]
 
     if args.lockfile is not None:
         # obtain an exclusive lock and don't release it until exiting the program
diff --git a/webmap-publish b/webmap-publish
new file mode 100755
index 0000000..36bed88
--- /dev/null
+++ b/webmap-publish
@@ -0,0 +1,760 @@
+#!/usr/bin/python3
+
+#----------------------------------------------------------------------
+# Backend utilities for the Klimatanalys Norr project (create MVT tiles)
+# Copyright © 2024 Guilhem Moulin
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#----------------------------------------------------------------------
+
+from os import O_RDONLY, O_WRONLY, O_CREAT, O_EXCL, O_TRUNC, O_CLOEXEC, O_DIRECTORY
+import os
+import errno
+from fcntl import flock, LOCK_EX, LOCK_UN
+from string import hexdigits
+import json
+import logging
+import math
+import random
+import argparse
+import tempfile
+from pathlib import Path
+import numpy
+
+from osgeo import gdal, ogr, osr
+from osgeo.gdalconst import (
+    OF_VECTOR as GDAL_OF_VECTOR,
+    OF_READONLY as GDAL_OF_READONLY,
+    OF_UPDATE as GDAL_OF_UPDATE,
+    OF_VERBOSE_ERROR as GDAL_OF_VERBOSE_ERROR,
+)
+gdal.UseExceptions()
+
+import common
+from common import (
+    format_bytes,
+    gdalSetOpenExArgs,
+    gdalVersionMin,
+    gdalGetMetadataItem,
+    escapeIdentifier
+)
+
+# Open and return the source dataset.
+def openSourceDS(def_dict):
+    # merge 'open-options-publish' into 'open-options'
+    opts2 = def_dict.get('open-options-publish', None)
+    if opts2 is not None:
+        if 'open-options' not in def_dict:
+            def_dict['open-options'] = {}
+        def_dict['open-options'] |= opts2
+    kwargs, _ = gdalSetOpenExArgs(def_dict, flags=GDAL_OF_READONLY|GDAL_OF_VERBOSE_ERROR)
+
+    path = def_dict['path']
+    logging.debug('OpenEx(%s, %s)', path, str(kwargs))
+    return gdal.OpenEx(path, **kwargs)
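[Editor's note: for illustration, a minimal sketch (with hypothetical option values) of what the 'open-options-publish' merge in openSourceDS() amounts to before the dataset is opened read-only:

    def_dict = {
        'path': 'PG:',
        'format': 'PostgreSQL',
        'open-options': {'USER': 'webmap_import', 'DBNAME': 'webmap'},
        'open-options-publish': {'USER': 'webmap_guest'},
    }
    merged = def_dict['open-options'] | def_dict['open-options-publish']
    open_options = [k + '=' + str(v) for k, v in merged.items()]
    print(open_options)    # ['USER=webmap_guest', 'DBNAME=webmap']

The publish-specific options take precedence key by key, so a read-only role can be swapped in without duplicating the rest of the connection settings.]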
+# Choose a random subdirectory under the given base directory and create
+# a new MVT dataset there.
+# The old MVT dataset (if any) remains, which is intentional: we don't
+# want to disrupt browser caches and tile → attribute mapping.
+def createMVT(basedir, options=None):
+    drv = gdal.GetDriverByName('MVT')
+    if drv is None:
+        raise Exception('Unknown GDAL driver for format "MVT"')
+
+    # choose an available name at random from 00 to FF
+    weights = [1] * 256
+    for path in basedir.iterdir():
+        filename = path.name
+        if len(filename) == 2 and filename[0] in hexdigits and filename[1] in hexdigits:
+            n = int(filename, 16)
+            weights[n] = 0
+    if all([w == 0 for w in weights]):
+        raise Exception(f'No name available in {basedir} (forgot to cleanup?)')
+
+    name = random.choices(range(256), weights=weights, k=1)[0]
+    name = f'{name:0>2x}'
+    path = str(basedir.joinpath(name))
+
+    # merge options from the config file
+    defaults_map = {
+        'min-zoom': 'MINZOOM',
+        'max-zoom': 'MAXZOOM',
+        'max-size': 'MAX_SIZE',
+        'max-features': 'MAX_FEATURES',
+        'simplification': 'SIMPLIFICATION',
+        'simplification-max-zoom': 'SIMPLIFICATION_MAX_ZOOM',
+        'tiling-scheme': 'TILING_SCHEME',
+        'extent': 'EXTENT',
+    }
+    defaults_options = common.config.get('vector-tiles', None)
+    if defaults_options is not None:
+        for k, v in defaults_options.items():
+            if k not in defaults_map:
+                logging.warning('No default map for key "%s", ignoring', k)
+                continue
+            if options is None:
+                options = {}
+            k = defaults_map[k]
+            if k not in options:
+                if isinstance(v, list):
+                    v = ','.join(map(str, v))
+                options[k] = v
+
+    kwargs = { 'eType': gdal.GDT_Unknown }
+    if options is not None:
+        kwargs['options'] = opts = []
+        for k, v in options.items():
+            opts.append(k + '=' + str(v))
+
+    logging.debug('Create(%s, %s, eType=%s%s)', drv.ShortName, path, kwargs['eType'],
+        ', options=' + str(kwargs['options']) if 'options' in kwargs else '')
+    ds = drv.Create(path, 0, 0, 0, **kwargs)
+    return ds, name
+
+# Filter a source layer.
+def getSourceLayer(ds_src, layerdef, extent=None):
+    layername = layerdef['__layername-src__']
+    lyr_src = ds_src.GetLayerByName(layername)
+    if lyr_src is None:
+        raise Exception(f'Source dataset has no layer named "{layername}"')
+
+    count0 = -1
+    if lyr_src.TestCapability(ogr.OLCFastFeatureCount):
+        count0 = lyr_src.GetFeatureCount(force=0)
+
+    srs_src = lyr_src.GetSpatialRef()
+    if srs_src is None:
+        raise Exception('Source layer "{}" has no SRS'.format(layername))
+    elif not srs_src.IsProjected():
+        raise Exception('Source layer "{}" SRS is not projected: {}'.format(
+            layername, srs_src.ExportToPrettyWkt()))
+    # "the value to multiply by linear distances to transform them to meters"
+    linearUnits = srs_src.GetLinearUnits()
+    logging.debug('Source layer "%s" SRS ("%s") linear units: %f', layername,
+        srs_src.GetName(), linearUnits)
+
+    # get geometry field of the source layer
+    defn = lyr_src.GetLayerDefn()
+    geomFieldCount = defn.GetGeomFieldCount()
+    if geomFieldCount != 1:
+        logging.warning('Source layer "%s" has %d != 1 geometry fields', lyr_src.GetName(), geomFieldCount)
+    geomField = defn.GetGeomFieldDefn(0)
+    geomType = geomField.GetType()
+    geomFieldName_esc = escapeIdentifier(geomField.GetName())
+
+    # transform extent to source SRS
+    if extent is None or extent.GetSpatialReference().IsSame(srs_src):
+        extent_src = extent
+    else:
+        extent_src = extent.Clone()
+        if extent_src.TransformTo(srs_src) != ogr.OGRERR_NONE:
+            raise Exception('Could not transform extent {} to {}'.format(
+                extent.ExportToWkt(), srs_src.GetName()))
+
+
+    reserved_fields = set(map(lambda x: x.lower(), [
+        'geom',
+        'GeomArea',
+        'GeomPerimeter',
+        'GeomLength',
+    ]))
+
+    # build SQL query
+    drv = ds_src.GetDriver()
+    columns = []
+    for i in range(defn.GetFieldCount()):
+        fld = defn.GetFieldDefn(i)
+        fldType = fld.GetType()
+        if fld.GetName().lower() in reserved_fields:
+            raise Exception(f'Invalid column name "{fld.GetName()}"')
+        fldName_esc = escapeIdentifier(fld.GetName())
+        column = 'm.' + fldName_esc
+        # for date/time/datetime fields, we let the RDBMS engine do the formatting if possible
+        if fldType == ogr.OFTDate:
+            if drv.ShortName == 'PostgreSQL':
+                column = 'to_char(' + column + ', \'YYYY-MM-DD\') AS '
+            elif drv.ShortName == 'GPKG':
+                column = 'strftime(\'%F\', ' + column + ') AS '
+        elif fldType == ogr.OFTTime:
+            if drv.ShortName == 'PostgreSQL':
+                column = 'to_char(' + column + ', \'HH24:MI:SSOF\') AS '
+            elif drv.ShortName == 'GPKG':
+                # XXX SQLite's strftime() lacks support for time zones
+                column = 'strftime(\'%T\', ' + column + ') AS '
+        elif fldType == ogr.OFTDateTime:
+            if drv.ShortName == 'PostgreSQL':
+                column = 'to_char(' + column + ', \'YYYY-MM-DD"T"HH24:MI:SSOF\') AS '
+            elif drv.ShortName == 'GPKG':
+                # XXX SQLite's strftime() lacks support for time zones
+                column = 'strftime(\'%FT%T\', ' + column + ') AS '
+        if column.endswith(' AS '):
+            column += fldName_esc
+        columns.append(column)
+
+    # add columns with computed info about the geometry (before clipping and rounded to 0.01m²/0.01m)
+    if ogr.GT_IsSubClassOf(geomType, ogr.wkbSurface) or ogr.GT_IsSubClassOf(geomType, ogr.wkbMultiSurface):
+        columns.append('ROUND(CAST(ST_Area(m.' + geomFieldName_esc + ') * '
+            + str(linearUnits) + '*' + str(linearUnits) + ' AS numeric), 2) AS "GeomArea"')
+        columns.append('ROUND(CAST(ST_Perimeter(m.' + geomFieldName_esc + ') * '
+            + str(linearUnits) + ' AS numeric), 2) AS "GeomPerimeter"')
+    elif ogr.GT_IsSubClassOf(geomType, ogr.wkbCurve) or ogr.GT_IsSubClassOf(geomType, ogr.wkbMultiCurve):
+        columns.append('ROUND(CAST(ST_Length(m.' + geomFieldName_esc + ') * '
+            + str(linearUnits) + ' AS numeric), 2) AS "GeomLength"')
+    columns.append('m.' + geomFieldName_esc)
+
+    query = 'SELECT ' + ', '.join(columns) + ' FROM ' + escapeIdentifier(lyr_src.GetName()) + ' m'
+
+    # add WHERE clauses and spatial filter; for GPKG/SQlite3, the query
+    # is too complex and the driver can't use ExecuteSQL()'s spatialFilter
+    # option so we add the ST_Intersects() condition
+    if drv.ShortName == 'PostgreSQL':
+        spatialFilter = extent_src
+    else:
+        spatialFilter = None
+
+    cond = layerdef.get('where', None)
+    if (extent_src is not None and spatialFilter is None) or cond is not None:
+        query += ' WHERE '
+        if extent_src is not None and spatialFilter is None:
+            query += 'ST_Intersects(m.' + geomFieldName_esc + ', \'' + extent_src.ExportToWkt() + '\')'
+            if cond is not None:
+                query += ' AND '
+        if cond is not None:
+            query += '(' + cond.strip() + ')'
+
+    logging.debug('SQL query: %s', query)
+    lyr_src = ds_src.ExecuteSQL(query, spatialFilter=spatialFilter)
+
+    count1 = -1
+    if lyr_src.TestCapability(ogr.OLCFastFeatureCount):
+        count1 = lyr_src.GetFeatureCount(force=0)
+    if count0 >= 0 and count1 >= 0:
+        logging.debug('Source layer "%s" has %d features, of which %d are to be exported',
+            layername, count0, count1)
+    return lyr_src
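[Editor's note: to make the query assembly above concrete, for a hypothetical PostgreSQL polygon layer "my_layer" with one date column, getSourceLayer() would produce a statement along these lines (identifiers and the WHERE condition are illustrative; on PostgreSQL the extent is passed separately as ExecuteSQL()'s spatialFilter rather than as an ST_Intersects() clause):

    example_query = '''
    SELECT m."Name",
           to_char(m."Date", 'YYYY-MM-DD') AS "Date",
           ROUND(CAST(ST_Area(m."geom") * 1.0*1.0 AS numeric), 2) AS "GeomArea",
           ROUND(CAST(ST_Perimeter(m."geom") * 1.0 AS numeric), 2) AS "GeomPerimeter",
           m."geom"
    FROM "my_layer" m
    WHERE ("Raderat" IS FALSE)
    '''

Computing GeomArea/GeomPerimeter in the source SRS before clipping keeps the published attributes exact even though the tile geometries themselves are later clipped to the configured extent.]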
+# Export a layer from the source dataset, returning the next FID available.
+# TODO bump metadata_maxsize until the compressed version doesn't exceed 64k
+def exportLayer(lyr_dst, lyr_src, srs=None, clip=None, layername=None,
+        coordinate_precision=None, nextfid=0,
+        # rotate after the feature which brings the file to the 256kiB mark
+        metadata_maxsize=256*1024, metadata=None):
+    if srs is None:
+        raise Exception('Missing target SRS')
+    if clip is not None:
+        clip_srs = clip.GetSpatialReference()
+        if not clip_srs.IsSame(srs):
+            raise Exception('Clipping geometry {} has SRS {},\nexpected {}'.format(
+                clip.ExportToWkt(), clip_srs.ExportToPrettyWkt(), srs.ExportToPrettyWkt()))
+    if metadata is None:
+        metadata_fp = None
+    else:
+        metadata_fp = metadata.get('fp', None)
+        exportToJson_options = []
+        if coordinate_precision is not None:
+            exportToJson_options = ['COORDINATE_PRECISION=' + str(coordinate_precision)]
+        metadata_size = metadata.get('cursize', 0)
+
+    srs_src = lyr_src.GetLayerDefn().GetGeomFieldDefn(0).GetSpatialRef()
+    if srs.IsSame(srs_src):
+        logging.debug('Both source and destination have the same SRS (%s), skipping coordinate transformation',
+            srs.GetName())
+        ct = None
+    else:
+        # TODO Use GetSupportedSRSList() and SetActiveSRS() with GDAL ≥3.7.0
+        # when possible, see apps/ogr2ogr_lib.cpp
+        logging.debug('Creating transformation from source SRS (%s) to destination SRS (%s)',
+            srs_src.GetName(), srs.GetName())
+        ct = osr.CoordinateTransformation(srs_src, srs)
+        if ct is None:
+            raise Exception(f'Could not create transformation from source SRS ({srs_src.GetName()}) '
+                + f'to destination SRS ({srs.GetName()})')
+
+    # extract metadata index and filename from nextfid (an exception is
+    # raised if nextfid doesn't fit in 4 bytes)
+    bstr = nextfid.to_bytes(4, byteorder='big')
+    nextfid_idx = int.from_bytes(bstr[-2:], byteorder='big')
+    nextfid_p = int.from_bytes(bstr[:2], byteorder='big')
+    metadata_maxlen = 1 << (8*2)
+
+    defn_dst = lyr_dst.GetLayerDefn()
+
+    # we roll our own loop rather than using ogr2ogr/GDALVectorTranslate()
+    # since we want to do a custom JSON serialization at the same time,
+    # in order to avoid duplicating properties across tiles or clipping
+    # overlays at tile boundaries
+
+    count = 0
+    feature = lyr_src.GetNextFeature()
+    while feature is not None:
+        geom = feature.GetGeometryRef().Clone()
+        if ct is not None and geom.Transform(ct) != ogr.OGRERR_NONE:
+            raise Exception('Could not apply coordinate transformation')
+
+        if metadata is not None:
+            # serialize the feature to JSON and save it
+            # TODO there is a lot of room for improvement here
+            #  - geometries could be exported to WKB or FlatGeoBuf rather than GeoJSON
+            #  - columns (schema) could be saved in layers.json only, so
+            #    the object would be an array of length n+1 where the
+            #    geometry would be at index 0 or n
+            #  - data could be serialized to binary, cf.
+            #    BJSON or GeoBuf
+            feature.SetGeometry(geom)
+            obj = feature.ExportToJson(as_object=True, options=exportToJson_options)
+            #props = { key:value for key, value in obj['properties'].items() if value is not None }
+            props = obj['properties']
+            props['geom'] = obj['geometry']
+
+            bstr = json.dumps(props, ensure_ascii=False, separators=(',',':')).encode('utf-8')
+
+            #if nextfid_idx >= metadata_maxlen or nextfid_p * metadata_maxlen + nextfid_idx != nextfid:
+            #    raise Exception(f'impossible: {nextfid_p}*{metadata_maxlen} + {nextfid_idx} != {nextfid}')
+
+            if metadata_fp is None:
+                metadata_fp = openNewMetadataFile(metadata, nextfid_p)
+                metadata_size = 0
+            else:
+                metadata_fp.write(b',')
+
+            metadata_size += 1 + metadata_fp.write(bstr)
+
+        # clip the feature and export it (without fields) to MVT
+        geom2 = geom if clip is None else geom.Intersection(clip)
+        feature2 = ogr.Feature(defn_dst)
+        feature2.SetGeometryDirectly(geom2)
+        feature2.SetFID(nextfid)
+
+        if lyr_dst.CreateFeature(feature2) != ogr.OGRERR_NONE:
+            raise Exception(f'Could not transfer source feature #{feature.GetFID()}')
+
+        nextfid += 1
+        if metadata is not None:
+            nextfid_idx += 1
+            if metadata_size > metadata_maxsize or nextfid_idx >= metadata_maxlen:
+                closeMetadataFile(metadata)
+                metadata_fp = metadata_size = None
+                nextfid_p += 1
+                nextfid_idx = 0
+                nextfid = nextfid_p * metadata_maxlen
+                # will open a new file when processing the next feature
+
+        count += 1
+        feature = lyr_src.GetNextFeature()
+
+    logging.info('Exported %d features to layer "%s" from "%s"', count,
+        lyr_dst.GetName(), layername)
+
+    if metadata is not None:
+        # save the size of the metadata file currently open for proper accounting
+        metadata['cursize'] = 0 if metadata_fp is None else metadata_size
+    return nextfid, count
+
+# Open a new metadata file.
+def openNewMetadataFile(metadata, nextfid_p):
+    name = nextfid_p.to_bytes(length=2, byteorder='big').hex()
+    name = Path(metadata['tempdir']).joinpath(name).with_suffix('.json')
+    logging.debug('open(%s)', str(name))
+
+    fd = os.open(name, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC, mode=0o0644,
+        dir_fd=metadata.get('dirfd', None))
+    metadata['fp'] = fp = os.fdopen(fd, mode='wb', closefd=True)
+    fp.write(b'[')
+    return fp
+
+# Close a metadata file.
+def closeMetadataFile(metadata):
+    fp = metadata.pop('fp')
+    fp.write(b']')
+    logging.debug('close(%d)', fp.fileno())
+    fp.close()
+
+# Wrapper around os.write() that loops until the entire data is written.
+def write_all(fd, buf):
+    offset = 0
+    while offset < len(buf):
+        try:
+            n = os.write(fd, buf[offset:])
+        except IOError as e:
+            if e.errno != errno.EAGAIN:
+                raise e
+            #logging.debug('Got EAGAIN while trying to write')
+        else:
+            #logging.debug('Wrote %d bytes of %d', n, len(buf) - offset)
+            offset += n
+    return offset
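[Editor's note: a small sketch of the FID packing used by exportLayer() above — the upper two bytes of the 32-bit FID select the attribute (metadata) file, the lower two bytes give the feature's position in that file, so a client can locate a feature's properties from its FID alone (mirroring the to_bytes()/from_bytes() calls above):

    def split_fid(fid):
        bstr = fid.to_bytes(4, byteorder='big')
        part = int.from_bytes(bstr[:2], byteorder='big')     # attribute file number
        idx = int.from_bytes(bstr[-2:], byteorder='big')     # index within that file
        return part, idx

    assert split_fid(0x00010002) == (1, 2)   # FID 65538 → index 2 of file "0001.json"
]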
+# Compress a file to Brotli.
+# TODO increase chunksize to see if it helps with performance
+def compress_brotli(path, suffix='.br', dir_fd=None, chunksize=65536,
+        oFlags=0, **kwargs):
+    compressor = brotli.Compressor(**kwargs)
+    fd_in = os.open(path, O_RDONLY|O_CLOEXEC, dir_fd=dir_fd)
+    size_in = size_out = 0
+    try:
+        # hard-code the mode to save an fstat(2) call
+        fd_out = os.open(path + suffix, O_WRONLY|O_CREAT|O_CLOEXEC|oFlags,
+            mode=0o0644, dir_fd=dir_fd)
+        try:
+            buf_in = os.read(fd_in, chunksize)
+            while len(buf_in) > 0:
+                #logging.debug('Read %d bytes of %d', len(buf_in), chunksize)
+                size_in += len(buf_in)
+                size_out += write_all(fd_out, compressor.process(buf_in))
+                buf_in = os.read(fd_in, chunksize)
+            size_out += write_all(fd_out, compressor.finish())
+        finally:
+            os.close(fd_out)
+    finally:
+        os.close(fd_in)
+    return size_in, size_out
+
+
+if __name__ == '__main__':
+    common.init_logger(app=os.path.basename(__file__), level=logging.INFO)
+
+    parser = argparse.ArgumentParser(description='Export GIS layers to Mapbox Vector Tiles (MVT).')
+    parser.add_argument('--debug', action='count', default=0,
+        help=argparse.SUPPRESS)
+    parser.add_argument('--destdir', default=os.curdir,
+        help=f'destination directory for generated files (default: {os.curdir})')
+    parser.add_argument('--lockfile', default=None,
+        help='obtain an exclusive lock while exporting features to MVT')
+    parser.add_argument('--name', default=None,
+        help='name to use in the layer map (default: last component of DESTDIR)')
+    parser.add_argument('--webroot', default=None,
+        help='path to the web root')
+    parser.add_argument('--metadata', default=None,
+        help='path to the JSON metadata file (default: DESTDIR/../metadata.json)')
+    parser.add_argument('--metadata-lockfile', default=None,
+        help='path to the JSON metadata lock file (default: DESTDIR/../.metadata.json.lck)')
+    parser.add_argument('--compress', action=argparse.BooleanOptionalAction, default=False,
+        help='whether to compress tiles (default: False)')
+    parser.add_argument('groupname', nargs='*', help='group layer name(s) to process')
+    args = parser.parse_args()
+
+    if args.debug > 0:
+        logging.getLogger().setLevel(logging.DEBUG)
+        if args.debug > 1:
+            from http.client import HTTPConnection
+            HTTPConnection.debuglevel = 1
+            requests_log = logging.getLogger('urllib3')
+            requests_log.setLevel(logging.DEBUG)
+            requests_log.propagate = True
+
+    if args.debug > 0:
+        logging.getLogger().setLevel(logging.DEBUG)
+        if args.debug > 1:
+            gdal.ConfigurePythonLogging(enable_debug=True)
+
+    if args.compress:
+        # fail early if the module is missing
+        import brotli
+
+    destdirname = args.name if args.name is not None else Path(args.destdir).name
+    if not destdirname:
+        logging.error('Could not infer --name value')
+        exit(1)
+
+    common.load_config(groupnames=None if args.groupname == [] else args.groupname)
+
+    # validate configuration
+    if 'dataset' not in common.config:
+        raise Exception('Configuration does not specify source dataset')
+
+    export_layers = {}
+    for layername, layerdef in common.config.get('layers', {}).items():
+        exportdef = layerdef.get('publish', None)
+        if exportdef is None:
+            raise Exception(f'Layer "{layername}" has no publication definition')
+        if isinstance(exportdef, str):
+            exportdef = { exportdef:{} }
+        elif isinstance(exportdef, list):
+            exportdef = { l:{} for l in exportdef }
+        for export_layername, export_layerdef in exportdef.items():
+            if export_layername not in export_layers:
+                export_layers[export_layername] = []
+            export_layers[export_layername].append(export_layerdef)
+            export_layerdef['__layername-src__'] = layername
+
+    if args.destdir != os.curdir:
+        try:
+            os.mkdir(args.destdir, mode=0o0755)
+        except FileExistsError:
+            pass
+
+    # open source DS
+    ds = openSourceDS(common.config['dataset'])
+
+    # get configured Spatial Reference System and extent; force traditional GIS order
+    # on the target SRS since it's what MVT and JSON export are expecting
+    srs = common.getSRS(common.config.get('SRS', None))
+    srs.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)
+    extent, extent_simple = common.getExtent(common.config.get('extent', None), srs=srs)
+
+    if srs.IsProjected():
+        # will export with centimeter precision
+        linearUnits = srs.GetLinearUnits()
+        coordinate_precision = math.ceil(math.log10(linearUnits) + 2)
+        if coordinate_precision < 0:
+            coordinate_precision = 0
+        logging.debug('Coordinate precision: %d', coordinate_precision)
+
+    if args.lockfile is not None:
+        lock_fd = os.open(args.lockfile, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
+        logging.debug('flock("%s", LOCK_EX)', args.lockfile)
+        flock(lock_fd, LOCK_EX)
+
+    # always lock destdir (until the program exits) to avoid concurrent
+    # generations of the same tileset
+    destdir_fd = os.open(args.destdir, O_RDONLY|O_CLOEXEC|O_DIRECTORY)
+    logging.debug('flock("%s", LOCK_EX)', args.destdir)
+    flock(destdir_fd, LOCK_EX)
+
+    nFeatures = 0
+    tile_extension = '.pbf'
+    basedir = Path(f'/proc/self/fd/{destdir_fd}')
+    with tempfile.TemporaryDirectory() as tempdir:
+        # use a temporary directory for TEMPORARY_DB
+        logging.debug('Using "%s" as temporary directory', tempdir)
+
+        ds_dst, dirname = createMVT(basedir, options={
+            'FORMAT': 'DIRECTORY',
+            'COMPRESS': 'NO',
+            'TYPE': 'overlay',
+            'BUFFER': 32,
+            'TILE_EXTENSION': tile_extension.removeprefix('.'),
+            'TEMPORARY_DB': Path(tempdir).joinpath('tmp-mvt.db'),
+        })
+
+        # use a temporary directory for the metadata to avoid messing
+        # around with the MVT directory while building is in progress
+        metadata = { 'dirfd': destdir_fd }
+        metadata['tempdir'] = Path(tempfile.mkdtemp(prefix='.' + dirname + '-attr.',
+            dir=basedir)).relative_to(basedir)
+        try:
+            logging.debug('Using "%s" as temporary metadata directory', metadata['tempdir'])
+            os.chmod(metadata['tempdir'], 0o0755, dir_fd=metadata.get('dirfd', None),
+                follow_symlinks=False)
+            nextfid = 0
+            for layername, layerdefs in export_layers.items():
+                # create destination layer
+                lyr_dst = ds_dst.CreateLayer(layername, srs=srs, geom_type=ogr.wkbUnknown)
+                if lyr_dst is None:
+                    raise Exception(f'Could not create destination layer "{layername}"')
+
+                for layerdef in layerdefs:
+                    lyr_src = getSourceLayer(ds, layerdef, extent=extent)
+                    try:
+                        # use the non-dense geometry (a rectangle) for clipping as it's faster
+                        # and there is no need to reproject (the SRSs match)
+                        nextfid, count0 = exportLayer(lyr_dst, lyr_src,
+                            layername=layerdef['__layername-src__'],
+                            srs=srs, clip=extent_simple,
+                            coordinate_precision=coordinate_precision,
+                            nextfid=nextfid, metadata=metadata)
+                        nFeatures += count0
+                    finally:
+                        ds.ReleaseResultSet(lyr_src)
+                lyr_dst = None
+
+            # close datasets
+            ds = None
+            ds_dst = None
+
+            # move metadata to the MVT structure (now that it has been closed)
+            logging.debug('rename(%s/%s, %s/%s/attr)',
+                args.destdir, metadata['tempdir'],
+                args.destdir, dirname)
+            os.rename(metadata['tempdir'],
+                Path(dirname).joinpath('attr'),
+                src_dir_fd=metadata.get('dirfd', None),
+                dst_dir_fd=destdir_fd)
+            metadata.pop('tempdir', None)
+
+        except Exception as e:
+            # cleanup
+            for root, dirnames, filenames, rootfd in os.fwalk(top=metadata['tempdir'],
+                    dir_fd=metadata.get('dirfd', None), topdown=False,
+                    follow_symlinks=False):
+                for filename in filenames:
+                    #logging.debug('unlink(%s/%s)', root, filename)
+                    os.unlink(filename, dir_fd=rootfd)
+                for dirname in dirnames:
+                    #logging.debug('rmdir(%s/%s)', root, dirname)
+                    os.rmdir(dirname, dir_fd=rootfd)
+            #logging.debug('rmdir(%s/%s)', args.destdir, metadata['tempdir'])
+            os.rmdir(metadata['tempdir'], dir_fd=metadata.get('dirfd', None))
+            raise e
+
+        finally:
+            if 'fp' in metadata:
+                closeMetadataFile(metadata)
+
+    srs = None
+    extent = None
+    extent_simple = None
+
+    try:
+        # OpenLayers doesn't make use of that file so delete it
+        os.unlink(Path(dirname).joinpath('metadata.json'), dir_fd=destdir_fd)
+    except FileNotFoundError:
+        pass
+
+    logging.info('Done creating tiles and attribute files (total %d features)', nFeatures)
+
+    if args.lockfile is not None:
+        # release the exclusive lock, the remaining work shouldn't be as demanding
+        logging.debug('flock("%s", LOCK_UN)', args.lockfile)
+        flock(lock_fd, LOCK_UN)
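[Editor's note: the compression pass below hard-codes quality=5 (see the TODO about benchmarking). One way to measure that trade-off offline, using the same Compressor API as compress_brotli() above, on a sample tile (hypothetical file name):

    import time
    import brotli   # same module the script uses

    def bench(data, qualities=(1, 5, 9, 11)):
        for q in qualities:
            c = brotli.Compressor(quality=q)
            t0 = time.perf_counter()
            n = len(c.process(data)) + len(c.finish())
            dt = time.perf_counter() - t0
            print(f'quality={q}: {len(data)} -> {n} bytes in {dt*1000:.1f} ms')

    with open('0/0/0.pbf', 'rb') as fp:   # hypothetical sample tile
        bench(fp.read())
]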
+
+    stats_tiles = []
+    stats_metadata = []
+    if args.compress:
+        compress_mode = brotli.MODE_GENERIC
+
+    # collect stats and optionally compress
+    # TODO: optimize traversal, cf.
+    # https://r3n.hashnode.dev/tech-chronicles-conquer-memory-monsters-with-pythons-yield-in-large-directory-processing
+    # TODO: benchmark brotli quality to trade off compression ratio vs. execution time
+    # (also, tile and attribute files can have different levels if needs be)
+    for root, _, filenames, rootfd in os.fwalk(top=dirname, dir_fd=destdir_fd,
+            follow_symlinks=False):
+        for filename in filenames:
+            lfilename = filename.lower()
+            if lfilename.endswith(tile_extension):
+                arr = stats_tiles
+            elif lfilename.endswith('.json'):
+                arr = stats_metadata
+            else:
+                continue
+
+            if not args.compress:
+                st = os.stat(filename, dir_fd=rootfd)
+                arr.append(st.st_size)
+            else:
+                #logging.debug("Compressing %s/%s", root, filename)
+                szi, szo = compress_brotli(filename, dir_fd=rootfd,
+                    oFlags=O_EXCL, mode=compress_mode, quality=5)
+                arr.append((szi, szo))
+
+    # display stats
+    if len(stats_tiles) > 0:
+        stats_tiles = numpy.array(stats_tiles, dtype=numpy.int64, copy=False)
+
+        arr = stats_tiles[:,0] if args.compress else stats_tiles
+        tot = numpy.sum(arr)
+        logging.info('Tile count: %d [min=%s, max=%s, sum=%s, avg=%s]',
+            arr.size,
+            format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
+            format_bytes(tot), format_bytes(round(tot/arr.size)))
+
+        if args.compress:
+            arr = stats_tiles[:,1]
+            ztot = numpy.sum(arr)
+            logging.info('After compression: min=%s, max=%s, sum=%s, avg=%s (ratio %.1f%%)',
+                format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
+                format_bytes(ztot), format_bytes(round(ztot/arr.size)),
+                100. * (1. - ztot/tot))
+
+    if len(stats_metadata) > 0:
+        stats_metadata = numpy.array(stats_metadata, dtype=numpy.int64, copy=False)
+
+        arr = stats_metadata[:,0] if args.compress else stats_metadata
+        tot = numpy.sum(arr)
+        logging.info('Metadata file count: %d [min=%s, max=%s, sum=%s, avg=%s]',
+            arr.size,
+            format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
+            format_bytes(tot), format_bytes(round(tot/arr.size)))
+
+        if args.compress:
+            arr = stats_metadata[:,1]
+            ztot = numpy.sum(arr)
+            logging.info('After compression: min=%s, max=%s, sum=%s, avg=%s (ratio %.1f%%)',
+                format_bytes(numpy.min(arr)), format_bytes(numpy.max(arr)),
+                format_bytes(ztot), format_bytes(round(ztot/arr.size)),
+                100. * (1. - ztot/tot))
+
+    # update metadata.json
+    if args.metadata is None:
+        mapfile = Path(args.destdir).parent.joinpath('metadata.json')
+    else:
+        mapfile = Path(args.metadata)
+
+    # first, place an exclusive lock (it is important that all
+    # invocations of the program with the same --metadata file, even on
+    # different layer groups, share the same lock file to avoid
+    # concurrent updates of the metadata.json)
+    if args.metadata_lockfile is None:
+        mapfile_lock = mapfile.with_name('.' + mapfile.name + '.lck')
+    else:
+        mapfile_lock = Path(args.metadata_lockfile)
+    mapfile_lock_fd = os.open(mapfile_lock, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
+    logging.debug('flock("%s", LOCK_EX)', mapfile_lock)
+    flock(mapfile_lock_fd, LOCK_EX)
+
+    # load the file
+    try:
+        with open(mapfile, mode='rb') as fp:
+            mapobj = json.load(fp)
+    except FileNotFoundError:
+        mapobj = {}
+
+    # remove the compressed version as we prepare to update the
+    # uncompressed one
+    mapfile_compressed = mapfile.with_name(mapfile.name + '.br')
+    try:
+        logging.debug('unlink(%s)', mapfile_compressed)
+        os.unlink(mapfile_compressed)
+    except FileNotFoundError:
+        pass
+
+    k = 'layermap'
+    if k not in mapobj:
+        layermap = mapobj[k] = {}
+    else:
+        layermap = mapobj[k]
+    destpath = Path(args.destdir)
+    if args.webroot is not None:
+        destpath = destpath.relative_to(args.webroot)
+    destpath = '/' + destpath.as_posix().removeprefix('/') + '/' + dirname
+    layermap[destdirname] = destpath
+
+    # dump the new object to a temporary file
+    bstr = json.dumps(mapobj, ensure_ascii=False, separators=(',',':')).encode('utf-8')
+    mapfile_tmp = mapfile.with_stem('.' + mapfile.stem + '.new')
+    fd = os.open(mapfile_tmp, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
+    with os.fdopen(fd, mode='wb', closefd=True) as fp:
+        fp.write(bstr)
+
+    # compress the temporary file
+    if args.compress:
+        mapfile_tmp_compressed = mapfile_tmp.with_name(mapfile_tmp.name + '.br')
+        compressor = brotli.Compressor()
+        fd = os.open(mapfile_tmp_compressed, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644)
+        try:
+            write_all(fd, compressor.process(bstr))
+            write_all(fd, compressor.finish())
+        finally:
+            os.close(fd)
+
+    # rename temporary files to production (starting with the uncompressed one)
+    logging.debug('rename(%s, %s)', mapfile_tmp, mapfile)
+    os.rename(mapfile_tmp, mapfile)
+
+    if args.compress:
+        logging.debug('rename(%s, %s)', mapfile_tmp_compressed, mapfile_compressed)
+        os.rename(mapfile_tmp_compressed, mapfile_compressed)
-- 
cgit v1.2.3
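[Editor's note: for reference, after a successful run with, say, --destdir /var/www/tiles/cleared --webroot /var/www (hypothetical paths) and a freshly generated tileset subdirectory "a3", the updated metadata.json would contain a layermap entry such as:

    import json
    mapobj = {'layermap': {'cleared': '/tiles/cleared/a3'}}
    print(json.dumps(mapobj, ensure_ascii=False, separators=(',', ':')))
    # {"layermap":{"cleared":"/tiles/cleared/a3"}}

The web frontend can then resolve the current tileset (and its attr/ sidecar files) for each layer group by name, while older tilesets linger for clients with cached tiles.]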