From 2d2ae41e0e9dc945855a6bb62228995e74eea229 Mon Sep 17 00:00:00 2001 From: Guilhem Moulin Date: Tue, 9 Jun 2026 17:12:19 +0200 Subject: geodata-import-topo: Add support for archives containing tables to be appended. When ordering multiple counties or municipalities at https://geotorget.lantmateriet.se, one gets mark_ln25.zip, mark_ln24.zip, etc. We then want to import all features, so only clear the destination layer for the *first* source encountered, and append the rest. --- geodata-import-topo | 66 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/geodata-import-topo b/geodata-import-topo index e58a701..21a7246 100755 --- a/geodata-import-topo +++ b/geodata-import-topo @@ -132,17 +132,25 @@ def get_output_layer(layer_src : ogr.Layer, dso : gdal.Dataset, return layer_dst -LAYER_NAMES : Final[set[tuple[str|None,str]]] = set() +LAYER_NAMES : Final[dict[tuple[str|None,str], set[str]]] = {} def import_layer(layer : ogr.Layer, dso : gdal.Dataset, filename : str, srs : Optional[osr.SpatialReference] = None, schema : Optional[str] = None, - create_layer_options : Optional[dict[str,Any]] = None) -> None: - """Import a give layer to PostGIS""" + create_layer_options : Optional[dict[str,Any]] = None) -> str: + """Import a given layer to PostGIS, and return the destination layer name""" + # extract area covered by the shapefile: + # anlaggningsomrade_ln24.zip → ln24; mark_sverige.zip → sverige; etc + coverage = Path(filename).stem + if '_' in coverage: + coverage = coverage.split('_')[-1] layer_name = layer.GetName() k = (schema, layer_name) - if k in LAYER_NAMES: - raise RuntimeError(f'Duplicate layer "{layer_name}" in schema {schema}') - LAYER_NAMES.add(k) + is_first = k not in LAYER_NAMES + if is_first: + LAYER_NAMES[k] = set() + elif coverage in LAYER_NAMES[k]: + raise RuntimeError(f'Duplicate layer "{layer_name}" in schema {schema} for coverage {coverage}') + LAYER_NAMES[k].add(coverage) logging.info('Importing layer %s from %s to schema %s', layer_name, filename, schema) layer_dst = get_output_layer(layer, dso=dso, @@ -156,13 +164,16 @@ def import_layer(layer : ogr.Layer, dso : gdal.Dataset, filename : str, layer_dst.SetMetadataItem('DESCRIPTION', description) != GDAL_CE_None): logging.warning('Could not set description metadata') - clearLayer(layer_dst, identity='RESTART IDENTITY') + if is_first: + clearLayer(layer_dst, identity='RESTART IDENTITY') + else: + logging.debug('Appending to existing layer "%s"', layer_dst.GetName()) + defn_dst = layer_dst.GetLayerDefn() field_map = { defn_dst.GetFieldDefn(i).GetName() : i for i in range(defn_dst.GetFieldCount()) } importLayer(layer_dst, layer, args={'field-map': field_map}, extent=None) - if layer_dst.GetLayerDefn().GetGeomType() != ogr.wkbNone: - clusterLayer(layer_dst, column_name=layer_dst.GetGeometryColumn()) + return layer_dst.GetName() def guess_schema_from_file(path : str) -> str: """Infer PostgreSQL schema name from the filename of the .zip source.""" @@ -200,8 +211,15 @@ def import_source(path : str, dso : gdal.Dataset, if dso.TestCapability(ogr.ODsCTransactions): logging.debug('Starting transaction') dso_transaction = dso.StartTransaction() == ogr.OGRERR_NONE - n = import_source2(path, dso, srs=srs, schema=schema, - create_layer_options=create_layer_options) + n, layernames_dst = import_source2(path, dso, srs=srs, schema=schema, + create_layer_options=create_layer_options) + + # cluster destination layers + for layername_dst in layernames_dst: + layer_dst = dso.GetLayerByName(layername_dst) + if layer_dst.GetLayerDefn().GetGeomType() != ogr.wkbNone: + clusterLayer(layer_dst, column_name=layer_dst.GetGeometryColumn()) + if dso_transaction: logging.debug('Committing transaction') dso_transaction = False @@ -226,10 +244,12 @@ def import_source(path : str, dso : gdal.Dataset, def import_source2(path : str, dso : gdal.Dataset, srs : Optional[osr.SpatialReference] = None, schema : Optional[str] = None, - create_layer_options : Optional[dict[str,Any]] = None) -> int: + create_layer_options : Optional[dict[str,Any]] = None) \ + -> tuple[int, set[str]]: """Import a single shape file, or recursively all shape files containing in a .zip file, to PostGIS.""" n = 0 + layernames_dst = set() if path.lower().endswith('.zip') and zipfile.is_zipfile(path): logging.debug('Opening %s as ZipFile', path) with zipfile.ZipFile(path, mode='r') as z: @@ -237,7 +257,10 @@ def import_source2(path : str, dso : gdal.Dataset, if zi.is_dir(): raise NotImplementedError(f'{zi.filename}: Zipped directories are ' 'not supported') - if zi.filename == 'uttag.json': + if zi.filename == 'uttag.json' or (zi.filename.startswith('uttag_') and + zi.filename.endswith('.json')): + # uttag.json, uttag_ln25.json, etc + logging.debug('Skipping file "%s"', zi.filename) continue with tempfile.TemporaryDirectory() as tmpdir: if logging.getLogger().getEffectiveLevel() == logging.DEBUG: @@ -246,9 +269,11 @@ def import_source2(path : str, dso : gdal.Dataset, elif not zi.filename.lower().endswith('.zip'): logging.info('Extracting %s', zi.filename) p = z.extract(zi, path=tmpdir) - n += import_source2(p, dso=dso, srs=srs, schema=schema, - create_layer_options=create_layer_options) - return n + n2, layernames_dst2 = import_source2(p, dso=dso, srs=srs, schema=schema, + create_layer_options=create_layer_options) + n += n2 + layernames_dst |= layernames_dst2 + return n, layernames_dst kwargs, _ = gdalSetOpenExArgs({}, flags=GDAL_OF_VECTOR|GDAL_OF_READONLY|GDAL_OF_VERBOSE_ERROR) logging.debug('OpenEx(%s, %s)', path, str(kwargs)) @@ -260,10 +285,11 @@ def import_source2(path : str, dso : gdal.Dataset, logging.debug('Opened %s (driver %s, %d layers)', path, ds.GetDriver().ShortName, n) filename = Path(path).name for i in range(n): - import_layer(layer=ds.GetLayerByIndex(i), dso=dso, filename=filename, - srs=srs, schema=schema, - create_layer_options=create_layer_options) - return n + layername_dst = import_layer(layer=ds.GetLayerByIndex(i), dso=dso, filename=filename, + srs=srs, schema=schema, + create_layer_options=create_layer_options) + layernames_dst.add(layername_dst) + return n, layernames_dst # pylint: disable-next=missing-function-docstring -- cgit v1.2.3