authorGuilhem Moulin <guilhem@fripost.org>2024-05-16 20:14:52 +0200
committerGuilhem Moulin <guilhem@fripost.org>2024-06-10 06:06:59 +0200
commit5dd5b429f9acdff8e727baa75bbda0b687b88926 (patch)
tree86979cfe5e74d4124cefd39b18c3b96f658afb7d
parent681c3d11b4fc02c416467d074ef0b4d84bf0cdab (diff)
Add `webmap-import` script to import source layers.
There are still a few things to do (such as reprojection and geometry changes),
but it's mostly working.

We roll our own ogr2ogr/GDALVectorTranslate()-like function because
GDALVectorTranslate() insists on calling StartTransaction()
https://github.com/OSGeo/gdal/issues/3403 while we want a single transaction
for the entire destination layer, including truncation, source imports, and
metadata changes.

Surprisingly, our version is not much slower than the C++ one.  Importing the
157446 (of 667034) features from sksUtfordAvverk-2000-2015.shp takes 14.3s,
while

    ogr2ogr -f PostgreSQL \
        -doo ACTIVE_SCHEMA=postgis \
        --config PG_USE_COPY YES \
        --config OGR_TRUNCATE YES \
        -append \
        -fieldmap "0,-1,-1,-1,-1,1,2,3,4,5,6,7,8,9,10,11,12,13" \
        -nlt MULTIPOLYGON -nlt PROMOTE_TO_MULTI \
        -gt unlimited \
        -spat 110720 6927136 1159296 7975712 \
        -nln "sks:UtfordAvverk" \
        PG:"dbname='webmap' user='webmap_import'" \
        /tmp/x/sksUtfordAvverk-2000-2015.shp \
        sksUtfordAvverk-2000-2015

takes 14s.  Merely opening /tmp/x/sksUtfordAvverk-2000-2015.shp and looping
through its (extent-filtered) features results in a runtime of 4.3s.
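The approach thus wraps truncation, the feature-copy loop, and metadata updates
in one layer transaction.  A minimal sketch of the idea (assuming a PostgreSQL
destination; the connection string and layer names are illustrative, and field
mapping and error handling are omitted):

    from osgeo import gdal, ogr
    gdal.UseExceptions()

    dst = gdal.OpenEx('PG:dbname=webmap', gdal.OF_VECTOR | gdal.OF_UPDATE)
    src = gdal.OpenEx('/tmp/x/sksUtfordAvverk-2000-2015.shp', gdal.OF_VECTOR)
    lyr_src = src.GetLayer(0)
    lyr_dst = dst.GetLayerByName('sks:UtfordAvverk')

    lyr_dst.StartTransaction()               # one transaction for everything
    dst.ExecuteSQL('TRUNCATE TABLE "sks:UtfordAvverk"')      # truncation
    defn = lyr_dst.GetLayerDefn()
    for feat in lyr_src:                     # source import
        out = ogr.Feature(defn)
        out.SetFrom(feat)
        lyr_dst.CreateFeature(out)
    lyr_dst.SetMetadataItem('DESCRIPTION', '...')            # metadata changes
    lyr_dst.CommitTransaction()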
-rw-r--r--   config.yml      528
-rwxr-xr-x   webmap-import  1017
2 files changed, 1511 insertions, 34 deletions
diff --git a/config.yml b/config.yml
index 772cb0e..899d0ca 100644
--- a/config.yml
+++ b/config.yml
@@ -1,18 +1,20 @@
---
-# Spatial Reference System and (2D) extent
+# Spatial Reference System
SRS: 'EPSG:3006' # a.k.a. SWEREF99 TM, cf. https://epsg.io/3006
+
+# (2D) extent
extent:
- # Lantmäteriet uses a tile-scheme where the origin (upper-left corner) is at
- # N8500000 E-1200000 (SWEREF99 TM), where each tile is 256×256 pixels, and
- # where the resolution at level 0 is 4096m per pixel.
+ # Lantmäteriet uses a tile-scheme where the origin (upper-left corner) is at N8500000
+ # E-1200000 (SWEREF99 TM), where each tile is 256×256 pixels, and where the resolution
+ # at level 0 is 4096m per pixel.
#
# https://www.lantmateriet.se/globalassets/geodata/geodatatjanster/tb_twk_visning_cache_v1.1.0.pdf
# https://www.lantmateriet.se/globalassets/geodata/geodatatjanster/tb_twk_visning-oversiktlig_v1.0.3.pdf
#
- # We set the extent to a 4×4 tiles square at level 2 somehow centered on
- # Norrbotten and Västerbotten. This represent a TILEROW (x) offset of 5, and
- # a TILECOL (y) offset of 2. A 4×8 tiles rectangle with the same upper-left
- # and upper-right coordinates can be used to cover the entire country.
+ # We set the extent to a 4×4-tile square at level 2, somewhat centered on Norrbotten
+ # and Västerbotten. This represents a TILEROW (x) offset of 5 and a TILECOL (y) offset
+ # of 2. A 4×8-tile rectangle with the same upper-left and upper-right coordinates can
+ # be used to cover the entire country.
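+ # (At level 2 the resolution is 4096/2² = 1024m per pixel, so each tile spans
+ # 256 × 1024m = 262144m; the corner coordinates below follow, e.g.
+ # x_min = -1200000 + 5 × 262144 = 110720 and y_max = 8500000 - 2 × 262144 = 7975712.)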
- 110720
- 6927136 # alternatively 5878560 for the entire country
- 1159296
@@ -21,9 +23,12 @@ extent:
# Take User-Agent value from Tor Browser 13.0.15 (based on Mozilla Firefox 115.11.0esr)
User-Agent: 'Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/115.0'
-# Map group names to one or more pattern of layer name(s). This is a
-# convenience feature for systemd template units.
+# Map group names to one or more patterns of layer names. This is a convenience feature
+# for systemd template units.
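+# (For instance, a hypothetical systemd template instance webmap-import@sks.service
+# could pass its instance name "sks" as group name to process all 'sks:*' layers.)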
layer-groups:
+ administrativindelning:
+ - counties
+ - municipalities
nvr: 'nvr:*'
sks: 'sks:*'
st: 'st:*'
@@ -31,43 +36,260 @@ layer-groups:
mrr: 'mrr:*'
+# Global GDAL/OGR configuration options, cf. https://gdal.org/user/configoptions.html and
+# the driver-specific configuration options such as
+# https://gdal.org/drivers/vector/pg.html#configuration-options or
+# https://gdal.org/drivers/vector/gpkg.html#configuration-options
+GDALconfig:
+ PG_USE_COPY: 'YES'
+
+
+dataset:
+ # Path/URI of the output (destination) dataset.
+ path: 'PG:'
+ # Format (optional)
+ format: PostgreSQL
+
+ # Whether the dataset should be created if it does not exist. (Creation will fail if
+ # the driver doesn't support it.)
+ #create: true
+ #create-options:
+ # Optional driver-specific dataset creation options, cf. for instance
+ # https://gdal.org/drivers/vector/gpkg.html#dataset-creation-options
+ #VERSION: 'AUTO'
+
+ # Optional driver-specific dataset open options, cf. for instance
+ # https://gdal.org/drivers/vector/pg.html#dataset-open-options or
+ # https://gdal.org/drivers/vector/gpkg.html#dataset-open-options
+ open-options:
+ ACTIVE_SCHEMA: postgis
+ USER: webmap_import
+ DBNAME: webmap
+
+ # Optional dictionary of default layer creation options, cf.
+ # https://gdal.org/drivers/vector/pg.html#layer-creation-options or
+ # https://gdal.org/drivers/vector/gpkg.html#layer-creation-options
+ # These apply to all layers.
+ create-layer-options:
+ LAUNDER: 'NO'
+ EXTRACT_SCHEMA_FROM_LAYER_NAME: 'NO'
+
+
layers:
-# # Dictionary of layer names and source receipes in the output dataset.
-# # If a layer has a single source, then the sources singleton can be
-# # inlined.
+# # Dictionary of layer names and source recipes in the output dataset. If a layer
+# # has a single source, then the sources singleton can be inlined.
# layer1_name:
+# description: A string describing that layer
+# create:
+# # Geometry Type for the output layer. Possible values are like ogr2ogr(1)'s -nlt
+# # value, namely one of NONE, GEOMETRY, POINT, LINESTRING, POLYGON, MULTIPOINT,
+# # MULTILINESTRING, MULTIPOLYGON, GEOMETRYCOLLECTION, CIRCULARSTRING, COMPOUNDCURVE,
+# # CURVEPOLYGON, MULTICURVE, MULTISURFACE. Add Z, M, or ZM to the type name to
+# # specify coordinates with elevation, measure, or both elevation and measure.
+# geometry-type: MULTIPOLYGON
+#
+# # Dictionary of layer creation options, cf.
+# # https://gdal.org/drivers/vector/pg.html#layer-creation-options or
+# # https://gdal.org/drivers/vector/gpkg.html#layer-creation-options
+# # dataset:create-layer-options is prepended when defined.
+# options:
+# #LAUNDER: 'NO'
+#
+# # The layer schema: a list of field names, types and constraints.
+# fields:
+# - # Feature field name
+# Name: field_name
+#
+# # Alternative field name (alias). This is a metadata style attribute only: the
+# # alternative name cannot be used in place of the actual field name during SQL
+# # queries or other field name dependent API calls.
+# AlternativeName: field_name_alias
+#
+# # Description/comment
+# Comment: A string describing that field
+#
+# # Feature field type (optional), one of:
+# # * Integer (simple 32bit integer);
+# # * IntegerList (list of 32bit integers);
+# # * Real (double precision floating point);
+# # * RealList (list of doubles);
+# # * String (string of characters);
+# # * StringList (array of strings);
+# # * Binary (raw binary data);
+# # * Date (date);
+# # * Time (time);
+# # * DateTime (date and time);
+# # * Integer64 (64bit integer); or
+# # * Integer64List (list of 64bit integers).
+# Type: String
+#
+# # Feature field subtype (optional), one of:
+# # * None (no subtype, this is the default value);
+# # * Bool (boolean integer, only valid for Integer and IntegerList types);
+# # * Int16 (signed 16-bit integer, only valid for Integer and IntegerList
+# # types);
+# # * Float32 (single precision [32 bit] floating point, only valid for Real
+# # and RealList types);
+# # * JSON (JSON content, only valid for String); or
+# # * UUID (UUID string representation, only valid for String).
+# SubType: UUID
+#
+# # Default timezone (optional), for Time and DateTime types
+# #TZFlag: local
+#
+# # Formatting precision for this field in characters (optional, this should
+# # normally be zero for fields of types other than Real)
+# Precision: 0
+#
+# # Formatting width for this field in characters (optional)
+# Width: 36
+#
+# # Default field value (optional); accepted values are NULL, a numeric value, a
+# # literal value enclosed between single quote characters (and inner single
+# # quote characters escaped by repetition of the single quote character),
+# # CURRENT_TIMESTAMP, CURRENT_TIME, CURRENT_DATE, or a driver-specific
+# # expression.
+# Default: None
+#
+# # Whether this field may contain NULL values (optional)
+# Nullable: false
+#
+# # Whether this field has a unique constraint (optional)
+# Unique: true
+#
# sources:
# - source:
# download:
-# # source:download:url: URL from where to download the source file.
-# # source:download can be used as an alias when source:download:url is
-# # its only subkey.
+# # URL from where to download the source file. source:download can be used as
+# # an alias when source:download:url is its only subkey.
# url: 'https://example.net/path/to/layer.zip'
-# # source:download:max-size: The maximum size to download in bytes. An
-# # error is raised when the payload size exceeds this value.
+#
+# # The maximum size to download in bytes. An error is raised when the payload
+# # size exceeds this value.
# # (Default: 67108864, in other words 64MiB)
# max-size: 1073741824
-# # source:download:module: Basename of the download module to use for
-# # that layer.
+#
+# # Basename of the download module to use for that layer.
# module: webmap-download
+#
# cache:
-# # source:cache:path: Local path (relative to --cachedir) where to
-# # (atomically) save the downloaded file. The same path can be used by
-# # multiple entries as long as their pairs (source:download:url,
-# # source:download:module) match. Any parent directories are created if
-# # needed.
-# # If the path is empty or ends with a '/' character then it treated as a
-# # directory and the last component of source:download:url implicitly
-# # used as filename. In that case an error is raised if no filename can
-# # be derived from the URL.
-# # source:cache can be used as an alias when source:cache:path is its
-# # only subkey.
+# # Local path (relative to --cachedir) where to (atomically) save the
+# # downloaded file. The same path can be used by multiple entries as long as
+# # their pairs (source:download:url, source:download:module) match. Any
+# # parent directories are created if needed. If the path is empty or ends with
+# # a '/' character then it is treated as a directory and the last component of
+# # source:download:url is implicitly used as the filename. In that case an
+# # error is raised if no filename can be derived from the URL. source:cache
+# # can be used as an alias when source:cache:path is its only subkey.
# path: path/to/sub/dir/
-# # source:cache:max-age: Maximum age for caching, in number of seconds
-# # ago. If source:cache:path exists and its mtime and/or ctime is newer
-# # than this value then no HTTP query is made.
+#
+# # Maximum age for caching, in number of seconds ago. If source:cache:path
+# # exists and its mtime and/or ctime is newer than this value then no HTTP
+# # query is made.
# # (Default: 21600, in other words 6h)
# max-age: 86400
+#
+# # Optional extraction recipe for archived/compressed sources
+# unar:
+# # The archiving format (only 'zip' is currently supported)
+# format: zip
+#
+# # glob(3)-patterns to extract from the archive. import:path is always
+# # extracted as an exact match.
+# patterns:
+# - 'path/to/source/layer.*'
+#
+# import:
+# # Path for the dataset holding the source layer (relative to the archive root
+# # for archived sources, and to --cachedir otherwise). The value is optional
+# # for non-archived sources, and defaults to source:cache:path if omitted.
+# path: path/to/source/layer.shp
+#
+# # Format of the source layer to limit allowed driver when opening the dataset.
+# format: ESRI Shapefile
+#
+# # Name of the source layer in the source dataset. If omitted, its 0th layer is
+# # considered.
+# layername: source_layer
+#
+# # Whether to apply the spatial filter when importing. Default: True.
+# spatial-filter: true
+#
+# # Mapping of source fields to destination fields. A list translates into an
+# # identity mapping.
+# fields:
+# - field_name1
+# - field_name2
+# # or, equivalently, as an explicit mapping:
+# fields:
+# source_field_name1: field_name1
+# source_field_name2: field_name2
+
+ 'counties':
+ description: Sveriges län
+ create:
+ geometry-type: MULTIPOLYGON
+ fields:
+ - name: objektidentitet
+ type: String
+ subtype: UUID
+ unique: true
+ #width: 36
+ - name: skapad
+ type: DateTime
+ #TZFlag: TODO
+ - name: lanskod
+ type: Integer
+ subtype: Int16
+ unique: true
+ nullable: false
+ source:
+ # https://www.lantmateriet.se/sv/geodata/vara-produkter/produktlista/topografi-250-nedladdning-vektor/
+ cache: administrativindelning_sverige.zip
+ unar:
+ format: zip
+ import:
+ path: administrativindelning_sverige.gpkg
+ format: GPKG
+ layername: lansyta
+ spatial-filter: false
+ fields:
+ - objektidentitet
+ - skapad
+ - lanskod
+
+ 'municipalities':
+ description: Sveriges kommuner
+ create:
+ geometry-type: MULTIPOLYGON
+ fields:
+ - name: objektidentitet
+ type: String
+ subtype: UUID
+ unique: true
+ #width: 36
+ - name: skapad
+ type: DateTime
+ #TZFlag: TODO
+ - name: kommunkod
+ type: Integer
+ subtype: Int16
+ unique: true
+ nullable: false
+ source:
+ # https://www.lantmateriet.se/sv/geodata/vara-produkter/produktlista/topografi-250-nedladdning-vektor/
+ cache: administrativindelning_sverige.zip
+ unar:
+ format: zip
+ import:
+ path: administrativindelning_sverige.gpkg
+ format: GPKG
+ layername: kommunyta
+ spatial-filter: false
+ fields:
+ - objektidentitet
+ - skapad
+ - kommunkod
+
'nvr:TILLTRADESFORBUD':
source:
@@ -167,13 +389,156 @@ layers:
cache: naturvardsregistret/
'sks:AvverkAnm':
+ # https://geodpags.skogsstyrelsen.se/geodataport/feeds/AvverkAnm.xml
+ description: Avverkningsanmälningar (Skogsstyrelsen)
+ create:
+ geometry-type: MULTIPOLYGON
+ fields:
+ - name: objektid
+ type: Integer
+ unique: true
+ nullable: false
+ - name: year
+ type: Integer
+ subtype: Int16
+ nullable: false
+ - name: beteckn
+ type: String
+ width: 23
+ unique: true
+ nullable: false
+ - name: avverktyp
+ type: String
+ width: 254
+ nullable: false
+ - name: skogstyp
+ type: String
+ width: 254
+ nullable: false
+ - name: date
+ type: Date
+ nullable: false
+ - name: AnmaldHa
+ type: Real
+ width: 24
+ precision: 15
+ nullable: false
+ - name: SkogsodlHa
+ type: Real
+ width: 24
+ precision: 15
+ nullable: false
+ - name: NatforHa
+ type: Real
+ width: 24
+ precision: 15
+ nullable: false
+ - name: status
+ type: String
+ width: 21
+ nullable: false
+ - name: AvvSasong
+ type: String
+ width: 254
+ nullable: false
+ - name: AvvHa
+ type: Real
+ width: 24
+ precision: 15
+ - name: Avverkning
+ type: String
+ width: 20
+ nullable: false
source:
download:
url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksAvverkAnm.zip'
max-size: 134217728 # 128MiB
cache: sks/
+ unar:
+ format: zip
+ patterns:
+ - 'sksAvverkAnm.*'
+ import:
+ path: sksAvverkAnm.shp
+ format: ESRI Shapefile
+ layername: sksAvverkAnm
+ fields:
+ OBJECTID: objektid
+ ArendeAr: year
+ Beteckn: beteckn
+ Avverktyp: avverktyp
+ Skogstyp: skogstyp
+ Inkomdatum: date
+ AnmaldHa: AnmaldHa
+ SkogsodlHa: SkogsodlHa
+ NatforHa: NatforHa
+ ArendeStat: status
+ AvvSasong: AvvSasong
+ AvvHa: AvvHa
+ Avverkning: Avverkning
'sks:UtfordAvverk':
+ # https://geodpags.skogsstyrelsen.se/geodataport/feeds/UtfordAvverk.xml
+ description: Utförd avverkning (Skogsstyrelsen)
+ create:
+ geometry-type: MULTIPOLYGON
+ fields:
+ - name: objektid
+ type: Integer
+ #unique: true
+ nullable: false
+ - name: year
+ type: Integer
+ subtype: Int16
+ nullable: false
+ - name: beteckn
+ type: String
+ width: 8
+ #unique: true
+ nullable: false
+ - name: avverktyp
+ type: String
+ width: 254
+ nullable: false
+ - name: skogstyp
+ type: String
+ width: 254
+ nullable: false
+ - name: AnmaldHa
+ type: Real
+ width: 24
+ precision: 15
+ #nullable: false
+ - name: SkogsodlHa
+ type: Real
+ width: 24
+ precision: 15
+ nullable: false
+ - name: NatforHa
+ type: Real
+ width: 24
+ precision: 15
+ nullable: false
+ - name: date
+ type: Date
+ nullable: false
+ - name: KallaDatum
+ type: Date
+ - name: KallaAreal
+ type: String
+ width: 50
+ - name: Forebild
+ type: String
+ width: 50
+ - name: Efterbild
+ type: String
+ width: 59
+ - name: Arealha
+ type: Real
+ width: 24
+ precision: 15
+ nullable: false
+
sources:
- source:
download:
@@ -182,6 +547,30 @@ layers:
cache:
path: sks/
max-age: 2592000 # 30d
+ unar:
+ format: zip
+ patterns:
+ - 'sksUtfordAvverk-2000-2015.*'
+ import:
+ path: sksUtfordAvverk-2000-2015.shp
+ format: ESRI Shapefile
+ layername: sksUtfordAvverk-2000-2015
+ fields:
+ OBJECTID: objektid
+ Arendear: year
+ Beteckn: beteckn
+ Avverktyp: avverktyp
+ Skogstyp: skogstyp
+ AnmaldHa: AnmaldHa
+ SkogsodlHa: SkogsodlHa
+ Natforha: NatforHa
+ Avvdatum: date
+ KallaDatum: KallaDatum
+ KallaAreal: KallaAreal
+ Forebild: Forebild
+ Efterbild: Efterbild
+ Arealha: Arealha
+
- source:
download:
url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksUtfordAvverk-2016-2019.zip'
@@ -189,6 +578,30 @@ layers:
cache:
path: sks/
max-age: 2592000 # 30d
+ unar:
+ format: zip
+ patterns:
+ - 'sksUtfordAvverk-2016-2019.*'
+ import:
+ path: sksUtfordAvverk-2016-2019.shp
+ format: ESRI Shapefile
+ layername: sksUtfordAvverk-2016-2019
+ fields:
+ OBJECTID: objektid
+ Arendear: year
+ Beteckn: beteckn
+ Avverktyp: avverktyp
+ Skogstyp: skogstyp
+ AnmaldHa: AnmaldHa
+ SkogsodlHa: SkogsodlHa
+ Natforha: NatforHa
+ Avvdatum: date
+ KallaDatum: KallaDatum
+ KallaAreal: KallaAreal
+ Forebild: Forebild
+ Efterbild: Efterbild
+ Arealha: Arealha
+
- source:
download:
url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksUtfordAvverk-2020-2022.zip'
@@ -196,11 +609,58 @@ layers:
cache:
path: sks/
max-age: 864000 # 10d
+ unar:
+ format: zip
+ patterns:
+ - 'sksUtfordAvverk-2020-2022.*'
+ import:
+ path: sksUtfordAvverk-2020-2022.shp
+ format: ESRI Shapefile
+ layername: sksUtfordAvverk-2020-2022
+ fields:
+ OBJECTID: objektid
+ Arendear: year
+ Beteckn: beteckn
+ Avverktyp: avverktyp
+ Skogstyp: skogstyp
+ AnmaldHa: AnmaldHa
+ SkogsodlHa: SkogsodlHa
+ Natforha: NatforHa
+ Avvdatum: date
+ KallaDatum: KallaDatum
+ KallaAreal: KallaAreal
+ Forebild: Forebild
+ Efterbild: Efterbild
+ Arealha: Arealha
+
- source:
download:
url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksUtfordAvverk-2023-.zip'
max-size: 1073741824 # 1GiB
cache: sks/
+ unar:
+ format: zip
+ patterns:
+ - 'sksUtfordAvverk-2023-.*'
+ import:
+ path: sksUtfordAvverk-2023-.shp
+ format: ESRI Shapefile
+ layername: sksUtfordAvverk-2023-
+ fields:
+ OBJECTID: objektid
+ Arendear: year
+ Beteckn: beteckn
+ Avverktyp: avverktyp
+ Skogstyp: skogstyp
+ AnmaldHa: AnmaldHa
+ SkogsodlHa: SkogsodlHa
+ Natforha: NatforHa
+ Avvdatum: date
+ KallaDatum: KallaDatum
+ KallaAreal: KallaAreal
+ Forebild: Forebild
+ Efterbild: Efterbild
+ Arealha: Arealha
'st:betesomraden':
source:
diff --git a/webmap-import b/webmap-import
new file mode 100755
index 0000000..6daceee
--- /dev/null
+++ b/webmap-import
@@ -0,0 +1,1017 @@
+#!/usr/bin/python3
+
+#----------------------------------------------------------------------
+# Backend utilities for the Klimatanalys Norr project (extract/import layers)
+# Copyright © 2024 Guilhem Moulin <info@guilhem.se>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#----------------------------------------------------------------------
+
+import os
+import logging
+import argparse
+import tempfile
+from fnmatch import fnmatchcase
+from pathlib import Path
+
+from osgeo import gdal, ogr, osr
+from osgeo.gdalconst import (
+ OF_VECTOR as GDAL_OF_VECTOR,
+ OF_ALL as GDAL_OF_ALL,
+ OF_READONLY as GDAL_OF_READONLY,
+ OF_UPDATE as GDAL_OF_UPDATE,
+ OF_VERBOSE_ERROR as GDAL_OF_VERBOSE_ERROR,
+ CE_None as GDAL_CE_None,
+ DCAP_CREATE as GDAL_DCAP_CREATE,
+ DCAP_VECTOR as GDAL_DCAP_VECTOR,
+ DCAP_DEFAULT_FIELDS as GDAL_DCAP_DEFAULT_FIELDS,
+ DCAP_NOTNULL_FIELDS as GDAL_DCAP_NOTNULL_FIELDS,
+ DCAP_UNIQUE_FIELDS as GDAL_DCAP_UNIQUE_FIELDS,
+)
+import osgeo.gdalconst as gdalconst
+gdal.UseExceptions()
+
+import common
+
+# Wrapper around gdal.MajorObject.GetMetadataItem(name)
+def getMetadataItem(o, k):
+ v = o.GetMetadataItem(k)
+ if v is not None and isinstance(v, str):
+ return v.upper() == 'YES'
+ else:
+ return False
+
+# Return kwargs and driver for OpenEx()
+def setOpenExArgs(option_dict, flags=0):
+ kwargs = { 'nOpenFlags': GDAL_OF_VECTOR | flags }
+
+ fmt = option_dict.get('format', None)
+ if fmt is None:
+ drv = None
+ else:
+ drv = gdal.GetDriverByName(fmt)
+ if drv is None:
+ raise Exception(f'Unknown driver name "{fmt}"')
+ elif not getMetadataItem(drv, GDAL_DCAP_VECTOR):
+ raise Exception(f'Driver "{drv.ShortName}" has no vector capabilities')
+ kwargs['allowed_drivers'] = [ drv.ShortName ]
+
+ oo = option_dict.get('open-options', None)
+ if oo is not None:
+ kwargs['open_options'] = [ k + '=' + str(v) for k, v in oo.items() ]
+ return kwargs, drv
+
+# Open and return the output DS. It is created if create=true or
+# create-options is set.
+def openOutputDS(def_dict):
+ path = def_dict['path']
+ kwargs, drv = setOpenExArgs(def_dict, flags=GDAL_OF_UPDATE|GDAL_OF_VERBOSE_ERROR)
+ try:
+ logging.debug('OpenEx(%s, %s)', path, str(kwargs))
+ return gdal.OpenEx(path, **kwargs)
+ except RuntimeError as e:
+ if not (gdal.GetLastErrorType() >= gdalconst.CE_Failure and
+ gdal.GetLastErrorNo() == gdalconst.CPLE_OpenFailed):
+ # not an open error
+ raise e
+
+ dso2 = None
+ try:
+ dso2 = gdal.OpenEx(path, nOpenFlags=GDAL_OF_ALL | GDAL_OF_UPDATE)
+ except Exception:
+ pass
+ if dso2 is not None:
+ # path exists but can't be opened with OpenEx(path, **kwargs)
+ raise e
+
+ try:
+ dso2 = gdal.OpenEx(path, nOpenFlags=GDAL_OF_ALL)
+ except Exception:
+ pass
+ if dso2 is not None:
+ # path exists (and opens read-only) but can't be opened with OpenEx(path, **kwargs)
+ raise e
+
+ dsco = def_dict.get('create-options', None)
+ if not def_dict.get('create', False) and dsco is None:
+ # not configured for creation
+ raise e
+ if drv is None or not getMetadataItem(drv, GDAL_DCAP_CREATE):
+ # not capable of creation
+ raise e
+
+ if 'open_options' in kwargs:
+ # like ogr2ogr(1)
+ logging.warning('Destination\'s open options ignored ' +
+ 'when creating the output datasource')
+
+ kwargs2 = { 'eType': gdal.GDT_Unknown }
+ if dsco is not None:
+ kwargs2['options'] = [ k + '=' + str(v) for k, v in dsco.items() ]
+
+ logging.debug('Create(%s, %s, eType=%s%s)', drv.ShortName, path, kwargs2['eType'],
+ ', options=' + str(kwargs2['options']) if 'options' in kwargs2 else '')
+ # XXX racy, a GDAL equivalent of O_EXCL would be nice
+ return drv.Create(path, 0, 0, 0, **kwargs2)
+
+# cf. ogr/ogrgeometry.cpp:OGRFromOGCGeomType()
+def fromGeomTypeName(name):
+ name = name.upper()
+
+ isMeasured = False
+ if name.endswith('M'):
+ isMeasured = True
+ name = name.removesuffix('M')
+
+ convertTo3D = False
+ if name.endswith('Z'):
+ convertTo3D = True
+ name = name.removesuffix('Z')
+
+ if name == 'POINT':
+ eGType = ogr.wkbPoint
+ elif name == 'LINESTRING':
+ eGType = ogr.wkbLineString
+ elif name == 'POLYGON':
+ eGType = ogr.wkbPolygon
+ elif name == 'MULTIPOINT':
+ eGType = ogr.wkbMultiPoint
+ elif name == 'MULTILINESTRING':
+ eGType = ogr.wkbMultiLineString
+ elif name == 'MULTIPOLYGON':
+ eGType = ogr.wkbMultiPolygon
+ elif name == 'GEOMETRYCOLLECTION':
+ eGType = ogr.wkbGeometryCollection
+ elif name == 'CIRCULARSTRING':
+ eGType = ogr.wkbCircularString
+ elif name == 'COMPOUNDCURVE':
+ eGType = ogr.wkbCompoundCurve
+ elif name == 'CURVEPOLYGON':
+ eGType = ogr.wkbCurvePolygon
+ elif name == 'MULTICURVE':
+ eGType = ogr.wkbMultiCurve
+ elif name == 'MULTISURFACE':
+ eGType = ogr.wkbMultiSurface
+ elif name == 'TRIANGLE':
+ eGType = ogr.wkbTriangle
+ elif name == 'POLYHEDRALSURFACE':
+ eGType = ogr.wkbPolyhedralSurface
+ elif name == 'TIN':
+ eGType = ogr.wkbTIN
+ elif name == 'CURVE':
+ eGType = ogr.wkbCurve
+ elif name == 'SURFACE':
+ eGType = ogr.wkbSurface
+ else:
+ eGType = ogr.wkbUnknown
+
+ if convertTo3D:
+ eGType = ogr.GT_SetZ(eGType)
+
+ if isMeasured:
+ eGType = ogr.GT_SetM(eGType)
+
+ return eGType
+
+# Parse geometry type, cf. ogr2ogr_lib.cpp
+def parseGeomType(name):
+ if name is None:
+ return ogr.wkbUnknown
+ name2 = name.upper()
+
+ is3D = False
+ if name2.endswith('25D'):
+ name2 = name2[:-3] # alias
+ is3D = True
+ elif name2.endswith('Z'):
+ name2 = name2[:-1]
+ is3D = True
+
+ if name2 == 'NONE':
+ eGType = ogr.wkbNone
+ elif name2 == 'GEOMETRY':
+ eGType = ogr.wkbUnknown
+ else:
+ eGType = fromGeomTypeName(name2)
+ if eGType == ogr.wkbUnknown:
+ raise Exception(f'Unknown geometry type "{name}"')
+
+ if eGType != ogr.wkbNone and is3D:
+ eGType = ogr.GT_SetZ(eGType)
+
+ return eGType
+
+# cf. ogr/ogr_core.h's enum OGRFieldType;
+def getFieldTypeCode(name):
+ if name is None:
+ raise Exception('getFieldTypeCode(None)')
+ name2 = name.lower()
+ if name2 == 'integer':
+ # simple 32bit integer
+ return ogr.OFTInteger
+ elif name2 == 'integerlist':
+ # List of 32bit integers
+ return ogr.OFTIntegerList
+ elif name2 == 'real':
+ # Double Precision floating point
+ return ogr.OFTReal
+ elif name2 == 'reallist':
+ # List of doubles
+ return ogr.OFTRealList
+ elif name2 == 'string':
+ # String of ASCII chars
+ return ogr.OFTString
+ elif name2 == 'stringlist':
+ # Array of strings
+ return ogr.OFTStringList
+ elif name2 == 'binary':
+ # Raw Binary data
+ return ogr.OFTBinary
+ elif name2 == 'date':
+ # Date
+ return ogr.OFTDate
+ elif name2 == 'time':
+ # Time
+ return ogr.OFTTime
+ elif name2 == 'datetime':
+ # Date and Time
+ return ogr.OFTDateTime
+ elif name2 == 'integer64':
+ # Single 64bit integer
+ return ogr.OFTInteger64
+ elif name2 == 'integer64list':
+ # List of 64bit integers
+ return ogr.OFTInteger64List
+ else:
+ raise Exception(f'Unknown field type "{name}"')
+
+# cf. ogr/ogr_core.h's enum OGRFieldSubType;
+def getFieldSubTypeCode(name):
+ if name is None:
+ raise Exception('getFieldSubTypeCode(None)')
+ name2 = name.lower()
+ if name2 == 'none':
+ # No subtype. This is the default value.
+ return ogr.OFSTNone
+ elif name2 == 'bool':
+ # Boolean integer. Only valid for OFTInteger and OFTIntegerList.
+ return ogr.OFSTBoolean
+ elif name2 == 'int16':
+ # Signed 16-bit integer. Only valid for OFTInteger and OFTIntegerList.
+ return ogr.OFSTInt16
+ elif name2 == 'float32':
+ # Single precision (32 bit) floating point. Only valid for OFTReal and OFTRealList.
+ return ogr.OFSTFloat32
+ elif name2 == 'json':
+ # JSON content. Only valid for OFTString.
+ return ogr.OFSTJSON
+ elif name2 == 'uuid':
+ # UUID string representation. Only valid for OFTString.
+ return ogr.OFSTUUID
+ else:
+ raise Exception(f'Unknown field subtype "{name2}"')
+
+# Validate layer creation options and schema. The schema is modified in
+# place with the parsed result.
+# (We need the output dataset's driver to determine which field constraints
+# and defaults it supports.)
+def validateSchema(layers, drvo=None, lco_defaults=None):
+ # cache driver capabilities
+ drvoSupportsDefaultFields = getMetadataItem(drvo, GDAL_DCAP_DEFAULT_FIELDS)
+ drvoSupportsNotNULLFields = getMetadataItem(drvo, GDAL_DCAP_NOTNULL_FIELDS)
+ drvoSupportsUniqueFields = getMetadataItem(drvo, GDAL_DCAP_UNIQUE_FIELDS)
+
+ for layername, layerdef in layers.items():
+ create = layerdef.get('create', None)
+ if create is None or len(create) < 1:
+ logging.warning('Layer "%s" has no creation schema', layername)
+ continue
+
+ # prepend global layer creation options (dataset:create-layer-options)
+ # and build the option=value list
+ lco = create.get('options', None)
+ if lco_defaults is not None or lco is not None:
+ options = []
+ if lco_defaults is not None:
+ options += [ k + '=' + str(v) for k, v in lco_defaults.items() ]
+ if lco is not None:
+ options += [ k + '=' + str(v) for k, v in lco.items() ]
+ create['options'] = options
+
+ # parse geometry type
+ create['geometry-type'] = parseGeomType(create.get('geometry-type', None))
+
+ fields = create.get('fields', None)
+ if fields is None:
+ create['fields'] = []
+ else:
+ fields_set = set()
+ for idx, fld_def in enumerate(fields):
+ fld_name = fld_def.get('name', None)
+ if fld_name is None or fld_name == '':
+ raise Exception(f'Field #{idx} has no name')
+ if fld_name in fields_set:
+ raise Exception(f'Duplicate field "{fld_name}"')
+ fields_set.add(fld_name)
+
+ fld_def2 = { 'Name': fld_name }
+ for k, v in fld_def.items():
+ k2 = k.lower()
+ if k2 == 'name':
+ pass
+ elif k2 == 'alternativename' or k2 == 'alias':
+ fld_def2['AlternativeName'] = v
+ elif k2 == 'comment':
+ fld_def2['Comment'] = v
+
+ elif k2 == 'type':
+ fld_def2['Type'] = getFieldTypeCode(v)
+ elif k2 == 'subtype':
+ fld_def2['SubType'] = getFieldSubTypeCode(v)
+ elif k2 == 'tzflag':
+ pass # TODO
+ #fld_def2['TZFlag'] = v
+ elif k2 == 'width' and v is not None and isinstance(v, int):
+ fld_def2['Width'] = v
+ elif k2 == 'precision' and v is not None and isinstance(v, int):
+ fld_def2['Precision'] = v
+
+ # constraints
+ elif k2 == 'default':
+ if drvoSupportsDefaultFields:
+ fld_def2['Default'] = v
+ else:
+ logging.warning('%s driver lacks GDAL_DCAP_DEFAULT_FIELDS support',
+ drvo.ShortName)
+ elif k2 == 'nullable' and v is not None and isinstance(v, bool):
+ if drvoSupportsNotNULLFields:
+ fld_def2['Nullable'] = v
+ else:
+ logging.warning('%s driver lacks GDAL_DCAP_NOTNULL_FIELDS support',
+ drvo.ShortName)
+ elif k2 == 'unique' and v is not None and isinstance(v, bool):
+ if drvoSupportsUniqueFields:
+ fld_def2['Unique'] = v
+ else:
+ logging.warning('%s driver lacks GDAL_DCAP_UNIQUE_FIELDS support',
+ drvo.ShortName)
+ else:
+ raise Exception(f'Field "{fld_name}" has unknown key "{k}"')
+
+ fields[idx] = fld_def2
+
+# Return the decoded Spatial Reference System
+def getSRS(srs_str):
+ if srs_str is None:
+ return
+ srs = osr.SpatialReference()
+ if srs_str.startswith('EPSG:'):
+ code = int(srs_str.removeprefix('EPSG:'))
+ srs.ImportFromEPSG(code)
+ else:
+ raise Exception(f'Unknown SRS {srs_str}')
+ logging.debug('Default SRS: "%s" (%s)', srs.ExportToProj4(), srs.GetName())
+ return srs
+
+# Validate the output layer against the provided SRS and creation options
+def validateOutputLayer(lyr, srs=None, options=None):
+ ok = True
+
+ # ensure the output SRS is equivalent
+ if srs is not None:
+ srs2 = lyr.GetSpatialRef()
+ # cf. apps/ogr2ogr_lib.cpp
+ srs_options = [
+ 'IGNORE_DATA_AXIS_TO_SRS_AXIS_MAPPING=YES',
+ 'CRITERION=EQUIVALENT'
+ ]
+ if not srs.IsSame(srs2, srs_options):
+ logging.warning('Output layer "%s" has SRS %s,\nexpected %s',
+ lyr.GetName(),
+ srs2.ExportToPrettyWkt(),
+ srs.ExportToPrettyWkt())
+ ok = False
+
+ if options is None:
+ return ok
+
+ layerDefn = lyr.GetLayerDefn()
+ n = layerDefn.GetGeomFieldCount()
+ if n != 1:
+ logging.warning('Output layer "%s" has %d != 1 geometry fields', lyr.GetName(), n)
+
+ geom_type1 = lyr.GetGeomType()
+ geom_type2 = options['geometry-type']
+ if geom_type1 != geom_type2:
+ logging.warning('Output layer "%s" has geometry type #%d (%s), expected #%d (%s)',
+ lyr.GetName(),
+ geom_type1, ogr.GeometryTypeToName(geom_type1),
+ geom_type2, ogr.GeometryTypeToName(geom_type2))
+ ok = False
+
+ fields = options.get('fields', None)
+ if fields is not None:
+ for fld in fields:
+ fldName = fld['Name']
+
+ idx = layerDefn.GetFieldIndex(fldName)
+ if idx < 0:
+ logging.warning('Output layer "%s" has no field named "%s"',
+ lyr.GetName(), fldName)
+ ok = False
+ continue
+ defn = layerDefn.GetFieldDefn(idx)
+
+ if 'AlternativeName' in fld:
+ v1 = defn.GetAlternativeName()
+ v2 = fld['AlternativeName']
+ if v1 != v2:
+ logging.warning('Field "%s" has AlternativeName="%s", expected "%s"',
+ fldName, v1, v2)
+ ok = False
+
+ if 'Comment' in fld:
+ v1 = defn.GetComment()
+ v2 = fld['Comment']
+ if v1 != v2:
+ logging.warning('Field "%s" has Comment="%s", expected "%s"',
+ fldName, v1, v2)
+ ok = False
+
+ if 'Type' in fld:
+ v1 = defn.GetType()
+ v2 = fld['Type']
+ if v1 != v2:
+ logging.warning('Field "%s" has Type=%d (%s), expected %d (%s)',
+ fldName,
+ v1, ogr.GetFieldTypeName(v1),
+ v2, ogr.GetFieldTypeName(v2))
+ ok = False
+
+ if 'SubType' in fld:
+ v1 = defn.GetSubType()
+ v2 = fld['SubType']
+ if v1 != v2:
+ logging.warning('Field "%s" has SubType=%d (%s), expected %d (%s)',
+ fldName,
+ v1, ogr.GetFieldSubTypeName(v1),
+ v2, ogr.GetFieldSubTypeName(v2))
+ ok = False
+
+ if 'TZFlag' in fld:
+ v1 = defn.GetTZFlag()
+ v2, n2 = fld['TZFlag']
+ if v1 != v2:
+ logging.warning('Field "%s" has TZFlag=%d, expected %d (%s)',
+ fldName, v1, v2, n2)
+ ok = False
+
+ if 'Precision' in fld:
+ v1 = defn.GetPrecision()
+ v2 = fld['Precision']
+ if v1 != v2:
+ logging.warning('Field "%s" has Precision=%d, expected %d',
+ fldName, v1, v2)
+ ok = False
+
+ if 'Width' in fld:
+ v1 = defn.GetWidth()
+ v2 = fld['Width']
+ if v1 != v2:
+ logging.warning('Field "%s" has Width=%d, expected %d',
+ fldName, v1, v2)
+ ok = False
+
+ if 'Default' in fld:
+ v1 = defn.GetDefault()
+ v2 = fld['Default']
+ if v1 != v2:
+ logging.warning('Field "%s" has Default="%s", expected "%s"',
+ fldName, v1, v2)
+ ok = False
+
+ if 'Nullable' in fld:
+ v1 = bool(defn.IsNullable())
+ v2 = fld['Nullable']
+ if v1 != v2:
+ logging.warning('Field "%s" has Nullable=%s, expected %s',
+ fldName, v1, v2)
+ ok = False
+
+ if 'Unique' in fld:
+ v1 = bool(defn.IsUnique())
+ v2 = fld['Unique']
+ if v1 != v2:
+ logging.warning('Field "%s" has Unique=%s, expected %s',
+ fldName, v1, v2)
+ ok = False
+
+ return ok
+
+# Create output layer
+def createOutputLayer(ds, layername, srs=None, options=None):
+ logging.info('Creating new destination layer "%s"', layername)
+ geom_type = options['geometry-type']
+ lco = options.get('options', None)
+
+ drv = ds.GetDriver()
+ if geom_type != ogr.wkbNone and drv.ShortName == 'PostgreSQL':
+ # “Important to set to 2 for 2D layers as it has constraints on the geometry
+ # dimension during loading.”
+ # — https://gdal.org/drivers/vector/pg.html#layer-creation-options
+ if ogr.GT_HasM(geom_type):
+ if ogr.GT_HasZ(geom_type):
+ dim = 'XYZM'
+ else:
+ dim = 'XYM'
+ elif ogr.GT_HasZ(geom_type):
+ dim = '3'
+ else:
+ dim = '2'
+ if lco is None:
+ lco = []
+ lco = ['dim=' + dim] + lco # prepend dim=
+
+ kwargs = { 'geom_type': geom_type }
+ if srs is not None:
+ kwargs['srs'] = srs
+ if lco is not None:
+ kwargs['options'] = lco
+ logging.debug('CreateLayer(%s, geom_type="%s"%s%s)', layername,
+ ogr.GeometryTypeToName(geom_type),
+ ', srs="' + kwargs['srs'].GetName() + '"' if 'srs' in kwargs else '',
+ ', options=' + str(kwargs['options']) if 'options' in kwargs else '')
+ lyr = ds.CreateLayer(layername, **kwargs)
+ if lyr is None:
+ raise Exception(f'Could not create destination layer "{layername}"')
+
+ fields = options['fields']
+ if len(fields) > 0 and not lyr.TestCapability(ogr.OLCCreateField):
+ raise Exception(f'Destination layer "{layername}" lacks field creation capability')
+
+ # set up output schema
+ for fld in fields:
+ fldName = fld['Name']
+ defn = ogr.FieldDefn()
+ defn.SetName(fldName)
+
+ if 'AlternativeName' in fld:
+ v = fld['AlternativeName']
+ logging.debug('Set AlternativeName=%s on output field "%s"', str(v), fldName)
+ defn.SetAlternativeName(v)
+
+ if 'Comment' in fld:
+ v = fld['Comment']
+ logging.debug('Set Comment=%s on output field "%s"', str(v), fldName)
+ defn.SetComment(v)
+
+ if 'Type' in fld:
+ v = fld['Type']
+ logging.debug('Set Type=%d (%s) on output field "%s"',
+ v, ogr.GetFieldTypeName(v), fldName)
+ defn.SetType(v)
+
+ if 'SubType' in fld:
+ v = fld['SubType']
+ logging.debug('Set SubType=%d (%s) on output field "%s"',
+ v, ogr.GetFieldSubTypeName(v), fldName)
+ defn.SetSubType(v)
+
+ if 'TZFlag' in fld:
+ v, n = fld['TZFlag']
+ logging.debug('Set TZFlag=%d (%s) on output field "%s"', v, n, fldName)
+ defn.SetTZFlag(v)
+
+ if 'Precision' in fld:
+ v = fld['Precision']
+ logging.debug('Set Precision=%d on output field "%s"', v, fldName)
+ defn.SetPrecision(v)
+
+ if 'Width' in fld:
+ v = fld['Width']
+ logging.debug('Set Width=%d on output field "%s"', v, fldName)
+ defn.SetWidth(v)
+
+ if 'Default' in fld:
+ v = fld['Default']
+ logging.debug('Set Default=%s on output field "%s"', v, fldName)
+ defn.SetDefault(v)
+
+ if 'Nullable' in fld:
+ v = fld['Nullable']
+ logging.debug('Set Nullable=%s on output field "%s"', v, fldName)
+ defn.SetNullable(v)
+
+ if 'Unique' in fld:
+ v = fld['Unique']
+ logging.debug('Set Unique=%s on output field "%s"', v, fldName)
+ defn.SetUnique(v)
+
+ if lyr.CreateField(defn, approx_ok=False) != GDAL_CE_None:
+ raise Exception(f'Could not create field "{fldName}"')
+ logging.debug('Added field "%s" to output layer "%s"', fldName, layername)
+
+ # flush before calling StartTransaction() so we're not trying to
+ # roll back changes on a non-existing table
+ lyr.SyncToDisk()
+ return lyr
+
+# Setup output field mapping, modifying the sources dictionary in place.
+def setOutputFieldMap(defn, sources):
+ fieldMap = {}
+ n = defn.GetFieldCount()
+ for i in range(n):
+ fld = defn.GetFieldDefn(i)
+ fldName = fld.GetName()
+ fieldMap[fldName] = i
+
+ for source in sources:
+ src = source['source']['path']
+ fieldMap2 = source['import'].get('fields', None)
+ if fieldMap2 is None:
+ fieldMap2 = {}
+ else:
+ if isinstance(fieldMap2, list):
+ # convert list to identity dictionary
+ fieldMap2 = { fld: fld for fld in fieldMap2 }
+
+ for ifld, ofld in fieldMap2.items():
+ i = fieldMap.get(ofld, None)
+ if i is None:
+ raise Exception(f'Output layer has no field named "{ofld}"')
+ fieldMap2[ifld] = i
+
+ source['import']['fields'] = fieldMap2
+
+ return fieldMap
+
+# Escape the given identifier, cf.
+# swig/python/gdal-utils/osgeo_utils/samples/validate_gpkg.py:_esc_id()
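+# (e.g. escapeIdentifier('sks:UtfordAvverk') returns '"sks:UtfordAvverk"', any
+# embedded '"' being doubled)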
+def escapeIdentifier(identifier):
+ if '\x00' in identifier:
+ raise Exception(f'Invalid identifier "{identifier}"')
+ # SQL:1999 delimited identifier
+ return '"' + identifier.replace('"', '""') + '"'
+
+# Clear the given layer (wipe all its features)
+def clearLayer(ds, lyr):
+ n = -1
+ if lyr.TestCapability(ogr.OLCFastFeatureCount):
+ n = lyr.GetFeatureCount(force=0)
+ if n == 0:
+ # nothing to clear, we're good
+ return
+ layername_esc = escapeIdentifier(lyr.GetName())
+
+ # GDAL <3.9 doesn't have lyr.GetDataset() so we pass the DS along with the layer
+ drv = ds.GetDriver()
+ if drv.ShortName == 'PostgreSQL':
+ # https://www.postgresql.org/docs/15/sql-truncate.html
+ query = 'TRUNCATE TABLE {table} CONTINUE IDENTITY RESTRICT'
+ op = 'Truncating'
+ else:
+ query = 'DELETE FROM {table}'
+ op = 'Clearing'
+ logging.info('%s table %s (former feature count: %s)', op,
+ layername_esc, str(n) if n >= 0 else 'unknown')
+ ds.ExecuteSQL(query.format(table=layername_esc))
+
+# Extract an archive file into the given destination directory.
+def extractArchive(path, destdir, fmt=None, patterns=None, exact_matches=None):
+ if fmt is None:
+ suffix = path.suffix
+ if suffix is None or suffix == '' or not suffix.startswith('.'):
+ raise Exception(f'Could not infer archive format from "{path}"')
+ fmt = suffix.removeprefix('.')
+
+ fmt = fmt.lower()
+ logging.debug('Unpacking %s archive %s into %s', fmt, path, destdir)
+
+ if fmt == 'zip':
+ from zipfile import ZipFile
+ logging.debug('Opening %s as ZipFile', path)
+ with ZipFile(path, mode='r') as z:
+ namelist = listArchiveMembers(z.namelist(),
+ patterns=patterns, exact_matches=exact_matches)
+ z.extractall(path=destdir, members=namelist)
+ else:
+ raise Exception(f'Unknown archive format "{fmt}"')
+
+# List archive members matching the given patterns and/or exact matches
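+# (e.g. patterns=['sksAvverkAnm.*'] selects 'sksAvverkAnm.shp' and 'sksAvverkAnm.dbf'
+# but not 'README.txt')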
+def listArchiveMembers(namelist, patterns, exact_matches=None):
+ if patterns is None and exact_matches is None:
+ # if neither patterns nor exact_matches are given we'll extract the entire archive
+ return namelist
+ if patterns is None:
+ patterns = []
+ if exact_matches is None:
+ exact_matches = []
+
+ members = []
+ for name in namelist:
+ ok = False
+ if name in exact_matches:
+ # try exact matches first
+ logging.debug('Listed archive member %s (exact match)', name)
+ members.append(name)
+ ok = True
+ continue
+ # if the name is not an exact match, try patterns one by one in the supplied order
+ for pat in patterns:
+ if fnmatchcase(name, pat):
+ logging.debug('Listed archive member %s (matching pattern "%s")', name, pat)
+ members.append(name)
+ ok = True
+ break
+ if not ok:
+ logging.debug('Ignoring archive member %s', name)
+ return members
+
+# Import a source layer
+def importSource(lyr, path=None, unar=None, args={}, cachedir=None, extent=None):
+ if unar is None:
+ return importSource2(lyr, str(path), args=args,
+ basedir=cachedir, extent=extent)
+
+ if cachedir is not None:
+ path = cachedir.joinpath(path)
+
+ ds_srcpath = Path(args['path'])
+ if ds_srcpath.is_absolute():
+ # treat absolute paths as relative to the archive root
+ logging.warning('%s is absolute, removing leading anchor', ds_srcpath)
+ ds_srcpath = ds_srcpath.relative_to(ds_srcpath.anchor)
+ ds_srcpath = str(ds_srcpath)
+
+ with tempfile.TemporaryDirectory() as tmpdir:
+ logging.debug('Created temporary directory %s', tmpdir)
+ extractArchive(path, tmpdir,
+ fmt=unar.get('format', None),
+ patterns=unar.get('patterns', None),
+ exact_matches=[ds_srcpath])
+ return importSource2(lyr, ds_srcpath, args=args,
+ basedir=Path(tmpdir), extent=extent)
+
+# Import a source layer (already extracted)
+# This is more or less like ogr2ogr/GDALVectorTranslate(), but we roll
+# our own (slower) version because GDALVectorTranslate() insists on
+# calling StartTransaction() https://github.com/OSGeo/gdal/issues/3403
+# while we want a single transaction for the entire destination layer,
+# including truncation, source imports, and metadata changes.
+def importSource2(lyr_dst, path, args={}, basedir=None, extent=None):
+ kwargs, _ = setOpenExArgs(args, flags=GDAL_OF_READONLY|GDAL_OF_VERBOSE_ERROR)
+ path2 = path if basedir is None else str(basedir.joinpath(path))
+
+ logging.debug('OpenEx(%s, %s)', path2, str(kwargs))
+ ds = gdal.OpenEx(path2, **kwargs)
+ if ds is None:
+ raise Exception(f'Could not open {path2}')
+
+ layername = args.get('layername', None)
+ if layername is None:
+ idx = 0
+ lyr = ds.GetLayerByIndex(idx)
+ msg = '#' + str(idx)
+ if lyr is not None:
+ layername = lyr.GetName()
+ msg += ' ("' + layername + '")'
+ else:
+ lyr = ds.GetLayerByName(layername)
+ msg = '"' + layername + '"'
+ if lyr is None:
+ raise Exception(f'Could not get requested layer {msg} from {path2}')
+
+ logging.info('Importing layer %s from "%s"', msg, path)
+ canIgnoreFields = lyr.TestCapability(ogr.OLCIgnoreFields)
+
+ defn = lyr.GetLayerDefn()
+ n = defn.GetGeomFieldCount()
+ if n != 1: # XXX
+ logging.warning('Source layer "%s" has %d != 1 geometry fields', layername, n)
+
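+ # Build a source-to-destination field map with ogr2ogr-like -fieldmap
+ # semantics: fieldMap[i] is the destination field index for source field #i,
+ # or -1 when the source field is dropped.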
+ n = defn.GetFieldCount()
+ fieldMap = [-1] * n
+ fields = args['fields']
+ fieldSet = set()
+
+ for i in range(n):
+ fld = defn.GetFieldDefn(i)
+ fldName = fld.GetName()
+ fieldMap[i] = v = fields.get(fldName, -1)
+ fieldSet.add(fldName)
+
+ if v < 0 and canIgnoreFields:
+ # call SetIgnored() on unwanted source fields
+ logging.debug('Set Ignored=True on source field "%s"', fldName)
+ fld.SetIgnored(True)
+ defn = None
+
+ logging.debug('Field map: %s', str(fieldMap))
+ for fld in fields:
+ if fld not in fieldSet:
+ logging.warning('Source layer "%s" has no field named "%s", ignoring', layername, fld)
+
+ count0 = -1
+ if lyr.TestCapability(ogr.OLCFastFeatureCount):
+ count0 = lyr.GetFeatureCount(force=0)
+
+ count1 = -1
+ if args.get('spatial-filter', True) and extent is not None:
+ # TODO reprojection
+ lyr.SetSpatialFilterRect(*extent)
+ if lyr.TestCapability(ogr.OLCFastFeatureCount):
+ count1 = lyr.GetFeatureCount(force=0)
+
+ if count0 >= 0:
+ if count1 >= 0:
+ logging.info('Source layer "%s" has %d features (of which %d within extent)',
+ layername, count0, count1)
+ else:
+ logging.info('Source layer "%s" has %d features', layername, count0)
+
+ #print(lyr.GetSupportedSRSList())
+ # TODO reprojection
+
+ defn_dst = lyr_dst.GetLayerDefn()
+ eGType_dst = defn_dst.GetGeomType()
+
+ n = 0
+ #mismatch = 0
+ feature = lyr.GetNextFeature()
+ while feature is not None:
+ #print(lyr.GetFeaturesRead())
+ feature2 = ogr.Feature(defn_dst)
+ # TODO MakeValid, force geometry, dimension
+ feature2.SetFromWithMap(feature, False, fieldMap)
+
+ geom = feature2.GetGeometryRef()
+ eGType = geom.GetGeometryType()
+ if eGType != eGType_dst:
+ if eGType == ogr.wkbPolygon and eGType_dst == ogr.wkbMultiPolygon:
+ geom = ogr.ForceToMultiPolygon(geom)
+ else:
+ raise Exception('Not implemented')
+ feature2.SetGeometryDirectly(geom)
+ #mismatch += 1
+
+ lyr_dst.CreateFeature(feature2)
+ n += 1
+ feature = lyr.GetNextFeature()
+
+ #print(mismatch)
+ lyr = None
+ logging.info('%d features imported from source layer "%s"', n, layername)
+
+
+if __name__ == '__main__':
+ logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
+
+ parser = argparse.ArgumentParser(description='Extract and import GIS layers.')
+ parser.add_argument('--cachedir', default=None,
+ help=f'cache directory (default: {os.curdir})')
+ parser.add_argument('--debug', action='count', default=0,
+ help=argparse.SUPPRESS)
+ parser.add_argument('groupname', nargs='*', help='group layer name(s) to process')
+ args = parser.parse_args()
+
+ if args.debug > 0:
+ logging.getLogger().setLevel(logging.DEBUG)
+ if args.debug > 1:
+ gdal.ConfigurePythonLogging(enable_debug=True)
+
+ common.load_config(groupnames=None if args.groupname == [] else args.groupname)
+
+ # validate configuration
+ if 'dataset' not in common.config:
+ raise Exception('Configuration does not specify output dataset')
+
+ layers = common.config.get('layers', {})
+ for layername, layerdefs in layers.items():
+ for idx, layerdef in enumerate(layerdefs['sources']):
+ importdef = layerdef.get('import', None)
+ if importdef is None:
+ raise Exception(f'Output layer "{layername}" source #{idx} has no import definition')
+
+ sourcedef = layerdef.get('source', None)
+ unar = None if sourcedef is None else sourcedef.get('unar', None)
+ src = None if sourcedef is None else sourcedef['cache'].get('path', None)
+
+ ds_srcpath = importdef.get('path', None)
+ if src is None and unar is None and ds_srcpath is not None:
+ # fall back to import:path if there is no unarchiving recipe
+ src = Path(ds_srcpath)
+ if unar is not None and ds_srcpath is None:
+ raise Exception(f'Output layer "{layername}" source #{idx} has no import source path')
+ if src is None:
+ raise Exception(f'Output layer "{layername}" source #{idx} has no source path')
+ layerdef['source'] = { 'path': src, 'unar': unar }
+
+ # set global GDAL/OGR configuration options
+ for pszKey, pszValue in common.config.get('GDALconfig', {}).items():
+ logging.debug('gdal.SetConfigOption(%s, %s)', pszKey, pszValue)
+ gdal.SetConfigOption(pszKey, pszValue)
+
+ # open output dataset (possibly create it first)
+ dso = openOutputDS(common.config['dataset'])
+
+ validateSchema(layers,
+ drvo=dso.GetDriver(),
+ lco_defaults=common.config['dataset'].get('create-layer-options', None))
+
+ # get configured Spatial Reference System and extent
+ srs = getSRS(common.config.get('SRS', None))
+ extent = common.config.get('extent', None)
+ if extent is not None:
+ logging.debug('Configured extent in %s: %s',
+ srs.GetName(), ', '.join(map(str, extent)))
+
+ cachedir = Path(args.cachedir) if args.cachedir is not None else None
+ rv = 0
+ for layername, layerdef in layers.items():
+ sources = layerdef['sources']
+ if sources is None or len(sources) < 1:
+ logging.warning('Output layer "%s" has no definition, skipping', layername)
+ continue
+
+ logging.info('Processing output layer "%s"', layername)
+ transaction = False
+ try:
+ # get output layer
+ outLayerIsNotEmpty = True
+ lco = layerdef.get('create', None)
+ lyr = dso.GetLayerByName(layername)
+ if lyr is not None:
+ # TODO dso.DeleteLayer(layername) if --overwrite and dso.TestCapability(ogr.ODsCDeleteLayer)
+ # (Sets OVERWRITE=YES for PostgreSQL and GPKG.)
+ validateOutputLayer(lyr, srs=srs, options=lco)
+ # TODO bail out if all source files are older than lyr's last_change
+ elif not dso.TestCapability(ogr.ODsCCreateLayer):
+ raise Exception(f'Output driver {dso.GetDriver().ShortName} does not support layer creation')
+ elif lco is None or len(lco) < 1:
+ raise Exception(f'Missing schema for new output layer "{layername}"')
+ else:
+ lyr = createOutputLayer(dso, layername, srs=srs, options=lco)
+ outLayerIsNotEmpty = False
+
+ if not lyr.TestCapability(ogr.OLCSequentialWrite):
+ raise Exception(f'Output layer "{layername}" has no working CreateFeature() method')
+
+ # setup output field mapping in the sources dictionary
+ setOutputFieldMap(lyr.GetLayerDefn(), sources)
+
+ # start transaction if possible
+ if lyr.TestCapability(ogr.OLCTransactions):
+ logging.debug('Starting transaction')
+ transaction = lyr.StartTransaction() == ogr.OGRERR_NONE
+ else:
+ logging.warning('Unsafe update, output layer "%s" does not support transactions', layername)
+
+ if outLayerIsNotEmpty:
+ # clear non-empty output layer
+ clearLayer(dso, lyr)
+
+ if lyr.SetMetadataItem('DESCRIPTION', layerdef.get('description', '')) != GDAL_CE_None:
+ logging.warning('Could not set description metadata')
+
+ for source in sources:
+ # import source layers
+ importSource(lyr, **source['source'], args=source['import'],
+ cachedir=cachedir, extent=extent)
+
+ if transaction:
+ # commit transaction
+ logging.debug('Committing transaction')
+ transaction = False
+ if lyr.CommitTransaction() != ogr.OGRERR_NONE:
+ logging.error('Could not commit transaction')
+ rv = 1
+
+ except Exception:
+ if transaction:
+ logging.error('Exception occurred in transaction, rolling back')
+ try:
+ if lyr.RollbackTransaction() != ogr.OGRERR_NONE:
+ logging.error('Could not rollback transaction')
+ except RuntimeError:
+ logging.exception('Could not rollback transaction')
+ logging.exception('Could not import layer "%s"', layername)
+ rv = 1
+
+ finally:
+ # close output layer
+ lyr = None
+
+ dso = None
+ srs = None
+ exit(rv)