From 5dd5b429f9acdff8e727baa75bbda0b687b88926 Mon Sep 17 00:00:00 2001 From: Guilhem Moulin Date: Thu, 16 May 2024 20:14:52 +0200 Subject: Add `webmap-import` script to import source layers. There is still a few things to do (such as reprojection and geometry changes) but it's mostly working. We roll out our own ogr2ogr/GDALVectorTranslate()-like function version because GDALVectorTranslate() insists in calling StartTransaction() https://github.com/OSGeo/gdal/issues/3403 while we want a single transaction for the entire desination layer, including truncation, source imports, and metadata changes. Surprisingly our version is not much slower than the C++ one. Importing the 157446 (of 667034) features from sksUtfordAvverk-2000-2015.shp takes 14.3s while ogr2ogr -f PostgreSQL \ -doo ACTIVE_SCHEMA=postgis \ --config PG_USE_COPY YES \ --config OGR_TRUNCATE YES \ -append \ -fieldmap "0,-1,-1,-1,-1,1,2,3,4,5,6,7,8,9,10,11,12,13" \ -nlt MULTIPOLYGON -nlt PROMOTE_TO_MULTI \ -gt unlimited \ -spat 110720 6927136 1159296 7975712 \ -nln "sks:UtfordAvverk" \ PG:"dbname='webmap' user='webmap_import'" \ /tmp/x/sksUtfordAvverk-2000-2015.shp \ sksUtfordAvverk-2000-2015 takes 14s. Merely opening /tmp/x/sksUtfordAvverk-2000-2015.shp and looping through its (extent-filtered) features results in a runtime of 4.3s. --- config.yml | 528 ++++++++++++++++++++++++++++-- webmap-import | 1017 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1511 insertions(+), 34 deletions(-) create mode 100755 webmap-import diff --git a/config.yml b/config.yml index 772cb0e..899d0ca 100644 --- a/config.yml +++ b/config.yml @@ -1,18 +1,20 @@ --- -# Spatial Reference System and (2D) extent +# Spatial Reference System SRS: 'EPSG:3006' # a.k.a. SWEREF99 TM, cf. https://epsg.io/3006 + +# (2D) extent extent: - # Lantmäteriet uses a tile-scheme where the origin (upper-left corner) is at - # N8500000 E-1200000 (SWEREF99 TM), where each tile is 256×256 pixels, and - # where the resolution at level 0 is 4096m per pixel. + # Lantmäteriet uses a tile-scheme where the origin (upper-left corner) is at N8500000 + # E-1200000 (SWEREF99 TM), where each tile is 256×256 pixels, and where the resolution + # at level 0 is 4096m per pixel. # # https://www.lantmateriet.se/globalassets/geodata/geodatatjanster/tb_twk_visning_cache_v1.1.0.pdf # https://www.lantmateriet.se/globalassets/geodata/geodatatjanster/tb_twk_visning-oversiktlig_v1.0.3.pdf # - # We set the extent to a 4×4 tiles square at level 2 somehow centered on - # Norrbotten and Västerbotten. This represent a TILEROW (x) offset of 5, and - # a TILECOL (y) offset of 2. A 4×8 tiles rectangle with the same upper-left - # and upper-right coordinates can be used to cover the entire country. + # We set the extent to a 4×4 tiles square at level 2 somehow centered on Norrbotten and + # Västerbotten. This represent a TILEROW (x) offset of 5, and a TILECOL (y) offset of 2. + # A 4×8 tiles rectangle with the same upper-left and upper-right coordinates can be used + # to cover the entire country. - 110720 - 6927136 # alternatively 5878560 for the entire country - 1159296 @@ -21,9 +23,12 @@ extent: # Take User-Agent value from Tor Browser 13.0.15 (based on Mozilla Firefox 115.11.0esr) User-Agent: 'Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/115.0' -# Map group names to one or more pattern of layer name(s). This is a -# convenience feature for systemd template units. +# Map group names to one or more pattern of layer name(s). This is a convenience feature +# for systemd template units. layer-groups: + administrativindelning: + - counties + - municipalities nvr: 'nvr:*' sks: 'sks:*' st: 'st:*' @@ -31,43 +36,260 @@ layer-groups: mrr: 'mrr:*' +# Global GDAL/OGR configuration options, cf. https://gdal.org/user/configoptions.html and +# the driver-specific configuration options such as +# https://gdal.org/drivers/vector/pg.html#configuration-options or +# https://gdal.org/drivers/vector/gpkg.html#configuration-options +GDALconfig: + PG_USE_COPY: 'YES' + + +dataset: + # Path/URI of the output (destination) dataset. + path: 'PG:' + # Format (optional) + format: PostgreSQL + + # Whether the dataset should be created if it does not exist. (Creation will fail if + # the driver doesn't support it.) + #create: true + #create-options: + # Optional driver-specific dataset creation options, cf. for instance + # https://gdal.org/drivers/vector/gpkg.html#dataset-creation-options + #VERSION: 'AUTO' + + # Optional driver-specific dataset open options, cf. for instance + # https://gdal.org/drivers/vector/pg.html#dataset-open-options or + # https://gdal.org/drivers/vector/gpkg.html#dataset-open-options + open-options: + ACTIVE_SCHEMA: postgis + USER: webmap_import + DBNAME: webmap + + # Optional dictionary of default layer creation options, cf. + # https://gdal.org/drivers/vector/pg.html#layer-creation-options or + # https://gdal.org/drivers/vector/gpkg.html#layer-creation-options + # These apply to all layers. + create-layer-options: + LAUNDER: 'NO' + EXTRACT_SCHEMA_FROM_LAYER_NAME: 'NO' + + layers: -# # Dictionary of layer names and source receipes in the output dataset. -# # If a layer has a single source, then the sources singleton can be -# # inlined. +# # Dictionary of layer names and source receipes in the output dataset. If a layer +# # has a single source, then the sources singleton can be inlined. # layer1_name: +# description: A string describing that layer +# create: +# # Geometry Type for the output layer. Possible values are like ogr2ogr(1)'s -nlt +# # value, namely one of NONE, GEOMETRY, POINT, LINESTRING, POLYGON, MULTIPOINT, +# # MULTILINESTRING, MULTIPOLYGON, GEOMETRYCOLLECTION, CIRCULARSTRING, COMPOUNDCURVE, +# # CURVEPOLYGON, MULTICURVE, MULTISURFACE. Add Z, M, or ZM to the type name to +# # specify coordinates with elevation, measure, or both elevation and measure. +# geometry-type: MULTIPOLYGON +# +# # Dictionary of layer creation options, cf. +# # https://gdal.org/drivers/vector/pg.html#layer-creation-options or +# # https://gdal.org/drivers/vector/gpkg.html#layer-creation-options +# # dataset:create-layer-options is prepended when defined. +# options: +# #LAUNDER: 'NO' +# +# # The layer schema: a list of field names, types and constraints. +# fields: +# - # Feature field name +# Name: field_name +# +# # Alternative field name (alias). This is a metadata style attribute only: the +# # alternative name cannot be used in place of the actual field name during SQL +# # queries or other field name dependent API calls. +# AlternativeName: field_name_alias +# +# # Description/comment +# Comment: A string describing that field +# +# # Feature field type (optional), one of: +# # * Integer (simple 32bit integer); +# # * IntegerList (list of 32bit integers); +# # * Real (double precision floating point); +# # * RealList (list of doubles); +# # * String (string of characters); +# # * StringList (array of strings); +# # * Binary (raw binary data); +# # * Date (date); +# # * Time (time); +# # * DateTime (date and time); +# # * Integer64 (64bit integer); or +# # * Integer64List (list of 64bit integers). +# Type: String +# +# # Feature field subtype (optional), one of: +# # * None (no subtype, this is the default value); +# # * Bool (boolean integer, only valid for Integer and IntegerList types); +# # * Int16 (signed 16-bit integer, only valid for Integer and IntegerList +# # types); +# # * Float32 (single precision [32 bit] floating point, only valid for Real +# # and RealList types); +# # * JSON (JSON content, only valid for String); or +# # * UUID (UUID string representation, only valid for String). +# SubType: UUID +# +# # Default timezone (optional), for Time and DateTime types +# #TZFlag: local +# +# # Formatting precision for this field in characters (optional, this should +# # normally be zero for fields of types other than Real) +# Precision: 0 +# +# # Formatting width for this field in characters (optional) +# Width: 36 +# +# # Default field value (optional); accepted values are NULL, a numeric value, a +# # literal value enclosed between single quote characters (and inner single +# # quote characters escaped by repetition of the single quote character), +# # CURRENT_TIMESTAMP, CURRENT_TIME, CURRENT_DATE or a driver specific +# # expression) +# Default: None +# +# # Whether this field has no not-NULL constraint (optional) +# Nullable: false +# +# # Whether this field has a unique constraint (optional) +# Unique: true +# # sources: # - source: # download: -# # source:download:url: URL from where to download the source file. -# # source:download can be used as an alias when source:download:url is -# # its only subkey. +# # URL from where to download the source file. source:download can be used as +# # an alias when source:download:url is its only subkey. # url: 'https://example.net/path/to/layer.zip' -# # source:download:max-size: The maximum size to download in bytes. An -# # error is raised when the payload size exceeds this value. +# +# # The maximum size to download in bytes. An error is raised when the payload +# # size exceeds this value. # # (Default: 67108864, in other words 64MiB) # max-size: 1073741824 -# # source:download:module: Basename of the download module to use for -# # that layer. +# +# # Basename of the download module to use for that layer. # module: webmap-download +# # cache: -# # source:cache:path: Local path (relative to --cachedir) where to -# # (atomically) save the downloaded file. The same path can be used by -# # multiple entries as long as their pairs (source:download:url, -# # source:download:module) match. Any parent directories are created if -# # needed. -# # If the path is empty or ends with a '/' character then it treated as a -# # directory and the last component of source:download:url implicitly -# # used as filename. In that case an error is raised if no filename can -# # be derived from the URL. -# # source:cache can be used as an alias when source:cache:path is its -# # only subkey. +# # Local path (relative to --cachedir) where to (atomically) save the +# # downloaded file. The same path can be used by multiple entries as long as +# # their pairs (source:download:url, source:download:module) match. Any +# # parent directories are created if needed. If the path is empty or ends +# # with a '/' character then it treated as a directory and the last component +# # of source:download:url implicitly used as filename. In that case an error +# # is raised if no filename can be derived from the URL. source:cache can be +# # used as an alias when source:cache:path is its only subkey. # path: path/to/sub/dir/ -# # source:cache:max-age: Maximum age for caching, in number of seconds -# # ago. If source:cache:path exists and its mtime and/or ctime is newer -# # than this value then no HTTP query is made. +# +# # Maximum age for caching, in number of seconds ago. If source:cache:path +# # exists and its mtime and/or ctime is newer than this value then no HTTP +# # query is made. # # (Default: 21600, in other words 6h) # max-age: 86400 +# +# # Optional extracting receipe for archived/compressed sources +# unar: +# # The archiving format (only 'zip' is currently supported) +# format: zip +# +# # glob(3)-patterns to extract from the archive. import:path is always +# # extracted as an exact match. +# patterns: +# - 'path/to/source/layer.*' +# +# import: +# # Path for the dataset holding the source layer (relative to the archive root +# # for archived sources, and to --cachedir otherwise). The value is optional +# # for non-archived sources, and defaults to source:cache:path if omitted. +# path: path/to/source/layer.shp +# +# # Format of the source layer to limit allowed driver when opening the dataset. +# format: ESRI Shapefile +# +# # Name of the source layer in the source dataset. If omitted, its 0th layer is +# # considered. +# layername: source_layer +# +# # Whether to apply the spatial filter when importing. Default: True. +# spatial-filter: true +# +# # Mapping of source fields to destination fields. A list translates into an +# identity mapping. +# fields: +# - field_name1 +# - field_name2 +# fields: +# source_field_name2: field_name2 +# source_field_name2: field_name2 + + 'counties': + description: Sveriges län + create: + geometry-type: MULTIPOLYGON + fields: + - name: objektidentitet + type: String + subtype: UUID + unique: true + #width: 36 + - name: skapad + type: DateTime + #TZFlag: TODO + - name: lanskod + type: Integer + subtype: Int16 + unique: true + nullable: false + source: + # https://www.lantmateriet.se/sv/geodata/vara-produkter/produktlista/topografi-250-nedladdning-vektor/ + cache: administrativindelning_sverige.zip + unar: + format: zip + import: + path: administrativindelning_sverige.gpkg + format: GPKG + layername: lansyta + spatial-filter: false + fields: + - objektidentitet + - skapad + - lanskod + + 'municipalities': + description: Sveriges kommuner + create: + geometry-type: MULTIPOLYGON + fields: + - name: objektidentitet + type: String + subtype: UUID + unique: true + #width: 36 + - name: skapad + type: DateTime + #TZFlag: TODO + - name: kommunkod + type: Integer + subtype: Int16 + unique: true + nullable: false + source: + # https://www.lantmateriet.se/sv/geodata/vara-produkter/produktlista/topografi-250-nedladdning-vektor/ + cache: administrativindelning_sverige.zip + unar: + format: zip + import: + path: administrativindelning_sverige.gpkg + format: GPKG + layername: kommunyta + spatial-filter: false + fields: + - objektidentitet + - skapad + - kommunkod + 'nvr:TILLTRADESFORBUD': source: @@ -167,13 +389,156 @@ layers: cache: naturvardsregistret/ 'sks:AvverkAnm': + # https://geodpags.skogsstyrelsen.se/geodataport/feeds/AvverkAnm.xml + description: Avverkningsanmälningar (Skogsstyrelsen) + create: + geometry-type: MULTIPOLYGON + fields: + - name: objektid + type: Integer + unique: true + nullable: false + - name: year + type: Integer + subtype: Int16 + nullable: false + - name: beteckn + type: String + width: 23 + unique: true + nullable: false + - name: avverktyp + type: String + width: 254 + nullable: false + - name: skogstyp + type: String + width: 254 + nullable: false + - name: date + type: Date + nullable: false + - name: AnmaldHa + type: Real + width: 24 + precision: 15 + nullable: false + - name: SkogsodlHa + type: Real + width: 24 + precision: 15 + nullable: false + - name: NatforHa + type: Real + width: 24 + precision: 15 + nullable: false + - name: status + type: String + width: 21 + nullable: false + - name: AvvSasong + type: String + width: 254 + nullable: false + - name: AvvHa + type: Real + width: 24 + precision: 15 + - name: Avverkning + type: String + width: 20 + nullable: false source: download: url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksAvverkAnm.zip' max-size: 134217728 # 128MiB cache: sks/ + unar: + format: zip + patterns: + - 'sksAvverkAnm.*' + import: + path: sksAvverkAnm.shp + format: ESRI Shapefile + layername: sksAvverkAnm + fields: + OBJECTID: objektid + ArendeAr: year + Beteckn: beteckn + Avverktyp: avverktyp + Skogstyp: skogstyp + Inkomdatum: date + AnmaldHa: AnmaldHa + SkogsodlHa: SkogsodlHa + NatforHa: NatforHa + ArendeStat: status + AvvSasong: AvvSasong + AvvHa: AvvHa + Avverkning: Avverkning 'sks:UtfordAvverk': + # https://geodpags.skogsstyrelsen.se/geodataport/feeds/UtfordAvverk.xml + description: Utförd avverkning (Skogsstyrelsen) + create: + geometry-type: MULTIPOLYGON + fields: + - name: objektid + type: Integer + #unique: true + nullable: false + - name: year + type: Integer + subtype: Int16 + nullable: false + - name: beteckn + type: String + width: 8 + #unique: true + nullable: false + - name: avverktyp + type: String + width: 254 + nullable: false + - name: skogstyp + type: String + width: 254 + nullable: false + - name: AnmaldHa + type: Real + width: 24 + precision: 15 + #nullable: false + - name: SkogsodlHa + type: Real + width: 24 + precision: 15 + nullable: false + - name: NatforHa + type: Real + width: 24 + precision: 15 + nullable: false + - name: date + type: Date + nullable: false + - name: KallaDatum + type: Date + - name: KallaAreal + type: String + width: 50 + - name: Forebild + type: String + width: 50 + - name: Efterbild + type: String + width: 59 + - name: Arealha + type: Real + width: 24 + precision: 15 + nullable: false + sources: - source: download: @@ -182,6 +547,30 @@ layers: cache: path: sks/ max-age: 2592000 # 30d + unar: + format: zip + patterns: + - 'sksUtfordAvverk-2000-2015.*' + import: + path: sksUtfordAvverk-2000-2015.shp + format: ESRI Shapefile + layername: sksUtfordAvverk-2000-2015 + fields: + OBJECTID: objektid + Arendear: year + Beteckn: beteckn + Avverktyp: avverktyp + Skogstyp: skogstyp + AnmaldHa: AnmaldHa + SkogsodlHa: SkogsodlHa + Natforha: NatforHa + Avvdatum: date + KallaDatum: KallaDatum + KallaAreal: KallaAreal + Forebild: Forebild + Efterbild: Efterbild + Arealha: Arealha + - source: download: url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksUtfordAvverk-2016-2019.zip' @@ -189,6 +578,30 @@ layers: cache: path: sks/ max-age: 2592000 # 30d + unar: + format: zip + patterns: + - 'sksUtfordAvverk-2016-2019.*' + import: + path: sksUtfordAvverk-2016-2019.shp + format: ESRI Shapefile + layername: sksUtfordAvverk-2016-2019 + fields: + OBJECTID: objektid + Arendear: year + Beteckn: beteckn + Avverktyp: avverktyp + Skogstyp: skogstyp + AnmaldHa: AnmaldHa + SkogsodlHa: SkogsodlHa + Natforha: NatforHa + Avvdatum: date + KallaDatum: KallaDatum + KallaAreal: KallaAreal + Forebild: Forebild + Efterbild: Efterbild + Arealha: Arealha + - source: download: url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksUtfordAvverk-2020-2022.zip' @@ -196,11 +609,58 @@ layers: cache: path: sks/ max-age: 864000 # 10d + unar: + format: zip + patterns: + - 'sksUtfordAvverk-2020-2022.*' + import: + path: sksUtfordAvverk-2020-2022.shp + format: ESRI Shapefile + layername: sksUtfordAvverk-2020-2022 + fields: + OBJECTID: objektid + Arendear: year + Beteckn: beteckn + Avverktyp: avverktyp + Skogstyp: skogstyp + AnmaldHa: AnmaldHa + SkogsodlHa: SkogsodlHa + Natforha: NatforHa + Avvdatum: date + KallaDatum: KallaDatum + KallaAreal: KallaAreal + Forebild: Forebild + Efterbild: Efterbild + Arealha: Arealha + - source: download: url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksUtfordAvverk-2023-.zip' max-size: 1073741824 # 1GiB cache: sks/ + unar: + format: zip + patterns: + - 'sksUtfordAvverk-2023-.*' + import: + path: sksUtfordAvverk-2023-.shp + format: ESRI Shapefile + layername: sksUtfordAvverk-2023- + fields: + OBJECTID: objektid + Arendear: year + Beteckn: beteckn + Avverktyp: avverktyp + Skogstyp: skogstyp + AnmaldHa: AnmaldHa + SkogsodlHa: SkogsodlHa + Natforha: NatforHa + Avvdatum: date + KallaDatum: KallaDatum + KallaAreal: KallaAreal + Forebild: Forebild + Efterbild: Efterbild + Arealha: Arealha 'st:betesomraden': source: diff --git a/webmap-import b/webmap-import new file mode 100755 index 0000000..6daceee --- /dev/null +++ b/webmap-import @@ -0,0 +1,1017 @@ +#!/usr/bin/python3 + +#---------------------------------------------------------------------- +# Backend utilities for the Klimatanalys Norr project (extract/import layers) +# Copyright © 2024 Guilhem Moulin +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +#---------------------------------------------------------------------- + +import os +import logging +import argparse +import tempfile +from fnmatch import fnmatchcase +from pathlib import Path + +from osgeo import gdal, ogr, osr +from osgeo.gdalconst import ( + OF_VECTOR as GDAL_OF_VECTOR, + OF_ALL as GDAL_OF_ALL, + OF_READONLY as GDAL_OF_READONLY, + OF_UPDATE as GDAL_OF_UPDATE, + OF_VERBOSE_ERROR as GDAL_OF_VERBOSE_ERROR, + CE_None as GDAL_CE_None, + DCAP_CREATE as GDAL_DCAP_CREATE, + DCAP_VECTOR as GDAL_DCAP_VECTOR, + DCAP_DEFAULT_FIELDS as GDAL_DCAP_DEFAULT_FIELDS, + DCAP_NOTNULL_FIELDS as GDAL_DCAP_NOTNULL_FIELDS, + DCAP_UNIQUE_FIELDS as GDAL_DCAP_UNIQUE_FIELDS, +) +import osgeo.gdalconst as gdalconst +gdal.UseExceptions() + +import common + +# Wrapper around gdal.MajorObject.GetMetadataItem(name) +def getMetadataItem(o, k): + v = o.GetMetadataItem(k) + if v is not None and isinstance(v, str): + return v.upper() == 'YES' + else: + return False + +# Return kwargs and driver for OpenEx() +def setOpenExArgs(option_dict, flags=0): + kwargs = { 'nOpenFlags': GDAL_OF_VECTOR | flags } + + fmt = option_dict.get('format', None) + if fmt is None: + drv = None + else: + drv = gdal.GetDriverByName(fmt) + if drv is None: + raise Exception(f'Unknown driver name "{fmt}"') + elif not getMetadataItem(drv, GDAL_DCAP_VECTOR): + raise Exception(f'Driver "{drv.ShortName}" has no vector capabilities') + kwargs['allowed_drivers'] = [ drv.ShortName ] + + oo = option_dict.get('open-options', None) + if oo is not None: + kwargs['open_options'] = [ k + '=' + str(v) for k, v in oo.items() ] + return kwargs, drv + +# Open and return the output DS. It is created if create=False or +# create-options is a non-empty dictionary. +def openOutputDS(def_dict): + path = def_dict['path'] + kwargs, drv = setOpenExArgs(def_dict, flags=GDAL_OF_UPDATE|GDAL_OF_VERBOSE_ERROR) + try: + logging.debug('OpenEx(%s, %s)', path, str(kwargs)) + return gdal.OpenEx(path, **kwargs) + except RuntimeError as e: + if not (gdal.GetLastErrorType() >= gdalconst.CE_Failure and + gdal.GetLastErrorNo() == gdalconst.CPLE_OpenFailed): + # not an open error + raise e + + dso2 = None + try: + dso2 = gdal.OpenEx(path, nOpenFlags=GDAL_OF_ALL | GDAL_OF_UPDATE) + except Exception: + pass + if dso2 is not None: + # path exists but can't be open with OpenEx(path, **kwargs) + raise e + + try: + dso2 = gdal.OpenEx(path, nOpenFlags=GDAL_OF_ALL) + except Exception: + pass + if dso2 is not None: + # path exists but can't be open with OpenEx(path, **kwargs) + raise e + + dsco = def_dict.get('create-options', None) + if not def_dict.get('create', False) and dsco is None: + # not configured for creation + raise e + if drv is None or not getMetadataItem(drv, GDAL_DCAP_CREATE): + # not capable of creation + raise e + + if 'open_options' in kwargs: + # like ogr2ogr(1) + logging.warning('Destination\'s open options ignored ' + + 'when creating the output datasource') + + kwargs2 = { 'eType': gdal.GDT_Unknown } + if dsco is not None: + kwargs2['options'] = [ k + '=' + str(v) for k, v in dsco.items() ] + + logging.debug('Create(%s, %s, eType=%s%s)', drv.ShortName, path, kwargs2['eType'], + ', options=' + str(kwargs2['options']) if 'options' in kwargs2 else '') + # XXX racy, a GDAL equivalent of O_EXCL would be nice + return drv.Create(path, 0, 0, 0, **kwargs2) + +# cf. ogr/ogrgeometry.cpp:OGRFromOGCGeomType() +def fromGeomTypeName(name): + name = name.upper() + + isMeasured = False + if name.endswith('M'): + isMeasured = True + name = name.removesuffix('M') + + convertTo3D = False + if name.endswith('Z'): + convertTo3D = True + name = name.removesuffix('Z') + + if name == 'POINT': + eGType = ogr.wkbPoint + elif name == 'LINESTRING': + eGType = ogr.wkbLineString + elif name == 'POLYGON': + eGType = ogr.wkbPolygon + elif name == 'MULTIPOINT': + eGType = ogr.wkbMultiPoint + elif name == 'MULTILINESTRING': + eGType = ogr.wkbMultiLineString + elif name == 'MULTIPOLYGON': + eGType = ogr.wkbMultiPolygon + elif name == 'GEOMETRYCOLLECTION': + eGType = ogr.wkbGeometryCollection + elif name == 'CIRCULARSTRING': + eGType = ogr.wkbCircularString + elif name == 'COMPOUNDCURVE': + eGType = ogr.wkbCompoundCurve + elif name == 'CURVEPOLYGON': + eGType = ogr.wkbCurvePolygon + elif name == 'MULTICURVE': + eGType = ogr.wkbMultiCurve + elif name == 'MULTISURFACE': + eGType = ogr.wkbMultiSurface + elif name == 'TRIANGLE': + eGType = ogr.wkbTriangle + elif name == 'POLYHEDRALSURFACE': + eGType = ogr.wkbPolyhedralSurface + elif name == 'TIN': + eGType = ogr.wkbTIN + elif name == 'CURVE': + eGType = ogr.wkbCurve + elif name == 'SURFACE': + eGType = ogr.wkbSurface + else: + eGType = ogr.wkbUnknown + + if convertTo3D: + eGType = ogr.GT_SetZ(eGType) + + if isMeasured: + eGType = ogr.GT_SetM(eGType) + + return eGType + +# Parse geometry type, cf. ogr2ogr_lib.cpp +def parseGeomType(name): + if name is None: + return ogr.wkbUnknown + name2 = name.upper() + + is3D = False + if name2.endswith('25D'): + name2 = name2[:-3] # alias + is3D = True + elif name2.endswith('Z'): + name2 = name2[:-1] + is3D = True + + if name2 == 'NONE': + eGType = ogr.wkbNone + elif name2 == 'GEOMETRY': + eGType = ogr.wkbUnknown + else: + eGType = fromGeomTypeName(name2) + if eGType == ogr.wkbUnknown: + raise Exception(f'Unknown geometry type "{name}"') + + if eGType != ogr.wkbNone and is3D: + eGType = ogr.GT_SetZ(eGType) + + return eGType + +# cf. ogr/ogr_core.h's enum OGRFieldType; +def getFieldTypeCode(name): + if name is None: + raise Exception('getFieldTypeCode(None)') + name2 = name.lower() + if name2 == 'integer': + # simple 32bit integer + return ogr.OFTInteger + elif name2 == 'integerlist': + # List of 32bit integers + return ogr.OFTIntegerList + elif name2 == 'real': + # Double Precision floating point + return ogr.OFTReal + elif name2 == 'reallist': + # List of doubles + return ogr.OFTRealList + elif name2 == 'string': + # String of ASCII chars + return ogr.OFTString + elif name2 == 'stringlist': + # Array of strings + return ogr.OFTStringList + elif name2 == 'binary': + # Raw Binary data + return ogr.OFTBinary + elif name2 == 'date': + # Date + return ogr.OFTDate + elif name2 == 'time': + # Time + return ogr.OFTTime + elif name2 == 'datetime': + # Date and Time + return ogr.OFTDateTime + elif name2 == 'integer64': + # Single 64bit integer + return ogr.OFTInteger64 + elif name2 == 'integer64list': + # List of 64bit integers + return ogr.OFTInteger64List + else: + raise Exception(f'Unknown field type "{name}"') + +# cf. ogr/ogr_core.h's enum OGRFieldSubType; +def getFieldSubTypeCode(name): + if name is None: + raise Exception('getFieldSubTypeCode(None)') + name2 = name.lower() + if name2 == 'none': + # No subtype. This is the default value. + return ogr.OFSTNone + elif name2 == 'bool': + # Boolean integer. Only valid for OFTInteger and OFTIntegerList. + return ogr.OFSTBoolean + elif name2 == 'int16': + # Signed 16-bit integer. Only valid for OFTInteger and OFTIntegerList. + return ogr.OFSTInt16 + elif name2 == 'float32': + # Single precision (32 bit) floating point. Only valid for OFTReal and OFTRealList. + return ogr.OFSTFloat32 + elif name2 == 'json': + # JSON content. Only valid for OFTString. + return ogr.OFSTJSON + elif name2 == 'uuid': + # UUID string representation. Only valid for OFTString. + return ogr.OFSTUUID + else: + raise Exception(f'Unknown field subtype "{name2}"') + +# Validate layer creation options and schema. The schema is modified in +# place with the parsed result. +# (We need the driver of the output dataset to determine capability on +# constraints.) +def validateSchema(layers, drvo=None, lco_defaults=None): + # cache driver capabilities + drvoSupportsDefaultFields = getMetadataItem(drvo, GDAL_DCAP_DEFAULT_FIELDS) + drvoSupportsNotNULLFields = getMetadataItem(drvo, GDAL_DCAP_NOTNULL_FIELDS) + drvoSupportsUniqueFields = getMetadataItem(drvo, GDAL_DCAP_UNIQUE_FIELDS) + + for layername, layerdef in layers.items(): + create = layerdef.get('create', None) + if create is None or len(create) < 1: + logging.warning('Layer "%s" has no creation schema', layername) + continue + + # prepend global layer creation options (dataset:create-layer-options) + # and build the option=value list + lco = create.get('options', None) + if lco_defaults is not None or lco is not None: + options = [] + if lco_defaults is not None: + options += [ k + '=' + str(v) for k, v in lco_defaults.items() ] + if lco is not None: + options += [ k + '=' + str(v) for k, v in lco.items() ] + create['options'] = options + + # parse geometry type + create['geometry-type'] = parseGeomType(create.get('geometry-type', None)) + + fields = create.get('fields', None) + if fields is None: + create['fields'] = [] + else: + fields_set = set() + for idx, fld_def in enumerate(fields): + fld_name = fld_def.get('name', None) + if fld_name is None or fld_name == '': + raise Exception(f'Field #{idx} has no name') + if fld_name in fields_set: + raise Exception(f'Duplicate field "{fld_name}"') + fields_set.add(fld_name) + + fld_def2 = { 'Name': fld_name } + for k, v in fld_def.items(): + k2 = k.lower() + if k2 == 'name': + pass + elif k2 == 'alternativename' or k2 == 'alias': + fld_def2['AlternativeName'] = v + elif k2 == 'comment': + fld_def2['Comment'] = v + + elif k2 == 'type': + fld_def2['Type'] = getFieldTypeCode(v) + elif k2 == 'subtype': + fld_def2['SubType'] = getFieldSubTypeCode(v) + elif k2 == 'tzflag': + pass # TODO + #fld_def2['TZFlag'] = v + elif k2 == 'width' and v is not None and isinstance(v, int): + fld_def2['Width'] = v + elif k2 == 'precision' and v is not None and isinstance(v, int): + fld_def2['Precision'] = v + + # constraints + elif k2 == 'default': + if drvoSupportsDefaultFields: + fld_def2['Default'] = v + else: + logging.warning('%s driver lacks GDAL_DCAP_DEFAULT_FIELDS support', + drvo.ShortName) + elif k2 == 'nullable' and v is not None and isinstance(v, bool): + if drvoSupportsNotNULLFields: + fld_def2['Nullable'] = v + else: + logging.warning('%s driver lacks GDAL_DCAP_NOTNULL_FIELDS support', + drvo.ShortName) + elif k2 == 'unique' and v is not None and isinstance(v, bool): + if drvoSupportsUniqueFields: + fld_def2['Unique'] = v + else: + logging.warning('%s driver lacks GDAL_DCAP_UNIQUE_FIELDS support', + drvo.ShortName) + else: + raise Exception(f'Field "{fld_name}" has unknown key "{k}"') + + fields[idx] = fld_def2 + +# Return the decoded Spatial Reference System +def getSRS(srs_str): + if srs_str is None: + return + srs = osr.SpatialReference() + if srs_str.startswith('EPSG:'): + code = int(srs_str.removeprefix('EPSG:')) + srs.ImportFromEPSG(code) + else: + raise Exception(f'Unknown SRS {srs_str}') + logging.debug('Default SRS: "%s" (%s)', srs.ExportToProj4(), srs.GetName()) + return srs + +# Validate the output layer against the provided SRS and creation options +def validateOutputLayer(lyr, srs=None, options=None): + ok = True + + # ensure the output SRS is equivalent + if srs is not None: + srs2 = lyr.GetSpatialRef() + # cf. apps/ogr2ogr_lib.cpp + srs_options = [ + 'IGNORE_DATA_AXIS_TO_SRS_AXIS_MAPPING=YES', + 'CRITERION=EQUIVALENT' + ] + if not srs.IsSame(srs2, srs_options): + logging.warning('Output layer "%s" has SRS %s,\nexpected %s', + lyr.GetName(), + srs2.ExportToPrettyWkt(), + srs.ExportToPrettyWkt()) + ok = False + + if options is None: + return ok + + layerDefn = lyr.GetLayerDefn() + n = layerDefn.GetGeomFieldCount() + if n != 1: + logging.warning('Output layer "%s" has %d != 1 geometry fields', layername, n) + + geom_type1 = lyr.GetGeomType() + geom_type2 = options['geometry-type'] + if geom_type1 != geom_type2: + logging.warning('Output layer "%s" has geometry type #%d (%s), expected #%d (%s)', + lyr.GetName(), + geom_type1, ogr.GeometryTypeToName(geom_type1), + geom_type2, ogr.GeometryTypeToName(geom_type2)) + ok = False + + fields = options.get('fields', None) + if fields is not None: + for fld in fields: + fldName = fld['Name'] + + idx = layerDefn.GetFieldIndex(fldName) + if idx < 0: + logging.warning('Output layer "%s" has no field named "%s"', + lyr.GetName(), fldName) + ok = False + continue + defn = layerDefn.GetFieldDefn(idx) + + if 'AlternativeName' in fld: + v1 = defn.GetAlternativeName() + v2 = fld['AlternativeName'] + if v1 != v2: + logging.warning('Field "%s" has AlternativeName="%s", expected "%s"', + fldName, v1, v2) + ok = False + + if 'Comment' in fld: + v1 = defn.GetComment() + v2 = fld['Comment'] + if v1 != v2: + logging.warning('Field "%s" has Comment="%s", expected "%s"', + fldName, v1, v2) + ok = False + + if 'Type' in fld: + v1 = defn.GetType() + v2 = fld['Type'] + if v1 != v2: + logging.warning('Field "%s" has Type=%d (%s), expected %d (%s)', + fldName, + v1, ogr.GetFieldTypeName(v1), + v2, ogr.GetFieldTypeName(v2)) + ok = False + + if 'SubType' in fld: + v1 = defn.GetSubType() + v2 = fld['SubType'] + if v1 != v2: + logging.warning('Field "%s" has SubType=%d (%s), expected %d (%s)', + fldName, + v1, ogr.GetFieldSubTypeName(v1), + v2, ogr.GetFieldSubTypeName(v2)) + ok = False + + if 'TZFlag' in fld: + v1 = defn.GetTZFlag() + v2, n2 = fld['TZFlag'] + if v1 != v2: + logging.warning('Field "%s" has TZFlag=%d, expected %d (%s)', + fldName, v1, v2, n2) + ok = False + + if 'Precision' in fld: + v1 = defn.GetPrecision() + v2 = fld['Precision'] + if v1 != v2: + logging.warning('Field "%s" has Precision=%d, expected %d', + fldName, v1, v2) + ok = False + + if 'Width' in fld: + v1 = defn.GetWidth() + v2 = fld['Width'] + if v1 != v2: + logging.warning('Field "%s" has Width=%d, expected %d', + fldName, v1, v2) + ok = False + + if 'Default' in fld: + v1 = defn.GetDefault() + v2 = fld['Default'] + if v1 != v2: + logging.warning('Field "%s" has Default="%s", expected "%s"', + fldName, v1, v2) + ok = False + + if 'Nullable' in fld: + v1 = bool(defn.IsNullable()) + v2 = fld['Nullable'] + if v1 != v2: + logging.warning('Field "%s" has Nullable=%s, expected %s', + fldName, v1, v2) + ok = False + + if 'Unique' in fld: + v1 = bool(defn.IsUnique()) + v2 = fld['Unique'] + if v1 != v2: + logging.warning('Field "%s" has Unique=%s, expected %s', + fldName, v1, v2) + ok = False + + return ok + +# Create output layer +def createOutputLayer(ds, layername, srs=None, options=None): + logging.info('Creating new destination layer "%s"', layername) + geom_type = options['geometry-type'] + lco = options.get('options', None) + + drv = ds.GetDriver() + if geom_type != ogr.wkbNone and drv.ShortName == 'PostgreSQL': + # “Important to set to 2 for 2D layers as it has constraints on the geometry + # dimension during loading.” + # — https://gdal.org/drivers/vector/pg.html#layer-creation-options + if ogr.GT_HasM(geom_type): + if ogr.GT_HasZ(geom_type): + dim = 'XYZM' + else: + dim = 'XYM' + elif ogr.GT_HasZ(geom_type): + dim = '3' + else: + dim = '2' + if lco is None: + lco = [] + lco = ['dim=' + dim] + lco # prepend dim= + + kwargs = { 'geom_type': geom_type } + if srs is not None: + kwargs['srs'] = srs + if lco is not None: + kwargs['options'] = lco + logging.debug('CreateLayer(%s, geom_type="%s"%s%s)', layername, + ogr.GeometryTypeToName(geom_type), + ', srs="' + kwargs['srs'].GetName() + '"' if 'srs' in kwargs else '', + ', options=' + str(kwargs['options']) if 'options' in kwargs else '') + lyr = dso.CreateLayer(layername, **kwargs) + if lyr is None: + raise Exception(f'Could not create destination layer "{layername}"') + + fields = options['fields'] + if len(fields) > 0 and not lyr.TestCapability(ogr.OLCCreateField): + raise Exception(f'Destination layer "{layername}" lacks field creation capability') + + # set up output schema + for fld in fields: + fldName = fld['Name'] + defn = ogr.FieldDefn() + defn.SetName(fldName) + + if 'AlternativeName' in fld: + v = fld['AlternativeName'] + logging.debug('Set AlternativeName=%s on output field "%s"', str(v), fldName) + defn.SetAlternativeName(v) + + if 'Comment' in fld: + v = fld['Comment'] + logging.debug('Set Comment=%s on output field "%s"', str(v), fldName) + defn.SetComment(v) + + if 'Type' in fld: + v = fld['Type'] + logging.debug('Set Type=%d (%s) on output field "%s"', + v, ogr.GetFieldTypeName(v), fldName) + defn.SetType(v) + + if 'SubType' in fld: + v = fld['SubType'] + logging.debug('Set SubType=%d (%s) on output field "%s"', + v, ogr.GetFieldSubTypeName(v), fldName) + defn.SetSubType(v) + + if 'TZFlag' in fld: + v, n = fld['TZFlag'] + logging.debug('Set TZFlag=%d (%s) on output field "%s"', v, n, fldName) + defn.SetTZFlag(v) + + if 'Precision' in fld: + v = fld['Precision'] + logging.debug('Set Precision=%d on output field "%s"', v, fldName) + defn.SetPrecision(v) + + if 'Width' in fld: + v = fld['Width'] + logging.debug('Set Width=%d on output field "%s"', v, fldName) + defn.SetWidth(v) + + if 'Default' in fld: + v = fld['Default'] + logging.debug('Set Default=%s on output field "%s"', v, fldName) + defn.SetDefault(v) + + if 'Nullable' in fld: + v = fld['Nullable'] + logging.debug('Set Nullable=%s on output field "%s"', v, fldName) + defn.SetNullable(v) + + if 'Unique' in fld: + v = fld['Unique'] + logging.debug('Set Unique=%s on output field "%s"', v, fldName) + defn.SetUnique(v) + + if lyr.CreateField(defn, approx_ok=False) != GDAL_CE_None: + raise Exception('Could not create field "{fldName}"') + logging.debug('Added field "%s" to output layer "%s"', fldName, layername) + + # flush before calling StartTransaction() so we're not tryingn to + # rollback changes on a non-existing table + lyr.SyncToDisk() + return lyr + +# Setup output field mapping, modifying the sources dictionary in place. +def setOutputFieldMap(defn, sources): + fieldMap = {} + n = defn.GetFieldCount() + for i in range(n): + fld = defn.GetFieldDefn(i) + fldName = fld.GetName() + fieldMap[fldName] = i + + for source in sources: + src = source['source']['path'] + fieldMap2 = source['import'].get('fields', None) + if fieldMap2 is None: + fieldMap2 = {} + else: + if isinstance(fieldMap2, list): + # convert list to identity dictionary + fieldMap2 = { fld: fld for fld in fieldMap2 } + + for ifld, ofld in fieldMap2.items(): + i = fieldMap.get(ofld, None) + if i is None: + raise Exception(f'Ouput layer has no field named "{ofld}"') + fieldMap2[ifld] = i + + source['import']['fields'] = fieldMap2 + + return fieldMap + +# Escape the given identifier, cf. +# swig/python/gdal-utils/osgeo_utils/samples/validate_gpkg.py:_esc_id() +def escapeIdentifier(identifier): + if '\x00' in identifier: + raise Exception(f'Invalid identifier "{identifier}"') + # SQL:1999 delimited identifier + return '"' + identifier.replace('"', '""') + '"' + +# Clear the given layer (wipe all its features) +def clearLayer(ds, lyr): + n = -1 + if lyr.TestCapability(ogr.OLCFastFeatureCount): + n = lyr.GetFeatureCount(force=0) + if n == 0: + # nothing to clear, we're good + return + layername_esc = escapeIdentifier(lyr.GetName()) + + # GDAL <3.9 doesn't have lyr.GetDataset() so we pass the DS along with the layer + drv = ds.GetDriver() + if drv.ShortName == 'PostgreSQL': + # https://www.postgresql.org/docs/15/sql-truncate.html + query = 'TRUNCATE TABLE {table} CONTINUE IDENTITY RESTRICT' + op = 'Truncating' + else: + query = 'DELETE FROM {table}' + op = 'Clearing' + logging.info('%s table %s (former feature count: %s)', op, + layername_esc, str(n) if n >= 0 else 'unknown') + ds.ExecuteSQL(query.format(table=layername_esc)) + +# Extract an archive file into the given destination directory. +def extractArchive(path, destdir, fmt=None, patterns=None, exact_matches=None): + if fmt is None: + suffix = path.suffix + if suffix is None or suffix == '' or not suffix.startswith('.'): + raise Exception(f'Could not infer archive format from "{path}"') + fmt = suffix.removeprefix('.') + + fmt = fmt.lower() + logging.debug('Unpacking %s archive %s into %s', fmt, path, destdir) + + if fmt == 'zip': + from zipfile import ZipFile + logging.debug('Opening %s as ZipFile', path) + with ZipFile(path, mode='r') as z: + namelist = listArchiveMembers(z.namelist(), + patterns=patterns, exact_matches=exact_matches) + z.extractall(path=destdir, members=namelist) + else: + raise Exception(f'Unknown archive format "{fmt}"') + +# List archive members matching the given parterns and/or exact matches +def listArchiveMembers(namelist, patterns, exact_matches=None): + if patterns is None and exact_matches is None: + # if neither patterns nor exact_matches are given we'll extract the entire archive + return namelist + if patterns is None: + patterns = [] + if exact_matches is None: + exact_matches = [] + + members = [] + for name in namelist: + ok = False + if name in exact_matches: + # try exact matches first + logging.debug('Listed archive member %s (exact match)', name) + members.append(name) + ok = True + continue + # if there are no exact matches, try patterns one by one in the supplied order + for pat in patterns: + if fnmatchcase(name, pat): + logging.debug('Listed archive member %s (matching pattern "%s")', name, pat) + members.append(name) + ok = True + break + if not ok: + logging.debug('Ignoring archive member %s', name) + return members + +# Import a source layer +def importSource(lyr, path=None, unar=None, args={}, cachedir=None, extent=None): + if unar is None: + return importSource2(lyr, str(path), args=args, + basedir=cachedir, extent=extent) + + if cachedir is not None: + path = cachedir.joinpath(path) + + ds_srcpath = Path(args['path']) + if ds_srcpath.is_absolute(): + # treat absolute paths as relative to the archive root + logging.warning('%s is absolute, removing leading anchor', ds_srcpath) + ds_srcpath = ds_srcpath.relative_to(ds_srcpath.anchor) + ds_srcpath = str(ds_srcpath) + + with tempfile.TemporaryDirectory() as tmpdir: + logging.debug('Created temporary directory %s', tmpdir) + extractArchive(path, tmpdir, + fmt=unar.get('format', None), + patterns=unar.get('patterns', None), + exact_matches=[ds_srcpath]) + return importSource2(lyr, ds_srcpath, args=args, + basedir=Path(tmpdir), extent=extent) + +# Import a source layer (already extracted) +# This is more or less like ogr2ogr/GDALVectorTranslate() but we roll +# out our own (slower) version because GDALVectorTranslate() insists in +# calling StartTransaction() https://github.com/OSGeo/gdal/issues/3403 +# while we want a single transaction for the entire desination layer, +# including truncation, source imports, and metadata changes. +def importSource2(lyr_dst, path, args={}, basedir=None, extent=None): + kwargs, _ = setOpenExArgs(args, flags=GDAL_OF_READONLY|GDAL_OF_VERBOSE_ERROR) + path2 = path if basedir is None else str(basedir.joinpath(path)) + + logging.debug('OpenEx(%s, %s)', path2, str(kwargs)) + ds = gdal.OpenEx(path2, **kwargs) + if ds is None: + raise Exception(f'Could not open {path2}') + + layername = args.get('layername', None) + if layername is None: + idx = 0 + lyr = ds.GetLayerByIndex(idx) + msg = '#' + str(idx) + if lyr is not None: + layername = lyr.GetName() + msg += ' ("' + layername + '")' + else: + lyr = ds.GetLayerByName(layername) + msg = '"' + layername + '"' + if lyr is None: + raise Exception(f'Could not get requested layer {msg} from {path2}') + + logging.info('Importing layer %s from "%s"', msg, path) + canIgnoreFields = lyr.TestCapability(ogr.OLCIgnoreFields) + + defn = lyr.GetLayerDefn() + n = defn.GetGeomFieldCount() + if n != 1: # XXX + logging.warning('Source layer "%s" has %d != 1 geometry fields', layername, n) + + n = defn.GetFieldCount() + fieldMap = [-1] * n + fields = args['fields'] + fieldSet = set() + + for i in range(n): + fld = defn.GetFieldDefn(i) + fldName = fld.GetName() + fieldMap[i] = v = fields.get(fldName, -1) + fieldSet.add(fldName) + + if v < 0 and canIgnoreFields: + # call SetIgnored() on unwanted source fields + logging.debug('Set Ignored=True on output field "%s"', fldName) + fld.SetIgnored(True) + defn = None + + logging.debug('Field map: %s', str(fieldMap)) + for fld in fields: + if not fld in fieldSet: + logging.warning('Source layer "%s" has no field named "%s", ignoring', layername, fld) + + count0 = -1 + if lyr.TestCapability(ogr.OLCFastFeatureCount): + count0 = lyr.GetFeatureCount(force=0) + + count1 = -1 + if args.get('spatial-filter', True) and extent is not None: + # TODO reprojection + lyr.SetSpatialFilterRect(*extent) + if lyr.TestCapability(ogr.OLCFastFeatureCount): + count1 = lyr.GetFeatureCount(force=0) + + if count0 >= 0: + if count1 >= 0: + logging.info('Source layer "%s" has %d features (of which %d within extent)', + layername, count0, count1) + else: + logging.info('Source layer "%s" has %d features', layername, count0) + + #print(lyr.GetSupportedSRSList()) + # TODO reprojection + + defn_dst = lyr_dst.GetLayerDefn() + eGType_dst = defn_dst.GetGeomType() + + n = 0 + #mismatch = 0 + feature = lyr.GetNextFeature() + while feature is not None: + #print(lyr.GetFeaturesRead()) + feature2 = ogr.Feature(defn_dst) + # TODO MakeValid, force geometry, dimension + feature2.SetFromWithMap(feature, False, fieldMap) + + geom = feature2.GetGeometryRef() + eGType = geom.GetGeometryType() + if eGType != eGType_dst: + if eGType == ogr.wkbPolygon and eGType_dst == ogr.wkbMultiPolygon: + geom = ogr.ForceToMultiPolygon(geom) + else: + raise Exception('Not implemented') + feature2.SetGeometryDirectly(geom) + #mismatch += 1 + + lyr_dst.CreateFeature(feature2) + n += 1 + feature = lyr.GetNextFeature() + + #print(mismatch) + lyr = None + logging.info('%d features imported from source layer "%s"', n, layername) + + +if __name__ == '__main__': + logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO) + + parser = argparse.ArgumentParser(description='Extract and import GIS layers.') + parser.add_argument('--cachedir', default=None, + help=f'cache directory (default: {os.curdir})') + parser.add_argument('--debug', action='count', default=0, + help=argparse.SUPPRESS) + parser.add_argument('groupname', nargs='*', help='group layer name(s) to process') + args = parser.parse_args() + + if args.debug > 0: + logging.getLogger().setLevel(logging.DEBUG) + if args.debug > 1: + gdal.ConfigurePythonLogging(enable_debug=True) + + common.load_config(groupnames=None if args.groupname == [] else args.groupname) + + # validate configuration + if 'dataset' not in common.config: + raise Exception('Configuration does not specify output dataset') + + layers = common.config.get('layers', {}) + for layername, layerdefs in layers.items(): + for idx, layerdef in enumerate(layerdefs['sources']): + importdef = layerdef.get('import', None) + if importdef is None: + raise Exception(f'Output layer "{layername}" source #{idx} has no import definition') + + sourcedef = layerdef.get('source', None) + unar = None if sourcedef is None else sourcedef.get('unar', None) + src = None if sourcedef is None else sourcedef['cache'].get('path', None) + + ds_srcpath = importdef.get('path', None) + if src is None and unar is None and ds_srcpath is not None: + # fallback to importe:path if there is no unarchiving receipe + src = Path(ds_srcpath) + if unar is not None and ds_srcpath is None: + raise Exception(f'Output layer "{layername}" source #{idx} has no import source path') + if src is None: + raise Exception(f'Output layer "{layername}" source #{idx} has no source path') + layerdef['source'] = { 'path': src, 'unar': unar } + + # set global GDAL/OGR configuration options + for pszKey, pszValue in common.config.get('GDALconfig', {}).items(): + logging.debug('gdal.SetConfigOption(%s, %s)', pszKey, pszValue) + gdal.SetConfigOption(pszKey, pszValue) + + # open output dataset (possibly create it first) + dso = openOutputDS(common.config['dataset']) + + validateSchema(layers, + drvo=dso.GetDriver(), + lco_defaults=common.config['dataset'].get('create-layer-options', None)) + + # get configured Spatial Reference System and extent + srs = getSRS(common.config.get('SRS', None)) + extent = common.config.get('extent', None) + if extent is not None: + logging.debug('Configured extent in %s: %s', + srs.GetName(), ', '.join(map(str, extent))) + + cachedir = Path(args.cachedir) if args.cachedir is not None else None + rv = 0 + for layername, layerdef in layers.items(): + sources = layerdef['sources'] + if sources is None or len(sources) < 1: + logging.warning('Output layer "%s" has no definition, skipping', layername) + continue + + logging.info('Processing output layer "%s"', layername) + transaction = False + try: + # get output layer + outLayerIsNotEmpty = True + lco = layerdef.get('create', None) + lyr = dso.GetLayerByName(layername) + if lyr is not None: + # TODO dso.DeleteLayer(layername) if --overwrite and dso.TestCapability(ogr.ODsCDeleteLayer) + # (Sets OVERWRITE=YES for PostgreSQL and GPKG.) + validateOutputLayer(lyr, srs=srs, options=lco) + # TODO bail out if all source files are older than lyr's last_change + elif not dso.TestCapability(ogr.ODsCCreateLayer): + raise Exception(f'Output driver {dso.GetDriver().ShortName} does not support layer creation') + elif lco is None or len(lco) < 1: + raise Exception(f'Missing schema for new output layer "{layername}"') + else: + lyr = createOutputLayer(dso, layername, srs=srs, options=lco) + outLayerIsNotEmpty = False + + if not lyr.TestCapability(ogr.OLCSequentialWrite): + raise Exception(f'Output layer "{layername}" has no working CreateFeature() method') + + # setup output field mapping in the sources dictionary + setOutputFieldMap(lyr.GetLayerDefn(), sources) + + # start transaction if possible + if lyr.TestCapability(ogr.OLCTransactions): + logging.debug('Starting transaction') + transaction = lyr.StartTransaction() == ogr.OGRERR_NONE + else: + logging.warning('Unsafe update, output layer "%s" does not support transactions', layername) + + if outLayerIsNotEmpty: + # clear non-empty output layer + clearLayer(dso, lyr) + + if lyr.SetMetadataItem('DESCRIPTION', layerdef.get('description', '')) != GDAL_CE_None: + logging.warning('Could not set description metadata') + + for source in sources: + # import source layers + importSource(lyr, **source['source'], args=source['import'], + cachedir=cachedir, extent=extent) + + if transaction: + # commit transaction + logging.debug('Committing transaction') + transaction = False + if lyr.CommitTransaction() != ogr.OGRERR_NONE: + logging.error('Could not commit transaction') + rv = 1 + + except Exception: + if transaction: + logging.error('Exception occured in transaction, rolling back') + try: + if lyr.RollbackTransaction() != ogr.OGRERR_NONE: + logging.error('Could not rollback transaction') + except RuntimeError: + logging.exception('Could not rollback transaction') + logging.exception('Could not import layer "%s"', layername) + rv = 1 + + finally: + # close output layer + lyr = None + + dso = None + srs = None + exit(rv) -- cgit v1.2.3