path: root/config.yml
diff options
authorGuilhem Moulin <guilhem@fripost.org>2024-05-16 20:14:52 +0200
committerGuilhem Moulin <guilhem@fripost.org>2024-06-10 06:06:59 +0200
commit5dd5b429f9acdff8e727baa75bbda0b687b88926 (patch)
tree86979cfe5e74d4124cefd39b18c3b96f658afb7d /config.yml
parent681c3d11b4fc02c416467d074ef0b4d84bf0cdab (diff)
Add `webmap-import` script to import source layers.
There are still a few things to do (such as reprojection and geometry changes) but it's mostly working. We roll our own ogr2ogr/GDALVectorTranslate()-like function because GDALVectorTranslate() insists on calling StartTransaction() (https://github.com/OSGeo/gdal/issues/3403) while we want a single transaction for the entire destination layer, including truncation, source imports, and metadata changes. Surprisingly, our version is not much slower than the C++ one. Importing the 157446 (of 667034) features from sksUtfordAvverk-2000-2015.shp takes 14.3s while ogr2ogr -f PostgreSQL \ -doo ACTIVE_SCHEMA=postgis \ --config PG_USE_COPY YES \ --config OGR_TRUNCATE YES \ -append \ -fieldmap "0,-1,-1,-1,-1,1,2,3,4,5,6,7,8,9,10,11,12,13" \ -nlt MULTIPOLYGON -nlt PROMOTE_TO_MULTI \ -gt unlimited \ -spat 110720 6927136 1159296 7975712 \ -nln "sks:UtfordAvverk" \ PG:"dbname='webmap' user='webmap_import'" \ /tmp/x/sksUtfordAvverk-2000-2015.shp \ sksUtfordAvverk-2000-2015 takes 14s. Merely opening /tmp/x/sksUtfordAvverk-2000-2015.shp and looping through its (extent-filtered) features results in a runtime of 4.3s.
Diffstat (limited to 'config.yml')
1 files changed, 494 insertions, 34 deletions
diff --git a/config.yml b/config.yml
index 772cb0e..899d0ca 100644
--- a/config.yml
+++ b/config.yml
@@ -1,18 +1,20 @@
-# Spatial Reference System and (2D) extent
+# Spatial Reference System
SRS: 'EPSG:3006' # a.k.a. SWEREF99 TM, cf. https://epsg.io/3006
+# (2D) extent
- # Lantmäteriet uses a tile-scheme where the origin (upper-left corner) is at
- # N8500000 E-1200000 (SWEREF99 TM), where each tile is 256×256 pixels, and
- # where the resolution at level 0 is 4096m per pixel.
+ # Lantmäteriet uses a tile-scheme where the origin (upper-left corner) is at N8500000
+ # E-1200000 (SWEREF99 TM), where each tile is 256×256 pixels, and where the resolution
+ # at level 0 is 4096m per pixel.
# https://www.lantmateriet.se/globalassets/geodata/geodatatjanster/tb_twk_visning_cache_v1.1.0.pdf
# https://www.lantmateriet.se/globalassets/geodata/geodatatjanster/tb_twk_visning-oversiktlig_v1.0.3.pdf
- # We set the extent to a 4×4 tiles square at level 2 somehow centered on
- # Norrbotten and Västerbotten. This represent a TILEROW (x) offset of 5, and
- # a TILECOL (y) offset of 2. A 4×8 tiles rectangle with the same upper-left
- # and upper-right coordinates can be used to cover the entire country.
+ # We set the extent to a 4×4 tiles square at level 2 somewhat centered on Norrbotten and
+ # Västerbotten. This represents a TILECOL (x) offset of 5, and a TILEROW (y) offset of 2.
+ # A 4×8 tiles rectangle with the same upper-left and upper-right coordinates can be used
+ # to cover the entire country.
- 110720
- 6927136 # alternatively 5878560 for the entire country
- 1159296
@@ -21,9 +23,12 @@ extent:
# Take User-Agent value from Tor Browser 13.0.15 (based on Mozilla Firefox 115.11.0esr)
User-Agent: 'Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/115.0'
-# Map group names to one or more pattern of layer name(s). This is a
-# convenience feature for systemd template units.
+# Map group names to one or more patterns of layer names. This is a convenience feature
+# for systemd template units.
+ administrativindelning:
+ - counties
+ - municipalities
nvr: 'nvr:*'
sks: 'sks:*'
st: 'st:*'
@@ -31,43 +36,260 @@ layer-groups:
mrr: 'mrr:*'
+# Global GDAL/OGR configuration options, cf. https://gdal.org/user/configoptions.html and
+# the driver-specific configuration options such as
+# https://gdal.org/drivers/vector/pg.html#configuration-options or
+# https://gdal.org/drivers/vector/gpkg.html#configuration-options
+ # Path/URI of the output (destination) dataset.
+ path: 'PG:'
+ # Format (optional)
+ format: PostgreSQL
+ # Whether the dataset should be created if it does not exist. (Creation will fail if
+ # the driver doesn't support it.)
+ #create: true
+ #create-options:
+ # Optional driver-specific dataset creation options, cf. for instance
+ # https://gdal.org/drivers/vector/gpkg.html#dataset-creation-options
+ # Optional driver-specific dataset open options, cf. for instance
+ # https://gdal.org/drivers/vector/pg.html#dataset-open-options or
+ # https://gdal.org/drivers/vector/gpkg.html#dataset-open-options
+ open-options:
+ ACTIVE_SCHEMA: postgis
+ USER: webmap_import
+ DBNAME: webmap
+ # Optional dictionary of default layer creation options, cf.
+ # https://gdal.org/drivers/vector/pg.html#layer-creation-options or
+ # https://gdal.org/drivers/vector/gpkg.html#layer-creation-options
+ # These apply to all layers.
+ create-layer-options:
-# # Dictionary of layer names and source receipes in the output dataset.
-# # If a layer has a single source, then the sources singleton can be
-# # inlined.
+# # Dictionary of layer names and source recipes in the output dataset. If a layer
+# # has a single source, then the sources singleton can be inlined.
# layer1_name:
+# description: A string describing that layer
+# create:
+# # Geometry Type for the output layer. Possible values are those of ogr2ogr(1)'s -nlt
+# # option, e.g. CURVEPOLYGON, MULTICURVE, MULTISURFACE. Add Z, M, or ZM to the type
+# # name to specify coordinates with elevation, measure, or both elevation and measure.
+# geometry-type: MULTIPOLYGON
+# # Dictionary of layer creation options, cf.
+# # https://gdal.org/drivers/vector/pg.html#layer-creation-options or
+# # https://gdal.org/drivers/vector/gpkg.html#layer-creation-options
+# # dataset:create-layer-options is prepended when defined.
+# options:
+# # The layer schema: a list of field names, types and constraints.
+# fields:
+# - # Feature field name
+# Name: field_name
+# # Alternative field name (alias). This is a metadata style attribute only: the
+# # alternative name cannot be used in place of the actual field name during SQL
+# # queries or other field name dependent API calls.
+# AlternativeName: field_name_alias
+# # Description/comment
+# Comment: A string describing that field
+# # Feature field type (optional), one of:
+# # * Integer (simple 32bit integer);
+# # * IntegerList (list of 32bit integers);
+# # * Real (double precision floating point);
+# # * RealList (list of doubles);
+# # * String (string of characters);
+# # * StringList (array of strings);
+# # * Binary (raw binary data);
+# # * Date (date);
+# # * Time (time);
+# # * DateTime (date and time);
+# # * Integer64 (64bit integer); or
+# # * Integer64List (list of 64bit integers).
+# Type: String
+# # Feature field subtype (optional), one of:
+# # * None (no subtype, this is the default value);
+# # * Bool (boolean integer, only valid for Integer and IntegerList types);
+# # * Int16 (signed 16-bit integer, only valid for Integer and IntegerList
+# # types);
+# # * Float32 (single precision [32 bit] floating point, only valid for Real
+# # and RealList types);
+# # * JSON (JSON content, only valid for String); or
+# # * UUID (UUID string representation, only valid for String).
+# SubType: UUID
+# # Default timezone (optional), for Time and DateTime types
+# #TZFlag: local
+# # Formatting precision for this field in characters (optional, this should
+# # normally be zero for fields of types other than Real)
+# Precision: 0
+# # Formatting width for this field in characters (optional)
+# Width: 36
+# # Default field value (optional); accepted values are NULL, a numeric value, a
+# # literal value enclosed between single quote characters (and inner single
+# # quote characters escaped by repetition of the single quote character),
+# # CURRENT_TIMESTAMP, CURRENT_TIME, CURRENT_DATE, or a driver-specific expression
+# Default: None
+# # Whether this field accepts NULL values, i.e. has no NOT NULL constraint (optional)
+# Nullable: false
+# # Whether this field has a unique constraint (optional)
+# Unique: true
# sources:
# - source:
# download:
-# # source:download:url: URL from where to download the source file.
-# # source:download can be used as an alias when source:download:url is
-# # its only subkey.
+# # URL from where to download the source file. source:download can be used as
+# # an alias when source:download:url is its only subkey.
# url: 'https://example.net/path/to/layer.zip'
-# # source:download:max-size: The maximum size to download in bytes. An
-# # error is raised when the payload size exceeds this value.
+# # The maximum size to download in bytes. An error is raised when the payload
+# # size exceeds this value.
# # (Default: 67108864, in other words 64MiB)
# max-size: 1073741824
-# # source:download:module: Basename of the download module to use for
-# # that layer.
+# # Basename of the download module to use for that layer.
# module: webmap-download
# cache:
-# # source:cache:path: Local path (relative to --cachedir) where to
-# # (atomically) save the downloaded file. The same path can be used by
-# # multiple entries as long as their pairs (source:download:url,
-# # source:download:module) match. Any parent directories are created if
-# # needed.
-# # If the path is empty or ends with a '/' character then it treated as a
-# # directory and the last component of source:download:url implicitly
-# # used as filename. In that case an error is raised if no filename can
-# # be derived from the URL.
-# # source:cache can be used as an alias when source:cache:path is its
-# # only subkey.
+# # Local path (relative to --cachedir) where to (atomically) save the
+# # downloaded file. The same path can be used by multiple entries as long as
+# # their pairs (source:download:url, source:download:module) match. Any
+# # parent directories are created if needed. If the path is empty or ends
+# # with a '/' character then it is treated as a directory and the last component
+# # of source:download:url is implicitly used as filename. In that case an error
+# # is raised if no filename can be derived from the URL. source:cache can be
+# # used as an alias when source:cache:path is its only subkey.
# path: path/to/sub/dir/
-# # source:cache:max-age: Maximum age for caching, in number of seconds
-# # ago. If source:cache:path exists and its mtime and/or ctime is newer
-# # than this value then no HTTP query is made.
+# # Maximum age for caching, in number of seconds ago. If source:cache:path
+# # exists and its mtime and/or ctime is newer than this value then no HTTP
+# # query is made.
# # (Default: 21600, in other words 6h)
# max-age: 86400
+# # Optional extraction recipe for archived/compressed sources
+# unar:
+# # The archiving format (only 'zip' is currently supported)
+# format: zip
+# # glob(3)-patterns to extract from the archive. import:path is always
+# # extracted as an exact match.
+# patterns:
+# - 'path/to/source/layer.*'
+# import:
+# # Path for the dataset holding the source layer (relative to the archive root
+# # for archived sources, and to --cachedir otherwise). The value is optional
+# # for non-archived sources, and defaults to source:cache:path if omitted.
+# path: path/to/source/layer.shp
+# # Format of the source layer to limit allowed driver when opening the dataset.
+# format: ESRI Shapefile
+# # Name of the source layer in the source dataset. If omitted, its 0th layer is
+# # considered.
+# layername: source_layer
+# # Whether to apply the spatial filter when importing. Default: True.
+# spatial-filter: true
+# # Mapping of source fields to destination fields. A list translates into an
+# # identity mapping.
+# fields:
+# - field_name1
+# - field_name2
+# fields:
+# source_field_name1: field_name1
+# source_field_name2: field_name2
+ 'counties':
+ description: Sveriges län
+ create:
+ geometry-type: MULTIPOLYGON
+ fields:
+ - name: objektidentitet
+ type: String
+ subtype: UUID
+ unique: true
+ #width: 36
+ - name: skapad
+ type: DateTime
+ #TZFlag: TODO
+ - name: lanskod
+ type: Integer
+ subtype: Int16
+ unique: true
+ nullable: false
+ source:
+ # https://www.lantmateriet.se/sv/geodata/vara-produkter/produktlista/topografi-250-nedladdning-vektor/
+ cache: administrativindelning_sverige.zip
+ unar:
+ format: zip
+ import:
+ path: administrativindelning_sverige.gpkg
+ format: GPKG
+ layername: lansyta
+ spatial-filter: false
+ fields:
+ - objektidentitet
+ - skapad
+ - lanskod
+ 'municipalities':
+ description: Sveriges kommuner
+ create:
+ geometry-type: MULTIPOLYGON
+ fields:
+ - name: objektidentitet
+ type: String
+ subtype: UUID
+ unique: true
+ #width: 36
+ - name: skapad
+ type: DateTime
+ #TZFlag: TODO
+ - name: kommunkod
+ type: Integer
+ subtype: Int16
+ unique: true
+ nullable: false
+ source:
+ # https://www.lantmateriet.se/sv/geodata/vara-produkter/produktlista/topografi-250-nedladdning-vektor/
+ cache: administrativindelning_sverige.zip
+ unar:
+ format: zip
+ import:
+ path: administrativindelning_sverige.gpkg
+ format: GPKG
+ layername: kommunyta
+ spatial-filter: false
+ fields:
+ - objektidentitet
+ - skapad
+ - kommunkod
@@ -167,13 +389,156 @@ layers:
cache: naturvardsregistret/
+ # https://geodpags.skogsstyrelsen.se/geodataport/feeds/AvverkAnm.xml
+ description: Avverkningsanmälningar (Skogsstyrelsen)
+ create:
+ geometry-type: MULTIPOLYGON
+ fields:
+ - name: objektid
+ type: Integer
+ unique: true
+ nullable: false
+ - name: year
+ type: Integer
+ subtype: Int16
+ nullable: false
+ - name: beteckn
+ type: String
+ width: 23
+ unique: true
+ nullable: false
+ - name: avverktyp
+ type: String
+ width: 254
+ nullable: false
+ - name: skogstyp
+ type: String
+ width: 254
+ nullable: false
+ - name: date
+ type: Date
+ nullable: false
+ - name: AnmaldHa
+ type: Real
+ width: 24
+ precision: 15
+ nullable: false
+ - name: SkogsodlHa
+ type: Real
+ width: 24
+ precision: 15
+ nullable: false
+ - name: NatforHa
+ type: Real
+ width: 24
+ precision: 15
+ nullable: false
+ - name: status
+ type: String
+ width: 21
+ nullable: false
+ - name: AvvSasong
+ type: String
+ width: 254
+ nullable: false
+ - name: AvvHa
+ type: Real
+ width: 24
+ precision: 15
+ - name: Avverkning
+ type: String
+ width: 20
+ nullable: false
url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksAvverkAnm.zip'
max-size: 134217728 # 128MiB
cache: sks/
+ unar:
+ format: zip
+ patterns:
+ - 'sksAvverkAnm.*'
+ import:
+ path: sksAvverkAnm.shp
+ format: ESRI Shapefile
+ layername: sksAvverkAnm
+ fields:
+ OBJECTID: objektid
+ ArendeAr: year
+ Beteckn: beteckn
+ Avverktyp: avverktyp
+ Skogstyp: skogstyp
+ Inkomdatum: date
+ AnmaldHa: AnmaldHa
+ SkogsodlHa: SkogsodlHa
+ NatforHa: NatforHa
+ ArendeStat: status
+ AvvSasong: AvvSasong
+ AvvHa: AvvHa
+ Avverkning: Avverkning
+ # https://geodpags.skogsstyrelsen.se/geodataport/feeds/UtfordAvverk.xml
+ description: Utförd avverkning (Skogsstyrelsen)
+ create:
+ geometry-type: MULTIPOLYGON
+ fields:
+ - name: objektid
+ type: Integer
+ #unique: true
+ nullable: false
+ - name: year
+ type: Integer
+ subtype: Int16
+ nullable: false
+ - name: beteckn
+ type: String
+ width: 8
+ #unique: true
+ nullable: false
+ - name: avverktyp
+ type: String
+ width: 254
+ nullable: false
+ - name: skogstyp
+ type: String
+ width: 254
+ nullable: false
+ - name: AnmaldHa
+ type: Real
+ width: 24
+ precision: 15
+ #nullable: false
+ - name: SkogsodlHa
+ type: Real
+ width: 24
+ precision: 15
+ nullable: false
+ - name: NatforHa
+ type: Real
+ width: 24
+ precision: 15
+ nullable: false
+ - name: date
+ type: Date
+ nullable: false
+ - name: KallaDatum
+ type: Date
+ - name: KallaAreal
+ type: String
+ width: 50
+ - name: Forebild
+ type: String
+ width: 50
+ - name: Efterbild
+ type: String
+ width: 59
+ - name: Arealha
+ type: Real
+ width: 24
+ precision: 15
+ nullable: false
- source:
@@ -182,6 +547,30 @@ layers:
path: sks/
max-age: 2592000 # 30d
+ unar:
+ format: zip
+ patterns:
+ - 'sksUtfordAvverk-2000-2015.*'
+ import:
+ path: sksUtfordAvverk-2000-2015.shp
+ format: ESRI Shapefile
+ layername: sksUtfordAvverk-2000-2015
+ fields:
+ OBJECTID: objektid
+ Arendear: year
+ Beteckn: beteckn
+ Avverktyp: avverktyp
+ Skogstyp: skogstyp
+ AnmaldHa: AnmaldHa
+ SkogsodlHa: SkogsodlHa
+ Natforha: NatforHa
+ Avvdatum: date
+ KallaDatum: KallaDatum
+ KallaAreal: KallaAreal
+ Forebild: Forebild
+ Efterbild: Efterbild
+ Arealha: Arealha
- source:
url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksUtfordAvverk-2016-2019.zip'
@@ -189,6 +578,30 @@ layers:
path: sks/
max-age: 2592000 # 30d
+ unar:
+ format: zip
+ patterns:
+ - 'sksUtfordAvverk-2016-2019.*'
+ import:
+ path: sksUtfordAvverk-2016-2019.shp
+ format: ESRI Shapefile
+ layername: sksUtfordAvverk-2016-2019
+ fields:
+ OBJECTID: objektid
+ Arendear: year
+ Beteckn: beteckn
+ Avverktyp: avverktyp
+ Skogstyp: skogstyp
+ AnmaldHa: AnmaldHa
+ SkogsodlHa: SkogsodlHa
+ Natforha: NatforHa
+ Avvdatum: date
+ KallaDatum: KallaDatum
+ KallaAreal: KallaAreal
+ Forebild: Forebild
+ Efterbild: Efterbild
+ Arealha: Arealha
- source:
url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksUtfordAvverk-2020-2022.zip'
@@ -196,11 +609,58 @@ layers:
path: sks/
max-age: 864000 # 10d
+ unar:
+ format: zip
+ patterns:
+ - 'sksUtfordAvverk-2020-2022.*'
+ import:
+ path: sksUtfordAvverk-2020-2022.shp
+ format: ESRI Shapefile
+ layername: sksUtfordAvverk-2020-2022
+ fields:
+ OBJECTID: objektid
+ Arendear: year
+ Beteckn: beteckn
+ Avverktyp: avverktyp
+ Skogstyp: skogstyp
+ AnmaldHa: AnmaldHa
+ SkogsodlHa: SkogsodlHa
+ Natforha: NatforHa
+ Avvdatum: date
+ KallaDatum: KallaDatum
+ KallaAreal: KallaAreal
+ Forebild: Forebild
+ Efterbild: Efterbild
+ Arealha: Arealha
- source:
url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksUtfordAvverk-2023-.zip'
max-size: 1073741824 # 1GiB
cache: sks/
+ unar:
+ format: zip
+ patterns:
+ - 'sksUtfordAvverk-2023-.*'
+ import:
+ path: sksUtfordAvverk-2023-.shp
+ format: ESRI Shapefile
+ layername: sksUtfordAvverk-2023-
+ fields:
+ OBJECTID: objektid
+ Arendear: year
+ Beteckn: beteckn
+ Avverktyp: avverktyp
+ Skogstyp: skogstyp
+ AnmaldHa: AnmaldHa
+ SkogsodlHa: SkogsodlHa
+ Natforha: NatforHa
+ Avvdatum: date
+ KallaDatum: KallaDatum
+ KallaAreal: KallaAreal
+ Forebild: Forebild
+ Efterbild: Efterbild
+ Arealha: Arealha