aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Guilhem Moulin <guilhem@fripost.org> 2025-04-18 11:42:07 +0200
committer: Guilhem Moulin <guilhem@fripost.org> 2025-04-19 19:25:16 +0200
commit: c689b2d07828985e881423357c7ab42877f64909 (patch)
tree: faaeef9e341f6258d25bba0963b14758eca27b84
parent: 2abf2297aabb355b72c6ae9e0aaf350f7a6cbe9d (diff)
Factor sources in config.yml.
This avoids duplication when the same source file is used multiple times (either by the same layer or by multiple layers). This change breaks webmap-import, but that one will be refactored shortly. It also breaks webmap-import-mrr.py, which is no longer used since mineralrattigheter.zip can be downloaded from SGU's site directly.
-rw-r--r-- common.py 80
-rw-r--r-- config.yml 359
-rwxr-xr-x webmap-download 154
3 files changed, 294 insertions, 299 deletions
diff --git a/common.py b/common.py
index da2927f..acbb5d8 100644
--- a/common.py
+++ b/common.py
@@ -1,6 +1,6 @@
#----------------------------------------------------------------------
# Backend utilities for the Klimatanalys Norr project (common module)
-# Copyright © 2024 Guilhem Moulin <info@guilhem.se>
+# Copyright © 2024-2025 Guilhem Moulin <info@guilhem.se>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -19,11 +19,10 @@
# pylint: disable=missing-module-docstring
import os
-from os import path as os_path, curdir as os_curdir, pardir as os_pardir, sep as os_sep
+from os import path as os_path, curdir as os_curdir
import sys
from fnmatch import fnmatchcase
from pathlib import Path, PosixPath
-from urllib.parse import urlparse
from stat import S_ISDIR
import math
import logging
@@ -70,11 +69,6 @@ def find_config(filename : str = 'config.yml', appname : str = 'webmap') -> Path
return p
raise MissingConfiguration(filename)
-class BadConfiguration(Exception):
- """Exception raised when there is a bad configuration"""
- def __init__(self, config_path : Path, message : str) -> Never:
- super().__init__(str(config_path) + ': ' + message)
-
def parse_config(path : Optional[Path] = None,
groupnames : Optional[list[str]] = None) -> dict[str, Any]:
"""Parse configuration file"""
@@ -82,77 +76,9 @@ def parse_config(path : Optional[Path] = None,
config_path = find_config() if path is None else path
with config_path.open(mode='r', encoding='utf-8') as fp:
config = yaml.safe_load(fp)
- layers = config.get('layers', {})
-
- # validate sources
- destinations = {}
- for name, layerdefs in layers.items():
- if isinstance(layerdefs, dict) and 'sources' not in layerdefs:
- layers[name] = { 'sources': [layerdefs] }
- for k in ['description', 'create', 'publish']:
- if k in layerdefs:
- layers[name][k] = layerdefs.pop(k)
- layerdefs = layers[name]
-
- if 'sources' not in layerdefs:
- # pylint: disable-next=broad-exception-raised
- raise Exception(f'Layer "{name}" does not have any source recipe')
-
- for sourcedef in layerdefs.get('sources', []):
- source = sourcedef.get('source', None)
- if source is None:
- continue
- download = source.get('download', None)
- if download is None:
- url = None
- dl_module = None
- elif isinstance(download, str):
- url = download
- dl_module = None
- source['download'] = download = { 'url': url }
- else:
- url = download.get('url', None)
- dl_module = download.get('module', None)
- if url is None:
- urlp = None
- else:
- urlp = urlparse(url)
- if urlp is None:
- # pylint: disable-next=broad-exception-raised
- raise Exception(f'urlparse({url}) failed')
-
- cache = source.get('cache', None)
- if cache is None or isinstance(cache, str):
- source['cache'] = { 'path': cache }
- else:
- cache = cache.get('path', None)
-
- if cache is None or cache in ['', os_curdir, os_pardir] or cache.endswith(os_sep):
- # infer filename from the source URL
- if urlp is None or urlp.path is None or urlp.path == '' or urlp.path.endswith('/'):
- # pylint: disable-next=broad-exception-raised
- raise Exception(f'Layer "{name}": Could not infer filename from URL {url}')
- p = PosixPath(urlp.path)
- if p is None or p.name is None or p.name == '':
- # pylint: disable-next=broad-exception-raised
- raise Exception(f'Invalid PosixPath({urlp.path})')
- if cache is None or cache == '':
- cache = Path()
- else:
- cache = Path(cache)
- cache = cache.joinpath(p.name)
- else:
- cache = Path(cache)
- source['cache']['path'] = cache
-
- v = { 'url': urlp, 'module': dl_module }
- if cache in destinations and destinations[cache] != v:
- # allow destination conflicts, but only when the source URL and module match
- # pylint: disable-next=broad-exception-raised
- raise Exception(f'Destination conflict for layer "{name}"')
- destinations[cache] = v
# filter layers that are not of interest
+ layers = config.get('layers', {})
if groupnames is not None:
layernames = []
layer_groups = config.get('layer-groups', {})
diff --git a/config.yml b/config.yml
index 517e117..b13f363 100644
--- a/config.yml
+++ b/config.yml
@@ -118,8 +118,125 @@ dataset:
EXTRACT_SCHEMA_FROM_LAYER_NAME: 'NO'
+downloads:
+# # List of cached paths and download recipes.
+#
+# - # URL from where to download the source file. path/to/file.gpkg can be used as
+# # an alias when path/to/file.gpkg:url is its only subkey.
+# url: 'https://example.net/path/to/file.gpkg'
+#
+# # Local path where the downloaded file is saved.
+# path: path/to/file.gpkg
+#
+# # The maximum size to download in bytes. An error is raised when the payload
+# # size exceeds this value.
+# # (Default: 67108864, in other words 64MiB)
+# max-size: 1073741824
+#
+# # Maximum age for caching, in number of seconds ago. If the downloaded path
+# # exists and its mtime and/or ctime is newer than this value then no HTTP
+# # query is made.
+# # (Default: 21600, in other words 6h)
+# max-age: 86400
+#
+# For convenience
+#
+# - path: path/to/file1.gpkg
+# url: https://example.net/file1.gpkg
+# - path: path/to/sub/file2.gpkg
+# url: https://example.net/sub/file2.gpkg
+#
+# can be shortened as follows
+#
+# - basedir: path/to/
+# baseurl: https://example.net/
+# files:
+# - file1.gpkg
+# - sub/file2.gpkg
+
+ - basedir: nvk/nvr/
+ baseurl: https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/
+ files:
+ - TILLTRADESFORBUD.zip
+ - NP.zip
+ - NR.zip
+ - NVO.zip
+ - DVO.zip
+ - KR.zip
+ - VSO.zip
+ - LBSO.zip
+ - OBO.zip
+ - NM.zip
+ - IF.zip
+ - SPA_Rikstackande.zip
+ - HELCOM.zip
+ - Ramsar_2018.zip
+ - OSPAR.zip
+ - Varldsarv.zip
+ - biosfarsomraden.zip
+ - NVA.zip
+
+ - path: nvk/nvr/SCI_Rikstackande.zip
+ url: https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/SCI_Rikstackande.zip
+ max-size: 134217728 # 128MiB
+
+ - basedir: nvk/
+ baseurl: https://geodata.naturvardsverket.se/nedladdning/riksintresse/
+ files:
+ - RI_Naturvard.zip
+ - RI_Friluftsliv.zip
+
+ - basedir: lst/
+ baseurl: https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/
+ files:
+ - lst.LST_RI_Rorligt_friluftsliv_MB4kap2.zip
+ - lst.LST_RI_Obruten_kust_MB4kap3.zip
+ - lst.Lst_RI_Obrutet_fjall_MB4kap5.zip
+ - lst.LST_RI_Skyddade_vattendrag_MB4kap6.zip
+
+ - basedir: sks/
+ baseurl: https://geodpags.skogsstyrelsen.se/geodataport/data/
+ files:
+ - sksBiotopskydd_gpkg.zip
+ - sksNaturvardsavtal_gpkg.zip
+
+ - path: sks/sksAvverkAnm_gpkg.zip
+ url: https://geodpags.skogsstyrelsen.se/geodataport/data/sksAvverkAnm_gpkg.zip
+ max-size: 134217728 # 128MiB
+
+ - path: sks/sksUtfordAvverk_gpkg.zip
+ url: https://geodpags.skogsstyrelsen.se/geodataport/data/sksUtfordAvverk_gpkg.zip
+ max-size: 4294967296 # 4GiB
+ max-age: 216000 # 60h
+
+ - basedir: vbk/
+ baseurl: https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/
+ files:
+ - lst.vbk_vindkraftverk.zip
+ - lst.vbk_projekteringsomraden.zip
+ - lst.vbk_havsbaserad_vindkraft.zip
+
+ - basedir: sametinget/
+ baseurl: https://ext-dokument.lansstyrelsen.se/Gemensamt/Geodata/Datadistribution/SWEREF99TM/Sametinget/
+ files:
+ - Samebyarnas_betesomraden.zip
+ - Samebyarnas_markanvandningsredovisning.zip
+
+ - basedir: ren/
+ baseurl: https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/
+ files:
+ - ren.riks_ren.zip
+ - ren.omr_riks.zip
+
+ - path: mrr/mineralrattigheter.zip
+ url: https://resource.sgu.se/data/oppnadata/mineralrattigheter/mineralrattigheter.zip
+
+ - path: svk/SVK_STAMNAT.zip
+ url: https://gis-services.metria.se/svkfeed/filer/SVK_STAMNAT.zip
+
+
layers:
-# # Dictionary of layer names and source receipes in the output dataset. If a layer
+# # Dictionary of layer names and source recipes in the output dataset. If a layer
# # has a single source, then the sources singleton can be inlined.
# layer1_name:
# description: A string describing that layer
@@ -207,37 +324,10 @@ layers:
#
# sources:
# - source:
-# download:
-# # URL from where to download the source file. source:download can be used as
-# # an alias when source:download:url is its only subkey.
-# url: 'https://example.net/path/to/layer.zip'
-#
-# # The maximum size to download in bytes. An error is raised when the payload
-# # size exceeds this value.
-# # (Default: 67108864, in other words 64MiB)
-# max-size: 1073741824
-#
-# # Basename of the download module to use for that layer.
-# module: webmap-download
+# # Local source path (relative to --cachedir).
+# path: path/to/source/file.zip
#
-# cache:
-# # Local path (relative to --cachedir) where to (atomically) save the
-# # downloaded file. The same path can be used by multiple entries as long as
-# # their pairs (source:download:url, source:download:module) match. Any
-# # parent directories are created if needed. If the path is empty or ends
-# # with a '/' character then it treated as a directory and the last component
-# # of source:download:url implicitly used as filename. In that case an error
-# # is raised if no filename can be derived from the URL. source:cache can be
-# # used as an alias when source:cache:path is its only subkey.
-# path: path/to/sub/dir/
-#
-# # Maximum age for caching, in number of seconds ago. If source:cache:path
-# # exists and its mtime and/or ctime is newer than this value then no HTTP
-# # query is made.
-# # (Default: 21600, in other words 6h)
-# max-age: 86400
-#
-# # Optional extracting receipe for archived/compressed sources
+# # Optional extracting recipe for archived/compressed sources
# unar:
# # The archiving format (only 'zip' is currently supported)
# format: zip
@@ -250,7 +340,7 @@ layers:
# import:
# # Path for the dataset holding the source layer (relative to the archive root
# # for archived sources, and to --cachedir otherwise). The value is optional
-# # for non-archived sources, and defaults to source:cache:path if omitted.
+# # for non-archived sources, and defaults to source:path if omitted.
# path: path/to/source/layer.shp
#
# # Format of the source layer to limit allowed driver when opening the dataset.
@@ -315,7 +405,7 @@ layers:
comment: Tvåsiffrig kod för län
source:
# https://www.lantmateriet.se/sv/geodata/vara-produkter/produktlista/topografi-250-nedladdning-vektor/
- cache: administrativindelning_sverige.zip
+ path: administrativindelning_sverige.zip
unar:
format: zip
import:
@@ -353,7 +443,7 @@ layers:
comment: Fyrsiffrig kod för kommun
source:
# https://www.lantmateriet.se/sv/geodata/vara-produkter/produktlista/topografi-250-nedladdning-vektor/
- cache: administrativindelning_sverige.zip
+ path: administrativindelning_sverige.zip
unar:
format: zip
import:
@@ -411,8 +501,7 @@ layers:
type: String
width: 254
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/TILLTRADESFORBUD.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/TILLTRADESFORBUD.zip
unar:
format: zip
patterns:
@@ -506,8 +595,7 @@ layers:
nullable: false
comment: Beslutsmyndighet
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/NP.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/NP.zip
unar:
format: zip
patterns:
@@ -605,8 +693,7 @@ layers:
nullable: false
comment: Beslutsmyndighet
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/NR.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/NR.zip
unar:
format: zip
patterns:
@@ -710,8 +797,7 @@ layers:
nullable: false
comment: Beslutsmyndighet
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/NVO.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/NVO.zip
unar:
format: zip
patterns:
@@ -810,8 +896,7 @@ layers:
nullable: false
comment: Beslutsmyndighet
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/DVO.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/DVO.zip
unar:
format: zip
patterns:
@@ -909,8 +994,7 @@ layers:
nullable: false
comment: Beslutsmyndighet
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/KR.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/KR.zip
unar:
format: zip
patterns:
@@ -1016,8 +1100,7 @@ layers:
nullable: false
comment: Beslutsmyndighet
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/VSO.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/VSO.zip
unar:
format: zip
patterns:
@@ -1114,8 +1197,7 @@ layers:
nullable: false
comment: Beslutsmyndighet
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/LBSO.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/LBSO.zip
unar:
format: zip
patterns:
@@ -1179,8 +1261,7 @@ layers:
width: 254
comment: Länk till visningsformulär i Skogens Pärlor
source:
- download: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksBiotopskydd_gpkg.zip'
- cache: sks/
+ path: sks/sksBiotopskydd_gpkg.zip
unar:
format: zip
import:
@@ -1276,8 +1357,7 @@ layers:
nullable: false
comment: Beslutsmyndighet
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/OBO.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/OBO.zip
unar:
format: zip
patterns:
@@ -1363,8 +1443,7 @@ layers:
nullable: false
comment: Beslutsmyndighet
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/NM.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/NM.zip
unar:
format: zip
patterns:
@@ -1460,8 +1539,7 @@ layers:
nullable: false
comment: Beslutsmyndighet
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/NM.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/NM.zip
unar:
format: zip
patterns:
@@ -1558,8 +1636,7 @@ layers:
nullable: false
comment: Beslutsmyndighet
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/IF.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/IF.zip
unar:
format: zip
patterns:
@@ -1632,8 +1709,7 @@ layers:
type: String
width: 254
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/SPA_Rikstackande.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/SPA_Rikstackande.zip
unar:
format: zip
patterns:
@@ -1703,10 +1779,7 @@ layers:
width: 254
sources:
- source:
- download:
- url: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/SCI_Rikstackande.zip'
- max-size: 134217728 # 128MiB
- cache: nvk/nvr/
+ path: nvk/nvr/SCI_Rikstackande.zip
unar:
format: zip
patterns:
@@ -1716,10 +1789,7 @@ layers:
format: ESRI Shapefile
layername: SCI_alvar_AC_lan
- source:
- download:
- url: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/SCI_Rikstackande.zip'
- max-size: 134217728 # 128MiB
- cache: nvk/nvr/
+ path: nvk/nvr/SCI_Rikstackande.zip
unar:
format: zip
patterns:
@@ -1729,10 +1799,7 @@ layers:
format: ESRI Shapefile
layername: SCI_alvar_BD_lan
- source:
- download:
- url: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/SCI_Rikstackande.zip'
- max-size: 134217728 # 128MiB
- cache: nvk/nvr/
+ path: nvk/nvr/SCI_Rikstackande.zip
unar:
format: zip
patterns:
@@ -1758,8 +1825,7 @@ layers:
width: 62
nullable: false
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/HELCOM.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/HELCOM.zip
unar:
format: zip
patterns:
@@ -1820,8 +1886,7 @@ layers:
width: 254
nullable: false
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/Ramsar_2018.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/Ramsar_2018.zip
unar:
format: zip
patterns:
@@ -1863,8 +1928,7 @@ layers:
unique: true
nullable: false
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/OSPAR.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/OSPAR.zip
unar:
format: zip
patterns:
@@ -1887,8 +1951,7 @@ layers:
unique: true
nullable: false
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/Varldsarv.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/Varldsarv.zip
unar:
format: zip
patterns:
@@ -1920,8 +1983,7 @@ layers:
width: 254
nullable: false
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/biosfarsomraden.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/biosfarsomraden.zip
unar:
format: zip
patterns:
@@ -1966,8 +2028,7 @@ layers:
type: Date
nullable: false
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/NVA.zip'
- cache: nvk/nvr/
+ path: nvk/nvr/NVA.zip
unar:
format: zip
patterns:
@@ -2029,8 +2090,7 @@ layers:
type: String
width: 64
source:
- download: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksNaturvardsavtal_gpkg.zip'
- cache: sks/
+ path: sks/sksNaturvardsavtal_gpkg.zip
unar:
format: zip
import:
@@ -2122,10 +2182,7 @@ layers:
nullable: false
comment: Avverkningsamalan/NyAvverkningsanmalan
source:
- download:
- url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksAvverkAnm_gpkg.zip'
- max-size: 134217728 # 128MiB
- cache: sks/
+ path: sks/sksAvverkAnm_gpkg.zip
unar:
format: zip
import:
@@ -2223,12 +2280,7 @@ layers:
nullable: false
comment: Areal för ytan (ha)
source:
- download:
- url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksUtfordAvverk_gpkg.zip'
- max-size: 4294967296 # 4GiB
- cache:
- path: sks/
- max-age: 216000 # 60h
+ path: sks/sksUtfordAvverk_gpkg.zip
unar:
format: zip
import:
@@ -2294,8 +2346,7 @@ layers:
type: Date
nullable: false
source:
- download: 'https://ext-dokument.lansstyrelsen.se/Gemensamt/Geodata/Datadistribution/SWEREF99TM/Sametinget/Samebyarnas_betesomraden.zip'
- cache: sametinget/
+ path: sametinget/Samebyarnas_betesomraden.zip
unar:
format: zip
patterns:
@@ -2374,8 +2425,7 @@ layers:
unique: true
nullable: false
source:
- download: 'http://ext-dokument.lansstyrelsen.se/Gemensamt/Geodata/Datadistribution/SWEREF99TM/Sametinget/Samebyarnas_markanvandningsredovisning.zip'
- cache: sametinget/
+ path: sametinget/Samebyarnas_markanvandningsredovisning.zip
unar:
format: zip
patterns:
@@ -2418,8 +2468,7 @@ layers:
unique: true
nullable: false
source:
- download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/ren.riks_ren.zip'
- cache: ren/
+ path: ren/ren.riks_ren.zip
unar:
format: zip
patterns:
@@ -2474,8 +2523,7 @@ layers:
unique: true
nullable: false
source:
- download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/ren.omr_riks.zip'
- cache: ren/
+ path: ren/ren.omr_riks.zip
unar:
format: zip
patterns:
@@ -2606,8 +2654,7 @@ layers:
subtype: Bool
nullable: false
source:
- download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/lst.vbk_vindkraftverk.zip'
- cache: vbk/
+ path: vbk/lst.vbk_vindkraftverk.zip
unar:
format: zip
patterns:
@@ -2790,8 +2837,7 @@ layers:
subtype: Bool
nullable: false
source:
- download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/lst.vbk_projekteringsomraden.zip'
- cache: vbk/
+ path: vbk/lst.vbk_projekteringsomraden.zip
unar:
format: zip
patterns:
@@ -3014,8 +3060,7 @@ layers:
subtype: Bool
nullable: false
source:
- download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/lst.vbk_havsbaserad_vindkraft.zip'
- cache: vbk/
+ path: vbk/lst.vbk_havsbaserad_vindkraft.zip
unar:
format: zip
import:
@@ -3122,10 +3167,7 @@ layers:
nullable: false
comment: Datum för senaste uppdatering
source:
- download:
- module: webmap-download-mrr
- layername: 'MRR:SE.GOV.SGU.MRR.BEARBETNINGSKONCESSIONER_APPLIED_VY'
- cache: mrr/bearbetningskoncessioner_applied.geojson
+ path: mrr/mineralrattigheter.zip
import:
format: GeoJSON
layername: 'MRR:SE.GOV.SGU.MRR.BEARBETNINGSKONCESSIONER_APPLIED_VY'
@@ -3182,10 +3224,7 @@ layers:
nullable: false
comment: Datum för senaste uppdatering
source:
- download:
- module: webmap-download-mrr
- layername: 'MRR:SE.GOV.SGU.MRR.BEARBETNINGSKONCESSIONER_APPROVED_VY'
- cache: mrr/bearbetningskoncessioner_approved.geojson
+ path: mrr/mineralrattigheter.zip
import:
format: GeoJSON
layername: 'MRR:SE.GOV.SGU.MRR.BEARBETNINGSKONCESSIONER_APPROVED_VY'
@@ -3231,10 +3270,7 @@ layers:
nullable: false
comment: Datum för senaste uppdatering
source:
- download:
- module: webmap-download-mrr
- layername: 'MRR:SE.GOV.SGU.MRR.MARKANVISNINGAR_VY'
- cache: mrr/markanvisningar.geojson
+ path: mrr/mineralrattigheter.zip
import:
format: GeoJSON
layername: 'MRR:SE.GOV.SGU.MRR.MARKANVISNINGAR_VY'
@@ -3287,10 +3323,7 @@ layers:
nullable: false
comment: Datum för senaste uppdatering
source:
- download:
- module: webmap-download-mrr
- layername: 'MRR:SE.GOV.SGU.MRR.MINERAL_APPLIED_VY'
- cache: mrr/mineral_applied.geojson
+ path: mrr/mineralrattigheter.zip
import:
format: GeoJSON
layername: 'MRR:SE.GOV.SGU.MRR.MINERAL_APPLIED_VY'
@@ -3351,10 +3384,7 @@ layers:
nullable: false
comment: Datum för senaste uppdatering
source:
- download:
- module: webmap-download-mrr
- layername: 'MRR:SE.GOV.SGU.MRR.MINERAL_APPROVED_VY'
- cache: mrr/mineral_approved.geojson
+ path: mrr/mineralrattigheter.zip
import:
format: GeoJSON
layername: 'MRR:SE.GOV.SGU.MRR.MINERAL_APPROVED_VY'
@@ -3379,30 +3409,19 @@ layers:
# 'mrr:mineral_expired':
# source:
-# download:
-# module: webmap-download-mrr
-# layername: 'MRR:SE.GOV.SGU.MRR.MINERAL_EXPIRED_2'
-# cache: mrr/mineral_expired.geojson
+# path: mrr/mineralrattigheter.zip
#
# 'mrr:mineral_prohibited':
# source:
-# download:
-# module: webmap-download-mrr
-# layername: 'MRR:SE.GOV.SGU.MRR.MINERAL_PROHIBITED_2'
-# cache: mrr/mineral_prohibited.geojson
+# path: mrr/mineralrattigheter.zip
#
# 'mrr:ogd_expired':
# source:
-# download:
-# module: webmap-download-mrr
-# layername: 'MRR:SE.GOV.SGU.MRR.OGD_EXPIRED_2'
-# cache: mrr/ogd_expired.geojson
+# path: mrr/mineralrattigheter.zip
+#
# 'mrr:ogd_prohibited':
# source:
-# download:
-# module: webmap-download-mrr
-# layername: 'MRR:SE.GOV.SGU.MRR.OGD_PROHIBITED_2'
-# cache: mrr/ogd_prohibited.geojson
+# path: mrr/mineralrattigheter.zip
'mrr:olja_gas_diamant_applied':
description: Undersökningstillstånd, olja, gas och diamant, ansökta (SGU)
@@ -3438,10 +3457,7 @@ layers:
nullable: false
comment: Datum för senaste uppdatering
source:
- download:
- module: webmap-download-mrr
- layername: 'MRR:SE.GOV.SGU.MRR.OLJA_GAS_DIAMANT_APPLIED_VY'
- cache: mrr/olja_gas_diamant_applied.geojson
+ path: mrr/mineralrattigheter.zip
import:
format: GeoJSON
layername: 'MRR:SE.GOV.SGU.MRR.OLJA_GAS_DIAMANT_APPLIED_VY'
@@ -3502,10 +3518,7 @@ layers:
nullable: false
comment: Datum för senaste uppdatering
source:
- download:
- module: webmap-download-mrr
- layername: 'MRR:SE.GOV.SGU.MRR.OLJA_GAS_DIAMANT_APPROVED_VY'
- cache: mrr/olja_gas_diamant_approved.geojson
+ path: mrr/mineralrattigheter.zip
import:
format: GeoJSON
layername: 'MRR:SE.GOV.SGU.MRR.OLJA_GAS_DIAMANT_APPROVED_VY'
@@ -3565,10 +3578,7 @@ layers:
nullable: false
comment: Datum för senaste uppdatering
source:
- download:
- module: webmap-download-mrr
- layername: 'MRR:SE.GOV.SGU.MRR.TORVKONCESSIONER_VY'
- cache: mrr/torvkoncessioner.geojson
+ path: mrr/mineralrattigheter.zip
import:
format: GeoJSON
layername: 'MRR:SE.GOV.SGU.MRR.TORVKONCESSIONER_VY'
@@ -3634,8 +3644,7 @@ layers:
unique: false
nullable: false
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/riksintresse/RI_Naturvard.zip'
- cache: nvk/
+ path: nvk/RI_Naturvard.zip
unar:
format: zip
patterns:
@@ -3702,8 +3711,7 @@ layers:
type: Real
comment: Areal vatten i hektar
source:
- download: 'https://geodata.naturvardsverket.se/nedladdning/riksintresse/RI_Friluftsliv.zip'
- cache: nvk/
+ path: nvk/RI_Friluftsliv.zip
unar:
format: zip
patterns:
@@ -3747,8 +3755,7 @@ layers:
type: String
width: 254
source:
- download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/lst.LST_RI_Rorligt_friluftsliv_MB4kap2.zip'
- cache: lst/
+ path: lst/lst.LST_RI_Rorligt_friluftsliv_MB4kap2.zip
unar:
format: zip
patterns:
@@ -3795,8 +3802,7 @@ layers:
type: String
width: 254
source:
- download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/lst.LST_RI_Obruten_kust_MB4kap3.zip'
- cache: lst/
+ path: lst/lst.LST_RI_Obruten_kust_MB4kap3.zip
unar:
format: zip
patterns:
@@ -3838,8 +3844,7 @@ layers:
type: String
width: 254
source:
- download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/lst.Lst_RI_Obrutet_fjall_MB4kap5.zip'
- cache: lst/
+ path: lst/lst.Lst_RI_Obrutet_fjall_MB4kap5.zip
unar:
format: zip
patterns:
@@ -3887,8 +3892,7 @@ layers:
type: String
width: 254
source:
- download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/lst.LST_RI_Skyddade_vattendrag_MB4kap6.zip'
- cache: lst/
+ path: lst/lst.LST_RI_Skyddade_vattendrag_MB4kap6.zip
unar:
format: zip
patterns:
@@ -3916,8 +3920,7 @@ layers:
# XXX convert to V?
type: Integer
source:
- download: 'https://gis-services.metria.se/svkfeed/filer/SVK_STAMNAT.zip'
- cache: svk/
+ path: svk/SVK_STAMNAT.zip
unar:
format: zip
patterns:
@@ -3943,8 +3946,7 @@ layers:
geometry-type: POINTZ
fields: []
source:
- download: 'https://gis-services.metria.se/svkfeed/filer/SVK_STAMNAT.zip'
- cache: svk/
+ path: svk/SVK_STAMNAT.zip
unar:
format: zip
patterns:
@@ -3961,8 +3963,7 @@ layers:
geometry-type: MULTIPOLYGONZ
fields: []
source:
- download: 'https://gis-services.metria.se/svkfeed/filer/SVK_STAMNAT.zip'
- cache: svk/
+ path: svk/SVK_STAMNAT.zip
unar:
format: zip
patterns:
@@ -4168,7 +4169,7 @@ layers:
comment: Distance to the closest SvK station or production dam
source:
#download: 'https://opendata-view.smhi.se/SMHI_vatten_DamOrWeir/HY.PhysicalWaters.ManMadeObject/ows?service=WFS&request=GetFeature&typeNames=HY.PhysicalWaters.ManMadeObject&outputFormat=SHAPE-ZIP&srsName=EPSG:3006&format_options=charset:utf-8'
- cache: custom/HY_PhysicalWaters_ManMadeObject.zip
+ path: custom/HY_PhysicalWaters_ManMadeObject.zip
unar:
format: zip
patterns:
diff --git a/webmap-download b/webmap-download
index 2d31a19..a8a444a 100755
--- a/webmap-download
+++ b/webmap-download
@@ -2,7 +2,7 @@
#----------------------------------------------------------------------
# Backend utilities for the Klimatanalys Norr project (download common layers)
-# Copyright © 2024 Guilhem Moulin <info@guilhem.se>
+# Copyright © 2024-2025 Guilhem Moulin <info@guilhem.se>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -21,7 +21,20 @@
# pylint: disable=invalid-name,missing-module-docstring
# pylint: enable=invalid-name
-from os import O_RDONLY, O_WRONLY, O_CREAT, O_TRUNC, O_CLOEXEC, O_PATH, O_DIRECTORY, O_TMPFILE
+from os import (
+ O_RDONLY,
+ O_WRONLY,
+ O_CREAT,
+ O_TRUNC,
+ O_CLOEXEC,
+ O_PATH,
+ O_DIRECTORY,
+ O_TMPFILE,
+ path as os_path,
+ curdir as os_curdir,
+ pardir as os_pardir,
+ sep as os_sep
+)
import os
import sys
from fcntl import flock, LOCK_EX
@@ -32,7 +45,7 @@ import itertools
from pathlib import Path
from email.utils import parsedate_to_datetime, formatdate
from hashlib import sha256
-from typing import Any, Optional, NoReturn, Never
+from typing import Optional, NoReturn, Never
import requests
import common
@@ -64,7 +77,7 @@ class DownloadTooLarge(Exception):
# pylint: disable-next=dangerous-default-value
def download(dest : str,
- dl : Optional[dict[str, Any]],
+ dl : dict[str, dict[str, str|int]],
dir_fd : Optional[int] = None,
headers : dict[str, str] = {},
session : Optional[requests.sessions.Session] = None,
@@ -109,7 +122,7 @@ def download(dest : str,
# XXX we can't use TemporaryFile as it uses O_EXCL, cf.
# https://discuss.python.org/t/temporaryfile-contextmanager-that-allows-creating-a-directory-entry-on-success/19094/2
- fd = os.open(os.path.dirname(dest), O_WRONLY|O_CLOEXEC|O_TMPFILE, mode=0o644, dir_fd=dir_fd)
+ fd = os.open(os_path.dirname(dest), O_WRONLY|O_CLOEXEC|O_TMPFILE, mode=0o644, dir_fd=dir_fd)
try:
if progress is not None:
pbar = progress(
@@ -157,12 +170,77 @@ def download(dest : str,
common.format_time(elapsed),
common.format_bytes(int(size/elapsed)))
+class BadConfiguration(Exception):
+ """Exception raised when there is a bad configuration"""
+ def __init__(self, message : str, config_path : Optional[Path] = None) -> Never:
+ if config_path is not None:
+ message = str(config_path) + ': ' + message
+ super().__init__(message)
+
+def _check_key_type(k : str, v : str, known_keys : list[type, tuple[set[str]]]) -> bool:
+ for t, ks in known_keys:
+ if k in ks and isinstance(v, t):
+ return True
+ return False
+
+def parse_config_dl(downloads) -> dict[str, dict[str, str|int]]:
+ """Parse and validate the "downloads" section from the configuration dictionary"""
+
+ if not isinstance(downloads, list):
+ raise BadConfiguration(f'Invalid download recipe: {downloads}')
+
+ known_keys = [
+ (str, {'path', 'url'}),
+ (int, {'max-age', 'max-size'})
+ ]
+
+ destinations = {}
+ known_keys_set = {k for _,ks in known_keys for k in ks}
+ for dl in downloads:
+ if 'url' in dl:
+ dls = [dl]
+ elif 'basedir' in dl and 'baseurl' in dl and 'files' in dl and 'path' not in dl:
+ dls = []
+ for filename in dl['files']:
+ dl2 = {
+ 'path' : os_path.join(dl['basedir'], filename),
+ 'url' : dl['baseurl'] + filename
+ }
+ for k, v in dl.items():
+ if k not in ('basedir', 'baseurl', 'files'):
+ dl2[k] = v
+ dls.append(dl2)
+ else:
+ raise BadConfiguration(f'Invalid download recipe: {dl}')
+
+ for dl in dls:
+ path = dl.get('path', None)
+ if path is None or path in ('', os_curdir, os_pardir) or path.endswith(os_sep):
+ raise BadConfiguration(f'Invalid destination path "{path}"')
+ if path in destinations:
+ raise BadConfiguration(f'Duplicate download recipe for "{path}"')
+ dl2 = {}
+ for k, v in dl.items():
+ if k == 'path':
+ continue
+ if k not in known_keys_set:
+ logging.warning('Ignoring unknown setting "%s" in download recipe for "%s"',
+ k, path)
+ elif not _check_key_type(k, v, known_keys):
+ logging.warning('Ignoring setting "%s" in download recipe for "%s"'
+ ' (invalid type)', k, path)
+ else:
+ dl2[k] = v
+ destinations[path] = dl2
+
+ return destinations
+
def main() -> NoReturn: # pylint: disable=missing-function-docstring
- common.init_logger(app=os.path.basename(__file__), level=logging.INFO)
+ common.init_logger(app=os_path.basename(__file__), level=logging.INFO)
parser = argparse.ArgumentParser(description='Download or update GIS layers.')
- parser.add_argument('--cachedir', default=os.curdir,
- help=f'destination directory for downloaded files (default: {os.curdir})')
+ parser.add_argument('--cachedir', default=os_curdir,
+ help=f'destination directory for downloaded files (default: {os_curdir})')
parser.add_argument('--lockdir', default=None,
help='optional directory for lock files')
parser.add_argument('--quiet', action='store_true',
@@ -184,13 +262,24 @@ def main() -> NoReturn: # pylint: disable=missing-function-docstring
requests_log.propagate = True
config = common.parse_config(groupnames=None if args.groupname == [] else args.groupname)
+ downloads = parse_config_dl(config.get('downloads', []))
- sources = []
- for name, layerdefs in config.get('layers', {}).items():
- for layerdef in layerdefs['sources']:
- sourcedef = layerdef.get('source', {})
- sourcedef['layername'] = name
- sources.append(sourcedef)
+ rv = 0
+ download_paths = set()
+ for layername, layerdef in config.get('layers', {}).items():
+ source = layerdef.get('source', None)
+ if source is None:
+ logging.error('Layer "%s" has no source, ignoring', layername)
+ rv = 1
+ continue
+ path = source.get('path', None)
+ if path is None:
+ logging.error('Layer "%s" has no source path, ignoring', layername)
+ rv = 1
+ elif path not in downloads:
+ logging.warning('Ignoring unknown source of path "%s" from layer "%s"', path, layername)
+ else:
+ download_paths.add(path)
if args.quiet or not sys.stderr.isatty():
pbar = None
@@ -205,27 +294,8 @@ def main() -> NoReturn: # pylint: disable=missing-function-docstring
session_requests = requests.Session()
- rv = 0
- downloads = set()
- for source in sources:
- dl = source.get('download', None)
- dl_module = None if dl is None else dl.get('module', None)
- if dl_module is None:
- fetch = download
- else:
- dl_module = __import__(dl_module)
- fetch = dl_module.download
-
- cache = source.get('cache', None)
- dest = None if cache is None else cache.get('path', None)
- if dest is None:
- continue
-
- dest = str(dest) # convert from Path()
- if dest in downloads:
- logging.info('%s was already downloaded, skipping', dest)
- continue
-
+ for dest in download_paths:
+ dl = downloads[dest]
headers = {}
user_agent = config.get('User-Agent', None)
if user_agent is not None:
@@ -233,7 +303,7 @@ def main() -> NoReturn: # pylint: disable=missing-function-docstring
try:
# create parent directories
- destdir = os.path.dirname(dest)
+ destdir = os_path.dirname(dest)
common.makedirs(destdir, mode=0o755, dir_fd=destdir_fd, exist_ok=True)
# place an exclusive lock on a lockfile as the destination can be used by other layers
@@ -253,7 +323,7 @@ def main() -> NoReturn: # pylint: disable=missing-function-docstring
# the file doesn't exist, or stat() failed for some reason
pass
else:
- max_age = cache.get('max-age', 6*3600) # 6h
+ max_age = dl.get('max-age', 6*3600) # 6h
if max_age is not None:
s = max_age + max(st.st_ctime, st.st_mtime) - time()
if s > 0:
@@ -261,16 +331,14 @@ def main() -> NoReturn: # pylint: disable=missing-function-docstring
dest, common.format_time(s))
continue
headers['If-Modified-Since'] = formatdate(timeval=st.st_mtime, localtime=False, usegmt=True)
- fetch(dest, dl, dir_fd=destdir_fd,
- headers=headers, session=session_requests,
- progress=pbar)
- downloads.add(dest)
+ download(dest, dl, dir_fd=destdir_fd,
+ headers=headers, session=session_requests,
+ progress=pbar)
finally:
if lockdir_fd is not None:
os.close(lock_fd)
except Exception: # pylint: disable=broad-exception-caught
- logging.exception('Could not download %s as %s',
- dl.get('url', source['layername']), dest)
+ logging.exception('Could not download %s as %s', dl.get('url', '[N/A]'), dest)
if args.exit_code:
rv = 1
sys.exit(rv)