diff options
-rw-r--r-- | common.py | 80 | ||||
-rw-r--r-- | config.yml | 359 | ||||
-rwxr-xr-x | webmap-download | 154 |
3 files changed, 294 insertions, 299 deletions
@@ -1,6 +1,6 @@ #---------------------------------------------------------------------- # Backend utilities for the Klimatanalys Norr project (common module) -# Copyright © 2024 Guilhem Moulin <info@guilhem.se> +# Copyright © 2024-2025 Guilhem Moulin <info@guilhem.se> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,11 +19,10 @@ # pylint: disable=missing-module-docstring import os -from os import path as os_path, curdir as os_curdir, pardir as os_pardir, sep as os_sep +from os import path as os_path, curdir as os_curdir import sys from fnmatch import fnmatchcase from pathlib import Path, PosixPath -from urllib.parse import urlparse from stat import S_ISDIR import math import logging @@ -70,11 +69,6 @@ def find_config(filename : str = 'config.yml', appname : str = 'webmap') -> Path return p raise MissingConfiguration(filename) -class BadConfiguration(Exception): - """Exception raised when there is a bad configuration""" - def __init__(self, config_path : Path, message : str) -> Never: - super().__init__(str(config_path) + ': ' + message) - def parse_config(path : Optional[Path] = None, groupnames : Optional[list[str]] = None) -> dict[str, Any]: """Parse configuration file""" @@ -82,77 +76,9 @@ def parse_config(path : Optional[Path] = None, config_path = find_config() if path is None else path with config_path.open(mode='r', encoding='utf-8') as fp: config = yaml.safe_load(fp) - layers = config.get('layers', {}) - - # validate sources - destinations = {} - for name, layerdefs in layers.items(): - if isinstance(layerdefs, dict) and 'sources' not in layerdefs: - layers[name] = { 'sources': [layerdefs] } - for k in ['description', 'create', 'publish']: - if k in layerdefs: - layers[name][k] = layerdefs.pop(k) - layerdefs = layers[name] - - if 'sources' not in layerdefs: - # pylint: disable-next=broad-exception-raised - raise Exception(f'Layer "{name}" does not have any source recipe') - - for sourcedef in layerdefs.get('sources', []): - source = sourcedef.get('source', None) - if source is None: - continue - download = source.get('download', None) - if download is None: - url = None - dl_module = None - elif isinstance(download, str): - url = download - dl_module = None - source['download'] = download = { 'url': url } - else: - url = download.get('url', None) - dl_module = download.get('module', None) - if url is None: - urlp = None - else: - urlp = urlparse(url) - if urlp is None: - # pylint: disable-next=broad-exception-raised - raise Exception(f'urlparse({url}) failed') - - cache = source.get('cache', None) - if cache is None or isinstance(cache, str): - source['cache'] = { 'path': cache } - else: - cache = cache.get('path', None) - - if cache is None or cache in ['', os_curdir, os_pardir] or cache.endswith(os_sep): - # infer filename from the source URL - if urlp is None or urlp.path is None or urlp.path == '' or urlp.path.endswith('/'): - # pylint: disable-next=broad-exception-raised - raise Exception(f'Layer "{name}": Could not infer filename from URL {url}') - p = PosixPath(urlp.path) - if p is None or p.name is None or p.name == '': - # pylint: disable-next=broad-exception-raised - raise Exception(f'Invalid PosixPath({urlp.path})') - if cache is None or cache == '': - cache = Path() - else: - cache = Path(cache) - cache = cache.joinpath(p.name) - else: - cache = Path(cache) - source['cache']['path'] = cache - - v = { 'url': urlp, 'module': dl_module } - if cache in destinations and destinations[cache] != v: - # allow destination conflicts, but only when the source URL and module match - # pylint: disable-next=broad-exception-raised - raise Exception(f'Destination conflict for layer "{name}"') - destinations[cache] = v # filter layers that are not of interest + layers = config.get('layers', {}) if groupnames is not None: layernames = [] layer_groups = config.get('layer-groups', {}) @@ -118,8 +118,125 @@ dataset: EXTRACT_SCHEMA_FROM_LAYER_NAME: 'NO' +downloads: +# # List of cached paths and download recipes. +# +# - # URL from where to download the source file. path/to/file.gpkg can be used as +# # an alias when path/to/file.gpkg:url is its only subkey. +# url: 'https://example.net/path/to/file.gpkg' +# +# # Where to download the file. +# path: path/to/file.gpkg +# +# # The maximum size to download in bytes. An error is raised when the payload +# # size exceeds this value. +# # (Default: 67108864, in other words 64MiB) +# max-size: 1073741824 +# +# # Maximum age for caching, in number of seconds ago. If the downloaded path +# # exists and its mtime and/or ctime is newer than this value then no HTTP +# # query is made. +# # (Default: 21600, in other words 6h) +# max-age: 86400 +# +# For convenience +# +# - path: path/to/file1.gpkg +# url: https://example.net/file1.gpkg +# - path: path/to/sub/file2.gpkg +# url: https://example.net/sub/file2.gpkg +# +# can be shortened as follow +# +# - basedir: path/to/ +# baseurl: https://example.net/ +# files: +# - file1.gpkg +# - sub/file2.gpkg + + - basedir: nvk/nvr/ + baseurl: https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/ + files: + - TILLTRADESFORBUD.zip + - NP.zip + - NR.zip + - NVO.zip + - DVO.zip + - KR.zip + - VSO.zip + - LBSO.zip + - OBO.zip + - NM.zip + - IF.zip + - SPA_Rikstackande.zip + - HELCOM.zip + - Ramsar_2018.zip + - OSPAR.zip + - Varldsarv.zip + - biosfarsomraden.zip + - NVA.zip + + - path: nvk/nvr/SCI_Rikstackande.zip + url: https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/SCI_Rikstackande.zip + max-size: 134217728 # 128MiB + + - basedir: nvk/ + baseurl: https://geodata.naturvardsverket.se/nedladdning/riksintresse/ + files: + - RI_Naturvard.zip + - RI_Friluftsliv.zip + + - basedir: lst/ + baseurl: https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/ + files: + - lst.LST_RI_Rorligt_friluftsliv_MB4kap2.zip + - lst.LST_RI_Obruten_kust_MB4kap3.zip + - lst.Lst_RI_Obrutet_fjall_MB4kap5.zip + - lst.LST_RI_Skyddade_vattendrag_MB4kap6.zip + + - basedir: sks/ + baseurl: https://geodpags.skogsstyrelsen.se/geodataport/data/ + files: + - sksBiotopskydd_gpkg.zip + - sksNaturvardsavtal_gpkg.zip + + - path: sks/sksAvverkAnm_gpkg.zip + url: https://geodpags.skogsstyrelsen.se/geodataport/data/sksAvverkAnm_gpkg.zip + max-size: 134217728 # 128MiB + + - path: sks/sksUtfordAvverk_gpkg.zip + url: https://geodpags.skogsstyrelsen.se/geodataport/data/sksUtfordAvverk_gpkg.zip + max-size: 4294967296 # 4GiB + max-age: 216000 # 60h + + - basedir: vbk/ + baseurl: https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/ + files: + - lst.vbk_vindkraftverk.zip + - lst.vbk_projekteringsomraden.zip + - lst.vbk_havsbaserad_vindkraft.zip + + - basedir: sametinget/ + baseurl: https://ext-dokument.lansstyrelsen.se/Gemensamt/Geodata/Datadistribution/SWEREF99TM/Sametinget/ + files: + - Samebyarnas_betesomraden.zip + - Samebyarnas_markanvandningsredovisning.zip + + - basedir: ren/ + baseurl: https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/ + files: + - ren.riks_ren.zip + - ren.omr_riks.zip + + - path: mrr/mineralrattigheter.zip + url: https://resource.sgu.se/data/oppnadata/mineralrattigheter/mineralrattigheter.zip + + - path: svk/SVK_STAMNAT.zip + url: https://gis-services.metria.se/svkfeed/filer/SVK_STAMNAT.zip + + layers: -# # Dictionary of layer names and source receipes in the output dataset. If a layer +# # Dictionary of layer names and source recipes in the output dataset. If a layer # # has a single source, then the sources singleton can be inlined. # layer1_name: # description: A string describing that layer @@ -207,37 +324,10 @@ layers: # # sources: # - source: -# download: -# # URL from where to download the source file. source:download can be used as -# # an alias when source:download:url is its only subkey. -# url: 'https://example.net/path/to/layer.zip' -# -# # The maximum size to download in bytes. An error is raised when the payload -# # size exceeds this value. -# # (Default: 67108864, in other words 64MiB) -# max-size: 1073741824 -# -# # Basename of the download module to use for that layer. -# module: webmap-download +# # Local source path (relative to --cachedir). +# path: path/to/source/file.zip # -# cache: -# # Local path (relative to --cachedir) where to (atomically) save the -# # downloaded file. The same path can be used by multiple entries as long as -# # their pairs (source:download:url, source:download:module) match. Any -# # parent directories are created if needed. If the path is empty or ends -# # with a '/' character then it treated as a directory and the last component -# # of source:download:url implicitly used as filename. In that case an error -# # is raised if no filename can be derived from the URL. source:cache can be -# # used as an alias when source:cache:path is its only subkey. -# path: path/to/sub/dir/ -# -# # Maximum age for caching, in number of seconds ago. If source:cache:path -# # exists and its mtime and/or ctime is newer than this value then no HTTP -# # query is made. -# # (Default: 21600, in other words 6h) -# max-age: 86400 -# -# # Optional extracting receipe for archived/compressed sources +# # Optional extracting recipe for archived/compressed sources # unar: # # The archiving format (only 'zip' is currently supported) # format: zip @@ -250,7 +340,7 @@ layers: # import: # # Path for the dataset holding the source layer (relative to the archive root # # for archived sources, and to --cachedir otherwise). The value is optional -# # for non-archived sources, and defaults to source:cache:path if omitted. +# # for non-archived sources, and defaults to source:path if omitted. # path: path/to/source/layer.shp # # # Format of the source layer to limit allowed driver when opening the dataset. @@ -315,7 +405,7 @@ layers: comment: Tvåsiffrig kod för län source: # https://www.lantmateriet.se/sv/geodata/vara-produkter/produktlista/topografi-250-nedladdning-vektor/ - cache: administrativindelning_sverige.zip + path: administrativindelning_sverige.zip unar: format: zip import: @@ -353,7 +443,7 @@ layers: comment: Fyrsiffrig kod för kommun source: # https://www.lantmateriet.se/sv/geodata/vara-produkter/produktlista/topografi-250-nedladdning-vektor/ - cache: administrativindelning_sverige.zip + path: administrativindelning_sverige.zip unar: format: zip import: @@ -411,8 +501,7 @@ layers: type: String width: 254 source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/TILLTRADESFORBUD.zip' - cache: nvk/nvr/ + path: nvk/nvr/TILLTRADESFORBUD.zip unar: format: zip patterns: @@ -506,8 +595,7 @@ layers: nullable: false comment: Beslutsmyndighet source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/NP.zip' - cache: nvk/nvr/ + path: nvk/nvr/NP.zip unar: format: zip patterns: @@ -605,8 +693,7 @@ layers: nullable: false comment: Beslutsmyndighet source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/NR.zip' - cache: nvk/nvr/ + path: nvk/nvr/NR.zip unar: format: zip patterns: @@ -710,8 +797,7 @@ layers: nullable: false comment: Beslutsmyndighet source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/NVO.zip' - cache: nvk/nvr/ + path: nvk/nvr/NVO.zip unar: format: zip patterns: @@ -810,8 +896,7 @@ layers: nullable: false comment: Beslutsmyndighet source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/DVO.zip' - cache: nvk/nvr/ + path: nvk/nvr/DVO.zip unar: format: zip patterns: @@ -909,8 +994,7 @@ layers: nullable: false comment: Beslutsmyndighet source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/KR.zip' - cache: nvk/nvr/ + path: nvk/nvr/KR.zip unar: format: zip patterns: @@ -1016,8 +1100,7 @@ layers: nullable: false comment: Beslutsmyndighet source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/VSO.zip' - cache: nvk/nvr/ + path: nvk/nvr/VSO.zip unar: format: zip patterns: @@ -1114,8 +1197,7 @@ layers: nullable: false comment: Beslutsmyndighet source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/LBSO.zip' - cache: nvk/nvr/ + path: nvk/nvr/LBSO.zip unar: format: zip patterns: @@ -1179,8 +1261,7 @@ layers: width: 254 comment: Länk till visningsformulär i Skogens Pärlor source: - download: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksBiotopskydd_gpkg.zip' - cache: sks/ + path: sks/sksBiotopskydd_gpkg.zip unar: format: zip import: @@ -1276,8 +1357,7 @@ layers: nullable: false comment: Beslutsmyndighet source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/OBO.zip' - cache: nvk/nvr/ + path: nvk/nvr/OBO.zip unar: format: zip patterns: @@ -1363,8 +1443,7 @@ layers: nullable: false comment: Beslutsmyndighet source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/NM.zip' - cache: nvk/nvr/ + path: nvk/nvr/NM.zip unar: format: zip patterns: @@ -1460,8 +1539,7 @@ layers: nullable: false comment: Beslutsmyndighet source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/NM.zip' - cache: nvk/nvr/ + path: nvk/nvr/NM.zip unar: format: zip patterns: @@ -1558,8 +1636,7 @@ layers: nullable: false comment: Beslutsmyndighet source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/IF.zip' - cache: nvk/nvr/ + path: nvk/nvr/IF.zip unar: format: zip patterns: @@ -1632,8 +1709,7 @@ layers: type: String width: 254 source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/SPA_Rikstackande.zip' - cache: nvk/nvr/ + path: nvk/nvr/SPA_Rikstackande.zip unar: format: zip patterns: @@ -1703,10 +1779,7 @@ layers: width: 254 sources: - source: - download: - url: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/SCI_Rikstackande.zip' - max-size: 134217728 # 128MiB - cache: nvk/nvr/ + path: nvk/nvr/SCI_Rikstackande.zip unar: format: zip patterns: @@ -1716,10 +1789,7 @@ layers: format: ESRI Shapefile layername: SCI_alvar_AC_lan - source: - download: - url: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/SCI_Rikstackande.zip' - max-size: 134217728 # 128MiB - cache: nvk/nvr/ + path: nvk/nvr/SCI_Rikstackande.zip unar: format: zip patterns: @@ -1729,10 +1799,7 @@ layers: format: ESRI Shapefile layername: SCI_alvar_BD_lan - source: - download: - url: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/SCI_Rikstackande.zip' - max-size: 134217728 # 128MiB - cache: nvk/nvr/ + path: nvk/nvr/SCI_Rikstackande.zip unar: format: zip patterns: @@ -1758,8 +1825,7 @@ layers: width: 62 nullable: false source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/HELCOM.zip' - cache: nvk/nvr/ + path: nvk/nvr/HELCOM.zip unar: format: zip patterns: @@ -1820,8 +1886,7 @@ layers: width: 254 nullable: false source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/Ramsar_2018.zip' - cache: nvk/nvr/ + path: nvk/nvr/Ramsar_2018.zip unar: format: zip patterns: @@ -1863,8 +1928,7 @@ layers: unique: true nullable: false source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/OSPAR.zip' - cache: nvk/nvr/ + path: nvk/nvr/OSPAR.zip unar: format: zip patterns: @@ -1887,8 +1951,7 @@ layers: unique: true nullable: false source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/Varldsarv.zip' - cache: nvk/nvr/ + path: nvk/nvr/Varldsarv.zip unar: format: zip patterns: @@ -1920,8 +1983,7 @@ layers: width: 254 nullable: false source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/biosfarsomraden.zip' - cache: nvk/nvr/ + path: nvk/nvr/biosfarsomraden.zip unar: format: zip patterns: @@ -1966,8 +2028,7 @@ layers: type: Date nullable: false source: - download: 'https://geodata.naturvardsverket.se/nedladdning/naturvardsregistret/NVA.zip' - cache: nvk/nvr/ + path: nvk/nvr/NVA.zip unar: format: zip patterns: @@ -2029,8 +2090,7 @@ layers: type: String width: 64 source: - download: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksNaturvardsavtal_gpkg.zip' - cache: sks/ + path: sks/sksNaturvardsavtal_gpkg.zip unar: format: zip import: @@ -2122,10 +2182,7 @@ layers: nullable: false comment: Avverkningsamalan/NyAvverkningsanmalan source: - download: - url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksAvverkAnm_gpkg.zip' - max-size: 134217728 # 128MiB - cache: sks/ + path: sks/sksAvverkAnm_gpkg.zip unar: format: zip import: @@ -2223,12 +2280,7 @@ layers: nullable: false comment: Areal för ytan (ha) source: - download: - url: 'https://geodpags.skogsstyrelsen.se/geodataport/data/sksUtfordAvverk_gpkg.zip' - max-size: 4294967296 # 4GiB - cache: - path: sks/ - max-age: 216000 # 60h + path: sks/sksUtfordAvverk_gpkg.zip unar: format: zip import: @@ -2294,8 +2346,7 @@ layers: type: Date nullable: false source: - download: 'https://ext-dokument.lansstyrelsen.se/Gemensamt/Geodata/Datadistribution/SWEREF99TM/Sametinget/Samebyarnas_betesomraden.zip' - cache: sametinget/ + path: sametinget/Samebyarnas_betesomraden.zip unar: format: zip patterns: @@ -2374,8 +2425,7 @@ layers: unique: true nullable: false source: - download: 'http://ext-dokument.lansstyrelsen.se/Gemensamt/Geodata/Datadistribution/SWEREF99TM/Sametinget/Samebyarnas_markanvandningsredovisning.zip' - cache: sametinget/ + path: sametinget/Samebyarnas_markanvandningsredovisning.zip unar: format: zip patterns: @@ -2418,8 +2468,7 @@ layers: unique: true nullable: false source: - download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/ren.riks_ren.zip' - cache: ren/ + path: ren/ren.riks_ren.zip unar: format: zip patterns: @@ -2474,8 +2523,7 @@ layers: unique: true nullable: false source: - download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/ren.omr_riks.zip' - cache: ren/ + path: ren/ren.omr_riks.zip unar: format: zip patterns: @@ -2606,8 +2654,7 @@ layers: subtype: Bool nullable: false source: - download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/lst.vbk_vindkraftverk.zip' - cache: vbk/ + path: vbk/lst.vbk_vindkraftverk.zip unar: format: zip patterns: @@ -2790,8 +2837,7 @@ layers: subtype: Bool nullable: false source: - download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/lst.vbk_projekteringsomraden.zip' - cache: vbk/ + path: vbk/lst.vbk_projekteringsomraden.zip unar: format: zip patterns: @@ -3014,8 +3060,7 @@ layers: subtype: Bool nullable: false source: - download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/lst.vbk_havsbaserad_vindkraft.zip' - cache: vbk/ + path: vbk/lst.vbk_havsbaserad_vindkraft.zip unar: format: zip import: @@ -3122,10 +3167,7 @@ layers: nullable: false comment: Datum för senaste uppdatering source: - download: - module: webmap-download-mrr - layername: 'MRR:SE.GOV.SGU.MRR.BEARBETNINGSKONCESSIONER_APPLIED_VY' - cache: mrr/bearbetningskoncessioner_applied.geojson + path: mrr/mineralrattigheter.zip import: format: GeoJSON layername: 'MRR:SE.GOV.SGU.MRR.BEARBETNINGSKONCESSIONER_APPLIED_VY' @@ -3182,10 +3224,7 @@ layers: nullable: false comment: Datum för senaste uppdatering source: - download: - module: webmap-download-mrr - layername: 'MRR:SE.GOV.SGU.MRR.BEARBETNINGSKONCESSIONER_APPROVED_VY' - cache: mrr/bearbetningskoncessioner_approved.geojson + path: mrr/mineralrattigheter.zip import: format: GeoJSON layername: 'MRR:SE.GOV.SGU.MRR.BEARBETNINGSKONCESSIONER_APPROVED_VY' @@ -3231,10 +3270,7 @@ layers: nullable: false comment: Datum för senaste uppdatering source: - download: - module: webmap-download-mrr - layername: 'MRR:SE.GOV.SGU.MRR.MARKANVISNINGAR_VY' - cache: mrr/markanvisningar.geojson + path: mrr/mineralrattigheter.zip import: format: GeoJSON layername: 'MRR:SE.GOV.SGU.MRR.MARKANVISNINGAR_VY' @@ -3287,10 +3323,7 @@ layers: nullable: false comment: Datum för senaste uppdatering source: - download: - module: webmap-download-mrr - layername: 'MRR:SE.GOV.SGU.MRR.MINERAL_APPLIED_VY' - cache: mrr/mineral_applied.geojson + path: mrr/mineralrattigheter.zip import: format: GeoJSON layername: 'MRR:SE.GOV.SGU.MRR.MINERAL_APPLIED_VY' @@ -3351,10 +3384,7 @@ layers: nullable: false comment: Datum för senaste uppdatering source: - download: - module: webmap-download-mrr - layername: 'MRR:SE.GOV.SGU.MRR.MINERAL_APPROVED_VY' - cache: mrr/mineral_approved.geojson + path: mrr/mineralrattigheter.zip import: format: GeoJSON layername: 'MRR:SE.GOV.SGU.MRR.MINERAL_APPROVED_VY' @@ -3379,30 +3409,19 @@ layers: # 'mrr:mineral_expired': # source: -# download: -# module: webmap-download-mrr -# layername: 'MRR:SE.GOV.SGU.MRR.MINERAL_EXPIRED_2' -# cache: mrr/mineral_expired.geojson +# path: mrr/mineralrattigheter.zip # # 'mrr:mineral_prohibited': # source: -# download: -# module: webmap-download-mrr -# layername: 'MRR:SE.GOV.SGU.MRR.MINERAL_PROHIBITED_2' -# cache: mrr/mineral_prohibited.geojson +# path: mrr/mineralrattigheter.zip # # 'mrr:ogd_expired': # source: -# download: -# module: webmap-download-mrr -# layername: 'MRR:SE.GOV.SGU.MRR.OGD_EXPIRED_2' -# cache: mrr/ogd_expired.geojson +# path: mrr/mineralrattigheter.zip +# # 'mrr:ogd_prohibited': # source: -# download: -# module: webmap-download-mrr -# layername: 'MRR:SE.GOV.SGU.MRR.OGD_PROHIBITED_2' -# cache: mrr/ogd_prohibited.geojson +# path: mrr/mineralrattigheter.zip 'mrr:olja_gas_diamant_applied': description: Undersökningstillstånd, olja, gas och diamant, ansökta (SGU) @@ -3438,10 +3457,7 @@ layers: nullable: false comment: Datum för senaste uppdatering source: - download: - module: webmap-download-mrr - layername: 'MRR:SE.GOV.SGU.MRR.OLJA_GAS_DIAMANT_APPLIED_VY' - cache: mrr/olja_gas_diamant_applied.geojson + path: mrr/mineralrattigheter.zip import: format: GeoJSON layername: 'MRR:SE.GOV.SGU.MRR.OLJA_GAS_DIAMANT_APPLIED_VY' @@ -3502,10 +3518,7 @@ layers: nullable: false comment: Datum för senaste uppdatering source: - download: - module: webmap-download-mrr - layername: 'MRR:SE.GOV.SGU.MRR.OLJA_GAS_DIAMANT_APPROVED_VY' - cache: mrr/olja_gas_diamant_approved.geojson + path: mrr/mineralrattigheter.zip import: format: GeoJSON layername: 'MRR:SE.GOV.SGU.MRR.OLJA_GAS_DIAMANT_APPROVED_VY' @@ -3565,10 +3578,7 @@ layers: nullable: false comment: Datum för senaste uppdatering source: - download: - module: webmap-download-mrr - layername: 'MRR:SE.GOV.SGU.MRR.TORVKONCESSIONER_VY' - cache: mrr/torvkoncessioner.geojson + path: mrr/mineralrattigheter.zip import: format: GeoJSON layername: 'MRR:SE.GOV.SGU.MRR.TORVKONCESSIONER_VY' @@ -3634,8 +3644,7 @@ layers: unique: false nullable: false source: - download: 'https://geodata.naturvardsverket.se/nedladdning/riksintresse/RI_Naturvard.zip' - cache: nvk/ + path: nvk/RI_Naturvard.zip unar: format: zip patterns: @@ -3702,8 +3711,7 @@ layers: type: Real comment: Areal vatten i hektar source: - download: 'https://geodata.naturvardsverket.se/nedladdning/riksintresse/RI_Friluftsliv.zip' - cache: nvk/ + path: nvk/RI_Friluftsliv.zip unar: format: zip patterns: @@ -3747,8 +3755,7 @@ layers: type: String width: 254 source: - download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/lst.LST_RI_Rorligt_friluftsliv_MB4kap2.zip' - cache: lst/ + path: lst/lst.LST_RI_Rorligt_friluftsliv_MB4kap2.zip unar: format: zip patterns: @@ -3795,8 +3802,7 @@ layers: type: String width: 254 source: - download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/lst.LST_RI_Obruten_kust_MB4kap3.zip' - cache: lst/ + path: lst/lst.LST_RI_Obruten_kust_MB4kap3.zip unar: format: zip patterns: @@ -3838,8 +3844,7 @@ layers: type: String width: 254 source: - download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/lst.Lst_RI_Obrutet_fjall_MB4kap5.zip' - cache: lst/ + path: lst/lst.Lst_RI_Obrutet_fjall_MB4kap5.zip unar: format: zip patterns: @@ -3887,8 +3892,7 @@ layers: type: String width: 254 source: - download: 'https://ext-dokument.lansstyrelsen.se/gemensamt/geodata/ShapeExport/lst.LST_RI_Skyddade_vattendrag_MB4kap6.zip' - cache: lst/ + path: lst/lst.LST_RI_Skyddade_vattendrag_MB4kap6.zip unar: format: zip patterns: @@ -3916,8 +3920,7 @@ layers: # XXX convert to V? type: Integer source: - download: 'https://gis-services.metria.se/svkfeed/filer/SVK_STAMNAT.zip' - cache: svk/ + path: svk/SVK_STAMNAT.zip unar: format: zip patterns: @@ -3943,8 +3946,7 @@ layers: geometry-type: POINTZ fields: [] source: - download: 'https://gis-services.metria.se/svkfeed/filer/SVK_STAMNAT.zip' - cache: svk/ + path: svk/SVK_STAMNAT.zip unar: format: zip patterns: @@ -3961,8 +3963,7 @@ layers: geometry-type: MULTIPOLYGONZ fields: [] source: - download: 'https://gis-services.metria.se/svkfeed/filer/SVK_STAMNAT.zip' - cache: svk/ + path: svk/SVK_STAMNAT.zip unar: format: zip patterns: @@ -4168,7 +4169,7 @@ layers: comment: Distance to the closest SvK station or production dam source: #download: 'https://opendata-view.smhi.se/SMHI_vatten_DamOrWeir/HY.PhysicalWaters.ManMadeObject/ows?service=WFS&request=GetFeature&typeNames=HY.PhysicalWaters.ManMadeObject&outputFormat=SHAPE-ZIP&srsName=EPSG:3006&format_options=charset:utf-8' - cache: custom/HY_PhysicalWaters_ManMadeObject.zip + path: custom/HY_PhysicalWaters_ManMadeObject.zip unar: format: zip patterns: diff --git a/webmap-download b/webmap-download index 2d31a19..a8a444a 100755 --- a/webmap-download +++ b/webmap-download @@ -2,7 +2,7 @@ #---------------------------------------------------------------------- # Backend utilities for the Klimatanalys Norr project (download common layers) -# Copyright © 2024 Guilhem Moulin <info@guilhem.se> +# Copyright © 2024-2025 Guilhem Moulin <info@guilhem.se> # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,7 +21,20 @@ # pylint: disable=invalid-name,missing-module-docstring # pylint: enable=invalid-name -from os import O_RDONLY, O_WRONLY, O_CREAT, O_TRUNC, O_CLOEXEC, O_PATH, O_DIRECTORY, O_TMPFILE +from os import ( + O_RDONLY, + O_WRONLY, + O_CREAT, + O_TRUNC, + O_CLOEXEC, + O_PATH, + O_DIRECTORY, + O_TMPFILE, + path as os_path, + curdir as os_curdir, + pardir as os_pardir, + sep as os_sep +) import os import sys from fcntl import flock, LOCK_EX @@ -32,7 +45,7 @@ import itertools from pathlib import Path from email.utils import parsedate_to_datetime, formatdate from hashlib import sha256 -from typing import Any, Optional, NoReturn, Never +from typing import Optional, NoReturn, Never import requests import common @@ -64,7 +77,7 @@ class DownloadTooLarge(Exception): # pylint: disable-next=dangerous-default-value def download(dest : str, - dl : Optional[dict[str, Any]], + dl : dict[str, dict[str, str|int]], dir_fd : Optional[int] = None, headers : dict[str, str] = {}, session : Optional[requests.sessions.Session] = None, @@ -109,7 +122,7 @@ def download(dest : str, # XXX we can't use TemporaryFile as it uses O_EXCL, cf. # https://discuss.python.org/t/temporaryfile-contextmanager-that-allows-creating-a-directory-entry-on-success/19094/2 - fd = os.open(os.path.dirname(dest), O_WRONLY|O_CLOEXEC|O_TMPFILE, mode=0o644, dir_fd=dir_fd) + fd = os.open(os_path.dirname(dest), O_WRONLY|O_CLOEXEC|O_TMPFILE, mode=0o644, dir_fd=dir_fd) try: if progress is not None: pbar = progress( @@ -157,12 +170,77 @@ def download(dest : str, common.format_time(elapsed), common.format_bytes(int(size/elapsed))) +class BadConfiguration(Exception): + """Exception raised when there is a bad configuration""" + def __init__(self, message : str, config_path : Optional[Path] = None) -> Never: + if config_path is not None: + message = str(config_path) + ': ' + message + super().__init__(message) + +def _check_key_type(k : str, v : str, known_keys : list[type, tuple[set[str]]]) -> bool: + for t, ks in known_keys: + if k in ks and isinstance(v, t): + return True + return False + +def parse_config_dl(downloads) -> dict[str, dict[str, str|int]]: + """Parse and validate the "downloads" section from the configuration dictionary""" + + if not isinstance(downloads, list): + raise BadConfiguration(f'Invalid download recipe: {downloads}') + + known_keys = [ + (str, {'path', 'url'}), + (int, {'max-age', 'max-size'}) + ] + + destinations = {} + known_keys_set = {k for _,ks in known_keys for k in ks} + for dl in downloads: + if 'url' in dl: + dls = [dl] + elif 'basedir' in dl and 'baseurl' in dl and 'files' in dl and 'path' not in dl: + dls = [] + for filename in dl['files']: + dl2 = { + 'path' : os_path.join(dl['basedir'], filename), + 'url' : dl['baseurl'] + filename + } + for k, v in dl.items(): + if k not in ('basedir', 'baseurl', 'files'): + dl2[k] = v + dls.append(dl2) + else: + raise BadConfiguration(f'Invalid download recipe: {dl}') + + for dl in dls: + path = dl.get('path', None) + if path is None or path in ('', os_curdir, os_pardir) or path.endswith(os_sep): + raise BadConfiguration(f'Invalid destination path "{path}"') + if path in destinations: + raise BadConfiguration(f'Duplicate download recipe for "{path}"') + dl2 = {} + for k, v in dl.items(): + if k == 'path': + continue + if k not in known_keys_set: + logging.warning('Ignoring unknown setting "%s" in download recipe for "%s"', + k, path) + elif not _check_key_type(k, v, known_keys): + logging.warning('Ignoring setting "%s" in download recipe for "%s"' + ' (invalid type)', k, path) + else: + dl2[k] = v + destinations[path] = dl2 + + return destinations + def main() -> NoReturn: # pylint: disable=missing-function-docstring - common.init_logger(app=os.path.basename(__file__), level=logging.INFO) + common.init_logger(app=os_path.basename(__file__), level=logging.INFO) parser = argparse.ArgumentParser(description='Download or update GIS layers.') - parser.add_argument('--cachedir', default=os.curdir, - help=f'destination directory for downloaded files (default: {os.curdir})') + parser.add_argument('--cachedir', default=os_curdir, + help=f'destination directory for downloaded files (default: {os_curdir})') parser.add_argument('--lockdir', default=None, help='optional directory for lock files') parser.add_argument('--quiet', action='store_true', @@ -184,13 +262,24 @@ def main() -> NoReturn: # pylint: disable=missing-function-docstring requests_log.propagate = True config = common.parse_config(groupnames=None if args.groupname == [] else args.groupname) + downloads = parse_config_dl(config.get('downloads', [])) - sources = [] - for name, layerdefs in config.get('layers', {}).items(): - for layerdef in layerdefs['sources']: - sourcedef = layerdef.get('source', {}) - sourcedef['layername'] = name - sources.append(sourcedef) + rv = 0 + download_paths = set() + for layername, layerdef in config.get('layers', {}).items(): + source = layerdef.get('source', None) + if source is None: + logging.error('Layer "%s" has no source, ignoring', layername) + rv = 1 + continue + path = source.get('path', None) + if path is None: + logging.error('Layer "%s" has no source path, ignoring', layername) + rv = 1 + elif path not in downloads: + logging.warning('Ignoring unknown source of path "%s" from layer "%s"', path, layername) + else: + download_paths.add(path) if args.quiet or not sys.stderr.isatty(): pbar = None @@ -205,27 +294,8 @@ def main() -> NoReturn: # pylint: disable=missing-function-docstring session_requests = requests.Session() - rv = 0 - downloads = set() - for source in sources: - dl = source.get('download', None) - dl_module = None if dl is None else dl.get('module', None) - if dl_module is None: - fetch = download - else: - dl_module = __import__(dl_module) - fetch = dl_module.download - - cache = source.get('cache', None) - dest = None if cache is None else cache.get('path', None) - if dest is None: - continue - - dest = str(dest) # convert from Path() - if dest in downloads: - logging.info('%s was already downloaded, skipping', dest) - continue - + for dest in download_paths: + dl = downloads[dest] headers = {} user_agent = config.get('User-Agent', None) if user_agent is not None: @@ -233,7 +303,7 @@ def main() -> NoReturn: # pylint: disable=missing-function-docstring try: # create parent directories - destdir = os.path.dirname(dest) + destdir = os_path.dirname(dest) common.makedirs(destdir, mode=0o755, dir_fd=destdir_fd, exist_ok=True) # place an exclusive lock on a lockfile as the destination can be used by other layers @@ -253,7 +323,7 @@ def main() -> NoReturn: # pylint: disable=missing-function-docstring # the file doesn't exist, or stat() failed for some reason pass else: - max_age = cache.get('max-age', 6*3600) # 6h + max_age = dl.get('max-age', 6*3600) # 6h if max_age is not None: s = max_age + max(st.st_ctime, st.st_mtime) - time() if s > 0: @@ -261,16 +331,14 @@ def main() -> NoReturn: # pylint: disable=missing-function-docstring dest, common.format_time(s)) continue headers['If-Modified-Since'] = formatdate(timeval=st.st_mtime, localtime=False, usegmt=True) - fetch(dest, dl, dir_fd=destdir_fd, - headers=headers, session=session_requests, - progress=pbar) - downloads.add(dest) + download(dest, dl, dir_fd=destdir_fd, + headers=headers, session=session_requests, + progress=pbar) finally: if lockdir_fd is not None: os.close(lock_fd) except Exception: # pylint: disable=broad-exception-caught - logging.exception('Could not download %s as %s', - dl.get('url', source['layername']), dest) + logging.exception('Could not download %s as %s', dl.get('url', '[N/A]'), dest) if args.exit_code: rv = 1 sys.exit(rv) |