Diffstat (limited to 'webmap-download')
-rwxr-xr-x  webmap-download | 154
1 file changed, 111 insertions(+), 43 deletions(-)
diff --git a/webmap-download b/webmap-download
index 2d31a19..a8a444a 100755
--- a/webmap-download
+++ b/webmap-download
@@ -2,7 +2,7 @@
#----------------------------------------------------------------------
# Backend utilities for the Klimatanalys Norr project (download common layers)
-# Copyright © 2024 Guilhem Moulin <info@guilhem.se>
+# Copyright © 2024-2025 Guilhem Moulin <info@guilhem.se>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -21,7 +21,20 @@
# pylint: disable=invalid-name,missing-module-docstring
# pylint: enable=invalid-name
-from os import O_RDONLY, O_WRONLY, O_CREAT, O_TRUNC, O_CLOEXEC, O_PATH, O_DIRECTORY, O_TMPFILE
+from os import (
+    O_RDONLY,
+    O_WRONLY,
+    O_CREAT,
+    O_TRUNC,
+    O_CLOEXEC,
+    O_PATH,
+    O_DIRECTORY,
+    O_TMPFILE,
+    path as os_path,
+    curdir as os_curdir,
+    pardir as os_pardir,
+    sep as os_sep
+)
import os
import sys
from fcntl import flock, LOCK_EX
@@ -32,7 +45,7 @@ import itertools
from pathlib import Path
from email.utils import parsedate_to_datetime, formatdate
from hashlib import sha256
-from typing import Any, Optional, NoReturn, Never
+from typing import Optional, NoReturn, Never
import requests
import common
@@ -64,7 +77,7 @@ class DownloadTooLarge(Exception):
# pylint: disable-next=dangerous-default-value
def download(dest : str,
- dl : Optional[dict[str, Any]],
+ dl : dict[str, str|int],
dir_fd : Optional[int] = None,
headers : dict[str, str] = {},
session : Optional[requests.sessions.Session] = None,
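
For orientation, a rough sketch of how the narrowed-down signature is driven from main() further below; the cache directory and the recipe values are illustrative only, the real ones are derived from --cachedir and from parse_config_dl():

    import os
    import requests

    # hypothetical cache directory; the actual script derives the descriptor from --cachedir
    destdir_fd = os.open('/var/cache/webmap', os.O_RDONLY | os.O_CLOEXEC | os.O_DIRECTORY)
    try:
        download('example/data.zip',                  # dest, relative to dir_fd
                 {'url': 'https://example.org/data.zip', 'max-size': 2**26},
                 dir_fd=destdir_fd,
                 session=requests.Session())
    finally:
        os.close(destdir_fd)
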
@@ -109,7 +122,7 @@ def download(dest : str,
# XXX we can't use TemporaryFile as it uses O_EXCL, cf.
# https://discuss.python.org/t/temporaryfile-contextmanager-that-allows-creating-a-directory-entry-on-success/19094/2
- fd = os.open(os.path.dirname(dest), O_WRONLY|O_CLOEXEC|O_TMPFILE, mode=0o644, dir_fd=dir_fd)
+ fd = os.open(os_path.dirname(dest), O_WRONLY|O_CLOEXEC|O_TMPFILE, mode=0o644, dir_fd=dir_fd)
try:
if progress is not None:
pbar = progress(
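
The XXX comment above refers to the O_TMPFILE idiom: data is written to an anonymous inode in the destination directory and only given a name once the transfer has completed. A minimal sketch of that pattern, assuming Linux and a filesystem that supports O_TMPFILE (linking through /proc/self/fd is one common way to materialize the file; it fails if the destination already exists, so a real implementation still has to deal with replacement):

    import os

    def write_atomically(dirname: str, dest: str, data: bytes, dir_fd=None):
        # open an anonymous file in the destination directory (it has no name yet)
        fd = os.open(dirname, os.O_WRONLY | os.O_CLOEXEC | os.O_TMPFILE, mode=0o644, dir_fd=dir_fd)
        try:
            os.write(fd, data)
            # give the inode a name only after the content is complete
            os.link(f'/proc/self/fd/{fd}', dest, dst_dir_fd=dir_fd, follow_symlinks=True)
        finally:
            os.close(fd)
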
@@ -157,12 +170,77 @@ def download(dest : str,
common.format_time(elapsed),
common.format_bytes(int(size/elapsed)))
+class BadConfiguration(Exception):
+    """Exception raised when there is a bad configuration"""
+    def __init__(self, message : str, config_path : Optional[Path] = None) -> Never:
+        if config_path is not None:
+            message = str(config_path) + ': ' + message
+        super().__init__(message)
+
+def _check_key_type(k : str, v : str|int, known_keys : list[tuple[type, set[str]]]) -> bool:
+    for t, ks in known_keys:
+        if k in ks and isinstance(v, t):
+            return True
+    return False
+
+def parse_config_dl(downloads) -> dict[str, dict[str, str|int]]:
+    """Parse and validate the "downloads" section from the configuration dictionary"""
+
+    if not isinstance(downloads, list):
+        raise BadConfiguration(f'Invalid download recipe: {downloads}')
+
+    known_keys = [
+        (str, {'path', 'url'}),
+        (int, {'max-age', 'max-size'})
+    ]
+
+    destinations = {}
+    known_keys_set = {k for _, ks in known_keys for k in ks}
+    for dl in downloads:
+        if 'url' in dl:
+            dls = [dl]
+        elif 'basedir' in dl and 'baseurl' in dl and 'files' in dl and 'path' not in dl:
+            dls = []
+            for filename in dl['files']:
+                dl2 = {
+                    'path' : os_path.join(dl['basedir'], filename),
+                    'url' : dl['baseurl'] + filename
+                }
+                for k, v in dl.items():
+                    if k not in ('basedir', 'baseurl', 'files'):
+                        dl2[k] = v
+                dls.append(dl2)
+        else:
+            raise BadConfiguration(f'Invalid download recipe: {dl}')
+
+        for dl in dls:
+            path = dl.get('path', None)
+            if path is None or path in ('', os_curdir, os_pardir) or path.endswith(os_sep):
+                raise BadConfiguration(f'Invalid destination path "{path}"')
+            if path in destinations:
+                raise BadConfiguration(f'Duplicate download recipe for "{path}"')
+            dl2 = {}
+            for k, v in dl.items():
+                if k == 'path':
+                    continue
+                if k not in known_keys_set:
+                    logging.warning('Ignoring unknown setting "%s" in download recipe for "%s"',
+                                    k, path)
+                elif not _check_key_type(k, v, known_keys):
+                    logging.warning('Ignoring setting "%s" in download recipe for "%s"'
+                                    ' (invalid type)', k, path)
+                else:
+                    dl2[k] = v
+            destinations[path] = dl2
+
+    return destinations
+
def main() -> NoReturn: # pylint: disable=missing-function-docstring
- common.init_logger(app=os.path.basename(__file__), level=logging.INFO)
+ common.init_logger(app=os_path.basename(__file__), level=logging.INFO)
parser = argparse.ArgumentParser(description='Download or update GIS layers.')
- parser.add_argument('--cachedir', default=os.curdir,
- help=f'destination directory for downloaded files (default: {os.curdir})')
+ parser.add_argument('--cachedir', default=os_curdir,
+ help=f'destination directory for downloaded files (default: {os_curdir})')
parser.add_argument('--lockdir', default=None,
help='optional directory for lock files')
parser.add_argument('--quiet', action='store_true',
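
To make the two recipe shapes accepted by parse_config_dl() concrete, here is an illustrative "downloads" section (all values invented for the example) together with the mapping the function returns for it:

    downloads = [
        # single-file recipe: 'url' plus a destination 'path'
        {'url': 'https://example.org/data.zip', 'path': 'example/data.zip', 'max-size': 2**26},
        # multi-file recipe: 'baseurl' + 'basedir' + 'files', and no 'path'
        {'baseurl': 'https://example.org/tiles/',
         'basedir': 'tiles',
         'files': ['a.gpkg', 'b.gpkg'],
         'max-age': 86400},
    ]
    parse_config_dl(downloads)
    # => {'example/data.zip': {'url': 'https://example.org/data.zip', 'max-size': 67108864},
    #     'tiles/a.gpkg': {'url': 'https://example.org/tiles/a.gpkg', 'max-age': 86400},
    #     'tiles/b.gpkg': {'url': 'https://example.org/tiles/b.gpkg', 'max-age': 86400}}

Unknown or mistyped settings are dropped with a warning rather than rejected, while a missing, unsafe or duplicate destination path raises BadConfiguration.
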
@@ -184,13 +262,24 @@ def main() -> NoReturn: # pylint: disable=missing-function-docstring
requests_log.propagate = True
config = common.parse_config(groupnames=None if args.groupname == [] else args.groupname)
+ downloads = parse_config_dl(config.get('downloads', []))
- sources = []
- for name, layerdefs in config.get('layers', {}).items():
- for layerdef in layerdefs['sources']:
- sourcedef = layerdef.get('source', {})
- sourcedef['layername'] = name
- sources.append(sourcedef)
+ rv = 0
+ download_paths = set()
+ for layername, layerdef in config.get('layers', {}).items():
+ source = layerdef.get('source', None)
+ if source is None:
+ logging.error('Layer "%s" has no source, ignoring', layername)
+ rv = 1
+ continue
+ path = source.get('path', None)
+ if path is None:
+ logging.error('Layer "%s" has no source path, ignoring', layername)
+ rv = 1
+ elif path not in downloads:
+ logging.warning('Ignoring unknown source of path "%s" from layer "%s"', path, layername)
+ else:
+ download_paths.add(path)
if args.quiet or not sys.stderr.isatty():
pbar = None
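
A layer now references a download purely by its destination path; sketched with the same illustrative values as above (a real layer definition carries more keys, this shows only what the loop reads):

    config = {
        'downloads': downloads,                           # as in the previous example
        'layers': {
            'example-layer': {
                'source': {'path': 'example/data.zip'},   # must match a download destination
            },
        },
    }

A layer without a source, or without a source path, is logged as an error and sets a non-zero exit status; a path that does not appear under "downloads" is merely warned about and skipped.
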
@@ -205,27 +294,8 @@ def main() -> NoReturn: # pylint: disable=missing-function-docstring
session_requests = requests.Session()
- rv = 0
- downloads = set()
- for source in sources:
- dl = source.get('download', None)
- dl_module = None if dl is None else dl.get('module', None)
- if dl_module is None:
- fetch = download
- else:
- dl_module = __import__(dl_module)
- fetch = dl_module.download
-
- cache = source.get('cache', None)
- dest = None if cache is None else cache.get('path', None)
- if dest is None:
- continue
-
- dest = str(dest) # convert from Path()
- if dest in downloads:
- logging.info('%s was already downloaded, skipping', dest)
- continue
-
+ for dest in download_paths:
+ dl = downloads[dest]
headers = {}
user_agent = config.get('User-Agent', None)
if user_agent is not None:
@@ -233,7 +303,7 @@ def main() -> NoReturn: # pylint: disable=missing-function-docstring
try:
# create parent directories
- destdir = os.path.dirname(dest)
+ destdir = os_path.dirname(dest)
common.makedirs(destdir, mode=0o755, dir_fd=destdir_fd, exist_ok=True)
# place an exclusive lock on a lockfile as the destination can be used by other layers
@@ -253,7 +323,7 @@ def main() -> NoReturn: # pylint: disable=missing-function-docstring
# the file doesn't exist, or stat() failed for some reason
pass
else:
- max_age = cache.get('max-age', 6*3600) # 6h
+ max_age = dl.get('max-age', 6*3600) # 6h
if max_age is not None:
s = max_age + max(st.st_ctime, st.st_mtime) - time()
if s > 0:
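
Worked through with the default max-age of six hours: if the cached file was last touched two hours ago, s = 6*3600 + (now - 2*3600) - now = +4 hours, so the download is skipped outright; once the file is older than six hours, s turns negative and (as the next hunk shows) the request goes out with If-Modified-Since set, so the server can answer 304 Not Modified instead of resending the body.
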
@@ -261,16 +331,14 @@ def main() -> NoReturn: # pylint: disable=missing-function-docstring
dest, common.format_time(s))
continue
headers['If-Modified-Since'] = formatdate(timeval=st.st_mtime, localtime=False, usegmt=True)
- fetch(dest, dl, dir_fd=destdir_fd,
- headers=headers, session=session_requests,
- progress=pbar)
- downloads.add(dest)
+ download(dest, dl, dir_fd=destdir_fd,
+ headers=headers, session=session_requests,
+ progress=pbar)
finally:
if lockdir_fd is not None:
os.close(lock_fd)
except Exception: # pylint: disable=broad-exception-caught
- logging.exception('Could not download %s as %s',
- dl.get('url', source['layername']), dest)
+ logging.exception('Could not download %s as %s', dl.get('url', '[N/A]'), dest)
if args.exit_code:
rv = 1
sys.exit(rv)
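
For completeness, the per-destination locking the loop relies on (acquired just before the freshness check, outside the hunks shown here) boils down to flock()-ing a file under --lockdir. A hypothetical sketch, assuming one lock file per destination with a flattened name:

    import os
    from fcntl import flock, LOCK_EX

    def lock_destination(lockdir_fd: int, dest: str) -> int:
        # illustrative helper, not part of the commit: hold an exclusive lock for
        # the duration of the download so concurrent runs don't clobber each other
        name = dest.replace(os.sep, '_') + '.lock'        # naming scheme is an assumption
        lock_fd = os.open(name, os.O_WRONLY | os.O_CREAT | os.O_CLOEXEC,
                          mode=0o644, dir_fd=lockdir_fd)
        flock(lock_fd, LOCK_EX)                           # blocks until the lock is granted
        return lock_fd                                    # caller closes it, as in the finally: block above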