aboutsummaryrefslogtreecommitdiffstats
path: root/webmap-download
diff options
context:
space:
mode:
authorGuilhem Moulin <guilhem@fripost.org>2026-03-06 10:52:43 +0100
committerGuilhem Moulin <guilhem@fripost.org>2026-03-06 16:06:41 +0100
commitca91a579770c89d25aefae220079bf336fa88dc9 (patch)
treecb1f49adacf12b0cb15b6430b0446fbee2135814 /webmap-download
parent94438a900d3fb933a33aed4d2ffeb8809e966c46 (diff)
Rename "webmap" to the less generic "geodata".
The database has uses beyond the webmap.
Diffstat (limited to 'webmap-download')
-rwxr-xr-xwebmap-download291
1 files changed, 0 insertions, 291 deletions
diff --git a/webmap-download b/webmap-download
deleted file mode 100755
index 5e191ad..0000000
--- a/webmap-download
+++ /dev/null
@@ -1,291 +0,0 @@
-#!/usr/bin/python3
-
-#----------------------------------------------------------------------
-# Backend utilities for the Klimatanalys Norr project (download common layers)
-# Copyright © 2024-2025 Guilhem Moulin <info@guilhem.se>
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-#----------------------------------------------------------------------
-
-# pylint: disable=invalid-name, missing-module-docstring, fixme
-# pylint: enable=invalid-name
-
-from os import (
- O_RDONLY,
- O_WRONLY,
- O_CREAT,
- O_TRUNC,
- O_CLOEXEC,
- O_PATH,
- O_DIRECTORY,
- O_TMPFILE,
- path as os_path,
- curdir as os_curdir,
-)
-import os
-import sys
-from fcntl import flock, LOCK_EX
-import logging
-from time import time, monotonic as time_monotonic
-import argparse
-import itertools
-from pathlib import Path
-from email.utils import parsedate_to_datetime, formatdate
-from typing import Optional, NoReturn, Never
-import requests
-
-import common
-from common import parse_config_dl, getSourcePathLockFileName
-
def download_trystream(url : str, **kwargs) -> 'requests.Response':
    """GET *url* with ``stream=True``, retrying transient network errors.

    Extra keyword arguments (popped before the rest are forwarded to the
    ``get()`` call):
      max_retries -- number of attempts before giving up (default: 10)
      session     -- optional requests.Session-like object; falls back to
                     the module-level ``requests`` API when omitted

    Returns as soon as the response headers are received (the body is
    streamed by the caller).  Raises requests.HTTPError on a bad status,
    or re-raises the last Timeout/ConnectionError once the retry budget
    is exhausted.
    """
    max_retries = kwargs.pop('max_retries', 10)
    f = kwargs.pop('session', None)
    if f is None:
        f = requests
    for i in itertools.count(1):
        try:
            r = f.get(url, **kwargs, stream=True)
        except (requests.Timeout, requests.ConnectionError) as exc:
            if i < max_retries:
                # bug fix: the previous message said "timeout" even when a
                # ConnectionError was caught; log the actual error class
                logging.error('%s (attempt %d/%d)', type(exc).__name__, i, max_retries)
                continue
            raise

        r.raise_for_status()
        return r
-
class DownloadTooLarge(Exception):
    """Raised when a downloaded payload exceeds the configured max-size.

    *max_size* is the limit in bytes that was exceeded; it is embedded in
    the exception message.
    """
    def __init__(self, max_size : int) -> None:
        # fix: __init__ returns None normally; the previous "-> Never"
        # annotation was incorrect (Never marks code that never returns)
        super().__init__(f'Payload exceeds max-size ({max_size})')
-
# pylint: disable-next=dangerous-default-value
def download(dest : str,
             dl : dict[str, dict[str, str|int]],
             dir_fd : Optional[int] = None,
             headers : dict[str, str] = {},
             session : Optional[requests.sessions.Session] = None,
             progress = None) -> None:
    """Process a single download recipe: fetch dl['url'] and atomically
    install the payload at *dest*.

    dest     -- destination path, resolved relative to *dir_fd* when given
    dl       -- recipe dict; keys used here: 'url' (required) and
                'max-size' (payload cap in bytes, default 2**26 = 64MiB)
    dir_fd   -- optional directory file descriptor anchoring *dest*
    headers  -- extra HTTP request headers (e.g. If-Modified-Since); only
                read here, never mutated (hence the tolerated mutable default)
    session  -- optional requests.Session for connection reuse
    progress -- optional tqdm-compatible progress-bar factory

    Returns without touching *dest* on a 304 Not Modified response.
    Raises DownloadTooLarge when the body exceeds 'max-size'.
    """

    url = None if dl is None else dl.get('url', None)
    if url is None:
        logging.error('%s has no source URL, ignoring', dest)
        return
    max_size = dl.get('max-size', 2**26) # 64MiB
    logging.info('Downloading %s…', url)
    dest_path = Path(dest)
    # hidden temporary name in the same directory: "dir/.name.new.ext"
    dest_tmp = str(dest_path.with_stem(f'.{dest_path.stem}.new'))
    try:
        # delete any leftover
        os.unlink(dest_tmp, dir_fd=dir_fd)
    except FileNotFoundError:
        pass

    start = time_monotonic()
    r = download_trystream(url, headers=headers, session=session, timeout=30)
    if r.status_code == 304:
        # If-Modified-Since hit: the cached copy is still current
        logging.info('%s: %d Not Modified', dest, r.status_code)
        return

    body_size = r.headers.get('Content-Length', None)
    last_modified = r.headers.get('Last-Modified', None)
    if last_modified is not None:
        try:
            # convert the HTTP date to a POSIX timestamp for os.utime()
            last_modified = parsedate_to_datetime(last_modified)
            last_modified = last_modified.timestamp()
        except ValueError:
            logging.exception('Could not parse Last-Modified value')
            last_modified = None

    size = 0
    pbar = None

    # XXX we can't use TemporaryFile as it uses O_EXCL, cf.
    # https://discuss.python.org/t/temporaryfile-contextmanager-that-allows-creating-a-directory-entry-on-success/19094/2
    # NOTE(review): assumes *dest* has a directory component — os_path.dirname()
    # returns '' for a bare filename, which os.open() would reject; confirm
    # callers always pass "dir/file" style paths
    fd = os.open(os_path.dirname(dest), O_WRONLY|O_CLOEXEC|O_TMPFILE, mode=0o644, dir_fd=dir_fd)
    try:
        if progress is not None:
            pbar = progress(
                total=int(body_size) if body_size is not None else float('inf'),
                leave=False,
                unit_scale=True,
                unit_divisor=1024,
                unit='B'
            )
        # closefd=False: the raw fd stays open for utime()/link() below and
        # is closed once in the finally clause
        with os.fdopen(fd, mode='wb', closefd=False) as fp:
            for chunk in r.iter_content(chunk_size=2**16):
                chunk_size = len(chunk)
                if pbar is not None:
                    pbar.update(chunk_size)
                size += chunk_size
                if max_size is not None and size > max_size:
                    raise DownloadTooLarge(max_size)
                fp.write(chunk)
        r = None

        if last_modified is not None:
            # propagate the server-side mtime to the downloaded file
            os.utime(fd, times=(last_modified, last_modified))

        # XXX unfortunately there is no way for linkat() to clobber the destination,
        # so we use a temporary file; it's racy, but thanks to O_TMPFILE better
        # (shorter race) than if we were dumping chunks in a named file descriptor
        os.link(f'/proc/self/fd/{fd}', dest_tmp, dst_dir_fd=dir_fd, follow_symlinks=True)
    finally:
        os.close(fd)
        if pbar is not None:
            pbar.close()

    try:
        # atomic rename (ensures output is never partially written)
        os.rename(dest_tmp, dest, src_dir_fd=dir_fd, dst_dir_fd=dir_fd)
    except (OSError, ValueError) as e:
        try:
            os.unlink(dest_tmp, dir_fd=dir_fd)
        finally:
            raise e

    elapsed = time_monotonic() - start
    logging.info('%s: Downloaded %s in %s (%s/s)', dest,
                 common.format_bytes(size),
                 common.format_time(elapsed),
                 common.format_bytes(int(size/elapsed)))
-
def main() -> NoReturn:
    """Command-line entry point: download every source path referenced by
    the selected layer groups, honouring each recipe's max-age and using
    If-Modified-Since to avoid re-fetching fresh files.

    Always terminates via sys.exit(); status 1 signals download failures
    unless --no-exit-code was given.
    """
    common.init_logger(app=os_path.basename(__file__), level=logging.INFO)

    parser = argparse.ArgumentParser(description='Download or update GIS layers.')
    parser.add_argument('--cachedir', default=os_curdir,
                        help=f'destination directory for downloaded files (default: {os_curdir})')
    parser.add_argument('--lockdir', default=None,
                        help='optional directory for lock files')
    parser.add_argument('--quiet', action='store_true',
                        help='skip progress bars even when stderr is a TTY')
    parser.add_argument('--debug', action='count', default=0,
                        help=argparse.SUPPRESS)
    parser.add_argument('--exit-code', default=True, action=argparse.BooleanOptionalAction,
                        help='whether to exit with status 1 in case of download failures')
    parser.add_argument('--force', default=False, action='store_true',
                        help='always download regardless of age')
    parser.add_argument('groupname', nargs='*', help='group layer name(s) to process')
    args = parser.parse_args()

    if args.debug > 0: # pylint: disable=duplicate-code
        logging.getLogger().setLevel(logging.DEBUG)
    if args.debug > 1:
        # a second --debug also enables wire-level HTTP tracing
        from http.client import HTTPConnection # pylint: disable=import-outside-toplevel
        HTTPConnection.debuglevel = 1
        requests_log = logging.getLogger('urllib3')
        requests_log.setLevel(logging.DEBUG)
        requests_log.propagate = True

    config = common.parse_config(groupnames=None if args.groupname == [] else args.groupname)
    downloads = parse_config_dl(config.get('downloads', []))

    rv = 0
    # collect the set of distinct source paths referenced by the selected
    # layers (several layers may share a single download)
    download_paths = set()
    for layername, layerdef in config.get('layers', {}).items():
        sources = layerdef.get('sources', None)
        if sources is None or len(sources) < 1:
            logging.warning('Layer "%s" has no source, ignoring', layername)
            continue
        for idx, source in enumerate(sources):
            if 'source' not in source:
                continue
            source = source['source']
            path = None if source is None else source.get('path', None)
            if path is None:
                logging.error('Source #%d of layer "%s" has no path, ignoring',
                              idx, layername)
                rv = 1
            elif path not in downloads:
                logging.warning('Ignoring unknown source of path "%s" from layer "%s"',
                                path, layername)
            else:
                download_paths.add(path)

    if args.quiet or not sys.stderr.isatty():
        pbar = None
    else:
        from tqdm import tqdm # pylint: disable=import-outside-toplevel
        pbar = tqdm

    # intentionally leave the dirfd open until the program terminates
    opendir_args = O_RDONLY|O_CLOEXEC|O_PATH|O_DIRECTORY
    destdir_fd = os.open(args.cachedir, opendir_args)
    lockdir_fd = None if args.lockdir is None else os.open(args.lockdir, opendir_args)

    session_requests = requests.Session()

    for dest in download_paths:
        dl = downloads[dest]
        headers = {}
        user_agent = config.get('User-Agent', None)
        if user_agent is not None:
            headers['User-Agent'] = user_agent

        try:
            # create parent directories
            destdir = os_path.dirname(dest)
            common.makedirs(destdir, mode=0o755, dir_fd=destdir_fd, exist_ok=True)

            # place an exclusive lock on a lockfile as the destination can be used by other layers
            # hence might be updated in parallel
            if lockdir_fd is not None:
                # temporarily relax umask so the lockfile is group-writable
                umask = os.umask(0o002)
                lockfile = getSourcePathLockFileName(dest)
                try:
                    # use O_TRUNC to bump lockfile's mtime
                    lock_fd = os.open(lockfile, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o664,
                                      dir_fd=lockdir_fd)
                finally:
                    os.umask(umask)

            try:
                if lockdir_fd is not None:
                    logging.debug('flock("%s", LOCK_EX)', lockfile)
                    flock(lock_fd, LOCK_EX)
                try:
                    st = os.stat(dest, dir_fd=destdir_fd)
                except (OSError, ValueError):
                    # the file doesn't exist, or stat() failed for some reason
                    pass
                else:
                    if not args.force:
                        max_age = dl.get('max-age', 6*3600) # 6h
                        if max_age is not None:
                            # seconds until the cached copy outlives max-age
                            s = max_age + max(st.st_ctime, st.st_mtime) - time()
                            if s > 0:
                                logging.info('%s: Too young, try again in %s', dest,
                                             common.format_time(s))
                                continue
                    # let the server skip the body if unchanged since our copy
                    headers['If-Modified-Since'] = formatdate(timeval=st.st_mtime,
                                                              localtime=False, usegmt=True)
                download(dest, dl, dir_fd=destdir_fd,
                         headers=headers, session=session_requests,
                         progress=pbar)
            finally:
                if lockdir_fd is not None:
                    os.close(lock_fd)
        except Exception: # pylint: disable=broad-exception-caught
            # best-effort: record the failure and keep processing the
            # remaining downloads
            logging.exception('Could not download %s as %s', dl.get('url', '[N/A]'), dest)
            if args.exit_code:
                rv = 1
    sys.exit(rv)
-
-main()