Diffstat (limited to 'webmap-download')
 -rwxr-xr-x  webmap-download | 88
 1 file changed, 58 insertions(+), 30 deletions(-)
diff --git a/webmap-download b/webmap-download
index 917f178..2d31a19 100755
--- a/webmap-download
+++ b/webmap-download
@@ -18,8 +18,12 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#----------------------------------------------------------------------
+# pylint: disable=invalid-name,missing-module-docstring
+# pylint: enable=invalid-name
+
from os import O_RDONLY, O_WRONLY, O_CREAT, O_TRUNC, O_CLOEXEC, O_PATH, O_DIRECTORY, O_TMPFILE
-import os, sys
+import os
+import sys
from fcntl import flock, LOCK_EX
import logging
from time import time, monotonic as time_monotonic
@@ -28,34 +32,53 @@ import itertools
from pathlib import Path
from email.utils import parsedate_to_datetime, formatdate
from hashlib import sha256
+from typing import Any, Optional, NoReturn
import requests
import common
-def download_trystream(url, **kwargs):
- max_tries = 10
- f = kwargs.pop('session', requests)
+def download_trystream(url : str, **kwargs) -> requests.Response:
+    """GET a URL, retrying a number of times on timeout or connection
+    error.  Return once the response headers arrive; the body is streamed."""
+
+ max_retries = kwargs.pop('max_retries', 10)
+ f = kwargs.pop('session', None)
+ if f is None:
+ f = requests
for i in itertools.count(1):
try:
r = f.get(url, **kwargs, stream=True)
except (requests.Timeout, requests.ConnectionError):
- if i < max_tries:
+ if i < max_retries:
logging.error('timeout')
continue
raise
- else:
- r.raise_for_status()
- return r
-def download(url, dest, dir_fd=None, headers={}, session=requests, progress=None):
+ r.raise_for_status()
+ return r
+
+class DownloadTooLarge(Exception):
+ """Exception raised when a downloaded file exceeds max-size"""
+    def __init__(self, max_size : int) -> None:
+ super().__init__(f'Payload exceeds max-size ({max_size})')
+
+# pylint: disable-next=dangerous-default-value
+def download(dest : str,
+ dl : Optional[dict[str, Any]],
+ dir_fd : Optional[int] = None,
+ headers : dict[str, str] = {},
+ session : Optional[requests.sessions.Session] = None,
+ progress = None) -> None:
+ """Process a single download recipe"""
+
url = None if dl is None else dl.get('url', None)
if url is None:
logging.error('%s has no source URL, ignoring', dest)
return
max_size = dl.get('max-size', 2**26) # 64MiB
logging.info('Downloading %s…', url)
- destPath = Path(dest)
- dest_tmp = str(destPath.with_stem(f'.{destPath.stem}.new'))
+ dest_path = Path(dest)
+ dest_tmp = str(dest_path.with_stem(f'.{dest_path.stem}.new'))
try:
# delete any leftover
os.unlink(dest_tmp, dir_fd=dir_fd)
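
For reference, a minimal caller of download_trystream() might look like
this (a hypothetical sketch, not part of the change; the URL and chunk
size are made up):

    # Fetch a URL and copy it to stdout in 64 KiB chunks.  Retries on
    # timeout and connection errors happen inside download_trystream().
    r = download_trystream('https://example.net/data.zip', timeout=30)
    for chunk in r.iter_content(chunk_size=2**16):
        sys.stdout.buffer.write(chunk)
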
@@ -64,7 +87,7 @@ def download(url, dest, dir_fd=None, headers={}, session=requests, progress=None
start = time_monotonic()
r = download_trystream(url, headers=headers, session=session, timeout=30)
- if r.status_code == requests.codes.not_modified:
+ if r.status_code == 304:
# XXX shouldn't we call os.utime(dest) to bump its ctime here?
# otherwise we'll make several queries and get multiple 304
# replies if the file is used by multiple layers
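
A sketch of what the XXX comment above seems to suggest (an assumption
about intent, not committed code): re-apply the cached file's current
timestamps, which bumps its ctime as a side effect while leaving the
mtime used for If-Modified-Since untouched:

    # Hypothetical fix: touch dest on a 304 reply.  Any utime() call
    # updates ctime; re-using the existing atime/mtime keeps the value
    # advertised in the If-Modified-Since request header intact.
    st = os.stat(dest, dir_fd=dir_fd)
    os.utime(dest, times=(st.st_atime, st.st_mtime), dir_fd=dir_fd)
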
@@ -103,12 +126,12 @@ def download(url, dest, dir_fd=None, headers={}, session=requests, progress=None
pbar.update(chunk_size)
size += chunk_size
if max_size is not None and size > max_size:
- raise Exception(f'Payload exceeds max-size ({max_size})')
+ raise DownloadTooLarge(max_size)
fp.write(chunk)
r = None
if last_modified is not None:
- os.utime(fd, times=(last_modified, last_modified), follow_symlinks=True)
+ os.utime(fd, times=(last_modified, last_modified))
# XXX unfortunately there is no way for linkat() to clobber the destination,
# so we use a temporary file; it's racy, but thanks to O_TMPFILE better
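
The O_TMPFILE idiom the comment above refers to looks roughly like this
on Linux (a sketch with hypothetical file names, not the code in this
change):

    # Write to an unnamed file in the target directory, then give it a
    # name via linkat(2) through /proc/self/fd.
    fd = os.open('.', O_TMPFILE|O_WRONLY|O_CLOEXEC, mode=0o644, dir_fd=dir_fd)
    try:
        os.write(fd, b'payload')
        # linkat() fails with EEXIST if the name already exists, which
        # is why a visible temporary file plus rename is needed here.
        os.link(f'/proc/self/fd/{fd}', 'dest.name', dst_dir_fd=dir_fd)
    finally:
        os.close(fd)
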
@@ -129,10 +152,12 @@ def download(url, dest, dir_fd=None, headers={}, session=requests, progress=None
raise e
elapsed = time_monotonic() - start
- logging.info("%s: Downloaded %s in %s (%s/s)", dest, common.format_bytes(size),
- common.format_time(elapsed), common.format_bytes(int(size/elapsed)))
+ logging.info('%s: Downloaded %s in %s (%s/s)', dest,
+ common.format_bytes(size),
+ common.format_time(elapsed),
+ common.format_bytes(int(size/elapsed)))
-if __name__ == '__main__':
+def main() -> NoReturn: # pylint: disable=missing-function-docstring
common.init_logger(app=os.path.basename(__file__), level=logging.INFO)
parser = argparse.ArgumentParser(description='Download or update GIS layers.')
@@ -152,16 +177,16 @@ if __name__ == '__main__':
if args.debug > 0:
logging.getLogger().setLevel(logging.DEBUG)
if args.debug > 1:
- from http.client import HTTPConnection
+ from http.client import HTTPConnection # pylint: disable=import-outside-toplevel
HTTPConnection.debuglevel = 1
- requests_log = logging.getLogger("urllib3")
+ requests_log = logging.getLogger('urllib3')
requests_log.setLevel(logging.DEBUG)
requests_log.propagate = True
- common.load_config(groupnames=None if args.groupname == [] else args.groupname)
+ config = common.parse_config(groupnames=None if args.groupname == [] else args.groupname)
sources = []
- for name, layerdefs in common.config.get('layers', {}).items():
+ for name, layerdefs in config.get('layers', {}).items():
for layerdef in layerdefs['sources']:
sourcedef = layerdef.get('source', {})
sourcedef['layername'] = name
@@ -170,7 +195,7 @@ if __name__ == '__main__':
if args.quiet or not sys.stderr.isatty():
pbar = None
else:
- from tqdm import tqdm
+ from tqdm import tqdm # pylint: disable=import-outside-toplevel
pbar = tqdm
# intentionally leave the dirfd open until the program terminates
@@ -178,7 +203,7 @@ if __name__ == '__main__':
destdir_fd = os.open(args.cachedir, opendir_args)
lockdir_fd = None if args.lockdir is None else os.open(args.lockdir, opendir_args)
- sessionRequests = requests.Session()
+ session_requests = requests.Session()
rv = 0
downloads = set()
@@ -202,21 +227,22 @@ if __name__ == '__main__':
continue
headers = {}
- user_agent = common.config.get('User-Agent', None)
+ user_agent = config.get('User-Agent', None)
if user_agent is not None:
headers['User-Agent'] = user_agent
try:
# create parent directories
destdir = os.path.dirname(dest)
- common.makedirs(destdir, mode=0o755, dir_fd=destdir_fd, exist_ok=True, logging=logging)
+ common.makedirs(destdir, mode=0o755, dir_fd=destdir_fd, exist_ok=True)
# place an exclusive lock on a lockfile as the destination can be used by other layers
# hence might be updated in parallel
if lockdir_fd is not None:
lockfile = sha256(dest.encode('utf-8')).hexdigest() + '.lck'
# use O_TRUNC to bump lockfile's mtime
- lock_fd = os.open(lockfile, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644, dir_fd=lockdir_fd)
+ lock_fd = os.open(lockfile, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644,
+ dir_fd=lockdir_fd)
try:
if lockdir_fd is not None:
logging.debug('flock("%s", LOCK_EX)', lockfile)
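
The locking scheme above as a standalone sketch (lock_dest() is a
hypothetical helper name):

    # One lockfile per destination path, named after its SHA-256 digest,
    # so downloaders of the same file serialize on flock() even when the
    # destination is shared by several layers.
    def lock_dest(dest : str, lockdir_fd : int) -> int:
        lockfile = sha256(dest.encode('utf-8')).hexdigest() + '.lck'
        fd = os.open(lockfile, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC,
                     mode=0o644, dir_fd=lockdir_fd)
        flock(fd, LOCK_EX)  # blocks until concurrent holders are done
        return fd           # closing the fd releases the lock
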
@@ -235,16 +261,18 @@ if __name__ == '__main__':
dest, common.format_time(s))
continue
headers['If-Modified-Since'] = formatdate(timeval=st.st_mtime, localtime=False, usegmt=True)
- fetch(dl, dest, dir_fd=destdir_fd,
- headers=headers, session=sessionRequests,
+ fetch(dest, dl, dir_fd=destdir_fd,
+ headers=headers, session=session_requests,
progress=pbar)
downloads.add(dest)
finally:
if lockdir_fd is not None:
os.close(lock_fd)
- except Exception:
+ except Exception: # pylint: disable=broad-exception-caught
logging.exception('Could not download %s as %s',
dl.get('url', source['layername']), dest)
if args.exit_code:
rv = 1
- exit(rv)
+ sys.exit(rv)
+
+main()
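
Taken together, the update check for each destination file reduces to a
conditional GET; a rough sketch of the flow (hypothetical, simplified
from the code above):

    # Advertise the cached file's mtime; a 304 reply means the cached
    # copy is current and nothing needs to be written.
    st = os.stat(dest, dir_fd=destdir_fd)
    headers['If-Modified-Since'] = formatdate(timeval=st.st_mtime,
                                              localtime=False, usegmt=True)
    r = download_trystream(url, headers=headers,
                           session=session_requests, timeout=30)
    if r.status_code == 304:
        pass  # up to date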