Diffstat (limited to 'webmap-download')
-rwxr-xr-x  webmap-download  88
1 file changed, 58 insertions(+), 30 deletions(-)
diff --git a/webmap-download b/webmap-download
index 917f178..2d31a19 100755
--- a/webmap-download
+++ b/webmap-download
@@ -18,8 +18,12 @@
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 #----------------------------------------------------------------------
 
+# pylint: disable=invalid-name,missing-module-docstring
+# pylint: enable=invalid-name
+
 from os import O_RDONLY, O_WRONLY, O_CREAT, O_TRUNC, O_CLOEXEC, O_PATH, O_DIRECTORY, O_TMPFILE
-import os, sys
+import os
+import sys
 from fcntl import flock, LOCK_EX
 import logging
 from time import time, monotonic as time_monotonic
@@ -28,34 +32,53 @@ import itertools
 from pathlib import Path
 from email.utils import parsedate_to_datetime, formatdate
 from hashlib import sha256
+from typing import Any, Optional, NoReturn, Never
 
 import requests
 
 import common
 
-def download_trystream(url, **kwargs):
-    max_tries = 10
-    f = kwargs.pop('session', requests)
+def download_trystream(url : str, **kwargs) -> requests.Response:
+    """GET a url, trying a number of times. Return immediately after the
+    first chunk is received"""
+
+    max_retries = kwargs.pop('max_retries', 10)
+    f = kwargs.pop('session', None)
+    if f is None:
+        f = requests
     for i in itertools.count(1):
         try:
             r = f.get(url, **kwargs, stream=True)
         except (requests.Timeout, requests.ConnectionError):
-            if i < max_tries:
+            if i < max_retries:
                 logging.error('timeout')
                 continue
             raise
-        else:
-            r.raise_for_status()
-            return r
+        r.raise_for_status()
+        return r
+
+class DownloadTooLarge(Exception):
+    """Exception raised when a downloaded file exceeds max-size"""
+    def __init__(self, max_size : int) -> Never:
+        super().__init__(f'Payload exceeds max-size ({max_size})')
 
-def download(url, dest, dir_fd=None, headers={}, session=requests, progress=None):
+# pylint: disable-next=dangerous-default-value
+def download(dest : str,
+             dl : Optional[dict[str, Any]],
+             dir_fd : Optional[int] = None,
+             headers : dict[str, str] = {},
+             session : Optional[requests.sessions.Session] = None,
+             progress = None) -> None:
+    """Process a single download recipe"""
+
+    url = None if dl is None else dl.get('url', None)
     if url is None:
         logging.error('%s has no source URL, ignoring', dest)
         return
     max_size = dl.get('max-size', 2**26) # 64MiB
     logging.info('Downloading %s…', url)
-    destPath = Path(dest)
-    dest_tmp = str(destPath.with_stem(f'.{destPath.stem}.new'))
+    dest_path = Path(dest)
+    dest_tmp = str(dest_path.with_stem(f'.{dest_path.stem}.new'))
     try:
         # delete any leftover
         os.unlink(dest_tmp, dir_fd=dir_fd)
@@ -64,7 +87,7 @@ def download(url, dest, dir_fd=None, headers={}, session=requests, progress=None
     start = time_monotonic()
     r = download_trystream(url, headers=headers, session=session,
                            timeout=30)
-    if r.status_code == requests.codes.not_modified:
+    if r.status_code == 304:
         # XXX shouldn't we call os.utime(dest) to bump its ctime here?
         # otherwise we'll make several queries and get multiple 304
         # replies if the file is used by multiple layers
@@ -103,12 +126,12 @@ def download(url, dest, dir_fd=None, headers={}, session=requests, progress=None
                 pbar.update(chunk_size)
             size += chunk_size
             if max_size is not None and size > max_size:
-                raise Exception(f'Payload exceeds max-size ({max_size})')
+                raise DownloadTooLarge(max_size)
             fp.write(chunk)
     r = None
 
     if last_modified is not None:
-        os.utime(fd, times=(last_modified, last_modified), follow_symlinks=True)
+        os.utime(fd, times=(last_modified, last_modified))
 
     # XXX unfortunately there is no way for linkat() to clobber the destination,
     # so we use a temporary file; it's racy, but thanks to O_TMPFILE better
@@ -129,10 +152,12 @@ def download(url, dest, dir_fd=None, headers={}, session=requests, progress=None
             raise e
 
     elapsed = time_monotonic() - start
-    logging.info("%s: Downloaded %s in %s (%s/s)", dest, common.format_bytes(size),
-                 common.format_time(elapsed), common.format_bytes(int(size/elapsed)))
+    logging.info('%s: Downloaded %s in %s (%s/s)', dest,
+                 common.format_bytes(size),
+                 common.format_time(elapsed),
+                 common.format_bytes(int(size/elapsed)))
 
 
-if __name__ == '__main__':
+def main() -> NoReturn: # pylint: disable=missing-function-docstring
     common.init_logger(app=os.path.basename(__file__), level=logging.INFO)
     parser = argparse.ArgumentParser(description='Download or update GIS layers.')
@@ -152,16 +177,16 @@
     if args.debug > 0:
         logging.getLogger().setLevel(logging.DEBUG)
     if args.debug > 1:
-        from http.client import HTTPConnection
+        from http.client import HTTPConnection # pylint: disable=import-outside-toplevel
        HTTPConnection.debuglevel = 1
-        requests_log = logging.getLogger("urllib3")
+        requests_log = logging.getLogger('urllib3')
         requests_log.setLevel(logging.DEBUG)
         requests_log.propagate = True
 
-    common.load_config(groupnames=None if args.groupname == [] else args.groupname)
+    config = common.parse_config(groupnames=None if args.groupname == [] else args.groupname)
 
     sources = []
-    for name, layerdefs in common.config.get('layers', {}).items():
+    for name, layerdefs in config.get('layers', {}).items():
         for layerdef in layerdefs['sources']:
             sourcedef = layerdef.get('source', {})
             sourcedef['layername'] = name
@@ -170,7 +195,7 @@
     if args.quiet or not sys.stderr.isatty():
         pbar = None
     else:
-        from tqdm import tqdm
+        from tqdm import tqdm # pylint: disable=import-outside-toplevel
         pbar = tqdm
 
     # intentionally leave the dirfd open until the program terminates
@@ -178,7 +203,7 @@
     destdir_fd = os.open(args.cachedir, opendir_args)
     lockdir_fd = None if args.lockdir is None else os.open(args.lockdir, opendir_args)
 
-    sessionRequests = requests.Session()
+    session_requests = requests.Session()
 
     rv = 0
     downloads = set()
@@ -202,21 +227,22 @@
             continue
 
         headers = {}
-        user_agent = common.config.get('User-Agent', None)
+        user_agent = config.get('User-Agent', None)
         if user_agent is not None:
             headers['User-Agent'] = user_agent
 
         try:
             # create parent directories
             destdir = os.path.dirname(dest)
-            common.makedirs(destdir, mode=0o755, dir_fd=destdir_fd, exist_ok=True, logging=logging)
+            common.makedirs(destdir, mode=0o755, dir_fd=destdir_fd, exist_ok=True)
 
             # place an exclusive lock on a lockfile as the destination can be used by other layers
             # hence might be updated in parallel
             if lockdir_fd is not None:
                 lockfile = sha256(dest.encode('utf-8')).hexdigest() + '.lck'
                 # use O_TRUNC to bump lockfile's mtime
-                lock_fd = os.open(lockfile, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644, dir_fd=lockdir_fd)
+                lock_fd = os.open(lockfile, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644,
+                                  dir_fd=lockdir_fd)
             try:
                 if lockdir_fd is not None:
                     logging.debug('flock("%s", LOCK_EX)', lockfile)
@@ -235,16 +261,18 @@
                             dest, common.format_time(s))
                     continue
                 headers['If-Modified-Since'] = formatdate(timeval=st.st_mtime, localtime=False, usegmt=True)
-                fetch(dl, dest, dir_fd=destdir_fd,
-                      headers=headers, session=sessionRequests,
+                fetch(dest, dl, dir_fd=destdir_fd,
+                      headers=headers, session=session_requests,
                       progress=pbar)
                 downloads.add(dest)
             finally:
                 if lockdir_fd is not None:
                     os.close(lock_fd)
-        except Exception:
+        except Exception: # pylint: disable=broad-exception-caught
            logging.exception('Could not download %s as %s',
                              dl.get('url', source['layername']), dest)
            if args.exit_code:
                rv = 1
-    exit(rv)
+    sys.exit(rv)
+
+main()
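
Note: the conditional-GET and size-capping logic that this commit touches condenses
to the standalone sketch below. The URL, filenames, chunk size, and the plain
os.rename() (standing in for the script's dirfd and tmpfile handling) are
illustrative only; DownloadTooLarge is restated so the snippet runs on its own.

from email.utils import formatdate
import os
import requests

class DownloadTooLarge(Exception):
    """Exception raised when a downloaded file exceeds max-size"""
    def __init__(self, max_size):
        super().__init__(f'Payload exceeds max-size ({max_size})')

url = 'https://example.org/layer.zip'  # stand-in URL
dest = 'layer.zip'                     # stand-in destination
headers = {}
if os.path.exists(dest):
    # RFC 7231 wants an IMF-fixdate in GMT, which formatdate()
    # produces with usegmt=True
    headers['If-Modified-Since'] = formatdate(
        timeval=os.stat(dest).st_mtime, localtime=False, usegmt=True)

r = requests.get(url, headers=headers, stream=True, timeout=30)
if r.status_code == 304:
    print('not modified, keeping cached copy')
else:
    r.raise_for_status()
    size, max_size = 0, 2**26  # 64MiB, the script's default max-size
    with open(f'.{dest}.new', 'wb') as fp:
        # stream=True defers the body, so the cap aborts the transfer
        # early instead of buffering an oversized payload in memory
        for chunk in r.iter_content(chunk_size=2**16):
            size += len(chunk)
            if size > max_size:
                raise DownloadTooLarge(max_size)
            fp.write(chunk)
    os.rename(f'.{dest}.new', dest)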
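The XXX comment about linkat() refers to the Linux O_TMPFILE idiom: the payload
is written to an unnamed file and only given a name once complete, so readers
never observe a partial download. Because linkat() refuses to replace an
existing name, the file must first be linked under a temporary name and then
rename()d over the destination, which is the small race the comment concedes.
A sketch of the idiom, assuming Linux and a filesystem with O_TMPFILE support;
write_atomically is a hypothetical helper, not part of the script.

import os

def write_atomically(dirpath, name, data):
    """Write data under dirpath/name without ever exposing a partial file."""
    dir_fd = os.open(dirpath, os.O_RDONLY | os.O_DIRECTORY | os.O_CLOEXEC)
    try:
        # anonymous file in the target directory, invisible until linked
        fd = os.open('.', os.O_WRONLY | os.O_TMPFILE, mode=0o644, dir_fd=dir_fd)
        try:
            os.write(fd, data)
            tmp = f'.{name}.new'
            try:
                os.unlink(tmp, dir_fd=dir_fd)  # delete any leftover
            except FileNotFoundError:
                pass
            # materialize the anonymous file; going through /proc/self/fd is
            # the documented unprivileged route (linkat with AT_EMPTY_PATH
            # would need CAP_DAC_READ_SEARCH)
            os.link(f'/proc/self/fd/{fd}', tmp, dst_dir_fd=dir_fd)
            # rename() clobbers atomically, which linkat() cannot do
            os.rename(tmp, name, src_dir_fd=dir_fd, dst_dir_fd=dir_fd)
        finally:
            os.close(fd)
    finally:
        os.close(dir_fd)

write_atomically('.', 'layer.zip', b'payload')  # usage example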
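The per-destination locking is likewise self-contained. A destination can be
shared by several layers and updated in parallel, so each worker takes an
exclusive flock() on a lockfile named after the SHA-256 of the destination
path. A minimal sketch, with the lock directory and destination as placeholder
values:

from fcntl import flock, LOCK_EX
from hashlib import sha256
import os

os.makedirs('/tmp/webmap-locks', exist_ok=True)  # placeholder lock directory
lockdir_fd = os.open('/tmp/webmap-locks',
                     os.O_RDONLY | os.O_DIRECTORY | os.O_CLOEXEC)
dest = 'cache/layer.zip'  # placeholder destination path

# hashing flattens nested destination paths into fixed-length, flat names
lockfile = sha256(dest.encode('utf-8')).hexdigest() + '.lck'
# O_TRUNC bumps the lockfile's mtime, recording when it was last taken
lock_fd = os.open(lockfile, os.O_WRONLY | os.O_CREAT | os.O_TRUNC | os.O_CLOEXEC,
                  mode=0o644, dir_fd=lockdir_fd)
try:
    flock(lock_fd, LOCK_EX)  # blocks while another process holds the lock
    # ... download or update dest here ...
finally:
    os.close(lock_fd)  # closing the descriptor releases the lock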