From d25c8b8b60fbfabf7544a3a5a44c105f0bc5b4da Mon Sep 17 00:00:00 2001 From: Guilhem Moulin Date: Sun, 2 Jun 2024 13:24:45 +0200 Subject: Rename script into module and run module.download(). Instead of using a dedicated executable. There is too much code duplication otherwise. --- common.py | 12 +++++------- config.yml | 8 ++++---- webmap-download | 35 +++++++++++++++++------------------ 3 files changed, 26 insertions(+), 29 deletions(-) diff --git a/common.py b/common.py index e4456af..44c8a53 100644 --- a/common.py +++ b/common.py @@ -7,10 +7,8 @@ from stat import S_ISDIR from xdg.BaseDirectory import xdg_config_home import logging import yaml -import __main__ as main def load_config(path=None, groupnames=None): - main_script = os.path.basename(main.__file__) if path is None: for p in [Path(), Path(xdg_config_home).joinpath('webmap'), @@ -36,14 +34,14 @@ def load_config(path=None, groupnames=None): download = source.get('download', None) if download is None: url = None - script = None + dl_module = None elif isinstance(download, str): url = download - script = None + dl_module = None source['download'] = download = { 'url': url } else: url = download.get('url', None) - script = download.get('script', None) + dl_module = download.get('module', None) if url is None: urlp = None else: @@ -73,9 +71,9 @@ def load_config(path=None, groupnames=None): cache = Path(cache) source['cache']['path'] = cache - v = { 'url': urlp, 'script': main_script if script is None else script } + v = { 'url': urlp, 'module': dl_module } if cache in destinations and destinations[cache] != v: - # allow destination conflicts, but only when the source URL and script match + # allow destination conflicts, but only when the source URL and module match raise Exception(f'Destination conflict for layer "{name}"') destinations[cache] = v diff --git a/config.yml b/config.yml index 5b6f842..029e60b 100644 --- a/config.yml +++ b/config.yml @@ -26,14 +26,14 @@ layers: # # error is raised when the payload 
size exceeds this value. # # (Default: 67108864, in other words 64MiB) # max-size: 1073741824 -# # source:download:script: Basename of the download script to use for -# # that layer. The entry is ignored when the main script doesn't match. -# script: webmap-download +# # source:download:module: Basename of the download module to use for +# # that layer. +# module: webmap-download # cache: # # source:cache:path: Local path (relative to --cachedir) where to # # (atomically) save the downloaded file. The same path can be used by # # multiple entries as long as their pairs (source:download:url, -# # source:download:script) match. Any parent directories are created if +# # source:download:module) match. Any parent directories are created if # # needed. # # If the path is empty or ends with a '/' character then it treated as a # # directory and the last component of source:download:url implicitly diff --git a/webmap-download b/webmap-download index f87cddf..be48bbb 100755 --- a/webmap-download +++ b/webmap-download @@ -30,7 +30,12 @@ def download_trystream(url, **kwargs): r.raise_for_status() return r -def download(url, dest, dir_fd=None, headers={}, max_size=None, session=requests, progress=None): +def download(url, dest, dir_fd=None, headers={}, session=requests, progress=None): + url = None if dl is None else dl.get('url', None) + if url is None: + logging.error('%s has no source URL, ignoring', dest) + return + max_size = dl.get('max-size', 2**26) # 64MiB logging.info('Downloading %s…', url) destPath = Path(dest) dest_tmp = destPath.with_stem(f'.{destPath.stem}.new').as_posix() @@ -175,24 +180,18 @@ if __name__ == '__main__': downloads = set() for source in sources: dl = source.get('download', None) - script = None if dl is None else dl.get('script', None) - if script is not None and script != os.path.basename(__file__): - logging.info('Layer "%s" is not for us (%s != %s), skipping', - source['layername'], - script, os.path.basename(__file__)) - continue - - url = 
None if dl is None else dl.get('url', None) - if url is None: - logging.error('Layer "%s" has no source URL, ignoring', - source['layername']) - continue + dl_module = None if dl is None else dl.get('module', None) + if dl_module is None: + fetch = download + else: + dl_module = __import__(dl_module) + fetch = dl_module.download cache = source.get('cache', None) dest = None if cache is None else cache.get('path', None) if dest is None: raise Exception('Impossible') - elif url in downloads: + elif dest in downloads: logging.info('%s was already downloaded, skipping', dest) continue @@ -231,16 +230,16 @@ if __name__ == '__main__': dest, format_time(s)) continue headers['If-Modified-Since'] = formatdate(timeval=st.st_mtime, localtime=False, usegmt=True) - max_size = dl.get('max-size', 2**26) # 64MiB - download(url, dest, dir_fd=destdir_fd, max_size=max_size, + fetch(dl, dest, dir_fd=destdir_fd, headers=headers, session=sessionRequests, progress=pbar) - downloads.add(url) + downloads.add(dest) finally: if lockdir_fd is not None: os.close(lock_fd) except Exception: - logging.exception('Could not download %s as %s', url, dest) + logging.exception('Could not download %s as %s', + dl.get('url', source['layername']), dest) if args.exit_code: rv = 1 exit(rv) -- cgit v1.2.3