From c654850f009f9a370eddc0d32a2de1a31c32de52 Mon Sep 17 00:00:00 2001
From: Guilhem Moulin
Date: Thu, 30 May 2024 22:28:55 +0200
Subject: webmap-download: Add a progress bar.

---
 webmap-download | 45 +++++++++++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 12 deletions(-)

diff --git a/webmap-download b/webmap-download
index 39c48c2..9d0861a 100755
--- a/webmap-download
+++ b/webmap-download
@@ -30,7 +30,7 @@ def download_trystream(url, **kwargs):
         r.raise_for_status()
         return r
 
-def download(url, dest, dir_fd=None, headers={}, max_size=None, session=requests):
+def download(url, dest, dir_fd=None, headers={}, max_size=None, session=requests, progress=None):
     logging.info('Downloading %s…', url)
     destPath = Path(dest)
     dest_tmp = destPath.with_stem(f'.{destPath.stem}.new').as_posix()
@@ -49,6 +49,7 @@ def download(url, dest, dir_fd=None, headers={}, max_size=None, session=requests
             logging.info('%s: %d Not Modified', dest, r.status_code)
             return
 
+        body_size = r.headers.get('Content-Length', None)
         last_modified = r.headers.get('Last-Modified', None)
         if last_modified is not None:
             try:
@@ -63,13 +64,30 @@ def download(url, dest, dir_fd=None, headers={}, max_size=None, session=requests
         fd = os.open(os.path.dirname(dest), O_WRONLY|O_CLOEXEC|O_TMPFILE, mode=0o644, dir_fd=dir_fd)
         with os.fdopen(fd, mode='wb') as fp:
             size = 0
-            for chunk in r.iter_content(chunk_size=2**16):
-                size = size + len(chunk)
-                if max_size is not None and size > max_size:
-                    raise Exception(f'Payload exceeds max-size ({max_size})')
-                fp.write(chunk)
+
+            if progress is not None:
+                tot = int(body_size) if body_size is not None else float('inf')
+                pbar = progress(total=tot, leave=False, unit_scale=True, unit_divisor=1024, unit='B')
+            else:
+                pbar = None
+
+            try:
+                for chunk in r.iter_content(chunk_size=2**16):
+                    chunk_size = len(chunk)
+                    if pbar is not None:
+                        pbar.update(chunk_size)
+                    size = size + chunk_size
+                    if max_size is not None and size > max_size:
+                        raise Exception(f'Payload exceeds max-size ({max_size})')
+                    fp.write(chunk)
+            finally:
+                if pbar is not None:
+                    pbar.close()
+
             r = None
-            end = time_monotonic()
+            elapsed = time_monotonic() - start
+            logging.info("%s: Downloaded %s in %s (%s/s)", dest, format_bytes(size),
+                         format_time(elapsed), format_bytes(int(size/elapsed)))
 
             # XXX unfortunately there is no way for linkat() to clobber the destination,
             # so we use a temporary file; it's racy, but thanks to O_TMPFILE better
@@ -86,10 +104,6 @@ def download(url, dest, dir_fd=None, headers={}, max_size=None, session=requests
                 os.utime(dest_tmp, times=(last_modified, last_modified),
                          dir_fd=dir_fd, follow_symlinks=False)
             os.rename(dest_tmp, dest, src_dir_fd=dir_fd, dst_dir_fd=dir_fd)
-
-        elapsed = end - start
-        logging.info("%s: Downloaded %s in %s (%s/s)", dest, format_bytes(size),
-                     format_time(elapsed), format_bytes(int(size/elapsed)))
     except Exception as e:
         try:
             os.unlink(dest_tmp, dir_fd=dir_fd)
@@ -144,6 +158,12 @@ if __name__ == '__main__':
                 sourcedef['layername'] = name
                 sources.append(sourcedef)
 
+    if sys.stderr.isatty():
+        from tqdm import tqdm
+        pbar = tqdm
+    else:
+        pbar = None
+
     # intentionally leave the dirfd open until the program terminates
     opendir_args = O_RDONLY|O_CLOEXEC|O_PATH|O_DIRECTORY
     destdir_fd = None if args.cachedir is None else os.open(args.cachedir, opendir_args)
@@ -224,7 +244,8 @@ if __name__ == '__main__':
                     headers['If-Modified-Since'] = formatdate(timeval=st.st_mtime, localtime=False, usegmt=True)
                 max_size = dl.get('max-size', 2**26) # 64MiB
                 download(url, dest, dir_fd=destdir_fd, max_size=max_size,
-                         headers=headers, session=sessionRequests)
+                         headers=headers, session=sessionRequests,
+                         progress=pbar)
                 downloads.add(url)
             finally:
                 os.close(lock_fd)
-- 
cgit v1.2.3
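
The progress hook added above follows a simple inversion-of-control pattern: the caller passes a tqdm-compatible constructor (or None when stderr is not a terminal), and download() drives it once per chunk and closes it when the transfer ends. A minimal standalone sketch of that pattern, for illustration only; the fetch() helper and the example URL are hypothetical and not part of webmap-download:

    #!/usr/bin/python3
    # Illustrative sketch only: fetch() and the example URL are hypothetical,
    # not part of webmap-download.
    import sys
    import requests
    from tqdm import tqdm

    def fetch(url, dest, progress=None):
        r = requests.get(url, stream=True, timeout=30)
        r.raise_for_status()
        total = r.headers.get('Content-Length', None)
        # tqdm accepts total=None and then only counts bytes, without a percentage
        pbar = None if progress is None else progress(
            total=int(total) if total is not None else None,
            leave=False, unit_scale=True, unit_divisor=1024, unit='B')
        try:
            with open(dest, 'wb') as fp:
                for chunk in r.iter_content(chunk_size=2**16):
                    if pbar is not None:
                        pbar.update(len(chunk))
                    fp.write(chunk)
        finally:
            if pbar is not None:
                pbar.close()

    if __name__ == '__main__':
        # as in the patch, only show a progress bar when stderr is a terminal
        fetch('https://example.com/data.bin', 'data.bin',
              progress=tqdm if sys.stderr.isatty() else None)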