about summary refs log tree commit diff stats
path: root/webmap-download
diff options
context:
space:
mode:
author	Guilhem Moulin <guilhem@fripost.org>	2025-04-19 13:27:49 +0200
committer	Guilhem Moulin <guilhem@fripost.org>	2025-04-23 12:09:24 +0200
commite5e8a6548ef156b785aae1b8a37fe71f26146061 (patch)
treeff774b2dbccb133f0f75d4731de9e302dfcc59bf /webmap-download
parentc33799f69e7eb42cb0ab4735c7e878d74faca16a (diff)
webmap-import: Add a cache layer and store the source file's last modification time.
That way we can avoid the expensive unpack+import when the source file(s) have not been updated since the last run. The check can be bypassed with a new flag `--force`. We use a sequence for the FIDs (primary key) and a UNIQUE constraint on triplets (source_path, archive_member, layername) as GDAL doesn't support multi-column primary keys. To avoid races between the stat(2) calls, gdal.OpenEx() and updates via `webmap-download` runs, we place a shared lock on the downloaded files. One could resort to some tricks to eliminate the race between the first two, but there is also some value in having consistency during the entire execution of the script (a single source file can be used by multiple layers for instance, and it makes sense to use the very same file for all layers in that case). We also intersperse dso.FlushCache() calls between _importSource() calls in order to force the PG driver to call EndCopy() to detect errors and trigger a rollback when _importSource() fails.
Diffstat (limited to 'webmap-download')
-rwxr-xr-xwebmap-download7
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/webmap-download b/webmap-download
index edb624e..05aa2c4 100755
--- a/webmap-download
+++ b/webmap-download
@@ -44,12 +44,11 @@ import argparse
import itertools
from pathlib import Path
from email.utils import parsedate_to_datetime, formatdate
-from hashlib import sha256
from typing import Optional, NoReturn, Never
import requests
import common
-from common import BadConfiguration
+from common import BadConfiguration, getSourcePathLockFileName
def download_trystream(url : str, **kwargs) -> requests.Response:
"""GET a url, trying a number of times. Return immediately after the
@@ -309,9 +308,9 @@ def main() -> NoReturn:
# place an exclusive lock on a lockfile as the destination can be used by other layers
# hence might be updated in parallel
if lockdir_fd is not None:
- lockfile = sha256(dest.encode('utf-8')).hexdigest() + '.lck'
+ lockfile = getSourcePathLockFileName(dest)
# use O_TRUNC to bump lockfile's mtime
- lock_fd = os.open(lockfile, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o644,
+ lock_fd = os.open(lockfile, O_WRONLY|O_CREAT|O_TRUNC|O_CLOEXEC, mode=0o664,
dir_fd=lockdir_fd)
try:
if lockdir_fd is not None: