aboutsummaryrefslogtreecommitdiffstats
path: root/common.py
diff options
context:
space:
mode:
authorGuilhem Moulin <guilhem@fripost.org>2025-04-19 13:27:49 +0200
committerGuilhem Moulin <guilhem@fripost.org>2025-04-23 12:09:24 +0200
commite5e8a6548ef156b785aae1b8a37fe71f26146061 (patch)
treeff774b2dbccb133f0f75d4731de9e302dfcc59bf /common.py
parentc33799f69e7eb42cb0ab4735c7e878d74faca16a (diff)
webmap-import: Add a cache layer and store the source file's last modification time.
That way we can avoid the expensive unpack+import when the source file(s) have not been updated since the last run. The check can be bypassed with a new flag `--force`. We use a sequence for the FIDs (primary key) and a UNIQUE constraint on triplets (source_path, archive_member, layername), as GDAL doesn't support multi-column primary keys. To avoid races between the stat(2) calls, gdal.OpenEx(), and updates via `webmap-download` runs, we place a shared lock on the downloaded files. One could resort to some tricks to eliminate the race between the first two, but there is also some value in having consistency during the entire execution of the script (a single source file can be used by multiple layers, for instance, and it makes sense to use the very same file for all layers in that case). We also intersperse dso.FlushCache() calls between _importSource() calls in order to force the PG driver to call EndCopy(), so that errors are detected and a rollback is triggered when _importSource() fails.
Diffstat (limited to 'common.py')
-rw-r--r--common.py16
1 files changed, 16 insertions, 0 deletions
diff --git a/common.py b/common.py
index eab9dd5..b1d14ba 100644
--- a/common.py
+++ b/common.py
@@ -27,6 +27,7 @@ from stat import S_ISDIR
import math
import logging
from typing import Any, Optional, Never
+from hashlib import sha256
from xdg.BaseDirectory import xdg_config_home
import yaml
@@ -143,6 +144,11 @@ def parse_config(path : Optional[Path] = None,
return config
+# pylint: disable-next=invalid-name
+def getSourcePathLockFileName(path : str) -> str:
+ """Return the name of the lockfile associated with a source path.
+
+ The name is the hexadecimal SHA-256 digest of the UTF-8 encoded path,
+ followed by the suffix '.lck'.  The result is a bare file name: this
+ function does not prepend any directory to it."""
+ return sha256(path.encode('utf-8')).hexdigest() + '.lck'
+
def format_bytes(n : int, threshold : int = 768, precision : int = 2) -> str:
"""Format a number of bytes to a SI unit"""
@@ -174,6 +180,16 @@ def escape_identifier(identifier : str) -> str:
# SQL:1999 delimited identifier
return '"' + identifier.replace('"', '""') + '"'
+def escape_literal_str(literal : str) -> str:
+ """Escape the given character string literal, cf.
+ swig/python/gdal-utils/osgeo_utils/samples/validate_gpkg.py:_esc_literal().
+
+ The literal is wrapped in single quotes and each embedded single quote
+ is doubled.  A RuntimeError is raised if the literal is None or if it
+ contains a NUL character."""
+
+ # Test for None first: the membership test would fail on None
+ if literal is None or '\x00' in literal:
+ raise RuntimeError(f'Invalid literal "{literal}"')
+
+ # SQL:1999 character string literal
+ return '\'' + literal.replace('\'', '\'\'') + '\''
+
######
# The function definitions below are taken from cpython's source code