aboutsummaryrefslogtreecommitdiffstats
path: root/webmap-import
diff options
context:
space:
mode:
authorGuilhem Moulin <guilhem@fripost.org>2024-06-19 04:29:26 +0200
committerGuilhem Moulin <guilhem@fripost.org>2024-06-19 12:05:12 +0200
commitceb76b0893b5a0cbfeab269d373b6bb656222b69 (patch)
tree3498a8054c6d4e1a5fbfb4268f67a21210634c04 /webmap-import
parentae01b2aae08033a4183b7c601dbf0381362f346b (diff)
Add logic for field regex substitution.
This is useful to replace a YYYYMMDD-formatted date with YYYY-MM-DD. The target field can then be set to not-nullable and its type set to Date, as the OGR_F_SetField*() functions will take care of the conversion. We could also do that via an SQL query, but in our case the sources are not proper RDBMS, so SQL is emulated anyway.
Diffstat (limited to 'webmap-import')
-rwxr-xr-xwebmap-import100
1 files changed, 74 insertions, 26 deletions
diff --git a/webmap-import b/webmap-import
index b97b275..731c49f 100755
--- a/webmap-import
+++ b/webmap-import
@@ -758,9 +758,16 @@ def setOutputFieldMap(defn, sources):
for idx, rule in enumerate(rules):
if rule is None or not isinstance(rule, dict):
raise Exception(f'Field "{fldName}" has invalid rule #{idx}: {rule}')
- if 'from' not in rule or 'to' not in rule or len(rule) != 2:
+ if 'type' not in rule:
+ ruleType = rule['type'] = 'literal'
+ else:
+ ruleType = rule['type']
+ if ('replace' not in rule or 'with' not in rule or len(rule) != 3 or
+ ruleType is None or ruleType not in ['literal', 'regex']):
raise Exception(f'Field "{fldName}" has invalid rule #{idx}: {rule}')
- rules[idx] = ( rule['from'], rule['to'] )
+ if ruleType == 'regex':
+ rule['replace'] = re.compile(rule['replace'])
+ rules[idx] = ( rule['replace'], rule['with'] )
# Escape the given identifier, cf.
# swig/python/gdal-utils/osgeo_utils/samples/validate_gpkg.py:_esc_id()
@@ -1002,8 +1009,8 @@ def importSource2(lyr_dst, path, args={}, basedir=None, extent=None):
else:
logging.info('Source layer "%s" has %d features', layername, count0)
- # build a list of pairs (field index, mapping_dict)
- valueMapLiteral = []
+ # build a list of triplets (field index, replacement_for_null, [(from_value, to_value), …])
+ valueMap = []
for fldName, rules in args.get('value-map', {}).items():
i = defn.GetFieldIndex(fldName)
if i < 0:
@@ -1012,20 +1019,34 @@ def importSource2(lyr_dst, path, args={}, basedir=None, extent=None):
logging.warning('Ignored source field "%s" has value map', fldName)
continue
- h = {}
+ hasNullReplacement = False
+ nullReplacement = None
+ mapping = []
fld = defn.GetFieldDefn(i)
for idx, (rFrom, rTo) in enumerate(rules):
- # use fld for both from and to (the types must match,
- # casting is not allowed in the mapping)
- rFrom = setFieldMapValue(fld, idx, rFrom)
- rTo = setFieldMapValue(fld, idx, rTo)
- h[rFrom] = rTo
-
- if len(h) > 0:
- valueMapLiteral.append((i, h))
+ # use fld for both 'from' and 'to' (the types must match, casting is not allowed in the mapping)
+ if rFrom is None:
+ if hasNullReplacement:
+ logging.warning('Field "%s" has duplicate NULL replacement', fld.GetName())
+ else:
+ setFieldMapValue(fld, idx, None) # validate NULL
+ rTo = setFieldMapValue(fld, idx, rTo)
+ hasNullReplacement = True
+ nullReplacement = rTo
+ elif isinstance(rFrom, re.Pattern):
+ # validate but keep the rFrom regex
+ setFieldMapValue(fld, idx, str(rFrom))
+ rTo = setFieldMapValue(fld, idx, rTo)
+ mapping.append( (rFrom, rTo, 1) )
+ else:
+ rFrom = setFieldMapValue(fld, idx, rFrom)
+ rTo = setFieldMapValue(fld, idx, rTo)
+ mapping.append( (rFrom, rTo, 0) )
- bValueMapLiteral = len(valueMapLiteral) > 0
+ if nullReplacement is not None or len(mapping) > 0:
+ valueMap.append( (i, nullReplacement, mapping) )
+ bValueMap = len(valueMap) > 0
defn = None
defn_dst = lyr_dst.GetLayerDefn()
@@ -1034,31 +1055,48 @@ def importSource2(lyr_dst, path, args={}, basedir=None, extent=None):
eGType_dst_HasM = ogr.GT_HasM(eGType_dst)
dGeomIsUnknown = ogr.GT_Flatten(eGType_dst) == ogr.wkbUnknown
+ if bValueMap:
+ valueMapCounts = [0] * fieldCount
+
n = 0
mismatch = {}
feature = lyr.GetNextFeature()
while feature is not None:
- if bValueMapLiteral:
- for i, h in valueMapLiteral:
+ if bValueMap:
+ for i, nullReplacement, mapping in valueMap:
if not feature.IsFieldSet(i):
continue
elif feature.IsFieldNull(i):
- if None in h:
- v = h[None]
- if v is not None:
- # replace NULL with non-NULL value
- feature.SetField(i, v)
+ if nullReplacement is not None:
+ # replace NULL with non-NULL value
+ feature.SetField(i, nullReplacement)
+ valueMapCounts[i] += 1
continue
v = feature.GetField(i)
- if v in h:
- v2 = h[v]
- if v2 is None:
+ for rFrom, rTo, rType in mapping:
+ if rType == 0:
+ # literal
+ if v != rFrom:
+ continue
+ elif rType == 1:
+ # regex
+ m = rFrom.fullmatch(v)
+ if m is None:
+ continue
+ elif rTo is not None:
+ rTo = rTo.format(*m.groups())
+ else:
+ raise Exception(str(rType))
+
+ if rTo is None:
# replace non-NULL value with NULL
feature.SetFieldNull(i)
else:
# replace non-NULL value with non-NULL value
- feature.SetField(i, v2)
+ feature.SetField(i, rTo)
+ valueMapCounts[i] += 1
+ break
feature2 = ogr.Feature(defn_dst)
feature2.SetFromWithMap(feature, False, fieldMap)
@@ -1092,11 +1130,21 @@ def importSource2(lyr_dst, path, args={}, basedir=None, extent=None):
n += 1
feature = lyr.GetNextFeature()
+ if bValueMap:
+ valueMapCounts = [ (lyr.GetLayerDefn().GetFieldDefn(i).GetName(), k) for i,k in enumerate(valueMapCounts) if k > 0 ]
+
lyr = None
logging.info('Imported %d features from source layer "%s"', n, layername)
+ if bValueMap:
+ if len(valueMapCounts) > 0:
+ valueMapCounts = ', '.join([ str(k) + '× "' + n + '"' for n,k in valueMapCounts ])
+ else:
+ valueMapCounts = '-'
+ logging.info('Field substitutions: %s', valueMapCounts)
+
if len(mismatch) > 0:
- mismatches = [ str(n) + '× ' + ogr.GeometryTypeToName(t)
+ mismatches = [ str(n) + '× ' + ogr.GeometryTypeToName(t)
for t,n in sorted(mismatch.items(), key=lambda x: x[1]) ]
logging.info('Forced conversion to %s: %s',
ogr.GeometryTypeToName(eGType_dst), ', '.join(mismatches))