diff options
author | Guilhem Moulin <guilhem@fripost.org> | 2024-06-19 04:29:26 +0200 |
---|---|---|
committer | Guilhem Moulin <guilhem@fripost.org> | 2024-06-19 12:05:12 +0200 |
commit | ceb76b0893b5a0cbfeab269d373b6bb656222b69 (patch) | |
tree | 3498a8054c6d4e1a5fbfb4268f67a21210634c04 /webmap-import | |
parent | ae01b2aae08033a4183b7c601dbf0381362f346b (diff) |
Add logic for field regex substitution.
This is useful to replace a YYYYMMDD formatted date with YYYY-MM-DD.
The target field can then be set to not-nullable and its type set to
Date, as the OGR_F_SetField*() will take care of the conversion.
We could also do that via an SQL query, but in our case the sources are
not proper RDBMS so SQL is emulated anyway.
Diffstat (limited to 'webmap-import')
-rwxr-xr-x | webmap-import | 100 |
1 files changed, 74 insertions, 26 deletions
diff --git a/webmap-import b/webmap-import index b97b275..731c49f 100755 --- a/webmap-import +++ b/webmap-import @@ -758,9 +758,16 @@ def setOutputFieldMap(defn, sources): for idx, rule in enumerate(rules): if rule is None or not isinstance(rule, dict): raise Exception(f'Field "{fldName}" has invalid rule #{idx}: {rule}') - if 'from' not in rule or 'to' not in rule or len(rule) != 2: + if 'type' not in rule: + ruleType = rule['type'] = 'literal' + else: + ruleType = rule['type'] + if ('replace' not in rule or 'with' not in rule or len(rule) != 3 or + ruleType is None or ruleType not in ['literal', 'regex']): raise Exception(f'Field "{fldName}" has invalid rule #{idx}: {rule}') - rules[idx] = ( rule['from'], rule['to'] ) + if ruleType == 'regex': + rule['replace'] = re.compile(rule['replace']) + rules[idx] = ( rule['replace'], rule['with'] ) # Escape the given identifier, cf. # swig/python/gdal-utils/osgeo_utils/samples/validate_gpkg.py:_esc_id() @@ -1002,8 +1009,8 @@ def importSource2(lyr_dst, path, args={}, basedir=None, extent=None): else: logging.info('Source layer "%s" has %d features', layername, count0) - # build a list of pairs (field index, mapping_dict) - valueMapLiteral = [] + # build a list of triplets (field index, replacement_for_null, [(from_value, to_value), …]) + valueMap = [] for fldName, rules in args.get('value-map', {}).items(): i = defn.GetFieldIndex(fldName) if i < 0: @@ -1012,20 +1019,34 @@ def importSource2(lyr_dst, path, args={}, basedir=None, extent=None): logging.warning('Ignored source field "%s" has value map', fldName) continue - h = {} + hasNullReplacement = False + nullReplacement = None + mapping = [] fld = defn.GetFieldDefn(i) for idx, (rFrom, rTo) in enumerate(rules): - # use fld for both from and to (the types must match, - # casting is not allowed in the mapping) - rFrom = setFieldMapValue(fld, idx, rFrom) - rTo = setFieldMapValue(fld, idx, rTo) - h[rFrom] = rTo - - if len(h) > 0: - valueMapLiteral.append((i, h)) + # use fld for 
both 'from' and 'to' (the types must match, casting is not allowed in the mapping) + if rFrom is None: + if hasNullReplacement: + logging.warning('Field "%s" has duplicate NULL replacement', fld.GetName()) + else: + setFieldMapValue(fld, idx, None) # validate NULL + rTo = setFieldMapValue(fld, idx, rTo) + hasNullReplacement = True + nullReplacement = rTo + elif isinstance(rFrom, re.Pattern): + # validate but keep the rFrom regex + setFieldMapValue(fld, idx, str(rFrom)) + rTo = setFieldMapValue(fld, idx, rTo) + mapping.append( (rFrom, rTo, 1) ) + else: + rFrom = setFieldMapValue(fld, idx, rFrom) + rTo = setFieldMapValue(fld, idx, rTo) + mapping.append( (rFrom, rTo, 0) ) - bValueMapLiteral = len(valueMapLiteral) > 0 + if nullReplacement is not None or len(mapping) > 0: + valueMap.append( (i, nullReplacement, mapping) ) + bValueMap = len(valueMap) > 0 defn = None defn_dst = lyr_dst.GetLayerDefn() @@ -1034,31 +1055,48 @@ def importSource2(lyr_dst, path, args={}, basedir=None, extent=None): eGType_dst_HasM = ogr.GT_HasM(eGType_dst) dGeomIsUnknown = ogr.GT_Flatten(eGType_dst) == ogr.wkbUnknown + if bValueMap: + valueMapCounts = [0] * fieldCount + n = 0 mismatch = {} feature = lyr.GetNextFeature() while feature is not None: - if bValueMapLiteral: - for i, h in valueMapLiteral: + if bValueMap: + for i, nullReplacement, mapping in valueMap: if not feature.IsFieldSet(i): continue elif feature.IsFieldNull(i): - if None in h: - v = h[None] - if v is not None: - # replace NULL with non-NULL value - feature.SetField(i, v) + if nullReplacement is not None: + # replace NULL with non-NULL value + feature.SetField(i, nullReplacement) + valueMapCounts[i] += 1 continue v = feature.GetField(i) - if v in h: - v2 = h[v] - if v2 is None: + for rFrom, rTo, rType in mapping: + if rType == 0: + # literal + if v != rFrom: + continue + elif rType == 1: + # regex + m = rFrom.fullmatch(v) + if m is None: + continue + elif rTo is not None: + rTo = rTo.format(*m.groups()) + else: + raise 
Exception(str(rType)) + + if rTo is None: # replace non-NULL value with NULL feature.SetFieldNull(i) else: # replace non-NULL value with non-NULL value - feature.SetField(i, v2) + feature.SetField(i, rTo) + valueMapCounts[i] += 1 + break feature2 = ogr.Feature(defn_dst) feature2.SetFromWithMap(feature, False, fieldMap) @@ -1092,11 +1130,21 @@ def importSource2(lyr_dst, path, args={}, basedir=None, extent=None): n += 1 feature = lyr.GetNextFeature() + if bValueMap: + valueMapCounts = [ (lyr.GetLayerDefn().GetFieldDefn(i).GetName(), k) for i,k in enumerate(valueMapCounts) if k > 0 ] + lyr = None logging.info('Imported %d features from source layer "%s"', n, layername) + if bValueMap: + if len(valueMapCounts) > 0: + valueMapCounts = ', '.join([ str(k) + '× "' + n + '"' for n,k in valueMapCounts ]) + else: + valueMapCounts = '-' + logging.info('Field substitutions: %s', valueMapCounts) + if len(mismatch) > 0: - mismatches = [ str(n) + '× ' + ogr.GeometryTypeToName(t) + mismatches = [ str(n) + '× ' + ogr.GeometryTypeToName(t) for t,n in sorted(mismatch.items(), key=lambda x: x[1]) ] logging.info('Forced conversion to %s: %s', ogr.GeometryTypeToName(eGType_dst), ', '.join(mismatches)) |