aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGuilhem Moulin <guilhem@fripost.org>2024-06-19 04:29:26 +0200
committerGuilhem Moulin <guilhem@fripost.org>2024-06-19 12:05:12 +0200
commitceb76b0893b5a0cbfeab269d373b6bb656222b69 (patch)
tree3498a8054c6d4e1a5fbfb4268f67a21210634c04
parentae01b2aae08033a4183b7c601dbf0381362f346b (diff)
Add logic for field regex substitution.
This is useful to replace a YYYYMMDD formatted date with YYYY-MM-DD. The target field can then be set to not-nullable and its type set to Date, as the OGR_F_SetField*() with take care of the conversion. We could also do that via an SQL query, but in our case the sources are not proper RDBMS so SQL is emulated anyway.
-rw-r--r--config.yml134
-rw-r--r--schema.sql16
-rwxr-xr-xwebmap-import100
3 files changed, 161 insertions, 89 deletions
diff --git a/config.yml b/config.yml
index 183599c..60b9a20 100644
--- a/config.yml
+++ b/config.yml
@@ -231,18 +231,23 @@ layers:
# source_field_name1: field_name1
# source_field_name2: field_name2
#
-# # Mapping of (source) field values, to replace particular
-# # values with something else. Useful for instance to replace
-# # the empty string with NULL.
+# # Mapping of (source) field values, to replace literal values or values (fully)
+# # matching a regular expression with something else. Useful for instance to
+# # replace the empty string with NULL. Rules are tested in the order given and
+# # stop at the first match.
# value-map:
# source_field_name1:
-# - from: ''
-# to: null
-# - from: 'foo'
-# to: 'bar'
+# - replace: ''
+# with: null
+# #type: literal
+# - replace: 'foo'
+# with: 'bar'
+# - replace: 'fo[a-z]'
+# with: 'baz'
+# type: regex
# source_field_name2:
-# - from: 0
-# to: 1
+# - replace: 0
+# with: 1
'lansyta':
description: Sveriges län (Lantmäteriet)
@@ -626,8 +631,8 @@ layers:
Arealha: ArealHa
value-map:
Beteckn:
- - from: 'Visas ej'
- to: null
+ - replace: 'Visas ej'
+ with: null
- source:
download:
@@ -661,8 +666,8 @@ layers:
Arealha: ArealHa
value-map:
Beteckn:
- - from: 'Visas ej'
- to: null
+ - replace: 'Visas ej'
+ with: null
- source:
download:
@@ -696,8 +701,8 @@ layers:
Arealha: ArealHa
value-map:
Beteckn:
- - from: 'Visas ej'
- to: null
+ - replace: 'Visas ej'
+ with: null
- source:
download:
@@ -729,8 +734,8 @@ layers:
Arealha: ArealHa
value-map:
Beteckn:
- - from: 'Visas ej'
- to: null
+ - replace: 'Visas ej'
+ with: null
'st:betesomraden':
source:
@@ -784,9 +789,7 @@ layers:
nullable: false
width: 16
- name: LastUpdated
- # TODO convert to Date
- type: String
- width: 10
+ type: Date
nullable: false
#comment: datum för senaste uppdatering
source:
@@ -804,6 +807,11 @@ layers:
'Application date': ApplicationDate
'Diary nr': DiaryNr
'Last updated': LastUpdated
+ value-map:
+ 'Last updated':
+ - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
+ with: '{0}-{1}-{2}'
+ type: regex
'mrr:bearbetningskoncessioner_approved':
description: Bearbetningskoncessioner, beviljade (SGU)
@@ -839,9 +847,7 @@ layers:
unique: true
width: 16
- name: LastUpdated
- # TODO convert to Date
- type: String
- width: 10
+ type: Date
nullable: false
#comment: datum för senaste uppdatering
source:
@@ -862,8 +868,12 @@ layers:
'Last updated': LastUpdated
value-map:
'Diary nr':
- - from: ''
- to: null
+ - replace: ''
+ with: null
+ 'Last updated':
+ - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
+ with: '{0}-{1}-{2}'
+ type: regex
'mrr:markanvisningar':
description: Markanvisning till koncession (SGU)
@@ -883,9 +893,7 @@ layers:
unique: true
width: 16
- name: LastUpdated
- # TODO convert to Date
- type: String
- width: 10
+ type: Date
nullable: false
#comment: datum för senaste uppdatering
source:
@@ -903,11 +911,15 @@ layers:
'Last updated': LastUpdated
value-map:
'Decision date':
- - from: ''
- to: null
+ - replace: ''
+ with: null
'Diary nr':
- - from: ''
- to: null
+ - replace: ''
+ with: null
+ 'Last updated':
+ - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
+ with: '{0}-{1}-{2}'
+ type: regex
'mrr:mineral_applied':
description: Undersökningstillstånd, metallar och mineral, ansökta (SGU)
@@ -939,9 +951,7 @@ layers:
nullable: false
width: 16
- name: LastUpdated
- # TODO convert to Date
- type: String
- width: 10
+ type: Date
nullable: false
#comment: datum för senaste uppdatering
source:
@@ -959,6 +969,11 @@ layers:
'Application date': ApplicationDate
'Diary nr': DiaryNr
'Last updated': LastUpdated
+ value-map:
+ 'Last updated':
+ - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
+ with: '{0}-{1}-{2}'
+ type: regex
'mrr:mineral_approved':
description: Undersökningstillstånd, metallar och mineral, beviljade (SGU)
@@ -999,9 +1014,7 @@ layers:
unique: true
width: 16
- name: LastUpdated
- # TODO convert to Date
- type: String
- width: 10
+ type: Date
nullable: false
#comment: datum för senaste uppdatering
source:
@@ -1023,8 +1036,12 @@ layers:
'Last updated': LastUpdated
value-map:
'Diary nr':
- - from: ''
- to: null
+ - replace: ''
+ with: null
+ 'Last updated':
+ - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
+ with: '{0}-{1}-{2}'
+ type: regex
# 'mrr:mineral_expired':
# source:
@@ -1083,9 +1100,7 @@ layers:
nullable: false
width: 16
- name: LastUpdated
- # TODO convert to Date
- type: String
- width: 10
+ type: Date
nullable: false
#comment: datum för senaste uppdatering
source:
@@ -1103,6 +1118,11 @@ layers:
'Application date': ApplicationDate
'Diary nr': DiaryNr
'Last updated': LastUpdated
+ value-map:
+ 'Last updated':
+ - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
+ with: '{0}-{1}-{2}'
+ type: regex
'mrr:olja_gas_diamant_approved':
description: Undersökningstillstånd, olja, gas och diamant, beviljade (SGU)
@@ -1143,9 +1163,7 @@ layers:
unique: true
width: 16
- name: LastUpdated
- # TODO convert to Date
- type: String
- width: 10
+ type: Date
nullable: false
#comment: datum för senaste uppdatering
source:
@@ -1167,8 +1185,12 @@ layers:
'Last updated': LastUpdated
value-map:
'Diary nr':
- - from: ''
- to: null
+ - replace: ''
+ with: null
+ 'Last updated':
+ - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
+ with: '{0}-{1}-{2}'
+ type: regex
'mrr:torvkoncessioner':
description: Torvkoncessioner (SGU)
@@ -1203,9 +1225,7 @@ layers:
unique: true
width: 16
- name: LastUpdated
- # TODO convert to Date
- type: String
- width: 10
+ type: Date
nullable: false
#comment: datum för senaste uppdatering
source:
@@ -1226,8 +1246,12 @@ layers:
'Last updated': LastUpdated
value-map:
'Mineral':
- - from: ''
- to: null
+ - replace: ''
+ with: null
'Diary nr':
- - from: ''
- to: null
+ - replace: ''
+ with: null
+ 'Last updated':
+ - replace: '([0-9]{4})([0-9]{2})([0-9]{2})'
+ with: '{0}-{1}-{2}'
+ type: regex
diff --git a/schema.sql b/schema.sql
index 6fbef97..6b4e773 100644
--- a/schema.sql
+++ b/schema.sql
@@ -170,7 +170,7 @@ CREATE TABLE postgis."mrr:bearbetningskoncessioner_applied" (
"Applicant" character varying(254) NOT NULL,
"ApplicationDate" date NOT NULL,
"DiaryNr" character(16) NOT NULL,
- "LastUpdated" character(10) NOT NULL,
+ "LastUpdated" date NOT NULL,
wkb_geometry postgis.geometry(MultiPolygon,3006)
);
@@ -217,7 +217,7 @@ CREATE TABLE postgis."mrr:bearbetningskoncessioner_approved" (
"ValidFrom" date NOT NULL,
"ValidTo" date NOT NULL,
"DiaryNr" character(16),
- "LastUpdated" character(10) NOT NULL,
+ "LastUpdated" date NOT NULL,
wkb_geometry postgis.geometry(MultiPolygon,3006)
);
@@ -261,7 +261,7 @@ CREATE TABLE postgis."mrr:markanvisningar" (
"Name" character varying(254) NOT NULL,
"DecisionDate" date,
"DiaryNr" character(16),
- "LastUpdated" character(10) NOT NULL,
+ "LastUpdated" date NOT NULL,
wkb_geometry postgis.geometry(MultiPolygon,3006)
);
@@ -307,7 +307,7 @@ CREATE TABLE postgis."mrr:mineral_applied" (
"Applicant" character varying(254) NOT NULL,
"ApplicationDate" date NOT NULL,
"DiaryNr" character(16) NOT NULL,
- "LastUpdated" character(10) NOT NULL,
+ "LastUpdated" date NOT NULL,
wkb_geometry postgis.geometry(MultiPolygon,3006)
);
@@ -355,7 +355,7 @@ CREATE TABLE postgis."mrr:mineral_approved" (
"ValidFrom" date NOT NULL,
"ValidTo" date NOT NULL,
"DiaryNr" character(16),
- "LastUpdated" character(10) NOT NULL,
+ "LastUpdated" date NOT NULL,
wkb_geometry postgis.geometry(MultiPolygon,3006)
);
@@ -401,7 +401,7 @@ CREATE TABLE postgis."mrr:olja_gas_diamant_applied" (
"Applicant" character varying(254) NOT NULL,
"ApplicationDate" date NOT NULL,
"DiaryNr" character(16) NOT NULL,
- "LastUpdated" character(10) NOT NULL,
+ "LastUpdated" date NOT NULL,
wkb_geometry postgis.geometry(MultiPolygon,3006)
);
@@ -449,7 +449,7 @@ CREATE TABLE postgis."mrr:olja_gas_diamant_approved" (
"ValidFrom" date NOT NULL,
"ValidTo" date NOT NULL,
"DiaryNr" character(16),
- "LastUpdated" character(10) NOT NULL,
+ "LastUpdated" date NOT NULL,
wkb_geometry postgis.geometry(MultiPolygon,3006)
);
@@ -496,7 +496,7 @@ CREATE TABLE postgis."mrr:torvkoncessioner" (
"ValidFrom" date NOT NULL,
"ValidTo" date NOT NULL,
"DiaryNr" character(16),
- "LastUpdated" character(10) NOT NULL,
+ "LastUpdated" date NOT NULL,
wkb_geometry postgis.geometry(MultiPolygon,3006)
);
diff --git a/webmap-import b/webmap-import
index b97b275..731c49f 100755
--- a/webmap-import
+++ b/webmap-import
@@ -758,9 +758,16 @@ def setOutputFieldMap(defn, sources):
for idx, rule in enumerate(rules):
if rule is None or not isinstance(rule, dict):
raise Exception(f'Field "{fldName}" has invalid rule #{idx}: {rule}')
- if 'from' not in rule or 'to' not in rule or len(rule) != 2:
+ if 'type' not in rule:
+ ruleType = rule['type'] = 'literal'
+ else:
+ ruleType = rule['type']
+ if ('replace' not in rule or 'with' not in rule or len(rule) != 3 or
+ ruleType is None or ruleType not in ['literal', 'regex']):
raise Exception(f'Field "{fldName}" has invalid rule #{idx}: {rule}')
- rules[idx] = ( rule['from'], rule['to'] )
+ if ruleType == 'regex':
+ rule['replace'] = re.compile(rule['replace'])
+ rules[idx] = ( rule['replace'], rule['with'] )
# Escape the given identifier, cf.
# swig/python/gdal-utils/osgeo_utils/samples/validate_gpkg.py:_esc_id()
@@ -1002,8 +1009,8 @@ def importSource2(lyr_dst, path, args={}, basedir=None, extent=None):
else:
logging.info('Source layer "%s" has %d features', layername, count0)
- # build a list of pairs (field index, mapping_dict)
- valueMapLiteral = []
+ # build a list of triplets (field index, replacement_for_null, [(from_value, to_value), …])
+ valueMap = []
for fldName, rules in args.get('value-map', {}).items():
i = defn.GetFieldIndex(fldName)
if i < 0:
@@ -1012,20 +1019,34 @@ def importSource2(lyr_dst, path, args={}, basedir=None, extent=None):
logging.warning('Ignored source field "%s" has value map', fldName)
continue
- h = {}
+ hasNullReplacement = False
+ nullReplacement = None
+ mapping = []
fld = defn.GetFieldDefn(i)
for idx, (rFrom, rTo) in enumerate(rules):
- # use fld for both from and to (the types must match,
- # casting is not allowed in the mapping)
- rFrom = setFieldMapValue(fld, idx, rFrom)
- rTo = setFieldMapValue(fld, idx, rTo)
- h[rFrom] = rTo
-
- if len(h) > 0:
- valueMapLiteral.append((i, h))
+ # use fld for both 'from' and 'to' (the types must match, casting is not allowed in the mapping)
+ if rFrom is None:
+ if hasNullReplacement:
+ logging.warning('Field "%s" has duplicate NULL replacement', fld.GetName())
+ else:
+ setFieldMapValue(fld, idx, None) # validate NULL
+ rTo = setFieldMapValue(fld, idx, rTo)
+ hasNullReplacement = True
+ nullReplacement = rTo
+ elif isinstance(rFrom, re.Pattern):
+ # validate but keep the rFrom regex
+ setFieldMapValue(fld, idx, str(rFrom))
+ rTo = setFieldMapValue(fld, idx, rTo)
+ mapping.append( (rFrom, rTo, 1) )
+ else:
+ rFrom = setFieldMapValue(fld, idx, rFrom)
+ rTo = setFieldMapValue(fld, idx, rTo)
+ mapping.append( (rFrom, rTo, 0) )
- bValueMapLiteral = len(valueMapLiteral) > 0
+ if nullReplacement is not None or len(mapping) > 0:
+ valueMap.append( (i, nullReplacement, mapping) )
+ bValueMap = len(valueMap) > 0
defn = None
defn_dst = lyr_dst.GetLayerDefn()
@@ -1034,31 +1055,48 @@ def importSource2(lyr_dst, path, args={}, basedir=None, extent=None):
eGType_dst_HasM = ogr.GT_HasM(eGType_dst)
dGeomIsUnknown = ogr.GT_Flatten(eGType_dst) == ogr.wkbUnknown
+ if bValueMap:
+ valueMapCounts = [0] * fieldCount
+
n = 0
mismatch = {}
feature = lyr.GetNextFeature()
while feature is not None:
- if bValueMapLiteral:
- for i, h in valueMapLiteral:
+ if bValueMap:
+ for i, nullReplacement, mapping in valueMap:
if not feature.IsFieldSet(i):
continue
elif feature.IsFieldNull(i):
- if None in h:
- v = h[None]
- if v is not None:
- # replace NULL with non-NULL value
- feature.SetField(i, v)
+ if nullReplacement is not None:
+ # replace NULL with non-NULL value
+ feature.SetField(i, nullReplacement)
+ valueMapCounts[i] += 1
continue
v = feature.GetField(i)
- if v in h:
- v2 = h[v]
- if v2 is None:
+ for rFrom, rTo, rType in mapping:
+ if rType == 0:
+ # literal
+ if v != rFrom:
+ continue
+ elif rType == 1:
+ # regex
+ m = rFrom.fullmatch(v)
+ if m is None:
+ continue
+ elif rTo is not None:
+ rTo = rTo.format(*m.groups())
+ else:
+ raise Exception(str(rType))
+
+ if rTo is None:
# replace non-NULL value with NULL
feature.SetFieldNull(i)
else:
# replace non-NULL value with non-NULL value
- feature.SetField(i, v2)
+ feature.SetField(i, rTo)
+ valueMapCounts[i] += 1
+ break
feature2 = ogr.Feature(defn_dst)
feature2.SetFromWithMap(feature, False, fieldMap)
@@ -1092,11 +1130,21 @@ def importSource2(lyr_dst, path, args={}, basedir=None, extent=None):
n += 1
feature = lyr.GetNextFeature()
+ if bValueMap:
+ valueMapCounts = [ (lyr.GetLayerDefn().GetFieldDefn(i).GetName(), k) for i,k in enumerate(valueMapCounts) if k > 0 ]
+
lyr = None
logging.info('Imported %d features from source layer "%s"', n, layername)
+ if bValueMap:
+ if len(valueMapCounts) > 0:
+ valueMapCounts = ', '.join([ str(k) + '× "' + n + '"' for n,k in valueMapCounts ])
+ else:
+ valueMapCounts = '-'
+ logging.info('Field substitutions: %s', valueMapCounts)
+
if len(mismatch) > 0:
- mismatches = [ str(n) + '× ' + ogr.GeometryTypeToName(t)
+ mismatches = [ str(n) + '× ' + ogr.GeometryTypeToName(t)
for t,n in sorted(mismatch.items(), key=lambda x: x[1]) ]
logging.info('Forced conversion to %s: %s',
ogr.GeometryTypeToName(eGType_dst), ', '.join(mismatches))