From bc58a552b1be29d0f11d6856da60282034339e96 Mon Sep 17 00:00:00 2001 From: Guilhem Moulin Date: Wed, 19 Jun 2024 01:55:08 +0200 Subject: Add logic to replace field value literals. And set them to NULL. --- config.yml | 80 +++++++++++++++++++++++++++++++++++++---------- schema.sql | 50 +++++++++++++++++++++++++++++- webmap-import | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 205 insertions(+), 24 deletions(-) diff --git a/config.yml b/config.yml index 6587643..450cc5f 100644 --- a/config.yml +++ b/config.yml @@ -228,8 +228,21 @@ layers: # - field_name1 # - field_name2 # field-map: +# source_field_name1: field_name1 # source_field_name2: field_name2 -# source_field_name2: field_name2 +# +# # Mapping of (source) field values, to replace particular +# # values with something else. Useful for instance to replace +# # the empty string with NULL. +# value-map: +# source_field_name1: +# - from: '' +# to: null +# - from: 'foo' +# to: 'bar' +# source_field_name2: +# - from: 0 +# to: 1 'lansyta': description: Sveriges län (Lantmäteriet) @@ -521,9 +534,9 @@ layers: - name: Beteckn type: String width: 12 - #unique: true - # TODO: map "Visas ej" to NULL - nullable: false + unique: true + # obfuscated codes are mapped to NULL + nullable: true #comment: ärendebeteckning - name: ArendeAr type: Integer @@ -611,6 +624,10 @@ layers: Forebild: Forebild Efterbild: Efterbild Arealha: ArealHa + value-map: + Beteckn: + - from: 'Visas ej' + to: null - source: download: @@ -642,6 +659,10 @@ layers: Forebild: Forebild Efterbild: Efterbild Arealha: ArealHa + value-map: + Beteckn: + - from: 'Visas ej' + to: null - source: download: @@ -673,6 +694,10 @@ layers: Forebild: Forebild Efterbild: Efterbild Arealha: ArealHa + value-map: + Beteckn: + - from: 'Visas ej' + to: null - source: download: @@ -702,6 +727,10 @@ layers: Forebild: Forebild Efterbild: Efterbild Arealha: ArealHa + value-map: + Beteckn: + - from: 'Visas ej' + to: null 'st:betesomraden': source: @@ -807,8 +836,7 @@ layers: #comment: när tillståndets giltighet slutar - name: DiaryNr type: String - # TODO map empty string to NULL - #unique: true + unique: true width: 16 - name: LastUpdated # TODO convert to Date @@ -832,6 +860,10 @@ layers: 'Valid to': ValidTo 'Diary nr': DiaryNr 'Last updated': LastUpdated + value-map: + 'Diary nr': + - from: '' + to: null 'mrr:markanvisningar': description: Markanvisning till koncession (SGU) @@ -845,12 +877,10 @@ layers: #comment: benämning på området - name: DecisionDate type: Date - # TODO map empty string to NULL #comment: beslutsdatum - name: DiaryNr type: String - # TODO map empty string to NULL - #unique: true + unique: true width: 16 - name: LastUpdated # TODO convert to Date @@ -871,6 +901,13 @@ layers: 'Decision date': DecisionDate 'Diary nr': DiaryNr 'Last updated': LastUpdated + value-map: + 'Decision date': + - from: '' + to: null + 'Diary nr': + - from: '' + to: null 'mrr:mineral_applied': description: Undersökningstillstånd, metallar och mineral, ansökta (SGU) @@ -959,8 +996,7 @@ layers: #comment: när tillståndets giltighet slutar - name: DiaryNr type: String - # TODO map empty string to NULL - #unique: true + unique: true width: 16 - name: LastUpdated # TODO convert to Date @@ -985,6 +1021,10 @@ layers: 'Valid to': ValidTo 'Diary nr': DiaryNr 'Last updated': LastUpdated + value-map: + 'Diary nr': + - from: '' + to: null # 'mrr:mineral_expired': # source: @@ -1100,8 +1140,7 @@ layers: #comment: när tillståndets giltighet slutar - name: DiaryNr type: String - # TODO map empty string to NULL - #unique: true + unique: true width: 16 - name: LastUpdated # TODO convert to Date @@ -1126,6 +1165,10 @@ layers: 'Valid to': ValidTo 'Diary nr': DiaryNr 'Last updated': LastUpdated + value-map: + 'Diary nr': + - from: '' + to: null 'mrr:torvkoncessioner': description: Torvkoncessioner (SGU) @@ -1140,7 +1183,6 @@ layers: #comment: benämning på området - name: Mineral type: String - # TODO map empty string to NULL width: 254 #comment: koncessionsmineral - name: Owner @@ -1158,8 +1200,7 @@ layers: #comment: när tillståndets giltighet slutar - name: DiaryNr type: String - # TODO map empty string to NULL - #unique: true + unique: true width: 16 - name: LastUpdated # TODO convert to Date @@ -1183,3 +1224,10 @@ layers: 'Valid to': ValidTo 'Diary nr': DiaryNr 'Last updated': LastUpdated + value-map: + 'Mineral': + - from: '' + to: null + 'Diary nr': + - from: '' + to: null diff --git a/schema.sql b/schema.sql index b1ab6ad..07bda74 100644 --- a/schema.sql +++ b/schema.sql @@ -591,7 +591,7 @@ ALTER SEQUENCE postgis."sks:AvverkAnm_ogc_fid_seq" OWNED BY postgis."sks:AvverkA CREATE TABLE postgis."sks:UtfordAvverk" ( ogc_fid bigint NOT NULL, "OBJECTID" integer NOT NULL, - "Beteckn" character(12) NOT NULL, + "Beteckn" character(12), "ArendeAr" smallint NOT NULL, "Avverktyp" character varying(254) NOT NULL, "Skogstyp" character varying(254) NOT NULL, @@ -794,6 +794,14 @@ ALTER TABLE ONLY postgis."mrr:bearbetningskoncessioner_applied" ADD CONSTRAINT "mrr:bearbetningskoncessioner_applied_pkey" PRIMARY KEY (ogc_fid); +-- +-- Name: mrr:bearbetningskoncessioner_approved mrr:bearbetningskoncessioner_approved_DiaryNr_key; Type: CONSTRAINT; Schema: postgis; Owner: webmap_import +-- + +ALTER TABLE ONLY postgis."mrr:bearbetningskoncessioner_approved" + ADD CONSTRAINT "mrr:bearbetningskoncessioner_approved_DiaryNr_key" UNIQUE ("DiaryNr"); + + -- -- Name: mrr:bearbetningskoncessioner_approved mrr:bearbetningskoncessioner_approved_Name_key; Type: CONSTRAINT; Schema: postgis; Owner: webmap_import -- @@ -810,6 +818,14 @@ ALTER TABLE ONLY postgis."mrr:bearbetningskoncessioner_approved" ADD CONSTRAINT "mrr:bearbetningskoncessioner_approved_pkey" PRIMARY KEY (ogc_fid); +-- +-- Name: mrr:markanvisningar mrr:markanvisningar_DiaryNr_key; Type: CONSTRAINT; Schema: postgis; Owner: webmap_import +-- + +ALTER TABLE ONLY postgis."mrr:markanvisningar" + ADD CONSTRAINT "mrr:markanvisningar_DiaryNr_key" UNIQUE ("DiaryNr"); + + -- -- Name: mrr:markanvisningar mrr:markanvisningar_pkey; Type: CONSTRAINT; Schema: postgis; Owner: webmap_import -- @@ -842,6 +858,14 @@ ALTER TABLE ONLY postgis."mrr:mineral_applied" ADD CONSTRAINT "mrr:mineral_applied_pkey" PRIMARY KEY (ogc_fid); +-- +-- Name: mrr:mineral_approved mrr:mineral_approved_DiaryNr_key; Type: CONSTRAINT; Schema: postgis; Owner: webmap_import +-- + +ALTER TABLE ONLY postgis."mrr:mineral_approved" + ADD CONSTRAINT "mrr:mineral_approved_DiaryNr_key" UNIQUE ("DiaryNr"); + + -- -- Name: mrr:mineral_approved mrr:mineral_approved_LicenceID_key; Type: CONSTRAINT; Schema: postgis; Owner: webmap_import -- @@ -890,6 +914,14 @@ ALTER TABLE ONLY postgis."mrr:olja_gas_diamant_applied" ADD CONSTRAINT "mrr:olja_gas_diamant_applied_pkey" PRIMARY KEY (ogc_fid); +-- +-- Name: mrr:olja_gas_diamant_approved mrr:olja_gas_diamant_approved_DiaryNr_key; Type: CONSTRAINT; Schema: postgis; Owner: webmap_import +-- + +ALTER TABLE ONLY postgis."mrr:olja_gas_diamant_approved" + ADD CONSTRAINT "mrr:olja_gas_diamant_approved_DiaryNr_key" UNIQUE ("DiaryNr"); + + -- -- Name: mrr:olja_gas_diamant_approved mrr:olja_gas_diamant_approved_LicenceID_key; Type: CONSTRAINT; Schema: postgis; Owner: webmap_import -- @@ -914,6 +946,14 @@ ALTER TABLE ONLY postgis."mrr:olja_gas_diamant_approved" ADD CONSTRAINT "mrr:olja_gas_diamant_approved_pkey" PRIMARY KEY (ogc_fid); +-- +-- Name: mrr:torvkoncessioner mrr:torvkoncessioner_DiaryNr_key; Type: CONSTRAINT; Schema: postgis; Owner: webmap_import +-- + +ALTER TABLE ONLY postgis."mrr:torvkoncessioner" + ADD CONSTRAINT "mrr:torvkoncessioner_DiaryNr_key" UNIQUE ("DiaryNr"); + + -- -- Name: mrr:torvkoncessioner mrr:torvkoncessioner_Name_key; Type: CONSTRAINT; Schema: postgis; Owner: webmap_import -- @@ -954,6 +994,14 @@ ALTER TABLE ONLY postgis."sks:AvverkAnm" ADD CONSTRAINT "sks:AvverkAnm_pkey" PRIMARY KEY (ogc_fid); +-- +-- Name: sks:UtfordAvverk sks:UtfordAvverk_Beteckn_key; Type: CONSTRAINT; Schema: postgis; Owner: webmap_import +-- + +ALTER TABLE ONLY postgis."sks:UtfordAvverk" + ADD CONSTRAINT "sks:UtfordAvverk_Beteckn_key" UNIQUE ("Beteckn"); + + -- -- Name: sks:UtfordAvverk sks:UtfordAvverk_OBJECTID_key; Type: CONSTRAINT; Schema: postgis; Owner: webmap_import -- diff --git a/webmap-import b/webmap-import index e9ae302..e978f34 100755 --- a/webmap-import +++ b/webmap-import @@ -730,8 +730,9 @@ def setOutputFieldMap(defn, sources): fieldMap[fldName] = i for source in sources: - src = source['source']['path'] - fieldMap2 = source['import'].get('field-map', None) + source_import = source['import'] + + fieldMap2 = source_import.get('field-map', None) if fieldMap2 is None: fieldMap2 = fieldMap else: @@ -744,10 +745,22 @@ def setOutputFieldMap(defn, sources): if i is None: raise Exception(f'Ouput layer has no field named "{ofld}"') fieldMap2[ifld] = i - - source['import']['field-map'] = fieldMap2 - - return fieldMap + source_import['field-map'] = fieldMap2 + + # validate field value mapping + valueMap = source_import.get('value-map', None) + if valueMap is not None: + for fldName, rules in valueMap.items(): + if rules is None: + continue + if not isinstance(rules, list): + rules = [rules] + for idx, rule in enumerate(rules): + if rule is None or not isinstance(rule, dict): + raise Exception(f'Field "{fldName}" has invalid rule #{idx}: {rule}') + if 'from' not in rule or 'to' not in rule or len(rule) != 2: + raise Exception(f'Field "{fldName}" has invalid rule #{idx}: {rule}') + rules[idx] = ( rule['from'], rule['to'] ) # Escape the given identifier, cf. # swig/python/gdal-utils/osgeo_utils/samples/validate_gpkg.py:_esc_id() @@ -856,6 +869,31 @@ def importSource(lyr, path=None, unar=None, args={}, cachedir=None, extent=None) return importSource2(lyr, ds_srcpath, args=args, basedir=Path(tmpdir), extent=extent) +# Validate field value mapping +def setFieldMapValue(fld, idx, val): + if val is None: + if not fld.IsNullable(): + logging.warning('Field "%s" is not NULLable but remaps NULL', fld.GetName()) + return None + + fldType = fld.GetType() + if fldType == ogr.OFTInteger or fldType == ogr.OFTInteger64: + if isinstance(val, int): + return val + elif fldType == ogr.OFTString: + if isinstance(val, str): + return val + elif fldType == ogr.OFTBinary: + if isinstance(val, bytes): + return val + elif fldType == ogr.OFTReal: + if isinstance(val, int): + return float(val) + elif isinstance(val, float): + return val + + raise Exception(f'Field "{fld.GetName()}" mapping #{idx} has incompatible type for {ogr.GetFieldTypeName(fldType)}') + # Import a source layer (already extracted) # This is more or less like ogr2ogr/GDALVectorTranslate() but we roll # out our own (slower) version because GDALVectorTranslate() insists in @@ -927,7 +965,6 @@ def importSource2(lyr_dst, path, args={}, basedir=None, extent=None): # call SetIgnored() on unwanted source fields logging.debug('Set Ignored=True on output field "%s"', fldName) fld.SetIgnored(True) - defn = None count0 = -1 if lyr.TestCapability(ogr.OLCFastFeatureCount): @@ -965,6 +1002,32 @@ def importSource2(lyr_dst, path, args={}, basedir=None, extent=None): else: logging.info('Source layer "%s" has %d features', layername, count0) + # build a list of pairs (field index, mapping_dict) + valueMapLiteral = [] + for fldName, rules in args.get('value-map', {}).items(): + i = defn.GetFieldIndex(fldName) + if i < 0: + raise Exception(f'Source layer "{layername}" has no field named "{fldName}"') + if fieldMap[i] < 0: + logging.warning('Ignored source field "%s" has value map', fldName) + continue + + h = {} + fld = defn.GetFieldDefn(i) + for idx, (rFrom, rTo) in enumerate(rules): + # use fld for both from and to (the types must match, + # casting is not allowed in the mapping) + rFrom = setFieldMapValue(fld, idx, rFrom) + rTo = setFieldMapValue(fld, idx, rTo) + h[rFrom] = rTo + + if len(h) > 0: + valueMapLiteral.append((i, h)) + + bValueMapLiteral = len(valueMapLiteral) > 0 + + defn = None + defn_dst = lyr_dst.GetLayerDefn() eGType_dst = defn_dst.GetGeomType() eGType_dst_HasZ = ogr.GT_HasZ(eGType_dst) @@ -975,6 +1038,28 @@ def importSource2(lyr_dst, path, args={}, basedir=None, extent=None): mismatch = {} feature = lyr.GetNextFeature() while feature is not None: + if bValueMapLiteral: + for i, h in valueMapLiteral: + if not feature.IsFieldSet(i): + continue + elif feature.IsFieldNull(i): + if None in h: + v = h[None] + if v is not None: + # replace NULL with non-NULL value + feature.SetField(i, v) + continue + + v = feature.GetField(i) + if v in h: + v2 = h[v] + if v2 is None: + # replace non-NULL value with NULL + feature.SetFieldNull(i) + else: + # replace non-NULL value with non-NULL value + feature.SetField(i, v2) + feature2 = ogr.Feature(defn_dst) feature2.SetFromWithMap(feature, False, fieldMap) -- cgit v1.2.3