From d1f52a5e7ac2dd62c6348f17b02ccf324456c9da Mon Sep 17 00:00:00 2001 From: Guilhem Moulin Date: Thu, 20 Jun 2024 17:32:34 +0200 Subject: Add script to download administrative codes from SCB. Unfortunately SCB doesn't provide CSV files, so we download their xls file and produce our own CSV files. We also add a conversion tool to turn these CSV files into a (single, compact) JSON file to be served via HTTP. --- administrative-codes/csv2json | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100755 administrative-codes/csv2json (limited to 'administrative-codes/csv2json') diff --git a/administrative-codes/csv2json b/administrative-codes/csv2json new file mode 100755 index 0000000..7c22666 --- /dev/null +++ b/administrative-codes/csv2json @@ -0,0 +1,43 @@ +#!/usr/bin/python3 + +#---------------------------------------------------------------------- +# Backend utilities for the Klimatanalys Norr project (compile county & municipality codes) +# Copyright © 2024 Guilhem Moulin +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
#----------------------------------------------------------------------

import sys
import csv
from pathlib import Path
import json

# Directory containing this script (not referenced by the code below).
basedir = Path(sys.argv[0]).parent

# Mapping of administrative code -> name, accumulated across all the
# input files passed to readCSV().
data = {}


def readCSV(path):
    """Merge the code/name pairs found in a tab-separated file into `data`.

    The file is parsed with csv.DictReader (tab delimiter, 'unix'
    dialect); its header row must provide the 'Code' and 'Name' columns.

    Args:
        path: Path of the file to read.

    Raises:
        ValueError: If a code is already present in `data`, whether it
            comes from this file or from one read earlier.
        KeyError: If a row lacks the 'Code' or 'Name' column.
    """
    # NOTE(review): no explicit encoding is given, so the text is decoded
    # using the locale's default -- confirm the encoding of the CSV files.
    with open(path, mode='r', newline='') as fp:
        reader = csv.DictReader(fp, delimiter='\t', dialect='unix')
        for row in reader:
            code = row['Code']
            if code in data:
                # ValueError is a subclass of Exception, so handlers
                # written for the generic exception keep working.
                raise ValueError(f'Duplicate code {code}')
            data[code] = row['Name']


def main(argv=None):
    """Read every given file and write the merged mapping to stdout.

    Args:
        argv: Paths of the files to read; defaults to the command-line
            arguments (sys.argv[1:]).
    """
    paths = sys.argv[1:] if argv is None else argv

    # The source (SCB) lists all codes in the same file: they differ only
    # in length (2 digits for counties, 4 digits for municipalities) so it
    # doesn't hurt to merge them back in a single JSON dictionary.  Also
    # having a single file simplifies the client-side logic.
    for path in paths:
        readCSV(path)

    # Compact output: no whitespace after separators, keys sorted, and
    # non-ASCII characters written as-is rather than \u-escaped.
    json.dump(data, sys.stdout, ensure_ascii=False, sort_keys=True,
              separators=(',', ':'))


# Only run the conversion when executed as a script, so that importing
# the module neither reads files nor writes to stdout.
if __name__ == '__main__':
    main()