diff options
author | Guilhem Moulin <guilhem@fripost.org> | 2024-06-20 17:32:34 +0200 |
---|---|---|
committer | Guilhem Moulin <guilhem@fripost.org> | 2024-06-21 15:06:58 +0200 |
commit | d1f52a5e7ac2dd62c6348f17b02ccf324456c9da (patch) | |
tree | 7b96836ed3ac4071d4edbbdd1cf0835868fdfc78 /administrative-codes/csv2json | |
parent | e930cd95f3392b44152ae05b4189c65e833adaa3 (diff) |
Add script to download administrative codes from SCB.
Unfortunately SCB doesn't provide CSV files, so we download their xls
file and produce our own CSV files. We also add a conversion tool to
turn these CSV files into a (single, compact) JSON file to be served via
HTTP.
Diffstat (limited to 'administrative-codes/csv2json')
-rwxr-xr-x | administrative-codes/csv2json | 43 |
1 file changed, 43 insertions, 0 deletions
#!/usr/bin/python3

#----------------------------------------------------------------------
# Backend utilities for the Klimatanalys Norr project (compile county & municipality codes)
# Copyright © 2024 Guilhem Moulin <info@guilhem.se>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#----------------------------------------------------------------------

"""Merge tab-separated code files into one compact JSON object on stdout.

Usage: csv2json FILE [FILE ...]

Each FILE must have a header row containing at least the columns
``Code`` and ``Name``.  The resulting JSON object maps every code to its
name, with keys sorted and no insignificant whitespace.
"""

import sys
import csv
from pathlib import Path
import json

# Directory containing this script.  Not used below; kept so the
# module-level names stay the same as before.
basedir = Path(sys.argv[0]).parent

# Accumulated code -> name mapping, filled in by readCSV().
data = {}


def readCSV(path):
    """Add every row of the tab-separated file at *path* to ``data``.

    The file is read with ``csv.DictReader`` and must have ``Code`` and
    ``Name`` columns.  ``data[code]`` is set to the row's name.

    Raises:
        Exception: if a code is already present in ``data``, whether it
            came from this file or from a previously read one.
        KeyError: if a row lacks the ``Code`` or ``Name`` column.
    """
    # Decode explicitly instead of relying on the process locale, so names
    # with non-ASCII letters are read the same way everywhere.
    # NOTE(review): assumes the generated CSV files are UTF-8 -- confirm
    # against the script that produces them.
    with open(path, mode='r', newline='', encoding='utf-8') as fp:
        reader = csv.DictReader(fp, delimiter='\t', dialect='unix')
        for row in reader:
            code = row['Code']
            if code in data:
                raise Exception(f'Duplicate code {code}')
            data[code] = row['Name']


def main():
    """Read all files named on the command line and dump ``data`` as JSON."""
    # The source (SCB) lists all codes in the same file: they differ only
    # in length (2 digits for counties, 4 digits for municipalities) so it
    # doesn't hurt to merge them back in a single JSON dictionary.  Also
    # having a single file simplifies the client-side logic.
    for path in sys.argv[1:]:
        readCSV(path)

    # ensure_ascii=False emits names verbatim, so force the output to
    # UTF-8 regardless of the locale.  The hasattr() guard covers stdout
    # replacements that are not io.TextIOWrapper instances.
    if hasattr(sys.stdout, 'reconfigure'):
        sys.stdout.reconfigure(encoding='utf-8')
    json.dump(data, sys.stdout, ensure_ascii=False, sort_keys=True, separators=(',', ':'))


if __name__ == '__main__':
    main()