diff options
author | Guilhem Moulin <guilhem@fripost.org> | 2024-06-20 17:32:34 +0200 |
---|---|---|
committer | Guilhem Moulin <guilhem@fripost.org> | 2024-06-21 15:06:58 +0200 |
commit | d1f52a5e7ac2dd62c6348f17b02ccf324456c9da (patch) | |
tree | 7b96836ed3ac4071d4edbbdd1cf0835868fdfc78 /administrative-codes/update | |
parent | e930cd95f3392b44152ae05b4189c65e833adaa3 (diff) |
Add script to download administrative codes from SCB.
Unfortunately SCB doesn't provide CSV files, so we download their xls
file and produce our own CSV files. We also add a conversion tool to
turn these CSV files into a (single, compact) JSON file to be served via
HTTP.
Diffstat (limited to 'administrative-codes/update')
-rwxr-xr-x | administrative-codes/update | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/administrative-codes/update b/administrative-codes/update new file mode 100755 index 0000000..855f73b --- /dev/null +++ b/administrative-codes/update @@ -0,0 +1,86 @@ +#!/usr/bin/python3 + +#---------------------------------------------------------------------- +# Backend utilities for the Klimatanalys Norr project (get county & municipality codes) +# Copyright © 2024 Guilhem Moulin <info@guilhem.se> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +#---------------------------------------------------------------------- + +import re +import sys +import csv +from pathlib import Path +import requests +import xlrd + +# The authoritative source for county and municipality codes is Statistiska centralbyrån (SCB), cf. +# https://www.skatteverket.se/privat/skatter/arbeteochinkomst/askattsedelochskattetabeller/koderforlankommunerochforsamlingar.4.18e1b10334ebe8bc80004187.html +# +# Unfortunately SCB doesn't provide a CSV, so we download their xls file and produce our own. 
# https://www.scb.se/hitta-statistik/regional-statistik-och-kartor/regionala-indelningar/lan-och-kommuner/lan-och-kommuner-i-kodnummerordning/
XLS_URL = 'https://www.scb.se/contentassets/7a89e48960f741e08918e489ea36354a/kommunlankod_2024.xls'

# Media types accepted for the downloaded workbook.
SUPPORTED_CONTENT_TYPES = ('application/vnd.ms-excel', 'application/octet-stream')

# Seconds to wait for the server before giving up; without a timeout
# requests would block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# County codes are exactly 2 digits, municipality codes exactly 4 digits.
COUNTY_CODE_RE = re.compile('[0-9]{2}')
MUNICIPALITY_CODE_RE = re.compile('[0-9]{4}')

# The CSV files are written next to this script.
basedir = Path(sys.argv[0]).parent


def fetchXLS(url=XLS_URL, timeout=REQUEST_TIMEOUT):
    """Download the XLS workbook at `url` and return its raw bytes.

    Raises requests.HTTPError on a non-2xx response, and Exception when
    the Content-Type header is missing or is not a supported media type.
    """
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    if 'content-type' not in r.headers:
        raise Exception('Missing Content-Type from response headers')
    # Compare the bare media type only: the header may legitimately carry
    # parameters (e.g. "; charset=...") and is case-insensitive.
    media_type = r.headers['content-type'].split(';', 1)[0].strip().lower()
    if media_type not in SUPPORTED_CONTENT_TYPES:
        raise Exception(f"Unsupported Content-Type: {r.headers['content-type']}")
    return r.content


def parseSheet(sheet):
    """Extract county and municipality rows from an xlrd sheet.

    `sheet` must provide `nrows`, `row_len(i)` and `cell_value(i, j)`, cf.
    https://xlrd.readthedocs.io/en/latest/api.html#xlrd-sheet

    Returns a pair (counties, municipalities), each a list of
    {'Code': ..., 'Name': ...} dictionaries in sheet order.  Rows with
    fewer than two columns, a first cell that is not a 2- or 4-digit
    code (headers, blank lines), or an empty name are skipped.
    """
    counties = []
    municipalities = []
    for i in range(sheet.nrows):
        if sheet.row_len(i) < 2:
            continue
        code = sheet.cell_value(i, 0)
        if not isinstance(code, str):
            # Non-text cells come back as numbers, which the regexes
            # cannot match (TypeError).  Genuine codes have leading
            # zeros and so must be text; skip anything else, but loudly.
            print(f'WARN: row {i}: skipping non-text code cell {code!r}', file=sys.stderr)
            continue
        if MUNICIPALITY_CODE_RE.fullmatch(code) is not None:
            target = municipalities
        elif COUNTY_CODE_RE.fullmatch(code) is not None:
            target = counties
        else:
            # skip headers
            continue
        name = sheet.cell_value(i, 1)
        if name is None or name == '':
            continue
        target.append({'Code': code, 'Name': name})
    return counties, municipalities


def writeCSV(filename, data):
    """Write `data` to <basedir>/<filename>.csv as tab-separated values.

    `data` is a list of dictionaries with keys 'Code' and 'Name'; a
    header line is always written.  The file is encoded as UTF-8
    regardless of the locale, since names contain non-ASCII letters.
    """
    fieldnames = ['Code', 'Name']
    path = basedir.joinpath(filename).with_suffix('.csv')
    with path.open(mode='w', newline='', encoding='utf-8') as fp:
        writer = csv.DictWriter(fp, fieldnames=fieldnames, delimiter='\t',
                                quoting=csv.QUOTE_MINIMAL, dialect='unix')
        writer.writeheader()
        writer.writerows(data)
    print(f'Wrote {len(data)} rows in {path}', file=sys.stderr)


def main():
    """Download SCB's workbook and regenerate both CSV files."""
    xls = xlrd.open_workbook(file_contents=fetchXLS())
    sheets = xls.sheet_names()
    if len(sheets) > 1:
        print(f'WARN: XLS has {len(sheets)} > 1 sheets: ' + str(sheets), file=sys.stderr)

    # Only the first sheet is parsed.
    counties, municipalities = parseSheet(xls.sheet_by_index(0))
    writeCSV('counties', counties)
    writeCSV('municipalities', municipalities)


if __name__ == '__main__':
    main()