#!/usr/bin/python3

#----------------------------------------------------------------------
# Backend utilities for the Klimatanalys Norr project (get county & municipality codes)
# Copyright © 2024 Guilhem Moulin
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#----------------------------------------------------------------------

import csv
import re
import sys
from pathlib import Path

import requests
import xlrd

# The authoritative source for county and municipality codes is Statistiska centralbyrån (SCB), cf.
# https://www.skatteverket.se/privat/skatter/arbeteochinkomst/askattsedelochskattetabeller/koderforlankommunerochforsamlingar.4.18e1b10334ebe8bc80004187.html
#
# Unfortunately SCB doesn't provide a CSV, so we download their xls file and produce our own.
# https://www.scb.se/hitta-statistik/regional-statistik-och-kartor/regionala-indelningar/lan-och-kommuner/lan-och-kommuner-i-kodnummerordning/
SOURCE_URL = 'https://www.scb.se/contentassets/7a89e48960f741e08918e489ea36354a/kommunlankod_2024.xls'

# Seconds to wait for the server before giving up; without a timeout
# requests may block forever on a stalled connection.
REQUEST_TIMEOUT = 60

# Media types the server is allowed to answer with for the XLS file.
ACCEPTED_CONTENT_TYPES = ('application/vnd.ms-excel', 'application/octet-stream')

# County codes are exactly two ASCII digits, municipality codes exactly four.
COUNTY_CODE_RE = re.compile(r'[0-9]{2}')
MUNICIPALITY_CODE_RE = re.compile(r'[0-9]{4}')

# The CSV files are written next to this script by default.
basedir = Path(sys.argv[0]).parent


def download_workbook(url=SOURCE_URL):
    """Download the XLS file at *url* and return it as an xlrd workbook.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        RuntimeError: if the Content-Type header is missing or is not one
            of ACCEPTED_CONTENT_TYPES.
    """
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()

    if 'content-type' not in r.headers:
        raise RuntimeError('Missing Content-Type from response headers')
    # Compare the bare media type only: the header may carry parameters
    # (e.g. "; charset=...") and media types are case-insensitive.
    media_type = r.headers['content-type'].split(';', 1)[0].strip().lower()
    if media_type not in ACCEPTED_CONTENT_TYPES:
        raise RuntimeError(f"Unsupported Content-Type: {r.headers['content-type']}")

    return xlrd.open_workbook(file_contents=r.content)


def parse_sheet(sheet):
    """Extract county and municipality rows from an xlrd sheet.

    *sheet* only needs to provide ``nrows``, ``row_len(i)`` and
    ``cell_value(i, j)``, cf.
    https://xlrd.readthedocs.io/en/latest/api.html#xlrd-sheet

    Rows are kept when their first cell is a 2-digit (county) or 4-digit
    (municipality) code and their second cell is a non-empty name; anything
    else (titles, headers, blank rows) is skipped.

    Returns:
        A ``(counties, municipalities)`` tuple of lists of
        ``{'Code': ..., 'Name': ...}`` dicts, in sheet order.
    """
    counties = []
    municipalities = []

    for i in range(sheet.nrows):
        if sheet.row_len(i) < 2:
            continue

        code = sheet.cell_value(i, 0)
        if not isinstance(code, str):
            # xlrd yields float/int for numeric, date, boolean and error
            # cells; those cannot be codes and would make fullmatch() raise.
            continue

        if MUNICIPALITY_CODE_RE.fullmatch(code):
            target = municipalities
        elif COUNTY_CODE_RE.fullmatch(code):
            target = counties
        else:
            # skip headers
            continue

        name = sheet.cell_value(i, 1)
        if name is None or name == '':
            continue

        target.append({'Code': code, 'Name': name})

    return counties, municipalities


def writeCSV(filename, data, directory=None):
    """Write *data* as a tab-separated file named ``<filename>.csv``.

    Args:
        filename: Base name of the output file; its suffix is forced to
            ``.csv``.
        data: Sequence of dicts with the keys ``'Code'`` and ``'Name'``.
        directory: Target directory; defaults to the directory containing
            this script (``basedir``).
    """
    fieldnames = ['Code', 'Name']
    target_dir = Path(directory) if directory is not None else basedir
    path = target_dir.joinpath(filename).with_suffix('.csv')
    # Pin the encoding: names contain non-ASCII letters (å, ä, ö) and the
    # locale's default encoding is not guaranteed to be UTF-8.
    with path.open(mode='w', newline='', encoding='utf-8') as fp:
        writer = csv.DictWriter(fp, fieldnames=fieldnames, delimiter='\t',
            quoting=csv.QUOTE_MINIMAL, dialect='unix')
        writer.writeheader()
        writer.writerows(data)
    print(f'Wrote {len(data)} rows in {path}', file=sys.stderr)


def main():
    """Download the SCB workbook and regenerate both CSV files."""
    xls = download_workbook()

    sheets = xls.sheet_names()
    if len(sheets) > 1:
        print(f'WARN: XLS has {len(sheets)} > 1 sheets: ' + str(sheets), file=sys.stderr)

    # Only the first sheet is read.
    counties, municipalities = parse_sheet(xls.sheet_by_index(0))

    writeCSV('counties', counties)
    writeCSV('municipalities', municipalities)


if __name__ == '__main__':
    main()