1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
#!/usr/bin/python3
#----------------------------------------------------------------------
# Backend utilities for the Klimatanalys Norr project (get county & municipality codes)
# Copyright © 2024 Guilhem Moulin <info@guilhem.se>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#----------------------------------------------------------------------
import re
import sys
import csv
from pathlib import Path
import requests
import xlrd
# The authoritative source for county and municipality codes is Statistiska centralbyrån (SCB), cf.
# https://www.skatteverket.se/privat/skatter/arbeteochinkomst/askattsedelochskattetabeller/koderforlankommunerochforsamlingar.4.18e1b10334ebe8bc80004187.html
#
# Unfortunately SCB doesn't provide a CSV, so we download their xls file and produce our own.
# https://www.scb.se/hitta-statistik/regional-statistik-och-kartor/regionala-indelningar/lan-och-kommuner/lan-och-kommuner-i-kodnummerordning/
# Download SCB's workbook.  The request is bounded by a timeout so that an
# unresponsive server makes the script fail instead of hanging forever.
r = requests.get('https://www.scb.se/contentassets/7a89e48960f741e08918e489ea36354a/kommunlankod_2024.xls',
                 timeout=60)
r.raise_for_status()

content_type = r.headers.get('content-type')
if content_type is None:
    raise Exception('Missing Content-Type from response headers')
# Compare the bare media type only: the header may carry parameters
# (e.g. "; charset=binary") and media types are case-insensitive.
media_type = content_type.split(';', 1)[0].strip().lower()
if media_type not in ['application/vnd.ms-excel', 'application/octet-stream']:
    raise Exception(f"Unsupported Content-Type: {content_type}")

# Parse the workbook straight from memory; only its first sheet is used,
# so warn when there are more sheets than expected.
xls = xlrd.open_workbook(file_contents=r.content)
sheets = xls.sheet_names()
if len(sheets) > 1:
    print(f'WARN: XLS has {len(sheets)} > 1 sheets: ' + str(sheets), file=sys.stderr)
sheet = xls.sheet_by_index(0)
# Codes are matched as text: exactly 2 digits for a county and exactly
# 4 digits for a municipality.
COUNTY_CODE_RE = re.compile(r'[0-9]{2}')
MUNICIPALITY_CODE_RE = re.compile(r'[0-9]{4}')

counties = []
municipalities = []

# https://xlrd.readthedocs.io/en/latest/api.html#xlrd-sheet
for i in range(sheet.nrows):
    if sheet.row_len(i) < 2:
        continue

    code = sheet.cell_value(i, 0)
    # xlrd yields '' for empty cells and float/int for numeric, date,
    # boolean and error cells; only text can be a code, and passing
    # anything else to fullmatch() would raise TypeError.
    if not isinstance(code, str):
        continue

    m1 = MUNICIPALITY_CODE_RE.fullmatch(code)
    m2 = COUNTY_CODE_RE.fullmatch(code) if m1 is None else None
    if m1 is None and m2 is None:
        # skip headers
        continue

    name = sheet.cell_value(i, 1)
    if name is None or name == '':
        continue

    row = {'Code': code, 'Name': name}
    if m1 is not None:
        municipalities.append(row)
    else:
        # m2 is necessarily set here: rows matching neither were skipped above
        counties.append(row)
# Output files are written next to the script by default.
basedir = Path(sys.argv[0]).parent


def writeCSV(filename, data, *, directory=None):
    """Write *data* as a tab-separated file with a ``Code``/``Name`` header.

    Args:
        filename: Base name of the output file; its suffix is set to
            (or replaced by) ``.csv``.
        data: Sequence of dicts with the keys ``Code`` and ``Name``.
        directory: Directory to write into; defaults to ``basedir``
            (the directory holding the script).

    Raises:
        OSError: If the file cannot be created or written.
        ValueError: If a row has keys other than ``Code`` and ``Name``.
    """
    fieldnames = ['Code', 'Name']
    target_dir = basedir if directory is None else Path(directory)
    path = target_dir.joinpath(filename).with_suffix('.csv')
    # Force UTF-8: names contain non-ASCII letters (å, ä, ö) and the locale's
    # default encoding may be unable to represent them or differ per machine.
    # newline='' leaves line endings entirely to the csv writer.
    with path.open(mode='w', encoding='utf-8', newline='') as fp:
        writer = csv.DictWriter(fp, fieldnames=fieldnames, delimiter='\t',
                                quoting=csv.QUOTE_MINIMAL, dialect='unix')
        writer.writeheader()
        writer.writerows(data)
    print(f'Wrote {len(data)} rows in {path}', file=sys.stderr)
# Emit one file per collected list, counties first and municipalities second.
for stem, rows in (('counties', counties), ('municipalities', municipalities)):
    writeCSV(stem, rows)
|