aboutsummaryrefslogtreecommitdiffstats
path: root/administrative-codes/update
blob: 855f73b2d6ab5c66d9175ef5404726619b721013 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/python3

#----------------------------------------------------------------------
# Backend utilities for the Klimatanalys Norr project (get county & municipality codes)
# Copyright © 2024 Guilhem Moulin <info@guilhem.se>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#----------------------------------------------------------------------

import re
import sys
import csv
from pathlib import Path
import requests
import xlrd

# The authoritative source for county and municipality codes is Statistiska centralbyrån (SCB), cf.
# https://www.skatteverket.se/privat/skatter/arbeteochinkomst/askattsedelochskattetabeller/koderforlankommunerochforsamlingar.4.18e1b10334ebe8bc80004187.html
#
# Unfortunately SCB doesn't provide a CSV, so we download their xls file and produce our own.
# https://www.scb.se/hitta-statistik/regional-statistik-och-kartor/regionala-indelningar/lan-och-kommuner/lan-och-kommuner-i-kodnummerordning/
# requests applies no timeout unless one is given, so bound the connect/read waits explicitly:
# a stalled server then raises requests.exceptions.Timeout instead of hanging the script forever.
r = requests.get('https://www.scb.se/contentassets/7a89e48960f741e08918e489ea36354a/kommunlankod_2024.xls',
    timeout=30)
# Abort on any 4xx/5xx status rather than feeding an error page to the XLS parser.
r.raise_for_status()

# Sanity-check the declared payload type before parsing the body as a workbook.
if 'content-type' not in r.headers:
    raise Exception('Missing Content-Type from response headers')
if r.headers['content-type'] not in ['application/vnd.ms-excel', 'application/octet-stream']:
    raise Exception(f"Unsupported Content-Type: {r.headers['content-type']}")

# Parse the downloaded bytes directly from memory (no temporary file involved).
xls = xlrd.open_workbook(file_contents=r.content)
sheets = xls.sheet_names()
if 1 < len(sheets):
    # Not fatal, but worth flagging: only the first sheet is read.
    print(f'WARN: XLS has {len(sheets)} > 1 sheets: {sheets!s}', file=sys.stderr)
sheet = xls.sheet_by_index(0)

# County codes are exactly 2 digits, municipality codes exactly 4 digits (matched with fullmatch()).
COUNTY_CODE_RE = re.compile(r'[0-9]{2}')
MUNICIPALITY_CODE_RE = re.compile(r'[0-9]{4}')
counties = []
municipalities = []

# Walk every row of the sheet; column 0 holds the code and column 1 the name.
# https://xlrd.readthedocs.io/en/latest/api.html#xlrd-sheet
for i in range(sheet.nrows):
    if sheet.row_len(i) < 2:
        continue
    code = sheet.cell_value(i, 0)
    if not isinstance(code, str):
        # xlrd yields '' for empty cells but float/int for numeric, date, boolean and error
        # cells.  Such a value cannot be a code, and passing it to fullmatch() would raise
        # TypeError, so report it and move on instead of crashing.
        print(f'WARN: Skipping row {i + 1}: non-text value {code!r} in code column', file=sys.stderr)
        continue
    # Pick the destination list from the shape of the code.
    if MUNICIPALITY_CODE_RE.fullmatch(code) is not None:
        target = municipalities
    elif COUNTY_CODE_RE.fullmatch(code) is not None:
        target = counties
    else:
        # skip headers (and empty cells)
        continue
    name = sheet.cell_value(i, 1)
    if name is None or name == '':
        continue
    target.append({ 'Code': code, 'Name': name })

# Output files go into the directory of the script as it was invoked (sys.argv[0]).
basedir = Path(sys.argv[0]).parent
def writeCSV(filename, data):
    """Write `data` to a tab-separated, UTF-8 encoded file with a Code/Name header.

    filename: output path relative to `basedir`; its suffix is replaced by
        (or, if absent, set to) '.csv'.
    data: sized iterable of dicts with exactly the keys 'Code' and 'Name'.

    The number of rows written and the destination path are reported on stderr.
    """
    fieldnames = ['Code', 'Name']
    path = basedir.joinpath(filename).with_suffix('.csv')
    # Pin the encoding: the names are Swedish and may contain non-ASCII letters, and without
    # an explicit encoding the locale decides the bytes on disk (or whether writing fails).
    # newline='' leaves line endings to the csv module, as its documentation requires.
    with path.open(mode='w', newline='', encoding='utf-8') as fp:
        # 'unix' dialect for '\n' line endings; delimiter and quoting override its defaults.
        writer = csv.DictWriter(fp, fieldnames=fieldnames, delimiter='\t',
            quoting=csv.QUOTE_MINIMAL, dialect='unix')
        writer.writeheader()
        writer.writerows(data)
    print(f'Wrote {len(data)} rows in {path}', file=sys.stderr)

# Emit one CSV per administrative level, counties first.
for basename, rows in (('counties', counties), ('municipalities', municipalities)):
    writeCSV(basename, rows)