aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGuilhem Moulin <guilhem@fripost.org>2024-06-20 17:32:34 +0200
committerGuilhem Moulin <guilhem@fripost.org>2024-06-21 15:06:58 +0200
commitd1f52a5e7ac2dd62c6348f17b02ccf324456c9da (patch)
tree7b96836ed3ac4071d4edbbdd1cf0835868fdfc78
parente930cd95f3392b44152ae05b4189c65e833adaa3 (diff)
Add script to download administrative codes from SCB.
Unfortunately SCB doesn't provide CSV files, so we download their xls file and produce our own CSV files. We also add a conversion tool to turn these CSV files into a (single, compact) JSON file to be served via HTTP.
-rw-r--r--administrative-codes/.gitignore3
-rw-r--r--administrative-codes/Makefile19
-rw-r--r--administrative-codes/counties.csv22
-rwxr-xr-xadministrative-codes/csv2json43
-rw-r--r--administrative-codes/municipalities.csv291
-rwxr-xr-xadministrative-codes/update86
6 files changed, 464 insertions, 0 deletions
diff --git a/administrative-codes/.gitignore b/administrative-codes/.gitignore
new file mode 100644
index 0000000..d9d14dc
--- /dev/null
+++ b/administrative-codes/.gitignore
@@ -0,0 +1,3 @@
+/administrative-codes.json
+/administrative-codes.json.br
+/administrative-codes.json.gz
diff --git a/administrative-codes/Makefile b/administrative-codes/Makefile
new file mode 100644
index 0000000..c0008bc
--- /dev/null
+++ b/administrative-codes/Makefile
@@ -0,0 +1,19 @@
+# Basename (without extension) of the generated files.
+OUT = administrative-codes
+CSV_SOURCES = counties.csv municipalities.csv
+GENERATED_FILES = $(addsuffix .json,$(OUT)) $(addsuffix .json.br,$(OUT))
+all: $(GENERATED_FILES)
+
+# (Re)generate the CSV files from the upstream spreadsheet.
+$(CSV_SOURCES): %.csv:
+	./update
+
+# Merge all CSV files into a single compact JSON document.
+%.json: $(CSV_SOURCES)
+	./csv2json $^ >$@
+
+# XXX The brotli(1) executable doesn't support mode=MODE_TEXT
+# --force is required because brotli(1) refuses to overwrite an existing
+# output file, which would make rebuilding a stale .br file fail.
+%.json.br: %.json
+	brotli --best --force --keep --output=$@ -- $^
+
+clean:
+	rm -f -- $(GENERATED_FILES)
+
+# These targets don't produce a file of the same name.
+.PHONY: all clean
diff --git a/administrative-codes/counties.csv b/administrative-codes/counties.csv
new file mode 100644
index 0000000..ee34965
--- /dev/null
+++ b/administrative-codes/counties.csv
@@ -0,0 +1,22 @@
+Code Name
+01 Stockholms län
+03 Uppsala län
+04 Södermanlands län
+05 Östergötlands län
+06 Jönköpings län
+07 Kronobergs län
+08 Kalmar län
+09 Gotlands län
+10 Blekinge län
+12 Skåne län
+13 Hallands län
+14 Västra Götalands län
+17 Värmlands län
+18 Örebro län
+19 Västmanlands län
+20 Dalarnas län
+21 Gävleborgs län
+22 Västernorrlands län
+23 Jämtlands län
+24 Västerbottens län
+25 Norrbottens län
diff --git a/administrative-codes/csv2json b/administrative-codes/csv2json
new file mode 100755
index 0000000..7c22666
--- /dev/null
+++ b/administrative-codes/csv2json
@@ -0,0 +1,43 @@
+#!/usr/bin/python3
+
+#----------------------------------------------------------------------
+# Backend utilities for the Klimatanalys Norr project (compile county & municipality codes)
+# Copyright © 2024 Guilhem Moulin <info@guilhem.se>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#----------------------------------------------------------------------
+
+import sys
+import csv
+from pathlib import Path
+import json
+
+basedir = Path(sys.argv[0]).parent
+data = {}
+def readCSV(path):
+ with open(path, mode='r', newline='') as fp:
+ reader = csv.DictReader(fp, delimiter='\t', dialect='unix')
+ for row in reader:
+ code = row['Code']
+ if code in data:
+ raise Exception(f'Duplicate code {code}')
+ data[code] = row['Name']
+
+# The source (SCB) lists all codes in same file: they differ only in
+# length (2 digits for counties, 4 digits for municipalities) so it
+# doesn't hurt to merge them back in a single JSON dictionary. Also
+# having a single file simplifies the client-side logic.
+for path in sys.argv[1:]:
+ readCSV(path)
+json.dump(data, sys.stdout, ensure_ascii=False, sort_keys=True, separators=(',', ':'))
diff --git a/administrative-codes/municipalities.csv b/administrative-codes/municipalities.csv
new file mode 100644
index 0000000..2c61d86
--- /dev/null
+++ b/administrative-codes/municipalities.csv
@@ -0,0 +1,291 @@
+Code Name
+0114 Upplands Väsby
+0115 Vallentuna
+0117 Österåker
+0120 Värmdö
+0123 Järfälla
+0125 Ekerö
+0126 Huddinge
+0127 Botkyrka
+0128 Salem
+0136 Haninge
+0138 Tyresö
+0139 Upplands-Bro
+0140 Nykvarn
+0160 Täby
+0162 Danderyd
+0163 Sollentuna
+0180 Stockholm
+0181 Södertälje
+0182 Nacka
+0183 Sundbyberg
+0184 Solna
+0186 Lidingö
+0187 Vaxholm
+0188 Norrtälje
+0191 Sigtuna
+0192 Nynäshamn
+0305 Håbo
+0319 Älvkarleby
+0330 Knivsta
+0331 Heby
+0360 Tierp
+0380 Uppsala
+0381 Enköping
+0382 Östhammar
+0428 Vingåker
+0461 Gnesta
+0480 Nyköping
+0481 Oxelösund
+0482 Flen
+0483 Katrineholm
+0484 Eskilstuna
+0486 Strängnäs
+0488 Trosa
+0509 Ödeshög
+0512 Ydre
+0513 Kinda
+0560 Boxholm
+0561 Åtvidaberg
+0562 Finspång
+0563 Valdemarsvik
+0580 Linköping
+0581 Norrköping
+0582 Söderköping
+0583 Motala
+0584 Vadstena
+0586 Mjölby
+0604 Aneby
+0617 Gnosjö
+0642 Mullsjö
+0643 Habo
+0662 Gislaved
+0665 Vaggeryd
+0680 Jönköping
+0682 Nässjö
+0683 Värnamo
+0684 Sävsjö
+0685 Vetlanda
+0686 Eksjö
+0687 Tranås
+0760 Uppvidinge
+0761 Lessebo
+0763 Tingsryd
+0764 Alvesta
+0765 Älmhult
+0767 Markaryd
+0780 Växjö
+0781 Ljungby
+0821 Högsby
+0834 Torsås
+0840 Mörbylånga
+0860 Hultsfred
+0861 Mönsterås
+0862 Emmaboda
+0880 Kalmar
+0881 Nybro
+0882 Oskarshamn
+0883 Västervik
+0884 Vimmerby
+0885 Borgholm
+0980 Gotland
+1060 Olofström
+1080 Karlskrona
+1081 Ronneby
+1082 Karlshamn
+1083 Sölvesborg
+1214 Svalöv
+1230 Staffanstorp
+1231 Burlöv
+1233 Vellinge
+1256 Östra Göinge
+1257 Örkelljunga
+1260 Bjuv
+1261 Kävlinge
+1262 Lomma
+1263 Svedala
+1264 Skurup
+1265 Sjöbo
+1266 Hörby
+1267 Höör
+1270 Tomelilla
+1272 Bromölla
+1273 Osby
+1275 Perstorp
+1276 Klippan
+1277 Åstorp
+1278 Båstad
+1280 Malmö
+1281 Lund
+1282 Landskrona
+1283 Helsingborg
+1284 Höganäs
+1285 Eslöv
+1286 Ystad
+1287 Trelleborg
+1290 Kristianstad
+1291 Simrishamn
+1292 Ängelholm
+1293 Hässleholm
+1315 Hylte
+1380 Halmstad
+1381 Laholm
+1382 Falkenberg
+1383 Varberg
+1384 Kungsbacka
+1401 Härryda
+1402 Partille
+1407 Öckerö
+1415 Stenungsund
+1419 Tjörn
+1421 Orust
+1427 Sotenäs
+1430 Munkedal
+1435 Tanum
+1438 Dals-Ed
+1439 Färgelanda
+1440 Ale
+1441 Lerum
+1442 Vårgårda
+1443 Bollebygd
+1444 Grästorp
+1445 Essunga
+1446 Karlsborg
+1447 Gullspång
+1452 Tranemo
+1460 Bengtsfors
+1461 Mellerud
+1462 Lilla Edet
+1463 Mark
+1465 Svenljunga
+1466 Herrljunga
+1470 Vara
+1471 Götene
+1472 Tibro
+1473 Töreboda
+1480 Göteborg
+1481 Mölndal
+1482 Kungälv
+1484 Lysekil
+1485 Uddevalla
+1486 Strömstad
+1487 Vänersborg
+1488 Trollhättan
+1489 Alingsås
+1490 Borås
+1491 Ulricehamn
+1492 Åmål
+1493 Mariestad
+1494 Lidköping
+1495 Skara
+1496 Skövde
+1497 Hjo
+1498 Tidaholm
+1499 Falköping
+1715 Kil
+1730 Eda
+1737 Torsby
+1760 Storfors
+1761 Hammarö
+1762 Munkfors
+1763 Forshaga
+1764 Grums
+1765 Årjäng
+1766 Sunne
+1780 Karlstad
+1781 Kristinehamn
+1782 Filipstad
+1783 Hagfors
+1784 Arvika
+1785 Säffle
+1814 Lekeberg
+1860 Laxå
+1861 Hallsberg
+1862 Degerfors
+1863 Hällefors
+1864 Ljusnarsberg
+1880 Örebro
+1881 Kumla
+1882 Askersund
+1883 Karlskoga
+1884 Nora
+1885 Lindesberg
+1904 Skinnskatteberg
+1907 Surahammar
+1960 Kungsör
+1961 Hallstahammar
+1962 Norberg
+1980 Västerås
+1981 Sala
+1982 Fagersta
+1983 Köping
+1984 Arboga
+2021 Vansbro
+2023 Malung-Sälen
+2026 Gagnef
+2029 Leksand
+2031 Rättvik
+2034 Orsa
+2039 Älvdalen
+2061 Smedjebacken
+2062 Mora
+2080 Falun
+2081 Borlänge
+2082 Säter
+2083 Hedemora
+2084 Avesta
+2085 Ludvika
+2101 Ockelbo
+2104 Hofors
+2121 Ovanåker
+2132 Nordanstig
+2161 Ljusdal
+2180 Gävle
+2181 Sandviken
+2182 Söderhamn
+2183 Bollnäs
+2184 Hudiksvall
+2260 Ånge
+2262 Timrå
+2280 Härnösand
+2281 Sundsvall
+2282 Kramfors
+2283 Sollefteå
+2284 Örnsköldsvik
+2303 Ragunda
+2305 Bräcke
+2309 Krokom
+2313 Strömsund
+2321 Åre
+2326 Berg
+2361 Härjedalen
+2380 Östersund
+2401 Nordmaling
+2403 Bjurholm
+2404 Vindeln
+2409 Robertsfors
+2417 Norsjö
+2418 Malå
+2421 Storuman
+2422 Sorsele
+2425 Dorotea
+2460 Vännäs
+2462 Vilhelmina
+2463 Åsele
+2480 Umeå
+2481 Lycksele
+2482 Skellefteå
+2505 Arvidsjaur
+2506 Arjeplog
+2510 Jokkmokk
+2513 Överkalix
+2514 Kalix
+2518 Övertorneå
+2521 Pajala
+2523 Gällivare
+2560 Älvsbyn
+2580 Luleå
+2581 Piteå
+2582 Boden
+2583 Haparanda
+2584 Kiruna
diff --git a/administrative-codes/update b/administrative-codes/update
new file mode 100755
index 0000000..855f73b
--- /dev/null
+++ b/administrative-codes/update
@@ -0,0 +1,86 @@
+#!/usr/bin/python3
+
+#----------------------------------------------------------------------
+# Backend utilities for the Klimatanalys Norr project (get county & municipality codes)
+# Copyright © 2024 Guilhem Moulin <info@guilhem.se>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#----------------------------------------------------------------------
+
+import re
+import sys
+import csv
+from pathlib import Path
+import requests
+import xlrd
+
+# The authoritative source for county and municipality codes is Statistiska centralbyrån (SCB), cf.
+# https://www.skatteverket.se/privat/skatter/arbeteochinkomst/askattsedelochskattetabeller/koderforlankommunerochforsamlingar.4.18e1b10334ebe8bc80004187.html
+#
+# Unfortunately SCB doesn't provide a CSV, so we download their xls file and produce our own.
+# https://www.scb.se/hitta-statistik/regional-statistik-och-kartor/regionala-indelningar/lan-och-kommuner/lan-och-kommuner-i-kodnummerordning/
+r = requests.get('https://www.scb.se/contentassets/7a89e48960f741e08918e489ea36354a/kommunlankod_2024.xls')
+r.raise_for_status()
+
+if 'content-type' not in r.headers:
+ raise Exception('Missing Content-Type from response headers')
+if r.headers['content-type'] not in ['application/vnd.ms-excel', 'application/octet-stream']:
+ raise Exception(f"Unsupported Content-Type: {r.headers['content-type']}")
+
+xls = xlrd.open_workbook(file_contents=r.content)
+sheets = xls.sheet_names()
+if len(sheets) > 1:
+ print(f'WARN: XLS has {len(sheets)} > 1 sheets: ' + str(sheets), file=sys.stderr)
+sheet = xls.sheet_by_index(0)
+
+COUNTY_CODE_RE = re.compile('[0-9]{2}')
+MUNICIPALITY_CODE_RE = re.compile('[0-9]{4}')
+counties = []
+municipalities = []
+
+# https://xlrd.readthedocs.io/en/latest/api.html#xlrd-sheet
+for i in range(sheet.nrows):
+ if sheet.row_len(i) < 2:
+ continue
+ code = sheet.cell_value(i, 0)
+ if code is None:
+ continue
+ m1 = MUNICIPALITY_CODE_RE.fullmatch(code)
+ m2 = COUNTY_CODE_RE.fullmatch(code) if m1 is None else None
+ if m1 is None and m2 is None:
+ # skip headers
+ continue
+ name = sheet.cell_value(i, 1)
+ if name is None or name == '':
+ continue
+ row = { 'Code': code, 'Name': name }
+ if m1 is not None:
+ municipalities.append(row)
+ elif m2 is not None:
+ counties.append(row)
+
+basedir = Path(sys.argv[0]).parent
+def writeCSV(filename, data):
+ fieldnames = ['Code', 'Name']
+ path = basedir.joinpath(filename).with_suffix('.csv')
+ with path.open(mode='w', newline='') as fp:
+ writer = csv.DictWriter(fp, fieldnames=fieldnames, delimiter='\t',
+ quoting=csv.QUOTE_MINIMAL, dialect='unix')
+ writer.writeheader()
+ for row in data:
+ writer.writerow(row)
+ print(f'Wrote {len(data)} rows in {path}', file=sys.stderr)
+
+writeCSV('counties', counties)
+writeCSV('municipalities', municipalities)