From d1f52a5e7ac2dd62c6348f17b02ccf324456c9da Mon Sep 17 00:00:00 2001 From: Guilhem Moulin Date: Thu, 20 Jun 2024 17:32:34 +0200 Subject: Add script to download administrative codes from SCB. Unfortunately SCB doesn't provide CSV files, so we download their xls file and produce our own CSV files. We also add a conversion tool to turn these CSV files into a (single, compact) JSON file to be served via HTTP. --- administrative-codes/.gitignore | 3 + administrative-codes/Makefile | 19 +++ administrative-codes/counties.csv | 22 +++ administrative-codes/csv2json | 43 +++++ administrative-codes/municipalities.csv | 291 ++++++++++++++++++++++++++++++++ administrative-codes/update | 86 ++++++++++ 6 files changed, 464 insertions(+) create mode 100644 administrative-codes/.gitignore create mode 100644 administrative-codes/Makefile create mode 100644 administrative-codes/counties.csv create mode 100755 administrative-codes/csv2json create mode 100644 administrative-codes/municipalities.csv create mode 100755 administrative-codes/update diff --git a/administrative-codes/.gitignore b/administrative-codes/.gitignore new file mode 100644 index 0000000..d9d14dc --- /dev/null +++ b/administrative-codes/.gitignore @@ -0,0 +1,3 @@ +/administrative-codes.json +/administrative-codes.json.br +/administrative-codes.json.gz diff --git a/administrative-codes/Makefile b/administrative-codes/Makefile new file mode 100644 index 0000000..c0008bc --- /dev/null +++ b/administrative-codes/Makefile @@ -0,0 +1,19 @@ +OUT = administrative-codes +CSV_SOURCES = counties.csv municipalities.csv +GENERATED_FILES = $(addsuffix .json,$(OUT)) $(addsuffix .json.br,$(OUT)) +all: $(GENERATED_FILES) + +$(CSV_SOURCES): %.csv: + ./update + +%.json: $(CSV_SOURCES) + ./csv2json $^ >$@ + +# XXX The brotli(1) executable doesn't support mode=MODE_TEXT +%.json.br: %.json + brotli --best --keep --output=$@ -- $^ + +clean: + rm -f -- $(GENERATED_FILES) + +.PHONY: update clean diff --git 
a/administrative-codes/counties.csv b/administrative-codes/counties.csv new file mode 100644 index 0000000..ee34965 --- /dev/null +++ b/administrative-codes/counties.csv @@ -0,0 +1,22 @@ +Code Name +01 Stockholms län +03 Uppsala län +04 Södermanlands län +05 Östergötlands län +06 Jönköpings län +07 Kronobergs län +08 Kalmar län +09 Gotlands län +10 Blekinge län +12 Skåne län +13 Hallands län +14 Västra Götalands län +17 Värmlands län +18 Örebro län +19 Västmanlands län +20 Dalarnas län +21 Gävleborgs län +22 Västernorrlands län +23 Jämtlands län +24 Västerbottens län +25 Norrbottens län diff --git a/administrative-codes/csv2json b/administrative-codes/csv2json new file mode 100755 index 0000000..7c22666 --- /dev/null +++ b/administrative-codes/csv2json @@ -0,0 +1,43 @@ +#!/usr/bin/python3 + +#---------------------------------------------------------------------- +# Backend utilities for the Klimatanalys Norr project (compile county & municipality codes) +# Copyright © 2024 Guilhem Moulin +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
#----------------------------------------------------------------------

import sys
import csv
from pathlib import Path
import json

# Mapping from administrative code to name, filled in by readCSV().
data = {}


def readCSV(path):
    """Merge the Code/Name pairs of a tab-separated file into `data`.

    The file must start with a header row naming at least the columns
    'Code' and 'Name'.  It is always decoded as UTF-8 so that the result
    does not depend on the locale the script happens to run under (the
    names contain non-ASCII characters).

    Raises ValueError if a code is already present in `data`, whether it
    comes from this file or from a file read earlier.
    """
    with open(path, mode='r', encoding='utf-8', newline='') as fp:
        reader = csv.DictReader(fp, delimiter='\t', dialect='unix')
        for row in reader:
            code = row['Code']
            if code in data:
                raise ValueError(f'Duplicate code {code}')
            data[code] = row['Name']


def main(paths):
    """Read every CSV file in `paths` and print the merged mapping as compact JSON."""
    # The source (SCB) lists all codes in same file: they differ only in
    # length (2 digits for counties, 4 digits for municipalities) so it
    # doesn't hurt to merge them back in a single JSON dictionary.  Also
    # having a single file simplifies the client-side logic.
    for path in paths:
        readCSV(path)
    json.dump(data, sys.stdout, ensure_ascii=False, sort_keys=True, separators=(',', ':'))


# Only run when executed as a script, so that importing the module has no
# side effects.
if __name__ == '__main__':
    main(sys.argv[1:])
+0682 Nässjö +0683 Värnamo +0684 Sävsjö +0685 Vetlanda +0686 Eksjö +0687 Tranås +0760 Uppvidinge +0761 Lessebo +0763 Tingsryd +0764 Alvesta +0765 Älmhult +0767 Markaryd +0780 Växjö +0781 Ljungby +0821 Högsby +0834 Torsås +0840 Mörbylånga +0860 Hultsfred +0861 Mönsterås +0862 Emmaboda +0880 Kalmar +0881 Nybro +0882 Oskarshamn +0883 Västervik +0884 Vimmerby +0885 Borgholm +0980 Gotland +1060 Olofström +1080 Karlskrona +1081 Ronneby +1082 Karlshamn +1083 Sölvesborg +1214 Svalöv +1230 Staffanstorp +1231 Burlöv +1233 Vellinge +1256 Östra Göinge +1257 Örkelljunga +1260 Bjuv +1261 Kävlinge +1262 Lomma +1263 Svedala +1264 Skurup +1265 Sjöbo +1266 Hörby +1267 Höör +1270 Tomelilla +1272 Bromölla +1273 Osby +1275 Perstorp +1276 Klippan +1277 Åstorp +1278 Båstad +1280 Malmö +1281 Lund +1282 Landskrona +1283 Helsingborg +1284 Höganäs +1285 Eslöv +1286 Ystad +1287 Trelleborg +1290 Kristianstad +1291 Simrishamn +1292 Ängelholm +1293 Hässleholm +1315 Hylte +1380 Halmstad +1381 Laholm +1382 Falkenberg +1383 Varberg +1384 Kungsbacka +1401 Härryda +1402 Partille +1407 Öckerö +1415 Stenungsund +1419 Tjörn +1421 Orust +1427 Sotenäs +1430 Munkedal +1435 Tanum +1438 Dals-Ed +1439 Färgelanda +1440 Ale +1441 Lerum +1442 Vårgårda +1443 Bollebygd +1444 Grästorp +1445 Essunga +1446 Karlsborg +1447 Gullspång +1452 Tranemo +1460 Bengtsfors +1461 Mellerud +1462 Lilla Edet +1463 Mark +1465 Svenljunga +1466 Herrljunga +1470 Vara +1471 Götene +1472 Tibro +1473 Töreboda +1480 Göteborg +1481 Mölndal +1482 Kungälv +1484 Lysekil +1485 Uddevalla +1486 Strömstad +1487 Vänersborg +1488 Trollhättan +1489 Alingsås +1490 Borås +1491 Ulricehamn +1492 Åmål +1493 Mariestad +1494 Lidköping +1495 Skara +1496 Skövde +1497 Hjo +1498 Tidaholm +1499 Falköping +1715 Kil +1730 Eda +1737 Torsby +1760 Storfors +1761 Hammarö +1762 Munkfors +1763 Forshaga +1764 Grums +1765 Årjäng +1766 Sunne +1780 Karlstad +1781 Kristinehamn +1782 Filipstad +1783 Hagfors +1784 Arvika +1785 Säffle +1814 Lekeberg +1860 Laxå +1861 Hallsberg 
+1862 Degerfors +1863 Hällefors +1864 Ljusnarsberg +1880 Örebro +1881 Kumla +1882 Askersund +1883 Karlskoga +1884 Nora +1885 Lindesberg +1904 Skinnskatteberg +1907 Surahammar +1960 Kungsör +1961 Hallstahammar +1962 Norberg +1980 Västerås +1981 Sala +1982 Fagersta +1983 Köping +1984 Arboga +2021 Vansbro +2023 Malung-Sälen +2026 Gagnef +2029 Leksand +2031 Rättvik +2034 Orsa +2039 Älvdalen +2061 Smedjebacken +2062 Mora +2080 Falun +2081 Borlänge +2082 Säter +2083 Hedemora +2084 Avesta +2085 Ludvika +2101 Ockelbo +2104 Hofors +2121 Ovanåker +2132 Nordanstig +2161 Ljusdal +2180 Gävle +2181 Sandviken +2182 Söderhamn +2183 Bollnäs +2184 Hudiksvall +2260 Ånge +2262 Timrå +2280 Härnösand +2281 Sundsvall +2282 Kramfors +2283 Sollefteå +2284 Örnsköldsvik +2303 Ragunda +2305 Bräcke +2309 Krokom +2313 Strömsund +2321 Åre +2326 Berg +2361 Härjedalen +2380 Östersund +2401 Nordmaling +2403 Bjurholm +2404 Vindeln +2409 Robertsfors +2417 Norsjö +2418 Malå +2421 Storuman +2422 Sorsele +2425 Dorotea +2460 Vännäs +2462 Vilhelmina +2463 Åsele +2480 Umeå +2481 Lycksele +2482 Skellefteå +2505 Arvidsjaur +2506 Arjeplog +2510 Jokkmokk +2513 Överkalix +2514 Kalix +2518 Övertorneå +2521 Pajala +2523 Gällivare +2560 Älvsbyn +2580 Luleå +2581 Piteå +2582 Boden +2583 Haparanda +2584 Kiruna diff --git a/administrative-codes/update b/administrative-codes/update new file mode 100755 index 0000000..855f73b --- /dev/null +++ b/administrative-codes/update @@ -0,0 +1,86 @@ +#!/usr/bin/python3 + +#---------------------------------------------------------------------- +# Backend utilities for the Klimatanalys Norr project (get county & municipality codes) +# Copyright © 2024 Guilhem Moulin +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#----------------------------------------------------------------------

import re
import sys
import csv
from pathlib import Path
import requests
import xlrd

# The authoritative source for county and municipality codes is Statistiska centralbyrån (SCB), cf.
# https://www.skatteverket.se/privat/skatter/arbeteochinkomst/askattsedelochskattetabeller/koderforlankommunerochforsamlingar.4.18e1b10334ebe8bc80004187.html
#
# Unfortunately SCB doesn't provide a CSV, so we download their xls file and produce our own.
# https://www.scb.se/hitta-statistik/regional-statistik-och-kartor/regionala-indelningar/lan-och-kommuner/lan-och-kommuner-i-kodnummerordning/
SOURCE_URL = 'https://www.scb.se/contentassets/7a89e48960f741e08918e489ea36354a/kommunlankod_2024.xls'

# Media types accepted for the downloaded spreadsheet.
ACCEPTED_MEDIA_TYPES = ('application/vnd.ms-excel', 'application/octet-stream')

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 60

COUNTY_CODE_RE = re.compile(r'[0-9]{2}')
MUNICIPALITY_CODE_RE = re.compile(r'[0-9]{4}')

# The CSV files are written next to this script.
basedir = Path(sys.argv[0]).parent


def fetchWorkbook(url=SOURCE_URL, timeout=REQUEST_TIMEOUT):
    """Download the spreadsheet at `url` and return it as an xlrd workbook.

    Raises requests.HTTPError on an unsuccessful HTTP status, and
    RuntimeError if the Content-Type header is missing or its media type
    is not listed in ACCEPTED_MEDIA_TYPES.
    """
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    if 'content-type' not in r.headers:
        raise RuntimeError('Missing Content-Type from response headers')
    content_type = r.headers['content-type']
    # Compare the bare media type only: the header value may carry
    # parameters (e.g. "; charset=...") and media types are case-insensitive.
    media_type = content_type.split(';', 1)[0].strip().lower()
    if media_type not in ACCEPTED_MEDIA_TYPES:
        raise RuntimeError(f"Unsupported Content-Type: {content_type}")

    return xlrd.open_workbook(file_contents=r.content)


def parseSheet(sheet):
    """Extract the county and municipality rows from an xlrd sheet.

    Rows whose first cell is not a 2-digit (county) or 4-digit
    (municipality) code, or whose second cell is empty, are skipped.

    Returns a pair (counties, municipalities) of lists of
    {'Code': ..., 'Name': ...} dictionaries, in sheet order.
    """
    counties = []
    municipalities = []

    # https://xlrd.readthedocs.io/en/latest/api.html#xlrd-sheet
    for i in range(sheet.nrows):
        if sheet.row_len(i) < 2:
            continue
        code = sheet.cell_value(i, 0)
        if not isinstance(code, str):
            # Cell values are not guaranteed to be strings (e.g. numeric
            # cells) and fullmatch() raises TypeError on anything else.
            continue
        code = code.strip()
        if MUNICIPALITY_CODE_RE.fullmatch(code) is not None:
            target = municipalities
        elif COUNTY_CODE_RE.fullmatch(code) is not None:
            target = counties
        else:
            # skip headers
            continue
        name = sheet.cell_value(i, 1)
        if name is None:
            continue
        name = str(name).strip()
        if name == '':
            continue
        target.append({'Code': code, 'Name': name})

    return counties, municipalities


def writeCSV(filename, data):
    """Write `data` as a tab-separated, UTF-8 encoded '<filename>.csv' file in `basedir`.

    `data` is a sequence of dictionaries with the keys 'Code' and 'Name';
    a header row is written first.
    """
    fieldnames = ['Code', 'Name']
    path = basedir.joinpath(filename).with_suffix('.csv')
    # Explicit encoding: the names are not ASCII and the locale default
    # encoding is not necessarily UTF-8.
    with path.open(mode='w', encoding='utf-8', newline='') as fp:
        writer = csv.DictWriter(fp, fieldnames=fieldnames, delimiter='\t',
                                quoting=csv.QUOTE_MINIMAL, dialect='unix')
        writer.writeheader()
        writer.writerows(data)
    print(f'Wrote {len(data)} rows in {path}', file=sys.stderr)


def main():
    """Download the SCB spreadsheet and regenerate counties.csv and municipalities.csv."""
    xls = fetchWorkbook()
    sheets = xls.sheet_names()
    if len(sheets) > 1:
        print(f'WARN: XLS has {len(sheets)} > 1 sheets: ' + str(sheets), file=sys.stderr)

    counties, municipalities = parseSheet(xls.sheet_by_index(0))
    if not counties or not municipalities:
        # The sheet layout probably changed: don't clobber the existing
        # CSV files with empty ones.
        raise RuntimeError(f'Found {len(counties)} counties and {len(municipalities)} '
                           f'municipalities in {SOURCE_URL}, refusing to write CSV files')

    writeCSV('counties', counties)
    writeCSV('municipalities', municipalities)


if __name__ == '__main__':
    main()