aboutsummaryrefslogtreecommitdiffstats
path: root/administrative-codes/update
diff options
context:
space:
mode:
author Guilhem Moulin <guilhem@fripost.org> 2024-06-20 17:32:34 +0200
committer Guilhem Moulin <guilhem@fripost.org> 2024-06-21 15:06:58 +0200
commit d1f52a5e7ac2dd62c6348f17b02ccf324456c9da (patch)
tree 7b96836ed3ac4071d4edbbdd1cf0835868fdfc78 /administrative-codes/update
parent e930cd95f3392b44152ae05b4189c65e833adaa3 (diff)
Add script to download administrative codes from SCB.
Unfortunately SCB doesn't provide CSV files, so we download their xls file and produce our own CSV files. We also add a conversion tool to turn these CSV files into a (single, compact) JSON file to be served via HTTP.
Diffstat (limited to 'administrative-codes/update')
-rwxr-xr-x administrative-codes/update 86
1 files changed, 86 insertions, 0 deletions
diff --git a/administrative-codes/update b/administrative-codes/update
new file mode 100755
index 0000000..855f73b
--- /dev/null
+++ b/administrative-codes/update
@@ -0,0 +1,86 @@
+#!/usr/bin/python3
+
+#----------------------------------------------------------------------
+# Backend utilities for the Klimatanalys Norr project (get county & municipality codes)
+# Copyright © 2024 Guilhem Moulin <info@guilhem.se>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#----------------------------------------------------------------------
+
+import re
+import sys
+import csv
+from pathlib import Path
+import requests
+import xlrd
+
+# The authoritative source for county and municipality codes is Statistiska centralbyrån (SCB), cf.
+# https://www.skatteverket.se/privat/skatter/arbeteochinkomst/askattsedelochskattetabeller/koderforlankommunerochforsamlingar.4.18e1b10334ebe8bc80004187.html
+#
+# Unfortunately SCB doesn't provide a CSV, so we download their xls file and produce our own.
+# https://www.scb.se/hitta-statistik/regional-statistik-och-kartor/regionala-indelningar/lan-och-kommuner/lan-och-kommuner-i-kodnummerordning/
+r = requests.get('https://www.scb.se/contentassets/7a89e48960f741e08918e489ea36354a/kommunlankod_2024.xls')
+r.raise_for_status()
+
+if 'content-type' not in r.headers:
+ raise Exception('Missing Content-Type from response headers')
+if r.headers['content-type'] not in ['application/vnd.ms-excel', 'application/octet-stream']:
+ raise Exception(f"Unsupported Content-Type: {r.headers['content-type']}")
+
+# Parse the workbook directly from the response body and use its first
+# sheet; if the workbook has more than one sheet a warning is printed on
+# stderr and processing continues with the first one.
+xls = xlrd.open_workbook(file_contents=r.content)
+sheets = xls.sheet_names()
+if len(sheets) > 1:
+ print(f'WARN: XLS has {len(sheets)} > 1 sheets: ' + str(sheets), file=sys.stderr)
+sheet = xls.sheet_by_index(0)
+
+COUNTY_CODE_RE = re.compile('[0-9]{2}')
+MUNICIPALITY_CODE_RE = re.compile('[0-9]{4}')
+counties = []
+municipalities = []
+
+# https://xlrd.readthedocs.io/en/latest/api.html#xlrd-sheet
+for i in range(sheet.nrows):
+ if sheet.row_len(i) < 2:
+ continue
+ code = sheet.cell_value(i, 0)
+ if code is None:
+ continue
+ m1 = MUNICIPALITY_CODE_RE.fullmatch(code)
+ m2 = COUNTY_CODE_RE.fullmatch(code) if m1 is None else None
+ if m1 is None and m2 is None:
+ # skip headers
+ continue
+ name = sheet.cell_value(i, 1)
+ if name is None or name == '':
+ continue
+ row = { 'Code': code, 'Name': name }
+ if m1 is not None:
+ municipalities.append(row)
+ elif m2 is not None:
+ counties.append(row)
+
+basedir = Path(sys.argv[0]).parent
+def writeCSV(filename, data):
+ fieldnames = ['Code', 'Name']
+ path = basedir.joinpath(filename).with_suffix('.csv')
+ with path.open(mode='w', newline='') as fp:
+ writer = csv.DictWriter(fp, fieldnames=fieldnames, delimiter='\t',
+ quoting=csv.QUOTE_MINIMAL, dialect='unix')
+ writer.writeheader()
+ for row in data:
+ writer.writerow(row)
+ print(f'Wrote {len(data)} rows in {path}', file=sys.stderr)
+
+writeCSV('counties', counties)
+writeCSV('municipalities', municipalities)