about summary refs log tree commit diff stats
path: root/administrative-codes/update
diff options
context:
space:
mode:
Diffstat (limited to 'administrative-codes/update')
-rwxr-xr-x administrative-codes/update 86
1 files changed, 86 insertions, 0 deletions
diff --git a/administrative-codes/update b/administrative-codes/update
new file mode 100755
index 0000000..855f73b
--- /dev/null
+++ b/administrative-codes/update
@@ -0,0 +1,86 @@
+#!/usr/bin/python3
+
+#----------------------------------------------------------------------
+# Backend utilities for the Klimatanalys Norr project (get county & municipality codes)
+# Copyright © 2024 Guilhem Moulin <info@guilhem.se>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#----------------------------------------------------------------------
+
+import re
+import sys
+import csv
+from pathlib import Path
+import requests
+import xlrd
+
+# The authoritative source for county and municipality codes is Statistiska centralbyrån (SCB), cf.
+# https://www.skatteverket.se/privat/skatter/arbeteochinkomst/askattsedelochskattetabeller/koderforlankommunerochforsamlingar.4.18e1b10334ebe8bc80004187.html
+#
+# Unfortunately SCB doesn't provide a CSV, so we download their xls file and produce our own.
+# https://www.scb.se/hitta-statistik/regional-statistik-och-kartor/regionala-indelningar/lan-och-kommuner/lan-och-kommuner-i-kodnummerordning/
+# Download the spreadsheet.  An explicit (connect, read) timeout in seconds is
+# passed because requests does not time out by default, so a stalled server
+# would otherwise hang the script indefinitely.
+r = requests.get('https://www.scb.se/contentassets/7a89e48960f741e08918e489ea36354a/kommunlankod_2024.xls',
+                 timeout=(10, 60))
+r.raise_for_status()
+
+# Refuse anything that is not served as an Excel file (or as opaque bytes).
+# Only the media type itself is compared: optional parameters such as
+# "; charset=..." and letter case are ignored.  The error message still shows
+# the header exactly as received.
+if 'content-type' not in r.headers:
+    raise Exception('Missing Content-Type from response headers')
+content_type = r.headers['content-type'].split(';', 1)[0].strip().lower()
+if content_type not in ['application/vnd.ms-excel', 'application/octet-stream']:
+    raise Exception(f"Unsupported Content-Type: {r.headers['content-type']}")
+
+# Parse the downloaded bytes in memory.  Only the first sheet is used below,
+# so warn on stderr when the workbook contains more than one.
+workbook = xlrd.open_workbook(file_contents=r.content)
+sheet_names = workbook.sheet_names()
+if len(sheet_names) > 1:
+    print(f'WARN: XLS has {len(sheet_names)} > 1 sheets: {sheet_names}', file=sys.stderr)
+sheet = workbook.sheet_by_index(0)
+
+# A county code is exactly two digits, a municipality code exactly four.
+COUNTY_CODE_RE = re.compile(r'[0-9]{2}')
+MUNICIPALITY_CODE_RE = re.compile(r'[0-9]{4}')
+counties = []
+municipalities = []
+
+# Walk every row of the sheet and collect {'Code', 'Name'} records from the
+# first two columns, sorting them into counties or municipalities by the
+# shape of the code.
+# https://xlrd.readthedocs.io/en/latest/api.html#xlrd-sheet
+for i in range(sheet.nrows):
+    if sheet.row_len(i) < 2:
+        continue
+    code = sheet.cell_value(i, 0)
+    # Only text cells can hold a code.  xlrd returns non-string values
+    # (e.g. floats) for numeric cells, and passing those to fullmatch()
+    # would raise TypeError, so such rows are skipped like headers.
+    if not isinstance(code, str):
+        continue
+    name = sheet.cell_value(i, 1)
+    if name is None or name == '':
+        continue
+    row = {'Code': code, 'Name': name}
+    if MUNICIPALITY_CODE_RE.fullmatch(code) is not None:
+        municipalities.append(row)
+    elif COUNTY_CODE_RE.fullmatch(code) is not None:
+        counties.append(row)
+    # any other value in the first column is a header or note: skip it
+
+# Output files are written to the directory part of the path the script was
+# invoked with.
+basedir = Path(sys.argv[0]).parent
+def writeCSV(filename, data):
+    """Write data to a tab-separated file in basedir.
+
+    filename is joined to basedir and its suffix is set to '.csv' with
+    Path.with_suffix().  data is a sized collection of dicts with the keys
+    'Code' and 'Name'.  A header row is written first, then one line per
+    dict; fields are quoted only when needed and lines end with '\n'.
+    A one-line summary is printed to stderr once the file is closed.
+    """
+    fieldnames = ['Code', 'Name']
+    path = basedir.joinpath(filename).with_suffix('.csv')
+    # The encoding is pinned to UTF-8 so the output does not depend on the
+    # locale of whoever runs the script; names are expected to contain
+    # non-ASCII letters (Swedish å/ä/ö).
+    with path.open(mode='w', newline='', encoding='utf-8') as fp:
+        writer = csv.DictWriter(fp, fieldnames=fieldnames, delimiter='\t',
+                                quoting=csv.QUOTE_MINIMAL, dialect='unix')
+        writer.writeheader()
+        writer.writerows(data)
+    print(f'Wrote {len(data)} rows in {path}', file=sys.stderr)
+
+# Emit one CSV file per kind of administrative unit, counties first.
+for stem, rows in (('counties', counties), ('municipalities', municipalities)):
+    writeCSV(stem, rows)