about summary refs log tree commit diff stats
path: root/administrative-codes/csv2json
diff options
context:
space:
mode:
author Guilhem Moulin <guilhem@fripost.org> 2024-06-20 17:32:34 +0200
committer Guilhem Moulin <guilhem@fripost.org> 2024-06-21 15:06:58 +0200
commit d1f52a5e7ac2dd62c6348f17b02ccf324456c9da (patch)
tree 7b96836ed3ac4071d4edbbdd1cf0835868fdfc78 /administrative-codes/csv2json
parent e930cd95f3392b44152ae05b4189c65e833adaa3 (diff)
Add script to download administrative codes from SCB.
Unfortunately SCB doesn't provide CSV files, so we download their xls file and produce our own CSV files. We also add a conversion tool to turn these CSV files into a (single, compact) JSON file to be served via HTTP.
Diffstat (limited to 'administrative-codes/csv2json')
-rwxr-xr-x administrative-codes/csv2json 43
1 files changed, 43 insertions, 0 deletions
diff --git a/administrative-codes/csv2json b/administrative-codes/csv2json
new file mode 100755
index 0000000..7c22666
--- /dev/null
+++ b/administrative-codes/csv2json
@@ -0,0 +1,43 @@
+#!/usr/bin/python3
+
+#----------------------------------------------------------------------
+# Backend utilities for the Klimatanalys Norr project (compile county & municipality codes)
+# Copyright © 2024 Guilhem Moulin <info@guilhem.se>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#----------------------------------------------------------------------
+
+import sys
+import csv
+from pathlib import Path
+import json
+
+basedir = Path(sys.argv[0]).parent
+data = {}
+def readCSV(path):
+    with open(path, mode='r', newline='') as fp:
+        reader = csv.DictReader(fp, delimiter='\t', dialect='unix')
+        for row in reader:
+            code = row['Code']
+            if code in data:
+                raise Exception(f'Duplicate code {code}')
+            data[code] = row['Name']
+
+# The source (SCB) lists all codes in same file: they differ only in
+# length (2 digits for counties, 4 digits for municipalities) so it
+# doesn't hurt to merge them back in a single JSON dictionary. Also
+# having a single file simplifies the client-side logic.
+for path in sys.argv[1:]:
+    readCSV(path)
+json.dump(data, sys.stdout, ensure_ascii=False, sort_keys=True, separators=(',', ':'))