aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGuilhem Moulin <guilhem@fripost.org>2026-03-06 07:38:04 +0100
committerGuilhem Moulin <guilhem@fripost.org>2026-03-06 09:29:31 +0100
commitf2393202a5343dcaeffdbec518aa241573185335 (patch)
tree56efe6f15a4f7b0bdde4b5b35140f3c5fb9cc9f0
parent906dd8a34ed4231cc73cdea217a5e9b31cd37cb6 (diff)
PostgreSQL: Add option to CLUSTER on geometry index.
See https://www.postgresql.org/docs/current/sql-cluster.html .
-rw-r--r--common.py2
-rw-r--r--config.yml4
-rw-r--r--import_source.py70
-rwxr-xr-xwebmap-import3
4 files changed, 76 insertions, 3 deletions
diff --git a/common.py b/common.py
index 040b90f..7b5639b 100644
--- a/common.py
+++ b/common.py
@@ -115,7 +115,7 @@ def parse_config(path : Optional[Path] = None,
for name, layerdefs in layers.items():
if isinstance(layerdefs, dict) and 'sources' not in layerdefs:
layers[name] = { 'sources': [layerdefs] }
- for k in ('description', 'create', 'publish', 'type'):
+ for k in ('description', 'create', 'publish', 'type', 'cluster-geometry'):
if k in layerdefs:
layers[name][k] = layerdefs.pop(k)
layerdefs = layers[name]
diff --git a/config.yml b/config.yml
index f6f5df1..bf42711 100644
--- a/config.yml
+++ b/config.yml
@@ -614,6 +614,10 @@ layers:
# # has a single source, then the sources singleton can be inlined.
# layer1_name:
# description: A string describing that layer
+#
+# # Whether to CLUSTER on the geometry index after importing
+# cluster-geometry: true
+#
# create:
# # Geometry Type for the output layer. Possible values are like ogr2ogr(1)'s -nlt
# # value, namely one of NONE, GEOMETRY, POINT, LINESTRING, POLYGON, MULTIPOINT,
diff --git a/import_source.py b/import_source.py
index 9e0393b..650c31d 100644
--- a/import_source.py
+++ b/import_source.py
@@ -234,6 +234,69 @@ def createOutputLayer(ds : gdal.Dataset,
lyr.SyncToDisk()
return lyr
+def clusterLayer(lyr : ogr.Layer,
+ index_name : Optional[str] = None,
+ column_name : Optional[str] = None,
+ analyze : bool = True) -> bool:
+ """Cluster a table according to an index. If no index name is given and a column name is
+ given instead, then the index involving the least number of columns (including the given
+ column) is chosen. If neither an index name nor a column name is given, then recluster the table
+ using the same index as before. An optional boolean (default value: True) indicates whether
+ to ANALYZE the table after clustering. See
+ https://www.postgresql.org/docs/current/sql-cluster.html .
+ Requires that the dataset driver is PostgreSQL."""
+ ds = lyr.GetDataset()
+ if ds.GetDriver().ShortName != 'PostgreSQL':
+ logging.warning('clusterLayer() called on a non-PostgreSQL dataset, ignoring')
+ return False
+
+ layername_esc = getEscapedTableName(lyr)
+ if index_name is None and column_name is not None:
+ # find out which indices involve lyr's column_name
+ query = 'WITH indices AS ('
+ query += 'SELECT i.relname AS index, array_agg(a.attname) AS columns '
+ query += 'FROM pg_class t, pg_class i, pg_index ix, pg_attribute a '
+ query += 'WHERE t.oid = ix.indrelid '
+ query += 'AND i.oid = ix.indexrelid '
+ query += 'AND a.attrelid = t.oid '
+ query += 'AND a.attnum = ANY(ix.indkey) '
+ query += 'AND t.relkind = \'r\' '
+ query += 'AND ix.indrelid = ' + escape_literal_str(layername_esc) + '::regclass '
+ query += 'GROUP BY 1) '
+ query += 'SELECT index, array_length(columns, 1) AS len '
+ query += 'FROM indices '
+ query += 'WHERE ' + escape_literal_str(column_name) + ' = ANY(columns)'
+ query += ' ORDER BY 2,1 LIMIT 1' # pick the index involving the least number of columns
+
+ logging.debug('ExecuteSQL(%s)', query)
+ with ds.ExecuteSQL(query) as res:
+ defn = res.GetLayerDefn()
+ i = defn.GetFieldIndex('index')
+ row = res.GetNextFeature()
+ if row is not None and row.IsFieldSetAndNotNull(i):
+ index_name = row.GetFieldAsString(i)
+
+ if index_name is None:
+ logging.warning('Layer %s has no index on column %s, cannot CLUSTER',
+ lyr.GetName(), column_name)
+ return False
+
+ query = 'CLUSTER ' + layername_esc
+ if index_name is not None:
+ query += ' USING ' + escape_identifier(index_name)
+ logging.debug('ExecuteSQL(%s)', query)
+ ds.ExecuteSQL(query)
+
+ if analyze:
+ # "Because the planner records statistics about the ordering of tables, it is
+ # advisable to run ANALYZE on the newly clustered table. Otherwise, the planner
+ # might make poor choices of query plans."
+ query = 'ANALYZE ' + layername_esc
+ logging.debug('ExecuteSQL(%s)', query)
+ ds.ExecuteSQL(query)
+
+ return True
+
# pylint: disable-next=too-many-branches
def validateOutputLayer(lyr : ogr.Layer,
srs : Optional[osr.SpatialReference] = None,
@@ -483,7 +546,8 @@ def importSources(lyr : ogr.Layer,
extent : ogr.Geometry|None = None,
dsoTransaction : bool = False,
lyrcache : ogr.Layer|None = None,
- force : bool = False) -> ImportStatus:
+ force : bool = False,
+ cluster_geometry : bool = False) -> ImportStatus:
"""Clear lyr and import source layers to it."""
dso = lyr.GetDataset()
@@ -534,6 +598,10 @@ def importSources(lyr : ogr.Layer,
# the transaction on lyr was already rolled back
lyrTransaction = False
+ if (rv == ImportStatus.IMPORT_SUCCESS and cluster_geometry
+ and lyr.GetLayerDefn().GetGeomType() != ogr.wkbNone):
+ clusterLayer(lyr, column_name=lyr.GetGeometryColumn())
+
except Exception: # pylint: disable=broad-exception-caught
rv = ImportStatus.IMPORT_ERROR
if isinstance(lyrTransaction, str):
diff --git a/webmap-import b/webmap-import
index b4552d5..2f0f5b4 100755
--- a/webmap-import
+++ b/webmap-import
@@ -278,7 +278,8 @@ def processOutputLayer(ds : gdal.Dataset,
cachedir=cachedir, extent=extent,
dsoTransaction=dsTransaction,
lyrcache=lyrcache,
- force=force)
+ force=force,
+ cluster_geometry=layerdef.get('cluster-geometry', False))
def validate_sources(layers : dict[str, Any]) -> None:
"""Mangle and validate layer sources and import definitions"""