aboutsummaryrefslogtreecommitdiffstats
path: root/import_source.py
diff options
context:
space:
mode:
authorGuilhem Moulin <guilhem@fripost.org>2026-03-06 07:38:04 +0100
committerGuilhem Moulin <guilhem@fripost.org>2026-03-06 09:29:31 +0100
commitf2393202a5343dcaeffdbec518aa241573185335 (patch)
tree56efe6f15a4f7b0bdde4b5b35140f3c5fb9cc9f0 /import_source.py
parent906dd8a34ed4231cc73cdea217a5e9b31cd37cb6 (diff)
PostgreSQL: Add option to CLUSTER on geometry index.
See https://www.postgresql.org/docs/current/sql-cluster.html .
Diffstat (limited to 'import_source.py')
-rw-r--r--import_source.py70
1 files changed, 69 insertions, 1 deletions
diff --git a/import_source.py b/import_source.py
index 9e0393b..650c31d 100644
--- a/import_source.py
+++ b/import_source.py
@@ -234,6 +234,69 @@ def createOutputLayer(ds : gdal.Dataset,
lyr.SyncToDisk()
return lyr
+def clusterLayer(lyr : ogr.Layer,
+ index_name : Optional[str] = None,
+ column_name : Optional[str] = None,
+ analyze : bool = True) -> bool:
+ """Cluster a table according to an index. If no index name is given and a column name is
+ given instead, then the index involving the least number of columns (including the given
+ column) is chosen. If neither index name or column name is given, then recluster the table
+ using the same index as before. An optional boolean (default value: True) indicates whether
+ to ANALYZE the table after clustering. See
+ https://www.postgresql.org/docs/current/sql-cluster.html .
+ Requires that the dataset driver is PostgreSQL."""
+ ds = lyr.GetDataset()
+ if ds.GetDriver().ShortName != 'PostgreSQL':
+ logging.warning('clusterLayer() called on a non-PostgreSQL dataset, ignoring')
+ return False
+
+ layername_esc = getEscapedTableName(lyr)
+ if index_name is None and column_name is not None:
+ # find out which indices involve lyr's column_name
+ query = 'WITH indices AS ('
+ query += 'SELECT i.relname AS index, array_agg(a.attname) AS columns '
+ query += 'FROM pg_class t, pg_class i, pg_index ix, pg_attribute a '
+ query += 'WHERE t.oid = ix.indrelid '
+ query += 'AND i.oid = ix.indexrelid '
+ query += 'AND a.attrelid = t.oid '
+ query += 'AND a.attnum = ANY(ix.indkey) '
+ query += 'AND t.relkind = \'r\' '
+ query += 'AND ix.indrelid = ' + escape_literal_str(layername_esc) + '::regclass '
+ query += 'GROUP BY 1) '
+ query += 'SELECT index, array_length(columns, 1) AS len '
+ query += 'FROM indices '
+ query += 'WHERE ' + escape_literal_str(column_name) + ' = ANY(columns)'
+ query += 'ORDER BY 2,1 LIMIT 1' # pick the index involving the least number of columns
+
+ logging.debug('ExecuteSQL(%s)', query)
+ with ds.ExecuteSQL(query) as res:
+ defn = res.GetLayerDefn()
+ i = defn.GetFieldIndex('index')
+ row = res.GetNextFeature()
+ if row is not None and row.IsFieldSetAndNotNull(i):
+ index_name = row.GetFieldAsString(i)
+
+ if index_name is None:
+ logging.warning('Layer %s has no index on column %s, cannot CLUSTER',
+ lyr.GetName(), column_name)
+ return False
+
+ query = 'CLUSTER ' + layername_esc
+ if index_name is not None:
+ query += ' USING ' + escape_identifier(index_name)
+ logging.debug('ExecuteSQL(%s)', query)
+ ds.ExecuteSQL(query)
+
+ if analyze:
+ # "Because the planner records statistics about the ordering of tables, it is
+ # advisable to run ANALYZE on the newly clustered table. Otherwise, the planner
+ # might make poor choices of query plans."
+ query = 'ANALYZE ' + layername_esc
+ logging.debug('ExecuteSQL(%s)', query)
+ ds.ExecuteSQL(query)
+
+ return True
+
# pylint: disable-next=too-many-branches
def validateOutputLayer(lyr : ogr.Layer,
srs : Optional[osr.SpatialReference] = None,
@@ -483,7 +546,8 @@ def importSources(lyr : ogr.Layer,
extent : ogr.Geometry|None = None,
dsoTransaction : bool = False,
lyrcache : ogr.Layer|None = None,
- force : bool = False) -> ImportStatus:
+ force : bool = False,
+ cluster_geometry : bool = False) -> ImportStatus:
"""Clear lyr and import source layers to it."""
dso = lyr.GetDataset()
@@ -534,6 +598,10 @@ def importSources(lyr : ogr.Layer,
# the transaction on lyr was already rolled back
lyrTransaction = False
+ if (rv == ImportStatus.IMPORT_SUCCESS and cluster_geometry
+ and lyr.GetLayerDefn().GetGeomType() != ogr.wkbNone):
+ clusterLayer(lyr, column_name=lyr.GetGeometryColumn())
+
except Exception: # pylint: disable=broad-exception-caught
rv = ImportStatus.IMPORT_ERROR
if isinstance(lyrTransaction, str):