diff options
-rw-r--r--  common.py        |  2
-rw-r--r--  config.yml       |  4
-rw-r--r--  import_source.py | 70
-rwxr-xr-x  webmap-import    |  3
4 files changed, 76 insertions, 3 deletions
@@ -115,7 +115,7 @@ def parse_config(path : Optional[Path] = None, for name, layerdefs in layers.items(): if isinstance(layerdefs, dict) and 'sources' not in layerdefs: layers[name] = { 'sources': [layerdefs] } - for k in ('description', 'create', 'publish', 'type'): + for k in ('description', 'create', 'publish', 'type', 'cluster-geometry'): if k in layerdefs: layers[name][k] = layerdefs.pop(k) layerdefs = layers[name] @@ -614,6 +614,10 @@ layers: # # has a single source, then the sources singleton can be inlined. # layer1_name: # description: A string describing that layer +# +# # Whether to CLUSTER on the geometry index after importing +# cluster-geometry: true +# # create: # # Geometry Type for the output layer. Possible values are like ogr2ogr(1)'s -nlt # # value, namely one of NONE, GEOMETRY, POINT, LINESTRING, POLYGON, MULTIPOINT, diff --git a/import_source.py b/import_source.py index 9e0393b..650c31d 100644 --- a/import_source.py +++ b/import_source.py @@ -234,6 +234,69 @@ def createOutputLayer(ds : gdal.Dataset, lyr.SyncToDisk() return lyr +def clusterLayer(lyr : ogr.Layer, + index_name : Optional[str] = None, + column_name : Optional[str] = None, + analyze : bool = True) -> bool: + """Cluster a table according to an index. If no index name is given and a column name is + given instead, then the index involving the least number of columns (including the given + column) is chosen. If neither index name or column name is given, then recluster the table + using the same index as before. An optional boolean (default value: True) indicates whether + to ANALYZE the table after clustering. See + https://www.postgresql.org/docs/current/sql-cluster.html . 
+    Requires that the dataset driver is PostgreSQL."""
+    ds = lyr.GetDataset()
+    if ds.GetDriver().ShortName != 'PostgreSQL':
+        logging.warning('clusterLayer() called on a non-PostgreSQL dataset, ignoring')
+        return False
+
+    layername_esc = getEscapedTableName(lyr)
+    if index_name is None and column_name is not None:
+        # find out which indices involve lyr's column_name
+        query = 'WITH indices AS ('
+        query += 'SELECT i.relname AS index, array_agg(a.attname) AS columns '
+        query += 'FROM pg_class t, pg_class i, pg_index ix, pg_attribute a '
+        query += 'WHERE t.oid = ix.indrelid '
+        query += 'AND i.oid = ix.indexrelid '
+        query += 'AND a.attrelid = t.oid '
+        query += 'AND a.attnum = ANY(ix.indkey) '
+        query += 'AND t.relkind = \'r\' '
+        query += 'AND ix.indrelid = ' + escape_literal_str(layername_esc) + '::regclass '
+        query += 'GROUP BY 1) '
+        query += 'SELECT index, array_length(columns, 1) AS len '
+        query += 'FROM indices '
+        query += 'WHERE ' + escape_literal_str(column_name) + ' = ANY(columns) '
+        query += 'ORDER BY 2,1 LIMIT 1' # pick the index involving the least number of columns
+
+        logging.debug('ExecuteSQL(%s)', query)
+        with ds.ExecuteSQL(query) as res:
+            defn = res.GetLayerDefn()
+            i = defn.GetFieldIndex('index')
+            row = res.GetNextFeature()
+            if row is not None and row.IsFieldSetAndNotNull(i):
+                index_name = row.GetFieldAsString(i)
+
+        if index_name is None:
+            logging.warning('Layer %s has no index on column %s, cannot CLUSTER',
+                            lyr.GetName(), column_name)
+            return False
+
+    query = 'CLUSTER ' + layername_esc
+    if index_name is not None:
+        query += ' USING ' + escape_identifier(index_name)
+    logging.debug('ExecuteSQL(%s)', query)
+    ds.ExecuteSQL(query)
+
+    if analyze:
+        # "Because the planner records statistics about the ordering of tables, it is
+        # advisable to run ANALYZE on the newly clustered table. Otherwise, the planner
+        # might make poor choices of query plans."
+ query = 'ANALYZE ' + layername_esc + logging.debug('ExecuteSQL(%s)', query) + ds.ExecuteSQL(query) + + return True + # pylint: disable-next=too-many-branches def validateOutputLayer(lyr : ogr.Layer, srs : Optional[osr.SpatialReference] = None, @@ -483,7 +546,8 @@ def importSources(lyr : ogr.Layer, extent : ogr.Geometry|None = None, dsoTransaction : bool = False, lyrcache : ogr.Layer|None = None, - force : bool = False) -> ImportStatus: + force : bool = False, + cluster_geometry : bool = False) -> ImportStatus: """Clear lyr and import source layers to it.""" dso = lyr.GetDataset() @@ -534,6 +598,10 @@ def importSources(lyr : ogr.Layer, # the transaction on lyr was already rolled back lyrTransaction = False + if (rv == ImportStatus.IMPORT_SUCCESS and cluster_geometry + and lyr.GetLayerDefn().GetGeomType() != ogr.wkbNone): + clusterLayer(lyr, column_name=lyr.GetGeometryColumn()) + except Exception: # pylint: disable=broad-exception-caught rv = ImportStatus.IMPORT_ERROR if isinstance(lyrTransaction, str): diff --git a/webmap-import b/webmap-import index b4552d5..2f0f5b4 100755 --- a/webmap-import +++ b/webmap-import @@ -278,7 +278,8 @@ def processOutputLayer(ds : gdal.Dataset, cachedir=cachedir, extent=extent, dsoTransaction=dsTransaction, lyrcache=lyrcache, - force=force) + force=force, + cluster_geometry=layerdef.get('cluster-geometry', False)) def validate_sources(layers : dict[str, Any]) -> None: """Mangle and validate layer sources and import definitions""" |
