Skip to content

Commit

Permalink
Merge pull request #344 from rustprooflabs/custom-indexing
Browse files Browse the repository at this point in the history
Enable selective indexing
  • Loading branch information
rustprooflabs authored Jul 13, 2023
2 parents 4dbd10e + 6b31f1d commit a7d3edc
Show file tree
Hide file tree
Showing 86 changed files with 1,049 additions and 247 deletions.
10 changes: 1 addition & 9 deletions docker/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def verify_checksum(md5_file, path):


def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
layerset_path, sp_gist, replication):
layerset_path, replication):
"""Sets environment variables needed by PgOSM Flex. Also creates DB
record in `osm.pgosm_flex` table.
Expand All @@ -104,8 +104,6 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
layerset : str
layerset_path : str
str when set, or None
sp_gist : bool
When `True` uses SP-GIST index instead of GIST for spatial indexes.
replication : bool
Indicates when osm2pgsql-replication is used
"""
Expand Down Expand Up @@ -136,11 +134,6 @@ def set_env_vars(region, subregion, srid, language, pgosm_date, layerset,
# Connection to DB for admin purposes, e.g. drop/create main database
os.environ['PGOSM_CONN_PG'] = db.connection_string(admin=True)

if sp_gist:
os.environ['PGOSM_GIST_TYPE'] = 'spgist'
else:
os.environ['PGOSM_GIST_TYPE'] = 'gist'

if replication:
os.environ['PGOSM_REPLICATION'] = 'true'
else:
Expand Down Expand Up @@ -210,5 +203,4 @@ def unset_env_vars():
os.environ.pop('PGOSM_LAYERSET', None)
os.environ.pop('PGOSM_CONN', None)
os.environ.pop('PGOSM_CONN_PG', None)
os.environ.pop('PGOSM_GIST_TYPE', None)
os.environ.pop('PGOSM_REPLICATION', None)
6 changes: 2 additions & 4 deletions docker/pgosm_flex.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,13 @@
@click.option('--srid', required=False, default=helpers.DEFAULT_SRID,
envvar="PGOSM_SRID",
help="SRID for data loaded by osm2pgsql to PostGIS. Defaults to 3857")
@click.option('--sp-gist', default=False, is_flag=True,
help='When set, builds SP-GIST indexes on geom column instead of the default GIST indexes.')
@click.option('--update', default=None,
type=click.Choice(['append', 'create'], case_sensitive=True),
help='EXPERIMENTAL - Wrap around osm2pgsql create v. append modes, without using osm2pgsql-replication.')
def run_pgosm_flex(ram, region, subregion, debug, force,
input_file, layerset, layerset_path, language, pg_dump,
pgosm_date, replication, skip_nested,
skip_qgis_style, srid, sp_gist, update):
skip_qgis_style, srid, update):
"""Run PgOSM Flex within Docker to automate osm2pgsql flex processing.
"""
paths = get_paths()
Expand All @@ -96,7 +94,7 @@ def run_pgosm_flex(ram, region, subregion, debug, force,
region = input_file

helpers.set_env_vars(region, subregion, srid, language, pgosm_date,
layerset, layerset_path, sp_gist, replication)
layerset, layerset_path, replication)
db.wait_for_postgres()
if force and db.pg_conn_parts()['pg_host'] == 'localhost':
msg = 'Using --force with the built-in database is unnecessary.'
Expand Down
2 changes: 0 additions & 2 deletions docker/tests/test_geofabrik.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ def setUp(self):
pgosm_date=PGOSM_DATE,
layerset=LAYERSET,
layerset_path=None,
sp_gist=False,
replication=False)


Expand All @@ -41,7 +40,6 @@ def test_get_region_filename_returns_region_when_subregion_None(self):
pgosm_date=PGOSM_DATE,
layerset=LAYERSET,
layerset_path=None,
sp_gist=False,
replication=False)

result = geofabrik.get_region_filename()
Expand Down
7 changes: 0 additions & 7 deletions docker/tests/test_pgosm_flex.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ def setUp(self):
pgosm_date=PGOSM_DATE,
layerset=LAYERSET,
layerset_path=None,
sp_gist=False,
replication=False)


Expand Down Expand Up @@ -91,7 +90,6 @@ def test_get_export_filename_region_only(self):
pgosm_date=PGOSM_DATE,
layerset=LAYERSET,
layerset_path=None,
sp_gist=False,
replication=False)

input_file = None
Expand All @@ -109,7 +107,6 @@ def test_layerset_include_place_returns_boolean(self):
pgosm_date=PGOSM_DATE,
layerset=LAYERSET,
layerset_path=layerset_path,
sp_gist=False,
replication=False)

paths = pgosm_flex.get_paths()
Expand All @@ -128,7 +125,6 @@ def test_layerset_include_place_returns_True_with_default_layerset(self):
pgosm_date=PGOSM_DATE,
layerset=LAYERSET,
layerset_path=layerset_path,
sp_gist=False,
replication=False)

paths = pgosm_flex.get_paths()
Expand All @@ -147,7 +143,6 @@ def test_layerset_include_place_returns_false_when_place_false_in_ini(self):
pgosm_date=PGOSM_DATE,
layerset=layerset,
layerset_path=layerset_path,
sp_gist=False,
replication=False)

paths = pgosm_flex.get_paths()
Expand All @@ -166,7 +161,6 @@ def test_layerset_include_place_returns_false_when_place_missing_in_ini(self):
pgosm_date=PGOSM_DATE,
layerset=layerset,
layerset_path=layerset_path,
sp_gist=False,
replication=False)

paths = pgosm_flex.get_paths()
Expand All @@ -185,7 +179,6 @@ def test_layerset_include_place_returns_true_when_place_true_in_ini(self):
pgosm_date=PGOSM_DATE,
layerset=layerset,
layerset_path=layerset_path,
sp_gist=False,
replication=False)

paths = pgosm_flex.get_paths()
Expand Down
1 change: 1 addition & 0 deletions docs/src/SUMMARY.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- [Customize PgOSM Flex](./customizations.md)
- [Common Customizations](./common-customization.md)
- [Layersets](./layersets.md)
- [Indexes](./custom-indexes.md)
- [Configure Postgres](./configure-postgres.md)
- [Query examples](./query.md)
- [Routing](./routing.md)
Expand Down
137 changes: 137 additions & 0 deletions docs/src/custom-indexes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# Indexes

PgOSM Flex allows customizing the indexes on the tables using `.ini` files. The default
index configuration files are stored under `flex-config/indexes/`.
The default indexing strategy is baked into the Docker
image. To use the defaults, follow the instructions throughout the
documentation without any adjustments.

## Map Volume in `docker run`

To customize indexes, map the path of your custom index definitions folder
to the Docker container under `/app/flex-config/indexes`. This overwrites the default
indexing scheme with the custom folder. You must define an INI file for each of
the layers included by your chosen `layerset`. The easiest approach is to copy the
existing directory with all of the index definitions, then customize those to
your needs.

The following command assumes you have the PgOSM Flex project cloned into the
`~/git/pgosm-flex` folder. The `noindexes` example creates the PgOSM Flex
tables with only the required `PRIMARY KEY`s.

```bash
docker run --name pgosm -d --rm \
-v ~/pgosm-data:/app/output \
-v /etc/localtime:/etc/localtime:ro \
-v ~/git/pgosm-flex/flex-config/indexes/examples/noindexes:/app/flex-config/indexes \
-e POSTGRES_PASSWORD=$POSTGRES_PASSWORD \
-p 5433:5432 -d rustprooflabs/pgosm-flex
```

> The `lotsofexamples` folder under `flex-config/indexes/examples/` illustrates creating indexes on nearly all columns.
## INI files

Each Lua style (`flex-config/style/*.lua`) must have a matching INI file
under `flex-config/indexes/`. Each `.ini` file should have 4 sections defined.


```ini
[all]

[point]

[line]

[polygon]
```

Index settings in the `[all]` section will apply to all tables in the layer
unless specific tables override the setting. Indexes in the `[point]`, `[line]`,
and `[polygon]` sections apply to only those specific tables.
The variables to use for indexes are described in the next section.


## Index variables

There are three (3) variables that can be configured for each column in the
PgOSM Flex database. `<name>` is the name of the column in the database.

* `<name>`
* `<name>_where`
* `<name>_method`

### To index or not to index

The `<name>` variable is the column's name and is set to a boolean value.
To add an index to the `admin_level` column, add `admin_level=true`. To exclude
an index from a column, either omit the column from the definition file or
set it to `false`, e.g. `admin_level=false`.

### Partial indexes


Partial indexes can be created with the `<name>_where` variable.
The `admin_level` column can have a partial index created on rows where the
`admin_level` value is set using `admin_level_where=admin_level IS NOT NULL`.

```ini
[all]
admin_level=true
admin_level_where=admin_level IS NOT NULL
```

### Index method

The `<name>_method` variable can be used to set the index method used by Postgres.
This value is passed to `osm2pgsql`'s [method option](https://osm2pgsql.org/doc/manual.html#defining-indexes), which appears to hand off to Postgres. This should
allow any [indexing method](https://www.postgresql.org/docs/current/indexes-types.html)
supported by Postgres.

One common way to use the `<name>_method` variable is to change a spatial
column's index from `GIST` to `SPGIST` using `geom_method=spgist`.
`GEOMETRY` columns default to `GIST` and all other columns default to `BTREE`.

```ini
[point]
geom=true
geom_method=spgist
```

> See Paul Ramsey's post [(The Many) Spatial Indexes of PostGIS](https://www.crunchydata.com/blog/the-many-spatial-indexes-of-postgis) for more information about when to choose `SPGIST`.

Setting index method isn't limited to spatial indexes. The following example
illustrates adding a `BRIN` index to the `admin_level` column.

```ini
[all]
admin_level=true
admin_level_method=brin
```


## Most columns can be indexed

The only limit to which columns can be indexed is the `index_columns` list
defined in `flex-config/helpers.lua`.

> If there are columns that you would like to index this way, either submit a pull request or create an issue requesting the change.

## Caveats


Setting indexes is only relevant for the first import. When using `--replication`
these configurations only impact the initial import. Subsequent imports make no
attempt to verify / adjust database indexes.

The primary key cannot be omitted using this approach. The primary keys on
`osm_id` are created in post-processing SQL and cannot be overridden
using this approach.

The simplest index specification file is the one shown above: defining only the
four (4) empty sections creates no indexes beyond the table's `PRIMARY KEY` on
the `osm_id` column.


2 changes: 1 addition & 1 deletion docs/src/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Running PgOSM Flex is easy via the PgOSM Docker image

A few decisions made in this project:

* ID column is `osm_id`
* ID column is `osm_id` and is always `PRIMARY KEY`
* Geometry column named `geom`
* Defaults to same units as OpenStreetMap (e.g. km/hr, meters)
* Data not included in a dedicated column is available from `osm.tags.tags` (`JSONB`)
Expand Down
Loading

0 comments on commit a7d3edc

Please sign in to comment.