Skip to content

Commit

Permalink
initial version
Browse files Browse the repository at this point in the history
  • Loading branch information
minus34 committed Oct 1, 2018
1 parent fd1a919 commit c8ef0df
Show file tree
Hide file tree
Showing 9 changed files with 517 additions and 0 deletions.
63 changes: 63 additions & 0 deletions clustering/test_dbscan_kmeans.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
-- SELECT
-- SUM(count)::integer count,
-- JSON_Agg(JSON_Build_Object(cat, count)) cat,
-- ST_AsGeoJson(ST_PointOnSurface(ST_Union(geom))) geomj
-- FROM (
-- SELECT
-- COUNT(cat) count,
-- ST_Union(geom) geom,
-- cat,
-- kmeans_cid,
-- dbscan_cid
-- FROM (
-- SELECT
-- cat,
-- kmeans_cid,
-- geom AS geom,
-- ST_ClusterDBSCAN(geom, 0.1, 1) OVER (PARTITION BY kmeans_cid) dbscan_cid
-- FROM (
-- SELECT
-- reliability AS cat,
-- ST_ClusterKMeans(geom, 10) OVER () kmeans_cid,
-- geom
-- FROM gnaf_201802.address_principals
-- LIMIT 1000
-- ) kmeans
-- ) dbscan GROUP BY kmeans_cid, dbscan_cid, cat
-- ) cluster GROUP BY kmeans_cid, dbscan_cid;

WITH kmeans AS (
SELECT
reliability AS cat,
ST_ClusterKMeans(geom, 10) OVER () kmeans_cid,
geom
FROM gnaf_201802.address_principals
LIMIT 1000
), dbscan AS (
SELECT
cat,
kmeans_cid,
geom AS geom,
ST_ClusterDBSCAN(geom, 0.1, 1) OVER (PARTITION BY kmeans_cid) dbscan_cid
FROM kmeans
), cluster AS (
SELECT
COUNT(cat) count,
ST_Union(geom) geom,
cat,
kmeans_cid,
dbscan_cid
FROM dbscan
GROUP BY
kmeans_cid,
dbscan_cid,
cat
)
SELECT
SUM(count)::integer count,
JSON_Agg(JSON_Build_Object(cat, count)) cat,
ST_AsGeoJson(ST_PointOnSurface(ST_Union(geom))) geomj
FROM cluster
GROUP BY
kmeans_cid,
dbscan_cid;
92 changes: 92 additions & 0 deletions external_storage_testing/00_create_tables.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@

-- create meshblocks table
DROP TABLE if exists testing.abs_2011_mb;
CREATE TABLE testing.abs_2011_mb (
gid int4 NOT NULL,
mb_11code text NULL,
mb_category text NULL,
sa1_11main float8 NULL,
sa1_11_7cd int4 NULL,
sa2_11main int4 NULL,
sa2_11_5cd int4 NULL,
sa2_11name text NULL,
sa3_11code int4 NULL,
sa3_11name text NULL,
sa4_11code int4 NULL,
sa4_11name text NULL,
gcc_11code text NULL,
gcc_11name text NULL,
state text NULL,
area_sqm numeric NULL,
mb11_pop int4 NULL,
mb11_dwell int4 NULL,
geom geometry(Multipolygon,4283) NULL
)
WITH (OIDS = FALSE);
ALTER TABLE testing.abs_2011_mb OWNER TO postgres;

-- set geom column to decompressed
ALTER TABLE testing.abs_2011_mb ALTER COLUMN geom SET STORAGE EXTERNAL;

-- insert data
insert into testing.abs_2011_mb
select * from admin_bdys_201808.abs_2011_mb;

-- add indexes and cluster on geom index
ALTER TABLE testing.abs_2011_mb ADD CONSTRAINT abs_2011_mb_pk PRIMARY KEY (gid);

CREATE INDEX abs_2011_mb_geom_idx ON testing.abs_2011_mb USING gist (geom);
ALTER TABLE testing.abs_2011_mb CLUSTER ON abs_2011_mb_geom_idx;


-- create GNAF table
DROP TABLE IF EXISTS testing.address_principals;
CREATE TABLE testing.address_principals
(
gid integer NOT NULL,
gnaf_pid text NOT NULL,
street_locality_pid text NOT NULL,
locality_pid text NOT NULL,
alias_principal character(1) NOT NULL,
primary_secondary text,
building_name text,
lot_number text,
flat_number text,
level_number text,
number_first text,
number_last text,
street_name text NOT NULL,
street_type text,
street_suffix text,
address text NOT NULL,
locality_name text NOT NULL,
postcode text,
state text NOT NULL,
locality_postcode text,
confidence smallint NOT NULL,
legal_parcel_id text,
mb_2011_code bigint,
mb_2016_code bigint,
latitude numeric(10,8) NOT NULL,
longitude numeric(11,8) NOT NULL,
geocode_type text NOT NULL,
reliability smallint NOT NULL,
geom geometry(Point,4283) NOT NULL
)
WITH (OIDS = FALSE);
ALTER TABLE testing.address_principals OWNER to postgres;

-- set geom column to decompressed
ALTER TABLE testing.address_principals ALTER COLUMN geom SET STORAGE EXTERNAL;

-- insert data
insert into testing.address_principals
select * from gnaf_201808.address_principals;

-- add indexes and cluster on geom index
ALTER TABLE testing.address_principals ADD CONSTRAINT address_principals_pk PRIMARY KEY (gnaf_pid);

CREATE INDEX address_principals_geom_idx ON testing.address_principals USING gist (geom);
ALTER TABLE testing.address_principals CLUSTER ON address_principals_geom_idx;

CREATE INDEX address_principals_gid_idx ON testing.address_principals USING btree (gid);
9 changes: 9 additions & 0 deletions external_storage_testing/01_pip_test_standard.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@

-- run point in polygon test with standard geom field storage -- mins
DROP TABLE IF EXISTS testing.pip_test_standard;
CREATE TABLE testing.pip_test_standard AS
SELECT gnaf.gnaf_pid,
mb.mb_11code
FROM admin_bdys_201808.abs_2011_mb AS mb
inner join gnaf_201808.address_principals as gnaf
on st_intersects(gnaf.geom, mb.geom);
10 changes: 10 additions & 0 deletions external_storage_testing/02_pip_test_external.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

-- run point in polygon test with standard geom field storage -- mins
DROP TABLE IF EXISTS testing.pip_test_standard;
CREATE TABLE testing.pip_test_standard AS
SELECT gnaf.gnaf_pid,
mb.mb_11code
FROM testing.abs_2011_mb AS mb
inner join testing.address_principals as gnaf
on st_intersects(gnaf.geom, mb.geom);

35 changes: 35 additions & 0 deletions external_storage_testing/xx_check_tables.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@


-- check storage type of geom columns
select tbl.relname,
att.attname,
case att.attstorage
when 'p' then 'plain'
when 'm' then 'main'
when 'e' then 'external'
when 'x' then 'extended'
end as attstorage
from pg_attribute att
join pg_class tbl on tbl.oid = att.attrelid
join pg_namespace ns on tbl.relnamespace = ns.oid
where ns.nspname = 'testing'
and att.attname = 'geom'
and not att.attisdropped;


-- check table sizes
select table_schema,
table_name,
pg_relation_size(table_schema||'.'||table_name),
pg_total_relation_size(table_schema||'.'||table_name)
from information_schema.tables
where (table_schema = 'admin_bdys_201808' and table_name = 'abs_2011_mb')
or (table_schema = 'gnaf_201808' and table_name = 'address_principals')
OR table_schema = 'testing'
order by table_name,
table_schema;

--admin_bdys_201808 abs_2011_mb 270098432 424329216
--testing abs_2011_mb 203399168 432021504
--gnaf_201808 address_principals 3510861824 5039718400
--testing address_principals 3510812672 5037875200
20 changes: 20 additions & 0 deletions osm/optimise_loaded_tables.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

-- Load into PG (via command line)
-- osm2pgsql -d geo -P 5432 -H localhost -s -l /Users/hugh.saalmans/Downloads/australia-oceania-latest.osm.pbf

-- get stats for non-spatial tables
ANALYZE public.planet_osm_rels;
ANALYZE public.planet_osm_ways;
ANALYZE public.planet_osm_nodes;

-- cluster spatial tables on spatial indexes
ALTER TABLE public.planet_osm_line CLUSTER ON planet_osm_line_index;
ALTER TABLE public.planet_osm_polygon CLUSTER ON planet_osm_polygon_index;
ALTER TABLE public.planet_osm_point CLUSTER ON planet_osm_point_index;
ALTER TABLE public.planet_osm_roads CLUSTER ON planet_osm_roads_index;

-- get stats for spatial tables
ANALYZE public.planet_osm_line;
ANALYZE public.planet_osm_polygon;
ANALYZE public.planet_osm_point;
ANALYZE public.planet_osm_roads;
Loading

0 comments on commit c8ef0df

Please sign in to comment.