Skip to content

Commit

Permalink
changed merged with point process
Browse files Browse the repository at this point in the history
  • Loading branch information
minus34 committed Jul 19, 2015
1 parent 6c10007 commit c4dff32
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 24 deletions.
27 changes: 11 additions & 16 deletions hex-grid/add-hex-grid-ids-to-points.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def get_hex_ids(pg_cur):
# Get the 'what ever the attribute is' counts for each hex grid (using parallel processing)
sql = "SELECT public.parsel('{0}.grid_{2}'," \
"'gid'," \
"'SELECT sqt.count::integer, grd.geom " \
"'SELECT sqt.count, grd.geom " \
"FROM {0}.grid_{2} AS grd INNER JOIN (" \
"SELECT bdys.gid, Count(*) AS count " \
"FROM {0}.grid_{2} AS bdys INNER JOIN public.{1} as pnts ON ST_Contains(bdys.geom, pnts.geom) " \
Expand All @@ -102,25 +102,20 @@ def get_hex_ids(pg_cur):
"'bdys'," \
"{3})".format(pg_schema, points_table_name, curr_width_str, str(cpus))

pg_cur.execute(sql)


# sql = "CREATE UNLOGGED TABLE {0}.grid_{2}_counts AS " \
# "SELECT sqt.count::integer, grd.geom " \
# # Testing using group by to limit number of polygons - not worth the effort!
# sql = "SELECT public.parsel('{0}.grid_{2}'," \
# "'gid'," \
# "'SELECT sqt2.count, (ST_Dump(ST_Union(geom))).geom FROM (SELECT sqt.count, grd.geom " \
# "FROM {0}.grid_{2} AS grd INNER JOIN (" \
# "SELECT bdys.gid, Count(*) AS count " \
# "FROM {0}.grid_{2} AS bdys INNER JOIN public.{1} as pnts ON ST_Contains(bdys.geom, pnts.geom) " \
# "GROUP BY bdys.gid" \
# ") AS sqt ON grd.gid = sqt.gid".format(pg_schema, points_table_name, curr_width_str)
#
# pg_cur.execute(sql)
#
# # Create spatial index
# pg_cur.execute("CREATE INDEX grid_{1}_counts_geom_idx ON {0}.grid_{1}_counts USING gist (geom)"
# .format(pg_schema, curr_width_str))
#
# pg_cur.execute("ALTER TABLE {0}.grid_{1}_counts CLUSTER ON grid_{1}_counts_geom_idx"
# .format(pg_schema, curr_width_str))
# ") AS sqt ON grd.gid = sqt.gid) AS sqt2 GROUP BY sqt2.count'," \
# "'{0}.grid_{2}_counts'," \
# "'bdys'," \
# "{3})".format(pg_schema, points_table_name, curr_width_str, str(cpus))

pg_cur.execute(sql)

pg_cur.execute("ANALYZE {0}.grid_{1}_counts".format(pg_schema, curr_width_str))

Expand Down
16 changes: 8 additions & 8 deletions hex-grid/create-hex-grid-using-width.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@
hex_grid_table_name = "grid"

# Grid parameters
start_width = 1.0 # in km
start_width = 0.5 # in km
multiple = 2
max_width = 1500
min_width = 0.3

# #####################################################################################################################

startTime = datetime.now()

print("--------------------------------------------------")
print("PROCESSING STARTED")
print "--------------------------------------------------"
print datetime.now().strftime('%Y-%m-%d %H:%M:%S') + " - PROCESSING STARTED"


def main():
Expand All @@ -52,7 +52,7 @@ def main():
def get_hex_grids(pg_cur):
curr_width = start_width

while curr_width < max_width:
while curr_width > min_width:
# Get table name friendly width
curr_width_str = str(curr_width)
curr_width_str = curr_width_str.replace(".", "_")
Expand All @@ -78,16 +78,16 @@ def get_hex_grids(pg_cur):

# Generate hex grids
sql = "INSERT INTO {0}.{1}_{2} (pid, geom) " \
"SELECT * FROM hex_grid_width({2}, 84.0, -44.0, 161.5, -5.0, 4283, 3577, 4283)"\
.format(pg_schema, hex_grid_table_name, curr_width_str)
"SELECT * FROM hex_grid_width({3}, 84.0, -44.0, 161.5, -5.0, 4283, 3577, 4283)"\
.format(pg_schema, hex_grid_table_name, curr_width_str, curr_width)

pg_cur.execute(sql)

pg_cur.execute("ANALYZE %s.%s_%s" % (pg_schema, hex_grid_table_name, curr_width_str))

print datetime.now().strftime('%Y-%m-%d %H:%M:%S') + " - Grid %s processed" % curr_width_str

curr_width *= multiple
curr_width /= multiple

return True

Expand Down
149 changes: 149 additions & 0 deletions hex-grid/merge-points-to-hex-grid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# *********************************************************************************************************************
# PURPOSE: Copies policy data from the prod Greenplum appliance to another postgres instance
#
# AUTHOR: HS, 2015/04/08
#
# NOTES:
# -
#
# *********************************************************************************************************************

# Import Python modules
import math
import psycopg2

from datetime import datetime

metres2degrees = (2.0 * math.pi * 6378137.0) / 360.0
tile_width = 78271.52 # Standard width of a single 256 pixel map tile at zoom level one


#######################################################################################################################
# SCRIPT PARAMETERS
#######################################################################################################################

# Postgres parameters
pg_host = "localhost"
pg_port = 5432
pg_db = "iadpdev"
pg_user = "postgres"
pg_password = "password"
pg_schema = "hex"

hex_grid_table_name = "grid"

points_table_name = "mb_random_points"
output_points_table_name = "mb_points_hex"

# Parallel processing parameters
cpus = 6

# Grid parameters
start_zoom_level = 1
start_width = 1024 # in km
multiple = 2
min_width = 0.9

# #####################################################################################################################

startTime = datetime.now()

print "--------------------------------------------------"
print datetime.now().strftime('%Y-%m-%d %H:%M:%S') + " - PROCESSING STARTED"


def main():
# Connect to Postgres
pg_connect_string = "host='%s' dbname='%s' user='%s' password='%s' port=%s"

# need to use password version as trust is not set on server
pg_conn = psycopg2.connect(pg_connect_string % (pg_host, pg_db, pg_user, pg_password, pg_port))

pg_conn.autocommit = True
pg_cur = pg_conn.cursor()

# Get Hex IDs
get_hex_ids(pg_cur)

pg_cur.close()
pg_conn.close()

return True


def get_hex_ids(pg_cur):
curr_width = start_width
zoom_level = start_zoom_level

while curr_width > min_width:
# Set the number of decimal places for the output GeoJSON to reduce response size & speed up rendering
tolerance = (tile_width / math.pow(2.0, float(zoom_level))) / metres2degrees
places = 0
precision = 0.1

while precision > tolerance:
places += 1
precision /= 10

places += 1

# Get table name friendly width
curr_width_str = str(curr_width)
curr_width_str = curr_width_str.replace(".", "_")
curr_width_str = curr_width_str.replace("_0", "")

pg_cur.execute("DROP TABLE IF EXISTS {0}.grid_{1}_counts".format(pg_schema, curr_width_str))

sql = "CREATE UNLOGGED TABLE {0}.grid_{2}_counts (count integer, geojson text, geom geometry(POINT,4283, 2)) " \
"WITH (OIDS=FALSE); ALTER TABLE hex.grid_{2}_counts OWNER TO {3};"\
.format(pg_schema, points_table_name, curr_width_str, pg_user)

pg_cur.execute(sql)

# Create spatial index
pg_cur.execute("CREATE INDEX grid_{1}_counts_geom_idx ON {0}.grid_{1}_counts USING gist (geom)"
.format(pg_schema, curr_width_str))

pg_cur.execute("ALTER TABLE {0}.grid_{1}_counts CLUSTER ON grid_{1}_counts_geom_idx"
.format(pg_schema, curr_width_str))

# Get the 'what ever the attribute is' counts for each hex grid (using parallel processing)
sql = "SELECT public.parsel('{0}.grid_{2}'," \
"'gid'," \
"'SELECT sqt.count, ST_AsGeoJSON(grd.geom, {4}, 0), ST_Centroid(grd.geom) " \
"FROM {0}.grid_{2} AS grd INNER JOIN (" \
"SELECT bdys.gid, Count(*) AS count " \
"FROM {0}.grid_{2} AS bdys INNER JOIN public.{1} as pnts ON ST_Contains(bdys.geom, pnts.geom) " \
"GROUP BY bdys.gid" \
") AS sqt ON grd.gid = sqt.gid'," \
"'{0}.grid_{2}_counts'," \
"'bdys'," \
"{3})".format(pg_schema, points_table_name, curr_width_str, str(cpus), places)

# print sql

pg_cur.execute(sql)

pg_cur.execute("ANALYZE {0}.grid_{1}_counts".format(pg_schema, curr_width_str))

print datetime.now().strftime('%Y-%m-%d %H:%M:%S') + " - {0} processed".format(curr_width_str,)

curr_width /= multiple
zoom_level += 1

return True


if __name__ == '__main__':
if main():
print datetime.now().strftime('%Y-%m-%d %H:%M:%S') + " - PROCESSING FINISHED"
else:
print datetime.now().strftime('%Y-%m-%d %H:%M:%S') + " - PROCESSING STOPPED UNEXPECTEDLY"

duration = datetime.now() - startTime
days, seconds = duration.days, duration.seconds
hours = str(days * 24 + seconds // 3600).zfill(2)
minutes = str((seconds % 3600) // 60).zfill(2)
seconds = str(seconds % 60 + duration.microseconds // 1000000).zfill(2)

print "DURATION: " + hours + ":" + minutes + ":" + seconds
4 changes: 4 additions & 0 deletions hex-grid/xx-testing.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@



CREATE TABLE hex.grid_1_counts AS SELECT sqt.count::integer, grd.geom FROM hex.grid_1 AS grd INNER JOIN (SELECT bdys.gid, Count(*) AS count FROM hex.grid_1 AS bdys INNER JOIN public.mb_random_points as pnts ON ST_Contains(bdys.geom, pnts.geom) GROUP BY bdys.gid) AS sqt ON grd.gid = sqt.gid;

0 comments on commit c4dff32

Please sign in to comment.