-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrecycle_stat_US.py
140 lines (110 loc) · 5.75 KB
/
recycle_stat_US.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# =================================================================================
# INFO-664-02 Programing for Cultural Heritage 22/SP - Sunni Wong
# Final Project: New York City Recyclable Collection 2010-2018
# Part 2: National Municipal Recyclable and Waste Collection Rate, 2010-2018
# Data set: National solid waste management dataset from EPA
# https://edg.epa.gov/metadata/catalog/search/resource/details.page?uuid=C9310A59-16D2-4002-B36B-2B0A1C637D4E
# =================================================================================
import plotly.express as px
import csv
import pandas as pd
# Credit to Professor Matt Miller for initial conversion of the EPA CSV file
# to a dataset that works with the Plotly module.
with open("National_MSW_total.csv","r") as USmswOriginal_csv:
reader = csv.DictReader(USmswOriginal_csv)
USmswOriginal = {}
# Select data to be included in the USmswOriginal{}
for row in reader:
if "Materials" in row:
# Skip footer description rows in the CSV
if row["Materials"] != "" and "Table 1" not in row["Materials"]:
for year in row:
# Read and create keys with year values from the first row
if year != "Materials":
# Take only data from 2010
if int(year) >= 2010:
if year not in USmswOriginal:
USmswOriginal[year] = {"year": year}
# Add corresponding materials as keys and their values to each year
USmswOriginal[year][row["Materials"]] = row[year]
# Find out total MSW weight before dropping irrelevant columns
for year in USmswOriginal:
year_total = int(USmswOriginal[year]["Total MSW Generated - Weight"])
paper_pt = int(USmswOriginal[year]["Products - Paper and Paperboard"])
USmswOriginal[year]["Products - Paper and Paperboard"] = paper_pt
glass_pt = int(USmswOriginal[year]["Products - Glass"])
USmswOriginal[year]["Products - Glass"] = glass_pt
metal_pt = int(USmswOriginal[year]["Products - Metals - Total"])
USmswOriginal[year]["Products - Metals - Total"] = metal_pt
plastic_pt = int(USmswOriginal[year]["Products - Plastics"])
USmswOriginal[year]["Products - Plastics"] = plastic_pt
USmswOriginal[year]["Non-Recyclable & Others"] = year_total - paper_pt - glass_pt - metal_pt - plastic_pt
# Remove unwanted data
for year in USmswOriginal:
for p in ["Products - Metals - Ferrous", "Products - Metals - Aluminum",
"Products - Metals - OtherNonferrous", "Products - Rubber and Leather",
"Products - Textiles", "Products - Wood", "Products - Other",
"Products - Total Materials", "Other Wastes - Food Waste",
"Other Wastes - Yard Trimmings", "Other Wastes - Miscellaneous Inorganic Wastes",
"Other Wastes - Total"]:
USmswOriginal[year].pop(p)
# Write to new csv for plotly
with open("USmswTotal_p.csv","w") as outfile:
# Create a list of keys from any of the years for headers
fieldnames = list(USmswOriginal["2010"])
writer = csv.DictWriter(outfile, fieldnames=fieldnames)
writer.writeheader()
# Populate data in rows
for year in USmswOriginal:
writer.writerow(USmswOriginal[year])
# Read new csv for building the bar chart
USmswTotal = pd.read_csv("USmswTotal_p.csv")
# Rename columns' name
USmswTotal.rename(columns={
"year":"YEAR",
"Products - Paper and Paperboard" : "PAPER COLLECTED",
"Products - Glass" : "GLASS COLLECTED",
"Products - Metals - Total": "METALS COLLECTED",
"Products - Plastics" : "PLASTICS COLLECTED",
"Non-Recyclable & Others" : "NON-RECYCLABLE & OTHERS",
"Total MSW Generated - Weight" : "MSW TOTAL"
}, inplace=True)
# Find out percentage of each material
USmswTotal["PAPER COLLECTED"] = USmswTotal["PAPER COLLECTED"] / USmswTotal["MSW TOTAL"]
USmswTotal["GLASS COLLECTED"] = USmswTotal["GLASS COLLECTED"] / USmswTotal["MSW TOTAL"]
USmswTotal["METALS COLLECTED"] = USmswTotal["METALS COLLECTED"] / USmswTotal["MSW TOTAL"]
USmswTotal["PLASTICS COLLECTED"] = USmswTotal["PLASTICS COLLECTED"] / USmswTotal["MSW TOTAL"]
USmswTotal["NON-RECYCLABLE & OTHERS"] = USmswTotal["NON-RECYCLABLE & OTHERS"] / USmswTotal["MSW TOTAL"]
# Swap plastic and paper columns
cols = list(USmswTotal.columns)
a, b = cols.index("GLASS COLLECTED"), cols.index("PLASTICS COLLECTED")
cols[a], cols[b] = cols[b], cols[a]
USmswTotal = USmswTotal[cols]
# Selecting columns to compare
USp_cols = list(USmswTotal)
USp_cols.remove("YEAR")
USp_cols.remove("MSW TOTAL")
# Show US bar chart
# Numbers presented as percentage with 2 decimal places
# Reversed legend order to adhere to bar chart order.
USbarcolor = {"PAPER COLLECTED" : "#7AC142",
"PLASTICS COLLECTED" : "#0093D0",
"GLASS COLLECTED" : "#46A7D1",
"METAL COLLECTED" : "#8BBCD1",
"NON-RECYCLABLE & OTHERS" : "#FFA15A"}
fig = px.bar(USmswTotal,
x="YEAR",
y=USp_cols,
labels={"value": "Percentage to Total MSW", "YEAR": "Year"},
color_discrete_sequence = [*USbarcolor.values()],
title="National Municipal Recyclable and Waste Collection Rate, 2010-2018",
text_auto=".2%")
fig.update_layout(legend_traceorder="reversed",
legend_title="Types of Materials",
yaxis = dict(
tickmode = "array",
tickvals = [0,0.2,0.4,0.6,0.8,1],
ticktext = ["0", "20%", "40%", "60%", "80%", "100%"]
)
)
fig.show()