-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrecycle_stat.py
144 lines (123 loc) · 5.83 KB
/
recycle_stat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# =================================================================================
# INFO-664-02 Programming for Cultural Heritage 22/SP - Sunni Wong
# Final Project: New York City Recyclable Collection 2010-2018
# Part 3: Comparison of Municipal Recyclable and Waste Collection Rate of NYC and US, 2010-2018
# This script uses the two datasets (USmswTotal_p.csv and NYCmswTotal_p.csv) created
# in recycle_stat_NY.py and recycle_stat_US.py.
# If you run this script on its own, first download the two new datasets from GitHub
# and save them in the same directory as this script.
# =================================================================================
import csv
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# =================================================================================
# US data
# =================================================================================
# Load the pre-processed US figures and normalise the headers to the
# upper-case column names used throughout this script.
USmswTotal = pd.read_csv("USmswTotal_p.csv")
USmswTotal = USmswTotal.rename(
    columns={
        "year": "YEAR",
        "Products - Paper and Paperboard": "PAPER COLLECTED",
        "Products - Glass": "GLASS COLLECTED",
        "Products - Metals - Total": "METALS COLLECTED",
        "Products - Plastics": "PLASTICS COLLECTED",
        "Non-Recyclable & Others": "NON-RECYCLABLE & OTHERS",
        "Total MSW Generated - Weight": "MSW TOTAL",
    }
)

# Turn each material column into its percentage of that row's "MSW TOTAL".
for material in (
    "PAPER COLLECTED",
    "GLASS COLLECTED",
    "METALS COLLECTED",
    "PLASTICS COLLECTED",
):
    USmswTotal[material] = USmswTotal[material] / USmswTotal["MSW TOTAL"] * 100

# Whatever is left of 100% after paper, glass, metals and plastics (subtracted
# in that order) is reported as "NON-RECYCLABLE & OTHERS".
remainder = 100 - USmswTotal["PAPER COLLECTED"]
for material in ("GLASS COLLECTED", "METALS COLLECTED", "PLASTICS COLLECTED"):
    remainder = remainder - USmswTotal[material]
USmswTotal["NON-RECYCLABLE & OTHERS"] = remainder

# Swap the positions of the glass and plastics columns; every other column
# keeps its place.
swapped = {
    "GLASS COLLECTED": "PLASTICS COLLECTED",
    "PLASTICS COLLECTED": "GLASS COLLECTED",
}
cols = [swapped.get(column, column) for column in USmswTotal.columns]
USmswTotal = USmswTotal[cols]

# Columns to compare: everything except the year and the total.
USp_cols = USmswTotal.columns.tolist()
for excluded in ("YEAR", "MSW TOTAL"):
    USp_cols.remove(excluded)
# =================================================================================
# NYC data
# =================================================================================
# Keep only data from 2010: skip the first 20 rows and renumber from 0.
# NOTE(review): the positional slice assumes row 20 of the CSV is the 2010
# record -- confirm against the dataset.
NYCmswTotal = pd.read_csv("NYCmswTotal_p.csv")[20:].reset_index(drop=True)

# Yearly MSW total = sum of every column except the year itself.
NYCp_cols = NYCmswTotal.columns.tolist()
NYCp_cols.remove("YEAR")
NYCmswTotal["MSW TOTAL"] = NYCmswTotal[NYCp_cols].sum(axis=1)

# Percentage of the yearly total for each collected stream.
for tonnage_col, share_col in (
    ("PAPERTONSCOLLECTED", "PAPER COLLECTED"),
    ("MGPTONSCOLLECTED", "MGP COLLECTED"),
):
    NYCmswTotal[share_col] = NYCmswTotal[tonnage_col] / NYCmswTotal["MSW TOTAL"] * 100

# The rest of 100% is reported as "NON-RECYCLABLE & OTHERS".
NYCmswTotal["NON-RECYCLABLE & OTHERS"] = (
    100 - NYCmswTotal["PAPER COLLECTED"] - NYCmswTotal["MGP COLLECTED"]
)
# =================================================================================
# Combine two stacked bar charts together by using subgroups.
# Based on Stackoverflow answer https://stackoverflow.com/a/65314442
# =================================================================================
# One row per year, one column per stacked series. The NYC columns (and the
# year) are limited to the first 9 rows of NYCmswTotal; the US columns are
# taken whole, so all lists must end up the same length.
USNY = pd.DataFrame(
    {
        "year": NYCmswTotal["YEAR"][:9].tolist(),
        "USpaper": USmswTotal["PAPER COLLECTED"].tolist(),
        "USplastic": USmswTotal["PLASTICS COLLECTED"].tolist(),
        "USmetal": USmswTotal["METALS COLLECTED"].tolist(),
        "USglass": USmswTotal["GLASS COLLECTED"].tolist(),
        "USnon_recycle": USmswTotal["NON-RECYCLABLE & OTHERS"].tolist(),
        "NYCpaper": NYCmswTotal["PAPER COLLECTED"][:9].tolist(),
        "NYCmsw": NYCmswTotal["MGP COLLECTED"][:9].tolist(),
        "NYCnon_recycle": NYCmswTotal["NON-RECYCLABLE & OTHERS"][:9].tolist(),
    }
)
# Empty figure; the bar traces are added to it afterwards.
fig = go.Figure()

# Titles, a 0-100% y-axis with labelled ticks every 20 points, and stacked bars.
fig.update_layout(
    title="Comparison of Municipal Recyclable and Waste Collection Rate of NYC and US, 2010-2018",
    xaxis={"title_text": "Year"},
    yaxis={
        "title_text": "Percentage",
        "tickmode": "array",
        "tickvals": [0, 20, 40, 60, 80, 100],
        "ticktext": ["0", "20%", "40%", "60%", "80%", "100%"],
    },
    legend_title_text="Types of Materials",
    barmode="stack",
)
# Sequence is important
# Sequence is important: traces are added in the order of `groups`, and
# `names` is paired with `groups` by position.
groups = ["USpaper", "USplastic", "USmetal", "USglass", "USnon_recycle", "NYCpaper", "NYCmsw", "NYCnon_recycle"]
# NOTE(review): "NYCmsw" holds the "MGP COLLECTED" share, yet its legend label
# is "NYC PLASTIC" -- confirm that label is intended.
names = ["US PAPER", "US PLASTIC", "US METAL", "US GLASS", "US NON-RECYCLABLE & OTHERS", "NYC PAPER", "NYC PLASTIC", "NYC NON-RECYCLABLE & OTHERS"]
colors = {"USpaper": "#7AC142",
          "USplastic": "#0093D0",
          "USglass": "#46A7D1",
          "USmetal": "#8BBCD1",
          "USnon_recycle": "#FFA15A",
          "NYCpaper": "#7AC142",
          "NYCmsw": "#0093D0",
          "NYCnon_recycle": "#FFA15A"}
repeat = len(USNY.year)

for r, n in zip(groups, names):
    # Series whose key starts with "US" go in the "US" sub-group of each year;
    # the remaining (NYC) series go in the "NYC" sub-group.
    is_us = r.startswith("US")
    bar_options = dict(
        # Two-level x value: the year plus a per-bar region label.
        x=[USNY.year, ["US" if is_us else "NYC"] * repeat],
        y=USNY[r],
        name=n,
        # Look the colour up by series key. Pairing `colors.values()` with
        # `groups` by position gave "USmetal" and "USglass" each other's
        # colours, because the two sequences list them in a different order.
        marker_color=colors[r],
        text=["%.1f%%" % value for value in USNY[r]],
    )
    if is_us:
        # Only the US bars get the white dotted pattern overlay.
        bar_options.update(
            marker_pattern_shape=["."] * repeat,
            marker_pattern_fgcolor=["#ffffff"] * repeat,
            marker_pattern_solidity=[0.05] * repeat,
        )
    fig.add_trace(go.Bar(**bar_options))

fig.show()