-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMidterm_Q2.R
122 lines (72 loc) · 4.46 KB
/
Midterm_Q2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# Question 2.1
#####################################################################################################
###### Model 1 : Linear regression relating income inequality (gini_index) to the hate crime rate
#####################################################################################################
# Let the user pick the CSV interactively; the file is read as comma-separated
# with a header row, using the "UTF-8-BOM" encoding.
hatecrime_dt <- read.csv(
  file.choose(),
  header = TRUE,
  sep = ",",
  fileEncoding = "UTF-8-BOM"
)
View(hatecrime_dt)
names(hatecrime_dt)

library(ggplot2)

# Scatter plots of each hate crime measure (SPLC, FBI) against the Gini index.
# They are stored so the same base plots can be reused with a fitted line below.
gini_vs_splc <- ggplot(
  hatecrime_dt,
  aes(x = gini_index, y = hate_crimes_per_100k_splc)
) +
  geom_point()
gini_vs_fbi <- ggplot(
  hatecrime_dt,
  aes(x = gini_index, y = avg_hatecrimes_per_100k_fbi)
) +
  geom_point()
gini_vs_splc
gini_vs_fbi

# SPLC rate regressed on the Gini index and the FBI rate.
model2 <- lm(
  hate_crimes_per_100k_splc ~ gini_index + avg_hatecrimes_per_100k_fbi,
  data = hatecrime_dt
)
summary(model2)

# FBI rate regressed on the Gini index alone.
model3 <- lm(avg_hatecrimes_per_100k_fbi ~ gini_index, data = hatecrime_dt)
summary(model3)

# Same scatter plots, now with a linear-model smoother drawn on top.
gini_vs_splc + stat_smooth(method = "lm")
gini_vs_fbi + stat_smooth(method = "lm")
#####################################################################################################
## Model 2 : Linear regressions relating population race/nature to the hate crime rate
#####################################################################################################
# Print the column names to have the candidate predictors at hand.
names(hatecrime_dt)

# Every fit below is stored in `model`, replacing the previous one; each fit is
# only inspected through its printed summary.

# Fit 1: non-white share, white poverty share and non-citizen share.
model <- lm(
  hate_crimes_per_100k_splc ~
    share_non_white + share_white_poverty + share_non_citizen,
  data = hatecrime_dt
)
summary(model)

# Fit 2: non-white share, Trump vote share and seasonal unemployment share.
model <- lm(
  hate_crimes_per_100k_splc ~
    share_non_white + share_voters_voted_trump + share_unemployed_seasonal,
  data = hatecrime_dt
)
summary(model)

# Fit 3: the predictors of fit 2 plus the FBI hate crime rate.
model <- lm(
  hate_crimes_per_100k_splc ~
    share_non_white + share_voters_voted_trump + share_unemployed_seasonal +
    avg_hatecrimes_per_100k_fbi,
  data = hatecrime_dt
)
summary(model)
#####################################################################################################
## Q3. US heat map presenting how the hate crime rate varies across states
#####################################################################################################
library(ggplot2)
# Install "maps" only when it is missing, instead of re-installing on every run.
if (!requireNamespace("maps", quietly = TRUE)) {
  install.packages("maps")
}
library("maps")

# State outline polygons; `region` holds the state name that is joined on below.
states <- map_data("state")

# Work on a copy so that hatecrime_dt itself is left untouched.
hatecrime_dt_st <- hatecrime_dt

# Remove Alaska and Hawaii, as they are not present in the heat map data.
hatecrime_dt_st <- subset(
  hatecrime_dt_st,
  !(state %in% c("Alaska", "Hawaii"))
)

# Rename the state column to "region" (by name, not by position) and lower-case
# its values so that they match the `region` values of the map data.
names(hatecrime_dt_st)[names(hatecrime_dt_st) == "state"] <- "region"
hatecrime_dt_st$region <- tolower(hatecrime_dt_st$region)
# Inspect the prepared table (only after it has actually been created).
View(hatecrime_dt_st)

# Merge the hate crime data with the geospatial data.
sim_data_geo <- merge(states, hatecrime_dt_st, by = "region")
# merge() does not keep the row order of `states`; restore the vertex drawing
# order, otherwise the polygons can be drawn with scrambled outlines.
sim_data_geo <- sim_data_geo[order(sim_data_geo$order), ]

# Map with a fixed yellow state outline and styled axis titles. The outline
# colour is set as a constant rather than mapped as an aesthetic, and ggplot()
# replaces the deprecated qplot().
ggplot(
  sim_data_geo,
  aes(x = long, y = lat, group = group, fill = hate_crimes_per_100k_splc)
) +
  geom_polygon(colour = "yellow") +
  theme(
    axis.title = element_text(face = "bold.italic", size = 10, color = "brown")
  )

# Plain version of the same map without outline or theme tweaks.
ggplot(sim_data_geo, aes(long, lat)) +
  geom_polygon(aes(group = group, fill = hate_crimes_per_100k_splc))
#####################################################################################################
## Model 3 : Implementing agglomerative clustering to find the similarity between the states.
#####################################################################################################
# The CSV is selected again here, so this section also works when run on its own.
hatecrime_dt <- read.csv(
  file.choose(),
  header = TRUE,
  sep = ",",
  fileEncoding = "UTF-8-BOM"
)

# Column 1 labels each observation (presumably the state name - confirm against
# the CSV); columns 2 to 12 are the variables the clustering is computed on.
response <- hatecrime_dt[, 1]
predictor <- hatecrime_dt[, 2:12]
# Label the observations so that the dendrogram leaves show the value of
# column 1 instead of bare row numbers.
rownames(predictor) <- as.character(response)

library(cluster)

# Average linkage on the standardized variables.
clusters <- agnes(
  x = predictor, diss = FALSE, stand = TRUE, method = "average"
)
DendCluster <- as.dendrogram(clusters)
plot(DendCluster)
# Rows inspected by hand-picked index.
# NOTE(review): presumably read off this dendrogram - confirm they still apply.
View(hatecrime_dt[c(31, 22, 24, 48, 38), ])

# Complete linkage on the same input.
clustersComplete <- agnes(
  x = predictor, diss = FALSE, stand = TRUE, method = "complete"
)
DendClusterComplete <- as.dendrogram(clustersComplete)
plot(DendClusterComplete)
# Rows inspected by hand-picked index (see the note on the previous View call).
View(hatecrime_dt[c(9, 22, 7, 48, 38), ])

# Single linkage on the same input.
clustersSingle <- agnes(
  x = predictor, diss = FALSE, stand = TRUE, method = "single"
)
DendClusterSingle <- as.dendrogram(clustersSingle)
plot(DendClusterSingle)
#################################################################################################