-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpokemonKNN.py
executable file
·241 lines (196 loc) · 8.71 KB
/
pokemonKNN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# Machine Learning Project 1
# KNN with Pokemon
# Justin Kaminski, Tim Inzitari, Josh Foss, & Alex Helmick
# Feb 10th 2020
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from matplotlib import cm
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d # must keep
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from scipy import stats
# Load the Pokemon table. Spaces in the CSV header are replaced with
# underscores so the column names used below ('Type_1', 'Sp._Atk', ...) match.
pokemon = pd.read_csv('Pokemon.csv')
pokemon.columns = pokemon.columns.str.replace(' ', '_')

# Feature matrix: the two type columns followed by the stat columns.
# NOTE(review): the hard-coded predictions later in this script pass integers
# for Type_1 / Type_2, so the CSV is presumably already numerically encoded
# and free of missing values -- confirm, because KNeighborsClassifier cannot
# be fitted on strings or NaN.
FEATURE_COLUMNS = ['Type_1', 'Type_2', 'Total', 'HP', 'Attack', 'Defense', 'Sp._Atk', 'Sp._Def', 'Speed']
X = pokemon[FEATURE_COLUMNS]
Type_1 = X['Type_1']  # not referenced again in this script; kept for any external user

# Target: the Legendary flag of each row.
y = pokemon['Legendary']

# Baseline train/test split using train_test_split's default proportions;
# the fixed random_state makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
# DESCRIPTION OF EACH STAT: count / mean / std / quartiles (describe) and mode,
# reported separately for Legendary and non-Legendary Pokemon.
p = pokemon[['Total', 'HP', 'Attack', 'Defense', 'Sp._Atk', 'Sp._Def', 'Speed', 'Legendary']]
# Explicit comparison kept on purpose: it selects the same rows whether the
# column holds real booleans or 0/1 values.
legend = p[p.Legendary == True]  # noqa: E712
nL = p[p.Legendary == False]  # noqa: E712

# (column name in the DataFrame, human-readable label used in the report).
# One table drives every stanza so the headings stay consistent.
STAT_LABELS = (
    ('Total', 'Total'),
    ('HP', 'HP'),
    ('Attack', 'Attack'),
    ('Defense', 'Defense'),
    ('Sp._Atk', 'Special Attack'),
    ('Sp._Def', 'Special Defense'),
    ('Speed', 'Speed'),
)

for column, label in STAT_LABELS:
    print("-------------------------------------------------------------------")
    print("Legendary %s" % label)
    print(legend[column].describe())
    print("Mode of Legendary %s: " % label, stats.mode(legend[column]))
    print("------")
    print("Non Legendary %s" % label)
    print(nL[column].describe())
    print("Mode of Non Legendary %s: " % label, stats.mode(nL[column]))
    print("-------------------------------------------------------------------\n")
# END STAT DESCRIPTIONS
# Class balance: number of rows per value of the Legendary column.
legendary_counts = pokemon['Legendary'].value_counts()
print(
    "-------------------------------------------------------------------",
    "Count of Legendary Pokemon vs Not Legendary, True is Legendary False is not",
    legendary_counts,
    "-------------------------------------------------------------------\n",
    sep="\n",
)
# Scatter matrix of the training features, coloured by the Legendary label.
# plt.get_cmap is used instead of matplotlib.cm.get_cmap, which was deprecated
# in Matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap('gnuplot')
scatter = scatter_matrix(
    X_train,
    c=y_train,
    marker='o',
    s=40,
    hist_kwds={'bins': 15},
    figsize=(9, 9),
    cmap=cmap,
)

# 3D scatter plot of Total / Attack / HP for the training rows, again coloured
# by the Legendary label.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X_train['Total'], X_train['Attack'], X_train['HP'], c=y_train, marker='o', s=100)
ax.set_xlabel('Total')
ax.set_ylabel('Attack')
ax.set_zlabel('HP')
plt.show()
# Create the baseline classifier: 5 nearest neighbours, each with an equal vote.
knn = KNeighborsClassifier(n_neighbors=5, weights='uniform')
# Train the classifier (fit the estimator) using the training data
knn.fit(X_train, y_train)
# Estimate the accuracy of the classifier on future data, using the test data.
# The score is printed; previously it was computed and silently discarded.
print("Accuracy of the baseline classifier on the test set: %s" % knn.score(X_test, y_test))

# ## Use the trained k-NN classifier model to classify new, previously unseen objects
# Each row follows the training column order:
# Type_1, Type_2, Total, HP, Attack, Defense, Sp._Atk, Sp._Def, Speed.
# The rows are wrapped in a DataFrame carrying the training column names so
# the classifier sees the same feature names it was fitted with.
example_pokemon = pd.DataFrame(
    [
        [11, 15, 684, 91, 121, 99, 151, 121, 101],  # 1: similar in stats to the pokemon Palkia
        [7, 3, 390, 55, 35, 50, 55, 110, 85],       # 2: chosen similar to a non-legendary pokemon
        [1, 3, 349, 63, 60, 55, 50, 50, 71],        # 3: third example
        [6, 5, 580, 75, 100, 105, 95, 85, 120],     # 4: fourth example
    ],
    columns=X_train.columns,
)
for number, is_legendary in enumerate(knn.predict(example_pokemon), start=1):
    print("Are the stats of Pokemon %d that of a legendary Pokemon?: %s " % (number, is_legendary))
# -----------------------------------
# USER INPUT TEST AMOUNT ##
# Interactive predictions for user-supplied Pokemon; change to "Y" to enable.
another = "N"
while another == "Y":
    fields = input("Type1 Type2 HP Att Def spAtt spDef Speed: ").split()
    try:
        # The classifier needs numbers, not the raw input strings. A wrong
        # field count and a non-integer field both raise ValueError here.
        type1, type2, HP, Att, Def, spAtt, spDef, Speed = [int(field) for field in fields]
    except ValueError:
        print("Expected 8 whole numbers separated by spaces.")
    else:
        # Total is the sum of the six base stats, matching the 'Total' feature.
        Total = HP + Att + Def + spAtt + spDef + Speed
        # Same column order (and names) as the training features.
        candidate = pd.DataFrame(
            [[type1, type2, Total, HP, Att, Def, spAtt, spDef, Speed]],
            columns=X_train.columns,
        )
        pokemon_prediction = knn.predict(candidate)
        print("Total: %s" % Total)
        print("Are the stats of this Pokemon that of a legendary Pokemon?: %s " % pokemon_prediction[0])
    # Accept "y" / " Y " as well as "Y"; anything else ends the loop.
    another = input("Another? Y for Yes: ").strip().upper()
# END USER INPUT AMOUNT #
# -----------------------------------
# Best hyper-parameters found so far; filled in by the sweeps below.
bestSplit = 0
bestK = 0
# How sensitive is k-NN classification accuracy to the choice of the 'k' parameter?
k_range = range(3, 20)
scores = []
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    scores.append(knn.score(X_test, y_test))
# scores[i] belongs to k_range[i], so map the winning index back through
# k_range (the old "index + 1" was off by two because the sweep starts at k=3).
# On ties the smallest k wins, because index() returns the first maximum.
bestK = k_range[scores.index(max(scores))]

# Accuracy as a function of k.
plt.figure()
plt.xlabel('k')
plt.ylabel('accuracy')
plt.scatter(k_range, scores)
plt.xticks([3, 5, 10, 15, 20])
plt.show()
# How sensitive is k-NN classification accuracy to the train/test split proportion?
t = [0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2]  # training-set fractions to try
knn = KNeighborsClassifier(n_neighbors=bestK)
plt.figure()
previousLargestSplit = 0  # best mean accuracy seen so far
for s in t:
    scores = []
    for i in range(1, 1000):
        # The seed varies with the repeat index so every repeat draws a
        # different, still reproducible, split. With a constant seed all 999
        # repeats were the same split and the mean was a single value.
        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=i, test_size=1-s)
        knn.fit(X_train, y_train)
        scores.append(knn.score(X_test, y_test))
    mean_score = np.mean(scores)
    plt.plot(s, mean_score, 'bo')
    # Remember the training fraction with the highest mean accuracy.
    if mean_score > previousLargestSplit:
        previousLargestSplit = mean_score
        bestSplit = s
plt.xlabel('Training set proportion (%)')
plt.ylabel('accuracy')
plt.show()
# How sensitive is k-NN classification accuracy to the distance metric?
dist = ['euclidean', 'manhattan']
bestDist = ''
previousScore = 0  # best accuracy seen so far
scores = []
plt.figure()
# The split does not depend on the metric, so it is made once for all of them.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=1-bestSplit)
for s in dist:
    knn = KNeighborsClassifier(n_neighbors=bestK, metric=s)
    knn.fit(X_train, y_train)
    score = knn.score(X_test, y_test)
    scores.append(score)
    # Strict comparison: on a tie the metric listed first is kept.
    if previousScore < score:
        previousScore = score
        bestDist = s
    # Plot this metric's own accuracy (not the running mean over all metrics
    # evaluated so far, which blended the metrics together).
    plt.plot(s, score, 'bo')
plt.xlabel('Distance Used')
plt.ylabel('accuracy')
plt.show()
# Final model: refit using the best k, best split and best distance metric
# found by the sweeps above, then report its accuracy and confusion matrix.
knnBest = KNeighborsClassifier(n_neighbors=bestK, metric=bestDist)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=1-bestSplit)
knnBest.fit(X_train, y_train)
print('Using best K(' + str(bestK) + ') and best split(' + str(bestSplit) + ') and best dist(' + str(bestDist) + ')')
print('the accuracy is ' + str(knnBest.score(X_test, y_test)))
# confusion_matrix takes (y_true, y_pred): rows are the actual classes and
# columns the predicted ones. The arguments were previously swapped, which
# printed the transposed matrix.
print(confusion_matrix(y_test, knnBest.predict(X_test)))