-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathperceptron.py
170 lines (142 loc) · 6.57 KB
/
perceptron.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# perceptron.py
# -----------------
# Main file for the Perceptron algorithm methods.
# Thanks to the Berkeley AI projects for general code structure.
#
# Chet Aldrich, Laura Biester
import util
import perceptronWeights
from loadFeatures import *
from copy import deepcopy
from progressBar import ProgressBar
class Perceptron:
"""
A Perceptron Classifier.
"""
def __init__(self, legalLabels, iterations):
"""
Initializes the information required for the perceptron.
Keyword Arguments:
legalLabels -- the possible labels in the dataset
iterations -- the desired iteration value during training
"""
self.legalLabels = legalLabels
self.iterations = iterations
self.initWeights()
def printToFile(self):
"""
printToFile() prints trained weights to a separate file. Simply place this method after the training sequence if you want to save the values for later use.
"""
file = open("perceptronWeights.py", "w+")
print >>file, "def getSavedWeights():\n\treturn {0}".format(self.weights)
file.close()
def initWeights(self):
"""
initWeights() initializes the weight dictionary with Counters (see util.py).
"""
self.weights = {}
for label in self.legalLabels:
self.weights[label] = util.Counter()
def useTrainedWeights(self):
"""
useTrainedWeights() gets a dictionary of previously trained weights from
the perceptronWeights.py file.
"""
self.weights = perceptronWeights.getSavedWeights()
def train(self, trainingData, trainingLabels, validationData, validationLabels, tune):
"""
train() updates the perceptron prototype vectors over the training values given.
Keyword Arguments:
trainingData -- training data for the perceptron
trainingLabels -- labels for the associated training data
validationData -- validation data for the perceptron tuning function
validationLabels -- labels for the associated validation data
tune -- a boolean indicating whether to tune over iterations.
"""
if (tune):
iterationValues = [1, 3, 5, 10, 15, 20]
self.tune(trainingData, trainingLabels, validationData, validationLabels, iterationValues)
else:
self.trainingHelper(trainingData, trainingLabels, self.iterations)
def trainingHelper(self, trainingData, trainingLabels, iterations):
"""
trainingHelper() classifies training data using the perceptron weights
and updates the perceptron weights if the perceptron is incorrect.
Keyword Arguments:
trainingData -- training data for the perceptron
trainingLabels -- labels for the associated training data
iterations -- desired number of iterations over the training dataset.
"""
for i in range(iterations):
progressBar = ProgressBar(100, len(trainingData), "Learning Weights, Iteration {0} of {1}"
.format(i + 1, iterations))
for j in range(len(trainingData)):
progressBar.update(j)
values = util.Counter()
# Go over each label, and create the value from the training data and current vectors
for label in self.legalLabels:
activation = 0
for key, value in trainingData[j].items():
activation += self.weights[label][key] * value
values[label] = activation
# Here, we update values in weight vectors if we reach an incorrect conclusion.
if values.argMax() != trainingLabels[j]:
for key, value in trainingData[j].items():
self.weights[trainingLabels[j]][key] += value
self.weights[values.argMax()][key] -= value
progressBar.clear()
def tune(self, trainingData, trainingLabels, validationData, validationLabels, iterationValues):
"""
tune() tunes the data to the best number of iterations over the validation data.
Keyword Arguments:
trainingData -- training data for the perceptron
trainingLabels -- labels for the associated training data
validationData -- validation data for the perceptron tuning function
validationLabels -- labels for the associated validation data
iterationValues -- a list of different numbers of iterations to try
"""
tuningWeights = Counter()
correctlyClassified = Counter()
# classify for each number of iterations
for index, iteration in enumerate(iterationValues):
# just do the number of extra iterations required
if index != 0:
extraIterations = iterationValues[index] - iterationValues[index - 1]
else:
extraIterations = iteration
# train extra iterations
self.trainingHelper(trainingData, trainingLabels, extraIterations)
# save weights for each number of iterations
tuningWeights[iteration] = deepcopy(self.weights)
# classify data
classified = self.classify(validationData)
# check if classification matches label
for i in range(len(validationLabels)):
if validationLabels[i] == classified[i]:
correctlyClassified[iteration] += 1
# find iteration value with most correct classifications
# use those weights when classifying
bestIterationValue = correctlyClassified.argMax()
self.weights = tuningWeights[bestIterationValue]
def classify(self, data):
"""
classify() classifies each data item in the input by finding the best prototype vector.
Keyword Arguments:
data -- the test data to classify
"""
guesses = []
progressBar = ProgressBar(100, len(data), "Classifying Data")
for index, entry in enumerate(data):
progressBar.update(index)
values = util.Counter()
# for each label, compute activation
for label in self.legalLabels:
activation = 0
# sum over the weights * values to get activation
for key, value in entry.items():
activation += self.weights[label][key] * value
values[label] = activation
# add classification guess for data by getting the argmax
guesses.append(values.argMax())
progressBar.clear()
return guesses