-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchooseAttribute.m
61 lines (44 loc) · 1.6 KB
/
chooseAttribute.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
function [ best ] = chooseAttribute(examples, attributes, targets)
% chooseAttribute Selects the attribute with the maximum information gain.
%   examples   - matrix; rows are examples, columns are attribute values
%   attributes - vector of remaining attribute (column) indices to consider
%   targets    - logical column of class labels (true/false), one per example
%   best       - index of the attribute with the highest gain, or -1 when
%                attributes is empty
max_gain = -1;
best = -1;
% Force a row vector: "for a = v" iterates over COLUMNS, so a column
% vector would otherwise be consumed in one malformed iteration and
% examples(:, a) would select several columns at once.
for a = attributes(:).'
    attribute = examples(:, a);
    gain = Gain(attribute, targets);
    % ">=" keeps the LAST attribute among ties with maximal gain,
    % matching the original tie-breaking behavior.
    if gain >= max_gain
        max_gain = gain;
        best = a;
    end
end
end
function [entropy] = I(pos, neg)
% I Binary entropy of a positive/negative split.
%   pos, neg - counts (or fractions) of positive and negative examples
%   entropy  - Shannon entropy in bits of the two-class distribution
% eps guards both the division (empty split) and log2 (zero probability).
total = pos + neg + eps;
fracs = [pos, neg] / total;
entropy = -sum(fracs .* log2(fracs + eps));
end
function [remainder] = Remainder(attribute, targets)
% Remainder Expected entropy after splitting on a binary attribute.
%   attribute - logical column of attribute values, one per example
%   targets   - logical column of class labels, one per example
%   remainder - entropies of the two branches, weighted by branch size
truePos  = sum(attribute & targets);     % attribute true,  target true
trueNeg  = sum(attribute & ~targets);    % attribute true,  target false
falsePos = sum(~attribute & targets);    % attribute false, target true
falseNeg = sum(~attribute & ~targets);   % attribute false, target false
% eps keeps the branch probability ratios well-defined when a branch
% receives no examples at all.
trueCount  = truePos + trueNeg + eps;
falseCount = falsePos + falseNeg + eps;
trueBranch = trueCount / length(targets) * ...
    I(truePos / trueCount, trueNeg / trueCount);
falseBranch = falseCount / length(targets) * ...
    I(falsePos / falseCount, falseNeg / falseCount);
remainder = trueBranch + falseBranch;
end
function [gain] = Gain(attribute, targets)
% Gain Information gain from splitting the examples on the given attribute.
%   attribute - logical column of attribute values, one per example
%   targets   - logical column of class labels, one per example
%   gain      - entropy of targets minus the post-split remainder; eps is
%               added so an exactly-zero gain still registers as positive
p = sum(targets) / length(targets);   % fraction of positive examples
q = 1.0 - p;                          % fraction of negative examples
gain = I(p, q) - Remainder(attribute, targets) + eps;
end