Vastly simplify backprop algorithm
Mike Campbell committed Nov 24, 2017
1 parent 342a259 commit 120f695
Showing 4 changed files with 70 additions and 77 deletions.
7 changes: 4 additions & 3 deletions bench/xor_benchmark.rb
@@ -4,6 +4,7 @@
xor_inputs = [[0,0],[0,1],[1,0],[1,1]]
xor_targets = [[0],[1],[1],[0]]

time = Time.now.to_i
results =
Array.new(100) do |j|
# inputs
@@ -42,13 +43,13 @@
checking: false
)

# puts "iteration #{i} error: #{avg_error.to_f}"

break if avg_error < 0.0001
end

puts j
i
end

puts results.reduce(:+).fdiv(results.size).round(2)
taken = Time.now.to_i - time
puts results.reduce(:+).fdiv(results.size).round(2)
puts "in #{taken}s"
134 changes: 62 additions & 72 deletions lib/rann/backprop.rb
@@ -69,10 +69,14 @@ def run_batch inputs, targets, opts = {}
if opts[:checking]
# check assumes batchsize 1 for now
sorted_gradients = avg_gradients.values_at *network.connections.map(&:id)
if GradientChecker.check network, inputs.first, targets.first, sorted_gradients
invalid = GradientChecker.check network, inputs.first, targets.first, sorted_gradients
if invalid.empty?
puts "gradient valid"
else
puts "gradient INVALID"
puts "gradients INVALID for connections:"
invalid.each do |i|
puts "#{network.connections[i].input_neuron.name} -> #{network.connections[i].output_neuron.name}"
end
end
end

@@ -106,79 +110,68 @@ def self.run_single network, inputs, targets
error = mse targets, outputs

# backward pass with unravelling for recurrent networks
deltas = Hash.new{ |h, k| h[k] = Hash.new(0.to_d) }

# outputs first
network.output_neurons.each.with_index do |o, i|
activation_derivative = ACTIVATION_DERIVATIVES[o.activation_function]

deltas[0][o.id] = mse_delta(targets[i], outputs[i], activation_derivative)
end

# remove this push mechanism, shouldn't be necessary and uses extra memory.
incoming_deltas = Hash.new{ |h, k| h[k] = Hash.new{ |h, k| h[k] = [] } }
node_deltas = Hash.new{ |h, k| h[k] = Hash.new(0.to_d) }
gradients = Hash.new(0)

initial_timestep = inputs.size - 1
connection_stack =
network.output_neurons
.flat_map{ |n| network.connections_to n }
.map{ |c| [c, initial_timestep] }

# maybe change this to traverse the static network timestep times if this
# proves too difficult to rationalise
while current = connection_stack.shift
conn, timestep = current

inp_n = conn.input_neuron
out_n = conn.output_neuron
out_timestep = out_n.context? ? timestep + 1 : timestep

# skip if already processed (might've been enqueued by two nodes before
# being processed). could alternatively add a check when enqueueing that
# not already enqueued? might be better for memory, but slow down
# processing.
next if deltas[timestep].key?(inp_n.id)

from_here = bptt_connecting_to inp_n, network, timestep, deltas
connection_stack.unshift *from_here

incoming_deltas[timestep][inp_n.id] <<
if out_n.is_a? ProductNeuron
intermediate = states[out_timestep][:intermediates][out_n.id]
deltas[out_timestep][out_n.id].mult intermediate.div(states[timestep][:values][inp_n.id], 10), 10
neuron_stack = network.output_neurons.map{ |n| [n, initial_timestep] }

while current = neuron_stack.shift
neuron, timestep = current
next if node_deltas[timestep].key? neuron.id

from_here = bptt_connecting_to neuron, network, timestep
neuron_stack.push *from_here

# neuron delta is the summation of the deltas for the connections
# leading out of this neuron
node_delta =
if neuron.output?
output_index = network.output_neurons.index neuron
activation_derivative = ACTIVATION_DERIVATIVES[neuron.activation_function]
mse_delta targets[output_index], outputs[output_index], activation_derivative
else
deltas[out_timestep][out_n.id].mult conn.weight, 10
sum_of_deltas =
network.connections_from(neuron).reduce 0.to_d do |m, c|
out_timestep = c.output_neuron.context? ? timestep + 1 : timestep
output_node_delta = node_deltas[out_timestep][c.output_neuron.id]

# connection delta is the output neuron delta multiplied by the
# connection's weight
connection_delta =
if c.output_neuron.is_a? ProductNeuron
intermediate = states[out_timestep][:intermediates][c.output_neuron.id]
output_node_delta.mult intermediate.div(states[timestep][:values][c.input_neuron.id], 10), 10
else
output_node_delta.mult c.weight, 10
end

m + connection_delta
end

ACTIVATION_DERIVATIVES[neuron.activation_function]
.call(states[timestep][:values][neuron.id])
.mult(sum_of_deltas, 10)
end

if incoming_deltas[timestep][inp_n.id].size == network.connections_from(inp_n).size
sum_of_deltas = incoming_deltas[timestep][inp_n.id].reduce :+
node_deltas[timestep][neuron.id] = node_delta

deltas[timestep][inp_n.id] =
ACTIVATION_DERIVATIVES[inp_n.activation_function]
.call(states[timestep][:values][inp_n.id])
.mult(sum_of_deltas, 10)
end
end
network.connections_to(neuron).each do |c|
in_timestep = neuron.context? ? timestep - 1 : timestep

gradients = {}

network.connections.each_with_index do |con, i|
gradients[con.id] = 0.to_d

(inputs.size - 1).downto 0 do |t|
if nd = deltas[t][con.output_neuron.id]
gradient =
if con.input_neuron.context?
t == 0 ? 0.to_d : nd.mult(states[t - 1][:values][con.input_neuron.id], 10)
elsif con.output_neuron.is_a? ProductNeuron
intermediate = states[t][:intermediates][con.output_neuron.id]
nd.mult intermediate.div(con.weight, 10), 10
else
nd.mult states[t][:values][con.input_neuron.id], 10
end
# connection gradient is the output neuron delta multiplied by the
# connection's input neuron value.
gradient =
if c.output_neuron.is_a? ProductNeuron
intermediate = states[timestep][:intermediates][c.output_neuron.id]
node_delta.mult intermediate.div(c.weight, 10), 10
elsif c.input_neuron.context? && timestep == 0
0.to_d
else
node_delta.mult states[in_timestep][:values][c.input_neuron.id], 10
end

gradients[con.id] += gradient
end
gradients[c.id] += gradient
end
end
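The heart of the simplification: the old code pushed partial deltas along every connection and waited until a neuron had received one from each of its outgoing connections; the new code dequeues a neuron and pulls its delta straight from the already-computed downstream deltas. A minimal sketch of that pull-based rule for an ordinary (non-product, non-context) neuron, using Floats instead of the library's BigDecimal mult/div arithmetic; the names here are illustrative, not the gem's API:

```ruby
# delta_j = f'(value_j) * sum over outgoing connections of (w_jk * delta_k);
# the gradient for an incoming connection is then delta_j * input value.
def node_delta value, outgoing, deltas, derivative
  sum = outgoing.sum { |c| c[:weight] * deltas[c[:to]] }
  derivative.call(value) * sum
end

sigmoid_prime = ->(v) { v * (1.0 - v) } # derivative expressed via the output value
outgoing      = [{ to: :o1, weight: 0.5 }, { to: :o2, weight: -0.3 }]
deltas        = { o1: 0.12, o2: -0.08 }  # downstream deltas, already known

delta    = node_delta 0.7, outgoing, deltas, sigmoid_prime # ≈ 0.01764
gradient = delta * 0.4 # times the incoming connection's input value
```

The special cases visible in the diff then slot in: a product neuron's delta uses the stored intermediate divided by the input value (and its gradient the intermediate divided by the weight), while context neurons shift the timestep by one.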

@@ -218,7 +211,7 @@ def self.mse_delta target, actual, activation_derivative
step_one.mult step_two, 10
end

def self.bptt_connecting_to neuron, network, timestep, deltas
def self.bptt_connecting_to neuron, network, timestep
# halt traversal if we're at a context and we're at the base timestep
return [] if neuron.context? && timestep == 0

@@ -227,10 +220,7 @@ def self.bptt_connecting_to neuron, network, timestep, deltas
next if c.input_neuron.input?

timestep -= 1 if neuron.context?

unless deltas[timestep].key?(c.input_neuron.id)
a << [c, timestep]
end
a << [c.input_neuron, timestep]
end
end
end
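Worth noting for the recurrent case: work items on the stack are now [neuron, timestep] pairs, so unrolling through time falls out of the same loop, with context neurons bridging adjacent timesteps and the guard above halting at the base timestep. A toy trace of that traversal pattern, independent of the gem's classes:

```ruby
# Toy walk over [neuron, timestep] work items: the recurrent (context) edge
# hands its work to the previous timestep, and t < 0 stops the unrolling.
edges = { output: [:hidden], hidden: [:context], context: [:hidden] }
stack = [[:output, 1]] # start from the final timestep
seen  = []

while (item = stack.shift)
  neuron, t = item
  next if seen.include? item
  seen << item

  t_up = neuron == :context ? t - 1 : t # crossing a context edge steps back in time
  edges[neuron].each { |up| stack.push [up, t_up] if t_up >= 0 }
end

seen # => [[:output, 1], [:hidden, 1], [:context, 1], [:hidden, 0], [:context, 0]]
```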
4 changes: 3 additions & 1 deletion lib/rann/gradient_checker.rb
@@ -24,7 +24,9 @@ def self.check network, inputs, targets, dvec
gradapprox[i] = (error_thetaplus - error_thetaminus).div(EPSILON.mult(2, 10), 10)
end

gradapprox.each.with_index.all?{ |ga, i| in_epsilon? ga, dvec[i] }
gradapprox.each.with_index.with_object [] do |(ga, i), res|
res << i unless in_epsilon? ga, dvec[i]
end
end

def self.error outputs, targets
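For context, check perturbs each weight in both directions and compares the backprop gradient against a central-difference estimate; with this change it returns the indices of connections whose two values disagree, rather than a single boolean. The estimate in the gradapprox line is the standard one:

```latex
\mathrm{gradapprox}_i \;=\; \frac{J(\theta + \varepsilon e_i) \;-\; J(\theta - \varepsilon e_i)}{2\varepsilon}
```

where J is the network error, e_i selects the i-th connection weight, and in_epsilon? decides whether the discrepancy from dvec[i] (the analytic gradient) is tolerable.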
2 changes: 1 addition & 1 deletion lib/rann/optimisers/rmsprop.rb
@@ -8,7 +8,7 @@ class RMSProp
def initialize opts = {}, restore = {}
@decay = opts[:decay] || 0.9.to_d
@fudge_factor = opts[:fudge_factor] || 0.00000001.to_d
@learning_rate = opts[:learning_rate] || 0.001.to_d
@learning_rate = opts[:learning_rate] || 0.01.to_d
@historical_gradient = (restore[:historical_gradient] || {}).tap{ |h| h.default = 0.to_d }
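The functional change in this file is just the default learning rate, 0.001 → 0.01. For orientation, RMSProp divides each gradient by a leaky moving average of its recent magnitude, so the learning rate scales every step directly; a float-based sketch of the update (the gem itself works in BigDecimal, and this helper name is illustrative):

```ruby
# One RMSProp step for a single weight's gradient.
def rmsprop_step grad, hist, decay: 0.9, lr: 0.01, fudge: 1e-8
  hist = decay * hist + (1 - decay) * grad**2   # running average of squared gradients
  step = -lr * grad / (Math.sqrt(hist) + fudge) # normalised, learning-rate-scaled update
  [step, hist]
end

step, hist = rmsprop_step 0.5, 0.0
# hist == 0.025, step ≈ -0.0316 -- ten times larger than with the old 0.001 default
```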