From f951825d514ea47f783ceedd064ace09cf1dc18d Mon Sep 17 00:00:00 2001 From: Mike Campbell Date: Sun, 3 Dec 2017 23:14:01 +0000 Subject: [PATCH] How can one person write so many fucking bugs - ProductNeuron backprop error resulting in 0 neuron delta for connecting neurons - Network evaluation broken for context neurons - Timestep handling error in backprop - Network traversal didn't actually work with certain topologies --- CHANGES.md | 7 +++++ lib/rann/backprop.rb | 68 +++++++++++++++++++++++++++++--------------- lib/rann/lstm.rb | 4 +-- lib/rann/network.rb | 2 +- 4 files changed, 54 insertions(+), 27 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index b3d1a83..cb2cfb2 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,10 @@ +- So. Many. Bugs. Turns out the product neuron stuff was still broken and + network evaluation wasn't behaving correctly with context neurons (recurrent + connections). Also an error in timestep handling during backprop, and just + generally network traversal ... + + *Michael Campbell* + - Don't lock the input connections into the LSTM layer, that acts as the fully connected part of the network and that's where the majority of learning takes place, derp. diff --git a/lib/rann/backprop.rb b/lib/rann/backprop.rb index d5a3d38..b10c10a 100644 --- a/lib/rann/backprop.rb +++ b/lib/rann/backprop.rb @@ -113,46 +113,67 @@ def self.run_single network, inputs, targets error = mse targets, outputs # backward pass with unravelling for recurrent networks - node_deltas = Hash.new{ |h, k| h[k] = Hash.new 0.to_d } + node_deltas = Hash.new{ |h, k| h[k] = {} } gradients = Hash.new 0.to_d initial_timestep = inputs.size - 1 neuron_stack = network.output_neurons.map{ |n| [n, initial_timestep] } + skipped = [] while current = neuron_stack.shift neuron, timestep = current next if node_deltas[timestep].key? neuron.id - from_here = bptt_connecting_to neuron, network, timestep - neuron_stack.push *from_here - # neuron delta is summation of neuron deltas deltas for the connections # from this neuron - step_one = - if neuron.output? - output_index = network.output_neurons.index neuron - mse_delta targets[output_index], outputs[output_index] - else + if neuron.output? + output_index = network.output_neurons.index neuron + step_one = mse_delta targets[output_index], outputs[output_index] + else + sum = network.connections_from(neuron).reduce 0.to_d do |m, c| out_timestep = c.output_neuron.context? ? timestep + 1 : timestep output_node_delta = node_deltas[out_timestep][c.output_neuron.id] - # connection delta is the output neuron delta multiplied by the - # connection's weight - connection_delta = - if c.output_neuron.is_a? ProductNeuron - intermediate = - network.connections_to(c.output_neuron).reject{ |c2| c2 == c }.reduce 0.to_d do |m, c2| - m * states[timestep][:values][c2.input_neuron.id] * c2.weight - end - output_node_delta * intermediate * c.weight - else - output_node_delta * c.weight + if out_timestep > initial_timestep + m + else + # complicated network case, see NOTES.md + # can't find node delta, re-enqueue at back of queue and record + # the skip. + if !output_node_delta + if skipped.size == neuron_stack.size + 1 + output_node_delta = 0.to_d + else + neuron_stack.push current + skipped << current + break + end end - m + connection_delta + # connection delta is the output neuron delta multiplied by the + # connection's weight + connection_delta = + if c.output_neuron.is_a? ProductNeuron + intermediate = + network.connections_to(c.output_neuron).reject{ |c2| c2 == c }.reduce 1.to_d do |m, c2| + m * states[timestep][:values][c2.input_neuron.id] * c2.weight + end + output_node_delta * intermediate * c.weight + else + output_node_delta * c.weight + end + + m + connection_delta + end end - end + + step_one = sum || next + end + + from_here = bptt_connecting_to neuron, network, timestep + neuron_stack.push *from_here + skipped.clear node_delta = ACTIVATION_DERIVATIVES[neuron.activation_function] @@ -233,11 +254,12 @@ def self.bptt_connecting_to neuron, network, timestep # halt traversal if we're at a context and we're at the base timestep return [] if neuron.context? && timestep == 0 + timestep -= 1 if neuron.context? + network.connections_to(neuron).each.with_object [] do |c, a| # don't enqueue connections from inputs next if c.input_neuron.input? - timestep -= timestep if neuron.context? a << [c.input_neuron, timestep] end end diff --git a/lib/rann/lstm.rb b/lib/rann/lstm.rb index 3923429..f8c9d42 100644 --- a/lib/rann/lstm.rb +++ b/lib/rann/lstm.rb @@ -35,8 +35,7 @@ def init memory_standard = RANN::Neuron.new("LSTM #{name} Mem Standard #{j}", 2, :standard, :linear).tap{ |n| @network.add n } memory_tanh = RANN::Neuron.new("LSTM #{name} Mem Tanh #{j}", 1, :standard, :tanh).tap{ |n| @network.add n } memory_o_product = RANN::ProductNeuron.new("LSTM #{name} Mem/Hidden 4 Product #{j}", 2, :standard, :linear).tap{ |n| @network.add n } - output = RANN::Neuron.new("LSTM #{name} Output #{j}", 1, :standard, :linear).tap{ |n| @network.add n } - @outputs << output + @outputs << memory_o_product memory_context = RANN::Neuron.new("LSTM #{name} Mem Context #{j}", 1, :context).tap{ |n| @network.add n } output_context = RANN::Neuron.new("LSTM #{name} Output Context #{j}", 1, :context).tap{ |n| @network.add n } @@ -52,7 +51,6 @@ def init @network.add RANN::LockedConnection.new memory_standard, memory_tanh, 1.to_d @network.add RANN::LockedConnection.new o, memory_o_product, 1.to_d @network.add RANN::LockedConnection.new memory_tanh, memory_o_product, 1.to_d - @network.add RANN::LockedConnection.new memory_o_product, output, 1.to_d @network.add RANN::LockedConnection.new memory_standard, memory_context, 1.to_d @network.add RANN::LockedConnection.new memory_context, memory_product, 1.to_d @network.add RANN::LockedConnection.new memory_context, i, 1.to_d diff --git a/lib/rann/network.rb b/lib/rann/network.rb index 3f7dbf4..6995f61 100644 --- a/lib/rann/network.rb +++ b/lib/rann/network.rb @@ -41,7 +41,7 @@ def evaluate input # would probably be easier to detect circular dependency this way too? begin i = 0 - until output_neurons.all?{ |neuron| neuron.value } + until connections.select(&:enabled?).all? &:processed? i += 1 connections.each do |connection| next if !connection.enabled?