From 628a7c9ca171bb9b5aa8d3c8837580ff0f8c1d5d Mon Sep 17 00:00:00 2001
From: Mike Campbell
Date: Mon, 11 Dec 2017 14:57:54 +0000
Subject: [PATCH] Fix backprop & LSTM weirdness

---
 CHANGES.md           | 13 +++++++++++++
 Gemfile.lock         |  2 +-
 lib/rann/backprop.rb | 31 ++++++++++++-------------------
 lib/rann/lstm.rb     |  8 ++++++--
 lib/rann/network.rb  |  8 ++++++++
 lib/rann/neuron.rb   |  2 +-
 lib/rann/version.rb  |  2 +-
 7 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index cb2cfb2..4a8efa9 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,16 @@
+- More backprop fixes..
+
+  *Michael Campbell*
+
+- Give LSTM input neurons linear activation.
+
+  *Michael Campbell*
+
+- For context neurons connected to a product neuron, its initial value should
+  be 1.
+
+  *Michael Campbell*
+
 - So. Many. Bugs. Turns out the product neuron stuff was still broken and
   network evaluation wasn't behaving correctly with context neurons (recurrent
   connections). Also an error in timestep handling during backprop, and just
diff --git a/Gemfile.lock b/Gemfile.lock
index de0a18b..6f3e478 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    rann (0.2.7)
+    rann (0.2.8)
       parallel (~> 1.12, >= 1.12.0)
       ruby-graphviz (~> 1.2, >= 1.2.3)
 
diff --git a/lib/rann/backprop.rb b/lib/rann/backprop.rb
index b10c10a..4fd200c 100644
--- a/lib/rann/backprop.rb
+++ b/lib/rann/backprop.rb
@@ -114,11 +114,16 @@ def self.run_single network, inputs, targets
 
     # backward pass with unravelling for recurrent networks
     node_deltas = Hash.new{ |h, k| h[k] = {} }
-    gradients   = Hash.new 0.to_d
-
     initial_timestep = inputs.size - 1
     neuron_stack     = network.output_neurons.map{ |n| [n, initial_timestep] }
-    skipped          = []
+    # initialize network end-point node_deltas in all timesteps with zero
+    network.neurons_with_no_outgoing_connections.each do |n|
+      (0...(inputs.size - 1)).each do |i|
+        node_deltas[i][n.id] = 0.to_d
+        neuron_stack << [n, i]
+      end
+    end
+    gradients = Hash.new 0.to_d
 
     while current = neuron_stack.shift
       neuron, timestep = current
@@ -137,20 +142,9 @@
 
           if out_timestep > initial_timestep
             m
+          elsif !output_node_delta
+            break
           else
-            # complicated network case, see NOTES.md
-            # can't find node delta, re-enqueue at back of queue and record
-            # the skip.
-            if !output_node_delta
-              if skipped.size == neuron_stack.size + 1
-                output_node_delta = 0.to_d
-              else
-                neuron_stack.push current
-                skipped << current
-                break
-              end
-            end
-
             # connection delta is the output neuron delta multiplied by the
             # connection's weight
             connection_delta =
@@ -172,8 +166,7 @@
         end
 
         from_here = bptt_connecting_to neuron, network, timestep
-        neuron_stack.push *from_here
-        skipped.clear
+        neuron_stack |= from_here
 
         node_delta =
           ACTIVATION_DERIVATIVES[neuron.activation_function]
@@ -215,7 +208,7 @@ def save filepath = nil
     end
   end
 
-  def restore filepath
+  def restore filepath = nil
     unless filepath
       filepath =
         Dir['*'].select{ |f| f =~ /rann_savepoint_.*/ }.sort.last
diff --git a/lib/rann/lstm.rb b/lib/rann/lstm.rb
index f8c9d42..7bd5d02 100644
--- a/lib/rann/lstm.rb
+++ b/lib/rann/lstm.rb
@@ -19,7 +19,7 @@ def initialize name, size
 
   def init
     @size.times do |j|
-      input = RANN::Neuron.new("LSTM #{name} Input #{j}", 0, :standard).tap{ |n| @network.add n }
+      input = RANN::Neuron.new("LSTM #{name} Input #{j}", 0, :standard, :linear).tap{ |n| @network.add n }
       @inputs << input
 
       f = RANN::Neuron.new("LSTM #{name} F #{j}", 3, :standard, :sig).tap{ |n| @network.add n }
@@ -36,7 +36,11 @@ def init
       memory_tanh = RANN::Neuron.new("LSTM #{name} Mem Tanh #{j}", 1, :standard, :tanh).tap{ |n| @network.add n }
      memory_o_product = RANN::ProductNeuron.new("LSTM #{name} Mem/Hidden 4 Product #{j}", 2, :standard, :linear).tap{ |n| @network.add n }
       @outputs << memory_o_product
-      memory_context = RANN::Neuron.new("LSTM #{name} Mem Context #{j}", 1, :context).tap{ |n| @network.add n }
+      memory_context =
+        RANN::Neuron.new("LSTM #{name} Mem Context #{j}", 1, :context).tap do |n|
+          @network.add n
+          n.value = 1.to_d # connecting to a product neuron
+        end
       output_context = RANN::Neuron.new("LSTM #{name} Output Context #{j}", 1, :context).tap{ |n| @network.add n }
 
       @network.add RANN::Connection.new input, f
diff --git a/lib/rann/network.rb b/lib/rann/network.rb
index 6995f61..94eb4ae 100644
--- a/lib/rann/network.rb
+++ b/lib/rann/network.rb
@@ -186,5 +186,13 @@ def recalculate_neuron_connection_counts!
 
       true
     end
+
+    def neurons_with_no_outgoing_connections
+      return @no_outgoing if defined? @no_outgoing
+
+      neurons.select do |n|
+        connections_from(n).none?
+      end
+    end
   end
 end
diff --git a/lib/rann/neuron.rb b/lib/rann/neuron.rb
index 7b67721..56dce8f 100644
--- a/lib/rann/neuron.rb
+++ b/lib/rann/neuron.rb
@@ -74,7 +74,7 @@ def set_default_value!
   end
 
   def initial_activation_function
-    if standard? || context?
+    if standard?
       :relu
     else
       :linear
diff --git a/lib/rann/version.rb b/lib/rann/version.rb
index 7513643..8051156 100644
--- a/lib/rann/version.rb
+++ b/lib/rann/version.rb
@@ -1,3 +1,3 @@
 module RANN
-  VERSION = "0.2.7"
+  VERSION = "0.2.8"
 end
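A note on the run_single change above: during the backward pass a neuron's delta can only be computed once the deltas of every neuron it feeds into are known. Output neurons get theirs from the targets, but a neuron with no outgoing connections never receives one, so under the old skip-and-requeue scheme anything feeding only such a dead end could starve the queue. Seeding each dead end with a zero delta at every unrolled timestep makes a missing delta a purely transient state, which is why the lookup can now simply break and retry. A toy sketch of that idea in plain Ruby (the node names and the summing delta rule are invented, not RANN's):

    require "bigdecimal/util"

    # feeds[n] lists the nodes n feeds into (its outgoing connections);
    # their deltas must be known before n's delta can be computed.
    feeds = {
      hidden: [:out, :probe], # :hidden feeds the output and a dead-end :probe
      probe:  [],             # dead end: no outgoing connections, no error signal
      out:    [],             # network output, its delta comes from the targets
    }

    deltas = { out: "0.25".to_d }

    # the patch's fix: seed every dead end with a zero delta up front,
    # otherwise :hidden would be requeued forever waiting on :probe
    feeds.each { |n, outs| deltas[n] ||= 0.to_d if outs.empty? }

    queue = [:hidden]
    while (node = queue.shift)
      next if deltas.key?(node)

      needed = feeds[node].map { |out| deltas[out] }
      next queue.push(node) if needed.any?(&:nil?) # not ready yet, revisit

      deltas[node] = needed.sum # stand-in for the real weighted delta rule
    end

    deltas[:hidden] # => 0.25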
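The n.value = 1.to_d line in lstm.rb deserves a word: a ProductNeuron multiplies its incoming values instead of summing them, so a recurrent context input left at the usual initial 0 forces the product to 0 on the first timestep and erases the gated signal, whereas the multiplicative identity 1 lets it pass through untouched. A minimal illustration in plain Ruby (the gate value is invented):

    require "bigdecimal/util"

    product = ->(*values) { values.reduce(:*) } # what a product neuron computes

    gate_output = "0.7".to_d # some upstream gate activation

    product.call(gate_output, 0.to_d) # => 0.0  zero context wipes the gate
    product.call(gate_output, 1.to_d) # => 0.7  identity context passes it through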
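Finally, the neuron.rb change drops context? from the ReLU branch, so context neurons now default to a linear activation. The likely reason, inferred from the diff rather than stated in the changelog: a context neuron merely carries the previous timestep's value forward, and a ReLU would clip a legitimately negative carried value, such as a negative LSTM cell state, to zero. Before and after in plain Ruby:

    require "bigdecimal/util"

    relu   = ->(v) { v.negative? ? 0.to_d : v }
    linear = ->(v) { v }

    carried = "-0.4".to_d # a negative value carried over from the last timestep

    relu.call(carried)   # => 0.0   old default: carried state clipped
    linear.call(carried) # => -0.4  new default: carried state preserved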