Use BigDecimal.limit properly
Mike Campbell committed Dec 2, 2017
1 parent 1ecb638 commit 4e123fd
Showing 11 changed files with 36 additions and 38 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
@@ -1,3 +1,8 @@
- Worked out how to use BigDecimal properly, lol. Refactored to use
`BigDecimal.limit`.

*Michael Campbell*

- Allow customisable precision, defaults to 10.

*Michael Campbell*
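The changelog entry above is the core of this commit: rather than threading a significant-digits count (`RANN.d`) through every `mult`/`div`/`power` call, the precision is now set once with `BigDecimal.limit` and plain arithmetic operators are used everywhere else. A minimal sketch of the difference, using the stdlib BigDecimal API with illustrative values (not code from this repo):

    require "bigdecimal"
    require "bigdecimal/util"

    # Before: precision passed explicitly to each operation.
    "1".to_d.div("3".to_d, 10)   # quotient computed to 10 significant digits

    # After: a process-wide cap set once, then plain operators.
    BigDecimal.limit 10
    "1".to_d / "3".to_d          # result limited to 10 significant digits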
2 changes: 1 addition & 1 deletion examples/step_by_step.rb
@@ -53,5 +53,5 @@
puts "backprop gradients & updates:"
gradients.each do |cid, g|
c = network.connections.find{ |c| c.id == cid }
puts "#{c.input_neuron.name} -> #{c.output_neuron.name}: g = #{g.to_f}, u = #{(c.weight - (0.5.to_d.mult(g, 10))).to_f}"
puts "#{c.input_neuron.name} -> #{c.output_neuron.name}: g = #{g.to_f}, u = #{(c.weight - 0.5.to_d * g).to_f}"
end
13 changes: 2 additions & 11 deletions lib/rann.rb
@@ -8,14 +8,5 @@
require "rann/locked_connection"
require "rann/backprop"

module RANN
@@significant_digits = 10

def self.significant_digits= sd
@@significant_digits = sd
end

def self.d
@@significant_digits
end
end
BigDecimal.mode BigDecimal::EXCEPTION_ALL, true
BigDecimal.limit 10
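For context on the two lines this file now reduces to (per the stdlib BigDecimal docs): `BigDecimal.mode BigDecimal::EXCEPTION_ALL, true` makes exceptional results (NaN, infinities, overflow/underflow, divide-by-zero) raise instead of propagating silently through the training maths, and `BigDecimal.limit 10` caps every subsequent arithmetic result at 10 significant digits process-wide, replacing the removed `RANN.significant_digits` setting. A rough sketch of the effect:

    require "bigdecimal"
    require "bigdecimal/util"

    BigDecimal.mode BigDecimal::EXCEPTION_ALL, true   # raise on NaN/Infinity/etc.
    BigDecimal.limit 10                               # global 10-digit cap

    "2".to_d / "3".to_d    # rounded to 10 significant digits
    # "1".to_d / 0         # would now raise rather than return Infinity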
22 changes: 11 additions & 11 deletions lib/rann/backprop.rb
@@ -12,9 +12,9 @@ class Backprop

ACTIVATION_DERIVATIVES = {
relu: ->(x){ x > 0 ? 1.to_d : 0.to_d },
sig: ->(x){ x.mult(1 - x, RANN.d) },
sig: ->(x){ x * (1 - x) },
linear: ->(_){ 1.to_d },
tanh: ->(x){ 1 - x.power(2, RANN.d) },
tanh: ->(x){ 1 - x ** 2 },
step: ->(_){ 0.to_d },
}

@@ -60,9 +60,9 @@ def run_batch inputs, targets, opts = {}
gradients, error = Backprop.run_single network, input, targets[i + j]

gradients.each do |cid, g|
group_avg_gradients[cid] += g.div batch_size, RANN.d
group_avg_gradients[cid] += g / batch_size
end
group_avg_error += error.div batch_size, RANN.d
group_avg_error += error / batch_size
end

group_avg_gradients.default_proc = nil
@@ -142,9 +142,9 @@ def self.run_single network, inputs, targets
connection_delta =
if c.output_neuron.is_a? ProductNeuron
intermediate = states[out_timestep][:intermediates][c.output_neuron.id]
output_node_delta.mult intermediate.div(states[timestep][:values][c.input_neuron.id], RANN.d), RANN.d
output_node_delta * intermediate / states[timestep][:values][c.input_neuron.id]
else
output_node_delta.mult c.weight, RANN.d
output_node_delta * c.weight
end

m + connection_delta
@@ -153,8 +153,8 @@ def self.run_single network, inputs, targets

node_delta =
ACTIVATION_DERIVATIVES[neuron.activation_function]
.call(states[timestep][:values][neuron.id])
.mult(step_one, RANN.d)
.call(states[timestep][:values][neuron.id]) *
step_one

node_deltas[timestep][neuron.id] = node_delta

@@ -165,11 +165,11 @@ def self.run_single network, inputs, targets
gradient =
if c.output_neuron.is_a? ProductNeuron
intermediate = states[timestep][:intermediates][c.output_neuron.id]
node_delta.mult intermediate.div(c.weight, RANN.d), RANN.d
node_delta * intermediate / c.weight
elsif c.input_neuron.context? && timestep == 0
0.to_d
else
node_delta.mult states[in_timestep][:values][c.input_neuron.id], RANN.d
node_delta * states[in_timestep][:values][c.input_neuron.id]
end

gradients[c.id] += gradient
@@ -216,7 +216,7 @@ def self.mse targets, outputs
total_squared_error = 0.to_d

targets.size.times do |i|
total_squared_error += (targets[i] - outputs[i]).power(2, RANN.d).div 2, RANN.d
total_squared_error += (targets[i] - outputs[i]) ** 2 / 2
end

total_squared_error
5 changes: 3 additions & 2 deletions lib/rann/connection.rb
@@ -25,7 +25,7 @@ def initialize input_neuron, output_neuron, weight = nil

def process
if processable? && !processed?
out_value = input_neuron.value.mult weight, RANN.d
out_value = input_neuron.value * weight
output_neuron.push_value! out_value
@processed = true
end
@@ -60,8 +60,9 @@ def initial_weight
if output_neuron.context?
1.to_d
else
rand.to_d RANN.d
rand.to_d BigDecimal.limit
end
end
end
end

6 changes: 3 additions & 3 deletions lib/rann/gradient_checker.rb
@@ -1,7 +1,7 @@
module RANN
class GradientChecker
def self.epsilon
10.to_d.power -4, RANN.d + 2
10.to_d ** -4
end

def self.check network, inputs, targets, dvec
@@ -23,7 +23,7 @@ def self.check network, inputs, targets, dvec
error_thetaminus = error outputs, targets
network.reset!

gradapprox[i] = (error_thetaplus - error_thetaminus).div epsilon.mult(2, RANN.d), RANN.d
gradapprox[i] = (error_thetaplus - error_thetaminus) / (epsilon * 2)
end

gradapprox.each.with_index.with_object [] do |(ga, i), res|
@@ -35,7 +35,7 @@ def self.error outputs, targets
total_squared_error = 0.to_d

targets.size.times do |i|
total_squared_error += (targets[i] - outputs[i]).power(2, RANN.d + 2).div 2, RANN.d
total_squared_error += (targets[i] - outputs[i]) ** 2 / 2
end

total_squared_error
6 changes: 3 additions & 3 deletions lib/rann/network.rb
@@ -168,9 +168,9 @@ def reset!
def init_normalised!
connections.each do |c|
out_cons = c.output_neuron.connection_count.to_d
from = -1.to_d.div out_cons.sqrt(RANN.d), RANN.d
to = 1.to_d.div out_cons.sqrt(RANN.d), RANN.d
c.weight = (to - from).mult(rand.to_d, RANN.d) + from
from = -1.to_d / out_cons.sqrt(0)
to = 1.to_d / out_cons.sqrt(0)
c.weight = (to - from) * rand.to_d + from
end
end
alias init_normalized! init_normalised!
5 changes: 3 additions & 2 deletions lib/rann/neuron.rb
@@ -1,12 +1,13 @@
require "securerandom"
require "bigdecimal"
require "bigdecimal/math"
require "bigdecimal/util"

module RANN
class Neuron
ACTIVATION_FUNCTIONS = {
sig: ->(v){ 1.to_d.div(1 + Math::E.to_d.power(-v, RANN.d), RANN.d) },
tanh: ->(v){ Math.tanh(v).to_d(RANN.d) },
sig: ->(v){ 1.to_d / (1 + BigMath.E(BigDecimal.limit) ** -v) },
tanh: ->(v){ Math.tanh(v).to_d(BigDecimal.limit) },
relu: ->(v){ [0.to_d, v].max },
linear: ->(v){ v },
step: ->(v){ v > 0.5 ? 1.to_d : 0.to_d },
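A note on the sigmoid and tanh lines above: `BigMath.E(n)` (from `bigdecimal/math`, required at the top of this file) returns e computed to n significant digits, so passing `BigDecimal.limit` keeps the constant at the same precision as the rest of the arithmetic, and `Math.tanh(v).to_d(BigDecimal.limit)` converts the Float result back to a BigDecimal at that precision. Roughly, assuming the limit has already been set as in lib/rann.rb:

    require "bigdecimal"
    require "bigdecimal/math"
    require "bigdecimal/util"

    BigDecimal.limit 10

    e   = BigMath.E(BigDecimal.limit)        # e to 10 significant digits
    sig = 1.to_d / (1 + e ** -"0.5".to_d)    # sigmoid(0.5) as a BigDecimal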
4 changes: 2 additions & 2 deletions lib/rann/optimisers/adagrad.rb
@@ -12,9 +12,9 @@ def initialize opts = {}
end

def update grad, cid
@historical_gradient[cid] = @historical_gradient[cid] + grad.power(2, RANN.d)
@historical_gradient[cid] = @historical_gradient[cid] + grad ** 2

grad.mult(- @learning_rate.div(@fudge_factor + @historical_gradient[cid].sqrt(RANN.d), RANN.d), RANN.d)
grad * - @learning_rate / (@fudge_factor + @historical_gradient[cid].sqrt(0))
end

# anything that gets modified over the course of training
4 changes: 2 additions & 2 deletions lib/rann/optimisers/rmsprop.rb
@@ -13,9 +13,9 @@ def initialize opts = {}
end

def update grad, cid
@historical_gradient[cid] = @decay.mult(@historical_gradient[cid], RANN.d) + (1 - @decay).mult(grad.power(2, RANN.d), RANN.d)
@historical_gradient[cid] = @decay * @historical_gradient[cid] + (1 - @decay) * grad ** 2

grad.mult(- @learning_rate.div(@fudge_factor + @historical_gradient[cid].sqrt(RANN.d), RANN.d), RANN.d)
grad * - @learning_rate / (@fudge_factor + @historical_gradient[cid].sqrt(0))
end

# anything that gets modified over the course of training
2 changes: 1 addition & 1 deletion lib/rann/product_neuron.rb
@@ -5,7 +5,7 @@ class ProductNeuron < Neuron
attr_accessor :intermediate

def set_value!
@intermediate = incoming.reduce{ |i, m| m.mult(i, RANN.d) }
@intermediate = incoming.reduce{ |i, m| m * i }
self.value = ACTIVATION_FUNCTIONS[activation_function].call @intermediate
end
end
