Skip to content

Commit

Permalink
dec: add calibration tests and update thresholds
Browse files Browse the repository at this point in the history
  • Loading branch information
db47h committed May 22, 2020
1 parent 00e29bc commit af127d2
Show file tree
Hide file tree
Showing 3 changed files with 196 additions and 18 deletions.
28 changes: 21 additions & 7 deletions dec.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,20 @@ import (
"sync"
)

// The following thresholds are hugely different from their counterparts
// in math/big.

// Operands that are shorter than decKaratsubaThreshold are multiplied using
// "grade school" multiplication; for longer operands the Karatsuba algorithm
// is used.
//
// It is a variable rather than a constant so that the calibration test can
// temporarily override it while benchmarking.
var decKaratsubaThreshold = 30 // computed by dec_calibrate_test.go

// Operands that are shorter than decBasicSqrThreshold are squared using
// "grade school" multiplication (decBasicMul); operands of at least that
// length but shorter than decKaratsubaSqrThreshold are squared using
// decBasicSqr; for longer operands we use the Karatsuba algorithm optimized
// for x == y.
var decBasicSqrThreshold = 10 // computed by dec_calibrate_test.go
var decKaratsubaSqrThreshold = 50 // computed by dec_calibrate_test.go

// dec is an unsigned integer x of the form
//
// x = x[n-1]*_BD^(n-1) + x[n-2]*_BD^(n-2) + ... + x[1]*_BD + x[0]
Expand Down Expand Up @@ -569,12 +583,12 @@ func (z dec) sqr(x dec) dec {
z = nil // z is an alias for x - cannot reuse
}

if n < basicSqrThreshold {
if n < decBasicSqrThreshold {
z = z.make(2 * n)
decBasicMul(z, x, x)
return z.norm()
}
if n < karatsubaSqrThreshold {
if n < decKaratsubaSqrThreshold {
z = z.make(2 * n)
decBasicSqr(z, x)
return z.norm()
Expand All @@ -584,7 +598,7 @@ func (z dec) sqr(x dec) dec {

// z = (x1*b + x0)^2 = x1^2*b^2 + 2*x1*x0*b + x0^2

k := karatsubaLen(n, karatsubaSqrThreshold)
k := karatsubaLen(n, decKaratsubaSqrThreshold)

x0 := x[0:k]
z = z.make(max(6*k, 2*n))
Expand Down Expand Up @@ -639,7 +653,7 @@ func decBasicSqr(z, x dec) {
func decKaratsubaSqr(z, x dec) {
n := len(x)

if n&1 != 0 || n < karatsubaSqrThreshold || n < 2 {
if n&1 != 0 || n < decKaratsubaSqrThreshold || n < 2 {
decBasicSqr(z[:2*n], x)
return
}
Expand Down Expand Up @@ -698,7 +712,7 @@ func (z dec) mul(x, y dec) dec {
}

// use basic multiplication if the numbers are small
if n < karatsubaThreshold {
if n < decKaratsubaThreshold {
z = z.make(m + n)
decBasicMul(z, x, y)
return z.norm()
Expand All @@ -711,7 +725,7 @@ func (z dec) mul(x, y dec) dec {
// y = yh*b + y0 (0 <= y0 < b)
// b = 10**(_DW*k) ("base" of digits xi, yi)
//
k := karatsubaLen(n, karatsubaThreshold)
k := karatsubaLen(n, decKaratsubaThreshold)
// k <= n

// // multiply x0 and y0 via Karatsuba
Expand Down Expand Up @@ -959,7 +973,7 @@ func decKaratsuba(z, x, y dec) {
// Switch to basic multiplication if numbers are odd or small.
// (n is always even if decKaratsubaThreshold is even, but be
// conservative)
if n&1 != 0 || n < karatsubaThreshold || n < 2 {
if n&1 != 0 || n < decKaratsubaThreshold || n < 2 {
decBasicMul(z, x, y)
return
}
Expand Down
175 changes: 175 additions & 0 deletions dec_calibrate_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Calibration used to determine thresholds for using
// different algorithms. Ideally, this would be converted
// to go generate to create thresholds.go

// This file prints execution times for the Mul benchmark
// given different Karatsuba thresholds. The result may be
// used to manually fine-tune the threshold constant. The
// results are somewhat fragile; use repeated runs to get
// a clear picture.

// Calculates lower and upper thresholds for when basicSqr
// is faster than standard multiplication.

// Usage: go test -run=TestDecCalibrate -v -calibrate -cpu 1
// Forcing a single logical CPU seems to yield more stable
// benchmarks.

package decimal

import (
"flag"
"fmt"
"testing"
"time"
)

// calibrate holds the value of the -calibrate command-line flag (default
// false); TestDecCalibrate only runs the calibration when it is set.
var calibrate = flag.Bool("calibrate", false, "run calibration test")

// Squaring strategies compared by the calibration. Each value is passed to
// measureSqr to choose which thresholds it installs, and is also printed as a
// label in the calibration output.
const (
sqrModeMul = "mul(x, x)"
sqrModeBasic = "basicSqr(x)"
sqrModeKaratsuba = "karatsubaSqr(x)"
)

// TestDecCalibrate runs the threshold calibration and prints its findings to
// standard output. It is skipped unless the -calibrate flag is set, since it
// drives a large number of benchmark runs whose output is meant to be read by
// a human tuning the thresholds by hand.
func TestDecCalibrate(t *testing.T) {
	if !*calibrate {
		// Report a skip rather than a silent pass so that verbose test
		// output makes it clear that no calibration was performed.
		t.Skip("calibration disabled; rerun with -calibrate to enable")
	}

	computeKaratsubaThresholds()

	// compute basicSqrThreshold where overhead becomes negligible
	minSqr := computeSqrThreshold(5, 20, 1, 3, sqrModeMul, sqrModeBasic)
	// compute karatsubaSqrThreshold where karatsuba is faster
	maxSqr := computeSqrThreshold(30, 300, 10, 3, sqrModeBasic, sqrModeKaratsuba)

	// computeSqrThreshold returns 0 when no crossover was observed.
	if minSqr != 0 {
		fmt.Printf("found basicSqrThreshold = %d\n", minSqr)
	} else {
		fmt.Println("no basicSqrThreshold found")
	}
	if maxSqr != 0 {
		fmt.Printf("found karatsubaSqrThreshold = %d\n", maxSqr)
	} else {
		fmt.Println("no karatsubaSqrThreshold found")
	}
}

// karatsubaLoad is the benchmark body timed by measureKaratsuba; it simply
// delegates to BenchmarkDecMul1e4.
func karatsubaLoad(b *testing.B) {
BenchmarkDecMul1e4(b)
}

// measureKaratsuba benchmarks karatsubaLoad with decKaratsubaThreshold
// temporarily set to th and reports the measured time per operation. The
// previous threshold is put back before returning.
func measureKaratsuba(th int) time.Duration {
	saved := decKaratsubaThreshold
	decKaratsubaThreshold = th

	nsPerOp := testing.Benchmark(karatsubaLoad).NsPerOp()

	decKaratsubaThreshold = saved
	return time.Duration(nsPerOp)
}

// computeKaratsubaThresholds prints, for Karatsuba thresholds starting at 4
// and staying below 128, the time taken by the multiplication work load and
// its improvement (in percent) over the time measured with Karatsuba
// effectively disabled. It marks the first threshold that beats that baseline
// ("break-even point") and the first one whose improvement is positive but
// smaller than the previous one ("diminishing return"). Nothing is returned;
// the output is meant to be inspected by hand.
func computeKaratsubaThresholds() {
fmt.Printf("Multiplication times for varying Karatsuba thresholds\n")
fmt.Printf("(run repeatedly for good results)\n")

// determine Tb, the work load execution time using basic multiplication
Tb := measureKaratsuba(1e9) // th == 1e9 => Karatsuba multiplication disabled
fmt.Printf("Tb = %10s\n", Tb)

// thresholds: th is the candidate being measured; th1 and th2 record the
// break-even and diminishing-return points (-1 while not yet found)
th := 4
th1 := -1
th2 := -1

// count stays negative (and keeps decreasing) until both points have been
// found; it is then set to a positive value and counts down to 0, which ends
// the loop. The loop also ends once th reaches 128.
var deltaOld time.Duration
for count := -1; count != 0 && th < 128; count-- {
// determine Tk, the work load execution time using Karatsuba multiplication
Tk := measureKaratsuba(th)

// improvement over Tb, in percent (integer division on durations)
delta := (Tb - Tk) * 100 / Tb

fmt.Printf("th = %3d Tk = %10s %4d%%", th, Tk, delta)

// determine break-even point
if Tk < Tb && th1 < 0 {
th1 = th
fmt.Print(" break-even point")
}

// determine diminishing return
if 0 < delta && delta < deltaOld && th2 < 0 {
th2 = th
fmt.Print(" diminishing return")
}
deltaOld = delta

fmt.Println()

// trigger counter (only once: afterwards count is no longer negative)
if th1 >= 0 && th2 >= 0 && count < 0 {
count = 10 // start the countdown: a few more measurements, then the loop stops at 0
}

th++
}
}

// measureSqr returns the time per operation of benchmarkDecSqr called with
// words (presumably the operand length in words), averaged over nruns
// benchmark runs.
//
// Before measuring, the squaring thresholds are adjusted according to mode
// (sqrModeMul, sqrModeBasic or sqrModeKaratsuba); any other mode leaves them
// untouched. The previous threshold values are put back before returning.
func measureSqr(words, nruns int, mode string) time.Duration {
	savedBasic, savedKaratsuba := decBasicSqrThreshold, decKaratsubaSqrThreshold

	if mode == sqrModeMul {
		decBasicSqrThreshold = words + 1
	} else if mode == sqrModeBasic {
		decBasicSqrThreshold = words - 1
		decKaratsubaSqrThreshold = words + 1
	} else if mode == sqrModeKaratsuba {
		decKaratsubaSqrThreshold = words - 1
	}

	// more runs for better statistics
	bench := func(b *testing.B) { benchmarkDecSqr(b, words) }
	var totalNs int64
	for run := 0; run < nruns; run++ {
		totalNs += testing.Benchmark(bench).NsPerOp()
	}
	meanNs := totalNs / int64(nruns)

	decBasicSqrThreshold = savedBasic
	decKaratsubaSqrThreshold = savedKaratsuba

	return time.Duration(meanNs)
}

// computeSqrThreshold scans operand sizes from, from+step, ... up to and
// including to, timing the squaring modes lower and upper at each size with
// measureSqr (nruns runs each) and printing one line per size.
//
// It returns the first size at which the answer to "is upper faster than
// lower?" differs from the answer obtained at the first size, or 0 if the
// answer never changes within the range.
func computeSqrThreshold(from, to, step, nruns int, lower, upper string) int {
	fmt.Printf("Calibrating threshold between %s and %s\n", lower, upper)
	fmt.Printf("Looking for a timing difference for x between %d - %d words by %d step\n", from, to, step)

	var (
		firstFaster bool // whether upper won at the first size
		found       int  // first size where the winner changed; 0 if none yet
	)
	for words := from; words <= to; words += step {
		tLower := measureSqr(words, nruns, lower)
		tUpper := measureSqr(words, nruns, upper)

		gain := tLower - tUpper
		upperFaster := tLower > tUpper
		fmt.Printf("words = %3d deltaT = %10s (%4d%%) is %s better: %v", words, gain, gain*100/tLower, upper, upperFaster)

		if words == from {
			firstFaster = upperFaster
		}
		if found == 0 && upperFaster != firstFaster {
			found = words
			fmt.Printf(" threshold found")
		}
		fmt.Println()
	}

	if found == 0 {
		fmt.Printf("Found NO threshold between %d - %d\n", from, to)
		return 0
	}
	fmt.Printf("Found threshold = %d between %d - %d\n", found, from, to)
	return found
}
11 changes: 0 additions & 11 deletions stdlib.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,17 +249,6 @@ type nat []Word

const divRecursiveThreshold = 100

// Operands that are shorter than karatsubaThreshold are multiplied using
// "grade school" multiplication; for longer operands the Karatsuba algorithm
// is used.
const karatsubaThreshold = 40 // computed by calibrate_test.go

// Operands that are shorter than basicSqrThreshold are squared using
// "grade school" multiplication; for operands longer than karatsubaSqrThreshold
// we use the Karatsuba algorithm optimized for x == y.
var basicSqrThreshold = 20 // computed by calibrate_test.go
var karatsubaSqrThreshold = 260 // computed by calibrate_test.go

// karatsubaLen computes an approximation to the maximum k <= n such that
// k = p/10**i for a number p <= threshold and an i >= 0. Thus, the
// result is the largest number that can be divided repeatedly by 10 before
Expand Down

0 comments on commit af127d2

Please sign in to comment.