From f86aba1d0da2664f87d89a515b2686d610f30107 Mon Sep 17 00:00:00 2001 From: Denis Bernard Date: Fri, 8 May 2020 03:43:34 +0200 Subject: [PATCH] dec: implement add, sub, div, and modW. Import nat_test.go from stdlib => dec_test.go. --- dec.go | 345 +++++++++++++++++-- dec_arith.go | 82 ++++- dec_arith_test.go | 20 +- dec_conv.go | 5 + dec_conv_test.go | 43 +-- dec_test.go | 833 ++++++++++++++++++++++++++++++++++++++++++---- decimal_test.go | 3 - stdlib.go | 14 +- 8 files changed, 1212 insertions(+), 133 deletions(-) diff --git a/dec.go b/dec.go index dd3fcec..fc51b65 100644 --- a/dec.go +++ b/dec.go @@ -14,9 +14,10 @@ const ( _WD = _W * 30103 / 100000 // Decimal base for a word. 1e9 for 32 bits words and 1e19 for 64 bits // words. - // TODO(db47h): We want this value to be a const. This is a dirty hack to - // avoid conditional compilation that will break if bits.UintSize>64 + // We want this value to be a const. This is a dirty hack to avoid + // conditional compilation; it will break if bits.UintSize != 32 or 64 _BD = 9999999998000000000*(_WD/19) + 1000000000*(_WD/9) + _MD = _BD - 1 ) // dec is an unsigned integer x of the form @@ -34,6 +35,7 @@ type dec []Word var ( decOne = dec{1} + decTen = dec{10} ) func (z dec) clear() { @@ -158,6 +160,60 @@ func (x dec) sticky(i uint) uint { return 0 } +func (z dec) add(x, y dec) dec { + m := len(x) + n := len(y) + + switch { + case m < n: + return z.add(y, x) + case m == 0: + // n == 0 because m >= n; result is 0 + return z[:0] + case n == 0: + // result is x + return z.set(x) + } + // m > 0 + + z = z.make(m + 1) + c := add10VV(z[0:n], x, y) + if m > n { + c = add10VW(z[n:m], x[n:], c) + } + z[m] = c + + return z.norm() +} + +func (z dec) sub(x, y dec) dec { + m := len(x) + n := len(y) + + switch { + case m < n: + panic("underflow") + case m == 0: + // n == 0 because m >= n; result is 0 + return z[:0] + case n == 0: + // result is x + return z.set(x) + } + // m > 0 + + z = z.make(m) + c := sub10VV(z[0:n], x, y) + if m > n { + c = sub10VW(z[n:], x[n:], c) + } + if c != 0 { + panic("underflow") + } + + return z.norm() +} + func (x dec) cmp(y dec) (r int) { m := len(x) n := len(y) @@ -205,6 +261,166 @@ func (z dec) divW(x dec, y Word) (q dec, r Word) { return } +func (z dec) div(z2, u, v dec) (q, r dec) { + if len(v) == 0 { + panic("division by zero") + } + + if u.cmp(v) < 0 { + q = z[:0] + r = z2.set(u) + return + } + + if len(v) == 1 { + var r2 Word + q, r2 = z.divW(u, v[0]) + r = z2.setWord(r2) + return + } + + q, r = z.divLarge(z2, u, v) + return +} + +// getDec returns a *dec of len n. The contents may not be zero. +// The pool holds *dec to avoid allocation when converting to interface{}. +func getDec(n int) *dec { + var z *dec + if v := decPool.Get(); v != nil { + z = v.(*dec) + } + if z == nil { + z = new(dec) + } + *z = z.make(n) + return z +} + +func putDec(x *dec) { + decPool.Put(x) +} + +var decPool sync.Pool + +// q = (uIn-r)/vIn, with 0 <= r < vIn +// Uses z as storage for q, and u as storage for r if possible. +// See Knuth, Volume 2, section 4.3.1, Algorithm D. +// Preconditions: +// len(vIn) >= 2 +// len(uIn) >= len(vIn) +// u must not alias z +func (z dec) divLarge(u, uIn, vIn dec) (q, r dec) { + n := len(vIn) + m := len(uIn) + + // D1. + d := _BD / (vIn[n-1] + 1) + // do not modify vIn, it may be used by another goroutine simultaneously + vp := getDec(n) + v := *vp + mulAdd10VWW(v, vIn, d, 0) + + // u may safely alias uIn or vIn, the value of uIn is used to set u and vIn was already used + u = u.make(m + 1) + u[m] = mulAdd10VWW(u[:m], uIn, d, 0) + + // z may safely alias uIn or vIn, both values were used already + if alias(z, u) { + z = nil // z is an alias for u - cannot reuse + } + q = z.make(m - n + 1) + + // TODO(db47h): implement divRecursive + // if n < divRecursiveThreshold { + q.divBasic(u, v) + // } else { + // q.divRecursive(u, v) + // } + putDec(vp) + + q = q.norm() + r, _ = u.divW(u, d) + r = r.norm() + return q, r +} + +// divBasic performs word-by-word division of u by v. +// The quotient is written in pre-allocated q. +// The remainder overwrites input u. +// +// Precondition: +// - len(q) >= len(u)-len(v) +func (q dec) divBasic(u, v dec) { + n := len(v) + m := len(u) - n + + qhatvp := getDec(n + 1) + qhatv := *qhatvp + // D2. + vn1 := v[n-1] + for j := m; j >= 0; j-- { + // D3. + qhat := Word(_MD) + var ujn Word + if j+n < len(u) { + ujn = u[j+n] + } + if ujn != vn1 { + var rhat Word + qhat, rhat = div10WW(ujn, u[j+n-1], vn1) + // x1 | x2 = q̂v_{n-2} + vn2 := v[n-2] + x1, x2 := mul10WW(qhat, vn2) + // test if q̂v_{n-2} > br̂ + u_{j+n-2} + ujn2 := u[j+n-2] + for greaterThan(x1, x2, rhat, ujn2) { + qhat-- + prevRhat := rhat + rhat += vn1 + // v[n-1] >= 0, so this tests for overflow. + if rhat < prevRhat { + break + } + x1, x2 = mul10WW(qhat, vn2) + } + } + + // D4. + qhatv[n] = mulAdd10VWW(qhatv[0:n], v, qhat, 0) + qhl := len(qhatv) + if j+qhl > len(u) && qhatv[n] == 0 { + qhl-- + } + c := sub10VV(u[j:j+qhl], u[j:], qhatv) + if c != 0 { + c := add10VV(u[j:j+n], u[j:], v) + u[j+n] += c + qhat-- + } + + if j == m && m == len(q) && qhat == 0 { + continue + } + q[j] = qhat + } + + putDec(qhatvp) +} + +// greaterThan reports whether (x1*_BD + x2) > (y1*_BD + y2) +func greaterThan(x1, x2, y1, y2 Word) bool { + return x1 > y1 || x1 == y1 && x2 > y2 +} + +// modW returns x % d. +func (x dec) modW(d Word) (r Word) { + for i := len(x) - 1; i >= 0; i-- { + _, r = div10WW(r, x[i], d) + } + return r +} + func (z dec) mulAddWW(x dec, y, r Word) dec { m := len(x) if m == 0 || y == 0 { @@ -267,26 +483,6 @@ func (z dec) shr(x dec, s uint) dec { return z.norm() } -// getDec returns a *dec of len n. The contents may not be zero. -// The pool holds *dec to avoid allocation when converting to interface{}. -func getDec(n int) *dec { - var z *dec - if v := decPool.Get(); v != nil { - z = v.(*dec) - } - if z == nil { - z = new(dec) - } - *z = z.make(n) - return z -} - -func putDec(x *dec) { - decPool.Put(x) -} - -var decPool sync.Pool - // Operands that are shorter than basicSqrThreshold are squared using // "grade school" multiplication; for operands longer than karatsubaSqrThreshold // we use the Karatsuba algorithm optimized for x == y. @@ -448,3 +644,108 @@ func (z dec) mul(x, y dec) dec { // return z.norm() } + +func (z dec) expNN(x, y, m dec) dec { + panic("not implemented") +} + +// // If m != 0 (i.e., len(m) != 0), expNN sets z to x**y mod m; +// // otherwise it sets z to x**y. The result is the value of z. +// func (z dec) expNN(x, y, m dec) dec { +// if alias(z, x) || alias(z, y) { +// // We cannot allow in-place modification of x or y. +// z = nil +// } + +// // x**y mod 1 == 0 +// if len(m) == 1 && m[0] == 1 { +// return z.setWord(0) +// } +// // m == 0 || m > 1 + +// // x**0 == 1 +// if len(y) == 0 { +// return z.setWord(1) +// } +// // y > 0 + +// // x**1 mod m == x mod m +// if len(y) == 1 && y[0] == 1 && len(m) != 0 { +// _, z = dec(nil).div(z, x, m) +// return z +// } +// // y > 1 + +// if len(m) != 0 { +// // We likely end up being as long as the modulus. +// z = z.make(len(m)) +// } +// z = z.set(x) + +// // If the base is non-trivial and the exponent is large, we use +// // 4-bit, windowed exponentiation. This involves precomputing 14 values +// // (x^2...x^15) but then reduces the number of multiply-reduces by a +// // third. Even for a 32-bit exponent, this reduces the number of +// // operations. Uses Montgomery method for odd moduli. +// if x.cmp(decOne) > 0 && len(y) > 1 && len(m) > 0 { +// if m[0]&1 == 1 { +// return z.expNNMontgomery(x, y, m) +// } +// return z.expNNWindowed(x, y, m) +// } + +// v := y[len(y)-1] // v > 0 because y is normalized and y > 0 +// shift := nlz(v) + 1 +// v <<= shift +// var q nat + +// const mask = 1 << (_W - 1) + +// // We walk through the bits of the exponent one by one. Each time we +// // see a bit, we square, thus doubling the power. If the bit is a one, +// // we also multiply by x, thus adding one to the power. + +// w := _W - int(shift) +// // zz and r are used to avoid allocating in mul and div as +// // otherwise the arguments would alias. +// var zz, r nat +// for j := 0; j < w; j++ { +// zz = zz.sqr(z) +// zz, z = z, zz + +// if v&mask != 0 { +// zz = zz.mul(z, x) +// zz, z = z, zz +// } + +// if len(m) != 0 { +// zz, r = zz.div(r, z, m) +// zz, r, q, z = q, z, zz, r +// } + +// v <<= 1 +// } + +// for i := len(y) - 2; i >= 0; i-- { +// v = y[i] + +// for j := 0; j < _W; j++ { +// zz = zz.sqr(z) +// zz, z = z, zz + +// if v&mask != 0 { +// zz = zz.mul(z, x) +// zz, z = z, zz +// } + +// if len(m) != 0 { +// zz, r = zz.div(r, z, m) +// zz, r, q, z = q, z, zz, r +// } + +// v <<= 1 +// } +// } + +// return z.norm() +// } diff --git a/dec_arith.go b/dec_arith.go index db31db8..9041faf 100644 --- a/dec_arith.go +++ b/dec_arith.go @@ -45,7 +45,7 @@ func decDigits32(x uint) (n uint) { return n } -func nldz(x Word) uint { +func nlz10(x Word) uint { if bits.UintSize == 32 { return _WD - decDigits32(uint(x)) } @@ -159,34 +159,41 @@ func decMaxPow(b Word) (p Word, n int) { // add10VW adds y to x. The resulting carry c is either 0 or 1. func add10VW(z, x dec, y Word) (c Word) { - z[0], c = add10WW(x[0], y) + z[0], c = add10WWW(x[0], y, 0) // propagate carry for i := 1; i < len(z) && i < len(x); i++ { s := x[i] + c - if s >= _BD { - z[i] = 0 - continue - } - // c = 0 from this point - z[i] = s - // copy remaining digits if not adding in-place - if !same(z, x) { + if s < _BD { + z[i] = s + // copy remaining digits copy(z[i+1:], x[i+1:]) + return 0 } - return 0 + z[i] = 0 } - return c + return } func div10WVW(z []Word, xn Word, x []Word, y Word) (r Word) { r = xn for i := len(z) - 1; i >= 0; i-- { - h, l := mulAddWWW(r, _BD, x[i]) - z[i], r = divWW(h, l, y) + z[i], r = div10WW(r, x[i], y) } return } +// q = (u1<<_W + u0 - r)/v +func div10WW(u1, u0, v Word) (q, r Word) { + // q < _BD if u1 < v < _BD + if debugDecimal && !(u1 < v && v < _BD) { + panic("decimal overflow") + } + // convert to base 2 + hi, lo := mulAddWWW(u1, _BD, u0) + // q = (u-r)/v. Since v < _BD => r < _BD + return divWW(hi, lo, v) +} + func mulAdd10VWW(z, x []Word, y, r Word) (c Word) { c = r // The comment near the top of this file discusses this for loop condition. @@ -212,11 +219,11 @@ func mul10WW(x, y Word) (z1, z0 Word) { return Word(hi), Word(lo) } -func add10WW(x, y Word) (s, c Word) { - r, cc := bits.Add(uint(x), uint(y), 0) - if r >= _BD { - r -= _BD +func add10WWW(x, y, cIn Word) (s, c Word) { + r, cc := bits.Add(uint(x), uint(y), uint(cIn)) + if cc != 0 || r >= _BD { cc = 1 + r -= _BD } return Word(r), Word(cc) } @@ -231,3 +238,42 @@ func addMul10VVW(z, x []Word, y Word) (c Word) { } return } + +// The resulting carry c is either 0 or 1. +func add10VV(z, x, y []Word) (c Word) { + for i := 0; i < len(z) && i < len(x) && i < len(y); i++ { + z[i], c = add10WWW(x[i], y[i], c) + } + return +} + +func sub10WWW(x, y, b Word) (d, c Word) { + dd, cc := bits.Sub(uint(x), uint(y), uint(b)) + if cc != 0 { + dd += _BD + } + return Word(dd), Word(cc) +} + +// The resulting carry c is either 0 or 1. +func sub10VV(z, x, y []Word) (c Word) { + for i := 0; i < len(z) && i < len(x) && i < len(y); i++ { + z[i], c = sub10WWW(x[i], y[i], c) + } + return +} + +func sub10VW(z, x []Word, y Word) (c Word) { + c = y + for i := 0; i < len(z) && i < len(x); i++ { + zi, cc := bits.Sub(uint(x[i]), uint(c), 0) + c = Word(cc) + if c == 0 { + z[i] = Word(zi) + copy(z[i+1:], x[i+1:]) + return + } + z[i] = Word(zi + _BD) + } + return +} diff --git a/dec_arith_test.go b/dec_arith_test.go index 9a3a597..13f764b 100644 --- a/dec_arith_test.go +++ b/dec_arith_test.go @@ -1,11 +1,9 @@ package decimal import ( - "math/rand" "reflect" "strconv" "testing" - "time" ) func TestAdd10VW(t *testing.T) { @@ -38,9 +36,8 @@ func TestAdd10VW(t *testing.T) { } func TestDecDigits(t *testing.T) { - rand.Seed(time.Now().UnixNano()) for i := 0; i < 10000; i++ { - n := uint(rand.Uint64()) + n := uint(rnd.Uint64()) d := uint(0) for m := n; m != 0; m /= 10 { d++ @@ -52,8 +49,19 @@ func TestDecDigits(t *testing.T) { } func BenchmarkDecDigits(b *testing.B) { - rand.Seed(0xdeadbeefbadf00d) for i := 0; i < b.N; i++ { - benchU = decDigits(uint(rand.Uint64()) % _BD) + benchU = decDigits(uint(rnd.Uint64()) % _BD) } } + +func rnd10W() Word { + return Word(rnd.Uint64() % _BD) +} + +func rnd10V(n int) []Word { + v := make([]Word, n) + for i := range v { + v[i] = rnd10W() + } + return v +} diff --git a/dec_conv.go b/dec_conv.go index fe3c417..9201723 100644 --- a/dec_conv.go +++ b/dec_conv.go @@ -271,6 +271,11 @@ type decDivisor struct { ndigits int // digit length of divisor in terms of output base digits } +// expWW computes x**y +func (z dec) expWW(x, y Word) dec { + return z.expNN(dec(nil).setWord(x), dec(nil).setWord(y), nil) +} + func divisors(m int, b Word, ndigits int, bb Word) []decDivisor { return nil } diff --git a/dec_conv_test.go b/dec_conv_test.go index 6790c46..4f87f4e 100644 --- a/dec_conv_test.go +++ b/dec_conv_test.go @@ -5,6 +5,7 @@ package decimal import ( + "fmt" "io" "math" "strings" @@ -360,27 +361,27 @@ func BenchmarkDecStringPiParallel(b *testing.B) { // } // } -// func BenchmarkString(b *testing.B) { -// const x = 10 -// for _, base := range []int{2, 8, 10, 16} { -// for _, y := range []Word{10, 100, 1000, 10000, 100000} { -// if isRaceBuilder && y > 1000 { -// continue -// } -// b.Run(fmt.Sprintf("%d/Base%d", y, base), func(b *testing.B) { -// b.StopTimer() -// var z dec -// z = z.expWW(x, y) -// z.utoa(base) // warm divisor cache -// b.StartTimer() - -// for i := 0; i < b.N; i++ { -// _ = z.utoa(base) -// } -// }) -// } -// } -// } +func BenchmarkString(b *testing.B) { + const x = 10 + for _, base := range []int{2, 8, 10, 16} { + for _, y := range []Word{10, 100, 1000, 10000, 100000} { + if isRaceBuilder && y > 1000 { + continue + } + b.Run(fmt.Sprintf("%d/Base%d", y, base), func(b *testing.B) { + b.StopTimer() + var z dec + z = z.expWW(x, y) + z.utoa(base) // warm divisor cache + b.StartTimer() + + for i := 0; i < b.N; i++ { + _ = z.utoa(base) + } + }) + } + } +} // func BenchmarkLeafSize(b *testing.B) { // for n := 0; n <= 16; n++ { diff --git a/dec_test.go b/dec_test.go index ce82d08..29bc5d1 100644 --- a/dec_test.go +++ b/dec_test.go @@ -1,79 +1,788 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + package decimal import ( - "math/bits" - "math/rand" - "strconv" + "fmt" + "runtime" + "strings" "testing" - "time" ) -func TestDec_ntz(t *testing.T) { - rand.Seed(time.Now().UnixNano()) - for i := 0; i < 10000; i++ { - w := uint(rand.Uint64()) % _BD - e := uint(rand.Intn(_WD + 1)) - h, l := bits.Mul(w, pow10(e)) - h, l = bits.Div(h, l, _BD) - d := dec{Word(l), Word(h)}.norm() - // adjust e if w == 0 or w%10 == 0 - if w == 0 { - e = 0 - } else { - e += decTrailingZeros(w) - } - if d.ntz() != e { - t.Fatalf("dec{%v}.ntz() = %d, expected %d", d, d.ntz(), e) - } - } -} - -func TestDec_digit(t *testing.T) { - data := []struct { - d dec - n uint - r uint - }{ - {dec{123}, 0, 3}, - {dec{123}, 2, 1}, - {dec{123}, 3, 0}, - {dec{0, 1234567891234567891}, 37, 1}, - {dec{0, 1234567891234567891}, 36, 2}, - {dec{0, 1234567891234567891}, 38, 0}, - } - for di, d := range data { - t.Run(strconv.Itoa(di), func(t *testing.T) { - if dig := d.d.digit(d.n); dig != d.r { - t.Fatalf("%v.digit(%d) = %d, expected %d", d.d, d.n, dig, d.r) - } - }) +var cmpTests = []struct { + x, y dec + r int +}{ + {nil, nil, 0}, + {nil, dec(nil), 0}, + {dec(nil), nil, 0}, + {dec(nil), dec(nil), 0}, + {dec{0}, dec{0}, 0}, + {dec{0}, dec{1}, -1}, + {dec{1}, dec{0}, 1}, + {dec{1}, dec{1}, 0}, + {dec{0, _MD}, dec{1}, 1}, + {dec{1}, dec{0, _MD}, -1}, + {dec{1, _MD}, dec{0, _MD}, 1}, + {dec{0, _MD}, dec{1, _MD}, -1}, + {dec{16, 571956, 8794, 68}, dec{837, 9146, 1, 754489}, -1}, + {dec{34986, 41, 105, 1957}, dec{56, 7458, 104, 1957}, 1}, +} + +func TestCmp(t *testing.T) { + for i, a := range cmpTests { + r := a.x.cmp(a.y) + if r != a.r { + t.Errorf("#%d got r = %v; want %v", i, r, a.r) + } + } +} + +type funNN func(z, x, y dec) dec +type argNN struct { + z, x, y dec +} + +var sumNN = []argNN{ + {}, + {dec{1}, nil, dec{1}}, + {dec{1111111110}, dec{123456789}, dec{987654321}}, + {dec{0, 0, 0, 1}, nil, dec{0, 0, 0, 1}}, + {dec{0, 0, 0, 1111111110}, dec{0, 0, 0, 123456789}, dec{0, 0, 0, 987654321}}, + {dec{0, 0, 0, 1}, dec{0, 0, _MD}, dec{0, 0, 1}}, +} + +var prodNN = []argNN{ + {}, + {nil, nil, nil}, + {nil, dec{991}, nil}, + {dec{991}, dec{991}, dec{1}}, + {dec{991 * 991}, dec{991}, dec{991}}, + {dec{0, 0, 991 * 991}, dec{0, 991}, dec{0, 991}}, + {dec{1 * 991, 2 * 991, 3 * 991, 4 * 991}, dec{1, 2, 3, 4}, dec{991}}, + {dec{4, 11, 20, 30, 20, 11, 4}, dec{1, 2, 3, 4}, dec{4, 3, 2, 1}}, + // 3^100 * 3^28 = 3^128 + { + decFromString("11790184577738583171520872861412518665678211592275841109096961"), + decFromString("515377520732011331036461129765621272702107522001"), + decFromString("22876792454961"), + }, + // z = 111....1 (70000 digits) + // x = 10^(99*700) + ... + 10^1400 + 10^700 + 1 + // y = 111....1 (700 digits, larger than Karatsuba threshold on 32-bit and 64-bit) + { + decFromString(strings.Repeat("1", 70000)), + decFromString("1" + strings.Repeat(strings.Repeat("0", 699)+"1", 99)), + decFromString(strings.Repeat("1", 700)), + }, + // z = 111....1 (20000 digits) + // x = 10^10000 + 1 + // y = 111....1 (10000 digits) + { + decFromString(strings.Repeat("1", 20000)), + decFromString("1" + strings.Repeat("0", 9999) + "1"), + decFromString(strings.Repeat("1", 10000)), + }, +} + +func decFromString(s string) dec { + x, _, _, err := dec(nil).scan(strings.NewReader(s), 0, false) + if err != nil { + panic(err) + } + return x +} + +func TestSet(t *testing.T) { + for _, a := range sumNN { + z := dec(nil).set(a.z) + if z.cmp(a.z) != 0 { + t.Errorf("got z = %v; want %v", z, a.z) + } + } +} + +func testFunNN(t *testing.T, msg string, f funNN, a argNN) { + z := f(nil, a.x, a.y) + if z.cmp(a.z) != 0 { + t.Errorf("%s%+v\n\tgot z = %v; want %v", msg, a, z, a.z) + } +} + +func TestFunNN(t *testing.T) { + for _, a := range sumNN { + arg := a + testFunNN(t, "add", dec.add, arg) + + arg = argNN{a.z, a.y, a.x} + testFunNN(t, "add symmetric", dec.add, arg) + + arg = argNN{a.x, a.z, a.y} + testFunNN(t, "sub", dec.sub, arg) + + arg = argNN{a.y, a.z, a.x} + testFunNN(t, "sub symmetric", dec.sub, arg) + } + + for _, a := range prodNN { + arg := a + testFunNN(t, "mul", dec.mul, arg) + + arg = argNN{a.z, a.y, a.x} + testFunNN(t, "mul symmetric", dec.mul, arg) + } +} + +// var mulRangesN = []struct { +// a, b uint64 +// prod string +// }{ +// {0, 0, "0"}, +// {1, 1, "1"}, +// {1, 2, "2"}, +// {1, 3, "6"}, +// {10, 10, "10"}, +// {0, 100, "0"}, +// {0, 1e9, "0"}, +// {1, 0, "1"}, // empty range +// {100, 1, "1"}, // empty range +// {1, 10, "3628800"}, // 10! +// {1, 20, "2432902008176640000"}, // 20! +// {1, 100, +// "933262154439441526816992388562667004907159682643816214685929" + +// "638952175999932299156089414639761565182862536979208272237582" + +// "51185210916864000000000000000000000000", // 100! +// }, +// } + +// func TestMulRangeN(t *testing.T) { +// for i, r := range mulRangesN { +// prod := string(nat(nil).mulRange(r.a, r.b).utoa(10)) +// if prod != r.prod { +// t.Errorf("#%d: got %s; want %s", i, prod, r.prod) +// } +// } +// } + +// allocBytes returns the number of bytes allocated by invoking f. +func allocBytes(f func()) uint64 { + var stats runtime.MemStats + runtime.ReadMemStats(&stats) + t := stats.TotalAlloc + f() + runtime.ReadMemStats(&stats) + return stats.TotalAlloc - t +} + +// TestMulUnbalanced tests that multiplying numbers of different lengths +// does not cause deep recursion and in turn allocate too much memory. +// Test case for issue 3807. +func TestMulUnbalanced(t *testing.T) { + defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(1)) + x := rndDec(50000) + y := rndDec(40) + allocSize := allocBytes(func() { + dec(nil).mul(x, y) + }) + inputSize := uint64(len(x)+len(y)) * _S + if ratio := allocSize / uint64(inputSize); ratio > 10 { + t.Errorf("multiplication uses too much memory (%d > %d times the size of inputs)", allocSize, ratio) + } +} + +// rndDec returns a random dec value >= 0 of (usually) n words in length. +// In extremely unlikely cases it may be smaller than n words if the top- +// most words are 0. +func rndDec(n int) dec { + return dec(rnd10V(n)).norm() +} + +// rndDec1 is like rndDec but the result is guaranteed to be > 0. +func rndDec1(n int) dec { + x := dec(rnd10V(n)).norm() + if len(x) == 0 { + x.setWord(1) } + return x } -func TestDecSetUint64(t *testing.T) { - data := []struct { - in uint64 - exp int32 - }{ - {_BD + 1, int32(_WD + 1)}, - {_BD - 1, int32(_WD)}, - {9999, 4}, +func BenchmarkMul(b *testing.B) { + mulx := rndDec(1e4) + muly := rndDec(1e4) + b.ResetTimer() + for i := 0; i < b.N; i++ { + var z dec + z.mul(mulx, muly) } +} + +func benchmarkDecMul(b *testing.B, nwords int) { + x := rndDec(nwords) + y := rndDec(nwords) var z dec - for _, d := range data { - out := fmt.Sprintf("%d", d.in) - t.Run(out, func(t *testing.T) { - var exp int32 - z, exp = z.setUint64(d.in) - if a := string(z.utoa(10)); a != out { - t.Fatalf("expected mantissa %v, got %v", out, a) + b.ResetTimer() + for i := 0; i < b.N; i++ { + z.mul(x, y) + } +} + +var mulBenchSizes = []int{10, 100, 1000, 10000, 100000} + +func BenchmarkDecMul(b *testing.B) { + for _, n := range mulBenchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { + benchmarkDecMul(b, n) + }) + } +} + +func TestNLZ(t *testing.T) { + var x Word = _MD + for i := 0; i <= _WD; i++ { + if int(nlz10(x)) != i { + t.Errorf("failed at %x: got %d want %d", x, nlz10(x), i) + } + x /= 10 + } +} + +type shiftTest struct { + in dec + shift uint + out dec +} + +var leftShiftTests = []shiftTest{ + {nil, 0, nil}, + {nil, 1, nil}, + {decOne, 0, decOne}, + {decOne, 1, decTen}, + {dec{_BD / 10}, 1, dec{0}}, + {dec{_BD / 10, 0}, 1, dec{0, 1}}, +} + +func TestShiftLeft(t *testing.T) { + for i, test := range leftShiftTests { + var z dec + z = z.shl(test.in, test.shift) + for j, d := range test.out { + if j >= len(z) || z[j] != d { + t.Errorf("#%d: got: %v want: %v", i, z, test.out) + break } - if exp != d.exp { - t.Fatalf("expected exponent %v, got %v", d.exp, exp) + } + } +} + +var rightShiftTests = []shiftTest{ + {nil, 0, nil}, + {nil, 1, nil}, + {decOne, 0, decOne}, + {decOne, 1, nil}, + {decTen, 1, decOne}, + {dec{0, 1}, 1, dec{_BD / 10}}, + {dec{10, 1, 1}, 1, dec{_BD/10 + 1, _BD / 10}}, +} + +func TestShiftRight(t *testing.T) { + for i, test := range rightShiftTests { + var z dec + z = z.shr(test.in, test.shift) + for j, d := range test.out { + if j >= len(z) || z[j] != d { + t.Errorf("#%d: got: %v want: %v", i, z, test.out) + break } - if l := (exp + _WD - 1) / _WD; l != int32(len(z)) { - t.Fatalf("expected length %v, got %v", l, len(z)) + } + } +} + +func BenchmarkZeroShifts(b *testing.B) { + x := rndDec(800) + + b.Run("Shl", func(b *testing.B) { + for i := 0; i < b.N; i++ { + var z dec + z.shl(x, 0) + } + }) + b.Run("ShlSame", func(b *testing.B) { + for i := 0; i < b.N; i++ { + x.shl(x, 0) + } + }) + + b.Run("Shr", func(b *testing.B) { + for i := 0; i < b.N; i++ { + var z dec + z.shr(x, 0) + } + }) + b.Run("ShrSame", func(b *testing.B) { + for i := 0; i < b.N; i++ { + x.shr(x, 0) + } + }) +} + +type modWTest struct { + in string + dividend string + out string +} + +var modWTests32 = []modWTest{ + {"23492635982634928349238759823742", "252341", "220170"}, +} + +var modWTests64 = []modWTest{ + {"6527895462947293856291561095690465243862946", "524326975699234", "375066989628668"}, +} + +func runModWTests(t *testing.T, tests []modWTest) { + for i, test := range tests { + in := decFromString(test.in) + d := decFromString(test.dividend) + out := decFromString(test.out) + + r := in.modW(d[0]) + if r != out[0] { + t.Errorf("#%d failed: got %d want %s", i, r, out.utoa(10)) + } + } +} + +func TestModW(t *testing.T) { + if _W >= 32 { + runModWTests(t, modWTests32) + } + if _W >= 64 { + runModWTests(t, modWTests64) + } +} + +// var montgomeryTests = []struct { +// x, y, m string +// k0 uint64 +// out32, out64 string +// }{ +// { +// "0xffffffffffffffffffffffffffffffffffffffffffffffffe", +// "0xffffffffffffffffffffffffffffffffffffffffffffffffe", +// "0xfffffffffffffffffffffffffffffffffffffffffffffffff", +// 1, +// "0x1000000000000000000000000000000000000000000", +// "0x10000000000000000000000000000000000", +// }, +// { +// "0x000000000ffffff5", +// "0x000000000ffffff0", +// "0x0000000010000001", +// 0xff0000000fffffff, +// "0x000000000bfffff4", +// "0x0000000003400001", +// }, +// { +// "0x0000000080000000", +// "0x00000000ffffffff", +// "0x1000000000000001", +// 0xfffffffffffffff, +// "0x0800000008000001", +// "0x0800000008000001", +// }, +// { +// "0x0000000080000000", +// "0x0000000080000000", +// "0xffffffff00000001", +// 0xfffffffeffffffff, +// "0xbfffffff40000001", +// "0xbfffffff40000001", +// }, +// { +// "0x0000000080000000", +// "0x0000000080000000", +// "0x00ffffff00000001", +// 0xfffffeffffffff, +// "0xbfffff40000001", +// "0xbfffff40000001", +// }, +// { +// "0x0000000080000000", +// "0x0000000080000000", +// "0x0000ffff00000001", +// 0xfffeffffffff, +// "0xbfff40000001", +// "0xbfff40000001", +// }, +// { +// "0x3321ffffffffffffffffffffffffffff00000000000022222623333333332bbbb888c0", +// "0x3321ffffffffffffffffffffffffffff00000000000022222623333333332bbbb888c0", +// "0x33377fffffffffffffffffffffffffffffffffffffffffffff0000000000022222eee1", +// 0xdecc8f1249812adf, +// "0x04eb0e11d72329dc0915f86784820fc403275bf2f6620a20e0dd344c5cd0875e50deb5", +// "0x0d7144739a7d8e11d72329dc0915f86784820fc403275bf2f61ed96f35dd34dbb3d6a0", +// }, +// { +// "0x10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffffffffffff00000000000022222223333333333444444444", +// "0x10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffffffffffff999999999999999aaabbbbbbbbcccccccccccc", +// "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff33377fffffffffffffffffffffffffffffffffffffffffffff0000000000022222eee1", +// 0xdecc8f1249812adf, +// "0x5c0d52f451aec609b15da8e5e5626c4eaa88723bdeac9d25ca9b961269400410ca208a16af9c2fb07d7a11c7772cba02c22f9711078d51a3797eb18e691295293284d988e349fa6deba46b25a4ecd9f715", +// "0x92fcad4b5c0d52f451aec609b15da8e5e5626c4eaa88723bdeac9d25ca9b961269400410ca208a16af9c2fb07d799c32fe2f3cc5422f9711078d51a3797eb18e691295293284d8f5e69caf6decddfe1df6", +// }, +// } + +// func TestMontgomery(t *testing.T) { +// one := NewInt(1) +// _B := new(Int).Lsh(one, _W) +// for i, test := range montgomeryTests { +// x := decFromString(test.x) +// y := decFromString(test.y) +// m := decFromString(test.m) +// for len(x) < len(m) { +// x = append(x, 0) +// } +// for len(y) < len(m) { +// y = append(y, 0) +// } + +// if x.cmp(m) > 0 { +// _, r := nat(nil).div(nil, x, m) +// t.Errorf("#%d: x > m (0x%s > 0x%s; use 0x%s)", i, x.utoa(16), m.utoa(16), r.utoa(16)) +// } +// if y.cmp(m) > 0 { +// _, r := nat(nil).div(nil, x, m) +// t.Errorf("#%d: y > m (0x%s > 0x%s; use 0x%s)", i, y.utoa(16), m.utoa(16), r.utoa(16)) +// } + +// var out nat +// if _W == 32 { +// out = decFromString(test.out32) +// } else { +// out = decFromString(test.out64) +// } + +// // t.Logf("#%d: len=%d\n", i, len(m)) + +// // check output in table +// xi := &Int{abs: x} +// yi := &Int{abs: y} +// mi := &Int{abs: m} +// p := new(Int).Mod(new(Int).Mul(xi, new(Int).Mul(yi, new(Int).ModInverse(new(Int).Lsh(one, uint(len(m))*_W), mi))), mi) +// if out.cmp(p.abs.norm()) != 0 { +// t.Errorf("#%d: out in table=0x%s, computed=0x%s", i, out.utoa(16), p.abs.norm().utoa(16)) +// } + +// // check k0 in table +// k := new(Int).Mod(&Int{abs: m}, _B) +// k = new(Int).Sub(_B, k) +// k = new(Int).Mod(k, _B) +// k0 := Word(new(Int).ModInverse(k, _B).Uint64()) +// if k0 != Word(test.k0) { +// t.Errorf("#%d: k0 in table=%#x, computed=%#x\n", i, test.k0, k0) +// } + +// // check montgomery with correct k0 produces correct output +// z := nat(nil).montgomery(x, y, m, k0, len(m)) +// z = z.norm() +// if z.cmp(out) != 0 { +// t.Errorf("#%d: got 0x%s want 0x%s", i, z.utoa(16), out.utoa(16)) +// } +// } +// } + +// var expNNTests = []struct { +// x, y, m string +// out string +// }{ +// {"0", "0", "0", "1"}, +// {"0", "0", "1", "0"}, +// {"1", "1", "1", "0"}, +// {"2", "1", "1", "0"}, +// {"2", "2", "1", "0"}, +// {"10", "100000000000", "1", "0"}, +// {"0x8000000000000000", "2", "", "0x40000000000000000000000000000000"}, +// {"0x8000000000000000", "2", "6719", "4944"}, +// {"0x8000000000000000", "3", "6719", "5447"}, +// {"0x8000000000000000", "1000", "6719", "1603"}, +// {"0x8000000000000000", "1000000", "6719", "3199"}, +// { +// "2938462938472983472983659726349017249287491026512746239764525612965293865296239471239874193284792387498274256129746192347", +// "298472983472983471903246121093472394872319615612417471234712061", +// "29834729834729834729347290846729561262544958723956495615629569234729836259263598127342374289365912465901365498236492183464", +// "23537740700184054162508175125554701713153216681790245129157191391322321508055833908509185839069455749219131480588829346291", +// }, +// { +// "11521922904531591643048817447554701904414021819823889996244743037378330903763518501116638828335352811871131385129455853417360623007349090150042001944696604737499160174391019030572483602867266711107136838523916077674888297896995042968746762200926853379", +// "426343618817810911523", +// "444747819283133684179", +// "42", +// }, +// } + +// func TestExpNN(t *testing.T) { +// for i, test := range expNNTests { +// x := decFromString(test.x) +// y := decFromString(test.y) +// out := decFromString(test.out) + +// var m nat +// if len(test.m) > 0 { +// m = decFromString(test.m) +// } + +// z := nat(nil).expNN(x, y, m) +// if z.cmp(out) != 0 { +// t.Errorf("#%d got %s want %s", i, z.utoa(10), out.utoa(10)) +// } +// } +// } + +// func BenchmarkExp3Power(b *testing.B) { +// const x = 3 +// for _, y := range []Word{ +// 0x10, 0x40, 0x100, 0x400, 0x1000, 0x4000, 0x10000, 0x40000, 0x100000, 0x400000, +// } { +// b.Run(fmt.Sprintf("%#x", y), func(b *testing.B) { +// var z nat +// for i := 0; i < b.N; i++ { +// z.expWW(x, y) +// } +// }) +// } +// } + +// func fibo(n int) nat { +// switch n { +// case 0: +// return nil +// case 1: +// return nat{1} +// } +// f0 := fibo(0) +// f1 := fibo(1) +// var f2 nat +// for i := 1; i < n; i++ { +// f2 = f2.add(f0, f1) +// f0, f1, f2 = f1, f2, f0 +// } +// return f1 +// } + +// var fiboNums = []string{ +// "0", +// "55", +// "6765", +// "832040", +// "102334155", +// "12586269025", +// "1548008755920", +// "190392490709135", +// "23416728348467685", +// "2880067194370816120", +// "354224848179261915075", +// } + +// func TestFibo(t *testing.T) { +// for i, want := range fiboNums { +// n := i * 10 +// got := string(fibo(n).utoa(10)) +// if got != want { +// t.Errorf("fibo(%d) failed: got %s want %s", n, got, want) +// } +// } +// } + +// func BenchmarkFibo(b *testing.B) { +// for i := 0; i < b.N; i++ { +// fibo(1e0) +// fibo(1e1) +// fibo(1e2) +// fibo(1e3) +// fibo(1e4) +// fibo(1e5) +// } +// } + +// var bitTests = []struct { +// x string +// i uint +// want uint +// }{ +// {"0", 0, 0}, +// {"0", 1, 0}, +// {"0", 1000, 0}, + +// {"0x1", 0, 1}, +// {"0x10", 0, 0}, +// {"0x10", 3, 0}, +// {"0x10", 4, 1}, +// {"0x10", 5, 0}, + +// {"0x8000000000000000", 62, 0}, +// {"0x8000000000000000", 63, 1}, +// {"0x8000000000000000", 64, 0}, + +// {"0x3" + strings.Repeat("0", 32), 127, 0}, +// {"0x3" + strings.Repeat("0", 32), 128, 1}, +// {"0x3" + strings.Repeat("0", 32), 129, 1}, +// {"0x3" + strings.Repeat("0", 32), 130, 0}, +// } + +// func TestBit(t *testing.T) { +// for i, test := range bitTests { +// x := decFromString(test.x) +// if got := x.bit(test.i); got != test.want { +// t.Errorf("#%d: %s.bit(%d) = %v; want %v", i, test.x, test.i, got, test.want) +// } +// } +// } + +var stickyTests = []struct { + x string + i uint + want uint +}{ + {"0", 0, 0}, + {"0", 1, 0}, + {"0", 1000, 0}, + + {"1", 0, 0}, + {"1", 1, 1}, + + {"1001101010000", 0, 0}, + {"1001101010000", 4, 0}, + {"1001101010000", 5, 1}, + + {"1000000000000000000", 18, 0}, + {"1000000000000000000", 29, 1}, + + {"1" + strings.Repeat("0", 100), 100, 0}, + {"1" + strings.Repeat("0", 100), 101, 1}, +} + +func TestSticky(t *testing.T) { + for i, test := range stickyTests { + x := decFromString(test.x) + if got := x.sticky(test.i); got != test.want { + t.Errorf("#%d: %s.sticky(%d) = %v; want %v", i, test.x, test.i, got, test.want) + } + if test.want == 1 { + // all subsequent i's should also return 1 + for d := uint(1); d <= 3; d++ { + if got := x.sticky(test.i + d); got != 1 { + t.Errorf("#%d: %s.sticky(%d) = %v; want %v", i, test.x, test.i+d, got, 1) + } } + } + } +} + +func testSqr(t *testing.T, x dec) { + got := make(dec, 2*len(x)) + want := make(dec, 2*len(x)) + got = got.sqr(x) + want = want.mul(x, x) + if got.cmp(want) != 0 { + t.Errorf("basicSqr(%v), got %v, want %v", x, got, want) + } +} + +func TestSqr(t *testing.T) { + for _, a := range prodNN { + if a.x != nil { + testSqr(t, a.x) + } + if a.y != nil { + testSqr(t, a.y) + } + if a.z != nil { + testSqr(t, a.z) + } + } +} + +func benchmarkDecSqr(b *testing.B, nwords int) { + x := rndDec(nwords) + var z dec + b.ResetTimer() + for i := 0; i < b.N; i++ { + z.sqr(x) + } +} + +var sqrBenchSizes = []int{ + 1, 2, 3, 5, 8, 10, 20, 30, 50, 80, + 100, 200, 300, 500, 800, + 1000, 10000, 100000, +} + +func BenchmarkDecSqr(b *testing.B) { + for _, n := range sqrBenchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { + benchmarkDecSqr(b, n) }) } } + +// func BenchmarkNatSetBytes(b *testing.B) { +// const maxLength = 128 +// lengths := []int{ +// // No remainder: +// 8, 24, maxLength, +// // With remainder: +// 7, 23, maxLength - 1, +// } +// n := make(nat, maxLength/_W) // ensure n doesn't need to grow during the test +// buf := make([]byte, maxLength) +// for _, l := range lengths { +// b.Run(fmt.Sprint(l), func(b *testing.B) { +// for i := 0; i < b.N; i++ { +// n.setBytes(buf[:l]) +// } +// }) +// } +// } + +func TestNatDiv(t *testing.T) { + sizes := []int{ + 1, 2, 5, 8, 15, 25, 40, 65, 100, + 200, 500, 800, 1500, 2500, 4000, 6500, 10000, + } + for _, i := range sizes { + for _, j := range sizes { + a := rndDec1(i) + b := rndDec1(j) + // the test requires b >= 2 + if len(b) == 1 && b[0] == 1 { + b[0] = 2 + } + // choose a remainder c < b + c := rndDec1(len(b)) + if len(c) == len(b) && c[len(c)-1] >= b[len(b)-1] { + c[len(c)-1] = 0 + c = c.norm() + } + // compute x = a*b+c + x := dec(nil).mul(a, b) + x = x.add(x, c) + + var q, r dec + q, r = q.div(r, x, b) + if q.cmp(a) != 0 { + t.Fatalf("wrong quotient: got %s; want %s for %s/%s", q.utoa(10), a.utoa(10), x.utoa(10), b.utoa(10)) + } + if r.cmp(c) != 0 { + t.Fatalf("wrong remainder: got %s; want %s for %s/%s", r.utoa(10), c.utoa(10), x.utoa(10), b.utoa(10)) + } + } + } +} diff --git a/decimal_test.go b/decimal_test.go index 8284923..6e2e58c 100644 --- a/decimal_test.go +++ b/decimal_test.go @@ -7,7 +7,6 @@ import ( "reflect" "strconv" "testing" - "time" ) var benchU uint @@ -32,7 +31,6 @@ var intData = []struct { } func TestDnorm(t *testing.T) { - rand.Seed(time.Now().UnixNano()) for i := 0; i < 10000; i++ { again: w := uint(rand.Uint64()) % _BD @@ -99,7 +97,6 @@ func TestDecimal_SetString(t *testing.T) { } func BenchmarkDnorm(b *testing.B) { - rand.Seed(0xdeadbeefbadf00d) d := dec(nil).make(1000) for i := range d { d[i] = Word(rand.Uint64()) % _BD diff --git a/stdlib.go b/stdlib.go index 6dfff78..64aedba 100644 --- a/stdlib.go +++ b/stdlib.go @@ -8,6 +8,7 @@ import ( "io" "math" "math/bits" + "math/rand" "strconv" ) @@ -86,7 +87,7 @@ func makeAcc(above bool) Accuracy { type Word uint const ( - // _S = _W / 8 // word size in bytes + _S = _W / 8 // word size in bytes _W = bits.UintSize // word size in bits // _B = 1 << _W // digit base @@ -140,6 +141,12 @@ func mulAddWWW(x, y, c Word) (z1, z0 Word) { return Word(hi + cc), Word(lo) } +// z1<<_W + z0 = x*y +func mulWW(x, y Word) (z1, z0 Word) { + hi, lo := bits.Mul(uint(x), uint(y)) + return Word(hi), Word(lo) +} + func same(x, y []Word) bool { return len(x) == len(y) && len(x) > 0 && &x[0] == &y[0] } @@ -272,3 +279,8 @@ type ErrNaN struct { func (err ErrNaN) Error() string { return err.msg } + +var rnd = rand.New(rand.NewSource(0)) + +// TODO(db47h): set this to false +const isRaceBuilder = true