From 5cfed50607d16d4adb0b58cc9e8f8ecc4acedfaf Mon Sep 17 00:00:00 2001 From: Denis Bernard Date: Sun, 10 May 2020 03:21:31 +0200 Subject: [PATCH] Implement division by constant _DB using multiplication Doubles the performance over bits.Div(n1, n0, _DB) on x86 and x86_64. While it theoretically benefits all base-_DB arithmetic primitives, it prevents them from being inlined, leading to diminishing returns until we start manipulating numbers > 100 Words. --- dec.go | 221 +++++++++++++++++++++++++++++++++++++++------- dec_arith.go | 101 +++++++++++++++++---- dec_arith_test.go | 39 ++++++-- dec_conv.go | 4 +- dec_test.go | 27 +++--- decimal.go | 20 ++--- decimal_conv.go | 10 +-- decimal_test.go | 23 +++-- 8 files changed, 347 insertions(+), 98 deletions(-) diff --git a/dec.go b/dec.go index da468de..282a5fa 100644 --- a/dec.go +++ b/dec.go @@ -8,18 +8,6 @@ import ( const debugDecimal = true -const ( - // _W * log10(2) = decimal digits per word. 9 decimal digits per 32 bits - // word and 19 per 64 bits word. - _WD = _W * 30103 / 100000 - // Decimal base for a word. 1e9 for 32 bits words and 1e19 for 64 bits - // words. - // We want this value to be a const. This is a dirty hack to avoid - // conditional compilation; it will break if bits.UintSize != 32 or 64 - _BD = 9999999998000000000*(_WD/19) + 1000000000*(_WD/9) - _MD = _BD - 1 -) - // dec is an unsigned integer x of the form // // x = x[n-1]*_BD^(n-1) + x[n-2]*_BD^(n-2) + ... + x[1]*_BD + x[0] @@ -56,7 +44,7 @@ func (z dec) norm() dec { // digits returns the number of digits of x. func (x dec) digits() uint { if i := len(x) - 1; i >= 0 { - return uint(i*_WD) + decDigits(uint(x[i])) + return uint(i*_DW) + decDigits(uint(x[i])) } return 0 } @@ -64,14 +52,14 @@ func (x dec) digits() uint { func (x dec) ntz() uint { for i, w := range x { if w != 0 { - return uint(i)*_WD + decTrailingZeros(uint(w)) + return uint(i)*_DW + decTrailingZeros(uint(w)) } } return 0 } func (x dec) digit(i uint) uint { - j, i := bits.Div(0, i, _WD) + j, i := bits.Div(0, i, _DW) if j >= uint(len(x)) { return 0 } @@ -110,13 +98,13 @@ func (z dec) setWord(x Word) dec { func (z dec) setUint64(x uint64) (dec, int32) { dig := int32(decDigits64(x)) - if w := Word(x); uint64(w) == x && w < _BD { + if w := Word(x); uint64(w) == x && w < _DB { return z.setWord(w), dig } // x could be a 2 to 3 words value - z = z.make(int(dig+_WD-1) / _WD) + z = z.make(int(dig+_DW-1) / _DW) for i := 0; i < len(z); i++ { - hi, lo := bits.Div64(0, x, _BD) + hi, lo := bits.Div64(0, x, _DB) z[i] = Word(lo) x = hi } @@ -138,7 +126,7 @@ func (x dec) toNat(z []Word) []Word { // r = zz & _B; zz = zz >> _W var r Word for j := len(zz) - 1; j >= 0; j-- { - zz[j], r = mulAddWWW(r, _BD, zz[j]) + zz[j], r = mulAddWWW(r, _DB, zz[j]) } zz = zz.norm() z[i] = r @@ -156,7 +144,7 @@ func (z dec) setInt(x *big.Int) dec { b[i] = Word(bb[i]) } for i := 0; i < len(z); i++ { - z[i] = divWVW(b, 0, b, _BD) + z[i] = divWVW(b, 0, b, _DB) } z = z.norm() return z @@ -165,7 +153,7 @@ func (z dec) setInt(x *big.Int) dec { // sticky returns 1 if there's a non zero digit within the // i least significant digits, otherwise it returns 0. func (x dec) sticky(i uint) uint { - j, i := bits.Div(0, i, _WD) + j, i := bits.Div(0, i, _DW) if j >= uint(len(x)) { if len(x) == 0 { return 0 @@ -327,6 +315,8 @@ func putDec(x *dec) { var decPool sync.Pool +const divRecursiveThreshold = 100 + // q = (uIn-r)/vIn, with 0 <= r < vIn // Uses z as storage for q, and u as storage for r if possible. // See Knuth, Volume 2, section 4.3.1, Algorithm D. @@ -339,7 +329,7 @@ func (z dec) divLarge(u, uIn, vIn dec) (q, r dec) { m := len(uIn) // D1. - d := _BD / (vIn[n-1] + 1) + d := _DB / (vIn[n-1] + 1) // do not modify vIn, it may be used by another goroutine simultaneously vp := getDec(n) v := *vp @@ -356,11 +346,11 @@ func (z dec) divLarge(u, uIn, vIn dec) (q, r dec) { q = z.make(m - n + 1) // TODO(db47h): implement divRecursive - // if n < divRecursiveThreshold { - q.divBasic(u, v) - // } else { - // q.divRecursive(u, v) - // } + if n < divRecursiveThreshold { + q.divBasic(u, v) + } else { + q.divRecursive(u, v) + } putDec(vp) q = q.norm() @@ -385,7 +375,7 @@ func (q dec) divBasic(u, v dec) { vn1 := v[n-1] for j := m; j >= 0; j-- { // D3. - qhat := Word(_MD) + qhat := Word(_DMax) var ujn Word if j+n < len(u) { ujn = u[j+n] @@ -475,9 +465,9 @@ func (z dec) shl(x dec, s uint) dec { } // m > 0 - n := m + int(s/_WD) + n := m + int(s/_DW) z = z.make(n + 1) - z[n] = shl10VU(z[n-m:n], x, s%_WD) + z[n] = shl10VU(z[n-m:n], x, s%_DW) z[0 : n-m].clear() return z.norm() @@ -495,14 +485,14 @@ func (z dec) shr(x dec, s uint) dec { } m := len(x) - n := m - int(s/_WD) + n := m - int(s/_DW) if n <= 0 { return z[:0] } // n > 0 z = z.make(n) - shr10VU(z, x[m-n:], s%_WD) + shr10VU(z, x[m-n:], s%_DW) return z.norm() } @@ -780,3 +770,170 @@ func (z dec) expNN(x, y, m dec) dec { return z.norm() } + +// divRecursive performs word-by-word division of u by v. +// The quotient is written in pre-allocated z. +// The remainder overwrites input u. +// +// Precondition: +// - len(z) >= len(u)-len(v) +// +// See Burnikel, Ziegler, "Fast Recursive Division", Algorithm 1 and 2. +func (z dec) divRecursive(u, v dec) { + // Recursion depth is less than 2 log2(len(v)) + // Allocate a slice of temporaries to be reused across recursion. + recDepth := 2 * bits.Len(uint(len(v))) + // large enough to perform Karatsuba on operands as large as v + tmp := getDec(3 * len(v)) + temps := make([]*dec, recDepth) + z.clear() + z.divRecursiveStep(u, v, 0, tmp, temps) + for _, n := range temps { + if n != nil { + putDec(n) + } + } + putDec(tmp) +} + +// divRecursiveStep computes the division of u by v. +// - z must be large enough to hold the quotient +// - the quotient will overwrite z +// - the remainder will overwrite u +func (z dec) divRecursiveStep(u, v dec, depth int, tmp *dec, temps []*dec) { + u = u.norm() + v = v.norm() + + if len(u) == 0 { + z.clear() + return + } + n := len(v) + if n < divRecursiveThreshold { + z.divBasic(u, v) + return + } + m := len(u) - n + if m < 0 { + return + } + + // Produce the quotient by blocks of B words. + // Division by v (length n) is done using a length n/2 division + // and a length n/2 multiplication for each block. The final + // complexity is driven by multiplication complexity. + B := n / 2 + + // Allocate a nat for qhat below. + if temps[depth] == nil { + temps[depth] = getDec(n) + } else { + *temps[depth] = temps[depth].make(B + 1) + } + + j := m + for j > B { + // Divide u[j-B:j+n] by vIn. Keep remainder in u + // for next block. + // + // The following property will be used (Lemma 2): + // if u = u1 << s + u0 + // v = v1 << s + v0 + // then floor(u1/v1) >= floor(u/v) + // + // Moreover, the difference is at most 2 if len(v1) >= len(u/v) + // We choose s = B-1 since len(v)-B >= B+1 >= len(u/v) + s := (B - 1) + // Except for the first step, the top bits are always + // a division remainder, so the quotient length is <= n. + uu := u[j-B:] + + qhat := *temps[depth] + qhat.clear() + qhat.divRecursiveStep(uu[s:B+n], v[s:], depth+1, tmp, temps) + qhat = qhat.norm() + // Adjust the quotient: + // u = u_h << s + u_l + // v = v_h << s + v_l + // u_h = q̂ v_h + rh + // u = q̂ (v - v_l) + rh << s + u_l + // After the above step, u contains a remainder: + // u = rh << s + u_l + // and we need to subtract q̂ v_l + // + // But it may be a bit too large, in which case q̂ needs to be smaller. + qhatv := tmp.make(3 * n) + qhatv.clear() + qhatv = qhatv.mul(qhat, v[:s]) + for i := 0; i < 2; i++ { + e := qhatv.cmp(uu.norm()) + if e <= 0 { + break + } + sub10VW(qhat, qhat, 1) + c := sub10VV(qhatv[:s], qhatv[:s], v[:s]) + if len(qhatv) > s { + sub10VW(qhatv[s:], qhatv[s:], c) + } + decAddAt(uu[s:], v[s:], 0) + } + if qhatv.cmp(uu.norm()) > 0 { + panic("impossible") + } + c := sub10VV(uu[:len(qhatv)], uu[:len(qhatv)], qhatv) + if c > 0 { + sub10VW(uu[len(qhatv):], uu[len(qhatv):], c) + } + decAddAt(z, qhat, j-B) + j -= B + } + + // Now u < (v< 0 { + sub10VW(qhat, qhat, 1) + c := sub10VV(qhatv[:s], qhatv[:s], v[:s]) + if len(qhatv) > s { + sub10VW(qhatv[s:], qhatv[s:], c) + } + decAddAt(u[s:], v[s:], 0) + } + } + if qhatv.cmp(u.norm()) > 0 { + panic("impossible") + } + c := sub10VV(u[0:len(qhatv)], u[0:len(qhatv)], qhatv) + if c > 0 { + c = sub10VW(u[len(qhatv):], u[len(qhatv):], c) + } + if c > 0 { + panic("impossible") + } + + // Done! + decAddAt(z, qhat.norm(), 0) +} + +// addAt implements z += x*10**(_WD*i); z must be long enough. +// (we don't use dec.add because we need z to stay the same +// slice, and we don't need to normalize z after each addition) +func decAddAt(z, x dec, i int) { + if n := len(x); n > 0 { + if c := add10VV(z[i:i+n], z[i:], x); c != 0 { + j := i + n + if j < len(z) { + add10VW(z[j:], z[j:], c) + } + } + } +} diff --git a/dec_arith.go b/dec_arith.go index 34b479a..5f9e1ed 100644 --- a/dec_arith.go +++ b/dec_arith.go @@ -4,6 +4,21 @@ import ( "math/bits" ) +const ( + // _W * log10(2) = decimal digits per word. 9 decimal digits per 32 bits + // word and 19 per 64 bits word. + _DW = _W * 30103 / 100000 + // Decimal base for a word. 1e9 for 32 bits words and 1e19 for 64 bits + // words. + // We want this value to be a const. This is a dirty hack to avoid + // conditional compilation; it will break if bits.UintSize != 32 or 64 + _DB = 9999999998000000000*(_DW/19) + 1000000000*(_DW/9) + // Maximum value of a decimal Word + _DMax = _DB - 1 + // Bits per decimal Word: Log2(_DB)+1 = _DW * Log2(10) + 1 + _DWb = _DW*100000/30103 + 1 +) + var pow10s = [...]uint64{ 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000, 100000000000000, 1000000000000000, @@ -47,9 +62,9 @@ func decDigits32(x uint) (n uint) { func nlz10(x Word) uint { if bits.UintSize == 32 { - return _WD - decDigits32(uint(x)) + return _DW - decDigits32(uint(x)) } - return _WD - decDigits64(uint64(x)) + return _DW - decDigits64(uint64(x)) } // shl10VU sets z to x*(10**s), s < _WD @@ -61,7 +76,7 @@ func shl10VU(z, x dec, s uint) (r Word) { if len(z) == 0 || len(x) == 0 { return } - d, m := Word(pow10(_WD-s)), Word(pow10(s)) + d, m := Word(pow10(_DW-s)), Word(pow10(s)) var h, l Word r, l = divWW(0, x[len(x)-1], d) for i := len(z) - 1; i > 0; i-- { @@ -85,7 +100,7 @@ func shr10VU(z, x dec, s uint) (r Word) { } var h, l Word - d, m := Word(pow10(s)), Word(pow10(_WD-s)) + d, m := Word(pow10(s)), Word(pow10(_DW-s)) h, r = divWW(0, x[0], Word(d)) for i := 1; i < len(z) && i < len(x); i++ { t := h @@ -163,7 +178,7 @@ func add10VW(z, x dec, y Word) (c Word) { // propagate carry for i := 1; i < len(z) && i < len(x); i++ { s := x[i] + c - if s < _BD { + if s < _DB { z[i] = s // copy remaining digits copy(z[i+1:], x[i+1:]) @@ -182,14 +197,67 @@ func div10WVW(z []Word, xn Word, x []Word, y Word) (r Word) { return } +// divWDB returns the quotient and remainder of a double-Word n divided by _DB: +// +// q = n/_DB, r = n%_DB +// +// with the dividend bits' upper half in parameter n1 and the lower half in +// parameter n0. divDecBase panics if n1 > _DMax (quotient overflow). +// +// This function uses the algorithm from "Division by invariant integers using +// multiplication" by Torbjörn Granlund & Peter L. Montgomery. +// +// See https://doi.org/10.1145/773473.178249, section 8, Dividing udword by uword. +// +// On 386 and amd64, this is over 2 times faster than calling bits.Div(n1, n0, _BD). +// +// In the article, some equations show an addition or subtraction of 2**N, which +// is a no-op. In the comments below, these have been removed for the sake of +// clarity. +// +func divWDB(n1, n0 Word) (q, r Word) { + const ( + N = _W + d = _DB + l = _DWb + mP = (1<<(N+l)-1)/d - 1< _DMax { + panic("decimal: integer overflow") + } + + // if N == 64, N == l => n2 == n1 && n10 == n0 + // go vet complains, but this should be optimized out. + n2 := n1<<(N-l) + n0>>l + n10 := n0 << (N - l) + // -n1 = (n10 < 0 ? -1 : 0) + _n1 := Word(int(n10) >> (N - 1)) + nAdj := n10 + (_n1 & dNorm) + + // q1 = n2 + HIGH(mP * (n2-_n1) + nAdj) + q1, _ := mulAddWWW(mP, n2-_n1, nAdj) + q1 += n2 + // dr = 2**N*n1 + n0 - 2**N*d + (-1-q1)*d + // = (-1-q1) * d + n0 + (1) + // 2**N * (n1 - d) (2) + // let t = -1 - q1 = (^q1 + 1) - 1 = ^q1 + t := ^q1 + drHi, drLo := mulAddWWW(t, d, n0) // (1) + drHi += n1 - d // (2) + // q = drHi - (-1-q1) + // r = drLow + (d & drHi) + return drHi - t, drLo + d&drHi +} + // q = (u1<<_W + u0 - r)/v func div10WW(u1, u0, v Word) (q, r Word) { // q < _BD if u1 < v < _BD - if debugDecimal && !(u1 < v && v < _BD) { - panic("decimal overflow") + if debugDecimal && !(u1 < v && v < _DB) { + panic("decimal: integer overflow") } // convert to base 2 - hi, lo := mulAddWWW(u1, _BD, u0) + hi, lo := mulAddWWW(u1, _DB, u0) // q = (u-r)/v. Since v < _BD => r < _BD return divWW(hi, lo, v) } @@ -208,22 +276,20 @@ func mulAdd10WWW(x, y, c Word) (z1, z0 Word) { hi, lo := bits.Mul(uint(x), uint(y)) var cc uint lo, cc = bits.Add(lo, uint(c), 0) - hi, lo = bits.Div(hi+cc, lo, _BD) - return Word(hi), Word(lo) + return divWDB(Word(hi+cc), Word(lo)) } // z1<<_W + z0 = x*y func mul10WW(x, y Word) (z1, z0 Word) { hi, lo := bits.Mul(uint(x), uint(y)) - hi, lo = bits.Div(hi, lo, _BD) - return Word(hi), Word(lo) + return divWDB(Word(hi), Word(lo)) } func add10WWW(x, y, cIn Word) (s, c Word) { r, cc := bits.Add(uint(x), uint(y), uint(cIn)) - if cc != 0 || r >= _BD { + if cc != 0 || r >= _DB { cc = 1 - r -= _BD + r -= _DB } return Word(r), Word(cc) } @@ -233,8 +299,7 @@ func addMul10VVW(z, x []Word, y Word) (c Word) { // do x[i] * y + c in base 2 => (hi+cc) * 2**_W + lo hi, z0 := mulAddWWW(x[i], y, z[i]) lo, cc := bits.Add(uint(z0), uint(c), 0) - // convert to base _BD - c, z[i] = divWW(hi+Word(cc), Word(lo), _BD) + c, z[i] = divWDB(hi+Word(cc), Word(lo)) } return } @@ -250,7 +315,7 @@ func add10VV(z, x, y []Word) (c Word) { func sub10WWW(x, y, b Word) (d, c Word) { dd, cc := bits.Sub(uint(x), uint(y), uint(b)) if cc != 0 { - dd += _BD + dd += _DB } return Word(dd), Word(cc) } @@ -273,7 +338,7 @@ func sub10VW(z, x []Word, y Word) (c Word) { copy(z[i+1:], x[i+1:]) return } - z[i] = Word(zi + _BD) + z[i] = Word(zi + _DB) } return } diff --git a/dec_arith_test.go b/dec_arith_test.go index 13f764b..18638f1 100644 --- a/dec_arith_test.go +++ b/dec_arith_test.go @@ -1,6 +1,7 @@ package decimal import ( + "math/bits" "reflect" "strconv" "testing" @@ -14,9 +15,9 @@ func TestAdd10VW(t *testing.T) { c Word s int64 }{ - {dec{_BD - 2, _BD - 1}, 2, dec{}, 1, 0}, - {dec{_BD - 2, _BD - 1}, 1, dec{_BD - 1, _BD - 1}, 0, 0}, - {dec{_BD - 2, _BD - 2}, 2, dec{0, _BD - 1}, 0, 0}, + {dec{_DMax - 1, _DMax}, 2, dec{}, 1, 0}, + {dec{_DMax - 1, _DMax}, 1, dec{_DMax, _DMax}, 0, 0}, + {dec{_DMax - 1, _DMax - 1}, 2, dec{0, _DMax}, 0, 0}, } for i, d := range td { t.Run(strconv.Itoa(i), func(t *testing.T) { @@ -50,12 +51,12 @@ func TestDecDigits(t *testing.T) { func BenchmarkDecDigits(b *testing.B) { for i := 0; i < b.N; i++ { - benchU = decDigits(uint(rnd.Uint64()) % _BD) + benchH = Word(decDigits(uint(rnd.Uint64()) % _DB)) } } func rnd10W() Word { - return Word(rnd.Uint64() % _BD) + return Word(rnd.Uint64() % _DB) } func rnd10V(n int) []Word { @@ -65,3 +66,31 @@ func rnd10V(n int) []Word { } return v } + +func TestDivWDB(t *testing.T) { + h, l := rnd10W(), Word(rnd.Uint64()) + for i := 0; i < 1e7; i++ { + q, r := divWDB(h, l) + qq, rr := bits.Div(uint(h), uint(l), _DB) + if q != Word(qq) || r != Word(rr) { + t.Fatalf("Got (%d,%d)/_DB = %d, %d. Expected %d %d", h, l, q, r, qq, rr) + } + } +} + +var benchH, benchL Word + +func BenchmarkDivWDB_bits(b *testing.B) { + h, l := rnd10W(), Word(rnd.Uint64()) + for i := 0; i < b.N; i++ { + h, l := bits.Div(uint(h), uint(l), _DB) + benchH, benchL = Word(h), Word(l) + } +} + +func BenchmarkDivWDB_mul(b *testing.B) { + h, l := rnd10W(), Word(rnd.Uint64()) + for i := 0; i < b.N; i++ { + benchH, benchL = divWDB(h, l) + } +} diff --git a/dec_conv.go b/dec_conv.go index 9201723..3b821ba 100644 --- a/dec_conv.go +++ b/dec_conv.go @@ -111,7 +111,7 @@ func (z dec) scan(r io.ByteScanner, base int, fracOk bool) (res dec, b, count in // if di is "full", add it to the result if i == n { - if bn == _BD { + if bn == _DB { // shift left by _WD digits t := z.make(len(z) + 1) copy(t[1:], z) @@ -233,7 +233,7 @@ func (q dec) convertWords(s []byte, b Word, ndigits int, bb Word, table []decDiv for len(q) > 0 { // extract least significant, base bb "digit" r = q[0] - q = q.shr(q, _WD) + q = q.shr(q, _DW) for j := 0; j < ndigits && i > 0; j++ { i-- // avoid % computation since r%10 == r - int(r/10)*10; diff --git a/dec_test.go b/dec_test.go index ef1d62b..931da38 100644 --- a/dec_test.go +++ b/dec_test.go @@ -23,10 +23,10 @@ var decCmpTests = []struct { {dec{0}, dec{1}, -1}, {dec{1}, dec{0}, 1}, {dec{1}, dec{1}, 0}, - {dec{0, _MD}, dec{1}, 1}, - {dec{1}, dec{0, _MD}, -1}, - {dec{1, _MD}, dec{0, _MD}, 1}, - {dec{0, _MD}, dec{1, _MD}, -1}, + {dec{0, _DMax}, dec{1}, 1}, + {dec{1}, dec{0, _DMax}, -1}, + {dec{1, _DMax}, dec{0, _DMax}, 1}, + {dec{0, _DMax}, dec{1, _DMax}, -1}, {dec{16, 571956, 8794, 68}, dec{837, 9146, 1, 754489}, -1}, {dec{34986, 41, 105, 1957}, dec{56, 7458, 104, 1957}, 1}, } @@ -51,7 +51,7 @@ var decSumNN = []decArgNN{ {dec{1111111110}, dec{123456789}, dec{987654321}}, {dec{0, 0, 0, 1}, nil, dec{0, 0, 0, 1}}, {dec{0, 0, 0, 1111111110}, dec{0, 0, 0, 123456789}, dec{0, 0, 0, 987654321}}, - {dec{0, 0, 0, 1}, dec{0, 0, _MD}, dec{0, 0, 1}}, + {dec{0, 0, 0, 1}, dec{0, 0, _DMax}, dec{0, 0, 1}}, } var decProdNN = []decArgNN{ @@ -242,8 +242,8 @@ func BenchmarkDecMul(b *testing.B) { } func TestDecNLZ10(t *testing.T) { - var x Word = _MD - for i := 0; i <= _WD; i++ { + var x Word = _DMax + for i := 0; i <= _DW; i++ { if int(nlz10(x)) != i { t.Errorf("failed at %x: got %d want %d", x, nlz10(x), i) } @@ -262,8 +262,8 @@ var decLeftShiftTests = []decShiftTest{ {nil, 1, nil}, {decOne, 0, decOne}, {decOne, 1, decTen}, - {dec{_BD / 10}, 1, dec{0}}, - {dec{_BD / 10, 0}, 1, dec{0, 1}}, + {dec{_DB / 10}, 1, dec{0}}, + {dec{_DB / 10, 0}, 1, dec{0, 1}}, } func TestDecShiftLeft(t *testing.T) { @@ -285,8 +285,8 @@ var decRightShiftTests = []decShiftTest{ {decOne, 0, decOne}, {decOne, 1, nil}, {decTen, 1, decOne}, - {dec{0, 1}, 1, dec{_BD / 10}}, - {dec{10, 1, 1}, 1, dec{_BD/10 + 1, _BD / 10}}, + {dec{0, 1}, 1, dec{_DB / 10}}, + {dec{10, 1, 1}, 1, dec{_DB/10 + 1, _DB / 10}}, } func TestDecShiftRight(t *testing.T) { @@ -813,8 +813,7 @@ func TestDecDiv(t *testing.T) { // TODO(bd47h): move this to decimal_test func benchmarkDiv(b *testing.B, aSize, bSize int) { aa := rndDec1(aSize) - // bb := rndDec1(bSize) - bb := dec(nil).setWord(Word(rnd.Intn(_W-1)) + 1) + bb := rndDec1(bSize) if aa.cmp(bb) < 0 { aa, bb = bb, aa } @@ -830,7 +829,7 @@ func benchmarkDiv(b *testing.B, aSize, bSize int) { func BenchmarkDiv(b *testing.B) { sizes := []int{ 10, 20, 50, 100, 200, 500, 1000, - // 1e4, 1e5, 1e6, 1e7, + 1e4, 1e5, 1e6, 1e7, } for _, i := range sizes { j := 2 * i diff --git a/decimal.go b/decimal.go index 912446d..0e7d4eb 100644 --- a/decimal.go +++ b/decimal.go @@ -120,7 +120,7 @@ func (x *Decimal) MinPrec() uint { if x.form != finite { return 0 } - return uint(len(x.mant))*_WD - x.mant.ntz() + return uint(len(x.mant))*_DW - x.mant.ntz() } // Mode returns the rounding mode of x. @@ -258,9 +258,9 @@ func (z *Decimal) SetInt(x *big.Int) *Decimal { return z } // x != 0 - z.mant = z.mant.make((int(prec) + _WD - 1) / _WD).setInt(x) + z.mant = z.mant.make((int(prec) + _DW - 1) / _DW).setInt(x) exp := dnorm(z.mant) - z.setExpAndRound(int64(len(z.mant))*_WD-exp, 0) + z.setExpAndRound(int64(len(z.mant))*_DW-exp, 0) return z } @@ -432,8 +432,8 @@ func (x *Decimal) validate() { if m == 0 { panic("nonzero finite number with empty mantissa") } - if msw := x.mant[m-1]; !(_BD/10 <= msw && msw < _BD) { - panic(fmt.Sprintf("last word of %s is not within [%d %d)", x.Text('e', 0), uint(_BD/10), uint(_BD))) + if msw := x.mant[m-1]; !(_DB/10 <= msw && msw < _DB) { + panic(fmt.Sprintf("last word of %s is not within [%d %d)", x.Text('e', 0), uint(_DB/10), uint(_DB))) } if x.prec == 0 { panic("zero precision finite number") @@ -473,7 +473,7 @@ func (z *Decimal) round(sbit uint) { // z.form == finite && len(z.mant) > 0 // m > 0 implies z.prec > 0 (checked by validate) m := uint32(len(z.mant)) // present mantissa length in words - digits := m * _WD + digits := m * _DW if digits <= z.prec { // mantissa fits => nothing to do return @@ -490,14 +490,14 @@ func (z *Decimal) round(sbit uint) { } sbit &= 1 // be safe and ensure it's a single bit // cut off extra words - n := (z.prec + (_WD - 1)) / _WD // mantissa length in words for desired precision + n := (z.prec + (_DW - 1)) / _DW // mantissa length in words for desired precision if m > n { copy(z.mant, z.mant[m-n:]) // move n last words to front z.mant = z.mant[:n] } // determine number of trailing zero digits (ntz) and compute lsd of mantissa's least-significant word - ntz := uint(n*_WD - z.prec) // 0 <= ntz < _W + ntz := uint(n*_DW - z.prec) // 0 <= ntz < _W lsd := pow10(ntz) // round if result is inexact @@ -535,7 +535,7 @@ func (z *Decimal) round(sbit uint) { z.exp++ // mantissa overflow means that the mantissa before increment // was all nines. In that case, the result is 1**(z.exp+1) - z.mant[n-1] = _BD / 10 + z.mant[n-1] = _DB / 10 } } } @@ -555,7 +555,7 @@ func dnorm(m dec) int64 { if debugDecimal && (len(m) == 0 || m[len(m)-1] == 0) { panic("msw of mantissa is 0") } - s := _WD - decDigits(uint(m[len(m)-1])) + s := _DW - decDigits(uint(m[len(m)-1])) // partial shift if s > 0 { c := shl10VU(m, m, s) diff --git a/decimal_conv.go b/decimal_conv.go index 40355c6..bc598a9 100644 --- a/decimal_conv.go +++ b/decimal_conv.go @@ -27,7 +27,7 @@ func (z *Decimal) SetString(s string) (*Decimal, bool) { func (z *Decimal) scan(r io.ByteScanner, base int) (f *Decimal, b int, err error) { prec := z.prec if prec == 0 { - prec = _WD + prec = _DW } // A reasonable value in case of an error. @@ -77,7 +77,7 @@ func (z *Decimal) scan(r io.ByteScanner, base int) (f *Decimal, b int, err error // normalize mantissa and determine initial exponent contributions exp2 := int64(0) - exp10 := int64(len(z.mant))*_WD - dnorm(z.mant) + exp10 := int64(len(z.mant))*_DW - dnorm(z.mant) // determine binary or decimal exponent contribution of radix point if fcount < 0 { @@ -130,7 +130,7 @@ func (z *Decimal) scan(r io.ByteScanner, base int) (f *Decimal, b int, err error // exp2 != 0 // // apply 2**exp2 - p := new(Decimal).SetPrec(z.Prec() + _WD) // use more bits for p -- TODO(db47h) what is the right number? + p := new(Decimal).SetPrec(z.Prec() + _DW) // use more bits for p -- TODO(db47h) what is the right number? if exp2 < 0 { z.Quo(z, p.pow2(uint64(-exp2))) } else { @@ -143,7 +143,7 @@ func (z *Decimal) scan(r io.ByteScanner, base int) (f *Decimal, b int, err error // pow5 sets z to 5**n and returns z. // n must not be negative. func (z *Decimal) pow2(n uint64) *Decimal { - const m = _WD * 100000 / 30103 // maximum exponent such that 2**m < _BD + const m = _DW * 100000 / 30103 // maximum exponent such that 2**m < _BD if n <= m { return z.SetUint64(1 << n) } @@ -154,7 +154,7 @@ func (z *Decimal) pow2(n uint64) *Decimal { // use more bits for f than for z // TODO(db47h) what is the right number? - f := new(Decimal).SetPrec(z.Prec() + _WD).SetUint64(2) + f := new(Decimal).SetPrec(z.Prec() + _DW).SetUint64(2) for n > 0 { if n&1 != 0 { diff --git a/decimal_test.go b/decimal_test.go index 6e2e58c..6a32cde 100644 --- a/decimal_test.go +++ b/decimal_test.go @@ -2,7 +2,6 @@ package decimal import ( "math/big" - "math/bits" "math/rand" "reflect" "strconv" @@ -23,21 +22,21 @@ var intData = []struct { {"1234567890123456789_0123456789012345678_9012345678901234567_8901234567890123456_78901234567890", 0, 90, dec{7890123456789000000, 8901234567890123456, 9012345678901234567, 123456789012345678, 1234567890123456789}, 90, 90}, - {"1235", 0, 0, dec{1235000000000000000}, _WD, 4}, + {"1235", 0, 0, dec{1235000000000000000}, _DW, 4}, {"1235", 0, 3, dec{1240000000000000000}, 3, 4}, {"1245", 0, 3, dec{1240000000000000000}, 3, 4}, {"12451", 0, 3, dec{1250000000000000000}, 3, 5}, - {"0", 0, 0, nil, _WD, 0}, + {"0", 0, 0, nil, _DW, 0}, } func TestDnorm(t *testing.T) { for i := 0; i < 10000; i++ { again: - w := uint(rand.Uint64()) % _BD - e := uint(rand.Intn(_WD + 1)) - h, l := bits.Mul(w, pow10(e)) + w := uint(rand.Uint64()) % _DB + e := uint(rand.Intn(_DW + 1)) + h, l := mulWW(Word(w), Word(pow10(e))) // convert h, l from base _B (2**64) to base _BD (10**19) or 2**32 -> 10**9 - h, l = bits.Div(h, l, _BD) + h, l = divWDB(h, l) d := dec{Word(l), Word(h)}.norm() if len(d) == 0 { if w == 0 { @@ -48,8 +47,8 @@ func TestDnorm(t *testing.T) { dd := dec(nil).set(d) s := dnorm(dd) // d should now have a single element with e shifted left - ew := w * pow10(_WD-decDigits(w)) - es := int64(uint(len(d)*_WD) - (decDigits(w) + e)) + ew := w * pow10(_DW-decDigits(w)) + es := int64(uint(len(d)*_DW) - (decDigits(w) + e)) if dd[len(dd)-1] != Word(ew) || s != es { t.Fatalf("%ve%v => dnorm(%v) = %v, %v --- Expected %d, %d", w, e, d, dd, s, w, es) @@ -99,11 +98,11 @@ func TestDecimal_SetString(t *testing.T) { func BenchmarkDnorm(b *testing.B) { d := dec(nil).make(1000) for i := range d { - d[i] = Word(rand.Uint64()) % _BD + d[i] = Word(rand.Uint64()) % _DB } for i := 0; i < b.N; i++ { - d[0] = Word(rand.Uint64()) % _BD - d[len(d)-1] = Word(rand.Uint64()) % _BD + d[0] = Word(rand.Uint64()) % _DB + d[len(d)-1] = Word(rand.Uint64()) % _DB benchU = uint(dnorm(d)) } }