Commit d5c45c54 authored by Michael T. Jones's avatar Michael T. Jones Committed by Robert Griesemer

big.nat: Improved speed of nat-to-string conversion

Three optimizations: First, special-case power of two bases
that partion a Word(), bases 2, 4, 16, and 256. These can
be moved directly from internal Word() storage to the output
without multiprecision operations. Next, same approach for
the other power-of-two bases, 8, 32, 64, and 128. These
don't fill a Word() evenly, so special handling is needed
for those cases where input spans the high-bits of one Word
and the low bis of the next one.  Finally, implement the
general case for others bases in 2 <= base <= 256 using
superbases, the largest power of base representable in a
Word(). For base ten, this is 9 digits and a superbase of
10^9 for 32-bit Words and 19 digits and 10^19 for 64-bit
compiles. This way we do just 1/9th or 1/19th of the expensive
multiprecision divisions, unpacking superdigits using fast
native machine arithmetic. The resulting code runs 7x to
800x the speed of the previous approach, depending on the
length of the number to be converted--longer is relatively
faster.

Also, extended the tests and benchmarks for string to nat
(scan()) and nat to string (string()) functions. A further
enhancement awaits the next CL to make general cases about
7x faster for long cases.

R=gri
CC=golang-dev
https://golang.org/cl/4595041
parent c5281d7a
......@@ -615,6 +615,7 @@ func (x nat) bitLen() int {
// MaxBase is the largest number base accepted for string conversions.
const MaxBase = 'z' - 'a' + 10 + 1 // = hexValue('z') + 1
func hexValue(ch int) Word {
d := MaxBase + 1 // illegal base
switch {
......@@ -733,46 +734,137 @@ const (
uppercaseDigits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
)
// decimalString returns a decimal representation of x.
// It calls x.string with the charset "0123456789".
func (x nat) decimalString() string {
return x.string(lowercaseDigits[0:10])
}
// string converts x to a string using digits from a charset; a digit with
// value d is represented by charset[d]. The conversion base is determined
// by len(charset), which must be >= 2.
func (x nat) string(charset string) string {
base := len(charset)
b := Word(len(charset))
// special cases
switch {
case base < 2:
case b < 2 || b > 256:
panic("illegal base")
case len(x) == 0:
return string(charset[0])
}
// allocate buffer for conversion
i := x.bitLen()/log2(Word(base)) + 1 // +1: round up
i := x.bitLen()/log2(b) + 1 // +1: round up
s := make([]byte, i)
// don't destroy x
// special case: power of two bases can avoid divisions completely
if b == b&-b {
// shift is base-b digit size in bits
shift := uint(trailingZeroBits(b)) // shift > 0 because b >= 2
m := len(x)
mask := Word(1)<<shift - 1
w := x[0]
nbits := uint(_W) // number of unprocessed bits in w
// convert less-significant words
for k := 0; k < m-1; k++ {
// convert full digits
for nbits >= shift {
i--
s[i] = charset[w&mask]
w >>= shift
nbits -= shift
}
// convert any partial leading digit and advance to next word
if nbits == 0 {
// no partial digit remaining, just advance
w = x[k+1]
nbits = _W
} else {
// partial digit in current (k) and next (k+1) word
w |= x[k+1] << nbits
i--
s[i] = charset[w&mask]
// advance
w = x[k+1] >> (shift - nbits)
nbits = _W - (shift - nbits)
}
}
// convert digits of most-significant word (omit leading zeros)
for nbits >= 0 && w != 0 {
i--
s[i] = charset[w&mask]
w >>= shift
nbits -= shift
}
return string(s[i:])
}
// general case: extract groups of digits by multiprecision division
// maximize ndigits where b**ndigits < 2^_W; bb (big base) is b**ndigits
bb := Word(1)
ndigits := 0
for max := Word(_M / b); bb <= max; bb *= b {
ndigits++
}
// preserve x, create local copy for use in repeated divisions
q := nat(nil).set(x)
var r Word
// convert
for len(q) > 0 {
i--
var r Word
q, r = q.divW(q, Word(base))
s[i] = charset[r]
if b == 10 { // hard-coding for 10 here speeds this up by 1.25x
for len(q) > 0 {
// extract least significant, base bb "digit"
q, r = q.divW(q, bb) // N.B. >82% of time is here. Optimize divW
if len(q) == 0 {
// skip leading zeros in most-significant group of digits
for j := 0; j < ndigits && r != 0; j++ {
i--
s[i] = charset[r%10]
r /= 10
}
} else {
for j := 0; j < ndigits; j++ {
i--
s[i] = charset[r%10]
r /= 10
}
}
}
} else {
for len(q) > 0 {
// extract least significant group of digits
q, r = q.divW(q, bb) // N.B. >82% of time is here. Optimize divW
if len(q) == 0 {
// skip leading zeros in most-significant group of digits
for j := 0; j < ndigits && r != 0; j++ {
i--
s[i] = charset[r%b]
r /= b
}
} else {
for j := 0; j < ndigits; j++ {
i--
s[i] = charset[r%b]
r /= b
}
}
}
}
return string(s[i:])
}
// decimalString returns a decimal representation of x.
// It calls x.string with the charset "0123456789".
func (x nat) decimalString() string {
return x.string(lowercaseDigits[0:10])
}
const deBruijn32 = 0x077CB531
var deBruijn32Lookup = []byte{
......@@ -789,6 +881,7 @@ var deBruijn64Lookup = []byte{
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
}
// trailingZeroBits returns the number of consecutive zero bits on the right
// side of the given Word.
// See Knuth, volume 4, section 7.3.1
......@@ -960,7 +1053,9 @@ func (z nat) xor(x, y nat) nat {
// greaterThan returns true iff (x1<<_W + x2) > (y1<<_W + y2)
func greaterThan(x1, x2, y1, y2 Word) bool { return x1 > y1 || x1 == y1 && x2 > y2 }
func greaterThan(x1, x2, y1, y2 Word) bool {
return x1 > y1 || x1 == y1 && x2 > y2
}
// modW returns x % d.
......
......@@ -5,6 +5,7 @@
package big
import (
"fmt"
"os"
"strings"
"testing"
......@@ -171,6 +172,36 @@ func BenchmarkMul(b *testing.B) {
}
func toString(x nat, charset string) string {
base := len(charset)
// special cases
switch {
case base < 2:
panic("illegal base")
case len(x) == 0:
return string(charset[0])
}
// allocate buffer for conversion
i := x.bitLen()/log2(Word(base)) + 1 // +1: round up
s := make([]byte, i)
// don't destroy x
q := nat(nil).set(x)
// convert
for len(q) > 0 {
i--
var r Word
q, r = q.divW(q, Word(base))
s[i] = charset[r]
}
return string(s[i:])
}
var strTests = []struct {
x nat // nat value to be converted
c string // conversion charset
......@@ -360,6 +391,187 @@ func BenchmarkScanPi(b *testing.B) {
}
const (
// 314**271
// base 2: 2249 digits
// base 8: 751 digits
// base 10: 678 digits
// base 16: 563 digits
shortBase = 314
shortExponent = 271
// 3141**2178
// base 2: 31577 digits
// base 8: 10527 digits
// base 10: 9507 digits
// base 16: 7895 digits
mediumBase = 3141
mediumExponent = 2718
// 3141**2178
// base 2: 406078 digits
// base 8: 135360 digits
// base 10: 122243 digits
// base 16: 101521 digits
longBase = 31415
longExponent = 27182
)
func BenchmarkScanShort2(b *testing.B) {
ScanHelper(b, 2, shortBase, shortExponent)
}
func BenchmarkScanShort8(b *testing.B) {
ScanHelper(b, 8, shortBase, shortExponent)
}
func BenchmarkScanSort10(b *testing.B) {
ScanHelper(b, 10, shortBase, shortExponent)
}
func BenchmarkScanShort16(b *testing.B) {
ScanHelper(b, 16, shortBase, shortExponent)
}
func BenchmarkScanMedium2(b *testing.B) {
ScanHelper(b, 2, mediumBase, mediumExponent)
}
func BenchmarkScanMedium8(b *testing.B) {
ScanHelper(b, 8, mediumBase, mediumExponent)
}
func BenchmarkScanMedium10(b *testing.B) {
ScanHelper(b, 10, mediumBase, mediumExponent)
}
func BenchmarkScanMedium16(b *testing.B) {
ScanHelper(b, 16, mediumBase, mediumExponent)
}
func BenchmarkScanLong2(b *testing.B) {
ScanHelper(b, 2, longBase, longExponent)
}
func BenchmarkScanLong8(b *testing.B) {
ScanHelper(b, 8, longBase, longExponent)
}
func BenchmarkScanLong10(b *testing.B) {
ScanHelper(b, 10, longBase, longExponent)
}
func BenchmarkScanLong16(b *testing.B) {
ScanHelper(b, 16, longBase, longExponent)
}
func ScanHelper(b *testing.B, base int, xv, yv Word) {
b.StopTimer()
var x, y, z nat
x = x.setWord(xv)
y = y.setWord(yv)
z = z.expNN(x, y, nil)
var s string
s = z.string(lowercaseDigits[0:base])
if t := toString(z, lowercaseDigits[0:base]); t != s {
panic(fmt.Sprintf("scanning: got %s; want %s", s, t))
}
b.StartTimer()
for i := 0; i < b.N; i++ {
x.scan(strings.NewReader(s), base)
}
}
func BenchmarkStringShort2(b *testing.B) {
StringHelper(b, 2, shortBase, shortExponent)
}
func BenchmarkStringShort8(b *testing.B) {
StringHelper(b, 8, shortBase, shortExponent)
}
func BenchmarkStringShort10(b *testing.B) {
StringHelper(b, 10, shortBase, shortExponent)
}
func BenchmarkStringShort16(b *testing.B) {
StringHelper(b, 16, shortBase, shortExponent)
}
func BenchmarkStringMedium2(b *testing.B) {
StringHelper(b, 2, mediumBase, mediumExponent)
}
func BenchmarkStringMedium8(b *testing.B) {
StringHelper(b, 8, mediumBase, mediumExponent)
}
func BenchmarkStringMedium10(b *testing.B) {
StringHelper(b, 10, mediumBase, mediumExponent)
}
func BenchmarkStringMedium16(b *testing.B) {
StringHelper(b, 16, mediumBase, mediumExponent)
}
func BenchmarkStringLong2(b *testing.B) {
StringHelper(b, 2, longBase, longExponent)
}
func BenchmarkStringLong8(b *testing.B) {
StringHelper(b, 8, longBase, longExponent)
}
func BenchmarkStringLong10(b *testing.B) {
StringHelper(b, 10, longBase, longExponent)
}
func BenchmarkStringLong16(b *testing.B) {
StringHelper(b, 16, longBase, longExponent)
}
func StringHelper(b *testing.B, base int, xv, yv Word) {
b.StopTimer()
var x, y, z nat
x = x.setWord(xv)
y = y.setWord(yv)
z = z.expNN(x, y, nil)
b.StartTimer()
for i := 0; i < b.N; i++ {
z.string(lowercaseDigits[0:base])
}
}
func TestLeadingZeros(t *testing.T) {
var x Word = _B >> 1
for i := 0; i <= _W; i++ {
......@@ -479,7 +691,6 @@ func TestTrailingZeroBits(t *testing.T) {
}
}
var expNNTests = []struct {
x, y, m string
out string
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment