math/big: slight improvement to algorithm used for internal bitLen function
The bitLen function currently shifts out blocks of 8 bits at a time. This change replaces that roughly linear algorithm with a log(N) one: shift out 16 bits, then 8, then 4, then 2, then 1. The first step stays at 16 bits at a time so that the function works unchanged with both 32- and 64-bit values. The algorithm is similar to several of the nlz ("number of leading zeros") algorithms from "Hacker's Delight" and the "bit twiddling hacks" pages (see the sketch at the end of this message).

This doesn't make a big difference to the existing benchmarks, but I'm using the code in a different context that calls bitLen much more often, so it seemed worthwhile to make the existing codebase faster and a better building block.

Microbenchmark results on a 64-bit MacBook Pro using 6g from weekly.2012-01-20:

benchmark                 old ns/op    new ns/op    delta
big.BenchmarkBitLen0               4            6  +50.12%
big.BenchmarkBitLen1               4            6  +33.91%
big.BenchmarkBitLen2               6            6   +3.05%
big.BenchmarkBitLen3               7            6  -19.05%
big.BenchmarkBitLen4               9            6  -30.19%
big.BenchmarkBitLen5              11            6  -42.23%
big.BenchmarkBitLen8              16            6  -61.78%
big.BenchmarkBitLen9               5            6  +18.29%
big.BenchmarkBitLen16             18            7  -60.99%
big.BenchmarkBitLen17              7            6   -4.64%
big.BenchmarkBitLen31             19            7  -62.49%

On an ARM machine (with the previous weekly):

benchmark                 old ns/op    new ns/op    delta
big.BenchmarkBitLen0              37           50  +36.56%
big.BenchmarkBitLen1              59           51  -13.69%
big.BenchmarkBitLen2              74           59  -20.40%
big.BenchmarkBitLen3              92           60  -34.89%
big.BenchmarkBitLen4             110           59  -46.09%
big.BenchmarkBitLen5             127           60  -52.68%
big.BenchmarkBitLen8             181           59  -67.24%
big.BenchmarkBitLen9              78           60  -23.05%
big.BenchmarkBitLen16            199           69  -65.13%
big.BenchmarkBitLen17             91           70  -23.17%
big.BenchmarkBitLen31            210           95  -54.43%

R=golang-dev, dave, edsrzf, gri
CC=golang-dev
https://golang.org/cl/5570044
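For reference, a minimal, self-contained sketch of the log(N) approach described above. It follows the steps in the description (16, then 8, 4, 2, 1) but is not necessarily the exact code in the CL; the Word type here stands in for math/big's internal word type.

    package main

    import "fmt"

    // Word stands in for math/big's internal word type (uintptr there).
    type Word uintptr

    // bitLen returns the number of bits needed to represent x.
    // The leading loop peels off 16 bits at a time so the same code
    // handles both 32- and 64-bit words; the remaining steps form a
    // log(N) binary search in the style of the nlz algorithms from
    // "Hacker's Delight".
    func bitLen(x Word) (n int) {
        for ; x >= 0x8000; x >>= 16 {
            n += 16
        }
        if x >= 0x80 {
            x >>= 8
            n += 8
        }
        if x >= 0x8 {
            x >>= 4
            n += 4
        }
        if x >= 0x2 {
            x >>= 2
            n += 2
        }
        if x >= 0x1 {
            n++
        }
        return
    }

    func main() {
        // Expected: 0, 1, 8, 9, 32.
        for _, x := range []Word{0, 1, 255, 256, 1 << 31} {
            fmt.Printf("bitLen(%d) = %d\n", x, bitLen(x))
        }
    }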