Commit 9515cb51 authored by Robert Griesemer's avatar Robert Griesemer

math/bits: faster TrailingZeroes8

For sizes > 8, the existing code is faster.

benchmark                     old ns/op     new ns/op     delta
BenchmarkTrailingZeros8-8     1.95          1.29          -33.85%

Measured on 2.3 GHz Intel Core i7 running macOS 10.12.3.

Change-Id: I6f3a33ec633a2c544ec29693c141f2f99335c745
Reviewed-on: https://go-review.googlesource.com/37581Reviewed-by: 's avatarBrad Fitzpatrick <bradfitz@golang.org>
parent d7a659b1
...@@ -34,7 +34,7 @@ func LeadingZeros64(x uint64) int { return 64 - blen(uint64(x)) } ...@@ -34,7 +34,7 @@ func LeadingZeros64(x uint64) int { return 64 - blen(uint64(x)) }
func TrailingZeros(x uint) int { return ntz(x) } func TrailingZeros(x uint) int { return ntz(x) }
// TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0. // TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0.
func TrailingZeros8(x uint8) int { return ntz8(x) } func TrailingZeros8(x uint8) int { return int(ntz8tab[x]) }
// TrailingZeros16 returns the number of trailing zero bits in x; the result is 16 for x == 0. // TrailingZeros16 returns the number of trailing zero bits in x; the result is 16 for x == 0.
func TrailingZeros16(x uint16) int { return ntz16(x) } func TrailingZeros16(x uint16) int { return ntz16(x) }
......
...@@ -23,14 +23,6 @@ var deBruijn32tab = [32]byte{ ...@@ -23,14 +23,6 @@ var deBruijn32tab = [32]byte{
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
} }
func ntz8(x uint8) (n int) {
if x == 0 {
return 8
}
// see comment in ntz64
return int(deBruijn32tab[uint32(x&-x)*deBruijn32>>(32-5)])
}
func ntz16(x uint16) (n int) { func ntz16(x uint16) (n int) {
if x == 0 { if x == 0 {
return 16 return 16
......
...@@ -6,6 +6,25 @@ ...@@ -6,6 +6,25 @@
package bits package bits
var ntz8tab = [256]uint8{
0x08, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00,
}
var pop8tab = [256]uint8{ var pop8tab = [256]uint8{
0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04,
0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05, 0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04, 0x02, 0x03, 0x03, 0x04, 0x03, 0x04, 0x04, 0x05,
......
...@@ -30,6 +30,7 @@ package bits ...@@ -30,6 +30,7 @@ package bits
func main() { func main() {
buf := bytes.NewBuffer(header) buf := bytes.NewBuffer(header)
gen(buf, "ntz8tab", ntz8)
gen(buf, "pop8tab", pop8) gen(buf, "pop8tab", pop8)
gen(buf, "rev8tab", rev8) gen(buf, "rev8tab", rev8)
// add more tables as needed // add more tables as needed
...@@ -58,6 +59,14 @@ func gen(w io.Writer, name string, f func(uint8) uint8) { ...@@ -58,6 +59,14 @@ func gen(w io.Writer, name string, f func(uint8) uint8) {
fmt.Fprint(w, "\n}\n\n") fmt.Fprint(w, "\n}\n\n")
} }
func ntz8(x uint8) (n uint8) {
for x&1 == 0 && n < 8 {
x >>= 1
n++
}
return
}
func pop8(x uint8) (n uint8) { func pop8(x uint8) (n uint8) {
for x != 0 { for x != 0 {
x &= x - 1 x &= x - 1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment