Commit 9b875bc0 authored by Russ Cox's avatar Russ Cox

bytes: faster Count, Index, Equal

Benchmarks are from GOARCH=amd64 on a MacPro5,1.

benchmark                                    old MB/s     new MB/s  speedup
bytes_test.BenchmarkEqual32                    452.89       891.07    1.97x
bytes_test.BenchmarkEqual4K                    852.71      1700.44    1.99x
bytes_test.BenchmarkEqual4M                    841.53      1587.93    1.89x
bytes_test.BenchmarkEqual64M                   838.22      1578.14    1.88x

bytes_test.BenchmarkIndex32                     58.02        48.99    0.84x
bytes_test.BenchmarkIndex4K                     48.26        41.32    0.86x
bytes_test.BenchmarkIndex4M                     48.20        41.24    0.86x
bytes_test.BenchmarkIndex64M                    48.08        41.21    0.86x
bytes_test.BenchmarkIndexEasy32                410.04       546.82    1.33x
bytes_test.BenchmarkIndexEasy4K                849.26     14257.37   16.79x
bytes_test.BenchmarkIndexEasy4M                854.54     17222.15   20.15x
bytes_test.BenchmarkIndexEasy64M               843.57     11060.40   13.11x

bytes_test.BenchmarkCount32                     57.24        50.68    0.89x
bytes_test.BenchmarkCount4K                     48.19        41.82    0.87x
bytes_test.BenchmarkCount4M                     48.18        41.74    0.87x
bytes_test.BenchmarkCount64M                    48.17        41.71    0.87x
bytes_test.BenchmarkCountEasy32                433.11       547.44    1.26x
bytes_test.BenchmarkCountEasy4K               1130.59     14194.06   12.55x
bytes_test.BenchmarkCountEasy4M               1131.23     17231.18   15.23x
bytes_test.BenchmarkCountEasy64M              1111.40     11068.88    9.96x

The non-easy Count/Index benchmarks are a worst case input.

regexp.BenchmarkMatchEasy0_32                  237.46       221.47    0.93x
regexp.BenchmarkMatchEasy0_1K                  553.53      1019.72    1.84x
regexp.BenchmarkMatchEasy0_32K                 693.99      1672.06    2.41x
regexp.BenchmarkMatchEasy0_1M                  688.72      1611.68    2.34x
regexp.BenchmarkMatchEasy0_32M                 680.70      1565.05    2.30x
regexp.BenchmarkMatchEasy1_32                  165.56       243.08    1.47x
regexp.BenchmarkMatchEasy1_1K                  336.45       496.32    1.48x
regexp.BenchmarkMatchEasy1_32K                 302.80       425.63    1.41x
regexp.BenchmarkMatchEasy1_1M                  300.42       414.20    1.38x
regexp.BenchmarkMatchEasy1_32M                 299.64       413.47    1.38x

R=golang-dev, r, iant
CC=golang-dev
https://golang.org/cl/5451116
parent 2f2cc24c
...@@ -15,3 +15,19 @@ TEXT ·IndexByte(SB),7,$0 ...@@ -15,3 +15,19 @@ TEXT ·IndexByte(SB),7,$0
SUBL $1, DI SUBL $1, DI
MOVL DI, ret+16(FP) MOVL DI, ret+16(FP)
RET RET
TEXT ·Equal(SB),7,$0
MOVL len+4(FP), BX
MOVL len1+16(FP), CX
MOVL $0, AX
CMPL BX, CX
JNE eqret
MOVL p+0(FP), SI
MOVL q+12(FP), DI
CLD
REP; CMPSB
JNE eqret
MOVL $1, AX
eqret:
MOVB AX, ret+24(FP)
RET
...@@ -90,3 +90,19 @@ success: ...@@ -90,3 +90,19 @@ success:
MOVL DI, ret+24(FP) MOVL DI, ret+24(FP)
RET RET
TEXT ·Equal(SB),7,$0
MOVL len+8(FP), BX
MOVL len1+24(FP), CX
MOVL $0, AX
MOVL $1, DX
CMPL BX, CX
JNE eqret
MOVQ p+0(FP), SI
MOVQ q+16(FP), DI
CLD
REP; CMPSB
CMOVLEQ DX, AX
eqret:
MOVB AX, ret+32(FP)
RET
...@@ -6,3 +6,6 @@ ...@@ -6,3 +6,6 @@
TEXT ·IndexByte(SB),7,$0 TEXT ·IndexByte(SB),7,$0
B ·indexBytePortable(SB) B ·indexBytePortable(SB)
// no memcmp implementation on arm yet
TEXT ·Equal(SB),7,$0
B ·equalPortable(SB)
...@@ -37,7 +37,9 @@ func Compare(a, b []byte) int { ...@@ -37,7 +37,9 @@ func Compare(a, b []byte) int {
} }
// Equal returns a boolean reporting whether a == b. // Equal returns a boolean reporting whether a == b.
func Equal(a, b []byte) bool { func Equal(a, b []byte) bool
func equalPortable(a, b []byte) bool {
if len(a) != len(b) { if len(a) != len(b) {
return false return false
} }
...@@ -74,18 +76,33 @@ func explode(s []byte, n int) [][]byte { ...@@ -74,18 +76,33 @@ func explode(s []byte, n int) [][]byte {
// Count counts the number of non-overlapping instances of sep in s. // Count counts the number of non-overlapping instances of sep in s.
func Count(s, sep []byte) int { func Count(s, sep []byte) int {
if len(sep) == 0 { n := len(sep)
if n == 0 {
return utf8.RuneCount(s) + 1 return utf8.RuneCount(s) + 1
} }
if n > len(s) {
return 0
}
count := 0
c := sep[0] c := sep[0]
n := 0 i := 0
for i := 0; i+len(sep) <= len(s); i++ { t := s[:len(s)-n+1]
if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) { for i < len(t) {
n++ if t[i] != c {
i += len(sep) - 1 o := IndexByte(t[i:], c)
if o < 0 {
break
}
i += o
} }
if n == 1 || Equal(s[i:i+n], sep) {
count++
i += n
continue
}
i++
} }
return n return count
} }
// Contains returns whether subslice is within b. // Contains returns whether subslice is within b.
...@@ -99,11 +116,27 @@ func Index(s, sep []byte) int { ...@@ -99,11 +116,27 @@ func Index(s, sep []byte) int {
if n == 0 { if n == 0 {
return 0 return 0
} }
if n > len(s) {
return -1
}
c := sep[0] c := sep[0]
for i := 0; i+n <= len(s); i++ { if n == 1 {
if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) { return IndexByte(s, c)
}
i := 0
t := s[:len(s)-n+1]
for i < len(t) {
if t[i] != c {
o := IndexByte(t[i:], c)
if o < 0 {
break
}
i += o
}
if Equal(s[i:i+n], sep) {
return i return i
} }
i++
} }
return -1 return -1
} }
......
...@@ -64,13 +64,17 @@ func TestCompare(t *testing.T) { ...@@ -64,13 +64,17 @@ func TestCompare(t *testing.T) {
a := []byte(tt.a) a := []byte(tt.a)
b := []byte(tt.b) b := []byte(tt.b)
cmp := Compare(a, b) cmp := Compare(a, b)
eql := Equal(a, b)
if cmp != tt.i { if cmp != tt.i {
t.Errorf(`Compare(%q, %q) = %v`, tt.a, tt.b, cmp) t.Errorf(`Compare(%q, %q) = %v`, tt.a, tt.b, cmp)
} }
eql := Equal(a, b)
if eql != (tt.i == 0) { if eql != (tt.i == 0) {
t.Errorf(`Equal(%q, %q) = %v`, tt.a, tt.b, eql) t.Errorf(`Equal(%q, %q) = %v`, tt.a, tt.b, eql)
} }
eql = EqualPortable(a, b)
if eql != (tt.i == 0) {
t.Errorf(`EqualPortable(%q, %q) = %v`, tt.a, tt.b, eql)
}
} }
} }
...@@ -264,41 +268,152 @@ func TestIndexRune(t *testing.T) { ...@@ -264,41 +268,152 @@ func TestIndexRune(t *testing.T) {
} }
} }
func BenchmarkIndexByte4K(b *testing.B) { bmIndex(b, IndexByte, 4<<10) } var bmbuf []byte
func BenchmarkIndexByte4M(b *testing.B) { bmIndex(b, IndexByte, 4<<20) }
func BenchmarkIndexByte64M(b *testing.B) { bmIndex(b, IndexByte, 64<<20) } func BenchmarkIndexByte32(b *testing.B) { bmIndexByte(b, IndexByte, 32) }
func BenchmarkIndexByte4K(b *testing.B) { bmIndexByte(b, IndexByte, 4<<10) }
func BenchmarkIndexByte4M(b *testing.B) { bmIndexByte(b, IndexByte, 4<<20) }
func BenchmarkIndexByte64M(b *testing.B) { bmIndexByte(b, IndexByte, 64<<20) }
func BenchmarkIndexBytePortable32(b *testing.B) { bmIndexByte(b, IndexBytePortable, 32) }
func BenchmarkIndexBytePortable4K(b *testing.B) { bmIndexByte(b, IndexBytePortable, 4<<10) }
func BenchmarkIndexBytePortable4M(b *testing.B) { bmIndexByte(b, IndexBytePortable, 4<<20) }
func BenchmarkIndexBytePortable64M(b *testing.B) { bmIndexByte(b, IndexBytePortable, 64<<20) }
func BenchmarkIndexBytePortable4K(b *testing.B) { func bmIndexByte(b *testing.B, index func([]byte, byte) int, n int) {
bmIndex(b, IndexBytePortable, 4<<10) if len(bmbuf) < n {
bmbuf = make([]byte, n)
}
b.SetBytes(int64(n))
buf := bmbuf[0:n]
buf[n-1] = 'x'
for i := 0; i < b.N; i++ {
j := index(buf, 'x')
if j != n-1 {
println("bad index", j)
panic("bad index")
}
}
buf[n-1] = '\x00'
} }
func BenchmarkIndexBytePortable4M(b *testing.B) { func BenchmarkEqual32(b *testing.B) { bmEqual(b, Equal, 32) }
bmIndex(b, IndexBytePortable, 4<<20) func BenchmarkEqual4K(b *testing.B) { bmEqual(b, Equal, 4<<10) }
func BenchmarkEqual4M(b *testing.B) { bmEqual(b, Equal, 4<<20) }
func BenchmarkEqual64M(b *testing.B) { bmEqual(b, Equal, 64<<20) }
func BenchmarkEqualPort32(b *testing.B) { bmEqual(b, EqualPortable, 32) }
func BenchmarkEqualPort4K(b *testing.B) { bmEqual(b, EqualPortable, 4<<10) }
func BenchmarkEqualPortable4M(b *testing.B) { bmEqual(b, EqualPortable, 4<<20) }
func BenchmarkEqualPortable64M(b *testing.B) { bmEqual(b, EqualPortable, 64<<20) }
func bmEqual(b *testing.B, equal func([]byte, []byte) bool, n int) {
if len(bmbuf) < 2*n {
bmbuf = make([]byte, 2*n)
}
b.SetBytes(int64(n))
buf1 := bmbuf[0:n]
buf2 := bmbuf[n : 2*n]
buf1[n-1] = 'x'
buf2[n-1] = 'x'
for i := 0; i < b.N; i++ {
eq := equal(buf1, buf2)
if !eq {
panic("bad equal")
}
}
buf1[n-1] = '\x00'
buf2[n-1] = '\x00'
} }
func BenchmarkIndexBytePortable64M(b *testing.B) { func BenchmarkIndex32(b *testing.B) { bmIndex(b, Index, 32) }
bmIndex(b, IndexBytePortable, 64<<20) func BenchmarkIndex4K(b *testing.B) { bmIndex(b, Index, 4<<10) }
func BenchmarkIndex4M(b *testing.B) { bmIndex(b, Index, 4<<20) }
func BenchmarkIndex64M(b *testing.B) { bmIndex(b, Index, 64<<20) }
func bmIndex(b *testing.B, index func([]byte, []byte) int, n int) {
if len(bmbuf) < n {
bmbuf = make([]byte, n)
}
b.SetBytes(int64(n))
buf := bmbuf[0:n]
buf[n-1] = 'x'
for i := 0; i < b.N; i++ {
j := index(buf, buf[n-7:])
if j != n-7 {
println("bad index", j)
panic("bad index")
}
}
buf[n-1] = '\x00'
} }
var bmbuf []byte func BenchmarkIndexEasy32(b *testing.B) { bmIndexEasy(b, Index, 32) }
func BenchmarkIndexEasy4K(b *testing.B) { bmIndexEasy(b, Index, 4<<10) }
func BenchmarkIndexEasy4M(b *testing.B) { bmIndexEasy(b, Index, 4<<20) }
func BenchmarkIndexEasy64M(b *testing.B) { bmIndexEasy(b, Index, 64<<20) }
func bmIndex(b *testing.B, index func([]byte, byte) int, n int) { func bmIndexEasy(b *testing.B, index func([]byte, []byte) int, n int) {
if len(bmbuf) < n { if len(bmbuf) < n {
bmbuf = make([]byte, n) bmbuf = make([]byte, n)
} }
b.SetBytes(int64(n)) b.SetBytes(int64(n))
buf := bmbuf[0:n] buf := bmbuf[0:n]
buf[n-1] = 'x' buf[n-1] = 'x'
buf[n-7] = 'x'
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
j := index(buf, 'x') j := index(buf, buf[n-7:])
if j != n-1 { if j != n-7 {
println("bad index", j) println("bad index", j)
panic("bad index") panic("bad index")
} }
} }
buf[n-1] = '0' buf[n-1] = '\x00'
buf[n-7] = '\x00'
}
func BenchmarkCount32(b *testing.B) { bmCount(b, Count, 32) }
func BenchmarkCount4K(b *testing.B) { bmCount(b, Count, 4<<10) }
func BenchmarkCount4M(b *testing.B) { bmCount(b, Count, 4<<20) }
func BenchmarkCount64M(b *testing.B) { bmCount(b, Count, 64<<20) }
func bmCount(b *testing.B, count func([]byte, []byte) int, n int) {
if len(bmbuf) < n {
bmbuf = make([]byte, n)
}
b.SetBytes(int64(n))
buf := bmbuf[0:n]
buf[n-1] = 'x'
for i := 0; i < b.N; i++ {
j := count(buf, buf[n-7:])
if j != 1 {
println("bad count", j)
panic("bad count")
}
}
buf[n-1] = '\x00'
}
func BenchmarkCountEasy32(b *testing.B) { bmCountEasy(b, Count, 32) }
func BenchmarkCountEasy4K(b *testing.B) { bmCountEasy(b, Count, 4<<10) }
func BenchmarkCountEasy4M(b *testing.B) { bmCountEasy(b, Count, 4<<20) }
func BenchmarkCountEasy64M(b *testing.B) { bmCountEasy(b, Count, 64<<20) }
func bmCountEasy(b *testing.B, count func([]byte, []byte) int, n int) {
if len(bmbuf) < n {
bmbuf = make([]byte, n)
}
b.SetBytes(int64(n))
buf := bmbuf[0:n]
buf[n-1] = 'x'
buf[n-7] = 'x'
for i := 0; i < b.N; i++ {
j := count(buf, buf[n-7:])
if j != 1 {
println("bad count", j)
panic("bad count")
}
}
buf[n-1] = '\x00'
buf[n-7] = '\x00'
} }
type ExplodeTest struct { type ExplodeTest struct {
......
...@@ -6,3 +6,4 @@ package bytes ...@@ -6,3 +6,4 @@ package bytes
// Export func for testing // Export func for testing
var IndexBytePortable = indexBytePortable var IndexBytePortable = indexBytePortable
var EqualPortable = equalPortable
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment