Commit 7df29b50 authored by Martin Möhrmann's avatar Martin Möhrmann

bytes: speed up Fields and FieldsFunc

Applies the optimizations from golang.org/cl/42810 and golang.org/cl/37959
done to the strings package to the bytes package.

name                      old time/op    new time/op     delta
Fields/ASCII/16              417ns ± 4%      118ns ± 3%    -71.65%  (p=0.000 n=10+10)
Fields/ASCII/256            5.95µs ± 3%     0.88µs ± 0%    -85.23%  (p=0.000 n=10+7)
Fields/ASCII/4096           92.3µs ± 1%     12.8µs ± 2%    -86.13%  (p=0.000 n=10+10)
Fields/ASCII/65536          1.49ms ± 1%     0.25ms ± 1%    -83.14%  (p=0.000 n=10+10)
Fields/ASCII/1048576        25.0ms ± 1%      6.5ms ± 2%    -74.04%  (p=0.000 n=10+10)
Fields/Mixed/16              406ns ± 1%      222ns ± 1%    -45.24%  (p=0.000 n=10+9)
Fields/Mixed/256            5.78µs ± 1%     2.27µs ± 1%    -60.73%  (p=0.000 n=9+10)
Fields/Mixed/4096           97.9µs ± 1%     40.5µs ± 3%    -58.66%  (p=0.000 n=10+10)
Fields/Mixed/65536          1.58ms ± 1%     0.69ms ± 1%    -56.58%  (p=0.000 n=10+10)
Fields/Mixed/1048576        26.6ms ± 1%     12.6ms ± 2%    -52.44%  (p=0.000 n=9+10)
FieldsFunc/ASCII/16          395ns ± 1%      188ns ± 1%    -52.34%  (p=0.000 n=10+10)
FieldsFunc/ASCII/256        5.90µs ± 1%     2.00µs ± 1%    -66.06%  (p=0.000 n=10+10)
FieldsFunc/ASCII/4096       92.5µs ± 1%     33.0µs ± 1%    -64.34%  (p=0.000 n=10+9)
FieldsFunc/ASCII/65536      1.48ms ± 1%     0.54ms ± 1%    -63.38%  (p=0.000 n=10+9)
FieldsFunc/ASCII/1048576    25.1ms ± 1%     10.5ms ± 3%    -58.24%  (p=0.000 n=10+10)
FieldsFunc/Mixed/16          401ns ± 1%      205ns ± 2%    -48.87%  (p=0.000 n=10+10)
FieldsFunc/Mixed/256        5.70µs ± 1%     1.98µs ± 1%    -65.28%  (p=0.000 n=10+10)
FieldsFunc/Mixed/4096       97.5µs ± 1%     35.4µs ± 1%    -63.65%  (p=0.000 n=10+10)
FieldsFunc/Mixed/65536      1.57ms ± 1%     0.61ms ± 1%    -61.20%  (p=0.000 n=10+10)
FieldsFunc/Mixed/1048576    26.5ms ± 1%     11.4ms ± 2%    -56.84%  (p=0.000 n=10+10)

name                      old speed      new speed       delta
Fields/ASCII/16           38.4MB/s ± 4%  134.9MB/s ± 3%   +251.55%  (p=0.000 n=10+10)
Fields/ASCII/256          43.0MB/s ± 3%  290.6MB/s ± 1%   +575.97%  (p=0.000 n=10+8)
Fields/ASCII/4096         44.4MB/s ± 1%  320.0MB/s ± 2%   +620.90%  (p=0.000 n=10+10)
Fields/ASCII/65536        44.0MB/s ± 1%  260.7MB/s ± 1%   +493.15%  (p=0.000 n=10+10)
Fields/ASCII/1048576      42.0MB/s ± 1%  161.6MB/s ± 2%   +285.21%  (p=0.000 n=10+10)
Fields/Mixed/16           39.4MB/s ± 1%   71.7MB/s ± 1%    +82.20%  (p=0.000 n=10+10)
Fields/Mixed/256          44.3MB/s ± 1%  112.8MB/s ± 1%   +154.64%  (p=0.000 n=9+10)
Fields/Mixed/4096         41.9MB/s ± 1%  101.2MB/s ± 3%   +141.92%  (p=0.000 n=10+10)
Fields/Mixed/65536        41.5MB/s ± 1%   95.5MB/s ± 1%   +130.29%  (p=0.000 n=10+10)
Fields/Mixed/1048576      39.4MB/s ± 1%   82.9MB/s ± 2%   +110.28%  (p=0.000 n=9+10)
FieldsFunc/ASCII/16       40.5MB/s ± 1%   84.9MB/s ± 2%   +109.80%  (p=0.000 n=10+10)
FieldsFunc/ASCII/256      43.4MB/s ± 1%  127.9MB/s ± 1%   +194.58%  (p=0.000 n=10+10)
FieldsFunc/ASCII/4096     44.3MB/s ± 1%  124.2MB/s ± 1%   +180.44%  (p=0.000 n=10+9)
FieldsFunc/ASCII/65536    44.2MB/s ± 1%  120.6MB/s ± 1%   +173.06%  (p=0.000 n=10+9)
FieldsFunc/ASCII/1048576  41.8MB/s ± 1%  100.2MB/s ± 3%   +139.53%  (p=0.000 n=10+10)
FieldsFunc/Mixed/16       39.8MB/s ± 1%   77.8MB/s ± 2%    +95.46%  (p=0.000 n=10+10)
FieldsFunc/Mixed/256      44.9MB/s ± 1%  129.4MB/s ± 1%   +187.97%  (p=0.000 n=10+10)
FieldsFunc/Mixed/4096     42.0MB/s ± 1%  115.6MB/s ± 1%   +175.08%  (p=0.000 n=10+10)
FieldsFunc/Mixed/65536    41.6MB/s ± 1%  107.3MB/s ± 1%   +157.75%  (p=0.000 n=10+10)
FieldsFunc/Mixed/1048576  39.6MB/s ± 1%   91.8MB/s ± 2%   +131.72%  (p=0.000 n=10+10)

name                      old alloc/op   new alloc/op    delta
Fields/ASCII/16              80.0B ± 0%      80.0B ± 0%       ~     (all equal)
Fields/ASCII/256              768B ± 0%       768B ± 0%       ~     (all equal)
Fields/ASCII/4096           9.47kB ± 0%     9.47kB ± 0%       ~     (all equal)
Fields/ASCII/65536           147kB ± 0%      147kB ± 0%       ~     (all equal)
Fields/ASCII/1048576        2.27MB ± 0%     2.27MB ± 0%       ~     (all equal)
Fields/Mixed/16              96.0B ± 0%      96.0B ± 0%       ~     (all equal)
Fields/Mixed/256              768B ± 0%       768B ± 0%       ~     (all equal)
Fields/Mixed/4096           9.47kB ± 0%    24.83kB ± 0%   +162.16%  (p=0.000 n=10+10)
Fields/Mixed/65536           147kB ± 0%      497kB ± 0%   +237.24%  (p=0.000 n=10+10)
Fields/Mixed/1048576        2.26MB ± 0%     9.61MB ± 0%   +324.89%  (p=0.000 n=10+10)
FieldsFunc/ASCII/16          80.0B ± 0%      80.0B ± 0%       ~     (all equal)
FieldsFunc/ASCII/256          768B ± 0%       768B ± 0%       ~     (all equal)
FieldsFunc/ASCII/4096       9.47kB ± 0%    24.83kB ± 0%   +162.16%  (p=0.000 n=10+10)
FieldsFunc/ASCII/65536       147kB ± 0%      497kB ± 0%   +237.24%  (p=0.000 n=10+10)
FieldsFunc/ASCII/1048576    2.27MB ± 0%     9.61MB ± 0%   +323.72%  (p=0.000 n=10+10)
FieldsFunc/Mixed/16          96.0B ± 0%      96.0B ± 0%       ~     (all equal)
FieldsFunc/Mixed/256          768B ± 0%       768B ± 0%       ~     (all equal)
FieldsFunc/Mixed/4096       9.47kB ± 0%    24.83kB ± 0%   +162.16%  (p=0.000 n=10+10)
FieldsFunc/Mixed/65536       147kB ± 0%      497kB ± 0%   +237.24%  (p=0.000 n=10+10)
FieldsFunc/Mixed/1048576    2.26MB ± 0%     9.61MB ± 0%   +324.89%  (p=0.000 n=10+10)

name                      old allocs/op  new allocs/op   delta
Fields/ASCII/16               1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/ASCII/256              1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/ASCII/4096             1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/ASCII/65536            1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/ASCII/1048576          1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/Mixed/16               1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/Mixed/256              1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/Mixed/4096             1.00 ± 0%       5.00 ± 0%   +400.00%  (p=0.000 n=10+10)
Fields/Mixed/65536            1.00 ± 0%      12.00 ± 0%  +1100.00%  (p=0.000 n=10+10)
Fields/Mixed/1048576          1.00 ± 0%      24.00 ± 0%  +2300.00%  (p=0.000 n=10+10)
FieldsFunc/ASCII/16           1.00 ± 0%       1.00 ± 0%       ~     (all equal)
FieldsFunc/ASCII/256          1.00 ± 0%       1.00 ± 0%       ~     (all equal)
FieldsFunc/ASCII/4096         1.00 ± 0%       5.00 ± 0%   +400.00%  (p=0.000 n=10+10)
FieldsFunc/ASCII/65536        1.00 ± 0%      12.00 ± 0%  +1100.00%  (p=0.000 n=10+10)
FieldsFunc/ASCII/1048576      1.00 ± 0%      24.00 ± 0%  +2300.00%  (p=0.000 n=10+10)
FieldsFunc/Mixed/16           1.00 ± 0%       1.00 ± 0%       ~     (all equal)
FieldsFunc/Mixed/256          1.00 ± 0%       1.00 ± 0%       ~     (all equal)
FieldsFunc/Mixed/4096         1.00 ± 0%       5.00 ± 0%   +400.00%  (p=0.000 n=10+10)
FieldsFunc/Mixed/65536        1.00 ± 0%      12.00 ± 0%  +1100.00%  (p=0.000 n=10+10)
FieldsFunc/Mixed/1048576      1.00 ± 0%      24.00 ± 0%  +2300.00%  (p=0.000 n=10+10)

Change-Id: If1926782decc2f60d3b4b8c41c2ce7d8bdedfd8f
Reviewed-on: https://go-review.googlesource.com/55131
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarJoe Tsai <thebrokentoaster@gmail.com>
parent dd6880d6
......@@ -265,9 +265,57 @@ func SplitAfter(s, sep []byte) [][]byte {
return genSplit(s, sep, len(sep), -1)
}
var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
// Fields splits the slice s around each instance of one or more consecutive white space
// characters, returning a slice of subslices of s or an empty list if s contains only white space.
// characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an
// empty slice if s contains only white space.
func Fields(s []byte) [][]byte {
// First count the fields.
// This is an exact count if s is ASCII, otherwise it is an approximation.
n := 0
wasSpace := 1
// setBits is used to track which bits are set in the bytes of s.
setBits := uint8(0)
for i := 0; i < len(s); i++ {
r := s[i]
setBits |= r
isSpace := int(asciiSpace[r])
n += wasSpace & ^isSpace
wasSpace = isSpace
}
if setBits < utf8.RuneSelf { // ASCII fast path
a := make([][]byte, n)
na := 0
fieldStart := 0
i := 0
// Skip spaces in the front of the input.
for i < len(s) && asciiSpace[s[i]] != 0 {
i++
}
fieldStart = i
for i < len(s) {
if asciiSpace[s[i]] == 0 {
i++
continue
}
a[na] = s[fieldStart:i]
na++
i++
// Skip spaces in between fields.
for i < len(s) && asciiSpace[s[i]] != 0 {
i++
}
fieldStart = i
}
if fieldStart < len(s) { // Last field might end at EOF.
a[na] = s[fieldStart:]
}
return a
}
// Some runes in the input slice are not ASCII.
return FieldsFunc(s, unicode.IsSpace)
}
......@@ -278,39 +326,49 @@ func Fields(s []byte) [][]byte {
// FieldsFunc makes no guarantees about the order in which it calls f(c).
// If f does not return consistent results for a given c, FieldsFunc may crash.
func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
n := 0
inField := false
for i := 0; i < len(s); {
r, size := utf8.DecodeRune(s[i:])
wasInField := inField
inField = !f(r)
if inField && !wasInField {
n++
}
i += size
// A span is used to record a slice of s of the form s[start:end].
// The start index is inclusive and the end index is exclusive.
type span struct {
start int
end int
}
spans := make([]span, 0, 32)
a := make([][]byte, n)
na := 0
fieldStart := -1
for i := 0; i <= len(s) && na < n; {
r, size := utf8.DecodeRune(s[i:])
if fieldStart < 0 && size > 0 && !f(r) {
fieldStart = i
i += size
continue
}
if fieldStart >= 0 && (size == 0 || f(r)) {
a[na] = s[fieldStart:i]
na++
fieldStart = -1
// Find the field start and end indices.
wasField := false
fromIndex := 0
for i := 0; i < len(s); {
size := 1
r := rune(s[i])
if r >= utf8.RuneSelf {
r, size = utf8.DecodeRune(s[i:])
}
if size == 0 {
break
if f(r) {
if wasField {
spans = append(spans, span{start: fromIndex, end: i})
wasField = false
}
} else {
if !wasField {
fromIndex = i
wasField = true
}
}
i += size
}
return a[0:na]
// Last field might end at EOF.
if wasField {
spans = append(spans, span{fromIndex, len(s)})
}
// Create subslices from recorded field indices.
a := make([][]byte, len(spans))
for i, span := range spans {
a[i] = s[span.start:span.end]
}
return a
}
// Join concatenates the elements of s to create a new byte slice. The separator
......
......@@ -1502,19 +1502,58 @@ var makeFieldsInput = func() []byte {
return x
}
var fieldsInput = makeFieldsInput()
var makeFieldsInputASCII = func() []byte {
x := make([]byte, 1<<20)
// Input is ~10% space, rest ASCII non-space.
for i := range x {
if rand.Intn(10) == 0 {
x[i] = ' '
} else {
x[i] = 'x'
}
}
return x
}
var bytesdata = []struct {
name string
data []byte
}{
{"ASCII", makeFieldsInputASCII()},
{"Mixed", makeFieldsInput()},
}
func BenchmarkFields(b *testing.B) {
b.SetBytes(int64(len(fieldsInput)))
for i := 0; i < b.N; i++ {
Fields(fieldsInput)
for _, sd := range bytesdata {
b.Run(sd.name, func(b *testing.B) {
for j := 1 << 4; j <= 1<<20; j <<= 4 {
b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
b.ReportAllocs()
b.SetBytes(int64(j))
data := sd.data[:j]
for i := 0; i < b.N; i++ {
Fields(data)
}
})
}
})
}
}
func BenchmarkFieldsFunc(b *testing.B) {
b.SetBytes(int64(len(fieldsInput)))
for i := 0; i < b.N; i++ {
FieldsFunc(fieldsInput, unicode.IsSpace)
for _, sd := range bytesdata {
b.Run(sd.name, func(b *testing.B) {
for j := 1 << 4; j <= 1<<20; j <<= 4 {
b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
b.ReportAllocs()
b.SetBytes(int64(j))
data := sd.data[:j]
for i := 0; i < b.N; i++ {
FieldsFunc(data, unicode.IsSpace)
}
})
}
})
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment