Commit fd22542e authored by Ilya Tocar's avatar Ilya Tocar Committed by Brad Fitzpatrick

strings: speed-up replace for byteStringReplacer case

Use Count instead of loop to determine a number of replacements.
Also increment index instead of advancing slices, to avoid some extra stores.
Shows very significant speed-up on html benchmarks:

Escape-6          34.2µs ± 2%  20.8µs ± 2%  -39.06%  (p=0.000 n=10+10)
EscapeNone-6      7.04µs ± 1%  1.05µs ± 0%  -85.03%  (p=0.000 n=10+10)

On benchmarks in package strings results are still significant:

ByteStringMatch-6    1.59µs ± 2%    1.17µs ± 2%  -26.35%  (p=0.000 n=10+10)
HTMLEscapeNew-6       390ns ± 2%     337ns ± 2%  -13.62%  (p=0.000 n=10+10)
HTMLEscapeOld-6       621ns ± 2%     603ns ± 2%   -2.95%  (p=0.000 n=10+9)

Change-Id: Ibea3235b6e451ba72cd5db57716d17b917e72944
Reviewed-on: https://go-review.googlesource.com/97255
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarBrad Fitzpatrick <bradfitz@golang.org>
parent c0841ecd
......@@ -54,13 +54,21 @@ func NewReplacer(oldnew ...string) *Replacer {
return &Replacer{r: &r}
}
r := byteStringReplacer{}
r := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)}
// The first occurrence of old->new map takes precedence
// over the others with the same old string.
for i := len(oldnew) - 2; i >= 0; i -= 2 {
o := oldnew[i][0]
n := oldnew[i+1]
r[o] = []byte(n)
// To avoid counting repetitions multiple times.
if r.replacements[o] == nil {
// We need to use string([]byte{o}) instead of string(o),
// to avoid utf8 encoding of o.
// E. g. byte(150) produces string of length 2.
r.toReplace = append(r.toReplace, string([]byte{o}))
}
r.replacements[o] = []byte(n)
}
return &Replacer{r: &r}
}
......@@ -454,34 +462,60 @@ func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
// byteStringReplacer is the implementation that's used when all the
// "old" values are single ASCII bytes but the "new" values vary in size.
// The array contains replacement byte slices indexed by old byte.
// A nil []byte means that the old byte should not be replaced.
type byteStringReplacer [256][]byte
type byteStringReplacer struct {
// replacements contains replacement byte slices indexed by old byte.
// A nil []byte means that the old byte should not be replaced.
replacements [256][]byte
// toReplace keeps a list of bytes to replace. Depending on length of toReplace
// and length of target string it may be faster to use Count, or a plain loop.
// We store single byte as a string, because Count takes a string.
toReplace []string
}
// countCutOff controls the ratio of a string length to a number of replacements
// at which (*byteStringReplacer).Replace switches algorithms.
// For strings with higher ration of length to replacements than that value,
// we call Count, for each replacement from toReplace.
// For strings, with a lower ratio we use simple loop, because of Count overhead.
// countCutOff is an empirically determined overhead multiplier.
// TODO(tocarip) revisit once we have register-based abi/mid-stack inlining.
const countCutOff = 8
func (r *byteStringReplacer) Replace(s string) string {
newSize := len(s)
anyChanges := false
for i := 0; i < len(s); i++ {
b := s[i]
if r[b] != nil {
anyChanges = true
// The -1 is because we are replacing 1 byte with len(r[b]) bytes.
newSize += len(r[b]) - 1
// Is it faster to use Count?
if len(r.toReplace)*countCutOff <= len(s) {
for _, x := range r.toReplace {
if c := Count(s, x); c != 0 {
// The -1 is because we are replacing 1 byte with len(replacements[b]) bytes.
newSize += c * (len(r.replacements[x[0]]) - 1)
anyChanges = true
}
}
} else {
for i := 0; i < len(s); i++ {
b := s[i]
if r.replacements[b] != nil {
// See above for explanation of -1
newSize += len(r.replacements[b]) - 1
anyChanges = true
}
}
}
if !anyChanges {
return s
}
buf := make([]byte, newSize)
bi := buf
j := 0
for i := 0; i < len(s); i++ {
b := s[i]
if r[b] != nil {
n := copy(bi, r[b])
bi = bi[n:]
if r.replacements[b] != nil {
j += copy(buf[j:], r.replacements[b])
} else {
bi[0] = b
bi = bi[1:]
buf[j] = b
j++
}
}
return string(buf)
......@@ -492,7 +526,7 @@ func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err erro
last := 0
for i := 0; i < len(s); i++ {
b := s[i]
if r[b] == nil {
if r.replacements[b] == nil {
continue
}
if last != i {
......@@ -503,7 +537,7 @@ func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err erro
}
}
last = i + 1
nw, err := w.Write(r[b])
nw, err := w.Write(r.replacements[b])
n += nw
if err != nil {
return n, err
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment