Commit 8cfed599 authored by Rob Pike's avatar Rob Pike

runtime: special-case small byte appends.

Update #3679.

BenchmarkAppend1Byte            484          199  -58.88%
BenchmarkAppend4Bytes           829          286  -65.50%
BenchmarkAppend8Bytes           484          365  -24.59%
BenchmarkAppend16Bytes          484          498   +2.89%
BenchmarkAppend32Bytes          486          484   -0.41%

R=iant, dave, rsc
CC=golang-dev
https://golang.org/cl/7443047
parent 0598114a
......@@ -19,6 +19,39 @@ func BenchmarkAppend(b *testing.B) {
}
}
func benchmarkAppendBytes(b *testing.B, length int) {
b.StopTimer()
x := make([]byte, 0, N)
y := make([]byte, length)
b.StartTimer()
for i := 0; i < b.N; i++ {
x = x[0:0]
for j := 0; j < N; j++ {
x = append(x, y...)
}
}
}
func BenchmarkAppend1Byte(b *testing.B) {
benchmarkAppendBytes(b, 1)
}
func BenchmarkAppend4Bytes(b *testing.B) {
benchmarkAppendBytes(b, 4)
}
func BenchmarkAppend8Bytes(b *testing.B) {
benchmarkAppendBytes(b, 8)
}
func BenchmarkAppend16Bytes(b *testing.B) {
benchmarkAppendBytes(b, 16)
}
func BenchmarkAppend32Bytes(b *testing.B) {
benchmarkAppendBytes(b, 32)
}
func BenchmarkAppendSpecialCase(b *testing.B) {
b.StopTimer()
x := make([]int, 0, N)
......@@ -50,3 +83,13 @@ func TestSideEffectOrder(t *testing.T) {
t.Error("append failed: ", x[0], x[1])
}
}
func TestAppendOverlap(t *testing.T) {
x := []byte("1234")
x = append(x[1:], x...) // p > q in runtime·appendslice.
got := string(x)
want := "2341234"
if got != want {
t.Errorf("overlap failed: got %q want %q", got, want)
}
}
......@@ -79,6 +79,7 @@ runtime·appendslice(SliceType *t, Slice x, Slice y, Slice ret)
intgo m;
uintptr w;
void *pc;
uint8 *p, *q;
m = x.len+y.len;
w = t->elem->size;
......@@ -104,7 +105,25 @@ runtime·appendslice(SliceType *t, Slice x, Slice y, Slice ret)
runtime·racewriterangepc(ret.array+ret.len*w, y.len*w, w, pc, runtime·appendslice);
}
runtime·memmove(ret.array + ret.len*w, y.array, y.len*w);
// A very common case is appending bytes. Small appends can avoid the overhead of memmove.
// We can generalize a bit here, and just pick small-sized appends.
p = ret.array+ret.len*w;
q = y.array;
w *= y.len;
// TODO: make 16 an architecture-dependent constant.
if(w <= 16) { // 16 empirically tested as approximate crossover on amd64.
if(p <= q || w <= p-q) // No overlap.
while(w-- > 0)
*p++ = *q++;
else {
p += w;
q += w;
while(w-- > 0)
*--p = *--q;
}
} else {
runtime·memmove(p, q, w);
}
ret.len += y.len;
FLUSH(&ret);
}
......@@ -121,7 +140,7 @@ runtime·appendstr(SliceType *t, Slice x, String y, Slice ret)
m = x.len+y.len;
if(m < x.len)
runtime·throw("append: slice overflow");
runtime·throw("append: string overflow");
if(m > x.cap)
growslice1(t, x, m, &ret);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment