Commit f235d5d8 authored by Rob Pike

runtime: special-case append([]byte, string) for small strings

Also make the crossover point an architecture-dependent constant,
although it's the same everywhere for now.

BenchmarkAppendStr1Byte            416          145  -65.14%
BenchmarkAppendStr4Bytes           743          217  -70.79%
BenchmarkAppendStr8Bytes           421          270  -35.87%
BenchmarkAppendStr16Bytes          415          403   -2.89%
BenchmarkAppendStr32Bytes          415          391   -5.78%

R=golang-dev, iant
CC=golang-dev
https://golang.org/cl/7459044
parent 8cfed599
...@@ -52,6 +52,38 @@ func BenchmarkAppend32Bytes(b *testing.B) { ...@@ -52,6 +52,38 @@ func BenchmarkAppend32Bytes(b *testing.B) {
benchmarkAppendBytes(b, 32) benchmarkAppendBytes(b, 32)
} }
// benchmarkAppendStr times appending str to a byte slice. Each of the b.N
// iterations truncates the slice to length zero and then appends str to it
// N times. The slice is created with capacity N while the timer is stopped,
// so that initial allocation is not measured.
func benchmarkAppendStr(b *testing.B, str string) {
	b.StopTimer()
	buf := make([]byte, 0, N)
	b.StartTimer()
	for iter := 0; iter < b.N; iter++ {
		// Reuse the same backing array on every iteration.
		buf = buf[:0]
		for n := 0; n < N; n++ {
			buf = append(buf, str...)
		}
	}
}
// BenchmarkAppendStr1Byte runs benchmarkAppendStr with a 1-byte string.
func BenchmarkAppendStr1Byte(b *testing.B) {
	const s = "1"
	benchmarkAppendStr(b, s)
}
// BenchmarkAppendStr4Bytes runs benchmarkAppendStr with a 4-byte string.
func BenchmarkAppendStr4Bytes(b *testing.B) {
	const s = "1234"
	benchmarkAppendStr(b, s)
}
// BenchmarkAppendStr8Bytes runs benchmarkAppendStr with an 8-byte string.
func BenchmarkAppendStr8Bytes(b *testing.B) {
	const s = "12345678"
	benchmarkAppendStr(b, s)
}
// BenchmarkAppendStr16Bytes runs benchmarkAppendStr with a 16-byte string.
func BenchmarkAppendStr16Bytes(b *testing.B) {
	const s = "1234567890123456"
	benchmarkAppendStr(b, s)
}
// BenchmarkAppendStr32Bytes runs benchmarkAppendStr with a 32-byte string.
func BenchmarkAppendStr32Bytes(b *testing.B) {
	const s = "12345678901234567890123456789012"
	benchmarkAppendStr(b, s)
}
func BenchmarkAppendSpecialCase(b *testing.B) { func BenchmarkAppendSpecialCase(b *testing.B) {
b.StopTimer() b.StopTimer()
x := make([]int, 0, N) x := make([]int, 0, N)
......
enum { enum {
thechar = '8', thechar = '8',
BigEndian = 0, BigEndian = 0,
CacheLineSize = 64 CacheLineSize = 64,
appendCrossover = 16
}; };
enum { enum {
thechar = '6', thechar = '6',
BigEndian = 0, BigEndian = 0,
CacheLineSize = 64 CacheLineSize = 64,
appendCrossover = 16
}; };
enum { enum {
thechar = '5', thechar = '5',
BigEndian = 0, BigEndian = 0,
CacheLineSize = 32 CacheLineSize = 32,
appendCrossover = 16
}; };
...@@ -110,8 +110,7 @@ runtime·appendslice(SliceType *t, Slice x, Slice y, Slice ret) ...@@ -110,8 +110,7 @@ runtime·appendslice(SliceType *t, Slice x, Slice y, Slice ret)
p = ret.array+ret.len*w; p = ret.array+ret.len*w;
q = y.array; q = y.array;
w *= y.len; w *= y.len;
// TODO: make 16 an architecture-dependent constant. if(w <= appendCrossover) {
if(w <= 16) { // 16 empirically tested as approximate crossover on amd64.
if(p <= q || w <= p-q) // No overlap. if(p <= q || w <= p-q) // No overlap.
while(w-- > 0) while(w-- > 0)
*p++ = *q++; *p++ = *q++;
...@@ -136,6 +135,8 @@ runtime·appendstr(SliceType *t, Slice x, String y, Slice ret) ...@@ -136,6 +135,8 @@ runtime·appendstr(SliceType *t, Slice x, String y, Slice ret)
{ {
intgo m; intgo m;
void *pc; void *pc;
uintptr w;
uint8 *p, *q;
m = x.len+y.len; m = x.len+y.len;
...@@ -158,7 +159,16 @@ runtime·appendstr(SliceType *t, Slice x, String y, Slice ret) ...@@ -158,7 +159,16 @@ runtime·appendstr(SliceType *t, Slice x, String y, Slice ret)
runtime·racewriterangepc(ret.array+ret.len, y.len, 1, pc, runtime·appendstr); runtime·racewriterangepc(ret.array+ret.len, y.len, 1, pc, runtime·appendstr);
} }
runtime·memmove(ret.array + ret.len, y.str, y.len); // Small appends can avoid the overhead of memmove.
w = y.len;
p = ret.array+ret.len;
q = y.str;
if(w <= appendCrossover) {
while(w-- > 0)
*p++ = *q++;
} else {
runtime·memmove(p, q, w);
}
ret.len += y.len; ret.len += y.len;
FLUSH(&ret); FLUSH(&ret);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment