Commit 32add8d7 authored by Uttam C Pawar's avatar Uttam C Pawar Committed by Keith Randall

bytes: improve Compare function on amd64 for large byte arrays

This patch contains only loop unrolling change for size > 63B

Following are the performance numbers for various sizes on
On Haswell based system: Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz.

benchcmp go.head.8.25.15.txt go.head.8.25.15.opt.txt
benchmark                       old ns/op     new ns/op     delta
BenchmarkBytesCompare1-4        5.37          5.37          +0.00%
BenchmarkBytesCompare2-4        5.37          5.38          +0.19%
BenchmarkBytesCompare4-4        5.37          5.37          +0.00%
BenchmarkBytesCompare8-4        4.42          4.38          -0.90%
BenchmarkBytesCompare16-4       4.27          4.45          +4.22%
BenchmarkBytesCompare32-4       5.30          5.36          +1.13%
BenchmarkBytesCompare64-4       6.93          6.78          -2.16%
BenchmarkBytesCompare128-4      10.3          9.50          -7.77%
BenchmarkBytesCompare256-4      17.1          13.8          -19.30%
BenchmarkBytesCompare512-4      31.3          22.1          -29.39%
BenchmarkBytesCompare1024-4     62.5          39.0          -37.60%
BenchmarkBytesCompare2048-4     112           73.2          -34.64%

Change-Id: I4eeb1c22732fd62cbac97ba757b0d29f648d4ef1
Reviewed-on: https://go-review.googlesource.com/11871Reviewed-by: 's avatarKeith Randall <khr@golang.org>
parent abab21b1
......@@ -1255,3 +1255,34 @@ func BenchmarkRepeat(b *testing.B) {
Repeat([]byte("-"), 80)
}
}
func benchmarkBytesCompare(b *testing.B, n int) {
var x = make([]byte, n)
var y = make([]byte, n)
for i := 0; i < n; i++ {
x[i] = 'a'
}
for i := 0; i < n; i++ {
y[i] = 'a'
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
Compare(x, y)
}
}
func BenchmarkBytesCompare1(b *testing.B) { benchmarkBytesCompare(b, 1) }
func BenchmarkBytesCompare2(b *testing.B) { benchmarkBytesCompare(b, 2) }
func BenchmarkBytesCompare4(b *testing.B) { benchmarkBytesCompare(b, 4) }
func BenchmarkBytesCompare8(b *testing.B) { benchmarkBytesCompare(b, 8) }
func BenchmarkBytesCompare16(b *testing.B) { benchmarkBytesCompare(b, 16) }
func BenchmarkBytesCompare32(b *testing.B) { benchmarkBytesCompare(b, 32) }
func BenchmarkBytesCompare64(b *testing.B) { benchmarkBytesCompare(b, 64) }
func BenchmarkBytesCompare128(b *testing.B) { benchmarkBytesCompare(b, 128) }
func BenchmarkBytesCompare256(b *testing.B) { benchmarkBytesCompare(b, 256) }
func BenchmarkBytesCompare512(b *testing.B) { benchmarkBytesCompare(b, 512) }
func BenchmarkBytesCompare1024(b *testing.B) { benchmarkBytesCompare(b, 1024) }
func BenchmarkBytesCompare2048(b *testing.B) { benchmarkBytesCompare(b, 2048) }
......@@ -1445,6 +1445,8 @@ TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
CMPQ R8, $8
JB small
CMPQ R8, $63
JA big_loop
loop:
CMPQ R8, $16
JBE _0through16
......@@ -1459,6 +1461,17 @@ loop:
SUBQ $16, R8
JMP loop
diff64:
ADDQ $48, SI
ADDQ $48, DI
JMP diff16
diff48:
ADDQ $32, SI
ADDQ $32, DI
JMP diff16
diff32:
ADDQ $16, SI
ADDQ $16, DI
// AX = bit mask of differences
diff16:
BSFQ AX, BX // index of first byte that differs
......@@ -1545,6 +1558,43 @@ allsame:
MOVQ AX, (R9)
RET
// this works for >= 64 bytes of data.
big_loop:
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX
JNE diff16
MOVOU 16(SI), X0
MOVOU 16(DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX
JNE diff32
MOVOU 32(SI), X0
MOVOU 32(DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX
JNE diff48
MOVOU 48(SI), X0
MOVOU 48(DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX
JNE diff64
ADDQ $64, SI
ADDQ $64, DI
SUBQ $64, R8
CMPQ R8, $64
JBE loop
JMP big_loop
TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), BX
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment