Commit ee349b5d authored by Dave Cheney's avatar Dave Cheney

runtime: add arm64 runtime.cmpstring and bytes.Compare

Add arm64 assembly implementation of runtime.cmpstring and bytes.Compare.

benchmark                                old ns/op     new ns/op     delta
BenchmarkCompareBytesEqual               98.0          27.5          -71.94%
BenchmarkCompareBytesToNil               9.38          10.0          +6.61%
BenchmarkCompareBytesEmpty               13.3          10.0          -24.81%
BenchmarkCompareBytesIdentical           98.0          27.5          -71.94%
BenchmarkCompareBytesSameLength          43.3          16.3          -62.36%
BenchmarkCompareBytesDifferentLength     43.4          16.3          -62.44%
BenchmarkCompareBytesBigUnaligned        6979680       1360979       -80.50%
BenchmarkCompareBytesBig                 6915995       1381979       -80.02%
BenchmarkCompareBytesBigIdentical        6781440       1327304       -80.43%

benchmark                             old MB/s     new MB/s     speedup
BenchmarkCompareBytesBigUnaligned     150.23       770.46       5.13x
BenchmarkCompareBytesBig              151.62       758.76       5.00x
BenchmarkCompareBytesBigIdentical     154.63       790.01       5.11x

* note, the machine we are benchmarking on has some issues. What is clear is
compared to a few days ago the old MB/s value has increased from ~115 to 150.
I'm less certain about the new MB/s number, which used to be close to 1Gb/s.

Change-Id: I4f31b2c7a06296e13912aacc958525632cb0450d
Reviewed-on: https://go-review.googlesource.com/8541Reviewed-by: 's avatarAram Hăvărneanu <aram@mgk.ro>
Reviewed-by: 's avatarDavid Crawshaw <crawshaw@golang.org>
parent b9ba4ed2
......@@ -801,6 +801,56 @@ eq:
MOVB R3, ret+16(FP)
RET
TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
MOVD s1_base+0(FP), R2
MOVD s1_len+8(FP), R0
MOVD s2_base+16(FP), R3
MOVD s2_len+24(FP), R1
BL runtime·cmpbody(SB)
MOVD R8, ret+32(FP)
RET
TEXT bytes·Compare(SB),NOSPLIT,$0-56
MOVD s1+0(FP), R2
MOVD s1+8(FP), R0
MOVD s2+24(FP), R3
MOVD s2+32(FP), R1
BL runtime·cmpbody(SB)
MOVD R8, ret+48(FP)
RET
// On entry:
// R0 is the length of s1
// R1 is the length of s2
// R2 points to the start of s1
// R3 points to the start of s2
//
// On exit:
// R8 is -1/0/+1
// R5, R4, and R6 are clobbered
TEXT runtime·cmpbody<>(SB),NOSPLIT,$-4-0
CMP R0, R1
CSEL LT, R1, R0, R6 // R6 is min(R0, R1)
ADD R2, R6 // R2 is current byte in s1, R6 is last byte in s1 to compare
loop:
CMP R2, R6
BEQ samebytes // all compared bytes were the same; compare lengths
MOVBU.P 1(R2), R4
MOVBU.P 1(R3), R5
CMP R4, R5
BEQ loop
// bytes differed
MOVD $1, R8
CSNEG LT, R8, R8, R8
RET
samebytes:
MOVD $1, R8
CMP R0, R1
CSNEG LT, R8, R8, R8
CSEL EQ, ZR, R8, R8
RET
// eqstring tests whether two strings are equal.
// The compiler guarantees that strings passed
// to eqstring have equal length.
......
......@@ -2,9 +2,9 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Routines that are implemented in assembly in asm_{amd64,386,arm}.s
// Routines that are implemented in assembly in asm_{amd64,386,arm,arm64}.s
// +build arm64 ppc64 ppc64le
// +build ppc64 ppc64le
package runtime
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment