Commit f6332bb8 authored by Keith Randall

internal/bytealg: move compare functions to bytealg

Move bytes.Compare and runtime·cmpstring to bytealg.

Update #19792

Change-Id: I139e6d7c59686bef7a3017e3dec99eba5fd10447
Reviewed-on: https://go-review.googlesource.com/98515
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
parent 45964e4f
...@@ -14,11 +14,11 @@ func IndexByte(b []byte, c byte) int // in internal/bytealg ...@@ -14,11 +14,11 @@ func IndexByte(b []byte, c byte) int // in internal/bytealg
// Equal returns a boolean reporting whether a and b // Equal returns a boolean reporting whether a and b
// are the same length and contain the same bytes. // are the same length and contain the same bytes.
// A nil argument is equivalent to an empty slice. // A nil argument is equivalent to an empty slice.
func Equal(a, b []byte) bool // ../runtime/asm_$GOARCH.s func Equal(a, b []byte) bool // in internal/bytealg
//go:noescape //go:noescape
// Compare returns an integer comparing two byte slices lexicographically. // Compare returns an integer comparing two byte slices lexicographically.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b. // The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
// A nil argument is equivalent to an empty slice. // A nil argument is equivalent to an empty slice.
func Compare(a, b []byte) int // ../runtime/noasm.go or ../runtime/asm_{386,amd64}.s func Compare(a, b []byte) int // in internal/bytealg
// 386-specific vet whitelist. See readme.txt for details. // 386-specific vet whitelist. See readme.txt for details.
runtime/asm_ARCHSUFF.s: [GOARCH] cannot check cross-package assembly function: Compare is in package bytes internal/bytealg/compare_386.s: [386] cannot check cross-package assembly function: Compare is in package bytes
internal/bytealg/compare_386.s: [386] cannot check cross-package assembly function: cmpstring is in package runtime
// startup code uses non-standard calling convention and intentionally // startup code uses non-standard calling convention and intentionally
// omits args. // omits args.
...@@ -15,7 +16,6 @@ runtime/asm_386.s: [386] morestack: use of 4(SP) points beyond argument frame ...@@ -15,7 +16,6 @@ runtime/asm_386.s: [386] morestack: use of 4(SP) points beyond argument frame
runtime/asm_386.s: [386] ldt0setup: function ldt0setup missing Go declaration runtime/asm_386.s: [386] ldt0setup: function ldt0setup missing Go declaration
runtime/asm_386.s: [386] emptyfunc: function emptyfunc missing Go declaration runtime/asm_386.s: [386] emptyfunc: function emptyfunc missing Go declaration
runtime/asm_386.s: [386] aeshashbody: function aeshashbody missing Go declaration runtime/asm_386.s: [386] aeshashbody: function aeshashbody missing Go declaration
runtime/asm_386.s: [386] cmpbody: function cmpbody missing Go declaration
runtime/asm_386.s: [386] addmoduledata: function addmoduledata missing Go declaration runtime/asm_386.s: [386] addmoduledata: function addmoduledata missing Go declaration
runtime/duff_386.s: [386] duffzero: function duffzero missing Go declaration runtime/duff_386.s: [386] duffzero: function duffzero missing Go declaration
runtime/duff_386.s: [386] duffcopy: function duffcopy missing Go declaration runtime/duff_386.s: [386] duffcopy: function duffcopy missing Go declaration
......
// amd64-specific vet whitelist. See readme.txt for details. // amd64-specific vet whitelist. See readme.txt for details.
internal/bytealg/compare_amd64.s: [amd64] cannot check cross-package assembly function: Compare is in package bytes
internal/bytealg/compare_amd64.s: [amd64] cannot check cross-package assembly function: cmpstring is in package runtime
// False positives. // False positives.
...@@ -11,7 +13,6 @@ runtime/asm_amd64.s: [amd64] morestack: use of 8(SP) points beyond argument fram ...@@ -11,7 +13,6 @@ runtime/asm_amd64.s: [amd64] morestack: use of 8(SP) points beyond argument fram
// Nothing much to do about cross-package assembly. Unfortunate. // Nothing much to do about cross-package assembly. Unfortunate.
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: indexShortStr is in package strings runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: indexShortStr is in package strings
runtime/asm_amd64.s: [GOARCH] cannot check cross-package assembly function: Compare is in package bytes
runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: indexShortStr is in package bytes runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: indexShortStr is in package bytes
// Intentionally missing declarations. These are special assembly routines. // Intentionally missing declarations. These are special assembly routines.
...@@ -20,7 +21,6 @@ runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: index ...@@ -20,7 +21,6 @@ runtime/asm_amd64.s: [amd64] cannot check cross-package assembly function: index
// Others use the platform ABI. // Others use the platform ABI.
// There is no sensible corresponding Go prototype. // There is no sensible corresponding Go prototype.
runtime/asm_amd64.s: [amd64] aeshashbody: function aeshashbody missing Go declaration runtime/asm_amd64.s: [amd64] aeshashbody: function aeshashbody missing Go declaration
runtime/asm_amd64.s: [amd64] cmpbody: function cmpbody missing Go declaration
runtime/asm_amd64.s: [amd64] addmoduledata: function addmoduledata missing Go declaration runtime/asm_amd64.s: [amd64] addmoduledata: function addmoduledata missing Go declaration
runtime/duff_amd64.s: [amd64] duffzero: function duffzero missing Go declaration runtime/duff_amd64.s: [amd64] duffzero: function duffzero missing Go declaration
runtime/duff_amd64.s: [amd64] duffcopy: function duffcopy missing Go declaration runtime/duff_amd64.s: [amd64] duffcopy: function duffcopy missing Go declaration
......
// arm-specific vet whitelist. See readme.txt for details. // arm-specific vet whitelist. See readme.txt for details.
runtime/asm_ARCHSUFF.s: [GOARCH] cannot check cross-package assembly function: Compare is in package bytes internal/bytealg/compare_arm.s: [arm] cannot check cross-package assembly function: Compare is in package bytes
internal/bytealg/compare_arm.s: [arm] cannot check cross-package assembly function: cmpstring is in package runtime
// Intentionally missing declarations. // Intentionally missing declarations.
runtime/asm_arm.s: [arm] emptyfunc: function emptyfunc missing Go declaration runtime/asm_arm.s: [arm] emptyfunc: function emptyfunc missing Go declaration
runtime/asm_arm.s: [arm] abort: function abort missing Go declaration runtime/asm_arm.s: [arm] abort: function abort missing Go declaration
runtime/asm_arm.s: [arm] armPublicationBarrier: function armPublicationBarrier missing Go declaration runtime/asm_arm.s: [arm] armPublicationBarrier: function armPublicationBarrier missing Go declaration
runtime/asm_arm.s: [arm] cmpbody: function cmpbody missing Go declaration
runtime/asm_arm.s: [arm] usplitR0: function usplitR0 missing Go declaration runtime/asm_arm.s: [arm] usplitR0: function usplitR0 missing Go declaration
runtime/asm_arm.s: [arm] addmoduledata: function addmoduledata missing Go declaration runtime/asm_arm.s: [arm] addmoduledata: function addmoduledata missing Go declaration
runtime/duff_arm.s: [arm] duffzero: function duffzero missing Go declaration runtime/duff_arm.s: [arm] duffzero: function duffzero missing Go declaration
......
// arm64-specific vet whitelist. See readme.txt for details. // arm64-specific vet whitelist. See readme.txt for details.
runtime/asm_ARCHSUFF.s: [GOARCH] cannot check cross-package assembly function: Compare is in package bytes internal/bytealg/compare_arm64.s: [arm64] cannot check cross-package assembly function: Compare is in package bytes
internal/bytealg/compare_arm64.s: [arm64] cannot check cross-package assembly function: cmpstring is in package runtime
// Intentionally missing declarations. // Intentionally missing declarations.
runtime/asm_arm64.s: [arm64] abort: function abort missing Go declaration runtime/asm_arm64.s: [arm64] abort: function abort missing Go declaration
......
// mips/mipsle-specific vet whitelist. See readme.txt for details. // mips/mipsle-specific vet whitelist. See readme.txt for details.
internal/bytealg/compare_mipsx.s: [GOARCH] cannot check cross-package assembly function: Compare is in package bytes
internal/bytealg/compare_mipsx.s: [GOARCH] cannot check cross-package assembly function: cmpstring is in package runtime
runtime/asm_mipsx.s: [GOARCH] abort: function abort missing Go declaration runtime/asm_mipsx.s: [GOARCH] abort: function abort missing Go declaration
runtime/tls_mipsx.s: [GOARCH] save_g: function save_g missing Go declaration runtime/tls_mipsx.s: [GOARCH] save_g: function save_g missing Go declaration
runtime/tls_mipsx.s: [GOARCH] load_g: function load_g missing Go declaration runtime/tls_mipsx.s: [GOARCH] load_g: function load_g missing Go declaration
runtime/asm_mipsx.s: [GOARCH] cannot check cross-package assembly function: Compare is in package bytes
runtime/sys_linux_mipsx.s: [GOARCH] clone: 12(R29) should be mp+8(FP) runtime/sys_linux_mipsx.s: [GOARCH] clone: 12(R29) should be mp+8(FP)
runtime/sys_linux_mipsx.s: [GOARCH] clone: 4(R29) should be flags+0(FP) runtime/sys_linux_mipsx.s: [GOARCH] clone: 4(R29) should be flags+0(FP)
runtime/sys_linux_mipsx.s: [GOARCH] clone: 8(R29) should be stk+4(FP) runtime/sys_linux_mipsx.s: [GOARCH] clone: 8(R29) should be stk+4(FP)
// nacl/amd64p32-specific vet whitelist. See readme.txt for details. // nacl/amd64p32-specific vet whitelist. See readme.txt for details.
internal/bytealg/compare_amd64p32.s: [amd64p32] cannot check cross-package assembly function: Compare is in package bytes
internal/bytealg/compare_amd64p32.s: [amd64p32] cannot check cross-package assembly function: cmpstring is in package runtime
// reflect trampolines intentionally omit arg size. Same for morestack. // reflect trampolines intentionally omit arg size. Same for morestack.
runtime/asm_amd64p32.s: [amd64p32] morestack: use of 8(SP) points beyond argument frame runtime/asm_amd64p32.s: [amd64p32] morestack: use of 8(SP) points beyond argument frame
runtime/asm_amd64p32.s: [amd64p32] morestack: use of 16(SP) points beyond argument frame runtime/asm_amd64p32.s: [amd64p32] morestack: use of 16(SP) points beyond argument frame
...@@ -20,8 +23,6 @@ runtime/sys_nacl_amd64p32.s: [amd64p32] settls: function settls missing Go decla ...@@ -20,8 +23,6 @@ runtime/sys_nacl_amd64p32.s: [amd64p32] settls: function settls missing Go decla
runtime/asm_amd64p32.s: [amd64p32] rt0_go: unknown variable argc runtime/asm_amd64p32.s: [amd64p32] rt0_go: unknown variable argc
runtime/asm_amd64p32.s: [amd64p32] rt0_go: unknown variable argv runtime/asm_amd64p32.s: [amd64p32] rt0_go: unknown variable argv
runtime/asm_amd64p32.s: [amd64p32] cannot check cross-package assembly function: Compare is in package bytes
runtime/asm_amd64p32.s: [amd64p32] cmpbody: function cmpbody missing Go declaration
runtime/asm_amd64p32.s: [amd64p32] asmcgocall: RET without writing to 4-byte ret+8(FP) runtime/asm_amd64p32.s: [amd64p32] asmcgocall: RET without writing to 4-byte ret+8(FP)
runtime/asm_amd64p32.s: [amd64p32] stackcheck: function stackcheck missing Go declaration runtime/asm_amd64p32.s: [amd64p32] stackcheck: function stackcheck missing Go declaration
// ppc64-specific vet whitelist. See readme.txt for details. // ppc64-specific vet whitelist. See readme.txt for details.
runtime/asm_ARCHSUFF.s: [GOARCH] cannot check cross-package assembly function: Compare is in package bytes internal/bytealg/compare_ppc64x.s: [GOARCH] cannot check cross-package assembly function: Compare is in package bytes
internal/bytealg/compare_ppc64x.s: [GOARCH] cannot check cross-package assembly function: cmpstring is in package runtime
runtime/asm_ppc64x.s: [GOARCH] reginit: function reginit missing Go declaration runtime/asm_ppc64x.s: [GOARCH] reginit: function reginit missing Go declaration
runtime/asm_ppc64x.s: [GOARCH] abort: function abort missing Go declaration runtime/asm_ppc64x.s: [GOARCH] abort: function abort missing Go declaration
......
runtime/asm_s390x.s: [s390x] abort: function abort missing Go declaration runtime/asm_s390x.s: [s390x] abort: function abort missing Go declaration
runtime/asm_s390x.s: [s390x] cannot check cross-package assembly function: Compare is in package bytes internal/bytealg/compare_s390x.s: [s390x] cannot check cross-package assembly function: Compare is in package bytes
runtime/asm_s390x.s: [s390x] cmpbody: function cmpbody missing Go declaration internal/bytealg/compare_s390x.s: [s390x] cannot check cross-package assembly function: cmpstring is in package runtime
runtime/asm_s390x.s: [s390x] cmpbodyclc: function cmpbodyclc missing Go declaration
runtime/asm_s390x.s: [s390x] cannot check cross-package assembly function: supportsVX is in package strings runtime/asm_s390x.s: [s390x] cannot check cross-package assembly function: supportsVX is in package strings
runtime/asm_s390x.s: [s390x] cannot check cross-package assembly function: supportsVX is in package bytes runtime/asm_s390x.s: [s390x] cannot check cross-package assembly function: supportsVX is in package bytes
runtime/asm_s390x.s: [s390x] cannot check cross-package assembly function: indexShortStr is in package strings runtime/asm_s390x.s: [s390x] cannot check cross-package assembly function: indexShortStr is in package strings
......
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// ·Compare implements func Compare(a, b []byte) int on 386.
// It loads the slice data pointers and lengths into the registers that
// cmpbody expects, points AX at the result slot, and tail-jumps to cmpbody,
// which stores the result and returns directly to the caller.
TEXT ·Compare(SB),NOSPLIT,$0-28
MOVL a_base+0(FP), SI // SI = data pointer of a
MOVL a_len+4(FP), BX // BX = len(a)
MOVL b_base+12(FP), DI // DI = data pointer of b
MOVL b_len+16(FP), DX // DX = len(b)
LEAL ret+24(FP), AX // AX = address of the result word
JMP cmpbody<>(SB)
// bytes·Compare provides the body for the bytes package's Compare symbol.
// The argument layout is that of two byte slices followed by an int result;
// the registers are set up for cmpbody, which stores the result through AX
// and returns directly to the caller.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
MOVL a_base+0(FP), SI // SI = data pointer of a
MOVL a_len+4(FP), BX // BX = len(a)
MOVL b_base+12(FP), DI // DI = data pointer of b
MOVL b_len+16(FP), DX // DX = len(b)
LEAL ret+24(FP), AX // AX = address of the result word
JMP cmpbody<>(SB)
// runtime·cmpstring provides the body for the runtime package's cmpstring
// symbol. The arguments are two strings (pointer and length, 8 bytes each)
// followed by an int result at offset 16. The registers are set up for
// cmpbody, which stores the result through AX and returns to the caller.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
MOVL a_base+0(FP), SI // SI = data pointer of a
MOVL a_len+4(FP), BX // BX = len(a)
MOVL b_base+8(FP), DI // DI = data pointer of b
MOVL b_len+12(FP), DX // DX = len(b)
LEAL ret+16(FP), AX // AX = address of the result word
JMP cmpbody<>(SB)
// cmpbody compares the byte sequences a and b and stores -1, 0 or +1
// through AX. Only the first min(alen, blen) bytes are compared; if they
// are all equal the result is decided by the lengths alone.
//
// input:
// SI = a
// DI = b
// BX = alen
// DX = blen
// AX = address of return word (set to 1/0/-1)
TEXT cmpbody<>(SB),NOSPLIT,$0-0
MOVL DX, BP // BP = blen for now
SUBL BX, DX // DX = blen-alen
JLE 2(PC) // blen <= alen: BP already holds the minimum
MOVL BX, BP // BP = min(alen, blen)
CMPL SI, DI
JEQ allsame // same starting address: only the lengths can differ
CMPL BP, $4
JB small // fewer than 4 bytes to compare
CMPB runtime·support_sse2(SB), $1
JNE mediumloop // no SSE2: compare 4 bytes at a time
// Compare 16 bytes per iteration with SSE2.
largeloop:
CMPL BP, $16
JB mediumloop // fewer than 16 bytes left
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1 // bytes of X1 become 0xff where a and b agree
PMOVMSKB X1, BX // BX = 16-bit mask, one bit per equal byte
XORL $0xffff, BX // convert EQ to NE
JNE diff16 // branch if at least one byte is not equal
ADDL $16, SI
ADDL $16, DI
SUBL $16, BP
JMP largeloop
// BX = bit mask of the bytes that differ in the current 16-byte chunk.
diff16:
BSFL BX, BX // index of first byte that differs
XORL DX, DX
MOVB (SI)(BX*1), CX
CMPB CX, (DI)(BX*1)
SETHI DX // DX = 1 if a's byte is larger (unsigned), else 0
LEAL -1(DX*2), DX // convert 1/0 to +1/-1
MOVL DX, (AX)
RET
// Compare 4 bytes per iteration.
mediumloop:
CMPL BP, $4
JBE _0through4 // 4 or fewer bytes left
MOVL (SI), BX
MOVL (DI), CX
CMPL BX, CX
JNE diff4
ADDL $4, SI
ADDL $4, DI
SUBL $4, BP
JMP mediumloop
// Compare the 4 bytes that end at the last byte to compare. This load may
// re-read bytes that were already found equal.
_0through4:
MOVL -4(SI)(BP*1), BX
MOVL -4(DI)(BP*1), CX
CMPL BX, CX
JEQ allsame
// BX and CX hold 4-byte words of a and b that differ.
diff4:
BSWAPL BX // reverse order of bytes
BSWAPL CX
XORL BX, CX // find bit differences
BSRL CX, CX // index of highest bit difference
SHRL CX, BX // move a's bit to bottom
ANDL $1, BX // mask bit
LEAL -1(BX*2), BX // 1/0 => +1/-1
MOVL BX, (AX)
RET
// 0-3 bytes in common
small:
LEAL (BP*8), CX // bytes left -> bits left
NEGL CX // - bits left (as a shift count: 32 - bits left, mod 32)
JEQ allsame // nothing to compare
// load the bytes of a into the high bytes of SI
CMPB SI, $0xfc
JA si_high // low address byte above 0xfc: a 4-byte load at SI would run into the next 256-byte block
MOVL (SI), SI
JMP si_finish
si_high:
MOVL -4(SI)(BP*1), SI // load the 4 bytes ending at a's last compared byte instead
SHRL CX, SI // move the wanted bytes down to the low end
si_finish:
SHLL CX, SI // move the wanted bytes to the high end, dropping the rest
// same for di
CMPB DI, $0xfc
JA di_high
MOVL (DI), DI
JMP di_finish
di_high:
MOVL -4(DI)(BP*1), DI
SHRL CX, DI
di_finish:
SHLL CX, DI
BSWAPL SI // reverse order of bytes
BSWAPL DI
XORL SI, DI // find bit differences
JEQ allsame
BSRL DI, CX // index of highest bit difference
SHRL CX, SI // move a's bit to bottom
ANDL $1, SI // mask bit
LEAL -1(SI*2), BX // 1/0 => +1/-1
MOVL BX, (AX)
RET
// all the bytes in common are the same, so we just need
// to compare the lengths.
// DX still holds blen-alen here.
allsame:
XORL BX, BX
XORL CX, CX
TESTL DX, DX
SETLT BX // 1 if alen > blen
SETEQ CX // 1 if alen == blen
LEAL -1(CX)(BX*2), BX // 1,0,-1 result
MOVL BX, (AX)
RET
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// ·Compare implements func Compare(a, b []byte) int on amd64.
// It loads the slice data pointers and lengths into the registers that
// cmpbody expects, points R9 at the result slot, and tail-jumps to cmpbody,
// which stores the result and returns directly to the caller.
TEXT ·Compare(SB),NOSPLIT,$0-56
MOVQ a_base+0(FP), SI // SI = data pointer of a
MOVQ a_len+8(FP), BX // BX = len(a)
MOVQ b_base+24(FP), DI // DI = data pointer of b
MOVQ b_len+32(FP), DX // DX = len(b)
LEAQ ret+48(FP), R9 // R9 = address of the result word
JMP cmpbody<>(SB)
// bytes·Compare provides the body for the bytes package's Compare symbol.
// The argument layout is that of two byte slices followed by an int result;
// the registers are set up for cmpbody, which stores the result through R9
// and returns directly to the caller.
TEXT bytes·Compare(SB),NOSPLIT,$0-56
MOVQ a_base+0(FP), SI // SI = data pointer of a
MOVQ a_len+8(FP), BX // BX = len(a)
MOVQ b_base+24(FP), DI // DI = data pointer of b
MOVQ b_len+32(FP), DX // DX = len(b)
LEAQ ret+48(FP), R9 // R9 = address of the result word
JMP cmpbody<>(SB)
// runtime·cmpstring provides the body for the runtime package's cmpstring
// symbol. The arguments are two strings (pointer and length, 16 bytes each)
// followed by an int result at offset 32. The registers are set up for
// cmpbody, which stores the result through R9 and returns to the caller.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
MOVQ a_base+0(FP), SI // SI = data pointer of a
MOVQ a_len+8(FP), BX // BX = len(a)
MOVQ b_base+16(FP), DI // DI = data pointer of b
MOVQ b_len+24(FP), DX // DX = len(b)
LEAQ ret+32(FP), R9 // R9 = address of the result word
JMP cmpbody<>(SB)
// cmpbody compares the byte sequences a and b and stores -1, 0 or +1
// through R9. Only the first min(alen, blen) bytes are compared; if they
// are all equal the result is decided by the lengths alone.
// Inputs of 64 bytes or more use a 64-byte-per-iteration loop (AVX2 when
// available), shorter inputs use 16-byte or 8-byte steps.
//
// input:
// SI = a
// DI = b
// BX = alen
// DX = blen
// R9 = address of output word (stores -1/0/1 here)
TEXT cmpbody<>(SB),NOSPLIT,$0-0
CMPQ SI, DI
JEQ allsame // same starting address: only the lengths can differ
CMPQ BX, DX
MOVQ DX, R8
CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare
CMPQ R8, $8
JB small // fewer than 8 bytes to compare
CMPQ R8, $63
JBE loop // fewer than 64 bytes: use the 16-byte loop
CMPB internal∕cpu·X86+const_x86_HasAVX2(SB), $1
JEQ big_loop_avx2
JMP big_loop
// Compare 16 bytes per iteration.
loop:
CMPQ R8, $16
JBE _0through16 // 16 or fewer bytes left
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1 // bytes of X1 become 0xff where a and b agree
PMOVMSKB X1, AX // AX = 16-bit mask, one bit per equal byte
XORQ $0xffff, AX // convert EQ to NE
JNE diff16 // branch if at least one byte is not equal
ADDQ $16, SI
ADDQ $16, DI
SUBQ $16, R8
JMP loop
// diff64/diff48/diff32 advance SI and DI to the 16- or 32-byte piece of
// the current 64-byte chunk that AX describes, then share diff16.
diff64:
ADDQ $48, SI
ADDQ $48, DI
JMP diff16
diff48:
ADDQ $32, SI
ADDQ $32, DI
JMP diff16
diff32:
ADDQ $16, SI
ADDQ $16, DI
// AX = bit mask of differences
diff16:
BSFQ AX, BX // index of first byte that differs
XORQ AX, AX
MOVB (SI)(BX*1), CX
CMPB CX, (DI)(BX*1)
SETHI AX // AX = 1 if a's byte is larger (unsigned), else 0
LEAQ -1(AX*2), AX // convert 1/0 to +1/-1
MOVQ AX, (R9)
RET
// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
CMPQ R8, $8
JBE _0through8
MOVQ (SI), AX
MOVQ (DI), CX
CMPQ AX, CX
JNE diff8
// Compare the 8 bytes that end at the last byte to compare. This load may
// re-read bytes that were already found equal.
_0through8:
MOVQ -8(SI)(R8*1), AX
MOVQ -8(DI)(R8*1), CX
CMPQ AX, CX
JEQ allsame
// AX and CX contain parts of a and b that differ.
diff8:
BSWAPQ AX // reverse order of bytes
BSWAPQ CX
XORQ AX, CX // find bit differences
BSRQ CX, CX // index of highest bit difference
SHRQ CX, AX // move a's bit to bottom
ANDQ $1, AX // mask bit
LEAQ -1(AX*2), AX // 1/0 => +1/-1
MOVQ AX, (R9)
RET
// 0-7 bytes in common
small:
LEAQ (R8*8), CX // bytes left -> bits left
NEGQ CX // - bits left (== 64 - bits left mod 64)
JEQ allsame // nothing to compare
// load bytes of a into high bytes of SI
CMPB SI, $0xf8
JA si_high // low address byte above 0xf8: an 8-byte load at SI would run into the next 256-byte block
MOVQ (SI), SI
JMP si_finish
si_high:
MOVQ -8(SI)(R8*1), SI // load the 8 bytes ending at a's last compared byte instead
SHRQ CX, SI // move the wanted bytes down to the low end
si_finish:
SHLQ CX, SI // move the wanted bytes to the high end, dropping the rest
// load bytes of b into high bytes of DI
CMPB DI, $0xf8
JA di_high
MOVQ (DI), DI
JMP di_finish
di_high:
MOVQ -8(DI)(R8*1), DI
SHRQ CX, DI
di_finish:
SHLQ CX, DI
BSWAPQ SI // reverse order of bytes
BSWAPQ DI
XORQ SI, DI // find bit differences
JEQ allsame
BSRQ DI, CX // index of highest bit difference
SHRQ CX, SI // move a's bit to bottom
ANDQ $1, SI // mask bit
LEAQ -1(SI*2), AX // 1/0 => +1/-1
MOVQ AX, (R9)
RET
// All compared bytes are equal (or a and b start at the same address),
// so the result depends only on the lengths in BX and DX.
allsame:
XORQ AX, AX
XORQ CX, CX
CMPQ BX, DX
SETGT AX // 1 if alen > blen
SETEQ CX // 1 if alen == blen
LEAQ -1(CX)(AX*2), AX // 1,0,-1 result
MOVQ AX, (R9)
RET
// this works for >= 64 bytes of data.
// Four unrolled 16-byte SSE compares per iteration.
big_loop:
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX // convert EQ to NE
JNE diff16
MOVOU 16(SI), X0
MOVOU 16(DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX
JNE diff32
MOVOU 32(SI), X0
MOVOU 32(DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX
JNE diff48
MOVOU 48(SI), X0
MOVOU 48(DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX
JNE diff64
ADDQ $64, SI
ADDQ $64, DI
SUBQ $64, R8
CMPQ R8, $64
JBE loop // 64 or fewer bytes left: finish in the 16-byte loop
JMP big_loop
// Compare 64-bytes per loop iteration.
// Loop is unrolled and uses AVX2.
big_loop_avx2:
VMOVDQU (SI), Y2
VMOVDQU (DI), Y3
VMOVDQU 32(SI), Y4
VMOVDQU 32(DI), Y5
VPCMPEQB Y2, Y3, Y0
VPMOVMSKB Y0, AX // AX = 32-bit mask, one bit per equal byte of the first half
XORL $0xffffffff, AX // convert EQ to NE
JNE diff32_avx2
VPCMPEQB Y4, Y5, Y6
VPMOVMSKB Y6, AX // same for the second 32 bytes
XORL $0xffffffff, AX
JNE diff64_avx2
ADDQ $64, SI
ADDQ $64, DI
SUBQ $64, R8
CMPQ R8, $64
JB big_loop_avx2_exit
JMP big_loop_avx2
// Avoid AVX->SSE transition penalty and search first 32 bytes of 64 byte chunk.
diff32_avx2:
VZEROUPPER
JMP diff16
// Same as diff32_avx2, but for last 32 bytes.
diff64_avx2:
VZEROUPPER
JMP diff48
// For <64 bytes remainder jump to normal loop.
big_loop_avx2_exit:
VZEROUPPER
JMP loop
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// ·Compare implements func Compare(a, b []byte) int on amd64p32.
// It loads the slice data pointers and lengths into the registers that
// cmpbody expects, calls cmpbody, and stores the value cmpbody leaves in
// AX as the result.
TEXT ·Compare(SB),NOSPLIT,$0-28
MOVL a_base+0(FP), SI // SI = data pointer of a
MOVL a_len+4(FP), BX // BX = len(a)
MOVL b_base+12(FP), DI // DI = data pointer of b
MOVL b_len+16(FP), DX // DX = len(b)
CALL cmpbody<>(SB)
MOVL AX, ret+24(FP) // AX = 1/0/-1 from cmpbody
RET
// bytes·Compare provides the body for the bytes package's Compare symbol.
// The argument layout is that of two byte slices followed by an int result;
// cmpbody is called with the pointers and lengths in registers and its
// AX return value is stored as the result.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
MOVL a_base+0(FP), SI // SI = data pointer of a
MOVL a_len+4(FP), BX // BX = len(a)
MOVL b_base+12(FP), DI // DI = data pointer of b
MOVL b_len+16(FP), DX // DX = len(b)
CALL cmpbody<>(SB)
MOVL AX, ret+24(FP) // AX = 1/0/-1 from cmpbody
RET
// runtime·cmpstring provides the body for the runtime package's cmpstring
// symbol. The arguments are two strings (pointer and length, 8 bytes each)
// followed by an int result at offset 16. cmpbody is called with the
// pointers and lengths in registers and its AX return value is stored as
// the result.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
MOVL a_base+0(FP), SI // SI = data pointer of a
MOVL a_len+4(FP), BX // BX = len(a)
MOVL b_base+8(FP), DI // DI = data pointer of b
MOVL b_len+12(FP), DX // DX = len(b)
CALL cmpbody<>(SB)
MOVL AX, ret+16(FP) // AX = 1/0/-1 from cmpbody
RET
// cmpbody compares the byte sequences a and b and returns -1, 0 or +1 in
// AX. Only the first min(alen, blen) bytes are compared; if they are all
// equal the result is decided by the lengths alone.
//
// input:
// SI = a
// DI = b
// BX = alen
// DX = blen
// output:
// AX = 1/0/-1
TEXT cmpbody<>(SB),NOSPLIT,$0-0
CMPQ SI, DI
JEQ allsame // same starting address: only the lengths can differ
CMPQ BX, DX
MOVQ DX, R8
CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare
CMPQ R8, $8
JB small // fewer than 8 bytes to compare
// Compare 16 bytes per iteration.
loop:
CMPQ R8, $16
JBE _0through16 // 16 or fewer bytes left
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1 // bytes of X1 become 0xff where a and b agree
PMOVMSKB X1, AX // AX = 16-bit mask, one bit per equal byte
XORQ $0xffff, AX // convert EQ to NE
JNE diff16 // branch if at least one byte is not equal
ADDQ $16, SI
ADDQ $16, DI
SUBQ $16, R8
JMP loop
// AX = bit mask of differences
diff16:
BSFQ AX, BX // index of first byte that differs
XORQ AX, AX
ADDQ BX, SI // SI = address of a's differing byte
MOVB (SI), CX
ADDQ BX, DI // DI = address of b's differing byte
CMPB CX, (DI)
SETHI AX // AX = 1 if a's byte is larger (unsigned), else 0
LEAQ -1(AX*2), AX // convert 1/0 to +1/-1
RET
// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
CMPQ R8, $8
JBE _0through8
MOVQ (SI), AX
MOVQ (DI), CX
CMPQ AX, CX
JNE diff8
// Compare the 8 bytes that end at the last byte to compare. This load may
// re-read bytes that were already found equal.
_0through8:
ADDQ R8, SI
ADDQ R8, DI
MOVQ -8(SI), AX
MOVQ -8(DI), CX
CMPQ AX, CX
JEQ allsame
// AX and CX contain parts of a and b that differ.
diff8:
BSWAPQ AX // reverse order of bytes
BSWAPQ CX
XORQ AX, CX // find bit differences
BSRQ CX, CX // index of highest bit difference
SHRQ CX, AX // move a's bit to bottom
ANDQ $1, AX // mask bit
LEAQ -1(AX*2), AX // 1/0 => +1/-1
RET
// 0-7 bytes in common
small:
LEAQ (R8*8), CX // bytes left -> bits left
NEGQ CX // - bits left (== 64 - bits left mod 64)
JEQ allsame // nothing to compare
// load bytes of a into high bytes of SI
CMPB SI, $0xf8
JA si_high // low address byte above 0xf8: an 8-byte load at SI would run into the next 256-byte block
MOVQ (SI), SI
JMP si_finish
si_high:
ADDQ R8, SI
MOVQ -8(SI), SI // load the 8 bytes ending at a's last compared byte instead
SHRQ CX, SI // move the wanted bytes down to the low end
si_finish:
SHLQ CX, SI // move the wanted bytes to the high end, dropping the rest
// load bytes of b into high bytes of DI
CMPB DI, $0xf8
JA di_high
MOVQ (DI), DI
JMP di_finish
di_high:
ADDQ R8, DI
MOVQ -8(DI), DI
SHRQ CX, DI
di_finish:
SHLQ CX, DI
BSWAPQ SI // reverse order of bytes
BSWAPQ DI
XORQ SI, DI // find bit differences
JEQ allsame
BSRQ DI, CX // index of highest bit difference
SHRQ CX, SI // move a's bit to bottom
ANDQ $1, SI // mask bit
LEAQ -1(SI*2), AX // 1/0 => +1/-1
RET
// All compared bytes are equal (or a and b start at the same address),
// so the result depends only on the lengths in BX and DX.
allsame:
XORQ AX, AX
XORQ CX, CX
CMPQ BX, DX
SETGT AX // 1 if alen > blen
SETEQ CX // 1 if alen == blen
LEAQ -1(CX)(AX*2), AX // 1,0,-1 result
RET
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// ·Compare implements func Compare(a, b []byte) int on arm.
// It loads the slice data pointers and lengths into the registers that
// cmpbody expects and branches to cmpbody, which writes the result through
// R7 and returns directly to the caller.
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-28
MOVW a_base+0(FP), R2 // R2 = data pointer of a
MOVW a_len+4(FP), R0 // R0 = len(a)
MOVW b_base+12(FP), R3 // R3 = data pointer of b
MOVW b_len+16(FP), R1 // R1 = len(b)
ADD $28, R13, R7 // R7 = R13+28, used by cmpbody as the result address (NOTE(review): presumably ret+24(FP) relative to SP — confirm)
B cmpbody<>(SB)
// bytes·Compare provides the body for the bytes package's Compare symbol.
// The argument layout is that of two byte slices followed by an int result;
// the registers are set up for cmpbody, which writes the result through R7
// and returns directly to the caller.
TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-28
MOVW a_base+0(FP), R2 // R2 = data pointer of a
MOVW a_len+4(FP), R0 // R0 = len(a)
MOVW b_base+12(FP), R3 // R3 = data pointer of b
MOVW b_len+16(FP), R1 // R1 = len(b)
ADD $28, R13, R7 // R7 = R13+28, used by cmpbody as the result address (NOTE(review): presumably ret+24(FP) relative to SP — confirm)
B cmpbody<>(SB)
// runtime·cmpstring provides the body for the runtime package's cmpstring
// symbol. The arguments are two strings (pointer and length, 8 bytes each)
// followed by an int result. The registers are set up for cmpbody, which
// writes the result through R7 and returns directly to the caller.
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-20
MOVW a_base+0(FP), R2 // R2 = data pointer of a
MOVW a_len+4(FP), R0 // R0 = len(a)
MOVW b_base+8(FP), R3 // R3 = data pointer of b
MOVW b_len+12(FP), R1 // R1 = len(b)
ADD $20, R13, R7 // R7 = R13+20, used by cmpbody as the result address (NOTE(review): presumably ret+16(FP) relative to SP — confirm)
B cmpbody<>(SB)
// cmpbody compares a and b one byte at a time over the first
// min(len(a), len(b)) bytes and writes -1, 0 or +1 through R7. If all of
// those bytes are equal the result is decided by the lengths alone.
//
// On entry:
// R0 is the length of a
// R1 is the length of b
// R2 points to the start of a
// R3 points to the start of b
// R7 points to return value (-1/0/1 will be written here)
//
// On exit:
// R4, R5, and R6 are clobbered
// (R0 is overwritten with the result; R2 and R3 are advanced by the loop.)
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
CMP R2, R3
BEQ samebytes // same starting pointers; compare lengths
CMP R0, R1
MOVW R0, R6
MOVW.LT R1, R6 // R6 is min(R0, R1)
ADD R2, R6 // R2 is current byte in a, R6 is the end of a's range to compare (one past the last byte)
loop:
CMP R2, R6
BEQ samebytes // all compared bytes were the same; compare lengths
MOVBU.P 1(R2), R4 // R4 = next byte of a; R2 is post-incremented
MOVBU.P 1(R3), R5 // R5 = next byte of b; R3 is post-incremented
CMP R4, R5
BEQ loop
// bytes differed
MOVW.LT $1, R0 // a's byte is the larger one
MOVW.GT $-1, R0 // a's byte is the smaller one
MOVW R0, (R7)
RET
samebytes:
CMP R0, R1
MOVW.LT $1, R0 // a is longer
MOVW.GT $-1, R0 // a is shorter
MOVW.EQ $0, R0 // same length
MOVW R0, (R7)
RET
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// ·Compare implements func Compare(a, b []byte) int on arm64.
// It loads the slice data pointers and lengths into the registers that
// cmpbody expects and branches to cmpbody, which writes the result through
// R7 and returns directly to the caller.
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
MOVD a_base+0(FP), R2 // R2 = data pointer of a
MOVD a_len+8(FP), R0 // R0 = len(a)
MOVD b_base+24(FP), R3 // R3 = data pointer of b
MOVD b_len+32(FP), R1 // R1 = len(b)
ADD $56, RSP, R7 // R7 = RSP+56, used by cmpbody as the result address (NOTE(review): presumably ret+48(FP) relative to RSP — confirm)
B cmpbody<>(SB)
// bytes·Compare provides the body for the bytes package's Compare symbol.
// The argument layout is that of two byte slices followed by an int result;
// the registers are set up for cmpbody, which writes the result through R7
// and returns directly to the caller.
TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
MOVD a_base+0(FP), R2 // R2 = data pointer of a
MOVD a_len+8(FP), R0 // R0 = len(a)
MOVD b_base+24(FP), R3 // R3 = data pointer of b
MOVD b_len+32(FP), R1 // R1 = len(b)
ADD $56, RSP, R7 // R7 = RSP+56, used by cmpbody as the result address (NOTE(review): presumably ret+48(FP) relative to RSP — confirm)
B cmpbody<>(SB)
// runtime·cmpstring provides the body for the runtime package's cmpstring
// symbol. The arguments are two strings (pointer and length, 16 bytes each)
// followed by an int result. The registers are set up for cmpbody, which
// writes the result through R7 and returns directly to the caller.
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
MOVD a_base+0(FP), R2 // R2 = data pointer of a
MOVD a_len+8(FP), R0 // R0 = len(a)
MOVD b_base+16(FP), R3 // R3 = data pointer of b
MOVD b_len+24(FP), R1 // R1 = len(b)
ADD $40, RSP, R7 // R7 = RSP+40, used by cmpbody as the result address (NOTE(review): presumably ret+32(FP) relative to RSP — confirm)
B cmpbody<>(SB)
// cmpbody compares a and b one byte at a time over the first
// min(len(a), len(b)) bytes and writes -1, 0 or +1 through R7. If all of
// those bytes are equal the result is decided by the lengths alone.
//
// On entry:
// R0 is the length of a
// R1 is the length of b
// R2 points to the start of a
// R3 points to the start of b
// R7 points to return value (-1/0/1 will be written here)
//
// On exit:
// R4, R5, and R6 are clobbered
// (R2 and R3 are advanced by the loop.)
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
CMP R2, R3
BEQ samebytes // same starting pointers; compare lengths
CMP R0, R1
CSEL LT, R1, R0, R6 // R6 is min(R0, R1)
ADD R2, R6 // R2 is current byte in a, R6 is the end of a's range to compare (one past the last byte)
loop:
CMP R2, R6
BEQ samebytes // all compared bytes were the same; compare lengths
MOVBU.P 1(R2), R4 // R4 = next byte of a; R2 is post-incremented
MOVBU.P 1(R3), R5 // R5 = next byte of b; R3 is post-incremented
CMP R4, R5
BEQ loop
// bytes differed
MOVD $1, R4
CSNEG LT, R4, R4, R4 // R4 = +1 if LT holds, otherwise -1
MOVD R4, (R7)
RET
samebytes:
MOVD $1, R4
CMP R0, R1
CSNEG LT, R4, R4, R4 // R4 = +1 if LT holds, otherwise -1
CSEL EQ, ZR, R4, R4 // equal lengths: result is 0
MOVD R4, (R7)
RET
// Copyright 2013 The Go Authors. All rights reserved. // Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
// Routines that are implemented in assembly in asm_{amd64,386,arm,arm64,ppc64x,s390x}.s // +build !386,!amd64,!amd64p32,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle
// These routines have corresponding stubs in stubs_asm.go.
// +build mips64 mips64le package bytealg
package runtime
import _ "unsafe" // for go:linkname import _ "unsafe" // for go:linkname
func cmpstring(s1, s2 string) int { func Compare(a, b []byte) int {
l := len(s1) l := len(a)
if len(s2) < l { if len(b) < l {
l = len(s2) l = len(b)
}
if l == 0 || &a[0] == &b[0] {
goto samebytes
} }
for i := 0; i < l; i++ { for i := 0; i < l; i++ {
c1, c2 := s1[i], s2[i] c1, c2 := a[i], b[i]
if c1 < c2 { if c1 < c2 {
return -1 return -1
} }
...@@ -25,26 +25,27 @@ func cmpstring(s1, s2 string) int { ...@@ -25,26 +25,27 @@ func cmpstring(s1, s2 string) int {
return +1 return +1
} }
} }
if len(s1) < len(s2) { samebytes:
if len(a) < len(b) {
return -1 return -1
} }
if len(s1) > len(s2) { if len(a) > len(b) {
return +1 return +1
} }
return 0 return 0
} }
//go:linkname bytes_Compare bytes.Compare //go:linkname bytes_Compare bytes.Compare
func bytes_Compare(s1, s2 []byte) int { func bytes_Compare(a, b []byte) int {
l := len(s1) l := len(a)
if len(s2) < l { if len(b) < l {
l = len(s2) l = len(b)
} }
if l == 0 || &s1[0] == &s2[0] { if l == 0 || &a[0] == &b[0] {
goto samebytes goto samebytes
} }
for i := 0; i < l; i++ { for i := 0; i < l; i++ {
c1, c2 := s1[i], s2[i] c1, c2 := a[i], b[i]
if c1 < c2 { if c1 < c2 {
return -1 return -1
} }
...@@ -53,10 +54,34 @@ func bytes_Compare(s1, s2 []byte) int { ...@@ -53,10 +54,34 @@ func bytes_Compare(s1, s2 []byte) int {
} }
} }
samebytes: samebytes:
if len(s1) < len(s2) { if len(a) < len(b) {
return -1
}
if len(a) > len(b) {
return +1
}
return 0
}
//go:linkname runtime_cmpstring runtime.cmpstring
func runtime_cmpstring(a, b string) int {
l := len(a)
if len(b) < l {
l = len(b)
}
for i := 0; i < l; i++ {
c1, c2 := a[i], b[i]
if c1 < c2 {
return -1
}
if c1 > c2 {
return +1
}
}
if len(a) < len(b) {
return -1 return -1
} }
if len(s1) > len(s2) { if len(a) > len(b) {
return +1 return +1
} }
return 0 return 0
......
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build mips mipsle
#include "go_asm.h"
#include "textflag.h"
// ·Compare implements func Compare(a, b []byte) int on mips/mipsle.
// It compares a and b one byte at a time over the first
// min(len(a), len(b)) bytes and returns -1, 0 or +1. If all of those bytes
// are equal, or both slices start at the same address, the result is
// decided by the lengths alone.
TEXT ·Compare(SB),NOSPLIT,$0-28
MOVW a_base+0(FP), R3 // R3 = data pointer of a
MOVW b_base+12(FP), R4 // R4 = data pointer of b
MOVW a_len+4(FP), R1 // R1 = len(a)
MOVW b_len+16(FP), R2 // R2 = len(b)
BEQ R3, R4, samebytes // same starting pointers; compare lengths
SGTU R1, R2, R7 // R7 = 1 if len(a) > len(b)
MOVW R1, R8
CMOVN R7, R2, R8 // R8 is min(R1, R2)
ADDU R3, R8 // R3 is current byte in a, R8 is the end of a's range to compare (one past the last byte)
loop:
BEQ R3, R8, samebytes // all compared bytes were the same; compare lengths
MOVBU (R3), R6
ADDU $1, R3
MOVBU (R4), R7
ADDU $1, R4
BEQ R6, R7 , loop
// bytes differed
SGTU R6, R7, R8 // R8 = 1 if a's byte is larger
MOVW $-1, R6
CMOVZ R8, R6, R8 // otherwise R8 = -1
JMP cmp_ret
samebytes:
SGTU R1, R2, R6 // R6 = 1 if len(a) > len(b)
SGTU R2, R1, R7 // R7 = 1 if len(b) > len(a)
SUBU R7, R6, R8 // R8 = R6 - R7 = 1, 0 or -1
cmp_ret:
MOVW R8, ret+24(FP)
RET
// bytes·Compare provides the body for the bytes package's Compare symbol
// on mips/mipsle. It compares a and b one byte at a time over the first
// min(len(a), len(b)) bytes and returns -1, 0 or +1. If all of those bytes
// are equal, or both slices start at the same address, the result is
// decided by the lengths alone.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
MOVW a_base+0(FP), R3 // R3 = data pointer of a
MOVW b_base+12(FP), R4 // R4 = data pointer of b
MOVW a_len+4(FP), R1 // R1 = len(a)
MOVW b_len+16(FP), R2 // R2 = len(b)
BEQ R3, R4, samebytes // same starting pointers; compare lengths
SGTU R1, R2, R7 // R7 = 1 if len(a) > len(b)
MOVW R1, R8
CMOVN R7, R2, R8 // R8 is min(R1, R2)
ADDU R3, R8 // R3 is current byte in a, R8 is the end of a's range to compare (one past the last byte)
loop:
BEQ R3, R8, samebytes // all compared bytes were the same; compare lengths
MOVBU (R3), R6
ADDU $1, R3
MOVBU (R4), R7
ADDU $1, R4
BEQ R6, R7 , loop
// bytes differed
SGTU R6, R7, R8 // R8 = 1 if a's byte is larger
MOVW $-1, R6
CMOVZ R8, R6, R8 // otherwise R8 = -1
JMP cmp_ret
samebytes:
SGTU R1, R2, R6 // R6 = 1 if len(a) > len(b)
SGTU R2, R1, R7 // R7 = 1 if len(b) > len(a)
SUBU R7, R6, R8 // R8 = R6 - R7 = 1, 0 or -1
cmp_ret:
MOVW R8, ret+24(FP)
RET
// runtime·cmpstring provides the body for the runtime package's cmpstring
// symbol on mips/mipsle. The arguments are two strings (pointer and
// length, 8 bytes each) followed by an int result at offset 16. It
// compares a and b one byte at a time over the first min(len(a), len(b))
// bytes and returns -1, 0 or +1. If all of those bytes are equal, or both
// strings start at the same address, the result is decided by the lengths
// alone.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
MOVW a_base+0(FP), R3 // R3 = data pointer of a
MOVW a_len+4(FP), R1 // R1 = len(a)
MOVW b_base+8(FP), R4 // R4 = data pointer of b
MOVW b_len+12(FP), R2 // R2 = len(b)
BEQ R3, R4, samebytes // same starting pointers; compare lengths
SGTU R1, R2, R7 // R7 = 1 if len(a) > len(b)
MOVW R1, R8
CMOVN R7, R2, R8 // R8 is min(R1, R2)
ADDU R3, R8 // R3 is current byte in a, R8 is the end of a's range to compare (one past the last byte)
loop:
BEQ R3, R8, samebytes // all compared bytes were the same; compare lengths
MOVBU (R3), R6
ADDU $1, R3
MOVBU (R4), R7
ADDU $1, R4
BEQ R6, R7 , loop
// bytes differed
SGTU R6, R7, R8 // R8 = 1 if a's byte is larger
MOVW $-1, R6
CMOVZ R8, R6, R8 // otherwise R8 = -1
JMP cmp_ret
samebytes:
SGTU R1, R2, R6 // R6 = 1 if len(a) > len(b)
SGTU R2, R1, R7 // R7 = 1 if len(b) > len(a)
SUBU R7, R6, R8 // R8 = R6 - R7 = 1, 0 or -1
cmp_ret:
MOVW R8, ret+16(FP)
RET
// Copyright 2016 The Go Authors. All rights reserved. // Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
// +build !mips64,!mips64le // +build 386 amd64 amd64p32 s390x arm arm64 ppc64 ppc64le mips mipsle
// Declarations for routines that are implemented in noasm.go. package bytealg
package runtime //go:noescape
func Compare(a, b []byte) int
func cmpstring(s1, s2 string) int
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ppc64 ppc64le
#include "go_asm.h"
#include "textflag.h"
// ·Compare implements func Compare(a, b []byte) int on ppc64/ppc64le.
// If a and b start at the same address the result is decided right here
// from the lengths. Otherwise control branches to the endian-specific
// cmpbody with R3/R4 = lengths, R5/R6 = data pointers and R7 = address of
// the result.
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
MOVD a_base+0(FP), R5 // R5 = data pointer of a
MOVD b_base+24(FP), R6 // R6 = data pointer of b
MOVD a_len+8(FP), R3 // R3 = len(a)
CMP R5,R6,CR7 // CR7 = comparison of the two data pointers
MOVD b_len+32(FP), R4 // R4 = len(b)
MOVD $ret+48(FP), R7 // R7 = address of the result word
CMP R3,R4,CR6 // CR6 = comparison of len(a) with len(b)
BEQ CR7,equal // same pointer: skip the byte comparison
#ifdef GOARCH_ppc64le
BR cmpbodyLE<>(SB)
#else
BR cmpbodyBE<>(SB)
#endif
equal:
BEQ CR6,done // same pointer and same length: result 0
MOVD $1, R8
BGT CR6,greater // len(a) > len(b): result +1
NEG R8 // len(a) < len(b): result -1
greater:
MOVD R8, (R7)
RET
done:
MOVD $0, (R7)
RET
// bytes·Compare provides the body for the bytes package's Compare symbol
// on ppc64/ppc64le. If a and b start at the same address the result is
// decided right here from the lengths. Otherwise control branches to the
// endian-specific cmpbody with R3/R4 = lengths, R5/R6 = data pointers and
// R7 = address of the result.
TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
MOVD a_base+0(FP), R5 // R5 = data pointer of a
MOVD b_base+24(FP), R6 // R6 = data pointer of b
MOVD a_len+8(FP), R3 // R3 = len(a)
CMP R5,R6,CR7 // CR7 = comparison of the two data pointers
MOVD b_len+32(FP), R4 // R4 = len(b)
MOVD $ret+48(FP), R7 // R7 = address of the result word
CMP R3,R4,CR6 // CR6 = comparison of len(a) with len(b)
BEQ CR7,equal // same pointer: skip the byte comparison
#ifdef GOARCH_ppc64le
BR cmpbodyLE<>(SB)
#else
BR cmpbodyBE<>(SB)
#endif
equal:
BEQ CR6,done // same pointer and same length: result 0
MOVD $1, R8
BGT CR6,greater // len(a) > len(b): result +1
NEG R8 // len(a) < len(b): result -1
greater:
MOVD R8, (R7)
RET
done:
MOVD $0, (R7)
RET
// runtime·cmpstring provides the body for the runtime's string comparison on
// ppc64/ppc64le. Arguments are two (base, len) pairs followed by the result
// word at offset 32. The logic matches the Compare entry points: identical
// base pointers are resolved from the lengths alone, otherwise the
// endian-specific body compares the bytes and stores the result through R7.
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
MOVD a_base+0(FP), R5
MOVD b_base+16(FP), R6
MOVD a_len+8(FP), R3
CMP R5,R6,CR7 // CR7 = a_base vs b_base
MOVD b_len+24(FP), R4
MOVD $ret+32(FP), R7 // R7 = address of the result slot
CMP R3,R4,CR6 // CR6 = a_len vs b_len
BEQ CR7,equal // same base pointer: only the lengths matter
#ifdef GOARCH_ppc64le
BR cmpbodyLE<>(SB)
#else
BR cmpbodyBE<>(SB)
#endif
equal:
BEQ CR6,done // equal lengths -> 0
MOVD $1, R8
BGT CR6,greater // a_len > b_len -> +1
NEG R8 // a_len < b_len -> -1
greater:
MOVD R8, (R7)
RET
done:
MOVD $0, (R7)
RET
// Do an efficient memcmp for ppc64le
// R3 = a len
// R4 = b len
// R5 = a addr
// R6 = b addr
// R7 = addr of return value
//
// Compares the first min(R3, R4) bytes of a and b and stores -1, 0 or +1
// through R7. When every compared byte matches, the result comes from the
// length comparison kept in CR2.
//
// Raw BC encodings used below: "BC 12,n" branches when CR bit n is set
// (8, 9, 10 are the LT, GT, EQ bits of CR2); "BC 8,2" decrements CTR and
// branches while CTR != 0 and the CR0 EQ bit is set.
TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
MOVD R3,R8 // set up length
CMP R3,R4,CR2 // unequal?
BC 12,8,setuplen // BLT CR2
MOVD R4,R8 // use R4 for comparison len
setuplen:
MOVD R8,CTR // set up loop counter
CMP R8,$8 // only optimize >=8
BLT simplecheck
DCBT (R5) // cache hint
DCBT (R6)
CMP R8,$32 // optimize >= 32
MOVD R8,R9
BLT setup8a // 8 byte moves only
setup32a:
SRADCC $5,R8,R9 // number of 32 byte chunks
MOVD R9,CTR
// Special processing for 32 bytes or longer.
// Loading this way is faster and correct as long as the
// doublewords being compared are equal. Once they
// are found unequal, reload them in proper byte order
// to determine greater or less than.
// R16 holds the offset (relative to R5/R6) of the doubleword pair that
// cmpne must reload.
loop32a:
MOVD 0(R5),R9 // doublewords to compare
MOVD 0(R6),R10 // get 4 doublewords
MOVD 8(R5),R14
MOVD 8(R6),R15
CMPU R9,R10 // bytes equal?
MOVD $0,R16 // set up for cmpne
BNE cmpne // further compare for LT or GT
MOVD 16(R5),R9 // get next pair of doublewords
MOVD 16(R6),R10
CMPU R14,R15 // bytes match?
MOVD $8,R16 // set up for cmpne
BNE cmpne // further compare for LT or GT
MOVD 24(R5),R14 // get next pair of doublewords
MOVD 24(R6),R15
CMPU R9,R10 // bytes match?
MOVD $16,R16 // set up for cmpne
BNE cmpne // further compare for LT or GT
MOVD $-8,R16 // for cmpne, R5,R6 already inc by 32
ADD $32,R5 // bump up to next 32
ADD $32,R6
CMPU R14,R15 // bytes match?
BC 8,2,loop32a // br ctr and cr
BNE cmpne
ANDCC $24,R8,R9 // Any 8 byte chunks?
BEQ leftover // and result is 0
setup8a:
SRADCC $3,R9,R9 // get the 8 byte count
BEQ leftover // shifted value is 0
MOVD R9,CTR // loop count for doublewords
loop8:
MOVDBR (R5+R0),R9 // doublewords to compare
MOVDBR (R6+R0),R10 // LE compare order
ADD $8,R5
ADD $8,R6
CMPU R9,R10 // match?
BC 8,2,loop8 // bt ctr <> 0 && cr
BGT greater
BLT less
leftover:
ANDCC $7,R8,R9 // check for leftover bytes
MOVD R9,CTR // save the ctr
BNE simple // leftover bytes
BC 12,10,equal // test CR2 for length comparison
BC 12,8,less
BR greater
simplecheck:
CMP R8,$0 // remaining compare length 0
BNE simple // do simple compare
BC 12,10,equal // test CR2 for length comparison
BC 12,8,less // 1st len < 2nd len, result less
BR greater // 1st len > 2nd len must be greater
simple:
MOVBZ 0(R5), R9 // get byte from 1st operand
ADD $1,R5
MOVBZ 0(R6), R10 // get byte from 2nd operand
ADD $1,R6
CMPU R9, R10
BC 8,2,simple // bc ctr <> 0 && cr
BGT greater // 1st > 2nd
BLT less // 1st < 2nd
BC 12,10,equal // test CR2 for length comparison
BC 12,9,greater // 1st len > 2nd len
BR less // must be less
cmpne: // reached only when a doubleword pair differs
MOVDBR (R5+R16),R8 // reload in reverse order
MOVDBR (R6+R16),R9
CMPU R8,R9 // compare correct endianness
BGT greater // here only if NE
less:
MOVD $-1,R3
MOVD R3,(R7) // return value if A < B
RET
equal:
MOVD $0,(R7) // return value if A == B
RET
greater:
MOVD $1,R3
MOVD R3,(R7) // return value if A > B
RET
// Do an efficient memcmp for ppc64 (BE)
// R3 = a len
// R4 = b len
// R5 = a addr
// R6 = b addr
// R7 = addr of return value
//
// Compares the first min(R3, R4) bytes of a and b and stores -1, 0 or +1
// through R7. When every compared byte matches, the result comes from the
// length comparison kept in CR2. Unlike the LE body, doublewords loaded
// with MOVD can be ordered directly with CMPU.
//
// Raw BC encodings used below: "BC 12,n" branches when CR bit n is set
// (8, 9, 10 are the LT, GT, EQ bits of CR2); "BC 8,2" decrements CTR and
// branches while CTR != 0 and the CR0 EQ bit is set.
TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
MOVD R3,R8 // set up length
CMP R3,R4,CR2 // unequal?
BC 12,8,setuplen // BLT CR2
MOVD R4,R8 // use R4 for comparison len
setuplen:
MOVD R8,CTR // set up loop counter
CMP R8,$8 // only optimize >=8
BLT simplecheck
DCBT (R5) // cache hint
DCBT (R6)
CMP R8,$32 // optimize >= 32
MOVD R8,R9
BLT setup8a // 8 byte moves only
setup32a:
SRADCC $5,R8,R9 // number of 32 byte chunks
MOVD R9,CTR
loop32a:
MOVD 0(R5),R9 // doublewords to compare
MOVD 0(R6),R10 // get 4 doublewords
MOVD 8(R5),R14
MOVD 8(R6),R15
CMPU R9,R10 // bytes equal?
BLT less // found to be less
BGT greater // found to be greater
MOVD 16(R5),R9 // get next pair of doublewords
MOVD 16(R6),R10
CMPU R14,R15 // bytes match?
BLT less // found less
BGT greater // found greater
MOVD 24(R5),R14 // get next pair of doublewords
MOVD 24(R6),R15
CMPU R9,R10 // bytes match?
BLT less // found to be less
BGT greater // found to be greater
ADD $32,R5 // bump up to next 32
ADD $32,R6
CMPU R14,R15 // bytes match?
BC 8,2,loop32a // br ctr and cr
BLT less // with BE, byte ordering is
BGT greater // good for compare
ANDCC $24,R8,R9 // Any 8 byte chunks?
BEQ leftover // and result is 0
setup8a:
SRADCC $3,R9,R9 // get the 8 byte count
BEQ leftover // shifted value is 0
MOVD R9,CTR // loop count for doublewords
loop8:
MOVD (R5),R9
MOVD (R6),R10
ADD $8,R5
ADD $8,R6
CMPU R9,R10 // match?
BC 8,2,loop8 // bt ctr <> 0 && cr
BGT greater
BLT less
leftover:
ANDCC $7,R8,R9 // check for leftover bytes
MOVD R9,CTR // save the ctr
BNE simple // leftover bytes
BC 12,10,equal // test CR2 for length comparison
BC 12,8,less
BR greater
simplecheck:
CMP R8,$0 // remaining compare length 0
BNE simple // do simple compare
BC 12,10,equal // test CR2 for length comparison
BC 12,8,less // 1st len < 2nd len, result less
BR greater // 1st len > 2nd len must be greater
simple:
MOVBZ 0(R5),R9 // get byte from 1st operand
ADD $1,R5
MOVBZ 0(R6),R10 // get byte from 2nd operand
ADD $1,R6
CMPU R9,R10
BC 8,2,simple // bc ctr <> 0 && cr
BGT greater // 1st > 2nd
BLT less // 1st < 2nd
BC 12,10,equal // test CR2 for length comparison
BC 12,9,greater // 1st len > 2nd len
// otherwise 1st len < 2nd len: fall through to less
less:
MOVD $-1,R3
MOVD R3,(R7) // return value if A < B
RET
equal:
MOVD $0,(R7) // return value if A == B
RET
greater:
MOVD $1,R3
MOVD R3,(R7) // return value if A > B
RET
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// Compare implements this package's Compare(a, b []byte) int on s390x.
// It loads the slice bases and lengths into the registers cmpbody<> expects,
// puts the address of the result word in R7 and tail-branches to cmpbody<>,
// which stores -1/0/1 there and returns to the caller.
TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
MOVD a_base+0(FP), R3
MOVD a_len+8(FP), R4
MOVD b_base+24(FP), R5
MOVD b_len+32(FP), R6
LA ret+48(FP), R7 // R7 = address of the result slot
BR cmpbody<>(SB)
// bytes·Compare provides the body for package bytes' Compare(a, b []byte) int
// on s390x. It sets up R3-R7 as cmpbody<> expects and tail-branches to it;
// cmpbody<> stores -1/0/1 through R7.
TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
MOVD a_base+0(FP), R3
MOVD a_len+8(FP), R4
MOVD b_base+24(FP), R5
MOVD b_len+32(FP), R6
LA ret+48(FP), R7 // R7 = address of the result slot
BR cmpbody<>(SB)
// runtime·cmpstring provides the body for the runtime's string comparison on
// s390x. Arguments are two (base, len) pairs followed by the result word at
// offset 32. It sets up R3-R7 as cmpbody<> expects and tail-branches to it.
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
MOVD a_base+0(FP), R3
MOVD a_len+8(FP), R4
MOVD b_base+16(FP), R5
MOVD b_len+24(FP), R6
LA ret+32(FP), R7 // R7 = address of the result slot
BR cmpbody<>(SB)
// input:
// R3 = a
// R4 = alen
// R5 = b
// R6 = blen
// R7 = address of output word (stores -1/0/1 here)
//
// Compares the first min(alen, blen) bytes of a and b with CLC, 256 bytes
// at a time, and stores -1, 0 or +1 through R7. If all compared bytes match
// (or a and b are the same pointer), the lengths decide the result.
// R8 is used as the count of bytes still to compare; R3 and R5 are advanced
// past each fully compared 256-byte chunk.
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
CMPBEQ R3, R5, cmplengths // same pointer: contents are identical
MOVD R4, R8
CMPBLE R4, R6, amin
MOVD R6, R8
amin: // R8 = min(alen, blen)
CMPBEQ R8, $0, cmplengths
CMP R8, $256
BLE tail
loop:
CLC $256, 0(R3), 0(R5)
BGT gt
BLT lt
SUB $256, R8
// Advance both pointers past the chunk just compared. Without this the
// loop (and the tail) would keep comparing the first 256 bytes.
// MOVD $256(Rx), Rx is an address computation and leaves the condition
// code alone; the CMP below sets it afresh for the loop test.
MOVD $256(R3), R3
MOVD $256(R5), R5
CMP R8, $256
BGT loop
tail:
// 1 <= R8 <= 256 here. EXRL executes the CLC at cmpbodyclc<> with its
// length field taken from R8, which encodes (number of bytes - 1).
SUB $1, R8
EXRL $cmpbodyclc<>(SB), R8
BGT gt
BLT lt
cmplengths:
CMP R4, R6
BEQ eq
BLT lt
gt:
MOVD $1, 0(R7)
RET
lt:
MOVD $-1, 0(R7)
RET
eq:
MOVD $0, 0(R7)
RET
// cmpbodyclc<> holds the CLC instruction that cmpbody<> runs through EXRL.
// The $1 length written here is only a template: EXRL supplies the real
// length from the register it is given (see z/Architecture EXECUTE
// semantics). R3 and R5 are the operand addresses set up by cmpbody<>.
TEXT cmpbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0
CLC $1, 0(R3), 0(R5)
RET
...@@ -1343,143 +1343,6 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1 ...@@ -1343,143 +1343,6 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1
SETEQ ret+0(FP) SETEQ ret+0(FP)
RET RET
// runtime·cmpstring (386) loads the two string (base, len) pairs into the
// registers runtime·cmpbody expects, puts the address of the result word in
// AX and tail-jumps to cmpbody, which stores 1/0/-1 there.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
MOVL s1_base+0(FP), SI
MOVL s1_len+4(FP), BX
MOVL s2_base+8(FP), DI
MOVL s2_len+12(FP), DX
LEAL ret+16(FP), AX // AX = address of the result slot
JMP runtime·cmpbody(SB)
// bytes·Compare (386) loads the base and length words of both slice
// arguments into the registers runtime·cmpbody expects, puts the address of
// the result word in AX and tail-jumps to cmpbody, which stores 1/0/-1 there.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
MOVL s1+0(FP), SI // a base
MOVL s1+4(FP), BX // a len
MOVL s2+12(FP), DI // b base
MOVL s2+16(FP), DX // b len
LEAL ret+24(FP), AX // AX = address of the result slot
JMP runtime·cmpbody(SB)
// input:
// SI = a
// DI = b
// BX = alen
// DX = blen
// AX = address of return word (set to 1/0/-1)
//
// Compares the first min(alen, blen) bytes of a and b. The first differing
// byte decides the result; if there is none, the lengths decide.
// Inputs of 4 bytes or more use 16-byte SSE2 compares when
// runtime·support_sse2 is set, then 4-byte compares; shorter inputs use a
// single masked 4-byte load per operand.
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
MOVL DX, BP
SUBL BX, DX // DX = blen-alen
JLE 2(PC)
MOVL BX, BP // BP = min(alen, blen)
CMPL SI, DI
JEQ allsame // same pointer: only the lengths matter
CMPL BP, $4
JB small
CMPB runtime·support_sse2(SB), $1
JNE mediumloop
largeloop:
CMPL BP, $16
JB mediumloop
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, BX
XORL $0xffff, BX // convert EQ to NE
JNE diff16 // branch if at least one byte is not equal
ADDL $16, SI
ADDL $16, DI
SUBL $16, BP
JMP largeloop
diff16:
BSFL BX, BX // index of first byte that differs
XORL DX, DX
MOVB (SI)(BX*1), CX
CMPB CX, (DI)(BX*1)
SETHI DX // 1 if a's byte is above b's byte (unsigned)
LEAL -1(DX*2), DX // convert 1/0 to +1/-1
MOVL DX, (AX)
RET
mediumloop:
CMPL BP, $4
JBE _0through4
MOVL (SI), BX
MOVL (DI), CX
CMPL BX, CX
JNE diff4
ADDL $4, SI
ADDL $4, DI
SUBL $4, BP
JMP mediumloop
// At most 4 bytes left: compare the 4 bytes that end at the end of the
// compared range (they may overlap bytes already found equal).
_0through4:
MOVL -4(SI)(BP*1), BX
MOVL -4(DI)(BP*1), CX
CMPL BX, CX
JEQ allsame
// BX and CX hold 4 bytes of a and b that differ.
diff4:
BSWAPL BX // reverse order of bytes
BSWAPL CX
XORL BX, CX // find bit differences
BSRL CX, CX // index of highest bit difference
SHRL CX, BX // move a's bit to bottom
ANDL $1, BX // mask bit
LEAL -1(BX*2), BX // 1/0 => +1/-1
MOVL BX, (AX)
RET
// 0-3 bytes in common
small:
LEAL (BP*8), CX // bytes -> bits
NEGL CX // shift count used below; sets ZF when BP == 0
JEQ allsame
// load si
// If the low byte of the address is above 0xfc, load the 4 bytes that end
// at a+BP instead of the 4 bytes at a (presumably so the load cannot run
// past a page boundary), then shift the wanted bytes into place.
CMPB SI, $0xfc
JA si_high
MOVL (SI), SI
JMP si_finish
si_high:
MOVL -4(SI)(BP*1), SI
SHRL CX, SI
si_finish:
SHLL CX, SI // keep only the BP bytes of a, in the high-order bytes
// same for di
CMPB DI, $0xfc
JA di_high
MOVL (DI), DI
JMP di_finish
di_high:
MOVL -4(DI)(BP*1), DI
SHRL CX, DI
di_finish:
SHLL CX, DI
BSWAPL SI // reverse order of bytes
BSWAPL DI
XORL SI, DI // find bit differences
JEQ allsame
BSRL DI, CX // index of highest bit difference
SHRL CX, SI // move a's bit to bottom
ANDL $1, SI // mask bit
LEAL -1(SI*2), BX // 1/0 => +1/-1
MOVL BX, (AX)
RET
// all the bytes in common are the same, so we just need
// to compare the lengths.
allsame:
XORL BX, BX
XORL CX, CX
TESTL DX, DX // DX = blen-alen
SETLT BX // 1 if alen > blen
SETEQ CX // 1 if alen == blen
LEAL -1(CX)(BX*2), BX // 1,0,-1 result
MOVL BX, (AX)
RET
TEXT runtime·return0(SB), NOSPLIT, $0 TEXT runtime·return0(SB), NOSPLIT, $0
MOVL $0, AX MOVL $0, AX
RET RET
......
...@@ -1358,228 +1358,6 @@ DATA shifts<>+0xf0(SB)/8, $0x0807060504030201 ...@@ -1358,228 +1358,6 @@ DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09 DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256 GLOBL shifts<>(SB),RODATA,$256
// runtime·cmpstring (amd64) loads the two string (base, len) pairs into the
// registers runtime·cmpbody expects, puts the address of the result word in
// R9 and tail-jumps to cmpbody, which stores -1/0/1 there.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
MOVQ s1_base+0(FP), SI
MOVQ s1_len+8(FP), BX
MOVQ s2_base+16(FP), DI
MOVQ s2_len+24(FP), DX
LEAQ ret+32(FP), R9 // R9 = address of the result slot
JMP runtime·cmpbody(SB)
// bytes·Compare (amd64) loads the base and length words of both slice
// arguments into the registers runtime·cmpbody expects, puts the address of
// the result word in R9 and tail-jumps to cmpbody, which stores -1/0/1 there.
TEXT bytes·Compare(SB),NOSPLIT,$0-56
MOVQ s1+0(FP), SI // a base
MOVQ s1+8(FP), BX // a len
MOVQ s2+24(FP), DI // b base
MOVQ s2+32(FP), DX // b len
LEAQ res+48(FP), R9 // R9 = address of the result slot
JMP runtime·cmpbody(SB)
// input:
// SI = a
// DI = b
// BX = alen
// DX = blen
// R9 = address of output word (stores -1/0/1 here)
//
// Compares the first min(alen, blen) bytes of a and b. The first differing
// byte decides the result; if there is none, the lengths decide.
// Dispatch by compare length R8: fewer than 8 bytes -> small; up to 63
// bytes -> 16-byte SSE loop; 64 bytes or more -> 64-byte unrolled loop,
// using AVX2 when runtime·support_avx2 is set.
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
CMPQ SI, DI
JEQ allsame // same pointer: only the lengths matter
CMPQ BX, DX
MOVQ DX, R8
CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare
CMPQ R8, $8
JB small
CMPQ R8, $63
JBE loop
CMPB runtime·support_avx2(SB), $1
JEQ big_loop_avx2
JMP big_loop
loop:
CMPQ R8, $16
JBE _0through16
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX // convert EQ to NE
JNE diff16 // branch if at least one byte is not equal
ADDQ $16, SI
ADDQ $16, DI
SUBQ $16, R8
JMP loop
// diff64/diff48/diff32: the mismatch is in a later 16-byte lane of the
// current chunk; advance SI/DI to that lane and finish in diff16.
diff64:
ADDQ $48, SI
ADDQ $48, DI
JMP diff16
diff48:
ADDQ $32, SI
ADDQ $32, DI
JMP diff16
diff32:
ADDQ $16, SI
ADDQ $16, DI
// AX = bit mask of differences
diff16:
BSFQ AX, BX // index of first byte that differs
XORQ AX, AX
MOVB (SI)(BX*1), CX
CMPB CX, (DI)(BX*1)
SETHI AX // 1 if a's byte is above b's byte (unsigned)
LEAQ -1(AX*2), AX // convert 1/0 to +1/-1
MOVQ AX, (R9)
RET
// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
CMPQ R8, $8
JBE _0through8
MOVQ (SI), AX
MOVQ (DI), CX
CMPQ AX, CX
JNE diff8
// Compare the 8 bytes that end at the end of the compared range (they may
// overlap bytes already found equal).
_0through8:
MOVQ -8(SI)(R8*1), AX
MOVQ -8(DI)(R8*1), CX
CMPQ AX, CX
JEQ allsame
// AX and CX contain parts of a and b that differ.
diff8:
BSWAPQ AX // reverse order of bytes
BSWAPQ CX
XORQ AX, CX
BSRQ CX, CX // index of highest bit difference
SHRQ CX, AX // move a's bit to bottom
ANDQ $1, AX // mask bit
LEAQ -1(AX*2), AX // 1/0 => +1/-1
MOVQ AX, (R9)
RET
// 0-7 bytes in common
small:
LEAQ (R8*8), CX // bytes left -> bits left
NEGQ CX // - bits left (== 64 - bits left mod 64)
JEQ allsame
// load bytes of a into high bytes of SI
// If the low byte of the address is above 0xf8, load the 8 bytes that end
// at a+R8 instead of the 8 bytes at a (presumably so the load cannot run
// past a page boundary), then shift the wanted bytes into place.
CMPB SI, $0xf8
JA si_high
MOVQ (SI), SI
JMP si_finish
si_high:
MOVQ -8(SI)(R8*1), SI
SHRQ CX, SI
si_finish:
SHLQ CX, SI
// load bytes of b into high bytes of DI
CMPB DI, $0xf8
JA di_high
MOVQ (DI), DI
JMP di_finish
di_high:
MOVQ -8(DI)(R8*1), DI
SHRQ CX, DI
di_finish:
SHLQ CX, DI
BSWAPQ SI // reverse order of bytes
BSWAPQ DI
XORQ SI, DI // find bit differences
JEQ allsame
BSRQ DI, CX // index of highest bit difference
SHRQ CX, SI // move a's bit to bottom
ANDQ $1, SI // mask bit
LEAQ -1(SI*2), AX // 1/0 => +1/-1
MOVQ AX, (R9)
RET
// All compared bytes are equal (or a and b are the same pointer):
// the result depends only on the lengths.
allsame:
XORQ AX, AX
XORQ CX, CX
CMPQ BX, DX
SETGT AX // 1 if alen > blen
SETEQ CX // 1 if alen == blen
LEAQ -1(CX)(AX*2), AX // 1,0,-1 result
MOVQ AX, (R9)
RET
// this works for >= 64 bytes of data.
// Four 16-byte SSE compares per iteration.
big_loop:
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX
JNE diff16
MOVOU 16(SI), X0
MOVOU 16(DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX
JNE diff32
MOVOU 32(SI), X0
MOVOU 32(DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX
JNE diff48
MOVOU 48(SI), X0
MOVOU 48(DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX
JNE diff64
ADDQ $64, SI
ADDQ $64, DI
SUBQ $64, R8
CMPQ R8, $64
JBE loop // 64 or fewer bytes left: finish in the 16-byte loop
JMP big_loop
// Compare 64-bytes per loop iteration.
// Loop is unrolled and uses AVX2.
big_loop_avx2:
VMOVDQU (SI), Y2
VMOVDQU (DI), Y3
VMOVDQU 32(SI), Y4
VMOVDQU 32(DI), Y5
VPCMPEQB Y2, Y3, Y0
VPMOVMSKB Y0, AX
XORL $0xffffffff, AX
JNE diff32_avx2
VPCMPEQB Y4, Y5, Y6
VPMOVMSKB Y6, AX
XORL $0xffffffff, AX
JNE diff64_avx2
ADDQ $64, SI
ADDQ $64, DI
SUBQ $64, R8
CMPQ R8, $64
JB big_loop_avx2_exit
JMP big_loop_avx2
// Avoid AVX->SSE transition penalty and search first 32 bytes of 64 byte chunk.
// AX holds a 32-bit mismatch mask; diff16 picks its lowest set bit.
diff32_avx2:
VZEROUPPER
JMP diff16
// Same as diff32_avx2, but for last 32 bytes.
diff64_avx2:
VZEROUPPER
JMP diff48
// For <64 bytes remainder jump to normal loop.
big_loop_avx2_exit:
VZEROUPPER
JMP loop
TEXT strings·indexShortStr(SB),NOSPLIT,$0-40 TEXT strings·indexShortStr(SB),NOSPLIT,$0-40
MOVQ s+0(FP), DI MOVQ s+0(FP), DI
// We want len in DX and AX, because PCMPESTRI implicitly consumes them // We want len in DX and AX, because PCMPESTRI implicitly consumes them
......
...@@ -575,142 +575,6 @@ TEXT runtime·aeshash64(SB),NOSPLIT,$0-12 ...@@ -575,142 +575,6 @@ TEXT runtime·aeshash64(SB),NOSPLIT,$0-12
MOVL AX, ret+8(FP) MOVL AX, ret+8(FP)
RET RET
// runtime·cmpstring (amd64p32) loads the two string (base, len) pairs into
// the registers runtime·cmpbody expects, calls it, and stores the 1/0/-1
// result returned in AX into the result slot.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
MOVL s1_base+0(FP), SI
MOVL s1_len+4(FP), BX
MOVL s2_base+8(FP), DI
MOVL s2_len+12(FP), DX
CALL runtime·cmpbody(SB)
MOVL AX, ret+16(FP)
RET
// bytes·Compare (amd64p32) loads the base and length words of both slice
// arguments into the registers runtime·cmpbody expects, calls it, and stores
// the 1/0/-1 result returned in AX into the result slot.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
MOVL s1+0(FP), SI // a base
MOVL s1+4(FP), BX // a len
MOVL s2+12(FP), DI // b base
MOVL s2+16(FP), DX // b len
CALL runtime·cmpbody(SB)
MOVL AX, res+24(FP)
RET
// input:
// SI = a
// DI = b
// BX = alen
// DX = blen
// output:
// AX = 1/0/-1
//
// Compares the first min(alen, blen) bytes of a and b. The first differing
// byte decides the result; if there is none, the lengths decide.
// Fewer than 8 bytes -> small; otherwise a 16-byte SSE loop followed by one
// or two 8-byte compares. Pointers are adjusted with explicit ADDQ before
// each access rather than with indexed addressing.
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
CMPQ SI, DI
JEQ allsame // same pointer: only the lengths matter
CMPQ BX, DX
MOVQ DX, R8
CMOVQLT BX, R8 // R8 = min(alen, blen) = # of bytes to compare
CMPQ R8, $8
JB small
loop:
CMPQ R8, $16
JBE _0through16
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORQ $0xffff, AX // convert EQ to NE
JNE diff16 // branch if at least one byte is not equal
ADDQ $16, SI
ADDQ $16, DI
SUBQ $16, R8
JMP loop
// AX = bit mask of differences
diff16:
BSFQ AX, BX // index of first byte that differs
XORQ AX, AX
ADDQ BX, SI
MOVB (SI), CX
ADDQ BX, DI
CMPB CX, (DI)
SETHI AX // 1 if a's byte is above b's byte (unsigned)
LEAQ -1(AX*2), AX // convert 1/0 to +1/-1
RET
// 0 through 16 bytes left, alen>=8, blen>=8
_0through16:
CMPQ R8, $8
JBE _0through8
MOVQ (SI), AX
MOVQ (DI), CX
CMPQ AX, CX
JNE diff8
// Compare the 8 bytes that end at the end of the compared range (they may
// overlap bytes already found equal).
_0through8:
ADDQ R8, SI
ADDQ R8, DI
MOVQ -8(SI), AX
MOVQ -8(DI), CX
CMPQ AX, CX
JEQ allsame
// AX and CX contain parts of a and b that differ.
diff8:
BSWAPQ AX // reverse order of bytes
BSWAPQ CX
XORQ AX, CX
BSRQ CX, CX // index of highest bit difference
SHRQ CX, AX // move a's bit to bottom
ANDQ $1, AX // mask bit
LEAQ -1(AX*2), AX // 1/0 => +1/-1
RET
// 0-7 bytes in common
small:
LEAQ (R8*8), CX // bytes left -> bits left
NEGQ CX // - bits left (== 64 - bits left mod 64)
JEQ allsame
// load bytes of a into high bytes of SI
// If the low byte of the address is above 0xf8, load the 8 bytes that end
// at a+R8 instead of the 8 bytes at a (presumably so the load cannot run
// past a page boundary), then shift the wanted bytes into place.
CMPB SI, $0xf8
JA si_high
MOVQ (SI), SI
JMP si_finish
si_high:
ADDQ R8, SI
MOVQ -8(SI), SI
SHRQ CX, SI
si_finish:
SHLQ CX, SI
// load bytes of b into high bytes of DI
CMPB DI, $0xf8
JA di_high
MOVQ (DI), DI
JMP di_finish
di_high:
ADDQ R8, DI
MOVQ -8(DI), DI
SHRQ CX, DI
di_finish:
SHLQ CX, DI
BSWAPQ SI // reverse order of bytes
BSWAPQ DI
XORQ SI, DI // find bit differences
JEQ allsame
BSRQ DI, CX // index of highest bit difference
SHRQ CX, SI // move a's bit to bottom
ANDQ $1, SI // mask bit
LEAQ -1(SI*2), AX // 1/0 => +1/-1
RET
// All compared bytes are equal (or a and b are the same pointer):
// the result depends only on the lengths.
allsame:
XORQ AX, AX
XORQ CX, CX
CMPQ BX, DX
SETGT AX // 1 if alen > blen
SETEQ CX // 1 if alen == blen
LEAQ -1(CX)(AX*2), AX // 1,0,-1 result
RET
TEXT runtime·return0(SB), NOSPLIT, $0 TEXT runtime·return0(SB), NOSPLIT, $0
MOVL $0, AX MOVL $0, AX
RET RET
......
...@@ -801,59 +801,6 @@ TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0 ...@@ -801,59 +801,6 @@ TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
MOVW $0, R0 MOVW $0, R0
MOVW (R0), R1 MOVW (R0), R1
// runtime·cmpstring (arm) loads the two string (base, len) pairs into the
// registers runtime·cmpbody expects and tail-branches to it. R7 is set to
// R13+20, which cmpbody uses as the address of the result word
// (presumably the ret slot of this NOFRAME function - confirm against the
// frame layout).
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-20
MOVW s1_base+0(FP), R2
MOVW s1_len+4(FP), R0
MOVW s2_base+8(FP), R3
MOVW s2_len+12(FP), R1
ADD $20, R13, R7
B runtime·cmpbody(SB)
// bytes·Compare (arm) loads the base and length words of both slice
// arguments into the registers runtime·cmpbody expects and tail-branches to
// it. R7 is set to R13+28, which cmpbody uses as the address of the result
// word (presumably the ret slot of this NOFRAME function - confirm against
// the frame layout).
TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-28
MOVW s1+0(FP), R2 // a base
MOVW s1+4(FP), R0 // a len
MOVW s2+12(FP), R3 // b base
MOVW s2+16(FP), R1 // b len
ADD $28, R13, R7
B runtime·cmpbody(SB)
// On entry:
// R0 is the length of s1
// R1 is the length of s2
// R2 points to the start of s1
// R3 points to the start of s2
// R7 points to return value (-1/0/1 will be written here)
//
// On exit:
// R4, R5, and R6 are clobbered
//
// Byte-at-a-time compare of the first min(R0, R1) bytes. The first
// differing byte decides the result; if there is none (or s1 and s2 are
// the same pointer), the lengths decide. R0 is also overwritten with the
// result before it is stored through R7.
TEXT runtime·cmpbody(SB),NOSPLIT|NOFRAME,$0-0
CMP R2, R3
BEQ samebytes // same starting pointers; compare lengths
CMP R0, R1
MOVW R0, R6
MOVW.LT R1, R6 // R6 is min(R0, R1)
ADD R2, R6 // R2 is current byte in s1, R6 is last byte in s1 to compare
loop:
CMP R2, R6
BEQ samebytes // all compared bytes were the same; compare lengths
MOVBU.P 1(R2), R4 // load next byte of s1, post-increment R2
MOVBU.P 1(R3), R5 // load next byte of s2, post-increment R3
CMP R4, R5
BEQ loop
// bytes differed
// Exactly one of the two conditional moves executes, giving +1 or -1.
MOVW.LT $1, R0
MOVW.GT $-1, R0
MOVW R0, (R7)
RET
samebytes:
// Result from the length comparison: +1, -1 or 0.
CMP R0, R1
MOVW.LT $1, R0
MOVW.GT $-1, R0
MOVW.EQ $0, R0
MOVW R0, (R7)
RET
TEXT runtime·return0(SB),NOSPLIT,$0 TEXT runtime·return0(SB),NOSPLIT,$0
MOVW $0, R0 MOVW $0, R0
RET RET
......
...@@ -712,58 +712,6 @@ TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0 ...@@ -712,58 +712,6 @@ TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
B (ZR) B (ZR)
UNDEF UNDEF
// runtime·cmpstring (arm64) loads the two string (base, len) pairs into the
// registers runtime·cmpbody<> expects and tail-branches to it. R7 is set to
// RSP+40, which cmpbody<> uses as the address of the result word
// (presumably the ret slot of this NOFRAME function - confirm against the
// frame layout).
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
MOVD s1_base+0(FP), R2
MOVD s1_len+8(FP), R0
MOVD s2_base+16(FP), R3
MOVD s2_len+24(FP), R1
ADD $40, RSP, R7
B runtime·cmpbody<>(SB)
// bytes·Compare (arm64) loads the base and length words of both slice
// arguments into the registers runtime·cmpbody<> expects and tail-branches
// to it. R7 is set to RSP+56, which cmpbody<> uses as the address of the
// result word (presumably the ret slot of this NOFRAME function - confirm
// against the frame layout).
TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
MOVD s1+0(FP), R2 // a base
MOVD s1+8(FP), R0 // a len
MOVD s2+24(FP), R3 // b base
MOVD s2+32(FP), R1 // b len
ADD $56, RSP, R7
B runtime·cmpbody<>(SB)
// On entry:
// R0 is the length of s1
// R1 is the length of s2
// R2 points to the start of s1
// R3 points to the start of s2
// R7 points to return value (-1/0/1 will be written here)
//
// On exit:
// R4, R5, and R6 are clobbered
//
// Byte-at-a-time compare of the first min(R0, R1) bytes. The first
// differing byte decides the result; if there is none (or s1 and s2 are
// the same pointer), the lengths decide.
TEXT runtime·cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
CMP R2, R3
BEQ samebytes // same starting pointers; compare lengths
CMP R0, R1
CSEL LT, R1, R0, R6 // R6 is min(R0, R1)
ADD R2, R6 // R2 is current byte in s1, R6 is last byte in s1 to compare
loop:
CMP R2, R6
BEQ samebytes // all compared bytes were the same; compare lengths
MOVBU.P 1(R2), R4 // load next byte of s1, post-increment R2
MOVBU.P 1(R3), R5 // load next byte of s2, post-increment R3
CMP R4, R5
BEQ loop
// bytes differed
MOVD $1, R4
CSNEG LT, R4, R4, R4 // R4 = +1 if LT holds, else -1
MOVD R4, (R7)
RET
samebytes:
MOVD $1, R4
CMP R0, R1
CSNEG LT, R4, R4, R4 // R4 = +1 if LT holds, else -1
CSEL EQ, ZR, R4, R4 // equal lengths override the result with 0
MOVD R4, (R7)
RET
TEXT runtime·return0(SB), NOSPLIT, $0 TEXT runtime·return0(SB), NOSPLIT, $0
MOVW $0, R0 MOVW $0, R0
RET RET
......
...@@ -633,69 +633,6 @@ TEXT runtime·aeshash64(SB),NOSPLIT,$0 ...@@ -633,69 +633,6 @@ TEXT runtime·aeshash64(SB),NOSPLIT,$0
// Not implemented. // Not implemented.
TEXT runtime·aeshashstr(SB),NOSPLIT,$0 TEXT runtime·aeshashstr(SB),NOSPLIT,$0
UNDEF UNDEF
// runtime·cmpstring (mips/mipsle) compares two strings byte by byte and
// stores -1, 0 or +1 in ret. The first differing byte among the first
// min(s1_len, s2_len) bytes decides the result; if there is none (or both
// strings start at the same address), the lengths decide.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
MOVW s1_base+0(FP), R3
MOVW s1_len+4(FP), R1
MOVW s2_base+8(FP), R4
MOVW s2_len+12(FP), R2
BEQ R3, R4, samebytes // same pointer: only the lengths matter
SGTU R1, R2, R7 // R7 = 1 if s1_len > s2_len
MOVW R1, R8
CMOVN R7, R2, R8 // R8 is min(R1, R2)
ADDU R3, R8 // R3 is current byte in s1, R8 is last byte in s1 to compare
loop:
BEQ R3, R8, samebytes // all compared bytes were the same; compare lengths
MOVBU (R3), R6
ADDU $1, R3
MOVBU (R4), R7
ADDU $1, R4
BEQ R6, R7 , loop
// bytes differed
SGTU R6, R7, R8 // R8 = 1 if s1's byte > s2's byte
MOVW $-1, R6
CMOVZ R8, R6, R8 // otherwise R8 = -1
JMP cmp_ret
samebytes:
SGTU R1, R2, R6 // R6 = 1 if s1_len > s2_len
SGTU R2, R1, R7 // R7 = 1 if s2_len > s1_len
SUBU R7, R6, R8 // R8 = R6 - R7: +1, 0 or -1
cmp_ret:
MOVW R8, ret+16(FP)
RET
// bytes·Compare (mips/mipsle) compares two byte slices byte by byte and
// stores -1, 0 or +1 in ret. The first differing byte among the first
// min(s1_len, s2_len) bytes decides the result; if there is none (or both
// slices start at the same address), the lengths decide.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
MOVW s1_base+0(FP), R3
MOVW s2_base+12(FP), R4
MOVW s1_len+4(FP), R1
MOVW s2_len+16(FP), R2
BEQ R3, R4, samebytes // same pointer: only the lengths matter
SGTU R1, R2, R7 // R7 = 1 if s1_len > s2_len
MOVW R1, R8
CMOVN R7, R2, R8 // R8 is min(R1, R2)
ADDU R3, R8 // R3 is current byte in s1, R8 is last byte in s1 to compare
loop:
BEQ R3, R8, samebytes // all compared bytes were the same; compare lengths
MOVBU (R3), R6
ADDU $1, R3
MOVBU (R4), R7
ADDU $1, R4
BEQ R6, R7 , loop
// bytes differed
SGTU R6, R7, R8 // R8 = 1 if s1's byte > s2's byte
MOVW $-1, R6
CMOVZ R8, R6, R8 // otherwise R8 = -1
JMP cmp_ret
samebytes:
SGTU R1, R2, R6 // R6 = 1 if s1_len > s2_len
SGTU R2, R1, R7 // R7 = 1 if s2_len > s1_len
SUBU R7, R6, R8 // R8 = R6 - R7: +1, 0 or -1
cmp_ret:
MOVW R8, ret+24(FP)
RET
TEXT runtime·return0(SB),NOSPLIT,$0 TEXT runtime·return0(SB),NOSPLIT,$0
MOVW $0, R1 MOVW $0, R1
......
...@@ -738,276 +738,6 @@ TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0 ...@@ -738,276 +738,6 @@ TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0 TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
MOVW (R0), R1 MOVW (R0), R1
// Do an efficient memcmp for ppc64le
// R3 = s1 len
// R4 = s2 len
// R5 = s1 addr
// R6 = s2 addr
// R7 = addr of return value
//
// Compares the first min(R3, R4) bytes of s1 and s2 and stores -1, 0 or +1
// through R7. When every compared byte matches, the result comes from the
// length comparison kept in CR2.
//
// Raw BC encodings used below: "BC 12,n" branches when CR bit n is set
// (8, 9, 10 are the LT, GT, EQ bits of CR2); "BC 8,2" decrements CTR and
// branches while CTR != 0 and the CR0 EQ bit is set.
TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
MOVD R3,R8 // set up length
CMP R3,R4,CR2 // unequal?
BC 12,8,setuplen // BLT CR2
MOVD R4,R8 // use R4 for comparison len
setuplen:
MOVD R8,CTR // set up loop counter
CMP R8,$8 // only optimize >=8
BLT simplecheck
DCBT (R5) // cache hint
DCBT (R6)
CMP R8,$32 // optimize >= 32
MOVD R8,R9
BLT setup8a // 8 byte moves only
setup32a:
SRADCC $5,R8,R9 // number of 32 byte chunks
MOVD R9,CTR
// Special processing for 32 bytes or longer.
// Loading this way is faster and correct as long as the
// doublewords being compared are equal. Once they
// are found unequal, reload them in proper byte order
// to determine greater or less than.
// R16 holds the offset (relative to R5/R6) of the doubleword pair that
// cmpne must reload.
loop32a:
MOVD 0(R5),R9 // doublewords to compare
MOVD 0(R6),R10 // get 4 doublewords
MOVD 8(R5),R14
MOVD 8(R6),R15
CMPU R9,R10 // bytes equal?
MOVD $0,R16 // set up for cmpne
BNE cmpne // further compare for LT or GT
MOVD 16(R5),R9 // get next pair of doublewords
MOVD 16(R6),R10
CMPU R14,R15 // bytes match?
MOVD $8,R16 // set up for cmpne
BNE cmpne // further compare for LT or GT
MOVD 24(R5),R14 // get next pair of doublewords
MOVD 24(R6),R15
CMPU R9,R10 // bytes match?
MOVD $16,R16 // set up for cmpne
BNE cmpne // further compare for LT or GT
MOVD $-8,R16 // for cmpne, R5,R6 already inc by 32
ADD $32,R5 // bump up to next 32
ADD $32,R6
CMPU R14,R15 // bytes match?
BC 8,2,loop32a // br ctr and cr
BNE cmpne
ANDCC $24,R8,R9 // Any 8 byte chunks?
BEQ leftover // and result is 0
setup8a:
SRADCC $3,R9,R9 // get the 8 byte count
BEQ leftover // shifted value is 0
MOVD R9,CTR // loop count for doublewords
loop8:
MOVDBR (R5+R0),R9 // doublewords to compare
MOVDBR (R6+R0),R10 // LE compare order
ADD $8,R5
ADD $8,R6
CMPU R9,R10 // match?
BC 8,2,loop8 // bt ctr <> 0 && cr
BGT greater
BLT less
leftover:
ANDCC $7,R8,R9 // check for leftover bytes
MOVD R9,CTR // save the ctr
BNE simple // leftover bytes
BC 12,10,equal // test CR2 for length comparison
BC 12,8,less
BR greater
simplecheck:
CMP R8,$0 // remaining compare length 0
BNE simple // do simple compare
BC 12,10,equal // test CR2 for length comparison
BC 12,8,less // 1st len < 2nd len, result less
BR greater // 1st len > 2nd len must be greater
simple:
MOVBZ 0(R5), R9 // get byte from 1st operand
ADD $1,R5
MOVBZ 0(R6), R10 // get byte from 2nd operand
ADD $1,R6
CMPU R9, R10
BC 8,2,simple // bc ctr <> 0 && cr
BGT greater // 1st > 2nd
BLT less // 1st < 2nd
BC 12,10,equal // test CR2 for length comparison
BC 12,9,greater // 1st len > 2nd len
BR less // must be less
cmpne: // reached only when a doubleword pair differs
MOVDBR (R5+R16),R8 // reload in reverse order
MOVDBR (R6+R16),R9
CMPU R8,R9 // compare correct endianness
BGT greater // here only if NE
less:
MOVD $-1,R3
MOVD R3,(R7) // return value if A < B
RET
equal:
MOVD $0,(R7) // return value if A == B
RET
greater:
MOVD $1,R3
MOVD R3,(R7) // return value if A > B
RET
// Do an efficient memcmp for ppc64 (BE)
// R3 = s1 len
// R4 = s2 len
// R5 = s1 addr
// R6 = s2 addr
// R7 = addr of return value
//
// Compares the first min(R3, R4) bytes of s1 and s2 and stores -1, 0 or +1
// through R7. When every compared byte matches, the result comes from the
// length comparison kept in CR2. Unlike the LE body, doublewords loaded
// with MOVD can be ordered directly with CMPU.
//
// Raw BC encodings used below: "BC 12,n" branches when CR bit n is set
// (8, 9, 10 are the LT, GT, EQ bits of CR2); "BC 8,2" decrements CTR and
// branches while CTR != 0 and the CR0 EQ bit is set.
TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
MOVD R3,R8 // set up length
CMP R3,R4,CR2 // unequal?
BC 12,8,setuplen // BLT CR2
MOVD R4,R8 // use R4 for comparison len
setuplen:
MOVD R8,CTR // set up loop counter
CMP R8,$8 // only optimize >=8
BLT simplecheck
DCBT (R5) // cache hint
DCBT (R6)
CMP R8,$32 // optimize >= 32
MOVD R8,R9
BLT setup8a // 8 byte moves only
setup32a:
SRADCC $5,R8,R9 // number of 32 byte chunks
MOVD R9,CTR
loop32a:
MOVD 0(R5),R9 // doublewords to compare
MOVD 0(R6),R10 // get 4 doublewords
MOVD 8(R5),R14
MOVD 8(R6),R15
CMPU R9,R10 // bytes equal?
BLT less // found to be less
BGT greater // found to be greater
MOVD 16(R5),R9 // get next pair of doublewords
MOVD 16(R6),R10
CMPU R14,R15 // bytes match?
BLT less // found less
BGT greater // found greater
MOVD 24(R5),R14 // get next pair of doublewords
MOVD 24(R6),R15
CMPU R9,R10 // bytes match?
BLT less // found to be less
BGT greater // found to be greater
ADD $32,R5 // bump up to next 32
ADD $32,R6
CMPU R14,R15 // bytes match?
BC 8,2,loop32a // br ctr and cr
BLT less // with BE, byte ordering is
BGT greater // good for compare
ANDCC $24,R8,R9 // Any 8 byte chunks?
BEQ leftover // and result is 0
setup8a:
SRADCC $3,R9,R9 // get the 8 byte count
BEQ leftover // shifted value is 0
MOVD R9,CTR // loop count for doublewords
loop8:
MOVD (R5),R9
MOVD (R6),R10
ADD $8,R5
ADD $8,R6
CMPU R9,R10 // match?
BC 8,2,loop8 // bt ctr <> 0 && cr
BGT greater
BLT less
leftover:
ANDCC $7,R8,R9 // check for leftover bytes
MOVD R9,CTR // save the ctr
BNE simple // leftover bytes
BC 12,10,equal // test CR2 for length comparison
BC 12,8,less
BR greater
simplecheck:
CMP R8,$0 // remaining compare length 0
BNE simple // do simple compare
BC 12,10,equal // test CR2 for length comparison
BC 12,8,less // 1st len < 2nd len, result less
BR greater // 1st len > 2nd len must be greater
simple:
MOVBZ 0(R5),R9 // get byte from 1st operand
ADD $1,R5
MOVBZ 0(R6),R10 // get byte from 2nd operand
ADD $1,R6
CMPU R9,R10
BC 8,2,simple // bc ctr <> 0 && cr
BGT greater // 1st > 2nd
BLT less // 1st < 2nd
BC 12,10,equal // test CR2 for length comparison
BC 12,9,greater // 1st len > 2nd len
// otherwise 1st len < 2nd len: fall through to less
less:
MOVD $-1,R3
MOVD R3,(R7) // return value if A < B
RET
equal:
MOVD $0,(R7) // return value if A == B
RET
greater:
MOVD $1,R3
MOVD R3,(R7) // return value if A > B
RET
// runtime·cmpstring (ppc64/ppc64le) compares two strings and stores -1, 0 or
// +1 in ret. The base-pointer comparison goes to CR7 and the length
// comparison to CR6; identical base pointers are resolved from the lengths
// alone, otherwise the endian-specific body compares the bytes and stores
// the result through R7.
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
MOVD s1_base+0(FP), R5
MOVD s2_base+16(FP), R6
MOVD s1_len+8(FP), R3
CMP R5,R6,CR7 // CR7 = s1_base vs s2_base
MOVD s2_len+24(FP), R4
MOVD $ret+32(FP), R7 // R7 = address of the result slot
CMP R3,R4,CR6 // CR6 = s1_len vs s2_len
BEQ CR7,equal // same base pointer: only the lengths matter
notequal:
#ifdef GOARCH_ppc64le
BR cmpbodyLE<>(SB)
#else
BR cmpbodyBE<>(SB)
#endif
equal:
BEQ CR6,done // equal lengths -> 0
MOVD $1, R8
BGT CR6,greater // s1_len > s2_len -> +1
NEG R8 // s1_len < s2_len -> -1
greater:
MOVD R8, (R7)
RET
done:
MOVD $0, (R7)
RET
// bytes·Compare (ppc64/ppc64le) compares two byte slices and stores -1, 0 or
// +1 in ret. The base-pointer comparison goes to CR7 and the length
// comparison to CR6; identical base pointers are resolved from the lengths
// alone, otherwise the endian-specific body compares the bytes and stores
// the result through R7.
TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
MOVD s1+0(FP), R5 // a base
MOVD s2+24(FP), R6 // b base
MOVD s1+8(FP), R3 // a len
CMP R5,R6,CR7 // CR7 = a base vs b base
MOVD s2+32(FP), R4 // b len
MOVD $ret+48(FP), R7 // R7 = address of the result slot
CMP R3,R4,CR6 // CR6 = a len vs b len
BEQ CR7,equal // same base pointer: only the lengths matter
#ifdef GOARCH_ppc64le
BR cmpbodyLE<>(SB)
#else
BR cmpbodyBE<>(SB)
#endif
equal:
BEQ CR6,done // equal lengths -> 0
MOVD $1, R8
BGT CR6,greater // a len > b len -> +1
NEG R8 // a len < b len -> -1
greater:
MOVD R8, (R7)
RET
done:
MOVD $0, (R7)
RET
TEXT runtime·return0(SB), NOSPLIT, $0 TEXT runtime·return0(SB), NOSPLIT, $0
MOVW $0, R3 MOVW $0, R3
RET RET
......
...@@ -796,67 +796,6 @@ TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0 ...@@ -796,67 +796,6 @@ TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
// compile barrier. // compile barrier.
RET RET
// runtime·cmpstring (s390x) loads the two string (base, len) pairs into the
// registers runtime·cmpbody expects, puts the address of the result word in
// R7 and tail-branches to cmpbody, which stores -1/0/1 there.
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
MOVD s1_base+0(FP), R3
MOVD s1_len+8(FP), R4
MOVD s2_base+16(FP), R5
MOVD s2_len+24(FP), R6
LA ret+32(FP), R7 // R7 = address of the result slot
BR runtime·cmpbody(SB)
// bytes·Compare (s390x) loads the base and length words of both slice
// arguments into the registers runtime·cmpbody expects, puts the address of
// the result word in R7 and tail-branches to cmpbody, which stores -1/0/1
// there.
TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
MOVD s1+0(FP), R3 // a base
MOVD s1+8(FP), R4 // a len
MOVD s2+24(FP), R5 // b base
MOVD s2+32(FP), R6 // b len
LA res+48(FP), R7 // R7 = address of the result slot
BR runtime·cmpbody(SB)
// input:
// R3 = a
// R4 = alen
// R5 = b
// R6 = blen
// R7 = address of output word (stores -1/0/1 here)
//
// Compares the first min(alen, blen) bytes of a and b with CLC, 256 bytes
// at a time, and stores -1, 0 or +1 through R7. If all compared bytes match
// (or a and b are the same pointer), the lengths decide the result.
// R8 is used as the count of bytes still to compare; R3 and R5 are advanced
// past each fully compared 256-byte chunk.
TEXT runtime·cmpbody(SB),NOSPLIT|NOFRAME,$0-0
CMPBEQ R3, R5, cmplengths // same pointer: contents are identical
MOVD R4, R8
CMPBLE R4, R6, amin
MOVD R6, R8
amin: // R8 = min(alen, blen)
CMPBEQ R8, $0, cmplengths
CMP R8, $256
BLE tail
loop:
CLC $256, 0(R3), 0(R5)
BGT gt
BLT lt
SUB $256, R8
// Advance both pointers past the chunk just compared. Without this the
// loop (and the tail) would keep comparing the first 256 bytes.
// MOVD $256(Rx), Rx is an address computation and leaves the condition
// code alone; the CMP below sets it afresh for the loop test.
MOVD $256(R3), R3
MOVD $256(R5), R5
CMP R8, $256
BGT loop
tail:
// 1 <= R8 <= 256 here. EXRL executes the CLC at runtime·cmpbodyclc with
// its length field taken from R8, which encodes (number of bytes - 1).
SUB $1, R8
EXRL $runtime·cmpbodyclc(SB), R8
BGT gt
BLT lt
cmplengths:
CMP R4, R6
BEQ eq
BLT lt
gt:
MOVD $1, 0(R7)
RET
lt:
MOVD $-1, 0(R7)
RET
eq:
MOVD $0, 0(R7)
RET
// runtime·cmpbodyclc holds the CLC instruction that runtime·cmpbody runs
// through EXRL. The $1 length written here is only a template: EXRL
// supplies the real length from the register it is given (see
// z/Architecture EXECUTE semantics). R3 and R5 are the operand addresses
// set up by cmpbody.
TEXT runtime·cmpbodyclc(SB),NOSPLIT|NOFRAME,$0-0
CLC $1, 0(R3), 0(R5)
RET
// func supportsVX() bool // func supportsVX() bool
TEXT strings·supportsVX(SB),NOSPLIT,$0-1 TEXT strings·supportsVX(SB),NOSPLIT,$0-1
MOVBZ runtime·cpu+facilities_hasVX(SB), R0 MOVBZ runtime·cpu+facilities_hasVX(SB), R0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment