Commit 44877315 authored by fanzha02's avatar fanzha02 Committed by Brad Fitzpatrick

crypto/sha1: optimize arm64 sha1 implemention

Optimize with ARMv8 SHA1 instructions.
Results (Cortex-A72)

name             old time/op    new time/op     delta
Hash8Bytes-64      1.06µs ± 4%     0.56µs ± 4%   -47.19%  (p=0.008 n=5+5)
Hash320Bytes-64    3.92µs ± 1%     0.82µs ± 2%   -79.07%  (p=0.008 n=5+5)
Hash1K-64          10.2µs ± 2%      1.5µs ± 2%   -85.71%  (p=0.008 n=5+5)
Hash8K-64          73.9µs ± 1%      7.6µs ± 1%   -89.66%  (p=0.008 n=5+5)

name             old speed      new speed       delta
Hash8Bytes-64    7.55MB/s ± 4%  14.29MB/s ± 4%   +89.27%  (p=0.008 n=5+5)
Hash320Bytes-64  81.6MB/s ± 1%  390.0MB/s ± 2%  +377.64%  (p=0.008 n=5+5)
Hash1K-64         100MB/s ± 2%    701MB/s ± 2%  +599.65%  (p=0.008 n=5+5)
Hash8K-64         111MB/s ± 1%   1072MB/s ± 1%  +867.44%  (p=0.008 n=5+5)

Change-Id: I84397f980db9518f4150ac4c5ffa2c5a97a34444
Reviewed-on: https://go-review.googlesource.com/61550Reviewed-by: 's avatarBrad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 2955a8a6
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sha1
import "internal/cpu"
var k = []uint32{
0x5A827999,
0x6ED9EBA1,
0x8F1BBCDC,
0xCA62C1D6,
}
var hasSHA1 = cpu.ARM64.HasSHA1
func sha1block(h []uint32, p []byte, k []uint32)
func block(dig *digest, p []byte) {
if !hasSHA1 {
blockGeneric(dig, p)
} else {
h := dig.h[:]
sha1block(h, p, k)
}
}
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
#define HASHUPDATECHOOSE \
SHA1C V16.S4, V1, V2 \
SHA1H V3, V1 \
VMOV V2.B16, V3.B16
#define HASHUPDATEPARITY \
SHA1P V16.S4, V1, V2 \
SHA1H V3, V1 \
VMOV V2.B16, V3.B16
#define HASHUPDATEMAJ \
SHA1M V16.S4, V1, V2 \
SHA1H V3, V1 \
VMOV V2.B16, V3.B16
// func sha1block(h []uint32, p []byte, k []uint32)
TEXT ·sha1block(SB),NOSPLIT,$0
MOVD h_base+0(FP), R0 // hash value fisrt address
MOVD p_base+24(FP), R1 // message first address
MOVD k_base+48(FP), R2 // k constants first address
MOVD p_len+32(FP), R3 // message length
VLD1.P 16(R0), [V0.S4]
VMOVS (R0), V20
SUB $16, R0, R0
blockloop:
VLD1.P 16(R1), [V4.B16] // load message
VLD1.P 16(R1), [V5.B16]
VLD1.P 16(R1), [V6.B16]
VLD1.P 16(R1), [V7.B16]
VLD1 (R2), [V19.S4] // load constant k0-k79
VMOV V0.B16, V2.B16
VMOV V20.S[0], V1
VMOV V2.B16, V3.B16
VDUP V19.S[0], V17.S4
VREV32 V4.B16, V4.B16 // prepare for using message in Byte format
VREV32 V5.B16, V5.B16
VREV32 V6.B16, V6.B16
VREV32 V7.B16, V7.B16
VDUP V19.S[1], V18.S4
VADD V17.S4, V4.S4, V16.S4
SHA1SU0 V6.S4, V5.S4, V4.S4
HASHUPDATECHOOSE
SHA1SU1 V7.S4, V4.S4
VADD V17.S4, V5.S4, V16.S4
SHA1SU0 V7.S4, V6.S4, V5.S4
HASHUPDATECHOOSE
SHA1SU1 V4.S4, V5.S4
VADD V17.S4, V6.S4, V16.S4
SHA1SU0 V4.S4, V7.S4, V6.S4
HASHUPDATECHOOSE
SHA1SU1 V5.S4, V6.S4
VADD V17.S4, V7.S4, V16.S4
SHA1SU0 V5.S4, V4.S4, V7.S4
HASHUPDATECHOOSE
SHA1SU1 V6.S4, V7.S4
VADD V17.S4, V4.S4, V16.S4
SHA1SU0 V6.S4, V5.S4, V4.S4
HASHUPDATECHOOSE
SHA1SU1 V7.S4, V4.S4
VDUP V19.S[2], V17.S4
VADD V18.S4, V5.S4, V16.S4
SHA1SU0 V7.S4, V6.S4, V5.S4
HASHUPDATEPARITY
SHA1SU1 V4.S4, V5.S4
VADD V18.S4, V6.S4, V16.S4
SHA1SU0 V4.S4, V7.S4, V6.S4
HASHUPDATEPARITY
SHA1SU1 V5.S4, V6.S4
VADD V18.S4, V7.S4, V16.S4
SHA1SU0 V5.S4, V4.S4, V7.S4
HASHUPDATEPARITY
SHA1SU1 V6.S4, V7.S4
VADD V18.S4, V4.S4, V16.S4
SHA1SU0 V6.S4, V5.S4, V4.S4
HASHUPDATEPARITY
SHA1SU1 V7.S4, V4.S4
VADD V18.S4, V5.S4, V16.S4
SHA1SU0 V7.S4, V6.S4, V5.S4
HASHUPDATEPARITY
SHA1SU1 V4.S4, V5.S4
VDUP V19.S[3], V18.S4
VADD V17.S4, V6.S4, V16.S4
SHA1SU0 V4.S4, V7.S4, V6.S4
HASHUPDATEMAJ
SHA1SU1 V5.S4, V6.S4
VADD V17.S4, V7.S4, V16.S4
SHA1SU0 V5.S4, V4.S4, V7.S4
HASHUPDATEMAJ
SHA1SU1 V6.S4, V7.S4
VADD V17.S4, V4.S4, V16.S4
SHA1SU0 V6.S4, V5.S4, V4.S4
HASHUPDATEMAJ
SHA1SU1 V7.S4, V4.S4
VADD V17.S4, V5.S4, V16.S4
SHA1SU0 V7.S4, V6.S4, V5.S4
HASHUPDATEMAJ
SHA1SU1 V4.S4, V5.S4
VADD V17.S4, V6.S4, V16.S4
SHA1SU0 V4.S4, V7.S4, V6.S4
HASHUPDATEMAJ
SHA1SU1 V5.S4, V6.S4
VADD V18.S4, V7.S4, V16.S4
SHA1SU0 V5.S4, V4.S4, V7.S4
HASHUPDATEPARITY
SHA1SU1 V6.S4, V7.S4
VADD V18.S4, V4.S4, V16.S4
HASHUPDATEPARITY
VADD V18.S4, V5.S4, V16.S4
HASHUPDATEPARITY
VADD V18.S4, V6.S4, V16.S4
HASHUPDATEPARITY
VADD V18.S4, V7.S4, V16.S4
HASHUPDATEPARITY
SUB $64, R3, R3 // message length - 64bytes, then compare with 64bytes
VADD V2.S4, V0.S4, V0.S4
VADD V1.S4, V20.S4, V20.S4
CBNZ R3, blockloop
sha1ret:
VST1.P [V0.S4], 16(R0) // store hash value H(dcba)
VMOVS V20, (R0) // store hash value H(e)
RET
......@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64,!amd64p32,!386,!arm,!s390x
// +build !amd64,!amd64p32,!386,!arm,!s390x,!arm64
package sha1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment