Commit 0ff6e5f1 authored by Carlos Eduardo Seo

crypto/cipher: add VSX implementation of xorBytes for ppc64x

This change adds an asm implementation of xorBytes for ppc64x that
takes advantage of VSX registers and instructions.

name                   old time/op    new time/op     delta
XORBytes/8Bytes-8        16.4ns ± 0%     11.1ns ± 0%   -32.32%  (p=0.000 n=5+4)
XORBytes/128Bytes-8      45.6ns ± 0%     16.2ns ± 0%   -64.50%  (p=0.008 n=5+5)
XORBytes/2048Bytes-8      433ns ±13%      129ns ± 1%   -70.29%  (p=0.000 n=5+4)
XORBytes/32768Bytes-8    7.16µs ± 0%     1.83µs ± 0%   -74.39%  (p=0.008 n=5+5)

name                   old speed      new speed       delta
XORBytes/8Bytes-8       488MB/s ± 0%    721MB/s ± 0%   +47.75%  (p=0.016 n=5+4)
XORBytes/128Bytes-8    2.80GB/s ± 0%   7.89GB/s ± 0%  +181.33%  (p=0.008 n=5+5)
XORBytes/2048Bytes-8   4.77GB/s ±13%  15.87GB/s ± 0%  +232.68%  (p=0.016 n=5+4)
XORBytes/32768Bytes-8  4.58GB/s ± 0%  17.88GB/s ± 0%  +290.47%  (p=0.008 n=5+5)

Change-Id: Ic27d9b858f8ec2d597fdabc68a288d6844eba701
Reviewed-on: https://go-review.googlesource.com/c/145997
Run-TryBot: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
parent f5b69503
......@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64
// +build !amd64,!ppc64,!ppc64le
package cipher
......
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ppc64 ppc64le
package cipher
// xorBytes stores a[i] ^ b[i] into dst[i] for every index below the shorter
// of len(a) and len(b), and returns how many bytes were processed. It panics
// if dst cannot hold that many bytes.
func xorBytes(dst, a, b []byte) int {
	count := len(b)
	if len(a) < count {
		count = len(a)
	}
	if count == 0 {
		return 0
	}
	// Index the last destination byte up front so that a too-short dst
	// panics here, in Go, before the assembly routine is entered.
	_ = dst[count-1]
	xorBytesVSX(&dst[0], &a[0], &b[0], count)
	return count
}
// xorWords XORs a and b into dst by delegating to xorBytes; the byte count
// reported by xorBytes is discarded.
func xorWords(dst, a, b []byte) {
	_ = xorBytes(dst, a, b)
}
// xorBytesVSX is implemented in assembly. It writes a[i] ^ b[i] to dst[i]
// for i in [0, n), where dst, a and b point at the first byte of each
// buffer. It performs no bounds checks, and n must be at least 1 (xorBytes
// returns early for n == 0 and bounds-checks dst before calling it).
//
//go:noescape
func xorBytesVSX(dst, a, b *byte, n int)
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ppc64 ppc64le
#include "textflag.h"
// func xorBytesVSX(dst, a, b *byte, n int)
//
// xorBytesVSX writes a[i] ^ b[i] to dst[i] for i in [0, n).
//
// Registers: R3 = dst, R4 = a, R5 = b, R6 = n (later: bytes still to do),
// R8 = byte index shared by all three pointers.
//
// Paths, selected by n:
//   n ≥ 16      16 bytes per iteration in VSX registers, then the byte
//               loop for the n%16 leftover bytes
//   8 ≤ n < 16  one 8-byte XOR in general-purpose registers, then the
//               byte loop for the rest
//   n < 8       byte loop only
//
// n must be at least 1: the byte loop runs its body before bdnz decrements
// and tests CTR, so n == 0 is not handled here. The Go caller xorBytes
// returns early when n == 0.
TEXT ·xorBytesVSX(SB), NOSPLIT, $0
MOVD dst+0(FP), R3 // R3 = dst
MOVD a+8(FP), R4 // R4 = a
MOVD b+16(FP), R5 // R5 = b
MOVD n+24(FP), R6 // R6 = n
CMPU R6, $16, CR7 // CR7 = n compared with 16 (is n ≥ 16 bytes?)
MOVD R0, R8 // R8 = index, starting at 0 (R0 is used as zero here)
CMPU R6, $8, CR6 // CR6 = n compared with 8 (is 8 ≤ n < 16 bytes?)
BGE CR7, preloop16
BLT CR6, small
// Case for 8 ≤ n < 16 bytes: XOR the first 8 bytes as one doubleword.
MOVD (R4)(R8), R14 // R14 = a[i,...,i+7]
MOVD (R5)(R8), R15 // R15 = b[i,...,i+7]
XOR R14, R15, R16 // R16 = a[] ^ b[]
SUB $8, R6 // n = n - 8
MOVD R16, (R3)(R8) // Store to dst
ADD $8, R8 // Advance the index past the 8 bytes just written
// Check if we're finished: any bytes left go through the byte loop.
CMP R6, R0
BGT small
JMP done
// Case for n ≥ 16 bytes
preloop16:
SRD $4, R6, R7 // R7 = n / 16 = number of 16-byte iterations
MOVD R7, CTR
ANDCC $15, R6, R9 // R9 = n % 16; sets CR0 for the BEQ after the loop
loop16:
LXVD2X (R4)(R8), VS32 // VS32 = a[i,...,i+15]
LXVD2X (R5)(R8), VS33 // VS33 = b[i,...,i+15]
XXLXOR VS32, VS33, VS34 // VS34 = a[] ^ b[]
STXVD2X VS34, (R3)(R8) // Store to dst
ADD $16, R8 // Update index
BC 16, 0, loop16 // bdnz loop16
BEQ CR0, done // n % 16 == 0: no trailing bytes
SLD $4, R7 // R7 = bytes handled by the 16-byte loop
SUB R7, R6 // R6 = n - (R7 * 16)
// Case for n < 8 bytes and trailing bytes from the
// previous cases. R6 holds the number of bytes left and must be ≥ 1.
small:
MOVD R6, CTR // Setup loop counter
loop:
MOVBZ (R4)(R8), R14 // R14 = a[i]
MOVBZ (R5)(R8), R15 // R15 = b[i]
XOR R14, R15, R16 // R16 = a[i] ^ b[i]
MOVB R16, (R3)(R8) // Store to dst
ADD $1, R8 // Advance to the next byte
BC 16, 0, loop // bdnz loop
done:
RET
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment