Commit 80e1cf73 authored by Shenghou Ma's avatar Shenghou Ma

crypto/rc4: naïve ARM assembly implementation

On 800MHz Cortex-A8:
benchmark           old ns/op    new ns/op    delta
BenchmarkRC4_128         9395         2838  -69.79%
BenchmarkRC4_1K         74497        22120  -70.31%
BenchmarkRC4_8K        587243       171435  -70.81%

benchmark            old MB/s     new MB/s  speedup
BenchmarkRC4_128        13.62        45.09    3.31x
BenchmarkRC4_1K         13.75        46.29    3.37x
BenchmarkRC4_8K         13.79        47.22    3.42x

Result for "OpenSSL 1.0.1c 10 May 2012" from Debian/armhf sid:
type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes
rc4              39553.81k    46522.39k    49336.11k    50085.63k    50258.06k

R=golang-dev, agl, dave
CC=golang-dev
https://golang.org/cl/7310051
parent 0592c449
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Registers
dst = 0
src = 1
n = 2
state = 3
pi = 4
pj = 5
i = 6
j = 7
k = 8
t = 11
t2 = 12
// func xorKeyStream(dst, src *byte, n int, state *[256]byte, i, j *uint8)
TEXT ·xorKeyStream(SB),7,$0
MOVW 0(FP), R(dst)
MOVW 4(FP), R(src)
MOVW 8(FP), R(n)
MOVW 12(FP), R(state)
MOVW 16(FP), R(pi)
MOVW 20(FP), R(pj)
MOVBU (R(pi)), R(i)
MOVBU (R(pj)), R(j)
MOVW $0, R(k)
loop:
// i += 1; j += state[i]
ADD $1, R(i)
AND $0xff, R(i)
MOVBU R(i)<<0(R(state)), R(t)
ADD R(t), R(j)
AND $0xff, R(j)
// swap state[i] <-> state[j]
MOVBU R(j)<<0(R(state)), R(t2)
MOVB R(t2), R(i)<<0(R(state))
MOVB R(t), R(j)<<0(R(state))
// dst[k] = src[k] ^ state[state[i] + state[j]]
ADD R(t2), R(t)
AND $0xff, R(t)
MOVBU R(t)<<0(R(state)), R(t)
MOVBU R(k)<<0(R(src)), R(t2)
EOR R(t), R(t2)
MOVB R(t2), R(k)<<0(R(dst))
ADD $1, R(k)
CMP R(k), R(n)
BNE loop
done:
MOVB R(i), (R(pi))
MOVB R(j), (R(pj))
RET
......@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64
// +build amd64 arm
package rc4
......
......@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64
// +build !amd64,!arm
package rc4
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment