Commit 475d86b6 authored by Adam Langley's avatar Adam Langley

crypto/rc4: add simple amd64 asm implementation.

(Although it's still half the speed of OpenSSL.)

benchmark           old ns/op    new ns/op    delta
BenchmarkRC4_128         1409          398  -71.75%
BenchmarkRC4_1K         10920         2898  -73.46%
BenchmarkRC4_8K        131323        23083  -82.42%

benchmark            old MB/s     new MB/s  speedup
BenchmarkRC4_128        90.83       321.43    3.54x
BenchmarkRC4_1K         93.77       353.28    3.77x
BenchmarkRC4_8K         61.65       350.73    5.69x

R=rsc, remyoudompheng
CC=golang-dev, jgrahamc
https://golang.org/cl/7234055
parent ee908748
...@@ -42,17 +42,6 @@ func NewCipher(key []byte) (*Cipher, error) { ...@@ -42,17 +42,6 @@ func NewCipher(key []byte) (*Cipher, error) {
return &c, nil return &c, nil
} }
// XORKeyStream sets dst to the result of XORing src with the key stream.
// Dst and src may be the same slice but otherwise should not overlap.
func (c *Cipher) XORKeyStream(dst, src []byte) {
for i := range src {
c.i += 1
c.j += c.s[c.i]
c.s[c.i], c.s[c.j] = c.s[c.j], c.s[c.i]
dst[i] = src[i] ^ c.s[c.s[c.i]+c.s[c.j]]
}
}
// Reset zeros the key data so that it will no longer appear in the // Reset zeros the key data so that it will no longer appear in the
// process's memory. // process's memory.
func (c *Cipher) Reset() { func (c *Cipher) Reset() {
......
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// func xorKeyStream(dst, src *byte, n int, state *[256]byte, i, j *uint8)
TEXT ·xorKeyStream(SB),7,$0
MOVQ dst+0(FP), DI
MOVQ src+8(FP), SI
MOVQ n+16(FP), CX
MOVQ state+24(FP), R8
MOVQ xPtr+32(FP), AX
MOVBQZX (AX), AX
MOVQ yPtr+40(FP), BX
MOVBQZX (BX), BX
loop:
CMPQ CX, $0
JE done
// c.i += 1
INCB AX
// c.j += c.s[c.i]
MOVB (R8)(AX*1), R9
ADDB R9, BX
MOVBQZX (R8)(BX*1), R10
MOVB R10, (R8)(AX*1)
MOVB R9, (R8)(BX*1)
// R11 = c.s[c.i]+c.s[c.j]
MOVQ R10, R11
ADDB R9, R11
MOVB (R8)(R11*1), R11
MOVB (SI), R12
XORB R11, R12
MOVB R12, (DI)
INCQ SI
INCQ DI
DECQ CX
JMP loop
done:
MOVQ xPtr+32(FP), R8
MOVB AX, (R8)
MOVQ yPtr+40(FP), R8
MOVB BX, (R8)
RET
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build amd64
package rc4
func xorKeyStream(dst, src *byte, n int, state *[256]byte, i, j *uint8)
// XORKeyStream sets dst to the result of XORing src with the key stream.
// Dst and src may be the same slice but otherwise should not overlap.
func (c *Cipher) XORKeyStream(dst, src []byte) {
if len(src) == 0 {
return
}
xorKeyStream(&dst[0], &src[0], len(src), &c.s, &c.i, &c.j)
}
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64
package rc4
// XORKeyStream sets dst to the result of XORing src with the key stream.
// Dst and src may be the same slice but otherwise should not overlap.
func (c *Cipher) XORKeyStream(dst, src []byte) {
i, j := c.i, c.j
for k, v := range src {
i += 1
j += c.s[i]
c.s[i], c.s[j] = c.s[j], c.s[i]
dst[k] = v ^ c.s[c.s[i]+c.s[j]]
}
c.i, c.j = i, j
}
...@@ -37,6 +37,39 @@ var golden = []rc4Test{ ...@@ -37,6 +37,39 @@ var golden = []rc4Test{
[]byte{0x57, 0x69, 0x6b, 0x69}, []byte{0x57, 0x69, 0x6b, 0x69},
[]byte{0x60, 0x44, 0xdb, 0x6d, 0x41, 0xb7}, []byte{0x60, 0x44, 0xdb, 0x6d, 0x41, 0xb7},
}, },
{
[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
[]byte{
0xde, 0x18, 0x89, 0x41, 0xa3, 0x37, 0x5d, 0x3a,
0x8a, 0x06, 0x1e, 0x67, 0x57, 0x6e, 0x92, 0x6d,
0xc7, 0x1a, 0x7f, 0xa3, 0xf0, 0xcc, 0xeb, 0x97,
0x45, 0x2b, 0x4d, 0x32, 0x27, 0x96, 0x5f, 0x9e,
0xa8, 0xcc, 0x75, 0x07, 0x6d, 0x9f, 0xb9, 0xc5,
0x41, 0x7a, 0xa5, 0xcb, 0x30, 0xfc, 0x22, 0x19,
0x8b, 0x34, 0x98, 0x2d, 0xbb, 0x62, 0x9e, 0xc0,
0x4b, 0x4f, 0x8b, 0x05, 0xa0, 0x71, 0x08, 0x50,
0x92, 0xa0, 0xc3, 0x58, 0x4a, 0x48, 0xe4, 0xa3,
0x0a, 0x39, 0x7b, 0x8a, 0xcd, 0x1d, 0x00, 0x9e,
0xc8, 0x7d, 0x68, 0x11, 0xf2, 0x2c, 0xf4, 0x9c,
0xa3, 0xe5, 0x93, 0x54, 0xb9, 0x45, 0x15, 0x35,
0xa2, 0x18, 0x7a, 0x86, 0x42, 0x6c, 0xca, 0x7d,
0x5e, 0x82, 0x3e, 0xba, 0x00, 0x44, 0x12, 0x67,
0x12, 0x57, 0xb8, 0xd8, 0x60, 0xae, 0x4c, 0xbd,
0x4c, 0x49, 0x06, 0xbb, 0xc5, 0x35, 0xef, 0xe1,
0x58, 0x7f, 0x08, 0xdb, 0x33, 0x95, 0x5c, 0xdb,
0xcb, 0xad, 0x9b, 0x10, 0xf5, 0x3f, 0xc4, 0xe5,
0x2c, 0x59, 0x15, 0x65, 0x51, 0x84, 0x87, 0xfe,
0x08, 0x4d, 0x0e, 0x3f, 0x03, 0xde, 0xbc, 0xc9,
0xda, 0x1c, 0xe9, 0x0d, 0x08, 0x5c, 0x2d, 0x8a,
0x19, 0xd8, 0x37, 0x30, 0x86, 0x16, 0x36, 0x92,
0x14, 0x2b, 0xd8, 0xfc, 0x5d, 0x7a, 0x73, 0x49,
0x6a, 0x8e, 0x59, 0xee, 0x7e, 0xcf, 0x6b, 0x94,
0x06, 0x63, 0xf4, 0xa6, 0xbe, 0xe6, 0x5b, 0xd2,
0xc8, 0x5c, 0x46, 0x98, 0x6c, 0x1b, 0xef, 0x34,
0x90, 0xd3, 0x7b, 0x38, 0xda, 0x85, 0xd3, 0x2e,
0x97, 0x39, 0xcb, 0x23, 0x4a, 0x2b, 0xe7, 0x40,
},
},
} }
func TestGolden(t *testing.T) { func TestGolden(t *testing.T) {
...@@ -51,9 +84,34 @@ func TestGolden(t *testing.T) { ...@@ -51,9 +84,34 @@ func TestGolden(t *testing.T) {
c.XORKeyStream(keystream, keystream) c.XORKeyStream(keystream, keystream)
for j, v := range keystream { for j, v := range keystream {
if g.keystream[j] != v { if g.keystream[j] != v {
t.Errorf("Failed at golden index %d", i) t.Errorf("Failed at golden index %d:\n%x\nvs\n%x", i, keystream, g.keystream)
break break
} }
} }
} }
} }
func benchmark(b *testing.B, size int64) {
buf := make([]byte, size)
c, err := NewCipher(golden[0].key)
if err != nil {
panic(err)
}
b.SetBytes(size)
for i := 0; i < b.N; i++ {
c.XORKeyStream(buf, buf)
}
}
func BenchmarkRC4_128(b *testing.B) {
benchmark(b, 128)
}
func BenchmarkRC4_1K(b *testing.B) {
benchmark(b, 1024)
}
func BenchmarkRC4_8K(b *testing.B) {
benchmark(b, 8096)
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment