Commit 91059de0 authored by Keith Randall's avatar Keith Randall

runtime: make aeshash more DOS-proof

Improve the aeshash implementation to make it harder to engineer collisions.

1) Scramble the seed before xoring with the input string.  This
   makes it harder to cancel known portions of the seed (like the size)
   because it mixes the per-table seed into those other parts.

2) Use table-dependent seeds for all stripes when hashing >16 byte strings.

For small strings this change uses 4 aesenc ops instead of 3, so it
is somewhat slower.  The first two can run in parallel, though, so
it isn't 33% slower.

benchmark                            old ns/op     new ns/op     delta
BenchmarkHash64-12                   10.2          11.2          +9.80%
BenchmarkHash16-12                   5.71          6.13          +7.36%
BenchmarkHash5-12                    6.64          7.01          +5.57%
BenchmarkHashBytesSpeed-12           30.3          31.9          +5.28%
BenchmarkHash65536-12                2785          2882          +3.48%
BenchmarkHash1024-12                 53.6          55.4          +3.36%
BenchmarkHashStringArraySpeed-12     54.9          56.5          +2.91%
BenchmarkHashStringSpeed-12          18.7          19.2          +2.67%
BenchmarkHashInt32Speed-12           14.8          15.1          +2.03%
BenchmarkHashInt64Speed-12           14.5          14.5          +0.00%

Change-Id: I59ea124b5cb92b1c7e8584008257347f9049996c
Reviewed-on: https://go-review.googlesource.com/14124Reviewed-by: 's avatarjcd . <jcd@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 168a51b3
......@@ -335,5 +335,8 @@ func init() {
return
}
getRandomData((*[len(hashkey) * ptrSize]byte)(unsafe.Pointer(&hashkey))[:])
hashkey[0] |= 1 // make sure this number is odd
hashkey[0] |= 1 // make sure these numbers are odd
hashkey[1] |= 1
hashkey[2] |= 1
hashkey[3] |= 1
}
......@@ -964,10 +964,13 @@ TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12
// CX: length
// DX: address to put return value
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
MOVL h+4(FP), X6 // seed to low 64 bits of xmm6
PINSRD $2, CX, X6 // size to high 64 bits of xmm6
PSHUFHW $0, X6, X6 // replace size with its low 2 bytes repeated 4 times
MOVO runtime·aeskeysched(SB), X7
MOVL h+4(FP), X0 // 32 bits of per-table hash seed
PINSRW $4, CX, X0 // 16 bits of length
PSHUFHW $0, X0, X0 // replace size with its low 2 bytes repeated 4 times
MOVO X0, X1 // save unscrambled seed
PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
AESENC X0, X0 // scramble seed
CMPL CX, $16
JB aes0to15
JE aes16
......@@ -987,101 +990,117 @@ aes0to15:
// 16 bytes loaded at this address won't cross
// a page boundary, so we can load it directly.
MOVOU -16(AX), X0
MOVOU -16(AX), X1
ADDL CX, CX
PAND masks<>(SB)(CX*8), X0
PAND masks<>(SB)(CX*8), X1
// scramble 3 times
AESENC X6, X0
AESENC X7, X0
AESENC X7, X0
MOVL X0, (DX)
final1:
AESENC X0, X1 // scramble input, xor in seed
AESENC X1, X1 // scramble combo 2 times
AESENC X1, X1
MOVL X1, (DX)
RET
endofpage:
// address ends in 1111xxxx. Might be up against
// a page boundary, so load ending at last byte.
// Then shift bytes down using pshufb.
MOVOU -32(AX)(CX*1), X0
MOVOU -32(AX)(CX*1), X1
ADDL CX, CX
PSHUFB shifts<>(SB)(CX*8), X0
AESENC X6, X0
AESENC X7, X0
AESENC X7, X0
MOVL X0, (DX)
RET
PSHUFB shifts<>(SB)(CX*8), X1
JMP final1
aes0:
// Return scrambled input seed
AESENC X7, X6
AESENC X7, X6
MOVL X6, (DX)
RET
aes16:
MOVOU (AX), X0
AESENC X6, X0
AESENC X7, X0
AESENC X7, X0
AESENC X0, X0
MOVL X0, (DX)
RET
aes16:
MOVOU (AX), X1
JMP final1
aes17to32:
// make second starting seed
PXOR runtime·aeskeysched+16(SB), X1
AESENC X1, X1
// load data to be hashed
MOVOU (AX), X0
MOVOU -16(AX)(CX*1), X1
MOVOU (AX), X2
MOVOU -16(AX)(CX*1), X3
// scramble 3 times
AESENC X6, X0
AESENC runtime·aeskeysched+16(SB), X1
AESENC X7, X0
AESENC X7, X1
AESENC X7, X0
AESENC X7, X1
AESENC X0, X2
AESENC X1, X3
AESENC X2, X2
AESENC X3, X3
AESENC X2, X2
AESENC X3, X3
// combine results
PXOR X1, X0
MOVL X0, (DX)
PXOR X3, X2
MOVL X2, (DX)
RET
aes33to64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(CX*1), X2
MOVOU -16(AX)(CX*1), X3
// make 3 more starting seeds
MOVO X1, X2
MOVO X1, X3
PXOR runtime·aeskeysched+16(SB), X1
PXOR runtime·aeskeysched+32(SB), X2
PXOR runtime·aeskeysched+48(SB), X3
AESENC X1, X1
AESENC X2, X2
AESENC X3, X3
AESENC X6, X0
AESENC runtime·aeskeysched+16(SB), X1
AESENC runtime·aeskeysched+32(SB), X2
AESENC runtime·aeskeysched+48(SB), X3
AESENC X7, X0
AESENC X7, X1
AESENC X7, X2
AESENC X7, X3
AESENC X7, X0
AESENC X7, X1
AESENC X7, X2
AESENC X7, X3
PXOR X2, X0
PXOR X3, X1
PXOR X1, X0
MOVL X0, (DX)
MOVOU (AX), X4
MOVOU 16(AX), X5
MOVOU -32(AX)(CX*1), X6
MOVOU -16(AX)(CX*1), X7
AESENC X0, X4
AESENC X1, X5
AESENC X2, X6
AESENC X3, X7
AESENC X4, X4
AESENC X5, X5
AESENC X6, X6
AESENC X7, X7
AESENC X4, X4
AESENC X5, X5
AESENC X6, X6
AESENC X7, X7
PXOR X6, X4
PXOR X7, X5
PXOR X5, X4
MOVL X4, (DX)
RET
aes65plus:
// make 3 more starting seeds
MOVO X1, X2
MOVO X1, X3
PXOR runtime·aeskeysched+16(SB), X1
PXOR runtime·aeskeysched+32(SB), X2
PXOR runtime·aeskeysched+48(SB), X3
AESENC X1, X1
AESENC X2, X2
AESENC X3, X3
// start with last (possibly overlapping) block
MOVOU -64(AX)(CX*1), X0
MOVOU -48(AX)(CX*1), X1
MOVOU -32(AX)(CX*1), X2
MOVOU -16(AX)(CX*1), X3
MOVOU -64(AX)(CX*1), X4
MOVOU -48(AX)(CX*1), X5
MOVOU -32(AX)(CX*1), X6
MOVOU -16(AX)(CX*1), X7
// scramble state once
AESENC X6, X0
AESENC runtime·aeskeysched+16(SB), X1
AESENC runtime·aeskeysched+32(SB), X2
AESENC runtime·aeskeysched+48(SB), X3
AESENC X0, X4
AESENC X1, X5
AESENC X2, X6
AESENC X3, X7
// compute number of remaining 64-byte blocks
DECL CX
......@@ -1089,39 +1108,40 @@ aes65plus:
aesloop:
// scramble state, xor in a block
MOVOU (AX), X4
MOVOU 16(AX), X5
AESENC X4, X0
AESENC X5, X1
MOVOU 32(AX), X4
MOVOU 48(AX), X5
AESENC X4, X2
AESENC X5, X3
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
AESENC X0, X4
AESENC X1, X5
AESENC X2, X6
AESENC X3, X7
// scramble state
AESENC X7, X0
AESENC X7, X1
AESENC X7, X2
AESENC X7, X3
AESENC X4, X4
AESENC X5, X5
AESENC X6, X6
AESENC X7, X7
ADDL $64, AX
DECL CX
JNE aesloop
// 2 more scrambles to finish
AESENC X7, X0
AESENC X7, X1
AESENC X7, X2
AESENC X7, X3
AESENC X7, X0
AESENC X7, X1
AESENC X7, X2
AESENC X7, X3
PXOR X2, X0
PXOR X3, X1
PXOR X1, X0
MOVL X0, (DX)
AESENC X4, X4
AESENC X5, X5
AESENC X6, X6
AESENC X7, X7
AESENC X4, X4
AESENC X5, X5
AESENC X6, X6
AESENC X7, X7
PXOR X6, X4
PXOR X7, X5
PXOR X5, X4
MOVL X4, (DX)
RET
TEXT runtime·aeshash32(SB),NOSPLIT,$0-12
......
......@@ -951,10 +951,14 @@ TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24
// CX: length
// DX: address to put return value
TEXT runtime·aeshashbody(SB),NOSPLIT,$0-0
MOVQ h+8(FP), X6 // seed to low 64 bits of xmm6
PINSRQ $1, CX, X6 // size to high 64 bits of xmm6
PSHUFHW $0, X6, X6 // replace size with its low 2 bytes repeated 4 times
MOVO runtime·aeskeysched(SB), X7
// Fill an SSE register with our seeds.
MOVQ h+8(FP), X0 // 64 bits of per-table hash seed
PINSRW $4, CX, X0 // 16 bits of length
PSHUFHW $0, X0, X0 // repeat length 4 times total
MOVO X0, X1 // save unscrambled seed
PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed
AESENC X0, X0 // scramble seed
CMPQ CX, $16
JB aes0to15
JE aes16
......@@ -976,219 +980,275 @@ aes0to15:
// 16 bytes loaded at this address won't cross
// a page boundary, so we can load it directly.
MOVOU -16(AX), X0
MOVOU -16(AX), X1
ADDQ CX, CX
MOVQ $masks<>(SB), AX
PAND (AX)(CX*8), X0
// scramble 3 times
AESENC X6, X0
AESENC X7, X0
AESENC X7, X0
MOVQ X0, (DX)
PAND (AX)(CX*8), X1
final1:
AESENC X0, X1 // scramble input, xor in seed
AESENC X1, X1 // scramble combo 2 times
AESENC X1, X1
MOVQ X1, (DX)
RET
endofpage:
// address ends in 1111xxxx. Might be up against
// a page boundary, so load ending at last byte.
// Then shift bytes down using pshufb.
MOVOU -32(AX)(CX*1), X0
MOVOU -32(AX)(CX*1), X1
ADDQ CX, CX
MOVQ $shifts<>(SB), AX
PSHUFB (AX)(CX*8), X0
AESENC X6, X0
AESENC X7, X0
AESENC X7, X0
MOVQ X0, (DX)
RET
PSHUFB (AX)(CX*8), X1
JMP final1
aes0:
// Return scrambled input seed
AESENC X7, X6
AESENC X7, X6
MOVQ X6, (DX)
AESENC X0, X0
MOVQ X0, (DX)
RET
aes16:
MOVOU (AX), X0
AESENC X6, X0
AESENC X7, X0
AESENC X7, X0
MOVQ X0, (DX)
RET
MOVOU (AX), X1
JMP final1
aes17to32:
// make second starting seed
PXOR runtime·aeskeysched+16(SB), X1
AESENC X1, X1
// load data to be hashed
MOVOU (AX), X0
MOVOU -16(AX)(CX*1), X1
MOVOU (AX), X2
MOVOU -16(AX)(CX*1), X3
// scramble 3 times
AESENC X6, X0
AESENC runtime·aeskeysched+16(SB), X1
AESENC X7, X0
AESENC X7, X1
AESENC X7, X0
AESENC X7, X1
AESENC X0, X2
AESENC X1, X3
AESENC X2, X2
AESENC X3, X3
AESENC X2, X2
AESENC X3, X3
// combine results
PXOR X1, X0
MOVQ X0, (DX)
PXOR X3, X2
MOVQ X2, (DX)
RET
aes33to64:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU -32(AX)(CX*1), X2
MOVOU -16(AX)(CX*1), X3
// make 3 more starting seeds
MOVO X1, X2
MOVO X1, X3
PXOR runtime·aeskeysched+16(SB), X1
PXOR runtime·aeskeysched+32(SB), X2
PXOR runtime·aeskeysched+48(SB), X3
AESENC X1, X1
AESENC X2, X2
AESENC X3, X3
AESENC X6, X0
AESENC runtime·aeskeysched+16(SB), X1
AESENC runtime·aeskeysched+32(SB), X2
AESENC runtime·aeskeysched+48(SB), X3
AESENC X7, X0
AESENC X7, X1
AESENC X7, X2
AESENC X7, X3
AESENC X7, X0
AESENC X7, X1
AESENC X7, X2
AESENC X7, X3
PXOR X2, X0
PXOR X3, X1
PXOR X1, X0
MOVQ X0, (DX)
MOVOU (AX), X4
MOVOU 16(AX), X5
MOVOU -32(AX)(CX*1), X6
MOVOU -16(AX)(CX*1), X7
AESENC X0, X4
AESENC X1, X5
AESENC X2, X6
AESENC X3, X7
AESENC X4, X4
AESENC X5, X5
AESENC X6, X6
AESENC X7, X7
AESENC X4, X4
AESENC X5, X5
AESENC X6, X6
AESENC X7, X7
PXOR X6, X4
PXOR X7, X5
PXOR X5, X4
MOVQ X4, (DX)
RET
aes65to128:
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
MOVOU -64(AX)(CX*1), X4
MOVOU -48(AX)(CX*1), X5
MOVOU -32(AX)(CX*1), X8
MOVOU -16(AX)(CX*1), X9
// make 7 more starting seeds
MOVO X1, X2
MOVO X1, X3
MOVO X1, X4
MOVO X1, X5
MOVO X1, X6
MOVO X1, X7
PXOR runtime·aeskeysched+16(SB), X1
PXOR runtime·aeskeysched+32(SB), X2
PXOR runtime·aeskeysched+48(SB), X3
PXOR runtime·aeskeysched+64(SB), X4
PXOR runtime·aeskeysched+80(SB), X5
PXOR runtime·aeskeysched+96(SB), X6
PXOR runtime·aeskeysched+112(SB), X7
AESENC X1, X1
AESENC X2, X2
AESENC X3, X3
AESENC X4, X4
AESENC X5, X5
AESENC X6, X6
AESENC X7, X7
// load data
MOVOU (AX), X8
MOVOU 16(AX), X9
MOVOU 32(AX), X10
MOVOU 48(AX), X11
MOVOU -64(AX)(CX*1), X12
MOVOU -48(AX)(CX*1), X13
MOVOU -32(AX)(CX*1), X14
MOVOU -16(AX)(CX*1), X15
// scramble data, xor in seed
AESENC X0, X8
AESENC X1, X9
AESENC X2, X10
AESENC X3, X11
AESENC X4, X12
AESENC X5, X13
AESENC X6, X14
AESENC X7, X15
// scramble twice
AESENC X8, X8
AESENC X9, X9
AESENC X10, X10
AESENC X11, X11
AESENC X12, X12
AESENC X13, X13
AESENC X14, X14
AESENC X15, X15
AESENC X6, X0
AESENC runtime·aeskeysched+16(SB), X1
AESENC runtime·aeskeysched+32(SB), X2
AESENC runtime·aeskeysched+48(SB), X3
AESENC runtime·aeskeysched+64(SB), X4
AESENC runtime·aeskeysched+80(SB), X5
AESENC runtime·aeskeysched+96(SB), X8
AESENC runtime·aeskeysched+112(SB), X9
AESENC X7, X0
AESENC X7, X1
AESENC X7, X2
AESENC X7, X3
AESENC X7, X4
AESENC X7, X5
AESENC X7, X8
AESENC X7, X9
AESENC X7, X0
AESENC X7, X1
AESENC X7, X2
AESENC X7, X3
AESENC X7, X4
AESENC X7, X5
AESENC X7, X8
AESENC X7, X9
PXOR X4, X0
PXOR X5, X1
PXOR X8, X2
PXOR X9, X3
PXOR X2, X0
PXOR X3, X1
PXOR X1, X0
MOVQ X0, (DX)
AESENC X8, X8
AESENC X9, X9
AESENC X10, X10
AESENC X11, X11
AESENC X12, X12
AESENC X13, X13
AESENC X14, X14
AESENC X15, X15
// combine results
PXOR X12, X8
PXOR X13, X9
PXOR X14, X10
PXOR X15, X11
PXOR X10, X8
PXOR X11, X9
PXOR X9, X8
MOVQ X8, (DX)
RET
aes129plus:
// make 7 more starting seeds
MOVO X1, X2
MOVO X1, X3
MOVO X1, X4
MOVO X1, X5
MOVO X1, X6
MOVO X1, X7
PXOR runtime·aeskeysched+16(SB), X1
PXOR runtime·aeskeysched+32(SB), X2
PXOR runtime·aeskeysched+48(SB), X3
PXOR runtime·aeskeysched+64(SB), X4
PXOR runtime·aeskeysched+80(SB), X5
PXOR runtime·aeskeysched+96(SB), X6
PXOR runtime·aeskeysched+112(SB), X7
AESENC X1, X1
AESENC X2, X2
AESENC X3, X3
AESENC X4, X4
AESENC X5, X5
AESENC X6, X6
AESENC X7, X7
// start with last (possibly overlapping) block
MOVOU -128(AX)(CX*1), X0
MOVOU -112(AX)(CX*1), X1
MOVOU -96(AX)(CX*1), X2
MOVOU -80(AX)(CX*1), X3
MOVOU -64(AX)(CX*1), X4
MOVOU -48(AX)(CX*1), X5
MOVOU -32(AX)(CX*1), X8
MOVOU -16(AX)(CX*1), X9
// scramble state once
AESENC X6, X0
AESENC runtime·aeskeysched+16(SB), X1
AESENC runtime·aeskeysched+32(SB), X2
AESENC runtime·aeskeysched+48(SB), X3
AESENC runtime·aeskeysched+64(SB), X4
AESENC runtime·aeskeysched+80(SB), X5
AESENC runtime·aeskeysched+96(SB), X8
AESENC runtime·aeskeysched+112(SB), X9
MOVOU -128(AX)(CX*1), X8
MOVOU -112(AX)(CX*1), X9
MOVOU -96(AX)(CX*1), X10
MOVOU -80(AX)(CX*1), X11
MOVOU -64(AX)(CX*1), X12
MOVOU -48(AX)(CX*1), X13
MOVOU -32(AX)(CX*1), X14
MOVOU -16(AX)(CX*1), X15
// scramble input once, xor in seed
AESENC X0, X8
AESENC X1, X9
AESENC X2, X10
AESENC X3, X11
AESENC X4, X12
AESENC X5, X13
AESENC X6, X14
AESENC X7, X15
// compute number of remaining 128-byte blocks
DECQ CX
SHRQ $7, CX
aesloop:
// scramble state, xor in a block
MOVOU (AX), X10
MOVOU 16(AX), X11
MOVOU 32(AX), X12
MOVOU 48(AX), X13
AESENC X10, X0
AESENC X11, X1
AESENC X12, X2
AESENC X13, X3
MOVOU 64(AX), X10
MOVOU 80(AX), X11
MOVOU 96(AX), X12
MOVOU 112(AX), X13
AESENC X10, X4
AESENC X11, X5
AESENC X12, X8
AESENC X13, X9
MOVOU (AX), X0
MOVOU 16(AX), X1
MOVOU 32(AX), X2
MOVOU 48(AX), X3
AESENC X0, X8
AESENC X1, X9
AESENC X2, X10
AESENC X3, X11
MOVOU 64(AX), X4
MOVOU 80(AX), X5
MOVOU 96(AX), X6
MOVOU 112(AX), X7
AESENC X4, X12
AESENC X5, X13
AESENC X6, X14
AESENC X7, X15
// scramble state
AESENC X7, X0
AESENC X7, X1
AESENC X7, X2
AESENC X7, X3
AESENC X7, X4
AESENC X7, X5
AESENC X7, X8
AESENC X7, X9
AESENC X8, X8
AESENC X9, X9
AESENC X10, X10
AESENC X11, X11
AESENC X12, X12
AESENC X13, X13
AESENC X14, X14
AESENC X15, X15
ADDQ $128, AX
DECQ CX
JNE aesloop
// 2 more scrambles to finish
AESENC X7, X0
AESENC X7, X1
AESENC X7, X2
AESENC X7, X3
AESENC X7, X4
AESENC X7, X5
AESENC X7, X8
AESENC X7, X9
AESENC X7, X0
AESENC X7, X1
AESENC X7, X2
AESENC X7, X3
AESENC X7, X4
AESENC X7, X5
AESENC X7, X8
AESENC X7, X9
PXOR X4, X0
PXOR X5, X1
PXOR X8, X2
PXOR X9, X3
PXOR X2, X0
PXOR X3, X1
PXOR X1, X0
MOVQ X0, (DX)
AESENC X8, X8
AESENC X9, X9
AESENC X10, X10
AESENC X11, X11
AESENC X12, X12
AESENC X13, X13
AESENC X14, X14
AESENC X15, X15
AESENC X8, X8
AESENC X9, X9
AESENC X10, X10
AESENC X11, X11
AESENC X12, X12
AESENC X13, X13
AESENC X14, X14
AESENC X15, X15
PXOR X12, X8
PXOR X13, X9
PXOR X14, X10
PXOR X15, X11
PXOR X10, X8
PXOR X11, X9
PXOR X9, X8
MOVQ X8, (DX)
RET
TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
......
......@@ -52,9 +52,9 @@ tail:
h = rotl_15(h*m1) * m2
default:
v1 := h
v2 := uint32(hashkey[1])
v3 := uint32(hashkey[2])
v4 := uint32(hashkey[3])
v2 := uint32(seed * hashkey[1])
v3 := uint32(seed * hashkey[2])
v4 := uint32(seed * hashkey[3])
for s >= 16 {
v1 ^= readUnaligned32(p)
v1 = rotl_15(v1*m1) * m2
......
......@@ -53,9 +53,9 @@ tail:
h = rotl_31(h*m1) * m2
default:
v1 := h
v2 := uint64(hashkey[1])
v3 := uint64(hashkey[2])
v4 := uint64(hashkey[3])
v2 := uint64(seed * hashkey[1])
v3 := uint64(seed * hashkey[2])
v4 := uint64(seed * hashkey[3])
for s >= 32 {
v1 ^= readUnaligned64(p)
v1 = rotl_31(v1*m1) * m2
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment