Commit f0fcb2d5 authored by Robert Griesemer

Symmetric changes to md4.go as for md5.go.

Use uint index variables in some cases instead
of int to enable strength reduction; this makes
it possible for the compiler to reduce % into
masks.

Old code: 6g -S md4.go md4block.go | grep "md4block.go:44"
0471 (md4block.go:44) MOVL    AX,BX
0472 (md4block.go:44) MOVL    AX,BP
0473 (md4block.go:44) MOVL    AX,R8
0474 (md4block.go:44) SARL    $31,R8
0475 (md4block.go:44) SHRL    $30,R8
0476 (md4block.go:44) ADDL    R8,BP
0477 (md4block.go:44) SARL    $2,BP
0478 (md4block.go:44) IMULL   $4,BP
0479 (md4block.go:44) SUBL    BP,BX
0480 (md4block.go:44) MOVLQSX BX,BX
0481 (md4block.go:44) LEAQ    shift1+0(SB),BP
0482 (md4block.go:44) CMPL    BX,8(BP)
0483 (md4block.go:44) JCS     ,485
0484 (md4block.go:44) CALL    ,runtime.throwindex+0(SB)
0485 (md4block.go:44) MOVQ    (BP),BP
0486 (md4block.go:44) MOVL    (BP)(BX*4),DI

New code: 6g -S md4.go md4block.go | grep "md4block.go:44"
0471 (md4block.go:44) MOVL    AX,BX
0472 (md4block.go:44) ANDL    $3,BX
0473 (md4block.go:44) MOVLQZX BX,BX
0474 (md4block.go:44) LEAQ    shift1+0(SB),BP
0475 (md4block.go:44) CMPL    BX,8(BP)
0476 (md4block.go:44) JCS     ,478
0477 (md4block.go:44) CALL    ,runtime.throwindex+0(SB)
0478 (md4block.go:44) MOVQ    (BP),BP
0479 (md4block.go:44) MOVL    (BP)(BX*4),DI

R=agl, agl1
CC=golang-dev
https://golang.org/cl/181086
parent 9d07d37f
......@@ -68,8 +68,8 @@ func (d *digest) Write(p []byte) (nn int, err os.Error) {
n := _Block(d, p)
p = p[n:]
if len(p) > 0 {
for i := 0; i < len(p); i++ {
d.x[i] = p[i]
for i, x := range p {
d.x[i] = x
}
d.nx = len(p)
}
......@@ -100,16 +100,12 @@ func (d *digest) Sum() []byte {
p := make([]byte, 16)
j := 0
for i := 0; i < 4; i++ {
s := d.s[i]
p[j] = byte(s)
j++
p[j] = byte(s >> 8)
j++
p[j] = byte(s >> 16)
j++
p[j] = byte(s >> 24)
j++
for _, s := range d.s {
p[j+0] = byte(s >> 0)
p[j+1] = byte(s >> 8)
p[j+2] = byte(s >> 16)
p[j+3] = byte(s >> 24)
j += 4
}
return p
}
......@@ -25,9 +25,10 @@ func _Block(dig *digest, p []byte) int {
for len(p) >= _Chunk {
aa, bb, cc, dd := a, b, c, d
j := 0
for i := 0; i < 16; i++ {
j := i * 4
X[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24
j += 4
}
// If this needs to be made faster in the future,
......@@ -37,9 +38,12 @@ func _Block(dig *digest, p []byte) int {
// with suitable variable renaming in each
// unrolled body, delete the a, b, c, d = d, a, b, c
// (or you can let the optimizer do the renaming).
//
// The index variables are uint so that % by a power
// of two can be optimized easily by a compiler.
// Round 1.
for i := 0; i < 16; i++ {
for i := uint(0); i < 16; i++ {
x := i
s := shift1[i%4]
f := ((c ^ d) & b) ^ d
......@@ -49,7 +53,7 @@ func _Block(dig *digest, p []byte) int {
}
// Round 2.
for i := 0; i < 16; i++ {
for i := uint(0); i < 16; i++ {
x := xIndex2[i]
s := shift2[i%4]
g := (b & c) | (b & d) | (c & d)
......@@ -59,7 +63,7 @@ func _Block(dig *digest, p []byte) int {
}
// Round 3.
for i := 0; i < 16; i++ {
for i := uint(0); i < 16; i++ {
x := xIndex3[i]
s := shift3[i%4]
h := b ^ c ^ d
......
......@@ -98,9 +98,10 @@ func _Block(dig *digest, p []byte) int {
for len(p) >= _Chunk {
aa, bb, cc, dd := a, b, c, d
j := 0
for i := 0; i < 16; i++ {
j := i * 4
X[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24
j += 4
}
// If this needs to be made faster in the future,
......@@ -110,52 +111,47 @@ func _Block(dig *digest, p []byte) int {
// with suitable variable renaming in each
// unrolled body, delete the a, b, c, d = d, a, b, c
// (or you can let the optimizer do the renaming).
//
// The index variables are uint so that % by a power
// of two can be optimized easily by a compiler.
// Round 1.
for i := 0; i < 16; i++ {
for i := uint(0); i < 16; i++ {
x := i
t := i
s := shift1[i%4]
f := ((c ^ d) & b) ^ d
a += f + X[x] + table[t]
a = a<<s | a>>(32-s)
a += b
a += f + X[x] + table[i]
a = a<<s | a>>(32-s) + b
a, b, c, d = d, a, b, c
}
// Round 2.
for i := 0; i < 16; i++ {
for i := uint(0); i < 16; i++ {
x := (1 + 5*i) % 16
t := 16 + i
s := shift2[i%4]
g := ((b ^ c) & d) ^ c
a += g + X[x] + table[t]
a = a<<s | a>>(32-s)
a += b
a += g + X[x] + table[i+16]
a = a<<s | a>>(32-s) + b
a, b, c, d = d, a, b, c
}
// Round 3.
for i := 0; i < 16; i++ {
for i := uint(0); i < 16; i++ {
x := (5 + 3*i) % 16
t := 32 + i
s := shift3[i%4]
h := b ^ c ^ d
a += h + X[x] + table[t]
a = a<<s | a>>(32-s)
a += b
a += h + X[x] + table[i+32]
a = a<<s | a>>(32-s) + b
a, b, c, d = d, a, b, c
}
// Round 4.
for i := 0; i < 16; i++ {
for i := uint(0); i < 16; i++ {
x := (7 * i) % 16
s := shift4[i%4]
t := 48 + i
ii := c ^ (b | ^d)
a += ii + X[x] + table[t]
a = a<<s | a>>(32-s)
a += b
j := c ^ (b | ^d)
a += j + X[x] + table[i+48]
a = a<<s | a>>(32-s) + b
a, b, c, d = d, a, b, c
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment