Commit 38458ce3 authored by Shenghou Ma's avatar Shenghou Ma

crypto/md5: speed up aligned writes and test/bench unaligned writes

Write() can safely use uint32 loads when input is aligned.
Also add test and benchmarks for unaligned writes.

Benchmark result obtained by Dave Cheney on ARMv5TE @ 1.2GHz:
benchmark                       old ns/op    new ns/op    delta
BenchmarkHash8Bytes                  4104         3417  -16.74%
BenchmarkHash1K                     22061        11208  -49.20%
BenchmarkHash8K                    146630        65148  -55.57%
BenchmarkHash8BytesUnaligned         4128         3436  -16.76%
BenchmarkHash1KUnaligned            22054        21473   -2.63%
BenchmarkHash8KUnaligned           146658       146909   +0.17%

benchmark                        old MB/s     new MB/s  speedup
BenchmarkHash8Bytes                  1.95         2.34    1.20x
BenchmarkHash1K                     46.42        91.36    1.97x
BenchmarkHash8K                     55.87       125.74    2.25x
BenchmarkHash8BytesUnaligned         1.94         2.33    1.20x
BenchmarkHash1KUnaligned            46.43        47.69    1.03x
BenchmarkHash8KUnaligned            55.86        55.76    1.00x

R=golang-dev, dave, bradfitz
CC=golang-dev
https://golang.org/cl/6782072
parent 3513d840
......@@ -203,6 +203,8 @@ func block(dig *digest, p []byte) {
// less code and run 1.3x faster if we take advantage of that.
// My apologies.
X = (*[16]uint32)(unsafe.Pointer(&p[0]))
} else if uintptr(unsafe.Pointer(&p[0]))&(unsafe.Alignof(uint32(0))-1) == 0 {
X = (*[16]uint32)(unsafe.Pointer(&p[0]))
} else {
X = &xbuf
j := 0
......
......@@ -9,6 +9,7 @@ import (
"fmt"
"io"
"testing"
"unsafe"
)
type md5Test struct {
......@@ -54,13 +55,19 @@ func TestGolden(t *testing.T) {
for i := 0; i < len(golden); i++ {
g := golden[i]
c := md5.New()
for j := 0; j < 3; j++ {
buf := make([]byte, len(g.in)+4)
for j := 0; j < 3+4; j++ {
if j < 2 {
io.WriteString(c, g.in)
} else {
} else if j == 2 {
io.WriteString(c, g.in[0:len(g.in)/2])
c.Sum(nil)
io.WriteString(c, g.in[len(g.in)/2:])
} else if j > 2 {
// test unaligned write
buf = buf[1:]
copy(buf, g.in)
c.Write(buf[:len(g.in)])
}
s := fmt.Sprintf("%x", c.Sum(nil))
if s != g.out {
......@@ -80,11 +87,18 @@ func ExampleNew() {
}
var bench = md5.New()
var buf = make([]byte, 8192)
var buf = make([]byte, 8192+1)
var sum = make([]byte, bench.Size())
func benchmarkSize(b *testing.B, size int) {
func benchmarkSize(b *testing.B, size int, unaligned bool) {
b.SetBytes(int64(size))
sum := make([]byte, bench.Size())
buf := buf
if unaligned {
if uintptr(unsafe.Pointer(&buf[0]))&(unsafe.Alignof(uint32(0))-1) == 0 {
buf = buf[1:]
}
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
bench.Reset()
bench.Write(buf[:size])
......@@ -93,13 +107,25 @@ func benchmarkSize(b *testing.B, size int) {
}
func BenchmarkHash8Bytes(b *testing.B) {
benchmarkSize(b, 8)
benchmarkSize(b, 8, false)
}
func BenchmarkHash1K(b *testing.B) {
benchmarkSize(b, 1024)
benchmarkSize(b, 1024, false)
}
func BenchmarkHash8K(b *testing.B) {
benchmarkSize(b, 8192)
benchmarkSize(b, 8192, false)
}
func BenchmarkHash8BytesUnaligned(b *testing.B) {
benchmarkSize(b, 8, true)
}
func BenchmarkHash1KUnaligned(b *testing.B) {
benchmarkSize(b, 1024, true)
}
func BenchmarkHash8KUnaligned(b *testing.B) {
benchmarkSize(b, 8192, true)
}
......@@ -22,6 +22,8 @@ func block(dig *digest, p []byte) {
// less code and run 1.3x faster if we take advantage of that.
// My apologies.
X = (*[16]uint32)(unsafe.Pointer(&p[0]))
} else if uintptr(unsafe.Pointer(&p[0]))&(unsafe.Alignof(uint32(0))-1) == 0 {
X = (*[16]uint32)(unsafe.Pointer(&p[0]))
} else {
X = &xbuf
j := 0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment