Commit 60ffae25 authored by Nigel Tao's avatar Nigel Tao

hash/adler32: optimize.

The bulk of the gains come from hoisting the modulo ops outside of
the inner loop.

Reducing the digest type from 8 bytes to 4 bytes gains another 1% on
the hash/adler32 micro-benchmark.

Benchmarks for $GOOS,$GOARCH = linux,amd64 below.

hash/adler32 benchmark:
benchmark             old ns/op    new ns/op    delta
BenchmarkAdler32KB         1660         1364  -17.83%

image/png benchmark:
benchmark                       old ns/op    new ns/op    delta
BenchmarkDecodeGray               2466909      2425539   -1.68%
BenchmarkDecodeNRGBAGradient      9884500      9751705   -1.34%
BenchmarkDecodeNRGBAOpaque        8511615      8379800   -1.55%
BenchmarkDecodePaletted           1366683      1330677   -2.63%
BenchmarkDecodeRGB                6987496      6884974   -1.47%
BenchmarkEncodePaletted           6292408      6040052   -4.01%
BenchmarkEncodeRGBOpaque         19780680     19178440   -3.04%
BenchmarkEncodeRGBA              80738600     79076800   -2.06%

Wall time for Denis Cheremisov's PNG-decoding program given in
https://groups.google.com/group/golang-nuts/browse_thread/thread/22aa8a05040fdd49
Before: 2.44s
After:  2.26s
Delta:  -7%

R=rsc
CC=golang-dev
https://golang.org/cl/6251044
parent 18420978
......@@ -3,7 +3,8 @@
// license that can be found in the LICENSE file.
// Package adler32 implements the Adler-32 checksum.
// Defined in RFC 1950:
//
// It is defined in RFC 1950:
// Adler-32 is composed of two sums accumulated per byte: s1 is
// the sum of all bytes, s2 is the sum of all s1 values. Both sums
// are done modulo 65521. s1 is initialized to 1, s2 to zero. The
......@@ -14,20 +15,22 @@ package adler32
import "hash"
const (
// mod is the largest prime that is less than 65536.
mod = 65521
// nmax is the largest n such that
// 255 * n * (n+1) / 2 + (n+1) * (mod-1) <= 2^32-1.
// It is mentioned in RFC 1950 (search for "5552").
nmax = 5552
)
// The size of an Adler-32 checksum in bytes.
const Size = 4
// digest represents the partial evaluation of a checksum.
type digest struct {
// invariant: (a < mod && b < mod) || a <= b
// invariant: a + b + 255 <= 0xffffffff
a, b uint32
}
// The low 16 bits are s1, the high 16 bits are s2.
type digest uint32
func (d *digest) Reset() { d.a, d.b = 1, 0 }
func (d *digest) Reset() { *d = 1 }
// New returns a new hash.Hash32 computing the Adler-32 checksum.
func New() hash.Hash32 {
......@@ -40,43 +43,36 @@ func (d *digest) Size() int { return Size }
func (d *digest) BlockSize() int { return 1 }
// Add p to the running checksum a, b.
func update(a, b uint32, p []byte) (aa, bb uint32) {
for _, pi := range p {
a += uint32(pi)
b += a
// invariant: a <= b
if b > (0xffffffff-255)/2 {
a %= mod
b %= mod
// invariant: a < mod && b < mod
} else {
// invariant: a + b + 255 <= 2 * b + 255 <= 0xffffffff
// Add p to the running checksum d.
func update(d digest, p []byte) digest {
s1, s2 := uint32(d&0xffff), uint32(d>>16)
for len(p) > 0 {
var q []byte
if len(p) > nmax {
p, q = p[:nmax], p[nmax:]
}
for _, x := range p {
s1 += uint32(x)
s2 += s1
}
s1 %= mod
s2 %= mod
p = q
}
return a, b
}
// Return the 32-bit checksum corresponding to a, b.
func finish(a, b uint32) uint32 {
if b >= mod {
a %= mod
b %= mod
}
return b<<16 | a
return digest(s2<<16 | s1)
}
func (d *digest) Write(p []byte) (nn int, err error) {
d.a, d.b = update(d.a, d.b, p)
*d = update(*d, p)
return len(p), nil
}
func (d *digest) Sum32() uint32 { return finish(d.a, d.b) }
func (d *digest) Sum32() uint32 { return uint32(*d) }
func (d *digest) Sum(in []byte) []byte {
s := d.Sum32()
s := uint32(*d)
return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s))
}
// Checksum returns the Adler-32 checksum of data.
func Checksum(data []byte) uint32 { return finish(update(1, 0, data)) }
func Checksum(data []byte) uint32 { return uint32(update(1, data)) }
......@@ -5,25 +5,23 @@
package adler32
import (
"io"
"strings"
"testing"
)
type _Adler32Test struct {
var golden = []struct {
out uint32
in string
}
var golden = []_Adler32Test{
{0x1, ""},
{0x620062, "a"},
{0x12600c4, "ab"},
{0x24d0127, "abc"},
{0x3d8018b, "abcd"},
{0x5c801f0, "abcde"},
{0x81e0256, "abcdef"},
{0xadb02bd, "abcdefg"},
{0xe000325, "abcdefgh"},
}{
{0x00000001, ""},
{0x00620062, "a"},
{0x012600c4, "ab"},
{0x024d0127, "abc"},
{0x03d8018b, "abcd"},
{0x05c801f0, "abcde"},
{0x081e0256, "abcdef"},
{0x0adb02bd, "abcdefg"},
{0x0e000325, "abcdefgh"},
{0x118e038e, "abcdefghi"},
{0x158603f8, "abcdefghij"},
{0x3f090f02, "Discard medicine more than two years old."},
......@@ -47,17 +45,44 @@ var golden = []_Adler32Test{
{0x91dd304f, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"},
{0x2e5d1316, "How can you write a big system without C++? -Paul Glick"},
{0xd0201df6, "'Invariant assertions' is the most elegant programming technique! -Tom Szymanski"},
{0x211297c8, strings.Repeat("\xff", 5548) + "8"},
{0xbaa198c8, strings.Repeat("\xff", 5549) + "9"},
{0x553499be, strings.Repeat("\xff", 5550) + "0"},
{0xf0c19abe, strings.Repeat("\xff", 5551) + "1"},
{0x8d5c9bbe, strings.Repeat("\xff", 5552) + "2"},
{0x2af69cbe, strings.Repeat("\xff", 5553) + "3"},
{0xc9809dbe, strings.Repeat("\xff", 5554) + "4"},
{0x69189ebe, strings.Repeat("\xff", 5555) + "5"},
{0x86af0001, strings.Repeat("\x00", 1e5)},
{0x79660b4d, strings.Repeat("a", 1e5)},
{0x110588ee, strings.Repeat("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1e4)},
}
// checksum is a slow but simple implementation of the Adler-32 checksum.
// It is a straight port of the sample code in RFC 1950 section 9.
func checksum(p []byte) uint32 {
s1, s2 := uint32(1), uint32(0)
for _, x := range p {
s1 = (s1 + uint32(x)) % mod
s2 = (s2 + s1) % mod
}
return s2<<16 | s1
}
func TestGolden(t *testing.T) {
for i := 0; i < len(golden); i++ {
g := golden[i]
c := New()
io.WriteString(c, g.in)
s := c.Sum32()
if s != g.out {
t.Errorf("adler32(%s) = 0x%x want 0x%x", g.in, s, g.out)
t.FailNow()
for _, g := range golden {
in := g.in
if len(in) > 220 {
in = in[:100] + "..." + in[len(in)-100:]
}
p := []byte(g.in)
if got := checksum(p); got != g.out {
t.Errorf("simple implementation: checksum(%q) = 0x%x want 0x%x", in, got, g.out)
continue
}
if got := Checksum(p); got != g.out {
t.Errorf("optimized implementation: Checksum(%q) = 0x%x want 0x%x", in, got, g.out)
continue
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment