Commit ffd0d02d authored by Nigel Tao's avatar Nigel Tao

compress/flate: benchmark some English text, not just the digits of e.

The testdata/e.txt input is repeated on the longer benchmarks, but the
length of that data is less than flate's window size, so the numbers are
essentially measuring the performance of a trivial compression. A follow-up
CL will add more data to testdata/e.txt.

Sample output on my laptop (linux, amd64):

BenchmarkDecodeDigitsSpeed1e4	    5000	    603153 ns/op	  16.58 MB/s
BenchmarkDecodeDigitsSpeed1e5	    1000	   1465602 ns/op	  68.23 MB/s
BenchmarkDecodeDigitsSpeed1e6	     200	   8036050 ns/op	 124.44 MB/s
BenchmarkDecodeDigitsDefault1e4	    5000	    581796 ns/op	  17.19 MB/s
BenchmarkDecodeDigitsDefault1e5	    2000	    846653 ns/op	 118.11 MB/s
BenchmarkDecodeDigitsDefault1e6	     500	   3385782 ns/op	 295.35 MB/s
BenchmarkDecodeDigitsCompress1e4	    5000	    581180 ns/op	  17.21 MB/s
BenchmarkDecodeDigitsCompress1e5	    2000	    846209 ns/op	 118.17 MB/s
BenchmarkDecodeDigitsCompress1e6	     500	   3386174 ns/op	 295.32 MB/s
BenchmarkDecodeTwainSpeed1e4	    5000	    643563 ns/op	  15.54 MB/s
BenchmarkDecodeTwainSpeed1e5	     500	   5418408 ns/op	  18.46 MB/s
BenchmarkDecodeTwainSpeed1e6	      50	  52277520 ns/op	  19.13 MB/s
BenchmarkDecodeTwainDefault1e4	    5000	    583551 ns/op	  17.14 MB/s
BenchmarkDecodeTwainDefault1e5	     500	   4443428 ns/op	  22.51 MB/s
BenchmarkDecodeTwainDefault1e6	      50	  41862080 ns/op	  23.89 MB/s
BenchmarkDecodeTwainCompress1e4	    5000	    583490 ns/op	  17.14 MB/s
BenchmarkDecodeTwainCompress1e5	     500	   4426356 ns/op	  22.59 MB/s
BenchmarkDecodeTwainCompress1e6	      50	  41657940 ns/op	  24.01 MB/s
BenchmarkEncodeDigitsSpeed1e4	    2000	   1230907 ns/op	   8.12 MB/s
BenchmarkEncodeDigitsSpeed1e5	    1000	   2319129 ns/op	  43.12 MB/s
BenchmarkEncodeDigitsSpeed1e6	     100	  12378950 ns/op	  80.78 MB/s
BenchmarkEncodeDigitsDefault1e4	    1000	   1597865 ns/op	   6.26 MB/s
BenchmarkEncodeDigitsDefault1e5	     500	   3163458 ns/op	  31.61 MB/s
BenchmarkEncodeDigitsDefault1e6	     100	  18770240 ns/op	  53.28 MB/s
BenchmarkEncodeDigitsCompress1e4	    1000	   1603461 ns/op	   6.24 MB/s
BenchmarkEncodeDigitsCompress1e5	     500	   3168766 ns/op	  31.56 MB/s
BenchmarkEncodeDigitsCompress1e6	     100	  18855830 ns/op	  53.03 MB/s
BenchmarkEncodeTwainSpeed1e4	    1000	   1338049 ns/op	   7.47 MB/s
BenchmarkEncodeTwainSpeed1e5	     500	   7341622 ns/op	  13.62 MB/s
BenchmarkEncodeTwainSpeed1e6	      50	  67484600 ns/op	  14.82 MB/s
BenchmarkEncodeTwainDefault1e4	    1000	   1778399 ns/op	   5.62 MB/s
BenchmarkEncodeTwainDefault1e5	     100	  23261810 ns/op	   4.30 MB/s
BenchmarkEncodeTwainDefault1e6	      10	 243533600 ns/op	   4.11 MB/s
BenchmarkEncodeTwainCompress1e4	    1000	   1795469 ns/op	   5.57 MB/s
BenchmarkEncodeTwainCompress1e5	      50	  29447140 ns/op	   3.40 MB/s
BenchmarkEncodeTwainCompress1e6	       5	 321686800 ns/op	   3.11 MB/s
ok  	compress/flate	89.246s

R=rsc, r
CC=golang-dev
https://golang.org/cl/6195055
parent 32b85baa
......@@ -12,20 +12,47 @@ import (
"testing"
)
func benchmarkDecoder(b *testing.B, level, n int) {
const (
digits = iota
twain
)
var testfiles = []string{
// Digits is the digits of the irrational number e. Its decimal representation
// does not repeat, but there are only 10 posible digits, so it should be
// reasonably compressible.
//
// TODO(nigeltao): e.txt is only 10K long, so when benchmarking 100K or 1000K
// of input, the digits are just repeated from the beginning, and flate can
// trivially compress this as a length/distance copy operation. Thus,
// BenchmarkDecodeDigitsXxx1e6 is essentially just measuring the speed of the
// forwardCopy implementation, but isn't particularly representative of real
// usage. The TODO is to replace e.txt with 100K digits, not just 10K digits,
// since that's larger than the windowSize 1<<15 (= 32768).
digits: "../testdata/e.txt",
// Twain is Project Gutenberg's edition of Mark Twain's classic English novel.
twain: "../testdata/Mark.Twain-Tom.Sawyer.txt",
}
func benchmarkDecode(b *testing.B, testfile, level, n int) {
b.StopTimer()
b.SetBytes(int64(n))
buf0, err := ioutil.ReadFile("../testdata/e.txt")
buf0, err := ioutil.ReadFile(testfiles[testfile])
if err != nil {
b.Fatal(err)
}
buf0 = buf0[:10000]
if len(buf0) == 0 {
b.Fatalf("test file %q has no data", testfiles[testfile])
}
compressed := new(bytes.Buffer)
w, err := NewWriter(compressed, level)
if err != nil {
b.Fatal(err)
}
for i := 0; i < n; i += len(buf0) {
if len(buf0) > n-i {
buf0 = buf0[:n-i]
}
io.Copy(w, bytes.NewBuffer(buf0))
}
w.Close()
......@@ -38,38 +65,29 @@ func benchmarkDecoder(b *testing.B, level, n int) {
}
}
func BenchmarkDecoderBestSpeed1K(b *testing.B) {
benchmarkDecoder(b, BestSpeed, 1e4)
}
func BenchmarkDecoderBestSpeed10K(b *testing.B) {
benchmarkDecoder(b, BestSpeed, 1e5)
}
func BenchmarkDecoderBestSpeed100K(b *testing.B) {
benchmarkDecoder(b, BestSpeed, 1e6)
}
func BenchmarkDecoderDefaultCompression1K(b *testing.B) {
benchmarkDecoder(b, DefaultCompression, 1e4)
}
func BenchmarkDecoderDefaultCompression10K(b *testing.B) {
benchmarkDecoder(b, DefaultCompression, 1e5)
}
func BenchmarkDecoderDefaultCompression100K(b *testing.B) {
benchmarkDecoder(b, DefaultCompression, 1e6)
}
func BenchmarkDecoderBestCompression1K(b *testing.B) {
benchmarkDecoder(b, BestCompression, 1e4)
}
func BenchmarkDecoderBestCompression10K(b *testing.B) {
benchmarkDecoder(b, BestCompression, 1e5)
}
// These short names are so that gofmt doesn't break the BenchmarkXxx function
// bodies below over multiple lines.
const (
speed = BestSpeed
default_ = DefaultCompression
compress = BestCompression
)
func BenchmarkDecoderBestCompression100K(b *testing.B) {
benchmarkDecoder(b, BestCompression, 1e6)
}
func BenchmarkDecodeDigitsSpeed1e4(b *testing.B) { benchmarkDecode(b, digits, speed, 1e4) }
func BenchmarkDecodeDigitsSpeed1e5(b *testing.B) { benchmarkDecode(b, digits, speed, 1e5) }
func BenchmarkDecodeDigitsSpeed1e6(b *testing.B) { benchmarkDecode(b, digits, speed, 1e6) }
func BenchmarkDecodeDigitsDefault1e4(b *testing.B) { benchmarkDecode(b, digits, default_, 1e4) }
func BenchmarkDecodeDigitsDefault1e5(b *testing.B) { benchmarkDecode(b, digits, default_, 1e5) }
func BenchmarkDecodeDigitsDefault1e6(b *testing.B) { benchmarkDecode(b, digits, default_, 1e6) }
func BenchmarkDecodeDigitsCompress1e4(b *testing.B) { benchmarkDecode(b, digits, compress, 1e4) }
func BenchmarkDecodeDigitsCompress1e5(b *testing.B) { benchmarkDecode(b, digits, compress, 1e5) }
func BenchmarkDecodeDigitsCompress1e6(b *testing.B) { benchmarkDecode(b, digits, compress, 1e6) }
func BenchmarkDecodeTwainSpeed1e4(b *testing.B) { benchmarkDecode(b, twain, speed, 1e4) }
func BenchmarkDecodeTwainSpeed1e5(b *testing.B) { benchmarkDecode(b, twain, speed, 1e5) }
func BenchmarkDecodeTwainSpeed1e6(b *testing.B) { benchmarkDecode(b, twain, speed, 1e6) }
func BenchmarkDecodeTwainDefault1e4(b *testing.B) { benchmarkDecode(b, twain, default_, 1e4) }
func BenchmarkDecodeTwainDefault1e5(b *testing.B) { benchmarkDecode(b, twain, default_, 1e5) }
func BenchmarkDecodeTwainDefault1e6(b *testing.B) { benchmarkDecode(b, twain, default_, 1e6) }
func BenchmarkDecodeTwainCompress1e4(b *testing.B) { benchmarkDecode(b, twain, compress, 1e4) }
func BenchmarkDecodeTwainCompress1e5(b *testing.B) { benchmarkDecode(b, twain, compress, 1e5) }
func BenchmarkDecodeTwainCompress1e6(b *testing.B) { benchmarkDecode(b, twain, compress, 1e6) }
......@@ -10,16 +10,21 @@ import (
"testing"
)
func benchmarkEncoder(b *testing.B, level, n int) {
func benchmarkEncoder(b *testing.B, testfile, level, n int) {
b.StopTimer()
b.SetBytes(int64(n))
buf0, err := ioutil.ReadFile("../testdata/e.txt")
buf0, err := ioutil.ReadFile(testfiles[testfile])
if err != nil {
b.Fatal(err)
}
buf0 = buf0[:10000]
if len(buf0) == 0 {
b.Fatalf("test file %q has no data", testfiles[testfile])
}
buf1 := make([]byte, n)
for i := 0; i < n; i += len(buf0) {
if len(buf0) > n-i {
buf0 = buf0[:n-i]
}
copy(buf1[i:], buf0)
}
buf0 = nil
......@@ -35,38 +40,21 @@ func benchmarkEncoder(b *testing.B, level, n int) {
}
}
func BenchmarkEncoderBestSpeed1K(b *testing.B) {
benchmarkEncoder(b, BestSpeed, 1e4)
}
func BenchmarkEncoderBestSpeed10K(b *testing.B) {
benchmarkEncoder(b, BestSpeed, 1e5)
}
func BenchmarkEncoderBestSpeed100K(b *testing.B) {
benchmarkEncoder(b, BestSpeed, 1e6)
}
func BenchmarkEncoderDefaultCompression1K(b *testing.B) {
benchmarkEncoder(b, DefaultCompression, 1e4)
}
func BenchmarkEncoderDefaultCompression10K(b *testing.B) {
benchmarkEncoder(b, DefaultCompression, 1e5)
}
func BenchmarkEncoderDefaultCompression100K(b *testing.B) {
benchmarkEncoder(b, DefaultCompression, 1e6)
}
func BenchmarkEncoderBestCompression1K(b *testing.B) {
benchmarkEncoder(b, BestCompression, 1e4)
}
func BenchmarkEncoderBestCompression10K(b *testing.B) {
benchmarkEncoder(b, BestCompression, 1e5)
}
func BenchmarkEncoderBestCompression100K(b *testing.B) {
benchmarkEncoder(b, BestCompression, 1e6)
}
func BenchmarkEncodeDigitsSpeed1e4(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e4) }
func BenchmarkEncodeDigitsSpeed1e5(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e5) }
func BenchmarkEncodeDigitsSpeed1e6(b *testing.B) { benchmarkEncoder(b, digits, speed, 1e6) }
func BenchmarkEncodeDigitsDefault1e4(b *testing.B) { benchmarkEncoder(b, digits, default_, 1e4) }
func BenchmarkEncodeDigitsDefault1e5(b *testing.B) { benchmarkEncoder(b, digits, default_, 1e5) }
func BenchmarkEncodeDigitsDefault1e6(b *testing.B) { benchmarkEncoder(b, digits, default_, 1e6) }
func BenchmarkEncodeDigitsCompress1e4(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e4) }
func BenchmarkEncodeDigitsCompress1e5(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e5) }
func BenchmarkEncodeDigitsCompress1e6(b *testing.B) { benchmarkEncoder(b, digits, compress, 1e6) }
func BenchmarkEncodeTwainSpeed1e4(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e4) }
func BenchmarkEncodeTwainSpeed1e5(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e5) }
func BenchmarkEncodeTwainSpeed1e6(b *testing.B) { benchmarkEncoder(b, twain, speed, 1e6) }
func BenchmarkEncodeTwainDefault1e4(b *testing.B) { benchmarkEncoder(b, twain, default_, 1e4) }
func BenchmarkEncodeTwainDefault1e5(b *testing.B) { benchmarkEncoder(b, twain, default_, 1e5) }
func BenchmarkEncodeTwainDefault1e6(b *testing.B) { benchmarkEncoder(b, twain, default_, 1e6) }
func BenchmarkEncodeTwainCompress1e4(b *testing.B) { benchmarkEncoder(b, twain, compress, 1e4) }
func BenchmarkEncodeTwainCompress1e5(b *testing.B) { benchmarkEncoder(b, twain, compress, 1e5) }
func BenchmarkEncodeTwainCompress1e6(b *testing.B) { benchmarkEncoder(b, twain, compress, 1e6) }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment