Commit fdceb2a1 authored by Katie Hockman

compress: reduce copies of new text for compression testing

The previous book was 387 KiB decompressed and 119 KiB compressed; the
new book is 567 KiB decompressed and 132 KiB compressed. Overall, this
change will reduce the release binary size by 196 KiB. The new book will
allow for slightly more extensive compression testing with a larger
text.

Command to run the benchmark tests used with benchstat:
`../bin/go test -run='^$' -count=4 -bench=. compress/bzip2 compress/flate`

When running the benchmarks locally, I changed "Newton" to "Twain" and
filtered the tests with the -bench flag to include only those which were
relevant to these changes.

benchstat results below:

name                            old time/op    new time/op     delta
DecodeTwain-8                     19.6ms ± 2%     24.1ms ± 1%  +23.04%  (p=0.029 n=4+4)
Decode/Twain/Huffman/1e4-8         140µs ± 3%      139µs ± 5%     ~     (p=0.886 n=4+4)
Decode/Twain/Huffman/1e5-8        1.27ms ± 3%     1.26ms ± 1%     ~     (p=1.000 n=4+4)
Decode/Twain/Huffman/1e6-8        12.4ms ± 0%     13.2ms ± 1%   +6.42%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e4-8           133µs ± 1%      123µs ± 1%   -7.35%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e5-8          1.20ms ± 0%     1.02ms ± 3%  -15.32%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e6-8          12.0ms ± 2%     10.1ms ± 3%  -15.89%  (p=0.029 n=4+4)
Decode/Twain/Default/1e4-8         131µs ± 6%      108µs ± 5%  -17.84%  (p=0.029 n=4+4)
Decode/Twain/Default/1e5-8        1.06ms ± 2%     0.80ms ± 1%  -24.97%  (p=0.029 n=4+4)
Decode/Twain/Default/1e6-8        10.0ms ± 3%      8.0ms ± 3%  -20.06%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e4-8     128µs ± 4%      115µs ± 4%   -9.70%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e5-8    1.04ms ± 2%     0.83ms ± 4%  -20.37%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e6-8    10.4ms ± 4%      8.1ms ± 5%  -22.25%  (p=0.029 n=4+4)
Encode/Twain/Huffman/1e4-8        55.7µs ± 2%     55.6µs ± 1%     ~     (p=1.000 n=4+4)
Encode/Twain/Huffman/1e5-8         441µs ± 0%      435µs ± 2%     ~     (p=0.343 n=4+4)
Encode/Twain/Huffman/1e6-8        4.31ms ± 4%     4.30ms ± 4%     ~     (p=0.886 n=4+4)
Encode/Twain/Speed/1e4-8           193µs ± 1%      166µs ± 2%  -14.09%  (p=0.029 n=4+4)
Encode/Twain/Speed/1e5-8          1.54ms ± 1%     1.22ms ± 1%  -20.53%  (p=0.029 n=4+4)
Encode/Twain/Speed/1e6-8          15.3ms ± 1%     12.2ms ± 3%  -20.62%  (p=0.029 n=4+4)
Encode/Twain/Default/1e4-8         393µs ± 1%      390µs ± 1%     ~     (p=0.114 n=4+4)
Encode/Twain/Default/1e5-8        6.12ms ± 4%     6.02ms ± 5%     ~     (p=0.486 n=4+4)
Encode/Twain/Default/1e6-8        69.4ms ± 5%     59.0ms ± 4%  -15.07%  (p=0.029 n=4+4)
Encode/Twain/Compression/1e4-8     423µs ± 2%      379µs ± 2%  -10.34%  (p=0.029 n=4+4)
Encode/Twain/Compression/1e5-8    7.00ms ± 1%     7.88ms ± 3%  +12.49%  (p=0.029 n=4+4)
Encode/Twain/Compression/1e6-8    76.6ms ± 5%     80.9ms ± 3%     ~     (p=0.114 n=4+4)

name                            old speed      new speed       delta
DecodeTwain-8                   19.8MB/s ± 2%   23.6MB/s ± 1%  +18.84%  (p=0.029 n=4+4)
Decode/Twain/Huffman/1e4-8      71.7MB/s ± 3%   72.1MB/s ± 6%     ~     (p=0.943 n=4+4)
Decode/Twain/Huffman/1e5-8      78.8MB/s ± 3%   79.5MB/s ± 1%     ~     (p=1.000 n=4+4)
Decode/Twain/Huffman/1e6-8      80.5MB/s ± 0%   75.6MB/s ± 1%   -6.03%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e4-8        75.2MB/s ± 1%   81.2MB/s ± 1%   +7.93%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e5-8        83.4MB/s ± 0%   98.6MB/s ± 3%  +18.16%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e6-8        83.6MB/s ± 2%   99.5MB/s ± 3%  +18.91%  (p=0.029 n=4+4)
Decode/Twain/Default/1e4-8      76.3MB/s ± 6%   92.8MB/s ± 4%  +21.62%  (p=0.029 n=4+4)
Decode/Twain/Default/1e5-8      94.4MB/s ± 3%  125.7MB/s ± 1%  +33.24%  (p=0.029 n=4+4)
Decode/Twain/Default/1e6-8       100MB/s ± 3%    125MB/s ± 3%  +25.12%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e4-8  78.4MB/s ± 4%   86.8MB/s ± 4%  +10.73%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e5-8  95.7MB/s ± 2%  120.3MB/s ± 4%  +25.65%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e6-8  96.4MB/s ± 4%  124.0MB/s ± 5%  +28.64%  (p=0.029 n=4+4)
Encode/Twain/Huffman/1e4-8       179MB/s ± 2%    180MB/s ± 1%     ~     (p=1.000 n=4+4)
Encode/Twain/Huffman/1e5-8       227MB/s ± 0%    230MB/s ± 2%     ~     (p=0.343 n=4+4)
Encode/Twain/Huffman/1e6-8       232MB/s ± 4%    233MB/s ± 4%     ~     (p=0.886 n=4+4)
Encode/Twain/Speed/1e4-8        51.8MB/s ± 1%   60.4MB/s ± 2%  +16.43%  (p=0.029 n=4+4)
Encode/Twain/Speed/1e5-8        65.1MB/s ± 1%   81.9MB/s ± 1%  +25.83%  (p=0.029 n=4+4)
Encode/Twain/Speed/1e6-8        65.2MB/s ± 1%   82.2MB/s ± 3%  +26.00%  (p=0.029 n=4+4)
Encode/Twain/Default/1e4-8      25.4MB/s ± 1%   25.6MB/s ± 1%     ~     (p=0.114 n=4+4)
Encode/Twain/Default/1e5-8      16.4MB/s ± 4%   16.6MB/s ± 5%     ~     (p=0.486 n=4+4)
Encode/Twain/Default/1e6-8      14.4MB/s ± 6%   17.0MB/s ± 4%  +17.67%  (p=0.029 n=4+4)
Encode/Twain/Compression/1e4-8  23.6MB/s ± 2%   26.4MB/s ± 2%  +11.54%  (p=0.029 n=4+4)
Encode/Twain/Compression/1e5-8  14.3MB/s ± 1%   12.7MB/s ± 3%  -11.08%  (p=0.029 n=4+4)
Encode/Twain/Compression/1e6-8  13.1MB/s ± 4%   12.4MB/s ± 3%     ~     (p=0.114 n=4+4)

name                            old alloc/op   new alloc/op    delta
DecodeTwain-8                     3.63MB ± 0%     3.63MB ± 0%   +0.15%  (p=0.029 n=4+4)
Decode/Twain/Huffman/1e4-8        42.0kB ± 0%     41.3kB ± 0%   -1.62%  (p=0.029 n=4+4)
Decode/Twain/Huffman/1e5-8        43.5kB ± 0%     45.1kB ± 0%   +3.74%  (p=0.029 n=4+4)
Decode/Twain/Huffman/1e6-8        71.7kB ± 0%     80.0kB ± 0%  +11.55%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e4-8          41.2kB ± 0%     41.3kB ± 0%     ~     (p=0.286 n=4+4)
Decode/Twain/Speed/1e5-8          45.1kB ± 0%     43.9kB ± 0%   -2.80%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e6-8          72.8kB ± 0%     81.3kB ± 0%  +11.72%  (p=0.029 n=4+4)
Decode/Twain/Default/1e4-8        41.2kB ± 0%     41.2kB ± 0%   -0.22%  (p=0.029 n=4+4)
Decode/Twain/Default/1e5-8        44.4kB ± 0%     43.0kB ± 0%   -3.02%  (p=0.029 n=4+4)
Decode/Twain/Default/1e6-8        71.0kB ± 0%     61.8kB ± 0%  -13.00%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e4-8    41.3kB ± 0%     41.2kB ± 0%   -0.29%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e5-8    43.3kB ± 0%     43.0kB ± 0%   -0.72%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e6-8    69.1kB ± 0%     63.7kB ± 0%   -7.90%  (p=0.029 n=4+4)

name                            old allocs/op  new allocs/op   delta
DecodeTwain-8                       51.0 ± 0%       51.2 ± 1%     ~     (p=1.000 n=4+4)
Decode/Twain/Huffman/1e4-8          15.0 ± 0%       14.0 ± 0%   -6.67%  (p=0.029 n=4+4)
Decode/Twain/Huffman/1e5-8          20.0 ± 0%       23.0 ± 0%  +15.00%  (p=0.029 n=4+4)
Decode/Twain/Huffman/1e6-8           134 ± 0%        161 ± 0%  +20.15%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e4-8            17.0 ± 0%       18.0 ± 0%   +5.88%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e5-8            30.0 ± 0%       31.0 ± 0%   +3.33%  (p=0.029 n=4+4)
Decode/Twain/Speed/1e6-8             193 ± 0%        228 ± 0%  +18.13%  (p=0.029 n=4+4)
Decode/Twain/Default/1e4-8          17.0 ± 0%       15.0 ± 0%  -11.76%  (p=0.029 n=4+4)
Decode/Twain/Default/1e5-8          28.0 ± 0%       32.0 ± 0%  +14.29%  (p=0.029 n=4+4)
Decode/Twain/Default/1e6-8           199 ± 0%        158 ± 0%  -20.60%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e4-8      17.0 ± 0%       15.0 ± 0%  -11.76%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e5-8      28.0 ± 0%       32.0 ± 0%  +14.29%  (p=0.029 n=4+4)
Decode/Twain/Compression/1e6-8       196 ± 0%        150 ± 0%  -23.47%  (p=0.029 n=4+4)

Updates #27151

Change-Id: I6c439694ed16a33bb4c63fbfb8570c7de46b4f2d
Reviewed-on: https://go-review.googlesource.com/135495
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
parent 5b3aafe2
...@@ -177,6 +177,8 @@ go src=.. ...@@ -177,6 +177,8 @@ go src=..
strconv strconv
testdata testdata
+ +
testdata
+
text text
template template
testdata testdata
......
...@@ -214,7 +214,7 @@ func TestZeroRead(t *testing.T) { ...@@ -214,7 +214,7 @@ func TestZeroRead(t *testing.T) {
var ( var (
digits = mustLoadFile("testdata/e.txt.bz2") digits = mustLoadFile("testdata/e.txt.bz2")
twain = mustLoadFile("testdata/Mark.Twain-Tom.Sawyer.txt.bz2") newton = mustLoadFile("testdata/Isaac.Newton-Opticks.txt.bz2")
random = mustLoadFile("testdata/random.data.bz2") random = mustLoadFile("testdata/random.data.bz2")
) )
...@@ -236,5 +236,5 @@ func benchmarkDecode(b *testing.B, compressed []byte) { ...@@ -236,5 +236,5 @@ func benchmarkDecode(b *testing.B, compressed []byte) {
} }
func BenchmarkDecodeDigits(b *testing.B) { benchmarkDecode(b, digits) } func BenchmarkDecodeDigits(b *testing.B) { benchmarkDecode(b, digits) }
func BenchmarkDecodeTwain(b *testing.B) { benchmarkDecode(b, twain) } func BenchmarkDecodeNewton(b *testing.B) { benchmarkDecode(b, newton) }
func BenchmarkDecodeRand(b *testing.B) { benchmarkDecode(b, random) } func BenchmarkDecodeRand(b *testing.B) { benchmarkDecode(b, random) }
...@@ -371,9 +371,9 @@ var deflateInflateStringTests = []deflateInflateStringTest{ ...@@ -371,9 +371,9 @@ var deflateInflateStringTests = []deflateInflateStringTest{
[...]int{100018, 50650, 50960, 51150, 50930, 50790, 50790, 50790, 50790, 50790, 43683}, [...]int{100018, 50650, 50960, 51150, 50930, 50790, 50790, 50790, 50790, 50790, 43683},
}, },
{ {
"../testdata/Mark.Twain-Tom.Sawyer.txt", "../../testdata/Isaac.Newton-Opticks.txt",
"Mark.Twain-Tom.Sawyer", "Isaac.Newton-Opticks",
[...]int{407330, 187598, 180361, 172974, 169160, 163476, 160936, 160506, 160295, 160295, 233460}, [...]int{567248, 218338, 198211, 193152, 181100, 175427, 175427, 173597, 173422, 173422, 325240},
}, },
} }
...@@ -654,7 +654,7 @@ func (w *failWriter) Write(b []byte) (int, error) { ...@@ -654,7 +654,7 @@ func (w *failWriter) Write(b []byte) (int, error) {
func TestWriterPersistentError(t *testing.T) { func TestWriterPersistentError(t *testing.T) {
t.Parallel() t.Parallel()
d, err := ioutil.ReadFile("../testdata/Mark.Twain-Tom.Sawyer.txt") d, err := ioutil.ReadFile("../../testdata/Isaac.Newton-Opticks.txt")
if err != nil { if err != nil {
t.Fatalf("ReadFile: %v", err) t.Fatalf("ReadFile: %v", err)
} }
......
...@@ -27,8 +27,8 @@ var suites = []struct{ name, file string }{ ...@@ -27,8 +27,8 @@ var suites = []struct{ name, file string }{
// does not repeat, but there are only 10 possible digits, so it should be // does not repeat, but there are only 10 possible digits, so it should be
// reasonably compressible. // reasonably compressible.
{"Digits", "../testdata/e.txt"}, {"Digits", "../testdata/e.txt"},
// Twain is Mark Twain's classic English novel. // Newton is Isaac Newton's educational text on Opticks.
{"Twain", "../testdata/Mark.Twain-Tom.Sawyer.txt"}, {"Newton", "../../testdata/Isaac.Newton-Opticks.txt"},
} }
func BenchmarkDecode(b *testing.B) { func BenchmarkDecode(b *testing.B) {
......
This diff is collapsed.
...@@ -17,9 +17,9 @@ import ( ...@@ -17,9 +17,9 @@ import (
) )
const ( const (
twain = "testdata/Mark.Twain-Tom.Sawyer.txt" newton = "../testdata/Isaac.Newton-Opticks.txt"
twainLen = 387851 newtonLen = 567198
twainSHA256 = "461eb7cb2d57d293fc680c836464c9125e4382be3596f7d415093ae9db8fcb0e" newtonSHA256 = "d4a9ac22462b35e7821a4f2706c211093da678620a8f9997989ee7cf8d507bbd"
) )
func TestSendfile(t *testing.T) { func TestSendfile(t *testing.T) {
...@@ -43,7 +43,7 @@ func TestSendfile(t *testing.T) { ...@@ -43,7 +43,7 @@ func TestSendfile(t *testing.T) {
defer close(errc) defer close(errc)
defer conn.Close() defer conn.Close()
f, err := os.Open(twain) f, err := os.Open(newton)
if err != nil { if err != nil {
errc <- err errc <- err
return return
...@@ -58,8 +58,8 @@ func TestSendfile(t *testing.T) { ...@@ -58,8 +58,8 @@ func TestSendfile(t *testing.T) {
return return
} }
if sbytes != twainLen { if sbytes != newtonLen {
errc <- fmt.Errorf("sent %d bytes; expected %d", sbytes, twainLen) errc <- fmt.Errorf("sent %d bytes; expected %d", sbytes, newtonLen)
return return
} }
}() }()
...@@ -79,11 +79,11 @@ func TestSendfile(t *testing.T) { ...@@ -79,11 +79,11 @@ func TestSendfile(t *testing.T) {
t.Error(err) t.Error(err)
} }
if rbytes != twainLen { if rbytes != newtonLen {
t.Errorf("received %d bytes; expected %d", rbytes, twainLen) t.Errorf("received %d bytes; expected %d", rbytes, newtonLen)
} }
if res := hex.EncodeToString(h.Sum(nil)); res != twainSHA256 { if res := hex.EncodeToString(h.Sum(nil)); res != newtonSHA256 {
t.Error("retrieved data hash did not match") t.Error("retrieved data hash did not match")
} }
...@@ -113,7 +113,7 @@ func TestSendfileParts(t *testing.T) { ...@@ -113,7 +113,7 @@ func TestSendfileParts(t *testing.T) {
defer close(errc) defer close(errc)
defer conn.Close() defer conn.Close()
f, err := os.Open(twain) f, err := os.Open(newton)
if err != nil { if err != nil {
errc <- err errc <- err
return return
...@@ -174,7 +174,7 @@ func TestSendfileSeeked(t *testing.T) { ...@@ -174,7 +174,7 @@ func TestSendfileSeeked(t *testing.T) {
defer close(errc) defer close(errc)
defer conn.Close() defer conn.Close()
f, err := os.Open(twain) f, err := os.Open(newton)
if err != nil { if err != nil {
errc <- err errc <- err
return return
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment