archive/zip: speed up Zip64 test

Took 76 seconds or so before. By avoiding flate and crc32 on 4GB of data, it's now only 12 seconds. Still a slow test, but not painful to run anymore when you forget -short. R=golang-dev, adg CC=golang-dev https://golang.org/cl/12950043

archive/zip: speed up Zip64 test
Took 76 seconds or so before. By avoiding flate and crc32 on 4GB of data, it's now only 12 seconds. Still a slow test, but not painful to run anymore when you forget -short. R=golang-dev, adg CC=golang-dev https://golang.org/cl/12950043
ec837ad7 · Brad Fitzpatrick · 35d8bb39 · ec837ad7
Commit ec837ad7 authored Aug 15, 2013 by Brad Fitzpatrick
Hide whitespace changes
Inline Side-by-side

Showing with 151 additions and 11 deletions

zip_test.go src/pkg/archive/zip/zip_test.go +151 -11

No files found.
--- a/src/pkg/archive/zip/zip_test.go
+++ b/src/pkg/archive/zip/zip_test.go
@@ -9,8 +9,10 @@ package zip
 import (
 	"bytes"
 	"fmt"
+	"hash"
 	"io"
 	"io/ioutil"
+	"sort"
 	"strings"
 	"testing"
 	"time"
@@ -105,29 +107,156 @@ func TestFileHeaderRoundTrip64(t *testing.T) {
 	testHeaderRoundTrip(fh, uint32max, fh.UncompressedSize64, t)
 }

+type repeatedByte struct {
+	off int64
+	b   byte
+	n   int64
+}
+
+// rleBuffer is a run-length-encoded byte buffer.
+// It's an io.Writer (like a bytes.Buffer) and also an io.ReaderAt,
+// allowing random-access reads.
+type rleBuffer struct {
+	buf []repeatedByte
+}
+
+func (r *rleBuffer) Size() int64 {
+	if len(r.buf) == 0 {
+		return 0
+	}
+	last := &r.buf[len(r.buf)-1]
+	return last.off + last.n
+}
+
+func (r *rleBuffer) Write(p []byte) (n int, err error) {
+	var rp *repeatedByte
+	if len(r.buf) > 0 {
+		rp = &r.buf[len(r.buf)-1]
+		// Fast path, if p is entirely the same byte repeated.
+		if lastByte := rp.b; len(p) > 0 && p[0] == lastByte {
+			all := true
+			for _, b := range p {
+				if b != lastByte {
+					all = false
+					break
+				}
+			}
+			if all {
+				rp.n += int64(len(p))
+				return len(p), nil
+			}
+		}
+	}
+
+	for _, b := range p {
+		if rp == nil || rp.b != b {
+			r.buf = append(r.buf, repeatedByte{r.Size(), b, 1})
+			rp = &r.buf[len(r.buf)-1]
+		} else {
+			rp.n++
+		}
+	}
+	return len(p), nil
+}
+
+func (r *rleBuffer) ReadAt(p []byte, off int64) (n int, err error) {
+	if len(p) == 0 {
+		return
+	}
+	skipParts := sort.Search(len(r.buf), func(i int) bool {
+		part := &r.buf[i]
+		return part.off+part.n > off
+	})
+	parts := r.buf[skipParts:]
+	if len(parts) > 0 {
+		skipBytes := off - parts[0].off
+		for len(parts) > 0 {
+			part := parts[0]
+			for i := skipBytes; i < part.n; i++ {
+				if n == len(p) {
+					return
+				}
+				p[n] = part.b
+				n++
+			}
+			parts = parts[1:]
+			skipBytes = 0
+		}
+	}
+	if n != len(p) {
+		err = io.ErrUnexpectedEOF
+	}
+	return
+}
+
+// Just testing the rleBuffer used in the Zip64 test above. Not used by the zip code.
+func TestRLEBuffer(t *testing.T) {
+	b := new(rleBuffer)
+	var all []byte
+	writes := []string{"abcdeee", "eeeeeee", "eeeefghaaiii"}
+	for _, w := range writes {
+		b.Write([]byte(w))
+		all = append(all, w...)
+	}
+	if len(b.buf) != 10 {
+		t.Fatalf("len(b.buf) = %d; want 10", len(b.buf))
+	}
+
+	for i := 0; i < len(all); i++ {
+		for j := 0; j < len(all)-i; j++ {
+			buf := make([]byte, j)
+			n, err := b.ReadAt(buf, int64(i))
+			if err != nil || n != len(buf) {
+				t.Errorf("ReadAt(%d, %d) = %d, %v; want %d, nil", i, j, n, err, len(buf))
+			}
+			if !bytes.Equal(buf, all[i:i+j]) {
+				t.Errorf("ReadAt(%d, %d) = %q; want %q", i, j, buf, all[i:i+j])
+			}
+		}
+	}
+}
+
+// fakeHash32 is a dummy Hash32 that always returns 0.
+type fakeHash32 struct {
+	hash.Hash32
+}
+
+func (fakeHash32) Write(p []byte) (int, error) { return len(p), nil }
+func (fakeHash32) Sum32() uint32               { return 0 }
+
 func TestZip64(t *testing.T) {
 	if testing.Short() {
 		t.Skip("slow test; skipping")
 	}
+	const size = 1 << 32 // before the "END\n" part
+	testZip64(t, size)
+}
+
+func testZip64(t testing.TB, size int64) {
+	const chunkSize = 1024
+	chunks := int(size / chunkSize)
 	// write 2^32 bytes plus "END\n" to a zip file
-	buf := new(bytes.Buffer)
+	buf := new(rleBuffer)
 	w := NewWriter(buf)
-	f, err := w.Create("huge.txt")
+	f, err := w.CreateHeader(&FileHeader{
+		Name:   "huge.txt",
+		Method: Store,
+	})
 	if err != nil {
 		t.Fatal(err)
 	}
-	chunk := make([]byte, 1024)
+	f.(*fileWriter).crc32 = fakeHash32{}
+	chunk := make([]byte, chunkSize)
 	for i := range chunk {
 		chunk[i] = '.'
 	}
-	chunk[len(chunk)-1] = '\n'
-	end := []byte("END\n")
-	for i := 0; i < (1<<32)/1024; i++ {
+	for i := 0; i < chunks; i++ {
 		_, err := f.Write(chunk)
 		if err != nil {
 			t.Fatal("write chunk:", err)
 		}
 	}
+	end := []byte("END\n")
 	_, err = f.Write(end)
 	if err != nil {
 		t.Fatal("write end:", err)
@@ -137,7 +266,7 @@ func TestZip64(t *testing.T) {
 	}

 	// read back zip file and check that we get to the end of it
-	r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
+	r, err := NewReader(buf, int64(buf.Size()))
 	if err != nil {
 		t.Fatal("reader:", err)
 	}
@@ -146,7 +275,8 @@ func TestZip64(t *testing.T) {
 	if err != nil {
 		t.Fatal("opening:", err)
 	}
-	for i := 0; i < (1<<32)/1024; i++ {
+	rc.(*checksumReader).hash = fakeHash32{}
+	for i := 0; i < chunks; i++ {
 		_, err := io.ReadFull(rc, chunk)
 		if err != nil {
 			t.Fatal("read:", err)
@@ -163,11 +293,13 @@ func TestZip64(t *testing.T) {
 	if err != nil {
 		t.Fatal("closing:", err)
 	}
-	if got, want := f0.UncompressedSize, uint32(uint32max); got != want {
-		t.Errorf("UncompressedSize %d, want %d", got, want)
+	if size == 1<<32 {
+		if got, want := f0.UncompressedSize, uint32(uint32max); got != want {
+			t.Errorf("UncompressedSize %d, want %d", got, want)
+		}
 	}

-	if got, want := f0.UncompressedSize64, (1<<32)+uint64(len(end)); got != want {
+	if got, want := f0.UncompressedSize64, uint64(size)+uint64(len(end)); got != want {
 		t.Errorf("UncompressedSize64 %d, want %d", got, want)
 	}
 }
@@ -253,3 +385,11 @@ func TestZeroLengthHeader(t *testing.T) {
 	}
 	testValidHeader(&h, t)
 }
+
+// Just benchmarking how fast the Zip64 test above is. Not related to
+// our zip performance, since the test above disabled CRC32 and flate.
+func BenchmarkZip64Test(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		testZip64(b, 1<<26)
+	}
+}