Commit 3cea4131 authored by Brad Fitzpatrick's avatar Brad Fitzpatrick

archive/zip: write data descriptor signature for OS X; fix bugs reading it

We now always write the "optional" streaming data descriptor
signature, which turns out to be required for OS X.

Also, handle reading the data descriptor with or without the
signature, per the spec's recommendation. Fix data descriptor
reading bugs found in the process.

Fixes #3252

R=golang-dev, alex.brainman, nigeltao, rsc
CC=golang-dev
https://golang.org/cl/5787062
parent ece0d0e7
...@@ -124,10 +124,6 @@ func (f *File) Open() (rc io.ReadCloser, err error) { ...@@ -124,10 +124,6 @@ func (f *File) Open() (rc io.ReadCloser, err error) {
return return
} }
size := int64(f.CompressedSize) size := int64(f.CompressedSize)
if size == 0 && f.hasDataDescriptor() {
// permit SectionReader to see the rest of the file
size = f.zipsize - (f.headerOffset + bodyOffset)
}
r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
switch f.Method { switch f.Method {
case Store: // (no compression) case Store: // (no compression)
...@@ -136,10 +132,13 @@ func (f *File) Open() (rc io.ReadCloser, err error) { ...@@ -136,10 +132,13 @@ func (f *File) Open() (rc io.ReadCloser, err error) {
rc = flate.NewReader(r) rc = flate.NewReader(r)
default: default:
err = ErrAlgorithm err = ErrAlgorithm
return
} }
if rc != nil { var desr io.Reader
rc = &checksumReader{rc, crc32.NewIEEE(), f, r} if f.hasDataDescriptor() {
desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen)
} }
rc = &checksumReader{rc, crc32.NewIEEE(), f, desr, nil}
return return
} }
...@@ -147,23 +146,31 @@ type checksumReader struct { ...@@ -147,23 +146,31 @@ type checksumReader struct {
rc io.ReadCloser rc io.ReadCloser
hash hash.Hash32 hash hash.Hash32
f *File f *File
zipr io.Reader // for reading the data descriptor desr io.Reader // if non-nil, where to read the data descriptor
err error // sticky error
} }
func (r *checksumReader) Read(b []byte) (n int, err error) { func (r *checksumReader) Read(b []byte) (n int, err error) {
if r.err != nil {
return 0, r.err
}
n, err = r.rc.Read(b) n, err = r.rc.Read(b)
r.hash.Write(b[:n]) r.hash.Write(b[:n])
if err != io.EOF { if err == nil {
return return
} }
if r.f.hasDataDescriptor() { if err == io.EOF && r.desr != nil {
if err = readDataDescriptor(r.zipr, r.f); err != nil { if err1 := readDataDescriptor(r.desr, r.f); err1 != nil {
return err = err1
} } else if r.hash.Sum32() != r.f.CRC32 {
}
if r.hash.Sum32() != r.f.CRC32 {
err = ErrChecksum err = ErrChecksum
} }
// TODO(bradfitz): even if there's not a data
// descriptor, we could still compare our accumulated
// crc32 on EOF with the content-precededing file
// header's crc32, if it's non-zero.
}
r.err = err
return return
} }
...@@ -226,10 +233,31 @@ func readDirectoryHeader(f *File, r io.Reader) error { ...@@ -226,10 +233,31 @@ func readDirectoryHeader(f *File, r io.Reader) error {
func readDataDescriptor(r io.Reader, f *File) error { func readDataDescriptor(r io.Reader, f *File) error {
var buf [dataDescriptorLen]byte var buf [dataDescriptorLen]byte
if _, err := io.ReadFull(r, buf[:]); err != nil {
// The spec says: "Although not originally assigned a
// signature, the value 0x08074b50 has commonly been adopted
// as a signature value for the data descriptor record.
// Implementers should be aware that ZIP files may be
// encountered with or without this signature marking data
// descriptors and should account for either case when reading
// ZIP files to ensure compatibility."
//
// dataDescriptorLen includes the size of the signature but
// first read just those 4 bytes to see if it exists.
if _, err := io.ReadFull(r, buf[:4]); err != nil {
return err return err
} }
b := readBuf(buf[:]) off := 0
maybeSig := readBuf(buf[:4])
if maybeSig.uint32() != dataDescriptorSignature {
// No data descriptor signature. Keep these four
// bytes.
off += 4
}
if _, err := io.ReadFull(r, buf[off:12]); err != nil {
return err
}
b := readBuf(buf[:12])
f.CRC32 = b.uint32() f.CRC32 = b.uint32()
f.CompressedSize = b.uint32() f.CompressedSize = b.uint32()
f.UncompressedSize = b.uint32() f.UncompressedSize = b.uint32()
......
...@@ -10,12 +10,14 @@ import ( ...@@ -10,12 +10,14 @@ import (
"io" "io"
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath"
"testing" "testing"
"time" "time"
) )
type ZipTest struct { type ZipTest struct {
Name string Name string
Source func() (r io.ReaderAt, size int64) // if non-nil, used instead of testdata/<Name> file
Comment string Comment string
File []ZipTestFile File []ZipTestFile
Error error // the error that Opening this file should return Error error // the error that Opening this file should return
...@@ -24,6 +26,7 @@ type ZipTest struct { ...@@ -24,6 +26,7 @@ type ZipTest struct {
type ZipTestFile struct { type ZipTestFile struct {
Name string Name string
Content []byte // if blank, will attempt to compare against File Content []byte // if blank, will attempt to compare against File
ContentErr error
File string // name of file to compare to (relative to testdata/) File string // name of file to compare to (relative to testdata/)
Mtime string // modified time in format "mm-dd-yy hh:mm:ss" Mtime string // modified time in format "mm-dd-yy hh:mm:ss"
Mode os.FileMode Mode os.FileMode
...@@ -107,6 +110,59 @@ var tests = []ZipTest{ ...@@ -107,6 +110,59 @@ var tests = []ZipTest{
Name: "unix.zip", Name: "unix.zip",
File: crossPlatform, File: crossPlatform,
}, },
{
// created by Go, before we wrote the "optional" data
// descriptor signatures (which are required by OS X)
Name: "go-no-datadesc-sig.zip",
File: []ZipTestFile{
{
Name: "foo.txt",
Content: []byte("foo\n"),
Mtime: "03-08-12 16:59:10",
Mode: 0644,
},
{
Name: "bar.txt",
Content: []byte("bar\n"),
Mtime: "03-08-12 16:59:12",
Mode: 0644,
},
},
},
{
// created by Go, after we wrote the "optional" data
// descriptor signatures (which are required by OS X)
Name: "go-with-datadesc-sig.zip",
File: []ZipTestFile{
{
Name: "foo.txt",
Content: []byte("foo\n"),
Mode: 0666,
},
{
Name: "bar.txt",
Content: []byte("bar\n"),
Mode: 0666,
},
},
},
{
Name: "Bad-CRC32-in-data-descriptor",
Source: returnCorruptCRC32Zip,
File: []ZipTestFile{
{
Name: "foo.txt",
Content: []byte("foo\n"),
Mode: 0666,
ContentErr: ErrChecksum,
},
{
Name: "bar.txt",
Content: []byte("bar\n"),
Mode: 0666,
},
},
},
} }
var crossPlatform = []ZipTestFile{ var crossPlatform = []ZipTestFile{
...@@ -139,7 +195,18 @@ func TestReader(t *testing.T) { ...@@ -139,7 +195,18 @@ func TestReader(t *testing.T) {
} }
func readTestZip(t *testing.T, zt ZipTest) { func readTestZip(t *testing.T, zt ZipTest) {
z, err := OpenReader("testdata/" + zt.Name) var z *Reader
var err error
if zt.Source != nil {
rat, size := zt.Source()
z, err = NewReader(rat, size)
} else {
var rc *ReadCloser
rc, err = OpenReader(filepath.Join("testdata", zt.Name))
if err == nil {
z = &rc.Reader
}
}
if err != zt.Error { if err != zt.Error {
t.Errorf("error=%v, want %v", err, zt.Error) t.Errorf("error=%v, want %v", err, zt.Error)
return return
...@@ -149,11 +216,6 @@ func readTestZip(t *testing.T, zt ZipTest) { ...@@ -149,11 +216,6 @@ func readTestZip(t *testing.T, zt ZipTest) {
if err == ErrFormat { if err == ErrFormat {
return return
} }
defer func() {
if err := z.Close(); err != nil {
t.Errorf("error %q when closing zip file", err)
}
}()
// bail here if no Files expected to be tested // bail here if no Files expected to be tested
// (there may actually be files in the zip, but we don't care) // (there may actually be files in the zip, but we don't care)
...@@ -170,7 +232,7 @@ func readTestZip(t *testing.T, zt ZipTest) { ...@@ -170,7 +232,7 @@ func readTestZip(t *testing.T, zt ZipTest) {
// test read of each file // test read of each file
for i, ft := range zt.File { for i, ft := range zt.File {
readTestFile(t, ft, z.File[i]) readTestFile(t, zt, ft, z.File[i])
} }
// test simultaneous reads // test simultaneous reads
...@@ -179,7 +241,7 @@ func readTestZip(t *testing.T, zt ZipTest) { ...@@ -179,7 +241,7 @@ func readTestZip(t *testing.T, zt ZipTest) {
for i := 0; i < 5; i++ { for i := 0; i < 5; i++ {
for j, ft := range zt.File { for j, ft := range zt.File {
go func(j int, ft ZipTestFile) { go func(j int, ft ZipTestFile) {
readTestFile(t, ft, z.File[j]) readTestFile(t, zt, ft, z.File[j])
done <- true done <- true
}(j, ft) }(j, ft)
n++ n++
...@@ -188,26 +250,11 @@ func readTestZip(t *testing.T, zt ZipTest) { ...@@ -188,26 +250,11 @@ func readTestZip(t *testing.T, zt ZipTest) {
for ; n > 0; n-- { for ; n > 0; n-- {
<-done <-done
} }
// test invalid checksum
if !z.File[0].hasDataDescriptor() { // skip test when crc32 in dd
z.File[0].CRC32++ // invalidate
r, err := z.File[0].Open()
if err != nil {
t.Error(err)
return
}
var b bytes.Buffer
_, err = io.Copy(&b, r)
if err != ErrChecksum {
t.Errorf("%s: copy error=%v, want %v", z.File[0].Name, err, ErrChecksum)
}
}
} }
func readTestFile(t *testing.T, ft ZipTestFile, f *File) { func readTestFile(t *testing.T, zt ZipTest, ft ZipTestFile, f *File) {
if f.Name != ft.Name { if f.Name != ft.Name {
t.Errorf("name=%q, want %q", f.Name, ft.Name) t.Errorf("%s: name=%q, want %q", zt.Name, f.Name, ft.Name)
} }
if ft.Mtime != "" { if ft.Mtime != "" {
...@@ -217,11 +264,11 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) { ...@@ -217,11 +264,11 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
return return
} }
if ft := f.ModTime(); !ft.Equal(mtime) { if ft := f.ModTime(); !ft.Equal(mtime) {
t.Errorf("%s: mtime=%s, want %s", f.Name, ft, mtime) t.Errorf("%s: %s: mtime=%s, want %s", zt.Name, f.Name, ft, mtime)
} }
} }
testFileMode(t, f, ft.Mode) testFileMode(t, zt.Name, f, ft.Mode)
size0 := f.UncompressedSize size0 := f.UncompressedSize
...@@ -238,7 +285,9 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) { ...@@ -238,7 +285,9 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
_, err = io.Copy(&b, r) _, err = io.Copy(&b, r)
if err != nil { if err != nil {
t.Error(err) if err != ft.ContentErr {
t.Errorf("%s: copying contents: %v", zt.Name, err)
}
return return
} }
r.Close() r.Close()
...@@ -264,12 +313,12 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) { ...@@ -264,12 +313,12 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
} }
} }
func testFileMode(t *testing.T, f *File, want os.FileMode) { func testFileMode(t *testing.T, zipName string, f *File, want os.FileMode) {
mode := f.Mode() mode := f.Mode()
if want == 0 { if want == 0 {
t.Errorf("%s mode: got %v, want none", f.Name, mode) t.Errorf("%s: %s mode: got %v, want none", zipName, f.Name, mode)
} else if mode != want { } else if mode != want {
t.Errorf("%s mode: want %v, got %v", f.Name, want, mode) t.Errorf("%s: %s mode: want %v, got %v", zipName, f.Name, want, mode)
} }
} }
...@@ -294,3 +343,13 @@ func TestInvalidFiles(t *testing.T) { ...@@ -294,3 +343,13 @@ func TestInvalidFiles(t *testing.T) {
t.Errorf("sigs: error=%v, want %v", err, ErrFormat) t.Errorf("sigs: error=%v, want %v", err, ErrFormat)
} }
} }
func returnCorruptCRC32Zip() (r io.ReaderAt, size int64) {
data, err := ioutil.ReadFile(filepath.Join("testdata", "go-with-datadesc-sig.zip"))
if err != nil {
panic(err)
}
// Corrupt one of the CRC32s in the data descriptor:
data[0x2d]++
return bytes.NewReader(data), int64(len(data))
}
...@@ -27,10 +27,11 @@ const ( ...@@ -27,10 +27,11 @@ const (
fileHeaderSignature = 0x04034b50 fileHeaderSignature = 0x04034b50
directoryHeaderSignature = 0x02014b50 directoryHeaderSignature = 0x02014b50
directoryEndSignature = 0x06054b50 directoryEndSignature = 0x06054b50
dataDescriptorSignature = 0x08074b50 // de-facto standard; required by OS X Finder
fileHeaderLen = 30 // + filename + extra fileHeaderLen = 30 // + filename + extra
directoryHeaderLen = 46 // + filename + extra + comment directoryHeaderLen = 46 // + filename + extra + comment
directoryEndLen = 22 // + comment directoryEndLen = 22 // + comment
dataDescriptorLen = 12 dataDescriptorLen = 16 // four uint32: descriptor signature, crc32, compressed size, size
// Constants for the first byte in CreatorVersion // Constants for the first byte in CreatorVersion
creatorFAT = 0 creatorFAT = 0
......
...@@ -224,6 +224,7 @@ func (w *fileWriter) close() error { ...@@ -224,6 +224,7 @@ func (w *fileWriter) close() error {
// write data descriptor // write data descriptor
var buf [dataDescriptorLen]byte var buf [dataDescriptorLen]byte
b := writeBuf(buf[:]) b := writeBuf(buf[:])
b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X
b.uint32(fh.CRC32) b.uint32(fh.CRC32)
b.uint32(fh.CompressedSize) b.uint32(fh.CompressedSize)
b.uint32(fh.UncompressedSize) b.uint32(fh.UncompressedSize)
......
...@@ -108,7 +108,7 @@ func testReadFile(t *testing.T, f *File, wt *WriteTest) { ...@@ -108,7 +108,7 @@ func testReadFile(t *testing.T, f *File, wt *WriteTest) {
if f.Name != wt.Name { if f.Name != wt.Name {
t.Fatalf("File name: got %q, want %q", f.Name, wt.Name) t.Fatalf("File name: got %q, want %q", f.Name, wt.Name)
} }
testFileMode(t, f, wt.Mode) testFileMode(t, wt.Name, f, wt.Mode)
rc, err := f.Open() rc, err := f.Open()
if err != nil { if err != nil {
t.Fatal("opening:", err) t.Fatal("opening:", err)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment