Commit e0b6f472 authored by Andrew Gerrand's avatar Andrew Gerrand

archive/zip: more efficient reader and bug fix

Fixes #2090.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/4815068
parent 60dac9b3
......@@ -6,7 +6,6 @@ package zip
import (
"bufio"
"bytes"
"compress/flate"
"hash"
"hash/crc32"
......@@ -37,8 +36,7 @@ type File struct {
FileHeader
zipr io.ReaderAt
zipsize int64
headerOffset uint32
bodyOffset int64
headerOffset int64
}
func (f *File) hasDataDescriptor() bool {
......@@ -90,12 +88,12 @@ func (z *Reader) init(r io.ReaderAt, size int64) os.Error {
// The count of files inside a zip is truncated to fit in a uint16.
// Gloss over this by reading headers until we encounter
// a bad one, and then only report a FormatError if
// a bad one, and then only report a FormatError or UnexpectedEOF if
// the file count modulo 65536 is incorrect.
for {
f := &File{zipr: r, zipsize: size}
err := readDirectoryHeader(f, buf)
if err == FormatError {
err = readDirectoryHeader(f, buf)
if err == FormatError || err == io.ErrUnexpectedEOF {
break
}
if err != nil {
......@@ -104,9 +102,10 @@ func (z *Reader) init(r io.ReaderAt, size int64) os.Error {
z.File = append(z.File, f)
}
if uint16(len(z.File)) != end.directoryRecords {
return FormatError
// Return the readDirectoryHeader error if we read
// the wrong number of directory entries.
return err
}
return nil
}
......@@ -116,26 +115,18 @@ func (rc *ReadCloser) Close() os.Error {
}
// Open returns a ReadCloser that provides access to the File's contents.
// It is safe to Open and Read from files concurrently.
func (f *File) Open() (rc io.ReadCloser, err os.Error) {
off := int64(f.headerOffset)
size := int64(f.CompressedSize)
if f.bodyOffset == 0 {
r := io.NewSectionReader(f.zipr, off, f.zipsize-off)
if err = readFileHeader(f, r); err != nil {
return
}
if f.bodyOffset, err = r.Seek(0, os.SEEK_CUR); err != nil {
return
}
if size == 0 {
size = int64(f.CompressedSize)
}
bodyOffset, err := f.findBodyOffset()
if err != nil {
return
}
if f.hasDataDescriptor() && size == 0 {
size := int64(f.CompressedSize)
if size == 0 && f.hasDataDescriptor() {
// permit SectionReader to see the rest of the file
size = f.zipsize - (off + f.bodyOffset)
size = f.zipsize - (f.headerOffset + bodyOffset)
}
r := io.NewSectionReader(f.zipr, off+f.bodyOffset, size)
r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
switch f.Method {
case Store: // (no compression)
rc = ioutil.NopCloser(r)
......@@ -176,75 +167,99 @@ func (r *checksumReader) Read(b []byte) (n int, err os.Error) {
func (r *checksumReader) Close() os.Error { return r.rc.Close() }
func readFileHeader(f *File, r io.Reader) (err os.Error) {
defer recoverError(&err)
var (
signature uint32
filenameLength uint16
extraLength uint16
)
read(r, &signature)
if signature != fileHeaderSignature {
func readFileHeader(f *File, r io.Reader) os.Error {
var b [fileHeaderLen]byte
if _, err := io.ReadFull(r, b[:]); err != nil {
return err
}
c := binary.LittleEndian
if sig := c.Uint32(b[:4]); sig != fileHeaderSignature {
return FormatError
}
read(r, &f.ReaderVersion)
read(r, &f.Flags)
read(r, &f.Method)
read(r, &f.ModifiedTime)
read(r, &f.ModifiedDate)
read(r, &f.CRC32)
read(r, &f.CompressedSize)
read(r, &f.UncompressedSize)
read(r, &filenameLength)
read(r, &extraLength)
f.Name = string(readByteSlice(r, filenameLength))
f.Extra = readByteSlice(r, extraLength)
return
f.ReaderVersion = c.Uint16(b[4:6])
f.Flags = c.Uint16(b[6:8])
f.Method = c.Uint16(b[8:10])
f.ModifiedTime = c.Uint16(b[10:12])
f.ModifiedDate = c.Uint16(b[12:14])
f.CRC32 = c.Uint32(b[14:18])
f.CompressedSize = c.Uint32(b[18:22])
f.UncompressedSize = c.Uint32(b[22:26])
filenameLen := int(c.Uint16(b[26:28]))
extraLen := int(c.Uint16(b[28:30]))
d := make([]byte, filenameLen+extraLen)
if _, err := io.ReadFull(r, d); err != nil {
return err
}
f.Name = string(d[:filenameLen])
f.Extra = d[filenameLen:]
return nil
}
func readDirectoryHeader(f *File, r io.Reader) (err os.Error) {
defer recoverError(&err)
var (
signature uint32
filenameLength uint16
extraLength uint16
commentLength uint16
startDiskNumber uint16 // unused
internalAttributes uint16 // unused
externalAttributes uint32 // unused
)
read(r, &signature)
if signature != directoryHeaderSignature {
// findBodyOffset does the minimum work to verify the file has a header
// and returns the file body offset.
func (f *File) findBodyOffset() (int64, os.Error) {
r := io.NewSectionReader(f.zipr, f.headerOffset, f.zipsize-f.headerOffset)
var b [fileHeaderLen]byte
if _, err := io.ReadFull(r, b[:]); err != nil {
return 0, err
}
c := binary.LittleEndian
if sig := c.Uint32(b[:4]); sig != fileHeaderSignature {
return 0, FormatError
}
filenameLen := int(c.Uint16(b[26:28]))
extraLen := int(c.Uint16(b[28:30]))
return int64(fileHeaderLen + filenameLen + extraLen), nil
}
// readDirectoryHeader attempts to read a directory header from r.
// It returns io.ErrUnexpectedEOF if it cannot read a complete header,
// and FormatError if it doesn't find a valid header signature.
func readDirectoryHeader(f *File, r io.Reader) os.Error {
var b [directoryHeaderLen]byte
if _, err := io.ReadFull(r, b[:]); err != nil {
return err
}
c := binary.LittleEndian
if sig := c.Uint32(b[:4]); sig != directoryHeaderSignature {
return FormatError
}
read(r, &f.CreatorVersion)
read(r, &f.ReaderVersion)
read(r, &f.Flags)
read(r, &f.Method)
read(r, &f.ModifiedTime)
read(r, &f.ModifiedDate)
read(r, &f.CRC32)
read(r, &f.CompressedSize)
read(r, &f.UncompressedSize)
read(r, &filenameLength)
read(r, &extraLength)
read(r, &commentLength)
read(r, &startDiskNumber)
read(r, &internalAttributes)
read(r, &externalAttributes)
read(r, &f.headerOffset)
f.Name = string(readByteSlice(r, filenameLength))
f.Extra = readByteSlice(r, extraLength)
f.Comment = string(readByteSlice(r, commentLength))
return
f.CreatorVersion = c.Uint16(b[4:6])
f.ReaderVersion = c.Uint16(b[6:8])
f.Flags = c.Uint16(b[8:10])
f.Method = c.Uint16(b[10:12])
f.ModifiedTime = c.Uint16(b[12:14])
f.ModifiedDate = c.Uint16(b[14:16])
f.CRC32 = c.Uint32(b[16:20])
f.CompressedSize = c.Uint32(b[20:24])
f.UncompressedSize = c.Uint32(b[24:28])
filenameLen := int(c.Uint16(b[28:30]))
extraLen := int(c.Uint16(b[30:32]))
commentLen := int(c.Uint16(b[32:34]))
// startDiskNumber := c.Uint16(b[34:36]) // Unused
// internalAttributes := c.Uint16(b[36:38]) // Unused
// externalAttributes := c.Uint32(b[38:42]) // Unused
f.headerOffset = int64(c.Uint32(b[42:46]))
d := make([]byte, filenameLen+extraLen+commentLen)
if _, err := io.ReadFull(r, d); err != nil {
return err
}
f.Name = string(d[:filenameLen])
f.Extra = d[filenameLen : filenameLen+extraLen]
f.Comment = string(d[filenameLen+extraLen:])
return nil
}
func readDataDescriptor(r io.Reader, f *File) (err os.Error) {
defer recoverError(&err)
read(r, &f.CRC32)
read(r, &f.CompressedSize)
read(r, &f.UncompressedSize)
return
func readDataDescriptor(r io.Reader, f *File) os.Error {
var b [dataDescriptorLen]byte
if _, err := io.ReadFull(r, b[:]); err != nil {
return err
}
c := binary.LittleEndian
f.CRC32 = c.Uint32(b[:4])
f.CompressedSize = c.Uint32(b[4:8])
f.UncompressedSize = c.Uint32(b[8:12])
return nil
}
func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Error) {
......@@ -268,48 +283,29 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Erro
}
// read header into struct
defer recoverError(&err)
br := bytes.NewBuffer(b[4:]) // skip over signature
c := binary.LittleEndian
d := new(directoryEnd)
read(br, &d.diskNbr)
read(br, &d.dirDiskNbr)
read(br, &d.dirRecordsThisDisk)
read(br, &d.directoryRecords)
read(br, &d.directorySize)
read(br, &d.directoryOffset)
read(br, &d.commentLen)
d.comment = string(readByteSlice(br, d.commentLen))
d.diskNbr = c.Uint16(b[4:6])
d.dirDiskNbr = c.Uint16(b[6:8])
d.dirRecordsThisDisk = c.Uint16(b[8:10])
d.directoryRecords = c.Uint16(b[10:12])
d.directorySize = c.Uint32(b[12:16])
d.directoryOffset = c.Uint32(b[16:20])
d.commentLen = c.Uint16(b[20:22])
d.comment = string(b[22 : 22+int(d.commentLen)])
return d, nil
}
func findSignatureInBlock(b []byte) int {
const minSize = 4 + 2 + 2 + 2 + 2 + 4 + 4 + 2 // fixed part of header
for i := len(b) - minSize; i >= 0; i-- {
for i := len(b) - directoryEndLen; i >= 0; i-- {
// defined from directoryEndSignature in struct.go
if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
// n is length of comment
n := int(b[i+minSize-2]) | int(b[i+minSize-1])<<8
if n+minSize+i == len(b) {
n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
if n+directoryEndLen+i == len(b) {
return i
}
}
}
return -1
}
func read(r io.Reader, data interface{}) {
if err := binary.Read(r, binary.LittleEndian, data); err != nil {
panic(err)
}
}
func readByteSlice(r io.Reader, l uint16) []byte {
b := make([]byte, l)
if l == 0 {
return b
}
if _, err := io.ReadFull(r, b); err != nil {
panic(err)
}
return b
}
......@@ -162,6 +162,8 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
t.Errorf("%s: mtime=%s (%d); want %s (%d)", f.Name, time.SecondsToUTC(got), got, mtime, want)
}
size0 := f.UncompressedSize
var b bytes.Buffer
r, err := f.Open()
if err != nil {
......@@ -169,6 +171,10 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
return
}
if size1 := f.UncompressedSize; size0 != size1 {
t.Errorf("file %q changed f.UncompressedSize from %d to %d", f.Name, size0, size1)
}
_, err = io.Copy(&b, r)
if err != nil {
t.Error(err)
......
......@@ -24,6 +24,9 @@ const (
fileHeaderSignature = 0x04034b50
directoryHeaderSignature = 0x02014b50
directoryEndSignature = 0x06054b50
fileHeaderLen = 30 // + filename + extra
directoryHeaderLen = 46 // + filename + extra + comment
directoryEndLen = 22 // + comment
dataDescriptorLen = 12
)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment