Commit a00b98ec authored by Andrew Gerrand's avatar Andrew Gerrand

archive/zip: new package for reading ZIP files

R=rsc
CC=golang-dev
https://golang.org/cl/2125042
parent fdb9e68c
......@@ -15,6 +15,7 @@ all: install
DIRS=\
archive/tar\
archive/zip\
asn1\
big\
bufio\
......
# Copyright 2010 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
include ../../../Make.inc
TARG=archive/zip
GOFILES=\
reader.go\
struct.go\
include ../../../Make.pkg
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
The zip package provides support for reading ZIP archives.
See: http://www.pkware.com/documents/casestudies/APPNOTE.TXT
This package does not support ZIP64 or disk spanning.
*/
package zip
import (
"bufio"
"bytes"
"compress/flate"
"hash"
"hash/crc32"
"encoding/binary"
"io"
"os"
)
var (
FormatError = os.NewError("not a valid zip file")
UnsupportedMethod = os.NewError("unsupported compression algorithm")
ChecksumError = os.NewError("checksum error")
)
type Reader struct {
r io.ReaderAt
File []*File
Comment string
}
type File struct {
FileHeader
zipr io.ReaderAt
zipsize int64
headerOffset uint32
bodyOffset int64
}
// OpenReader will open the Zip file specified by name and return a Reader.
func OpenReader(name string) (*Reader, os.Error) {
f, err := os.Open(name, os.O_RDONLY, 0644)
if err != nil {
return nil, err
}
fi, err := f.Stat()
if err != nil {
return nil, err
}
return NewReader(f, fi.Size)
}
// NewReader returns a new Reader reading from r, which is assumed to
// have the given size in bytes.
func NewReader(r io.ReaderAt, size int64) (*Reader, os.Error) {
end, err := readDirectoryEnd(r, size)
if err != nil {
return nil, err
}
z := &Reader{
r: r,
File: make([]*File, end.directoryRecords),
Comment: end.comment,
}
rs := io.NewSectionReader(r, 0, size)
if _, err = rs.Seek(int64(end.directoryOffset), 0); err != nil {
return nil, err
}
buf := bufio.NewReader(rs)
for i := range z.File {
z.File[i] = &File{zipr: r, zipsize: size}
if err := readDirectoryHeader(z.File[i], buf); err != nil {
return nil, err
}
}
return z, nil
}
// Open returns a ReadCloser that provides access to the File's contents.
func (f *File) Open() (rc io.ReadCloser, err os.Error) {
off := int64(f.headerOffset)
if f.bodyOffset == 0 {
r := io.NewSectionReader(f.zipr, off, f.zipsize-off)
if err = readFileHeader(f, r); err != nil {
return
}
if f.bodyOffset, err = r.Seek(0, 1); err != nil {
return
}
}
r := io.NewSectionReader(f.zipr, off+f.bodyOffset, int64(f.CompressedSize))
switch f.Method {
case 0: // store (no compression)
rc = nopCloser{r}
case 8: // DEFLATE
rc = flate.NewReader(r)
default:
err = UnsupportedMethod
}
if rc != nil {
rc = &checksumReader{rc, crc32.NewIEEE(), f.CRC32}
}
return
}
type checksumReader struct {
rc io.ReadCloser
hash hash.Hash32
sum uint32
}
func (r *checksumReader) Read(b []byte) (n int, err os.Error) {
n, err = r.rc.Read(b)
r.hash.Write(b[:n])
if err != os.EOF {
return
}
if r.hash.Sum32() != r.sum {
err = ChecksumError
}
return
}
func (r *checksumReader) Close() os.Error { return r.rc.Close() }
type nopCloser struct {
io.Reader
}
func (f nopCloser) Close() os.Error { return nil }
func readFileHeader(f *File, r io.Reader) (err os.Error) {
defer func() {
if rerr, ok := recover().(os.Error); ok {
err = rerr
}
}()
var (
signature uint32
filenameLength uint16
extraLength uint16
)
read(r, &signature)
if signature != fileHeaderSignature {
return FormatError
}
read(r, &f.ReaderVersion)
read(r, &f.Flags)
read(r, &f.Method)
read(r, &f.ModifiedTime)
read(r, &f.ModifiedDate)
read(r, &f.CRC32)
read(r, &f.CompressedSize)
read(r, &f.UncompressedSize)
read(r, &filenameLength)
read(r, &extraLength)
f.Name = string(readByteSlice(r, filenameLength))
f.Extra = readByteSlice(r, extraLength)
return
}
func readDirectoryHeader(f *File, r io.Reader) (err os.Error) {
defer func() {
if rerr, ok := recover().(os.Error); ok {
err = rerr
}
}()
var (
signature uint32
filenameLength uint16
extraLength uint16
commentLength uint16
startDiskNumber uint16 // unused
internalAttributes uint16 // unused
externalAttributes uint32 // unused
)
read(r, &signature)
if signature != directoryHeaderSignature {
return FormatError
}
read(r, &f.CreatorVersion)
read(r, &f.ReaderVersion)
read(r, &f.Flags)
read(r, &f.Method)
read(r, &f.ModifiedTime)
read(r, &f.ModifiedDate)
read(r, &f.CRC32)
read(r, &f.CompressedSize)
read(r, &f.UncompressedSize)
read(r, &filenameLength)
read(r, &extraLength)
read(r, &commentLength)
read(r, &startDiskNumber)
read(r, &internalAttributes)
read(r, &externalAttributes)
read(r, &f.headerOffset)
f.Name = string(readByteSlice(r, filenameLength))
f.Extra = readByteSlice(r, extraLength)
f.Comment = string(readByteSlice(r, commentLength))
return
}
func readDirectoryEnd(r io.ReaderAt, size int64) (d *directoryEnd, err os.Error) {
// look for directoryEndSignature in the last 1k, then in the last 65k
var b []byte
for i, bLen := range []int64{1024, 65 * 1024} {
if bLen > size {
bLen = size
}
b = make([]byte, int(bLen))
if _, err := r.ReadAt(b, size-bLen); err != nil && err != os.EOF {
return nil, err
}
if p := findSignatureInBlock(b); p >= 0 {
b = b[p:]
break
}
if i == 1 || bLen == size {
return nil, FormatError
}
}
// read header into struct
defer func() {
if rerr, ok := recover().(os.Error); ok {
err = rerr
d = nil
}
}()
br := bytes.NewBuffer(b[4:]) // skip over signature
d = new(directoryEnd)
read(br, &d.diskNbr)
read(br, &d.dirDiskNbr)
read(br, &d.dirRecordsThisDisk)
read(br, &d.directoryRecords)
read(br, &d.directorySize)
read(br, &d.directoryOffset)
read(br, &d.commentLen)
d.comment = string(readByteSlice(br, d.commentLen))
return d, nil
}
func findSignatureInBlock(b []byte) int {
const minSize = 4 + 2 + 2 + 2 + 2 + 4 + 4 + 2 // fixed part of header
for i := len(b) - minSize; i >= 0; i-- {
// defined from directoryEndSignature in struct.go
if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
// n is length of comment
n := int(b[i+minSize-2]) | int(b[i+minSize-1])<<8
if n+minSize+i == len(b) {
return i
}
}
}
return -1
}
func read(r io.Reader, data interface{}) {
if err := binary.Read(r, binary.LittleEndian, data); err != nil {
panic(err)
}
}
func readByteSlice(r io.Reader, l uint16) []byte {
b := make([]byte, l)
if l == 0 {
return b
}
if _, err := io.ReadFull(r, b); err != nil {
panic(err)
}
return b
}
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package zip
import (
"bytes"
"encoding/binary"
"io"
"io/ioutil"
"os"
"testing"
)
type ZipTest struct {
Name string
Comment string
File []ZipTestFile
Error os.Error // the error that Opening this file should return
}
type ZipTestFile struct {
Name string
Content []byte // if blank, will attempt to compare against File
File string // name of file to compare to (relative to testdata/)
}
var tests = []ZipTest{
ZipTest{
Name: "test.zip",
Comment: "This is a zipfile comment.",
File: []ZipTestFile{
ZipTestFile{
Name: "test.txt",
Content: []byte("This is a test text file.\n"),
},
ZipTestFile{
Name: "gophercolor16x16.png",
File: "gophercolor16x16.png",
},
},
},
ZipTest{
Name: "r.zip",
File: []ZipTestFile{
ZipTestFile{
Name: "r/r.zip",
File: "r.zip",
},
},
},
ZipTest{Name: "readme.zip"},
ZipTest{Name: "readme.notzip", Error: FormatError},
}
func TestReader(t *testing.T) {
for _, zt := range tests {
readTestZip(t, zt)
}
}
func readTestZip(t *testing.T, zt ZipTest) {
z, err := OpenReader("testdata/" + zt.Name)
if err != zt.Error {
t.Errorf("error=%v, want %v", err, zt.Error)
return
}
// bail here if no Files expected to be tested
// (there may actually be files in the zip, but we don't care)
if zt.File == nil {
return
}
if z.Comment != zt.Comment {
t.Errorf("%s: comment=%q, want %q", zt.Name, z.Comment, zt.Comment)
}
if len(z.File) != len(zt.File) {
t.Errorf("%s: file count=%d, want %d", zt.Name, len(z.File), len(zt.File))
}
// test read of each file
for i, ft := range zt.File {
readTestFile(t, ft, z.File[i])
}
// test simultaneous reads
n := 0
done := make(chan bool)
for i := 0; i < 5; i++ {
for j, ft := range zt.File {
go func() {
readTestFile(t, ft, z.File[j])
done <- true
}()
n++
}
}
for ; n > 0; n-- {
<-done
}
// test invalid checksum
z.File[0].CRC32++ // invalidate
r, err := z.File[0].Open()
if err != nil {
t.Error(err)
return
}
var b bytes.Buffer
_, err = io.Copy(&b, r)
if err != ChecksumError {
t.Errorf("%s: copy error=%v, want %v", err, ChecksumError)
}
}
func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
if f.Name != ft.Name {
t.Errorf("name=%q, want %q", f.Name, ft.Name)
}
var b bytes.Buffer
r, err := f.Open()
if err != nil {
t.Error(err)
return
}
_, err = io.Copy(&b, r)
if err != nil {
t.Error(err)
return
}
r.Close()
var c []byte
if len(ft.Content) != 0 {
c = ft.Content
} else if c, err = ioutil.ReadFile("testdata/" + ft.File); err != nil {
t.Error(err)
return
}
if b.Len() != len(c) {
t.Errorf("%s: len=%d, want %d", f.Name, b.Len(), len(c))
return
}
for i, b := range b.Bytes() {
if b != c[i] {
t.Errorf("%s: content[%d]=%q want %q", i, b, c[i])
return
}
}
}
func TestInvalidFiles(t *testing.T) {
const size = 1024 * 70 // 70kb
b := make([]byte, size)
// zeroes
_, err := NewReader(sliceReaderAt(b), size)
if err != FormatError {
t.Errorf("zeroes: error=%v, want %v", err, FormatError)
}
// repeated directoryEndSignatures
sig := make([]byte, 4)
binary.LittleEndian.PutUint32(sig, directoryEndSignature)
for i := 0; i < size-4; i += 4 {
copy(b[i:i+4], sig)
}
_, err = NewReader(sliceReaderAt(b), size)
if err != FormatError {
t.Errorf("sigs: error=%v, want %v", err, FormatError)
}
}
type sliceReaderAt []byte
func (r sliceReaderAt) ReadAt(b []byte, off int64) (int, os.Error) {
copy(b, r[int(off):int(off)+len(b)])
return len(b), nil
}
package zip
const (
fileHeaderSignature = 0x04034b50
directoryHeaderSignature = 0x02014b50
directoryEndSignature = 0x06054b50
)
type FileHeader struct {
Name string
CreatorVersion uint16
ReaderVersion uint16
Flags uint16
Method uint16
ModifiedTime uint16
ModifiedDate uint16
CRC32 uint32
CompressedSize uint32
UncompressedSize uint32
Extra []byte
Comment string
}
type directoryEnd struct {
diskNbr uint16 // unused
dirDiskNbr uint16 // unused
dirRecordsThisDisk uint16 // unused
directoryRecords uint16
directorySize uint32
directoryOffset uint32 // relative to file
commentLen uint16
comment string
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment