Commit e3b615fd authored by Joe Tsai's avatar Joe Tsai Committed by Ian Lance Taylor

archive/tar: detect truncated files

Motivation:
* Reader.skipUnread never reports io.ErrUnexpectedEOF. This is strange
given that io.ErrUnexpectedEOF is given through Reader.Read if the
user manually reads the file.
* Reader.skipUnread fails to detect truncated files since io.Seeker
is lazy about reporting errors. Thus, the behavior of Reader differs
whether the input io.Reader also satisfies io.Seeker or not.

To solve this, we seek to one before the end of the data section and
always rely on at least one call to io.CopyN. If the tr.r satisfies
io.Seeker, this is guarunteed to never read more than blockSize.

Fixes #12557

Change-Id: I0ddddfc6bed0d74465cb7e7a02b26f1de7a7a279
Reviewed-on: https://go-review.googlesource.com/15175Reviewed-by: 's avatarBrad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 6083bd65
......@@ -446,16 +446,45 @@ func (tr *Reader) octal(b []byte) int64 {
return int64(x)
}
// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
func (tr *Reader) skipUnread() {
nr := tr.numBytes() + tr.pad // number of bytes to skip
// skipUnread skips any unread bytes in the existing file entry, as well as any
// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
// encountered in the data portion; it is okay to hit io.EOF in the padding.
//
// Note that this function still works properly even when sparse files are being
// used since numBytes returns the bytes remaining in the underlying io.Reader.
func (tr *Reader) skipUnread() error {
dataSkip := tr.numBytes() // Number of data bytes to skip
totalSkip := dataSkip + tr.pad // Total number of bytes to skip
tr.curr, tr.pad = nil, 0
if sr, ok := tr.r.(io.Seeker); ok {
if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
return
// If possible, Seek to the last byte before the end of the data section.
// Do this because Seek is often lazy about reporting errors; this will mask
// the fact that the tar stream may be truncated. We can rely on the
// io.CopyN done shortly afterwards to trigger any IO errors.
var seekSkipped int64 // Number of bytes skipped via Seek
if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
// Not all io.Seeker can actually Seek. For example, os.Stdin implements
// io.Seeker, but calling Seek always returns an error and performs
// no action. Thus, we try an innocent seek to the current position
// to see if Seek is really supported.
pos1, err := sr.Seek(0, os.SEEK_CUR)
if err == nil {
// Seek seems supported, so perform the real Seek.
pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
if err != nil {
tr.err = err
return tr.err
}
seekSkipped = pos2 - pos1
}
}
_, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
var copySkipped int64 // Number of bytes skipped via CopyN
copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
tr.err = io.ErrUnexpectedEOF
}
return tr.err
}
func (tr *Reader) verifyChecksum(header []byte) bool {
......@@ -468,18 +497,25 @@ func (tr *Reader) verifyChecksum(header []byte) bool {
return given == unsigned || given == signed
}
// readHeader reads the next block header and assumes that the underlying reader
// is already aligned to a block boundary.
//
// The err will be set to io.EOF only when one of the following occurs:
// * Exactly 0 bytes are read and EOF is hit.
// * Exactly 1 block of zeros is read and EOF is hit.
// * At least 2 blocks of zeros are read.
func (tr *Reader) readHeader() *Header {
header := tr.hdrBuff[:]
copy(header, zeroBlock)
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
return nil
return nil // io.EOF is okay here
}
// Two blocks of zero bytes marks the end of the archive.
if bytes.Equal(header, zeroBlock[0:blockSize]) {
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
return nil
return nil // io.EOF is okay here
}
if bytes.Equal(header, zeroBlock[0:blockSize]) {
tr.err = io.EOF
......
......@@ -422,35 +422,6 @@ func TestPartialRead(t *testing.T) {
}
}
func TestNonSeekable(t *testing.T) {
test := gnuTarTest
f, err := os.Open(test.file)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
defer f.Close()
type readerOnly struct {
io.Reader
}
tr := NewReader(readerOnly{f})
nread := 0
for ; ; nread++ {
_, err := tr.Next()
if err == io.EOF {
break
}
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
}
if nread != len(test.headers) {
t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread)
}
}
func TestParsePAXHeader(t *testing.T) {
paxTests := [][3]string{
{"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths
......@@ -803,3 +774,130 @@ func TestUninitializedRead(t *testing.T) {
}
}
type reader struct{ io.Reader }
type readSeeker struct{ io.ReadSeeker }
type readBadSeeker struct{ io.ReadSeeker }
func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") }
// TestReadTruncation test the ending condition on various truncated files and
// that truncated files are still detected even if the underlying io.Reader
// satisfies io.Seeker.
func TestReadTruncation(t *testing.T) {
var ss []string
for _, p := range []string{
"testdata/gnu.tar",
"testdata/ustar-file-reg.tar",
"testdata/pax-path-hdr.tar",
"testdata/sparse-formats.tar",
} {
buf, err := ioutil.ReadFile(p)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
ss = append(ss, string(buf))
}
data1, data2, pax, sparse := ss[0], ss[1], ss[2], ss[3]
data2 += strings.Repeat("\x00", 10*512)
trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes
var vectors = []struct {
input string // Input stream
cnt int // Expected number of headers read
err error // Expected error outcome
}{
{"", 0, io.EOF}, // Empty file is a "valid" tar file
{data1[:511], 0, io.ErrUnexpectedEOF},
{data1[:512], 1, io.ErrUnexpectedEOF},
{data1[:1024], 1, io.EOF},
{data1[:1536], 2, io.ErrUnexpectedEOF},
{data1[:2048], 2, io.EOF},
{data1, 2, io.EOF},
{data1[:2048] + data2[:1536], 3, io.EOF},
{data2[:511], 0, io.ErrUnexpectedEOF},
{data2[:512], 1, io.ErrUnexpectedEOF},
{data2[:1195], 1, io.ErrUnexpectedEOF},
{data2[:1196], 1, io.EOF}, // Exact end of data and start of padding
{data2[:1200], 1, io.EOF},
{data2[:1535], 1, io.EOF},
{data2[:1536], 1, io.EOF}, // Exact end of padding
{data2[:1536] + trash[:1], 1, io.ErrUnexpectedEOF},
{data2[:1536] + trash[:511], 1, io.ErrUnexpectedEOF},
{data2[:1536] + trash, 1, ErrHeader},
{data2[:2048], 1, io.EOF}, // Exactly 1 empty block
{data2[:2048] + trash[:1], 1, io.ErrUnexpectedEOF},
{data2[:2048] + trash[:511], 1, io.ErrUnexpectedEOF},
{data2[:2048] + trash, 1, ErrHeader},
{data2[:2560], 1, io.EOF}, // Exactly 2 empty blocks (normal end-of-stream)
{data2[:2560] + trash[:1], 1, io.EOF},
{data2[:2560] + trash[:511], 1, io.EOF},
{data2[:2560] + trash, 1, io.EOF},
{data2[:3072], 1, io.EOF},
{pax, 0, io.EOF}, // PAX header without data is a "valid" tar file
{pax + trash[:1], 0, io.ErrUnexpectedEOF},
{pax + trash[:511], 0, io.ErrUnexpectedEOF},
{sparse[:511], 0, io.ErrUnexpectedEOF},
// TODO(dsnet): This should pass, but currently fails.
// {sparse[:512], 0, io.ErrUnexpectedEOF},
{sparse[:3584], 1, io.EOF},
{sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header
{sparse[:9216], 1, io.EOF},
{sparse[:9728], 2, io.ErrUnexpectedEOF},
{sparse[:10240], 2, io.EOF},
{sparse[:11264], 2, io.ErrUnexpectedEOF},
{sparse, 5, io.EOF},
{sparse + trash, 5, io.EOF},
}
for i, v := range vectors {
for j := 0; j < 6; j++ {
var tr *Reader
var s1, s2 string
switch j {
case 0:
tr = NewReader(&reader{strings.NewReader(v.input)})
s1, s2 = "io.Reader", "auto"
case 1:
tr = NewReader(&reader{strings.NewReader(v.input)})
s1, s2 = "io.Reader", "manual"
case 2:
tr = NewReader(&readSeeker{strings.NewReader(v.input)})
s1, s2 = "io.ReadSeeker", "auto"
case 3:
tr = NewReader(&readSeeker{strings.NewReader(v.input)})
s1, s2 = "io.ReadSeeker", "manual"
case 4:
tr = NewReader(&readBadSeeker{strings.NewReader(v.input)})
s1, s2 = "ReadBadSeeker", "auto"
case 5:
tr = NewReader(&readBadSeeker{strings.NewReader(v.input)})
s1, s2 = "ReadBadSeeker", "manual"
}
var cnt int
var err error
for {
if _, err = tr.Next(); err != nil {
break
}
cnt++
if s2 == "manual" {
if _, err = io.Copy(ioutil.Discard, tr); err != nil {
break
}
}
}
if err != v.err {
t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %v, want %v",
i, s1, s2, err, v.err)
}
if cnt != v.cnt {
t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %d headers, want %d headers",
i, s1, s2, cnt, v.cnt)
}
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment