archive/tar: detect truncated files

Motivation: * Reader.skipUnread never reports io.ErrUnexpectedEOF. This is strange given that io.ErrUnexpectedEOF is given through Reader.Read if the user manually reads the file. * Reader.skipUnread fails to detect truncated files since io.Seeker is lazy about reporting errors. Thus, the behavior of Reader differs whether the input io.Reader also satisfies io.Seeker or not. To solve this, we seek to one before the end of the data section and always rely on at least one call to io.CopyN. If the tr.r satisfies io.Seeker, this is guarunteed to never read more than blockSize. Fixes #12557 Change-Id: I0ddddfc6bed0d74465cb7e7a02b26f1de7a7a279 Reviewed-on: https://go-review.googlesource.com/15175Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>

archive/tar: detect truncated files
Motivation: * Reader.skipUnread never reports io.ErrUnexpectedEOF. This is strange given that io.ErrUnexpectedEOF is given through Reader.Read if the user manually reads the file. * Reader.skipUnread fails to detect truncated files since io.Seeker is lazy about reporting errors. Thus, the behavior of Reader differs whether the input io.Reader also satisfies io.Seeker or not. To solve this, we seek to one before the end of the data section and always rely on at least one call to io.CopyN. If the tr.r satisfies io.Seeker, this is guarunteed to never read more than blockSize. Fixes #12557 Change-Id: I0ddddfc6bed0d74465cb7e7a02b26f1de7a7a279 Reviewed-on: https://go-review.googlesource.com/15175Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
e3b615fd · Joe Tsai · Ian Lance Taylor · 6083bd65 · e3b615fd · e3b615fd
Commit e3b615fd authored Oct 01, 2015 by Joe Tsai Committed by Ian Lance Taylor Nov 06, 2015
4 changed files
--- a/src/archive/tar/reader.go
+++ b/src/archive/tar/reader.go
@@ -446,16 +446,45 @@ func (tr *Reader) octal(b []byte) int64 {
 	return int64(x)
 }

-// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
-func (tr *Reader) skipUnread() {
-	nr := tr.numBytes() + tr.pad // number of bytes to skip
+// skipUnread skips any unread bytes in the existing file entry, as well as any
+// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is
+// encountered in the data portion; it is okay to hit io.EOF in the padding.
+//
+// Note that this function still works properly even when sparse files are being
+// used since numBytes returns the bytes remaining in the underlying io.Reader.
+func (tr *Reader) skipUnread() error {
+	dataSkip := tr.numBytes()      // Number of data bytes to skip
+	totalSkip := dataSkip + tr.pad // Total number of bytes to skip
 	tr.curr, tr.pad = nil, 0
-	if sr, ok := tr.r.(io.Seeker); ok {
-		if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
-			return
+
+	// If possible, Seek to the last byte before the end of the data section.
+	// Do this because Seek is often lazy about reporting errors; this will mask
+	// the fact that the tar stream may be truncated. We can rely on the
+	// io.CopyN done shortly afterwards to trigger any IO errors.
+	var seekSkipped int64 // Number of bytes skipped via Seek
+	if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 {
+		// Not all io.Seeker can actually Seek. For example, os.Stdin implements
+		// io.Seeker, but calling Seek always returns an error and performs
+		// no action. Thus, we try an innocent seek to the current position
+		// to see if Seek is really supported.
+		pos1, err := sr.Seek(0, os.SEEK_CUR)
+		if err == nil {
+			// Seek seems supported, so perform the real Seek.
+			pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR)
+			if err != nil {
+				tr.err = err
+				return tr.err
+			}
+			seekSkipped = pos2 - pos1
 		}
 	}
-	_, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
+
+	var copySkipped int64 // Number of bytes skipped via CopyN
+	copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped)
+	if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip {
+		tr.err = io.ErrUnexpectedEOF
+	}
+	return tr.err
 }

 func (tr *Reader) verifyChecksum(header []byte) bool {
@@ -468,18 +497,25 @@ func (tr *Reader) verifyChecksum(header []byte) bool {
 	return given == unsigned || given == signed
 }

+// readHeader reads the next block header and assumes that the underlying reader
+// is already aligned to a block boundary.
+//
+// The err will be set to io.EOF only when one of the following occurs:
+//	* Exactly 0 bytes are read and EOF is hit.
+//	* Exactly 1 block of zeros is read and EOF is hit.
+//	* At least 2 blocks of zeros are read.
 func (tr *Reader) readHeader() *Header {
 	header := tr.hdrBuff[:]
 	copy(header, zeroBlock)

 	if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
-		return nil
+		return nil // io.EOF is okay here
 	}

 	// Two blocks of zero bytes marks the end of the archive.
 	if bytes.Equal(header, zeroBlock[0:blockSize]) {
 		if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
-			return nil
+			return nil // io.EOF is okay here
 		}
 		if bytes.Equal(header, zeroBlock[0:blockSize]) {
 			tr.err = io.EOF

--- a/src/archive/tar/reader_test.go
+++ b/src/archive/tar/reader_test.go
@@ -422,35 +422,6 @@ func TestPartialRead(t *testing.T) {
 	}
 }

-func TestNonSeekable(t *testing.T) {
-	test := gnuTarTest
-	f, err := os.Open(test.file)
-	if err != nil {
-		t.Fatalf("Unexpected error: %v", err)
-	}
-	defer f.Close()
-
-	type readerOnly struct {
-		io.Reader
-	}
-	tr := NewReader(readerOnly{f})
-	nread := 0
-
-	for ; ; nread++ {
-		_, err := tr.Next()
-		if err == io.EOF {
-			break
-		}
-		if err != nil {
-			t.Fatalf("Unexpected error: %v", err)
-		}
-	}
-
-	if nread != len(test.headers) {
-		t.Errorf("Didn't process all files\nexpected: %d\nprocessed %d\n", len(test.headers), nread)
-	}
-}
-
 func TestParsePAXHeader(t *testing.T) {
 	paxTests := [][3]string{
 		{"a", "a=name", "10 a=name\n"}, // Test case involving multiple acceptable lengths
@@ -803,3 +774,130 @@ func TestUninitializedRead(t *testing.T) {
 	}

 }
+
+type reader struct{ io.Reader }
+type readSeeker struct{ io.ReadSeeker }
+type readBadSeeker struct{ io.ReadSeeker }
+
+func (rbs *readBadSeeker) Seek(int64, int) (int64, error) { return 0, fmt.Errorf("illegal seek") }
+
+// TestReadTruncation test the ending condition on various truncated files and
+// that truncated files are still detected even if the underlying io.Reader
+// satisfies io.Seeker.
+func TestReadTruncation(t *testing.T) {
+	var ss []string
+	for _, p := range []string{
+		"testdata/gnu.tar",
+		"testdata/ustar-file-reg.tar",
+		"testdata/pax-path-hdr.tar",
+		"testdata/sparse-formats.tar",
+	} {
+		buf, err := ioutil.ReadFile(p)
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+		ss = append(ss, string(buf))
+	}
+
+	data1, data2, pax, sparse := ss[0], ss[1], ss[2], ss[3]
+	data2 += strings.Repeat("\x00", 10*512)
+	trash := strings.Repeat("garbage ", 64) // Exactly 512 bytes
+
+	var vectors = []struct {
+		input string // Input stream
+		cnt   int    // Expected number of headers read
+		err   error  // Expected error outcome
+	}{
+		{"", 0, io.EOF}, // Empty file is a "valid" tar file
+		{data1[:511], 0, io.ErrUnexpectedEOF},
+		{data1[:512], 1, io.ErrUnexpectedEOF},
+		{data1[:1024], 1, io.EOF},
+		{data1[:1536], 2, io.ErrUnexpectedEOF},
+		{data1[:2048], 2, io.EOF},
+		{data1, 2, io.EOF},
+		{data1[:2048] + data2[:1536], 3, io.EOF},
+		{data2[:511], 0, io.ErrUnexpectedEOF},
+		{data2[:512], 1, io.ErrUnexpectedEOF},
+		{data2[:1195], 1, io.ErrUnexpectedEOF},
+		{data2[:1196], 1, io.EOF}, // Exact end of data and start of padding
+		{data2[:1200], 1, io.EOF},
+		{data2[:1535], 1, io.EOF},
+		{data2[:1536], 1, io.EOF}, // Exact end of padding
+		{data2[:1536] + trash[:1], 1, io.ErrUnexpectedEOF},
+		{data2[:1536] + trash[:511], 1, io.ErrUnexpectedEOF},
+		{data2[:1536] + trash, 1, ErrHeader},
+		{data2[:2048], 1, io.EOF}, // Exactly 1 empty block
+		{data2[:2048] + trash[:1], 1, io.ErrUnexpectedEOF},
+		{data2[:2048] + trash[:511], 1, io.ErrUnexpectedEOF},
+		{data2[:2048] + trash, 1, ErrHeader},
+		{data2[:2560], 1, io.EOF}, // Exactly 2 empty blocks (normal end-of-stream)
+		{data2[:2560] + trash[:1], 1, io.EOF},
+		{data2[:2560] + trash[:511], 1, io.EOF},
+		{data2[:2560] + trash, 1, io.EOF},
+		{data2[:3072], 1, io.EOF},
+		{pax, 0, io.EOF}, // PAX header without data is a "valid" tar file
+		{pax + trash[:1], 0, io.ErrUnexpectedEOF},
+		{pax + trash[:511], 0, io.ErrUnexpectedEOF},
+		{sparse[:511], 0, io.ErrUnexpectedEOF},
+		// TODO(dsnet): This should pass, but currently fails.
+		// {sparse[:512], 0, io.ErrUnexpectedEOF},
+		{sparse[:3584], 1, io.EOF},
+		{sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header
+		{sparse[:9216], 1, io.EOF},
+		{sparse[:9728], 2, io.ErrUnexpectedEOF},
+		{sparse[:10240], 2, io.EOF},
+		{sparse[:11264], 2, io.ErrUnexpectedEOF},
+		{sparse, 5, io.EOF},
+		{sparse + trash, 5, io.EOF},
+	}
+
+	for i, v := range vectors {
+		for j := 0; j < 6; j++ {
+			var tr *Reader
+			var s1, s2 string
+
+			switch j {
+			case 0:
+				tr = NewReader(&reader{strings.NewReader(v.input)})
+				s1, s2 = "io.Reader", "auto"
+			case 1:
+				tr = NewReader(&reader{strings.NewReader(v.input)})
+				s1, s2 = "io.Reader", "manual"
+			case 2:
+				tr = NewReader(&readSeeker{strings.NewReader(v.input)})
+				s1, s2 = "io.ReadSeeker", "auto"
+			case 3:
+				tr = NewReader(&readSeeker{strings.NewReader(v.input)})
+				s1, s2 = "io.ReadSeeker", "manual"
+			case 4:
+				tr = NewReader(&readBadSeeker{strings.NewReader(v.input)})
+				s1, s2 = "ReadBadSeeker", "auto"
+			case 5:
+				tr = NewReader(&readBadSeeker{strings.NewReader(v.input)})
+				s1, s2 = "ReadBadSeeker", "manual"
+			}
+
+			var cnt int
+			var err error
+			for {
+				if _, err = tr.Next(); err != nil {
+					break
+				}
+				cnt++
+				if s2 == "manual" {
+					if _, err = io.Copy(ioutil.Discard, tr); err != nil {
+						break
+					}
+				}
+			}
+			if err != v.err {
+				t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %v, want %v",
+					i, s1, s2, err, v.err)
+			}
+			if cnt != v.cnt {
+				t.Errorf("test %d, NewReader(%s(...)) with %s discard: got %d headers, want %d headers",
+					i, s1, s2, cnt, v.cnt)
+			}
+		}
+	}
+}
--- a/src/archive/tar/testdata/pax-path-hdr.tar
+++ b/src/archive/tar/testdata/pax-path-hdr.tar
--- a/src/archive/tar/testdata/ustar-file-reg.tar
+++ b/src/archive/tar/testdata/ustar-file-reg.tar