Commit 0e36456b authored by Joe Tsai's avatar Joe Tsai Committed by Joe Tsai

archive/tar: fix and cleanup readOldGNUSparseMap

* Assert that the format is GNU.
Both GNU and STAR have some form of sparse file support with
incompatible header structures. Worse yet, both formats use the
'S' type flag to indicate the presence of a sparse file.
As such, we should check the format (based on magic numbers)
and fail early.

* Move realsize parsing logic into readOldGNUSparseMap.
This is related to the sparse parsing logic and belongs here.

* Fix the termination condition for parsing sparse fields.
The termination condition for reading the sparse fields
is to simply check if the first byte of the offset field is NULL.
This does not seem to be documented in the GNU manual, but this is
the check done by the both the GNU and BSD implementations:
	http://git.savannah.gnu.org/cgit/tar.git/tree/src/sparse.c?id=9a33077a7b7ad7d32815a21dee54eba63b38a81c#n731
	https://github.com/libarchive/libarchive/blob/1fa9c7bf90f0862036a99896b0501c381584451a/libarchive/archive_read_support_format_tar.c#L2207

* Fix the parsing of sparse fields to use parseNumeric.
This is what GNU and BSD do. The previous two links show that
GNU and BSD both handle base-256 and base-8.

* Detect truncated streams.
The call to io.ReadFull does not check if the error is io.EOF.
Getting io.EOF in this method is never okay and should always be
converted to io.ErrUnexpectedEOF.

* Simplify the function.
The logic is essentially a do-while loop so we can remove
some redundant code.

Change-Id: Ib2f601b1a283eaec1e41b1d3396d649c80749c4e
Reviewed-on: https://go-review.googlesource.com/28471Reviewed-by: 's avatarRuss Cox <rsc@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
parent 14204662
......@@ -208,13 +208,7 @@ func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[strin
var sp []sparseEntry
var err error
if hdr.Typeflag == TypeGNUSparse {
var p parser
hdr.Size = p.parseNumeric(rawHdr.GNU().RealSize())
if p.err != nil {
return p.err
}
sp, err = tr.readOldGNUSparseMap(rawHdr)
sp, err = tr.readOldGNUSparseMap(hdr, rawHdr)
if err != nil {
return err
}
......@@ -493,46 +487,56 @@ func (tr *Reader) readHeader() (*Header, *block, error) {
return hdr, &tr.blk, p.err
}
// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
// then one or more extension headers are used to store the rest of the sparse map.
func (tr *Reader) readOldGNUSparseMap(blk *block) ([]sparseEntry, error) {
// readOldGNUSparseMap reads the sparse map from the old GNU sparse format.
// The sparse map is stored in the tar header if it's small enough.
// If it's larger than four entries, then one or more extension headers are used
// to store the rest of the sparse map.
//
// The Header.Size does not reflect the size of any extended headers used.
// Thus, this function will read from the raw io.Reader to fetch extra headers.
// This method mutates blk in the process.
func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, error) {
// Make sure that the input format is GNU.
// Unfortunately, the STAR format also has a sparse header format that uses
// the same type flag but has a completely different layout.
if blk.GetFormat() != formatGNU {
return nil, ErrHeader
}
var p parser
hdr.Size = p.parseNumeric(blk.GNU().RealSize())
if p.err != nil {
return nil, p.err
}
var s sparseArray = blk.GNU().Sparse()
var sp = make([]sparseEntry, 0, s.MaxEntries())
for i := 0; i < s.MaxEntries(); i++ {
offset := p.parseOctal(s.Entry(i).Offset())
numBytes := p.parseOctal(s.Entry(i).NumBytes())
if p.err != nil {
return nil, p.err
}
if offset == 0 && numBytes == 0 {
break
}
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
}
for s.IsExtended()[0] > 0 {
// There are more entries. Read an extension header and parse its entries.
var blk block
if _, err := io.ReadFull(tr.r, blk[:]); err != nil {
return nil, err
}
s = blk.Sparse()
for {
for i := 0; i < s.MaxEntries(); i++ {
offset := p.parseOctal(s.Entry(i).Offset())
numBytes := p.parseOctal(s.Entry(i).NumBytes())
// This termination condition is identical to GNU and BSD tar.
if s.Entry(i).Offset()[0] == 0x00 {
break // Don't return, need to process extended headers (even if empty)
}
offset := p.parseNumeric(s.Entry(i).Offset())
numBytes := p.parseNumeric(s.Entry(i).NumBytes())
if p.err != nil {
return nil, p.err
}
if offset == 0 && numBytes == 0 {
break
}
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
}
if s.IsExtended()[0] > 0 {
// There are more entries. Read an extension header and parse its entries.
if _, err := io.ReadFull(tr.r, blk[:]); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return nil, err
}
s = blk.Sparse()
continue
}
return sp, nil // Done
}
return sp, nil
}
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format
......
......@@ -618,6 +618,64 @@ func TestSparseFileReader(t *testing.T) {
}
}
func TestReadOldGNUSparseMap(t *testing.T) {
const (
t00 = "00000000000\x0000000000000\x00"
t11 = "00000000001\x0000000000001\x00"
t12 = "00000000001\x0000000000002\x00"
t21 = "00000000002\x0000000000001\x00"
)
mkBlk := func(size, sp0, sp1, sp2, sp3, ext string, format int) *block {
var blk block
copy(blk.GNU().RealSize(), size)
copy(blk.GNU().Sparse().Entry(0), sp0)
copy(blk.GNU().Sparse().Entry(1), sp1)
copy(blk.GNU().Sparse().Entry(2), sp2)
copy(blk.GNU().Sparse().Entry(3), sp3)
copy(blk.GNU().Sparse().IsExtended(), ext)
if format != formatUnknown {
blk.SetFormat(format)
}
return &blk
}
vectors := []struct {
data string // Input data
rawHdr *block // Input raw header
want []sparseEntry // Expected sparse entries to be outputted
err error // Expected error to be returned
}{
{"", mkBlk("", "", "", "", "", "", formatUnknown), nil, ErrHeader},
{"", mkBlk("1234", "fewa", "", "", "", "", formatGNU), nil, ErrHeader},
{"", mkBlk("0031", "", "", "", "", "", formatGNU), nil, nil},
{"", mkBlk("1234", t00, t11, "", "", "", formatGNU),
[]sparseEntry{{0, 0}, {1, 1}}, nil},
{"", mkBlk("1234", t11, t12, t21, t11, "", formatGNU),
[]sparseEntry{{1, 1}, {1, 2}, {2, 1}, {1, 1}}, nil},
{"", mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU),
[]sparseEntry{}, io.ErrUnexpectedEOF},
{t11 + t11,
mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU),
[]sparseEntry{}, io.ErrUnexpectedEOF},
{t11 + t21 + strings.Repeat("\x00", 512),
mkBlk("1234", t11, t12, t21, t11, "\x80", formatGNU),
[]sparseEntry{{1, 1}, {1, 2}, {2, 1}, {1, 1}, {1, 1}, {2, 1}}, nil},
}
for i, v := range vectors {
tr := Reader{r: strings.NewReader(v.data)}
hdr := new(Header)
got, err := tr.readOldGNUSparseMap(hdr, v.rawHdr)
if !reflect.DeepEqual(got, v.want) && !(len(got) == 0 && len(v.want) == 0) {
t.Errorf("test %d, readOldGNUSparseMap(...): got %v, want %v", i, got, v.want)
}
if err != v.err {
t.Errorf("test %d, unexpected error: got %v, want %v", i, err, v.err)
}
}
}
func TestReadGNUSparseMap0x1(t *testing.T) {
const (
maxUint = ^uint(0)
......@@ -854,8 +912,7 @@ func TestReadTruncation(t *testing.T) {
{pax + trash[:1], 0, io.ErrUnexpectedEOF},
{pax + trash[:511], 0, io.ErrUnexpectedEOF},
{sparse[:511], 0, io.ErrUnexpectedEOF},
// TODO(dsnet): This should pass, but currently fails.
// {sparse[:512], 0, io.ErrUnexpectedEOF},
{sparse[:512], 0, io.ErrUnexpectedEOF},
{sparse[:3584], 1, io.EOF},
{sparse[:9200], 1, io.EOF}, // Terminate in padding of sparse header
{sparse[:9216], 1, io.EOF},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment