Commit e0ab505a authored by Joe Tsai's avatar Joe Tsai Committed by Joe Tsai

archive/tar: implement Writer support for sparse files

This CL is the second step (of two; part1 is CL/56771) for adding
sparse file support to the Writer.

There are no new identifiers exported in this CL, but this does make
use of Header.SparseHoles added in part1. If the Typeflag is set to
TypeGNUSparse or len(SparseHoles) > 0, then the Writer will emit an
sparse file, where the holes must be written by the user as zeros.

If TypeGNUSparse is set, then the output file must use the GNU format.
Otherwise, it must use the PAX format (with GNU-defined PAX keys).

A future CL may export Reader.Discard and Writer.FillZeros,
but those methods are currently unexported, and only used by the
tests for efficiency reasons.
Calling Discard or FillZeros on a hole 10GiB in size does take
time, even if it is essentially a memcopy.

Updates #13548

Change-Id: Id586d9178c227c0577f796f731ae2cbb72355601
Reviewed-on: https://go-review.googlesource.com/57212Reviewed-by: 's avatarIan Lance Taylor <iant@golang.org>
parent 645ecf5d
......@@ -33,6 +33,7 @@ var (
ErrWriteAfterClose = errors.New("tar: write after close")
errMissData = errors.New("tar: sparse file references non-existent data")
errUnrefData = errors.New("tar: sparse file contains unreferenced data")
errWriteHole = errors.New("tar: write non-NUL byte in sparse hole")
)
// Header type flags.
......@@ -74,10 +75,13 @@ type Header struct {
// SparseHoles represents a sequence of holes in a sparse file.
//
// The regions must be sorted in ascending order, not overlap with
// each other, and not extend past the specified Size.
// The file is sparse if either len(SparseHoles) > 0 or
// the Typeflag is set to TypeGNUSparse.
// A file is sparse if len(SparseHoles) > 0 or Typeflag is TypeGNUSparse.
// A sparse file consists of fragments of data, intermixed with holes
// (described by this field). A hole is semantically a block of NUL-bytes,
// but does not actually exist within the TAR file.
// The logical size of the file stored in the Size field, while
// the holes must be sorted in ascending order,
// not overlap with each other, and not extend past the specified Size.
SparseHoles []SparseEntry
}
......@@ -300,6 +304,20 @@ func (h *Header) allowedFormats() (format int, paxHdrs map[string]string) {
return formatUnknown, nil // Invalid PAX key
}
}
if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse {
if isHeaderOnlyType(h.Typeflag) {
return formatUnknown, nil // Cannot have sparse data on header-only file
}
if !validateSparseEntries(h.SparseHoles, h.Size) {
return formatUnknown, nil
}
if h.Typeflag == TypeGNUSparse {
format &= formatGNU // GNU only
} else {
format &^= formatGNU // No GNU
}
format &^= formatUSTAR // No USTAR
}
return format, paxHdrs
}
......
......@@ -7,20 +7,20 @@ package tar_test
import (
"archive/tar"
"bytes"
"crypto/md5"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"strings"
)
func Example() {
// Create a buffer to write our archive to.
buf := new(bytes.Buffer)
// Create a new tar archive.
// Create and add some files to the archive.
tw := tar.NewWriter(buf)
// Add some files to the archive.
var files = []struct {
Name, Body string
}{
......@@ -35,34 +35,29 @@ func Example() {
Size: int64(len(file.Body)),
}
if err := tw.WriteHeader(hdr); err != nil {
log.Fatalln(err)
log.Fatal(err)
}
if _, err := tw.Write([]byte(file.Body)); err != nil {
log.Fatalln(err)
log.Fatal(err)
}
}
// Make sure to check the error on Close.
if err := tw.Close(); err != nil {
log.Fatalln(err)
log.Fatal(err)
}
// Open the tar archive for reading.
r := bytes.NewReader(buf.Bytes())
tr := tar.NewReader(r)
// Iterate through the files in the archive.
// Open and iterate through the files in the archive.
tr := tar.NewReader(buf)
for {
hdr, err := tr.Next()
if err == io.EOF {
// end of tar archive
break
break // End of archive
}
if err != nil {
log.Fatalln(err)
log.Fatal(err)
}
fmt.Printf("Contents of %s:\n", hdr.Name)
if _, err := io.Copy(os.Stdout, tr); err != nil {
log.Fatalln(err)
log.Fatal(err)
}
fmt.Println()
}
......@@ -78,3 +73,86 @@ func Example() {
// Contents of todo.txt:
// Get animal handling license.
}
// A sparse file can efficiently represent a large file that is mostly empty.
func Example_sparse() {
buf := new(bytes.Buffer)
// Define a sparse file to add to the archive.
// This sparse files contains 5 data fragments, and 4 hole fragments.
// The logical size of the file is 16 KiB, while the physical size of the
// file is only 3 KiB (not counting the header data).
hdr := &tar.Header{
Name: "sparse.db",
Size: 16384,
SparseHoles: []tar.SparseEntry{
// Data fragment at 0..1023
{Offset: 1024, Length: 1024 - 512}, // Hole fragment at 1024..1535
// Data fragment at 1536..2047
{Offset: 2048, Length: 2048 - 512}, // Hole fragment at 2048..3583
// Data fragment at 3584..4095
{Offset: 4096, Length: 4096 - 512}, // Hole fragment at 4096..7679
// Data fragment at 7680..8191
{Offset: 8192, Length: 8192 - 512}, // Hole fragment at 8192..15871
// Data fragment at 15872..16383
},
}
// The regions marked as a sparse hole are filled with NUL-bytes.
// The total length of the body content must match the specified Size field.
body := "" +
strings.Repeat("A", 1024) +
strings.Repeat("\x00", 1024-512) +
strings.Repeat("B", 512) +
strings.Repeat("\x00", 2048-512) +
strings.Repeat("C", 512) +
strings.Repeat("\x00", 4096-512) +
strings.Repeat("D", 512) +
strings.Repeat("\x00", 8192-512) +
strings.Repeat("E", 512)
h := md5.Sum([]byte(body))
fmt.Printf("Write content of %s, Size: %d, MD5: %08x\n", hdr.Name, len(body), h)
fmt.Printf("Write SparseHoles of %s:\n\t%v\n\n", hdr.Name, hdr.SparseHoles)
// Create a new archive and write the sparse file.
tw := tar.NewWriter(buf)
if err := tw.WriteHeader(hdr); err != nil {
log.Fatal(err)
}
if _, err := tw.Write([]byte(body)); err != nil {
log.Fatal(err)
}
if err := tw.Close(); err != nil {
log.Fatal(err)
}
// Open and iterate through the files in the archive.
tr := tar.NewReader(buf)
for {
hdr, err := tr.Next()
if err == io.EOF {
break
}
if err != nil {
log.Fatal(err)
}
body, err := ioutil.ReadAll(tr)
if err != nil {
log.Fatal(err)
}
h := md5.Sum([]byte(body))
fmt.Printf("Read content of %s, Size: %d, MD5: %08x\n", hdr.Name, len(body), h)
fmt.Printf("Read SparseHoles of %s:\n\t%v\n\n", hdr.Name, hdr.SparseHoles)
}
// Output:
// Write content of sparse.db, Size: 16384, MD5: 9b4e2cfae0f9303d30237718e891e9f9
// Write SparseHoles of sparse.db:
// [{1024 512} {2048 1536} {4096 3584} {8192 7680}]
//
// Read content of sparse.db, Size: 16384, MD5: 9b4e2cfae0f9303d30237718e891e9f9
// Read SparseHoles of sparse.db:
// [{1024 512} {2048 1536} {4096 3584} {8192 7680} {16384 0}]
}
......@@ -500,6 +500,46 @@ func TestReader(t *testing.T) {
Devmajor: 1,
Devminor: 1,
}},
}, {
// Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1.
file: "testdata/gnu-nil-sparse-data.tar",
headers: []*Header{{
Name: "sparse.db",
Typeflag: TypeGNUSparse,
Size: 1000,
ModTime: time.Unix(0, 0),
SparseHoles: []SparseEntry{{Offset: 1000, Length: 0}},
}},
}, {
// Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1.
file: "testdata/gnu-nil-sparse-hole.tar",
headers: []*Header{{
Name: "sparse.db",
Typeflag: TypeGNUSparse,
Size: 1000,
ModTime: time.Unix(0, 0),
SparseHoles: []SparseEntry{{Offset: 0, Length: 1000}},
}},
}, {
// Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1.
file: "testdata/pax-nil-sparse-data.tar",
headers: []*Header{{
Name: "sparse.db",
Typeflag: TypeReg,
Size: 1000,
ModTime: time.Unix(0, 0),
SparseHoles: []SparseEntry{{Offset: 1000, Length: 0}},
}},
}, {
// Generated by Go, works on BSD tar v3.1.2 and GNU tar v.1.27.1.
file: "testdata/pax-nil-sparse-hole.tar",
headers: []*Header{{
Name: "sparse.db",
Typeflag: TypeReg,
Size: 1000,
ModTime: time.Unix(0, 0),
SparseHoles: []SparseEntry{{Offset: 0, Length: 1000}},
}},
}}
for _, v := range vectors {
......@@ -1212,7 +1252,7 @@ func TestReadGNUSparsePAXHeaders(t *testing.T) {
func TestFileReader(t *testing.T) {
type (
testRead struct { // ReadN(cnt) == (wantStr, wantErr)
testRead struct { // Read(cnt) == (wantStr, wantErr)
cnt int
wantStr string
wantErr error
......@@ -1228,22 +1268,24 @@ func TestFileReader(t *testing.T) {
testFnc interface{} // testRead | testDiscard | testRemaining
)
makeReg := func(s string, n int) fileReader {
return &regFileReader{strings.NewReader(s), int64(n)}
}
makeSparse := func(fr fileReader, spd sparseDatas, size int64) fileReader {
if !validateSparseEntries(spd, size) {
t.Fatalf("invalid sparse map: %v", spd)
type (
makeReg struct {
str string
size int64
}
sph := invertSparseEntries(append([]SparseEntry{}, spd...), size)
return &sparseFileReader{fr, sph, 0}
}
makeSparse struct {
makeReg makeReg
spd sparseDatas
size int64
}
fileMaker interface{} // makeReg | makeSparse
)
vectors := []struct {
fr fileReader
maker fileMaker
tests []testFnc
}{{
fr: makeReg("", 0),
maker: makeReg{"", 0},
tests: []testFnc{
testRemaining{0},
testRead{0, "", io.EOF},
......@@ -1253,7 +1295,7 @@ func TestFileReader(t *testing.T) {
testRemaining{0},
},
}, {
fr: makeReg("", 1),
maker: makeReg{"", 1},
tests: []testFnc{
testRemaining{1},
testRead{0, "", io.ErrUnexpectedEOF},
......@@ -1263,14 +1305,14 @@ func TestFileReader(t *testing.T) {
testRemaining{1},
},
}, {
fr: makeReg("hello", 5),
maker: makeReg{"hello", 5},
tests: []testFnc{
testRemaining{5},
testRead{5, "hello", io.EOF},
testRemaining{0},
},
}, {
fr: makeReg("hello, world", 50),
maker: makeReg{"hello, world", 50},
tests: []testFnc{
testRemaining{50},
testDiscard{7, 7, nil},
......@@ -1282,7 +1324,7 @@ func TestFileReader(t *testing.T) {
testRemaining{38},
},
}, {
fr: makeReg("hello, world", 5),
maker: makeReg{"hello, world", 5},
tests: []testFnc{
testRemaining{5},
testRead{0, "", nil},
......@@ -1294,7 +1336,7 @@ func TestFileReader(t *testing.T) {
testRead{0, "", io.EOF},
},
}, {
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 8),
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8},
tests: []testFnc{
testRemaining{8},
testRead{3, "ab\x00", nil},
......@@ -1302,92 +1344,92 @@ func TestFileReader(t *testing.T) {
testRemaining{0},
},
}, {
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 8),
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 8},
tests: []testFnc{
testRemaining{8},
testDiscard{100, 8, io.EOF},
testRemaining{0},
},
}, {
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{0, 2}, {5, 3}}, 10),
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{0, 2}, {5, 3}}, 10},
tests: []testFnc{
testRemaining{10},
testRead{100, "ab\x00\x00\x00cde\x00\x00", io.EOF},
testRemaining{0},
},
}, {
fr: makeSparse(makeReg("abc", 5), sparseDatas{{0, 2}, {5, 3}}, 10),
maker: makeSparse{makeReg{"abc", 5}, sparseDatas{{0, 2}, {5, 3}}, 10},
tests: []testFnc{
testRemaining{10},
testRead{100, "ab\x00\x00\x00c", io.ErrUnexpectedEOF},
testRemaining{4},
},
}, {
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}}, 8),
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 8},
tests: []testFnc{
testRemaining{8},
testRead{8, "\x00abc\x00\x00de", io.EOF},
testRemaining{0},
},
}, {
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8),
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 0}, {6, 0}, {6, 2}}, 8},
tests: []testFnc{
testRemaining{8},
testRead{8, "\x00abc\x00\x00de", io.EOF},
testRemaining{0},
},
}, {
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}}, 10),
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}}, 10},
tests: []testFnc{
testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF},
},
}, {
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10),
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 2}, {8, 0}, {8, 0}, {8, 0}, {8, 0}}, 10},
tests: []testFnc{
testRead{100, "\x00abc\x00\x00de\x00\x00", io.EOF},
},
}, {
fr: makeSparse(makeReg("", 0), sparseDatas{}, 2),
maker: makeSparse{makeReg{"", 0}, sparseDatas{}, 2},
tests: []testFnc{
testRead{100, "\x00\x00", io.EOF},
},
}, {
fr: makeSparse(makeReg("", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
maker: makeSparse{makeReg{"", 8}, sparseDatas{{1, 3}, {6, 5}}, 15},
tests: []testFnc{
testRead{100, "\x00", io.ErrUnexpectedEOF},
},
}, {
fr: makeSparse(makeReg("ab", 2), sparseDatas{{1, 3}, {6, 5}}, 15),
maker: makeSparse{makeReg{"ab", 2}, sparseDatas{{1, 3}, {6, 5}}, 15},
tests: []testFnc{
testRead{100, "\x00ab", errMissData},
},
}, {
fr: makeSparse(makeReg("ab", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
maker: makeSparse{makeReg{"ab", 8}, sparseDatas{{1, 3}, {6, 5}}, 15},
tests: []testFnc{
testRead{100, "\x00ab", io.ErrUnexpectedEOF},
},
}, {
fr: makeSparse(makeReg("abc", 3), sparseDatas{{1, 3}, {6, 5}}, 15),
maker: makeSparse{makeReg{"abc", 3}, sparseDatas{{1, 3}, {6, 5}}, 15},
tests: []testFnc{
testRead{100, "\x00abc\x00\x00", errMissData},
},
}, {
fr: makeSparse(makeReg("abc", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
maker: makeSparse{makeReg{"abc", 8}, sparseDatas{{1, 3}, {6, 5}}, 15},
tests: []testFnc{
testRead{100, "\x00abc\x00\x00", io.ErrUnexpectedEOF},
},
}, {
fr: makeSparse(makeReg("abcde", 5), sparseDatas{{1, 3}, {6, 5}}, 15),
maker: makeSparse{makeReg{"abcde", 5}, sparseDatas{{1, 3}, {6, 5}}, 15},
tests: []testFnc{
testRead{100, "\x00abc\x00\x00de", errMissData},
},
}, {
fr: makeSparse(makeReg("abcde", 8), sparseDatas{{1, 3}, {6, 5}}, 15),
maker: makeSparse{makeReg{"abcde", 8}, sparseDatas{{1, 3}, {6, 5}}, 15},
tests: []testFnc{
testRead{100, "\x00abc\x00\x00de", io.ErrUnexpectedEOF},
},
}, {
fr: makeSparse(makeReg("abcdefghEXTRA", 13), sparseDatas{{1, 3}, {6, 5}}, 15),
maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15},
tests: []testFnc{
testRemaining{15},
testRead{100, "\x00abc\x00\x00defgh\x00\x00\x00\x00", errUnrefData},
......@@ -1395,7 +1437,7 @@ func TestFileReader(t *testing.T) {
testRemaining{0},
},
}, {
fr: makeSparse(makeReg("abcdefghEXTRA", 13), sparseDatas{{1, 3}, {6, 5}}, 15),
maker: makeSparse{makeReg{"abcdefghEXTRA", 13}, sparseDatas{{1, 3}, {6, 5}}, 15},
tests: []testFnc{
testRemaining{15},
testDiscard{100, 15, errUnrefData},
......@@ -1405,21 +1447,38 @@ func TestFileReader(t *testing.T) {
}}
for i, v := range vectors {
var fr fileReader
switch maker := v.maker.(type) {
case makeReg:
r := strings.NewReader(maker.str)
fr = &regFileReader{r, maker.size}
case makeSparse:
if !validateSparseEntries(maker.spd, maker.size) {
t.Fatalf("invalid sparse map: %v", maker.spd)
}
sph := invertSparseEntries(maker.spd, maker.size)
r := strings.NewReader(maker.makeReg.str)
fr = &regFileReader{r, maker.makeReg.size}
fr = &sparseFileReader{fr, sph, 0}
default:
t.Fatalf("test %d, unknown make operation: %T", i, maker)
}
for j, tf := range v.tests {
switch tf := tf.(type) {
case testRead:
b := make([]byte, tf.cnt)
n, err := v.fr.Read(b)
n, err := fr.Read(b)
if got := string(b[:n]); got != tf.wantStr || err != tf.wantErr {
t.Errorf("test %d.%d, Read(%d):\ngot (%q, %v)\nwant (%q, %v)", i, j, tf.cnt, got, err, tf.wantStr, tf.wantErr)
}
case testDiscard:
got, err := v.fr.Discard(tf.cnt)
got, err := fr.Discard(tf.cnt)
if got != tf.wantCnt || err != tf.wantErr {
t.Errorf("test %d.%d, Discard(%d) = (%d, %v), want (%d, %v)", i, j, tf.cnt, got, err, tf.wantCnt, tf.wantErr)
}
case testRemaining:
got := v.fr.Remaining()
got := fr.Remaining()
if got != tf.wantCnt {
t.Errorf("test %d.%d, Remaining() = %d, want %d", i, j, got, tf.wantCnt)
}
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment