Commit 19a99594 authored by Joe Tsai, committed by Joe Tsai

archive/tar: add raw support for global PAX records

The PAX specification says the following:
<<<
'g' represents global extended header records for the following files in the archive.
The format of these extended header records shall be as described in pax Extended Header.
Each value shall affect all subsequent files that do not override that value
in their own extended header record and until another global extended header record
is reached that provides another value for the same field.
>>>

This CL adds support for parsing and composing global PAX records,
but intentionally does not provide support for automatically
persisting the global state across files.

Changes made:
* When Reader encounters a TypeXGlobalHeader header, it parses the
PAX records and returns them to the user verbatim. Reader does not
store them in its state, ensuring they have no effect on future Next calls.
* When Writer receives a TypeXGlobalHeader header, it writes the
PAX records to the archive verbatim. Writer does not store them in
its state, ensuring they have no effect on future WriteHeader calls.
* The restriction regarding empty record values is lifted since an empty
value is used to represent deletion in global headers.

Why provide raw support only:
* Some archives in the wild have a global header section (often empty)
and it is the user's responsibility to manually read and discard its body.
The logic added here allows users to more easily skip over these sections.
* For users that do care about global headers, having access to the raw
records allows them to implement the functionality of global headers themselves
and manually persist the global state across files.
* We can still upgrade to a full implementation in the future.

Why we don't provide full support:
* Even though the PAX specification describes their operation in detail,
both the GNU and BSD tar tools (which are the most common implementations)
do not have a consistent interpretation of many details.
* Global headers were a controversial feature in PAX, by admission of the
specification itself:
  <<<
  The concept of a global extended header (typeflag g) was controversial.

  The typeflag g global headers should not be used with interchange media that
  could suffer partial data loss in transporting the archive.
  >>>
* Having state persist from entry-to-entry complicates the implementation
for a feature that is not widely used and not well supported.

Change-Id: I1d904cacc2623ddcaa91525a5470b7dbe226c7e8
Reviewed-on: https://go-review.googlesource.com/59190
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
parent 37b04c90
...@@ -13,6 +13,7 @@ import ( ...@@ -13,6 +13,7 @@ import (
"math" "math"
"os" "os"
"path" "path"
"reflect"
"strconv" "strconv"
"strings" "strings"
"time" "time"
...@@ -336,6 +337,9 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err ...@@ -336,6 +337,9 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err
paxHdrs[paxKey] = s paxHdrs[paxKey] = s
} }
} }
if v, ok := h.PAXRecords[paxKey]; ok && v == s {
paxHdrs[paxKey] = v
}
} }
verifyNumeric := func(n int64, size int, name, paxKey string) { verifyNumeric := func(n int64, size int, name, paxKey string) {
if !fitsInBase256(size, n) { if !fitsInBase256(size, n) {
...@@ -352,6 +356,9 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err ...@@ -352,6 +356,9 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err
paxHdrs[paxKey] = strconv.FormatInt(n, 10) paxHdrs[paxKey] = strconv.FormatInt(n, 10)
} }
} }
if v, ok := h.PAXRecords[paxKey]; ok && v == strconv.FormatInt(n, 10) {
paxHdrs[paxKey] = v
}
} }
verifyTime := func(ts time.Time, size int, name, paxKey string) { verifyTime := func(ts time.Time, size int, name, paxKey string) {
if ts.IsZero() { if ts.IsZero() {
...@@ -373,6 +380,9 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err ...@@ -373,6 +380,9 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err
paxHdrs[paxKey] = formatPAXTime(ts) paxHdrs[paxKey] = formatPAXTime(ts)
} }
} }
if v, ok := h.PAXRecords[paxKey]; ok && v == formatPAXTime(ts) {
paxHdrs[paxKey] = v
}
} }
// Check basic fields. // Check basic fields.
...@@ -396,6 +406,16 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err ...@@ -396,6 +406,16 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err
// Check for header-only types. // Check for header-only types.
var whyOnlyPAX, whyOnlyGNU string var whyOnlyPAX, whyOnlyGNU string
switch h.Typeflag {
case TypeXHeader, TypeGNULongName, TypeGNULongLink:
return FormatUnknown, nil, headerError{"cannot manually encode TypeXHeader, TypeGNULongName, or TypeGNULongLink headers"}
case TypeXGlobalHeader:
if !reflect.DeepEqual(h, &Header{Typeflag: h.Typeflag, Xattrs: h.Xattrs, PAXRecords: h.PAXRecords, Format: h.Format}) {
return FormatUnknown, nil, headerError{"only PAXRecords may be set for TypeXGlobalHeader"}
}
whyOnlyPAX = "only PAX supports TypeXGlobalHeader"
format.mayOnlyBe(FormatPAX)
}
if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 { if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 {
return FormatUnknown, nil, headerError{"negative size on header-only type"} return FormatUnknown, nil, headerError{"negative size on header-only type"}
} }
...@@ -410,19 +430,20 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err ...@@ -410,19 +430,20 @@ func (h *Header) allowedFormats() (format Format, paxHdrs map[string]string, err
} }
if len(h.PAXRecords) > 0 { if len(h.PAXRecords) > 0 {
for k, v := range h.PAXRecords { for k, v := range h.PAXRecords {
_, exists := paxHdrs[k] switch _, exists := paxHdrs[k]; {
ignore := exists || basicKeys[k] || strings.HasPrefix(k, paxGNUSparse) case exists:
if !ignore { continue // Do not overwrite existing records
paxHdrs[k] = v case h.Typeflag == TypeXGlobalHeader:
paxHdrs[k] = v // Copy all records
case !basicKeys[k] && !strings.HasPrefix(k, paxGNUSparse):
paxHdrs[k] = v // Ignore local records that may conflict
} }
} }
whyOnlyPAX = "only PAX supports PAXRecords" whyOnlyPAX = "only PAX supports PAXRecords"
format.mayOnlyBe(FormatPAX) format.mayOnlyBe(FormatPAX)
} }
for k, v := range paxHdrs { for k, v := range paxHdrs {
// Forbid empty values (which represent deletion) since usage of if !validPAXRecord(k, v) {
// them are non-sensible without global PAX record support.
if !validPAXRecord(k, v) || v == "" {
return FormatUnknown, nil, headerError{fmt.Sprintf("invalid PAX record: %q", k+" = "+v)} return FormatUnknown, nil, headerError{fmt.Sprintf("invalid PAX record: %q", k+" = "+v)}
} }
} }
......
...@@ -85,12 +85,21 @@ loop: ...@@ -85,12 +85,21 @@ loop:
// Check for PAX/GNU special headers and files. // Check for PAX/GNU special headers and files.
switch hdr.Typeflag { switch hdr.Typeflag {
case TypeXHeader: case TypeXHeader, TypeXGlobalHeader:
format.mayOnlyBe(FormatPAX) format.mayOnlyBe(FormatPAX)
paxHdrs, err = parsePAX(tr) paxHdrs, err = parsePAX(tr)
if err != nil { if err != nil {
return nil, err return nil, err
} }
if hdr.Typeflag == TypeXGlobalHeader {
mergePAX(hdr, paxHdrs)
return &Header{
Typeflag: hdr.Typeflag,
Xattrs: hdr.Xattrs,
PAXRecords: hdr.PAXRecords,
Format: format,
}, nil
}
continue loop // This is a meta header affecting the next header continue loop // This is a meta header affecting the next header
case TypeGNULongName, TypeGNULongLink: case TypeGNULongName, TypeGNULongLink:
format.mayOnlyBe(FormatGNU) format.mayOnlyBe(FormatGNU)
...@@ -230,14 +239,13 @@ func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) { ...@@ -230,14 +239,13 @@ func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) {
} }
} }
// mergePAX merges well known headers according to PAX standard. // mergePAX merges paxHdrs into hdr for all relevant fields of Header.
// In general headers with the same name as those found func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) {
// in the header struct overwrite those found in the header for k, v := range paxHdrs {
// struct with higher precision or longer values. Esp. useful if v == "" {
// for name and linkname fields. continue // Keep the original USTAR value
func mergePAX(hdr *Header, headers map[string]string) (err error) { }
var id64 int64 var id64 int64
for k, v := range headers {
switch k { switch k {
case paxPath: case paxPath:
hdr.Name = v hdr.Name = v
...@@ -273,7 +281,7 @@ func mergePAX(hdr *Header, headers map[string]string) (err error) { ...@@ -273,7 +281,7 @@ func mergePAX(hdr *Header, headers map[string]string) (err error) {
return ErrHeader return ErrHeader
} }
} }
hdr.PAXRecords = headers hdr.PAXRecords = paxHdrs
return nil return nil
} }
...@@ -309,13 +317,7 @@ func parsePAX(r io.Reader) (map[string]string, error) { ...@@ -309,13 +317,7 @@ func parsePAX(r io.Reader) (map[string]string, error) {
} }
sparseMap = append(sparseMap, value) sparseMap = append(sparseMap, value)
default: default:
// According to PAX specification, a value is stored only if it is
// non-empty. Otherwise, the key is deleted.
if len(value) > 0 {
paxHdrs[key] = value paxHdrs[key] = value
} else {
delete(paxHdrs, key)
}
} }
} }
if len(sparseMap) > 0 { if len(sparseMap) > 0 {
......
...@@ -348,6 +348,39 @@ func TestReader(t *testing.T) { ...@@ -348,6 +348,39 @@ func TestReader(t *testing.T) {
}, },
Format: FormatPAX, Format: FormatPAX,
}}, }},
}, {
file: "testdata/pax-global-records.tar",
headers: []*Header{{
Typeflag: TypeXGlobalHeader,
PAXRecords: map[string]string{"path": "global1", "mtime": "1500000000.0"},
Format: FormatPAX,
}, {
Typeflag: TypeReg,
Name: "file1",
ModTime: time.Unix(0, 0),
Format: FormatUSTAR,
}, {
Typeflag: TypeReg,
Name: "file2",
PAXRecords: map[string]string{"path": "file2"},
ModTime: time.Unix(0, 0),
Format: FormatPAX,
}, {
Typeflag: TypeXGlobalHeader,
PAXRecords: map[string]string{"path": ""},
Format: FormatPAX,
}, {
Typeflag: TypeReg,
Name: "file3",
ModTime: time.Unix(0, 0),
Format: FormatUSTAR,
}, {
Typeflag: TypeReg,
Name: "file4",
ModTime: time.Unix(1400000000, 0),
PAXRecords: map[string]string{"mtime": "1400000000"},
Format: FormatPAX,
}},
}, { }, {
file: "testdata/nil-uid.tar", // golang.org/issue/5290 file: "testdata/nil-uid.tar", // golang.org/issue/5290
headers: []*Header{{ headers: []*Header{{
...@@ -965,12 +998,18 @@ func TestMergePAX(t *testing.T) { ...@@ -965,12 +998,18 @@ func TestMergePAX(t *testing.T) {
Name: "a/b/c", Name: "a/b/c",
Uid: 1000, Uid: 1000,
ModTime: time.Unix(1350244992, 23960108), ModTime: time.Unix(1350244992, 23960108),
PAXRecords: map[string]string{
"path": "a/b/c",
"uid": "1000",
"mtime": "1350244992.023960108",
},
}, },
ok: true, ok: true,
}, { }, {
in: map[string]string{ in: map[string]string{
"gid": "gtgergergersagersgers", "gid": "gtgergergersagersgers",
}, },
ok: false,
}, { }, {
in: map[string]string{ in: map[string]string{
"missing": "missing", "missing": "missing",
...@@ -978,6 +1017,10 @@ func TestMergePAX(t *testing.T) { ...@@ -978,6 +1017,10 @@ func TestMergePAX(t *testing.T) {
}, },
want: &Header{ want: &Header{
Xattrs: map[string]string{"key": "value"}, Xattrs: map[string]string{"key": "value"},
PAXRecords: map[string]string{
"missing": "missing",
"SCHILY.xattr.key": "value",
},
}, },
ok: true, ok: true,
}} }}
...@@ -985,8 +1028,6 @@ func TestMergePAX(t *testing.T) { ...@@ -985,8 +1028,6 @@ func TestMergePAX(t *testing.T) {
for i, v := range vectors { for i, v := range vectors {
got := new(Header) got := new(Header)
err := mergePAX(got, v.in) err := mergePAX(got, v.in)
// TODO(dsnet): Test more combinations with global record support.
got.PAXRecords = nil
if v.ok && !reflect.DeepEqual(*got, *v.want) { if v.ok && !reflect.DeepEqual(*got, *v.want) {
t.Errorf("test %d, mergePAX(...):\ngot %+v\nwant %+v", i, *got, *v.want) t.Errorf("test %d, mergePAX(...):\ngot %+v\nwant %+v", i, *got, *v.want)
} }
...@@ -1012,7 +1053,7 @@ func TestParsePAX(t *testing.T) { ...@@ -1012,7 +1053,7 @@ func TestParsePAX(t *testing.T) {
{"13 key1=haha\n13 key2=nana\n13 key3=kaka\n", {"13 key1=haha\n13 key2=nana\n13 key3=kaka\n",
map[string]string{"key1": "haha", "key2": "nana", "key3": "kaka"}, true}, map[string]string{"key1": "haha", "key2": "nana", "key3": "kaka"}, true},
{"13 key1=val1\n13 key2=val2\n8 key1=\n", {"13 key1=val1\n13 key2=val2\n8 key1=\n",
map[string]string{"key2": "val2"}, true}, map[string]string{"key1": "", "key2": "val2"}, true},
{"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=2\n" + {"22 GNU.sparse.size=10\n26 GNU.sparse.numblocks=2\n" +
"23 GNU.sparse.offset=1\n25 GNU.sparse.numbytes=2\n" + "23 GNU.sparse.offset=1\n25 GNU.sparse.numbytes=2\n" +
"23 GNU.sparse.offset=3\n25 GNU.sparse.numbytes=4\n", "23 GNU.sparse.offset=3\n25 GNU.sparse.numbytes=4\n",
...@@ -1029,10 +1070,10 @@ func TestParsePAX(t *testing.T) { ...@@ -1029,10 +1070,10 @@ func TestParsePAX(t *testing.T) {
r := strings.NewReader(v.in) r := strings.NewReader(v.in)
got, err := parsePAX(r) got, err := parsePAX(r)
if !reflect.DeepEqual(got, v.want) && !(len(got) == 0 && len(v.want) == 0) { if !reflect.DeepEqual(got, v.want) && !(len(got) == 0 && len(v.want) == 0) {
t.Errorf("test %d, parsePAX(...):\ngot %v\nwant %v", i, got, v.want) t.Errorf("test %d, parsePAX():\ngot %v\nwant %v", i, got, v.want)
} }
if ok := err == nil; ok != v.ok { if ok := err == nil; ok != v.ok {
t.Errorf("test %d, parsePAX(...): got %v, want %v", i, ok, v.ok) t.Errorf("test %d, parsePAX(): got %v, want %v", i, ok, v.ok)
} }
} }
} }
......
...@@ -559,7 +559,8 @@ func TestHeaderAllowedFormats(t *testing.T) { ...@@ -559,7 +559,8 @@ func TestHeaderAllowedFormats(t *testing.T) {
formats: FormatUnknown, formats: FormatUnknown,
}, { }, {
header: &Header{Xattrs: map[string]string{"foo": ""}}, header: &Header{Xattrs: map[string]string{"foo": ""}},
formats: FormatUnknown, paxHdrs: map[string]string{paxSchilyXattr + "foo": ""},
formats: FormatPAX,
}, { }, {
header: &Header{ModTime: time.Unix(0, 0)}, header: &Header{ModTime: time.Unix(0, 0)},
formats: FormatUSTAR | FormatPAX | FormatGNU, formats: FormatUSTAR | FormatPAX | FormatGNU,
......
...@@ -141,7 +141,8 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { ...@@ -141,7 +141,8 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error {
} }
// Write PAX records to the output. // Write PAX records to the output.
if len(paxHdrs) > 0 { isGlobal := hdr.Typeflag == TypeXGlobalHeader
if len(paxHdrs) > 0 || isGlobal {
// Sort keys for deterministic ordering. // Sort keys for deterministic ordering.
var keys []string var keys []string
for k := range paxHdrs { for k := range paxHdrs {
...@@ -160,11 +161,19 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { ...@@ -160,11 +161,19 @@ func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error {
} }
// Write the extended header file. // Write the extended header file.
var name string
var flag byte
if isGlobal {
name = "GlobalHead.0.0"
flag = TypeXGlobalHeader
} else {
dir, file := path.Split(realName) dir, file := path.Split(realName)
name := path.Join(dir, "PaxHeaders.0", file) name = path.Join(dir, "PaxHeaders.0", file)
flag = TypeXHeader
}
data := buf.String() data := buf.String()
if err := tw.writeRawFile(name, data, TypeXHeader, FormatPAX); err != nil { if err := tw.writeRawFile(name, data, flag, FormatPAX); err != nil || isGlobal {
return err return err // Global headers return here
} }
} }
......
...@@ -247,6 +247,66 @@ func TestWriter(t *testing.T) { ...@@ -247,6 +247,66 @@ func TestWriter(t *testing.T) {
}, nil}, }, nil},
testClose{nil}, testClose{nil},
}, },
}, {
// Craft a theoretically valid PAX archive with global headers.
// The GNU and BSD tar tools do not parse these the same way.
//
// BSD tar v3.1.2 parses and ignores all global headers;
// the behavior is verified by researching the source code.
//
// $ bsdtar -tvf pax-global-records.tar
// ---------- 0 0 0 0 Dec 31 1969 file1
// ---------- 0 0 0 0 Dec 31 1969 file2
// ---------- 0 0 0 0 Dec 31 1969 file3
// ---------- 0 0 0 0 May 13 2014 file4
//
// GNU tar v1.27.1 applies global headers to subsequent records,
// but does not do the following properly:
// * It does not treat an empty record as deletion.
// * It does not use subsequent global headers to update previous ones.
//
// $ gnutar -tvf pax-global-records.tar
// ---------- 0/0 0 2017-07-13 19:40 global1
// ---------- 0/0 0 2017-07-13 19:40 file2
// gnutar: Substituting `.' for empty member name
// ---------- 0/0 0 1969-12-31 16:00
// gnutar: Substituting `.' for empty member name
// ---------- 0/0 0 2014-05-13 09:53
//
// According to the PAX specification, this should have been the result:
// ---------- 0/0 0 2017-07-13 19:40 global1
// ---------- 0/0 0 2017-07-13 19:40 file2
// ---------- 0/0 0 2017-07-13 19:40 file3
// ---------- 0/0 0 2014-05-13 09:53 file4
file: "testdata/pax-global-records.tar",
tests: []testFnc{
testHeader{Header{
Typeflag: TypeXGlobalHeader,
PAXRecords: map[string]string{"path": "global1", "mtime": "1500000000.0"},
}, nil},
testHeader{Header{
Typeflag: TypeReg, Name: "file1",
}, nil},
testHeader{Header{
Typeflag: TypeReg,
Name: "file2",
PAXRecords: map[string]string{"path": "file2"},
}, nil},
testHeader{Header{
Typeflag: TypeXGlobalHeader,
PAXRecords: map[string]string{"path": ""}, // Should delete "path", but keep "mtime"
}, nil},
testHeader{Header{
Typeflag: TypeReg, Name: "file3",
}, nil},
testHeader{Header{
Typeflag: TypeReg,
Name: "file4",
ModTime: time.Unix(1400000000, 0),
PAXRecords: map[string]string{"mtime": "1400000000"},
}, nil},
testClose{nil},
},
}, { }, {
file: "testdata/gnu-utf8.tar", file: "testdata/gnu-utf8.tar",
tests: []testFnc{ tests: []testFnc{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment