Commit d6d0a390 authored by David Symonds

Basic POSIX-compatible tar writer.

R=rsc
APPROVED=rsc
DELTA=456  (382 added, 66 deleted, 8 changed)
OCL=31246
CL=31372
parent 903d28bd
......@@ -2,6 +2,7 @@
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
# DO NOT EDIT. Automatically generated by gobuild.
# gobuild -m >Makefile
......@@ -20,7 +21,7 @@ test: packages
coverage: packages
gotest
6cov -g `pwd` | grep -v '_test\.go:'
6cov -g $$(pwd) | grep -v '_test\.go:'
%.$O: %.go
$(GC) -I_obj $*.go
......@@ -32,16 +33,24 @@ coverage: packages
$(AS) $*.s
O1=\
common.$O\
O2=\
untar.$O\
writer.$O\
phases: a1
phases: a1 a2
_obj$D/tar.a: phases
a1: $(O1)
$(AR) grc _obj$D/tar.a untar.$O
$(AR) grc _obj$D/tar.a common.$O
rm -f $(O1)
a2: $(O2)
$(AR) grc _obj$D/tar.a untar.$O writer.$O
rm -f $(O2)
newpkg: clean
mkdir -p _obj$D
......@@ -49,6 +58,7 @@ newpkg: clean
$(O1): newpkg
$(O2): a1
$(O3): a2
nuke: clean
rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/tar.a
......
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The tar package implements access to tar archives.
// It aims to cover most of the variations, including those produced
// by GNU and BSD tars.
//
// References:
// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
// http://www.gnu.org/software/tar/manual/html_node/Standard.html
package tar
const (
	// blockSize is the size in bytes of one tar block: headers are built as
	// one block and file data is padded up to a multiple of it.
	blockSize = 512;

	// Types
	// Values for Header.Typeflag. The meanings noted here follow the ustar
	// typeflag definitions in the references listed in the package comment.
	TypeReg = '0';	// regular file
	TypeRegA = '\x00';	// regular file, old-style (NUL) flag
	TypeLink = '1';	// hard link
	TypeSymlink = '2';	// symbolic link
	TypeChar = '3';	// character device node
	TypeBlock = '4';	// block device node
	TypeDir = '5';	// directory
	TypeFifo = '6';	// fifo node
	TypeCont = '7';	// reserved (contiguous file)
	TypeXHeader = 'x';	// pax extended header
	TypeXGlobalHeader = 'g';	// pax global extended header
)
// A Header represents a single header in a tar archive.
// Some fields may not be populated.
type Header struct {
	Name string;	// name of the file entry
	Mode int64;	// mode bits; encoded as an octal field by the writer
	Uid int64;	// numeric user id of the owner
	Gid int64;	// numeric group id of the owner
	Size int64;	// length of the entry's data in bytes
	Mtime int64;	// modification time; presumably seconds since the Unix epoch — TODO confirm
	Typeflag byte;	// entry type; one of the Type* constants
	Linkname string;	// target name for link entries
	Uname string;	// user name of the owner
	Gname string;	// group name of the owner
	Devmajor int64;	// major device number (device entries)
	Devminor int64;	// minor device number (device entries)
	Atime int64;	// access time; presumably same units as Mtime — TODO confirm
	Ctime int64;	// status change time; presumably same units as Mtime — TODO confirm
}
// zeroBlock is one block of zero bytes. The writer slices it to pad entries
// and writes it whole as the end-of-archive trailer.
var zeroBlock = make([]byte, blockSize);
// checksum sums the bytes of a tar header block and returns the total twice:
// once treating every byte as unsigned (the POSIX definition) and once
// treating every byte as signed (the variant used by the Sun tar).
//
// The eight bytes of the chksum field, header[148:156], never contribute
// their stored values; the field counts as eight ASCII spaces instead. That
// fixed amount is added as soon as offset 148 is reached, even when header
// ends before offset 156.
func checksum(header []byte) (unsigned int64, signed int64) {
	const (
		chksumOff = 148;	// first byte of the chksum field
		chksumLen = 8;	// width of the chksum field
	)
	for pos, c := range header {
		switch {
		case pos == chksumOff:
			// Account for the whole field at once, as if it held spaces.
			unsigned += ' ' * chksumLen;
			signed += ' ' * chksumLen;
		case pos > chksumOff && pos < chksumOff+chksumLen:
			// Remaining chksum bytes: already covered above.
		default:
			unsigned += int64(c);
			signed += int64(int8(c));
		}
	}
	return unsigned, signed
}
// slicer is a byte slice that can be consumed from the front in
// fixed-size pieces.
type slicer []byte

// next returns the first n bytes of the slicer and advances the slicer past
// them. The returned slice shares storage with the original data.
func (sp *slicer) next(n int) (b []byte) {
	b = (*sp)[0:n];
	*sp = (*sp)[n:len(*sp)];
	return b
}
......@@ -2,20 +2,14 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The tar package implements access to tar archives.
// It aims to cover most of the variations, including those produced
// by GNU and BSD tars.
//
// References:
// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
// http://www.gnu.org/software/tar/manual/html_node/Standard.html
package tar
// TODO(dsymonds):
// - pax extensions
// - rename this file to reader.go
import (
"bufio";
"archive/tar";
"bytes";
"io";
"os";
......@@ -51,25 +45,6 @@ type Reader struct {
pad int64; // amount of padding (ignored) after current file entry
}
// A Header represents a single header in a tar archive.
// Only some fields may be populated.
type Header struct {
	Name string;	// name of the file entry
	Mode int64;	// mode bits
	Uid int64;	// numeric user id of the owner
	Gid int64;	// numeric group id of the owner
	Size int64;	// length of the entry's data in bytes
	Mtime int64;	// modification time; presumably seconds since the Unix epoch — TODO confirm
	Typeflag byte;	// entry type; one of the Type* constants
	Linkname string;	// target name for link entries
	Uname string;	// user name of the owner
	Gname string;	// group name of the owner
	Devmajor int64;	// major device number (device entries)
	Devminor int64;	// minor device number (device entries)
	Atime int64;	// access time; presumably same units as Mtime — TODO confirm
	Ctime int64;	// status change time; presumably same units as Mtime — TODO confirm
}
func (tr *Reader) skipUnread()
func (tr *Reader) readHeader() *Header
......@@ -90,25 +65,6 @@ func (tr *Reader) Next() (*Header, os.Error) {
return hdr, tr.err
}
const (
	// blockSize is the size in bytes of one tar block.
	blockSize = 512;

	// Types
	// Values for Header.Typeflag. The meanings noted here follow the ustar
	// typeflag definitions in the references listed in the package comment.
	TypeReg = '0';	// regular file
	TypeRegA = '\x00';	// regular file, old-style (NUL) flag
	TypeLink = '1';	// hard link
	TypeSymlink = '2';	// symbolic link
	TypeChar = '3';	// character device node
	TypeBlock = '4';	// block device node
	TypeDir = '5';	// directory
	TypeFifo = '6';	// fifo node
	TypeCont = '7';	// reserved (contiguous file)
	TypeXHeader = 'x';	// pax extended header
	TypeXGlobalHeader = 'g';	// pax global extended header
)

// zeroBlock is one block of zero bytes.
var zeroBlock = make([]byte, blockSize);
// Parse bytes as a NUL-terminated C-style string.
// If a NUL byte is not found then the whole slice is returned as a string.
func cString(b []byte) string {
......@@ -153,36 +109,15 @@ func (tr *Reader) skipUnread() {
}
// verifyChecksum reports whether the checksum stored in the chksum field of
// header (header[148:156], parsed as octal) matches the checksum computed
// over the block. Both the unsigned (POSIX) and the signed (Sun tar) sums
// returned by checksum are accepted. It returns false if the reader is in
// an error state after the stored value has been parsed.
func (tr *Reader) verifyChecksum(header []byte) bool {
	given := tr.octal(header[148:156]);
	if tr.err != nil {
		return false
	}

	// The summing loop lives in the shared checksum helper.
	unsigned, signed := checksum(header);
	return given == unsigned || given == signed
}
// slicer is a byte slice that can be consumed from the front in
// fixed-size pieces.
type slicer []byte

// next returns the first n bytes of the slicer and advances the slicer past
// them. The returned slice shares storage with the original data.
func (sp *slicer) next(n int) (b []byte) {
	b = (*sp)[0:n];
	*sp = (*sp)[n:len(*sp)];
	return b
}
func (tr *Reader) readHeader() *Header {
header := make([]byte, blockSize);
var n int;
......
......@@ -102,9 +102,9 @@ var untarTests = []*untarTest{
},
},
},
};
}
func TestAll(t *testing.T) {
func TestReader(t *testing.T) {
testLoop:
for i, test := range untarTests {
f, err := os.Open(test.file, os.O_RDONLY, 0444);
......
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tar
// TODO(dsymonds):
// - catch more errors (no first header, write after close, etc.)
import (
"archive/tar";
"bytes";
"io";
"os";
"strconv";
"strings";
)
var (
	// ErrWriteTooLong is returned by Write when it is given more bytes than
	// remain unwritten for the current entry.
	ErrWriteTooLong os.Error = os.ErrorString("write too long");
	// ErrIntFieldTooBig is recorded when a value does not fit in its
	// fixed-width header field.
	// TODO(dsymonds): remove ErrIntFieldTooBig after we implement binary extension.
	ErrIntFieldTooBig os.Error = os.ErrorString("an integer header field was too big");
)
// A Writer provides sequential writing of a tar archive in POSIX.1 format.
// A tar archive consists of a sequence of files.
// Call WriteHeader to begin a new file, and then call Write to supply that file's data,
// writing at most hdr.Size bytes in total.
//
// Example:
//	tw := NewWriter(w);
//	hdr := new(Header);
//	hdr.Size = length of data in bytes;
//	// populate other hdr fields as desired
//	if err := tw.WriteHeader(hdr); err != nil {
//		// handle error
//	}
//	io.Copy(data, tw);
//	tw.Close();
type Writer struct {
	w io.Writer;	// destination for the archive bytes
	err os.Error;	// error recorded by an earlier operation, if any
	nb int64;	// number of unwritten bytes for current file entry
	pad int64;	// amount of padding to write after current file entry
	closed bool;	// set once Close has run
}
// NewWriter returns a Writer that emits its archive to w.
// All other Writer state starts at its zero value.
func NewWriter(w io.Writer) *Writer {
	tw := new(Writer);
	tw.w = w;
	return tw
}
// Flush finishes writing the current file (optional).
// It writes zero bytes for whatever part of the entry's data was never
// supplied plus the entry's padding, at most one block per write, stopping
// early if an error is recorded. The pending counts are cleared either way,
// and the recorded error (if any) is returned.
func (tw *Writer) Flush() os.Error {
	remaining := tw.nb + tw.pad;
	for remaining > 0 && tw.err == nil {
		chunk := zeroBlock;
		if remaining < blockSize {
			chunk = zeroBlock[0:remaining];
		}
		var written int;
		written, tw.err = tw.w.Write(chunk);
		remaining -= int64(written);
	}
	tw.nb, tw.pad = 0, 0;
	return tw.err
}
// cString copies s into the start of b and, when b has spare room, follows
// it with a NUL byte. If s is longer than b nothing is copied and
// ErrIntFieldTooBig is recorded, unless an error is already recorded.
func (tw *Writer) cString(b []byte, s string) {
	n := len(s);
	if n > len(b) {
		// Keep the first error; do not overwrite it.
		if tw.err == nil {
			tw.err = ErrIntFieldTooBig;
		}
		return
	}
	for i := 0; i < n; i++ {
		b[i] = s[i];
	}
	if n < len(b) {
		b[n] = 0;
	}
}
// octal formats x in base 8 and stores it in b via cString, left-padded
// with '0' characters so that one byte of b is left for the NUL terminator.
// A value with too many digits for b is reported through cString.
func (tw *Writer) octal(b []byte, x int64) {
	digits := strconv.Itob64(x, 8);
	// Pad up to len(b)-1 characters; the last byte is for the NUL.
	for missing := len(b) - 1 - len(digits); missing > 0; missing-- {
		digits = "0" + digits;
	}
	tw.cString(b, digits);
}
// WriteHeader writes hdr and prepares to accept the file's contents.
// WriteHeader calls Flush if it is not the first header.
//
// The header is encoded as a single block. If a field value does not fit
// in its fixed-width slot (including a Linkname longer than 100 bytes),
// nothing is written and the recorded error is returned. A Name longer
// than 100 bytes is still truncated silently (see TODO below).
func (tw *Writer) WriteHeader(hdr *Header) os.Error {
	if tw.err == nil {
		tw.Flush();
	}
	if tw.err != nil {
		return tw.err
	}

	tw.nb = hdr.Size;
	tw.pad = -tw.nb & (blockSize - 1);	// blockSize is a power of two

	header := make([]byte, blockSize);
	s := slicer(header);

	// TODO(dsymonds): handle names longer than 100 chars
	bytes.Copy(s.next(100), strings.Bytes(hdr.Name));

	tw.octal(s.next(8), hdr.Mode);
	tw.octal(s.next(8), hdr.Uid);
	tw.octal(s.next(8), hdr.Gid);
	tw.octal(s.next(12), hdr.Size);
	tw.octal(s.next(12), hdr.Mtime);
	s.next(8);	// chksum; filled in below once the other fields are set
	s.next(1)[0] = hdr.Typeflag;
	// linkname: previously skipped, which dropped the target of link entries.
	tw.cString(s.next(100), hdr.Linkname);
	bytes.Copy(s.next(8), strings.Bytes("ustar\x0000"));
	tw.cString(s.next(32), hdr.Uname);
	tw.cString(s.next(32), hdr.Gname);
	tw.octal(s.next(8), hdr.Devmajor);
	tw.octal(s.next(8), hdr.Devminor);

	// The chksum field is terminated by a NUL and a space.
	// This is different from the other octal fields.
	chksum, _ := checksum(header);
	tw.octal(header[148:155], chksum);
	header[155] = ' ';

	if tw.err != nil {
		// problem with header; probably integer too big for a field.
		return tw.err
	}

	_, tw.err = tw.w.Write(header);
	return tw.err
}
// Write writes to the current entry in the tar archive.
// Write returns the error ErrWriteTooLong if more than
// hdr.Size bytes are written after WriteHeader; in that case only the
// bytes that still fit in the entry are passed to the underlying writer.
//
// If an earlier operation already recorded an error, Write writes nothing
// and returns that error. Without this check a successful write would
// reset the recorded error to nil.
func (tw *Writer) Write(b []uint8) (n int, err os.Error) {
	if tw.err != nil {
		return 0, tw.err
	}
	overwrite := false;
	if int64(len(b)) > tw.nb {
		// Truncate to what the current entry can still hold.
		b = b[0:tw.nb];
		overwrite = true;
	}
	n, err = tw.w.Write(b);
	tw.nb -= int64(n);
	if err == nil && overwrite {
		err = ErrWriteTooLong;
	}
	tw.err = err;
	return
}
// Close finishes the archive: it flushes the current entry and then writes
// the trailer of two zero blocks. It does not close the underlying writer.
// If an error is already recorded, or Close has already run, it does
// nothing and returns the recorded error.
func (tw *Writer) Close() os.Error {
	if tw.err != nil || tw.closed {
		return tw.err
	}
	tw.Flush();
	tw.closed = true;
	if tw.err != nil {
		// Flush failed; writing the trailer would overwrite that error
		// and could report success.
		return tw.err
	}

	// trailer: two zero blocks
	for i := 0; i < 2; i++ {
		_, tw.err = tw.w.Write(zeroBlock);
		if tw.err != nil {
			break
		}
	}
	return tw.err
}
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tar
import (
"archive/tar";
"bytes";
"fmt";
"io";
"os";
"reflect";
"strings";
"testing";
)
// writerTestEntry is one file to add to a test archive: the header to pass
// to WriteHeader and the data to write after it.
type writerTestEntry struct {
	header *Header;
	contents string;
}

// writerTest pairs a list of entries with the file holding the archive
// those entries are expected to produce.
type writerTest struct {
	file string;	// filename of expected output
	entries []*writerTestEntry;
}
// writerTests is the table driven by TestWriter. Each case is written
// through a Writer and the result must equal the named file byte for byte.
var writerTests = []*writerTest{
	&writerTest{
		file: "testdata/writer.tar",
		entries: []*writerTestEntry{
			&writerTestEntry{
				header: &Header{
					Name: "small.txt",
					Mode: 0640,
					Uid: 73025,
					Gid: 5000,
					Size: 5,
					Mtime: 1246508266,
					Typeflag: '0',
					Uname: "dsymonds",
					Gname: "eng",
				},
				contents: `Kilts`,
			},
			&writerTestEntry{
				header: &Header{
					Name: "small2.txt",
					Mode: 0640,
					Uid: 73025,
					Gid: 5000,
					Size: 11,
					Mtime: 1245217492,
					Typeflag: '0',
					Uname: "dsymonds",
					Gname: "eng",
				},
				contents: "Google.com\n",
			},
		}
	},
}
// bytestr renders b as a dump for test failure messages. The first line
// gives the byte count. Each following row starts with a four-digit hex
// offset and shows up to rowLen bytes, with an extra space after the first
// half of a row. ASCII letters and digits are shown as themselves and all
// other bytes as two hex digits. The result always ends with a newline.
func bytestr(b []byte) string {
	const (
		rowLen = 32;
		halfRow = rowLen / 2;
	)
	out := fmt.Sprintf("(%d bytes)\n", len(b));
	for pos, c := range b {
		col := pos % rowLen;
		if col == 0 {
			// start of line: hex offset
			out += fmt.Sprintf("%04x", pos);
		}
		alnum := ('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
		if alnum {
			out += fmt.Sprintf(" %c", c);
		} else {
			out += fmt.Sprintf(" %02x", c);
		}
		switch {
		case col == rowLen-1:
			// end of line
			out += "\n";
		case col%halfRow == halfRow-1:
			// extra space between the two halves of a row
			out += " ";
		}
	}
	if out[len(out)-1] != '\n' {
		out += "\n";
	}
	return out
}
// TestWriter writes each case in writerTests through a Writer into an
// in-memory buffer and compares the result with the contents of the
// case's expected-output file.
func TestWriter(t *testing.T) {
testLoop:
	for i, test := range writerTests {
		// Load the expected archive bytes.
		expected, err := io.ReadFile(test.file);
		if err != nil {
			t.Errorf("test %d: Unexpected error: %v", i, err);
			continue
		}

		buf := new(bytes.Buffer);
		tw := NewWriter(buf);
		for j, entry := range test.entries {
			// Any failure abandons this case and moves on to the next one.
			if err := tw.WriteHeader(entry.header); err != nil {
				t.Errorf("test %d, entry %d: Failed writing header: %v", i, j, err);
				continue testLoop
			}
			if n, err := io.WriteString(tw, entry.contents); err != nil {
				t.Errorf("test %d, entry %d: Failed writing contents: %v", i, j, err);
				continue testLoop
			}
		}
		tw.Close();

		// The output must match the expected file exactly; on mismatch both
		// are dumped with bytestr.
		actual := buf.Data();
		if !bytes.Equal(expected, actual) {
			t.Errorf("test %d: Incorrect result:\n%v\nwant:\n%v",
				i, bytestr(actual), bytestr(expected));
		}
	}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment