Commit bfb48687 authored by Ross Light's avatar Ross Light Committed by Brad Fitzpatrick

compress/zlib: add FDICT flag in Reader/Writer

R=bradfitzgo, rsc, bradfitzwork
CC=golang-dev
https://golang.org/cl/4406046
parent a12d70f6
...@@ -36,7 +36,7 @@ const zlibDeflate = 8 ...@@ -36,7 +36,7 @@ const zlibDeflate = 8
var ChecksumError os.Error = os.ErrorString("zlib checksum error") var ChecksumError os.Error = os.ErrorString("zlib checksum error")
var HeaderError os.Error = os.ErrorString("invalid zlib header") var HeaderError os.Error = os.ErrorString("invalid zlib header")
var UnsupportedError os.Error = os.ErrorString("unsupported zlib format") var DictionaryError os.Error = os.ErrorString("invalid zlib dictionary")
type reader struct { type reader struct {
r flate.Reader r flate.Reader
...@@ -50,6 +50,12 @@ type reader struct { ...@@ -50,6 +50,12 @@ type reader struct {
// The implementation buffers input and may read more data than necessary from r. // The implementation buffers input and may read more data than necessary from r.
// It is the caller's responsibility to call Close on the ReadCloser when done. // It is the caller's responsibility to call Close on the ReadCloser when done.
func NewReader(r io.Reader) (io.ReadCloser, os.Error) { func NewReader(r io.Reader) (io.ReadCloser, os.Error) {
return NewReaderDict(r, nil)
}
// NewReaderDict is like NewReader but uses a preset dictionary.
// NewReaderDict ignores the dictionary if the compressed data does not refer to it.
func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, os.Error) {
z := new(reader) z := new(reader)
if fr, ok := r.(flate.Reader); ok { if fr, ok := r.(flate.Reader); ok {
z.r = fr z.r = fr
...@@ -65,11 +71,19 @@ func NewReader(r io.Reader) (io.ReadCloser, os.Error) { ...@@ -65,11 +71,19 @@ func NewReader(r io.Reader) (io.ReadCloser, os.Error) {
return nil, HeaderError return nil, HeaderError
} }
if z.scratch[1]&0x20 != 0 { if z.scratch[1]&0x20 != 0 {
// BUG(nigeltao): The zlib package does not implement the FDICT flag. _, err = io.ReadFull(z.r, z.scratch[0:4])
return nil, UnsupportedError if err != nil {
return nil, err
} }
z.digest = adler32.New() checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3])
if checksum != adler32.Checksum(dict) {
return nil, DictionaryError
}
z.decompressor = flate.NewReaderDict(z.r, dict)
} else {
z.decompressor = flate.NewReader(z.r) z.decompressor = flate.NewReader(z.r)
}
z.digest = adler32.New()
return z, nil return z, nil
} }
......
...@@ -15,6 +15,7 @@ type zlibTest struct { ...@@ -15,6 +15,7 @@ type zlibTest struct {
desc string desc string
raw string raw string
compressed []byte compressed []byte
dict []byte
err os.Error err os.Error
} }
...@@ -27,6 +28,7 @@ var zlibTests = []zlibTest{ ...@@ -27,6 +28,7 @@ var zlibTests = []zlibTest{
"", "",
[]byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01}, []byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01},
nil, nil,
nil,
}, },
{ {
"goodbye", "goodbye",
...@@ -37,23 +39,27 @@ var zlibTests = []zlibTest{ ...@@ -37,23 +39,27 @@ var zlibTests = []zlibTest{
0x01, 0x00, 0x28, 0xa5, 0x05, 0x5e, 0x01, 0x00, 0x28, 0xa5, 0x05, 0x5e,
}, },
nil, nil,
nil,
}, },
{ {
"bad header", "bad header",
"", "",
[]byte{0x78, 0x9f, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01}, []byte{0x78, 0x9f, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01},
nil,
HeaderError, HeaderError,
}, },
{ {
"bad checksum", "bad checksum",
"", "",
[]byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0xff}, []byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00, 0x00, 0xff},
nil,
ChecksumError, ChecksumError,
}, },
{ {
"not enough data", "not enough data",
"", "",
[]byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00}, []byte{0x78, 0x9c, 0x03, 0x00, 0x00, 0x00},
nil,
io.ErrUnexpectedEOF, io.ErrUnexpectedEOF,
}, },
{ {
...@@ -64,6 +70,33 @@ var zlibTests = []zlibTest{ ...@@ -64,6 +70,33 @@ var zlibTests = []zlibTest{
0x78, 0x9c, 0xff, 0x78, 0x9c, 0xff,
}, },
nil, nil,
nil,
},
{
"dictionary",
"Hello, World!\n",
[]byte{
0x78, 0xbb, 0x1c, 0x32, 0x04, 0x27, 0xf3, 0x00,
0xb1, 0x75, 0x20, 0x1c, 0x45, 0x2e, 0x00, 0x24,
0x12, 0x04, 0x74,
},
[]byte{
0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x0a,
},
nil,
},
{
"wrong dictionary",
"",
[]byte{
0x78, 0xbb, 0x1c, 0x32, 0x04, 0x27, 0xf3, 0x00,
0xb1, 0x75, 0x20, 0x1c, 0x45, 0x2e, 0x00, 0x24,
0x12, 0x04, 0x74,
},
[]byte{
0x48, 0x65, 0x6c, 0x6c,
},
DictionaryError,
}, },
} }
...@@ -71,7 +104,7 @@ func TestDecompressor(t *testing.T) { ...@@ -71,7 +104,7 @@ func TestDecompressor(t *testing.T) {
b := new(bytes.Buffer) b := new(bytes.Buffer)
for _, tt := range zlibTests { for _, tt := range zlibTests {
in := bytes.NewBuffer(tt.compressed) in := bytes.NewBuffer(tt.compressed)
zlib, err := NewReader(in) zlib, err := NewReaderDict(in, tt.dict)
if err != nil { if err != nil {
if err != tt.err { if err != tt.err {
t.Errorf("%s: NewReader: %s", tt.desc, err) t.Errorf("%s: NewReader: %s", tt.desc, err)
......
...@@ -21,56 +21,80 @@ const ( ...@@ -21,56 +21,80 @@ const (
DefaultCompression = flate.DefaultCompression DefaultCompression = flate.DefaultCompression
) )
type writer struct { // A Writer takes data written to it and writes the compressed
// form of that data to an underlying writer (see NewWriter).
type Writer struct {
w io.Writer w io.Writer
compressor io.WriteCloser compressor *flate.Writer
digest hash.Hash32 digest hash.Hash32
err os.Error err os.Error
scratch [4]byte scratch [4]byte
} }
// NewWriter calls NewWriterLevel with the default compression level. // NewWriter calls NewWriterLevel with the default compression level.
func NewWriter(w io.Writer) (io.WriteCloser, os.Error) { func NewWriter(w io.Writer) (*Writer, os.Error) {
return NewWriterLevel(w, DefaultCompression) return NewWriterLevel(w, DefaultCompression)
} }
// NewWriterLevel creates a new io.WriteCloser that satisfies writes by compressing data written to w. // NewWriterLevel calls NewWriterDict with no dictionary.
func NewWriterLevel(w io.Writer, level int) (*Writer, os.Error) {
return NewWriterDict(w, level, nil)
}
// NewWriterDict creates a new io.WriteCloser that satisfies writes by compressing data written to w.
// It is the caller's responsibility to call Close on the WriteCloser when done. // It is the caller's responsibility to call Close on the WriteCloser when done.
// level is the compression level, which can be DefaultCompression, NoCompression, // level is the compression level, which can be DefaultCompression, NoCompression,
// or any integer value between BestSpeed and BestCompression (inclusive). // or any integer value between BestSpeed and BestCompression (inclusive).
func NewWriterLevel(w io.Writer, level int) (io.WriteCloser, os.Error) { // dict is the preset dictionary to compress with, or nil to use no dictionary.
z := new(writer) func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, os.Error) {
z := new(Writer)
// ZLIB has a two-byte header (as documented in RFC 1950). // ZLIB has a two-byte header (as documented in RFC 1950).
// The first four bits is the CINFO (compression info), which is 7 for the default deflate window size. // The first four bits is the CINFO (compression info), which is 7 for the default deflate window size.
// The next four bits is the CM (compression method), which is 8 for deflate. // The next four bits is the CM (compression method), which is 8 for deflate.
z.scratch[0] = 0x78 z.scratch[0] = 0x78
// The next two bits is the FLEVEL (compression level). The four values are: // The next two bits is the FLEVEL (compression level). The four values are:
// 0=fastest, 1=fast, 2=default, 3=best. // 0=fastest, 1=fast, 2=default, 3=best.
// The next bit, FDICT, is unused, in this implementation. // The next bit, FDICT, is set if a dictionary is given.
// The final five FCHECK bits form a mod-31 checksum. // The final five FCHECK bits form a mod-31 checksum.
switch level { switch level {
case 0, 1: case 0, 1:
z.scratch[1] = 0x01 z.scratch[1] = 0 << 6
case 2, 3, 4, 5: case 2, 3, 4, 5:
z.scratch[1] = 0x5e z.scratch[1] = 1 << 6
case 6, -1: case 6, -1:
z.scratch[1] = 0x9c z.scratch[1] = 2 << 6
case 7, 8, 9: case 7, 8, 9:
z.scratch[1] = 0xda z.scratch[1] = 3 << 6
default: default:
return nil, os.NewError("level out of range") return nil, os.NewError("level out of range")
} }
if dict != nil {
z.scratch[1] |= 1 << 5
}
z.scratch[1] += uint8(31 - (uint16(z.scratch[0])<<8+uint16(z.scratch[1]))%31)
_, err := w.Write(z.scratch[0:2]) _, err := w.Write(z.scratch[0:2])
if err != nil { if err != nil {
return nil, err return nil, err
} }
if dict != nil {
// The next four bytes are the Adler-32 checksum of the dictionary.
checksum := adler32.Checksum(dict)
z.scratch[0] = uint8(checksum >> 24)
z.scratch[1] = uint8(checksum >> 16)
z.scratch[2] = uint8(checksum >> 8)
z.scratch[3] = uint8(checksum >> 0)
_, err = w.Write(z.scratch[0:4])
if err != nil {
return nil, err
}
}
z.w = w z.w = w
z.compressor = flate.NewWriter(w, level) z.compressor = flate.NewWriter(w, level)
z.digest = adler32.New() z.digest = adler32.New()
return z, nil return z, nil
} }
func (z *writer) Write(p []byte) (n int, err os.Error) { func (z *Writer) Write(p []byte) (n int, err os.Error) {
if z.err != nil { if z.err != nil {
return 0, z.err return 0, z.err
} }
...@@ -86,8 +110,17 @@ func (z *writer) Write(p []byte) (n int, err os.Error) { ...@@ -86,8 +110,17 @@ func (z *writer) Write(p []byte) (n int, err os.Error) {
return return
} }
// Flush flushes the underlying compressor.
func (z *Writer) Flush() os.Error {
if z.err != nil {
return z.err
}
z.err = z.compressor.Flush()
return z.err
}
// Calling Close does not close the wrapped io.Writer originally passed to NewWriter. // Calling Close does not close the wrapped io.Writer originally passed to NewWriter.
func (z *writer) Close() os.Error { func (z *Writer) Close() os.Error {
if z.err != nil { if z.err != nil {
return z.err return z.err
} }
......
...@@ -16,13 +16,19 @@ var filenames = []string{ ...@@ -16,13 +16,19 @@ var filenames = []string{
"../testdata/pi.txt", "../testdata/pi.txt",
} }
// Tests that compressing and then decompressing the given file at the given compression level // Tests that compressing and then decompressing the given file at the given compression level and dictionary
// yields equivalent bytes to the original file. // yields equivalent bytes to the original file.
func testFileLevel(t *testing.T, fn string, level int) { func testFileLevelDict(t *testing.T, fn string, level int, d string) {
// Read dictionary, if given.
var dict []byte
if d != "" {
dict = []byte(d)
}
// Read the file, as golden output. // Read the file, as golden output.
golden, err := os.Open(fn) golden, err := os.Open(fn)
if err != nil { if err != nil {
t.Errorf("%s (level=%d): %v", fn, level, err) t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err)
return return
} }
defer golden.Close() defer golden.Close()
...@@ -30,7 +36,7 @@ func testFileLevel(t *testing.T, fn string, level int) { ...@@ -30,7 +36,7 @@ func testFileLevel(t *testing.T, fn string, level int) {
// Read the file again, and push it through a pipe that compresses at the write end, and decompresses at the read end. // Read the file again, and push it through a pipe that compresses at the write end, and decompresses at the read end.
raw, err := os.Open(fn) raw, err := os.Open(fn)
if err != nil { if err != nil {
t.Errorf("%s (level=%d): %v", fn, level, err) t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err)
return return
} }
piper, pipew := io.Pipe() piper, pipew := io.Pipe()
...@@ -38,9 +44,9 @@ func testFileLevel(t *testing.T, fn string, level int) { ...@@ -38,9 +44,9 @@ func testFileLevel(t *testing.T, fn string, level int) {
go func() { go func() {
defer raw.Close() defer raw.Close()
defer pipew.Close() defer pipew.Close()
zlibw, err := NewWriterLevel(pipew, level) zlibw, err := NewWriterDict(pipew, level, dict)
if err != nil { if err != nil {
t.Errorf("%s (level=%d): %v", fn, level, err) t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err)
return return
} }
defer zlibw.Close() defer zlibw.Close()
...@@ -48,7 +54,7 @@ func testFileLevel(t *testing.T, fn string, level int) { ...@@ -48,7 +54,7 @@ func testFileLevel(t *testing.T, fn string, level int) {
for { for {
n, err0 := raw.Read(b[0:]) n, err0 := raw.Read(b[0:])
if err0 != nil && err0 != os.EOF { if err0 != nil && err0 != os.EOF {
t.Errorf("%s (level=%d): %v", fn, level, err0) t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err0)
return return
} }
_, err1 := zlibw.Write(b[0:n]) _, err1 := zlibw.Write(b[0:n])
...@@ -57,7 +63,7 @@ func testFileLevel(t *testing.T, fn string, level int) { ...@@ -57,7 +63,7 @@ func testFileLevel(t *testing.T, fn string, level int) {
return return
} }
if err1 != nil { if err1 != nil {
t.Errorf("%s (level=%d): %v", fn, level, err1) t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err1)
return return
} }
if err0 == os.EOF { if err0 == os.EOF {
...@@ -65,9 +71,9 @@ func testFileLevel(t *testing.T, fn string, level int) { ...@@ -65,9 +71,9 @@ func testFileLevel(t *testing.T, fn string, level int) {
} }
} }
}() }()
zlibr, err := NewReader(piper) zlibr, err := NewReaderDict(piper, dict)
if err != nil { if err != nil {
t.Errorf("%s (level=%d): %v", fn, level, err) t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err)
return return
} }
defer zlibr.Close() defer zlibr.Close()
...@@ -76,20 +82,20 @@ func testFileLevel(t *testing.T, fn string, level int) { ...@@ -76,20 +82,20 @@ func testFileLevel(t *testing.T, fn string, level int) {
b0, err0 := ioutil.ReadAll(golden) b0, err0 := ioutil.ReadAll(golden)
b1, err1 := ioutil.ReadAll(zlibr) b1, err1 := ioutil.ReadAll(zlibr)
if err0 != nil { if err0 != nil {
t.Errorf("%s (level=%d): %v", fn, level, err0) t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err0)
return return
} }
if err1 != nil { if err1 != nil {
t.Errorf("%s (level=%d): %v", fn, level, err1) t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err1)
return return
} }
if len(b0) != len(b1) { if len(b0) != len(b1) {
t.Errorf("%s (level=%d): length mismatch %d versus %d", fn, level, len(b0), len(b1)) t.Errorf("%s (level=%d, dict=%q): length mismatch %d versus %d", fn, level, d, len(b0), len(b1))
return return
} }
for i := 0; i < len(b0); i++ { for i := 0; i < len(b0); i++ {
if b0[i] != b1[i] { if b0[i] != b1[i] {
t.Errorf("%s (level=%d): mismatch at %d, 0x%02x versus 0x%02x\n", fn, level, i, b0[i], b1[i]) t.Errorf("%s (level=%d, dict=%q): mismatch at %d, 0x%02x versus 0x%02x\n", fn, level, d, i, b0[i], b1[i])
return return
} }
} }
...@@ -97,10 +103,21 @@ func testFileLevel(t *testing.T, fn string, level int) { ...@@ -97,10 +103,21 @@ func testFileLevel(t *testing.T, fn string, level int) {
func TestWriter(t *testing.T) { func TestWriter(t *testing.T) {
for _, fn := range filenames { for _, fn := range filenames {
testFileLevel(t, fn, DefaultCompression) testFileLevelDict(t, fn, DefaultCompression, "")
testFileLevel(t, fn, NoCompression) testFileLevelDict(t, fn, NoCompression, "")
for level := BestSpeed; level <= BestCompression; level++ {
testFileLevelDict(t, fn, level, "")
}
}
}
func TestWriterDict(t *testing.T) {
const dictionary = "0123456789."
for _, fn := range filenames {
testFileLevelDict(t, fn, DefaultCompression, dictionary)
testFileLevelDict(t, fn, NoCompression, dictionary)
for level := BestSpeed; level <= BestCompression; level++ { for level := BestSpeed; level <= BestCompression; level++ {
testFileLevel(t, fn, level) testFileLevelDict(t, fn, level, dictionary)
} }
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment