Commit a3d1c1bd authored by Nigel Tao's avatar Nigel Tao

image/jpeg: ignore extraneous data, the same as what libjpeg does.

Fixes #4705.

Note that libjpeg will print a warning to stderr if there are many
extraneous bytes, but can be silent if the extraneous bytes can fit
into its int32 bit-buffer for Huffman decoding. I'm guessing that
this is why whatever encoder that produced the image filed for issue
4705 did not realize that they are, strictly speaking, generating an
invalid JPEG. That issue's attached image has two extraneous bytes.

For example, piping the program below into libjpeg's djpeg program
will print an "18 extraneous bytes" warning, even though N == 20.

$ cat main.go
package main

import (
        "bytes"
        "image"
        "image/color"
        "image/jpeg"
        "os"
)

const N = 20

func main() {
        // Encode a 1x1 red image.
        m := image.NewRGBA(image.Rect(0, 0, 1, 1))
        m.Set(0, 0, color.RGBA{255, 0, 0, 255})
        buf := new(bytes.Buffer)
        jpeg.Encode(buf, m, nil)
        b := buf.Bytes()
        // Strip the final "\xff\xd9" EOI marker.
        b = b[:len(b)-2]
        // Append N dummy 0x80 bytes to the SOS data.
        for i := 0; i < N; i++ {
                b = append(b, 0x80)
        }
        // Put back the "\xff\xd9" EOI marker.
        b = append(b, 0xff, 0xd9)
        os.Stdout.Write(b)
}
$ go run main.go | djpeg /dev/stdin > /tmp/foo.pnm
Corrupt JPEG data: 18 extraneous bytes before marker 0xd9

The resultant /tmp/foo.pnm is a perfectly good 1x1 red image.

R=r
CC=golang-dev
https://golang.org/cl/7750043
parent 9f399a03
......@@ -245,10 +245,38 @@ func (d *decoder) decode(r io.Reader, configOnly bool) (image.Image, error) {
if err != nil {
return nil, err
}
if d.tmp[0] != 0xff {
return nil, FormatError("missing 0xff marker start")
for d.tmp[0] != 0xff {
// Strictly speaking, this is a format error. However, libjpeg is
// liberal in what it accepts. As of version 9, next_marker in
// jdmarker.c treats this as a warning (JWRN_EXTRANEOUS_DATA) and
// continues to decode the stream. Even before next_marker sees
// extraneous data, jpeg_fill_bit_buffer in jdhuff.c reads as many
// bytes as it can, possibly past the end of a scan's data. It
// effectively puts back any markers that it overscanned (e.g. an
// "\xff\xd9" EOI marker), but it does not put back non-marker data,
// and thus it can silently ignore a small number of extraneous
// non-marker bytes before next_marker has a chance to see them (and
// print a warning).
//
// We are therefore also liberal in what we accept. Extraneous data
// is silently ignored.
//
// This is similar to, but not exactly the same as, the restart
// mechanism within a scan (the RST[0-7] markers).
//
// Note that extraneous 0xff bytes in e.g. SOS data are escaped as
// "\xff\x00", and so are detected a little further down below.
d.tmp[0] = d.tmp[1]
d.tmp[1], err = d.r.ReadByte()
if err != nil {
return nil, err
}
}
marker := d.tmp[1]
if marker == 0 {
// Treat "\xff\x00" as extraneous data.
continue
}
for marker == 0xff {
// Section B.1.1.2 says, "Any marker may optionally be preceded by any
// number of fill bytes, which are bytes assigned code X'FF'".
......
......@@ -8,8 +8,11 @@ import (
"bytes"
"fmt"
"image"
"image/color"
"io/ioutil"
"math/rand"
"os"
"strings"
"testing"
)
......@@ -131,6 +134,66 @@ func pixString(pix []byte, stride, x, y int) string {
return s.String()
}
func TestExtraneousData(t *testing.T) {
// Encode a 1x1 red image.
src := image.NewRGBA(image.Rect(0, 0, 1, 1))
src.Set(0, 0, color.RGBA{0xff, 0x00, 0x00, 0xff})
buf := new(bytes.Buffer)
if err := Encode(buf, src, nil); err != nil {
t.Fatalf("encode: %v", err)
}
enc := buf.String()
// Sanity check that the encoded JPEG is long enough, that it ends in a
// "\xff\xd9" EOI marker, and that it contains a "\xff\xda" SOS marker
// somewhere in the final 64 bytes.
if len(enc) < 64 {
t.Fatalf("encoded JPEG is too short: %d bytes", len(enc))
}
if got, want := enc[len(enc)-2:], "\xff\xd9"; got != want {
t.Fatalf("encoded JPEG ends with %q, want %q", got, want)
}
if s := enc[len(enc)-64:]; !strings.Contains(s, "\xff\xda") {
t.Fatalf("encoded JPEG does not contain a SOS marker (ff da) near the end: % x", s)
}
// Test that adding some random junk between the SOS marker and the
// EOI marker does not affect the decoding.
rnd := rand.New(rand.NewSource(1))
for i, nerr := 0, 0; i < 1000 && nerr < 10; i++ {
buf.Reset()
// Write all but the trailing "\xff\xd9" EOI marker.
buf.WriteString(enc[:len(enc)-2])
// Write some random extraneous data.
for n := rnd.Intn(10); n > 0; n-- {
if x := byte(rnd.Intn(256)); x != 0xff {
buf.WriteByte(x)
} else {
// The JPEG format escapes a SOS 0xff data byte as "\xff\x00".
buf.WriteString("\xff\x00")
}
}
// Write the "\xff\xd9" EOI marker.
buf.WriteString("\xff\xd9")
// Check that we can still decode the resultant image.
got, err := Decode(buf)
if err != nil {
t.Errorf("could not decode image #%d: %v", i, err)
nerr++
continue
}
if got.Bounds() != src.Bounds() {
t.Errorf("image #%d, bounds differ: %v and %v", i, got.Bounds(), src.Bounds())
nerr++
continue
}
if averageDelta(got, src) > 2<<8 {
t.Errorf("image #%d changed too much after a round trip", i)
nerr++
continue
}
}
}
func benchmarkDecode(b *testing.B, filename string) {
b.StopTimer()
data, err := ioutil.ReadFile(filename)
......
......@@ -148,7 +148,21 @@ func TestWriter(t *testing.T) {
t.Error(tc.filename, err)
continue
}
// Compute the average delta in RGB space.
if m0.Bounds() != m1.Bounds() {
t.Errorf("%s, bounds differ: %v and %v", tc.filename, m0.Bounds(), m1.Bounds())
continue
}
// Compare the average delta to the tolerance level.
if averageDelta(m0, m1) > tc.tolerance {
t.Errorf("%s, quality=%d: average delta is too high", tc.filename, tc.quality)
continue
}
}
}
// averageDelta returns the average delta in RGB space. The two images must
// have the same bounds.
func averageDelta(m0, m1 image.Image) int64 {
b := m0.Bounds()
var sum, n int64
for y := b.Min.Y; y < b.Max.Y; y++ {
......@@ -163,12 +177,7 @@ func TestWriter(t *testing.T) {
n += 3
}
}
// Compare the average delta to the tolerance level.
if sum/n > tc.tolerance {
t.Errorf("%s, quality=%d: average delta is too high", tc.filename, tc.quality)
continue
}
}
return sum / n
}
func BenchmarkEncode(b *testing.B) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment