Commit f10e0377 authored by Nigel Tao's avatar Nigel Tao

image/jpeg: support decoding CMYK and YCbCrK images.

The new testdata was created by:

convert video-001.png -colorspace cmyk video-001.cmyk.jpeg

video-001.cmyk.jpeg was then converted back to video-001.cmyk.png via
the GIMP. ImageMagick (convert) wasn't used for this second conversion
because IM's default color profiles complicates things.

Fixes #4500.

Change-Id: Ibf533f6a6c7e76883acc493ce3a4289d7875df3f
Reviewed-on: https://go-review.googlesource.com/4801Reviewed-by: 's avatarRob Pike <r@golang.org>
parent b5c3a9e5
......@@ -32,6 +32,7 @@ var imageTests = []imageTest{
// JPEG is a lossy format and hence needs a non-zero tolerance.
{"testdata/video-001.png", "testdata/video-001.jpeg", 8 << 8},
{"testdata/video-001.png", "testdata/video-001.progressive.jpeg", 8 << 8},
{"testdata/video-001.cmyk.png", "testdata/video-001.cmyk.jpeg", 8 << 8},
// Grayscale images.
{"testdata/video-005.gray.png", "testdata/video-005.gray.jpeg", 8 << 8},
{"testdata/video-005.gray.png", "testdata/video-005.gray.png", 0},
......
......@@ -10,6 +10,7 @@ package jpeg
import (
"image"
"image/color"
"image/draw"
"io"
)
......@@ -41,10 +42,7 @@ const (
maxTh = 3
maxTq = 3
// A grayscale JPEG image has only a Y component.
nGrayComponent = 1
// A color JPEG image has Y, Cb and Cr components.
nColorComponent = 3
maxComponents = 4
// We only support 4:4:4, 4:4:0, 4:2:2 and 4:2:0 downsampling, and therefore the
// number of luma samples per chroma sample is at most 2 in the horizontal
......@@ -54,19 +52,30 @@ const (
)
const (
soiMarker = 0xd8 // Start Of Image.
eoiMarker = 0xd9 // End Of Image.
sof0Marker = 0xc0 // Start Of Frame (Baseline).
sof2Marker = 0xc2 // Start Of Frame (Progressive).
dhtMarker = 0xc4 // Define Huffman Table.
dqtMarker = 0xdb // Define Quantization Table.
sosMarker = 0xda // Start Of Scan.
driMarker = 0xdd // Define Restart Interval.
rst0Marker = 0xd0 // ReSTart (0).
rst7Marker = 0xd7 // ReSTart (7).
app0Marker = 0xe0 // APPlication specific (0).
app15Marker = 0xef // APPlication specific (15).
comMarker = 0xfe // COMment.
soiMarker = 0xd8 // Start Of Image.
eoiMarker = 0xd9 // End Of Image.
sof0Marker = 0xc0 // Start Of Frame (Baseline).
sof2Marker = 0xc2 // Start Of Frame (Progressive).
dhtMarker = 0xc4 // Define Huffman Table.
dqtMarker = 0xdb // Define Quantization Table.
sosMarker = 0xda // Start Of Scan.
driMarker = 0xdd // Define Restart Interval.
rst0Marker = 0xd0 // ReSTart (0).
rst7Marker = 0xd7 // ReSTart (7).
comMarker = 0xfe // COMment.
// "APPlication specific" markers aren't part of the JPEG spec per se,
// but in practice, their use is described at
// http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html
app0Marker = 0xe0
app14Marker = 0xee
app15Marker = 0xef
)
// See http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#Adobe
const (
adobeTransformUnknown = 0
adobeTransformYCbCr = 1
adobeTransformYCbCrK = 2
)
// unzig maps from the zig-zag ordering to the natural ordering. For example,
......@@ -114,17 +123,24 @@ type decoder struct {
nUnreadable int
}
width, height int
img1 *image.Gray
img3 *image.YCbCr
ri int // Restart Interval.
nComp int
progressive bool
eobRun uint16 // End-of-Band run, specified in section G.1.2.2.
comp [nColorComponent]component
progCoeffs [nColorComponent][]block // Saved state between progressive-mode scans.
huff [maxTc + 1][maxTh + 1]huffman
quant [maxTq + 1]block // Quantization tables, in zig-zag order.
tmp [blockSize + 1]byte
img1 *image.Gray
img3 *image.YCbCr
blackPix []byte
blackStride int
ri int // Restart Interval.
nComp int
progressive bool
adobeTransformValid bool
adobeTransform uint8
eobRun uint16 // End-of-Band run, specified in section G.1.2.2.
comp [maxComponents]component
progCoeffs [maxComponents][]block // Saved state between progressive-mode scans.
huff [maxTc + 1][maxTh + 1]huffman
quant [maxTq + 1]block // Quantization tables, in zig-zag order.
tmp [blockSize + 1]byte
}
// fill fills up the d.bytes.buf buffer from the underlying io.Reader. It
......@@ -285,10 +301,12 @@ func (d *decoder) ignore(n int) error {
// Specified in section B.2.2.
func (d *decoder) processSOF(n int) error {
switch n {
case 6 + 3*nGrayComponent:
d.nComp = nGrayComponent
case 6 + 3*nColorComponent:
d.nComp = nColorComponent
case 6 + 3*1: // Grayscale image.
d.nComp = 1
case 6 + 3*3: // YCbCr image. (TODO(nigeltao): or RGB image.)
d.nComp = 3
case 6 + 3*4: // YCbCrK or CMYK image.
d.nComp = 4
default:
return UnsupportedError("SOF has wrong length")
}
......@@ -307,7 +325,7 @@ func (d *decoder) processSOF(n int) error {
for i := 0; i < d.nComp; i++ {
d.comp[i].c = d.tmp[6+3*i]
d.comp[i].tq = d.tmp[8+3*i]
if d.nComp == nGrayComponent {
if d.nComp == 1 {
// If a JPEG image has only one component, section A.2 says "this data
// is non-interleaved by definition" and section A.2.2 says "[in this
// case...] the order of data units within a scan shall be left-to-right
......@@ -326,16 +344,43 @@ func (d *decoder) processSOF(n int) error {
hv := d.tmp[7+3*i]
d.comp[i].h = int(hv >> 4)
d.comp[i].v = int(hv & 0x0f)
// For color images, we only support 4:4:4, 4:4:0, 4:2:2 or 4:2:0 chroma
// downsampling ratios. This implies that the (h, v) values for the Y
// component are either (1, 1), (1, 2), (2, 1) or (2, 2), and the (h, v)
// values for the Cr and Cb components must be (1, 1).
if i == 0 {
if hv != 0x11 && hv != 0x21 && hv != 0x22 && hv != 0x12 {
switch d.nComp {
case 3:
// For YCbCr images, we only support 4:4:4, 4:4:0, 4:2:2 or 4:2:0 chroma
// downsampling ratios. This implies that the (h, v) values for the Y
// component are either (1, 1), (1, 2), (2, 1) or (2, 2), and the (h, v)
// values for the Cr and Cb components must be (1, 1).
if i == 0 {
if hv != 0x11 && hv != 0x21 && hv != 0x22 && hv != 0x12 {
return UnsupportedError("luma/chroma downsample ratio")
}
} else if hv != 0x11 {
return UnsupportedError("luma/chroma downsample ratio")
}
} else if hv != 0x11 {
return UnsupportedError("luma/chroma downsample ratio")
case 4:
// For 4-component images (either CMYK or YCbCrK), we only support two
// hv vectors: [0x11 0x11 0x11 0x11] and [0x22 0x11 0x11 0x22].
// Theoretically, 4-component JPEG images could mix and match hv values
// but in practice, those two combinations are the only ones in use,
// and it simplifies the applyBlack code below if we can assume that:
// - for CMYK, the C and K channels have full samples, and if the M
// and Y channels subsample, they subsample both horizontally and
// vertically.
// - for YCbCrK, the Y and K channels have full samples.
switch i {
case 0:
if hv != 0x11 && hv != 0x22 {
return UnsupportedError("luma/chroma downsample ratio")
}
case 1, 2:
if hv != 0x11 {
return UnsupportedError("luma/chroma downsample ratio")
}
case 3:
if d.comp[0].h != d.comp[3].h || d.comp[0].v != d.comp[3].v {
return UnsupportedError("luma/chroma downsample ratio")
}
}
}
}
return nil
......@@ -378,6 +423,26 @@ func (d *decoder) processDRI(n int) error {
return nil
}
func (d *decoder) processApp14Marker(n int) error {
if n < 12 {
return d.ignore(n)
}
if err := d.readFull(d.tmp[:12]); err != nil {
return err
}
n -= 12
if d.tmp[0] == 'A' && d.tmp[1] == 'd' && d.tmp[2] == 'o' && d.tmp[3] == 'b' && d.tmp[4] == 'e' {
d.adobeTransformValid = true
d.adobeTransform = d.tmp[11]
}
if n > 0 {
return d.ignore(n)
}
return nil
}
// decode reads a JPEG image from r and returns it as an image.Image.
func (d *decoder) decode(r io.Reader, configOnly bool) (image.Image, error) {
d.r = r
......@@ -459,25 +524,29 @@ func (d *decoder) decode(r io.Reader, configOnly bool) (image.Image, error) {
return nil, FormatError("short segment length")
}
switch {
case marker == sof0Marker || marker == sof2Marker: // Start Of Frame.
switch marker {
case sof0Marker, sof2Marker:
d.progressive = marker == sof2Marker
err = d.processSOF(n)
if configOnly {
return nil, err
}
case marker == dhtMarker: // Define Huffman Table.
case dhtMarker:
err = d.processDHT(n)
case marker == dqtMarker: // Define Quantization Table.
case dqtMarker:
err = d.processDQT(n)
case marker == sosMarker: // Start Of Scan.
case sosMarker:
err = d.processSOS(n)
case marker == driMarker: // Define Restart Interval.
case driMarker:
err = d.processDRI(n)
case app0Marker <= marker && marker <= app15Marker || marker == comMarker: // APPlication specific, or COMment.
err = d.ignore(n)
case app14Marker:
err = d.processApp14Marker(n)
default:
err = UnsupportedError("unknown marker")
if app0Marker <= marker && marker <= app15Marker || marker == comMarker {
err = d.ignore(n)
} else {
err = UnsupportedError("unknown marker")
}
}
if err != nil {
return nil, err
......@@ -487,11 +556,89 @@ func (d *decoder) decode(r io.Reader, configOnly bool) (image.Image, error) {
return d.img1, nil
}
if d.img3 != nil {
if d.blackPix != nil {
return d.applyBlack()
}
return d.img3, nil
}
return nil, FormatError("missing SOS marker")
}
// applyBlack combines d.img3 and d.blackPix into a CMYK image. The formula
// used depends on whether the JPEG image is stored as CMYK or YCbCrK,
// indicated by the APP14 (Adobe) metadata.
//
// Adobe CMYK JPEG images are inverted, where 255 means no ink instead of full
// ink, so we apply "v = 255 - v" at various points. Note that a double
// inversion is a no-op, so inversions might be implicit in the code below.
func (d *decoder) applyBlack() (image.Image, error) {
if !d.adobeTransformValid {
return nil, UnsupportedError("unknown color model: 4-component JPEG doesn't have Adobe APP14 metadata")
}
// If the 4-component JPEG image isn't explicitly marked as "Unknown (RGB
// or CMYK)" as per
// http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#Adobe
// we assume that it is YCbCrK. This matches libjpeg's jdapimin.c.
if d.adobeTransform != adobeTransformUnknown {
// Convert the YCbCr part of the YCbCrK to RGB, invert the RGB to get
// CMY, and patch in the original K. The RGB to CMY inversion cancels
// out the 'Adobe inversion' described in the applyBlack doc comment
// above, so in practice, only the fourth channel (black) is inverted.
bounds := d.img3.Bounds()
img := image.NewRGBA(bounds)
// TODO(nigeltao): do the draw.Draw YCbCr -> RGB conversion directly,
// instead of having the image/jpeg package depend on the image/draw
// package.
draw.Draw(img, bounds, d.img3, bounds.Min, draw.Src)
for iBase, y := 0, bounds.Min.Y; y < bounds.Max.Y; iBase, y = iBase+img.Stride, y+1 {
for i, x := iBase+3, bounds.Min.X; x < bounds.Max.X; i, x = i+4, x+1 {
img.Pix[i] = 255 - d.blackPix[(y-bounds.Min.Y)*d.blackStride+(x-bounds.Min.X)]
}
}
return &image.CMYK{
Pix: img.Pix,
Stride: img.Stride,
Rect: img.Rect,
}, nil
}
// The first three channels (cyan, magenta, yellow) of the CMYK
// were decoded into d.img3, but each channel was decoded into a separate
// []byte slice, and some channels may be subsampled. We interleave the
// separate channels into an image.CMYK's single []byte slice containing 4
// contiguous bytes per pixel.
bounds := d.img3.Bounds()
img := image.NewCMYK(bounds)
translations := [4]struct {
src []byte
stride int
}{
{d.img3.Y, d.img3.YStride},
{d.img3.Cb, d.img3.CStride},
{d.img3.Cr, d.img3.CStride},
{d.blackPix, d.blackStride},
}
for t, translation := range translations {
subsample := d.comp[t].h != d.comp[0].h || d.comp[t].v != d.comp[0].v
for iBase, y := 0, bounds.Min.Y; y < bounds.Max.Y; iBase, y = iBase+img.Stride, y+1 {
sy := y - bounds.Min.Y
if subsample {
sy /= 2
}
for i, x := iBase+t, bounds.Min.X; x < bounds.Max.X; i, x = i+4, x+1 {
sx := x - bounds.Min.X
if subsample {
sx /= 2
}
img.Pix[i] = 255 - translation.src[sy*translation.stride+sx]
}
}
}
return img, nil
}
// Decode reads a JPEG image from r and returns it as an image.Image.
func Decode(r io.Reader) (image.Image, error) {
var d decoder
......@@ -506,15 +653,21 @@ func DecodeConfig(r io.Reader) (image.Config, error) {
return image.Config{}, err
}
switch d.nComp {
case nGrayComponent:
case 1:
return image.Config{
ColorModel: color.GrayModel,
Width: d.width,
Height: d.height,
}, nil
case nColorComponent:
case 3:
return image.Config{
ColorModel: color.YCbCrModel, // TODO(nigeltao): support RGB JPEGs.
Width: d.width,
Height: d.height,
}, nil
case 4:
return image.Config{
ColorModel: color.YCbCrModel,
ColorModel: color.CMYKModel,
Width: d.width,
Height: d.height,
}, nil
......
......@@ -10,11 +10,12 @@ import (
// makeImg allocates and initializes the destination image.
func (d *decoder) makeImg(h0, v0, mxx, myy int) {
if d.nComp == nGrayComponent {
if d.nComp == 1 {
m := image.NewGray(image.Rect(0, 0, 8*mxx, 8*myy))
d.img1 = m.SubImage(image.Rect(0, 0, d.width, d.height)).(*image.Gray)
return
}
var subsampleRatio image.YCbCrSubsampleRatio
switch {
case h0 == 1 && v0 == 1:
......@@ -30,6 +31,12 @@ func (d *decoder) makeImg(h0, v0, mxx, myy int) {
}
m := image.NewYCbCr(image.Rect(0, 0, 8*h0*mxx, 8*v0*myy), subsampleRatio)
d.img3 = m.SubImage(image.Rect(0, 0, d.width, d.height)).(*image.YCbCr)
if d.nComp == 4 {
h3, v3 := d.comp[3].h, d.comp[3].v
d.blackPix = make([]byte, 8*h3*mxx*8*v3*myy)
d.blackStride = 8 * h3 * mxx
}
}
// Specified in section B.2.3.
......@@ -47,7 +54,7 @@ func (d *decoder) processSOS(n int) error {
if n != 4+2*nComp {
return FormatError("SOS length inconsistent with number of components")
}
var scan [nColorComponent]struct {
var scan [maxComponents]struct {
compIndex uint8
td uint8 // DC table selector.
ta uint8 // AC table selector.
......@@ -128,7 +135,7 @@ func (d *decoder) processSOS(n int) error {
var (
// b is the decoded coefficients, in natural (not zig-zag) order.
b block
dc [nColorComponent]int32
dc [maxComponents]int32
// bx and by are the location of the current (in terms of 8x8 blocks).
// For example, with 4:2:0 chroma subsampling, the block whose top left
// pixel co-ordinates are (16, 8) is the third block in the first row:
......@@ -276,7 +283,7 @@ func (d *decoder) processSOS(n int) error {
}
idct(&b)
dst, stride := []byte(nil), 0
if d.nComp == nGrayComponent {
if d.nComp == 1 {
dst, stride = d.img1.Pix[8*(by*d.img1.Stride+bx):], d.img1.Stride
} else {
switch compIndex {
......@@ -286,6 +293,8 @@ func (d *decoder) processSOS(n int) error {
dst, stride = d.img3.Cb[8*(by*d.img3.CStride+bx):], d.img3.CStride
case 2:
dst, stride = d.img3.Cr[8*(by*d.img3.CStride+bx):], d.img3.CStride
case 3:
dst, stride = d.blackPix[8*(by*d.blackStride+bx):], d.blackStride
default:
return UnsupportedError("too many components")
}
......@@ -325,7 +334,7 @@ func (d *decoder) processSOS(n int) error {
// Reset the Huffman decoder.
d.bits = bits{}
// Reset the DC components, as per section F.2.1.3.1.
dc = [nColorComponent]int32{}
dc = [maxComponents]int32{}
// Reset the progressive decoder state, as per section G.1.2.2.
d.eobRun = 0
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment