Commit 2113c9ad authored by Martin Möhrmann's avatar Martin Möhrmann Committed by Nigel Tao

image/color: improve speed of RGBA methods

Apply the optimizations added to color conversion functions in
https://go-review.googlesource.com/#/c/21910/ to the RGBA methods.

YCbCrToRGBA/0-4      6.32ns ± 3%  6.58ns ± 2%   +4.15%  (p=0.000 n=20+19)
YCbCrToRGBA/128-4    8.02ns ± 2%  5.89ns ± 2%  -26.57%  (p=0.000 n=20+19)
YCbCrToRGBA/255-4    8.06ns ± 2%  6.59ns ± 3%  -18.18%  (p=0.000 n=20+20)
NYCbCrAToRGBA/0-4    8.71ns ± 2%  8.78ns ± 2%   +0.86%  (p=0.036 n=19+20)
NYCbCrAToRGBA/128-4  10.3ns ± 4%   7.9ns ± 2%  -23.44%  (p=0.000 n=20+20)
NYCbCrAToRGBA/255-4  9.64ns ± 2%  8.79ns ± 3%   -8.80%  (p=0.000 n=20+20)

Fixes: #15260

Change-Id: I225efdf74603e8d2b4f063054f7baee7a5029de6
Reviewed-on: https://go-review.googlesource.com/31773
Run-TryBot: Martin Möhrmann <martisch@uos.de>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarNigel Tao <nigeltao@golang.org>
parent 80a03464
...@@ -139,24 +139,39 @@ func (c YCbCr) RGBA() (uint32, uint32, uint32, uint32) { ...@@ -139,24 +139,39 @@ func (c YCbCr) RGBA() (uint32, uint32, uint32, uint32) {
yy1 := int32(c.Y) * 0x10100 // Convert 0x12 to 0x121200. yy1 := int32(c.Y) * 0x10100 // Convert 0x12 to 0x121200.
cb1 := int32(c.Cb) - 128 cb1 := int32(c.Cb) - 128
cr1 := int32(c.Cr) - 128 cr1 := int32(c.Cr) - 128
r := (yy1 + 91881*cr1) >> 8
g := (yy1 - 22554*cb1 - 46802*cr1) >> 8 // The bit twiddling below is equivalent to
b := (yy1 + 116130*cb1) >> 8 //
if r < 0 { // r := (yy1 + 91881*cr1) >> 8
r = 0 // if r < 0 {
} else if r > 0xffff { // r = 0
r = 0xffff // } else if r > 0xff {
// r = 0xffff
// }
//
// but uses fewer branches and is faster.
// The code below to compute g and b uses a similar pattern.
r := yy1 + 91881*cr1
if uint32(r)&0xff000000 == 0 {
r >>= 8
} else {
r = ^(r >> 31) & 0xffff
} }
if g < 0 {
g = 0 g := yy1 - 22554*cb1 - 46802*cr1
} else if g > 0xffff { if uint32(g)&0xff000000 == 0 {
g = 0xffff g >>= 8
} else {
g = ^(g >> 31) & 0xffff
} }
if b < 0 {
b = 0 b := yy1 + 116130*cb1
} else if b > 0xffff { if uint32(b)&0xff000000 == 0 {
b = 0xffff b >>= 8
} else {
b = ^(b >> 31) & 0xffff
} }
return uint32(r), uint32(g), uint32(b), 0xffff return uint32(r), uint32(g), uint32(b), 0xffff
} }
...@@ -184,23 +199,37 @@ func (c NYCbCrA) RGBA() (uint32, uint32, uint32, uint32) { ...@@ -184,23 +199,37 @@ func (c NYCbCrA) RGBA() (uint32, uint32, uint32, uint32) {
yy1 := int32(c.Y) * 0x10100 // Convert 0x12 to 0x121200. yy1 := int32(c.Y) * 0x10100 // Convert 0x12 to 0x121200.
cb1 := int32(c.Cb) - 128 cb1 := int32(c.Cb) - 128
cr1 := int32(c.Cr) - 128 cr1 := int32(c.Cr) - 128
r := (yy1 + 91881*cr1) >> 8
g := (yy1 - 22554*cb1 - 46802*cr1) >> 8 // The bit twiddling below is equivalent to
b := (yy1 + 116130*cb1) >> 8 //
if r < 0 { // r := (yy1 + 91881*cr1) >> 8
r = 0 // if r < 0 {
} else if r > 0xffff { // r = 0
r = 0xffff // } else if r > 0xff {
// r = 0xffff
// }
//
// but uses fewer branches and is faster.
// The code below to compute g and b uses a similar pattern.
r := yy1 + 91881*cr1
if uint32(r)&0xff000000 == 0 {
r >>= 8
} else {
r = ^(r >> 31) & 0xffff
} }
if g < 0 {
g = 0 g := yy1 - 22554*cb1 - 46802*cr1
} else if g > 0xffff { if uint32(g)&0xff000000 == 0 {
g = 0xffff g >>= 8
} else {
g = ^(g >> 31) & 0xffff
} }
if b < 0 {
b = 0 b := yy1 + 116130*cb1
} else if b > 0xffff { if uint32(b)&0xff000000 == 0 {
b = 0xffff b >>= 8
} else {
b = ^(b >> 31) & 0xffff
} }
// The second part of this method applies the alpha. // The second part of this method applies the alpha.
......
...@@ -172,7 +172,8 @@ func TestPalette(t *testing.T) { ...@@ -172,7 +172,8 @@ func TestPalette(t *testing.T) {
} }
} }
var sink uint8 var sink8 uint8
var sink32 uint32
func BenchmarkYCbCrToRGB(b *testing.B) { func BenchmarkYCbCrToRGB(b *testing.B) {
// YCbCrToRGB does saturating arithmetic. // YCbCrToRGB does saturating arithmetic.
...@@ -180,17 +181,17 @@ func BenchmarkYCbCrToRGB(b *testing.B) { ...@@ -180,17 +181,17 @@ func BenchmarkYCbCrToRGB(b *testing.B) {
// different paths through the generated code. // different paths through the generated code.
b.Run("0", func(b *testing.B) { b.Run("0", func(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
sink, sink, sink = YCbCrToRGB(0, 0, 0) sink8, sink8, sink8 = YCbCrToRGB(0, 0, 0)
} }
}) })
b.Run("128", func(b *testing.B) { b.Run("128", func(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
sink, sink, sink = YCbCrToRGB(128, 128, 128) sink8, sink8, sink8 = YCbCrToRGB(128, 128, 128)
} }
}) })
b.Run("255", func(b *testing.B) { b.Run("255", func(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
sink, sink, sink = YCbCrToRGB(255, 255, 255) sink8, sink8, sink8 = YCbCrToRGB(255, 255, 255)
} }
}) })
} }
...@@ -201,17 +202,65 @@ func BenchmarkRGBToYCbCr(b *testing.B) { ...@@ -201,17 +202,65 @@ func BenchmarkRGBToYCbCr(b *testing.B) {
// through the generated code. // through the generated code.
b.Run("0", func(b *testing.B) { b.Run("0", func(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
sink, sink, sink = RGBToYCbCr(0, 0, 0) sink8, sink8, sink8 = RGBToYCbCr(0, 0, 0)
} }
}) })
b.Run("Cb", func(b *testing.B) { b.Run("Cb", func(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
sink, sink, sink = RGBToYCbCr(0, 0, 255) sink8, sink8, sink8 = RGBToYCbCr(0, 0, 255)
} }
}) })
b.Run("Cr", func(b *testing.B) { b.Run("Cr", func(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
sink, sink, sink = RGBToYCbCr(255, 0, 0) sink8, sink8, sink8 = RGBToYCbCr(255, 0, 0)
}
})
}
func BenchmarkYCbCrToRGBA(b *testing.B) {
// RGB does saturating arithmetic.
// Low, middle, and high values can take
// different paths through the generated code.
b.Run("0", func(b *testing.B) {
c := YCbCr{0, 0, 0}
for i := 0; i < b.N; i++ {
sink32, sink32, sink32, sink32 = c.RGBA()
}
})
b.Run("128", func(b *testing.B) {
c := YCbCr{128, 128, 128}
for i := 0; i < b.N; i++ {
sink32, sink32, sink32, sink32 = c.RGBA()
}
})
b.Run("255", func(b *testing.B) {
c := YCbCr{255, 255, 255}
for i := 0; i < b.N; i++ {
sink32, sink32, sink32, sink32 = c.RGBA()
}
})
}
func BenchmarkNYCbCrAToRGBA(b *testing.B) {
// RGBA does saturating arithmetic.
// Low, middle, and high values can take
// different paths through the generated code.
b.Run("0", func(b *testing.B) {
c := NYCbCrA{YCbCr{0, 0, 0}, 0xff}
for i := 0; i < b.N; i++ {
sink32, sink32, sink32, sink32 = c.RGBA()
}
})
b.Run("128", func(b *testing.B) {
c := NYCbCrA{YCbCr{128, 128, 128}, 0xff}
for i := 0; i < b.N; i++ {
sink32, sink32, sink32, sink32 = c.RGBA()
}
})
b.Run("255", func(b *testing.B) {
c := NYCbCrA{YCbCr{255, 255, 255}, 0xff}
for i := 0; i < b.N; i++ {
sink32, sink32, sink32, sink32 = c.RGBA()
} }
}) })
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment