Commit 10f75748 authored by Ilya Tocar's avatar Ilya Tocar

cmd/compile/internal/ssa: generate bswap on AMD64

Generate bswap+load/store for reading/writing big endian data.
Helps encoding/binary.

name                    old time/op    new time/op    delta
ReadSlice1000Int32s-8     5.06µs ± 8%    4.58µs ± 8%   -9.50%        (p=0.000 n=10+10)
ReadStruct-8              1.07µs ± 0%    1.05µs ± 0%   -1.51%         (p=0.000 n=9+10)
ReadInts-8                 367ns ± 0%     363ns ± 0%   -1.15%          (p=0.000 n=8+9)
WriteInts-8                475ns ± 1%     469ns ± 0%   -1.45%        (p=0.000 n=10+10)
WriteSlice1000Int32s-8    5.03µs ± 3%    4.50µs ± 3%  -10.45%          (p=0.000 n=9+9)
PutUvarint32-8            17.2ns ± 0%    17.2ns ± 0%     ~     (all samples are equal)
PutUvarint64-8            46.7ns ± 0%    46.7ns ± 0%     ~           (p=0.509 n=10+10)

name                    old speed      new speed      delta
ReadSlice1000Int32s-8    791MB/s ± 8%   875MB/s ± 8%  +10.53%        (p=0.000 n=10+10)
ReadStruct-8            70.0MB/s ± 0%  71.1MB/s ± 0%   +1.54%         (p=0.000 n=9+10)
ReadInts-8              81.6MB/s ± 0%  82.6MB/s ± 0%   +1.21%          (p=0.000 n=9+9)
WriteInts-8             63.0MB/s ± 1%  63.9MB/s ± 0%   +1.45%        (p=0.000 n=10+10)
WriteSlice1000Int32s-8   796MB/s ± 4%   888MB/s ± 3%  +11.65%          (p=0.000 n=9+9)
PutUvarint32-8           233MB/s ± 0%   233MB/s ± 0%     ~           (p=0.089 n=10+10)
PutUvarint64-8           171MB/s ± 0%   171MB/s ± 0%     ~            (p=0.137 n=10+9)

Change-Id: Ia2dbdef92198eaa7e2af5443a8ed586d4b401ffb
Reviewed-on: https://go-review.googlesource.com/32222
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarKeith Randall <khr@golang.org>
parent ca5cea9d
......@@ -157,6 +157,38 @@ func f(b []byte, i int) uint32 {
`,
[]string{"\tMOVL\t\\(.*\\)\\(.*\\*1\\),"},
},
{"amd64", "linux", `
import "encoding/binary"
func f(b []byte) uint64 {
return binary.BigEndian.Uint64(b)
}
`,
[]string{"\tBSWAPQ\t"},
},
{"amd64", "linux", `
import "encoding/binary"
func f(b []byte, i int) uint64 {
return binary.BigEndian.Uint64(b[i:])
}
`,
[]string{"\tBSWAPQ\t"},
},
{"amd64", "linux", `
import "encoding/binary"
func f(b []byte) uint32 {
return binary.BigEndian.Uint32(b)
}
`,
[]string{"\tBSWAPL\t"},
},
{"amd64", "linux", `
import "encoding/binary"
func f(b []byte, i int) uint32 {
return binary.BigEndian.Uint32(b[i:])
}
`,
[]string{"\tBSWAPL\t"},
},
{"386", "linux", `
import "encoding/binary"
func f(b []byte) uint32 {
......
......@@ -1507,6 +1507,204 @@
&& clobber(o5)
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQloadidx1 <v.Type> [i] {s} p idx mem)
// Combine byte loads + shifts into larger (unaligned) loads + bswap
(ORL o1:(ORL o0:(ORL
x0:(MOVBload [i] {s} p mem)
s0:(SHLLconst [8] x1:(MOVBload [i-1] {s} p mem)))
s1:(SHLLconst [16] x2:(MOVBload [i-2] {s} p mem)))
s2:(SHLLconst [24] x3:(MOVBload [i-3] {s} p mem)))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& x3.Uses == 1
&& s0.Uses == 1
&& s1.Uses == 1
&& s2.Uses == 1
&& o0.Uses == 1
&& o1.Uses == 1
&& mergePoint(b,x0,x1,x2,x3) != nil
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
&& clobber(x3)
&& clobber(s0)
&& clobber(s1)
&& clobber(s2)
&& clobber(o0)
&& clobber(o1)
-> @mergePoint(b,x0,x1,x2,x3) (BSWAPL <v.Type> (MOVLload [i-3] {s} p mem))
(ORL o1:(ORL o0:(ORL
x0:(MOVBloadidx1 [i] {s} p idx mem)
s0:(SHLLconst [8] x1:(MOVBloadidx1 [i-1] {s} p idx mem)))
s1:(SHLLconst [16] x2:(MOVBloadidx1 [i-2] {s} p idx mem)))
s2:(SHLLconst [24] x3:(MOVBloadidx1 [i-3] {s} p idx mem)))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& x3.Uses == 1
&& s0.Uses == 1
&& s1.Uses == 1
&& s2.Uses == 1
&& o0.Uses == 1
&& o1.Uses == 1
&& mergePoint(b,x0,x1,x2,x3) != nil
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
&& clobber(x3)
&& clobber(s0)
&& clobber(s1)
&& clobber(s2)
&& clobber(o0)
&& clobber(o1)
-> @mergePoint(b,x0,x1,x2,x3) (BSWAPL <v.Type> (MOVLloadidx1 <v.Type> [i-3] {s} p idx mem))
(ORQ o5:(ORQ o4:(ORQ o3:(ORQ o2:(ORQ o1:(ORQ o0:(ORQ
x0:(MOVBload [i] {s} p mem)
s0:(SHLQconst [8] x1:(MOVBload [i-1] {s} p mem)))
s1:(SHLQconst [16] x2:(MOVBload [i-2] {s} p mem)))
s2:(SHLQconst [24] x3:(MOVBload [i-3] {s} p mem)))
s3:(SHLQconst [32] x4:(MOVBload [i-4] {s} p mem)))
s4:(SHLQconst [40] x5:(MOVBload [i-5] {s} p mem)))
s5:(SHLQconst [48] x6:(MOVBload [i-6] {s} p mem)))
s6:(SHLQconst [56] x7:(MOVBload [i-7] {s} p mem)))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& x3.Uses == 1
&& x4.Uses == 1
&& x5.Uses == 1
&& x6.Uses == 1
&& x7.Uses == 1
&& s0.Uses == 1
&& s1.Uses == 1
&& s2.Uses == 1
&& s3.Uses == 1
&& s4.Uses == 1
&& s5.Uses == 1
&& s6.Uses == 1
&& o0.Uses == 1
&& o1.Uses == 1
&& o2.Uses == 1
&& o3.Uses == 1
&& o4.Uses == 1
&& o5.Uses == 1
&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
&& clobber(x3)
&& clobber(x4)
&& clobber(x5)
&& clobber(x6)
&& clobber(x7)
&& clobber(s0)
&& clobber(s1)
&& clobber(s2)
&& clobber(s3)
&& clobber(s4)
&& clobber(s5)
&& clobber(s6)
&& clobber(o0)
&& clobber(o1)
&& clobber(o2)
&& clobber(o3)
&& clobber(o4)
&& clobber(o5)
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (BSWAPQ <v.Type> (MOVQload [i-7] {s} p mem))
(ORQ o5:(ORQ o4:(ORQ o3:(ORQ o2:(ORQ o1:(ORQ o0:(ORQ
x0:(MOVBloadidx1 [i] {s} p idx mem)
s0:(SHLQconst [8] x1:(MOVBloadidx1 [i-1] {s} p idx mem)))
s1:(SHLQconst [16] x2:(MOVBloadidx1 [i-2] {s} p idx mem)))
s2:(SHLQconst [24] x3:(MOVBloadidx1 [i-3] {s} p idx mem)))
s3:(SHLQconst [32] x4:(MOVBloadidx1 [i-4] {s} p idx mem)))
s4:(SHLQconst [40] x5:(MOVBloadidx1 [i-5] {s} p idx mem)))
s5:(SHLQconst [48] x6:(MOVBloadidx1 [i-6] {s} p idx mem)))
s6:(SHLQconst [56] x7:(MOVBloadidx1 [i-7] {s} p idx mem)))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& x3.Uses == 1
&& x4.Uses == 1
&& x5.Uses == 1
&& x6.Uses == 1
&& x7.Uses == 1
&& s0.Uses == 1
&& s1.Uses == 1
&& s2.Uses == 1
&& s3.Uses == 1
&& s4.Uses == 1
&& s5.Uses == 1
&& s6.Uses == 1
&& o0.Uses == 1
&& o1.Uses == 1
&& o2.Uses == 1
&& o3.Uses == 1
&& o4.Uses == 1
&& o5.Uses == 1
&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
&& clobber(x3)
&& clobber(x4)
&& clobber(x5)
&& clobber(x6)
&& clobber(x7)
&& clobber(s0)
&& clobber(s1)
&& clobber(s2)
&& clobber(s3)
&& clobber(s4)
&& clobber(s5)
&& clobber(s6)
&& clobber(o0)
&& clobber(o1)
&& clobber(o2)
&& clobber(o3)
&& clobber(o4)
&& clobber(o5)
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (BSWAPQ <v.Type> (MOVQloadidx1 <v.Type> [i-7] {s} p idx mem))
// Combine stores + shifts into bswap and larger (unaligned) stores
(MOVBstore [i] {s} p w
x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w)
x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w)
x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
-> (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
(MOVBstore [i] {s} p w
x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w)
x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w)
x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w)
x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w)
x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w)
x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w)
x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& x3.Uses == 1
&& x4.Uses == 1
&& x5.Uses == 1
&& x6.Uses == 1
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
&& clobber(x3)
&& clobber(x4)
&& clobber(x5)
&& clobber(x6)
-> (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
// Combine constant stores into larger (unaligned) stores.
(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
&& x.Uses == 1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment