Commit 86dc86b4 authored by Keith Randall's avatar Keith Randall

cmd/compile: don't merge load+op if other op arg is still live

We want to merge a load and op into a single instruction

    l = LOAD ptr mem
    y = OP x l

into

    y = OPload x ptr mem

However, all of our OPload instructions require that y uses
the same register as x. If x is needed past this instruction, then
we must copy x somewhere else, losing the whole benefit of merging
the instructions in the first place.

Disable this optimization if x is live past the OP.

Also disable this optimization if the OP is in a deeper loop than the load.

Update #19595

Change-Id: I87f596aad7e91c9127bfb4705cbae47106e1e77a
Reviewed-on: https://go-review.googlesource.com/38337Reviewed-by: 's avatarIlya Tocar <ilya.tocar@intel.com>
parent d0ff9ece
......@@ -32,6 +32,7 @@ func TestAssembly(t *testing.T) {
}
defer os.RemoveAll(dir)
nameRegexp := regexp.MustCompile("func \\w+")
t.Run("platform", func(t *testing.T) {
for _, ats := range allAsmTests {
ats := ats
......@@ -40,9 +41,9 @@ func TestAssembly(t *testing.T) {
asm := ats.compileToAsm(tt, dir)
for i, at := range ats.tests {
fa := funcAsm(asm, i)
for _, at := range ats.tests {
funcName := nameRegexp.FindString(at.function)[5:]
fa := funcAsm(asm, funcName)
at.verifyAsm(tt, fa)
}
})
......@@ -50,13 +51,13 @@ func TestAssembly(t *testing.T) {
})
}
// funcAsm returns the assembly listing for f{funcIndex}
func funcAsm(asm string, funcIndex int) string {
if i := strings.Index(asm, fmt.Sprintf("TEXT\t\"\".f%d(SB)", funcIndex)); i >= 0 {
// funcAsm returns the assembly listing for the given function name.
func funcAsm(asm string, funcName string) string {
if i := strings.Index(asm, fmt.Sprintf("TEXT\t\"\".%s(SB)", funcName)); i >= 0 {
asm = asm[i:]
}
if i := strings.Index(asm, fmt.Sprintf("TEXT\t\"\".f%d(SB)", funcIndex+1)); i >= 0 {
if i := strings.Index(asm[1:], "TEXT\t\"\"."); i >= 0 {
asm = asm[:i+1]
}
......@@ -474,12 +475,12 @@ var linuxAMD64Tests = []*asmTest{
// Rotate after inlining (see issue 18254).
{
`
func g(x uint32, k uint) uint32 {
return x<<k | x>>(32-k)
}
func f32(x uint32) uint32 {
return g(x, 7)
}
func g(x uint32, k uint) uint32 {
return x<<k | x>>(32-k)
}
`,
[]string{"\tROLL\t[$]7,"},
},
......@@ -698,6 +699,26 @@ var linuxAMD64Tests = []*asmTest{
`,
[]string{"\tBSRQ\t"},
},
// see issue 19595.
// We want to merge load+op in f58, but not in f59.
{
`
func f58(p, q *int) {
x := *p
*q += x
}`,
[]string{"\tADDQ\t\\("},
},
{
`
func f59(p, q *int) {
x := *p
for i := 0; i < 10; i++ {
*q += x
}
}`,
[]string{"\tADDQ\t[A-Z]"},
},
}
var linux386Tests = []*asmTest{
......
......@@ -2025,34 +2025,34 @@
// Merge load and op
// TODO: add indexed variants?
(ADDQ x l:(MOVQload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDQmem x [off] {sym} ptr mem)
(ADDQ l:(MOVQload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDQmem x [off] {sym} ptr mem)
(ADDL x l:(MOVLload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDLmem x [off] {sym} ptr mem)
(ADDL l:(MOVLload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDLmem x [off] {sym} ptr mem)
(SUBQ x l:(MOVQload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (SUBQmem x [off] {sym} ptr mem)
(SUBL x l:(MOVLload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (SUBLmem x [off] {sym} ptr mem)
(ANDQ x l:(MOVQload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ANDQmem x [off] {sym} ptr mem)
(ANDQ l:(MOVQload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ANDQmem x [off] {sym} ptr mem)
(ANDL x l:(MOVLload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ANDLmem x [off] {sym} ptr mem)
(ANDL l:(MOVLload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ANDLmem x [off] {sym} ptr mem)
(ORQ x l:(MOVQload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ORQmem x [off] {sym} ptr mem)
(ORQ l:(MOVQload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ORQmem x [off] {sym} ptr mem)
(ORL x l:(MOVLload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ORLmem x [off] {sym} ptr mem)
(ORL l:(MOVLload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ORLmem x [off] {sym} ptr mem)
(XORQ x l:(MOVQload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (XORQmem x [off] {sym} ptr mem)
(XORQ l:(MOVQload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (XORQmem x [off] {sym} ptr mem)
(XORL x l:(MOVLload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (XORLmem x [off] {sym} ptr mem)
(XORL l:(MOVLload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (XORLmem x [off] {sym} ptr mem)
(ADDSD x l:(MOVSDload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDSDmem x [off] {sym} ptr mem)
(ADDSD l:(MOVSDload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDSDmem x [off] {sym} ptr mem)
(ADDSS x l:(MOVSSload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDSSmem x [off] {sym} ptr mem)
(ADDSS l:(MOVSSload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDSSmem x [off] {sym} ptr mem)
(SUBSD x l:(MOVSDload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (SUBSDmem x [off] {sym} ptr mem)
(SUBSS x l:(MOVSSload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (SUBSSmem x [off] {sym} ptr mem)
(MULSD x l:(MOVSDload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (MULSDmem x [off] {sym} ptr mem)
(MULSD l:(MOVSDload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (MULSDmem x [off] {sym} ptr mem)
(MULSS x l:(MOVSSload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (MULSSmem x [off] {sym} ptr mem)
(MULSS l:(MOVSSload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (MULSSmem x [off] {sym} ptr mem)
(ADDQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDQmem x [off] {sym} ptr mem)
(ADDQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDQmem x [off] {sym} ptr mem)
(ADDL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDLmem x [off] {sym} ptr mem)
(ADDL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDLmem x [off] {sym} ptr mem)
(SUBQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBQmem x [off] {sym} ptr mem)
(SUBL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBLmem x [off] {sym} ptr mem)
(ANDQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ANDQmem x [off] {sym} ptr mem)
(ANDQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ANDQmem x [off] {sym} ptr mem)
(ANDL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ANDLmem x [off] {sym} ptr mem)
(ANDL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ANDLmem x [off] {sym} ptr mem)
(ORQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ORQmem x [off] {sym} ptr mem)
(ORQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ORQmem x [off] {sym} ptr mem)
(ORL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ORLmem x [off] {sym} ptr mem)
(ORL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ORLmem x [off] {sym} ptr mem)
(XORQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (XORQmem x [off] {sym} ptr mem)
(XORQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (XORQmem x [off] {sym} ptr mem)
(XORL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (XORLmem x [off] {sym} ptr mem)
(XORL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (XORLmem x [off] {sym} ptr mem)
(ADDSD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSDmem x [off] {sym} ptr mem)
(ADDSD l:(MOVSDload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSDmem x [off] {sym} ptr mem)
(ADDSS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSSmem x [off] {sym} ptr mem)
(ADDSS l:(MOVSSload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSSmem x [off] {sym} ptr mem)
(SUBSD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBSDmem x [off] {sym} ptr mem)
(SUBSS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBSSmem x [off] {sym} ptr mem)
(MULSD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (MULSDmem x [off] {sym} ptr mem)
(MULSD l:(MOVSDload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (MULSDmem x [off] {sym} ptr mem)
(MULSS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (MULSSmem x [off] {sym} ptr mem)
(MULSS l:(MOVSSload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (MULSSmem x [off] {sym} ptr mem)
// Merge ADDQconst and LEAQ into atomic loads.
(MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
......
......@@ -443,6 +443,14 @@ func (ln *loopnest) findExits() {
ln.initializedExits = true
}
// depth returns the loop nesting level of block b.
func (ln *loopnest) depth(b ID) int16 {
if l := ln.b2l[b]; l != nil {
return l.depth
}
return 0
}
// recordIfExit checks sl (the loop containing b) to see if it
// is outside of loop l, and if so, records b as an exit block
// from l and returns true.
......
......@@ -153,11 +153,36 @@ func canMergeSym(x, y interface{}) bool {
// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
func canMergeLoad(target, load *Value) bool {
// It also checks that the other non-load argument x is something we
// are ok with clobbering (all our current load+op instructions clobber
// their input register).
func canMergeLoad(target, load, x *Value) bool {
if target.Block.ID != load.Block.ID {
// If the load is in a different block do not merge it.
return false
}
// We can't merge the load into the target if the load
// has more than one use.
if load.Uses != 1 {
return false
}
// The register containing x is going to get clobbered.
// Don't merge if we still need the value of x.
// We don't have liveness information here, but we can
// approximate x dying with:
// 1) target is x's only use.
// 2) target is not in a deeper loop than x.
if x.Uses != 1 {
return false
}
loopnest := x.Block.Func.loopnest()
loopnest.calculateDepths()
if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
return false
}
mem := load.MemoryArg()
// We need the load's memory arg to still be alive at target. That
......@@ -258,6 +283,7 @@ search:
}
}
}
return true
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment