Commit 86dc86b4 authored by Keith Randall's avatar Keith Randall

cmd/compile: don't merge load+op if other op arg is still live

We want to merge a load and op into a single instruction

    l = LOAD ptr mem
    y = OP x l

into

    y = OPload x ptr mem

However, all of our OPload instructions require that y uses
the same register as x. If x is needed past this instruction, then
we must copy x somewhere else, losing the whole benefit of merging
the instructions in the first place.

Disable this optimization if x is live past the OP.

Also disable this optimization if the OP is in a deeper loop than the load.

Update #19595

Change-Id: I87f596aad7e91c9127bfb4705cbae47106e1e77a
Reviewed-on: https://go-review.googlesource.com/38337Reviewed-by: 's avatarIlya Tocar <ilya.tocar@intel.com>
parent d0ff9ece
...@@ -32,6 +32,7 @@ func TestAssembly(t *testing.T) { ...@@ -32,6 +32,7 @@ func TestAssembly(t *testing.T) {
} }
defer os.RemoveAll(dir) defer os.RemoveAll(dir)
nameRegexp := regexp.MustCompile("func \\w+")
t.Run("platform", func(t *testing.T) { t.Run("platform", func(t *testing.T) {
for _, ats := range allAsmTests { for _, ats := range allAsmTests {
ats := ats ats := ats
...@@ -40,9 +41,9 @@ func TestAssembly(t *testing.T) { ...@@ -40,9 +41,9 @@ func TestAssembly(t *testing.T) {
asm := ats.compileToAsm(tt, dir) asm := ats.compileToAsm(tt, dir)
for i, at := range ats.tests { for _, at := range ats.tests {
fa := funcAsm(asm, i) funcName := nameRegexp.FindString(at.function)[5:]
fa := funcAsm(asm, funcName)
at.verifyAsm(tt, fa) at.verifyAsm(tt, fa)
} }
}) })
...@@ -50,13 +51,13 @@ func TestAssembly(t *testing.T) { ...@@ -50,13 +51,13 @@ func TestAssembly(t *testing.T) {
}) })
} }
// funcAsm returns the assembly listing for f{funcIndex} // funcAsm returns the assembly listing for the given function name.
func funcAsm(asm string, funcIndex int) string { func funcAsm(asm string, funcName string) string {
if i := strings.Index(asm, fmt.Sprintf("TEXT\t\"\".f%d(SB)", funcIndex)); i >= 0 { if i := strings.Index(asm, fmt.Sprintf("TEXT\t\"\".%s(SB)", funcName)); i >= 0 {
asm = asm[i:] asm = asm[i:]
} }
if i := strings.Index(asm, fmt.Sprintf("TEXT\t\"\".f%d(SB)", funcIndex+1)); i >= 0 { if i := strings.Index(asm[1:], "TEXT\t\"\"."); i >= 0 {
asm = asm[:i+1] asm = asm[:i+1]
} }
...@@ -474,12 +475,12 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -474,12 +475,12 @@ var linuxAMD64Tests = []*asmTest{
// Rotate after inlining (see issue 18254). // Rotate after inlining (see issue 18254).
{ {
` `
func g(x uint32, k uint) uint32 {
return x<<k | x>>(32-k)
}
func f32(x uint32) uint32 { func f32(x uint32) uint32 {
return g(x, 7) return g(x, 7)
} }
func g(x uint32, k uint) uint32 {
return x<<k | x>>(32-k)
}
`, `,
[]string{"\tROLL\t[$]7,"}, []string{"\tROLL\t[$]7,"},
}, },
...@@ -698,6 +699,26 @@ var linuxAMD64Tests = []*asmTest{ ...@@ -698,6 +699,26 @@ var linuxAMD64Tests = []*asmTest{
`, `,
[]string{"\tBSRQ\t"}, []string{"\tBSRQ\t"},
}, },
// see issue 19595.
// We want to merge load+op in f58, but not in f59.
{
`
func f58(p, q *int) {
x := *p
*q += x
}`,
[]string{"\tADDQ\t\\("},
},
{
`
func f59(p, q *int) {
x := *p
for i := 0; i < 10; i++ {
*q += x
}
}`,
[]string{"\tADDQ\t[A-Z]"},
},
} }
var linux386Tests = []*asmTest{ var linux386Tests = []*asmTest{
......
...@@ -2025,34 +2025,34 @@ ...@@ -2025,34 +2025,34 @@
// Merge load and op // Merge load and op
// TODO: add indexed variants? // TODO: add indexed variants?
(ADDQ x l:(MOVQload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDQmem x [off] {sym} ptr mem) (ADDQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDQmem x [off] {sym} ptr mem)
(ADDQ l:(MOVQload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDQmem x [off] {sym} ptr mem) (ADDQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDQmem x [off] {sym} ptr mem)
(ADDL x l:(MOVLload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDLmem x [off] {sym} ptr mem) (ADDL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDLmem x [off] {sym} ptr mem)
(ADDL l:(MOVLload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDLmem x [off] {sym} ptr mem) (ADDL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDLmem x [off] {sym} ptr mem)
(SUBQ x l:(MOVQload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (SUBQmem x [off] {sym} ptr mem) (SUBQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBQmem x [off] {sym} ptr mem)
(SUBL x l:(MOVLload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (SUBLmem x [off] {sym} ptr mem) (SUBL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBLmem x [off] {sym} ptr mem)
(ANDQ x l:(MOVQload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ANDQmem x [off] {sym} ptr mem) (ANDQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ANDQmem x [off] {sym} ptr mem)
(ANDQ l:(MOVQload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ANDQmem x [off] {sym} ptr mem) (ANDQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ANDQmem x [off] {sym} ptr mem)
(ANDL x l:(MOVLload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ANDLmem x [off] {sym} ptr mem) (ANDL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ANDLmem x [off] {sym} ptr mem)
(ANDL l:(MOVLload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ANDLmem x [off] {sym} ptr mem) (ANDL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ANDLmem x [off] {sym} ptr mem)
(ORQ x l:(MOVQload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ORQmem x [off] {sym} ptr mem) (ORQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ORQmem x [off] {sym} ptr mem)
(ORQ l:(MOVQload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ORQmem x [off] {sym} ptr mem) (ORQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ORQmem x [off] {sym} ptr mem)
(ORL x l:(MOVLload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ORLmem x [off] {sym} ptr mem) (ORL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ORLmem x [off] {sym} ptr mem)
(ORL l:(MOVLload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ORLmem x [off] {sym} ptr mem) (ORL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ORLmem x [off] {sym} ptr mem)
(XORQ x l:(MOVQload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (XORQmem x [off] {sym} ptr mem) (XORQ x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (XORQmem x [off] {sym} ptr mem)
(XORQ l:(MOVQload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (XORQmem x [off] {sym} ptr mem) (XORQ l:(MOVQload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (XORQmem x [off] {sym} ptr mem)
(XORL x l:(MOVLload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (XORLmem x [off] {sym} ptr mem) (XORL x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (XORLmem x [off] {sym} ptr mem)
(XORL l:(MOVLload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (XORLmem x [off] {sym} ptr mem) (XORL l:(MOVLload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (XORLmem x [off] {sym} ptr mem)
(ADDSD x l:(MOVSDload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDSDmem x [off] {sym} ptr mem) (ADDSD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSDmem x [off] {sym} ptr mem)
(ADDSD l:(MOVSDload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDSDmem x [off] {sym} ptr mem) (ADDSD l:(MOVSDload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSDmem x [off] {sym} ptr mem)
(ADDSS x l:(MOVSSload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDSSmem x [off] {sym} ptr mem) (ADDSS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSSmem x [off] {sym} ptr mem)
(ADDSS l:(MOVSSload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (ADDSSmem x [off] {sym} ptr mem) (ADDSS l:(MOVSSload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (ADDSSmem x [off] {sym} ptr mem)
(SUBSD x l:(MOVSDload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (SUBSDmem x [off] {sym} ptr mem) (SUBSD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBSDmem x [off] {sym} ptr mem)
(SUBSS x l:(MOVSSload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (SUBSSmem x [off] {sym} ptr mem) (SUBSS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (SUBSSmem x [off] {sym} ptr mem)
(MULSD x l:(MOVSDload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (MULSDmem x [off] {sym} ptr mem) (MULSD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (MULSDmem x [off] {sym} ptr mem)
(MULSD l:(MOVSDload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (MULSDmem x [off] {sym} ptr mem) (MULSD l:(MOVSDload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (MULSDmem x [off] {sym} ptr mem)
(MULSS x l:(MOVSSload [off] {sym} ptr mem)) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (MULSSmem x [off] {sym} ptr mem) (MULSS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> (MULSSmem x [off] {sym} ptr mem)
(MULSS l:(MOVSSload [off] {sym} ptr mem) x) && l.Uses == 1 && canMergeLoad(v, l) && clobber(l) -> (MULSSmem x [off] {sym} ptr mem) (MULSS l:(MOVSSload [off] {sym} ptr mem) x) && canMergeLoad(v, l, x) && clobber(l) -> (MULSSmem x [off] {sym} ptr mem)
// Merge ADDQconst and LEAQ into atomic loads. // Merge ADDQconst and LEAQ into atomic loads.
(MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
......
...@@ -443,6 +443,14 @@ func (ln *loopnest) findExits() { ...@@ -443,6 +443,14 @@ func (ln *loopnest) findExits() {
ln.initializedExits = true ln.initializedExits = true
} }
// depth returns the loop nesting level of block b.
func (ln *loopnest) depth(b ID) int16 {
if l := ln.b2l[b]; l != nil {
return l.depth
}
return 0
}
// recordIfExit checks sl (the loop containing b) to see if it // recordIfExit checks sl (the loop containing b) to see if it
// is outside of loop l, and if so, records b as an exit block // is outside of loop l, and if so, records b as an exit block
// from l and returns true. // from l and returns true.
......
...@@ -153,11 +153,36 @@ func canMergeSym(x, y interface{}) bool { ...@@ -153,11 +153,36 @@ func canMergeSym(x, y interface{}) bool {
// canMergeLoad reports whether the load can be merged into target without // canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule. // invalidating the schedule.
func canMergeLoad(target, load *Value) bool { // It also checks that the other non-load argument x is something we
// are ok with clobbering (all our current load+op instructions clobber
// their input register).
func canMergeLoad(target, load, x *Value) bool {
if target.Block.ID != load.Block.ID { if target.Block.ID != load.Block.ID {
// If the load is in a different block do not merge it. // If the load is in a different block do not merge it.
return false return false
} }
// We can't merge the load into the target if the load
// has more than one use.
if load.Uses != 1 {
return false
}
// The register containing x is going to get clobbered.
// Don't merge if we still need the value of x.
// We don't have liveness information here, but we can
// approximate x dying with:
// 1) target is x's only use.
// 2) target is not in a deeper loop than x.
if x.Uses != 1 {
return false
}
loopnest := x.Block.Func.loopnest()
loopnest.calculateDepths()
if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
return false
}
mem := load.MemoryArg() mem := load.MemoryArg()
// We need the load's memory arg to still be alive at target. That // We need the load's memory arg to still be alive at target. That
...@@ -258,6 +283,7 @@ search: ...@@ -258,6 +283,7 @@ search:
} }
} }
} }
return true return true
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment