Commit 1cfb5c3f authored by Michael Munday's avatar Michael Munday

cmd/compile: merge loads into operations on s390x

Adds the new canMergeLoad function which can be used by rules to
decide whether a load can be merged into an operation. The function
ensures that the merge will not reorder the load relative to memory
operations (for example, stores) in such a way that the block can no
longer be scheduled.

This new function enables transformations such as:

MOVD 0(R1), R2
ADD  R2, R3

to:

ADD  0(R1), R3

The two-operand form of the following instructions can now read a
single memory operand:

 - ADD
 - ADDC
 - ADDW
 - MULLD
 - MULLW
 - SUB
 - SUBC
 - SUBE
 - SUBW
 - AND
 - ANDW
 - OR
 - ORW
 - XOR
 - XORW

Improves SHA3 performance by 6-8%.

Updates #15054.

Change-Id: Ibcb9122126cd1a26f2c01c0dfdbb42fe5e7b5b94
Reviewed-on: https://go-review.googlesource.com/29272
Run-TryBot: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarKeith Randall <khr@golang.org>
parent 92221fe8
......@@ -144,6 +144,35 @@ TEXT main·foo(SB),7,$16-0 // TEXT main.foo(SB), 7, $16-0
XORW $65536, R1 // c01700010000
XORW $-2, R1 // c017fffffffe
ADD -524288(R1), R2 // e32010008008
ADD 524287(R3), R4 // e3403fff7f08
ADD -524289(R1), R2 // c0a1fff7ffffe32a10000008
ADD 524288(R3), R4 // c0a100080000e34a30000008
ADD -524289(R1)(R2*1), R3 // c0a1fff7ffff41aa2000e33a10000008
ADD 524288(R3)(R4*1), R5 // c0a10008000041aa4000e35a30000008
ADDC (R1), R2 // e3201000000a
ADDW (R5), R6 // 5a605000
ADDW 4095(R7), R8 // 5a807fff
ADDW -1(R1), R2 // e3201fffff5a
ADDW 4096(R3), R4 // e3403000015a
MULLD (R1)(R2*1), R3 // e3321000000c
MULLW (R3)(R4*1), R5 // 71543000
MULLW 4096(R3), R4 // e34030000151
SUB (R1), R2 // e32010000009
SUBC (R1), R2 // e3201000000b
SUBE (R1), R2 // e32010000089
SUBW (R1), R2 // 5b201000
SUBW -1(R1), R2 // e3201fffff5b
AND (R1), R2 // e32010000080
ANDW (R1), R2 // 54201000
ANDW -1(R1), R2 // e3201fffff54
OR (R1), R2 // e32010000081
ORW (R1), R2 // 56201000
ORW -1(R1), R2 // e3201fffff56
XOR (R1), R2 // e32010000082
XORW (R1), R2 // 57201000
XORW -1(R1), R2 // e3201fffff57
LAA R1, R2, 524287(R3) // eb213fff7ff8
LAAG R4, R5, -524288(R6) // eb54600080e8
LAAL R7, R8, 8192(R9) // eb87900002fa
......
......@@ -333,6 +333,22 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Val = math.Float64frombits(uint64(v.AuxInt))
p.To.Type = obj.TYPE_REG
p.To.Reg = x
case ssa.OpS390XADDWload, ssa.OpS390XADDload,
ssa.OpS390XMULLWload, ssa.OpS390XMULLDload,
ssa.OpS390XSUBWload, ssa.OpS390XSUBload,
ssa.OpS390XANDWload, ssa.OpS390XANDload,
ssa.OpS390XORWload, ssa.OpS390XORload,
ssa.OpS390XXORWload, ssa.OpS390XXORload:
r := v.Reg()
if r != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[1].Reg()
gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = r
case ssa.OpS390XMOVDload,
ssa.OpS390XMOVWZload, ssa.OpS390XMOVHZload, ssa.OpS390XMOVBZload,
ssa.OpS390XMOVDBRload, ssa.OpS390XMOVWBRload, ssa.OpS390XMOVHBRload,
......
......@@ -947,6 +947,77 @@
(XOR x x) -> (MOVDconst [0])
(XORW x x) -> (MOVDconst [0])
// Fold memory operations into operations.
// Exclude global data (SB) because these instructions cannot handle relative addresses.
// TODO(mundaym): use LARL in the assembler to handle SB?
// TODO(mundaym): indexed versions of these?
(ADD <t> x g:(MOVDload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ADDload <t> [off] {sym} x ptr mem)
(ADD <t> g:(MOVDload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ADDload <t> [off] {sym} x ptr mem)
(ADDW <t> x g:(MOVWload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ADDWload <t> [off] {sym} x ptr mem)
(ADDW <t> g:(MOVWload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ADDWload <t> [off] {sym} x ptr mem)
(ADDW <t> x g:(MOVWZload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ADDWload <t> [off] {sym} x ptr mem)
(ADDW <t> g:(MOVWZload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ADDWload <t> [off] {sym} x ptr mem)
(MULLD <t> x g:(MOVDload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (MULLDload <t> [off] {sym} x ptr mem)
(MULLD <t> g:(MOVDload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (MULLDload <t> [off] {sym} x ptr mem)
(MULLW <t> x g:(MOVWload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (MULLWload <t> [off] {sym} x ptr mem)
(MULLW <t> g:(MOVWload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (MULLWload <t> [off] {sym} x ptr mem)
(MULLW <t> x g:(MOVWZload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (MULLWload <t> [off] {sym} x ptr mem)
(MULLW <t> g:(MOVWZload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (MULLWload <t> [off] {sym} x ptr mem)
(SUB <t> x g:(MOVDload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (SUBload <t> [off] {sym} x ptr mem)
(SUBW <t> x g:(MOVWload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (SUBWload <t> [off] {sym} x ptr mem)
(SUBW <t> x g:(MOVWZload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (SUBWload <t> [off] {sym} x ptr mem)
(AND <t> x g:(MOVDload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ANDload <t> [off] {sym} x ptr mem)
(AND <t> g:(MOVDload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ANDload <t> [off] {sym} x ptr mem)
(ANDW <t> x g:(MOVWload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ANDWload <t> [off] {sym} x ptr mem)
(ANDW <t> g:(MOVWload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ANDWload <t> [off] {sym} x ptr mem)
(ANDW <t> x g:(MOVWZload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ANDWload <t> [off] {sym} x ptr mem)
(ANDW <t> g:(MOVWZload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ANDWload <t> [off] {sym} x ptr mem)
(OR <t> x g:(MOVDload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ORload <t> [off] {sym} x ptr mem)
(OR <t> g:(MOVDload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ORload <t> [off] {sym} x ptr mem)
(ORW <t> x g:(MOVWload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ORWload <t> [off] {sym} x ptr mem)
(ORW <t> g:(MOVWload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ORWload <t> [off] {sym} x ptr mem)
(ORW <t> x g:(MOVWZload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ORWload <t> [off] {sym} x ptr mem)
(ORW <t> g:(MOVWZload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (ORWload <t> [off] {sym} x ptr mem)
(XOR <t> x g:(MOVDload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (XORload <t> [off] {sym} x ptr mem)
(XOR <t> g:(MOVDload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (XORload <t> [off] {sym} x ptr mem)
(XORW <t> x g:(MOVWload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (XORWload <t> [off] {sym} x ptr mem)
(XORW <t> g:(MOVWload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (XORWload <t> [off] {sym} x ptr mem)
(XORW <t> x g:(MOVWZload [off] {sym} ptr mem)) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (XORWload <t> [off] {sym} x ptr mem)
(XORW <t> g:(MOVWZload [off] {sym} ptr mem) x) && g.Uses == 1 && ptr.Op != OpSB && is20Bit(off) && canMergeLoad(v, g) && clobber(g)
-> (XORWload <t> [off] {sym} x ptr mem)
// Combine constant stores into larger (unaligned) stores.
// It doesn't work to global data (based on SB),
// because STGRL doesn't support unaligned address
......
......@@ -1265,14 +1265,20 @@ const (
OpS390XADDW
OpS390XADDconst
OpS390XADDWconst
OpS390XADDload
OpS390XADDWload
OpS390XSUB
OpS390XSUBW
OpS390XSUBconst
OpS390XSUBWconst
OpS390XSUBload
OpS390XSUBWload
OpS390XMULLD
OpS390XMULLW
OpS390XMULLDconst
OpS390XMULLWconst
OpS390XMULLDload
OpS390XMULLWload
OpS390XMULHD
OpS390XMULHDU
OpS390XDIVD
......@@ -1287,14 +1293,20 @@ const (
OpS390XANDW
OpS390XANDconst
OpS390XANDWconst
OpS390XANDload
OpS390XANDWload
OpS390XOR
OpS390XORW
OpS390XORconst
OpS390XORWconst
OpS390XORload
OpS390XORWload
OpS390XXOR
OpS390XXORW
OpS390XXORconst
OpS390XXORWconst
OpS390XXORload
OpS390XXORWload
OpS390XCMP
OpS390XCMPW
OpS390XCMPU
......@@ -15762,6 +15774,42 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "ADDload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AADD,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "ADDWload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AADDW,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "SUB",
argLen: 2,
......@@ -15824,6 +15872,42 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "SUBload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.ASUB,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "SUBWload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.ASUBW,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "MULLD",
argLen: 2,
......@@ -15890,6 +15974,42 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "MULLDload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AMULLD,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "MULLWload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AMULLW,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "MULHD",
argLen: 2,
......@@ -16114,6 +16234,42 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "ANDload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AAND,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "ANDWload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AANDW,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "OR",
argLen: 2,
......@@ -16178,6 +16334,42 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "ORload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AOR,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "ORWload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AORW,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "XOR",
argLen: 2,
......@@ -16242,6 +16434,42 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "XORload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AXOR,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "XORWload",
auxType: auxSymOff,
argLen: 3,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
asm: s390x.AXORW,
reg: regInfo{
inputs: []inputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
{1, 54270}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 SP
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "CMP",
argLen: 2,
......
......@@ -149,6 +149,116 @@ func canMergeSym(x, y interface{}) bool {
return x == nil || y == nil
}
// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
func canMergeLoad(target, load *Value) bool {
if target.Block.ID != load.Block.ID {
// If the load is in a different block do not merge it.
return false
}
mem := load.Args[len(load.Args)-1]
// We need the load's memory arg to still be alive at target. That
// can't be the case if one of target's args depends on a memory
// state that is a successor of load's memory arg.
//
// For example, it would be invalid to merge load into target in
// the following situation because newmem has killed oldmem
// before target is reached:
// load = read ... oldmem
// newmem = write ... oldmem
// arg0 = read ... newmem
// target = add arg0 load
//
// If the argument comes from a different block then we can exclude
// it immediately because it must dominate load (which is in the
// same block as target).
var args []*Value
for _, a := range target.Args {
if a != load && a.Block.ID == target.Block.ID {
args = append(args, a)
}
}
// memPreds contains memory states known to be predecessors of load's
// memory state. It is lazily initialized.
var memPreds map[*Value]bool
search:
for i := 0; len(args) > 0; i++ {
const limit = 100
if i >= limit {
// Give up if we have done a lot of iterations.
return false
}
v := args[len(args)-1]
args = args[:len(args)-1]
if target.Block.ID != v.Block.ID {
// Since target and load are in the same block
// we can stop searching when we leave the block.
continue search
}
if v.Op == OpPhi {
// A Phi implies we have reached the top of the block.
continue search
}
if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
// We could handle this situation however it is likely
// to be very rare.
return false
}
if v.Type.IsMemory() {
if memPreds == nil {
// Initialise a map containing memory states
// known to be predecessors of load's memory
// state.
memPreds = make(map[*Value]bool)
m := mem
const limit = 50
for i := 0; i < limit; i++ {
if m.Op == OpPhi {
break
}
if m.Block.ID != target.Block.ID {
break
}
if !m.Type.IsMemory() {
break
}
memPreds[m] = true
if len(m.Args) == 0 {
break
}
m = m.Args[len(m.Args)-1]
}
}
// We can merge if v is a predecessor of mem.
//
// For example, we can merge load into target in the
// following scenario:
// x = read ... v
// mem = write ... v
// load = read ... mem
// target = add x load
if memPreds[v] {
continue search
}
return false
}
if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
// If v takes mem as an input then we know mem
// is valid at this point.
continue search
}
for _, a := range v.Args {
if target.Block.ID == a.Block.ID {
args = append(args, a)
}
}
}
return true
}
// isArg returns whether s is an arg symbol
func isArg(s interface{}) bool {
_, ok := s.(*ArgSymbol)
......
......@@ -142,8 +142,12 @@ var optab = []Optab{
Optab{AADD, C_REG, C_NONE, C_NONE, C_REG, 2, 0},
Optab{AADD, C_LCON, C_REG, C_NONE, C_REG, 22, 0},
Optab{AADD, C_LCON, C_NONE, C_NONE, C_REG, 22, 0},
Optab{AADD, C_LOREG, C_NONE, C_NONE, C_REG, 12, 0},
Optab{AADD, C_LAUTO, C_NONE, C_NONE, C_REG, 12, REGSP},
Optab{ASUB, C_LCON, C_REG, C_NONE, C_REG, 21, 0},
Optab{ASUB, C_LCON, C_NONE, C_NONE, C_REG, 21, 0},
Optab{ASUB, C_LOREG, C_NONE, C_NONE, C_REG, 12, 0},
Optab{ASUB, C_LAUTO, C_NONE, C_NONE, C_REG, 12, REGSP},
Optab{AMULHD, C_REG, C_NONE, C_NONE, C_REG, 4, 0},
Optab{AMULHD, C_REG, C_REG, C_NONE, C_REG, 4, 0},
Optab{ADIVW, C_REG, C_REG, C_NONE, C_REG, 2, 0},
......@@ -158,9 +162,13 @@ var optab = []Optab{
Optab{AAND, C_REG, C_NONE, C_NONE, C_REG, 6, 0},
Optab{AAND, C_LCON, C_NONE, C_NONE, C_REG, 23, 0},
Optab{AAND, C_LCON, C_REG, C_NONE, C_REG, 23, 0},
Optab{AAND, C_LOREG, C_NONE, C_NONE, C_REG, 12, 0},
Optab{AAND, C_LAUTO, C_NONE, C_NONE, C_REG, 12, REGSP},
Optab{AANDW, C_REG, C_REG, C_NONE, C_REG, 6, 0},
Optab{AANDW, C_REG, C_NONE, C_NONE, C_REG, 6, 0},
Optab{AANDW, C_LCON, C_NONE, C_NONE, C_REG, 24, 0},
Optab{AANDW, C_LOREG, C_NONE, C_NONE, C_REG, 12, 0},
Optab{AANDW, C_LAUTO, C_NONE, C_NONE, C_REG, 12, REGSP},
Optab{ASLD, C_REG, C_NONE, C_NONE, C_REG, 7, 0},
Optab{ASLD, C_REG, C_REG, C_NONE, C_REG, 7, 0},
Optab{ASLD, C_SCON, C_REG, C_NONE, C_REG, 7, 0},
......@@ -2884,6 +2892,67 @@ func asmout(ctxt *obj.Link, asm *[]byte) {
}
}
case 12:
r1 := p.To.Reg
d2 := vregoff(ctxt, &p.From)
b2 := p.From.Reg
if b2 == 0 {
b2 = o.param
}
x2 := p.From.Index
if -DISP20/2 > d2 || d2 >= DISP20/2 {
zRIL(_a, op_LGFI, REGTMP, uint32(d2), asm)
if x2 != 0 {
zRX(op_LA, REGTMP, REGTMP, uint32(x2), 0, asm)
}
x2 = REGTMP
d2 = 0
}
var opx, opxy uint32
switch p.As {
case AADD:
opxy = op_AG
case AADDC:
opxy = op_ALG
case AADDW:
opx = op_A
opxy = op_AY
case AMULLW:
opx = op_MS
opxy = op_MSY
case AMULLD:
opxy = op_MSG
case ASUB:
opxy = op_SG
case ASUBC:
opxy = op_SLG
case ASUBE:
opxy = op_SLBG
case ASUBW:
opx = op_S
opxy = op_SY
case AAND:
opxy = op_NG
case AANDW:
opx = op_N
opxy = op_NY
case AOR:
opxy = op_OG
case AORW:
opx = op_O
opxy = op_OY
case AXOR:
opxy = op_XG
case AXORW:
opx = op_X
opxy = op_XY
}
if opx != 0 && 0 <= d2 && d2 < DISP12 {
zRX(opx, uint32(r1), uint32(x2), uint32(b2), uint32(d2), asm)
} else {
zRXY(opxy, uint32(r1), uint32(x2), uint32(b2), uint32(d2), asm)
}
case 15: // br/bl (reg)
r := p.To.Reg
if p.As == ABCL || p.As == ABL {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment