[dev.ssa] cmd/compile: simplify 386+PIC+globals a bit

We shouldn't issue instructions like MOVL foo(SB), AX directly from the SSA backend. Instead we should do LEAL foo(SB), AX; MOVL (AX), AX. This simplifies obj logic because now only LEAL needs to be treated specially. The register allocator uses the LEAL to in effect allocate the temporary register required for the shared library thunk calls.

Also, the LEALs can now be CSEd. So code like

    var g int
    func f() { g += 5 }

requires only one thunk call instead of 2.

Change-Id: Ib87d465f617f73af437445871d0ea91a630b2355
Reviewed-on: https://go-review.googlesource.com/26814
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>

[dev.ssa] cmd/compile: simplify 386+PIC+globals a bit
We shouldn't issue instructions like MOVL foo(SB), AX directly from the SSA backend. Instead we should do LEAL foo(SB), AX; MOVL (AX), AX. This simplifies obj logic because now only LEAL needs to be treated specially. The register allocator uses the LEAL to in effect allocate the temporary register required for the shared library thunk calls.

Also, the LEALs can now be CSEd. So code like

    var g int
    func f() { g += 5 }

requires only one thunk call instead of 2.

Change-Id: Ib87d465f617f73af437445871d0ea91a630b2355
Reviewed-on: https://go-review.googlesource.com/26814
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
94c8e59a · Keith Randall · 8f955d36 · 94c8e59a · 94c8e59a · 94c8e59a
Commit 94c8e59a authored Aug 11, 2016 by Keith Randall
Showing with 34 additions and 33 deletions

386.rules src/cmd/compile/internal/ssa/gen/386.rules +24 -18

rewrite386.go src/cmd/compile/internal/ssa/rewrite386.go +0 -0

obj6.go src/cmd/internal/obj/x86/obj6.go +10 -15

No files found.
--- a/src/cmd/compile/internal/ssa/gen/386.rules
+++ b/src/cmd/compile/internal/ssa/gen/386.rules
@@ -673,51 +673,57 @@
 // what variables are being read/written by the ops.
 // Note: we turn off this merging for operations on globals when building
 // position-independent code (when Flag_shared is set).
-// PIC needs a spare register to load the PC into. For loads from globals into integer registers we use
-// the target register, but for other loads and all stores, we need a free register. Having the LEAL be
-// a separate instruction gives us that register.
-(MOVLload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+// PIC needs a spare register to load the PC into.  Having the LEAL be
+// a separate instruction gives us that register.  Having the LEAL be
+// a separate instruction also allows it to be CSEd (which is good because
+// it compiles to a thunk call).
+(MOVLload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVLload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVWload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+(MOVWload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVWload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVBload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+(MOVBload  [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVBload  [off1+off2] {mergeSym(sym1,sym2)} base mem)
 (MOVSSload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-  && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
 (MOVSDload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-  && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem)

-(MOVBLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+(MOVBLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVBLSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVWLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+(MOVWLSXload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVWLSXload [off1+off2] {mergeSym(sym1,sym2)} base mem)

 (MOVLstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-  && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVLstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 (MOVWstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-  && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVWstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 (MOVBstore  [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-  && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVBstore  [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 (MOVSSstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-  && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
 (MOVSDstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2)
-  && (base.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)

 (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-  && (ptr.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
 (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-  && (ptr.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
 (MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)
-  && (ptr.Op != OpSB || !b.Func.Config.ctxt.Flag_shared) ->
+  && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
 	(MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)

 // generating indexed loads and stores

--- a/src/cmd/compile/internal/ssa/rewrite386.go
+++ b/src/cmd/compile/internal/ssa/rewrite386.go
--- a/src/cmd/internal/obj/x86/obj6.go
+++ b/src/cmd/internal/obj/x86/obj6.go
@@ -334,15 +334,12 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog) {
 		lea = ALEAL
 		mov = AMOVL
 		reg = REG_CX
-		if p.To.Type == obj.TYPE_REG && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
-			switch p.As {
-			case ALEAL, AMOVL, AMOVWLZX, AMOVBLZX, AMOVWLSX, AMOVBLSX:
-				// Special case: clobber the destination register with
-				// the PC so we don't have to clobber CX.
-				// The SSA backend depends on CX not being clobbered across these instructions.
-				// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
-				reg = p.To.Reg
-			}
+		if p.As == ALEAL && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
+			// Special case: clobber the destination register with
+			// the PC so we don't have to clobber CX.
+			// The SSA backend depends on CX not being clobbered across LEAL.
+			// See cmd/compile/internal/ssa/gen/386.rules (search for Flag_shared).
+			reg = p.To.Reg
 		}
 	}

@@ -554,12 +551,10 @@ func rewriteToPcrel(ctxt *obj.Link, p *obj.Prog) {
 		return
 	}
 	var dst int16 = REG_CX
-	if p.To.Type == obj.TYPE_REG && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
-		switch p.As {
-		case ALEAL, AMOVL, AMOVWLZX, AMOVBLZX, AMOVWLSX, AMOVBLSX:
-			dst = p.To.Reg
-			// Why?  See the comment near the top of rewriteToUseGot above.
-		}
+	if (p.As == ALEAL || p.As == AMOVL) && p.To.Reg != p.From.Reg && p.To.Reg != p.From.Index {
+		dst = p.To.Reg
+		// Why?  See the comment near the top of rewriteToUseGot above.
+		// AMOVLs might be introduced by the GOT rewrites.
 	}
 	q := obj.Appendp(ctxt, p)
 	q.RegTo2 = 1