cmd/{asm,internal/obj/s390x}, math: remove emulated float instructions

The s390x port was based on the ppc64 port and, because of the way the port was done, inherited some instructions from it. ppc64 supports 3-operand (4-operand for FMADD etc.) floating point instructions but s390x doesn't (the destination register is always an input) and so these were emulated. There is a bug in the emulation of FMADD whereby if the destination register is also a source for the multiplication it will be clobbered. This doesn't break any assembly code in the std lib but could affect future work. To fix this I have gone through the floating point instructions and removed all unnecessary 3-/4-operand emulation. The compiler doesn't need it and assembly writers don't need it, it's just a source of bugs. I've also deleted the FNMADD family of emulated instructions. They aren't used anywhere. Change-Id: Ic07cedcf141a6a3b43a0c84895460f6cfbf56c04 Reviewed-on: https://go-review.googlesource.com/33350 Run-TryBot: Michael Munday <munday@ca.ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>

cmd/{asm,internal/obj/s390x}, math: remove emulated float instructions
The s390x port was based on the ppc64 port and, because of the way the port was done, inherited some instructions from it. ppc64 supports 3-operand (4-operand for FMADD etc.) floating point instructions but s390x doesn't (the destination register is always an input) and so these were emulated. There is a bug in the emulation of FMADD whereby if the destination register is also a source for the multiplication it will be clobbered. This doesn't break any assembly code in the std lib but could affect future work. To fix this I have gone through the floating point instructions and removed all unnecessary 3-/4-operand emulation. The compiler doesn't need it and assembly writers don't need it, it's just a source of bugs. I've also deleted the FNMADD family of emulated instructions. They aren't used anywhere. Change-Id: Ic07cedcf141a6a3b43a0c84895460f6cfbf56c04 Reviewed-on: https://go-review.googlesource.com/33350 Run-TryBot: Michael Munday <munday@ca.ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
a5246168 · Michael Munday · f44e5870 · a5246168 · a5246168 · a5246168
Commit a5246168 authored Nov 17, 2016 by Michael Munday
11 changed files
--- a/src/cmd/asm/internal/arch/s390x.go
+++ b/src/cmd/asm/internal/arch/s390x.go
@@ -48,24 +48,6 @@ func jumpS390x(word string) bool {
 	return false
 }

-// IsS390xRLD reports whether the op (as defined by an s390x.A* constant) is
-// one of the RLD-like instructions that require special handling.
-// The FMADD-like instructions behave similarly.
-func IsS390xRLD(op obj.As) bool {
-	switch op {
-	case s390x.AFMADD,
-		s390x.AFMADDS,
-		s390x.AFMSUB,
-		s390x.AFMSUBS,
-		s390x.AFNMADD,
-		s390x.AFNMADDS,
-		s390x.AFNMSUB,
-		s390x.AFNMSUBS:
-		return true
-	}
-	return false
-}
-
 // IsS390xCMP reports whether the op (as defined by an s390x.A* constant) is
 // one of the CMP instructions that require special handling.
 func IsS390xCMP(op obj.As) bool {

--- a/src/cmd/asm/internal/asm/testdata/s390x.s
+++ b/src/cmd/asm/internal/asm/testdata/s390x.s
@@ -281,6 +281,10 @@ TEXT main·foo(SB),7,$16-0 // TEXT main.foo(SB), 7, $16-0
 	FSQRT	F5, F15                // b31500f5
 	FIEBR	$0, F0, F1             // b3570010
 	FIDBR	$7, F2, F3             // b35f7032
+	FMADD	F1, F1, F1             // b31e1011
+	FMADDS	F1, F2, F3             // b30e3012
+	FMSUB	F4, F5, F5             // b31f5045
+	FMSUBS	F6, F6, F7             // b30f7066

 	VL	(R15), V1              // e710f0000006
 	VST	V1, (R15)              // e710f000000e

--- a/src/cmd/internal/obj/s390x/a.out.go
+++ b/src/cmd/internal/obj/s390x/a.out.go
@@ -290,10 +290,6 @@ const (
 	AFNABS
 	AFNEG
 	AFNEGS
-	AFNMADD
-	AFNMADDS
-	AFNMSUB
-	AFNMSUBS
 	ALEDBR
 	ALDEBR
 	AFSUB

--- a/src/cmd/internal/obj/s390x/anames.go
+++ b/src/cmd/internal/obj/s390x/anames.go
@@ -79,10 +79,6 @@ var Anames = []string{
 	"FNABS",
 	"FNEG",
 	"FNEGS",
-	"FNMADD",
-	"FNMADDS",
-	"FNMSUB",
-	"FNMSUBS",
 	"LEDBR",
 	"LDEBR",
 	"FSUB",

--- a/src/cmd/internal/obj/s390x/asmz.go
+++ b/src/cmd/internal/obj/s390x/asmz.go
@@ -177,13 +177,11 @@ var optab = []Optab{
 	Optab{ACSG, C_REG, C_REG, C_NONE, C_SOREG, 79, 0},

 	// floating point
-	Optab{AFADD, C_FREG, C_NONE, C_NONE, C_FREG, 2, 0},
-	Optab{AFADD, C_FREG, C_FREG, C_NONE, C_FREG, 2, 0},
+	Optab{AFADD, C_FREG, C_NONE, C_NONE, C_FREG, 32, 0},
 	Optab{AFABS, C_FREG, C_NONE, C_NONE, C_FREG, 33, 0},
 	Optab{AFABS, C_NONE, C_NONE, C_NONE, C_FREG, 33, 0},
-	Optab{AFMADD, C_FREG, C_FREG, C_FREG, C_FREG, 34, 0},
+	Optab{AFMADD, C_FREG, C_FREG, C_NONE, C_FREG, 34, 0},
 	Optab{AFMUL, C_FREG, C_NONE, C_NONE, C_FREG, 32, 0},
-	Optab{AFMUL, C_FREG, C_FREG, C_NONE, C_FREG, 32, 0},
 	Optab{AFMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, 36, REGSP},
 	Optab{AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, 36, 0},
 	Optab{AFMOVD, C_ADDR, C_NONE, C_NONE, C_FREG, 75, 0},
@@ -872,10 +870,6 @@ func buildop(ctxt *obj.Link) {
 			opset(AFMADDS, r)
 			opset(AFMSUB, r)
 			opset(AFMSUBS, r)
-			opset(AFNMADD, r)
-			opset(AFNMADDS, r)
-			opset(AFNMSUB, r)
-			opset(AFNMSUBS, r)
 		case AFMUL:
 			opset(AFMULS, r)
 		case AFCMPO:
@@ -2628,18 +2622,6 @@ func asmout(ctxt *obj.Link, asm *[]byte) {
 			opcode = op_DSGR
 		case ADIVDU, AMODDU:
 			opcode = op_DLGR
-		case AFADD:
-			opcode = op_ADBR
-		case AFADDS:
-			opcode = op_AEBR
-		case AFSUB:
-			opcode = op_SDBR
-		case AFSUBS:
-			opcode = op_SEBR
-		case AFDIV:
-			opcode = op_DDBR
-		case AFDIVS:
-			opcode = op_DEBR
 		}

 		switch p.As {
@@ -2678,29 +2660,6 @@ func asmout(ctxt *obj.Link, asm *[]byte) {
 			zRRE(opcode, REGTMP, uint32(p.From.Reg), asm)
 			zRRE(op_LGR, uint32(p.To.Reg), REGTMP, asm)

-		case AFADD, AFADDS:
-			if r == p.To.Reg {
-				zRRE(opcode, uint32(p.To.Reg), uint32(p.From.Reg), asm)
-			} else if p.From.Reg == p.To.Reg {
-				zRRE(opcode, uint32(p.To.Reg), uint32(r), asm)
-			} else {
-				zRR(op_LDR, uint32(p.To.Reg), uint32(r), asm)
-				zRRE(opcode, uint32(p.To.Reg), uint32(p.From.Reg), asm)
-			}
-
-		case AFSUB, AFSUBS, AFDIV, AFDIVS:
-			if r == p.To.Reg {
-				zRRE(opcode, uint32(p.To.Reg), uint32(p.From.Reg), asm)
-			} else if p.From.Reg == p.To.Reg {
-				zRRE(op_LGDR, REGTMP, uint32(r), asm)
-				zRRE(opcode, uint32(r), uint32(p.From.Reg), asm)
-				zRR(op_LDR, uint32(p.To.Reg), uint32(r), asm)
-				zRRE(op_LDGR, uint32(r), REGTMP, asm)
-			} else {
-				zRR(op_LDR, uint32(p.To.Reg), uint32(r), asm)
-				zRRE(opcode, uint32(p.To.Reg), uint32(p.From.Reg), asm)
-			}
-
 		}

 	case 3: // mov $constant reg
@@ -3146,31 +3105,29 @@ func asmout(ctxt *obj.Link, asm *[]byte) {
 			uint8(wd>>8),
 			uint8(wd))

-	case 32: // fmul freg [freg] freg
-		r := int(p.Reg)
-		if r == 0 {
-			r = int(p.To.Reg)
-		}
-
+	case 32: // float op freg freg
 		var opcode uint32
-
 		switch p.As {
 		default:
 			ctxt.Diag("invalid opcode")
+		case AFADD:
+			opcode = op_ADBR
+		case AFADDS:
+			opcode = op_AEBR
+		case AFDIV:
+			opcode = op_DDBR
+		case AFDIVS:
+			opcode = op_DEBR
 		case AFMUL:
 			opcode = op_MDBR
 		case AFMULS:
 			opcode = op_MEEBR
+		case AFSUB:
+			opcode = op_SDBR
+		case AFSUBS:
+			opcode = op_SEBR
 		}
-
-		if r == int(p.To.Reg) {
-			zRRE(opcode, uint32(p.To.Reg), uint32(p.From.Reg), asm)
-		} else if p.From.Reg == p.To.Reg {
-			zRRE(opcode, uint32(p.To.Reg), uint32(r), asm)
-		} else {
-			zRR(op_LDR, uint32(p.To.Reg), uint32(r), asm)
-			zRRE(opcode, uint32(p.To.Reg), uint32(p.From.Reg), asm)
-		}
+		zRRE(opcode, uint32(p.To.Reg), uint32(p.From.Reg), asm)

 	case 33: // float op [freg] freg
 		r := p.From.Reg
@@ -3199,9 +3156,8 @@ func asmout(ctxt *obj.Link, asm *[]byte) {
 		}
 		zRRE(opcode, uint32(p.To.Reg), uint32(r), asm)

-	case 34: // float multiply-add freg freg freg freg
+	case 34: // float multiply-add freg freg freg
 		var opcode uint32
-
 		switch p.As {
 		default:
 			ctxt.Diag("invalid opcode")
@@ -3213,22 +3169,8 @@ func asmout(ctxt *obj.Link, asm *[]byte) {
 			opcode = op_MSDBR
 		case AFMSUBS:
 			opcode = op_MSEBR
-		case AFNMADD:
-			opcode = op_MADBR
-		case AFNMADDS:
-			opcode = op_MAEBR
-		case AFNMSUB:
-			opcode = op_MSDBR
-		case AFNMSUBS:
-			opcode = op_MSEBR
-		}
-
-		zRR(op_LDR, uint32(p.To.Reg), uint32(p.Reg), asm)
-		zRRD(opcode, uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.From3.Reg), asm)
-
-		if p.As == AFNMADD || p.As == AFNMADDS || p.As == AFNMSUB || p.As == AFNMSUBS {
-			zRRE(op_LCDFR, uint32(p.To.Reg), uint32(p.To.Reg), asm)
 		}
+		zRRD(opcode, uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg), asm)

 	case 35: // mov reg mem (no relocation)
 		d2 := regoff(ctxt, &p.To)

--- a/src/cmd/internal/obj/s390x/objz.go
+++ b/src/cmd/internal/obj/s390x/objz.go
@@ -281,8 +281,6 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym) {
 			AFMUL,
 			AFNABS,
 			AFNEG,
-			AFNMADD,
-			AFNMSUB,
 			ALEDBR,
 			ALDEBR,
 			AFSUB:

--- a/src/math/cosh_s390x.s
+++ b/src/math/cosh_s390x.s
@@ -127,7 +127,7 @@ L14:
 	MOVD    $coshtab<>+0(SB), R3
 	WFMADB  V3, V6, V1, V6
 	WORD    $0x68043000     //ld    %f0,0(%r4,%r3)
-	FMSUB   F0, F3, F2, F2
+	FMSUB   F0, F3, F2
 	WORD    $0xA71AF000     //ahi   %r1,-4096
 	WFMADB  V2, V6, V0, V6
 L17:
@@ -135,7 +135,7 @@ L17:
 	BYTE    $0x30
 	BYTE    $0x59
 	WORD    $0xB3C10022     //ldgr %f2,%r2
-	FMADD   F2, F6, F2, F2
+	FMADD   F2, F6, F2
 	MOVD    $coshx4ff<>+0(SB), R1
 	FMOVD   0(R1), F0
 	FMUL    F2, F0
@@ -153,7 +153,7 @@ L20:
 	FMOVD   coshrodataL23<>+8(SB), F4
 	FADD    F3, F2
 	MOVD    $coshe6<>+0(SB), R1
-	FMSUB   F4, F2, F0, F0
+	FMSUB   F4, F2, F0
 	FMOVD   0(R1), F6
 	WFMDB   V0, V0, V1
 	MOVD    $coshe4<>+0(SB), R1
@@ -161,7 +161,7 @@ L20:
 	MOVD    $coshe5<>+0(SB), R1
 	FMOVD   coshrodataL23<>+0(SB), F5
 	WFMADB  V1, V6, V4, V6
-	FMADD   F5, F2, F0, F0
+	FMADD   F5, F2, F0
 	FMOVD   0(R1), F2
 	MOVD    $coshe3<>+0(SB), R1
 	FMOVD   0(R1), F4
@@ -183,13 +183,13 @@ L20:
 	WORD    $0x68145000     //ld %f1,0(%r4,%r5)
 	WFMSDB  V4, V1, V0, V2
 	WORD    $0xA7487FBE     //lhi %r4,32702
-	FMADD   F3, F2, F1, F1
+	FMADD   F3, F2, F1
 	SUBW    R1, R4
 	WORD    $0xECC439BC     //risbg %r12,%r4,57,128+60,3
 	BYTE    $0x03
 	BYTE    $0x55
 	WORD    $0x682C5000     //ld %f2,0(%r12,%r5)
-	FMSUB   F2, F4, F0, F0
+	FMSUB   F2, F4, F0
 	WORD    $0xEC21000F     //risbgn %r2,%r1,64-64+0,64-64+0+16-1,64-0-16
 	BYTE    $0x30
 	BYTE    $0x59
@@ -199,8 +199,8 @@ L20:
 	BYTE    $0x59
 	WORD    $0xB3C10022     //ldgr %f2,%r2
 	WORD    $0xB3C10003     //ldgr %f0,%r3
-	FMADD   F2, F1, F2, F2
-	FMADD   F0, F6, F0, F0
+	FMADD   F2, F1, F2
+	FMADD   F0, F6, F0
 	FADD    F2, F0
 	FMOVD   F0, ret+8(FP)
 	RET
@@ -214,7 +214,7 @@ L22:
 	BYTE    $0x03
 	BYTE    $0x55
 	WORD    $0x68034000     //ld %f0,0(%r3,%r4)
-	FMSUB   F0, F3, F2, F2
+	FMSUB   F0, F3, F2
 	WORD    $0xA7386FBE     //lhi %r3,28606
 	WFMADB  V2, V6, V0, V6
 	SUBW    R1, R3, R1

--- a/src/math/log10_s390x.s
+++ b/src/math/log10_s390x.s
@@ -140,7 +140,7 @@ L4:
 	WORD    $0x68331000     //ld %f3,0(%r3,%r1)
 	WFMADB  V0, V4, V3, V0
 	FMOVD   log10rodataL19<>+24(SB), F4
-	FMADD   F4, F2, F0, F0
+	FMADD   F4, F2, F0
 	FMOVD   F0, ret+8(FP)
 	RET


--- a/src/math/sin_s390x.s
+++ b/src/math/sin_s390x.s
@@ -100,10 +100,10 @@ L2:
 	FADD    F3, F6
 	MOVD    $sincosxpi2h<>+0(SB), R1
 	FMOVD   0(R1), F2
-	FMSUB   F2, F6, F0, F0
+	FMSUB   F2, F6, F0
 	MOVD    $sincosxpi2m<>+0(SB), R1
 	FMOVD   0(R1), F4
-	FMADD   F4, F6, F0, F0
+	FMADD   F4, F6, F0
 	MOVD    $sincosxpi2l<>+0(SB), R1
 	WFMDB   V0, V0, V1
 	FMOVD   0(R1), F7
@@ -156,7 +156,7 @@ L6:
 	WFMADB  V6, V7, V0, V6
 	FMOVD   0(R2), F0
 	MOVD    $sincoss4<>+0(SB), R2
-	FMADD   F4, F1, F0, F0
+	FMADD   F4, F1, F0
 	FMOVD   0(R2), F3
 	MOVD    $sincoss2<>+0(SB), R2
 	FMOVD   0(R2), F4
@@ -192,7 +192,7 @@ L18:
 	WFMADB  V2, V6, V3, V6
 	FMUL    F0, F2
 	WFMADB  V1, V4, V6, V4
-	FMADD   F4, F2, F0, F0
+	FMADD   F4, F2, F0
 	FMOVD   F0, ret+8(FP)
 	RET

@@ -312,16 +312,16 @@ L25:
 	FMOVD   0(R2), F7
 	WFMADB  V6, V3, V7, V3
 	MOVD    $sincoss3<>+0(SB), R2
-	FMADD   F5, F4, F0, F0
+	FMADD   F5, F4, F0
 	FMOVD   0(R2), F4
 	MOVD    $sincoss1<>+0(SB), R2
-	FMADD   F1, F6, F4, F4
+	FMADD   F1, F6, F4
 	FMOVD   0(R2), F1
-	FMADD   F3, F2, F1, F1
+	FMADD   F3, F2, F1
 	FMUL    F0, F2
 	WFMADB  V6, V4, V1, V6
 	WORD    $0xA7110002     //tmll  %r1,2
-	FMADD   F6, F2, F0, F0
+	FMADD   F6, F2, F0
 	BNE     L34
 	FMOVD   F0, ret+8(FP)
 	RET

--- a/src/math/sinh_s390x.s
+++ b/src/math/sinh_s390x.s
@@ -182,11 +182,11 @@ L20:
 	FMOVD   sinhrodataL21<>+8(SB), F0
 	FADD    F6, F2
 	MOVD    $sinhe9<>+0(SB), R2
-	FMSUB   F0, F2, F4, F4
+	FMSUB   F0, F2, F4
 	FMOVD   0(R2), F1
 	FMOVD   sinhrodataL21<>+0(SB), F3
 	MOVD    $sinhe7<>+0(SB), R2
-	FMADD   F3, F2, F4, F4
+	FMADD   F3, F2, F4
 	FMOVD   0(R2), F0
 	MOVD    $sinhe8<>+0(SB), R2
 	WFMDB   V4, V4, V2

--- a/src/math/tanh_s390x.s
+++ b/src/math/tanh_s390x.s
@@ -102,7 +102,7 @@ L2:
 L3:
 	FADD    F4, F2
 	FMOVD   tanhrodataL18<>+80(SB), F4
-	FMADD   F4, F2, F0, F0
+	FMADD   F4, F2, F0
 	FMOVD   tanhrodataL18<>+72(SB), F1
 	WFMDB   V0, V0, V3
 	FMOVD   tanhrodataL18<>+64(SB), F2
@@ -154,7 +154,7 @@ L15:
 L16:
 	FADD    F6, F2
 	FMOVD   tanhrodataL18<>+8(SB), F0
-	FMADD   F4, F2, F0, F0
+	FMADD   F4, F2, F0
 	FMOVD   tanhrodataL18<>+0(SB), F4
 	FNEG    F0, F0
 	WFMADB  V0, V2, V4, V0