Commit 193510f2 authored by Josh Bleecher Snyder

cmd/compile: evaluate config as needed in rewrite rules

Prior to this CL, config was an explicit argument
to the SSA rewrite rules, and rules that needed
a Frontend got at it via config.
An upcoming CL moves Frontend from Config to Func,
so rules can no longer reach Frontend via Config.
Passing a Frontend as an argument to the rewrite rules
causes a 2-3% regression in compile times.
This CL takes a different approach:
It treats the variable names "config" and "fe"
as special and calculates them as needed.
The "as needed part" is also important to performance:
If they are calculated eagerly, the nilchecks themselves
cause a regression.

This introduces a little bit of magic into the rewrite
generator. However, from the perspective of the rules,
the config variable was already more or less magic.
And it makes the upcoming changes much clearer.
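
For illustration, a sketch of the per-op prologue the
generator now emits, reconstructed from the rulegen
changes in this diff (the arch and op in the function
name are illustrative; each declaration is emitted only
when the rule body actually references b, config, or fe):

	func rewriteValueAMD64_OpNeg32F(v *Value) bool {
		b := v.Block
		_ = b
		config := b.Func.Config
		_ = config
		fe := config.fe
		_ = fe
		// ... generated match/rewrite body ...
	}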

Passes toolstash -cmp.

Change-Id: I173f2bcc124cba43d53138bfa3775e21316a9107
Reviewed-on: https://go-review.googlesource.com/38326
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
parent 09272ae9
......@@ -15,31 +15,36 @@ import (
// It is created once, early during compilation,
// and shared across all compilations.
type Config struct {
arch string // "amd64", etc.
IntSize int64 // 4 or 8
PtrSize int64 // 4 or 8
RegSize int64 // 4 or 8
lowerBlock func(*Block, *Config) bool // lowering function
lowerValue func(*Value, *Config) bool // lowering function
registers []Register // machine registers
gpRegMask regMask // general purpose integer register mask
fpRegMask regMask // floating point register mask
specialRegMask regMask // special register mask
FPReg int8 // register number of frame pointer, -1 if not used
LinkReg int8 // register number of link register if it is a general purpose register, -1 if not used
hasGReg bool // has hardware g register
fe Frontend // callbacks into compiler frontend
ctxt *obj.Link // Generic arch information
optimize bool // Do optimization
noDuffDevice bool // Don't use Duff's device
nacl bool // GOOS=nacl
use387 bool // GO386=387
OldArch bool // True for older versions of architecture, e.g. true for PPC64BE, false for PPC64LE
NeedsFpScratch bool // No direct move between GP and FP register sets
BigEndian bool //
sparsePhiCutoff uint64 // Sparse phi location algorithm used above this #blocks*#variables score
arch string // "amd64", etc.
IntSize int64 // 4 or 8
PtrSize int64 // 4 or 8
RegSize int64 // 4 or 8
lowerBlock blockRewriter // lowering function
lowerValue valueRewriter // lowering function
registers []Register // machine registers
gpRegMask regMask // general purpose integer register mask
fpRegMask regMask // floating point register mask
specialRegMask regMask // special register mask
FPReg int8 // register number of frame pointer, -1 if not used
LinkReg int8 // register number of link register if it is a general purpose register, -1 if not used
hasGReg bool // has hardware g register
fe Frontend // callbacks into compiler frontend
ctxt *obj.Link // Generic arch information
optimize bool // Do optimization
noDuffDevice bool // Don't use Duff's device
nacl bool // GOOS=nacl
use387 bool // GO386=387
OldArch bool // True for older versions of architecture, e.g. true for PPC64BE, false for PPC64LE
NeedsFpScratch bool // No direct move between GP and FP register sets
BigEndian bool //
sparsePhiCutoff uint64 // Sparse phi location algorithm used above this #blocks*#variables score
}
type (
blockRewriter func(*Block) bool
valueRewriter func(*Value) bool
)
type TypeSource interface {
TypeBool() Type
TypeInt8() Type
......
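As a hedged aside (not part of this diff): with the new blockRewriter and
valueRewriter types, each architecture's generated rewriters are expected to be
wired into Config roughly as sketched below; the real per-arch setup in
NewConfig switches on the arch string and sets many more fields.

	// Sketch only: per-arch wiring of the generated rewrite functions (assumed).
	switch arch {
	case "amd64":
		c.lowerBlock = rewriteBlockAMD64 // blockRewriter: func(*Block) bool
		c.lowerValue = rewriteValueAMD64 // valueRewriter: func(*Value) bool
	case "arm":
		c.lowerBlock = rewriteBlockARM
		c.lowerValue = rewriteValueARM
	}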
......@@ -68,8 +68,8 @@
(Neg32 x) -> (NEGL x)
(Neg16 x) -> (NEGL x)
(Neg8 x) -> (NEGL x)
(Neg32F x) && !config.use387 -> (PXOR x (MOVSSconst <config.Frontend().TypeFloat32()> [f2i(math.Copysign(0, -1))]))
(Neg64F x) && !config.use387 -> (PXOR x (MOVSDconst <config.Frontend().TypeFloat64()> [f2i(math.Copysign(0, -1))]))
(Neg32F x) && !config.use387 -> (PXOR x (MOVSSconst <fe.TypeFloat32()> [f2i(math.Copysign(0, -1))]))
(Neg64F x) && !config.use387 -> (PXOR x (MOVSDconst <fe.TypeFloat64()> [f2i(math.Copysign(0, -1))]))
(Neg32F x) && config.use387 -> (FCHS x)
(Neg64F x) && config.use387 -> (FCHS x)
......
......@@ -78,8 +78,8 @@
(Neg32 x) -> (NEGL x)
(Neg16 x) -> (NEGL x)
(Neg8 x) -> (NEGL x)
(Neg32F x) -> (PXOR x (MOVSSconst <config.Frontend().TypeFloat32()> [f2i(math.Copysign(0, -1))]))
(Neg64F x) -> (PXOR x (MOVSDconst <config.Frontend().TypeFloat64()> [f2i(math.Copysign(0, -1))]))
(Neg32F x) -> (PXOR x (MOVSSconst <fe.TypeFloat32()> [f2i(math.Copysign(0, -1))]))
(Neg64F x) -> (PXOR x (MOVSDconst <fe.TypeFloat64()> [f2i(math.Copysign(0, -1))]))
(Com64 x) -> (NOTQ x)
(Com32 x) -> (NOTL x)
......@@ -98,10 +98,10 @@
// Lowering other arithmetic
(Ctz64 <t> x) -> (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <TypeFlags> (BSFQ x)))
(Ctz32 x) -> (Select0 (BSFQ (ORQ <config.Frontend().TypeUInt64()> (MOVQconst [1<<32]) x)))
(Ctz32 x) -> (Select0 (BSFQ (ORQ <fe.TypeUInt64()> (MOVQconst [1<<32]) x)))
(BitLen64 <t> x) -> (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <TypeFlags> (BSRQ x))))
(BitLen32 x) -> (BitLen64 (MOVLQZX <config.Frontend().TypeUInt64()> x))
(BitLen32 x) -> (BitLen64 (MOVLQZX <fe.TypeUInt64()> x))
(Bswap64 x) -> (BSWAPQ x)
(Bswap32 x) -> (BSWAPL x)
......@@ -472,10 +472,10 @@
// Atomic stores. We use XCHG to prevent the hardware reordering a subsequent load.
// TODO: most runtime uses of atomic stores don't need that property. Use normal stores for those?
(AtomicStore32 ptr val mem) -> (Select1 (XCHGL <MakeTuple(config.Frontend().TypeUInt32(),TypeMem)> val ptr mem))
(AtomicStore64 ptr val mem) -> (Select1 (XCHGQ <MakeTuple(config.Frontend().TypeUInt64(),TypeMem)> val ptr mem))
(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 8 -> (Select1 (XCHGQ <MakeTuple(config.Frontend().TypeBytePtr(),TypeMem)> val ptr mem))
(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 4 -> (Select1 (XCHGL <MakeTuple(config.Frontend().TypeBytePtr(),TypeMem)> val ptr mem))
(AtomicStore32 ptr val mem) -> (Select1 (XCHGL <MakeTuple(fe.TypeUInt32(),TypeMem)> val ptr mem))
(AtomicStore64 ptr val mem) -> (Select1 (XCHGQ <MakeTuple(fe.TypeUInt64(),TypeMem)> val ptr mem))
(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 8 -> (Select1 (XCHGQ <MakeTuple(fe.TypeBytePtr(),TypeMem)> val ptr mem))
(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 4 -> (Select1 (XCHGL <MakeTuple(fe.TypeBytePtr(),TypeMem)> val ptr mem))
// Atomic exchanges.
(AtomicExchange32 ptr val mem) -> (XCHGL val ptr mem)
......@@ -553,8 +553,8 @@
(NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) -> (NEF cmp yes no)
// Disabled because it interferes with the pattern match above and makes worse code.
// (SETNEF x) -> (ORQ (SETNE <config.Frontend().TypeInt8()> x) (SETNAN <config.Frontend().TypeInt8()> x))
// (SETEQF x) -> (ANDQ (SETEQ <config.Frontend().TypeInt8()> x) (SETORD <config.Frontend().TypeInt8()> x))
// (SETNEF x) -> (ORQ (SETNE <fe.TypeInt8()> x) (SETNAN <fe.TypeInt8()> x))
// (SETEQF x) -> (ANDQ (SETEQ <fe.TypeInt8()> x) (SETORD <fe.TypeInt8()> x))
// fold constants into instructions
(ADDQ x (MOVQconst [c])) && is32Bit(c) -> (ADDQconst [c] x)
......
......@@ -34,12 +34,12 @@
(Mul32uhilo x y) -> (MULLU x y)
(Div32 x y) ->
(SUB (XOR <config.fe.TypeUInt32()> // negate the result if one operand is negative
(Select0 <config.fe.TypeUInt32()> (CALLudiv {config.ctxt.Lookup("udiv", 0)}
(SUB <config.fe.TypeUInt32()> (XOR x <config.fe.TypeUInt32()> (Signmask x)) (Signmask x)) // negate x if negative
(SUB <config.fe.TypeUInt32()> (XOR y <config.fe.TypeUInt32()> (Signmask y)) (Signmask y)))) // negate y if negative
(Signmask (XOR <config.fe.TypeUInt32()> x y))) (Signmask (XOR <config.fe.TypeUInt32()> x y)))
(Div32u x y) -> (Select0 <config.fe.TypeUInt32()> (CALLudiv {config.ctxt.Lookup("udiv", 0)} x y))
(SUB (XOR <fe.TypeUInt32()> // negate the result if one operand is negative
(Select0 <fe.TypeUInt32()> (CALLudiv {config.ctxt.Lookup("udiv", 0)}
(SUB <fe.TypeUInt32()> (XOR x <fe.TypeUInt32()> (Signmask x)) (Signmask x)) // negate x if negative
(SUB <fe.TypeUInt32()> (XOR y <fe.TypeUInt32()> (Signmask y)) (Signmask y)))) // negate y if negative
(Signmask (XOR <fe.TypeUInt32()> x y))) (Signmask (XOR <fe.TypeUInt32()> x y)))
(Div32u x y) -> (Select0 <fe.TypeUInt32()> (CALLudiv {config.ctxt.Lookup("udiv", 0)} x y))
(Div16 x y) -> (Div32 (SignExt16to32 x) (SignExt16to32 y))
(Div16u x y) -> (Div32u (ZeroExt16to32 x) (ZeroExt16to32 y))
(Div8 x y) -> (Div32 (SignExt8to32 x) (SignExt8to32 y))
......@@ -48,12 +48,12 @@
(Div64F x y) -> (DIVD x y)
(Mod32 x y) ->
(SUB (XOR <config.fe.TypeUInt32()> // negate the result if x is negative
(Select1 <config.fe.TypeUInt32()> (CALLudiv {config.ctxt.Lookup("udiv", 0)}
(SUB <config.fe.TypeUInt32()> (XOR <config.fe.TypeUInt32()> x (Signmask x)) (Signmask x)) // negate x if negative
(SUB <config.fe.TypeUInt32()> (XOR <config.fe.TypeUInt32()> y (Signmask y)) (Signmask y)))) // negate y if negative
(SUB (XOR <fe.TypeUInt32()> // negate the result if x is negative
(Select1 <fe.TypeUInt32()> (CALLudiv {config.ctxt.Lookup("udiv", 0)}
(SUB <fe.TypeUInt32()> (XOR <fe.TypeUInt32()> x (Signmask x)) (Signmask x)) // negate x if negative
(SUB <fe.TypeUInt32()> (XOR <fe.TypeUInt32()> y (Signmask y)) (Signmask y)))) // negate y if negative
(Signmask x)) (Signmask x))
(Mod32u x y) -> (Select1 <config.fe.TypeUInt32()> (CALLudiv {config.ctxt.Lookup("udiv", 0)} x y))
(Mod32u x y) -> (Select1 <fe.TypeUInt32()> (CALLudiv {config.ctxt.Lookup("udiv", 0)} x y))
(Mod16 x y) -> (Mod32 (SignExt16to32 x) (SignExt16to32 y))
(Mod16u x y) -> (Mod32u (ZeroExt16to32 x) (ZeroExt16to32 y))
(Mod8 x y) -> (Mod32 (SignExt8to32 x) (SignExt8to32 y))
......@@ -111,7 +111,7 @@
// boolean ops -- booleans are represented with 0=false, 1=true
(AndB x y) -> (AND x y)
(OrB x y) -> (OR x y)
(EqB x y) -> (XORconst [1] (XOR <config.fe.TypeBool()> x y))
(EqB x y) -> (XORconst [1] (XOR <fe.TypeBool()> x y))
(NeqB x y) -> (XOR x y)
(Not x) -> (XORconst [1] x)
......@@ -160,11 +160,11 @@
(Rsh32x64 x (Const64 [c])) && uint64(c) < 32 -> (SRAconst x [c])
(Rsh32Ux64 x (Const64 [c])) && uint64(c) < 32 -> (SRLconst x [c])
(Lsh16x64 x (Const64 [c])) && uint64(c) < 16 -> (SLLconst x [c])
(Rsh16x64 x (Const64 [c])) && uint64(c) < 16 -> (SRAconst (SLLconst <config.fe.TypeUInt32()> x [16]) [c+16])
(Rsh16Ux64 x (Const64 [c])) && uint64(c) < 16 -> (SRLconst (SLLconst <config.fe.TypeUInt32()> x [16]) [c+16])
(Rsh16x64 x (Const64 [c])) && uint64(c) < 16 -> (SRAconst (SLLconst <fe.TypeUInt32()> x [16]) [c+16])
(Rsh16Ux64 x (Const64 [c])) && uint64(c) < 16 -> (SRLconst (SLLconst <fe.TypeUInt32()> x [16]) [c+16])
(Lsh8x64 x (Const64 [c])) && uint64(c) < 8 -> (SLLconst x [c])
(Rsh8x64 x (Const64 [c])) && uint64(c) < 8 -> (SRAconst (SLLconst <config.fe.TypeUInt32()> x [24]) [c+24])
(Rsh8Ux64 x (Const64 [c])) && uint64(c) < 8 -> (SRLconst (SLLconst <config.fe.TypeUInt32()> x [24]) [c+24])
(Rsh8x64 x (Const64 [c])) && uint64(c) < 8 -> (SRAconst (SLLconst <fe.TypeUInt32()> x [24]) [c+24])
(Rsh8Ux64 x (Const64 [c])) && uint64(c) < 8 -> (SRLconst (SLLconst <fe.TypeUInt32()> x [24]) [c+24])
// large constant shifts
(Lsh32x64 _ (Const64 [c])) && uint64(c) >= 32 -> (Const32 [0])
......@@ -176,8 +176,8 @@
// large constant signed right shift, we leave the sign bit
(Rsh32x64 x (Const64 [c])) && uint64(c) >= 32 -> (SRAconst x [31])
(Rsh16x64 x (Const64 [c])) && uint64(c) >= 16 -> (SRAconst (SLLconst <config.fe.TypeUInt32()> x [16]) [31])
(Rsh8x64 x (Const64 [c])) && uint64(c) >= 8 -> (SRAconst (SLLconst <config.fe.TypeUInt32()> x [24]) [31])
(Rsh16x64 x (Const64 [c])) && uint64(c) >= 16 -> (SRAconst (SLLconst <fe.TypeUInt32()> x [16]) [31])
(Rsh8x64 x (Const64 [c])) && uint64(c) >= 8 -> (SRAconst (SLLconst <fe.TypeUInt32()> x [24]) [31])
// constants
(Const8 [val]) -> (MOVWconst [val])
......@@ -204,7 +204,7 @@
(SignExt16to32 x) -> (MOVHreg x)
(Signmask x) -> (SRAconst x [31])
(Zeromask x) -> (SRAconst (RSBshiftRL <config.fe.TypeInt32()> x x [1]) [31]) // sign bit of uint32(x)>>1 - x
(Zeromask x) -> (SRAconst (RSBshiftRL <fe.TypeInt32()> x x [1]) [31]) // sign bit of uint32(x)>>1 - x
(Slicemask <t> x) -> (SRAconst (RSBconst <t> [0] x) [31])
// float <-> int conversion
......
......@@ -27,8 +27,8 @@
(Hmul64 x y) -> (MULH x y)
(Hmul64u x y) -> (UMULH x y)
(Hmul32 x y) -> (SRAconst (MULL <config.fe.TypeInt64()> x y) [32])
(Hmul32u x y) -> (SRAconst (UMULL <config.fe.TypeUInt64()> x y) [32])
(Hmul32 x y) -> (SRAconst (MULL <fe.TypeInt64()> x y) [32])
(Hmul32u x y) -> (SRAconst (UMULL <fe.TypeUInt64()> x y) [32])
(Div64 x y) -> (DIV x y)
(Div64u x y) -> (UDIV x y)
......@@ -86,20 +86,20 @@
(Ctz64 <t> x) -> (CLZ (RBIT <t> x))
(Ctz32 <t> x) -> (CLZW (RBITW <t> x))
(BitLen64 x) -> (SUB (MOVDconst [64]) (CLZ <config.fe.TypeInt()> x))
(BitLen64 x) -> (SUB (MOVDconst [64]) (CLZ <fe.TypeInt()> x))
(Bswap64 x) -> (REV x)
(Bswap32 x) -> (REVW x)
(BitRev64 x) -> (RBIT x)
(BitRev32 x) -> (RBITW x)
(BitRev16 x) -> (SRLconst [48] (RBIT <config.fe.TypeUInt64()> x))
(BitRev8 x) -> (SRLconst [56] (RBIT <config.fe.TypeUInt64()> x))
(BitRev16 x) -> (SRLconst [48] (RBIT <fe.TypeUInt64()> x))
(BitRev8 x) -> (SRLconst [56] (RBIT <fe.TypeUInt64()> x))
// boolean ops -- booleans are represented with 0=false, 1=true
(AndB x y) -> (AND x y)
(OrB x y) -> (OR x y)
(EqB x y) -> (XOR (MOVDconst [1]) (XOR <config.fe.TypeBool()> x y))
(EqB x y) -> (XOR (MOVDconst [1]) (XOR <fe.TypeBool()> x y))
(NeqB x y) -> (XOR x y)
(Not x) -> (XOR (MOVDconst [1]) x)
......
......@@ -10,7 +10,7 @@
(Add64F x y) -> (ADDD x y)
(Select0 (Add32carry <t> x y)) -> (ADD <t.FieldType(0)> x y)
(Select1 (Add32carry <t> x y)) -> (SGTU <config.fe.TypeBool()> x (ADD <t.FieldType(0)> x y))
(Select1 (Add32carry <t> x y)) -> (SGTU <fe.TypeBool()> x (ADD <t.FieldType(0)> x y))
(Add32withcarry <t> x y c) -> (ADD c (ADD <t> x y))
(SubPtr x y) -> (SUB x y)
......@@ -21,7 +21,7 @@
(Sub64F x y) -> (SUBD x y)
(Select0 (Sub32carry <t> x y)) -> (SUB <t.FieldType(0)> x y)
(Select1 (Sub32carry <t> x y)) -> (SGTU <config.fe.TypeBool()> (SUB <t.FieldType(0)> x y) x)
(Select1 (Sub32carry <t> x y)) -> (SGTU <fe.TypeBool()> (SUB <t.FieldType(0)> x y) x)
(Sub32withcarry <t> x y c) -> (SUB (SUB <t> x y) c)
(Mul32 x y) -> (MUL x y)
......@@ -72,11 +72,11 @@
(Rsh32x64 x (Const64 [c])) && uint32(c) < 32 -> (SRAconst x [c])
(Rsh32Ux64 x (Const64 [c])) && uint32(c) < 32 -> (SRLconst x [c])
(Lsh16x64 x (Const64 [c])) && uint32(c) < 16 -> (SLLconst x [c])
(Rsh16x64 x (Const64 [c])) && uint32(c) < 16 -> (SRAconst (SLLconst <config.fe.TypeUInt32()> x [16]) [c+16])
(Rsh16Ux64 x (Const64 [c])) && uint32(c) < 16 -> (SRLconst (SLLconst <config.fe.TypeUInt32()> x [16]) [c+16])
(Rsh16x64 x (Const64 [c])) && uint32(c) < 16 -> (SRAconst (SLLconst <fe.TypeUInt32()> x [16]) [c+16])
(Rsh16Ux64 x (Const64 [c])) && uint32(c) < 16 -> (SRLconst (SLLconst <fe.TypeUInt32()> x [16]) [c+16])
(Lsh8x64 x (Const64 [c])) && uint32(c) < 8 -> (SLLconst x [c])
(Rsh8x64 x (Const64 [c])) && uint32(c) < 8 -> (SRAconst (SLLconst <config.fe.TypeUInt32()> x [24]) [c+24])
(Rsh8Ux64 x (Const64 [c])) && uint32(c) < 8 -> (SRLconst (SLLconst <config.fe.TypeUInt32()> x [24]) [c+24])
(Rsh8x64 x (Const64 [c])) && uint32(c) < 8 -> (SRAconst (SLLconst <fe.TypeUInt32()> x [24]) [c+24])
(Rsh8Ux64 x (Const64 [c])) && uint32(c) < 8 -> (SRLconst (SLLconst <fe.TypeUInt32()> x [24]) [c+24])
// large constant shifts
(Lsh32x64 _ (Const64 [c])) && uint32(c) >= 32 -> (MOVWconst [0])
......@@ -88,8 +88,8 @@
// large constant signed right shift, we leave the sign bit
(Rsh32x64 x (Const64 [c])) && uint32(c) >= 32 -> (SRAconst x [31])
(Rsh16x64 x (Const64 [c])) && uint32(c) >= 16 -> (SRAconst (SLLconst <config.fe.TypeUInt32()> x [16]) [31])
(Rsh8x64 x (Const64 [c])) && uint32(c) >= 8 -> (SRAconst (SLLconst <config.fe.TypeUInt32()> x [24]) [31])
(Rsh16x64 x (Const64 [c])) && uint32(c) >= 16 -> (SRAconst (SLLconst <fe.TypeUInt32()> x [16]) [31])
(Rsh8x64 x (Const64 [c])) && uint32(c) >= 8 -> (SRAconst (SLLconst <fe.TypeUInt32()> x [24]) [31])
// shifts
// hardware instruction uses only the low 5 bits of the shift
......@@ -118,17 +118,17 @@
(Rsh8Ux16 <t> x y) -> (CMOVZ (SRL <t> (ZeroExt8to32 x) (ZeroExt16to32 y) ) (MOVWconst [0]) (SGTUconst [32] (ZeroExt16to32 y)))
(Rsh8Ux8 <t> x y) -> (CMOVZ (SRL <t> (ZeroExt8to32 x) (ZeroExt8to32 y) ) (MOVWconst [0]) (SGTUconst [32] (ZeroExt8to32 y)))
(Rsh32x32 x y) -> (SRA x ( CMOVZ <config.fe.TypeUInt32()> y (MOVWconst [-1]) (SGTUconst [32] y)))
(Rsh32x16 x y) -> (SRA x ( CMOVZ <config.fe.TypeUInt32()> (ZeroExt16to32 y) (MOVWconst [-1]) (SGTUconst [32] (ZeroExt16to32 y))))
(Rsh32x8 x y) -> (SRA x ( CMOVZ <config.fe.TypeUInt32()> (ZeroExt8to32 y) (MOVWconst [-1]) (SGTUconst [32] (ZeroExt8to32 y))))
(Rsh32x32 x y) -> (SRA x ( CMOVZ <fe.TypeUInt32()> y (MOVWconst [-1]) (SGTUconst [32] y)))
(Rsh32x16 x y) -> (SRA x ( CMOVZ <fe.TypeUInt32()> (ZeroExt16to32 y) (MOVWconst [-1]) (SGTUconst [32] (ZeroExt16to32 y))))
(Rsh32x8 x y) -> (SRA x ( CMOVZ <fe.TypeUInt32()> (ZeroExt8to32 y) (MOVWconst [-1]) (SGTUconst [32] (ZeroExt8to32 y))))
(Rsh16x32 x y) -> (SRA (SignExt16to32 x) ( CMOVZ <config.fe.TypeUInt32()> y (MOVWconst [-1]) (SGTUconst [32] y)))
(Rsh16x16 x y) -> (SRA (SignExt16to32 x) ( CMOVZ <config.fe.TypeUInt32()> (ZeroExt16to32 y) (MOVWconst [-1]) (SGTUconst [32] (ZeroExt16to32 y))))
(Rsh16x8 x y) -> (SRA (SignExt16to32 x) ( CMOVZ <config.fe.TypeUInt32()> (ZeroExt8to32 y) (MOVWconst [-1]) (SGTUconst [32] (ZeroExt8to32 y))))
(Rsh16x32 x y) -> (SRA (SignExt16to32 x) ( CMOVZ <fe.TypeUInt32()> y (MOVWconst [-1]) (SGTUconst [32] y)))
(Rsh16x16 x y) -> (SRA (SignExt16to32 x) ( CMOVZ <fe.TypeUInt32()> (ZeroExt16to32 y) (MOVWconst [-1]) (SGTUconst [32] (ZeroExt16to32 y))))
(Rsh16x8 x y) -> (SRA (SignExt16to32 x) ( CMOVZ <fe.TypeUInt32()> (ZeroExt8to32 y) (MOVWconst [-1]) (SGTUconst [32] (ZeroExt8to32 y))))
(Rsh8x32 x y) -> (SRA (SignExt16to32 x) ( CMOVZ <config.fe.TypeUInt32()> y (MOVWconst [-1]) (SGTUconst [32] y)))
(Rsh8x16 x y) -> (SRA (SignExt16to32 x) ( CMOVZ <config.fe.TypeUInt32()> (ZeroExt16to32 y) (MOVWconst [-1]) (SGTUconst [32] (ZeroExt16to32 y))))
(Rsh8x8 x y) -> (SRA (SignExt16to32 x) ( CMOVZ <config.fe.TypeUInt32()> (ZeroExt8to32 y) (MOVWconst [-1]) (SGTUconst [32] (ZeroExt8to32 y))))
(Rsh8x32 x y) -> (SRA (SignExt16to32 x) ( CMOVZ <fe.TypeUInt32()> y (MOVWconst [-1]) (SGTUconst [32] y)))
(Rsh8x16 x y) -> (SRA (SignExt16to32 x) ( CMOVZ <fe.TypeUInt32()> (ZeroExt16to32 y) (MOVWconst [-1]) (SGTUconst [32] (ZeroExt16to32 y))))
(Rsh8x8 x y) -> (SRA (SignExt16to32 x) ( CMOVZ <fe.TypeUInt32()> (ZeroExt8to32 y) (MOVWconst [-1]) (SGTUconst [32] (ZeroExt8to32 y))))
// unary ops
(Neg32 x) -> (NEG x)
......@@ -153,7 +153,7 @@
// boolean ops -- booleans are represented with 0=false, 1=true
(AndB x y) -> (AND x y)
(OrB x y) -> (OR x y)
(EqB x y) -> (XORconst [1] (XOR <config.fe.TypeBool()> x y))
(EqB x y) -> (XORconst [1] (XOR <fe.TypeBool()> x y))
(NeqB x y) -> (XOR x y)
(Not x) -> (XORconst [1] x)
......@@ -393,41 +393,41 @@
// AtomicOr8(ptr,val) -> LoweredAtomicOr(ptr&^3,uint32(val) << ((ptr & 3) * 8))
(AtomicOr8 ptr val mem) && !config.BigEndian ->
(LoweredAtomicOr (AND <config.fe.TypeUInt32().PtrTo()> (MOVWconst [^3]) ptr)
(SLL <config.fe.TypeUInt32()> (ZeroExt8to32 val)
(SLLconst <config.fe.TypeUInt32()> [3]
(ANDconst <config.fe.TypeUInt32()> [3] ptr))) mem)
(LoweredAtomicOr (AND <fe.TypeUInt32().PtrTo()> (MOVWconst [^3]) ptr)
(SLL <fe.TypeUInt32()> (ZeroExt8to32 val)
(SLLconst <fe.TypeUInt32()> [3]
(ANDconst <fe.TypeUInt32()> [3] ptr))) mem)
// AtomicAnd8(ptr,val) -> LoweredAtomicAnd(ptr&^3,(uint32(val) << ((ptr & 3) * 8)) | ^(uint32(0xFF) << ((ptr & 3) * 8))))
(AtomicAnd8 ptr val mem) && !config.BigEndian ->
(LoweredAtomicAnd (AND <config.fe.TypeUInt32().PtrTo()> (MOVWconst [^3]) ptr)
(OR <config.fe.TypeUInt32()> (SLL <config.fe.TypeUInt32()> (ZeroExt8to32 val)
(SLLconst <config.fe.TypeUInt32()> [3]
(ANDconst <config.fe.TypeUInt32()> [3] ptr)))
(NORconst [0] <config.fe.TypeUInt32()> (SLL <config.fe.TypeUInt32()>
(MOVWconst [0xff]) (SLLconst <config.fe.TypeUInt32()> [3]
(ANDconst <config.fe.TypeUInt32()> [3]
(XORconst <config.fe.TypeUInt32()> [3] ptr)))))) mem)
(LoweredAtomicAnd (AND <fe.TypeUInt32().PtrTo()> (MOVWconst [^3]) ptr)
(OR <fe.TypeUInt32()> (SLL <fe.TypeUInt32()> (ZeroExt8to32 val)
(SLLconst <fe.TypeUInt32()> [3]
(ANDconst <fe.TypeUInt32()> [3] ptr)))
(NORconst [0] <fe.TypeUInt32()> (SLL <fe.TypeUInt32()>
(MOVWconst [0xff]) (SLLconst <fe.TypeUInt32()> [3]
(ANDconst <fe.TypeUInt32()> [3]
(XORconst <fe.TypeUInt32()> [3] ptr)))))) mem)
// AtomicOr8(ptr,val) -> LoweredAtomicOr(ptr&^3,uint32(val) << (((ptr^3) & 3) * 8))
(AtomicOr8 ptr val mem) && config.BigEndian ->
(LoweredAtomicOr (AND <config.fe.TypeUInt32().PtrTo()> (MOVWconst [^3]) ptr)
(SLL <config.fe.TypeUInt32()> (ZeroExt8to32 val)
(SLLconst <config.fe.TypeUInt32()> [3]
(ANDconst <config.fe.TypeUInt32()> [3]
(XORconst <config.fe.TypeUInt32()> [3] ptr)))) mem)
(LoweredAtomicOr (AND <fe.TypeUInt32().PtrTo()> (MOVWconst [^3]) ptr)
(SLL <fe.TypeUInt32()> (ZeroExt8to32 val)
(SLLconst <fe.TypeUInt32()> [3]
(ANDconst <fe.TypeUInt32()> [3]
(XORconst <fe.TypeUInt32()> [3] ptr)))) mem)
// AtomicAnd8(ptr,val) -> LoweredAtomicAnd(ptr&^3,(uint32(val) << (((ptr^3) & 3) * 8)) | ^(uint32(0xFF) << (((ptr^3) & 3) * 8))))
(AtomicAnd8 ptr val mem) && config.BigEndian ->
(LoweredAtomicAnd (AND <config.fe.TypeUInt32().PtrTo()> (MOVWconst [^3]) ptr)
(OR <config.fe.TypeUInt32()> (SLL <config.fe.TypeUInt32()> (ZeroExt8to32 val)
(SLLconst <config.fe.TypeUInt32()> [3]
(ANDconst <config.fe.TypeUInt32()> [3]
(XORconst <config.fe.TypeUInt32()> [3] ptr))))
(NORconst [0] <config.fe.TypeUInt32()> (SLL <config.fe.TypeUInt32()>
(MOVWconst [0xff]) (SLLconst <config.fe.TypeUInt32()> [3]
(ANDconst <config.fe.TypeUInt32()> [3]
(XORconst <config.fe.TypeUInt32()> [3] ptr)))))) mem)
(LoweredAtomicAnd (AND <fe.TypeUInt32().PtrTo()> (MOVWconst [^3]) ptr)
(OR <fe.TypeUInt32()> (SLL <fe.TypeUInt32()> (ZeroExt8to32 val)
(SLLconst <fe.TypeUInt32()> [3]
(ANDconst <fe.TypeUInt32()> [3]
(XORconst <fe.TypeUInt32()> [3] ptr))))
(NORconst [0] <fe.TypeUInt32()> (SLL <fe.TypeUInt32()>
(MOVWconst [0xff]) (SLLconst <fe.TypeUInt32()> [3]
(ANDconst <fe.TypeUInt32()> [3]
(XORconst <fe.TypeUInt32()> [3] ptr)))))) mem)
// checks
......
......@@ -437,7 +437,7 @@
(If (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GTF cmp yes no)
(If (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) cmp) yes no) -> (GEF cmp yes no)
(If cond yes no) -> (NE (CMPWconst [0] (MOVBZreg <config.fe.TypeBool()> cond)) yes no)
(If cond yes no) -> (NE (CMPWconst [0] (MOVBZreg <fe.TypeBool()> cond)) yes no)
// ***************************
// Above: lowering rules
......
......@@ -13,28 +13,28 @@
(Load <t> ptr mem) && t.IsComplex() && t.Size() == 8 ->
(ComplexMake
(Load <config.fe.TypeFloat32()> ptr mem)
(Load <config.fe.TypeFloat32()>
(OffPtr <config.fe.TypeFloat32().PtrTo()> [4] ptr)
(Load <fe.TypeFloat32()> ptr mem)
(Load <fe.TypeFloat32()>
(OffPtr <fe.TypeFloat32().PtrTo()> [4] ptr)
mem)
)
(Store {t} dst (ComplexMake real imag) mem) && t.(Type).Size() == 8 ->
(Store {config.fe.TypeFloat32()}
(OffPtr <config.fe.TypeFloat32().PtrTo()> [4] dst)
(Store {fe.TypeFloat32()}
(OffPtr <fe.TypeFloat32().PtrTo()> [4] dst)
imag
(Store {config.fe.TypeFloat32()} dst real mem))
(Store {fe.TypeFloat32()} dst real mem))
(Load <t> ptr mem) && t.IsComplex() && t.Size() == 16 ->
(ComplexMake
(Load <config.fe.TypeFloat64()> ptr mem)
(Load <config.fe.TypeFloat64()>
(OffPtr <config.fe.TypeFloat64().PtrTo()> [8] ptr)
(Load <fe.TypeFloat64()> ptr mem)
(Load <fe.TypeFloat64()>
(OffPtr <fe.TypeFloat64().PtrTo()> [8] ptr)
mem)
)
(Store {t} dst (ComplexMake real imag) mem) && t.(Type).Size() == 16 ->
(Store {config.fe.TypeFloat64()}
(OffPtr <config.fe.TypeFloat64().PtrTo()> [8] dst)
(Store {fe.TypeFloat64()}
(OffPtr <fe.TypeFloat64().PtrTo()> [8] dst)
imag
(Store {config.fe.TypeFloat64()} dst real mem))
(Store {fe.TypeFloat64()} dst real mem))
// string ops
(StringPtr (StringMake ptr _)) -> ptr
......@@ -42,15 +42,15 @@
(Load <t> ptr mem) && t.IsString() ->
(StringMake
(Load <config.fe.TypeBytePtr()> ptr mem)
(Load <config.fe.TypeInt()>
(OffPtr <config.fe.TypeInt().PtrTo()> [config.PtrSize] ptr)
(Load <fe.TypeBytePtr()> ptr mem)
(Load <fe.TypeInt()>
(OffPtr <fe.TypeInt().PtrTo()> [config.PtrSize] ptr)
mem))
(Store dst (StringMake ptr len) mem) ->
(Store {config.fe.TypeInt()}
(OffPtr <config.fe.TypeInt().PtrTo()> [config.PtrSize] dst)
(Store {fe.TypeInt()}
(OffPtr <fe.TypeInt().PtrTo()> [config.PtrSize] dst)
len
(Store {config.fe.TypeBytePtr()} dst ptr mem))
(Store {fe.TypeBytePtr()} dst ptr mem))
// slice ops
(SlicePtr (SliceMake ptr _ _ )) -> ptr
......@@ -60,20 +60,20 @@
(Load <t> ptr mem) && t.IsSlice() ->
(SliceMake
(Load <t.ElemType().PtrTo()> ptr mem)
(Load <config.fe.TypeInt()>
(OffPtr <config.fe.TypeInt().PtrTo()> [config.PtrSize] ptr)
(Load <fe.TypeInt()>
(OffPtr <fe.TypeInt().PtrTo()> [config.PtrSize] ptr)
mem)
(Load <config.fe.TypeInt()>
(OffPtr <config.fe.TypeInt().PtrTo()> [2*config.PtrSize] ptr)
(Load <fe.TypeInt()>
(OffPtr <fe.TypeInt().PtrTo()> [2*config.PtrSize] ptr)
mem))
(Store dst (SliceMake ptr len cap) mem) ->
(Store {config.fe.TypeInt()}
(OffPtr <config.fe.TypeInt().PtrTo()> [2*config.PtrSize] dst)
(Store {fe.TypeInt()}
(OffPtr <fe.TypeInt().PtrTo()> [2*config.PtrSize] dst)
cap
(Store {config.fe.TypeInt()}
(OffPtr <config.fe.TypeInt().PtrTo()> [config.PtrSize] dst)
(Store {fe.TypeInt()}
(OffPtr <fe.TypeInt().PtrTo()> [config.PtrSize] dst)
len
(Store {config.fe.TypeBytePtr()} dst ptr mem)))
(Store {fe.TypeBytePtr()} dst ptr mem)))
// interface ops
(ITab (IMake itab _)) -> itab
......@@ -81,12 +81,12 @@
(Load <t> ptr mem) && t.IsInterface() ->
(IMake
(Load <config.fe.TypeBytePtr()> ptr mem)
(Load <config.fe.TypeBytePtr()>
(OffPtr <config.fe.TypeBytePtr().PtrTo()> [config.PtrSize] ptr)
(Load <fe.TypeBytePtr()> ptr mem)
(Load <fe.TypeBytePtr()>
(OffPtr <fe.TypeBytePtr().PtrTo()> [config.PtrSize] ptr)
mem))
(Store dst (IMake itab data) mem) ->
(Store {config.fe.TypeBytePtr()}
(OffPtr <config.fe.TypeBytePtr().PtrTo()> [config.PtrSize] dst)
(Store {fe.TypeBytePtr()}
(OffPtr <fe.TypeBytePtr().PtrTo()> [config.PtrSize] dst)
data
(Store {config.fe.TypeUintptr()} dst itab mem))
(Store {fe.TypeUintptr()} dst itab mem))
......@@ -156,11 +156,11 @@ func genRules(arch arch) {
fmt.Fprintln(w, "var _ = math.MinInt8 // in case not otherwise used")
// Main rewrite routine is a switch on v.Op.
fmt.Fprintf(w, "func rewriteValue%s(v *Value, config *Config) bool {\n", arch.name)
fmt.Fprintf(w, "func rewriteValue%s(v *Value) bool {\n", arch.name)
fmt.Fprintf(w, "switch v.Op {\n")
for _, op := range ops {
fmt.Fprintf(w, "case %s:\n", op)
fmt.Fprintf(w, "return rewriteValue%s_%s(v, config)\n", arch.name, op)
fmt.Fprintf(w, "return rewriteValue%s_%s(v)\n", arch.name, op)
}
fmt.Fprintf(w, "}\n")
fmt.Fprintf(w, "return false\n")
......@@ -169,47 +169,71 @@ func genRules(arch arch) {
// Generate a routine per op. Note that we don't make one giant routine
// because it is too big for some compilers.
for _, op := range ops {
fmt.Fprintf(w, "func rewriteValue%s_%s(v *Value, config *Config) bool {\n", arch.name, op)
fmt.Fprintln(w, "b := v.Block")
fmt.Fprintln(w, "_ = b")
buf := new(bytes.Buffer)
var canFail bool
for i, rule := range oprules[op] {
match, cond, result := rule.parse()
fmt.Fprintf(w, "// match: %s\n", match)
fmt.Fprintf(w, "// cond: %s\n", cond)
fmt.Fprintf(w, "// result: %s\n", result)
fmt.Fprintf(buf, "// match: %s\n", match)
fmt.Fprintf(buf, "// cond: %s\n", cond)
fmt.Fprintf(buf, "// result: %s\n", result)
canFail = false
fmt.Fprintf(w, "for {\n")
if genMatch(w, arch, match, rule.loc) {
fmt.Fprintf(buf, "for {\n")
if genMatch(buf, arch, match, rule.loc) {
canFail = true
}
if cond != "" {
fmt.Fprintf(w, "if !(%s) {\nbreak\n}\n", cond)
fmt.Fprintf(buf, "if !(%s) {\nbreak\n}\n", cond)
canFail = true
}
if !canFail && i != len(oprules[op])-1 {
log.Fatalf("unconditional rule %s is followed by other rules", match)
}
genResult(w, arch, result, rule.loc)
genResult(buf, arch, result, rule.loc)
if *genLog {
fmt.Fprintf(w, "logRule(\"%s\")\n", rule.loc)
fmt.Fprintf(buf, "logRule(\"%s\")\n", rule.loc)
}
fmt.Fprintf(w, "return true\n")
fmt.Fprintf(buf, "return true\n")
fmt.Fprintf(w, "}\n")
fmt.Fprintf(buf, "}\n")
}
if canFail {
fmt.Fprintf(w, "return false\n")
}
fmt.Fprintf(buf, "return false\n")
}
body := buf.String()
// Do a rough match to predict whether we need b, config, and/or fe.
// It's not precise--thus the blank assignments--but it's good enough
// to avoid generating needless code and doing pointless nil checks.
hasb := strings.Contains(body, "b.")
hasconfig := strings.Contains(body, "config.")
hasfe := strings.Contains(body, "fe.")
fmt.Fprintf(w, "func rewriteValue%s_%s(v *Value) bool {\n", arch.name, op)
if hasb || hasconfig || hasfe {
fmt.Fprintln(w, "b := v.Block")
fmt.Fprintln(w, "_ = b")
}
if hasconfig || hasfe {
fmt.Fprintln(w, "config := b.Func.Config")
fmt.Fprintln(w, "_ = config")
}
if hasfe {
fmt.Fprintln(w, "fe := config.fe")
fmt.Fprintln(w, "_ = fe")
}
fmt.Fprint(w, body)
fmt.Fprintf(w, "}\n")
}
// Generate block rewrite function. There are only a few block types
// so we can make this one function with a switch.
fmt.Fprintf(w, "func rewriteBlock%s(b *Block, config *Config) bool {\n", arch.name)
fmt.Fprintf(w, "func rewriteBlock%s(b *Block) bool {\n", arch.name)
fmt.Fprintln(w, "config := b.Func.Config")
fmt.Fprintln(w, "_ = config")
fmt.Fprintln(w, "fe := config.fe")
fmt.Fprintln(w, "_ = fe")
fmt.Fprintf(w, "switch b.Kind {\n")
ops = nil
for op := range blockrules {
......@@ -695,7 +719,7 @@ func typeName(typ string) string {
case "Flags", "Mem", "Void", "Int128":
return "Type" + typ
default:
return "config.fe.Type" + typ + "()"
return "fe.Type" + typ + "()"
}
}
......
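To round out the rulegen changes above, a sketch of the block-rewrite function
the generator now emits, reconstructed from the Fprintln calls in the
rewriteBlock section; unlike the per-op value functions, the single per-arch
block function always declares config and fe, and the per-block-kind body is
elided here.

	func rewriteBlockAMD64(b *Block) bool {
		config := b.Func.Config
		_ = config
		fe := config.fe
		_ = fe
		switch b.Kind {
		// ... generated per-block-kind rules ...
		}
		return false
	}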
......@@ -14,7 +14,7 @@ import (
"strings"
)
func applyRewrite(f *Func, rb func(*Block, *Config) bool, rv func(*Value, *Config) bool) {
func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter) {
// repeat rewrites until we find no more rewrites
var curb *Block
var curv *Value
......@@ -27,7 +27,6 @@ func applyRewrite(f *Func, rb func(*Block, *Config) bool, rv func(*Value, *Confi
// TODO(khr): print source location also
}
}()
config := f.Config
for {
change := false
for _, b := range f.Blocks {
......@@ -37,7 +36,7 @@ func applyRewrite(f *Func, rb func(*Block, *Config) bool, rv func(*Value, *Confi
}
}
curb = b
if rb(b, config) {
if rb(b) {
change = true
}
curb = nil
......@@ -66,7 +65,7 @@ func applyRewrite(f *Func, rb func(*Block, *Config) bool, rv func(*Value, *Confi
// apply rewrite function
curv = v
if rv(v, config) {
if rv(v) {
change = true
}
curv = nil
......
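For context, a hedged sketch of how callers look with the narrowed signature;
the lowering and generic-optimization passes are assumed to hand the generated
rewriters to applyRewrite directly, for example:

	// Sketch: assumed call sites of applyRewrite after this change.
	func lower(f *Func) {
		applyRewrite(f, f.Config.lowerBlock, f.Config.lowerValue)
	}

	func opt(f *Func) {
		applyRewrite(f, rewriteBlockgeneric, rewriteValuegeneric)
	}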