Commit f91ff1a5 authored by Josh Bleecher Snyder's avatar Josh Bleecher Snyder

[dev.ssa] cmd/compile: add SSA pass to move values closer to uses

Even this very simple, restricted initial implementation helps.

While running make.bash, it moves 84437 values
to new, closer homes.

As a concrete example:

func f_ssa(i, j int, b bool) int {
	if !b {
		return 0
	}
	return i + j
}

It cuts off one stack slot and two instructions:

Before:

"".f_ssa t=1 size=96 value=0 args=0x20 locals=0x18
	0x0000 00000 (x.go:3)	TEXT	"".f_ssa(SB), $24-32
	0x0000 00000 (x.go:3)	SUBQ	$24, SP
	0x0004 00004 (x.go:3)	FUNCDATA	$0, "".gcargs·0(SB)
	0x0004 00004 (x.go:3)	FUNCDATA	$1, "".gclocals·1(SB)
	0x0004 00004 (x.go:5)	MOVQ	$0, AX
	0x0006 00006 (x.go:3)	MOVQ	32(SP), CX
	0x000b 00011 (x.go:3)	MOVQ	40(SP), DX
	0x0010 00016 (x.go:3)	LEAQ	48(SP), BX
	0x0015 00021 (x.go:3)	MOVB	(BX), BPB
	0x0018 00024 (x.go:3)	MOVQ	$0, SI
	0x001a 00026 (x.go:3)	MOVQ	SI, 56(SP)
	0x001f 00031 (x.go:3)	TESTB	BPB, BPB
	0x0022 00034 (x.go:5)	MOVQ	AX, (SP)
	0x0026 00038 (x.go:3)	MOVQ	CX, 8(SP)
	0x002b 00043 (x.go:3)	MOVQ	DX, 16(SP)
	0x0030 00048 (x.go:4)	JEQ	74
	0x0032 00050 (x.go:3)	MOVQ	8(SP), AX
	0x0037 00055 (x.go:3)	MOVQ	16(SP), CX
	0x003c 00060 (x.go:7)	LEAQ	(AX)(CX*1), DX
	0x0040 00064 (x.go:7)	MOVQ	DX, 56(SP)
	0x0045 00069 (x.go:3)	ADDQ	$24, SP
	0x0049 00073 (x.go:3)	RET
	0x004a 00074 (x.go:5)	MOVQ	(SP), AX
	0x004e 00078 (x.go:5)	MOVQ	AX, 56(SP)
	0x0053 00083 (x.go:3)	JMP	69

After:

"".f_ssa t=1 size=80 value=0 args=0x20 locals=0x10
	0x0000 00000 (x.go:3)	TEXT	"".f_ssa(SB), $16-32
	0x0000 00000 (x.go:3)	SUBQ	$16, SP
	0x0004 00004 (x.go:3)	FUNCDATA	$0, "".gcargs·0(SB)
	0x0004 00004 (x.go:3)	FUNCDATA	$1, "".gclocals·1(SB)
	0x0004 00004 (x.go:3)	MOVQ	32(SP), AX
	0x0009 00009 (x.go:3)	MOVQ	24(SP), CX
	0x000e 00014 (x.go:3)	LEAQ	40(SP), DX
	0x0013 00019 (x.go:3)	MOVB	(DX), BL
	0x0015 00021 (x.go:3)	MOVQ	$0, BP
	0x0017 00023 (x.go:3)	MOVQ	BP, 48(SP)
	0x001c 00028 (x.go:3)	TESTB	BL, BL
	0x001e 00030 (x.go:3)	MOVQ	AX, (SP)
	0x0022 00034 (x.go:3)	MOVQ	CX, 8(SP)
	0x0027 00039 (x.go:4)	JEQ	64
	0x0029 00041 (x.go:3)	MOVQ	8(SP), AX
	0x002e 00046 (x.go:3)	MOVQ	(SP), CX
	0x0032 00050 (x.go:7)	LEAQ	(AX)(CX*1), DX
	0x0036 00054 (x.go:7)	MOVQ	DX, 48(SP)
	0x003b 00059 (x.go:3)	ADDQ	$16, SP
	0x003f 00063 (x.go:3)	RET
	0x0040 00064 (x.go:5)	MOVQ	$0, AX
	0x0042 00066 (x.go:5)	MOVQ	AX, 48(SP)
	0x0047 00071 (x.go:3)	JMP	59

Of course, the old backend is still well ahead:

"".f_ssa t=1 size=48 value=0 args=0x20 locals=0x0
	0x0000 00000 (x.go:3)	TEXT	"".f_ssa(SB), $0-32
	0x0000 00000 (x.go:3)	NOP
	0x0000 00000 (x.go:3)	NOP
	0x0000 00000 (x.go:3)	FUNCDATA	$0, gclocals·a8eabfc4a4514ed6b3b0c61e9680e440(SB)
	0x0000 00000 (x.go:3)	FUNCDATA	$1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
	0x0000 00000 (x.go:4)	CMPB	"".b+24(FP), $0
	0x0005 00005 (x.go:4)	JNE	17
	0x0007 00007 (x.go:5)	MOVQ	$0, "".~r3+32(FP)
	0x0010 00016 (x.go:5)	RET
	0x0011 00017 (x.go:7)	MOVQ	"".i+8(FP), BX
	0x0016 00022 (x.go:7)	MOVQ	"".j+16(FP), BP
	0x001b 00027 (x.go:7)	ADDQ	BP, BX
	0x001e 00030 (x.go:7)	MOVQ	BX, "".~r3+32(FP)
	0x0023 00035 (x.go:7)	RET

Some regalloc improvements should help considerably.

Change-Id: I95bb5dd83e56afd70ae4e983f1d32dffd0c3d46a
Reviewed-on: https://go-review.googlesource.com/13142Reviewed-by: 's avatarKeith Randall <khr@golang.org>
parent d1c15a0e
......@@ -65,6 +65,7 @@ var passes = [...]pass{
{"generic deadcode", deadcode},
{"dse", dse},
{"fuse", fuse},
{"tighten", tighten}, // move values closer to their uses
{"lower", lower},
{"lowered cse", cse},
{"lowered deadcode", deadcode},
......@@ -94,6 +95,11 @@ var passOrder = [...]constraint{
{"nilcheckelim", "generic deadcode"},
// nilcheckelim generates sequences of plain basic blocks
{"nilcheckelim", "fuse"},
// tighten should happen before lowering to avoid splitting naturally paired instructions such as CMP/SET
{"tighten", "lower"},
// tighten will be most effective when as many values have been removed as possible
{"generic deadcode", "tighten"},
{"generic cse", "tighten"},
// don't layout blocks until critical edges have been removed
{"critical", "layout"},
// regalloc requires the removal of all critical edges
......
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
// tighten moves Values closer to the Blocks in which they are used.
// This can reduce the amount of register spilling required,
// if it doesn't also create more live values.
// For now, it handles only the trivial case in which a
// Value with one or fewer args is only used in a single Block.
// TODO: Do something smarter.
// A Value can be moved to any block that
// dominates all blocks in which it is used.
// Figure out when that will be an improvement.
func tighten(f *Func) {
// For each value, the number of blocks in which it is used.
uses := make([]int, f.NumValues())
// For each value, one block in which that value is used.
home := make([]*Block, f.NumValues())
changed := true
for changed {
changed = false
// Reset uses
for i := range uses {
uses[i] = 0
}
// No need to reset home; any relevant values will be written anew anyway
for _, b := range f.Blocks {
for _, v := range b.Values {
for _, w := range v.Args {
uses[w.ID]++
home[w.ID] = b
}
}
if b.Control != nil {
uses[b.Control.ID]++
home[b.Control.ID] = b
}
}
for _, b := range f.Blocks {
for i := 0; i < len(b.Values); i++ {
v := b.Values[i]
if v.Op == OpPhi {
continue
}
if uses[v.ID] == 1 && home[v.ID] != b && len(v.Args) < 2 {
// v is used in exactly one block, and it is not b.
// Furthermore, it takes at most one input,
// so moving it will not increase the
// number of live values anywhere.
// Move v to that block.
c := home[v.ID]
c.Values = append(c.Values, v)
v.Block = c
last := len(b.Values) - 1
b.Values[i] = b.Values[last]
b.Values[last] = nil
b.Values = b.Values[:last]
changed = true
}
}
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment