[dev.ssa] cmd/compile/ssa: add nilcheckelim benchmarks

These benchmarks demonstrate that the nilcheckelim pass is roughly O(n^2): BenchmarkNilCheckDeep1 2000000 741 ns/op 1.35 MB/s BenchmarkNilCheckDeep10 1000000 2237 ns/op 4.47 MB/s BenchmarkNilCheckDeep100 20000 60713 ns/op 1.65 MB/s BenchmarkNilCheckDeep1000 200 7925198 ns/op 0.13 MB/s BenchmarkNilCheckDeep10000 1 1220104252 ns/op 0.01 MB/s Profiling suggests that building the dominator tree is also O(n^2), and before size factors take over, considerably more expensive than nilcheckelim. Change-Id: If966b38ec52243a25f355dab871300d29db02e16 Reviewed-on: https://go-review.googlesource.com/11520Reviewed-by: Keith Randall <khr@golang.org>

[dev.ssa] cmd/compile/ssa: add nilcheckelim benchmarks
These benchmarks demonstrate that the nilcheckelim pass is roughly O(n^2): BenchmarkNilCheckDeep1 2000000 741 ns/op 1.35 MB/s BenchmarkNilCheckDeep10 1000000 2237 ns/op 4.47 MB/s BenchmarkNilCheckDeep100 20000 60713 ns/op 1.65 MB/s BenchmarkNilCheckDeep1000 200 7925198 ns/op 0.13 MB/s BenchmarkNilCheckDeep10000 1 1220104252 ns/op 0.01 MB/s Profiling suggests that building the dominator tree is also O(n^2), and before size factors take over, considerably more expensive than nilcheckelim. Change-Id: If966b38ec52243a25f355dab871300d29db02e16 Reviewed-on: https://go-review.googlesource.com/11520Reviewed-by: Keith Randall <khr@golang.org>
1746e711 · Josh Bleecher Snyder · d9a704cd · 1746e711 · 1746e711 · 1746e711
Commit 1746e711 authored Jun 25, 2015 by Josh Bleecher Snyder
4 changed files
--- a/src/cmd/compile/internal/ssa/dom.go
+++ b/src/cmd/compile/internal/ssa/dom.go
@@ -55,6 +55,8 @@ func postorder(f *Func) []*Block {
 // which maps block ID to the immediate dominator of that block.
 // Unreachable blocks map to nil.  The entry block maps to nil.
 func dominators(f *Func) []*Block {
+	// TODO: Benchmarks. See BenchmarkNilCheckDeep* for an example.
+
 	// A simple algorithm for now
 	// Cooper, Harvey, Kennedy
 	idom := make([]*Block, f.NumBlocks())
@@ -108,6 +110,7 @@ func dominators(f *Func) []*Block {
 // intersect finds the closest dominator of both b and c.
 // It requires a postorder numbering of all the blocks.
 func intersect(b, c *Block, postnum []int, idom []*Block) *Block {
+	// TODO: This loop is O(n^2). See BenchmarkNilCheckDeep*.
 	for b != c {
 		if postnum[b.ID] < postnum[c.ID] {
 			b = idom[b.ID]

--- a/src/cmd/compile/internal/ssa/export_test.go
+++ b/src/cmd/compile/internal/ssa/export_test.go
@@ -12,7 +12,7 @@ var Opt = opt
 var Deadcode = deadcode

 type DummyFrontend struct {
-	t *testing.T
+	t testing.TB
 }

 func (DummyFrontend) StringSym(s string) interface{} {

--- a/src/cmd/compile/internal/ssa/nilcheck.go
+++ b/src/cmd/compile/internal/ssa/nilcheck.go
@@ -33,6 +33,7 @@ func nilcheckelim(f *Func) {
 		var elim bool
 		// Walk up the dominator tree,
 		// looking for identical nil checks.
+		// TODO: This loop is O(n^2). See BenchmarkNilCheckDeep*.
 		for c := idom[b.ID]; c != nil; c = idom[c.ID] {
 			if checkedptr(c) == ptr {
 				elim = true

--- a/src/cmd/compile/internal/ssa/nilcheck_test.go
+++ b/src/cmd/compile/internal/ssa/nilcheck_test.go
+package ssa
+
+import (
+	"strconv"
+	"testing"
+)
+
+func BenchmarkNilCheckDeep1(b *testing.B)     { benchmarkNilCheckDeep(b, 1) }
+func BenchmarkNilCheckDeep10(b *testing.B)    { benchmarkNilCheckDeep(b, 10) }
+func BenchmarkNilCheckDeep100(b *testing.B)   { benchmarkNilCheckDeep(b, 100) }
+func BenchmarkNilCheckDeep1000(b *testing.B)  { benchmarkNilCheckDeep(b, 1000) }
+func BenchmarkNilCheckDeep10000(b *testing.B) { benchmarkNilCheckDeep(b, 10000) }
+
+// benchmarkNilCheckDeep is a stress test of nilcheckelim.
+// It uses the worst possible input: A linear string of
+// nil checks, none of which can be eliminated.
+// Run with multiple depths to observe big-O behavior.
+func benchmarkNilCheckDeep(b *testing.B, depth int) {
+	ptrType := &TypeImpl{Size_: 8, Ptr: true, Name: "testptr"} // dummy for testing
+
+	var blocs []bloc
+	blocs = append(blocs,
+		Bloc("entry",
+			Valu("mem", OpArg, TypeMem, 0, ".mem"),
+			Goto(blockn(0)),
+		),
+	)
+	for i := 0; i < depth; i++ {
+		blocs = append(blocs,
+			Bloc(blockn(i),
+				Valu(ptrn(i), OpGlobal, ptrType, 0, nil),
+				Valu(booln(i), OpIsNonNil, TypeBool, 0, nil, ptrn(i)),
+				If(booln(i), blockn(i+1), "exit"),
+			),
+		)
+	}
+	blocs = append(blocs,
+		Bloc(blockn(depth), Goto("exit")),
+		Bloc("exit", Exit("mem")),
+	)
+
+	c := NewConfig("amd64", DummyFrontend{b})
+	fun := Fun(c, "entry", blocs...)
+
+	CheckFunc(fun.f)
+	b.SetBytes(int64(depth)) // helps for eyeballing linearity
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		nilcheckelim(fun.f)
+	}
+}
+
+func blockn(n int) string { return "b" + strconv.Itoa(n) }
+func ptrn(n int) string   { return "p" + strconv.Itoa(n) }
+func booln(n int) string  { return "c" + strconv.Itoa(n) }