Commit 1e27e480 authored by Josh Bleecher Snyder's avatar Josh Bleecher Snyder

cmd/compile: shrink liveness maps

The GC maps don't care about trailing non-pointers in args.
Work harder to eliminate them.

This should provide a slight speedup to everything that reads these
maps, mainly GC and stack copying.

The non-ptr-y runtime benchmarks happen to go from having a non-empty
args map to an empty args map, so they have a significant speedup.

name                old time/op  new time/op  delta
StackCopyPtr-8      80.2ms ± 4%  79.7ms ± 2%  -0.63%  (p=0.001 n=94+91)
StackCopy-8         63.3ms ± 3%  59.2ms ± 3%  -6.45%  (p=0.000 n=98+97)
StackCopyNoCache-8   107ms ± 3%    98ms ± 3%  -8.00%  (p=0.000 n=95+88)

It also shrinks object files a tiny bit:

name        old object-bytes  new object-bytes  delta
Template          476kB ± 0%        476kB ± 0%  -0.03%  (p=0.008 n=5+5)
Unicode           218kB ± 0%        218kB ± 0%  -0.09%  (p=0.008 n=5+5)
GoTypes          1.58MB ± 0%       1.58MB ± 0%  -0.03%  (p=0.008 n=5+5)
Compiler         6.25MB ± 0%       6.24MB ± 0%  -0.06%  (p=0.008 n=5+5)
SSA              15.9MB ± 0%       15.9MB ± 0%  -0.06%  (p=0.008 n=5+5)
Flate             304kB ± 0%        303kB ± 0%  -0.29%  (p=0.008 n=5+5)
GoParser          370kB ± 0%        370kB ± 0%  +0.02%  (p=0.008 n=5+5)
Reflect          1.27MB ± 0%       1.27MB ± 0%  -0.07%  (p=0.008 n=5+5)
Tar               421kB ± 0%        421kB ± 0%  -0.05%  (p=0.008 n=5+5)
XML               518kB ± 0%        517kB ± 0%  -0.06%  (p=0.008 n=5+5)
[Geo mean]        934kB             933kB       -0.07%

Note that some object files do grow;
this can happen because some maps that were
duplicates of each others must be stored separately.

Change-Id: Ie076891bd8e9d269ff2ff5435d5d25c721e0e31d
Reviewed-on: https://go-review.googlesource.com/104175
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarAustin Clements <austin@google.com>
parent 4d7cf3fe
......@@ -351,7 +351,7 @@ func (lv *Liveness) blockEffects(b *ssa.Block) *BlockEffects {
// on future calls with the same type t.
func onebitwalktype1(t *types.Type, off int64, bv bvec) {
if t.Align > 0 && off&int64(t.Align-1) != 0 {
Fatalf("onebitwalktype1: invalid initial alignment, %v", t)
Fatalf("onebitwalktype1: invalid initial alignment: type %v has alignment %d, but offset is %v", t, t.Align, off)
}
switch t.Etype {
......@@ -422,16 +422,6 @@ func onebitwalktype1(t *types.Type, off int64, bv bvec) {
}
}
// localWords returns the number of words of local variables.
func (lv *Liveness) localWords() int32 {
return int32(lv.stkptrsize / int64(Widthptr))
}
// argWords returns the number of words of in and out arguments.
func (lv *Liveness) argWords() int32 {
return int32(lv.fn.Type.ArgWidth() / int64(Widthptr))
}
// Generates live pointer value maps for arguments and local variables. The
// this argument and the in arguments are always assumed live. The vars
// argument is a slice of *Nodes.
......@@ -1217,11 +1207,38 @@ func (lv *Liveness) printDebug() {
// length of the bitmaps. All bitmaps are assumed to be of equal length. The
// remaining bytes are the raw bitmaps.
func (lv *Liveness) emit(argssym, livesym *obj.LSym) {
args := bvalloc(lv.argWords())
// Size args bitmaps to be just large enough to hold the largest pointer.
// First, find the largest Xoffset node we care about.
// (Nodes without pointers aren't in lv.vars; see livenessShouldTrack.)
var maxArgNode *Node
for _, n := range lv.vars {
switch n.Class() {
case PPARAM, PPARAMOUT:
if maxArgNode == nil || n.Xoffset > maxArgNode.Xoffset {
maxArgNode = n
}
}
}
// Next, find the offset of the largest pointer in the largest node.
var maxArgs int64
if maxArgNode != nil {
maxArgs = maxArgNode.Xoffset + typeptrdata(maxArgNode.Type)
}
// Size locals bitmaps to be stkptrsize sized.
// We cannot shrink them to only hold the largest pointer,
// because their size is used to calculate the beginning
// of the local variables frame.
// Further discussion in https://golang.org/cl/104175.
// TODO: consider trimming leading zeros.
// This would require shifting all bitmaps.
maxLocals := lv.stkptrsize
args := bvalloc(int32(maxArgs / int64(Widthptr)))
aoff := duint32(argssym, 0, uint32(len(lv.stackMaps))) // number of bitmaps
aoff = duint32(argssym, aoff, uint32(args.n)) // number of bits in each bitmap
locals := bvalloc(lv.localWords())
locals := bvalloc(int32(maxLocals / int64(Widthptr)))
loff := duint32(livesym, 0, uint32(len(lv.stackMaps))) // number of bitmaps
loff = duint32(livesym, loff, uint32(locals.n)) // number of bits in each bitmap
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment