Commit 3fca7306 authored by Alessandro Arzilli's avatar Alessandro Arzilli Committed by Alberto Donizetti

cmd/compile: optimize scope tracking

1. Detect and remove the markers of lexical scopes that don't contain
any variables early in noder, instead of waiting until the end of DWARF
generation.
This saves memory by never allocating some of the markers and optimizes
some of the algorithms that depend on the number of scopes.

2. Assign scopes to Progs by doing, for each Prog, a binary search over
the markers array. This is faster, compared to sorting the Prog list
because there are fewer markers than there are Progs.

completed   15 of   15, estimated time remaining 0s (eta 2:30PM)
name        old time/op       new time/op       delta
Template          274ms ± 5%        260ms ± 6%  -4.91%  (p=0.000 n=15+15)
Unicode           126ms ± 5%        127ms ± 9%    ~     (p=0.856 n=13+15)
GoTypes           861ms ± 5%        857ms ± 4%    ~     (p=0.595 n=15+15)
Compiler          4.11s ± 4%        4.12s ± 5%    ~     (p=1.000 n=15+15)
SSA               10.7s ± 2%        10.9s ± 4%  +2.01%  (p=0.002 n=14+14)
Flate             163ms ± 4%        166ms ± 9%    ~     (p=0.134 n=14+15)
GoParser          203ms ± 4%        205ms ± 6%    ~     (p=0.461 n=15+15)
Reflect           544ms ± 5%        549ms ± 4%    ~     (p=0.174 n=15+15)
Tar               249ms ± 9%        245ms ± 6%    ~     (p=0.285 n=15+15)
XML               286ms ± 4%        291ms ± 5%    ~     (p=0.081 n=15+15)
[Geo mean]        528ms             529ms       +0.14%

name        old user-time/op  new user-time/op  delta
Template          358ms ± 7%        354ms ± 5%    ~     (p=0.242 n=14+15)
Unicode           189ms ±11%        191ms ±10%    ~     (p=0.438 n=15+15)
GoTypes           1.15s ± 4%        1.14s ± 3%    ~     (p=0.405 n=15+15)
Compiler          5.36s ± 6%        5.35s ± 5%    ~     (p=0.588 n=15+15)
SSA               14.6s ± 3%        15.0s ± 4%  +2.58%  (p=0.000 n=15+15)
Flate             214ms ±12%        216ms ± 8%    ~     (p=0.539 n=15+15)
GoParser          267ms ± 6%        270ms ± 5%    ~     (p=0.569 n=15+15)
Reflect           712ms ± 5%        709ms ± 4%    ~     (p=0.894 n=15+15)
Tar               329ms ± 8%        330ms ± 5%    ~     (p=0.974 n=14+15)
XML               371ms ± 3%        381ms ± 5%  +2.85%  (p=0.002 n=13+15)
[Geo mean]        705ms             709ms       +0.62%

name        old alloc/op      new alloc/op      delta
Template         38.0MB ± 0%       38.4MB ± 0%  +1.27%  (p=0.000 n=15+14)
Unicode          28.8MB ± 0%       28.8MB ± 0%  +0.16%  (p=0.000 n=15+14)
GoTypes           112MB ± 0%        114MB ± 0%  +1.64%  (p=0.000 n=15+15)
Compiler          465MB ± 0%        474MB ± 0%  +1.91%  (p=0.000 n=15+15)
SSA              1.48GB ± 0%       1.53GB ± 0%  +3.32%  (p=0.000 n=15+15)
Flate            24.3MB ± 0%       24.8MB ± 0%  +1.77%  (p=0.000 n=14+15)
GoParser         30.7MB ± 0%       31.1MB ± 0%  +1.27%  (p=0.000 n=15+15)
Reflect          76.3MB ± 0%       77.1MB ± 0%  +1.03%  (p=0.000 n=15+15)
Tar              39.2MB ± 0%       39.6MB ± 0%  +1.02%  (p=0.000 n=13+15)
XML              41.5MB ± 0%       42.1MB ± 0%  +1.45%  (p=0.000 n=15+15)
[Geo mean]       77.5MB            78.7MB       +1.48%

name        old allocs/op     new allocs/op     delta
Template           385k ± 0%         387k ± 0%  +0.54%  (p=0.000 n=15+15)
Unicode            342k ± 0%         343k ± 0%  +0.10%  (p=0.000 n=15+15)
GoTypes           1.19M ± 0%        1.19M ± 0%  +0.64%  (p=0.000 n=14+15)
Compiler          4.51M ± 0%        4.54M ± 0%  +0.53%  (p=0.000 n=15+15)
SSA               12.2M ± 0%        12.4M ± 0%  +1.16%  (p=0.000 n=15+15)
Flate              234k ± 0%         236k ± 0%  +0.63%  (p=0.000 n=14+15)
GoParser           318k ± 0%         320k ± 0%  +0.63%  (p=0.000 n=15+15)
Reflect            974k ± 0%         977k ± 0%  +0.28%  (p=0.000 n=15+15)
Tar                395k ± 0%         397k ± 0%  +0.38%  (p=0.000 n=15+13)
XML                404k ± 0%         407k ± 0%  +0.55%  (p=0.000 n=15+15)
[Geo mean]         794k              799k       +0.55%

name        old text-bytes    new text-bytes    delta
HelloSize         680kB ± 0%        680kB ± 0%    ~     (all equal)

name        old data-bytes    new data-bytes    delta
HelloSize        9.62kB ± 0%       9.62kB ± 0%    ~     (all equal)

name        old bss-bytes     new bss-bytes     delta
HelloSize         125kB ± 0%        125kB ± 0%    ~     (all equal)

name        old exe-bytes     new exe-bytes     delta
HelloSize        1.11MB ± 0%       1.12MB ± 0%  +1.11%  (p=0.000 n=15+15)

Change-Id: I95a0173ee28c52be1a4851d2a6e389529e74bf28
Reviewed-on: https://go-review.googlesource.com/95396
Run-TryBot: Alberto Donizetti <alb.donizetti@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarMatthew Dempsky <mdempsky@google.com>
Reviewed-by: 's avatarHeschi Kreinick <heschi@google.com>
parent 1023b016
......@@ -128,6 +128,12 @@ type noder struct {
pragcgobuf string
err chan syntax.Error
scope ScopeID
// scopeVars is a stack tracking the number of variables declared in the
// current function at the moment each open scope was opened.
scopeVars []int
lastCloseScopePos syntax.Pos
}
func (p *noder) funchdr(n *Node) ScopeID {
......@@ -147,6 +153,7 @@ func (p *noder) openScope(pos syntax.Pos) {
if trackScopes {
Curfn.Func.Parents = append(Curfn.Func.Parents, p.scope)
p.scopeVars = append(p.scopeVars, len(Curfn.Func.Dcl))
p.scope = ScopeID(len(Curfn.Func.Parents))
p.markScope(pos)
......@@ -154,9 +161,34 @@ func (p *noder) openScope(pos syntax.Pos) {
}
func (p *noder) closeScope(pos syntax.Pos) {
p.lastCloseScopePos = pos
types.Popdcl()
if trackScopes {
scopeVars := p.scopeVars[len(p.scopeVars)-1]
p.scopeVars = p.scopeVars[:len(p.scopeVars)-1]
if scopeVars == len(Curfn.Func.Dcl) {
// no variables were declared in this scope, so we can retract it.
if int(p.scope) != len(Curfn.Func.Parents) {
Fatalf("scope tracking inconsistency, no variables declared but scopes were not retracted")
}
p.scope = Curfn.Func.Parents[p.scope-1]
Curfn.Func.Parents = Curfn.Func.Parents[:len(Curfn.Func.Parents)-1]
nmarks := len(Curfn.Func.Marks)
Curfn.Func.Marks[nmarks-1].Scope = p.scope
prevScope := ScopeID(0)
if nmarks >= 2 {
prevScope = Curfn.Func.Marks[nmarks-2].Scope
}
if Curfn.Func.Marks[nmarks-1].Scope == prevScope {
Curfn.Func.Marks = Curfn.Func.Marks[:nmarks-1]
}
return
}
p.scope = Curfn.Func.Parents[p.scope-1]
p.markScope(pos)
......@@ -177,12 +209,7 @@ func (p *noder) markScope(pos syntax.Pos) {
// "if" statements, as their implicit blocks always end at the same
// position as an explicit block.
func (p *noder) closeAnotherScope() {
types.Popdcl()
if trackScopes {
p.scope = Curfn.Func.Parents[p.scope-1]
Curfn.Func.Marks[len(Curfn.Func.Marks)-1].Scope = p.scope
}
p.closeScope(p.lastCloseScopePos)
}
// linkname records a //go:linkname directive.
......
......@@ -87,21 +87,12 @@ func scopePCs(fnsym *obj.LSym, marks []Mark, dwarfScopes []dwarf.Scope) {
pcs = append(pcs, scopedPCs{start: p0.Pc, end: fnsym.Size, pos: p0.Pos})
}
// Sort PCs by source position, and walk in parallel with
// scope marks to assign a lexical scope to each PC interval.
sort.Sort(pcsByPos(pcs))
var marki int
var scope ScopeID
// Assign scopes to each chunk of instructions.
for i := range pcs {
for marki < len(marks) && !xposBefore(pcs[i].pos, marks[marki].Pos) {
scope = marks[marki].Scope
marki++
}
pcs[i].scope = scope
pcs[i].scope = findScope(marks, pcs[i].pos)
}
// Re-sort to create sorted PC ranges for each DWARF scope.
sort.Sort(pcsByPC(pcs))
// Create sorted PC ranges for each DWARF scope.
for _, pc := range pcs {
r := &dwarfScopes[pc.scope].Ranges
if i := len(*r); i > 0 && (*r)[i-1].End == pc.start {
......@@ -113,23 +104,6 @@ func scopePCs(fnsym *obj.LSym, marks []Mark, dwarfScopes []dwarf.Scope) {
}
func compactScopes(dwarfScopes []dwarf.Scope) []dwarf.Scope {
// Forward pass to collapse empty scopes into parents.
remap := make([]int32, len(dwarfScopes))
j := int32(1)
for i := 1; i < len(dwarfScopes); i++ {
s := &dwarfScopes[i]
s.Parent = remap[s.Parent]
if len(s.Vars) == 0 {
dwarfScopes[s.Parent].UnifyRanges(s)
remap[i] = s.Parent
continue
}
remap[i] = j
dwarfScopes[j] = *s
j++
}
dwarfScopes = dwarfScopes[:j]
// Reverse pass to propagate PC ranges to parent scopes.
for i := len(dwarfScopes) - 1; i > 0; i-- {
s := &dwarfScopes[i]
......@@ -147,14 +121,6 @@ func (s pcsByPC) Less(i, j int) bool {
return s[i].start < s[j].start
}
type pcsByPos []scopedPCs
func (s pcsByPos) Len() int { return len(s) }
func (s pcsByPos) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s pcsByPos) Less(i, j int) bool {
return xposBefore(s[i].pos, s[j].pos)
}
type varsByScopeAndOffset struct {
vars []*dwarf.Var
scopes []ScopeID
......
......@@ -1304,6 +1304,11 @@ func putscope(ctxt Context, s *FnState, scopes []Scope, curscope int32, fnabbrev
return curscope
}
if len(scopes[curscope].Vars) == 0 {
curscope = putscope(ctxt, s, scopes, curscope, fnabbrev, encbuf)
continue
}
if len(scope.Ranges) == 1 {
Uleb128put(ctxt, s.Info, DW_ABRV_LEXICAL_BLOCK_SIMPLE)
putattr(ctxt, s.Info, DW_ABRV_LEXICAL_BLOCK_SIMPLE, DW_FORM_addr, DW_CLS_ADDRESS, scope.Ranges[0].Start, s.StartPC)
......@@ -1316,6 +1321,7 @@ func putscope(ctxt Context, s *FnState, scopes []Scope, curscope int32, fnabbrev
}
curscope = putscope(ctxt, s, scopes, curscope, fnabbrev, encbuf)
Uleb128put(ctxt, s.Info, 0)
}
return curscope
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment