• Josh Bleecher Snyder's avatar
    cmd/compile: avoid a spill in append fast path · 6b33b0e9
    Josh Bleecher Snyder authored
    Instead of spilling newlen, recalculate it.
    This removes a spill from the fast path,
    at the cost of a cheap recalculation
    on the (rare) growth path.
    This uses 8 bytes less of stack space.
    It generates two more bytes of code,
    but that is due to suboptimal register allocation;
    see far below.
    
    Runtime append microbenchmarks are all over the map,
    presumably due to incidental code movement.
    
    Sample code:
    
    func s(b []byte) []byte {
    	b = append(b, 1, 2, 3)
    	return b
    }
    
    Before:
    
    "".s t=1 size=160 args=0x30 locals=0x48
    	0x0000 00000 (append.go:8)	TEXT	"".s(SB), $72-48
    	0x0000 00000 (append.go:8)	MOVQ	(TLS), CX
    	0x0009 00009 (append.go:8)	CMPQ	SP, 16(CX)
    	0x000d 00013 (append.go:8)	JLS	149
    	0x0013 00019 (append.go:8)	SUBQ	$72, SP
    	0x0017 00023 (append.go:8)	FUNCDATA	$0, gclocals·6432f8c6a0d23fa7bee6c5d96f21a92a(SB)
    	0x0017 00023 (append.go:8)	FUNCDATA	$1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
    	0x0017 00023 (append.go:9)	MOVQ	"".b+88(FP), CX
    	0x001c 00028 (append.go:9)	LEAQ	3(CX), DX
    	0x0020 00032 (append.go:9)	MOVQ	DX, "".autotmp_0+64(SP)
    	0x0025 00037 (append.go:9)	MOVQ	"".b+96(FP), BX
    	0x002a 00042 (append.go:9)	CMPQ	DX, BX
    	0x002d 00045 (append.go:9)	JGT	$0, 86
    	0x002f 00047 (append.go:8)	MOVQ	"".b+80(FP), AX
    	0x0034 00052 (append.go:9)	MOVB	$1, (AX)(CX*1)
    	0x0038 00056 (append.go:9)	MOVB	$2, 1(AX)(CX*1)
    	0x003d 00061 (append.go:9)	MOVB	$3, 2(AX)(CX*1)
    	0x0042 00066 (append.go:10)	MOVQ	AX, "".~r1+104(FP)
    	0x0047 00071 (append.go:10)	MOVQ	DX, "".~r1+112(FP)
    	0x004c 00076 (append.go:10)	MOVQ	BX, "".~r1+120(FP)
    	0x0051 00081 (append.go:10)	ADDQ	$72, SP
    	0x0055 00085 (append.go:10)	RET
    	0x0056 00086 (append.go:9)	LEAQ	type.[]uint8(SB), AX
    	0x005d 00093 (append.go:9)	MOVQ	AX, (SP)
    	0x0061 00097 (append.go:9)	MOVQ	"".b+80(FP), BP
    	0x0066 00102 (append.go:9)	MOVQ	BP, 8(SP)
    	0x006b 00107 (append.go:9)	MOVQ	CX, 16(SP)
    	0x0070 00112 (append.go:9)	MOVQ	BX, 24(SP)
    	0x0075 00117 (append.go:9)	MOVQ	DX, 32(SP)
    	0x007a 00122 (append.go:9)	PCDATA	$0, $0
    	0x007a 00122 (append.go:9)	CALL	runtime.growslice(SB)
    	0x007f 00127 (append.go:9)	MOVQ	40(SP), AX
    	0x0084 00132 (append.go:9)	MOVQ	56(SP), BX
    	0x0089 00137 (append.go:8)	MOVQ	"".b+88(FP), CX
    	0x008e 00142 (append.go:9)	MOVQ	"".autotmp_0+64(SP), DX
    	0x0093 00147 (append.go:9)	JMP	52
    	0x0095 00149 (append.go:9)	NOP
    	0x0095 00149 (append.go:8)	CALL	runtime.morestack_noctxt(SB)
    	0x009a 00154 (append.go:8)	JMP	0
    
    After:
    
    "".s t=1 size=176 args=0x30 locals=0x40
    	0x0000 00000 (append.go:8)	TEXT	"".s(SB), $64-48
    	0x0000 00000 (append.go:8)	MOVQ	(TLS), CX
    	0x0009 00009 (append.go:8)	CMPQ	SP, 16(CX)
    	0x000d 00013 (append.go:8)	JLS	151
    	0x0013 00019 (append.go:8)	SUBQ	$64, SP
    	0x0017 00023 (append.go:8)	FUNCDATA	$0, gclocals·6432f8c6a0d23fa7bee6c5d96f21a92a(SB)
    	0x0017 00023 (append.go:8)	FUNCDATA	$1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
    	0x0017 00023 (append.go:9)	MOVQ	"".b+80(FP), CX
    	0x001c 00028 (append.go:9)	LEAQ	3(CX), DX
    	0x0020 00032 (append.go:9)	MOVQ	"".b+88(FP), BX
    	0x0025 00037 (append.go:9)	CMPQ	DX, BX
    	0x0028 00040 (append.go:9)	JGT	$0, 81
    	0x002a 00042 (append.go:8)	MOVQ	"".b+72(FP), AX
    	0x002f 00047 (append.go:9)	MOVB	$1, (AX)(CX*1)
    	0x0033 00051 (append.go:9)	MOVB	$2, 1(AX)(CX*1)
    	0x0038 00056 (append.go:9)	MOVB	$3, 2(AX)(CX*1)
    	0x003d 00061 (append.go:10)	MOVQ	AX, "".~r1+96(FP)
    	0x0042 00066 (append.go:10)	MOVQ	DX, "".~r1+104(FP)
    	0x0047 00071 (append.go:10)	MOVQ	BX, "".~r1+112(FP)
    	0x004c 00076 (append.go:10)	ADDQ	$64, SP
    	0x0050 00080 (append.go:10)	RET
    	0x0051 00081 (append.go:9)	LEAQ	type.[]uint8(SB), AX
    	0x0058 00088 (append.go:9)	MOVQ	AX, (SP)
    	0x005c 00092 (append.go:9)	MOVQ	"".b+72(FP), BP
    	0x0061 00097 (append.go:9)	MOVQ	BP, 8(SP)
    	0x0066 00102 (append.go:9)	MOVQ	CX, 16(SP)
    	0x006b 00107 (append.go:9)	MOVQ	BX, 24(SP)
    	0x0070 00112 (append.go:9)	MOVQ	DX, 32(SP)
    	0x0075 00117 (append.go:9)	PCDATA	$0, $0
    	0x0075 00117 (append.go:9)	CALL	runtime.growslice(SB)
    	0x007a 00122 (append.go:9)	MOVQ	40(SP), AX
    	0x007f 00127 (append.go:9)	MOVQ	48(SP), CX
    	0x0084 00132 (append.go:9)	MOVQ	56(SP), BX
    	0x0089 00137 (append.go:9)	ADDQ	$3, CX
    	0x008d 00141 (append.go:9)	MOVQ	CX, DX
    	0x0090 00144 (append.go:8)	MOVQ	"".b+80(FP), CX
    	0x0095 00149 (append.go:9)	JMP	47
    	0x0097 00151 (append.go:9)	NOP
    	0x0097 00151 (append.go:8)	CALL	runtime.morestack_noctxt(SB)
    	0x009c 00156 (append.go:8)	JMP	0
    
    Observe that the following sequence should
    write to DX directly instead of using CX as a
    temporary register; doing so would make the
    new code a strict improvement on the old:
    
    	0x007f 00127 (append.go:9)	MOVQ	48(SP), CX
    	0x0084 00132 (append.go:9)	MOVQ	56(SP), BX
    	0x0089 00137 (append.go:9)	ADDQ	$3, CX
    	0x008d 00141 (append.go:9)	MOVQ	CX, DX
    	0x0090 00144 (append.go:8)	MOVQ	"".b+80(FP), CX
    
    Change-Id: I4ee50b18fa53865901d2d7f86c2cbb54c6fa6924
    Reviewed-on: https://go-review.googlesource.com/21812
    Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
    TryBot-Result: Gobot Gobot <gobot@golang.org>
    Reviewed-by: Keith Randall <khr@golang.org>
    6b33b0e9
Name
Last commit
Last update
.github Loading commit data...
api Loading commit data...
doc Loading commit data...
lib/time Loading commit data...
misc Loading commit data...
src Loading commit data...
test Loading commit data...
.gitattributes Loading commit data...
.gitignore Loading commit data...
AUTHORS Loading commit data...
CONTRIBUTING.md Loading commit data...
CONTRIBUTORS Loading commit data...
LICENSE Loading commit data...
PATENTS Loading commit data...
README.md Loading commit data...
favicon.ico Loading commit data...
robots.txt Loading commit data...