Commit 79561a84 authored by Austin Clements

runtime: simplify reflectcall write barriers

Currently reflectcall has a subtle dance with write barriers where the
assembly code copies the result values from the stack to the in-heap
argument frame without write barriers and then calls into the runtime
after the fact to invoke the necessary write barriers.

For the hybrid barrier (and for ROC), we need to switch to a
*pre*-write write barrier, which is very difficult to do with the
current setup. We could tie ourselves in knots of subtle reasoning
about why it's okay in this particular case to have a post-write write
barrier, but this commit instead takes a different approach. Rather
than making things more complex, this simplifies reflection calls so
that the argument copy is done in Go using normal bulk write barriers.

The one difficulty with this approach is that calling into Go requires
putting arguments on the stack, but the call* functions "donate" their
entire stack frame to the called function. We can get away with this
now because the copy avoids using the stack and has copied the results
out before we clobber the stack frame to call into the write barrier.
The solution in this CL is to call another function, passing arguments
in registers instead of on the stack, and let that other function
reserve more stack space and setup the arguments for the runtime.

This approach seemed to work out the best. I also tried making the
call* functions reserve 32 extra bytes of frame for the write barrier
arguments and adjust SP up by 32 bytes around the call. However, even
with the necessary changes to the assembler to correct the spdelta
table, the runtime was still having trouble with the frame layout (and
the changes to the assembler caused many other things that do strange
things with the SP to fail to assemble). The approach I took doesn't
require any funny business with the SP.

Updates #17503.

Change-Id: Ie2bb0084b24d6cff38b5afb218b9e0534ad2119e
Reviewed-on: https://go-review.googlesource.com/31655
Run-TryBot: Austin Clements <austin@google.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
parent 1c3ab3d4
......@@ -447,7 +447,7 @@ func (v Value) call(op string, in []Value) []Value {
// because the Values returned by this function contain pointers to the args object,
// and will thus keep the args object alive indefinitely.
memclr(args, retOffset)
// Copy return values out of args.
// Wrap Values around return values in args.
ret = make([]Value, nout)
off = retOffset
for i := 0; i < nout; i++ {
......
......@@ -477,6 +477,7 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-20; \
PCDATA $PCDATA_StackMapIndex, $0; \
CALL AX; \
/* copy return values back */ \
MOVL argtype+0(FP), DX; \
MOVL argptr+8(FP), DI; \
MOVL argsize+12(FP), CX; \
MOVL retoffset+16(FP), BX; \
......@@ -484,17 +485,19 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-20; \
ADDL BX, DI; \
ADDL BX, SI; \
SUBL BX, CX; \
REP;MOVSB; \
/* execute write barrier updates */ \
MOVL argtype+0(FP), DX; \
MOVL argptr+8(FP), DI; \
MOVL argsize+12(FP), CX; \
MOVL retoffset+16(FP), BX; \
MOVL DX, 0(SP); \
MOVL DI, 4(SP); \
MOVL CX, 8(SP); \
MOVL BX, 12(SP); \
CALL runtime·callwritebarrier(SB); \
CALL callRet<>(SB); \
RET
// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $16-0
MOVL DX, 0(SP) // typ: *_type describing the argument frame
MOVL DI, 4(SP) // dst: in-heap argument frame (return value area)
MOVL SI, 8(SP) // src: return values on the donated stack frame
MOVL CX, 12(SP) // size: number of result bytes to move
CALL runtime·reflectcallmove(SB) // memmove + write barriers in Go
RET
CALLFN(·call16, 16)
......
......@@ -416,8 +416,6 @@ TEXT reflect·call(SB), NOSPLIT, $0-0
TEXT ·reflectcall(SB), NOSPLIT, $0-32
MOVLQZX argsize+24(FP), CX
// NOTE(rsc): No call16, because CALLFN needs four words
// of argument space to invoke callwritebarrier.
DISPATCH(runtime·call32, 32)
DISPATCH(runtime·call64, 64)
DISPATCH(runtime·call128, 128)
......@@ -460,24 +458,28 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-32; \
PCDATA $PCDATA_StackMapIndex, $0; \
CALL (DX); \
/* copy return values back */ \
MOVQ argtype+0(FP), DX; \
MOVQ argptr+16(FP), DI; \
MOVLQZX argsize+24(FP), CX; \
MOVLQZX retoffset+28(FP), BX; \
MOVLQZX retoffset+28(FP), BX; \
MOVQ SP, SI; \
ADDQ BX, DI; \
ADDQ BX, SI; \
SUBQ BX, CX; \
REP;MOVSB; \
/* execute write barrier updates */ \
MOVQ argtype+0(FP), DX; \
MOVQ argptr+16(FP), DI; \
MOVLQZX argsize+24(FP), CX; \
MOVLQZX retoffset+28(FP), BX; \
MOVQ DX, 0(SP); \
MOVQ DI, 8(SP); \
MOVQ CX, 16(SP); \
MOVQ BX, 24(SP); \
CALL runtime·callwritebarrier(SB); \
CALL callRet<>(SB); \
RET
// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $32-0
NO_LOCAL_POINTERS // frame holds only reflectcallmove's outgoing args; no live local pointers for GC
MOVQ DX, 0(SP) // typ: *_type describing the argument frame
MOVQ DI, 8(SP) // dst: in-heap argument frame (return value area)
MOVQ SI, 16(SP) // src: return values on the donated stack frame
MOVQ CX, 24(SP) // size: number of result bytes to move
CALL runtime·reflectcallmove(SB) // memmove + write barriers in Go
RET
CALLFN(·call32, 32)
......
......@@ -371,6 +371,7 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-20; \
MOVL (DX), AX; \
CALL AX; \
/* copy return values back */ \
MOVL argtype+0(FP), DX; \
MOVL argptr+8(FP), DI; \
MOVL argsize+12(FP), CX; \
MOVL retoffset+16(FP), BX; \
......@@ -378,17 +379,19 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-20; \
ADDL BX, DI; \
ADDL BX, SI; \
SUBL BX, CX; \
REP;MOVSB; \
/* execute write barrier updates */ \
MOVL argtype+0(FP), DX; \
MOVL argptr+8(FP), DI; \
MOVL argsize+12(FP), CX; \
MOVL retoffset+16(FP), BX; \
MOVL DX, 0(SP); \
MOVL DI, 4(SP); \
MOVL CX, 8(SP); \
MOVL BX, 12(SP); \
CALL runtime·callwritebarrier(SB); \
CALL callRet<>(SB); \
RET
// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $16-0
MOVL DX, 0(SP) // typ: *_type describing the argument frame
MOVL DI, 4(SP) // dst: in-heap argument frame (return value area)
MOVL SI, 8(SP) // src: return values on the donated stack frame
MOVL CX, 12(SP) // size: number of result bytes to move
CALL runtime·reflectcallmove(SB) // memmove + write barriers in Go
RET
CALLFN(·call16, 16)
......
......@@ -406,6 +406,7 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-20; \
PCDATA $PCDATA_StackMapIndex, $0; \
BL (R0); \
/* copy return values back */ \
MOVW argtype+0(FP), R4; \
MOVW argptr+8(FP), R0; \
MOVW argsize+12(FP), R2; \
MOVW retoffset+16(FP), R3; \
......@@ -413,24 +414,19 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-20; \
ADD R3, R1; \
ADD R3, R0; \
SUB R3, R2; \
loop: \
CMP $0, R2; \
B.EQ end; \
MOVBU.P 1(R1), R5; \
MOVBU.P R5, 1(R0); \
SUB $1, R2, R2; \
B loop; \
end: \
/* execute write barrier updates */ \
MOVW argtype+0(FP), R1; \
MOVW argptr+8(FP), R0; \
MOVW argsize+12(FP), R2; \
MOVW retoffset+16(FP), R3; \
MOVW R1, 4(R13); \
MOVW R0, 8(R13); \
MOVW R2, 12(R13); \
MOVW R3, 16(R13); \
BL runtime·callwritebarrier(SB); \
BL callRet<>(SB); \
RET
// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $16-0
MOVW R4, 4(R13) // typ: *_type describing the argument frame (args start past the LR slot at 0(R13))
MOVW R0, 8(R13) // dst: in-heap argument frame (return value area)
MOVW R1, 12(R13) // src: return values on the donated stack frame
MOVW R2, 16(R13) // size: number of result bytes to move
BL runtime·reflectcallmove(SB) // memmove + write barriers in Go
RET
CALLFN(·call16, 16)
......
......@@ -335,8 +335,6 @@ TEXT reflect·call(SB), NOSPLIT, $0-0
TEXT ·reflectcall(SB), NOSPLIT, $-8-32
MOVWU argsize+24(FP), R16
// NOTE(rsc): No call16, because CALLFN needs four words
// of argument space to invoke callwritebarrier.
DISPATCH(runtime·call32, 32)
DISPATCH(runtime·call64, 64)
DISPATCH(runtime·call128, 128)
......@@ -387,33 +385,27 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \
PCDATA $PCDATA_StackMapIndex, $0; \
BL (R0); \
/* copy return values back */ \
MOVD argtype+0(FP), R7; \
MOVD arg+16(FP), R3; \
MOVWU n+24(FP), R4; \
MOVWU retoffset+28(FP), R6; \
MOVD RSP, R5; \
ADD $8, RSP, R5; \
ADD R6, R5; \
ADD R6, R3; \
SUB R6, R4; \
ADD $(8-1), R5; \
SUB $1, R3; \
ADD R5, R4; \
loop: \
CMP R5, R4; \
BEQ end; \
MOVBU.W 1(R5), R6; \
MOVBU.W R6, 1(R3); \
B loop; \
end: \
/* execute write barrier updates */ \
MOVD argtype+0(FP), R7; \
MOVD arg+16(FP), R3; \
MOVWU n+24(FP), R4; \
MOVWU retoffset+28(FP), R6; \
MOVD R7, 8(RSP); \
MOVD R3, 16(RSP); \
MOVD R4, 24(RSP); \
MOVD R6, 32(RSP); \
BL runtime·callwritebarrier(SB); \
BL callRet<>(SB); \
RET
// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $40-0
MOVD R7, 8(RSP) // typ: *_type describing the argument frame (args start past the saved-LR word)
MOVD R3, 16(RSP) // dst: in-heap argument frame (return value area)
MOVD R5, 24(RSP) // src: return values on the donated stack frame
MOVD R4, 32(RSP) // size: number of result bytes to move
BL runtime·reflectcallmove(SB) // memmove + write barriers in Go
RET
// These have 8 added to make the overall frame size a multiple of 16,
......
......@@ -309,8 +309,6 @@ TEXT reflect·call(SB), NOSPLIT, $0-0
TEXT ·reflectcall(SB), NOSPLIT, $-8-32
MOVWU argsize+24(FP), R1
// NOTE(rsc): No call16, because CALLFN needs four words
// of argument space to invoke callwritebarrier.
DISPATCH(runtime·call32, 32)
DISPATCH(runtime·call64, 64)
DISPATCH(runtime·call128, 128)
......@@ -361,33 +359,27 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \
PCDATA $PCDATA_StackMapIndex, $0; \
JAL (R4); \
/* copy return values back */ \
MOVV argtype+0(FP), R5; \
MOVV arg+16(FP), R1; \
MOVWU n+24(FP), R2; \
MOVWU retoffset+28(FP), R4; \
MOVV R29, R3; \
ADDV $8, R29, R3; \
ADDV R4, R3; \
ADDV R4, R1; \
SUBVU R4, R2; \
ADDV $8, R3; \
ADDV R3, R2; \
loop: \
BEQ R3, R2, end; \
MOVBU (R3), R4; \
ADDV $1, R3; \
MOVBU R4, (R1); \
ADDV $1, R1; \
JMP loop; \
end: \
/* execute write barrier updates */ \
MOVV argtype+0(FP), R5; \
MOVV arg+16(FP), R1; \
MOVWU n+24(FP), R2; \
MOVWU retoffset+28(FP), R4; \
MOVV R5, 8(R29); \
MOVV R1, 16(R29); \
MOVV R2, 24(R29); \
MOVV R4, 32(R29); \
JAL runtime·callwritebarrier(SB); \
JAL callRet<>(SB); \
RET
// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $32-0
MOVV R5, 8(R29) // typ: *_type describing the argument frame (args start past the saved-LR word)
MOVV R1, 16(R29) // dst: in-heap argument frame (return value area)
MOVV R3, 24(R29) // src: return values on the donated stack frame
MOVV R2, 32(R29) // size: number of result bytes to move
JAL runtime·reflectcallmove(SB) // memmove + write barriers in Go
RET
CALLFN(·call16, 16)
......
......@@ -363,8 +363,6 @@ TEXT reflect·call(SB), NOSPLIT, $0-0
TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32
MOVWZ argsize+24(FP), R3
// NOTE(rsc): No call16, because CALLFN needs four words
// of argument space to invoke callwritebarrier.
DISPATCH(runtime·call32, 32)
DISPATCH(runtime·call64, 64)
DISPATCH(runtime·call128, 128)
......@@ -418,33 +416,27 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \
BL (CTR); \
MOVD 24(R1), R2; \
/* copy return values back */ \
MOVD argtype+0(FP), R7; \
MOVD arg+16(FP), R3; \
MOVWZ n+24(FP), R4; \
MOVWZ retoffset+28(FP), R6; \
MOVD R1, R5; \
ADD $FIXED_FRAME, R1, R5; \
ADD R6, R5; \
ADD R6, R3; \
SUB R6, R4; \
ADD $(FIXED_FRAME-1), R5; \
SUB $1, R3; \
ADD R5, R4; \
loop: \
CMP R5, R4; \
BEQ end; \
MOVBZU 1(R5), R6; \
MOVBZU R6, 1(R3); \
BR loop; \
end: \
/* execute write barrier updates */ \
MOVD argtype+0(FP), R7; \
MOVD arg+16(FP), R3; \
MOVWZ n+24(FP), R4; \
MOVWZ retoffset+28(FP), R6; \
MOVD R7, FIXED_FRAME+0(R1); \
MOVD R3, FIXED_FRAME+8(R1); \
MOVD R4, FIXED_FRAME+16(R1); \
MOVD R6, FIXED_FRAME+24(R1); \
BL runtime·callwritebarrier(SB); \
BL callRet<>(SB); \
RET
// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $32-0
MOVD R7, FIXED_FRAME+0(R1) // typ: *_type describing the argument frame
MOVD R3, FIXED_FRAME+8(R1) // dst: in-heap argument frame (return value area)
MOVD R5, FIXED_FRAME+16(R1) // src: return values on the donated stack frame
MOVD R4, FIXED_FRAME+24(R1) // size: number of result bytes to move
BL runtime·reflectcallmove(SB) // memmove + write barriers in Go
RET
CALLFN(·call32, 32)
......
......@@ -320,8 +320,6 @@ TEXT reflect·call(SB), NOSPLIT, $0-0
TEXT ·reflectcall(SB), NOSPLIT, $-8-32
MOVWZ argsize+24(FP), R3
// NOTE(rsc): No call16, because CALLFN needs four words
// of argument space to invoke callwritebarrier.
DISPATCH(runtime·call32, 32)
DISPATCH(runtime·call64, 64)
DISPATCH(runtime·call128, 128)
......@@ -377,6 +375,7 @@ callFunction: \
PCDATA $PCDATA_StackMapIndex, $0; \
BL (R8); \
/* copy return values back */ \
MOVD argtype+0(FP), R7; \
MOVD arg+16(FP), R6; \
MOVWZ n+24(FP), R5; \
MOVD $stack-MAXSIZE(SP), R4; \
......@@ -384,27 +383,19 @@ callFunction: \
ADD R1, R4; \
ADD R1, R6; \
SUB R1, R5; \
loopRets: /* copy 256 bytes at a time */ \
CMP R5, $256; \
BLT tailRets; \
SUB $256, R5; \
MVC $256, 0(R4), 0(R6); \
MOVD $256(R4), R4; \
MOVD $256(R6), R6; \
BR loopRets; \
tailRets: /* copy remaining bytes */ \
CMP R5, $0; \
BEQ writeBarrierUpdates; \
SUB $1, R5; \
EXRL $callfnMVC<>(SB), R5; \
writeBarrierUpdates: \
/* execute write barrier updates */ \
MOVD argtype+0(FP), R1; \
MOVD arg+16(FP), R2; \
MOVWZ n+24(FP), R3; \
MOVWZ retoffset+28(FP), R4; \
STMG R1, R4, stack-MAXSIZE(SP); \
BL runtime·callwritebarrier(SB); \
BL callRet<>(SB); \
RET
// callRet copies return values back at the end of call*. This is a
// separate function so it can allocate stack space for the arguments
// to reflectcallmove. It does not follow the Go ABI; it expects its
// arguments in registers.
TEXT callRet<>(SB), NOSPLIT, $32-0
MOVD R7, 8(R15) // typ: *_type describing the argument frame (args start past the saved-LR word)
MOVD R6, 16(R15) // dst: in-heap argument frame (return value area)
MOVD R4, 24(R15) // src: return values on the donated stack frame
MOVD R5, 32(R15) // size: number of result bytes to move
BL runtime·reflectcallmove(SB) // memmove + write barriers in Go
RET
CALLFN(·call32, 32)
......
......@@ -215,19 +215,21 @@ func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size
heapBitsBulkBarrier(uintptr(dst), size&^(sys.PtrSize-1))
}
// callwritebarrier is invoked at the end of reflectcall, to execute
// write barrier operations to record the fact that a call's return
// values have just been copied to frame, starting at retoffset
// and continuing to framesize. The entire frame (not just the return
// values) is described by typ. Because the copy has already
// happened, we call writebarrierptr_nostore, and this is nosplit so
// the copy and write barrier appear atomic to GC.
// reflectcallmove is invoked by reflectcall to copy the return values
// out of the stack and into the heap, invoking the necessary write
// barriers. dst, src, and size describe the return value area to
// copy. typ describes the entire frame (not just the return values).
// typ may be nil, which indicates write barriers are not needed.
//
// It must be nosplit and must only call nosplit functions because the
// stack map of reflectcall is wrong.
//
//go:nosplit
func callwritebarrier(typ *_type, frame unsafe.Pointer, framesize, retoffset uintptr) {
if !writeBarrier.needed || typ == nil || typ.kind&kindNoPointers != 0 || framesize-retoffset < sys.PtrSize {
return
func reflectcallmove(typ *_type, dst, src unsafe.Pointer, size uintptr) {
memmove(dst, src, size)
if writeBarrier.needed && typ != nil && typ.kind&kindNoPointers == 0 && size >= sys.PtrSize {
heapBitsBulkBarrier(uintptr(dst), size)
}
heapBitsBulkBarrier(uintptr(add(frame, retoffset)), framesize-retoffset)
}
//go:nosplit
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment