Commit e9456239 authored by Lynn Boger's avatar Lynn Boger

runtime: improve CALLFN macro for ppc64x

The previous CALLFN macro was copying a single byte at a
time which is extremely inefficient on ppc64x. This changes
the macro so it copies 8 bytes at a time.

benchmark in reflect:
name                      old time/op    new time/op    delta
Call-8                       177ns ± 0%     165ns ± 0%    -6.78%  (p=1.000 n=1+1)
CallArgCopy/size=128-8       194ns ± 0%     140ns ± 0%   -27.84%  (p=1.000 n=1+1)
CallArgCopy/size=256-8       253ns ± 0%     159ns ± 0%   -37.15%  (p=1.000 n=1+1)
CallArgCopy/size=1024-8      612ns ± 0%     222ns ± 0%   -63.73%  (p=1.000 n=1+1)
CallArgCopy/size=4096-8     2.14µs ± 0%    0.53µs ± 0%   -75.01%  (p=1.000 n=1+1)
CallArgCopy/size=65536-8    33.0µs ± 0%     7.3µs ± 0%   -77.72%  (p=1.000 n=1+1)

Change-Id: I71f6ee788264e61bb072264d21b77b83592c9dca
Reviewed-on: https://go-review.googlesource.com/134635
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarCarlos Eduardo Seo <cseo@linux.vnet.ibm.com>
Reviewed-by: 's avatarMichael Munday <mike.munday@ibm.com>
parent e0bde68c
......@@ -390,15 +390,36 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \
/* copy arguments to stack */ \
MOVD arg+16(FP), R3; \
MOVWZ argsize+24(FP), R4; \
MOVD R1, R5; \
ADD $(FIXED_FRAME-1), R5; \
SUB $1, R3; \
ADD R5, R4; \
CMP R5, R4; \
BEQ 4(PC); \
MOVBZU 1(R3), R6; \
MOVBZU R6, 1(R5); \
BR -4(PC); \
MOVD R1, R5; \
CMP R4, $8; \
BLT tailsetup; \
/* copy 8 at a time if possible */ \
ADD $(FIXED_FRAME-8), R5; \
SUB $8, R3; \
top: \
MOVDU 8(R3), R7; \
MOVDU R7, 8(R5); \
SUB $8, R4; \
CMP R4, $8; \
BGE top; \
/* handle remaining bytes */ \
CMP $0, R4; \
BEQ callfn; \
ADD $7, R3; \
ADD $7, R5; \
BR tail; \
tailsetup: \
CMP $0, R4; \
BEQ callfn; \
ADD $(FIXED_FRAME-1), R5; \
SUB $1, R3; \
tail: \
MOVBU 1(R3), R6; \
MOVBU R6, 1(R5); \
SUB $1, R4; \
CMP $0, R4; \
BGT tail; \
callfn: \
/* call function */ \
MOVD f+8(FP), R11; \
MOVD (R11), R12; \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment