Commit 150b7286 authored by Cherry Zhang

runtime: use native CAS and memory barrier on ARMv7

This gets us around the kernel helpers on ARMv7.

It is slightly faster than using the kernel helper.

name           old time/op  new time/op  delta
AtomicLoad-4   72.5ns ± 0%  69.5ns ± 0%  -4.08%  (p=0.000 n=9+9)
AtomicStore-4  57.6ns ± 1%  54.4ns ± 0%  -5.58%  (p=0.000 n=10+9)
[Geo mean]     64.6ns       61.5ns       -4.83%

If performance is really critical, we can even do compiler intrinsics
on GOARM=7.

Fixes #23792.

Change-Id: I36497d880890b26bdf01e048b542bd5fd7b17d23
Reviewed-on: https://go-review.googlesource.com/94076
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
parent 1b6fec86
......@@ -26,3 +26,19 @@ func BenchmarkAtomicStore64(b *testing.B) {
atomic.Store64(&x, 0)
}
}
// BenchmarkAtomicLoad times atomic.Load on a uint32 local. The local's
// address is assigned to sink (declared outside this function) before the
// timed loop starts.
func BenchmarkAtomicLoad(b *testing.B) {
	var word uint32
	sink = &word
	// Count down from b.N; the loaded value itself is discarded.
	for left := b.N; left > 0; left-- {
		_ = atomic.Load(&word)
	}
}
// BenchmarkAtomicStore times atomic.Store of the constant 0 into a uint32
// local. The local's address is assigned to sink (declared outside this
// function) before the timed loop starts.
func BenchmarkAtomicStore(b *testing.B) {
	var word uint32
	sink = &word
	// Count down from b.N, storing the same value on every iteration.
	for left := b.N; left > 0; left-- {
		atomic.Store(&word, 0)
	}
}
......@@ -24,7 +24,14 @@
// cas<> transfers control to the fixed address 0xffff0fc0 by writing that
// address straight into the program counter. No link register is set up
// here, so whatever runs at that address returns to cas<>'s caller.
// NOTE(review): 0xffff0fc0 is presumably the Linux __kuser_cmpxchg helper
// referred to in kernelcas<> — confirm against the kernel documentation.
TEXT cas<>(SB),NOSPLIT,$0
MOVW $0xffff0fc0, R15 // R15 is hardware PC.
TEXT runtime∕internal∕atomic·Cas(SB),NOSPLIT,$0
// Cas selects a compare-and-swap implementation at run time based on
// runtime·goarm: values below 7 go to kernelcas<>, and 7 or above goes to
// armcas. Both targets are reached with JMP rather than a call, and the
// function is declared NOFRAME with a zero-size frame.
TEXT runtime∕internal∕atomic·Cas(SB),NOSPLIT|NOFRAME,$0
MOVB runtime·goarm(SB), R11 // R11 = goarm
CMP $7, R11
BLT 2(PC) // goarm < 7: skip the armcas jump and fall to the kernelcas jump
JMP ·armcas(SB) // goarm >= 7
JMP ·kernelcas<>(SB) // goarm < 7
TEXT runtime∕internal∕atomic·kernelcas<>(SB),NOSPLIT,$0
MOVW ptr+0(FP), R2
// trigger potential paging fault here,
// because we don't know how to traceback through __kuser_cmpxchg
......
......@@ -489,13 +489,18 @@ TEXT runtime·usleep(SB),NOSPLIT,$12
// even on single-core devices. The kernel helper takes care of all of
// this for us.
TEXT publicationBarrier<>(SB),NOSPLIT,$0
TEXT kernelPublicationBarrier<>(SB),NOSPLIT,$0
// void __kuser_memory_barrier(void);
MOVW $0xffff0fa0, R15 // R15 is hardware PC.
// Load the helper's address into R11 and call it indirectly; once the
// helper returns, execution continues at the RET below.
MOVW $0xffff0fa0, R11
CALL (R11)
RET
TEXT ·publicationBarrier(SB),NOSPLIT,$0
BL publicationBarrier<>(SB)
RET
MOVB ·goarm(SB), R11 // R11 = goarm
CMP $7, R11
BLT 2(PC) // goarm < 7: skip the armPublicationBarrier jump
JMP ·armPublicationBarrier(SB) // goarm >= 7
JMP kernelPublicationBarrier<>(SB) // extra layer so this function is leaf and no SP adjustment on GOARM=7
TEXT runtime·osyield(SB),NOSPLIT,$0
MOVW $SYS_sched_yield, R7
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment