Commit af40cbe8 authored by Frank Somers's avatar Frank Somers Committed by Ian Lance Taylor

runtime: use vDSO on linux/386 to improve time.Now performance

This change adds support for accelerating time.Now by using
the __vdso_clock_gettime fast-path via the vDSO on linux/386
if it is available.

When the vDSO path to the clocks is available, it is typically
5x-10x faster than the syscall path (see benchmark extract
below).  Two such calls are made for each time.Now() call
on most platforms as of go 1.9.

- Add vdso_linux_386.go, containing the ELF32 definitions
  for use by vdso_linux.go, the maximum array size, and
  the symbols to be located in the vDSO.

- Modify runtime.walltime and runtime.nanotime to check for
  and use the vDSO fast-path if available, or fall back to
  the existing syscall path.

- Reduce the stack reservations for runtime.walltime and
  runtime.monotime from 32 to 16 bytes. It appears the syscall
  path actually only needed 8 bytes, but 16 is now needed to
  cover the syscall and vDSO paths.

- Remove clearing DX from the syscall paths as clock_gettime
  only takes 2 args (BX, CX in syscall calling convention),
  so there should be no need to clear DX.

The included BenchmarkTimeNow was run with -cpu=1 -count=20
on an "Intel(R) Celeron(R) CPU J1900 @ 1.99GHz", comparing
released go 1.9.1 vs this change. This shows a gain in
performance on linux/386 (6.89x), and that no regression
occurred on linux/amd64 due to this change.

Kernel: linux/i686, GOOS=linux GOARCH=386
   name      old time/op  new time/op  delta
   TimeNow   978ns ± 0%   142ns ± 0%  -85.48%  (p=0.000 n=16+20)

Kernel: linux/x86_64, GOOS=linux GOARCH=amd64
   name      old time/op  new time/op  delta
   TimeNow   125ns ± 0%   125ns ± 0%   ~       (all equal)

Gains are more dramatic in virtualized environments,
presumably due to the overhead of virtualizing the syscall.

Fixes #22190

Change-Id: I2f83ce60cb1b8b310c9ced0706bb463c1b3aedf8
Reviewed-on: https://go-review.googlesource.com/69390
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarIan Lance Taylor <iant@golang.org>
parent bf237f53
......@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64,!arm,!arm64,!mips,!mipsle,!mips64,!mips64le,!s390x,!ppc64,!ppc64le
// +build !386,!amd64,!arm,!arm64,!mips,!mipsle,!mips64,!mips64le,!s390x,!ppc64,!ppc64le
package runtime
......
......@@ -203,12 +203,34 @@ TEXT runtime·mincore(SB),NOSPLIT,$0-16
RET
// func walltime() (sec int64, nsec int32)
TEXT runtime·walltime(SB), NOSPLIT, $32
TEXT runtime·walltime(SB), NOSPLIT, $16
// Stack layout, depending on call path:
// x(SP) vDSO INVOKE_SYSCALL
// 12 ts.tv_nsec ts.tv_nsec
// 8 ts.tv_sec ts.tv_sec
// 4 &ts -
// 0 CLOCK_<id> -
//
// If we take the vDSO path, we're calling a function with gcc calling convention.
// We're guaranteed 128 bytes on entry. We've taken 16, and the call uses another 4,
// leaving 108 for __vdso_clock_gettime to use.
MOVL runtime·__vdso_clock_gettime_sym(SB), AX
CMPL AX, $0
JEQ fallback
LEAL 8(SP), BX // &ts (struct timespec)
MOVL BX, 4(SP)
MOVL $0, 0(SP) // CLOCK_REALTIME
CALL AX
JMP finish
fallback:
MOVL $SYS_clock_gettime, AX
MOVL $0, BX // CLOCK_REALTIME
LEAL 8(SP), CX
MOVL $0, DX
INVOKE_SYSCALL
finish:
MOVL 8(SP), AX // sec
MOVL 12(SP), BX // nsec
......@@ -220,12 +242,25 @@ TEXT runtime·walltime(SB), NOSPLIT, $32
// int64 nanotime(void) so really
// void nanotime(int64 *nsec)
TEXT runtime·nanotime(SB), NOSPLIT, $32
TEXT runtime·nanotime(SB), NOSPLIT, $16
// See comments above in walltime() about stack space usage and layout.
MOVL runtime·__vdso_clock_gettime_sym(SB), AX
CMPL AX, $0
JEQ fallback
LEAL 8(SP), BX // &ts (struct timespec)
MOVL BX, 4(SP)
MOVL $1, 0(SP) // CLOCK_MONOTONIC
CALL AX
JMP finish
fallback:
MOVL $SYS_clock_gettime, AX
MOVL $1, BX // CLOCK_MONOTONIC
LEAL 8(SP), CX
MOVL $0, DX
INVOKE_SYSCALL
finish:
MOVL 8(SP), AX // sec
MOVL 12(SP), BX // nsec
......
......@@ -3,7 +3,7 @@
// license that can be found in the LICENSE file.
// +build linux
// +build amd64
// +build 386 amd64
package runtime
......
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
// ELF32 structure definitions for use by the Linux vDSO loader
type elfSym struct {
st_name uint32
st_value uint32
st_size uint32
st_info byte
st_other byte
st_shndx uint16
}
type elfVerdef struct {
vd_version uint16 /* Version revision */
vd_flags uint16 /* Version information */
vd_ndx uint16 /* Version Index */
vd_cnt uint16 /* Number of associated aux entries */
vd_hash uint32 /* Version name hash value */
vd_aux uint32 /* Offset in bytes to verdaux array */
vd_next uint32 /* Offset in bytes to next verdef entry */
}
type elfEhdr struct {
e_ident [_EI_NIDENT]byte /* Magic number and other info */
e_type uint16 /* Object file type */
e_machine uint16 /* Architecture */
e_version uint32 /* Object file version */
e_entry uint32 /* Entry point virtual address */
e_phoff uint32 /* Program header table file offset */
e_shoff uint32 /* Section header table file offset */
e_flags uint32 /* Processor-specific flags */
e_ehsize uint16 /* ELF header size in bytes */
e_phentsize uint16 /* Program header table entry size */
e_phnum uint16 /* Program header table entry count */
e_shentsize uint16 /* Section header table entry size */
e_shnum uint16 /* Section header table entry count */
e_shstrndx uint16 /* Section header string table index */
}
type elfPhdr struct {
p_type uint32 /* Segment type */
p_offset uint32 /* Segment file offset */
p_vaddr uint32 /* Segment virtual address */
p_paddr uint32 /* Segment physical address */
p_filesz uint32 /* Segment size in file */
p_memsz uint32 /* Segment size in memory */
p_flags uint32 /* Segment flags */
p_align uint32 /* Segment alignment */
}
type elfShdr struct {
sh_name uint32 /* Section name (string tbl index) */
sh_type uint32 /* Section type */
sh_flags uint32 /* Section flags */
sh_addr uint32 /* Section virtual addr at execution */
sh_offset uint32 /* Section file offset */
sh_size uint32 /* Section size in bytes */
sh_link uint32 /* Link to another section */
sh_info uint32 /* Additional section information */
sh_addralign uint32 /* Section alignment */
sh_entsize uint32 /* Entry size if section holds table */
}
type elfDyn struct {
d_tag int32 /* Dynamic entry type */
d_val uint32 /* Integer value */
}
type elfVerdaux struct {
vda_name uint32 /* Version or dependency names */
vda_next uint32 /* Offset in bytes to next verdaux entry */
}
const (
// vdsoArrayMax is the byte-size of a maximally sized array on this architecture.
// See cmd/compile/internal/x86/galign.go arch.MAXWIDTH initialization, but must also
// be constrained to max +ve int.
vdsoArrayMax = 1<<31 - 1
)
var sym_keys = []symbol_key{
{"__vdso_clock_gettime", 0xd35ec75, 0x6e43a318, &__vdso_clock_gettime_sym},
}
// initialize to fall back to syscall
var (
__vdso_clock_gettime_sym uintptr = 0
)
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build linux
// +build 386 amd64
package runtime_test
import (
"testing"
"time"
_ "unsafe"
)
// These tests are a little risky because they overwrite the __vdso_clock_gettime_sym value.
// It's normally initialized at startup and remains unchanged after that.
//go:linkname __vdso_clock_gettime_sym runtime.__vdso_clock_gettime_sym
var __vdso_clock_gettime_sym uintptr
func TestClockVDSOAndFallbackPaths(t *testing.T) {
// Check that we can call walltime() and nanotime() with and without their (1st) fast-paths.
// This just checks that fast and fallback paths can be called, rather than testing their
// results.
//
// Call them indirectly via time.Now(), so we don't need auxiliary .s files to allow us to
// use go:linkname to refer to the functions directly.
save := __vdso_clock_gettime_sym
if save == 0 {
t.Log("__vdso_clock_gettime symbol not found; fallback path will be used by default")
}
// Call with fast-path enabled (if vDSO symbol found at startup)
time.Now()
// Call with fast-path disabled
__vdso_clock_gettime_sym = 0
time.Now()
__vdso_clock_gettime_sym = save
}
func BenchmarkClockVDSOAndFallbackPaths(b *testing.B) {
run := func(b *testing.B) {
for i := 0; i < b.N; i++ {
// Call via time.Now() - see comment in test above.
time.Now()
}
}
save := __vdso_clock_gettime_sym
b.Run("vDSO", run)
__vdso_clock_gettime_sym = 0
b.Run("Fallback", run)
__vdso_clock_gettime_sym = save
}
func BenchmarkTimeNow(b *testing.B) {
for i := 0; i < b.N; i++ {
time.Now()
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment