Commit 7e1b61c7 authored by Keith Randall

runtime: mark pages we return to kernel as NOHUGEPAGE

We return memory to the kernel with madvise(..., DONTNEED).
Also mark returned memory with NOHUGEPAGE to keep the kernel from
merging this memory into a huge page, effectively reallocating it.

Only known to be a problem on linux/{386,amd64,amd64p32} at the moment.
It may come up on other os/arch combinations in the future.

Fixes #8832

Change-Id: Ifffc6627a0296926e3f189a8a9b6e4bdb54c79eb
Reviewed-on: https://go-review.googlesource.com/5660
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
parent 6d1ebeb5
......@@ -12,4 +12,5 @@ const (
_PhysPageSize = goos_nacl*65536 + (1-goos_nacl)*4096 // 4k normally; 64k on NaCl
_PCQuantum = 1
_Int64Align = 4
hugePageSize = 1 << 21
)
......@@ -12,4 +12,5 @@ const (
_PhysPageSize = 4096
_PCQuantum = 1
_Int64Align = 8
hugePageSize = 1 << 21
)
......@@ -12,4 +12,5 @@ const (
_PhysPageSize = 65536*goos_nacl + 4096*(1-goos_nacl)
_PCQuantum = 1
_Int64Align = 8
hugePageSize = 1 << 21
)
......@@ -12,4 +12,5 @@ const (
_PhysPageSize = 65536*goos_nacl + 4096*(1-goos_nacl)
_PCQuantum = 4
_Int64Align = 4
hugePageSize = 0
)
......@@ -12,4 +12,5 @@ const (
_PhysPageSize = 65536
_PCQuantum = 4
_Int64Align = 8
hugePageSize = 0
)
......@@ -12,4 +12,5 @@ const (
_PhysPageSize = 65536
_PCQuantum = 4
_Int64Align = 8
hugePageSize = 0
)
......@@ -17,7 +17,9 @@ const (
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
_MADV_DONTNEED = 0x4
_MADV_DONTNEED = 0x4
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf
_SA_RESTART = 0x10000000
_SA_ONSTACK = 0x8000000
......
......@@ -17,7 +17,9 @@ const (
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
_MADV_DONTNEED = 0x4
_MADV_DONTNEED = 0x4
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf
_SA_RESTART = 0x10000000
_SA_ONSTACK = 0x8000000
......
......@@ -6,14 +6,19 @@ const (
_ENOMEM = 0xc
_EAGAIN = 0xb
_PROT_NONE = 0
_PROT_READ = 0x1
_PROT_WRITE = 0x2
_PROT_EXEC = 0x4
_MAP_ANON = 0x20
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
_MADV_DONTNEED = 0x4
_PROT_NONE = 0
_PROT_READ = 0x1
_PROT_WRITE = 0x2
_PROT_EXEC = 0x4
_MAP_ANON = 0x20
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
_MADV_DONTNEED = 0x4
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf
_SA_RESTART = 0x10000000
_SA_ONSTACK = 0x8000000
_SA_RESTORER = 0 // unused on ARM
......
......@@ -17,7 +17,9 @@ const (
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
_MADV_DONTNEED = 0x4
_MADV_DONTNEED = 0x4
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf
_SA_RESTART = 0x10000000
_SA_ONSTACK = 0x8000000
......
......@@ -17,7 +17,9 @@ const (
_MAP_PRIVATE = 0x2
_MAP_FIXED = 0x10
_MADV_DONTNEED = 0x4
_MADV_DONTNEED = 0x4
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf
_SA_RESTART = 0x10000000
_SA_ONSTACK = 0x8000000
......
......@@ -67,10 +67,29 @@ func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer {
}
// sysUnused tells the kernel that the n bytes starting at v are no longer
// needed, by issuing madvise(_MADV_DONTNEED) on the region.
//
// On platforms where hugePageSize is nonzero, a region that does not both
// start on a huge-page boundary and cover a whole number of huge pages is
// first marked _MADV_NOHUGEPAGE.
func sysUnused(v unsafe.Pointer, n uintptr) {
	// hugePageSize is a constant that is 0 on some architectures. Go rejects
	// a constant zero divisor at compile time even when the operation sits
	// behind a != 0 test, so copy the constant into a variable before using
	// it with the % operator.
	var s uintptr = hugePageSize
	if s != 0 && (uintptr(v)%s != 0 || n%s != 0) {
		// See issue 8832
		// Linux kernel bug: https://bugzilla.kernel.org/show_bug.cgi?id=93111
		// Mark the region as NOHUGEPAGE so the kernel's khugepaged
		// doesn't undo our DONTNEED request. khugepaged likes to migrate
		// regions which are only partially mapped to huge pages, including
		// regions with some DONTNEED marks. That needlessly allocates physical
		// memory for our DONTNEED regions.
		madvise(v, n, _MADV_NOHUGEPAGE)
	}
	madvise(v, n, _MADV_DONTNEED)
}
// sysUsed is the counterpart of sysUnused: on platforms where hugePageSize
// is nonzero it marks the n bytes starting at v with _MADV_HUGEPAGE.
// Where hugePageSize is zero it does nothing.
func sysUsed(v unsafe.Pointer, n uintptr) {
	if hugePageSize == 0 {
		// No huge page handling on this platform; nothing to undo.
		return
	}

	// Reverse the NOHUGEPAGE marking applied by sysUnused. The call is made
	// without any alignment check because spans may have been merged since
	// the region was released.
	//
	// This can turn huge pages on for a region that had them disabled
	// before. There is no easy way to find out what the earlier state was,
	// and in any case we probably want huge pages backing the heap whenever
	// the kernel can arrange it.
	madvise(v, n, _MADV_HUGEPAGE)
}
func sysFree(v unsafe.Pointer, n uintptr, stat *uint64) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment