Commit b81f2f10 authored by Keith Randall

cmd/compile: place combined loads at the location of the last byte load

We need to make sure all the bounds checks pass before issuing
a load which combines several others.  We do this by issuing the
combined load at the last load's block, where "last" = closest to
the leaf of the dominator tree.

Fixes #15002

Change-Id: I7358116db1e039a072c12c0a73d861f3815d72af
Reviewed-on: https://go-review.googlesource.com/21246
Reviewed-by: Todd Neal <todd@tneal.org>
parent b83618f9
......@@ -1262,40 +1262,40 @@
// Combining byte loads into larger (unaligned) loads.
// There are many ways these combinations could occur. This is
// designed to match the way encoding/binary.LittleEndian does it.
(ORW x:(MOVBload [i] {s} p mem)
(SHLWconst [8] (MOVBload [i+1] {s} p mem))) -> @x.Block (MOVWload [i] {s} p mem)
(ORW x0:(MOVBload [i] {s} p mem)
(SHLWconst [8] x1:(MOVBload [i+1] {s} p mem))) && mergePoint(b,x0,x1) != nil -> @mergePoint(b,x0,x1) (MOVWload [i] {s} p mem)
(ORL (ORL (ORL
x:(MOVBload [i] {s} p mem)
(SHLLconst [8] (MOVBload [i+1] {s} p mem)))
(SHLLconst [16] (MOVBload [i+2] {s} p mem)))
(SHLLconst [24] (MOVBload [i+3] {s} p mem))) -> @x.Block (MOVLload [i] {s} p mem)
x0:(MOVBload [i] {s} p mem)
(SHLLconst [8] x1:(MOVBload [i+1] {s} p mem)))
(SHLLconst [16] x2:(MOVBload [i+2] {s} p mem)))
(SHLLconst [24] x3:(MOVBload [i+3] {s} p mem))) && mergePoint(b,x0,x1,x2,x3) != nil -> @mergePoint(b,x0,x1,x2,x3) (MOVLload [i] {s} p mem)
(ORQ (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ
x:(MOVBload [i] {s} p mem)
(SHLQconst [8] (MOVBload [i+1] {s} p mem)))
(SHLQconst [16] (MOVBload [i+2] {s} p mem)))
(SHLQconst [24] (MOVBload [i+3] {s} p mem)))
(SHLQconst [32] (MOVBload [i+4] {s} p mem)))
(SHLQconst [40] (MOVBload [i+5] {s} p mem)))
(SHLQconst [48] (MOVBload [i+6] {s} p mem)))
(SHLQconst [56] (MOVBload [i+7] {s} p mem))) -> @x.Block (MOVQload [i] {s} p mem)
(ORW x:(MOVBloadidx1 [i] {s} p idx mem)
(SHLWconst [8] (MOVBloadidx1 [i+1] {s} p idx mem))) -> @x.Block (MOVWloadidx1 <v.Type> [i] {s} p idx mem)
x0:(MOVBload [i] {s} p mem)
(SHLQconst [8] x1:(MOVBload [i+1] {s} p mem)))
(SHLQconst [16] x2:(MOVBload [i+2] {s} p mem)))
(SHLQconst [24] x3:(MOVBload [i+3] {s} p mem)))
(SHLQconst [32] x4:(MOVBload [i+4] {s} p mem)))
(SHLQconst [40] x5:(MOVBload [i+5] {s} p mem)))
(SHLQconst [48] x6:(MOVBload [i+6] {s} p mem)))
(SHLQconst [56] x7:(MOVBload [i+7] {s} p mem))) && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQload [i] {s} p mem)
(ORW x0:(MOVBloadidx1 [i] {s} p idx mem)
(SHLWconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem))) && mergePoint(b,x0,x1) != nil -> @mergePoint(b,x0,x1) (MOVWloadidx1 <v.Type> [i] {s} p idx mem)
(ORL (ORL (ORL
x:(MOVBloadidx1 [i] {s} p idx mem)
(SHLLconst [8] (MOVBloadidx1 [i+1] {s} p idx mem)))
(SHLLconst [16] (MOVBloadidx1 [i+2] {s} p idx mem)))
(SHLLconst [24] (MOVBloadidx1 [i+3] {s} p idx mem))) -> @x.Block (MOVLloadidx1 <v.Type> [i] {s} p idx mem)
x0:(MOVBloadidx1 [i] {s} p idx mem)
(SHLLconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem)))
(SHLLconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem)))
(SHLLconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem))) && mergePoint(b,x0,x1,x2,x3) != nil -> @mergePoint(b,x0,x1,x2,x3) (MOVLloadidx1 <v.Type> [i] {s} p idx mem)
(ORQ (ORQ (ORQ (ORQ (ORQ (ORQ (ORQ
x:(MOVBloadidx1 [i] {s} p idx mem)
(SHLQconst [8] (MOVBloadidx1 [i+1] {s} p idx mem)))
(SHLQconst [16] (MOVBloadidx1 [i+2] {s} p idx mem)))
(SHLQconst [24] (MOVBloadidx1 [i+3] {s} p idx mem)))
(SHLQconst [32] (MOVBloadidx1 [i+4] {s} p idx mem)))
(SHLQconst [40] (MOVBloadidx1 [i+5] {s} p idx mem)))
(SHLQconst [48] (MOVBloadidx1 [i+6] {s} p idx mem)))
(SHLQconst [56] (MOVBloadidx1 [i+7] {s} p idx mem))) -> @x.Block (MOVQloadidx1 <v.Type> [i] {s} p idx mem)
x0:(MOVBloadidx1 [i] {s} p idx mem)
(SHLQconst [8] x1:(MOVBloadidx1 [i+1] {s} p idx mem)))
(SHLQconst [16] x2:(MOVBloadidx1 [i+2] {s} p idx mem)))
(SHLQconst [24] x3:(MOVBloadidx1 [i+3] {s} p idx mem)))
(SHLQconst [32] x4:(MOVBloadidx1 [i+4] {s} p idx mem)))
(SHLQconst [40] x5:(MOVBloadidx1 [i+5] {s} p idx mem)))
(SHLQconst [48] x6:(MOVBloadidx1 [i+6] {s} p idx mem)))
(SHLQconst [56] x7:(MOVBloadidx1 [i+7] {s} p idx mem))) && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVQloadidx1 <v.Type> [i] {s} p idx mem)
......@@ -261,3 +261,53 @@ func duff(size int64) (int64, int64) {
}
return off, adj
}
// mergePoint finds a block among a's blocks which dominates b and is itself
// dominated by all of a's blocks. Returns nil if it can't find one.
// Might return nil even if one does exist.
//
// The search only follows chains of blocks that have exactly one
// predecessor, and gives up after 100 predecessor hops in total (the budget
// is shared by both phases below). It also returns nil, rather than
// indexing an empty Preds slice, when the walk reaches a block with no
// predecessors before finishing.
func mergePoint(b *Block, a ...*Value) *Block {
	// Walk backward from b looking for one of the a's blocks.

	// Max distance
	d := 100

	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				goto found
			}
		}
		if len(b.Preds) != 1 {
			// Either several predecessors (don't know which way to go
			// back) or none at all (nowhere left to go). Abort.
			return nil
		}
		b = b.Preds[0]
		d--
	}
	return nil // too far away
found:
	// At this point, r is the first of a's blocks that we find by walking
	// backwards. If we return anything, r will be it.
	r := b

	// Keep going, counting the other a's that we find. They must all dominate r.
	// The count starts at r itself, so values living in r are included.
	na := 0
	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				na++
			}
		}
		if na == len(a) {
			// Found all of a in a backwards walk. We can return r.
			return r
		}
		if len(b.Preds) != 1 {
			// Ambiguous or exhausted chain before every value was seen.
			return nil
		}
		b = b.Preds[0]
		d--
	}
	return nil // too far away
}
// +build amd64
// +build linux darwin
// run
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
	"fmt"
	"strings"
	"syscall"
)
// Use global variables so the compiler
// doesn't know that they are constants.

// p is the system page size, as reported by syscall.Getpagesize.
var p = syscall.Getpagesize()

// zero holds 0; it is not referenced in the visible part of this file.
var zero = 0

// one holds 1; main uses it to slice off the last byte of the readable page.
var one = 1
// main maps two anonymous pages, makes the second one inaccessible, and then
// runs every load-combining test on a one-byte slice that ends exactly at the
// boundary between the two pages.
func main() {
	// Two private, anonymous, read-write pages.
	prot := syscall.PROT_READ | syscall.PROT_WRITE
	flags := syscall.MAP_ANON | syscall.MAP_PRIVATE
	mem, mapErr := syscall.Mmap(-1, 0, 2*p, prot, flags)
	if mapErr != nil {
		panic(mapErr)
	}

	// Revoke all access to the second page so that touching it faults.
	if protErr := syscall.Mprotect(mem[p:], syscall.PROT_NONE); protErr != nil {
		panic(protErr)
	}

	// A slice holding only the final byte of the accessible page.
	lastByte := mem[p-one : p]

	test16(lastByte)
	test16i(lastByte, 0)
	test32(lastByte)
	test32i(lastByte, 0)
	test64(lastByte)
	test64i(lastByte, 0)
}
// test16 reads a little-endian uint16 from the first two bytes of x and
// checks that the read fails with an index-out-of-range panic.
//
// main passes a one-byte slice that ends right before an inaccessible page,
// so x[1] must fail its bounds check. If the first load were promoted to a
// 2-byte load issued before that check, the read would segfault instead,
// which the deferred handler reports as a bad panic.
//
// The deferred handler panics when no panic occurred or when the recovered
// message does not start with "runtime error: index out of range";
// otherwise the panic is swallowed and the zero value is returned.
func test16(x []byte) uint16 {
	defer func() {
		r := recover()
		if r == nil {
			panic("no fault or bounds check failure happened")
		}
		s := fmt.Sprintf("%s", r)
		// Compare the prefix only: newer Go runtimes append the offending
		// index and slice length to this message.
		if !strings.HasPrefix(s, "runtime error: index out of range") {
			panic("bad panic: " + s)
		}
	}()
	// Try to read 2 bytes from x.
	// We expect to get an "index out of range" error from x[1].
	return uint16(x[0]) | uint16(x[1])<<8
}
// test16i is the indexed variant of the 16-bit test: it reads a
// little-endian uint16 from x[i] and x[i+1] and checks that the read fails
// with an index-out-of-range panic.
//
// The deferred handler panics when no panic occurred or when the recovered
// message does not start with "runtime error: index out of range";
// otherwise the panic is swallowed and the zero value is returned.
func test16i(x []byte, i int) uint16 {
	defer func() {
		r := recover()
		if r == nil {
			panic("no fault or bounds check failure happened")
		}
		s := fmt.Sprintf("%s", r)
		// Compare the prefix only: newer Go runtimes append the offending
		// index and slice length to this message.
		if !strings.HasPrefix(s, "runtime error: index out of range") {
			panic("bad panic: " + s)
		}
	}()
	return uint16(x[i]) | uint16(x[i+1])<<8
}
// test32 reads a little-endian uint32 from the first four bytes of x and
// checks that the read fails with an index-out-of-range panic.
//
// The deferred handler panics when no panic occurred or when the recovered
// message does not start with "runtime error: index out of range";
// otherwise the panic is swallowed and the zero value is returned.
func test32(x []byte) uint32 {
	defer func() {
		r := recover()
		if r == nil {
			panic("no fault or bounds check failure happened")
		}
		s := fmt.Sprintf("%s", r)
		// Compare the prefix only: newer Go runtimes append the offending
		// index and slice length to this message.
		if !strings.HasPrefix(s, "runtime error: index out of range") {
			panic("bad panic: " + s)
		}
	}()
	return uint32(x[0]) | uint32(x[1])<<8 | uint32(x[2])<<16 | uint32(x[3])<<24
}
// test32i is the indexed variant of the 32-bit test: it reads a
// little-endian uint32 from x[i] through x[i+3] and checks that the read
// fails with an index-out-of-range panic.
//
// The deferred handler panics when no panic occurred or when the recovered
// message does not start with "runtime error: index out of range";
// otherwise the panic is swallowed and the zero value is returned.
func test32i(x []byte, i int) uint32 {
	defer func() {
		r := recover()
		if r == nil {
			panic("no fault or bounds check failure happened")
		}
		s := fmt.Sprintf("%s", r)
		// Compare the prefix only: newer Go runtimes append the offending
		// index and slice length to this message.
		if !strings.HasPrefix(s, "runtime error: index out of range") {
			panic("bad panic: " + s)
		}
	}()
	return uint32(x[i]) | uint32(x[i+1])<<8 | uint32(x[i+2])<<16 | uint32(x[i+3])<<24
}
// test64 reads a little-endian uint64 from the first eight bytes of x and
// checks that the read fails with an index-out-of-range panic.
//
// The deferred handler panics when no panic occurred or when the recovered
// message does not start with "runtime error: index out of range";
// otherwise the panic is swallowed and the zero value is returned.
func test64(x []byte) uint64 {
	defer func() {
		r := recover()
		if r == nil {
			panic("no fault or bounds check failure happened")
		}
		s := fmt.Sprintf("%s", r)
		// Compare the prefix only: newer Go runtimes append the offending
		// index and slice length to this message.
		if !strings.HasPrefix(s, "runtime error: index out of range") {
			panic("bad panic: " + s)
		}
	}()
	return uint64(x[0]) | uint64(x[1])<<8 | uint64(x[2])<<16 | uint64(x[3])<<24 |
		uint64(x[4])<<32 | uint64(x[5])<<40 | uint64(x[6])<<48 | uint64(x[7])<<56
}
// test64i is the indexed variant of the 64-bit test: it reads a
// little-endian uint64 from x[i] through x[i+7] and checks that the read
// fails with an index-out-of-range panic.
//
// The deferred handler panics when no panic occurred or when the recovered
// message does not start with "runtime error: index out of range";
// otherwise the panic is swallowed and the zero value is returned.
func test64i(x []byte, i int) uint64 {
	defer func() {
		r := recover()
		if r == nil {
			panic("no fault or bounds check failure happened")
		}
		s := fmt.Sprintf("%s", r)
		// Compare the prefix only: newer Go runtimes append the offending
		// index and slice length to this message.
		if !strings.HasPrefix(s, "runtime error: index out of range") {
			panic("bad panic: " + s)
		}
	}()
	return uint64(x[i+0]) | uint64(x[i+1])<<8 | uint64(x[i+2])<<16 | uint64(x[i+3])<<24 |
		uint64(x[i+4])<<32 | uint64(x[i+5])<<40 | uint64(x[i+6])<<48 | uint64(x[i+7])<<56
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment