Commit 14cb4158 authored by David du Colombier's avatar David du Colombier

cmd/compile: don't use MOVOstore instruction on plan9/amd64

CL 54410 and CL 56250 recently added use of the MOVOstore
instruction to improve performance.

However, we can't use the MOVOstore instruction on Plan 9,
because floating point operations are not allowed in the
note handler.

This change adds a configuration flag useSSE to enable the
use of SSE instructions for non-floating point operations.
This flag is enabled by default and disabled on Plan 9.
When this flag is disabled, the MOVOstore instruction is
not used and the MOVQstoreconst instruction is used instead.

Fixes #21599

Change-Id: Ie609e5d9b82ec0092ae874bab4ce01caa5bc8fb8
Reviewed-on: https://go-review.googlesource.com/58850Reviewed-by: 's avatarKeith Randall <khr@golang.org>
parent a164a2f5
......@@ -33,6 +33,7 @@ type Config struct {
ctxt *obj.Link // Generic arch information
optimize bool // Do optimization
noDuffDevice bool // Don't use Duff's device
useSSE bool // Use SSE for non-float operations
nacl bool // GOOS=nacl
use387 bool // GO386=387
NeedsFpScratch bool // No direct move between GP and FP register sets
......@@ -264,11 +265,13 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize bool) *Config
c.ctxt = ctxt
c.optimize = optimize
c.nacl = objabi.GOOS == "nacl"
c.useSSE = true
// Don't use Duff's device on Plan 9 AMD64, because floating
// point operations are not allowed in note handler.
// Don't use Duff's device nor SSE on Plan 9 AMD64, because
// floating point operations are not allowed in note handler.
if objabi.GOOS == "plan9" && arch == "amd64" {
c.noDuffDevice = true
c.useSSE = false
}
if c.nacl {
......
......@@ -386,29 +386,48 @@
(MOVLstoreconst [makeValAndOff(0,3)] destptr
(MOVLstoreconst [0] destptr mem))
(Zero [s] destptr mem) && s > 8 && s < 16 ->
// Strip off any fractional word zeroing.
(Zero [s] destptr mem) && s%8 != 0 && s > 8 && !config.useSSE ->
(Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8])
(MOVQstoreconst [0] destptr mem))
// Zero small numbers of words directly.
(Zero [16] destptr mem) && !config.useSSE ->
(MOVQstoreconst [makeValAndOff(0,8)] destptr
(MOVQstoreconst [0] destptr mem))
(Zero [24] destptr mem) && !config.useSSE ->
(MOVQstoreconst [makeValAndOff(0,16)] destptr
(MOVQstoreconst [makeValAndOff(0,8)] destptr
(MOVQstoreconst [0] destptr mem)))
(Zero [32] destptr mem) && !config.useSSE ->
(MOVQstoreconst [makeValAndOff(0,24)] destptr
(MOVQstoreconst [makeValAndOff(0,16)] destptr
(MOVQstoreconst [makeValAndOff(0,8)] destptr
(MOVQstoreconst [0] destptr mem))))
(Zero [s] destptr mem) && s > 8 && s < 16 && config.useSSE ->
(MOVQstoreconst [makeValAndOff(0,s-8)] destptr
(MOVQstoreconst [0] destptr mem))
// Adjust zeros to be a multiple of 16 bytes.
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 ->
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE ->
(Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
(MOVOstore destptr (MOVOconst [0]) mem))
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 ->
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE ->
(Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
(MOVQstoreconst [0] destptr mem))
(Zero [16] destptr mem) ->
(Zero [16] destptr mem) && config.useSSE ->
(MOVOstore destptr (MOVOconst [0]) mem)
(Zero [32] destptr mem) ->
(Zero [32] destptr mem) && config.useSSE ->
(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
(MOVOstore destptr (MOVOconst [0]) mem))
(Zero [48] destptr mem) ->
(Zero [48] destptr mem) && config.useSSE ->
(MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
(MOVOstore destptr (MOVOconst [0]) mem)))
(Zero [64] destptr mem) ->
(Zero [64] destptr mem) && config.useSSE ->
(MOVOstore (OffPtr <destptr.Type> destptr [48]) (MOVOconst [0])
(MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
(MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
......@@ -421,7 +440,7 @@
// Large zeroing uses REP STOSQ.
(Zero [s] destptr mem)
&& (s > 1024 || (config.noDuffDevice && s > 64))
&& (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32))
&& s%8 == 0 ->
(REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)
......@@ -2205,6 +2224,7 @@
&& clobber(x)
-> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
(MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
&& config.useSSE
&& x.Uses == 1
&& ValAndOff(c2).Off() + 8 == ValAndOff(c).Off()
&& ValAndOff(c).Val() == 0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment