Commit 863d9b66 authored by Russ Cox

cmd/asm: add requested amd64 instructions

Add amd64 instructions I promised to add for Go 1.6
at the beginning of January.

These may be the last instructions added by hand.
I intend to generate the whole set mechanically for Go 1.7.

Fixes #13822.

Change-Id: I8c6bae2efd25f717f9ec750402e50f408a911d2b
Reviewed-on: https://go-review.googlesource.com/18853
Reviewed-by: Rob Pike <r@golang.org>
parent 8d881b81
...@@ -162,11 +162,11 @@ func archX86(linkArch *obj.LinkArch) *Arch { ...@@ -162,11 +162,11 @@ func archX86(linkArch *obj.LinkArch) *Arch {
instructions["MOVDQ2Q"] = x86.AMOVQ instructions["MOVDQ2Q"] = x86.AMOVQ
instructions["MOVNTDQ"] = x86.AMOVNTO instructions["MOVNTDQ"] = x86.AMOVNTO
instructions["MOVOA"] = x86.AMOVO instructions["MOVOA"] = x86.AMOVO
instructions["MOVOA"] = x86.AMOVO
instructions["PF2ID"] = x86.APF2IL instructions["PF2ID"] = x86.APF2IL
instructions["PI2FD"] = x86.API2FL instructions["PI2FD"] = x86.API2FL
instructions["PSLLDQ"] = x86.APSLLO instructions["PSLLDQ"] = x86.APSLLO
instructions["PSRLDQ"] = x86.APSRLO instructions["PSRLDQ"] = x86.APSRLO
instructions["PADDD"] = x86.APADDL
return &Arch{ return &Arch{
LinkArch: linkArch, LinkArch: linkArch,
......
...@@ -121,5 +121,11 @@ label: ...@@ -121,5 +121,11 @@ label:
loop: loop:
LOOP loop // LOOP LOOP loop // LOOP
// Intel pseudonyms for our own renamings.
PADDD M2, M1 // PADDL M2, M1
MOVDQ2Q X1, M1 // MOVQ X1, M1
MOVNTDQ X1, (AX) // MOVNTO X1, (AX)
MOVOA (AX), X1 // MOVO (AX), X1
// LTYPE0 nonnon { outcode($1, &$2); } // LTYPE0 nonnon { outcode($1, &$2); }
RET // c3 RET // c3
...@@ -601,15 +601,15 @@ const ( ...@@ -601,15 +601,15 @@ const (
APADDUSB APADDUSB
APADDUSW APADDUSW
APADDW APADDW
APAND
APANDB APANDB
APANDL APANDL
APANDN
APANDSB APANDSB
APANDSW APANDSW
APANDUSB APANDUSB
APANDUSW APANDUSW
APANDW APANDW
APAND
APANDN
APAVGB APAVGB
APAVGW APAVGW
APCMPEQB APCMPEQB
...@@ -618,10 +618,10 @@ const ( ...@@ -618,10 +618,10 @@ const (
APCMPGTB APCMPGTB
APCMPGTL APCMPGTL
APCMPGTW APCMPGTW
APEXTRW
APEXTRB APEXTRB
APEXTRD APEXTRD
APEXTRQ APEXTRQ
APEXTRW
APFACC APFACC
APFADD APFADD
APFCMPEQ APFCMPEQ
...@@ -633,42 +633,63 @@ const ( ...@@ -633,42 +633,63 @@ const (
APFNACC APFNACC
APFPNACC APFPNACC
APFRCP APFRCP
APFRCPIT1
APFRCPI2T APFRCPI2T
APFRCPIT1
APFRSQIT1 APFRSQIT1
APFRSQRT APFRSQRT
APFSUB APFSUB
APFSUBR APFSUBR
APINSRW APHADDD
APHADDSW
APHADDW
APHMINPOSUW
APHSUBD
APHSUBSW
APHSUBW
APINSRB APINSRB
APINSRD APINSRD
APINSRQ APINSRQ
APINSRW
APMADDWL APMADDWL
APMAXSW APMAXSW
APMAXUB APMAXUB
APMINSW APMINSW
APMINUB APMINUB
APMOVMSKB APMOVMSKB
APMOVSXBD
APMOVSXBQ
APMOVSXBW
APMOVSXDQ
APMOVSXWD
APMOVSXWQ
APMOVZXBD
APMOVZXBQ
APMOVZXBW
APMOVZXDQ
APMOVZXWD
APMOVZXWQ
APMULDQ
APMULHRW APMULHRW
APMULHUW APMULHUW
APMULHW APMULHW
APMULLD
APMULLW APMULLW
APMULULQ APMULULQ
APOR APOR
APSADBW APSADBW
APSHUFB
APSHUFHW APSHUFHW
APSHUFL APSHUFL
APSHUFLW APSHUFLW
APSHUFW APSHUFW
APSHUFB
APSLLO
APSLLL APSLLL
APSLLO
APSLLQ APSLLQ
APSLLW APSLLW
APSRAL APSRAL
APSRAW APSRAW
APSRLO
APSRLL APSRLL
APSRLO
APSRLQ APSRLQ
APSRLW APSRLW
APSUBB APSUBB
......
...@@ -550,15 +550,15 @@ var Anames = []string{ ...@@ -550,15 +550,15 @@ var Anames = []string{
"PADDUSB", "PADDUSB",
"PADDUSW", "PADDUSW",
"PADDW", "PADDW",
"PAND",
"PANDB", "PANDB",
"PANDL", "PANDL",
"PANDN",
"PANDSB", "PANDSB",
"PANDSW", "PANDSW",
"PANDUSB", "PANDUSB",
"PANDUSW", "PANDUSW",
"PANDW", "PANDW",
"PAND",
"PANDN",
"PAVGB", "PAVGB",
"PAVGW", "PAVGW",
"PCMPEQB", "PCMPEQB",
...@@ -567,10 +567,10 @@ var Anames = []string{ ...@@ -567,10 +567,10 @@ var Anames = []string{
"PCMPGTB", "PCMPGTB",
"PCMPGTL", "PCMPGTL",
"PCMPGTW", "PCMPGTW",
"PEXTRW",
"PEXTRB", "PEXTRB",
"PEXTRD", "PEXTRD",
"PEXTRQ", "PEXTRQ",
"PEXTRW",
"PFACC", "PFACC",
"PFADD", "PFADD",
"PFCMPEQ", "PFCMPEQ",
...@@ -582,42 +582,63 @@ var Anames = []string{ ...@@ -582,42 +582,63 @@ var Anames = []string{
"PFNACC", "PFNACC",
"PFPNACC", "PFPNACC",
"PFRCP", "PFRCP",
"PFRCPIT1",
"PFRCPI2T", "PFRCPI2T",
"PFRCPIT1",
"PFRSQIT1", "PFRSQIT1",
"PFRSQRT", "PFRSQRT",
"PFSUB", "PFSUB",
"PFSUBR", "PFSUBR",
"PINSRW", "PHADDD",
"PHADDSW",
"PHADDW",
"PHMINPOSUW",
"PHSUBD",
"PHSUBSW",
"PHSUBW",
"PINSRB", "PINSRB",
"PINSRD", "PINSRD",
"PINSRQ", "PINSRQ",
"PINSRW",
"PMADDWL", "PMADDWL",
"PMAXSW", "PMAXSW",
"PMAXUB", "PMAXUB",
"PMINSW", "PMINSW",
"PMINUB", "PMINUB",
"PMOVMSKB", "PMOVMSKB",
"PMOVSXBD",
"PMOVSXBQ",
"PMOVSXBW",
"PMOVSXDQ",
"PMOVSXWD",
"PMOVSXWQ",
"PMOVZXBD",
"PMOVZXBQ",
"PMOVZXBW",
"PMOVZXDQ",
"PMOVZXWD",
"PMOVZXWQ",
"PMULDQ",
"PMULHRW", "PMULHRW",
"PMULHUW", "PMULHUW",
"PMULHW", "PMULHW",
"PMULLD",
"PMULLW", "PMULLW",
"PMULULQ", "PMULULQ",
"POR", "POR",
"PSADBW", "PSADBW",
"PSHUFB",
"PSHUFHW", "PSHUFHW",
"PSHUFL", "PSHUFL",
"PSHUFLW", "PSHUFLW",
"PSHUFW", "PSHUFW",
"PSHUFB",
"PSLLO",
"PSLLL", "PSLLL",
"PSLLO",
"PSLLQ", "PSLLQ",
"PSLLW", "PSLLW",
"PSRAL", "PSRAL",
"PSRAW", "PSRAW",
"PSRLO",
"PSRLL", "PSRLL",
"PSRLO",
"PSRLQ", "PSRLQ",
"PSRLW", "PSRLW",
"PSUBB", "PSUBB",
......
...@@ -222,6 +222,7 @@ const ( ...@@ -222,6 +222,7 @@ const (
Pf3 = 0xf3 /* xmm escape 2: f3 0f */ Pf3 = 0xf3 /* xmm escape 2: f3 0f */
Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */ Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
Pq3 = 0x67 /* xmm escape 3: 66 48 0f */ Pq3 = 0x67 /* xmm escape 3: 66 48 0f */
Pq4 = 0x68 /* xmm escape 4: 66 0F 38 */
Pfw = 0xf4 /* Pf3 with Rex.w: f3 48 0f */ Pfw = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
Pw = 0x48 /* Rex.w */ Pw = 0x48 /* Rex.w */
Pw8 = 0x90 // symbolic; exact value doesn't matter Pw8 = 0x90 // symbolic; exact value doesn't matter
...@@ -675,6 +676,10 @@ var yxm = []ytab{ ...@@ -675,6 +676,10 @@ var yxm = []ytab{
{Yxm, Ynone, Yxr, Zm_r_xm, 1}, {Yxm, Ynone, Yxr, Zm_r_xm, 1},
} }
// yxm_q4 is the operand table used by the optab entries that carry the
// Pq4 prefix (66 0F 38), e.g. PHADDSW, PMOVSXBD, PMULDQ and PMULLD.
// It has a single form: Yxm in the first column, Yxr in the third, encoded
// with Zm_r.
// NOTE(review): Yxm/Yxr presumably mean "XMM register or memory" and
// "XMM register", and the trailing 1 presumably is the number of opcode
// bytes this form consumes — confirm against the ytab definition.
var yxm_q4 = []ytab{
{Yxm, Ynone, Yxr, Zm_r, 1},
}
var yxcvm1 = []ytab{ var yxcvm1 = []ytab{
{Yxm, Ynone, Yxr, Zm_r_xm, 2}, {Yxm, Ynone, Yxr, Zm_r_xm, 2},
{Yxm, Ynone, Ymr, Zm_r_xm, 2}, {Yxm, Ynone, Ymr, Zm_r_xm, 2},
...@@ -817,6 +822,10 @@ var yxabort = []ytab{ ...@@ -817,6 +822,10 @@ var yxabort = []ytab{
{Yu8, Ynone, Ynone, Zib_, 1}, {Yu8, Ynone, Ynone, Zib_, 1},
} }
// ylddqu is the operand table for the LDDQU optab entry (Pf2 prefix,
// opcode 0xf0). It has a single form: Ym in the first column, Yxr in the
// third, encoded with Zm_r.
// NOTE(review): Ym presumably restricts the source to a memory operand,
// which would match LDDQU being a load-only instruction — confirm against
// the operand-class definitions.
var ylddqu = []ytab{
{Ym, Ynone, Yxr, Zm_r, 1},
}
// VEX instructions that come in two forms: // VEX instructions that come in two forms:
// VTHING xmm2/m128, xmmV, xmm1 // VTHING xmm2/m128, xmmV, xmm1
// VTHING ymm2/m256, ymmV, ymm1 // VTHING ymm2/m256, ymmV, ymm1
...@@ -873,6 +882,11 @@ var yvex_xxmyxm = []ytab{ ...@@ -873,6 +882,11 @@ var yvex_xxmyxm = []ytab{
{Yyr, Ynone, Yxm, Zvex_r_v_rm, 2}, {Yyr, Ynone, Yxm, Zvex_r_v_rm, 2},
} }
// ymmxmm0f38 is the operand table for the PHADDD optab entry, which lists
// two zero-terminated opcode sequences: 0F 38 02 and 66 0F 38 02.
// Both forms use the Zlitm_r encoding; the first pairs Ymm with Ymr and the
// second pairs Yxm with Yxr.
// NOTE(review): the first form presumably selects the MMX encoding and the
// second the 66-prefixed XMM encoding; the trailing 3 and 5 presumably
// relate to the lengths of those opcode sequences — confirm against how
// doasm advances through the opcode bytes for Zlitm_r.
var ymmxmm0f38 = []ytab{
{Ymm, Ynone, Ymr, Zlitm_r, 3},
{Yxm, Ynone, Yxr, Zlitm_r, 5},
}
/* /*
* You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32, * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
* and p->from and p->to as operands (Addr*). The linker scans optab to find * and p->from and p->to as operands (Addr*). The linker scans optab to find
...@@ -1149,6 +1163,7 @@ var optab = ...@@ -1149,6 +1163,7 @@ var optab =
{ALAHF, ynone, Px, [23]uint8{0x9f}}, {ALAHF, ynone, Px, [23]uint8{0x9f}},
{ALARL, yml_rl, Pm, [23]uint8{0x02}}, {ALARL, yml_rl, Pm, [23]uint8{0x02}},
{ALARW, yml_rl, Pq, [23]uint8{0x02}}, {ALARW, yml_rl, Pq, [23]uint8{0x02}},
{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}}, {ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
{ALEAL, ym_rl, Px, [23]uint8{0x8d}}, {ALEAL, ym_rl, Px, [23]uint8{0x8d}},
{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}}, {ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
...@@ -1293,6 +1308,13 @@ var optab = ...@@ -1293,6 +1308,13 @@ var optab =
{APFRSQRT, ymfp, Px, [23]uint8{0x97}}, {APFRSQRT, ymfp, Px, [23]uint8{0x97}},
{APFSUB, ymfp, Px, [23]uint8{0x9a}}, {APFSUB, ymfp, Px, [23]uint8{0x9a}},
{APFSUBR, ymfp, Px, [23]uint8{0xaa}}, {APFSUBR, ymfp, Px, [23]uint8{0xaa}},
{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}}, {APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}}, {APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}}, {APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
...@@ -1303,9 +1325,23 @@ var optab = ...@@ -1303,9 +1325,23 @@ var optab =
{APMINSW, yxm, Pe, [23]uint8{0xea}}, {APMINSW, yxm, Pe, [23]uint8{0xea}},
{APMINUB, yxm, Pe, [23]uint8{0xda}}, {APMINUB, yxm, Pe, [23]uint8{0xda}},
{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}}, {APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
{APMULHRW, ymfp, Px, [23]uint8{0xb7}}, {APMULHRW, ymfp, Px, [23]uint8{0xb7}},
{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}}, {APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}}, {APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}}, {APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}}, {APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
{APOPAL, ynone, P32, [23]uint8{0x61}}, {APOPAL, ynone, P32, [23]uint8{0x61}},
...@@ -3292,6 +3328,12 @@ func doasm(ctxt *obj.Link, p *obj.Prog) { ...@@ -3292,6 +3328,12 @@ func doasm(ctxt *obj.Link, p *obj.Prog) {
ctxt.Andptr[0] = Pm ctxt.Andptr[0] = Pm
ctxt.Andptr = ctxt.Andptr[1:] ctxt.Andptr = ctxt.Andptr[1:]
case Pq4: /* 66 0F 38 */
ctxt.Andptr[0] = 0x66
ctxt.Andptr[1] = 0x0F
ctxt.Andptr[2] = 0x38
ctxt.Andptr = ctxt.Andptr[3:]
case Pf2, /* xmm opcode escape */ case Pf2, /* xmm opcode escape */
Pf3: Pf3:
ctxt.Andptr[0] = byte(o.prefix) ctxt.Andptr[0] = byte(o.prefix)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment