Commit a35c85c0 authored by Michael Hudson-Doyle's avatar Michael Hudson-Doyle

cmd/internal/obj, runtime: implement IE model TLS on ppc64le

This requires changing the tls access code to match the patterns documented in
the ABI documentation or the system linker will "optimize" it into ridiculousness.

With this change, -buildmode=pie works, although as it is tested in testshared,
the tests are not run yet.

Change-Id: I1efa6687af0a5b8db3385b10f6542a49056b2eb3
Reviewed-on: https://go-review.googlesource.com/15971Reviewed-by: 's avatarRuss Cox <rsc@golang.org>
parent bd329d47
......@@ -480,6 +480,19 @@ const (
// instruction word.
R_POWER_TLS_LE
// R_POWER_TLS_IE is used to implement the "initial exec" model for tls access. It
// relocates a D-form, DS-form instruction sequence like R_ADDRPOWER_DS. It
// inserts to the offset of GOT slot for the thread-local symbol from the TOC (the
// GOT slot is filled by the dynamic linker with the offset of the thread-local
// symbol from the thread pointer (R13)).
R_POWER_TLS_IE
// R_POWER_TLS marks an X-form instruction such as "MOVD 0(R13)(R31*1), g" as
// accessing a particular thread-local symbol. It does not affect code generation
// but is used by the system linker when relaxing "initial exec" model code to
// "local exec" model code.
R_POWER_TLS
// R_ADDRPOWER_DS is similar to R_ADDRPOWER above, but assumes the second
// instruction is a "DS-form" instruction, which has an immediate field occupying
// bits [15:2] of the instruction word. Bits [15:2] of the address of the
......
......@@ -222,6 +222,7 @@ const (
C_GOK
C_ADDR
C_TLS_LE
C_TLS_IE
C_TEXTSIZE
C_NCLASS /* must be the last */
......
......@@ -36,6 +36,7 @@ var cnames9 = []string{
"GOK",
"ADDR",
"TLS_LE",
"TLS_IE",
"TEXTSIZE",
"NCLASS",
}
......@@ -246,6 +246,7 @@ var optab = []Optab{
{AMOVB, C_ADDR, C_NONE, C_NONE, C_REG, 76, 12, 0},
{AMOVD, C_TLS_LE, C_NONE, C_NONE, C_REG, 79, 4, 0},
{AMOVD, C_TLS_IE, C_NONE, C_NONE, C_REG, 80, 8, 0},
/* load constant */
{AMOVD, C_SECON, C_NONE, C_NONE, C_REG, 3, 4, REGSB},
......@@ -587,7 +588,11 @@ func aclass(ctxt *obj.Link, a *obj.Addr) int {
ctxt.Instoffset = a.Offset
if a.Sym != nil { // use relocation
if a.Sym.Type == obj.STLSBSS {
return C_TLS_LE
if ctxt.Flag_shared != 0 {
return C_TLS_IE
} else {
return C_TLS_LE
}
}
return C_ADDR
}
......@@ -1652,6 +1657,18 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
if v != 0 {
ctxt.Diag("illegal indexed instruction\n%v", p)
}
if ctxt.Flag_shared != 0 && r == REG_R13 {
rel := obj.Addrel(ctxt.Cursym)
rel.Off = int32(ctxt.Pc)
rel.Siz = 4
// This (and the matching part in the load case
// below) are the only places in the ppc64 toolchain
// that knows the name of the tls variable. Possibly
// we could add some assembly syntax so that the name
// of the variable does not have to be assumed.
rel.Sym = obj.Linklookup(ctxt, "runtime.tls_g", 0)
rel.Type = obj.R_POWER_TLS
}
o1 = AOP_RRR(uint32(opstorex(ctxt, int(p.As))), uint32(p.From.Reg), uint32(p.To.Index), uint32(r))
} else {
if int32(int16(v)) != v {
......@@ -1671,6 +1688,13 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
if v != 0 {
ctxt.Diag("illegal indexed instruction\n%v", p)
}
if ctxt.Flag_shared != 0 && r == REG_R13 {
rel := obj.Addrel(ctxt.Cursym)
rel.Off = int32(ctxt.Pc)
rel.Siz = 4
rel.Sym = obj.Linklookup(ctxt, "runtime.tls_g", 0)
rel.Type = obj.R_POWER_TLS
}
o1 = AOP_RRR(uint32(oploadx(ctxt, int(p.As))), uint32(p.To.Reg), uint32(p.From.Index), uint32(r))
} else {
if int32(int16(v)) != v {
......@@ -2467,6 +2491,18 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
rel.Sym = p.From.Sym
rel.Type = obj.R_POWER_TLS_LE
case 80:
if p.From.Offset != 0 {
ctxt.Diag("invalid offset against tls var %v", p)
}
o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0)
o2 = AOP_IRR(uint32(opload(ctxt, AMOVD)), uint32(p.To.Reg), uint32(p.To.Reg), 0)
rel := obj.Addrel(ctxt.Cursym)
rel.Off = int32(ctxt.Pc)
rel.Siz = 8
rel.Sym = p.From.Sym
rel.Type = obj.R_POWER_TLS_IE
}
out[0] = o1
......
......@@ -564,23 +564,28 @@ const (
R_PPC_EMB_BIT_FLD = 115
R_PPC_EMB_RELSDA = 116
R_PPC64_ADDR32 = R_PPC_ADDR32
R_PPC64_ADDR16_LO = R_PPC_ADDR16_LO
R_PPC64_ADDR16_HA = R_PPC_ADDR16_HA
R_PPC64_REL24 = R_PPC_REL24
R_PPC64_JMP_SLOT = R_PPC_JMP_SLOT
R_PPC64_TPREL16 = R_PPC_TPREL16
R_PPC64_ADDR64 = 38
R_PPC64_TOC16 = 47
R_PPC64_TOC16_LO = 48
R_PPC64_TOC16_HI = 49
R_PPC64_TOC16_HA = 50
R_PPC64_ADDR16_LO_DS = 57
R_PPC64_TOC16_DS = 63
R_PPC64_TOC16_LO_DS = 64
R_PPC64_REL16_LO = 250
R_PPC64_REL16_HI = 251
R_PPC64_REL16_HA = 252
R_PPC64_ADDR32 = R_PPC_ADDR32
R_PPC64_ADDR16_LO = R_PPC_ADDR16_LO
R_PPC64_ADDR16_HA = R_PPC_ADDR16_HA
R_PPC64_REL24 = R_PPC_REL24
R_PPC64_GOT16_HA = R_PPC_GOT16_HA
R_PPC64_JMP_SLOT = R_PPC_JMP_SLOT
R_PPC64_TPREL16 = R_PPC_TPREL16
R_PPC64_ADDR64 = 38
R_PPC64_TOC16 = 47
R_PPC64_TOC16_LO = 48
R_PPC64_TOC16_HI = 49
R_PPC64_TOC16_HA = 50
R_PPC64_ADDR16_LO_DS = 57
R_PPC64_GOT16_LO_DS = 59
R_PPC64_TOC16_DS = 63
R_PPC64_TOC16_LO_DS = 64
R_PPC64_TLS = 67
R_PPC64_GOT_TPREL16_LO_DS = 88
R_PPC64_GOT_TPREL16_HA = 90
R_PPC64_REL16_LO = 250
R_PPC64_REL16_HI = 251
R_PPC64_REL16_HA = 252
R_SPARC_NONE = 0
R_SPARC_8 = 1
......
......@@ -310,9 +310,18 @@ func elfreloc1(r *ld.Reloc, sectoff int64) int {
return -1
}
case obj.R_POWER_TLS:
ld.Thearch.Vput(ld.R_PPC64_TLS | uint64(elfsym)<<32)
case obj.R_POWER_TLS_LE:
ld.Thearch.Vput(ld.R_PPC64_TPREL16 | uint64(elfsym)<<32)
case obj.R_POWER_TLS_IE:
ld.Thearch.Vput(ld.R_PPC64_GOT_TPREL16_HA | uint64(elfsym)<<32)
ld.Thearch.Vput(uint64(r.Xadd))
ld.Thearch.Vput(uint64(sectoff + 4))
ld.Thearch.Vput(ld.R_PPC64_GOT_TPREL16_LO_DS | uint64(elfsym)<<32)
case obj.R_ADDRPOWER:
ld.Thearch.Vput(ld.R_PPC64_ADDR16_HA | uint64(elfsym)<<32)
ld.Thearch.Vput(uint64(r.Xadd))
......@@ -444,7 +453,7 @@ func archreloc(r *ld.Reloc, s *ld.LSym, val *int64) int {
default:
return -1
case obj.R_POWER_TLS_LE:
case obj.R_POWER_TLS, obj.R_POWER_TLS_LE, obj.R_POWER_TLS_IE:
r.Done = 0
// check Outer is nil, Type is TLSBSS?
r.Xadd = r.Add
......
......@@ -27,8 +27,7 @@ TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0-0
CMP R31, $0
BEQ nocgo
MOVD runtime·tls_g(SB), R31
ADD R13, R31
MOVD g, 0(R31)
MOVD g, 0(R13)(R31*1)
nocgo:
RET
......@@ -44,8 +43,7 @@ nocgo:
// NOTE: _cgo_topofstack assumes this only clobbers g (R30), and R31.
TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0-0
MOVD runtime·tls_g(SB), R31
ADD R13, R31
MOVD 0(R31), g
MOVD 0(R13)(R31*1), g
RET
GLOBL runtime·tls_g+0(SB), TLSBSS, $8
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment