Commit 3a9bc571 authored by Michael Hudson-Doyle's avatar Michael Hudson-Doyle

cmd/internal/obj/arm64, cmd/link: use two instructions rather than three for loads from memory

Reduces size of godoc .text section by about 75k (or 1.4%).

Change-Id: I65850aa569aefbddd6cb07c6ae1addcc39cab6a5
Reviewed-on: https://go-review.googlesource.com/13993Reviewed-by: 's avatarRuss Cox <rsc@golang.org>
parent a7d331b3
......@@ -260,16 +260,16 @@ var optab = []Optab{
{AMOVW, C_VCONADDR, C_NONE, C_REG, 68, 8, 0, 0, 0},
{AMOVD, C_VCON, C_NONE, C_REG, 12, 4, 0, LFROM, 0},
{AMOVD, C_VCONADDR, C_NONE, C_REG, 68, 8, 0, 0, 0},
{AMOVB, C_REG, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
{AMOVBU, C_REG, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
{AMOVH, C_REG, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
{AMOVW, C_REG, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
{AMOVD, C_REG, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
{AMOVB, C_ADDR, C_NONE, C_REG, 65, 12, 0, 0, 0},
{AMOVBU, C_ADDR, C_NONE, C_REG, 65, 12, 0, 0, 0},
{AMOVH, C_ADDR, C_NONE, C_REG, 65, 12, 0, 0, 0},
{AMOVW, C_ADDR, C_NONE, C_REG, 65, 12, 0, 0, 0},
{AMOVD, C_ADDR, C_NONE, C_REG, 65, 12, 0, 0, 0},
{AMOVB, C_REG, C_NONE, C_ADDR, 64, 8, 0, 0, 0},
{AMOVBU, C_REG, C_NONE, C_ADDR, 64, 8, 0, 0, 0},
{AMOVH, C_REG, C_NONE, C_ADDR, 64, 8, 0, 0, 0},
{AMOVW, C_REG, C_NONE, C_ADDR, 64, 8, 0, 0, 0},
{AMOVD, C_REG, C_NONE, C_ADDR, 64, 8, 0, 0, 0},
{AMOVB, C_ADDR, C_NONE, C_REG, 65, 8, 0, 0, 0},
{AMOVBU, C_ADDR, C_NONE, C_REG, 65, 8, 0, 0, 0},
{AMOVH, C_ADDR, C_NONE, C_REG, 65, 8, 0, 0, 0},
{AMOVW, C_ADDR, C_NONE, C_REG, 65, 8, 0, 0, 0},
{AMOVD, C_ADDR, C_NONE, C_REG, 65, 8, 0, 0, 0},
{AMOVD, C_TLS_LE, C_NONE, C_REG, 69, 4, 0, 0, 0},
{AMOVD, C_TLS_IE, C_NONE, C_REG, 70, 8, 0, 0, 0},
{AMUL, C_REG, C_REG, C_REG, 15, 4, 0, 0, 0},
......@@ -450,10 +450,10 @@ var optab = []Optab{
{AFMOVS, C_LOREG, C_NONE, C_FREG, 31, 8, 0, LFROM, 0},
{AFMOVD, C_LAUTO, C_NONE, C_FREG, 31, 8, REGSP, LFROM, 0},
{AFMOVD, C_LOREG, C_NONE, C_FREG, 31, 8, 0, LFROM, 0},
{AFMOVS, C_FREG, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
{AFMOVS, C_ADDR, C_NONE, C_FREG, 65, 12, 0, 0, 0},
{AFMOVD, C_FREG, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
{AFMOVD, C_ADDR, C_NONE, C_FREG, 65, 12, 0, 0, 0},
{AFMOVS, C_FREG, C_NONE, C_ADDR, 64, 8, 0, 0, 0},
{AFMOVS, C_ADDR, C_NONE, C_FREG, 65, 8, 0, 0, 0},
{AFMOVD, C_FREG, C_NONE, C_ADDR, 64, 8, 0, 0, 0},
{AFMOVD, C_ADDR, C_NONE, C_FREG, 65, 8, 0, 0, 0},
{AFADDS, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0},
{AFADDS, C_FREG, C_FREG, C_FREG, 54, 4, 0, 0, 0},
{AFADDS, C_FCON, C_NONE, C_FREG, 54, 4, 0, 0, 0},
......@@ -2704,25 +2704,23 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
/* reloc ops */
case 64: /* movT R,addr -> adrp + add + movT R, (REGTMP) */
o1 = ADR(1, 0, REGTMP)
o2 = opirr(ctxt, AADD) | REGTMP&31<<5 | REGTMP&31
o2 = olsr12u(ctxt, int32(opstr12(ctxt, int(p.As))), 0, REGTMP, int(p.From.Reg))
rel := obj.Addrel(ctxt.Cursym)
rel.Off = int32(ctxt.Pc)
rel.Siz = 8
rel.Sym = p.To.Sym
rel.Add = p.To.Offset
rel.Type = obj.R_ADDRARM64
o3 = olsr12u(ctxt, int32(opstr12(ctxt, int(p.As))), 0, REGTMP, int(p.From.Reg))
rel.Type = movereloc(p.As)
case 65: /* movT addr,R -> adrp + add + movT (REGTMP), R */
case 65: /* movT addr,R -> adrp REGTMP, 0; ldr R, [REGTMP, #0] + relocs */
o1 = ADR(1, 0, REGTMP)
o2 = opirr(ctxt, AADD) | REGTMP&31<<5 | REGTMP&31
o2 = olsr12u(ctxt, int32(opldr12(ctxt, int(p.As))), 0, REGTMP, int(p.To.Reg))
rel := obj.Addrel(ctxt.Cursym)
rel.Off = int32(ctxt.Pc)
rel.Siz = 8
rel.Sym = p.From.Sym
rel.Add = p.From.Offset
rel.Type = obj.R_ADDRARM64
o3 = olsr12u(ctxt, int32(opldr12(ctxt, int(p.As))), 0, REGTMP, int(p.To.Reg))
rel.Siz = 8
rel.Type = movereloc(p.As)
case 66: /* ldp O(R)!, (r1, r2); ldp (R)O!, (r1, r2) */
v := int32(p.From.Offset)
......@@ -4161,3 +4159,19 @@ func movesize(a int) int {
return -1
}
}
func movereloc(a int16) int32 {
switch movesize(int(a)) {
case 0:
return obj.R_ARM64_LOAD8
case 1:
return obj.R_ARM64_LOAD16
case 2:
return obj.R_ARM64_LOAD32
case 3:
return obj.R_ARM64_LOAD64
case -1:
panic("xxx")
}
return -1
}
......@@ -457,6 +457,11 @@ const (
// referenced (thread local) symbol from the GOT.
R_ARM64_TLS_IE
R_ARM64_LOAD8
R_ARM64_LOAD16
R_ARM64_LOAD32
R_ARM64_LOAD64
// PPC64.
// R_POWER_TLS_LE is used to implement the "local exec" model for tls
......
......@@ -82,6 +82,30 @@ func elfreloc1(r *ld.Reloc, sectoff int64) int {
ld.Thearch.Vput(uint64(sectoff + 4))
ld.Thearch.Vput(ld.R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC | uint64(elfsym)<<32)
case obj.R_ARM64_LOAD8:
ld.Thearch.Vput(ld.R_AARCH64_ADR_PREL_PG_HI21 | uint64(elfsym)<<32)
ld.Thearch.Vput(uint64(r.Xadd))
ld.Thearch.Vput(uint64(sectoff + 4))
ld.Thearch.Vput(ld.R_AARCH64_LDST8_ABS_LO12_NC | uint64(elfsym)<<32)
case obj.R_ARM64_LOAD16:
ld.Thearch.Vput(ld.R_AARCH64_ADR_PREL_PG_HI21 | uint64(elfsym)<<32)
ld.Thearch.Vput(uint64(r.Xadd))
ld.Thearch.Vput(uint64(sectoff + 4))
ld.Thearch.Vput(ld.R_AARCH64_LDST16_ABS_LO12_NC | uint64(elfsym)<<32)
case obj.R_ARM64_LOAD32:
ld.Thearch.Vput(ld.R_AARCH64_ADR_PREL_PG_HI21 | uint64(elfsym)<<32)
ld.Thearch.Vput(uint64(r.Xadd))
ld.Thearch.Vput(uint64(sectoff + 4))
ld.Thearch.Vput(ld.R_AARCH64_LDST32_ABS_LO12_NC | uint64(elfsym)<<32)
case obj.R_ARM64_LOAD64:
ld.Thearch.Vput(ld.R_AARCH64_ADR_PREL_PG_HI21 | uint64(elfsym)<<32)
ld.Thearch.Vput(uint64(r.Xadd))
ld.Thearch.Vput(uint64(sectoff + 4))
ld.Thearch.Vput(ld.R_AARCH64_LDST64_ABS_LO12_NC | uint64(elfsym)<<32)
case obj.R_CALLARM64:
if r.Siz != 4 {
return -1
......@@ -178,13 +202,72 @@ func machoreloc1(r *ld.Reloc, sectoff int64) int {
return 0
}
func archrelocaddr(r *ld.Reloc, s *ld.LSym, val *int64) int {
var o1, o2 uint32
if ld.Ctxt.Arch.ByteOrder == binary.BigEndian {
o1 = uint32(*val >> 32)
o2 = uint32(*val)
} else {
o1 = uint32(*val)
o2 = uint32(*val >> 32)
}
// We are inserting an address into two instructions: adrp and
// then either addi or a load.
address := ld.Symaddr(r.Sym) + r.Add
pgaddress := (address &^ 0xfff) - ((s.Value + int64(r.Off)) &^ 0xfff)
if pgaddress < -1<<31 || pgaddress >= 1<<31 {
ld.Ctxt.Diag("relocation for %s is too big (>=2G): %d", s.Name, pgaddress)
}
pgoff := uint32(address & 0xfff)
o1 |= uint32((((pgaddress >> 12) & 3) << 29) | (((pgaddress >> 12 >> 2) & 0x7ffff) << 5))
switch r.Type {
case obj.R_ADDRARM64, obj.R_ARM64_LOAD8:
o2 |= pgoff << 10
case obj.R_ARM64_LOAD16:
if pgoff&0x1 != 0 {
ld.Diag("offset for 16-byte load/store has unaligned value %d", pgoff)
}
o2 |= pgoff << 9
case obj.R_ARM64_LOAD32:
if pgoff&0x3 != 0 {
ld.Diag("offset for 32-byte load/store has unaligned value %d", pgoff)
}
o2 |= pgoff << 8
case obj.R_ARM64_LOAD64:
if pgoff&0x7 != 0 {
ld.Diag("offset for 64-byte load/store has unaligned value %d", pgoff)
}
o2 |= pgoff << 7
default:
return -1
}
if ld.Ctxt.Arch.ByteOrder == binary.BigEndian {
*val = int64(o1)<<32 | int64(o2)
} else {
*val = int64(o2)<<32 | int64(o1)
}
return 0
}
func archreloc(r *ld.Reloc, s *ld.LSym, val *int64) int {
if ld.Linkmode == ld.LinkExternal {
switch r.Type {
default:
return -1
case obj.R_ADDRARM64:
case obj.R_ADDRARM64,
obj.R_ARM64_LOAD8,
obj.R_ARM64_LOAD16,
obj.R_ARM64_LOAD32,
obj.R_ARM64_LOAD64:
r.Done = 0
// set up addend for eventual relocation via outer symbol.
......@@ -253,32 +336,8 @@ func archreloc(r *ld.Reloc, s *ld.LSym, val *int64) int {
*val = ld.Symaddr(r.Sym) + r.Add - ld.Symaddr(ld.Linklookup(ld.Ctxt, ".got", 0))
return 0
case obj.R_ADDRARM64:
t := ld.Symaddr(r.Sym) + r.Add - ((s.Value + int64(r.Off)) &^ 0xfff)
if t >= 1<<32 || t < -1<<32 {
ld.Diag("program too large, address relocation distance = %d", t)
}
var o0, o1 uint32
if ld.Ctxt.Arch.ByteOrder == binary.BigEndian {
o0 = uint32(*val >> 32)
o1 = uint32(*val)
} else {
o0 = uint32(*val)
o1 = uint32(*val >> 32)
}
o0 |= (uint32((t>>12)&3) << 29) | (uint32((t>>12>>2)&0x7ffff) << 5)
o1 |= uint32(t&0xfff) << 10
// when laid out, the instruction order must always be o1, o2.
if ld.Ctxt.Arch.ByteOrder == binary.BigEndian {
*val = int64(o0)<<32 | int64(o1)
} else {
*val = int64(o1)<<32 | int64(o0)
}
return 0
case obj.R_ADDRARM64, obj.R_ARM64_LOAD8, obj.R_ARM64_LOAD16, obj.R_ARM64_LOAD32, obj.R_ARM64_LOAD64:
return archrelocaddr(r, s, val)
case obj.R_ARM64_TLS_LE:
r.Done = 0
......
......@@ -372,6 +372,10 @@ const (
R_AARCH64_CALL26 = 283
R_AARCH64_ADR_PREL_PG_HI21 = 275
R_AARCH64_ADD_ABS_LO12_NC = 277
R_AARCH64_LDST8_ABS_LO12_NC = 278
R_AARCH64_LDST16_ABS_LO12_NC = 284
R_AARCH64_LDST32_ABS_LO12_NC = 285
R_AARCH64_LDST64_ABS_LO12_NC = 286
R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 = 541
R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC = 542
R_AARCH64_TLSLE_MOVW_TPREL_G0 = 547
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment