Commit 8449863d authored by Russ Cox's avatar Russ Cox

cmd/link: Mach-O (OS X) file formatter

See CL 48870044 for basic structure.

R=iant
CC=golang-codereviews
https://golang.org/cl/48910043
parent 146897b0
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Mach-O (Darwin) object file writing.
package main
import (
"debug/macho"
"encoding/binary"
"io"
"strings"
)
// machoFormat is the implementation of formatter.
type machoFormat struct{}
// machoHeader and friends are data structures
// corresponding to the Mach-O file header
// to be written to disk.
const (
macho64Bit = 1 << 24
machoSubCPU386 = 3
)
// machoArch describes a Mach-O target architecture.
type machoArch struct {
CPU uint32
SubCPU uint32
}
// machoHeader is the Mach-O file header.
type machoHeader struct {
machoArch
FileType uint32
Loads []*machoLoad
Segments []*machoSegment
p *Prog // for reporting errors
}
// machoLoad is a Mach-O load command.
type machoLoad struct {
Type uint32
Data []uint32
}
// machoSegment is a Mach-O segment.
type machoSegment struct {
Name string
VirtAddr Addr
VirtSize Addr
FileOffset Addr
FileSize Addr
Prot1 uint32
Prot2 uint32
Flags uint32
Sections []*machoSection
}
// machoSection is a Mach-O section, inside a segment.
type machoSection struct {
Name string
Segment string
Addr Addr
Size Addr
Offset uint32
Align uint32
Reloc uint32
Nreloc uint32
Flags uint32
Res1 uint32
Res2 uint32
}
// layout positions the segments and sections in p
// to make room for the Mach-O file header.
// That is, it edits their VirtAddr fields to adjust for the presence
// of the Mach-O header at the beginning of the address space.
func (machoFormat) headerSize(p *Prog) (virt, file Addr) {
var h machoHeader
h.init(p)
size := Addr(h.size())
size = round(size, 4096)
p.HeaderSize = size
return size, size
}
// write writes p to w as a Mach-O executable.
// layout(p) must have already been called,
// and the number, sizes, and addresses of the segments
// and sections must not have been modified since the call.
func (machoFormat) write(w io.Writer, p *Prog) {
var h machoHeader
h.init(p)
off := Addr(0)
enc := h.encode()
w.Write(enc)
off += Addr(len(enc))
for _, seg := range p.Segments {
if seg.FileOffset < off {
h.p.errorf("mach-o error: invalid file offset")
}
w.Write(make([]byte, int(seg.FileOffset-off)))
if seg.FileSize != Addr(len(seg.Data)) {
h.p.errorf("mach-o error: invalid file size")
}
w.Write(seg.Data)
off = seg.FileOffset + Addr(len(seg.Data))
}
}
// Conversion of Prog to macho data structures.
// machoArches maps from GOARCH to machoArch.
var machoArches = map[string]machoArch{
"amd64": {
CPU: uint32(macho.CpuAmd64),
SubCPU: uint32(machoSubCPU386),
},
}
// init initializes the header h to describe p.
func (h *machoHeader) init(p *Prog) {
h.p = p
h.Segments = nil
h.Loads = nil
var ok bool
h.machoArch, ok = machoArches[p.GOARCH]
if !ok {
p.errorf("mach-o: unknown target GOARCH %q", p.GOARCH)
return
}
h.FileType = uint32(macho.TypeExec)
mseg := h.addSegment(p, "__PAGEZERO", nil)
mseg.VirtSize = p.UnmappedSize
for _, seg := range p.Segments {
h.addSegment(p, "__"+strings.ToUpper(seg.Name), seg)
}
var data []uint32
switch h.CPU {
default:
p.errorf("mach-o: unknown cpu %#x for GOARCH %q", h.CPU, p.GOARCH)
case uint32(macho.CpuAmd64):
data = make([]uint32, 2+42)
data[0] = 4 // thread type
data[1] = 42 // word count
data[2+32] = uint32(p.Entry) // RIP register, in two parts
data[2+32+1] = uint32(p.Entry >> 32)
}
h.Loads = append(h.Loads, &machoLoad{
Type: uint32(macho.LoadCmdUnixThread),
Data: data,
})
}
// addSegment adds to h a Mach-O segment like seg with the given name.
func (h *machoHeader) addSegment(p *Prog, name string, seg *Segment) *machoSegment {
mseg := &machoSegment{
Name: name,
}
h.Segments = append(h.Segments, mseg)
if seg == nil {
return mseg
}
mseg.VirtAddr = seg.VirtAddr
mseg.VirtSize = seg.VirtSize
mseg.FileOffset = round(seg.FileOffset, 4096)
mseg.FileSize = seg.FileSize
if name == "__TEXT" {
// Initially RWX, then just RX
mseg.Prot1 = 7
mseg.Prot2 = 5
// Text segment maps Mach-O header, needed by dynamic linker.
mseg.VirtAddr -= p.HeaderSize
mseg.VirtSize += p.HeaderSize
mseg.FileOffset -= p.HeaderSize
mseg.FileSize += p.HeaderSize
} else {
// RW
mseg.Prot1 = 3
mseg.Prot2 = 3
}
for _, sect := range seg.Sections {
h.addSection(mseg, seg, sect)
}
return mseg
}
// addSection adds to mseg a Mach-O section like sect, inside seg, with the given name.
func (h *machoHeader) addSection(mseg *machoSegment, seg *Segment, sect *Section) {
msect := &machoSection{
Name: "__" + sect.Name,
Segment: mseg.Name,
// Reloc: sect.RelocOffset,
// NumReloc: sect.RelocLen / 8,
Addr: sect.VirtAddr,
Size: sect.Size,
}
mseg.Sections = append(mseg.Sections, msect)
for 1<<msect.Align < sect.Align {
msect.Align++
}
if off := sect.VirtAddr - seg.VirtAddr; off < seg.FileSize {
// Data in file.
if sect.Size > seg.FileSize-off {
h.p.errorf("mach-o error: section crosses file boundary")
}
msect.Offset = uint32(seg.FileOffset + off)
} else {
// Zero filled.
msect.Flags |= 1
}
if sect.Name == "text" {
msect.Flags |= 0x400 // contains executable instructions
}
}
// A machoWriter helps write Mach-O headers.
// It is basically a buffer with some helper routines for writing integers.
type machoWriter struct {
dst []byte
tmp [8]byte
order binary.ByteOrder
is64 bool
p *Prog
}
// if64 returns x if w is writing a 64-bit object file; otherwise it returns y.
func (w *machoWriter) if64(x, y interface{}) interface{} {
if w.is64 {
return x
}
return y
}
// encode encodes each of the given arguments into the writer.
// It encodes uint32, []uint32, uint64, and []uint64 by writing each value
// in turn in the correct byte order for the output file.
// It encodes an Addr as a uint64 if writing a 64-bit output file, or else as a uint32.
// It encodes []byte and string by writing the raw bytes (no length prefix).
// It skips nil values in the args list.
func (w *machoWriter) encode(args ...interface{}) {
for _, arg := range args {
switch arg := arg.(type) {
default:
w.p.errorf("mach-o error: cannot encode %T", arg)
case nil:
// skip
case []byte:
w.dst = append(w.dst, arg...)
case string:
w.dst = append(w.dst, arg...)
case uint32:
w.order.PutUint32(w.tmp[:], arg)
w.dst = append(w.dst, w.tmp[:4]...)
case []uint32:
for _, x := range arg {
w.order.PutUint32(w.tmp[:], x)
w.dst = append(w.dst, w.tmp[:4]...)
}
case uint64:
w.order.PutUint64(w.tmp[:], arg)
w.dst = append(w.dst, w.tmp[:8]...)
case Addr:
if w.is64 {
w.order.PutUint64(w.tmp[:], uint64(arg))
w.dst = append(w.dst, w.tmp[:8]...)
} else {
if Addr(uint32(arg)) != arg {
w.p.errorf("mach-o error: truncating address %#x to uint32", arg)
}
w.order.PutUint32(w.tmp[:], uint32(arg))
w.dst = append(w.dst, w.tmp[:4]...)
}
}
}
}
// segmentSize returns the size of the encoding of seg in bytes.
func (w *machoWriter) segmentSize(seg *machoSegment) int {
if w.is64 {
return 18*4 + 20*4*len(seg.Sections)
}
return 14*4 + 22*4*len(seg.Sections)
}
// zeroPad returns the string s truncated or padded with NULs to n bytes.
func zeroPad(s string, n int) string {
if len(s) >= n {
return s[:n]
}
return s + strings.Repeat("\x00", n-len(s))
}
// size returns the encoded size of the header.
func (h *machoHeader) size() int {
// Could write separate code, but encoding is cheap; encode and throw it away.
return len(h.encode())
}
// encode returns the Mach-O encoding of the header.
func (h *machoHeader) encode() []byte {
w := &machoWriter{p: h.p}
w.is64 = h.CPU&macho64Bit != 0
switch h.SubCPU {
default:
h.p.errorf("mach-o error: unknown CPU")
case machoSubCPU386:
w.order = binary.LittleEndian
}
loadSize := 0
for _, seg := range h.Segments {
loadSize += w.segmentSize(seg)
}
for _, l := range h.Loads {
loadSize += 4 * (2 + len(l.Data))
}
w.encode(
w.if64(macho.Magic64, macho.Magic32),
uint32(h.CPU),
uint32(h.SubCPU),
uint32(h.FileType),
uint32(len(h.Loads)+len(h.Segments)),
uint32(loadSize),
uint32(1),
w.if64(uint32(0), nil),
)
for _, seg := range h.Segments {
w.encode(
w.if64(uint32(macho.LoadCmdSegment64), uint32(macho.LoadCmdSegment)),
uint32(w.segmentSize(seg)),
zeroPad(seg.Name, 16),
seg.VirtAddr,
seg.VirtSize,
seg.FileOffset,
seg.FileSize,
seg.Prot1,
seg.Prot2,
uint32(len(seg.Sections)),
seg.Flags,
)
for _, sect := range seg.Sections {
w.encode(
zeroPad(sect.Name, 16),
zeroPad(seg.Name, 16),
sect.Addr,
sect.Size,
sect.Offset,
sect.Align,
sect.Reloc,
sect.Nreloc,
sect.Flags,
sect.Res1,
sect.Res2,
w.if64(uint32(0), nil),
)
}
}
for _, load := range h.Loads {
w.encode(
load.Type,
uint32(4*(2+len(load.Data))),
load.Data,
)
}
return w.dst
}
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"bytes"
"debug/macho"
"encoding/binary"
"fmt"
"io/ioutil"
"strings"
"testing"
)
// Test macho writing by checking that each generated prog can be written
// and then read back using debug/macho to get the same prog.
// Also check against golden testdata file.
var machoWriteTests = []struct {
name string
golden bool
prog *Prog
}{
// amd64 exit 9
{
name: "exit9",
golden: true,
prog: &Prog{
GOARCH: "amd64",
UnmappedSize: 0x1000,
Entry: 0x1000,
Segments: []*Segment{
{
Name: "text",
VirtAddr: 0x1000,
VirtSize: 13,
FileOffset: 0,
FileSize: 13,
Data: []byte{
0xb8, 0x01, 0x00, 0x00, 0x02, // MOVL $0x2000001, AX
0xbf, 0x09, 0x00, 0x00, 0x00, // MOVL $9, DI
0x0f, 0x05, // SYSCALL
0xf4, // HLT
},
Sections: []*Section{
{
Name: "text",
VirtAddr: 0x1000,
Size: 13,
Align: 64,
},
},
},
},
},
},
// amd64 write hello world & exit 9
{
name: "hello",
golden: true,
prog: &Prog{
GOARCH: "amd64",
UnmappedSize: 0x1000,
Entry: 0x1000,
Segments: []*Segment{
{
Name: "text",
VirtAddr: 0x1000,
VirtSize: 35,
FileOffset: 0,
FileSize: 35,
Data: []byte{
0xb8, 0x04, 0x00, 0x00, 0x02, // MOVL $0x2000001, AX
0xbf, 0x01, 0x00, 0x00, 0x00, // MOVL $1, DI
0xbe, 0x00, 0x30, 0x00, 0x00, // MOVL $0x3000, SI
0xba, 0x0c, 0x00, 0x00, 0x00, // MOVL $12, DX
0x0f, 0x05, // SYSCALL
0xb8, 0x01, 0x00, 0x00, 0x02, // MOVL $0x2000001, AX
0xbf, 0x09, 0x00, 0x00, 0x00, // MOVL $9, DI
0x0f, 0x05, // SYSCALL
0xf4, // HLT
},
Sections: []*Section{
{
Name: "text",
VirtAddr: 0x1000,
Size: 35,
Align: 64,
},
},
},
{
Name: "data",
VirtAddr: 0x2000,
VirtSize: 12,
FileOffset: 0x1000,
FileSize: 12,
Data: []byte("hello world\n"),
Sections: []*Section{
{
Name: "data",
VirtAddr: 0x2000,
Size: 12,
Align: 64,
},
},
},
},
},
},
// amd64 write hello world from rodata & exit 0
{
name: "helloro",
golden: true,
prog: &Prog{
GOARCH: "amd64",
UnmappedSize: 0x1000,
Entry: 0x1000,
Segments: []*Segment{
{
Name: "text",
VirtAddr: 0x1000,
VirtSize: 0x100c,
FileOffset: 0,
FileSize: 0x100c,
Data: concat(
[]byte{
0xb8, 0x04, 0x00, 0x00, 0x02, // MOVL $0x2000001, AX
0xbf, 0x01, 0x00, 0x00, 0x00, // MOVL $1, DI
0xbe, 0x00, 0x30, 0x00, 0x00, // MOVL $0x3000, SI
0xba, 0x0c, 0x00, 0x00, 0x00, // MOVL $12, DX
0x0f, 0x05, // SYSCALL
0xb8, 0x01, 0x00, 0x00, 0x02, // MOVL $0x2000001, AX
0xbf, 0x00, 0x00, 0x00, 0x00, // MOVL $0, DI
0x0f, 0x05, // SYSCALL
0xf4, // HLT
},
make([]byte, 0x1000-35),
[]byte("hello world\n"),
),
Sections: []*Section{
{
Name: "text",
VirtAddr: 0x1000,
Size: 35,
Align: 64,
},
{
Name: "rodata",
VirtAddr: 0x2000,
Size: 12,
Align: 64,
},
},
},
},
},
},
}
func concat(xs ...[]byte) []byte {
var out []byte
for _, x := range xs {
out = append(out, x...)
}
return out
}
func TestMachoWrite(t *testing.T) {
for _, tt := range machoWriteTests {
name := tt.prog.GOARCH + "." + tt.name
prog := cloneProg(tt.prog)
var f machoFormat
vsize, fsize := f.headerSize(prog)
shiftProg(prog, vsize, fsize)
var buf bytes.Buffer
f.write(&buf, prog)
if false { // enable to debug
ioutil.WriteFile("a.out", buf.Bytes(), 0777)
}
read, err := machoRead(machoArches[tt.prog.GOARCH], buf.Bytes())
if err != nil {
t.Errorf("%s: reading mach-o output:\n\t%v", name, err)
continue
}
diffs := diffProg(read, prog)
if diffs != nil {
t.Errorf("%s: mismatched prog:\n\t%s", name, strings.Join(diffs, "\n\t"))
continue
}
if !tt.golden {
continue
}
checkGolden(t, buf.Bytes(), "testdata/macho."+name)
}
}
// machoRead reads the mach-o file in data and returns a corresponding prog.
func machoRead(arch machoArch, data []byte) (*Prog, error) {
f, err := macho.NewFile(bytes.NewReader(data))
if err != nil {
return nil, err
}
var errors []string
errorf := func(format string, args ...interface{}) {
errors = append(errors, fmt.Sprintf(format, args...))
}
magic := uint32(0xFEEDFACE)
if arch.CPU&macho64Bit != 0 {
magic |= 1
}
if f.Magic != magic {
errorf("header: Magic = %#x, want %#x", f.Magic, magic)
}
if f.Cpu != macho.CpuAmd64 {
errorf("header: CPU = %#x, want %#x", f.Cpu, macho.CpuAmd64)
}
if f.SubCpu != 3 {
errorf("header: SubCPU = %#x, want %#x", f.SubCpu, 3)
}
if f.Type != 2 {
errorf("header: FileType = %d, want %d", f.Type, 2)
}
if f.Flags != 1 {
errorf("header: Flags = %d, want %d", f.Flags, 1)
}
msects := f.Sections
var limit uint64
prog := new(Prog)
for _, load := range f.Loads {
switch load := load.(type) {
default:
errorf("unexpected macho load %T %x", load, load.Raw())
case macho.LoadBytes:
if len(load) < 8 || len(load)%4 != 0 {
errorf("unexpected load length %d", len(load))
continue
}
cmd := f.ByteOrder.Uint32(load)
switch macho.LoadCmd(cmd) {
default:
errorf("unexpected macho load cmd %s", macho.LoadCmd(cmd))
case macho.LoadCmdUnixThread:
data := make([]uint32, len(load[8:])/4)
binary.Read(bytes.NewReader(load[8:]), f.ByteOrder, data)
if len(data) != 44 {
errorf("macho thread len(data) = %d, want 42", len(data))
continue
}
if data[0] != 4 {
errorf("macho thread type = %d, want 4", data[0])
}
if data[1] != uint32(len(data))-2 {
errorf("macho thread desc len = %d, want %d", data[1], uint32(len(data))-2)
continue
}
for i, val := range data[2:] {
switch i {
default:
if val != 0 {
errorf("macho thread data[%d] = %#x, want 0", i, val)
}
case 32:
prog.Entry = Addr(val)
case 33:
prog.Entry |= Addr(val) << 32
}
}
}
case *macho.Segment:
if load.Addr < limit {
errorf("segments out of order: %q at %#x after %#x", load.Name, load.Addr, limit)
}
limit = load.Addr + load.Memsz
if load.Name == "__PAGEZERO" || load.Addr == 0 && load.Filesz == 0 {
if load.Name != "__PAGEZERO" {
errorf("segment with Addr=0, Filesz=0 is named %q, want %q", load.Name, "__PAGEZERO")
} else if load.Addr != 0 || load.Filesz != 0 {
errorf("segment %q has Addr=%#x, Filesz=%d, want Addr=%#x, Filesz=%d", load.Name, load.Addr, load.Filesz, 0, 0)
}
prog.UnmappedSize = Addr(load.Memsz)
continue
}
if !strings.HasPrefix(load.Name, "__") {
errorf("segment name %q does not begin with %q", load.Name, "__")
}
if strings.ToUpper(load.Name) != load.Name {
errorf("segment name %q is not all upper case", load.Name)
}
seg := &Segment{
Name: strings.ToLower(strings.TrimPrefix(load.Name, "__")),
VirtAddr: Addr(load.Addr),
VirtSize: Addr(load.Memsz),
FileOffset: Addr(load.Offset),
FileSize: Addr(load.Filesz),
}
prog.Segments = append(prog.Segments, seg)
data, err := load.Data()
if err != nil {
errorf("loading data from %q: %v", load.Name, err)
}
seg.Data = data
var maxprot, prot uint32
if load.Name == "__TEXT" {
maxprot, prot = 7, 5
} else {
maxprot, prot = 3, 3
}
if load.Maxprot != maxprot || load.Prot != prot {
errorf("segment %q protection is %d, %d, want %d, %d",
load.Maxprot, load.Prot, maxprot, prot)
}
for len(msects) > 0 && msects[0].Addr < load.Addr+load.Memsz {
msect := msects[0]
msects = msects[1:]
if msect.Offset > 0 && prog.HeaderSize == 0 {
prog.HeaderSize = Addr(msect.Offset)
if seg.FileOffset != 0 {
errorf("initial segment %q does not map header", load.Name)
}
seg.VirtAddr += prog.HeaderSize
seg.VirtSize -= prog.HeaderSize
seg.FileOffset += prog.HeaderSize
seg.FileSize -= prog.HeaderSize
seg.Data = seg.Data[prog.HeaderSize:]
}
if msect.Addr < load.Addr {
errorf("section %q at address %#x is missing segment", msect.Name, msect.Addr)
continue
}
if !strings.HasPrefix(msect.Name, "__") {
errorf("section name %q does not begin with %q", msect.Name, "__")
}
if strings.ToLower(msect.Name) != msect.Name {
errorf("section name %q is not all lower case", msect.Name)
}
if msect.Seg != load.Name {
errorf("section %q is lists segment name %q, want %q",
msect.Name, msect.Seg, load.Name)
}
if uint64(msect.Offset) != uint64(load.Offset)+msect.Addr-load.Addr {
errorf("section %q file offset is %#x, want %#x",
msect.Name, msect.Offset, load.Offset+msect.Addr-load.Addr)
}
if msect.Reloff != 0 || msect.Nreloc != 0 {
errorf("section %q has reloff %d,%d, want %d,%d",
msect.Name, msect.Reloff, msect.Nreloc, 0, 0)
}
flags := uint32(0)
if msect.Name == "__text" {
flags = 0x400
}
if msect.Offset == 0 {
flags = 1
}
if msect.Flags != flags {
errorf("section %q flags = %#x, want %#x", msect.Flags, flags)
}
sect := &Section{
Name: strings.ToLower(strings.TrimPrefix(msect.Name, "__")),
VirtAddr: Addr(msect.Addr),
Size: Addr(msect.Size),
Align: 1 << msect.Align,
}
seg.Sections = append(seg.Sections, sect)
}
}
}
for _, msect := range msects {
errorf("section %q has no segment", msect.Name)
}
limit = 0
for _, msect := range f.Sections {
if msect.Addr < limit {
errorf("sections out of order: %q at %#x after %#x", msect.Name, msect.Addr, limit)
}
limit = msect.Addr + msect.Size
}
err = nil
if errors != nil {
err = fmt.Errorf("%s", strings.Join(errors, "\n\t"))
}
return prog, err
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment