bpf: new package to assemble and disassemble Berkeley Packet Filter programs.

The package currently implements the operations defined by https://www.kernel.org/doc/Documentation/networking/filter.txt , which comprises the base BPF virtual machine plus the Linux kernel's extension opcodes. Updates golang/go#14982 Change-Id: Iafb43d80e067040e60465a9bfb7d5f2ca90cc2ae Reviewed-on: https://go-review.googlesource.com/21212Reviewed-by: Mikio Hara <mikioh.mikioh@gmail.com>

bpf: new package to assemble and disassemble Berkeley Packet Filter programs.
The package currently implements the operations defined by https://www.kernel.org/doc/Documentation/networking/filter.txt , which comprises the base BPF virtual machine plus the Linux kernel's extension opcodes. Updates golang/go#14982 Change-Id: Iafb43d80e067040e60465a9bfb7d5f2ca90cc2ae Reviewed-on: https://go-review.googlesource.com/21212Reviewed-by: Mikio Hara <mikioh.mikioh@gmail.com>
3c208088 · David Anderson · Mikio Hara · 1600a4cd · 3c208088 · 3c208088
Commit 3c208088 authored Mar 27, 2016 by David Anderson Committed by Mikio Hara Mar 28, 2016
7 changed files
--- a/bpf/asm.go
+++ b/bpf/asm.go
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bpf
+
+import "fmt"
+
+// Assemble converts insts into raw instructions suitable for loading
+// into a BPF virtual machine.
+//
+// Currently, no optimization is attempted, the assembled program flow
+// is exactly as provided.
+func Assemble(insts []Instruction) ([]RawInstruction, error) {
+	ret := make([]RawInstruction, len(insts))
+	var err error
+	for i, inst := range insts {
+		ret[i], err = inst.Assemble()
+		if err != nil {
+			return nil, fmt.Errorf("assembling instruction %d: %s", i+1, err)
+		}
+	}
+	return ret, nil
+}
+
+// Disassemble attempts to parse raw back into
+// Instructions. Unrecognized RawInstructions are assumed to be an
+// extension not implemented by this package, and are passed through
+// unchanged to the output. The allDecoded value reports whether insts
+// contains no RawInstructions.
+func Disassemble(raw []RawInstruction) (insts []Instruction, allDecoded bool) {
+	insts = make([]Instruction, len(raw))
+	allDecoded = true
+	for i, r := range raw {
+		insts[i] = r.Disassemble()
+		if _, ok := insts[i].(RawInstruction); ok {
+			allDecoded = false
+		}
+	}
+	return insts, allDecoded
+}
--- a/bpf/constants.go
+++ b/bpf/constants.go
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bpf
+
+// A Register is a register of the BPF virtual machine.
+type Register uint16
+
+const (
+	// RegA is the accumulator register. RegA is always the
+	// destination register of ALU operations.
+	RegA Register = iota
+	// RegX is the indirection register, used by LoadIndirect
+	// operations.
+	RegX
+)
+
+// An ALUOp is an arithmetic or logic operation.
+type ALUOp uint16
+
+// ALU binary operation types.
+const (
+	ALUOpAdd ALUOp = iota << 4
+	ALUOpSub
+	ALUOpMul
+	ALUOpDiv
+	ALUOpOr
+	ALUOpAnd
+	ALUOpShiftLeft
+	ALUOpShiftRight
+	aluOpNeg // Not exported because it's the only unary ALU operation, and gets its own instruction type.
+	ALUOpMod
+	ALUOpXor
+)
+
+// A JumpTest is a comparison operator used in conditional jumps.
+type JumpTest uint16
+
+// Supported operators for conditional jumps.
+const (
+	// K == A
+	JumpEqual JumpTest = iota
+	// K != A
+	JumpNotEqual
+	// K > A
+	JumpGreaterThan
+	// K < A
+	JumpLessThan
+	// K >= A
+	JumpGreaterOrEqual
+	// K <= A
+	JumpLessOrEqual
+	// K & A != 0
+	JumpBitsSet
+	// K & A == 0
+	JumpBitsNotSet
+)
+
+// An Extension is a function call provided by the kernel that
+// performs advanced operations that are expensive or impossible
+// within the BPF virtual machine.
+//
+// Extensions are only implemented by the Linux kernel.
+//
+// TODO: should we prune this list? Some of these extensions seem
+// either broken or near-impossible to use correctly, whereas other
+// (len, random, ifindex) are quite useful.
+type Extension int
+
+// Extension functions available in the Linux kernel.
+const (
+	// ExtLen returns the length of the packet.
+	ExtLen Extension = 1
+	// ExtProto returns the packet's L3 protocol type.
+	ExtProto = 0
+	// ExtType returns the packet's type (skb->pkt_type in the kernel)
+	//
+	// TODO: better documentation. How nice an API do we want to
+	// provide for these esoteric extensions?
+	ExtType = 4
+	// ExtPayloadOffset returns the offset of the packet payload, or
+	// the first protocol header that the kernel does not know how to
+	// parse.
+	ExtPayloadOffset = 52
+	// ExtInterfaceIndex returns the index of the interface on which
+	// the packet was received.
+	ExtInterfaceIndex = 8
+	// ExtNetlinkAttr returns the netlink attribute of type X at
+	// offset A.
+	ExtNetlinkAttr = 12
+	// ExtNetlinkAttrNested returns the nested netlink attribute of
+	// type X at offset A.
+	ExtNetlinkAttrNested = 16
+	// ExtMark returns the packet's mark value.
+	ExtMark = 20
+	// ExtQueue returns the packet's assigned hardware queue.
+	ExtQueue = 24
+	// ExtLinkLayerType returns the packet's hardware address type
+	// (e.g. Ethernet, Infiniband).
+	ExtLinkLayerType = 28
+	// ExtRXHash returns the packets receive hash.
+	//
+	// TODO: figure out what this rxhash actually is.
+	ExtRXHash = 32
+	// ExtCPUID returns the ID of the CPU processing the current
+	// packet.
+	ExtCPUID = 36
+	// ExtVLANTag returns the packet's VLAN tag.
+	ExtVLANTag = 44
+	// ExtVLANTagPresent returns non-zero if the packet has a VLAN
+	// tag.
+	//
+	// TODO: I think this might be a lie: it reads bit 0x1000 of the
+	// VLAN header, which changed meaning in recent revisions of the
+	// spec - this extension may now return meaningless information.
+	ExtVLANTagPresent = 48
+	// ExtVLANProto returns 0x8100 if the frame has a VLAN header,
+	// 0x88a8 if the frame has a "Q-in-Q" double VLAN header, or some
+	// other value if no VLAN information is present.
+	ExtVLANProto = 60
+	// ExtRand returns a uniformly random uint32.
+	ExtRand = 56
+)
+
+// The following gives names to various bit patterns used in opcode construction.
+
+const opClsMask uint16 = 0x7
+
+const (
+	// +---------------+-----------------+---+---+---+
+	// | AddrMode (3b) | LoadWidth (2b)  | 0 | 0 | 0 |
+	// +---------------+-----------------+---+---+---+
+	opClsLoadA uint16 = iota
+	// +---------------+-----------------+---+---+---+
+	// | AddrMode (3b) | LoadWidth (2b)  | 0 | 0 | 1 |
+	// +---------------+-----------------+---+---+---+
+	opClsLoadX
+	// +---+---+---+---+---+---+---+---+
+	// | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
+	// +---+---+---+---+---+---+---+---+
+	opClsStoreA
+	// +---+---+---+---+---+---+---+---+
+	// | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
+	// +---+---+---+---+---+---+---+---+
+	opClsStoreX
+	// +---------------+-----------------+---+---+---+
+	// | Operator (4b) | OperandSrc (1b) | 1 | 0 | 0 |
+	// +---------------+-----------------+---+---+---+
+	opClsALU
+	// +-----------------------------+---+---+---+---+
+	// |      TestOperator (4b)      | 0 | 1 | 0 | 1 |
+	// +-----------------------------+---+---+---+---+
+	opClsJump
+	// +---+-------------------------+---+---+---+---+
+	// | 0 | 0 | 0 |   RetSrc (1b)   | 0 | 1 | 1 | 0 |
+	// +---+-------------------------+---+---+---+---+
+	opClsReturn
+	// +---+-------------------------+---+---+---+---+
+	// | 0 | 0 | 0 |  TXAorTAX (1b)  | 0 | 1 | 1 | 1 |
+	// +---+-------------------------+---+---+---+---+
+	opClsMisc
+)
+
+const (
+	opAddrModeImmediate uint16 = iota << 5
+	opAddrModeAbsolute
+	opAddrModeIndirect
+	opAddrModeScratch
+	// These are actually extensions, not addressing modes.
+	opAddrModePacketLen
+	opAddrModeIPv4HeaderLen
+)
+
+const (
+	opLoadWidth4 uint16 = iota << 3
+	opLoadWidth2
+	opLoadWidth1
+)
+
+// Operator defined by ALUOp*
+const opALUOpMask = 0xf0
+
+const opALUSrcMask = 0x08
+
+const (
+	opALUSrcConstant uint16 = iota << 3
+	opALUSrcX
+)
+
+const (
+	opJumpAlways = iota << 4
+	opJumpEqual
+	opJumpGT
+	opJumpGE
+	opJumpSet
+)
+
+const (
+	opRetSrcConstant uint16 = iota << 4
+	opRetSrcA
+)
+
+const (
+	opMiscTAX = 0x00
+	opMiscTXA = 0x80
+)
--- a/bpf/doc.go
+++ b/bpf/doc.go
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package bpf implements marshalling and unmarshalling of programs
+// for the Berkeley Packet Filter virtual machine.
+//
+// TODO: brief overview of the BPF virtual machine (registers, scratch, packet access, execution constraints)
+//
+// TODO: simple BPF program examples
+package bpf // import "golang.org/x/net/bpf"
--- a/bpf/instructions.go
+++ b/bpf/instructions.go
--- a/bpf/instructions_test.go
+++ b/bpf/instructions_test.go
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bpf
+
+import (
+	"io/ioutil"
+	"reflect"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+// This is a direct translation of the program in
+// testdata/all_instructions.txt.
+var allInstructions = []Instruction{
+	LoadConstant{Dst: RegA, Val: 42},
+	LoadConstant{Dst: RegX, Val: 42},
+
+	LoadScratch{Dst: RegA, N: 3},
+	LoadScratch{Dst: RegX, N: 3},
+
+	LoadAbsolute{Off: 42, Size: 1},
+	LoadAbsolute{Off: 42, Size: 2},
+	LoadAbsolute{Off: 42, Size: 4},
+
+	LoadIndirect{Off: 42, Size: 1},
+	LoadIndirect{Off: 42, Size: 2},
+	LoadIndirect{Off: 42, Size: 4},
+
+	LoadIPv4HeaderLen{Off: 42},
+
+	LoadExtension{Num: ExtLen},
+	LoadExtension{Num: ExtProto},
+	LoadExtension{Num: ExtType},
+	LoadExtension{Num: ExtRand},
+
+	StoreScratch{Src: RegA, N: 3},
+	StoreScratch{Src: RegX, N: 3},
+
+	ALUOpConstant{Op: ALUOpAdd, Val: 42},
+	ALUOpConstant{Op: ALUOpSub, Val: 42},
+	ALUOpConstant{Op: ALUOpMul, Val: 42},
+	ALUOpConstant{Op: ALUOpDiv, Val: 42},
+	ALUOpConstant{Op: ALUOpOr, Val: 42},
+	ALUOpConstant{Op: ALUOpAnd, Val: 42},
+	ALUOpConstant{Op: ALUOpShiftLeft, Val: 42},
+	ALUOpConstant{Op: ALUOpShiftRight, Val: 42},
+	ALUOpConstant{Op: ALUOpMod, Val: 42},
+	ALUOpConstant{Op: ALUOpXor, Val: 42},
+
+	ALUOpX{Op: ALUOpAdd},
+	ALUOpX{Op: ALUOpSub},
+	ALUOpX{Op: ALUOpMul},
+	ALUOpX{Op: ALUOpDiv},
+	ALUOpX{Op: ALUOpOr},
+	ALUOpX{Op: ALUOpAnd},
+	ALUOpX{Op: ALUOpShiftLeft},
+	ALUOpX{Op: ALUOpShiftRight},
+	ALUOpX{Op: ALUOpMod},
+	ALUOpX{Op: ALUOpXor},
+
+	NegateA{},
+
+	Jump{Skip: 10},
+	JumpIf{Cond: JumpEqual, Val: 42, SkipTrue: 8, SkipFalse: 9},
+	JumpIf{Cond: JumpNotEqual, Val: 42, SkipTrue: 8},
+	JumpIf{Cond: JumpLessThan, Val: 42, SkipTrue: 7},
+	JumpIf{Cond: JumpLessOrEqual, Val: 42, SkipTrue: 6},
+	JumpIf{Cond: JumpGreaterThan, Val: 42, SkipTrue: 4, SkipFalse: 5},
+	JumpIf{Cond: JumpGreaterOrEqual, Val: 42, SkipTrue: 3, SkipFalse: 4},
+	JumpIf{Cond: JumpBitsSet, Val: 42, SkipTrue: 2, SkipFalse: 3},
+
+	TAX{},
+	TXA{},
+
+	RetA{},
+	RetConstant{Val: 42},
+}
+var allInstructionsExpected = "testdata/all_instructions.bpf"
+
+// Check that we produce the same output as the canonical bpf_asm
+// linux kernel tool.
+func TestInterop(t *testing.T) {
+	out, err := Assemble(allInstructions)
+	if err != nil {
+		t.Fatalf("assembly of allInstructions program failed: %s", err)
+	}
+	t.Logf("Assembled program is %d instructions long", len(out))
+
+	bs, err := ioutil.ReadFile(allInstructionsExpected)
+	if err != nil {
+		t.Fatalf("reading %s: %s", allInstructionsExpected, err)
+	}
+	// First statement is the number of statements, last statement is
+	// empty. We just ignore both and rely on slice length.
+	stmts := strings.Split(string(bs), ",")
+	if len(stmts)-2 != len(out) {
+		t.Fatalf("test program lengths don't match: %s has %d, Go implementation has %d", allInstructionsExpected, len(stmts)-2, len(allInstructions))
+	}
+
+	for i, stmt := range stmts[1 : len(stmts)-2] {
+		nums := strings.Split(stmt, " ")
+		if len(nums) != 4 {
+			t.Fatalf("malformed instruction %d in %s: %s", i+1, allInstructionsExpected, stmt)
+		}
+
+		actual := out[i]
+
+		op, err := strconv.ParseUint(nums[0], 10, 16)
+		if err != nil {
+			t.Fatalf("malformed opcode %s in instruction %d of %s", nums[0], i+1, allInstructionsExpected)
+		}
+		if actual.Op != uint16(op) {
+			t.Errorf("opcode mismatch on instruction %d (%#v): got 0x%02x, want 0x%02x", i+1, allInstructions[i], actual.Op, op)
+		}
+
+		jt, err := strconv.ParseUint(nums[1], 10, 8)
+		if err != nil {
+			t.Fatalf("malformed jt offset %s in instruction %d of %s", nums[1], i+1, allInstructionsExpected)
+		}
+		if actual.Jt != uint8(jt) {
+			t.Errorf("jt mismatch on instruction %d (%#v): got %d, want %d", i+1, allInstructions[i], actual.Jt, jt)
+		}
+
+		jf, err := strconv.ParseUint(nums[2], 10, 8)
+		if err != nil {
+			t.Fatalf("malformed jf offset %s in instruction %d of %s", nums[2], i+1, allInstructionsExpected)
+		}
+		if actual.Jf != uint8(jf) {
+			t.Errorf("jf mismatch on instruction %d (%#v): got %d, want %d", i+1, allInstructions[i], actual.Jf, jf)
+		}
+
+		k, err := strconv.ParseUint(nums[3], 10, 32)
+		if err != nil {
+			t.Fatalf("malformed constant %s in instruction %d of %s", nums[3], i+1, allInstructionsExpected)
+		}
+		if actual.K != uint32(k) {
+			t.Errorf("constant mismatch on instruction %d (%#v): got %d, want %d", i+1, allInstructions[i], actual.K, k)
+		}
+	}
+}
+
+// Check that assembly and disassembly match each other.
+//
+// Because we offer "fake" jump conditions that don't appear in the
+// machine code, disassembly won't be a 1:1 match with the original
+// source, although the behavior will be identical. However,
+// reassembling the disassembly should produce an identical program.
+func TestAsmDisasm(t *testing.T) {
+	prog1, err := Assemble(allInstructions)
+	if err != nil {
+		t.Fatalf("assembly of allInstructions program failed: %s", err)
+	}
+	t.Logf("Assembled program is %d instructions long", len(prog1))
+
+	src, allDecoded := Disassemble(prog1)
+	if !allDecoded {
+		t.Errorf("Disassemble(Assemble(allInstructions)) produced unrecognized instructions:")
+		for i, inst := range src {
+			if r, ok := inst.(RawInstruction); ok {
+				t.Logf("  insn %d, %#v --> %#v", i+1, allInstructions[i], r)
+			}
+		}
+	}
+
+	prog2, err := Assemble(src)
+	if err != nil {
+		t.Fatalf("assembly of Disassemble(Assemble(allInstructions)) failed: %s", err)
+	}
+
+	if len(prog2) != len(prog1) {
+		t.Fatalf("disassembly changed program size: %d insns before, %d insns after", len(prog1), len(prog2))
+	}
+	if !reflect.DeepEqual(prog1, prog2) {
+		t.Errorf("program mutated by disassembly:")
+		for i := range prog2 {
+			if !reflect.DeepEqual(prog1[i], prog2[i]) {
+				t.Logf("  insn %d, s: %#v, p1: %#v, p2: %#v", i+1, allInstructions[i], prog1[i], prog2[i])
+			}
+		}
+	}
+}
--- a/bpf/testdata/all_instructions.bpf
+++ b/bpf/testdata/all_instructions.bpf
+50,0 0 0 42,1 0 0 42,96 0 0 3,97 0 0 3,48 0 0 42,40 0 0 42,32 0 0 42,80 0 0 42,72 0 0 42,64 0 0 42,177 0 0 42,128 0 0 0,32 0 0 4294963200,32 0 0 4294963204,32 0 0 4294963256,2 0 0 3,3 0 0 3,4 0 0 42,20 0 0 42,36 0 0 42,52 0 0 42,68 0 0 42,84 0 0 42,100 0 0 42,116 0 0 42,148 0 0 42,164 0 0 42,12 0 0 0,28 0 0 0,44 0 0 0,60 0 0 0,76 0 0 0,92 0 0 0,108 0 0 0,124 0 0 0,156 0 0 0,172 0 0 0,132 0 0 0,5 0 0 10,21 8 9 42,21 0 8 42,53 0 7 42,37 0 6 42,37 4 5 42,53 3 4 42,69 2 3 42,7 0 0 0,135 0 0 0,22 0 0 0,6 0 0 0,
--- a/bpf/testdata/all_instructions.txt
+++ b/bpf/testdata/all_instructions.txt
+# This filter is compiled to all_instructions.bpf by the `bpf_asm`
+# tool, which can be found in the linux kernel source tree under
+# tools/net.
+
+# Load immediate
+ld #42
+ldx #42
+
+# Load scratch
+ld M[3]
+ldx M[3]
+
+# Load absolute
+ldb [42]
+ldh [42]
+ld [42]
+
+# Load indirect
+ldb [x + 42]
+ldh [x + 42]
+ld [x + 42]
+
+# Load IPv4 header length
+ldx 4*([42]&0xf)
+
+# Run extension function
+ld #len
+ld #proto
+ld #type
+ld #rand
+
+# Store scratch
+st M[3]
+stx M[3]
+
+# A <op> constant
+add #42
+sub #42
+mul #42
+div #42
+or #42
+and #42
+lsh #42
+rsh #42
+mod #42
+xor #42
+
+# A <op> X
+add x
+sub x
+mul x
+div x
+or x
+and x
+lsh x
+rsh x
+mod x
+xor x
+
+# !A
+neg
+
+# Jumps
+ja end
+jeq #42,prev,end
+jne #42,end
+jlt #42,end
+jle #42,end
+jgt #42,prev,end
+jge #42,prev,end
+jset #42,prev,end
+
+# Register transfers
+tax
+txa
+
+# Returns
+prev: ret a
+end: ret #42