cmd/compile/internal/gc: escape binary export data so it contains no '$'

Necessary to ensure that subsequent tools can continue to find then end of the export data section simply by searching for "$$". Adjusted gcimporter used by go/types accordingly. Also, fixed a bug in gcimporter related to reading export data in debug format. Change-Id: Iaea4ed05edd8a5bab28ebe5b19a4740f5e537d35 Reviewed-on: https://go-review.googlesource.com/16283Reviewed-by: Chris Manghane <cmang@golang.org>

cmd/compile/internal/gc: escape binary export data so it contains no '$'
Necessary to ensure that subsequent tools can continue to find then end of the export data section simply by searching for "$$". Adjusted gcimporter used by go/types accordingly. Also, fixed a bug in gcimporter related to reading export data in debug format. Change-Id: Iaea4ed05edd8a5bab28ebe5b19a4740f5e537d35 Reviewed-on: https://go-review.googlesource.com/16283Reviewed-by: Chris Manghane <cmang@golang.org>
e6ccfc1a · Robert Griesemer · d3df04cd · e6ccfc1a · e6ccfc1a · e6ccfc1a
Commit e6ccfc1a authored Oct 23, 2015 by Robert Griesemer
4 changed files
--- a/src/cmd/compile/internal/gc/bexport.go
+++ b/src/cmd/compile/internal/gc/bexport.go
@@ -828,10 +828,8 @@ func (p *exporter) string(s string) {
 		p.tracef("%q ", s)
 	}
 	p.rawInt64(int64(len(s)))
-	w, err := obj.Bwritestring(p.out, s)
-	p.written += w
-	if w != len(s) || err != nil {
-		Fatalf("write error: %v (wrote %d bytes of %d)", err, w, len(s))
+	for i := 0; i < len(s); i++ {
+		p.byte(s[i])
 	}
 }

@@ -843,22 +841,39 @@ func (p *exporter) marker(m byte) {
 	p.rawInt64(int64(p.written))
 }

-func (p *exporter) byte(b byte) {
-	obj.Bputc(p.out, b)
-	p.written++
-}
-
 // rawInt64 should only be used by low-level encoders
 func (p *exporter) rawInt64(x int64) {
 	var tmp [binary.MaxVarintLen64]byte
 	n := binary.PutVarint(tmp[:], x)
-	w, err := p.out.Write(tmp[:n])
-	p.written += w
-	if err != nil {
-		Fatalf("write error: %v", err)
+	for i := 0; i < n; i++ {
+		p.byte(tmp[i])
 	}
 }

+// byte is the bottleneck interface to write to p.out.
+// byte escapes b as follows (any encoding does that
+// hides '$'):
+//
+//	'$'  => '|' 'S'
+//	'|'  => '|' '|'
+//
+// Necessary so other tools can find the end of the
+// export data by searching for "$$".
+func (p *exporter) byte(b byte) {
+	switch b {
+	case '$':
+		// write '$' as '|' 'S'
+		b = 'S'
+		fallthrough
+	case '|':
+		// write '|' as '|' '|'
+		obj.Bputc(p.out, '|')
+		p.written++
+	}
+	obj.Bputc(p.out, b)
+	p.written++
+}
+
 // tracef is like fmt.Printf but it rewrites the format string
 // to take care of indentation.
 func (p *exporter) tracef(format string, args ...interface{}) {

--- a/src/cmd/compile/internal/gc/bimport.go
+++ b/src/cmd/compile/internal/gc/bimport.go
@@ -573,10 +573,8 @@ func (p *importer) string() string {
 		} else {
 			p.buf = p.buf[:n]
 		}
-		r := obj.Bread(p.in, p.buf)
-		p.read += r
-		if r != n {
-			Fatalf("read error: read %d bytes of %d", r, n)
+		for i := 0; i < n; i++ {
+			p.buf[i] = p.byte()
 		}
 		return string(p.buf)
 	}
@@ -595,15 +593,6 @@ func (p *importer) marker(want byte) {
 	}
 }

-func (p *importer) byte() byte {
-	if c := obj.Bgetc(p.in); c >= 0 {
-		p.read++
-		return byte(c)
-	}
-	Fatalf("read error")
-	return 0
-}
-
 // rawInt64 should only be used by low-level decoders
 func (p *importer) rawInt64() int64 {
 	i, err := binary.ReadVarint(p)
@@ -617,3 +606,29 @@ func (p *importer) rawInt64() int64 {
 func (p *importer) ReadByte() (byte, error) {
 	return p.byte(), nil
 }
+
+// byte is the bottleneck interface for reading from p.in.
+// It unescapes '|' 'S' to '$' and '|' '|' to '|'.
+func (p *importer) byte() byte {
+	c := obj.Bgetc(p.in)
+	p.read++
+	if c < 0 {
+		Fatalf("read error")
+	}
+	if c == '|' {
+		c = obj.Bgetc(p.in)
+		p.read++
+		if c < 0 {
+			Fatalf("read error")
+		}
+		switch c {
+		case 'S':
+			c = '$'
+		case '|':
+			// nothing to do
+		default:
+			Fatalf("unexpected escape sequence in export data")
+		}
+	}
+	return byte(c)
+}
--- a/src/cmd/compile/internal/gc/export.go
+++ b/src/cmd/compile/internal/gc/export.go
@@ -375,13 +375,9 @@ func dumpexport() {
 			if n, err := bout.Write(copy.Bytes()); n != size || err != nil {
 				Fatalf("error writing export data: got %d bytes, want %d bytes, err = %v", n, size, err)
 			}
-
-			// verify there's no "\n$$\n" inside the export data
-			// TODO(gri) fragile - the end marker needs to be fixed
-			// TODO(gri) investigate if exporting a string containing "\n$$\n"
-			//           causes problems (old and new format)
-			if bytes.Index(copy.Bytes(), []byte("\n$$\n")) >= 0 {
-				Fatalf("export data contains end marker in its midst")
+			// export data must contain no '$' so that we can find the end by searching for "$$"
+			if bytes.IndexByte(copy.Bytes(), '$') >= 0 {
+				Fatalf("export data contains $")
 			}

 			// verify that we can read the copied export data back in

--- a/src/go/internal/gcimporter/bimport.go
+++ b/src/go/internal/gcimporter/bimport.go
@@ -20,27 +20,24 @@ import (
 // If data is obviously malformed, an error is returned but in
 // general it is not recommended to call BImportData on untrusted data.
 func BImportData(imports map[string]*types.Package, data []byte, path string) (int, *types.Package, error) {
-	// determine low-level encoding format
-	read := 0
-	var format byte = 'm' // missing format
-	if len(data) > 0 {
-		format = data[0]
-		data = data[1:]
-		read++
+	p := importer{
+		imports: imports,
+		data:    data,
 	}
-	if format != 'c' && format != 'd' {
-		return read, nil, fmt.Errorf("invalid encoding format in export data: got %q; want 'c' or 'd'", format)
+	p.buf = p.bufarray[:]
+
+	// read low-level encoding format
+	switch format := p.byte(); format {
+	case 'c':
+		// compact format - nothing to do
+	case 'd':
+		p.debugFormat = true
+	default:
+		return p.read, nil, fmt.Errorf("invalid encoding format in export data: got %q; want 'c' or 'd'", format)
 	}

 	// --- generic export data ---

-	p := importer{
-		imports:     imports,
-		data:        data,
-		debugFormat: format == 'd',
-		read:        read,
-	}
-
 	if v := p.string(); v != "v0" {
 		return p.read, nil, fmt.Errorf("unknown version: %s", v)
 	}
@@ -103,6 +100,8 @@ func BImportData(imports map[string]*types.Package, data []byte, path string) (i
 		_ = p.typ().(*types.Named)
 	}

+	// ignore compiler-specific import data
+
 	// complete interfaces
 	for _, typ := range p.typList {
 		if it, ok := typ.(*types.Interface); ok {
@@ -122,10 +121,12 @@ func BImportData(imports map[string]*types.Package, data []byte, path string) (i
 }

 type importer struct {
-	imports map[string]*types.Package
-	data    []byte
-	pkgList []*types.Package
-	typList []types.Type
+	imports  map[string]*types.Package
+	data     []byte
+	buf      []byte   // for reading strings
+	bufarray [64]byte // initial underlying array for buf, large enough to avoid allocation when compiling std lib
+	pkgList  []*types.Package
+	typList  []types.Type

 	debugFormat bool
 	read        int // bytes read
@@ -440,7 +441,7 @@ func exported(name string) bool {
 }

 func (p *importer) value() constant.Value {
-	switch kind := constant.Kind(p.int()); kind {
+	switch tag := p.tagOrIndex(); tag {
 	case falseTag:
 		return constant.MakeBool(false)
 	case trueTag:
@@ -456,7 +457,7 @@ func (p *importer) value() constant.Value {
 	case stringTag:
 		return constant.MakeString(p.string())
 	default:
-		panic(fmt.Sprintf("unexpected value kind %d", kind))
+		panic(fmt.Sprintf("unexpected value tag %d", tag))
 	}
 }

@@ -517,7 +518,11 @@ func (p *importer) tagOrIndex() int {
 }

 func (p *importer) int() int {
-	return int(p.int64())
+	x := p.int64()
+	if int64(int(x)) != x {
+		panic("exported integer too large")
+	}
+	return int(x)
 }

 func (p *importer) int64() int64 {
@@ -533,21 +538,25 @@ func (p *importer) string() string {
 		p.marker('s')
 	}

-	var b []byte
 	if n := int(p.rawInt64()); n > 0 {
-		b = p.data[:n]
-		p.data = p.data[n:]
-		p.read += n
+		if cap(p.buf) < n {
+			p.buf = make([]byte, n)
+		} else {
+			p.buf = p.buf[:n]
+		}
+		for i := 0; i < n; i++ {
+			p.buf[i] = p.byte()
+		}
+		return string(p.buf)
 	}
-	return string(b)
+
+	return ""
 }

 func (p *importer) marker(want byte) {
-	if got := p.data[0]; got != want {
+	if got := p.byte(); got != want {
 		panic(fmt.Sprintf("incorrect marker: got %c; want %c (pos = %d)", got, want, p.read))
 	}
-	p.data = p.data[1:]
-	p.read++

 	pos := p.read
 	if n := int(p.rawInt64()); n != pos {
@@ -557,12 +566,41 @@ func (p *importer) marker(want byte) {

 // rawInt64 should only be used by low-level decoders
 func (p *importer) rawInt64() int64 {
-	i, n := binary.Varint(p.data)
-	p.data = p.data[n:]
-	p.read += n
+	i, err := binary.ReadVarint(p)
+	if err != nil {
+		panic(fmt.Sprintf("read error: %v", err))
+	}
 	return i
 }

+// needed for binary.ReadVarint in rawInt64
+func (p *importer) ReadByte() (byte, error) {
+	return p.byte(), nil
+}
+
+// byte is the bottleneck interface for reading p.data.
+// It unescapes '|' 'S' to '$' and '|' '|' to '|'.
+func (p *importer) byte() byte {
+	b := p.data[0]
+	r := 1
+	if b == '|' {
+		b = p.data[1]
+		r = 2
+		switch b {
+		case 'S':
+			b = '$'
+		case '|':
+			// nothing to do
+		default:
+			panic("unexpected escape sequence in export data")
+		}
+	}
+	p.data = p.data[r:]
+	p.read += r
+	return b
+
+}
+
 // ----------------------------------------------------------------------------
 // Export format