Commit 5c593a32 authored by Robert Griesemer's avatar Robert Griesemer

cmd/compile: first cut at exporting position info

- position info for all exported globals, plus methods and fields
- use delta-encoded line number info in most cases
- canonicalize all strings: each filename appears only once,
  but will also compact other strings (names) to at most one
  occurence in encoding
- positions not yet hooked up when reading in

Also:
- adjusted go/importer (gcimporter)
- some refactoring for better symmetry

Stats:
- comparison of export data size w/o and w/ position info (bytes).
- delta is increase in %
- overall (see bottom of table): 14% increase
- however, the current binary format decreased from
  the original binary format last week by 14%
- compared to original textual format: 65% decrease
  (increase by 14% after decrease by 14% still leads
  to a decrease from original textual format)

(caveat: we used the textual size from last week, assuming
it has not changed - there may be a small error here).

package				w/o pos	w/ pos	delta

archive/tar			4234	4902	16%
archive/zip			6387	7340	15%
bufio				3106	3419	10%
bytes				4362	4757	9%
cmd/addr2line			27	70	159%
cmd/api				12065	13590	13%
cmd/asm				27	64	137%
cmd/asm/internal/arch		9957	11529	16%
cmd/asm/internal/asm		11788	13385	14%
cmd/asm/internal/flags		239	311	30%
cmd/asm/internal/lex		13415	15358	14%
cmd/cgo				13064	15006	15%
cmd/compile			27	67	148%
cmd/compile/internal/amd64	461	869	89%
cmd/compile/internal/arm	5963	7273	22%
cmd/compile/internal/arm64	363	657	81%
cmd/compile/internal/big	7186	8590	20%
cmd/compile/internal/gc		48242	56234	17%
cmd/compile/internal/mips64	367	666	81%
cmd/compile/internal/ppc64	372	721	94%
cmd/compile/internal/s390x	330	569	72%
cmd/compile/internal/ssa	30464	35058	15%
cmd/compile/internal/x86	429	770	79%
cmd/cover			3984	4731	19%
cmd/dist			74	154	108%
cmd/doc				7272	8591	18%
cmd/expdump			27	71	163%
cmd/fix				342	419	23%
cmd/go				8126	9520	17%
cmd/gofmt			27	70	159%
cmd/gofmt2			27	69	156%
cmd/gofmt2/internal/format	702	856	22%
cmd/gofmt2/internal/lexical	2954	3509	19%
cmd/gofmt2/internal/parse	6185	7295	18%
cmd/gofmt2/internal/syntax	3533	4738	34%
cmd/gofmt2/internal/test	540	615	14%
cmd/internal/bio		5395	6060	12%
cmd/internal/gcprog		533	663	24%
cmd/internal/goobj		1022	1277	25%
cmd/internal/obj		10951	12825	17%
cmd/internal/obj/arm		8612	9985	16%
cmd/internal/obj/arm64		15814	17638	12%
cmd/internal/obj/mips		10928	12487	14%
cmd/internal/obj/ppc64		13576	15277	13%
cmd/internal/obj/s390x		16513	18708	13%
cmd/internal/obj/x86		21152	23482	11%
cmd/internal/objfile		14442	16505	14%
cmd/internal/pprof/commands	1663	1885	13%
cmd/internal/pprof/driver	9517	10789	13%
cmd/internal/pprof/fetch	7632	8635	13%
cmd/internal/pprof/plugin	13150	14809	13%
cmd/internal/pprof/profile	7004	8248	18%
cmd/internal/pprof/report	7763	8942	15%
cmd/internal/pprof/svg		1332	1534	15%
cmd/internal/pprof/symbolizer	7376	8439	14%
cmd/internal/pprof/symbolz	6970	7976	14%
cmd/internal/pprof/tempfile	3645	4093	12%
cmd/internal/sys		505	619	23%
cmd/internal/unvendor/golang.org/x/arch/arm/armasm	73951	79188	7%
cmd/internal/unvendor/golang.org/x/arch/x86/x86asm	10140	11738	16%
cmd/link			27	64	137%
cmd/link/internal/amd64		9317	11034	18%
cmd/link/internal/arm		110	213	94%
cmd/link/internal/arm64		112	219	96%
cmd/link/internal/ld		53524	60149	12%
cmd/link/internal/mips64	113	222	96%
cmd/link/internal/ppc64		113	220	95%
cmd/link/internal/s390x		112	219	96%
cmd/link/internal/x86		110	212	93%
cmd/nm				27	61	126%
cmd/objdump			27	68	152%
cmd/pack			4141	4688	13%
cmd/pprof			27	67	148%
cmd/trace			624	842	35%
cmd/vet				11194	13140	17%
cmd/vet/internal/whitelist	52	113	117%
cmd/yacc			1141	1317	15%
compress/bzip2			2101	2484	18%
compress/flate			3619	4336	20%
compress/gzip			6261	7111	14%
compress/lzw			276	401	45%
compress/zlib			3630	4158	15%
container/heap			187	250	34%
container/list			1370	1506	10%
container/ring			466	546	17%
context				3005	3338	11%
crypto				728	856	18%
crypto/aes			181	321	77%
crypto/cipher			744	1163	56%
crypto/des			220	320	45%
crypto/dsa			4526	4990	10%
crypto/ecdsa			5341	5982	12%
crypto/elliptic			4969	5593	13%
crypto/hmac			188	250	33%
crypto/md5			560	706	26%
crypto/rand			4218	4746	13%
crypto/rc4			214	321	50%
crypto/rsa			5648	6355	13%
crypto/sha1			597	751	26%
crypto/sha256			228	351	54%
crypto/sha512			354	484	37%
crypto/subtle			586	621	6%
crypto/tls			20909	23438	12%
crypto/x509			14862	16857	13%
crypto/x509/pkix		8384	9278	11%
database/sql			6721	7715	15%
database/sql/driver		1243	1535	23%
debug/dwarf			7867	9153	16%
debug/elf			25479	28025	10%
debug/gosym			1887	2267	20%
debug/macho			7222	8846	22%
debug/pe			6921	8081	17%
debug/plan9obj			1084	1319	22%
encoding			217	280	29%
encoding/ascii85		587	722	23%
encoding/asn1			1043	1268	22%
encoding/base32			929	1112	20%
encoding/base64			1166	1368	17%
encoding/binary			2168	2410	11%
encoding/csv			3761	4203	12%
encoding/gob			11304	12936	14%
encoding/hex			510	606	19%
encoding/json			9965	11395	14%
encoding/pem			202	266	32%
encoding/xml			11817	13361	13%
errors				126	170	35%
expvar				930	1142	23%
flag				5905	6519	10%
fmt				1027	1190	16%
go/ast				12910	15541	20%
go/build			5460	6173	13%
go/constant			1645	1816	10%
go/doc				3107	3882	25%
go/format			1416	1729	22%
go/importer			1426	1668	17%
go/internal/gccgoimporter	1624	2028	25%
go/internal/gcimporter		2650	3095	17%
go/parser			6220	7073	14%
go/printer			1924	2306	20%
go/scanner			3137	3602	15%
go/token			3053	3474	14%
go/types			21793	25561	17%
hash				234	327	40%
hash/adler32			465	553	19%
hash/crc32			668	817	22%
hash/crc64			630	727	15%
hash/fnv			1413	1582	12%
html				76	114	50%
html/template			14382	16457	14%
image				10248	11409	11%
image/color			2247	2562	14%
image/color/palette		107	169	58%
image/draw			2313	2494	8%
image/gif			3079	3450	12%
image/internal/imageutil	3136	3456	10%
image/jpeg			2349	2735	16%
image/png			2404	2695	12%
index/suffixarray		4978	5596	12%
internal/race			225	278	24%
internal/singleflight		551	697	26%
internal/syscall/windows/sysdll	97	166	71%
internal/testenv		4488	5052	13%
internal/trace			1392	1680	21%
io				2811	3318	18%
io/ioutil			3988	4467	12%
log				3532	3907	11%
log/syslog			4247	4775	12%
math				3021	4499	49%
math/big			7250	8456	17%
math/cmplx			1034	1617	56%
math/rand			734	885	21%
mime				1889	2194	16%
mime/multipart			4313	4849	12%
mime/quotedprintable		1758	1996	14%
net				15686	18617	19%
net/http			42182	47848	13%
net/http/cgi			19496	21768	12%
net/http/cookiejar		4615	5248	14%
net/http/fcgi			17758	19771	11%
net/http/httptest		26108	29350	12%
net/http/httputil		20732	23286	12%
net/http/internal		2195	2497	14%
net/http/pprof			17596	19545	11%
net/internal/socktest		1689	2153	27%
net/mail			4328	4810	11%
net/rpc				24328	27249	12%
net/rpc/jsonrpc			11052	12438	13%
net/smtp			17127	19174	12%
net/textproto			3705	4329	17%
net/url				1193	1371	15%
os				8493	10113	19%
os/exec				6625	7532	14%
os/signal			137	236	72%
os/user				529	761	44%
path				295	372	26%
path/filepath			3452	3952	14%
reflect				5091	6028	18%
regexp				4848	5585	15%
regexp/syntax			2590	3076	19%
runtime				8721	11598	33%
runtime/cgo			17	17	0%
runtime/debug			2721	3130	15%
runtime/internal/atomic		569	704	24%
runtime/internal/sys		1874	2318	24%
runtime/pprof			478	582	22%
runtime/race			18	18	0%
runtime/trace			95	146	54%
sort				1052	1215	15%
strconv				1389	1667	20%
strings				3372	3772	12%
sync				946	1371	45%
sync/atomic			962	1079	12%
syscall				41574	45613	10%
testing				6184	7243	17%
testing/iotest			883	1116	26%
testing/quick			4659	5443	17%
text/scanner			2930	3269	12%
text/tabwriter			2333	2607	12%
text/template			13335	15274	15%
text/template/parse		8270	9285	12%
time				4687	5313	13%
unicode				3831	4355	14%
unicode/utf16			530	584	10%
unicode/utf8			872	946	8%
vendor/golang.org/x/net/http2/hpack	3386	3970	17%

				1295440	1481566	14%
orig. textual			4253585	1481566	-65%
orig. binary			1724071	1481566 -14%

Change-Id: I4177c6511cc57ebe5eb80c89bf3aefc83376ce86
Reviewed-on: https://go-review.googlesource.com/22096Reviewed-by: 's avatarMatthew Dempsky <mdempsky@google.com>
parent ac8127d7
......@@ -36,25 +36,21 @@ If the field is a pointer to another object, that object is serialized,
recursively. Otherwise the field is written. Non-pointer fields are all
encoded as integer or string values.
Only packages and types may be referred to more than once. When getting
to a package or type that was not serialized before, an integer _index_
Some objects (packages, types) may be referred to more than once. When
reaching an object that was not serialized before, an integer _index_
is assigned to it, starting at 0. In this case, the encoding starts
with an integer _tag_ < 0. The tag value indicates the kind of object
(package or type) that follows and that this is the first time that we
see this object. If the package or tag was already serialized, the encoding
starts with the respective package or type index >= 0. An importer can
trivially determine if a package or type needs to be read in for the first
time (tag < 0) and entered into the respective package or type table, or
if the package or type was seen already (index >= 0), in which case the
index is used to look up the object in a table.
that follows and that this is the first time that we see this object.
If the object was already serialized, the encoding is simply the object
index >= 0. An importer can trivially determine if an object needs to
be read in for the first time (tag < 0) and entered into the respective
object table, or if the object was seen already (index >= 0), in which
case the index is used to look up the object in a table.
Before exporting or importing, the type tables are populated with the
predeclared types (int, string, error, unsafe.Pointer, etc.). This way
they are automatically encoded with a known and fixed type index.
TODO(gri) We may consider using the same sharing for other items
that are written out, such as strings, or possibly symbols (*Sym).
Encoding format:
The export data starts with a single byte indicating the encoding format
......@@ -73,11 +69,17 @@ the previously imported type pointer so that we have exactly one version
(i.e., one pointer) for each named type (and read but discard the current
type encoding). Unnamed types simply encode their respective fields.
In the encoding, some lists start with the list length (incl. strings).
Some lists are terminated with an end marker (usually for lists where
we may not know the length a priori).
In the encoding, some lists start with the list length. Some lists are
terminated with an end marker (usually for lists where we may not know
the length a priori).
Integers use variable-length encoding for compact representation.
All integer values use variable-length encoding for compact representation.
Strings are canonicalized similar to objects that may occur multiple times:
If the string was exported already, it is represented by its index only.
Otherwise, the export data starts with the negative string length (negative,
so we can distinguish from string index), followed by the string bytes.
The empty string is mapped to index 0.
The exporter and importer are completely symmetric in implementation: For
each encoding routine there is a matching and symmetric decoding routine.
......@@ -125,9 +127,15 @@ const exportInlined = true // default: true
type exporter struct {
out *bufio.Writer
pkgIndex map[*Pkg]int // pkg -> pkg index in order of appearance
typIndex map[*Type]int // type -> type index in order of appearance
funcList []*Func // in order of appearance
// object -> index maps, indexed in order of serialization
strIndex map[string]int
pkgIndex map[*Pkg]int
typIndex map[*Type]int
funcList []*Func
// position encoding
prevFile string
prevLine int
// debugging support
written int // bytes written
......@@ -139,6 +147,7 @@ type exporter struct {
func export(out *bufio.Writer, trace bool) int {
p := exporter{
out: out,
strIndex: map[string]int{"": 0}, // empty string is mapped to 0
pkgIndex: make(map[*Pkg]int),
typIndex: make(map[*Type]int),
trace: trace,
......@@ -149,7 +158,7 @@ func export(out *bufio.Writer, trace bool) int {
if debugFormat {
format = 'd'
}
p.byte(format)
p.rawByte(format)
// --- generic export data ---
......@@ -419,6 +428,7 @@ func (p *exporter) obj(sym *Sym) {
}
p.tag(constTag)
p.pos(n)
// TODO(gri) In inlined functions, constants are used directly
// so they should never occur as re-exported objects. We may
// not need the qualified name here. See also comment above.
......@@ -447,6 +457,7 @@ func (p *exporter) obj(sym *Sym) {
if n.Type.Etype == TFUNC && n.Class == PFUNC {
// function
p.tag(funcTag)
p.pos(n)
p.qualifiedName(sym)
sig := sym.Def.Type
......@@ -471,6 +482,7 @@ func (p *exporter) obj(sym *Sym) {
} else {
// variable
p.tag(varTag)
p.pos(n)
p.qualifiedName(sym)
p.typ(sym.Def.Type)
}
......@@ -480,6 +492,26 @@ func (p *exporter) obj(sym *Sym) {
}
}
func (p *exporter) pos(n *Node) {
var file string
var line int
if n != nil {
file, line = Ctxt.LineHist.FileLine(int(n.Lineno))
}
if file == p.prevFile && line != p.prevLine {
// common case: write delta-encoded line number
p.int(line - p.prevLine) // != 0
} else {
// uncommon case: filename changed, or line didn't change
p.int(0)
p.string(file)
p.int(line)
p.prevFile = file
}
p.prevLine = line
}
func isInlineable(n *Node) bool {
if exportInlined && n != nil && n.Func != nil && len(n.Func.Inl.Slice()) != 0 {
// when lazily typechecking inlined bodies, some re-exported ones may not have been typechecked yet.
......@@ -525,13 +557,17 @@ func (p *exporter) typ(t *Type) {
if t.Orig == t {
Fatalf("exporter: predeclared type missing from type map?")
}
// TODO(gri) The assertion below seems incorrect (crashes during all.bash).
// we expect the respective definition to point to us
// TODO(gri) The assertion below is incorrect (crashes during all.bash),
// likely because of symbol shadowing (we expect the respective definition
// to point to us). Determine the correct Def so we get correct position
// info.
// if tsym.Def.Type != t {
// Fatalf("exporter: type definition doesn't point to us?")
// }
p.tag(namedTag)
p.pos(tsym.Def) // TODO(gri) this may not be the correct node - fix and add tests
p.qualifiedName(tsym)
// write underlying type
......@@ -564,6 +600,7 @@ func (p *exporter) typ(t *Type) {
Fatalf("invalid symbol name: %s (%v)", m.Sym.Name, m.Sym)
}
p.pos(m.Sym.Def)
p.fieldSym(m.Sym, false)
sig := m.Type
......@@ -668,8 +705,12 @@ func (p *exporter) fieldList(t *Type) {
}
func (p *exporter) field(f *Field) {
p.pos(f.Sym.Def)
p.fieldName(f.Sym, f)
p.typ(f.Type)
// TODO(gri) Do we care that a non-present tag cannot be distinguished
// from a present but empty ta string? (reflect doesn't seem to make
// a difference). Investigate.
p.note(f.Note)
}
......@@ -697,6 +738,7 @@ func (p *exporter) methodList(t *Type) {
}
func (p *exporter) method(m *Field) {
p.pos(m.Sym.Def)
p.fieldName(m.Sym, m)
p.paramList(m.Type.Params(), false)
p.paramList(m.Type.Results(), false)
......@@ -793,9 +835,6 @@ func (p *exporter) param(q *Field, n int, numbered bool) {
// TODO(gri) This is compiler-specific (escape info).
// Move into compiler-specific section eventually?
// (Not having escape info causes tests to fail, e.g. runtime GCInfoTest)
//
// TODO(gri) The q.Note is much more verbose that necessary and
// adds significantly to export data size. FIX THIS.
p.note(q.Note)
}
......@@ -1497,9 +1536,17 @@ func (p *exporter) string(s string) {
if p.trace {
p.tracef("%q ", s)
}
p.rawInt64(int64(len(s)))
// if we saw the string before, write its index (>= 0)
// (the empty string is mapped to 0)
if i, ok := p.strIndex[s]; ok {
p.rawInt64(int64(i))
return
}
// otherwise, remember string and write its negative length and bytes
p.strIndex[s] = len(p.strIndex)
p.rawInt64(-int64(len(s)))
for i := 0; i < len(s); i++ {
p.byte(s[i])
p.rawByte(s[i])
}
}
......@@ -1507,7 +1554,7 @@ func (p *exporter) string(s string) {
// it easy for a reader to detect if it is "out of sync". Used only
// if debugFormat is set.
func (p *exporter) marker(m byte) {
p.byte(m)
p.rawByte(m)
// Uncomment this for help tracking down the location
// of an incorrect marker when running in debugFormat.
// if p.trace {
......@@ -1521,12 +1568,12 @@ func (p *exporter) rawInt64(x int64) {
var tmp [binary.MaxVarintLen64]byte
n := binary.PutVarint(tmp[:], x)
for i := 0; i < n; i++ {
p.byte(tmp[i])
p.rawByte(tmp[i])
}
}
// byte is the bottleneck interface to write to p.out.
// byte escapes b as follows (any encoding does that
// rawByte is the bottleneck interface to write to p.out.
// rawByte escapes b as follows (any encoding does that
// hides '$'):
//
// '$' => '|' 'S'
......@@ -1534,7 +1581,8 @@ func (p *exporter) rawInt64(x int64) {
//
// Necessary so other tools can find the end of the
// export data by searching for "$$".
func (p *exporter) byte(b byte) {
// rawByte should only be used by low-level encoders.
func (p *exporter) rawByte(b byte) {
switch b {
case '$':
// write '$' as '|' 'S'
......
......@@ -20,13 +20,18 @@ import (
// changes to bimport.go and bexport.go.
type importer struct {
in *bufio.Reader
buf []byte // for reading strings
bufarray [64]byte // initial underlying array for buf, large enough to avoid allocation when compiling std lib
in *bufio.Reader
buf []byte // reused for reading strings
pkgList []*Pkg // in order of appearance
typList []*Type // in order of appearance
funcList []*Node // in order of appearance; nil entry means already declared
// object lists, in order of deserialization
strList []string
pkgList []*Pkg
typList []*Type
funcList []*Node // nil entry means already declared
// position encoding
prevFile string
prevLine int
// debugging support
debugFormat bool
......@@ -35,11 +40,13 @@ type importer struct {
// Import populates importpkg from the serialized package data.
func Import(in *bufio.Reader) {
p := importer{in: in}
p.buf = p.bufarray[:]
p := importer{
in: in,
strList: []string{""}, // empty string is mapped to 0
}
// read low-level encoding format
switch format := p.byte(); format {
switch format := p.rawByte(); format {
case 'c':
// compact format - nothing to do
case 'd':
......@@ -221,6 +228,7 @@ func idealType(typ *Type) *Type {
func (p *importer) obj(tag int) {
switch tag {
case constTag:
p.pos()
sym := p.qualifiedName()
typ := p.typ()
val := p.value(typ)
......@@ -230,11 +238,13 @@ func (p *importer) obj(tag int) {
p.typ()
case varTag:
p.pos()
sym := p.qualifiedName()
typ := p.typ()
importvar(sym, typ)
case funcTag:
p.pos()
sym := p.qualifiedName()
params := p.paramList()
result := p.paramList()
......@@ -268,6 +278,22 @@ func (p *importer) obj(tag int) {
}
}
func (p *importer) pos() {
file := p.prevFile
line := p.prevLine
if delta := p.int(); delta != 0 {
line += delta
} else {
file = p.string()
line = p.int()
p.prevFile = file
}
p.prevLine = line
// TODO(gri) register new position
}
func (p *importer) newtyp(etype EType) *Type {
t := typ(etype)
p.typList = append(p.typList, t)
......@@ -286,6 +312,7 @@ func (p *importer) typ() *Type {
switch i {
case namedTag:
// parser.go:hidden_importsym
p.pos()
tsym := p.qualifiedName()
// parser.go:hidden_pkgtype
......@@ -311,6 +338,7 @@ func (p *importer) typ() *Type {
for i := p.int(); i > 0; i-- {
// parser.go:hidden_fndcl
p.pos()
sym := p.fieldSym()
recv := p.paramList() // TODO(gri) do we need a full param list for the receiver?
......@@ -409,20 +437,19 @@ func (p *importer) qualifiedName() *Sym {
}
// parser.go:hidden_structdcl_list
func (p *importer) fieldList() []*Node {
i := p.int()
if i == 0 {
return nil
}
n := make([]*Node, i)
for i := range n {
n[i] = p.field()
func (p *importer) fieldList() (fields []*Node) {
if n := p.int(); n > 0 {
fields = make([]*Node, n)
for i := range fields {
fields[i] = p.field()
}
}
return n
return
}
// parser.go:hidden_structdcl
func (p *importer) field() *Node {
p.pos()
sym := p.fieldName()
typ := p.typ()
note := p.note()
......@@ -456,20 +483,19 @@ func (p *importer) note() (v Val) {
}
// parser.go:hidden_interfacedcl_list
func (p *importer) methodList() []*Node {
i := p.int()
if i == 0 {
return nil
}
n := make([]*Node, i)
for i := range n {
n[i] = p.method()
func (p *importer) methodList() (methods []*Node) {
if n := p.int(); n > 0 {
methods = make([]*Node, n)
for i := range methods {
methods[i] = p.method()
}
}
return n
return
}
// parser.go:hidden_interfacedcl
func (p *importer) method() *Node {
p.pos()
sym := p.fieldName()
params := p.paramList()
result := p.paramList()
......@@ -1056,29 +1082,31 @@ func (p *importer) int64() int64 {
}
func (p *importer) string() string {
if p.debugFormat {
if debugFormat {
p.marker('s')
}
// TODO(gri) should we intern strings here?
if n := int(p.rawInt64()); n > 0 {
if cap(p.buf) < n {
p.buf = make([]byte, n)
} else {
p.buf = p.buf[:n]
}
for i := range p.buf {
p.buf[i] = p.byte()
}
return string(p.buf)
// if the string was seen before, i is its index (>= 0)
// (the empty string is at index 0)
i := p.rawInt64()
if i >= 0 {
return p.strList[i]
}
return ""
// otherwise, i is the negative string length (< 0)
if n := int(-i); n <= cap(p.buf) {
p.buf = p.buf[:n]
} else {
p.buf = make([]byte, n)
}
for i := range p.buf {
p.buf[i] = p.rawByte()
}
s := string(p.buf)
p.strList = append(p.strList, s)
return s
}
func (p *importer) marker(want byte) {
if got := p.byte(); got != want {
if got := p.rawByte(); got != want {
Fatalf("importer: incorrect marker: got %c; want %c (pos = %d)", got, want, p.read)
}
......@@ -1099,12 +1127,13 @@ func (p *importer) rawInt64() int64 {
// needed for binary.ReadVarint in rawInt64
func (p *importer) ReadByte() (byte, error) {
return p.byte(), nil
return p.rawByte(), nil
}
// byte is the bottleneck interface for reading from p.in.
// rawByte is the bottleneck interface for reading from p.in.
// It unescapes '|' 'S' to '$' and '|' '|' to '|'.
func (p *importer) byte() byte {
// rawByte should only be used by low-level decoders.
func (p *importer) rawByte() byte {
c, err := p.in.ReadByte()
p.read++
if err != nil {
......
......@@ -19,13 +19,18 @@ type importer struct {
imports map[string]*types.Package
data []byte
path string
buf []byte // for reading strings
buf []byte // for reading strings
bufarray [64]byte // initial underlying array for buf, large enough to avoid allocation when compiling std lib
// object lists
strList []string // in order of appearance
pkgList []*types.Package // in order of appearance
typList []types.Type // in order of appearance
pkgList []*types.Package
typList []types.Type
// position encoding
prevFile string
prevLine int
// debugging support
debugFormat bool
read int // bytes read
}
......@@ -39,11 +44,11 @@ func BImportData(imports map[string]*types.Package, data []byte, path string) (i
imports: imports,
data: data,
path: path,
strList: []string{""}, // empty string is mapped to 0
}
p.buf = p.bufarray[:]
// read low-level encoding format
switch format := p.byte(); format {
switch format := p.rawByte(); format {
case 'c':
// compact format - nothing to do
case 'd':
......@@ -160,6 +165,7 @@ func (p *importer) declare(obj types.Object) {
func (p *importer) obj(tag int) {
switch tag {
case constTag:
p.pos()
pkg, name := p.qualifiedName()
typ := p.typ(nil)
val := p.value()
......@@ -169,11 +175,13 @@ func (p *importer) obj(tag int) {
_ = p.typ(nil)
case varTag:
p.pos()
pkg, name := p.qualifiedName()
typ := p.typ(nil)
p.declare(types.NewVar(token.NoPos, pkg, name, typ))
case funcTag:
p.pos()
pkg, name := p.qualifiedName()
params, isddd := p.paramList()
result, _ := p.paramList()
......@@ -185,6 +193,22 @@ func (p *importer) obj(tag int) {
}
}
func (p *importer) pos() {
file := p.prevFile
line := p.prevLine
if delta := p.int(); delta != 0 {
line += delta
} else {
file = p.string()
line = p.int()
p.prevFile = file
}
p.prevLine = line
// TODO(gri) register new position
}
func (p *importer) qualifiedName() (pkg *types.Package, name string) {
name = p.string()
pkg = p.pkg()
......@@ -220,6 +244,7 @@ func (p *importer) typ(parent *types.Package) types.Type {
switch i {
case namedTag:
// read type object
p.pos()
parent, name := p.qualifiedName()
scope := parent.Scope()
obj := scope.Lookup(name)
......@@ -252,6 +277,7 @@ func (p *importer) typ(parent *types.Package) types.Type {
// read associated methods
for i := p.int(); i > 0; i-- {
// TODO(gri) replace this with something closer to fieldName
p.pos()
name := p.string()
if !exported(name) {
p.pkg()
......@@ -293,14 +319,7 @@ func (p *importer) typ(parent *types.Package) types.Type {
t := new(types.Struct)
p.record(t)
n := p.int()
fields := make([]*types.Var, n)
tags := make([]string, n)
for i := range fields {
fields[i] = p.field(parent)
tags[i] = p.string()
}
*t = *types.NewStruct(fields, tags)
*t = *types.NewStruct(p.fieldList(parent))
return t
case pointerTag:
......@@ -332,17 +351,7 @@ func (p *importer) typ(parent *types.Package) types.Type {
panic("unexpected embedded interface")
}
// read methods
methods := make([]*types.Func, p.int())
for i := range methods {
pkg, name := p.fieldName(parent)
params, isddd := p.paramList()
result, _ := p.paramList()
sig := types.NewSignature(nil, params, result, isddd)
methods[i] = types.NewFunc(token.NoPos, pkg, name, sig)
}
t := types.NewInterface(methods, nil)
t := types.NewInterface(p.methodList(parent), nil)
p.typList[n] = t
return t
......@@ -380,7 +389,20 @@ func (p *importer) typ(parent *types.Package) types.Type {
}
}
func (p *importer) fieldList(parent *types.Package) (fields []*types.Var, tags []string) {
if n := p.int(); n > 0 {
fields = make([]*types.Var, n)
tags = make([]string, n)
for i := range fields {
fields[i] = p.field(parent)
tags[i] = p.string()
}
}
return
}
func (p *importer) field(parent *types.Package) *types.Var {
p.pos()
pkg, name := p.fieldName(parent)
typ := p.typ(parent)
......@@ -402,6 +424,25 @@ func (p *importer) field(parent *types.Package) *types.Var {
return types.NewField(token.NoPos, pkg, name, typ, anonymous)
}
func (p *importer) methodList(parent *types.Package) (methods []*types.Func) {
if n := p.int(); n > 0 {
methods = make([]*types.Func, n)
for i := range methods {
methods[i] = p.method(parent)
}
}
return
}
func (p *importer) method(parent *types.Package) *types.Func {
p.pos()
pkg, name := p.fieldName(parent)
params, isddd := p.paramList()
result, _ := p.paramList()
sig := types.NewSignature(nil, params, result, isddd)
return types.NewFunc(token.NoPos, pkg, name, sig)
}
func (p *importer) fieldName(parent *types.Package) (*types.Package, string) {
pkg := parent
if pkg == nil {
......@@ -567,24 +608,28 @@ func (p *importer) string() string {
if p.debugFormat {
p.marker('s')
}
if n := int(p.rawInt64()); n > 0 {
if cap(p.buf) < n {
p.buf = make([]byte, n)
} else {
p.buf = p.buf[:n]
}
for i := 0; i < n; i++ {
p.buf[i] = p.byte()
}
return string(p.buf)
// if the string was seen before, i is its index (>= 0)
// (the empty string is at index 0)
i := p.rawInt64()
if i >= 0 {
return p.strList[i]
}
return ""
// otherwise, i is the negative string length (< 0)
if n := int(-i); n <= cap(p.buf) {
p.buf = p.buf[:n]
} else {
p.buf = make([]byte, n)
}
for i := range p.buf {
p.buf[i] = p.rawByte()
}
s := string(p.buf)
p.strList = append(p.strList, s)
return s
}
func (p *importer) marker(want byte) {
if got := p.byte(); got != want {
if got := p.rawByte(); got != want {
panic(fmt.Sprintf("incorrect marker: got %c; want %c (pos = %d)", got, want, p.read))
}
......@@ -605,12 +650,13 @@ func (p *importer) rawInt64() int64 {
// needed for binary.ReadVarint in rawInt64
func (p *importer) ReadByte() (byte, error) {
return p.byte(), nil
return p.rawByte(), nil
}
// byte is the bottleneck interface for reading p.data.
// It unescapes '|' 'S' to '$' and '|' '|' to '|'.
func (p *importer) byte() byte {
// rawByte should only be used by low-level decoders.
func (p *importer) rawByte() byte {
b := p.data[0]
r := 1
if b == '|' {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment