Commit b85147cd authored by Rob Pike

change the encoding of uints to simplify overflow checking and to make them

easier and faster to read.  they are now either a one-byte value or an n-byte value
preceded by a byte holding -n.

R=rsc
DELTA=150  (45 added, 7 deleted, 98 changed)
OCL=32381
CL=32387
parent 189153ed
...@@ -36,15 +36,17 @@ O1=\ ...@@ -36,15 +36,17 @@ O1=\
type.$O\ type.$O\
O2=\ O2=\
decode.$O\
encode.$O\ encode.$O\
O3=\ O3=\
decoder.$O\ decode.$O\
encoder.$O\ encoder.$O\
O4=\
decoder.$O\
phases: a1 a2 a3 phases: a1 a2 a3 a4
_obj$D/gob.a: phases _obj$D/gob.a: phases
a1: $(O1) a1: $(O1)
...@@ -52,13 +54,17 @@ a1: $(O1) ...@@ -52,13 +54,17 @@ a1: $(O1)
rm -f $(O1) rm -f $(O1)
a2: $(O2) a2: $(O2)
$(AR) grc _obj$D/gob.a decode.$O encode.$O $(AR) grc _obj$D/gob.a encode.$O
rm -f $(O2) rm -f $(O2)
a3: $(O3) a3: $(O3)
$(AR) grc _obj$D/gob.a decoder.$O encoder.$O $(AR) grc _obj$D/gob.a decode.$O encoder.$O
rm -f $(O3) rm -f $(O3)
a4: $(O4)
$(AR) grc _obj$D/gob.a decoder.$O
rm -f $(O4)
newpkg: clean newpkg: clean
mkdir -p _obj$D mkdir -p _obj$D
...@@ -68,6 +74,7 @@ $(O1): newpkg ...@@ -68,6 +74,7 @@ $(O1): newpkg
$(O2): a1 $(O2): a1
$(O3): a2 $(O3): a2
$(O4): a3 $(O4): a3
$(O5): a4
nuke: clean nuke: clean
rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/gob.a rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/gob.a
......
This diff is collapsed.
...@@ -18,6 +18,7 @@ import ( ...@@ -18,6 +18,7 @@ import (
) )
var ( var (
errBadUint = os.ErrorString("gob: encoded unsigned integer out of range");
errRange = os.ErrorString("gob: internal error: field numbers out of bounds"); errRange = os.ErrorString("gob: internal error: field numbers out of bounds");
errNotStruct = os.ErrorString("gob: TODO: can only handle structs") errNotStruct = os.ErrorString("gob: TODO: can only handle structs")
) )
...@@ -27,6 +28,14 @@ type decodeState struct { ...@@ -27,6 +28,14 @@ type decodeState struct {
b *bytes.Buffer; b *bytes.Buffer;
err os.Error; err os.Error;
fieldnum int; // the last field number read. fieldnum int; // the last field number read.
buf []byte;
}
func newDecodeState(b *bytes.Buffer) *decodeState {
d := new(decodeState);
d.b = b;
d.buf = make([]byte, uint64Size);
return d;
} }
func overflow(name string) os.ErrorString { func overflow(name string) os.ErrorString {
...@@ -35,21 +44,34 @@ func overflow(name string) os.ErrorString { ...@@ -35,21 +44,34 @@ func overflow(name string) os.ErrorString {
// decodeUintReader reads an encoded unsigned integer from an io.Reader. // decodeUintReader reads an encoded unsigned integer from an io.Reader.
// Used only by the Decoder to read the message length. // Used only by the Decoder to read the message length.
func decodeUintReader(r io.Reader, oneByte []byte) (x uint64, err os.Error) { func decodeUintReader(r io.Reader, buf []byte) (x uint64, err os.Error) {
for shift := uint(0);; shift += 7 { n1, err := r.Read(buf[0:1]);
var n int; if err != nil {
n, err = r.Read(oneByte); return
if err != nil { }
return 0, err b := buf[0];
} if b <= 0x7f {
b := oneByte[0]; return uint64(b), nil
x |= uint64(b) << shift; }
if b&0x80 != 0 { nb := -int(int8(b));
x &^= 0x80 << shift; if nb > uint64Size {
break err = errBadUint;
return;
}
var n int;
n, err = io.ReadFull(r, buf[0:nb]);
if err != nil {
if err == os.EOF {
err = io.ErrUnexpectedEOF
} }
return
}
// Could check that the high byte is zero but it's not worth it.
for i := 0; i < n; i++ {
x <<= 8;
x |= uint64(buf[i]);
} }
return x, nil; return
} }
// decodeUint reads an encoded unsigned integer from state.r. // decodeUint reads an encoded unsigned integer from state.r.
...@@ -59,17 +81,23 @@ func decodeUint(state *decodeState) (x uint64) { ...@@ -59,17 +81,23 @@ func decodeUint(state *decodeState) (x uint64) {
if state.err != nil { if state.err != nil {
return return
} }
for shift := uint(0);; shift += 7 { var b uint8;
var b uint8; b, state.err = state.b.ReadByte();
b, state.err = state.b.ReadByte(); if b <= 0x7f { // includes state.err != nil
if state.err != nil { return uint64(b)
return 0 }
} nb := -int(int8(b));
x |= uint64(b) << shift; if nb > uint64Size {
if b&0x80 != 0 { state.err = errBadUint;
x &^= 0x80 << shift; return;
break }
} var n int;
n, state.err = state.b.Read(state.buf[0:nb]);
// Don't need to check error; it's safe to loop regardless.
// Could check that the high byte is zero but it's not worth it.
for i := 0; i < n; i++ {
x <<= 8;
x |= uint64(state.buf[i]);
} }
return x; return x;
} }
...@@ -338,8 +366,7 @@ func decodeStruct(engine *decEngine, rtyp *reflect.StructType, b *bytes.Buffer, ...@@ -338,8 +366,7 @@ func decodeStruct(engine *decEngine, rtyp *reflect.StructType, b *bytes.Buffer,
} }
p = *(*uintptr)(up); p = *(*uintptr)(up);
} }
state := new(decodeState); state := newDecodeState(b);
state.b = b;
state.fieldnum = -1; state.fieldnum = -1;
basep := p; basep := p;
for state.err == nil { for state.err == nil {
...@@ -368,8 +395,7 @@ func decodeStruct(engine *decEngine, rtyp *reflect.StructType, b *bytes.Buffer, ...@@ -368,8 +395,7 @@ func decodeStruct(engine *decEngine, rtyp *reflect.StructType, b *bytes.Buffer,
} }
func ignoreStruct(engine *decEngine, b *bytes.Buffer) os.Error { func ignoreStruct(engine *decEngine, b *bytes.Buffer) os.Error {
state := new(decodeState); state := newDecodeState(b);
state.b = b;
state.fieldnum = -1; state.fieldnum = -1;
for state.err == nil { for state.err == nil {
delta := int(decodeUint(state)); delta := int(decodeUint(state));
......
...@@ -30,7 +30,7 @@ func NewDecoder(r io.Reader) *Decoder { ...@@ -30,7 +30,7 @@ func NewDecoder(r io.Reader) *Decoder {
dec := new(Decoder); dec := new(Decoder);
dec.r = r; dec.r = r;
dec.seen = make(map[typeId] *wireType); dec.seen = make(map[typeId] *wireType);
dec.state = new(decodeState); // buffer set in Decode(); rest is unimportant dec.state = newDecodeState(nil); // buffer set in Decode(); rest is unimportant
dec.oneByte = make([]byte, 1); dec.oneByte = make([]byte, 1);
return dec; return dec;
......
...@@ -15,6 +15,8 @@ import ( ...@@ -15,6 +15,8 @@ import (
"unsafe"; "unsafe";
) )
const uint64Size = unsafe.Sizeof(uint64(0))
// The global execution state of an instance of the encoder. // The global execution state of an instance of the encoder.
// Field numbers are delta encoded and always increase. The field // Field numbers are delta encoded and always increase. The field
// number is initialized to -1 so 0 comes out as delta(1). A delta of // number is initialized to -1 so 0 comes out as delta(1). A delta of
...@@ -23,27 +25,33 @@ type encoderState struct { ...@@ -23,27 +25,33 @@ type encoderState struct {
b *bytes.Buffer; b *bytes.Buffer;
err os.Error; // error encountered during encoding; err os.Error; // error encountered during encoding;
fieldnum int; // the last field number written. fieldnum int; // the last field number written.
buf [16]byte; // buffer used by the encoder; here to avoid allocation. buf [1+uint64Size]byte; // buffer used by the encoder; here to avoid allocation.
} }
// Integers encode as a variant of Google's protocol buffer varint (varvarint?). // Unsigned integers have a two-state encoding. If the number is less
// The variant is that the continuation bytes have a zero top bit instead of a one. // than 128 (0 through 0x7F), its value is written directly.
// That way there's only one bit to clear and the value is a little easier to see if // Otherwise the value is written in big-endian byte order preceded
// you're the unfortunate sort of person who must read the hex to debug. // by the byte length, negated.
// encodeUint writes an encoded unsigned integer to state.b. Sets state.err. // encodeUint writes an encoded unsigned integer to state.b. Sets state.err.
// If state.err is already non-nil, it does nothing. // If state.err is already non-nil, it does nothing.
func encodeUint(state *encoderState, x uint64) { func encodeUint(state *encoderState, x uint64) {
var n int;
if state.err != nil { if state.err != nil {
return return
} }
for n = 0; x > 0x7F; n++ { if x <= 0x7F {
state.buf[n] = uint8(x & 0x7F); state.err = state.b.WriteByte(uint8(x));
x >>= 7; return;
}
var n, m int;
m = uint64Size;
for n = 1; x > 0; n++ {
state.buf[m] = uint8(x & 0xFF);
x >>= 8;
m--;
} }
state.buf[n] = 0x80 | uint8(x); state.buf[m] = uint8(-(n-1));
n, state.err = state.b.Write(state.buf[0:n+1]); n, state.err = state.b.Write(state.buf[m:uint64Size+1]);
} }
// encodeInt writes an encoded signed integer to state.w. // encodeInt writes an encoded signed integer to state.w.
......
...@@ -78,11 +78,11 @@ ...@@ -78,11 +78,11 @@
The rest of this comment documents the encoding, details that are not important The rest of this comment documents the encoding, details that are not important
for most users. Details are presented bottom-up. for most users. Details are presented bottom-up.
An unsigned integer is encoded as an arbitrary-precision, variable-length sequence An unsigned integer is sent one of two ways. If it is less than 128, it is sent
of bytes. It is sent in little-endian order (low bits first), with seven bits per as a byte with that value. Otherwise it is sent as a minimal-length big-endian
byte. The high bit of each byte is zero, except that the high bit of the final (high byte first) byte stream holding the value, preceded by one byte holding the
(highest precision) byte of the encoding will be set. Thus 0 is transmitted as byte count, negated. Thus 0 is transmitted as (00), 7 is transmitted as (07) and
(80), 7 is transmitted as (87) and 256=2*128 is transmitted as (00 82). 256 is transmitted as (FE 01 00).
A boolean is encoded within an unsigned integer: 0 for false, 1 for true. A boolean is encoded within an unsigned integer: 0 for false, 1 for true.
......
...@@ -50,8 +50,7 @@ func TestBasicEncoder(t *testing.T) { ...@@ -50,8 +50,7 @@ func TestBasicEncoder(t *testing.T) {
} }
// Decode the result by hand to verify; // Decode the result by hand to verify;
state := new(decodeState); state := newDecodeState(b);
state.b = b;
// The output should be: // The output should be:
// 0) The length, 38. // 0) The length, 38.
length := decodeUint(state); length := decodeUint(state);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment