Commit b85147cd authored by Rob Pike

change the encoding of uints to simplify overflow checking and to make them

easier and faster to read.  they are now either a one-byte value or an n-byte value
preceded by a byte holding -n.

R=rsc
DELTA=150  (45 added, 7 deleted, 98 changed)
OCL=32381
CL=32387
parent 189153ed
......@@ -36,15 +36,17 @@ O1=\
type.$O\
O2=\
decode.$O\
encode.$O\
O3=\
decoder.$O\
decode.$O\
encoder.$O\
O4=\
decoder.$O\
phases: a1 a2 a3
phases: a1 a2 a3 a4
_obj$D/gob.a: phases
a1: $(O1)
......@@ -52,13 +54,17 @@ a1: $(O1)
rm -f $(O1)
a2: $(O2)
$(AR) grc _obj$D/gob.a decode.$O encode.$O
$(AR) grc _obj$D/gob.a encode.$O
rm -f $(O2)
a3: $(O3)
$(AR) grc _obj$D/gob.a decoder.$O encoder.$O
$(AR) grc _obj$D/gob.a decode.$O encoder.$O
rm -f $(O3)
a4: $(O4)
$(AR) grc _obj$D/gob.a decoder.$O
rm -f $(O4)
newpkg: clean
mkdir -p _obj$D
......@@ -68,6 +74,7 @@ $(O1): newpkg
$(O2): a1
$(O3): a2
$(O4): a3
$(O5): a4
nuke: clean
rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/gob.a
......
This diff is collapsed.
......@@ -18,6 +18,7 @@ import (
)
var (
errBadUint = os.ErrorString("gob: encoded unsigned integer out of range");
errRange = os.ErrorString("gob: internal error: field numbers out of bounds");
errNotStruct = os.ErrorString("gob: TODO: can only handle structs")
)
......@@ -27,6 +28,14 @@ type decodeState struct {
b *bytes.Buffer;
err os.Error;
fieldnum int; // the last field number read.
buf []byte;
}
// newDecodeState returns a decodeState that reads from b. It also allocates
// a uint64Size-byte scratch buffer, which decodeUint uses to hold the bytes
// of a multi-byte unsigned integer.
func newDecodeState(b *bytes.Buffer) *decodeState {
d := new(decodeState);
d.b = b;
d.buf = make([]byte, uint64Size);
return d;
}
func overflow(name string) os.ErrorString {
......@@ -35,21 +44,34 @@ func overflow(name string) os.ErrorString {
// decodeUintReader reads an encoded unsigned integer from an io.Reader.
// Used only by the Decoder to read the message length.
func decodeUintReader(r io.Reader, oneByte []byte) (x uint64, err os.Error) {
for shift := uint(0);; shift += 7 {
var n int;
n, err = r.Read(oneByte);
if err != nil {
return 0, err
}
b := oneByte[0];
x |= uint64(b) << shift;
if b&0x80 != 0 {
x &^= 0x80 << shift;
break
func decodeUintReader(r io.Reader, buf []byte) (x uint64, err os.Error) {
n1, err := r.Read(buf[0:1]);
if err != nil {
return
}
b := buf[0];
if b <= 0x7f {
return uint64(b), nil
}
nb := -int(int8(b));
if nb > uint64Size {
err = errBadUint;
return;
}
var n int;
n, err = io.ReadFull(r, buf[0:nb]);
if err != nil {
if err == os.EOF {
err = io.ErrUnexpectedEOF
}
return
}
// Could check that the high byte is zero but it's not worth it.
for i := 0; i < n; i++ {
x <<= 8;
x |= uint64(buf[i]);
}
return x, nil;
return
}
// decodeUint reads an encoded unsigned integer from state.r.
......@@ -59,17 +81,23 @@ func decodeUint(state *decodeState) (x uint64) {
if state.err != nil {
return
}
for shift := uint(0);; shift += 7 {
var b uint8;
b, state.err = state.b.ReadByte();
if state.err != nil {
return 0
}
x |= uint64(b) << shift;
if b&0x80 != 0 {
x &^= 0x80 << shift;
break
}
var b uint8;
b, state.err = state.b.ReadByte();
if b <= 0x7f { // includes state.err != nil
return uint64(b)
}
nb := -int(int8(b));
if nb > uint64Size {
state.err = errBadUint;
return;
}
var n int;
n, state.err = state.b.Read(state.buf[0:nb]);
// Don't need to check error; it's safe to loop regardless.
// Could check that the high byte is zero but it's not worth it.
for i := 0; i < n; i++ {
x <<= 8;
x |= uint64(state.buf[i]);
}
return x;
}
......@@ -338,8 +366,7 @@ func decodeStruct(engine *decEngine, rtyp *reflect.StructType, b *bytes.Buffer,
}
p = *(*uintptr)(up);
}
state := new(decodeState);
state.b = b;
state := newDecodeState(b);
state.fieldnum = -1;
basep := p;
for state.err == nil {
......@@ -368,8 +395,7 @@ func decodeStruct(engine *decEngine, rtyp *reflect.StructType, b *bytes.Buffer,
}
func ignoreStruct(engine *decEngine, b *bytes.Buffer) os.Error {
state := new(decodeState);
state.b = b;
state := newDecodeState(b);
state.fieldnum = -1;
for state.err == nil {
delta := int(decodeUint(state));
......
......@@ -30,7 +30,7 @@ func NewDecoder(r io.Reader) *Decoder {
dec := new(Decoder);
dec.r = r;
dec.seen = make(map[typeId] *wireType);
dec.state = new(decodeState); // buffer set in Decode(); rest is unimportant
dec.state = newDecodeState(nil); // buffer set in Decode(); rest is unimportant
dec.oneByte = make([]byte, 1);
return dec;
......
......@@ -15,6 +15,8 @@ import (
"unsafe";
)
const uint64Size = unsafe.Sizeof(uint64(0))
// The global execution state of an instance of the encoder.
// Field numbers are delta encoded and always increase. The field
// number is initialized to -1 so 0 comes out as delta(1). A delta of
......@@ -23,27 +25,33 @@ type encoderState struct {
b *bytes.Buffer;
err os.Error; // error encountered during encoding;
fieldnum int; // the last field number written.
buf [16]byte; // buffer used by the encoder; here to avoid allocation.
buf [1+uint64Size]byte; // buffer used by the encoder; here to avoid allocation.
}
// Integers encode as a variant of Google's protocol buffer varint (varvarint?).
// The variant is that the continuation bytes have a zero top bit instead of a one.
// That way there's only one bit to clear and the value is a little easier to see if
// you're the unfortunate sort of person who must read the hex to debug.
// Unsigned integers have a two-state encoding. If the number is less
// than 128 (0 through 0x7F), its value is written directly.
// Otherwise the value is written in big-endian byte order preceded
// by the byte length, negated.
// encodeUint writes an encoded unsigned integer to state.b. Sets state.err.
// If state.err is already non-nil, it does nothing.
func encodeUint(state *encoderState, x uint64) {
var n int;
if state.err != nil {
return
}
for n = 0; x > 0x7F; n++ {
state.buf[n] = uint8(x & 0x7F);
x >>= 7;
if x <= 0x7F {
state.err = state.b.WriteByte(uint8(x));
return;
}
var n, m int;
m = uint64Size;
for n = 1; x > 0; n++ {
state.buf[m] = uint8(x & 0xFF);
x >>= 8;
m--;
}
state.buf[n] = 0x80 | uint8(x);
n, state.err = state.b.Write(state.buf[0:n+1]);
state.buf[m] = uint8(-(n-1));
n, state.err = state.b.Write(state.buf[m:uint64Size+1]);
}
// encodeInt writes an encoded signed integer to state.w.
......
......@@ -78,11 +78,11 @@
The rest of this comment documents the encoding, details that are not important
for most users. Details are presented bottom-up.
An unsigned integer is encoded as an arbitrary-precision, variable-length sequence
of bytes. It is sent in little-endian order (low bits first), with seven bits per
byte. The high bit of each byte is zero, except that the high bit of the final
(highest precision) byte of the encoding will be set. Thus 0 is transmitted as
(80), 7 is transmitted as (87) and 256=2*128 is transmitted as (00 82).
An unsigned integer is sent one of two ways. If it is less than 128, it is sent
as a byte with that value. Otherwise it is sent as a minimal-length big-endian
(high byte first) byte stream holding the value, preceded by one byte holding the
byte count, negated. Thus 0 is transmitted as (00), 7 is transmitted as (07) and
256 is transmitted as (FE 01 00).
A boolean is encoded within an unsigned integer: 0 for false, 1 for true.
......
......@@ -50,8 +50,7 @@ func TestBasicEncoder(t *testing.T) {
}
// Decode the result by hand to verify;
state := new(decodeState);
state.b = b;
state := newDecodeState(b);
// The output should be:
// 0) The length, 38.
length := decodeUint(state);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment