Commit 5d118386 authored by Pascal S. de Kloe's avatar Pascal S. de Kloe Committed by Brad Fitzpatrick

encoding/json: read ahead after value consumption

Eliminates the need for an extra scanner, read undo and some other tricks.

name                    old time/op    new time/op    delta
CodeEncoder-12            1.92ms ± 0%    1.91ms ± 1%   -0.65%  (p=0.000 n=17+20)
CodeMarshal-12            2.13ms ± 2%    2.12ms ± 1%   -0.49%  (p=0.038 n=18+17)
CodeDecoder-12            8.55ms ± 2%    8.49ms ± 1%     ~     (p=0.119 n=20+18)
UnicodeDecoder-12          411ns ± 0%     422ns ± 0%   +2.77%  (p=0.000 n=19+15)
DecoderStream-12           320ns ± 1%     307ns ± 1%   -3.80%  (p=0.000 n=18+20)
CodeUnmarshal-12          9.65ms ± 3%    9.58ms ± 3%     ~     (p=0.157 n=20+20)
CodeUnmarshalReuse-12     8.54ms ± 3%    8.56ms ± 2%     ~     (p=0.602 n=20+20)
UnmarshalString-12         110ns ± 1%      87ns ± 2%  -21.53%  (p=0.000 n=16+20)
UnmarshalFloat64-12        101ns ± 1%      77ns ± 2%  -23.08%  (p=0.000 n=19+20)
UnmarshalInt64-12         94.5ns ± 2%    68.4ns ± 1%  -27.60%  (p=0.000 n=20+20)
Issue10335-12              128ns ± 1%     100ns ± 1%  -21.89%  (p=0.000 n=19+18)
Unmapped-12                427ns ± 3%     247ns ± 4%  -42.17%  (p=0.000 n=20+20)
NumberIsValid-12          23.0ns ± 0%    21.7ns ± 0%   -5.73%  (p=0.000 n=20+20)
NumberIsValidRegexp-12     641ns ± 0%     642ns ± 0%   +0.15%  (p=0.003 n=19+19)
EncoderEncode-12          56.9ns ± 0%    55.0ns ± 1%   -3.32%  (p=0.012 n=2+17)

name                    old speed      new speed      delta
CodeEncoder-12          1.01GB/s ± 1%  1.02GB/s ± 1%   +0.71%  (p=0.000 n=18+20)
CodeMarshal-12           913MB/s ± 2%   917MB/s ± 1%   +0.49%  (p=0.038 n=18+17)
CodeDecoder-12           227MB/s ± 2%   229MB/s ± 1%     ~     (p=0.110 n=20+18)
UnicodeDecoder-12       34.1MB/s ± 0%  33.1MB/s ± 0%   -2.73%  (p=0.000 n=19+19)
CodeUnmarshal-12         201MB/s ± 3%   203MB/s ± 3%     ~     (p=0.151 n=20+20)

name                    old alloc/op   new alloc/op   delta
Issue10335-12               320B ± 0%      184B ± 0%  -42.50%  (p=0.000 n=20+20)
Unmapped-12                 568B ± 0%      216B ± 0%  -61.97%  (p=0.000 n=20+20)
EncoderEncode-12           0.00B          0.00B          ~     (all equal)

name                    old allocs/op  new allocs/op  delta
Issue10335-12               4.00 ± 0%      3.00 ± 0%  -25.00%  (p=0.000 n=20+20)
Unmapped-12                 18.0 ± 0%       4.0 ± 0%  -77.78%  (p=0.000 n=20+20)
EncoderEncode-12            0.00           0.00          ~     (all equal)

Fixes #17914
Updates #20693
Updates #10335

Change-Id: I0459a52febb8b79c9a2991e69ed2614cf8740429
Reviewed-on: https://go-review.googlesource.com/47152Reviewed-by: 's avatarBrad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent c15984c6
......@@ -189,6 +189,7 @@ func (d *decodeState) unmarshal(v interface{}) (err error) {
}
d.scan.reset()
d.scanWhile(scanSkipSpace)
// We decode rv not rv.Elem because the Unmarshaler interface
// test must be applied at the top level of the value.
d.value(rv)
......@@ -274,9 +275,9 @@ func isValidNumber(s string) bool {
// decodeState represents the state while decoding a JSON value.
type decodeState struct {
data []byte
off int // read offset in data
off int // next read offset in data
opcode int // last read result
scan scanner
nextscan scanner // for calls to nextValue
errorContext struct { // provides context for type errors
Struct string
Field string
......@@ -286,6 +287,11 @@ type decodeState struct {
disallowUnknownFields bool
}
// readIndex returns the position of the last byte read.
func (d *decodeState) readIndex() int {
return d.off - 1
}
// errPhase is used for errors that should not happen unless
// there is a bug in the JSON decoder or something is editing
// the data slice while the decoder executes.
......@@ -326,94 +332,83 @@ func (d *decodeState) addErrorContext(err error) error {
return err
}
// next cuts off and returns the next full JSON value in d.data[d.off:].
// The next value is known to be an object or array, not a literal.
func (d *decodeState) next() []byte {
c := d.data[d.off]
item, rest, err := nextValue(d.data[d.off:], &d.nextscan)
if err != nil {
d.error(err)
// skip scans to the end of what was started.
func (d *decodeState) skip() {
s, data, i := &d.scan, d.data, d.off
depth := len(s.parseState)
for {
op := s.step(s, data[i])
i++
if len(s.parseState) < depth {
d.off = i
d.opcode = op
return
}
}
d.off = len(d.data) - len(rest)
}
// Our scanner has seen the opening brace/bracket
// and thinks we're still in the middle of the object.
// invent a closing brace/bracket to get it out.
if c == '{' {
d.scan.step(&d.scan, '}')
// scanNext processes the byte at d.data[d.off].
func (d *decodeState) scanNext() {
s, data, i := &d.scan, d.data, d.off
if i < len(data) {
d.opcode = s.step(s, data[i])
d.off = i + 1
} else {
d.scan.step(&d.scan, ']')
d.opcode = s.eof()
d.off = len(data) + 1 // mark processed EOF with len+1
}
return item
}
// scanWhile processes bytes in d.data[d.off:] until it
// receives a scan code not equal to op.
// It updates d.off and returns the new scan code.
func (d *decodeState) scanWhile(op int) int {
var newOp int
for {
if d.off >= len(d.data) {
newOp = d.scan.eof()
d.off = len(d.data) + 1 // mark processed EOF with len+1
} else {
c := d.data[d.off]
d.off++
newOp = d.scan.step(&d.scan, c)
}
func (d *decodeState) scanWhile(op int) {
s, data, i := &d.scan, d.data, d.off
for i < len(d.data) {
newOp := s.step(s, data[i])
i++
if newOp != op {
break
d.opcode = newOp
d.off = i
return
}
}
return newOp
d.off = len(d.data) + 1 // mark processed EOF with len+1
d.opcode = d.scan.eof()
}
// value decodes a JSON value from d.data[d.off:] into the value.
// it updates d.off to point past the decoded value.
// value consumes a JSON value from d.data[d.off-1:], decoding into v, and
// reads the following byte ahead. If v is invalid, the value is discarded.
// The first byte of the value has been read already.
func (d *decodeState) value(v reflect.Value) {
if !v.IsValid() {
_, rest, err := nextValue(d.data[d.off:], &d.nextscan)
if err != nil {
d.error(err)
}
d.off = len(d.data) - len(rest)
// d.scan thinks we're still at the beginning of the item.
// Feed in an empty string - the shortest, simplest value -
// so that it knows we got to the end of the value.
if d.scan.redo {
// rewind.
d.scan.redo = false
d.scan.step = stateBeginValue
}
d.scan.step(&d.scan, '"')
d.scan.step(&d.scan, '"')
n := len(d.scan.parseState)
if n > 0 && d.scan.parseState[n-1] == parseObjectKey {
// d.scan thinks we just read an object key; finish the object
d.scan.step(&d.scan, ':')
d.scan.step(&d.scan, '"')
d.scan.step(&d.scan, '"')
d.scan.step(&d.scan, '}')
}
return
}
switch op := d.scanWhile(scanSkipSpace); op {
switch d.opcode {
default:
d.error(errPhase)
case scanBeginArray:
d.array(v)
if v.IsValid() {
d.array(v)
} else {
d.skip()
}
d.scanNext()
case scanBeginObject:
d.object(v)
if v.IsValid() {
d.object(v)
} else {
d.skip()
}
d.scanNext()
case scanBeginLiteral:
d.literal(v)
// All bytes inside literal return scanContinue op code.
start := d.readIndex()
d.scanWhile(scanContinue)
if v.IsValid() {
d.literalStore(d.data[start:d.readIndex()], v, false)
}
}
}
......@@ -424,15 +419,17 @@ type unquotedValue struct{}
// If it finds anything other than a quoted string literal or null,
// valueQuoted returns unquotedValue{}.
func (d *decodeState) valueQuoted() interface{} {
switch op := d.scanWhile(scanSkipSpace); op {
switch d.opcode {
default:
d.error(errPhase)
case scanBeginArray:
d.array(reflect.Value{})
d.skip()
d.scanNext()
case scanBeginObject:
d.object(reflect.Value{})
d.skip()
d.scanNext()
case scanBeginLiteral:
switch v := d.literalInterface().(type) {
......@@ -447,7 +444,7 @@ func (d *decodeState) valueQuoted() interface{} {
// until it gets to a non-pointer.
// if it encounters an Unmarshaler, indirect stops and returns that.
// if decodingNull is true, indirect stops at the last pointer so it can be set to nil.
func (d *decodeState) indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnmarshaler, reflect.Value) {
func indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnmarshaler, reflect.Value) {
// Issue #24153 indicates that it is generally not a guaranteed property
// that you may round-trip a reflect.Value by calling Value.Addr().Elem()
// and expect the value to still be settable for values derived from
......@@ -512,14 +509,15 @@ func (d *decodeState) indirect(v reflect.Value, decodingNull bool) (Unmarshaler,
return nil, nil, v
}
// array consumes an array from d.data[d.off-1:], decoding into the value v.
// the first byte of the array ('[') has been read already.
// array consumes an array from d.data[d.off-1:], decoding into v.
// The first byte of the array ('[') has been read already.
func (d *decodeState) array(v reflect.Value) {
// Check for unmarshaler.
u, ut, pv := d.indirect(v, false)
u, ut, pv := indirect(v, false)
if u != nil {
d.off--
err := u.UnmarshalJSON(d.next())
start := d.readIndex()
d.skip()
err := u.UnmarshalJSON(d.data[start:d.off])
if err != nil {
d.error(err)
}
......@@ -527,11 +525,9 @@ func (d *decodeState) array(v reflect.Value) {
}
if ut != nil {
d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)})
d.off--
d.next()
d.skip()
return
}
v = pv
// Check type of target.
......@@ -546,8 +542,7 @@ func (d *decodeState) array(v reflect.Value) {
fallthrough
default:
d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)})
d.off--
d.next()
d.skip()
return
case reflect.Array:
case reflect.Slice:
......@@ -557,15 +552,11 @@ func (d *decodeState) array(v reflect.Value) {
i := 0
for {
// Look ahead for ] - can only happen on first iteration.
op := d.scanWhile(scanSkipSpace)
if op == scanEndArray {
d.scanWhile(scanSkipSpace)
if d.opcode == scanEndArray {
break
}
// Back up so d.value can have the byte we just read.
d.off--
d.scan.undo(op)
// Get element of array, growing if necessary.
if v.Kind() == reflect.Slice {
// Grow slice if necessary
......@@ -593,11 +584,13 @@ func (d *decodeState) array(v reflect.Value) {
i++
// Next token must be , or ].
op = d.scanWhile(scanSkipSpace)
if op == scanEndArray {
if d.opcode == scanSkipSpace {
d.scanWhile(scanSkipSpace)
}
if d.opcode == scanEndArray {
break
}
if op != scanArrayValue {
if d.opcode != scanArrayValue {
d.error(errPhase)
}
}
......@@ -621,14 +614,15 @@ func (d *decodeState) array(v reflect.Value) {
var nullLiteral = []byte("null")
var textUnmarshalerType = reflect.TypeOf(new(encoding.TextUnmarshaler)).Elem()
// object consumes an object from d.data[d.off-1:], decoding into the value v.
// the first byte ('{') of the object has been read already.
// object consumes an object from d.data[d.off-1:], decoding into v.
// The first byte ('{') of the object has been read already.
func (d *decodeState) object(v reflect.Value) {
// Check for unmarshaler.
u, ut, pv := d.indirect(v, false)
u, ut, pv := indirect(v, false)
if u != nil {
d.off--
err := u.UnmarshalJSON(d.next())
start := d.readIndex()
d.skip()
err := u.UnmarshalJSON(d.data[start:d.off])
if err != nil {
d.error(err)
}
......@@ -636,8 +630,7 @@ func (d *decodeState) object(v reflect.Value) {
}
if ut != nil {
d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)})
d.off--
d.next() // skip over { } in input
d.skip()
return
}
v = pv
......@@ -664,8 +657,7 @@ func (d *decodeState) object(v reflect.Value) {
default:
if !reflect.PtrTo(t.Key()).Implements(textUnmarshalerType) {
d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)})
d.off--
d.next() // skip over { } in input
d.skip()
return
}
}
......@@ -676,8 +668,7 @@ func (d *decodeState) object(v reflect.Value) {
// ok
default:
d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)})
d.off--
d.next() // skip over { } in input
d.skip()
return
}
......@@ -685,19 +676,19 @@ func (d *decodeState) object(v reflect.Value) {
for {
// Read opening " of string key or closing }.
op := d.scanWhile(scanSkipSpace)
if op == scanEndObject {
d.scanWhile(scanSkipSpace)
if d.opcode == scanEndObject {
// closing } - can only happen on first iteration.
break
}
if op != scanBeginLiteral {
if d.opcode != scanBeginLiteral {
d.error(errPhase)
}
// Read key.
start := d.off - 1
op = d.scanWhile(scanContinue)
item := d.data[start : d.off-1]
start := d.readIndex()
d.scanWhile(scanContinue)
item := d.data[start:d.readIndex()]
key, ok := unquoteBytes(item)
if !ok {
d.error(errPhase)
......@@ -761,12 +752,13 @@ func (d *decodeState) object(v reflect.Value) {
}
// Read : before value.
if op == scanSkipSpace {
op = d.scanWhile(scanSkipSpace)
if d.opcode == scanSkipSpace {
d.scanWhile(scanSkipSpace)
}
if op != scanObjectKey {
if d.opcode != scanObjectKey {
d.error(errPhase)
}
d.scanWhile(scanSkipSpace)
if destring {
switch qv := d.valueQuoted().(type) {
......@@ -819,11 +811,13 @@ func (d *decodeState) object(v reflect.Value) {
}
// Next token must be , or }.
op = d.scanWhile(scanSkipSpace)
if op == scanEndObject {
if d.opcode == scanSkipSpace {
d.scanWhile(scanSkipSpace)
}
if d.opcode == scanEndObject {
break
}
if op != scanObjectValue {
if d.opcode != scanObjectValue {
d.error(errPhase)
}
......@@ -832,21 +826,6 @@ func (d *decodeState) object(v reflect.Value) {
}
}
// literal consumes a literal from d.data[d.off-1:], decoding into the value v.
// The first byte of the literal has been read already
// (that's how the caller knows it's a literal).
func (d *decodeState) literal(v reflect.Value) {
// All bytes inside literal return scanContinue op code.
start := d.off - 1
op := d.scanWhile(scanContinue)
// Scan read one byte too far; back up.
d.off--
d.scan.undo(op)
d.literalStore(d.data[start:d.off], v, false)
}
// convertNumber converts the number literal s to a float64 or a Number
// depending on the setting of d.useNumber.
func (d *decodeState) convertNumber(s string) (interface{}, error) {
......@@ -875,7 +854,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
return
}
isNull := item[0] == 'n' // null
u, ut, pv := d.indirect(v, isNull)
u, ut, pv := indirect(v, isNull)
if u != nil {
err := u.UnmarshalJSON(item)
if err != nil {
......@@ -897,7 +876,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
default:
val = "number"
}
d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.off)})
d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())})
}
return
}
......@@ -944,7 +923,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
if fromQuoted {
d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()))
} else {
d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.off)})
d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())})
}
case reflect.Bool:
v.SetBool(value)
......@@ -952,7 +931,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
if v.NumMethod() == 0 {
v.Set(reflect.ValueOf(value))
} else {
d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.off)})
d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())})
}
}
......@@ -967,10 +946,10 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
}
switch v.Kind() {
default:
d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.off)})
d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())})
case reflect.Slice:
if v.Type().Elem().Kind() != reflect.Uint8 {
d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.off)})
d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())})
break
}
b := make([]byte, base64.StdEncoding.DecodedLen(len(s)))
......@@ -986,7 +965,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
if v.NumMethod() == 0 {
v.Set(reflect.ValueOf(string(s)))
} else {
d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.off)})
d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())})
}
}
......@@ -1011,7 +990,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
if fromQuoted {
d.error(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()))
} else {
d.error(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.off)})
d.error(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())})
}
case reflect.Interface:
n, err := d.convertNumber(s)
......@@ -1020,7 +999,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
break
}
if v.NumMethod() != 0 {
d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.off)})
d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())})
break
}
v.Set(reflect.ValueOf(n))
......@@ -1028,7 +1007,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
n, err := strconv.ParseInt(s, 10, 64)
if err != nil || v.OverflowInt(n) {
d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.off)})
d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.readIndex())})
break
}
v.SetInt(n)
......@@ -1036,7 +1015,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
n, err := strconv.ParseUint(s, 10, 64)
if err != nil || v.OverflowUint(n) {
d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.off)})
d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.readIndex())})
break
}
v.SetUint(n)
......@@ -1044,7 +1023,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
case reflect.Float32, reflect.Float64:
n, err := strconv.ParseFloat(s, v.Type().Bits())
if err != nil || v.OverflowFloat(n) {
d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.off)})
d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: v.Type(), Offset: int64(d.readIndex())})
break
}
v.SetFloat(n)
......@@ -1057,18 +1036,21 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
// but they avoid the weight of reflection in this common case.
// valueInterface is like value but returns interface{}
func (d *decodeState) valueInterface() interface{} {
switch d.scanWhile(scanSkipSpace) {
func (d *decodeState) valueInterface() (val interface{}) {
switch d.opcode {
default:
d.error(errPhase)
panic("unreachable")
case scanBeginArray:
return d.arrayInterface()
val = d.arrayInterface()
d.scanNext()
case scanBeginObject:
return d.objectInterface()
val = d.objectInterface()
d.scanNext()
case scanBeginLiteral:
return d.literalInterface()
val = d.literalInterface()
}
return
}
// arrayInterface is like array but returns []interface{}.
......@@ -1076,23 +1058,21 @@ func (d *decodeState) arrayInterface() []interface{} {
var v = make([]interface{}, 0)
for {
// Look ahead for ] - can only happen on first iteration.
op := d.scanWhile(scanSkipSpace)
if op == scanEndArray {
d.scanWhile(scanSkipSpace)
if d.opcode == scanEndArray {
break
}
// Back up so d.value can have the byte we just read.
d.off--
d.scan.undo(op)
v = append(v, d.valueInterface())
// Next token must be , or ].
op = d.scanWhile(scanSkipSpace)
if op == scanEndArray {
if d.opcode == scanSkipSpace {
d.scanWhile(scanSkipSpace)
}
if d.opcode == scanEndArray {
break
}
if op != scanArrayValue {
if d.opcode != scanArrayValue {
d.error(errPhase)
}
}
......@@ -1104,57 +1084,59 @@ func (d *decodeState) objectInterface() map[string]interface{} {
m := make(map[string]interface{})
for {
// Read opening " of string key or closing }.
op := d.scanWhile(scanSkipSpace)
if op == scanEndObject {
d.scanWhile(scanSkipSpace)
if d.opcode == scanEndObject {
// closing } - can only happen on first iteration.
break
}
if op != scanBeginLiteral {
if d.opcode != scanBeginLiteral {
d.error(errPhase)
}
// Read string key.
start := d.off - 1
op = d.scanWhile(scanContinue)
item := d.data[start : d.off-1]
start := d.readIndex()
d.scanWhile(scanContinue)
item := d.data[start:d.readIndex()]
key, ok := unquote(item)
if !ok {
d.error(errPhase)
}
// Read : before value.
if op == scanSkipSpace {
op = d.scanWhile(scanSkipSpace)
if d.opcode == scanSkipSpace {
d.scanWhile(scanSkipSpace)
}
if op != scanObjectKey {
if d.opcode != scanObjectKey {
d.error(errPhase)
}
d.scanWhile(scanSkipSpace)
// Read value.
m[key] = d.valueInterface()
// Next token must be , or }.
op = d.scanWhile(scanSkipSpace)
if op == scanEndObject {
if d.opcode == scanSkipSpace {
d.scanWhile(scanSkipSpace)
}
if d.opcode == scanEndObject {
break
}
if op != scanObjectValue {
if d.opcode != scanObjectValue {
d.error(errPhase)
}
}
return m
}
// literalInterface is like literal but returns an interface value.
// literalInterface consumes and returns a literal from d.data[d.off-1:] and
// it reads the following byte ahead. The first byte of the literal has been
// read already (that's how the caller knows it's a literal).
func (d *decodeState) literalInterface() interface{} {
// All bytes inside literal return scanContinue op code.
start := d.off - 1
op := d.scanWhile(scanContinue)
start := d.readIndex()
d.scanWhile(scanContinue)
// Scan read one byte too far; back up.
d.off--
d.scan.undo(op)
item := d.data[start:d.off]
item := d.data[start:d.readIndex()]
switch c := item[0]; c {
case 'n': // null
......
......@@ -8,7 +8,7 @@ package json
// Just about at the limit of what is reasonable to write by hand.
// Some parts are a bit tedious, but overall it nicely factors out the
// otherwise common code from the multiple scanning functions
// in this package (Compact, Indent, checkValid, nextValue, etc).
// in this package (Compact, Indent, checkValid, etc).
//
// This file starts with two simple examples using the scanner
// before diving into the scanner itself.
......@@ -36,35 +36,6 @@ func checkValid(data []byte, scan *scanner) error {
return nil
}
// nextValue splits data after the next whole JSON value,
// returning that value and the bytes that follow it as separate slices.
// scan is passed in for use by nextValue to avoid an allocation.
func nextValue(data []byte, scan *scanner) (value, rest []byte, err error) {
scan.reset()
for i, c := range data {
v := scan.step(scan, c)
if v >= scanEndObject {
switch v {
// probe the scanner with a space to determine whether we will
// get scanEnd on the next character. Otherwise, if the next character
// is not a space, scanEndTop allocates a needless error.
case scanEndObject, scanEndArray:
if scan.step(scan, ' ') == scanEnd {
return data[:i+1], data[i+1:], nil
}
case scanError:
return nil, nil, scan.err
case scanEnd:
return data[:i], data[i:], nil
}
}
}
if scan.eof() == scanError {
return nil, nil, scan.err
}
return data, nil, nil
}
// A SyntaxError is a description of a JSON syntax error.
type SyntaxError struct {
msg string // description of error
......@@ -101,11 +72,6 @@ type scanner struct {
// Error that happened, if any.
err error
// 1-byte redo (see undo method)
redo bool
redoCode int
redoState func(*scanner, byte) int
// total bytes consumed, updated by decoder.Decode
bytes int64
}
......@@ -151,7 +117,6 @@ func (s *scanner) reset() {
s.step = stateBeginValue
s.parseState = s.parseState[0:0]
s.err = nil
s.redo = false
s.endTop = false
}
......@@ -184,7 +149,6 @@ func (s *scanner) pushParseState(p int) {
func (s *scanner) popParseState() {
n := len(s.parseState) - 1
s.parseState = s.parseState[0:n]
s.redo = false
if n == 0 {
s.step = stateEndTop
s.endTop = true
......@@ -607,22 +571,3 @@ func quoteChar(c byte) string {
s := strconv.Quote(string(c))
return "'" + s[1:len(s)-1] + "'"
}
// undo causes the scanner to return scanCode from the next state transition.
// This gives callers a simple 1-byte undo mechanism.
func (s *scanner) undo(scanCode int) {
if s.redo {
panic("json: invalid use of scanner")
}
s.redoCode = scanCode
s.redoState = s.step
s.step = stateRedo
s.redo = true
}
// stateRedo helps implement the scanner's 1-byte undo.
func stateRedo(s *scanner, c byte) int {
s.redo = false
s.step = s.redoState
return s.redoCode
}
......@@ -200,43 +200,6 @@ func TestIndentErrors(t *testing.T) {
}
}
func TestNextValueBig(t *testing.T) {
initBig()
var scan scanner
item, rest, err := nextValue(jsonBig, &scan)
if err != nil {
t.Fatalf("nextValue: %s", err)
}
if len(item) != len(jsonBig) || &item[0] != &jsonBig[0] {
t.Errorf("invalid item: %d %d", len(item), len(jsonBig))
}
if len(rest) != 0 {
t.Errorf("invalid rest: %d", len(rest))
}
item, rest, err = nextValue(append(jsonBig, "HELLO WORLD"...), &scan)
if err != nil {
t.Fatalf("nextValue extra: %s", err)
}
if len(item) != len(jsonBig) {
t.Errorf("invalid item: %d %d", len(item), len(jsonBig))
}
if string(rest) != "HELLO WORLD" {
t.Errorf("invalid rest: %d", len(rest))
}
}
var benchScan scanner
func BenchmarkSkipValue(b *testing.B) {
initBig()
b.ResetTimer()
for i := 0; i < b.N; i++ {
nextValue(jsonBig, &benchScan)
}
b.SetBytes(int64(len(jsonBig)))
}
func diff(t *testing.T, a, b []byte) {
for i := 0; ; i++ {
if i >= len(a) || i >= len(b) || a[i] != b[i] {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment