Commit 2d3599e5 authored by Daniel Martí's avatar Daniel Martí

encoding/json: encode struct field names ahead of time

Struct field names are static, so we can run HTMLEscape on them when
building each struct type encoder. Then, when running the struct
encoder, we can select either the original or the escaped field name to
write directly.

When the encoder is not escaping HTML, using the original string works
because neither Go struct field names nor JSON tags allow any characters
that would need to be escaped, like '"', '\\', or '\n'.

When the encoder is escaping HTML, the only difference is that '<', '>',
and '&' are allowed via JSON struct field tags, hence why we use
HTMLEscape to properly escape them.

All of the above lets us encode field names with a simple if/else and
WriteString calls, which are considerably simpler and faster than
encoding an arbitrary string.

While at it, also include the quotes and colon in these strings, to
avoid three WriteByte calls in the loop hot path.

Also added a few tests, to ensure that the behavior in these edge cases
is not broken. The output of the tests is the same if this optimization
is reverted.

name           old time/op    new time/op    delta
CodeEncoder-4    7.12ms ± 0%    6.14ms ± 0%  -13.85%  (p=0.004 n=6+5)

name           old speed      new speed      delta
CodeEncoder-4   272MB/s ± 0%   316MB/s ± 0%  +16.08%  (p=0.004 n=6+5)

name           old alloc/op   new alloc/op   delta
CodeEncoder-4    91.9kB ± 0%    93.2kB ± 0%   +1.43%  (p=0.002 n=6+6)

name           old allocs/op  new allocs/op  delta
CodeEncoder-4      0.00           0.00          ~     (all equal)

Updates #5683.

Change-Id: I6f6a340d0de4670799ce38cf95b2092822d2e3ef
Reviewed-on: https://go-review.googlesource.com/122460
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarBrad Fitzpatrick <bradfitz@golang.org>
parent e7f59f02
......@@ -142,7 +142,7 @@ var (
umstructXY = ustructText{unmarshalerText{"x", "y"}}
ummapType = map[unmarshalerText]bool{}
ummapXY = map[unmarshalerText]bool{unmarshalerText{"x", "y"}: true}
ummapXY = map[unmarshalerText]bool{{"x", "y"}: true}
)
// Test data structures for anonymous fields.
......
......@@ -641,8 +641,11 @@ func (se *structEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) {
} else {
e.WriteByte(',')
}
e.string(f.name, opts.escapeHTML)
e.WriteByte(':')
if opts.escapeHTML {
e.WriteString(f.nameEscHTML)
} else {
e.WriteString(f.nameNonEsc)
}
opts.quoted = f.quoted
se.fieldEncs[i](e, fv, opts)
}
......@@ -1036,6 +1039,9 @@ type field struct {
nameBytes []byte // []byte(name)
equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent
nameNonEsc string // `"` + name + `":`
nameEscHTML string // `"` + HTMLEscape(name) + `":`
tag bool
index []int
typ reflect.Type
......@@ -1086,6 +1092,9 @@ func typeFields(t reflect.Type) []field {
// Fields found.
var fields []field
// Buffer to run HTMLEscape on field names.
var nameEscBuf bytes.Buffer
for len(next) > 0 {
current, next = next, current[:0]
count, nextCount = nextCount, map[reflect.Type]int{}
......@@ -1152,14 +1161,24 @@ func typeFields(t reflect.Type) []field {
if name == "" {
name = sf.Name
}
fields = append(fields, fillField(field{
field := fillField(field{
name: name,
tag: tagged,
index: index,
typ: ft,
omitEmpty: opts.Contains("omitempty"),
quoted: quoted,
}))
})
// Build nameEscHTML and nameNonEsc ahead of time.
nameEscBuf.Reset()
nameEscBuf.WriteString(`"`)
HTMLEscape(&nameEscBuf, field.nameBytes)
nameEscBuf.WriteString(`":`)
field.nameEscHTML = nameEscBuf.String()
field.nameNonEsc = `"` + field.name + `":`
fields = append(fields, field)
if count[f.typ] > 1 {
// If there were multiple instances, add a second,
// so that the annihilation code will see a duplicate.
......
......@@ -995,3 +995,18 @@ func TestMarshalPanic(t *testing.T) {
Marshal(&marshalPanic{})
t.Error("Marshal should have panicked")
}
func TestMarshalUncommonFieldNames(t *testing.T) {
v := struct {
A0, À, int
}{}
b, err := Marshal(v)
if err != nil {
t.Fatal("Marshal:", err)
}
want := `{"A0":0,"À":0,"Aβ":0}`
got := string(b)
if got != want {
t.Fatalf("Marshal: got %s want %s", got, want)
}
}
......@@ -93,6 +93,10 @@ func TestEncoderIndent(t *testing.T) {
func TestEncoderSetEscapeHTML(t *testing.T) {
var c C
var ct CText
var tagStruct struct {
Valid int `json:"<>&#! "`
Invalid int `json:"\\"`
}
for _, tt := range []struct {
name string
v interface{}
......@@ -102,6 +106,11 @@ func TestEncoderSetEscapeHTML(t *testing.T) {
{"c", c, `"\u003c\u0026\u003e"`, `"<&>"`},
{"ct", ct, `"\"\u003c\u0026\u003e\""`, `"\"<&>\""`},
{`"<&>"`, "<&>", `"\u003c\u0026\u003e"`, `"<&>"`},
{
"tagStruct", tagStruct,
`{"\u003c\u003e\u0026#! ":0,"Invalid":0}`,
`{"<>&#! ":0,"Invalid":0}`,
},
} {
var buf bytes.Buffer
enc := NewEncoder(&buf)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment