Commit f4e3e5ea authored by Didier Spezia's avatar Didier Spezia Committed by Rob Pike

html/template: fix quadratic performance with special tags

The current implementation of the tSpecialTagEnd function
is inefficient since it generates plenty of memory allocations
and converts the whole buffer to lowercase at each call.

If the number of special tags increases linearly with the
template size, the complexity becomes quadratic.

This CL provides an alternative implementation.
While the algorithm is probably still not optimal, it avoids
the quadratic behavior and the memory allocations.

benchmark                          old ns/op     new ns/op     delta
BenchmarkTemplateSpecialTags-4     19326431      532190        -97.25%

benchmark                          old allocs    new allocs    delta
BenchmarkTemplateSpecialTags-4     2650          190           -92.83%

benchmark                          old bytes     new bytes     delta
BenchmarkTemplateSpecialTags-4     4106460       46568         -98.87%

While we are there, make sure we respect the HTML tokenization algorithm.
An end tag needs to be followed by a space, tab, CR, FF, /, or > as described
in https://html.spec.whatwg.org/multipage/syntax.html#tokenization
Explicitly add this check.

Fixes #10605

Change-Id: Ia33ddee164ab608a69ac4183e16ec506bbeaa54c
Reviewed-on: https://go-review.googlesource.com/9502Reviewed-by: 's avatarRob Pike <r@golang.org>
parent 79a990b8
......@@ -183,24 +183,54 @@ func tHTMLCmt(c context, s []byte) (context, int) {
// specialTagEndMarkers maps element types to the character sequence that
// case-insensitively signals the end of the special tag body.
var specialTagEndMarkers = [...]string{
elementScript: "</script",
elementStyle: "</style",
elementTextarea: "</textarea",
elementTitle: "</title",
var specialTagEndMarkers = [...][]byte{
elementScript: []byte("script"),
elementStyle: []byte("style"),
elementTextarea: []byte("textarea"),
elementTitle: []byte("title"),
}
var (
specialTagEndPrefix = []byte("</")
tagEndSeparators = []byte("> \t\n\f/")
)
// tSpecialTagEnd is the context transition function for raw text and RCDATA
// element states.
func tSpecialTagEnd(c context, s []byte) (context, int) {
if c.element != elementNone {
if i := strings.Index(strings.ToLower(string(s)), specialTagEndMarkers[c.element]); i != -1 {
if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
return context{}, i
}
}
return c, len(s)
}
// indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
func indexTagEnd(s []byte, tag []byte) int {
res := 0
plen := len(specialTagEndPrefix)
for len(s) > 0 {
// Try to find the tag end prefix first
i := bytes.Index(s, specialTagEndPrefix)
if i == -1 {
return i
}
s = s[i+plen:]
// Try to match the actual tag if there is still space for it
if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
s = s[len(tag):]
// Check the tag is followed by a proper separator
if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
return res + i
}
res += len(tag)
}
res += i + plen
}
return -1
}
// tAttr is the context transition function for the attribute state.
func tAttr(c context, s []byte) (context, int) {
return c, len(s)
......
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package template
import (
"bytes"
"strings"
"testing"
)
func TestFindEndTag(t *testing.T) {
tests := []struct {
s, tag string
want int
}{
{"", "tag", -1},
{"hello </textarea> hello", "textarea", 6},
{"hello </TEXTarea> hello", "textarea", 6},
{"hello </textAREA>", "textarea", 6},
{"hello </textarea", "textareax", -1},
{"hello </textarea>", "tag", -1},
{"hello tag </textarea", "tag", -1},
{"hello </tag> </other> </textarea> <other>", "textarea", 22},
{"</textarea> <other>", "textarea", 0},
{"<div> </div> </TEXTAREA>", "textarea", 13},
{"<div> </div> </TEXTAREA\t>", "textarea", 13},
{"<div> </div> </TEXTAREA >", "textarea", 13},
{"<div> </div> </TEXTAREAfoo", "textarea", -1},
{"</TEXTAREAfoo </textarea>", "textarea", 14},
{"<</script >", "script", 1},
{"</script>", "textarea", -1},
}
for _, test := range tests {
if got := indexTagEnd([]byte(test.s), []byte(test.tag)); test.want != got {
t.Errorf("%q/%q: want\n\t%d\nbut got\n\t%d", test.s, test.tag, test.want, got)
}
}
}
func BenchmarkTemplateSpecialTags(b *testing.B) {
r := struct {
Name, Gift string
}{"Aunt Mildred", "bone china tea set"}
h1 := "<textarea> Hello Hello Hello </textarea> "
h2 := "<textarea> <p> Dear {{.Name}},\n{{with .Gift}}Thank you for the lovely {{.}}. {{end}}\nBest wishes. </p>\n</textarea>"
html := strings.Repeat(h1, 100) + h2 + strings.Repeat(h1, 100) + h2
var buf bytes.Buffer
for i := 0; i < b.N; i++ {
tmpl := Must(New("foo").Parse(html))
if err := tmpl.Execute(&buf, r); err != nil {
b.Fatal(err)
}
buf.Reset()
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment