Commit a1c1a763 authored by Didier Spezia's avatar Didier Spezia Committed by Rob Pike

html/template: fix string iteration in replacement operations

In css, js, and html, the replacement operations are implemented
by iterating on strings (rune by rune). The for/range
statement is used. The length of the rune is required
and added to the index to properly slice the string.

This is potentially wrong because there is a discrepancy between
the result of utf8.RuneLen and the increment of the index
(set by the for/range statement). For invalid strings,
utf8.RuneLen('\ufffd') == 3, while the index is incremented
only by 1 byte.

htmlReplacer triggers a panic at slicing time for some
invalid strings.

Use a more robust iteration mechanism based on
utf8.DecodeRuneInString, and make sure the same
pattern is used for all similar functions in this
package.

Fixes #10799

Change-Id: Ibad3857b2819435d9fa564f06fc2ca8774102841
Reviewed-on: https://go-review.googlesource.com/10105Reviewed-by: 's avatarRob Pike <r@golang.org>
parent d6bbcea2
......@@ -157,56 +157,20 @@ func isCSSSpace(b byte) bool {
func cssEscaper(args ...interface{}) string {
s, _ := stringify(args...)
var b bytes.Buffer
written := 0
for i, r := range s {
r, w, written := rune(0), 0, 0
for i := 0; i < len(s); i += w {
// See comment in htmlEscaper.
r, w = utf8.DecodeRuneInString(s[i:])
var repl string
switch r {
case 0:
repl = `\0`
case '\t':
repl = `\9`
case '\n':
repl = `\a`
case '\f':
repl = `\c`
case '\r':
repl = `\d`
// Encode HTML specials as hex so the output can be embedded
// in HTML attributes without further encoding.
case '"':
repl = `\22`
case '&':
repl = `\26`
case '\'':
repl = `\27`
case '(':
repl = `\28`
case ')':
repl = `\29`
case '+':
repl = `\2b`
case '/':
repl = `\2f`
case ':':
repl = `\3a`
case ';':
repl = `\3b`
case '<':
repl = `\3c`
case '>':
repl = `\3e`
case '\\':
repl = `\\`
case '{':
repl = `\7b`
case '}':
repl = `\7d`
switch {
case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "":
repl = cssReplacementTable[r]
default:
continue
}
b.WriteString(s[written:i])
b.WriteString(repl)
written = i + utf8.RuneLen(r)
written = i + w
if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) {
b.WriteByte(' ')
}
......@@ -218,6 +182,30 @@ func cssEscaper(args ...interface{}) string {
return b.String()
}
var cssReplacementTable = []string{
0: `\0`,
'\t': `\9`,
'\n': `\a`,
'\f': `\c`,
'\r': `\d`,
// Encode HTML specials as hex so the output can be embedded
// in HTML attributes without further encoding.
'"': `\22`,
'&': `\26`,
'\'': `\27`,
'(': `\28`,
')': `\29`,
'+': `\2b`,
'/': `\2f`,
':': `\3a`,
';': `\3b`,
'<': `\3c`,
'>': `\3e`,
'\\': `\\`,
'{': `\7b`,
'}': `\7d`,
}
var expressionBytes = []byte("expression")
var mozBindingBytes = []byte("mozbinding")
......
......@@ -138,21 +138,24 @@ var htmlNospaceNormReplacementTable = []string{
// and when badRunes is true, certain bad runes are allowed through unescaped.
func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
written, b := 0, new(bytes.Buffer)
for i, r := range s {
r, w := rune(0), 0
for i := 0; i < len(s); i += w {
// Cannot use 'for range s' because we need to preserve the width
// of the runes in the input. If we see a decoding error, the input
// width will not be utf8.Runelen(r) and we will overrun the buffer.
r, w = utf8.DecodeRuneInString(s[i:])
if int(r) < len(replacementTable) {
if repl := replacementTable[r]; len(repl) != 0 {
b.WriteString(s[written:i])
b.WriteString(repl)
// Valid as long as replacementTable doesn't
// include anything above 0x7f.
written = i + utf8.RuneLen(r)
written = i + w
}
} else if badRunes {
// No-op.
// IE does not allow these ranges in unquoted attrs.
} else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
written = i + utf8.RuneLen(r)
written = i + w
}
}
if written == 0 {
......
......@@ -19,7 +19,8 @@ func TestHTMLNospaceEscaper(t *testing.T) {
`PQRSTUVWXYZ[\]^_` +
"`abcdefghijklmno" +
"pqrstuvwxyz{|}~\x7f" +
"\u00A0\u0100\u2028\u2029\ufeff\ufdec\U0001D11E")
"\u00A0\u0100\u2028\u2029\ufeff\ufdec\U0001D11E" +
"erroneous\x960") // keep at the end
want := ("&#xfffd;\x01\x02\x03\x04\x05\x06\x07" +
"\x08&#9;&#10;&#11;&#12;&#13;\x0E\x0F" +
......@@ -31,14 +32,16 @@ func TestHTMLNospaceEscaper(t *testing.T) {
`PQRSTUVWXYZ[\]^_` +
`&#96;abcdefghijklmno` +
`pqrstuvwxyz{|}~` + "\u007f" +
"\u00A0\u0100\u2028\u2029\ufeff&#xfdec;\U0001D11E")
"\u00A0\u0100\u2028\u2029\ufeff&#xfdec;\U0001D11E" +
"erroneous&#xfffd;0") // keep at the end
got := htmlNospaceEscaper(input)
if got != want {
t.Errorf("encode: want\n\t%q\nbut got\n\t%q", want, got)
}
got, want = html.UnescapeString(got), strings.Replace(input, "\x00", "\ufffd", 1)
r := strings.NewReplacer("\x00", "\ufffd", "\x96", "\ufffd")
got, want = html.UnescapeString(got), r.Replace(input)
if want != got {
t.Errorf("decode: want\n\t%q\nbut got\n\t%q", want, got)
}
......
......@@ -246,8 +246,10 @@ func jsRegexpEscaper(args ...interface{}) string {
// `\u2029`.
func replace(s string, replacementTable []string) string {
var b bytes.Buffer
written := 0
for i, r := range s {
r, w, written := rune(0), 0, 0
for i := 0; i < len(s); i += w {
// See comment in htmlEscaper.
r, w = utf8.DecodeRuneInString(s[i:])
var repl string
switch {
case int(r) < len(replacementTable) && replacementTable[r] != "":
......@@ -261,7 +263,7 @@ func replace(s string, replacementTable []string) string {
}
b.WriteString(s[written:i])
b.WriteString(repl)
written = i + utf8.RuneLen(r)
written = i + w
}
if written == 0 {
return s
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment