Commit fdc45367 authored by Gustavo Niemeyer's avatar Gustavo Niemeyer

encoding/xml: fix decoding of unknown entities in non-strict mode

Fixes #3447.

R=rsc, gustavo
CC=golang-dev
https://golang.org/cl/6039045
parent 9242a90a
......@@ -850,6 +850,8 @@ Input:
// Parsers are required to recognize lt, gt, amp, apos, and quot
// even if they have not been declared. That's all we allow.
var i int
var semicolon bool
var valid bool
for i = 0; i < len(d.tmp); i++ {
var ok bool
d.tmp[i], ok = d.getc()
......@@ -861,6 +863,8 @@ Input:
}
c := d.tmp[i]
if c == ';' {
semicolon = true
valid = i > 0
break
}
if 'a' <= c && c <= 'z' ||
......@@ -873,14 +877,25 @@ Input:
break
}
s := string(d.tmp[0:i])
if i >= len(d.tmp) {
if !valid {
if !d.Strict {
b0, b1 = 0, 0
d.buf.WriteByte('&')
d.buf.Write(d.tmp[0:i])
if semicolon {
d.buf.WriteByte(';')
}
continue Input
}
d.err = d.syntaxError("character entity expression &" + s + "... too long")
semi := ";"
if !semicolon {
semi = " (no semicolon)"
}
if i < len(d.tmp) {
d.err = d.syntaxError("invalid character entity &" + s + semi)
} else {
d.err = d.syntaxError("invalid character entity &" + s + "... too long")
}
return nil
}
var haveText bool
......@@ -910,6 +925,7 @@ Input:
b0, b1 = 0, 0
d.buf.WriteByte('&')
d.buf.Write(d.tmp[0:i])
d.buf.WriteByte(';')
continue Input
}
d.err = d.syntaxError("invalid character entity &" + s + ";")
......
......@@ -5,6 +5,7 @@
package xml
import (
"fmt"
"io"
"reflect"
"strings"
......@@ -158,6 +159,39 @@ func TestRawToken(t *testing.T) {
testRawToken(t, d, rawTokens)
}
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
`
var nonStrictTokens = []Token{
CharData("\n"),
StartElement{Name{"", "tag"}, []Attr{}},
CharData("non&entity"),
EndElement{Name{"", "tag"}},
CharData("\n"),
StartElement{Name{"", "tag"}, []Attr{}},
CharData("&unknown;entity"),
EndElement{Name{"", "tag"}},
CharData("\n"),
StartElement{Name{"", "tag"}, []Attr{}},
CharData("&#123"),
EndElement{Name{"", "tag"}},
CharData("\n"),
StartElement{Name{"", "tag"}, []Attr{}},
CharData("&#zzz;"),
EndElement{Name{"", "tag"}},
CharData("\n"),
}
func TestNonStrictRawToken(t *testing.T) {
d := NewDecoder(strings.NewReader(nonStrictInput))
d.Strict = false
testRawToken(t, d, nonStrictTokens)
}
type downCaser struct {
t *testing.T
r io.ByteReader
......@@ -219,7 +253,18 @@ func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) {
t.Fatalf("token %d: unexpected error: %s", i, err)
}
if !reflect.DeepEqual(have, want) {
t.Errorf("token %d = %#v want %#v", i, have, want)
var shave, swant string
if _, ok := have.(CharData); ok {
shave = fmt.Sprintf("CharData(%q)", have)
} else {
shave = fmt.Sprintf("%#v", have)
}
if _, ok := want.(CharData); ok {
swant = fmt.Sprintf("CharData(%q)", want)
} else {
swant = fmt.Sprintf("%#v", want)
}
t.Errorf("token %d = %s, want %s", i, shave, swant)
}
}
}
......@@ -531,8 +576,8 @@ var characterTests = []struct {
{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
{"<doc>&\x01;</doc>", "invalid character entity &;"},
{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &;"},
{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity & (no semicolon)"},
}
func TestDisallowedCharacters(t *testing.T) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment