Commit aa9a81b1, authored by Andrew Balholm and committed by Nigel Tao

exp/html: discard tags that are terminated by EOF instead of by '>'

If a tag doesn't have a closing '>', it isn't considered a tag;
it is just ignored and EOF is returned instead.

Pass one additional test in the test suite.

Change tokenizer tests to match correct behavior.

R=nigeltao
CC=golang-dev
https://golang.org/cl/6454131
parent a9d0ff6e
PASS "<foo bar=qux/>"
PASS "<p id=\"status\"><noscript><strong>A</strong></noscript><span>B</span></p>"
PASS "<div><sarcasm><div></div></sarcasm></div>"
-FAIL "<html><body><img src=\"\" border=\"0\" alt=\"><div>A</div></body></html>"
+PASS "<html><body><img src=\"\" border=\"0\" alt=\"><div>A</div></body></html>"
PASS "<table><td></tbody>A"
PASS "<table><td></thead>A"
PASS "<table><td></tfoot>A"
......
......@@ -692,7 +692,7 @@ loop:
// been consumed, where 'a' means anything in [A-Za-z].
func (z *Tokenizer) readStartTag() TokenType {
z.readTag(true)
-if z.err != nil && len(z.attr) == 0 {
+if z.err != nil {
return ErrorToken
}
// Several tags flag the tokenizer's next token as raw.
......@@ -948,7 +948,11 @@ loop:
}
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
z.readTag(false)
-z.tt = EndTagToken
+if z.err != nil {
+z.tt = ErrorToken
+} else {
+z.tt = EndTagToken
+}
return z.tt
}
z.raw.end--
......
......@@ -128,7 +128,7 @@ var tokenTests = []tokenTest{
{
"tag name eof #4",
`<a x`,
-`<a x="">`,
+``,
},
// Some malformed tags that are missing a '>'.
{
......@@ -144,12 +144,12 @@ var tokenTests = []tokenTest{
{
"malformed tag #2",
`<p id`,
-`<p id="">`,
+``,
},
{
"malformed tag #3",
`<p id=`,
-`<p id="">`,
+``,
},
{
"malformed tag #4",
......@@ -159,7 +159,7 @@ var tokenTests = []tokenTest{
{
"malformed tag #5",
`<p id=0`,
-`<p id="0">`,
+``,
},
{
"malformed tag #6",
......@@ -169,13 +169,18 @@ var tokenTests = []tokenTest{
{
"malformed tag #7",
`<p id="0</p>`,
-`<p id="0&lt;/p&gt;">`,
+``,
},
{
"malformed tag #8",
`<p id="0"</p>`,
`<p id="0" <="" p="">`,
},
+{
+"malformed tag #9",
+`<p></p id`,
+`<p>`,
+},
// Raw text and RCDATA.
{
"basic raw text",
......@@ -205,7 +210,7 @@ var tokenTests = []tokenTest{
{
"' ' completes script end tag",
"<SCRIPT>a</SCRipt ",
-"<script>$a$</script>",
+"<script>$a",
},
{
"'>' completes script end tag",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment