Commit aa9a81b1, authored by Andrew Balholm, committed by Nigel Tao

exp/html: discard tags that are terminated by EOF instead of by '>'

If a tag is cut off by EOF before its closing '>', it isn't considered a tag;
the partial tag is discarded and the tokenizer returns EOF (an ErrorToken) instead.

Pass one additional test in the test suite.

Change tokenizer tests to match correct behavior.

R=nigeltao
CC=golang-dev
https://golang.org/cl/6454131
parent a9d0ff6e
PASS "<foo bar=qux/>" PASS "<foo bar=qux/>"
PASS "<p id=\"status\"><noscript><strong>A</strong></noscript><span>B</span></p>" PASS "<p id=\"status\"><noscript><strong>A</strong></noscript><span>B</span></p>"
PASS "<div><sarcasm><div></div></sarcasm></div>" PASS "<div><sarcasm><div></div></sarcasm></div>"
FAIL "<html><body><img src=\"\" border=\"0\" alt=\"><div>A</div></body></html>" PASS "<html><body><img src=\"\" border=\"0\" alt=\"><div>A</div></body></html>"
PASS "<table><td></tbody>A" PASS "<table><td></tbody>A"
PASS "<table><td></thead>A" PASS "<table><td></thead>A"
PASS "<table><td></tfoot>A" PASS "<table><td></tfoot>A"
......
...@@ -692,7 +692,7 @@ loop: ...@@ -692,7 +692,7 @@ loop:
// been consumed, where 'a' means anything in [A-Za-z]. // been consumed, where 'a' means anything in [A-Za-z].
func (z *Tokenizer) readStartTag() TokenType { func (z *Tokenizer) readStartTag() TokenType {
z.readTag(true) z.readTag(true)
if z.err != nil && len(z.attr) == 0 { if z.err != nil {
return ErrorToken return ErrorToken
} }
// Several tags flag the tokenizer's next token as raw. // Several tags flag the tokenizer's next token as raw.
...@@ -948,7 +948,11 @@ loop: ...@@ -948,7 +948,11 @@ loop:
} }
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' { if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
z.readTag(false) z.readTag(false)
z.tt = EndTagToken if z.err != nil {
z.tt = ErrorToken
} else {
z.tt = EndTagToken
}
return z.tt return z.tt
} }
z.raw.end-- z.raw.end--
......
...@@ -128,7 +128,7 @@ var tokenTests = []tokenTest{ ...@@ -128,7 +128,7 @@ var tokenTests = []tokenTest{
{ {
"tag name eof #4", "tag name eof #4",
`<a x`, `<a x`,
`<a x="">`, ``,
}, },
// Some malformed tags that are missing a '>'. // Some malformed tags that are missing a '>'.
{ {
...@@ -144,12 +144,12 @@ var tokenTests = []tokenTest{ ...@@ -144,12 +144,12 @@ var tokenTests = []tokenTest{
{ {
"malformed tag #2", "malformed tag #2",
`<p id`, `<p id`,
`<p id="">`, ``,
}, },
{ {
"malformed tag #3", "malformed tag #3",
`<p id=`, `<p id=`,
`<p id="">`, ``,
}, },
{ {
"malformed tag #4", "malformed tag #4",
...@@ -159,7 +159,7 @@ var tokenTests = []tokenTest{ ...@@ -159,7 +159,7 @@ var tokenTests = []tokenTest{
{ {
"malformed tag #5", "malformed tag #5",
`<p id=0`, `<p id=0`,
`<p id="0">`, ``,
}, },
{ {
"malformed tag #6", "malformed tag #6",
...@@ -169,13 +169,18 @@ var tokenTests = []tokenTest{ ...@@ -169,13 +169,18 @@ var tokenTests = []tokenTest{
{ {
"malformed tag #7", "malformed tag #7",
`<p id="0</p>`, `<p id="0</p>`,
`<p id="0&lt;/p&gt;">`, ``,
}, },
{ {
"malformed tag #8", "malformed tag #8",
`<p id="0"</p>`, `<p id="0"</p>`,
`<p id="0" <="" p="">`, `<p id="0" <="" p="">`,
}, },
{
"malformed tag #9",
`<p></p id`,
`<p>`,
},
// Raw text and RCDATA. // Raw text and RCDATA.
{ {
"basic raw text", "basic raw text",
...@@ -205,7 +210,7 @@ var tokenTests = []tokenTest{ ...@@ -205,7 +210,7 @@ var tokenTests = []tokenTest{
{ {
"' ' completes script end tag", "' ' completes script end tag",
"<SCRIPT>a</SCRipt ", "<SCRIPT>a</SCRipt ",
"<script>$a$</script>", "<script>$a",
}, },
{ {
"'>' completes script end tag", "'>' completes script end tag",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment