exp/html: discard tags that are terminated by EOF instead of by '>'

If a tag doesn't have a closing '>', it isn't considered a tag; it is just ignored and EOF is returned instead. Pass one additional test in the test suite. Change tokenizer tests to match correct behavior. R=nigeltao CC=golang-dev https://golang.org/cl/6454131

exp/html: discard tags that are terminated by EOF instead of by '>'
If a tag doesn't have a closing '>', it isn't considered a tag; it is just ignored and EOF is returned instead. Pass one additional test in the test suite. Change tokenizer tests to match correct behavior. R=nigeltao CC=golang-dev https://golang.org/cl/6454131
aa9a81b1 · Andrew Balholm · Nigel Tao · a9d0ff6e · aa9a81b1 · aa9a81b1
Commit aa9a81b1 authored Aug 13, 2012 by Andrew Balholm Committed by Nigel Tao Aug 13, 2012
Hide whitespace changes
Inline Side-by-side

Showing with 18 additions and 9 deletions

webkit02.dat.log src/pkg/exp/html/testlogs/webkit02.dat.log +1 -1

token.go src/pkg/exp/html/token.go +6 -2

token_test.go src/pkg/exp/html/token_test.go +11 -6

No files found.
--- a/src/pkg/exp/html/testlogs/webkit02.dat.log
+++ b/src/pkg/exp/html/testlogs/webkit02.dat.log
 PASS "<foo bar=qux/>"
 PASS "<p id=\"status\"><noscript><strong>A</strong></noscript><span>B</span></p>"
 PASS "<div><sarcasm><div></div></sarcasm></div>"
-FAIL "<html><body><img src=\"\" border=\"0\" alt=\"><div>A</div></body></html>"
+PASS "<html><body><img src=\"\" border=\"0\" alt=\"><div>A</div></body></html>"
 PASS "<table><td></tbody>A"
 PASS "<table><td></thead>A"
 PASS "<table><td></tfoot>A"

--- a/src/pkg/exp/html/token.go
+++ b/src/pkg/exp/html/token.go
@@ -692,7 +692,7 @@ loop:
 // been consumed, where 'a' means anything in [A-Za-z].
 func (z *Tokenizer) readStartTag() TokenType {
 	z.readTag(true)
-	if z.err != nil && len(z.attr) == 0 {
+	if z.err != nil {
 		return ErrorToken
 	}
 	// Several tags flag the tokenizer's next token as raw.
@@ -948,7 +948,11 @@ loop:
 			}
 			if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
 				z.readTag(false)
-				z.tt = EndTagToken
+				if z.err != nil {
+					z.tt = ErrorToken
+				} else {
+					z.tt = EndTagToken
+				}
 				return z.tt
 			}
 			z.raw.end--

--- a/src/pkg/exp/html/token_test.go
+++ b/src/pkg/exp/html/token_test.go
@@ -128,7 +128,7 @@ var tokenTests = []tokenTest{
 	{
 		"tag name eof #4",
 		`<a x`,
-		`<a x="">`,
+		``,
 	},
 	// Some malformed tags that are missing a '>'.
 	{
@@ -144,12 +144,12 @@ var tokenTests = []tokenTest{
 	{
 		"malformed tag #2",
 		`<p id`,
-		`<p id="">`,
+		``,
 	},
 	{
 		"malformed tag #3",
 		`<p id=`,
-		`<p id="">`,
+		``,
 	},
 	{
 		"malformed tag #4",
@@ -159,7 +159,7 @@ var tokenTests = []tokenTest{
 	{
 		"malformed tag #5",
 		`<p id=0`,
-		`<p id="0">`,
+		``,
 	},
 	{
 		"malformed tag #6",
@@ -169,13 +169,18 @@ var tokenTests = []tokenTest{
 	{
 		"malformed tag #7",
 		`<p id="0</p>`,
-		`<p id="0&lt;/p&gt;">`,
+		``,
 	},
 	{
 		"malformed tag #8",
 		`<p id="0"</p>`,
 		`<p id="0" <="" p="">`,
 	},
+	{
+		"malformed tag #9",
+		`<p></p id`,
+		`<p>`,
+	},
 	// Raw text and RCDATA.
 	{
 		"basic raw text",
@@ -205,7 +210,7 @@ var tokenTests = []tokenTest{
 	{
 		"' ' completes script end tag",
 		"<SCRIPT>a</SCRipt ",
-		"<script>$a$</script>",
+		"<script>$a",
 	},
 	{
 		"'>' completes script end tag",