Commit f4e5f364 authored by Brad Fitzpatrick

html: parse empty, unquoted, and single-quoted attribute values

Fixes #1391

R=nigeltao
CC=golang-dev
https://golang.org/cl/4453054
parent 54731036
...@@ -331,10 +331,10 @@ func (z *Tokenizer) trim(i int) int { ...@@ -331,10 +331,10 @@ func (z *Tokenizer) trim(i int) int {
return k return k
} }
// lower finds the largest alphabetic [0-9A-Za-z]* word at the start of z.buf[i:] // word finds the largest alphabetic [0-9A-Za-z]* word at the start
// and returns that word lower-cased, as well as the trimmed cursor location // of z.buf[i:] and returns that word (optionally lower-cased), as
// after that word. // well as the trimmed cursor location after that word.
func (z *Tokenizer) lower(i int) ([]byte, int) { func (z *Tokenizer) word(i int, lower bool) ([]byte, int) {
i0 := i i0 := i
loop: loop:
for ; i < z.p1; i++ { for ; i < z.p1; i++ {
...@@ -343,7 +343,9 @@ loop: ...@@ -343,7 +343,9 @@ loop:
case '0' <= c && c <= '9': case '0' <= c && c <= '9':
// No-op. // No-op.
case 'A' <= c && c <= 'Z': case 'A' <= c && c <= 'Z':
if lower {
z.buf[i] = c + 'a' - 'A' z.buf[i] = c + 'a' - 'A'
}
case 'a' <= c && c <= 'z': case 'a' <= c && c <= 'z':
// No-op. // No-op.
default: default:
...@@ -388,7 +390,7 @@ func (z *Tokenizer) TagName() (name []byte, hasAttr bool) { ...@@ -388,7 +390,7 @@ func (z *Tokenizer) TagName() (name []byte, hasAttr bool) {
if z.buf[i] == '/' { if z.buf[i] == '/' {
i++ i++
} }
name, z.p0 = z.lower(i) name, z.p0 = z.word(i, true)
hasAttr = z.p0 != z.p1 hasAttr = z.p0 != z.p1
return return
} }
...@@ -397,23 +399,36 @@ func (z *Tokenizer) TagName() (name []byte, hasAttr bool) { ...@@ -397,23 +399,36 @@ func (z *Tokenizer) TagName() (name []byte, hasAttr bool) {
// attribute for the current tag token and whether there are more attributes. // attribute for the current tag token and whether there are more attributes.
// The contents of the returned slices may change on the next call to Next. // The contents of the returned slices may change on the next call to Next.
func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) { func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
key, i := z.lower(z.p0) key, i := z.word(z.p0, true)
// Get past the "=\"". // Check for an empty attribute value.
if i == z.p1 || z.buf[i] != '=' { if i == z.p1 {
z.p0 = i
return
}
// Get past the equals and quote characters.
if z.buf[i] != '=' {
z.p0, moreAttr = i, true
return return
} }
i = z.trim(i + 1) i = z.trim(i + 1)
if i == z.p1 || z.buf[i] != '"' { if i == z.p1 {
z.p0 = i
return
}
closeQuote := z.buf[i]
if closeQuote != '\'' && closeQuote != '"' {
val, z.p0 = z.word(i, false)
moreAttr = z.p0 != z.p1
return return
} }
i = z.trim(i + 1) i = z.trim(i + 1)
// Copy and unescape everything up to the closing '"'. // Copy and unescape everything up to the closing quote.
dst, src := i, i dst, src := i, i
loop: loop:
for src < z.p1 { for src < z.p1 {
c := z.buf[src] c := z.buf[src]
switch c { switch c {
case '"': case closeQuote:
src++ src++
break loop break loop
case '&': case '&':
......
...@@ -107,6 +107,44 @@ var tokenTests = []tokenTest{ ...@@ -107,6 +107,44 @@ var tokenTests = []tokenTest{
`<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`, `<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
`<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`, `<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`,
}, },
// Attribute tests:
// http://dev.w3.org/html5/spec/Overview.html#attributes-0
{
"Empty attribute",
`<input disabled FOO>`,
`<input disabled="" foo="">`,
},
{
"Empty attribute, whitespace",
`<input disabled FOO >`,
`<input disabled="" foo="">`,
},
{
"Unquoted attribute value",
`<input value=yes FOO=BAR>`,
`<input value="yes" foo="BAR">`,
},
{
"Unquoted attribute value, trailing space",
`<input value=yes FOO=BAR >`,
`<input value="yes" foo="BAR">`,
},
{
"Single-quoted attribute value",
`<input value='yes' FOO='BAR'>`,
`<input value="yes" foo="BAR">`,
},
{
"Single-quoted attribute value, trailing space",
`<input value='yes' FOO='BAR' >`,
`<input value="yes" foo="BAR">`,
},
{
"Double-quoted attribute value",
`<input value="I'm an attribute" FOO="BAR">`,
`<input value="I&apos;m an attribute" foo="BAR">`,
},
} }
func TestTokenizer(t *testing.T) { func TestTokenizer(t *testing.T) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment