Commit f17e3d22 authored by Mike Samuel's avatar Mike Samuel

exp/template/html: handle custom attrs and HTML5 embedded elements.

HTML5 allows embedded SVG and MathML.
Code searches show SVG is used for graphing.

This changes transition to deal with constructs like
   <svg xmlns:xlink="http://www.w3.org/1999/xlink">
It changes attr and clients to call a single function that combines
the name lookup and "on" prefix check to determine an attribute
value type given an attribute name.

That function uses heuristics to recognize that
     xlink:href and svg:href
have URL content, and that data-url is likely contains URL content,
since "javascript:" injection is such a problem.

I did a code search over a closure templates codebase to determine
patterns of custom attribute usage.  I did something like

$ find . -name \*.soy | \
    xargs egrep perl -ne 'while (s/\b((data-|\w+:)\w+)\s*=//) { print "$1\n"; }' | \
    sort | uniq

to produce the list at the bottom.

Filtering that by egrep -i 'src|url|uri' produces

data-docConsumptionUri
data-docIconUrl
data-launchUrl
data-lazySrc
data-pageUrl
data-shareurl
data-suggestServerUrl
data-tweetUrl
g:secondaryurls
g:url

which seem to match all the ones that are likely URL content.
There are some short words that match that heuristic, but I still think it decent since
any custom attribute that has a numeric or enumerated keyword value will be unaffected by
the URL assumption.
Counterexamples from /usr/share/dict:
during, hourly, maturity, nourish, purloin, security, surly

Custom attributes present in existing closure templates codebase:
buzz:aid
data-a
data-action
data-actor
data-allowEqualityOps
data-analyticsId
data-bid
data-c
data-cartId
data-categoryId
data-cid
data-command
data-count
data-country
data-creativeId
data-cssToken
data-dest
data-docAttribution
data-docConsumptionUri
data-docCurrencyCode
data-docIconUrl
data-docId
data-docPrice
data-docPriceMicros
data-docTitle
data-docType
data-docid
data-email
data-entityid
data-errorindex
data-f
data-feature
data-fgid
data-filter
data-fireEvent
data-followable
data-followed
data-hashChange
data-height
data-hover
data-href
data-id
data-index
data-invitable
data-isFree
data-isPurchased
data-jid
data-jumpid
data-launchUrl
data-lazySrc
data-listType
data-maxVisiblePages
data-name
data-nid
data-nodeid
data-numItems
data-numPerPage
data-offerType
data-oid
data-opUsesEquality
data-overflowclass
data-packageName
data-pageId
data-pageUrl
data-pos
data-priceBrief
data-profileIds
data-query
data-rating
data-ref
data-rentalGrantPeriodDays
data-rentalactivePeriodHours
data-reviewId
data-role
data-score
data-shareurl
data-showGeLe
data-showLineInclude
data-size
data-sortval
data-suggestServerType
data-suggestServerUrl
data-suggestionIndex
data-tabBarId
data-tabBarIndex
data-tags
data-target
data-textColor
data-theme
data-title
data-toggletarget
data-tooltip
data-trailerId
data-transactionId
data-transition
data-ts
data-tweetContent
data-tweetUrl
data-type
data-useAjax
data-value
data-width
data-x
dm:index
dm:type
g:aspects
g:decorateusingsecondary
g:em
g:entity
g:groups
g:id
g:istoplevel
g:li
g:numresults
g:oid
g:parentId
g:pl
g:pt
g:rating_override
g:secondaryurls
g:sortby
g:startindex
g:target
g:type
g:url
g:value
ga:barsize
ga:css
ga:expandAfterCharsExceed
ga:initialNumRows
ga:nocancelicon
ga:numRowsToExpandTo
ga:type
ga:unlockwhenrated
gw:address
gw:businessname
gw:comment
gw:phone
gw:source
ng:controller
xlink:href
xml:lang
xmlns:atom
xmlns:dc
xmlns:jstd
xmlns:ng
xmlns:og
xmlns:webstore
xmlns:xlink

R=nigeltao
CC=golang-dev
https://golang.org/cl/5119041
parent 582bb304
This diff is collapsed.
......@@ -1400,6 +1400,66 @@ func TestEscapeText(t *testing.T) {
`<style>value`,
context{state: stateCSS, element: elementStyle},
},
{
`<a xlink:href`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a xmlns`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a xmlns:foo`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a xmlnsxyz`,
context{state: stateAttrName},
},
{
`<a data-url`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a data-iconUri`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a data-urlItem`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a g:`,
context{state: stateAttrName},
},
{
`<a g:url`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a g:iconUri`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a g:urlItem`,
context{state: stateAttrName, attr: attrURL},
},
{
`<a g:value`,
context{state: stateAttrName},
},
{
`<a svg:style='`,
context{state: stateCSS, delim: delimSingleQuote},
},
{
`<svg:font-face`,
context{state: stateTag},
},
{
`<svg:a svg:onclick="`,
context{state: stateJS, delim: delimDoubleQuote},
},
}
for _, test := range tests {
......
......@@ -230,7 +230,7 @@ func htmlNameFilter(args ...interface{}) string {
return filterFailsafe
}
s = strings.ToLower(s)
if t := attrType[s]; t != contentTypePlain && attrType["on"+s] != contentTypeJS {
if t := attrType(s); t != contentTypePlain {
// TODO: Split attr and element name part filters so we can whitelist
// attributes.
return filterFailsafe
......
......@@ -106,18 +106,13 @@ func tTag(c context, s []byte) (context, int) {
err: errorf(ErrBadHTML, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
}, len(s)
}
canonAttrName := strings.ToLower(string(s[i:j]))
switch attrType[canonAttrName] {
switch attrType(string(s[i:j])) {
case contentTypeURL:
attr = attrURL
case contentTypeCSS:
attr = attrStyle
case contentTypeJS:
attr = attrScript
default:
if strings.HasPrefix(canonAttrName, "on") {
attr = attrScript
}
}
if j == len(s) {
state = stateAttrName
......@@ -512,16 +507,34 @@ var elementNameMap = map[string]element{
"title": elementTitle,
}
// asciiAlpha returns whether c is an ASCII letter.
func asciiAlpha(c byte) bool {
return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
}
// asciiAlphaNum returns whether c is an ASCII letter or digit.
func asciiAlphaNum(c byte) bool {
return asciiAlpha(c) || '0' <= c && c <= '9'
}
// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
func eatTagName(s []byte, i int) (int, element) {
j := i
for ; j < len(s); j++ {
if i == len(s) || !asciiAlpha(s[i]) {
return i, elementNone
}
j := i + 1
for j < len(s) {
x := s[j]
if !(('a' <= x && x <= 'z') ||
('A' <= x && x <= 'Z') ||
('0' <= x && x <= '9' && i != j)) {
break
if asciiAlphaNum(x) {
j++
continue
}
// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
j += 2
continue
}
break
}
return j, elementNameMap[strings.ToLower(string(s[i:j]))]
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment