Commit 748fab9d authored by Nigel Tao's avatar Nigel Tao

html: foreign element HTML integration points, tag name adjustment,

shorten the MathML namespace abbreviation from "mathml" to "math".
Python's html5lib uses "mathml", but I think that that is an internal
implementation detail; the test cases use "math".

Pass tests10.dat, test 30:
<div><svg><path><foreignObject><math></div>a

| <html>
|   <head>
|   <body>
|     <div>
|       <svg svg>
|         <svg path>
|           <svg foreignObject>
|             <math math>
|               "a"

R=andybalholm
CC=golang-dev
https://golang.org/cl/5529044
parent 0ad241dd
......@@ -23,6 +23,23 @@ func adjustForeignAttributes(aa []Attribute) {
}
}
func htmlIntegrationPoint(n *Node) bool {
if n.Type != ElementNode {
return false
}
switch n.Namespace {
case "math":
// TODO: annotation-xml elements whose start tags have "text/html" or
// "application/xhtml+xml" encodings.
case "svg":
switch n.Data {
case "desc", "foreignObject", "title":
return true
}
}
return false
}
// Section 12.2.5.5.
var breakout = map[string]bool{
"b": true,
......@@ -72,4 +89,44 @@ var breakout = map[string]bool{
"var": true,
}
// TODO: add look-up tables for MathML and SVG adjustments.
// Section 12.2.5.5.
var svgTagNameAdjustments = map[string]string{
"altglyph": "altGlyph",
"altglyphdef": "altGlyphDef",
"altglyphitem": "altGlyphItem",
"animatecolor": "animateColor",
"animatemotion": "animateMotion",
"animatetransform": "animateTransform",
"clippath": "clipPath",
"feblend": "feBlend",
"fecolormatrix": "feColorMatrix",
"fecomponenttransfer": "feComponentTransfer",
"fecomposite": "feComposite",
"feconvolvematrix": "feConvolveMatrix",
"fediffuselighting": "feDiffuseLighting",
"fedisplacementmap": "feDisplacementMap",
"fedistantlight": "feDistantLight",
"feflood": "feFlood",
"fefunca": "feFuncA",
"fefuncb": "feFuncB",
"fefuncg": "feFuncG",
"fefuncr": "feFuncR",
"fegaussianblur": "feGaussianBlur",
"feimage": "feImage",
"femerge": "feMerge",
"femergenode": "feMergeNode",
"femorphology": "feMorphology",
"feoffset": "feOffset",
"fepointlight": "fePointLight",
"fespecularlighting": "feSpecularLighting",
"fespotlight": "feSpotLight",
"fetile": "feTile",
"feturbulence": "feTurbulence",
"foreignobject": "foreignObject",
"glyphref": "glyphRef",
"lineargradient": "linearGradient",
"radialgradient": "radialGradient",
"textpath": "textPath",
}
// TODO: add look-up tables for MathML and SVG attribute adjustments.
......@@ -26,6 +26,10 @@ var scopeMarker = Node{Type: scopeMarkerNode}
// content for text) and are part of a tree of Nodes. Element nodes may also
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
// that it looks like "a<b" rather than "a&lt;b".
//
// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
// "svg" is short for "http://www.w3.org/2000/svg".
type Node struct {
Parent *Node
Child []*Node
......
......@@ -51,58 +51,87 @@ func (p *parser) top() *Node {
return p.doc
}
// stopTags for use in popUntil. These come from section 12.2.3.2.
// Stop tags for use in popUntil. These come from section 12.2.3.2.
var (
defaultScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object"}
listItemScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object", "ol", "ul"}
buttonScopeStopTags = []string{"applet", "caption", "html", "table", "td", "th", "marquee", "object", "button"}
tableScopeStopTags = []string{"html", "table"}
defaultScopeStopTags = map[string][]string{
"": {"applet", "caption", "html", "table", "td", "th", "marquee", "object"},
"math": {"annotation-xml", "mi", "mn", "mo", "ms", "mtext"},
"svg": {"desc", "foreignObject", "title"},
}
)
// stopTags for use in clearStackToContext.
var (
tableRowContextStopTags = []string{"tr", "html"}
type scope int
const (
defaultScope scope = iota
listItemScope
buttonScope
tableScope
tableRowScope
)
// popUntil pops the stack of open elements at the highest element whose tag
// is in matchTags, provided there is no higher element in stopTags. It returns
// whether or not there was such an element. If there was not, popUntil leaves
// the stack unchanged.
// is in matchTags, provided there is no higher element in the scope's stop
// tags (as defined in section 12.2.3.2). It returns whether or not there was
// such an element. If there was not, popUntil leaves the stack unchanged.
//
// For example, if the stack was:
// For example, the set of stop tags for table scope is: "html", "table". If
// the stack was:
// ["html", "body", "font", "table", "b", "i", "u"]
// then popUntil([]string{"html, "table"}, "font") would return false, but
// popUntil([]string{"html, "table"}, "i") would return true and the resultant
// stack would be:
// then popUntil(tableScope, "font") would return false, but
// popUntil(tableScope, "i") would return true and the stack would become:
// ["html", "body", "font", "table", "b"]
//
// If an element's tag is in both stopTags and matchTags, then the stack will
// be popped and the function returns true (provided, of course, there was no
// higher element in the stack that was also in stopTags). For example,
// popUntil([]string{"html, "table"}, "table") would return true and leave:
// If an element's tag is in both the stop tags and matchTags, then the stack
// will be popped and the function returns true (provided, of course, there was
// no higher element in the stack that was also in the stop tags). For example,
// popUntil(tableScope, "table") returns true and leaves:
// ["html", "body", "font"]
func (p *parser) popUntil(stopTags []string, matchTags ...string) bool {
if i := p.indexOfElementInScope(stopTags, matchTags...); i != -1 {
func (p *parser) popUntil(s scope, matchTags ...string) bool {
if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
p.oe = p.oe[:i]
return true
}
return false
}
// indexOfElementInScope returns the index in p.oe of the highest element
// whose tag is in matchTags that is in scope according to stopTags.
// If no matching element is in scope, it returns -1.
func (p *parser) indexOfElementInScope(stopTags []string, matchTags ...string) int {
// indexOfElementInScope returns the index in p.oe of the highest element whose
// tag is in matchTags that is in scope. If no matching element is in scope, it
// returns -1.
func (p *parser) indexOfElementInScope(s scope, matchTags ...string) int {
for i := len(p.oe) - 1; i >= 0; i-- {
tag := p.oe[i].Data
for _, t := range matchTags {
if t == tag {
return i
if p.oe[i].Namespace == "" {
for _, t := range matchTags {
if t == tag {
return i
}
}
switch s {
case defaultScope:
// No-op.
case listItemScope:
if tag == "ol" || tag == "ul" {
return -1
}
case buttonScope:
if tag == "button" {
return -1
}
case tableScope:
if tag == "html" || tag == "table" {
return -1
}
default:
panic("unreachable")
}
}
for _, t := range stopTags {
if t == tag {
return -1
switch s {
case defaultScope, listItemScope, buttonScope:
for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
if t == tag {
return -1
}
}
}
}
......@@ -111,8 +140,30 @@ func (p *parser) indexOfElementInScope(stopTags []string, matchTags ...string) i
// elementInScope is like popUntil, except that it doesn't modify the stack of
// open elements.
func (p *parser) elementInScope(stopTags []string, matchTags ...string) bool {
return p.indexOfElementInScope(stopTags, matchTags...) != -1
func (p *parser) elementInScope(s scope, matchTags ...string) bool {
return p.indexOfElementInScope(s, matchTags...) != -1
}
// clearStackToContext pops elements off the stack of open elements until a
// scope-defined element is found.
func (p *parser) clearStackToContext(s scope) {
for i := len(p.oe) - 1; i >= 0; i-- {
tag := p.oe[i].Data
switch s {
case tableScope:
if tag == "html" || tag == "table" {
p.oe = p.oe[:i+1]
return
}
case tableRowScope:
if tag == "html" || tag == "tr" {
p.oe = p.oe[:i+1]
return
}
default:
panic("unreachable")
}
}
}
// addChild adds a child node n to the top element, and pushes n onto the stack
......@@ -624,10 +675,10 @@ func inBodyIM(p *parser) bool {
case "html":
copyAttributes(p.oe[0], p.tok)
case "address", "article", "aside", "blockquote", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu", "nav", "ol", "p", "section", "summary", "ul":
p.popUntil(buttonScopeStopTags, "p")
p.popUntil(buttonScope, "p")
p.addElement(p.tok.Data, p.tok.Attr)
case "h1", "h2", "h3", "h4", "h5", "h6":
p.popUntil(buttonScopeStopTags, "p")
p.popUntil(buttonScope, "p")
switch n := p.top(); n.Data {
case "h1", "h2", "h3", "h4", "h5", "h6":
p.oe.pop()
......@@ -649,7 +700,7 @@ func inBodyIM(p *parser) bool {
p.addFormattingElement(p.tok.Data, p.tok.Attr)
case "nobr":
p.reconstructActiveFormattingElements()
if p.elementInScope(defaultScopeStopTags, "nobr") {
if p.elementInScope(defaultScope, "nobr") {
p.inBodyEndTagFormatting("nobr")
p.reconstructActiveFormattingElements()
}
......@@ -667,14 +718,14 @@ func inBodyIM(p *parser) bool {
p.framesetOK = false
case "table":
if !p.quirks {
p.popUntil(buttonScopeStopTags, "p")
p.popUntil(buttonScope, "p")
}
p.addElement(p.tok.Data, p.tok.Attr)
p.framesetOK = false
p.im = inTableIM
return true
case "hr":
p.popUntil(buttonScopeStopTags, "p")
p.popUntil(buttonScope, "p")
p.addElement(p.tok.Data, p.tok.Attr)
p.oe.pop()
p.acknowledgeSelfClosingTag()
......@@ -687,7 +738,7 @@ func inBodyIM(p *parser) bool {
return true
case "form":
if p.form == nil {
p.popUntil(buttonScopeStopTags, "p")
p.popUntil(buttonScope, "p")
p.addElement(p.tok.Data, p.tok.Attr)
p.form = p.top()
}
......@@ -697,7 +748,7 @@ func inBodyIM(p *parser) bool {
node := p.oe[i]
switch node.Data {
case "li":
p.popUntil(listItemScopeStopTags, "li")
p.popUntil(listItemScope, "li")
case "address", "div", "p":
continue
default:
......@@ -707,7 +758,7 @@ func inBodyIM(p *parser) bool {
}
break
}
p.popUntil(buttonScopeStopTags, "p")
p.popUntil(buttonScope, "p")
p.addElement(p.tok.Data, p.tok.Attr)
case "dd", "dt":
p.framesetOK = false
......@@ -725,13 +776,13 @@ func inBodyIM(p *parser) bool {
}
break
}
p.popUntil(buttonScopeStopTags, "p")
p.popUntil(buttonScope, "p")
p.addElement(p.tok.Data, p.tok.Attr)
case "plaintext":
p.popUntil(buttonScopeStopTags, "p")
p.popUntil(buttonScope, "p")
p.addElement(p.tok.Data, p.tok.Attr)
case "button":
p.popUntil(defaultScopeStopTags, "button")
p.popUntil(defaultScope, "button")
p.reconstructActiveFormattingElements()
p.addElement(p.tok.Data, p.tok.Attr)
p.framesetOK = false
......@@ -788,7 +839,7 @@ func inBodyIM(p *parser) bool {
}
}
p.acknowledgeSelfClosingTag()
p.popUntil(buttonScopeStopTags, "p")
p.popUntil(buttonScope, "p")
p.addElement("form", nil)
p.form = p.top()
if action != "" {
......@@ -806,23 +857,20 @@ func inBodyIM(p *parser) bool {
p.oe.pop()
p.form = nil
case "xmp":
p.popUntil(buttonScopeStopTags, "p")
p.popUntil(buttonScope, "p")
p.reconstructActiveFormattingElements()
p.framesetOK = false
p.addElement(p.tok.Data, p.tok.Attr)
case "math", "svg":
p.reconstructActiveFormattingElements()
namespace := ""
if p.tok.Data == "math" {
// TODO: adjust MathML attributes.
namespace = "mathml"
} else {
// TODO: adjust SVG attributes.
namespace = "svg"
}
adjustForeignAttributes(p.tok.Attr)
p.addElement(p.tok.Data, p.tok.Attr)
p.top().Namespace = namespace
p.top().Namespace = p.tok.Data
return true
case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
// Ignore the token.
......@@ -837,16 +885,16 @@ func inBodyIM(p *parser) bool {
p.im = afterBodyIM
return true
case "p":
if !p.elementInScope(buttonScopeStopTags, "p") {
if !p.elementInScope(buttonScope, "p") {
p.addElement("p", nil)
}
p.popUntil(buttonScopeStopTags, "p")
p.popUntil(buttonScope, "p")
case "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u":
p.inBodyEndTagFormatting(p.tok.Data)
case "address", "article", "aside", "blockquote", "button", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing", "menu", "nav", "ol", "pre", "section", "summary", "ul":
p.popUntil(defaultScopeStopTags, p.tok.Data)
p.popUntil(defaultScope, p.tok.Data)
case "applet", "marquee", "object":
if p.popUntil(defaultScopeStopTags, p.tok.Data) {
if p.popUntil(defaultScope, p.tok.Data) {
p.clearActiveFormattingElements()
}
case "br":
......@@ -895,7 +943,7 @@ func (p *parser) inBodyEndTagFormatting(tag string) {
p.afe.remove(formattingElement)
return
}
if !p.elementInScope(defaultScopeStopTags, tag) {
if !p.elementInScope(defaultScope, tag) {
// Ignore the tag.
return
}
......@@ -1029,35 +1077,35 @@ func inTableIM(p *parser) bool {
case StartTagToken:
switch p.tok.Data {
case "caption":
p.clearStackToContext(tableScopeStopTags)
p.clearStackToContext(tableScope)
p.afe = append(p.afe, &scopeMarker)
p.addElement(p.tok.Data, p.tok.Attr)
p.im = inCaptionIM
return true
case "tbody", "tfoot", "thead":
p.clearStackToContext(tableScopeStopTags)
p.clearStackToContext(tableScope)
p.addElement(p.tok.Data, p.tok.Attr)
p.im = inTableBodyIM
return true
case "td", "th", "tr":
p.clearStackToContext(tableScopeStopTags)
p.clearStackToContext(tableScope)
p.addElement("tbody", nil)
p.im = inTableBodyIM
return false
case "table":
if p.popUntil(tableScopeStopTags, "table") {
if p.popUntil(tableScope, "table") {
p.resetInsertionMode()
return false
}
// Ignore the token.
return true
case "colgroup":
p.clearStackToContext(tableScopeStopTags)
p.clearStackToContext(tableScope)
p.addElement(p.tok.Data, p.tok.Attr)
p.im = inColumnGroupIM
return true
case "col":
p.clearStackToContext(tableScopeStopTags)
p.clearStackToContext(tableScope)
p.addElement("colgroup", p.tok.Attr)
p.im = inColumnGroupIM
return false
......@@ -1078,7 +1126,7 @@ func inTableIM(p *parser) bool {
case EndTagToken:
switch p.tok.Data {
case "table":
if p.popUntil(tableScopeStopTags, "table") {
if p.popUntil(tableScope, "table") {
p.resetInsertionMode()
return true
}
......@@ -1105,26 +1153,13 @@ func inTableIM(p *parser) bool {
return inBodyIM(p)
}
// clearStackToContext pops elements off the stack of open elements
// until an element listed in stopTags is found.
func (p *parser) clearStackToContext(stopTags []string) {
for i := len(p.oe) - 1; i >= 0; i-- {
for _, tag := range stopTags {
if p.oe[i].Data == tag {
p.oe = p.oe[:i+1]
return
}
}
}
}
// Section 12.2.5.4.11.
func inCaptionIM(p *parser) bool {
switch p.tok.Type {
case StartTagToken:
switch p.tok.Data {
case "caption", "col", "colgroup", "tbody", "td", "tfoot", "thead", "tr":
if p.popUntil(tableScopeStopTags, "caption") {
if p.popUntil(tableScope, "caption") {
p.clearActiveFormattingElements()
p.im = inTableIM
return false
......@@ -1142,13 +1177,13 @@ func inCaptionIM(p *parser) bool {
case EndTagToken:
switch p.tok.Data {
case "caption":
if p.popUntil(tableScopeStopTags, "caption") {
if p.popUntil(tableScope, "caption") {
p.clearActiveFormattingElements()
p.im = inTableIM
}
return true
case "table":
if p.popUntil(tableScopeStopTags, "caption") {
if p.popUntil(tableScope, "caption") {
p.clearActiveFormattingElements()
p.im = inTableIM
return false
......@@ -1232,7 +1267,7 @@ func inTableBodyIM(p *parser) bool {
data = "tr"
consumed = false
case "caption", "col", "colgroup", "tbody", "tfoot", "thead":
if !p.popUntil(tableScopeStopTags, "tbody", "thead", "tfoot") {
if !p.popUntil(tableScope, "tbody", "thead", "tfoot") {
// Ignore the token.
return true
}
......@@ -1244,7 +1279,7 @@ func inTableBodyIM(p *parser) bool {
case EndTagToken:
switch p.tok.Data {
case "table":
if p.popUntil(tableScopeStopTags, "tbody", "thead", "tfoot") {
if p.popUntil(tableScope, "tbody", "thead", "tfoot") {
p.im = inTableIM
return false
}
......@@ -1280,13 +1315,13 @@ func inRowIM(p *parser) bool {
case StartTagToken:
switch p.tok.Data {
case "td", "th":
p.clearStackToContext(tableRowContextStopTags)
p.clearStackToContext(tableRowScope)
p.addElement(p.tok.Data, p.tok.Attr)
p.afe = append(p.afe, &scopeMarker)
p.im = inCellIM
return true
case "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr":
if p.popUntil(tableScopeStopTags, "tr") {
if p.popUntil(tableScope, "tr") {
p.im = inTableBodyIM
return false
}
......@@ -1298,14 +1333,14 @@ func inRowIM(p *parser) bool {
case EndTagToken:
switch p.tok.Data {
case "tr":
if p.popUntil(tableScopeStopTags, "tr") {
if p.popUntil(tableScope, "tr") {
p.im = inTableBodyIM
return true
}
// Ignore the token.
return true
case "table":
if p.popUntil(tableScopeStopTags, "tr") {
if p.popUntil(tableScope, "tr") {
p.im = inTableBodyIM
return false
}
......@@ -1350,7 +1385,7 @@ func inCellIM(p *parser) bool {
case EndTagToken:
switch p.tok.Data {
case "td", "th":
if !p.popUntil(tableScopeStopTags, p.tok.Data) {
if !p.popUntil(tableScope, p.tok.Data) {
// Ignore the token.
return true
}
......@@ -1371,7 +1406,7 @@ func inCellIM(p *parser) bool {
return true
}
if closeTheCellAndReprocess {
if p.popUntil(tableScopeStopTags, "td") || p.popUntil(tableScopeStopTags, "th") {
if p.popUntil(tableScope, "td") || p.popUntil(tableScope, "th") {
p.clearActiveFormattingElements()
p.im = inRowIM
return false
......@@ -1451,7 +1486,7 @@ func inSelectInTableIM(p *parser) bool {
case StartTagToken, EndTagToken:
switch p.tok.Data {
case "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th":
if p.tok.Type == StartTagToken || p.elementInScope(tableScopeStopTags, p.tok.Data) {
if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.Data) {
p.endSelect()
return false
} else {
......@@ -1672,6 +1707,11 @@ func parseForeignContent(p *parser) bool {
Data: p.tok.Data,
})
case StartTagToken:
if htmlIntegrationPoint(p.top()) {
inBodyIM(p)
p.resetInsertionMode()
return true
}
if breakout[p.tok.Data] {
for i := len(p.oe) - 1; i >= 0; i-- {
// TODO: HTML, MathML integration points.
......@@ -1683,10 +1723,14 @@ func parseForeignContent(p *parser) bool {
return false
}
switch p.top().Namespace {
case "mathml":
case "math":
// TODO: adjust MathML attributes.
case "svg":
// TODO: adjust SVG tag names.
// Adjust SVG tag names. The tokenizer lower-cases tag names, but
// SVG wants e.g. "foreignObject" with a capital second "O".
if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
p.tok.Data = x
}
// TODO: adjust SVG attributes.
default:
panic("html: bad parser state: unexpected namespace")
......
......@@ -184,7 +184,7 @@ func TestParser(t *testing.T) {
{"tests4.dat", -1},
{"tests5.dat", -1},
{"tests6.dat", -1},
{"tests10.dat", 30},
{"tests10.dat", 31},
}
for _, tf := range testFiles {
f, err := os.Open("testdata/webkit/" + tf.filename)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment