First cut at the parser for the new template package.

This is not a full grammar, but the pieces are there to implement whatever we converge on.

R=rsc
CC=golang-dev
https://golang.org/cl/4629053

First cut at the parser for the new template package.
This is not a full grammar, but the pieces are there to implement whatever we converge on.

R=rsc
CC=golang-dev
https://golang.org/cl/4629053
cd7826e5 · Rob Pike · 123549ac · cd7826e5 · cd7826e5 · cd7826e5
Commit cd7826e5 authored Jun 22, 2011 by Rob Pike
5 changed files
--- a/src/pkg/exp/template/Makefile
+++ b/src/pkg/exp/template/Makefile
@@ -7,5 +7,6 @@ include ../../../Make.inc
 TARG=template
 GOFILES=\
 	lex.go\
+	parse.go\

 include ../../../Make.pkg
--- a/src/pkg/exp/template/lex.go
+++ b/src/pkg/exp/template/lex.go
@@ -17,22 +17,77 @@ type item struct {
 	val string
 }

+// String formats the item for diagnostics. An EOF item prints as "EOF",
+// an error item prints its message verbatim, and any other item prints its
+// text quoted; text longer than ten bytes is formatted with %.10q and
+// followed by "...".
+func (i item) String() string {
+	if i.typ == itemEOF {
+		return "EOF"
+	}
+	if i.typ == itemError {
+		return i.val
+	}
+	if len(i.val) <= 10 {
+		return fmt.Sprintf("%q", i.val)
+	}
+	return fmt.Sprintf("%.10q...", i.val)
+}
+
 // itemType identifies the type of lex item.
 type itemType int

 const (
-	itemError      itemType = iota // error occurred; value is text of error
-	itemText                       // plain text
-	itemLeftMeta                   // left meta-string
-	itemRightMeta                  // right meta-string
-	itemPipe                       // pipe symbol
-	itemIdentifier                 // alphanumeric identifier
-	itemNumber                     // number
-	itemRawString                  // raw quoted string (includes quotes)
-	itemString                     // quoted string (includes quotes)
+	itemError itemType = iota // error occurred; value is text of error
+	itemDot                   // the cursor, spelled '.'.
 	itemEOF
+	itemElse       // else keyword
+	itemEnd        // end keyword
+	itemField      // alphanumeric identifier, starting with '.'.
+	itemIdentifier // alphanumeric identifier
+	itemIf         // if keyword
+	itemLeftMeta   // left meta-string
+	itemNumber     // number
+	itemPipe       // pipe symbol
+	itemRange      // range keyword
+	itemRawString  // raw quoted string (includes quotes)
+	itemRightMeta  // right meta-string
+	itemString     // quoted string (includes quotes)
+	itemText       // plain text
 )

+// itemName gives each item type a human-readable name so the types
+// pretty-print; itemType.String looks names up here.
+var itemName = map[itemType]string{
+	itemError:      "error",
+	itemDot:        ".",
+	itemEOF:        "EOF",
+	itemElse:       "else",
+	itemEnd:        "end",
+	itemField:      "field",
+	itemIdentifier: "identifier",
+	itemIf:         "if",
+	itemLeftMeta:   "left meta",
+	itemNumber:     "number",
+	itemPipe:       "pipe",
+	itemRange:      "range",
+	itemRawString:  "raw string",
+	itemRightMeta:  "right meta", // spelled like "left meta", not "rightMeta"
+	itemString:     "string",
+	itemText:       "text",
+}
+
+// String returns the name recorded for the type in itemName. When itemName
+// holds no non-empty name for it, the result is "item" followed by the
+// type's numeric value.
+func (i itemType) String() string {
+	if name := itemName[i]; name != "" {
+		return name
+	}
+	return fmt.Sprintf("item%d", int(i))
+}
+
+var key = map[string]itemType{ // spellings that lex to their own item type; consulted by lexIdentifier
+	".":     itemDot, // a lone dot
+	"else":  itemElse,
+	"end":   itemEnd,
+	"if":    itemIf,
+	"range": itemRange,
+} // a word absent from the table yields the zero itemType, itemError, which lexIdentifier reads as "not a keyword"
+
 const eof = -1

 // stateFn represents the state of the scanner as a function that returns the next state.
@@ -51,6 +106,7 @@ type lexer struct {
 // next returns the next rune in the input.
 func (l *lexer) next() (rune int) {
 	if l.pos >= len(l.input) {
+		l.width = 0
 		return eof
 	}
 	rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
@@ -72,12 +128,11 @@ func (l *lexer) backup() {

 // emit passes an item back to the client.
 func (l *lexer) emit(t itemType) {
-	start := l.start
+	l.items <- item{t, l.input[l.start:l.pos]}
 	l.start = l.pos
-	l.items <- item{t, l.input[start:l.pos]}
 }

-// ignore discards whatever input is before this point.
+// ignore skips over the pending input before this point.
 func (l *lexer) ignore() {
 	l.start = l.pos
 }
@@ -106,13 +161,12 @@ func (l *lexer) lineNumber() int {

 // error returns an error token and terminates the scan by passing
 // back a nil pointer that will be the next state, terminating l.run.
-func (l *lexer) error(format string, args ...interface{}) stateFn {
-	format = fmt.Sprintf("%s:%d %s", l.name, l.lineNumber(), format)
+func (l *lexer) errorf(format string, args ...interface{}) stateFn {
 	l.items <- item{itemError, fmt.Sprintf(format, args...)}
 	return nil
 }

-// run lexes the input by execute state functions until nil.
+// run lexes the input by executing state functions until nil.
 func (l *lexer) run() {
 	for state := lexText; state != nil; {
 		state = state(l)
@@ -121,14 +175,14 @@ func (l *lexer) run() {
 }

 // lex launches a new scanner and returns the channel of items.
-func lex(name, input string) chan item {
+func lex(name, input string) (*lexer, chan item) {
 	l := &lexer{
 		name:  name,
 		input: input,
 		items: make(chan item),
 	}
 	go l.run()
-	return l.items
+	return l, l.items
 }

 // state functions
@@ -182,7 +236,7 @@ func lexInsideAction(l *lexer) stateFn {
 		}
 		switch r := l.next(); {
 		case r == eof || r == '\n':
-			return l.error("unclosed action")
+			return l.errorf("unclosed action")
 		case isSpace(r):
 			l.ignore()
 		case r == '|':
@@ -191,20 +245,29 @@ func lexInsideAction(l *lexer) stateFn {
 			return lexQuote
 		case r == '`':
 			return lexRawQuote
-		case r == '+' || r == '-' || r == '.' || ('0' <= r && r <= '9'):
+		case r == '.':
+			// special look-ahead for ".field" so we don't break l.backup().
+			if l.pos < len(l.input) {
+				r := l.input[l.pos]
+				if r < '0' || '9' < r {
+					return lexIdentifier // itemDot comes from the keyword table.
+				}
+			}
+			fallthrough // '.' can start a number.
+		case r == '+' || r == '-' || ('0' <= r && r <= '9'):
 			l.backup()
 			return lexNumber
 		case isAlphaNumeric(r):
 			l.backup()
 			return lexIdentifier
 		default:
-			return l.error("unrecognized character in action: %#U", r)
+			return l.errorf("unrecognized character in action: %#U", r)
 		}
 	}
 	return nil
 }

-// lexIdentifier scans an alphanumeric.
+// lexIdentifier scans an alphanumeric or field.
 func lexIdentifier(l *lexer) stateFn {
 Loop:
 	for {
@@ -213,7 +276,15 @@ Loop:
 			// absorb
 		default:
 			l.backup()
-			l.emit(itemIdentifier)
+			word := l.input[l.start:l.pos]
+			switch {
+			case key[word] != itemError:
+				l.emit(key[word])
+			case word[0] == '.':
+				l.emit(itemField)
+			default:
+				l.emit(itemIdentifier)
+			}
 			break Loop
 		}
 	}
@@ -246,7 +317,7 @@ func lexNumber(l *lexer) stateFn {
 	// Next thing mustn't be alphanumeric.
 	if isAlphaNumeric(l.peek()) {
 		l.next()
-		return l.error("bad number syntax: %q", l.input[l.start:l.pos])
+		return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
 	}
 	l.emit(itemNumber)
 	return lexInsideAction
@@ -263,7 +334,7 @@ Loop:
 			}
 			fallthrough
 		case eof, '\n':
-			return l.error("unterminated quoted string")
+			return l.errorf("unterminated quoted string")
 		case '"':
 			break Loop
 		}
@@ -278,7 +349,7 @@ Loop:
 	for {
 		switch l.next() {
 		case eof, '\n':
-			return l.error("unterminated raw quoted string")
+			return l.errorf("unterminated raw quoted string")
 		case '`':
 			break Loop
 		}

--- a/src/pkg/exp/template/lex_test.go
+++ b/src/pkg/exp/template/lex_test.go
@@ -5,33 +5,10 @@
 package template

 import (
-	"fmt"
 	"reflect"
 	"testing"
 )

-// Make the types prettyprint.
-var itemName = map[itemType]string{
-	itemError:      "Error",
-	itemText:       "Text",
-	itemLeftMeta:   "LeftMeta",
-	itemRightMeta:  "RightMeta",
-	itemPipe:       "Pipe",
-	itemIdentifier: "Identifier",
-	itemNumber:     "Number",
-	itemRawString:  "RawString",
-	itemString:     "String",
-	itemEOF:        "EOF",
-}
-
-func (i itemType) String() string {
-	s := itemName[i]
-	if s == "" {
-		return fmt.Sprintf("item%d", int(i))
-	}
-	return s
-}
-
 type lexTest struct {
 	name  string
 	input string
@@ -42,6 +19,7 @@ var (
 	tEOF      = item{itemEOF, ""}
 	tLeft     = item{itemLeftMeta, "{{"}
 	tRight    = item{itemRightMeta, "}}"}
+	tRange    = item{itemRange, "range"}
 	tPipe     = item{itemPipe, "|"}
 	tFor      = item{itemIdentifier, "for"}
 	tQuote    = item{itemString, `"abc \n\t\" "`}
@@ -68,6 +46,25 @@ var lexTests = []lexTest{
 		tRight,
 		tEOF,
 	}},
+	{"dots", "{{.x . .2 .x.y }}", []item{
+		tLeft,
+		{itemField, ".x"},
+		{itemDot, "."},
+		{itemNumber, ".2"},
+		{itemField, ".x"},
+		{itemField, ".y"},
+		tRight,
+		tEOF,
+	}},
+	{"keywords", "{{range if else end}}", []item{
+		tLeft,
+		{itemRange, "range"},
+		{itemIf, "if"},
+		{itemElse, "else"},
+		{itemEnd, "end"},
+		tRight,
+		tEOF,
+	}},
 	{"pipeline", `intro {{echo hi 1.2 |noargs|args 1 "hi"}} outro`, []item{
 		{itemText, "intro "},
 		tLeft,
@@ -88,29 +85,35 @@ var lexTests = []lexTest{
 	{"badchar", "#{{#}}", []item{
 		{itemText, "#"},
 		tLeft,
-		{itemError, "badchar:1 unrecognized character in action: U+0023 '#'"},
+		{itemError, "unrecognized character in action: U+0023 '#'"},
 	}},
 	{"unclosed action", "{{\n}}", []item{
 		tLeft,
-		{itemError, "unclosed action:2 unclosed action"},
+		{itemError, "unclosed action"},
+	}},
+	{"EOF in action", "{{range", []item{
+		tLeft,
+		tRange,
+		{itemError, "unclosed action"},
 	}},
 	{"unclosed quote", "{{\"\n\"}}", []item{
 		tLeft,
-		{itemError, "unclosed quote:2 unterminated quoted string"},
+		{itemError, "unterminated quoted string"},
 	}},
 	{"unclosed raw quote", "{{`xx\n`}}", []item{
 		tLeft,
-		{itemError, "unclosed raw quote:2 unterminated raw quoted string"},
+		{itemError, "unterminated raw quoted string"},
 	}},
 	{"bad number", "{{3k}}", []item{
 		tLeft,
-		{itemError, `bad number:1 bad number syntax: "3k"`},
+		{itemError, `bad number syntax: "3k"`},
 	}},
 }

 // collect gathers the emitted items into a slice.
 func collect(t *lexTest) (items []item) {
-	for i := range lex(t.name, t.input) {
+	_, tokens := lex(t.name, t.input)
+	for i := range tokens {
 		items = append(items, i)
 	}
 	return
@@ -120,7 +123,7 @@ func TestLex(t *testing.T) {
 	for _, test := range lexTests {
 		items := collect(&test)
 		if !reflect.DeepEqual(items, test.items) {
-			t.Errorf("%s: got\n\t%v; expected\n\t%v", test.name, items, test.items)
+			t.Errorf("%s: got\n\t%v\nexpected\n\t%v", test.name, items, test.items)
 		}
 	}
 }
--- a/src/pkg/exp/template/parse.go
+++ b/src/pkg/exp/template/parse.go
--- a/src/pkg/exp/template/parse_test.go
+++ b/src/pkg/exp/template/parse_test.go
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+	"fmt"
+	"testing"
+)
+
+const dumpErrors = true // when true, TestParse prints each expected parse error with fmt.Printf
+
+type numberTest struct { // one TestNumberParse case; with every is* flag false, newNumber is expected to fail
+	text      string // literal passed to newNumber
+	isInt     bool   // the result must be flagged isInt
+	isUint    bool   // the result must be flagged isUint
+	isFloat   bool   // the result must be flagged isFloat
+	imaginary bool   // expected value of the result's imaginary flag
+	int64            // expected int64 value; compared only when isInt is set
+	uint64           // expected uint64 value; compared only when isUint is set
+	float64          // expected float64 value; compared only when isFloat is set
+}
+
+var numberTests = []numberTest{ // positional fields: text, isInt, isUint, isFloat, imaginary, int64, uint64, float64
+	// basics: decimal literals, with signs, exponents, a fraction and an imaginary suffix
+	{"0", true, true, true, false, 0, 0, 0},
+	{"73", true, true, true, false, 73, 73, 73},
+	{"-73", true, false, true, false, -73, 0, -73},
+	{"+73", true, false, true, false, 73, 0, 73},
+	{"100", true, true, true, false, 100, 100, 100},
+	{"1e9", true, true, true, false, 1e9, 1e9, 1e9},
+	{"-1e9", true, false, true, false, -1e9, 0, -1e9},
+	{"-1.2", false, false, true, false, 0, 0, -1.2},
+	{"1e19", false, true, true, false, 0, 1e19, 1e19},
+	{"-1e19", false, false, true, false, 0, 0, -1e19},
+	{"4i", false, false, true, true, 0, 0, 4},
+	// funny bases: leading-0 octal and 0x hexadecimal literals
+	{"0123", true, true, true, false, 0123, 0123, 0123},
+	{"-0x0", true, false, true, false, 0, 0, 0},
+	{"0xdeadbeef", true, true, true, false, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef},
+	// some broken syntax: no is* flag is set, so TestNumberParse expects an error from newNumber
+	{text: "+-2"},
+	{text: "0x123."},
+	{text: "1e."},
+	{text: "0xi."},
+}
+
+// TestNumberParse runs newNumber over every entry of numberTests. An entry
+// with none of isInt, isUint or isFloat set must fail to parse; any other
+// entry must parse and report exactly the flags and values the entry lists.
+func TestNumberParse(t *testing.T) {
+	for _, test := range numberTests {
+		n, err := newNumber(test.text)
+		ok := test.isInt || test.isUint || test.isFloat
+		if ok && err != nil {
+			// Include the error itself so the failure can be diagnosed.
+			t.Errorf("unexpected error for %q: %v", test.text, err)
+			continue
+		}
+		if !ok && err == nil {
+			t.Errorf("expected error for %q", test.text)
+			continue
+		}
+		if !ok {
+			// Expected failure, got one; there is no result to inspect.
+			continue
+		}
+		if n.imaginary != test.imaginary {
+			t.Errorf("imaginary incorrect for %q; should be %t", test.text, test.imaginary)
+		}
+		// For each kind, the flag must match in both directions; the value
+		// is compared only when the kind is expected.
+		if test.isInt {
+			if !n.isInt {
+				t.Errorf("expected integer for %q", test.text)
+			}
+			if n.int64 != test.int64 {
+				t.Errorf("int64 for %q should be %d is %d", test.text, test.int64, n.int64)
+			}
+		} else if n.isInt {
+			t.Errorf("did not expect integer for %q", test.text)
+		}
+		if test.isUint {
+			if !n.isUint {
+				t.Errorf("expected unsigned integer for %q", test.text)
+			}
+			if n.uint64 != test.uint64 {
+				t.Errorf("uint64 for %q should be %d is %d", test.text, test.uint64, n.uint64)
+			}
+		} else if n.isUint {
+			t.Errorf("did not expect unsigned integer for %q", test.text)
+		}
+		if test.isFloat {
+			if !n.isFloat {
+				t.Errorf("expected float for %q", test.text)
+			}
+			if n.float64 != test.float64 {
+				t.Errorf("float64 for %q should be %g is %g", test.text, test.float64, n.float64)
+			}
+		} else if n.isFloat {
+			t.Errorf("did not expect float for %q", test.text)
+		}
+	}
+}
+
+// num is a test helper that parses s with newNumber and returns the node,
+// panicking with the parse error if newNumber reports one.
+func num(s string) *numberNode {
+	node, err := newNumber(s)
+	if err == nil {
+		return node
+	}
+	panic(err)
+}
+
+type parseTest struct { // one TestParse case
+	name   string // passed to New and used to label failures
+	input  string // template text handed to Parse
+	ok     bool   // whether Parse is expected to succeed (noError) or fail (hasError)
+	result string // expected tmpl.root.String() output; compared only when Parse succeeds as expected
+}
+
+const ( // readable values for parseTest.ok
+	noError  = true  // Parse must succeed
+	hasError = false // Parse must return an error
+)
+
+var parseTests = []parseTest{ // positional fields: name, input, ok, result
+	{"empty", "", noError,
+		`[]`},
+	{"spaces", " \t\n", noError,
+		`[(text: " \t\n")]`},
+	{"text", "some text", noError,
+		`[(text: "some text")]`},
+	{"emptyMeta", "{{}}", noError,
+		`[(action: [])]`},
+	{"simple command", "{{hello}}", noError,
+		`[(action: [(command: [I=hello])])]`},
+	{"multi-word command", "{{hello world}}", noError,
+		`[(action: [(command: [I=hello I=world])])]`},
+	{"multi-word command with number", "{{hello 80}}", noError,
+		`[(action: [(command: [I=hello N=80])])]`},
+	{"multi-word command with string", "{{hello `quoted text`}}", noError,
+		"[(action: [(command: [I=hello S=`quoted text`])])]"},
+	{"pipeline", "{{hello|world}}", noError,
+		`[(action: [(command: [I=hello]) (command: [I=world])])]`},
+	{"simple range", "{{range .x}}hello{{end}}", noError,
+		`[({{range F=.x}} [(text: "hello")])]`},
+	{"nested range", "{{range .x}}hello{{range .y}}goodbye{{end}}{{end}}", noError,
+		`[({{range F=.x}} [(text: "hello")({{range F=.y}} [(text: "goodbye")])])]`},
+	{"range with else", "{{range .x}}true{{else}}false{{end}}", noError,
+		`[({{range F=.x}} [(text: "true")] {{else}} [(text: "false")])]`},
+	// Errors: Parse must fail on these inputs; TestParse does not compare result for them.
+	{"unclosed action", "hello{{range", hasError, ""},
+	{"not a field", "hello{{range x}}{{end}}", hasError, ""},
+	{"missing end", "hello{{range .x}}", hasError, ""},
+	{"missing end after else", "hello{{range .x}}{{else}}", hasError, ""},
+}
+
+// TestParse parses each parseTests input with a fresh template. It fails
+// when the success or failure of Parse disagrees with the entry's ok field,
+// or when the parsed tree's String form differs from the expected result.
+// Expected errors are printed when dumpErrors is set.
+func TestParse(t *testing.T) {
+	for _, test := range parseTests {
+		tmpl := New(test.name)
+		err := tmpl.Parse(test.input)
+		if err != nil {
+			if test.ok {
+				t.Errorf("%q: unexpected error: %v", test.name, err)
+			} else if dumpErrors {
+				// expected error, got one
+				fmt.Printf("%s: %s\n\t%s\n", test.name, test.input, err)
+			}
+			continue
+		}
+		if !test.ok {
+			t.Errorf("%q: expected error; got none", test.name)
+			continue
+		}
+		if got := tmpl.root.String(); got != test.result {
+			t.Errorf("%s=(%q): got\n\t%v\nexpected\n\t%v", test.name, test.input, got, test.result)
+		}
+	}
+}