cmd/compile/internal/gc: recursive-descent parser

This is a translation of the yacc-based parser with adjustments to make the grammar work for a recursive-descent parser, followed by cleanups and simplifications. The yacc actions were mostly literally copied for correctness with better temporary names. A few of the syntax tests were adjusted for slightly different error messages (it is very difficult to match the yacc-based error messages in all cases, and sometimes the new parser could produce better errors). The new parser is enabled by default. To switch back to the yacc-based parser, set -oldparser. To hardwire the switch back, uncomment "oldparser = 1" in lex.go. - passes all.bash - ~18% reduced parse time per file on average for make.bash - ~3% reduced compile time for building cmd/compile Change-Id: Icb5651bb9d8b9f66261762d2c94a03793050d4ce Reviewed-on: https://go-review.googlesource.com/16665 Run-TryBot: Robert Griesemer <gri@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Russ Cox <rsc@golang.org>

cmd/compile/internal/gc: recursive-descent parser
This is a translation of the yacc-based parser with adjustments to make the grammar work for a recursive-descent parser, followed by cleanups and simplifications. The yacc actions were mostly literally copied for correctness with better temporary names. A few of the syntax tests were adjusted for slightly different error messages (it is very difficult to match the yacc-based error messages in all cases, and sometimes the new parser could produce better errors). The new parser is enabled by default. To switch back to the yacc-based parser, set -oldparser. To hardwire the switch back, uncomment "oldparser = 1" in lex.go. - passes all.bash - ~18% reduced parse time per file on average for make.bash - ~3% reduced compile time for building cmd/compile Change-Id: Icb5651bb9d8b9f66261762d2c94a03793050d4ce Reviewed-on: https://go-review.googlesource.com/16665 Run-TryBot: Robert Griesemer <gri@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Russ Cox <rsc@golang.org>
b569b87c · Robert Griesemer · Russ Cox · b5c1b5d7 · b569b87c · b569b87c
Commit b569b87c authored Nov 04, 2015 by Robert Griesemer Committed by Russ Cox Nov 13, 2015
11 changed files
--- a/src/cmd/compile/internal/gc/go.go
+++ b/src/cmd/compile/internal/gc/go.go
@@ -628,7 +628,7 @@ var Widthint int
 var Widthreg int
-var typesw *Node
+var typesw *Node // TODO(gri) remove when yacc-based parser is gone
 var nblank *Node

--- a/src/cmd/compile/internal/gc/lex.go
+++ b/src/cmd/compile/internal/gc/lex.go
@@ -202,7 +202,8 @@ func Main() {
 	obj.Flagcount("live", "debug liveness analysis", &debuglive)
 	obj.Flagcount("m", "print optimization decisions", &Debug['m'])
 	obj.Flagcount("msan", "build code compatible with C/C++ memory sanitizer", &flag_msan)
-	obj.Flagcount("newexport", "use new export format", &newexport) // TODO remove eventually
+	obj.Flagcount("newexport", "use new export format", &newexport) // TODO(gri) remove eventually
+	obj.Flagcount("oldparser", "use old parser", &oldparser)        // TODO(gri) remove eventually
 	obj.Flagcount("nolocalimports", "reject local (relative) imports", &nolocalimports)
 	obj.Flagstr("o", "write output to `file`", &outfile)
 	obj.Flagstr("p", "set expected package import `path`", &myimportpath)
@@ -317,7 +318,19 @@ func Main() {
 	lexlineno = 1
 	const BOM = 0xFEFF
+	// Uncomment the line below to temporarily switch the compiler back
+	// to the yacc-based parser. Short-term work-around for issues with
+	// the new recursive-descent parser for which setting -oldparser is
+	// not sufficient.
+	// TODO(gri) remove this eventually
+	//
+	// oldparser = 1
 	for _, infile = range flag.Args() {
+		if trace && Debug['x'] != 0 && oldparser == 0 {
+			fmt.Printf("--- %s ---\n", infile)
+		}
 		linehistpush(infile)
 		curio.infile = infile
@@ -831,6 +844,10 @@ func importfile(f *Val, line int) {
 		curio.nlsemi = false
 		typecheckok = true
+		if oldparser == 0 {
+			push_parser()
+		}
 	case 'B':
 		// new export format
 		obj.Bgetc(imp) // skip \n after $$B
@@ -850,6 +867,10 @@ func importfile(f *Val, line int) {
 }
 func unimportfile() {
+	if oldparser == 0 {
+		pop_parser()
+	}
 	if curio.bin != nil {
 		obj.Bterm(curio.bin)
 		curio.bin = nil
@@ -879,6 +900,10 @@ func cannedimports(file string, cp string) {
 	typecheckok = true
 	incannedimport = 1
+	if oldparser == 0 {
+		push_parser()
+	}
 }
 func isSpace(c int) bool {
@@ -1358,8 +1383,10 @@ l0:
 		// a '{' with loophack == true becomes LBODY and disables loophack.
 		//
 		// I said it was clumsy.
+		//
+		// We only need the loophack when running with -oldparser.
 	case '(', '[':
-		if loophack || _yylex_lstk != nil {
+		if oldparser != 0 && (loophack || _yylex_lstk != nil) {
 			h = new(Loophack)
 			if h == nil {
 				Flusherrors()
@@ -1376,7 +1403,7 @@ l0:
 		goto lx
 	case ')', ']':
-		if _yylex_lstk != nil {
+		if oldparser != 0 && _yylex_lstk != nil {
 			h = _yylex_lstk
 			loophack = h.v
 			_yylex_lstk = h.next
@@ -1385,7 +1412,7 @@ l0:
 		goto lx
 	case '{':
-		if loophack {
+		if oldparser != 0 && loophack {
 			if Debug['x'] != 0 {
 				fmt.Printf("%v lex: LBODY\n", Ctxt.Line(int(lexlineno)))
 			}
@@ -1460,7 +1487,9 @@ talph:
 		goto l0
 	case LFOR, LIF, LSWITCH, LSELECT:
-		loophack = true // see comment about loophack above
+		if oldparser != 0 {
+			loophack = true // see comment about loophack above
+		}
 	}
 	if Debug['x'] != 0 {
@@ -1902,13 +1931,18 @@ func (yy) Error(msg string) {
 	Yyerror("%s", msg)
 }
+var oldparser int // if set, theparser is used (otherwise we use the recursive-descent parser)
 var theparser yyParser
 var parsing bool
 func yyparse() {
-	theparser = yyNewParser()
 	parsing = true
-	theparser.Parse(yy{})
+	if oldparser != 0 {
+		theparser = yyNewParser()
+		theparser.Parse(yy{})
+	} else {
+		parse_file()
+	}
 	parsing = false
 }

--- a/src/cmd/compile/internal/gc/parser.go
+++ b/src/cmd/compile/internal/gc/parser.go
--- a/src/cmd/compile/internal/gc/subr.go
+++ b/src/cmd/compile/internal/gc/subr.go
@@ -34,7 +34,7 @@ func errorexit() {
 }
 func parserline() int {
-	if parsing && theparser.Lookahead() > 0 {
+	if oldparser != 0 && parsing && theparser.Lookahead() > 0 {
 		// parser has one symbol lookahead
 		return int(prevlineno)
 	}

--- a/test/fixedbugs/bug358.go
+++ b/test/fixedbugs/bug358.go
@@ -10,13 +10,14 @@
 package main
 import (
-	"io/ioutil"	// GCCGO_ERROR "imported and not used"
+	// avoid imported and not used errors
+	// "io/ioutil"
 	"net/http"
-	"os"		// GCCGO_ERROR "imported and not used"
+	// "os"
 )
 func makeHandler(fn func(http.ResponseWriter, *http.Request, string)) http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request)  // ERROR "syntax error|invalid use of type"
+	return func(w http.ResponseWriter, r *http.Request)  // ERROR "syntax error|not an expression|invalid use of type"
 }
 type Page struct {

--- a/test/syntax/chan.go
+++ b/test/syntax/chan.go
@@ -8,10 +8,10 @@ package main
 type xyz struct {
    ch chan
-} // ERROR "unexpected .*}.* in channel type"
+} // ERROR "unexpected .*}.* in channel type|missing channel element type"
-func Foo(y chan) { // ERROR "unexpected .*\).* in channel type"
+func Foo(y chan) { // ERROR "unexpected .*\).* in channel type|missing channel element type"
 }
-func Bar(x chan, y int) { // ERROR "unexpected comma in channel type"
+func Bar(x chan, y int) { // ERROR "unexpected comma in channel type|missing channel element type"
 }
--- a/test/syntax/forvar.go
+++ b/test/syntax/forvar.go
@@ -7,4 +7,5 @@
 package main
 func main() {
+	var x int // avoid undefined: x error below with recursive-descent parser
 	for var x = 0; x < 10; x++ {	// ERROR "var declaration not allowed in for initializer"
--- a/test/syntax/semi4.go
+++ b/test/syntax/semi4.go
@@ -8,7 +8,7 @@ package main
 func main() {
 	for x		// GCCGO_ERROR "undefined"
-	{		// ERROR "missing .*{.* after for clause"
+	{		// ERROR "missing .*{.* after for clause|missing operand"
 		z	// GCCGO_ERROR "undefined"
--- a/test/syntax/semi6.go
+++ b/test/syntax/semi6.go
@@ -7,7 +7,5 @@
 package main
 type T	// ERROR "unexpected semicolon or newline in type declaration"
-{
+// line below commented out to avoid follow-up error
+// {
\ No newline at end of file
--- a/test/syntax/semi7.go
+++ b/test/syntax/semi7.go
@@ -8,7 +8,7 @@ package main
 func main() {
 	if x { }	// GCCGO_ERROR "undefined"
-	else { }	// ERROR "unexpected semicolon or newline before .?else.?"
+	else { }	// ERROR "unexpected semicolon or newline before .?else.?|unexpected else"
 }
--- a/test/syntax/vareq1.go
+++ b/test/syntax/vareq1.go
@@ -6,5 +6,5 @@
 package main
-var x map[string]string{"a":"b"}		// ERROR "unexpected { at end of statement|expected ';' or newline after top level declaration"
+var x map[string]string{"a":"b"}		// ERROR "unexpected { at end of statement|unexpected { after top level declaration|expected ';' or newline after top level declaration"