Commit 0da66a2e authored by Robert Griesemer's avatar Robert Griesemer

ebnf: use scanner instead of go/scanner

R=rsc, r
CC=golang-dev
https://golang.org/cl/5192043
parent b2f1eba3
......@@ -98,12 +98,12 @@ func main() {
src = extractEBNF(src)
}
grammar, err := ebnf.Parse(fset, filename, src)
grammar, err := ebnf.Parse(filename, bytes.NewBuffer(src))
if err != nil {
report(err)
}
if err = ebnf.Verify(fset, grammar, *start); err != nil {
if err = ebnf.Verify(grammar, *start); err != nil {
report(err)
}
}
......@@ -23,13 +23,39 @@
package ebnf
import (
"go/scanner"
"go/token"
"fmt"
"os"
"scanner"
"unicode"
"utf8"
)
// ----------------------------------------------------------------------------
// Error handling
type errorList []os.Error
func (list errorList) Error() os.Error {
if len(list) == 0 {
return nil
}
return list
}
func (list errorList) String() string {
switch len(list) {
case 0:
return "no errors"
case 1:
return list[0].String()
}
return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1)
}
func newError(pos scanner.Position, msg string) os.Error {
return os.NewError(fmt.Sprintf("%s: %s", pos, msg))
}
// ----------------------------------------------------------------------------
// Internal representation
......@@ -37,7 +63,7 @@ type (
// An Expression node represents a production expression.
Expression interface {
// Pos is the position of the first character of the syntactic construct
Pos() token.Pos
Pos() scanner.Position
}
// An Alternative node represents a non-empty list of alternative expressions.
......@@ -48,13 +74,13 @@ type (
// A Name node represents a production name.
Name struct {
StringPos token.Pos
StringPos scanner.Position
String string
}
// A Token node represents a literal.
Token struct {
StringPos token.Pos
StringPos scanner.Position
String string
}
......@@ -65,50 +91,50 @@ type (
// A Group node represents a grouped expression.
Group struct {
Lparen token.Pos
Lparen scanner.Position
Body Expression // (body)
}
// An Option node represents an optional expression.
Option struct {
Lbrack token.Pos
Lbrack scanner.Position
Body Expression // [body]
}
// A Repetition node represents a repeated expression.
Repetition struct {
Lbrace token.Pos
Lbrace scanner.Position
Body Expression // {body}
}
// A Bad node stands for pieces of source code that lead to a parse error.
Bad struct {
TokPos token.Pos
Error string // parser error message
}
// A Production node represents an EBNF production.
Production struct {
Name *Name
Expr Expression
}
// A Bad node stands for pieces of source code that lead to a parse error.
Bad struct {
TokPos scanner.Position
Error string // parser error message
}
// A Grammar is a set of EBNF productions. The map
// is indexed by production name.
//
Grammar map[string]*Production
)
func (x Alternative) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Alternative
func (x Sequence) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Sequences
func (x *Name) Pos() token.Pos { return x.StringPos }
func (x *Token) Pos() token.Pos { return x.StringPos }
func (x *Range) Pos() token.Pos { return x.Begin.Pos() }
func (x *Group) Pos() token.Pos { return x.Lparen }
func (x *Option) Pos() token.Pos { return x.Lbrack }
func (x *Repetition) Pos() token.Pos { return x.Lbrace }
func (x *Bad) Pos() token.Pos { return x.TokPos }
func (x *Production) Pos() token.Pos { return x.Name.Pos() }
func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative
func (x Sequence) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Sequences
func (x *Name) Pos() scanner.Position { return x.StringPos }
func (x *Token) Pos() scanner.Position { return x.StringPos }
func (x *Range) Pos() scanner.Position { return x.Begin.Pos() }
func (x *Group) Pos() scanner.Position { return x.Lparen }
func (x *Option) Pos() scanner.Position { return x.Lbrack }
func (x *Repetition) Pos() scanner.Position { return x.Lbrace }
func (x *Production) Pos() scanner.Position { return x.Name.Pos() }
func (x *Bad) Pos() scanner.Position { return x.TokPos }
// ----------------------------------------------------------------------------
// Grammar verification
......@@ -119,15 +145,14 @@ func isLexical(name string) bool {
}
type verifier struct {
fset *token.FileSet
scanner.ErrorVector
errors errorList
worklist []*Production
reached Grammar // set of productions reached from (and including) the root production
grammar Grammar
}
func (v *verifier) error(pos token.Pos, msg string) {
v.Error(v.fset.Position(pos), msg)
func (v *verifier) error(pos scanner.Position, msg string) {
v.errors = append(v.errors, newError(pos, msg))
}
func (v *verifier) push(prod *Production) {
......@@ -187,24 +212,23 @@ func (v *verifier) verifyExpr(expr Expression, lexical bool) {
v.verifyExpr(x.Body, lexical)
case *Repetition:
v.verifyExpr(x.Body, lexical)
case *Bad:
v.error(x.Pos(), x.Error)
default:
panic("unreachable")
panic(fmt.Sprintf("internal error: unexpected type %T", expr))
}
}
func (v *verifier) verify(fset *token.FileSet, grammar Grammar, start string) {
func (v *verifier) verify(grammar Grammar, start string) {
// find root production
root, found := grammar[start]
if !found {
// token.NoPos doesn't require a file set;
// ok to set v.fset only afterwards
v.error(token.NoPos, "no start production "+start)
var noPos scanner.Position
v.error(noPos, "no start production "+start)
return
}
// initialize verifier
v.fset = fset
v.ErrorVector.Reset()
v.worklist = v.worklist[0:0]
v.reached = make(Grammar)
v.grammar = grammar
......@@ -238,8 +262,8 @@ func (v *verifier) verify(fset *token.FileSet, grammar Grammar, start string) {
//
// Position information is interpreted relative to the file set fset.
//
func Verify(fset *token.FileSet, grammar Grammar, start string) os.Error {
func Verify(grammar Grammar, start string) os.Error {
var v verifier
v.verify(fset, grammar, start)
return v.GetError(scanner.Sorted)
v.verify(grammar, start)
return v.errors.Error()
}
......@@ -5,13 +5,10 @@
package ebnf
import (
"go/token"
"io/ioutil"
"bytes"
"testing"
)
var fset = token.NewFileSet()
var goodGrammars = []string{
`Program = .`,
......@@ -46,18 +43,19 @@ var badGrammars = []string{
`Program = {} .`,
}
func checkGood(t *testing.T, filename string, src []byte) {
grammar, err := Parse(fset, filename, src)
func checkGood(t *testing.T, src string) {
grammar, err := Parse("", bytes.NewBuffer([]byte(src)))
if err != nil {
t.Errorf("Parse(%s) failed: %v", src, err)
return
}
if err = Verify(fset, grammar, "Program"); err != nil {
if err = Verify(grammar, "Program"); err != nil {
t.Errorf("Verify(%s) failed: %v", src, err)
}
}
func checkBad(t *testing.T, filename string, src []byte) {
_, err := Parse(fset, filename, src)
func checkBad(t *testing.T, src string) {
_, err := Parse("", bytes.NewBuffer([]byte(src)))
if err == nil {
t.Errorf("Parse(%s) should have failed", src)
}
......@@ -65,23 +63,9 @@ func checkBad(t *testing.T, filename string, src []byte) {
func TestGrammars(t *testing.T) {
for _, src := range goodGrammars {
checkGood(t, "", []byte(src))
checkGood(t, src)
}
for _, src := range badGrammars {
checkBad(t, "", []byte(src))
}
}
var files = []string{
// TODO(gri) add some test files
}
func TestFiles(t *testing.T) {
for _, filename := range files {
src, err := ioutil.ReadFile(filename)
if err != nil {
t.Fatal(err)
}
checkGood(t, filename, src)
checkBad(t, src)
}
}
......@@ -5,51 +5,47 @@
package ebnf
import (
"go/scanner"
"go/token"
"io"
"os"
"scanner"
"strconv"
)
type parser struct {
fset *token.FileSet
scanner.ErrorVector
errors errorList
scanner scanner.Scanner
pos token.Pos // token position
tok token.Token // one token look-ahead
lit string // token literal
pos scanner.Position // token position
tok int // one token look-ahead
lit string // token literal
}
func (p *parser) next() {
p.pos, p.tok, p.lit = p.scanner.Scan()
if p.tok.IsKeyword() {
// TODO Should keyword mapping always happen outside scanner?
// Or should there be a flag to scanner to enable keyword mapping?
p.tok = token.IDENT
}
p.tok = p.scanner.Scan()
p.pos = p.scanner.Position
p.lit = p.scanner.TokenText()
}
func (p *parser) error(pos token.Pos, msg string) {
p.Error(p.fset.Position(pos), msg)
func (p *parser) error(pos scanner.Position, msg string) {
p.errors = append(p.errors, newError(pos, msg))
}
func (p *parser) errorExpected(pos token.Pos, msg string) {
msg = "expected " + msg
if pos == p.pos {
func (p *parser) errorExpected(pos scanner.Position, msg string) {
msg = `expected "` + msg + `"`
if pos.Offset == p.pos.Offset {
// the error happened at the current position;
// make the error message more specific
msg += ", found '" + p.tok.String() + "'"
if p.tok.IsLiteral() {
msg += ", found " + scanner.TokenString(p.tok)
if p.tok < 0 {
msg += " " + p.lit
}
}
p.error(pos, msg)
}
func (p *parser) expect(tok token.Token) token.Pos {
func (p *parser) expect(tok int) scanner.Position {
pos := p.pos
if p.tok != tok {
p.errorExpected(pos, "'"+tok.String()+"'")
p.errorExpected(pos, scanner.TokenString(tok))
}
p.next() // make progress in any case
return pos
......@@ -58,21 +54,21 @@ func (p *parser) expect(tok token.Token) token.Pos {
func (p *parser) parseIdentifier() *Name {
pos := p.pos
name := p.lit
p.expect(token.IDENT)
p.expect(scanner.Ident)
return &Name{pos, name}
}
func (p *parser) parseToken() *Token {
pos := p.pos
value := ""
if p.tok == token.STRING {
if p.tok == scanner.String {
value, _ = strconv.Unquote(p.lit)
// Unquote may fail with an error, but only if the scanner found
// an illegal string in the first place. In this case the error
// has already been reported.
p.next()
} else {
p.expect(token.STRING)
p.expect(scanner.String)
}
return &Token{pos, value}
}
......@@ -82,32 +78,32 @@ func (p *parser) parseTerm() (x Expression) {
pos := p.pos
switch p.tok {
case token.IDENT:
case scanner.Ident:
x = p.parseIdentifier()
case token.STRING:
case scanner.String:
tok := p.parseToken()
x = tok
const ellipsis = "…" // U+2026, the horizontal ellipsis character
if p.tok == token.ILLEGAL && p.lit == ellipsis {
const ellipsis = '…' // U+2026, the horizontal ellipsis character
if p.tok == ellipsis {
p.next()
x = &Range{tok, p.parseToken()}
}
case token.LPAREN:
case '(':
p.next()
x = &Group{pos, p.parseExpression()}
p.expect(token.RPAREN)
p.expect(')')
case token.LBRACK:
case '[':
p.next()
x = &Option{pos, p.parseExpression()}
p.expect(token.RBRACK)
p.expect(']')
case token.LBRACE:
case '{':
p.next()
x = &Repetition{pos, p.parseExpression()}
p.expect(token.RBRACE)
p.expect('}')
}
return x
......@@ -137,7 +133,7 @@ func (p *parser) parseExpression() Expression {
for {
list = append(list, p.parseSequence())
if p.tok != token.OR {
if p.tok != '|' {
break
}
p.next()
......@@ -154,24 +150,22 @@ func (p *parser) parseExpression() Expression {
func (p *parser) parseProduction() *Production {
name := p.parseIdentifier()
p.expect(token.ASSIGN)
p.expect('=')
var expr Expression
if p.tok != token.PERIOD {
if p.tok != '.' {
expr = p.parseExpression()
}
p.expect(token.PERIOD)
p.expect('.')
return &Production{name, expr}
}
func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar {
// initialize parser
p.fset = fset
p.ErrorVector.Reset()
p.scanner.Init(fset.AddFile(filename, fset.Base(), len(src)), src, p, scanner.AllowIllegalChars)
func (p *parser) parse(filename string, src io.Reader) Grammar {
p.scanner.Init(src)
p.scanner.Filename = filename
p.next() // initializes pos, tok, lit
grammar := make(Grammar)
for p.tok != token.EOF {
for p.tok != scanner.EOF {
prod := p.parseProduction()
name := prod.Name.String
if _, found := grammar[name]; !found {
......@@ -187,11 +181,11 @@ func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar
// Parse parses a set of EBNF productions from source src.
// It returns a set of productions. Errors are reported
// for incorrect syntax and if a production is declared
// more than once. Position information is recorded relative
// to the file set fset.
// more than once; the filename is used only for error
// positions.
//
func Parse(fset *token.FileSet, filename string, src []byte) (Grammar, os.Error) {
func Parse(filename string, src io.Reader) (Grammar, os.Error) {
var p parser
grammar := p.parse(fset, filename, src)
return grammar, p.GetError(scanner.Sorted)
grammar := p.parse(filename, src)
return grammar, p.errors.Error()
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment