go / golang · Commits · 0da66a2e

Commit 0da66a2e
Authored Oct 05, 2011 by Robert Griesemer
ebnf: use scanner instead of go/scanner
R=rsc, r
CC=golang-dev
https://golang.org/cl/5192043
Parent: b2f1eba3
Showing 4 changed files with 117 additions and 115 deletions:
src/cmd/ebnflint/ebnflint.go  +2  -2
src/pkg/ebnf/ebnf.go          +62 -38
src/pkg/ebnf/ebnf_test.go     +9  -25
src/pkg/ebnf/parser.go        +44 -50
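Before the per-file diffs, the net effect on the public API: ebnf.Parse and ebnf.Verify no longer take a *token.FileSet; Parse now accepts a filename (used only for error positions) and an io.Reader. The following is a minimal sketch of a caller against the new signatures, based on the ebnflint.go and ebnf_test.go changes below. It assumes the 2011 (pre-Go 1) standard library, where the package import path was simply "ebnf" and errors were os.Error values; the filename "example.ebnf" and the output handling are made up for illustration.

package main

import (
	"bytes"
	"ebnf" // src/pkg/ebnf at the time of this commit
	"fmt"
)

func main() {
	src := []byte(`Program = .`) // one of the goodGrammars test cases

	// New API: no *token.FileSet; the filename only shows up in error positions.
	grammar, err := ebnf.Parse("example.ebnf", bytes.NewBuffer(src))
	if err != nil {
		fmt.Println(err)
		return
	}

	// Verify likewise loses its file set parameter.
	if err = ebnf.Verify(grammar, "Program"); err != nil {
		fmt.Println(err)
	}
}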
src/cmd/ebnflint/ebnflint.go @ 0da66a2e

@@ -98,12 +98,12 @@ func main() {
 		src = extractEBNF(src)
 	}
 
-	grammar, err := ebnf.Parse(fset, filename, src)
+	grammar, err := ebnf.Parse(filename, bytes.NewBuffer(src))
 	if err != nil {
 		report(err)
 	}
 
-	if err = ebnf.Verify(fset, grammar, *start); err != nil {
+	if err = ebnf.Verify(grammar, *start); err != nil {
 		report(err)
 	}
 }
src/pkg/ebnf/ebnf.go @ 0da66a2e

@@ -23,13 +23,39 @@
 package ebnf
 
 import (
-	"go/scanner"
-	"go/token"
+	"fmt"
 	"os"
+	"scanner"
 	"unicode"
 	"utf8"
 )
 
+// ----------------------------------------------------------------------------
+// Error handling
+
+type errorList []os.Error
+
+func (list errorList) Error() os.Error {
+	if len(list) == 0 {
+		return nil
+	}
+	return list
+}
+
+func (list errorList) String() string {
+	switch len(list) {
+	case 0:
+		return "no errors"
+	case 1:
+		return list[0].String()
+	}
+	return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1)
+}
+
+func newError(pos scanner.Position, msg string) os.Error {
+	return os.NewError(fmt.Sprintf("%s: %s", pos, msg))
+}
+
 // ----------------------------------------------------------------------------
 // Internal representation

@@ -37,7 +63,7 @@ type (
 	// An Expression node represents a production expression.
 	Expression interface {
 		// Pos is the position of the first character of the syntactic construct
-		Pos() token.Pos
+		Pos() scanner.Position
 	}
 
 	// An Alternative node represents a non-empty list of alternative expressions.

@@ -48,13 +74,13 @@ type (
 	// A Name node represents a production name.
 	Name struct {
-		StringPos token.Pos
+		StringPos scanner.Position
 		String    string
 	}
 
 	// A Token node represents a literal.
 	Token struct {
-		StringPos token.Pos
+		StringPos scanner.Position
 		String    string
 	}

@@ -65,50 +91,50 @@ type (
 	// A Group node represents a grouped expression.
 	Group struct {
-		Lparen token.Pos
+		Lparen scanner.Position
 		Body   Expression // (body)
 	}
 
 	// An Option node represents an optional expression.
 	Option struct {
-		Lbrack token.Pos
+		Lbrack scanner.Position
 		Body   Expression // [body]
 	}
 
 	// A Repetition node represents a repeated expression.
 	Repetition struct {
-		Lbrace token.Pos
+		Lbrace scanner.Position
 		Body   Expression // {body}
 	}
 
-	// A Bad node stands for pieces of source code that lead to a parse error.
-	Bad struct {
-		TokPos token.Pos
-		Error  string // parser error message
-	}
-
 	// A Production node represents an EBNF production.
 	Production struct {
 		Name *Name
 		Expr Expression
 	}
 
+	// A Bad node stands for pieces of source code that lead to a parse error.
+	Bad struct {
+		TokPos scanner.Position
+		Error  string // parser error message
+	}
+
 	// A Grammar is a set of EBNF productions. The map
 	// is indexed by production name.
 	//
 	Grammar map[string]*Production
 )
 
-func (x Alternative) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Alternative
-func (x Sequence) Pos() token.Pos    { return x[0].Pos() } // the parser always generates non-empty Sequences
-func (x *Name) Pos() token.Pos       { return x.StringPos }
-func (x *Token) Pos() token.Pos      { return x.StringPos }
-func (x *Range) Pos() token.Pos      { return x.Begin.Pos() }
-func (x *Group) Pos() token.Pos      { return x.Lparen }
-func (x *Option) Pos() token.Pos     { return x.Lbrack }
-func (x *Repetition) Pos() token.Pos { return x.Lbrace }
-func (x *Bad) Pos() token.Pos        { return x.TokPos }
-func (x *Production) Pos() token.Pos { return x.Name.Pos() }
+func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative
+func (x Sequence) Pos() scanner.Position    { return x[0].Pos() } // the parser always generates non-empty Sequences
+func (x *Name) Pos() scanner.Position       { return x.StringPos }
+func (x *Token) Pos() scanner.Position      { return x.StringPos }
+func (x *Range) Pos() scanner.Position      { return x.Begin.Pos() }
+func (x *Group) Pos() scanner.Position      { return x.Lparen }
+func (x *Option) Pos() scanner.Position     { return x.Lbrack }
+func (x *Repetition) Pos() scanner.Position { return x.Lbrace }
+func (x *Production) Pos() scanner.Position { return x.Name.Pos() }
+func (x *Bad) Pos() scanner.Position        { return x.TokPos }
 
 // ----------------------------------------------------------------------------
 // Grammar verification

@@ -119,15 +145,14 @@ func isLexical(name string) bool {
 }
 
 type verifier struct {
-	fset *token.FileSet
-	scanner.ErrorVector
+	errors   errorList
 	worklist []*Production
 	reached  Grammar // set of productions reached from (and including) the root production
 	grammar  Grammar
 }
 
-func (v *verifier) error(pos token.Pos, msg string) {
-	v.Error(v.fset.Position(pos), msg)
+func (v *verifier) error(pos scanner.Position, msg string) {
+	v.errors = append(v.errors, newError(pos, msg))
 }
 
 func (v *verifier) push(prod *Production) {

@@ -187,24 +212,23 @@ func (v *verifier) verifyExpr(expr Expression, lexical bool) {
 		v.verifyExpr(x.Body, lexical)
 	case *Repetition:
 		v.verifyExpr(x.Body, lexical)
+	case *Bad:
+		v.error(x.Pos(), x.Error)
 	default:
-		panic("unreachable")
+		panic(fmt.Sprintf("internal error: unexpected type %T", expr))
 	}
 }
 
-func (v *verifier) verify(fset *token.FileSet, grammar Grammar, start string) {
+func (v *verifier) verify(grammar Grammar, start string) {
 	// find root production
 	root, found := grammar[start]
 	if !found {
-		// token.NoPos doesn't require a file set;
-		// ok to set v.fset only afterwards
-		v.error(token.NoPos, "no start production "+start)
+		var noPos scanner.Position
+		v.error(noPos, "no start production "+start)
 		return
 	}
 
 	// initialize verifier
-	v.fset = fset
-	v.ErrorVector.Reset()
 	v.worklist = v.worklist[0:0]
 	v.reached = make(Grammar)
 	v.grammar = grammar

@@ -238,8 +262,8 @@ func (v *verifier) verify(fset *token.FileSet, grammar Grammar, start string) {
 //
 // Position information is interpreted relative to the file set fset.
 //
-func Verify(fset *token.FileSet, grammar Grammar, start string) os.Error {
+func Verify(grammar Grammar, start string) os.Error {
 	var v verifier
-	v.verify(fset, grammar, start)
-	return v.GetError(scanner.Sorted)
+	v.verify(grammar, start)
+	return v.errors.Error()
 }
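The errorList type added above replaces go/scanner's ErrorVector: the verifier (and, in parser.go, the parser) appends plain os.Error values as it goes, and the list itself satisfies os.Error, returning nil from Error() when nothing was recorded so Verify's final return v.errors.Error() also covers the success case. A standalone sketch of that accumulation pattern, mirroring the names in the diff (pre-Go 1 os.Error and os.NewError assumed; the sample messages are invented):

package main

import (
	"fmt"
	"os"
)

// errorList mirrors the type added in ebnf.go: a slice of errors
// that is itself usable as a single os.Error value.
type errorList []os.Error

func (list errorList) Error() os.Error {
	if len(list) == 0 {
		return nil // no errors recorded: callers can return this directly
	}
	return list
}

func (list errorList) String() string {
	switch len(list) {
	case 0:
		return "no errors"
	case 1:
		return list[0].String()
	}
	return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1)
}

func main() {
	var errors errorList
	errors = append(errors, os.NewError("first problem"))
	errors = append(errors, os.NewError("second problem"))
	fmt.Println(errors.Error()) // prints "first problem (and 1 more errors)"
}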
src/pkg/ebnf/ebnf_test.go @ 0da66a2e

@@ -5,13 +5,10 @@
 package ebnf
 
 import (
-	"go/token"
-	"io/ioutil"
+	"bytes"
 	"testing"
 )
 
-var fset = token.NewFileSet()
-
 var goodGrammars = []string{
 	`Program = .`,

@@ -46,18 +43,19 @@ var badGrammars = []string{
 	`Program = {} .`,
 }
 
-func checkGood(t *testing.T, filename string, src []byte) {
-	grammar, err := Parse(fset, filename, src)
+func checkGood(t *testing.T, src string) {
+	grammar, err := Parse("", bytes.NewBuffer([]byte(src)))
 	if err != nil {
 		t.Errorf("Parse(%s) failed: %v", src, err)
+		return
 	}
-	if err = Verify(fset, grammar, "Program"); err != nil {
+	if err = Verify(grammar, "Program"); err != nil {
 		t.Errorf("Verify(%s) failed: %v", src, err)
 	}
 }
 
-func checkBad(t *testing.T, filename string, src []byte) {
-	_, err := Parse(fset, filename, src)
+func checkBad(t *testing.T, src string) {
+	_, err := Parse("", bytes.NewBuffer([]byte(src)))
 	if err == nil {
 		t.Errorf("Parse(%s) should have failed", src)
 	}

@@ -65,23 +63,9 @@ func checkBad(t *testing.T, filename string, src []byte) {
 
 func TestGrammars(t *testing.T) {
 	for _, src := range goodGrammars {
-		checkGood(t, "", []byte(src))
+		checkGood(t, src)
 	}
 	for _, src := range badGrammars {
-		checkBad(t, "", []byte(src))
-	}
-}
-
-var files = []string{
-	// TODO(gri) add some test files
-}
-
-func TestFiles(t *testing.T) {
-	for _, filename := range files {
-		src, err := ioutil.ReadFile(filename)
-		if err != nil {
-			t.Fatal(err)
-		}
-		checkGood(t, filename, src)
+		checkBad(t, src)
 	}
 }
src/pkg/ebnf/parser.go @ 0da66a2e

@@ -5,51 +5,47 @@
 package ebnf
 
 import (
-	"go/scanner"
-	"go/token"
+	"io"
 	"os"
+	"scanner"
 	"strconv"
 )
 
 type parser struct {
-	fset *token.FileSet
-	scanner.ErrorVector
+	errors  errorList
 	scanner scanner.Scanner
-	pos     token.Pos   // token position
-	tok     token.Token // one token look-ahead
-	lit     string      // token literal
+	pos     scanner.Position // token position
+	tok     int              // one token look-ahead
+	lit     string           // token literal
 }
 
 func (p *parser) next() {
-	p.pos, p.tok, p.lit = p.scanner.Scan()
-	if p.tok.IsKeyword() {
-		// TODO Should keyword mapping always happen outside scanner?
-		//      Or should there be a flag to scanner to enable keyword mapping?
-		p.tok = token.IDENT
-	}
+	p.tok = p.scanner.Scan()
+	p.pos = p.scanner.Position
+	p.lit = p.scanner.TokenText()
 }
 
-func (p *parser) error(pos token.Pos, msg string) {
-	p.Error(p.fset.Position(pos), msg)
+func (p *parser) error(pos scanner.Position, msg string) {
+	p.errors = append(p.errors, newError(pos, msg))
 }
 
-func (p *parser) errorExpected(pos token.Pos, msg string) {
-	msg = "expected " + msg
-	if pos == p.pos {
+func (p *parser) errorExpected(pos scanner.Position, msg string) {
+	msg = `expected "` + msg + `"`
+	if pos.Offset == p.pos.Offset {
 		// the error happened at the current position;
 		// make the error message more specific
-		msg += ", found '" + p.tok.String() + "'"
-		if p.tok.IsLiteral() {
+		msg += ", found " + scanner.TokenString(p.tok)
+		if p.tok < 0 {
 			msg += " " + p.lit
 		}
 	}
 	p.error(pos, msg)
 }
 
-func (p *parser) expect(tok token.Token) token.Pos {
+func (p *parser) expect(tok int) scanner.Position {
 	pos := p.pos
 	if p.tok != tok {
-		p.errorExpected(pos, "'"+tok.String()+"'")
+		p.errorExpected(pos, scanner.TokenString(tok))
 	}
 	p.next() // make progress in any case
 	return pos

@@ -58,21 +54,21 @@ func (p *parser) expect(tok token.Token) token.Pos {
 func (p *parser) parseIdentifier() *Name {
 	pos := p.pos
 	name := p.lit
-	p.expect(token.IDENT)
+	p.expect(scanner.Ident)
 	return &Name{pos, name}
 }
 
 func (p *parser) parseToken() *Token {
 	pos := p.pos
 	value := ""
-	if p.tok == token.STRING {
+	if p.tok == scanner.String {
 		value, _ = strconv.Unquote(p.lit)
 		// Unquote may fail with an error, but only if the scanner found
 		// an illegal string in the first place. In this case the error
 		// has already been reported.
 		p.next()
 	} else {
-		p.expect(token.STRING)
+		p.expect(scanner.String)
 	}
 	return &Token{pos, value}
 }

@@ -82,32 +78,32 @@ func (p *parser) parseTerm() (x Expression) {
 	pos := p.pos
 
 	switch p.tok {
-	case token.IDENT:
+	case scanner.Ident:
 		x = p.parseIdentifier()
 
-	case token.STRING:
+	case scanner.String:
 		tok := p.parseToken()
 		x = tok
-		const ellipsis = "…" // U+2026, the horizontal ellipsis character
-		if p.tok == token.ILLEGAL && p.lit == ellipsis {
+		const ellipsis = '…' // U+2026, the horizontal ellipsis character
+		if p.tok == ellipsis {
 			p.next()
 			x = &Range{tok, p.parseToken()}
 		}
 
-	case token.LPAREN:
+	case '(':
 		p.next()
 		x = &Group{pos, p.parseExpression()}
-		p.expect(token.RPAREN)
+		p.expect(')')
 
-	case token.LBRACK:
+	case '[':
 		p.next()
 		x = &Option{pos, p.parseExpression()}
-		p.expect(token.RBRACK)
+		p.expect(']')
 
-	case token.LBRACE:
+	case '{':
 		p.next()
 		x = &Repetition{pos, p.parseExpression()}
-		p.expect(token.RBRACE)
+		p.expect('}')
 	}
 
 	return x

@@ -137,7 +133,7 @@ func (p *parser) parseExpression() Expression {
 	for {
 		list = append(list, p.parseSequence())
-		if p.tok != token.OR {
+		if p.tok != '|' {
 			break
 		}
 		p.next()

@@ -154,24 +150,22 @@ func (p *parser) parseExpression() Expression {
 
 func (p *parser) parseProduction() *Production {
 	name := p.parseIdentifier()
-	p.expect(token.ASSIGN)
+	p.expect('=')
 	var expr Expression
-	if p.tok != token.PERIOD {
+	if p.tok != '.' {
 		expr = p.parseExpression()
 	}
-	p.expect(token.PERIOD)
+	p.expect('.')
 	return &Production{name, expr}
 }
 
-func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar {
-	// initialize parser
-	p.fset = fset
-	p.ErrorVector.Reset()
-	p.scanner.Init(fset.AddFile(filename, fset.Base(), len(src)), src, p, scanner.AllowIllegalChars)
+func (p *parser) parse(filename string, src io.Reader) Grammar {
+	p.scanner.Init(src)
+	p.scanner.Filename = filename
 	p.next() // initializes pos, tok, lit
 
 	grammar := make(Grammar)
-	for p.tok != token.EOF {
+	for p.tok != scanner.EOF {
 		prod := p.parseProduction()
 		name := prod.Name.String
 		if _, found := grammar[name]; !found {

@@ -187,11 +181,11 @@ func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar
 // Parse parses a set of EBNF productions from source src.
 // It returns a set of productions. Errors are reported
 // for incorrect syntax and if a production is declared
-// more than once. Position information is recorded relative
-// to the file set fset.
+// more than once; the filename is used only for error
+// positions.
 //
-func Parse(fset *token.FileSet, filename string, src []byte) (Grammar, os.Error) {
+func Parse(filename string, src io.Reader) (Grammar, os.Error) {
 	var p parser
-	grammar := p.parse(fset, filename, src)
-	return grammar, p.GetError(scanner.Sorted)
+	grammar := p.parse(filename, src)
+	return grammar, p.errors.Error()
 }
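The parser.go changes boil down to driving the general-purpose scanner package (later text/scanner) instead of go/scanner: Scan returns negative codes such as scanner.Ident, scanner.String, and scanner.EOF for classified tokens and the character itself for runes like '=', '|', and '.', positions come from scanner.Position, and the literal text from TokenText. A small standalone sketch of that scanning loop, assuming the 2011 import path "scanner"; the input string and filename are arbitrary illustrations:

package main

import (
	"bytes"
	"fmt"
	"scanner" // known as text/scanner in later Go releases
)

func main() {
	src := bytes.NewBufferString(`Program = "hello" | Program .`)

	var s scanner.Scanner
	s.Init(src)
	s.Filename = "example.ebnf" // used only for error positions, as in parser.parse

	// Scan reports identifiers, strings, etc. as negative token codes
	// (scanner.Ident, scanner.String, ...) and other characters as themselves;
	// scanner.EOF marks the end of input.
	for tok := s.Scan(); tok != scanner.EOF; tok = s.Scan() {
		fmt.Printf("%s\t%s\t%q\n", s.Position, scanner.TokenString(tok), s.TokenText())
	}
}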