Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
G
golang
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
go
golang
Commits
a70caf44
Commit
a70caf44
authored
Dec 10, 2009
by
Robert Griesemer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
implemented InsertSemis mode for go/scanner
R=rsc
https://golang.org/cl/175047
parent
2b3813d0
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
236 additions
and
20 deletions
+236
-20
scanner.go
src/pkg/go/scanner/scanner.go
+79
-17
scanner_test.go
src/pkg/go/scanner/scanner_test.go
+157
-3
No files found.
src/pkg/go/scanner/scanner.go
View file @
a70caf44
...
...
@@ -29,9 +29,11 @@ type Scanner struct {
mode
uint
;
// scanning mode
// scanning state
pos
token
.
Position
;
// previous reading position (position before ch)
offset
int
;
// current reading offset (position after ch)
ch
int
;
// one char look-ahead
pos
token
.
Position
;
// previous reading position (position before ch)
offset
int
;
// current reading offset (position after ch)
ch
int
;
// one char look-ahead
insertSemi
bool
;
// insert a semicolon before next newline
pendingComment
token
.
Position
;
// valid if pendingComment.Line > 0
// public state - ok to modify
ErrorCount
int
;
// number of errors encountered
...
...
@@ -69,6 +71,7 @@ func (S *Scanner) next() {
const
(
ScanComments
=
1
<<
iota
;
// return comments as COMMENT tokens
AllowIllegalChars
;
// do not report an error for illegal chars
InsertSemis
;
// automatically insert semicolons
)
...
...
@@ -420,6 +423,8 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
}
var
semicolon
=
[]
byte
{
';'
}
// Scan scans the next token and returns the token position pos,
// the token tok, and the literal text lit corresponding to the
// token. The source end is indicated by token.EOF.
...
...
@@ -432,40 +437,63 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
// of the error handler, if there was one installed.
//
func
(
S
*
Scanner
)
Scan
()
(
pos
token
.
Position
,
tok
token
.
Token
,
lit
[]
byte
)
{
scan_again
:
if
S
.
pendingComment
.
Line
>
0
{
// "consume" pending comment
S
.
pos
=
S
.
pendingComment
;
S
.
offset
=
S
.
pos
.
Offset
+
1
;
S
.
ch
=
'/'
;
S
.
pendingComment
.
Line
=
0
;
}
scanAgain
:
// skip white space
for
S
.
ch
==
' '
||
S
.
ch
==
'\t'
||
S
.
ch
==
'\n'
||
S
.
ch
==
'\r'
{
for
S
.
ch
==
' '
||
S
.
ch
==
'\t'
||
S
.
ch
==
'\n'
&&
!
S
.
insertSemi
||
S
.
ch
==
'\r'
{
S
.
next
()
}
// current token start
insertSemi
:=
false
;
pos
,
tok
=
S
.
pos
,
token
.
ILLEGAL
;
// determine token value
switch
ch
:=
S
.
ch
;
{
case
isLetter
(
ch
)
:
tok
=
S
.
scanIdentifier
()
tok
=
S
.
scanIdentifier
();
switch
tok
{
case
token
.
IDENT
,
token
.
BREAK
,
token
.
CONTINUE
,
token
.
FALLTHROUGH
,
token
.
RETURN
:
insertSemi
=
true
default
:
insertSemi
=
false
}
case
digitVal
(
ch
)
<
10
:
tok
=
S
.
scanNumber
(
false
)
insertSemi
=
true
;
tok
=
S
.
scanNumber
(
false
);
default
:
S
.
next
();
// always make progress
switch
ch
{
case
-
1
:
tok
=
token
.
EOF
case
'\n'
:
S
.
insertSemi
=
false
;
return
pos
,
token
.
SEMICOLON
,
semicolon
;
case
'"'
:
insertSemi
=
true
;
tok
=
token
.
STRING
;
S
.
scanString
(
pos
);
case
'\'
'
:
insertSemi
=
true
;
tok
=
token
.
CHAR
;
S
.
scanChar
(
pos
);
case
'`'
:
insertSemi
=
true
;
tok
=
token
.
STRING
;
S
.
scanRawString
(
pos
);
case
':'
:
tok
=
S
.
switch2
(
token
.
COLON
,
token
.
DEFINE
)
case
'.'
:
if
digitVal
(
S
.
ch
)
<
10
{
tok
=
S
.
scanNumber
(
true
)
insertSemi
=
true
;
tok
=
S
.
scanNumber
(
true
);
}
else
if
S
.
ch
==
'.'
{
S
.
next
();
if
S
.
ch
==
'.'
{
...
...
@@ -482,27 +510,57 @@ scan_again:
case
'('
:
tok
=
token
.
LPAREN
case
')'
:
tok
=
token
.
RPAREN
insertSemi
=
true
;
tok
=
token
.
RPAREN
;
case
'['
:
tok
=
token
.
LBRACK
case
']'
:
tok
=
token
.
RBRACK
insertSemi
=
true
;
tok
=
token
.
RBRACK
;
case
'{'
:
tok
=
token
.
LBRACE
case
'}'
:
tok
=
token
.
RBRACE
insertSemi
=
true
;
tok
=
token
.
RBRACE
;
case
'+'
:
tok
=
S
.
switch3
(
token
.
ADD
,
token
.
ADD_ASSIGN
,
'+'
,
token
.
INC
)
tok
=
S
.
switch3
(
token
.
ADD
,
token
.
ADD_ASSIGN
,
'+'
,
token
.
INC
);
if
tok
==
token
.
INC
{
insertSemi
=
true
}
case
'-'
:
tok
=
S
.
switch3
(
token
.
SUB
,
token
.
SUB_ASSIGN
,
'-'
,
token
.
DEC
)
tok
=
S
.
switch3
(
token
.
SUB
,
token
.
SUB_ASSIGN
,
'-'
,
token
.
DEC
);
if
tok
==
token
.
DEC
{
insertSemi
=
true
}
case
'*'
:
tok
=
S
.
switch2
(
token
.
MUL
,
token
.
MUL_ASSIGN
)
case
'/'
:
if
S
.
ch
==
'/'
||
S
.
ch
==
'*'
{
S
.
scanComment
(
pos
);
tok
=
token
.
COMMENT
;
if
S
.
mode
&
ScanComments
==
0
{
goto
scan_again
// comment
newline
:=
false
;
if
S
.
insertSemi
{
if
S
.
ch
==
'/'
{
// a line comment acts like a newline
newline
=
true
}
else
{
// a general comment may act like a newline
S
.
scanComment
(
pos
);
newline
=
pos
.
Line
<
S
.
pos
.
Line
;
}
}
else
{
S
.
scanComment
(
pos
)
}
if
newline
{
// insert a semicolon and retain pending comment
S
.
insertSemi
=
false
;
S
.
pendingComment
=
pos
;
return
pos
,
token
.
SEMICOLON
,
semicolon
;
}
else
if
S
.
mode
&
ScanComments
==
0
{
// skip comment
goto
scanAgain
}
else
{
insertSemi
=
S
.
insertSemi
;
// preserve insertSemi info
tok
=
token
.
COMMENT
;
}
}
else
{
tok
=
S
.
switch2
(
token
.
QUO
,
token
.
QUO_ASSIGN
)
...
...
@@ -537,9 +595,13 @@ scan_again:
if
S
.
mode
&
AllowIllegalChars
==
0
{
S
.
error
(
pos
,
"illegal character "
+
charString
(
ch
))
}
insertSemi
=
S
.
insertSemi
;
// preserve insertSemi info
}
}
if
S
.
mode
&
InsertSemis
!=
0
{
S
.
insertSemi
=
insertSemi
}
return
pos
,
tok
,
S
.
src
[
pos
.
Offset
:
S
.
pos
.
Offset
];
}
...
...
src/pkg/go/scanner/scanner_test.go
View file @
a70caf44
...
...
@@ -225,13 +225,13 @@ func TestScan(t *testing.T) {
}
checkPos
(
t
,
lit
,
pos
,
epos
);
if
tok
!=
e
.
tok
{
t
.
Errorf
(
"bad token for %
s
: got %s, expected %s"
,
lit
,
tok
.
String
(),
e
.
tok
.
String
())
t
.
Errorf
(
"bad token for %
q
: got %s, expected %s"
,
lit
,
tok
.
String
(),
e
.
tok
.
String
())
}
if
e
.
tok
.
IsLiteral
()
&&
lit
!=
e
.
lit
{
t
.
Errorf
(
"bad literal for %
s: got %s, expected %s
"
,
lit
,
lit
,
e
.
lit
)
t
.
Errorf
(
"bad literal for %
q: got %q, expected %q
"
,
lit
,
lit
,
e
.
lit
)
}
if
tokenclass
(
tok
)
!=
e
.
class
{
t
.
Errorf
(
"bad class for %
s
: got %d, expected %d"
,
lit
,
tokenclass
(
tok
),
e
.
class
)
t
.
Errorf
(
"bad class for %
q
: got %d, expected %d"
,
lit
,
tokenclass
(
tok
),
e
.
class
)
}
epos
.
Offset
+=
len
(
lit
)
+
len
(
whitespace
);
epos
.
Line
+=
NewlineCount
(
lit
)
+
whitespace_linecount
;
...
...
@@ -249,6 +249,160 @@ func TestScan(t *testing.T) {
}
func
getTok
(
_
token
.
Position
,
tok
token
.
Token
,
_
[]
byte
)
token
.
Token
{
return
tok
}
func
checkSemi
(
t
*
testing
.
T
,
line
string
,
mode
uint
)
{
var
S
Scanner
;
S
.
Init
(
"TestSemis"
,
strings
.
Bytes
(
line
),
nil
,
mode
);
pos
,
tok
,
lit
:=
S
.
Scan
();
for
tok
!=
token
.
EOF
{
if
tok
==
token
.
ILLEGAL
{
// next token must be a semicolon
offs
:=
pos
.
Offset
+
1
;
pos
,
tok
,
lit
=
S
.
Scan
();
if
tok
==
token
.
SEMICOLON
{
if
pos
.
Offset
!=
offs
{
t
.
Errorf
(
"bad offset for %q: got %d, expected %d"
,
line
,
pos
.
Offset
,
offs
)
}
if
string
(
lit
)
!=
";"
{
t
.
Errorf
(
`bad literal for %q: got %q, expected ";"`
,
line
,
lit
)
}
}
else
{
t
.
Errorf
(
"bad token for %q: got %s, expected ;"
,
line
,
tok
.
String
())
}
}
else
if
tok
==
token
.
SEMICOLON
{
t
.
Errorf
(
"bad token for %q: got ;, expected no ;"
,
line
)
}
pos
,
tok
,
lit
=
S
.
Scan
();
}
}
var
lines
=
[]
string
{
// the $ character indicates where a semicolon is expected
""
,
"foo$
\n
"
,
"123$
\n
"
,
"1.2$
\n
"
,
"'x'$
\n
"
,
`"x"`
+
"$
\n
"
,
"`x`$
\n
"
,
"+
\n
"
,
"-
\n
"
,
"*
\n
"
,
"/
\n
"
,
"%
\n
"
,
"&
\n
"
,
"|
\n
"
,
"^
\n
"
,
"<<
\n
"
,
">>
\n
"
,
"&^
\n
"
,
"+=
\n
"
,
"-=
\n
"
,
"*=
\n
"
,
"/=
\n
"
,
"%=
\n
"
,
"&=
\n
"
,
"|=
\n
"
,
"^=
\n
"
,
"<<=
\n
"
,
">>=
\n
"
,
"&^=
\n
"
,
"&&
\n
"
,
"||
\n
"
,
"<-
\n
"
,
"++$
\n
"
,
"--$
\n
"
,
"==
\n
"
,
"<
\n
"
,
">
\n
"
,
"=
\n
"
,
"!
\n
"
,
"!=
\n
"
,
"<=
\n
"
,
">=
\n
"
,
":=
\n
"
,
"...
\n
"
,
"(
\n
"
,
"[
\n
"
,
"{
\n
"
,
",
\n
"
,
".
\n
"
,
")$
\n
"
,
"]$
\n
"
,
"}$
\n
"
,
"$;
\n
"
,
":
\n
"
,
"break$
\n
"
,
"case
\n
"
,
"chan
\n
"
,
"const
\n
"
,
"continue$
\n
"
,
"default
\n
"
,
"defer
\n
"
,
"else
\n
"
,
"fallthrough$
\n
"
,
"for
\n
"
,
"func
\n
"
,
"go
\n
"
,
"goto
\n
"
,
"if
\n
"
,
"import
\n
"
,
"interface
\n
"
,
"map
\n
"
,
"package
\n
"
,
"range
\n
"
,
"return$
\n
"
,
"select
\n
"
,
"struct
\n
"
,
"switch
\n
"
,
"type
\n
"
,
"var
\n
"
,
"foo$//comment
\n
"
,
"foo$/*comment*/
\n
"
,
"foo$/*
\n
*/"
,
"foo $// comment
\n
"
,
"foo $/*comment*/
\n
"
,
"foo $/*
\n
*/"
,
// TODO(gri): These need to insert the semicolon *before* the
// first comment which requires arbitrary far look-
// ahead. Only relevant for gofmt placement of
// comments.
"foo /*comment*/ $
\n
"
,
"foo /*0*/ /*1*/ $/*2*/
\n
"
,
}
func
TestSemis
(
t
*
testing
.
T
)
{
for
_
,
line
:=
range
lines
{
checkSemi
(
t
,
line
,
AllowIllegalChars
|
InsertSemis
)
}
for
_
,
line
:=
range
lines
{
checkSemi
(
t
,
line
,
AllowIllegalChars
|
InsertSemis
|
ScanComments
)
}
}
type
seg
struct
{
srcline
string
;
// a line of source text
filename
string
;
// filename for current token
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment