Commit a70caf44 authored by Robert Griesemer

implemented InsertSemis mode for go/scanner

R=rsc
https://golang.org/cl/175047
parent 2b3813d0
...@@ -29,9 +29,11 @@ type Scanner struct { ...@@ -29,9 +29,11 @@ type Scanner struct {
mode uint; // scanning mode mode uint; // scanning mode
// scanning state // scanning state
pos token.Position; // previous reading position (position before ch) pos token.Position; // previous reading position (position before ch)
offset int; // current reading offset (position after ch) offset int; // current reading offset (position after ch)
ch int; // one char look-ahead ch int; // one char look-ahead
insertSemi bool; // insert a semicolon before next newline
pendingComment token.Position; // valid if pendingComment.Line > 0
// public state - ok to modify // public state - ok to modify
ErrorCount int; // number of errors encountered ErrorCount int; // number of errors encountered
...@@ -69,6 +71,7 @@ func (S *Scanner) next() { ...@@ -69,6 +71,7 @@ func (S *Scanner) next() {
const ( const (
ScanComments = 1 << iota; // return comments as COMMENT tokens ScanComments = 1 << iota; // return comments as COMMENT tokens
AllowIllegalChars; // do not report an error for illegal chars AllowIllegalChars; // do not report an error for illegal chars
InsertSemis; // automatically insert semicolons
) )
...@@ -420,6 +423,8 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke ...@@ -420,6 +423,8 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
} }
var semicolon = []byte{';'}
// Scan scans the next token and returns the token position pos, // Scan scans the next token and returns the token position pos,
// the token tok, and the literal text lit corresponding to the // the token tok, and the literal text lit corresponding to the
// token. The source end is indicated by token.EOF. // token. The source end is indicated by token.EOF.
...@@ -432,40 +437,63 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke ...@@ -432,40 +437,63 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
// of the error handler, if there was one installed. // of the error handler, if there was one installed.
// //
func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) { func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) {
scan_again: if S.pendingComment.Line > 0 {
// "consume" pending comment
S.pos = S.pendingComment;
S.offset = S.pos.Offset + 1;
S.ch = '/';
S.pendingComment.Line = 0;
}
scanAgain:
// skip white space // skip white space
for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' || S.ch == '\r' { for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' && !S.insertSemi || S.ch == '\r' {
S.next() S.next()
} }
// current token start // current token start
insertSemi := false;
pos, tok = S.pos, token.ILLEGAL; pos, tok = S.pos, token.ILLEGAL;
// determine token value // determine token value
switch ch := S.ch; { switch ch := S.ch; {
case isLetter(ch): case isLetter(ch):
tok = S.scanIdentifier() tok = S.scanIdentifier();
switch tok {
case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
insertSemi = true
default:
insertSemi = false
}
case digitVal(ch) < 10: case digitVal(ch) < 10:
tok = S.scanNumber(false) insertSemi = true;
tok = S.scanNumber(false);
default: default:
S.next(); // always make progress S.next(); // always make progress
switch ch { switch ch {
case -1: case -1:
tok = token.EOF tok = token.EOF
case '\n':
S.insertSemi = false;
return pos, token.SEMICOLON, semicolon;
case '"': case '"':
insertSemi = true;
tok = token.STRING; tok = token.STRING;
S.scanString(pos); S.scanString(pos);
case '\'': case '\'':
insertSemi = true;
tok = token.CHAR; tok = token.CHAR;
S.scanChar(pos); S.scanChar(pos);
case '`': case '`':
insertSemi = true;
tok = token.STRING; tok = token.STRING;
S.scanRawString(pos); S.scanRawString(pos);
case ':': case ':':
tok = S.switch2(token.COLON, token.DEFINE) tok = S.switch2(token.COLON, token.DEFINE)
case '.': case '.':
if digitVal(S.ch) < 10 { if digitVal(S.ch) < 10 {
tok = S.scanNumber(true) insertSemi = true;
tok = S.scanNumber(true);
} else if S.ch == '.' { } else if S.ch == '.' {
S.next(); S.next();
if S.ch == '.' { if S.ch == '.' {
...@@ -482,27 +510,57 @@ scan_again: ...@@ -482,27 +510,57 @@ scan_again:
case '(': case '(':
tok = token.LPAREN tok = token.LPAREN
case ')': case ')':
tok = token.RPAREN insertSemi = true;
tok = token.RPAREN;
case '[': case '[':
tok = token.LBRACK tok = token.LBRACK
case ']': case ']':
tok = token.RBRACK insertSemi = true;
tok = token.RBRACK;
case '{': case '{':
tok = token.LBRACE tok = token.LBRACE
case '}': case '}':
tok = token.RBRACE insertSemi = true;
tok = token.RBRACE;
case '+': case '+':
tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC) tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC);
if tok == token.INC {
insertSemi = true
}
case '-': case '-':
tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC) tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC);
if tok == token.DEC {
insertSemi = true
}
case '*': case '*':
tok = S.switch2(token.MUL, token.MUL_ASSIGN) tok = S.switch2(token.MUL, token.MUL_ASSIGN)
case '/': case '/':
if S.ch == '/' || S.ch == '*' { if S.ch == '/' || S.ch == '*' {
S.scanComment(pos); // comment
tok = token.COMMENT; newline := false;
if S.mode&ScanComments == 0 { if S.insertSemi {
goto scan_again if S.ch == '/' {
// a line comment acts like a newline
newline = true
} else {
// a general comment may act like a newline
S.scanComment(pos);
newline = pos.Line < S.pos.Line;
}
} else {
S.scanComment(pos)
}
if newline {
// insert a semicolon and retain pending comment
S.insertSemi = false;
S.pendingComment = pos;
return pos, token.SEMICOLON, semicolon;
} else if S.mode&ScanComments == 0 {
// skip comment
goto scanAgain
} else {
insertSemi = S.insertSemi; // preserve insertSemi info
tok = token.COMMENT;
} }
} else { } else {
tok = S.switch2(token.QUO, token.QUO_ASSIGN) tok = S.switch2(token.QUO, token.QUO_ASSIGN)
...@@ -537,9 +595,13 @@ scan_again: ...@@ -537,9 +595,13 @@ scan_again:
if S.mode&AllowIllegalChars == 0 { if S.mode&AllowIllegalChars == 0 {
S.error(pos, "illegal character "+charString(ch)) S.error(pos, "illegal character "+charString(ch))
} }
insertSemi = S.insertSemi; // preserve insertSemi info
} }
} }
if S.mode&InsertSemis != 0 {
S.insertSemi = insertSemi
}
return pos, tok, S.src[pos.Offset:S.pos.Offset]; return pos, tok, S.src[pos.Offset:S.pos.Offset];
} }
......
...@@ -225,13 +225,13 @@ func TestScan(t *testing.T) { ...@@ -225,13 +225,13 @@ func TestScan(t *testing.T) {
} }
checkPos(t, lit, pos, epos); checkPos(t, lit, pos, epos);
if tok != e.tok { if tok != e.tok {
t.Errorf("bad token for %s: got %s, expected %s", lit, tok.String(), e.tok.String()) t.Errorf("bad token for %q: got %s, expected %s", lit, tok.String(), e.tok.String())
} }
if e.tok.IsLiteral() && lit != e.lit { if e.tok.IsLiteral() && lit != e.lit {
t.Errorf("bad literal for %s: got %s, expected %s", lit, lit, e.lit) t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, e.lit)
} }
if tokenclass(tok) != e.class { if tokenclass(tok) != e.class {
t.Errorf("bad class for %s: got %d, expected %d", lit, tokenclass(tok), e.class) t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)
} }
epos.Offset += len(lit) + len(whitespace); epos.Offset += len(lit) + len(whitespace);
epos.Line += NewlineCount(lit) + whitespace_linecount; epos.Line += NewlineCount(lit) + whitespace_linecount;
...@@ -249,6 +249,160 @@ func TestScan(t *testing.T) { ...@@ -249,6 +249,160 @@ func TestScan(t *testing.T) {
} }
// getTok reduces a (position, token, literal) triple to just the token;
// the position and the literal text are ignored. The parameter list has the
// same shape as the result list of Scanner.Scan.
func getTok(_ token.Position, tok token.Token, _ []byte) token.Token {
	return tok
}
// checkSemi scans line with a fresh Scanner in the given mode and reports
// an error through t whenever a semicolon token is misplaced.
//
// An ILLEGAL token marks a spot where a semicolon is expected: the very
// next token must be a SEMICOLON located one byte past the ILLEGAL token
// and carrying the literal ";". A SEMICOLON that does not directly follow
// an ILLEGAL token is reported as unexpected. Scanning stops at EOF.
func checkSemi(t *testing.T, line string, mode uint) {
	var S Scanner;
	S.Init("TestSemis", strings.Bytes(line), nil, mode);
	for pos, tok, lit := S.Scan(); tok != token.EOF; pos, tok, lit = S.Scan() {
		switch tok {
		case token.ILLEGAL:
			// the token right after the marker has to be the semicolon;
			// it is consumed here whether or not it matches
			want := pos.Offset + 1;
			pos, tok, lit = S.Scan();
			if tok != token.SEMICOLON {
				t.Errorf("bad token for %q: got %s, expected ;", line, tok.String());
				break
			}
			if pos.Offset != want {
				t.Errorf("bad offset for %q: got %d, expected %d", line, pos.Offset, want)
			}
			if string(lit) != ";" {
				t.Errorf(`bad literal for %q: got %q, expected ";"`, line, lit)
			}
		case token.SEMICOLON:
			// a semicolon that was not announced by a marker
			t.Errorf("bad token for %q: got ;, expected no ;", line)
		}
	}
}
// lines holds the source snippets scanned by TestSemis. Each entry is
// scanned on its own by checkSemi.
var lines = []string{
// A '$' character marks the position at which a semicolon token is
// expected; entries without a '$' must not produce any semicolon.
"",
// identifiers and literals
"foo$\n",
"123$\n",
"1.2$\n",
"'x'$\n",
`"x"` + "$\n",
"`x`$\n",
// operators and delimiters: of these, only ++, --, ), ] and }
// are expected to be followed by a semicolon
"+\n",
"-\n",
"*\n",
"/\n",
"%\n",
"&\n",
"|\n",
"^\n",
"<<\n",
">>\n",
"&^\n",
"+=\n",
"-=\n",
"*=\n",
"/=\n",
"%=\n",
"&=\n",
"|=\n",
"^=\n",
"<<=\n",
">>=\n",
"&^=\n",
"&&\n",
"||\n",
"<-\n",
"++$\n",
"--$\n",
"==\n",
"<\n",
">\n",
"=\n",
"!\n",
"!=\n",
"<=\n",
">=\n",
":=\n",
"...\n",
"(\n",
"[\n",
"{\n",
",\n",
".\n",
")$\n",
"]$\n",
"}$\n",
// an explicit semicolon in the source, preceded by the marker
"$;\n",
":\n",
// keywords: of these, only break, continue, fallthrough and return
// are expected to be followed by a semicolon
"break$\n",
"case\n",
"chan\n",
"const\n",
"continue$\n",
"default\n",
"defer\n",
"else\n",
"fallthrough$\n",
"for\n",
"func\n",
"go\n",
"goto\n",
"if\n",
"import\n",
"interface\n",
"map\n",
"package\n",
"range\n",
"return$\n",
"select\n",
"struct\n",
"switch\n",
"type\n",
"var\n",
// an identifier followed by a comment: the semicolon is expected at the
// start of a line comment or of a general comment that reaches the end
// of the line or spans a newline
"foo$//comment\n",
"foo$/*comment*/\n",
"foo$/*\n*/",
"foo $// comment\n",
"foo $/*comment*/\n",
"foo $/*\n*/",
// TODO(gri): These need to insert the semicolon *before* the first
//            comment, which requires arbitrarily far look-ahead.
//            Only relevant for gofmt placement of comments.
"foo /*comment*/ $\n",
"foo /*0*/ /*1*/ $/*2*/\n",
}
func TestSemis(t *testing.T) {
for _, line := range lines {
checkSemi(t, line, AllowIllegalChars|InsertSemis)
}
for _, line := range lines {
checkSemi(t, line, AllowIllegalChars|InsertSemis|ScanComments)
}
}
type seg struct { type seg struct {
srcline string; // a line of source text srcline string; // a line of source text
filename string; // filename for current token filename string; // filename for current token
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment