Commit a70caf44 authored by Robert Griesemer

implemented InsertSemis mode for go/scanner

R=rsc
https://golang.org/cl/175047
parent 2b3813d0
......@@ -32,6 +32,8 @@ type Scanner struct {
pos token.Position; // previous reading position (position before ch)
offset int; // current reading offset (position after ch)
ch int; // one char look-ahead
insertSemi bool; // insert a semicolon before next newline
pendingComment token.Position; // valid if pendingComment.Line > 0
// public state - ok to modify
ErrorCount int; // number of errors encountered
......@@ -69,6 +71,7 @@ func (S *Scanner) next() {
// Scanner mode flags. Each constant is a distinct bit (1 << iota), so
// several flags can be or'ed together into a single mode value.
const (
ScanComments = 1 << iota; // return comments as COMMENT tokens
AllowIllegalChars; // do not report an error for illegal chars
InsertSemis; // automatically insert semicolons
)
......@@ -420,6 +423,8 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
}
// semicolon is the literal text Scan returns together with an
// automatically inserted token.SEMICOLON. The same slice is returned
// every time.
var semicolon = []byte{';'}
// Scan scans the next token and returns the token position pos,
// the token tok, and the literal text lit corresponding to the
// token. The source end is indicated by token.EOF.
......@@ -432,40 +437,63 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
// of the error handler, if there was one installed.
//
func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) {
scan_again:
if S.pendingComment.Line > 0 {
// "consume" pending comment
S.pos = S.pendingComment;
S.offset = S.pos.Offset + 1;
S.ch = '/';
S.pendingComment.Line = 0;
}
scanAgain:
// skip white space
for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' || S.ch == '\r' {
for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' && !S.insertSemi || S.ch == '\r' {
S.next()
}
// current token start
insertSemi := false;
pos, tok = S.pos, token.ILLEGAL;
// determine token value
switch ch := S.ch; {
case isLetter(ch):
tok = S.scanIdentifier()
tok = S.scanIdentifier();
switch tok {
case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
insertSemi = true
default:
insertSemi = false
}
case digitVal(ch) < 10:
tok = S.scanNumber(false)
insertSemi = true;
tok = S.scanNumber(false);
default:
S.next(); // always make progress
switch ch {
case -1:
tok = token.EOF
case '\n':
S.insertSemi = false;
return pos, token.SEMICOLON, semicolon;
case '"':
insertSemi = true;
tok = token.STRING;
S.scanString(pos);
case '\'':
insertSemi = true;
tok = token.CHAR;
S.scanChar(pos);
case '`':
insertSemi = true;
tok = token.STRING;
S.scanRawString(pos);
case ':':
tok = S.switch2(token.COLON, token.DEFINE)
case '.':
if digitVal(S.ch) < 10 {
tok = S.scanNumber(true)
insertSemi = true;
tok = S.scanNumber(true);
} else if S.ch == '.' {
S.next();
if S.ch == '.' {
......@@ -482,27 +510,57 @@ scan_again:
case '(':
tok = token.LPAREN
case ')':
tok = token.RPAREN
insertSemi = true;
tok = token.RPAREN;
case '[':
tok = token.LBRACK
case ']':
tok = token.RBRACK
insertSemi = true;
tok = token.RBRACK;
case '{':
tok = token.LBRACE
case '}':
tok = token.RBRACE
insertSemi = true;
tok = token.RBRACE;
case '+':
tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)
tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC);
if tok == token.INC {
insertSemi = true
}
case '-':
tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)
tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC);
if tok == token.DEC {
insertSemi = true
}
case '*':
tok = S.switch2(token.MUL, token.MUL_ASSIGN)
case '/':
if S.ch == '/' || S.ch == '*' {
// comment
newline := false;
if S.insertSemi {
if S.ch == '/' {
// a line comment acts like a newline
newline = true
} else {
// a general comment may act like a newline
S.scanComment(pos);
newline = pos.Line < S.pos.Line;
}
} else {
S.scanComment(pos)
}
if newline {
// insert a semicolon and retain pending comment
S.insertSemi = false;
S.pendingComment = pos;
return pos, token.SEMICOLON, semicolon;
} else if S.mode&ScanComments == 0 {
// skip comment
goto scanAgain
} else {
insertSemi = S.insertSemi; // preserve insertSemi info
tok = token.COMMENT;
if S.mode&ScanComments == 0 {
goto scan_again
}
} else {
tok = S.switch2(token.QUO, token.QUO_ASSIGN)
......@@ -537,9 +595,13 @@ scan_again:
if S.mode&AllowIllegalChars == 0 {
S.error(pos, "illegal character "+charString(ch))
}
insertSemi = S.insertSemi; // preserve insertSemi info
}
}
if S.mode&InsertSemis != 0 {
S.insertSemi = insertSemi
}
return pos, tok, S.src[pos.Offset:S.pos.Offset];
}
......
......@@ -225,13 +225,13 @@ func TestScan(t *testing.T) {
}
checkPos(t, lit, pos, epos);
if tok != e.tok {
t.Errorf("bad token for %s: got %s, expected %s", lit, tok.String(), e.tok.String())
t.Errorf("bad token for %q: got %s, expected %s", lit, tok.String(), e.tok.String())
}
if e.tok.IsLiteral() && lit != e.lit {
t.Errorf("bad literal for %s: got %s, expected %s", lit, lit, e.lit)
t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, e.lit)
}
if tokenclass(tok) != e.class {
t.Errorf("bad class for %s: got %d, expected %d", lit, tokenclass(tok), e.class)
t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)
}
epos.Offset += len(lit) + len(whitespace);
epos.Line += NewlineCount(lit) + whitespace_linecount;
......@@ -249,6 +249,160 @@ func TestScan(t *testing.T) {
}
// getTok returns tok unchanged and ignores its position and literal
// arguments. Its parameters mirror the three results of Scanner.Scan,
// so presumably it is meant to be called as getTok(S.Scan()) - confirm
// against the callers.
func getTok(_ token.Position, tok token.Token, _ []byte) token.Token {
return tok
}
// checkSemi scans line with the given scanner mode and verifies where
// semicolons show up in the token stream:
//
//   - an ILLEGAL token (the test inputs use '$' as this marker) must be
//     followed directly by a SEMICOLON token whose offset is one past the
//     marker's offset and whose literal is ";"
//   - a SEMICOLON token that does not directly follow an ILLEGAL token is
//     reported as an error
//
// Scanning continues until token.EOF; every violation is reported via
// t.Errorf, so a single line may produce several errors.
func checkSemi(t *testing.T, line string, mode uint) {
	var S Scanner;
	S.Init("TestSemis", strings.Bytes(line), nil, mode);
	for pos, tok, lit := S.Scan(); tok != token.EOF; pos, tok, lit = S.Scan() {
		switch tok {
		case token.SEMICOLON:
			// a semicolon that was not announced by a marker
			t.Errorf("bad token for %q: got ;, expected no ;", line)
		case token.ILLEGAL:
			// the token right after the marker must be a semicolon
			// positioned immediately behind the marker
			want := pos.Offset + 1;
			pos, tok, lit = S.Scan();
			if tok != token.SEMICOLON {
				t.Errorf("bad token for %q: got %s, expected ;", line, tok.String());
				continue
			}
			if pos.Offset != want {
				t.Errorf("bad offset for %q: got %d, expected %d", line, pos.Offset, want)
			}
			if string(lit) != ";" {
				t.Errorf(`bad literal for %q: got %q, expected ";"`, line, lit)
			}
		}
	}
}
// lines holds the source snippets that TestSemis feeds to checkSemi.
// A '$' in a snippet marks the spot where the scanner is expected to
// produce a semicolon; snippets without a '$' (other than one containing
// an explicit ';') must not produce any semicolon.
var lines = []string{
// the $ character indicates where a semicolon is expected
"",
"foo$\n",
"123$\n",
"1.2$\n",
"'x'$\n",
`"x"` + "$\n",
"`x`$\n",
"+\n",
"-\n",
"*\n",
"/\n",
"%\n",
"&\n",
"|\n",
"^\n",
"<<\n",
">>\n",
"&^\n",
"+=\n",
"-=\n",
"*=\n",
"/=\n",
"%=\n",
"&=\n",
"|=\n",
"^=\n",
"<<=\n",
">>=\n",
"&^=\n",
"&&\n",
"||\n",
"<-\n",
"++$\n",
"--$\n",
"==\n",
"<\n",
">\n",
"=\n",
"!\n",
"!=\n",
"<=\n",
">=\n",
":=\n",
"...\n",
"(\n",
"[\n",
"{\n",
",\n",
".\n",
")$\n",
"]$\n",
"}$\n",
"$;\n",
":\n",
"break$\n",
"case\n",
"chan\n",
"const\n",
"continue$\n",
"default\n",
"defer\n",
"else\n",
"fallthrough$\n",
"for\n",
"func\n",
"go\n",
"goto\n",
"if\n",
"import\n",
"interface\n",
"map\n",
"package\n",
"range\n",
"return$\n",
"select\n",
"struct\n",
"switch\n",
"type\n",
"var\n",
"foo$//comment\n",
"foo$/*comment*/\n",
"foo$/*\n*/",
"foo $// comment\n",
"foo $/*comment*/\n",
"foo $/*\n*/",
// TODO(gri): These need to insert the semicolon *before* the
// first comment which requires arbitrary far look-
// ahead. Only relevant for gofmt placement of
// comments.
"foo /*comment*/ $\n",
"foo /*0*/ /*1*/ $/*2*/\n",
}
func TestSemis(t *testing.T) {
for _, line := range lines {
checkSemi(t, line, AllowIllegalChars|InsertSemis)
}
for _, line := range lines {
checkSemi(t, line, AllowIllegalChars|InsertSemis|ScanComments)
}
}
type seg struct {
srcline string; // a line of source text
filename string; // filename for current token
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment