Commit b8906889 authored by Robert Griesemer's avatar Robert Griesemer

cmd/compile/internal/syntax: implement comment reporting in scanner

R=go1.11

In order to collect comments in the AST and for error testing purposes,
the scanner needs to not only recognize and skip comments, but also be
able to report them if so desired. This change adds a mode flag to the
scanner's init function which controls the scanner behavior around
comments.

In the common case where comments are not needed, there must be no
significant overhead. Thus, comments are reported via a handler upcall
rather than being returned as a _Comment token (which the parser would
have to filter out with every scanner.next() call).

Because the handlers for error messages, directives, and comments all
look the same (they take a position and text), and because directives
look like comments, and errors never start with a '/', this change
simplifies the scanner's init call to only take one (error) handler
instead of 2 or 3 different handlers with identical signature. It is
trivial in the handler to determine if we have an error, directive,
or general comment.

Finally, because directives are comments, when reporting directives
the full comment text is returned now rather than just the directive
text. This simplifies the implementation and makes the scanner API
more regular. Furthermore, it provides important information about
the comment style used by a directive, which may matter eventually
when we fully implement /*line file:line:col*/ directives.

Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c
Reviewed-on: https://go-review.googlesource.com/88215Reviewed-by: 's avatarMatthew Dempsky <mdempsky@google.com>
parent 67049482
...@@ -38,24 +38,31 @@ func (p *parser) init(base *src.PosBase, r io.Reader, errh ErrorHandler, pragh P ...@@ -38,24 +38,31 @@ func (p *parser) init(base *src.PosBase, r io.Reader, errh ErrorHandler, pragh P
p.mode = mode p.mode = mode
p.scanner.init( p.scanner.init(
r, r,
// Error and pragma handlers for scanner. // Error and directive handler for scanner.
// Because the (line, col) positions passed to these // Because the (line, col) positions passed to the
// handlers are always at or after the current reading // handler is always at or after the current reading
// position, it is save to use the most recent position // position, it is safe to use the most recent position
// base to compute the corresponding Pos value. // base to compute the corresponding Pos value.
func(line, col uint, msg string) { func(line, col uint, msg string) {
p.error_at(p.pos_at(line, col), msg) if msg[0] != '/' {
}, p.error_at(p.pos_at(line, col), msg)
func(line, col uint, text string) {
const prefix = "line "
if strings.HasPrefix(text, prefix) {
p.updateBase(line, col+uint(len(prefix)), text[len(prefix):])
return return
} }
if pragh != nil {
// otherwise it must be a comment containing a line or go: directive
text := commentText(msg)
col += 2 // text starts after // or /*
if strings.HasPrefix(text, "line ") {
p.updateBase(line, col+5, text[5:])
return
}
// go: directive (but be conservative and test)
if pragh != nil && strings.HasPrefix(text, "go:") {
p.pragma |= pragh(p.pos_at(line, col), text) p.pragma |= pragh(p.pos_at(line, col), text)
} }
}, },
directives,
) )
p.first = nil p.first = nil
...@@ -109,6 +116,20 @@ func (p *parser) updateBase(line, col uint, text string) { ...@@ -109,6 +116,20 @@ func (p *parser) updateBase(line, col uint, text string) {
p.base = src.NewLinePragmaBase(src.MakePos(p.base.Pos().Base(), line, col), filename, absFilename, uint(n) /*uint(n2)*/) p.base = src.NewLinePragmaBase(src.MakePos(p.base.Pos().Base(), line, col), filename, absFilename, uint(n) /*uint(n2)*/)
} }
func commentText(s string) string {
if s[:2] == "/*" {
return s[2 : len(s)-2] // lop off /* and */
}
// line comment (does not include newline)
// (on Windows, the line comment may end in \r\n)
i := len(s)
if s[i-1] == '\r' {
i--
}
return s[2:i] // lop off // and \r at end, if any
}
func trailingDigits(text string) (uint, uint, bool) { func trailingDigits(text string) (uint, uint, bool) {
// Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails. // Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':') i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')
......
...@@ -19,9 +19,17 @@ import ( ...@@ -19,9 +19,17 @@ import (
"unicode/utf8" "unicode/utf8"
) )
// The mode flags below control which comments are reported
// by calling the error handler. If no flag is set, comments
// are ignored.
const (
comments uint = 1 << iota // call handler for all comments
directives // call handler for directives only
)
type scanner struct { type scanner struct {
source source
pragh func(line, col uint, msg string) mode uint
nlsemi bool // if set '\n' and EOF translate to ';' nlsemi bool // if set '\n' and EOF translate to ';'
// current token, valid after calling next() // current token, valid after calling next()
...@@ -33,25 +41,32 @@ type scanner struct { ...@@ -33,25 +41,32 @@ type scanner struct {
prec int // valid if tok is _Operator, _AssignOp, or _IncOp prec int // valid if tok is _Operator, _AssignOp, or _IncOp
} }
func (s *scanner) init(src io.Reader, errh, pragh func(line, col uint, msg string)) { func (s *scanner) init(src io.Reader, errh func(line, col uint, msg string), mode uint) {
s.source.init(src, errh) s.source.init(src, errh)
s.pragh = pragh s.mode = mode
s.nlsemi = false s.nlsemi = false
} }
// next advances the scanner by reading the next token. // next advances the scanner by reading the next token.
// //
// If a read, source encoding, or lexical error occurs, next // If a read, source encoding, or lexical error occurs, next calls
// calls the error handler installed with init. The handler // the installed error handler with the respective error position
// must exist. // and message. The error message is guaranteed to be non-empty and
// never starts with a '/'. The error handler must exist.
//
// If the scanner mode includes the comments flag and a comment
// (including comments containing directives) is encountered, the
// error handler is also called with each comment position and text
// (including opening /* or // and closing */, but without a newline
// at the end of line comments). Comment text always starts with a /
// which can be used to distinguish these handler calls from errors.
// //
// If a //line or //go: directive is encountered at the start // If the scanner mode includes the directives (but not the comments)
// of a line, next calls the directive handler pragh installed // flag, only comments containing a //line, /*line, or //go: directive
// with init, if not nil. // are reported, in the same way as regular comments. Directives in
// //-style comments are only recognized if they are at the beginning
// of a line.
// //
// The (line, col) position passed to the error and directive
// handler is always at or after the current source reading
// position.
func (s *scanner) next() { func (s *scanner) next() {
nlsemi := s.nlsemi nlsemi := s.nlsemi
s.nlsemi = false s.nlsemi = false
...@@ -565,6 +580,10 @@ func (s *scanner) rawString() { ...@@ -565,6 +580,10 @@ func (s *scanner) rawString() {
s.tok = _Literal s.tok = _Literal
} }
func (s *scanner) comment(text string) {
s.errh(s.line, s.col, text)
}
func (s *scanner) skipLine(r rune) { func (s *scanner) skipLine(r rune) {
for r >= 0 { for r >= 0 {
if r == '\n' { if r == '\n' {
...@@ -578,14 +597,20 @@ func (s *scanner) skipLine(r rune) { ...@@ -578,14 +597,20 @@ func (s *scanner) skipLine(r rune) {
func (s *scanner) lineComment() { func (s *scanner) lineComment() {
r := s.getr() r := s.getr()
if s.mode&comments != 0 {
s.startLit()
s.skipLine(r)
s.comment("//" + string(s.stopLit()))
return
}
// directives must start at the beginning of the line (s.col == colbase) // directives must start at the beginning of the line (s.col == colbase)
if s.col != colbase || s.pragh == nil || (r != 'g' && r != 'l') { if s.mode&directives == 0 || s.col != colbase || (r != 'g' && r != 'l') {
s.skipLine(r) s.skipLine(r)
return return
} }
// s.col == colbase && s.pragh != nil && (r == 'g' || r == 'l')
// recognize directives // recognize go: or line directives
prefix := "go:" prefix := "go:"
if r == 'l' { if r == 'l' {
prefix = "line " prefix = "line "
...@@ -598,38 +623,43 @@ func (s *scanner) lineComment() { ...@@ -598,38 +623,43 @@ func (s *scanner) lineComment() {
r = s.getr() r = s.getr()
} }
// directive text without line ending (which may be "\r\n" if Windows), // directive text
s.startLit() s.startLit()
s.skipLine(r) s.skipLine(r)
text := s.stopLit() s.comment("//" + prefix + string(s.stopLit()))
if i := len(text) - 1; i >= 0 && text[i] == '\r' {
text = text[:i]
}
s.pragh(s.line, s.col+2, prefix+string(text)) // +2 since directive text starts after //
} }
func (s *scanner) skipComment(r rune) { func (s *scanner) skipComment(r rune) bool {
for r >= 0 { for r >= 0 {
for r == '*' { for r == '*' {
r = s.getr() r = s.getr()
if r == '/' { if r == '/' {
return return true
} }
} }
r = s.getr() r = s.getr()
} }
s.errh(s.line, s.col, "comment not terminated") s.errh(s.line, s.col, "comment not terminated")
return false
} }
func (s *scanner) fullComment() { func (s *scanner) fullComment() {
r := s.getr() r := s.getr()
if s.pragh == nil || r != 'l' { if s.mode&comments != 0 {
s.startLit()
if s.skipComment(r) {
s.comment("/*" + string(s.stopLit()))
} else {
s.killLit() // not a complete comment - ignore
}
return
}
if s.mode&directives == 0 || r != 'l' {
s.skipComment(r) s.skipComment(r)
return return
} }
// s.pragh != nil && r == 'l'
// recognize line directive // recognize line directive
const prefix = "line " const prefix = "line "
...@@ -641,15 +671,13 @@ func (s *scanner) fullComment() { ...@@ -641,15 +671,13 @@ func (s *scanner) fullComment() {
r = s.getr() r = s.getr()
} }
// directive text without comment ending // directive text
s.startLit() s.startLit()
s.skipComment(r) if s.skipComment(r) {
text := s.stopLit() s.comment("/*" + prefix + string(s.stopLit()))
if i := len(text) - 2; i >= 0 && text[i] == '*' && text[i+1] == '/' { } else {
text = text[:i] s.killLit() // not a complete comment - ignore
} }
s.pragh(s.line, s.col+2, prefix+string(text)) // +2 since directive text starts after /*
} }
func (s *scanner) escape(quote rune) bool { func (s *scanner) escape(quote rune) bool {
......
...@@ -24,7 +24,7 @@ func TestScanner(t *testing.T) { ...@@ -24,7 +24,7 @@ func TestScanner(t *testing.T) {
defer src.Close() defer src.Close()
var s scanner var s scanner
s.init(src, nil, nil) s.init(src, nil, 0)
for { for {
s.next() s.next()
if s.tok == _EOF { if s.tok == _EOF {
...@@ -53,7 +53,7 @@ func TestTokens(t *testing.T) { ...@@ -53,7 +53,7 @@ func TestTokens(t *testing.T) {
// scan source // scan source
var got scanner var got scanner
got.init(&buf, nil, nil) got.init(&buf, nil, 0)
got.next() got.next()
for i, want := range sampleTokens { for i, want := range sampleTokens {
nlsemi := false nlsemi := false
...@@ -263,6 +263,66 @@ var sampleTokens = [...]struct { ...@@ -263,6 +263,66 @@ var sampleTokens = [...]struct {
{_Var, "var", 0, 0}, {_Var, "var", 0, 0},
} }
func TestComments(t *testing.T) {
type comment struct {
line, col uint // 0-based
text string
}
for _, test := range []struct {
src string
want comment
}{
// no comments
{"no comment here", comment{0, 0, ""}},
{" /", comment{0, 0, ""}},
{"\n /*/", comment{0, 0, ""}},
//-style comments
{"// line comment\n", comment{0, 0, "// line comment"}},
{"package p // line comment\n", comment{0, 10, "// line comment"}},
{"//\n//\n\t// want this one\r\n", comment{2, 1, "// want this one\r"}},
{"\n\n//\n", comment{2, 0, "//"}},
{"//", comment{0, 0, "//"}},
/*-style comments */
{"/* regular comment */", comment{0, 0, "/* regular comment */"}},
{"package p /* regular comment", comment{0, 0, ""}},
{"\n\n\n/*\n*//* want this one */", comment{4, 2, "/* want this one */"}},
{"\n\n/**/", comment{2, 0, "/**/"}},
{"/*", comment{0, 0, ""}},
} {
var s scanner
var got comment
s.init(strings.NewReader(test.src),
func(line, col uint, msg string) {
if msg[0] != '/' {
// error
if msg != "comment not terminated" {
t.Errorf("%q: %s", test.src, msg)
}
return
}
got = comment{line - linebase, col - colbase, msg} // keep last one
}, comments)
for {
s.next()
if s.tok == _EOF {
break
}
}
want := test.want
if got.line != want.line || got.col != want.col {
t.Errorf("%q: got position %d:%d; want %d:%d", test.src, got.line, got.col, want.line, want.col)
}
if got.text != want.text {
t.Errorf("%q: got %q; want %q", test.src, got.text, want.text)
}
}
}
func TestScanErrors(t *testing.T) { func TestScanErrors(t *testing.T) {
for _, test := range []struct { for _, test := range []struct {
src, msg string src, msg string
...@@ -354,7 +414,7 @@ func TestScanErrors(t *testing.T) { ...@@ -354,7 +414,7 @@ func TestScanErrors(t *testing.T) {
// TODO(gri) make this use position info // TODO(gri) make this use position info
t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line) t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
} }
}, nil) }, 0)
for { for {
s.next() s.next()
...@@ -373,7 +433,7 @@ func TestIssue21938(t *testing.T) { ...@@ -373,7 +433,7 @@ func TestIssue21938(t *testing.T) {
s := "/*" + strings.Repeat(" ", 4089) + "*/ .5" s := "/*" + strings.Repeat(" ", 4089) + "*/ .5"
var got scanner var got scanner
got.init(strings.NewReader(s), nil, nil) got.init(strings.NewReader(s), nil, 0)
got.next() got.next()
if got.tok != _Literal || got.lit != ".5" { if got.tok != _Literal || got.lit != ".5" {
......
...@@ -124,7 +124,8 @@ redo: ...@@ -124,7 +124,8 @@ redo:
// EOF // EOF
if s.r == s.w { if s.r == s.w {
if s.ioerr != io.EOF { if s.ioerr != io.EOF {
s.error(s.ioerr.Error()) // ensure we never start with a '/' (e.g., rooted path) in the error message
s.error("I/O error: " + s.ioerr.Error())
} }
return -1 return -1
} }
...@@ -201,6 +202,10 @@ func (s *source) stopLit() []byte { ...@@ -201,6 +202,10 @@ func (s *source) stopLit() []byte {
if len(s.lit) > 0 { if len(s.lit) > 0 {
lit = append(s.lit, lit...) lit = append(s.lit, lit...)
} }
s.suf = -1 // no pending literal s.killLit()
return lit return lit
} }
func (s *source) killLit() {
s.suf = -1 // no pending literal
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment