Intersperse comments nicely when printing an AST.

gofmt formatted source code looks pretty good already; with a bit more fine-tuning it should be great.

printer.go:
- Implemented comment intersperse algorithm. The approach is a result of many trial-and-error experiments but at this point reasonably simple and open to arbitrary fine-tuning.

parser.go:
- Simplified handling of lead and line comments (formerly called leading and trailing comments).
- Use a comments list instead of an array (I may change this back - this is not obviously better and uses more space).

doc.go:
- Remove comments from AST nodes that have been 'consumed' in the documentation to avoid duplicate printing of them. Allows for better control of what is printed without use of printing control flags (which are hard to use and not fine-grained enough).

Corresponding adjustments to various clients of these files.

R=rsc
DELTA=478 (275 added, 108 deleted, 95 changed)
OCL=32185
CL=32380

Intersperse comments nicely when printing an AST.
gofmt formatted source code looks pretty good already; with a bit more fine-tuning it should be great.

printer.go:
- Implemented comment intersperse algorithm. The approach is a result of many trial-and-error experiments but at this point reasonably simple and open to arbitrary fine-tuning.

parser.go:
- Simplified handling of lead and line comments (formerly called leading and trailing comments).
- Use a comments list instead of an array (I may change this back - this is not obviously better and uses more space).

doc.go:
- Remove comments from AST nodes that have been 'consumed' in the documentation to avoid duplicate printing of them. Allows for better control of what is printed without use of printing control flags (which are hard to use and not fine-grained enough).

Corresponding adjustments to various clients of these files.

R=rsc
DELTA=478 (275 added, 108 deleted, 95 changed)
OCL=32185
CL=32380
2210a643 · Robert Griesemer · f6258511 · 2210a643 · 2210a643 · 2210a643
Commit 2210a643 authored Jul 28, 2009 by Robert Griesemer
12 changed files
--- a/src/cmd/godoc/godoc.go
+++ b/src/cmd/godoc/godoc.go
@@ -207,11 +207,7 @@ func parse(path string, mode uint) (*ast.File, *parseErrors) {
 func nodeText(node interface{}) []byte {
 	var buf bytes.Buffer;
 	tw := makeTabwriter(&buf);
-	mode := uint(0);
-	if _, isProgram := node.(*ast.File); isProgram {
-		mode = printer.DocComments;
-	}
-	printer.Fprint(tw, node, mode);
+	printer.Fprint(tw, node, 0);
 	tw.Flush();
 	return buf.Data();
 }

--- a/src/cmd/gofmt/gofmt.go
+++ b/src/cmd/gofmt/gofmt.go
@@ -28,6 +28,7 @@ var (

 	// operation modes
 	allgo = flag.Bool("a", false, "include all .go files for package");
+	comments = flag.Bool("c", false, "omit comments");
 	silent = flag.Bool("s", false, "silent mode: parsing only");
 	verbose = flag.Bool("v", false, "verbose mode: trace parsing");
 	exports = flag.Bool("x", false, "show exports only");
@@ -48,7 +49,10 @@ func usage() {


 func parserMode() uint {
-	mode := parser.ParseComments;
+	mode := uint(0);
+	if !*comments {
+		mode |= parser.ParseComments;
+	}
 	if *verbose {
 		mode |= parser.Trace;
 	}
@@ -99,7 +103,7 @@ func getPackage(path string) (*ast.Package, os.Error) {


 func printerMode() uint {
-	mode := printer.DocComments;
+	mode := uint(0);
 	if *optcommas {
 		mode |= printer.OptCommas;
 	}

--- a/src/cmd/gofmt/test.sh
+++ b/src/cmd/gofmt/test.sh
@@ -30,11 +30,15 @@ apply1() {
 	#echo $1 $2
 	case `basename $F` in
 	# files with errors (skip them)
-	# the following have semantic errors: bug039.go | bug040.go
+	# the following have semantic errors:
+	#   bug039.go | bug040.go
+	# the following are not idempotent at the moment because of comment formatting:
+	comment.go | net.go | powser1.go | powser2.go | bug052.go | simpbool.go | "shift.go" | range.go | \
+	\
 	test_errors.go | calc.go | method1.go | selftest1.go | func3.go | const2.go | \
 	bug014.go | bug025.go | bug029.go | bug032.go | bug039.go | bug040.go | bug050.go |  bug068.go | \
 	bug088.go | bug083.go | bug106.go | bug121.go | bug125.go | bug126.go | bug132.go | bug133.go | \
-	bug134.go | bug160.go | bug163.go | bug166.go ) ;;
+	bug134.go | bug160.go | bug163.go | bug166.go | bug169.go ) ;;
 	* ) $1 $2; count $F;;
 	esac
 }

--- a/src/pkg/go/ast/ast.go
+++ b/src/pkg/go/ast/ast.go
@@ -89,12 +89,12 @@ type Comment struct {
 }


-// A CommentGroup represents a sequence of single comments
+// A CommentGroup represents a sequence of comments
 // with no other tokens and no empty lines between.
 //
 type CommentGroup struct {
 	List []*Comment;
-	EndLine int;  // line where the last comment in the group ends
+	Next *CommentGroup;  // next comment group in source order
 }


@@ -116,7 +116,7 @@ type (
 		Names []*Ident;  // field/method/parameter names; nil if anonymous field
 		Type Expr;  // field/method/parameter type
 		Tag []*StringLit;  // field tag; or nil
-		Comment *CommentGroup;  // trailing comments on same line; or nil
+		Comment *CommentGroup;  // line comments; or nil
 	};
 )

@@ -675,7 +675,7 @@ type (
 		Doc *CommentGroup;  // associated documentation; or nil
 		Name *Ident;  // local package name (including "."); or nil
 		Path []*StringLit;  // package path
-		Comment *CommentGroup;  // trailing comments on same line; or nil
+		Comment *CommentGroup;  // line comments; or nil
 	};

 	// A ValueSpec node represents a constant or variable declaration
@@ -685,7 +685,7 @@ type (
 		Names []*Ident;  // value names
 		Type Expr;  // value type; or nil
 		Values []Expr;  // initial values; or nil
-		Comment *CommentGroup;  // trailing comments on same line; or nil
+		Comment *CommentGroup;  // line comments; or nil
 	};

 	// A TypeSpec node represents a type declaration (TypeSpec production).
@@ -693,7 +693,7 @@ type (
 		Doc *CommentGroup;  // associated documentation; or nil
 		Name *Ident;  // type name
 		Type Expr;
-		Comment *CommentGroup;  // trailing comments on same line; or nil
+		Comment *CommentGroup;  // line comments; or nil
 	};
 )

@@ -773,7 +773,7 @@ type File struct {
 	token.Position;  // position of "package" keyword
 	Name *Ident;  // package name
 	Decls []Decl;  // top-level declarations
-	Comments []*CommentGroup;  // list of unassociated comments
+	Comments *CommentGroup;  // list of all comments in the source file
 }



--- a/src/pkg/go/ast/filter.go
+++ b/src/pkg/go/ast/filter.go
@@ -169,7 +169,7 @@ func filterDecl(decl Decl) bool {


 // FilterExports trims an AST in place such that only exported nodes remain:
-// all top-level identififiers which are not exported and their associated
+// all top-level identifiers which are not exported and their associated
 // information (such as type, initial value, or function body) are removed.
 // Non-exported fields and methods of exported types are stripped, and the
 // function bodies of exported functions are set to nil.

--- a/src/pkg/go/doc/doc.go
+++ b/src/pkg/go/doc/doc.go
@@ -27,6 +27,11 @@ type typeDoc struct {


 // DocReader accumulates documentation for a single package.
+// It modifies the AST: Comments (declaration documentation)
+// that have been collected by the DocReader are set to nil
+// in the respective AST nodes so that they are not printed
+// twice (once when printing the documentation and once when
+// printing the corresponding AST node).
 //
 type DocReader struct {
 	name string;  // package name
@@ -151,8 +156,8 @@ func (doc *DocReader) addDecl(decl ast.Decl) {
 					// makeTypeDocs below). Simpler data structures, but
 					// would lose GenDecl documentation if the TypeSpec
 					// has documentation as well.
-					s := spec.(*ast.TypeSpec);
-					doc.addType(&ast.GenDecl{d.Doc, d.Pos(), token.TYPE, noPos, []ast.Spec{s}, noPos});
+					doc.addType(&ast.GenDecl{d.Doc, d.Pos(), token.TYPE, noPos, []ast.Spec{spec}, noPos});
+					// A new GenDecl node is created, no need to nil out d.Doc.
 				}
 			case token.VAR:
 				// variables are always handled as a group
@@ -197,7 +202,8 @@ func (doc *DocReader) AddFile(src *ast.File) {
 	// add package documentation
 	// TODO(gri) what to do if there are multiple files?
 	if src.Doc != nil {
-		doc.doc = src.Doc
+		doc.doc = src.Doc;
+		src.Doc = nil;  // doc consumed - remove from ast.File node
 	}

 	// add all declarations
@@ -206,7 +212,7 @@ func (doc *DocReader) AddFile(src *ast.File) {
 	}

 	// collect BUG(...) comments
-	for _, c := range src.Comments {
+	for c := src.Comments; c != nil; c = c.Next {
 		text := c.List[0].Text;
 		cstr := string(text);
 		if m := bug_markers.Execute(cstr); len(m) > 0 {
@@ -215,10 +221,11 @@ func (doc *DocReader) AddFile(src *ast.File) {
 				// non-empty BUG comment; collect comment without BUG prefix
 				list := copyCommentList(c.List);
 				list[0].Text = text[m[1] : len(text)];
-				doc.bugs.Push(&ast.CommentGroup{list, c.EndLine});
+				doc.bugs.Push(&ast.CommentGroup{list, nil});
 			}
 		}
 	}
+	src.Comments = nil;  // consumed unassociated comments - remove from ast.File node
 }

 // ----------------------------------------------------------------------------
@@ -282,6 +289,7 @@ func makeValueDocs(v *vector.Vector) []*ValueDoc {
 	for i := range d {
 		decl := v.At(i).(*ast.GenDecl);
 		d[i] = &ValueDoc{astComment(decl.Doc), decl, i};
+		decl.Doc = nil;  // doc consumed - removed from AST
 	}
 	sort.Sort(sortValueDoc(d));
 	return d;
@@ -310,6 +318,7 @@ func makeFuncDocs(m map[string] *ast.FuncDecl) []*FuncDoc {
 	for _, f := range m {
 		doc := new(FuncDoc);
 		doc.Doc = astComment(f.Doc);
+		f.Doc = nil;  // doc consumed - remove from ast.FuncDecl node
 		if f.Recv != nil {
 			doc.Recv = f.Recv.Type;
 		}
@@ -359,10 +368,12 @@ func makeTypeDocs(m map[string] *typeDoc) []*TypeDoc {
 		typespec := old.decl.Specs[0].(*ast.TypeSpec);
 		t := new(TypeDoc);
 		doc := typespec.Doc;
+		typespec.Doc = nil;  // doc consumed - remove from ast.TypeSpec node
 		if doc == nil {
 			// no doc associated with the spec, use the declaration doc, if any
 			doc = old.decl.Doc;
 		}
+		old.decl.Doc = nil;  // doc consumed - remove from ast.Decl node
 		t.Doc = astComment(doc);
 		t.Type = typespec;
 		t.Factories = makeFuncDocs(old.factories);

--- a/src/pkg/go/parser/parser.go
+++ b/src/pkg/go/parser/parser.go
@@ -22,17 +22,6 @@ import (
 )


-// Names to index the parser's commentIndex array.
-const (
-	leading = iota;  // index of the leading comments entry
-	trailing;  // index of the trailing comments entry
-)
-
-
-// Initial value for parser.commentsIndex.
-var noIndex = [2]int{-1, -1};
-
-
 // noPos is used when there is no corresponding source position for a token.
 var noPos token.Position;

@@ -60,8 +49,10 @@ type parser struct {
 	indent uint;  // indentation used for tracing output

 	// Comments
-	comments vector.Vector;  // list of collected, unassociated comment groups
-	commentsIndex [2]int;  // comments indexes of last leading/trailing comment group; or -1
+	comments *ast.CommentGroup;  // list of collected comments
+	lastComment *ast.CommentGroup;  // last comment in the comments list
+	leadComment *ast.CommentGroup;  // the last lead comment
+	lineComment *ast.CommentGroup;  // the last line comment

 	// Next token
 	pos token.Position;  // token position
@@ -90,8 +81,6 @@ func (p *parser) init(filename string, src []byte, mode uint) {
 	p.scanner.Init(filename, src, p, scannerMode(mode));
 	p.mode = mode;
 	p.trace = mode & Trace != 0;  // for convenience (p.trace is used frequently)
-	p.comments.Init(0);
-	p.commentsIndex = noIndex;
 	p.next();
 }

@@ -190,42 +179,49 @@ func (p *parser) consumeCommentGroup() int {
 		group[i] = list.At(i).(*ast.Comment);
 	}

-	p.comments.Push(&ast.CommentGroup{group, endline});
+	// add comment group to the comments list
+	g := &ast.CommentGroup{group, nil};
+	if p.lastComment != nil {
+		p.lastComment.Next = g;
+	} else {
+		p.comments = g;
+	}
+	p.lastComment = g;
+
 	return endline;
 }


 // Advance to the next non-comment token. In the process, collect
-// any comment groups encountered, and remember the last leading
-// and trailing comments.
+// any comment groups encountered, and remember the last lead and
+// line comments.
 //
-// A leading comment is a comment group that starts and ends in a
+// A lead comment is a comment group that starts and ends in a
 // line without any other tokens and that is followed by a non-comment
 // token on the line immediately after the comment group.
 //
-// A trailing comment is a comment group that follows a non-comment
+// A line comment is a comment group that follows a non-comment
 // token on the same line, and that has no tokens after it on the line
 // where it ends.
 //
-// Leading and trailing comments may be considered documentation
-// that is stored in the AST. In that case they are removed from
-// the parser's list of unassociated comments (via getComment).
+// Lead and line comments may be considered documentation that is
+// stored in the AST.
 //
 func (p *parser) next() {
-	p.commentsIndex = noIndex;
+	p.leadComment = nil;
+	p.lineComment = nil;
 	line := p.pos.Line;  // current line
 	p.next0();

 	if p.tok == token.COMMENT {
 		if p.pos.Line == line {
 			// The comment is on same line as previous token; it
-			// cannot be a leading comment but may be a trailing
-			// comment.
+			// cannot be a lead comment but may be a line comment.
 			endline := p.consumeCommentGroup();
 			if p.pos.Line != endline {
 				// The next token is on a different line, thus
-				// the last comment group is a trailing comment.
-				p.commentsIndex[trailing] = p.comments.Len() - 1;
+				// the last comment group is a line comment.
+				p.lineComment = p.lastComment;
 			}
 		}

@@ -237,27 +233,13 @@ func (p *parser) next() {

 		if endline >= 0 && endline+1 == p.pos.Line {
 			// The next token is following on the line immediately after the
-			// comment group, thus the last comment group is a leading comment.
-			p.commentsIndex[leading] = p.comments.Len() - 1;
+			// comment group, thus the last comment group is a lead comment.
+			p.leadComment = p.lastComment;
 		}
 	}
 }


-// Get leading/trailing comment group, if any.
-func (p *parser) getComment(kind int) *ast.CommentGroup {
-	i := p.commentsIndex[kind];
-	if i >= 0 {
-		// get comment and remove if from the list of unassociated comment groups
-		c := p.comments.At(i).(*ast.CommentGroup);
-		p.comments.Set(i, nil);  // clear entry
-		p.commentsIndex[kind] = -1;  // comment was consumed
-		return c;
-	}
-	return nil;
-}
-
-
 func (p *parser) errorExpected(pos token.Position, msg string) {
 	msg = "expected " + msg;
 	if pos.Offset == p.pos.Offset {
@@ -435,7 +417,7 @@ func (p *parser) parseFieldDecl() *ast.Field {
 		defer un(trace(p, "FieldDecl"));
 	}

-	doc := p.getComment(leading);
+	doc := p.leadComment;

 	// a list of identifiers looks like a list of type names
 	list := vector.New(0);
@@ -496,9 +478,9 @@ func (p *parser) parseStructType() *ast.StructType {
 			list.Push(f);
 			if p.tok == token.SEMICOLON {
 				p.next();
-				f.Comment = p.getComment(trailing);
+				f.Comment = p.lineComment;
 			} else {
-				f.Comment = p.getComment(trailing);
+				f.Comment = p.lineComment;
 				break;
 			}
 		}
@@ -680,7 +662,7 @@ func (p *parser) parseMethodSpec() *ast.Field {
 		defer un(trace(p, "MethodSpec"));
 	}

-	doc := p.getComment(leading);
+	doc := p.leadComment;
 	var idents []*ast.Ident;
 	var typ ast.Expr;
 	x := p.parseQualifiedIdent();
@@ -1680,7 +1662,7 @@ func (p *parser) parseStmt() ast.Stmt {
 type parseSpecFunction func(p *parser, doc *ast.CommentGroup, getSemi bool) (spec ast.Spec, gotSemi bool)


-// Consume semicolon if there is one and getSemi is set, and get any trailing comment.
+// Consume semicolon if there is one and getSemi is set, and get any line comment.
 // Return the comment if any and indicate if a semicolon was consumed.
 //
 func (p *parser) parseComment(getSemi bool) (comment *ast.CommentGroup, gotSemi bool) {
@@ -1688,7 +1670,7 @@ func (p *parser) parseComment(getSemi bool) (comment *ast.CommentGroup, gotSemi
 		p.next();
 		gotSemi = true;
 	}
-	return p.getComment(trailing), gotSemi;
+	return p.lineComment, gotSemi;
 }


@@ -1772,7 +1754,7 @@ func (p *parser) parseGenDecl(keyword token.Token, f parseSpecFunction, getSemi
 		defer un(trace(p, keyword.String() + "Decl"));
 	}

-	doc := p.getComment(leading);
+	doc := p.leadComment;
 	pos := p.expect(keyword);
 	var lparen, rparen token.Position;
 	list := vector.New(0);
@@ -1780,7 +1762,7 @@ func (p *parser) parseGenDecl(keyword token.Token, f parseSpecFunction, getSemi
 		lparen = p.pos;
 		p.next();
 		for p.tok != token.RPAREN && p.tok != token.EOF {
-			doc := p.getComment(leading);
+			doc := p.leadComment;
 			spec, semi := f(p, doc, true);  // consume semicolon if any
 			list.Push(spec);
 			if !semi {
@@ -1845,7 +1827,7 @@ func (p *parser) parseFunctionDecl() *ast.FuncDecl {
 		defer un(trace(p, "FunctionDecl"));
 	}

-	doc := p.getComment(leading);
+	doc := p.leadComment;
 	pos := p.expect(token.FUNC);

 	var recv *ast.Field;
@@ -1883,13 +1865,7 @@ func (p *parser) parseDecl(getSemi bool) (decl ast.Decl, gotSemi bool) {

 	case token.FUNC:
 		decl = p.parseFunctionDecl();
-		// Do not use parseComment here to consume a semicolon
-		// because we don't want to remove a trailing comment
-		// from the list of unassociated comments.
-		if getSemi && p.tok == token.SEMICOLON {
-			p.next();
-			gotSemi = true;
-		}
+		_, gotSemi := p.parseComment(getSemi);
 		return decl, gotSemi;

 	default:
@@ -1915,7 +1891,7 @@ func (p *parser) parseFile() *ast.File {
 	}

 	// package clause
-	comment := p.getComment(leading);
+	doc := p.leadComment;
 	pos := p.expect(token.PACKAGE);
 	ident := p.parseIdent();
 	var decls []ast.Decl;
@@ -1946,22 +1922,5 @@ func (p *parser) parseFile() *ast.File {
 		}
 	}

-	// convert comments list
-	// 1) determine number of remaining comments
-	n := 0;
-	for i := 0; i < p.comments.Len(); i++ {
-		if p.comments.At(i) != nil {
-			n++;
-		}
-	}
-	// 2) convert the remaining comments
-	comments := make([]*ast.CommentGroup, n);
-	for i, j := 0, 0; i < p.comments.Len(); i++ {
-		if p.comments.At(i) != nil {
-			comments[j] = p.comments.At(i).(*ast.CommentGroup);
-			j++;
-		}
-	}
-
-	return &ast.File{comment, pos, ident, decls, comments};
+	return &ast.File{doc, pos, ident, decls, p.comments};
 }
--- a/src/pkg/go/printer/printer.go
+++ b/src/pkg/go/printer/printer.go
--- a/src/pkg/go/printer/printer_test.go
+++ b/src/pkg/go/printer/printer_test.go
@@ -49,12 +49,13 @@ func check(t *testing.T, source, golden string, exports bool) {
 	// filter exports if necessary
 	if exports {
 		ast.FilterExports(prog);  // ignore result
+		prog.Comments = nil;  // don't print comments that are not in AST
 	}

 	// format source
 	var buf bytes.Buffer;
 	w := tabwriter.NewWriter(&buf, tabwidth, padding, tabchar, 0);
-	Fprint(w, prog, DocComments);
+	Fprint(w, prog, 0);
 	w.Flush();
 	res := buf.Data();


--- a/src/pkg/go/printer/testdata/golden1.go
+++ b/src/pkg/go/printer/testdata/golden1.go
+// Copyright 2009 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This is a package for testing purposes.
+//
 package main

 import 	"fmt"	// fmt

 const c0	= 0	// zero
-
 const (
 	c1	= iota;	// c1
 	c2	// c2
 )

+
 // The T type.
 type T struct {
 	a, b, c	int	// 3 fields
 }

-var x int	// x
+// This comment group should be separated
+// with a newline from the next comment
+// group.

+// This comment should NOT be associated with the next declaration.
+
+var x int	// x
 var ()

+
+// This comment SHOULD be associated with the next declaration.
 func f0() {
-	const pi	= 3.14;
-	var s1 struct {}
+	const pi			= 3.14;					// pi
+	var s1 struct {}	/* an empty struct */	/* foo */
+	// a struct constructor
+	// --------------------
 	var s2 struct {}	= struct {}{};
 	x := pi
 }
+//
+// NO SPACE HERE
+//
+func f1() {
+	f0();
+	/* 1 */
+	// 2
+	/* 3 */
+	/* 4 */
+	f0()
+}
--- a/src/pkg/go/printer/testdata/golden1.x
+++ b/src/pkg/go/printer/testdata/golden1.x
+// This is a package for testing purposes.
+//
 package main

 // The T type.

--- a/src/pkg/go/printer/testdata/source1.go
+++ b/src/pkg/go/printer/testdata/source1.go
+// Copyright 2009 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This is a package for testing purposes.
+//
 package main

 import "fmt"  // fmt
@@ -14,14 +20,33 @@ type T struct {
 	a, b, c int  // 3 fields
 }

+// This comment group should be separated
+// with a newline from the next comment
+// group.
+
+// This comment should NOT be associated with the next declaration.

 var x int;  // x
 var ()


+// This comment SHOULD be associated with the next declaration.
 func f0() {
 	const pi = 3.14;  // pi
-	var s1 struct {}
+	var s1 struct {}  /* an empty struct */ /* foo */
+	// a struct constructor
+	// --------------------
 	var s2 struct {} = struct {}{};
 	x := pi;
 }
+//
+// NO SPACE HERE
+//
+func f1() {
+	f0();
+	/* 1 */
+	// 2
+	/* 3 */
+	/* 4 */
+	f0();
+}