- renamed scanner.Location to token.Position

- by moving Position into token, scanner dependencies are removed from several files
- clearer field names in token.Position, now possible to have a Pos() accessor w/o naming conflicts
- added Pos() accessor
- use anonymous token.Position field in AST nodes

R=r
DELTA=244 (28 added, 55 deleted, 161 changed)
OCL=26786
CL=26793

- renamed scanner.Location to token.Position
- by moving Position into token, scanner dependencies are removed from several files
- clearer field names in token.Position, now possible to have a Pos() accessor w/o naming conflicts
- added Pos() accessor
- use anonymous token.Position field in AST nodes

R=r
DELTA=244 (28 added, 55 deleted, 161 changed)
OCL=26786
CL=26793
5a72ca45 · Robert Griesemer · b923b016 · 5a72ca45 · 5a72ca45 · 5a72ca45
Commit 5a72ca45 authored Mar 26, 2009 by Robert Griesemer
Showing with 74 additions and 64 deletions

scanner.go src/lib/go/scanner.go +44 -50

scanner_test.go src/lib/go/scanner_test.go +11 -11

token.go src/lib/go/token.go +19 -3

ast.go usr/gri/pretty/ast.go +0 -0

No files found.
--- a/src/lib/go/scanner.go
+++ b/src/lib/go/scanner.go
@@ -16,28 +16,20 @@ import (
 )


-// Source locations are represented by a Location value.
-type Location struct {
-	Pos int;  // byte position in source
-	Line int;  // line count, starting at 1
-	Col int;  // column, starting at 1 (character count)
-}
-
-
 // An implementation of an ErrorHandler must be provided to the Scanner.
-// If a syntax error is encountered, Error is called with a location and
-// an error message. The location points at the beginning of the offending
+// If a syntax error is encountered, Error is called with a position and
+// an error message. The position points to the beginning of the offending
 // token.
 //
 type ErrorHandler interface {
-	Error(loc Location, msg string);
+	Error(pos token.Position, msg string);
 }


 // A Scanner holds the scanner's internal state while processing
 // a given text.  It can be allocated as part of another data
-// structure but must be initialized via Init before use.
-// See also the package comment for a sample use.
+// structure but must be initialized via Init before use. For
+// a sample use, see the implementation of Tokenize.
 //
 type Scanner struct {
 	// immutable state
@@ -46,31 +38,32 @@ type Scanner struct {
 	scan_comments bool;  // if set, comments are reported as tokens

 	// scanning state
-	loc Location;  // location before ch (src[loc.Pos] == ch)
-	pos int;  // current reading position (position after ch)
+	pos token.Position;  // previous reading position (position before ch)
+	offset int;  // current reading offset (position after ch)
 	ch int;  // one char look-ahead
 }


 // Read the next Unicode char into S.ch.
 // S.ch < 0 means end-of-file.
+//
 func (S *Scanner) next() {
-	if S.pos < len(S.src) {
-		S.loc.Pos = S.pos;
-		S.loc.Col++;
-		r, w := int(S.src[S.pos]), 1;
+	if S.offset < len(S.src) {
+		S.pos.Offset = S.offset;
+		S.pos.Column++;
+		r, w := int(S.src[S.offset]), 1;
 		switch {
 		case r == '\n':
-			S.loc.Line++;
-			S.loc.Col = 0;
+			S.pos.Line++;
+			S.pos.Column = 0;
 		case r >= 0x80:
 			// not ASCII
-			r, w = utf8.DecodeRune(S.src[S.pos : len(S.src)]);
+			r, w = utf8.DecodeRune(S.src[S.offset : len(S.src)]);
 		}
-		S.pos += w;
+		S.offset += w;
 		S.ch = r;
 	} else {
-		S.loc.Pos = len(S.src);
+		S.pos.Offset = len(S.src);
 		S.ch = -1;  // eof
 	}
 }
@@ -86,13 +79,13 @@ func (S *Scanner) Init(src []byte, err ErrorHandler, scan_comments bool) {
 	S.src = src;
 	S.err = err;
 	S.scan_comments = scan_comments;
-	S.loc.Line = 1;
+	S.pos.Line = 1;
 	S.next();
 }


 func charString(ch int) string {
-	s := string(ch);
+	var s string;
 	switch ch {
 	case '\a': s = `\a`;
 	case '\b': s = `\b`;
@@ -103,25 +96,26 @@ func charString(ch int) string {
 	case '\v': s = `\v`;
 	case '\\': s = `\\`;
 	case '\'': s = `\'`;
+	default  : s = utf8.EncodeRuneToString(ch);
 	}
 	return "'" + s + "' (U+" + strconv.Itob(ch, 16) + ")";
 }


-func (S *Scanner) error(loc Location, msg string) {
-	S.err.Error(loc, msg);
+func (S *Scanner) error(pos token.Position, msg string) {
+	S.err.Error(pos, msg);
 }


 func (S *Scanner) expect(ch int) {
 	if S.ch != ch {
-		S.error(S.loc, "expected " + charString(ch) + ", found " + charString(S.ch));
+		S.error(S.pos, "expected " + charString(ch) + ", found " + charString(S.ch));
 	}
 	S.next();  // always make progress
 }


-func (S *Scanner) scanComment(loc Location) {
+func (S *Scanner) scanComment(pos token.Position) {
 	// first '/' already consumed

 	if S.ch == '/' {
@@ -147,7 +141,7 @@ func (S *Scanner) scanComment(loc Location) {
 		}
 	}

-	S.error(loc, "comment not terminated");
+	S.error(pos, "comment not terminated");
 }


@@ -168,11 +162,11 @@ func isDigit(ch int) bool {


 func (S *Scanner) scanIdentifier() token.Token {
-	pos := S.loc.Pos;
+	pos := S.pos.Offset;
 	for isLetter(S.ch) || isDigit(S.ch) {
 		S.next();
 	}
-	return token.Lookup(S.src[pos : S.loc.Pos]);
+	return token.Lookup(S.src[pos : S.pos.Offset]);
 }


@@ -255,13 +249,13 @@ func (S *Scanner) scanDigits(base, length int) {
 		length--;
 	}
 	if length > 0 {
-		S.error(S.loc, "illegal char escape");
+		S.error(S.pos, "illegal char escape");
 	}
 }


 func (S *Scanner) scanEscape(quote int) {
-	loc := S.loc;
+	pos := S.pos;
 	ch := S.ch;
 	S.next();
 	switch ch {
@@ -276,7 +270,7 @@ func (S *Scanner) scanEscape(quote int) {
 	case 'U':
 		S.scanDigits(16, 8);
 	default:
-		S.error(loc, "illegal char escape");
+		S.error(pos, "illegal char escape");
 	}
 }

@@ -294,14 +288,14 @@ func (S *Scanner) scanChar() {
 }


-func (S *Scanner) scanString(loc Location) {
+func (S *Scanner) scanString(pos token.Position) {
 	// '"' already consumed

 	for S.ch != '"' {
 		ch := S.ch;
 		S.next();
 		if ch == '\n' || ch < 0 {
-			S.error(loc, "string not terminated");
+			S.error(pos, "string not terminated");
 			break;
 		}
 		if ch == '\\' {
@@ -313,14 +307,14 @@ func (S *Scanner) scanString(loc Location) {
 }


-func (S *Scanner) scanRawString(loc Location) {
+func (S *Scanner) scanRawString(pos token.Position) {
 	// '`' already consumed

 	for S.ch != '`' {
 		ch := S.ch;
 		S.next();
 		if ch == '\n' || ch < 0 {
-			S.error(loc, "string not terminated");
+			S.error(pos, "string not terminated");
 			break;
 		}
 	}
@@ -374,11 +368,11 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
 }


-// Scan scans the next token and returns the token location loc,
+// Scan scans the next token and returns the token position pos,
 // the token tok, and the literal text lit corresponding to the
 // token. The source end is indicated by token.EOF.
 //
-func (S *Scanner) Scan() (loc Location, tok token.Token, lit []byte) {
+func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) {
 scan_again:
 	// skip white space
 	for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' || S.ch == '\r' {
@@ -386,7 +380,7 @@ scan_again:
 	}

 	// current token start
-	loc, tok = S.loc, token.ILLEGAL;
+	pos, tok = S.pos, token.ILLEGAL;

 	// determine token value
 	switch ch := S.ch; {
@@ -398,9 +392,9 @@ scan_again:
 		S.next();  // always make progress
 		switch ch {
 		case -1  : tok = token.EOF;
-		case '"' : tok = token.STRING; S.scanString(loc);
+		case '"' : tok = token.STRING; S.scanString(pos);
 		case '\'': tok = token.CHAR; S.scanChar();
-		case '`' : tok = token.STRING; S.scanRawString(loc);
+		case '`' : tok = token.STRING; S.scanRawString(pos);
 		case ':' : tok = S.switch2(token.COLON, token.DEFINE);
 		case '.' :
 			if digitVal(S.ch) < 10 {
@@ -427,7 +421,7 @@ scan_again:
 		case '*': tok = S.switch2(token.MUL, token.MUL_ASSIGN);
 		case '/':
 			if S.ch == '/' || S.ch == '*' {
-				S.scanComment(loc);
+				S.scanComment(pos);
 				tok = token.COMMENT;
 				if !S.scan_comments {
 					goto scan_again;
@@ -455,20 +449,20 @@ scan_again:
 				tok = S.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND);
 			}
 		case '|': tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR);
-		default: S.error(loc, "illegal character " + charString(ch));
+		default: S.error(pos, "illegal character " + charString(ch));
 		}
 	}

-	return loc, tok, S.src[loc.Pos : S.loc.Pos];
+	return pos, tok, S.src[pos.Offset : S.pos.Offset];
 }


-// Tokenize calls a function f with the token location, token value, and token
+// Tokenize calls a function f with the token position, token value, and token
 // text for each token in the source src. The other parameters have the same
 // meaning as for the Init function. Tokenize keeps scanning until f returns
 // false (usually when the token value is token.EOF).
 //
-func Tokenize(src []byte, err ErrorHandler, scan_comments bool, f func (loc Location, tok token.Token, lit []byte) bool) {
+func Tokenize(src []byte, err ErrorHandler, scan_comments bool, f func (pos token.Position, tok token.Token, lit []byte) bool) {
 	var s Scanner;
 	s.Init(src, err, scan_comments);
 	for f(s.Scan()) {

--- a/src/lib/go/scanner_test.go
+++ b/src/lib/go/scanner_test.go
@@ -160,7 +160,7 @@ type TestErrorHandler struct {
 	t *testing.T
 }

-func (h *TestErrorHandler) Error(loc scanner.Location, msg string) {
+func (h *TestErrorHandler) Error(pos token.Position, msg string) {
 	h.t.Errorf("Error() called (msg = %s)", msg);
 }

@@ -186,9 +186,9 @@ func Test(t *testing.T) {

 	// verify scan
 	index := 0;
-	eloc := scanner.Location{0, 1, 1};
+	eloc := token.Position{0, 1, 1};
 	scanner.Tokenize(io.StringBytes(src), &TestErrorHandler{t}, true,
-		func (loc Location, tok token.Token, litb []byte) bool {
+		func (pos token.Position, tok token.Token, litb []byte) bool {
 			e := elt{token.EOF, "", special};
 			if index < len(tokens) {
 				e = tokens[index];
@@ -196,16 +196,16 @@ func Test(t *testing.T) {
 			lit := string(litb);
 			if tok == token.EOF {
 				lit = "<EOF>";
-				eloc.Col = 0;
+				eloc.Column = 0;
 			}
-			if loc.Pos != eloc.Pos {
-				t.Errorf("bad position for %s: got %d, expected %d", lit, loc.Pos, eloc.Pos);
+			if pos.Offset != eloc.Offset {
+				t.Errorf("bad position for %s: got %d, expected %d", lit, pos.Offset, eloc.Offset);
 			}
-			if loc.Line != eloc.Line {
-				t.Errorf("bad line for %s: got %d, expected %d", lit, loc.Line, eloc.Line);
+			if pos.Line != eloc.Line {
+				t.Errorf("bad line for %s: got %d, expected %d", lit, pos.Line, eloc.Line);
 			}
-			if loc.Col != eloc.Col {
-				t.Errorf("bad column for %s: got %d, expected %d", lit, loc.Col, eloc.Col);
+			if pos.Column!= eloc.Column {
+				t.Errorf("bad column for %s: got %d, expected %d", lit, pos.Column, eloc.Column);
 			}
 			if tok != e.tok {
 				t.Errorf("bad token for %s: got %s, expected %s", lit, tok.String(), e.tok.String());
@@ -216,7 +216,7 @@ func Test(t *testing.T) {
 			if tokenclass(tok) != e.class {
 				t.Errorf("bad class for %s: got %d, expected %d", lit, tokenclass(tok), e.class);
 			}
-			eloc.Pos += len(lit) + len(whitespace);
+			eloc.Offset += len(lit) + len(whitespace);
 			eloc.Line += NewlineCount(lit) + whitespace_linecount;
 			index++;
 			return tok != token.EOF;

--- a/src/lib/go/token.go
+++ b/src/lib/go/token.go
@@ -255,10 +255,10 @@ const (


 // Precedence returns the syntax precedence of the operator
-// token tok or LowestPrecedence if tok is not an operator.
+// token op or LowestPrecedence if op is not an operator.
 //
-func (tok Token) Precedence() int {
-	switch tok {
+func (op Token) Precedence() int {
+	switch op {
 	case COLON:
 		return 0;
 	case LOR:
@@ -322,3 +322,19 @@ func (tok Token) IsOperator() bool {
 func (tok Token) IsKeyword() bool {
 	return keyword_beg < tok && tok < keyword_end;
 }
+
+
+// Token source positions are represented by a Position value.
+type Position struct {
+	Offset int;  // byte offset, starting at 0
+	Line int;  // line number, starting at 1
+	Column int;  // column number, starting at 1 (character count)
+}
+
+
+// Pos is an accessor method for anonymous Position fields.
+// It returns its receiver.
+//
+func (pos *Position) Pos() Position {
+	return *pos;
+}
--- a/usr/gri/pretty/ast.go
+++ b/usr/gri/pretty/ast.go