Commit 5be33e95 authored by Russ Cox

godoc, exp/ebnf, exp/types, go/scanner, scanner: use rune

API question: is a scanner token an int or a rune?

Since the rune is the common case and the token values
are the special (negative) case, I chose rune.  But it could
easily go the other way.

R=gri
CC=golang-dev
https://golang.org/cl/5301049
parent db339597
...@@ -46,7 +46,7 @@ func isPkgDir(fi FileInfo) bool { ...@@ -46,7 +46,7 @@ func isPkgDir(fi FileInfo) bool {
func firstSentence(s string) string { func firstSentence(s string) string {
i := -1 // index+1 of first terminator (punctuation ending a sentence) i := -1 // index+1 of first terminator (punctuation ending a sentence)
j := -1 // index+1 of first terminator followed by white space j := -1 // index+1 of first terminator followed by white space
prev := 'A' prev := rune('A')
for k, ch := range s { for k, ch := range s {
k1 := k + 1 k1 := k + 1
if ch == '.' || ch == '!' || ch == '?' { if ch == '.' || ch == '!' || ch == '?' {
......
...@@ -23,7 +23,7 @@ type ebnfParser struct { ...@@ -23,7 +23,7 @@ type ebnfParser struct {
scanner scanner.Scanner scanner scanner.Scanner
prev int // offset of previous token prev int // offset of previous token
pos int // offset of current token pos int // offset of current token
tok int // one token look-ahead tok rune // one token look-ahead
lit string // token literal lit string // token literal
} }
...@@ -47,7 +47,7 @@ func (p *ebnfParser) errorExpected(msg string) { ...@@ -47,7 +47,7 @@ func (p *ebnfParser) errorExpected(msg string) {
p.printf(`<span class="highlight">error: expected %s, found %s</span>`, msg, scanner.TokenString(p.tok)) p.printf(`<span class="highlight">error: expected %s, found %s</span>`, msg, scanner.TokenString(p.tok))
} }
func (p *ebnfParser) expect(tok int) { func (p *ebnfParser) expect(tok rune) {
if p.tok != tok { if p.tok != tok {
p.errorExpected(scanner.TokenString(tok)) p.errorExpected(scanner.TokenString(tok))
} }
......
...@@ -163,7 +163,7 @@ func (v *verifier) push(prod *Production) { ...@@ -163,7 +163,7 @@ func (v *verifier) push(prod *Production) {
} }
} }
func (v *verifier) verifyChar(x *Token) int { func (v *verifier) verifyChar(x *Token) rune {
s := x.String s := x.String
if utf8.RuneCountInString(s) != 1 { if utf8.RuneCountInString(s) != 1 {
v.error(x.Pos(), "single char expected, found "+s) v.error(x.Pos(), "single char expected, found "+s)
......
...@@ -15,7 +15,7 @@ type parser struct { ...@@ -15,7 +15,7 @@ type parser struct {
errors errorList errors errorList
scanner scanner.Scanner scanner scanner.Scanner
pos scanner.Position // token position pos scanner.Position // token position
tok int // one token look-ahead tok rune // one token look-ahead
lit string // token literal lit string // token literal
} }
...@@ -42,7 +42,7 @@ func (p *parser) errorExpected(pos scanner.Position, msg string) { ...@@ -42,7 +42,7 @@ func (p *parser) errorExpected(pos scanner.Position, msg string) {
p.error(pos, msg) p.error(pos, msg)
} }
func (p *parser) expect(tok int) scanner.Position { func (p *parser) expect(tok rune) scanner.Position {
pos := p.pos pos := p.pos
if p.tok != tok { if p.tok != tok {
p.errorExpected(pos, scanner.TokenString(tok)) p.errorExpected(pos, scanner.TokenString(tok))
......
...@@ -71,7 +71,7 @@ func findPkg(path string) (filename, id string) { ...@@ -71,7 +71,7 @@ func findPkg(path string) (filename, id string) {
// object/archive file and populates its scope with the results. // object/archive file and populates its scope with the results.
type gcParser struct { type gcParser struct {
scanner scanner.Scanner scanner scanner.Scanner
tok int // current token tok rune // current token
lit string // literal string; only valid for Ident, Int, String tokens lit string // literal string; only valid for Ident, Int, String tokens
id string // package id of imported package id string // package id of imported package
imports map[string]*ast.Object // package id -> package object imports map[string]*ast.Object // package id -> package object
...@@ -195,7 +195,7 @@ func (p *gcParser) errorf(format string, args ...interface{}) { ...@@ -195,7 +195,7 @@ func (p *gcParser) errorf(format string, args ...interface{}) {
p.error(fmt.Sprintf(format, args...)) p.error(fmt.Sprintf(format, args...))
} }
func (p *gcParser) expect(tok int) string { func (p *gcParser) expect(tok rune) string {
lit := p.lit lit := p.lit
if p.tok != tok { if p.tok != tok {
p.errorf("expected %q, got %q (%q)", scanner.TokenString(tok), scanner.TokenString(p.tok), lit) p.errorf("expected %q, got %q (%q)", scanner.TokenString(tok), scanner.TokenString(p.tok), lit)
...@@ -205,9 +205,9 @@ func (p *gcParser) expect(tok int) string { ...@@ -205,9 +205,9 @@ func (p *gcParser) expect(tok int) string {
} }
func (p *gcParser) expectSpecial(tok string) { func (p *gcParser) expectSpecial(tok string) {
sep := 'x' // not white space sep := rune('x') // not white space
i := 0 i := 0
for i < len(tok) && p.tok == int(tok[i]) && sep > ' ' { for i < len(tok) && p.tok == rune(tok[i]) && sep > ' ' {
sep = p.scanner.Peek() // if sep <= ' ', there is white space before the next token sep = p.scanner.Peek() // if sep <= ' ', there is white space before the next token
p.next() p.next()
i++ i++
...@@ -260,7 +260,7 @@ func (p *gcParser) parsePkgId() *ast.Object { ...@@ -260,7 +260,7 @@ func (p *gcParser) parsePkgId() *ast.Object {
func (p *gcParser) parseDotIdent() string { func (p *gcParser) parseDotIdent() string {
ident := "" ident := ""
if p.tok != scanner.Int { if p.tok != scanner.Int {
sep := 'x' // not white space sep := rune('x') // not white space
for (p.tok == scanner.Ident || p.tok == scanner.Int || p.tok == '·') && sep > ' ' { for (p.tok == scanner.Ident || p.tok == scanner.Int || p.tok == '·') && sep > ' ' {
ident += p.lit ident += p.lit
sep = p.scanner.Peek() // if sep <= ' ', there is white space before the next token sep = p.scanner.Peek() // if sep <= ' ', there is white space before the next token
......
...@@ -43,7 +43,7 @@ type Scanner struct { ...@@ -43,7 +43,7 @@ type Scanner struct {
mode uint // scanning mode mode uint // scanning mode
// scanning state // scanning state
ch int // current character ch rune // current character
offset int // character offset offset int // character offset
rdOffset int // reading offset (position after current character) rdOffset int // reading offset (position after current character)
lineOffset int // current line offset lineOffset int // current line offset
...@@ -63,7 +63,7 @@ func (S *Scanner) next() { ...@@ -63,7 +63,7 @@ func (S *Scanner) next() {
S.lineOffset = S.offset S.lineOffset = S.offset
S.file.AddLine(S.offset) S.file.AddLine(S.offset)
} }
r, w := int(S.src[S.rdOffset]), 1 r, w := rune(S.src[S.rdOffset]), 1
switch { switch {
case r == 0: case r == 0:
S.error(S.offset, "illegal character NUL") S.error(S.offset, "illegal character NUL")
...@@ -232,11 +232,11 @@ func (S *Scanner) findLineEnd() bool { ...@@ -232,11 +232,11 @@ func (S *Scanner) findLineEnd() bool {
return false return false
} }
func isLetter(ch int) bool { func isLetter(ch rune) bool {
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch) return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
} }
func isDigit(ch int) bool { func isDigit(ch rune) bool {
return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch) return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
} }
...@@ -248,14 +248,14 @@ func (S *Scanner) scanIdentifier() token.Token { ...@@ -248,14 +248,14 @@ func (S *Scanner) scanIdentifier() token.Token {
return token.Lookup(S.src[offs:S.offset]) return token.Lookup(S.src[offs:S.offset])
} }
func digitVal(ch int) int { func digitVal(ch rune) int {
switch { switch {
case '0' <= ch && ch <= '9': case '0' <= ch && ch <= '9':
return ch - '0' return int(ch - '0')
case 'a' <= ch && ch <= 'f': case 'a' <= ch && ch <= 'f':
return ch - 'a' + 10 return int(ch - 'a' + 10)
case 'A' <= ch && ch <= 'F': case 'A' <= ch && ch <= 'F':
return ch - 'A' + 10 return int(ch - 'A' + 10)
} }
return 16 // larger than any legal digit val return 16 // larger than any legal digit val
} }
...@@ -337,7 +337,7 @@ exit: ...@@ -337,7 +337,7 @@ exit:
return tok return tok
} }
func (S *Scanner) scanEscape(quote int) { func (S *Scanner) scanEscape(quote rune) {
offs := S.offset offs := S.offset
var i, base, max uint32 var i, base, max uint32
...@@ -462,7 +462,7 @@ func (S *Scanner) switch2(tok0, tok1 token.Token) token.Token { ...@@ -462,7 +462,7 @@ func (S *Scanner) switch2(tok0, tok1 token.Token) token.Token {
return tok0 return tok0
} }
func (S *Scanner) switch3(tok0, tok1 token.Token, ch2 int, tok2 token.Token) token.Token { func (S *Scanner) switch3(tok0, tok1 token.Token, ch2 rune, tok2 token.Token) token.Token {
if S.ch == '=' { if S.ch == '=' {
S.next() S.next()
return tok1 return tok1
...@@ -474,7 +474,7 @@ func (S *Scanner) switch3(tok0, tok1 token.Token, ch2 int, tok2 token.Token) tok ...@@ -474,7 +474,7 @@ func (S *Scanner) switch3(tok0, tok1 token.Token, ch2 int, tok2 token.Token) tok
return tok0 return tok0
} }
func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Token) token.Token { func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Token) token.Token {
if S.ch == '=' { if S.ch == '=' {
S.next() S.next()
return tok1 return tok1
......
...@@ -93,7 +93,7 @@ const ( ...@@ -93,7 +93,7 @@ const (
skipComment skipComment
) )
var tokenString = map[int]string{ var tokenString = map[rune]string{
EOF: "EOF", EOF: "EOF",
Ident: "Ident", Ident: "Ident",
Int: "Int", Int: "Int",
...@@ -105,7 +105,7 @@ var tokenString = map[int]string{ ...@@ -105,7 +105,7 @@ var tokenString = map[int]string{
} }
// TokenString returns a (visible) string for a token or Unicode character. // TokenString returns a (visible) string for a token or Unicode character.
func TokenString(tok int) string { func TokenString(tok rune) string {
if s, found := tokenString[tok]; found { if s, found := tokenString[tok]; found {
return s return s
} }
...@@ -144,7 +144,7 @@ type Scanner struct { ...@@ -144,7 +144,7 @@ type Scanner struct {
tokEnd int // token text tail end (srcBuf index) tokEnd int // token text tail end (srcBuf index)
// One character look-ahead // One character look-ahead
ch int // character before current srcPos ch rune // character before current srcPos
// Error is called for each error encountered. If no Error // Error is called for each error encountered. If no Error
// function is set, the error is reported to os.Stderr. // function is set, the error is reported to os.Stderr.
...@@ -218,8 +218,8 @@ func (s *Scanner) Init(src io.Reader) *Scanner { ...@@ -218,8 +218,8 @@ func (s *Scanner) Init(src io.Reader) *Scanner {
// that only a minimal amount of work needs to be done in the common ASCII // that only a minimal amount of work needs to be done in the common ASCII
// case (one test to check for both ASCII and end-of-buffer, and one test // case (one test to check for both ASCII and end-of-buffer, and one test
// to check for newlines). // to check for newlines).
func (s *Scanner) next() int { func (s *Scanner) next() rune {
ch, width := int(s.srcBuf[s.srcPos]), 1 ch, width := rune(s.srcBuf[s.srcPos]), 1
if ch >= utf8.RuneSelf { if ch >= utf8.RuneSelf {
// uncommon case: not ASCII or not enough bytes // uncommon case: not ASCII or not enough bytes
...@@ -264,7 +264,7 @@ func (s *Scanner) next() int { ...@@ -264,7 +264,7 @@ func (s *Scanner) next() int {
} }
} }
// at least one byte // at least one byte
ch = int(s.srcBuf[s.srcPos]) ch = rune(s.srcBuf[s.srcPos])
if ch >= utf8.RuneSelf { if ch >= utf8.RuneSelf {
// uncommon case: not ASCII // uncommon case: not ASCII
ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd]) ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd])
...@@ -304,7 +304,7 @@ func (s *Scanner) next() int { ...@@ -304,7 +304,7 @@ func (s *Scanner) next() int {
// it prints an error message to os.Stderr. Next does not // it prints an error message to os.Stderr. Next does not
// update the Scanner's Position field; use Pos() to // update the Scanner's Position field; use Pos() to
// get the current position. // get the current position.
func (s *Scanner) Next() int { func (s *Scanner) Next() rune {
s.tokPos = -1 // don't collect token text s.tokPos = -1 // don't collect token text
s.Line = 0 // invalidate token position s.Line = 0 // invalidate token position
ch := s.Peek() ch := s.Peek()
...@@ -315,7 +315,7 @@ func (s *Scanner) Next() int { ...@@ -315,7 +315,7 @@ func (s *Scanner) Next() int {
// Peek returns the next Unicode character in the source without advancing // Peek returns the next Unicode character in the source without advancing
// the scanner. It returns EOF if the scanner's position is at the last // the scanner. It returns EOF if the scanner's position is at the last
// character of the source. // character of the source.
func (s *Scanner) Peek() int { func (s *Scanner) Peek() rune {
if s.ch < 0 { if s.ch < 0 {
s.ch = s.next() s.ch = s.next()
} }
...@@ -335,7 +335,7 @@ func (s *Scanner) error(msg string) { ...@@ -335,7 +335,7 @@ func (s *Scanner) error(msg string) {
fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg) fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
} }
func (s *Scanner) scanIdentifier() int { func (s *Scanner) scanIdentifier() rune {
ch := s.next() // read character after first '_' or letter ch := s.next() // read character after first '_' or letter
for ch == '_' || unicode.IsLetter(ch) || unicode.IsDigit(ch) { for ch == '_' || unicode.IsLetter(ch) || unicode.IsDigit(ch) {
ch = s.next() ch = s.next()
...@@ -343,35 +343,35 @@ func (s *Scanner) scanIdentifier() int { ...@@ -343,35 +343,35 @@ func (s *Scanner) scanIdentifier() int {
return ch return ch
} }
func digitVal(ch int) int { func digitVal(ch rune) int {
switch { switch {
case '0' <= ch && ch <= '9': case '0' <= ch && ch <= '9':
return ch - '0' return int(ch - '0')
case 'a' <= ch && ch <= 'f': case 'a' <= ch && ch <= 'f':
return ch - 'a' + 10 return int(ch - 'a' + 10)
case 'A' <= ch && ch <= 'F': case 'A' <= ch && ch <= 'F':
return ch - 'A' + 10 return int(ch - 'A' + 10)
} }
return 16 // larger than any legal digit val return 16 // larger than any legal digit val
} }
func isDecimal(ch int) bool { return '0' <= ch && ch <= '9' } func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }
func (s *Scanner) scanMantissa(ch int) int { func (s *Scanner) scanMantissa(ch rune) rune {
for isDecimal(ch) { for isDecimal(ch) {
ch = s.next() ch = s.next()
} }
return ch return ch
} }
func (s *Scanner) scanFraction(ch int) int { func (s *Scanner) scanFraction(ch rune) rune {
if ch == '.' { if ch == '.' {
ch = s.scanMantissa(s.next()) ch = s.scanMantissa(s.next())
} }
return ch return ch
} }
func (s *Scanner) scanExponent(ch int) int { func (s *Scanner) scanExponent(ch rune) rune {
if ch == 'e' || ch == 'E' { if ch == 'e' || ch == 'E' {
ch = s.next() ch = s.next()
if ch == '-' || ch == '+' { if ch == '-' || ch == '+' {
...@@ -382,7 +382,7 @@ func (s *Scanner) scanExponent(ch int) int { ...@@ -382,7 +382,7 @@ func (s *Scanner) scanExponent(ch int) int {
return ch return ch
} }
func (s *Scanner) scanNumber(ch int) (int, int) { func (s *Scanner) scanNumber(ch rune) (rune, rune) {
// isDecimal(ch) // isDecimal(ch)
if ch == '0' { if ch == '0' {
// int or float // int or float
...@@ -426,7 +426,7 @@ func (s *Scanner) scanNumber(ch int) (int, int) { ...@@ -426,7 +426,7 @@ func (s *Scanner) scanNumber(ch int) (int, int) {
return Int, ch return Int, ch
} }
func (s *Scanner) scanDigits(ch, base, n int) int { func (s *Scanner) scanDigits(ch rune, base, n int) rune {
for n > 0 && digitVal(ch) < base { for n > 0 && digitVal(ch) < base {
ch = s.next() ch = s.next()
n-- n--
...@@ -437,7 +437,7 @@ func (s *Scanner) scanDigits(ch, base, n int) int { ...@@ -437,7 +437,7 @@ func (s *Scanner) scanDigits(ch, base, n int) int {
return ch return ch
} }
func (s *Scanner) scanEscape(quote int) int { func (s *Scanner) scanEscape(quote rune) rune {
ch := s.next() // read character after '/' ch := s.next() // read character after '/'
switch ch { switch ch {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote: case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
...@@ -457,7 +457,7 @@ func (s *Scanner) scanEscape(quote int) int { ...@@ -457,7 +457,7 @@ func (s *Scanner) scanEscape(quote int) int {
return ch return ch
} }
func (s *Scanner) scanString(quote int) (n int) { func (s *Scanner) scanString(quote rune) (n int) {
ch := s.next() // read character after quote ch := s.next() // read character after quote
for ch != quote { for ch != quote {
if ch == '\n' || ch < 0 { if ch == '\n' || ch < 0 {
...@@ -491,7 +491,7 @@ func (s *Scanner) scanChar() { ...@@ -491,7 +491,7 @@ func (s *Scanner) scanChar() {
} }
} }
func (s *Scanner) scanComment(ch int) int { func (s *Scanner) scanComment(ch rune) rune {
// ch == '/' || ch == '*' // ch == '/' || ch == '*'
if ch == '/' { if ch == '/' {
// line comment // line comment
...@@ -524,7 +524,7 @@ func (s *Scanner) scanComment(ch int) int { ...@@ -524,7 +524,7 @@ func (s *Scanner) scanComment(ch int) int {
// It returns EOF at the end of the source. It reports scanner errors (read and // It returns EOF at the end of the source. It reports scanner errors (read and
// token errors) by calling s.Error, if not nil; otherwise it prints an error // token errors) by calling s.Error, if not nil; otherwise it prints an error
// message to os.Stderr. // message to os.Stderr.
func (s *Scanner) Scan() int { func (s *Scanner) Scan() rune {
ch := s.Peek() ch := s.Peek()
// reset token text position // reset token text position
......
...@@ -64,7 +64,7 @@ func TestNext(t *testing.T) { ...@@ -64,7 +64,7 @@ func TestNext(t *testing.T) {
} }
type token struct { type token struct {
tok int tok rune
text string text string
} }
...@@ -233,7 +233,7 @@ func makeSource(pattern string) *bytes.Buffer { ...@@ -233,7 +233,7 @@ func makeSource(pattern string) *bytes.Buffer {
return &buf return &buf
} }
func checkTok(t *testing.T, s *Scanner, line, got, want int, text string) { func checkTok(t *testing.T, s *Scanner, line int, got, want rune, text string) {
if got != want { if got != want {
t.Fatalf("tok = %s, want %s for %q", TokenString(got), TokenString(want), text) t.Fatalf("tok = %s, want %s for %q", TokenString(got), TokenString(want), text)
} }
...@@ -329,7 +329,7 @@ func TestScanZeroMode(t *testing.T) { ...@@ -329,7 +329,7 @@ func TestScanZeroMode(t *testing.T) {
} }
} }
func testScanSelectedMode(t *testing.T, mode uint, class int) { func testScanSelectedMode(t *testing.T, mode uint, class rune) {
src := makeSource("%s\n") src := makeSource("%s\n")
s := new(Scanner).Init(src) s := new(Scanner).Init(src)
s.Mode = mode s.Mode = mode
...@@ -398,7 +398,7 @@ func TestScanWhitespace(t *testing.T) { ...@@ -398,7 +398,7 @@ func TestScanWhitespace(t *testing.T) {
} }
} }
func testError(t *testing.T, src, pos, msg string, tok int) { func testError(t *testing.T, src, pos, msg string, tok rune) {
s := new(Scanner).Init(bytes.NewBufferString(src)) s := new(Scanner).Init(bytes.NewBufferString(src))
errorCalled := false errorCalled := false
s.Error = func(s *Scanner, m string) { s.Error = func(s *Scanner, m string) {
...@@ -463,7 +463,7 @@ func checkPos(t *testing.T, got, want Position) { ...@@ -463,7 +463,7 @@ func checkPos(t *testing.T, got, want Position) {
} }
} }
func checkNextPos(t *testing.T, s *Scanner, offset, line, column, char int) { func checkNextPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
if ch := s.Next(); ch != char { if ch := s.Next(); ch != char {
t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char)) t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
} }
...@@ -471,7 +471,7 @@ func checkNextPos(t *testing.T, s *Scanner, offset, line, column, char int) { ...@@ -471,7 +471,7 @@ func checkNextPos(t *testing.T, s *Scanner, offset, line, column, char int) {
checkPos(t, s.Pos(), want) checkPos(t, s.Pos(), want)
} }
func checkScanPos(t *testing.T, s *Scanner, offset, line, column, char int) { func checkScanPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
want := Position{Offset: offset, Line: line, Column: column} want := Position{Offset: offset, Line: line, Column: column}
checkPos(t, s.Pos(), want) checkPos(t, s.Pos(), want)
if ch := s.Scan(); ch != char { if ch := s.Scan(); ch != char {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment