Commit 539aa05a authored by Robert Griesemer

cmd/compile: towards simpler and faster lexing: always use getr

Always reading runes (rather than bytes) has negligible overhead
(a simple if at the moment - it can be eliminated eventually) but
simplifies the lexer logic and opens up the door for speedups.
In the process remove many int conversions that are now not needed
anymore.

Also, because identifiers are now more easily recognized, remove
talph label and move identifier lexing "in place".

Also, instead of accepting all chars < 0x80 and then check for
"frogs", only permit valid characters in the first place. Removes
an extra call for common simple tokens and leads to simpler logic.

`time go build -a net/http` (best of 5 runs) seems 1% faster.
Assuming this is in the noise, there is no noticeable performance
degradation with this change.

Change-Id: I3454c9bf8b91808188cf7a5f559341749da9a1eb
Reviewed-on: https://go-review.googlesource.com/19847Reviewed-by: 's avatarMatthew Dempsky <mdempsky@google.com>
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 814978a0
...@@ -37,6 +37,8 @@ var ( ...@@ -37,6 +37,8 @@ var (
Debug_wb int Debug_wb int
) )
const BOM = 0xFEFF
// Debug arguments. // Debug arguments.
// These can be specified with the -d flag, as in "-d nil" // These can be specified with the -d flag, as in "-d nil"
// to set the debug_checknil variable. In general the list passed // to set the debug_checknil variable. In general the list passed
...@@ -310,7 +312,6 @@ func Main() { ...@@ -310,7 +312,6 @@ func Main() {
dclcontext = PEXTERN dclcontext = PEXTERN
nerrors = 0 nerrors = 0
lexlineno = 1 lexlineno = 1
const BOM = 0xFEFF
loadsys() loadsys()
...@@ -575,10 +576,14 @@ func addidir(dir string) { ...@@ -575,10 +576,14 @@ func addidir(dir string) {
} }
} }
func isDriveLetter(b byte) bool {
return 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z'
}
// is this path a local name? begins with ./ or ../ or / // is this path a local name? begins with ./ or ../ or /
func islocalname(name string) bool { func islocalname(name string) bool {
return strings.HasPrefix(name, "/") || return strings.HasPrefix(name, "/") ||
Ctxt.Windows != 0 && len(name) >= 3 && isAlpha(int(name[0])) && name[1] == ':' && name[2] == '/' || Ctxt.Windows != 0 && len(name) >= 3 && isDriveLetter(name[0]) && name[1] == ':' && name[2] == '/' ||
strings.HasPrefix(name, "./") || name == "." || strings.HasPrefix(name, "./") || name == "." ||
strings.HasPrefix(name, "../") || name == ".." strings.HasPrefix(name, "../") || name == ".."
} }
...@@ -829,20 +834,17 @@ func importfile(f *Val, indent []byte) { ...@@ -829,20 +834,17 @@ func importfile(f *Val, indent []byte) {
} }
} }
func isSpace(c int) bool { func isSpace(c rune) bool {
return c == ' ' || c == '\t' || c == '\n' || c == '\r' return c == ' ' || c == '\t' || c == '\n' || c == '\r'
} }
func isAlpha(c int) bool { func isLetter(c rune) bool {
return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_'
} }
func isDigit(c int) bool { func isDigit(c rune) bool {
return '0' <= c && c <= '9' return '0' <= c && c <= '9'
} }
func isAlnum(c int) bool {
return isAlpha(c) || isDigit(c)
}
func plan9quote(s string) string { func plan9quote(s string) string {
if s == "" { if s == "" {
...@@ -856,23 +858,11 @@ func plan9quote(s string) string { ...@@ -856,23 +858,11 @@ func plan9quote(s string) string {
return s return s
} }
func isfrog(c int) bool {
// complain about possibly invisible control characters
if c < ' ' {
return !isSpace(c) // exclude good white space
}
if 0x7f <= c && c <= 0xa0 { // DEL, unicode block including unbreakable space.
return true
}
return false
}
type lexer struct { type lexer struct {
// source // source
bin *obj.Biobuf bin *obj.Biobuf
peekc int peekr1 rune
peekc1 int // second peekc for ... peekr2 rune // second peekc for ...
nlsemi bool // if set, '\n' and EOF translate to ';' nlsemi bool // if set, '\n' and EOF translate to ';'
...@@ -932,7 +922,7 @@ const ( ...@@ -932,7 +922,7 @@ const (
) )
func (l *lexer) next() { func (l *lexer) next() {
var c1 int var c1 rune
var op Op var op Op
var escflag int var escflag int
var v int64 var v int64
...@@ -947,33 +937,73 @@ func (l *lexer) next() { ...@@ -947,33 +937,73 @@ func (l *lexer) next() {
l0: l0:
// skip white space // skip white space
c := l.getc() c := l.getr()
for isSpace(c) { for isSpace(c) {
if c == '\n' && nlsemi { if c == '\n' && nlsemi {
l.ungetc(c) // TODO(gri) we may be able avoid the ungetr and simply use lexlineno-1 below
l.ungetr(c) // for correct line number
if Debug['x'] != 0 { if Debug['x'] != 0 {
fmt.Printf("lex: implicit semi\n") fmt.Printf("lex: implicit semi\n")
} }
lineno = lexlineno
l.tok = ';' l.tok = ';'
return return
} }
c = l.getc() c = l.getr()
} }
// start of token // start of token
lineno = lexlineno lineno = lexlineno
if c >= utf8.RuneSelf { // identifiers and keywords
// all multibyte runes are alpha // (for better error messages consume all chars >= utf8.RuneSelf for identifiers)
if isLetter(c) || c >= utf8.RuneSelf {
cp = &lexbuf cp = &lexbuf
cp.Reset() cp.Reset()
goto talph
}
if isAlpha(c) { // accelerate common case (7bit ASCII)
cp = &lexbuf for isLetter(c) || isDigit(c) {
cp.Reset() cp.WriteByte(byte(c))
goto talph c = l.getr()
}
// general case
for {
if c >= utf8.RuneSelf {
if unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c) || importpkg != nil && c == 0xb7 {
if cp.Len() == 0 && unicode.IsDigit(c) {
Yyerror("identifier cannot begin with digit %#U", c)
}
} else {
Yyerror("invalid identifier character %#U", c)
}
cp.WriteRune(c)
} else if isLetter(c) || isDigit(c) {
cp.WriteByte(byte(c))
} else {
break
}
c = l.getr()
}
cp = nil
l.ungetr(c)
s = LookupBytes(lexbuf.Bytes())
if s.Lexical == LIGNORE {
goto l0
}
if Debug['x'] != 0 {
fmt.Printf("lex: %s %s\n", s, lexname(rune(s.Lexical)))
}
l.sym_ = s
switch s.Lexical {
case LNAME, LRETURN, LBREAK, LCONTINUE, LFALL:
l.nlsemi = true
}
l.tok = int32(s.Lexical)
return
} }
if isDigit(c) { if isDigit(c) {
...@@ -982,7 +1012,7 @@ l0: ...@@ -982,7 +1012,7 @@ l0:
if c != '0' { if c != '0' {
for { for {
cp.WriteByte(byte(c)) cp.WriteByte(byte(c))
c = l.getc() c = l.getr()
if isDigit(c) { if isDigit(c) {
continue continue
} }
...@@ -1000,11 +1030,11 @@ l0: ...@@ -1000,11 +1030,11 @@ l0:
} }
cp.WriteByte(byte(c)) cp.WriteByte(byte(c))
c = l.getc() c = l.getr()
if c == 'x' || c == 'X' { if c == 'x' || c == 'X' {
for { for {
cp.WriteByte(byte(c)) cp.WriteByte(byte(c))
c = l.getc() c = l.getr()
if isDigit(c) { if isDigit(c) {
continue continue
} }
...@@ -1037,7 +1067,7 @@ l0: ...@@ -1037,7 +1067,7 @@ l0:
c1 = 1 // not octal c1 = 1 // not octal
} }
cp.WriteByte(byte(c)) cp.WriteByte(byte(c))
c = l.getc() c = l.getr()
} }
if c == '.' { if c == '.' {
...@@ -1057,8 +1087,7 @@ l0: ...@@ -1057,8 +1087,7 @@ l0:
switch c { switch c {
case EOF: case EOF:
lineno = prevlineno l.ungetr(EOF) // return EOF again in future next call
l.ungetc(EOF)
// Treat EOF as "end of line" for the purposes // Treat EOF as "end of line" for the purposes
// of inserting a semicolon. // of inserting a semicolon.
if nlsemi { if nlsemi {
...@@ -1071,13 +1100,8 @@ l0: ...@@ -1071,13 +1100,8 @@ l0:
l.tok = -1 l.tok = -1
return return
case '_':
cp = &lexbuf
cp.Reset()
goto talph
case '.': case '.':
c1 = l.getc() c1 = l.getr()
if isDigit(c1) { if isDigit(c1) {
cp = &lexbuf cp = &lexbuf
cp.Reset() cp.Reset()
...@@ -1087,13 +1111,13 @@ l0: ...@@ -1087,13 +1111,13 @@ l0:
} }
if c1 == '.' { if c1 == '.' {
c1 = l.getc() c1 = l.getr()
if c1 == '.' { if c1 == '.' {
c = LDDD c = LDDD
goto lx goto lx
} }
l.ungetc(c1) l.ungetr(c1)
c1 = '.' c1 = '.'
} }
...@@ -1127,7 +1151,7 @@ l0: ...@@ -1127,7 +1151,7 @@ l0:
cp.Reset() cp.Reset()
for { for {
c = int(l.getr()) c = l.getr()
if c == '\r' { if c == '\r' {
continue continue
} }
...@@ -1139,7 +1163,7 @@ l0: ...@@ -1139,7 +1163,7 @@ l0:
if c == '`' { if c == '`' {
break break
} }
cp.WriteRune(rune(c)) cp.WriteRune(c)
} }
goto strlit goto strlit
...@@ -1153,7 +1177,7 @@ l0: ...@@ -1153,7 +1177,7 @@ l0:
if !l.escchar('\'', &escflag, &v) { if !l.escchar('\'', &escflag, &v) {
Yyerror("missing '") Yyerror("missing '")
l.ungetc(int(v)) l.ungetr(rune(v))
} }
x := new(Mpint) x := new(Mpint)
...@@ -1163,25 +1187,25 @@ l0: ...@@ -1163,25 +1187,25 @@ l0:
if Debug['x'] != 0 { if Debug['x'] != 0 {
fmt.Printf("lex: codepoint literal\n") fmt.Printf("lex: codepoint literal\n")
} }
litbuf = "string literal" litbuf = "rune literal"
l.nlsemi = true l.nlsemi = true
l.tok = LLITERAL l.tok = LLITERAL
return return
case '/': case '/':
c1 = l.getc() c1 = l.getr()
if c1 == '*' { if c1 == '*' {
nl := false nl := false
for { for {
c = int(l.getr()) c = l.getr()
if c == '\n' { if c == '\n' {
nl = true nl = true
} }
for c == '*' { for c == '*' {
c = int(l.getr()) c = l.getr()
if c == '/' { if c == '/' {
if nl { if nl {
l.ungetc('\n') l.ungetr('\n')
} }
goto l0 goto l0
} }
...@@ -1202,11 +1226,11 @@ l0: ...@@ -1202,11 +1226,11 @@ l0:
c = l.getlinepragma() c = l.getlinepragma()
for { for {
if c == '\n' || c == EOF { if c == '\n' || c == EOF {
l.ungetc(c) l.ungetr(c)
goto l0 goto l0
} }
c = int(l.getr()) c = l.getr()
} }
} }
...@@ -1216,31 +1240,31 @@ l0: ...@@ -1216,31 +1240,31 @@ l0:
} }
case ':': case ':':
c1 = l.getc() c1 = l.getr()
if c1 == '=' { if c1 == '=' {
c = int(LCOLAS) c = LCOLAS
goto lx goto lx
} }
case '*': case '*':
c1 = l.getc() c1 = l.getr()
if c1 == '=' { if c1 == '=' {
op = OMUL op = OMUL
goto asop goto asop
} }
case '%': case '%':
c1 = l.getc() c1 = l.getr()
if c1 == '=' { if c1 == '=' {
op = OMOD op = OMOD
goto asop goto asop
} }
case '+': case '+':
c1 = l.getc() c1 = l.getr()
if c1 == '+' { if c1 == '+' {
l.nlsemi = true l.nlsemi = true
c = int(LINC) c = LINC
goto lx goto lx
} }
...@@ -1250,10 +1274,10 @@ l0: ...@@ -1250,10 +1274,10 @@ l0:
} }
case '-': case '-':
c1 = l.getc() c1 = l.getr()
if c1 == '-' { if c1 == '-' {
l.nlsemi = true l.nlsemi = true
c = int(LDEC) c = LDEC
goto lx goto lx
} }
...@@ -1263,10 +1287,10 @@ l0: ...@@ -1263,10 +1287,10 @@ l0:
} }
case '>': case '>':
c1 = l.getc() c1 = l.getr()
if c1 == '>' { if c1 == '>' {
c = int(LRSH) c = LRSH
c1 = l.getc() c1 = l.getr()
if c1 == '=' { if c1 == '=' {
op = ORSH op = ORSH
goto asop goto asop
...@@ -1276,17 +1300,17 @@ l0: ...@@ -1276,17 +1300,17 @@ l0:
} }
if c1 == '=' { if c1 == '=' {
c = int(LGE) c = LGE
goto lx goto lx
} }
c = int(LGT) c = LGT
case '<': case '<':
c1 = l.getc() c1 = l.getr()
if c1 == '<' { if c1 == '<' {
c = int(LLSH) c = LLSH
c1 = l.getc() c1 = l.getr()
if c1 == '=' { if c1 == '=' {
op = OLSH op = OLSH
goto asop goto asop
...@@ -1296,41 +1320,41 @@ l0: ...@@ -1296,41 +1320,41 @@ l0:
} }
if c1 == '=' { if c1 == '=' {
c = int(LLE) c = LLE
goto lx goto lx
} }
if c1 == '-' { if c1 == '-' {
c = int(LCOMM) c = LCOMM
goto lx goto lx
} }
c = int(LLT) c = LLT
case '=': case '=':
c1 = l.getc() c1 = l.getr()
if c1 == '=' { if c1 == '=' {
c = int(LEQ) c = LEQ
goto lx goto lx
} }
case '!': case '!':
c1 = l.getc() c1 = l.getr()
if c1 == '=' { if c1 == '=' {
c = int(LNE) c = LNE
goto lx goto lx
} }
case '&': case '&':
c1 = l.getc() c1 = l.getr()
if c1 == '&' { if c1 == '&' {
c = int(LANDAND) c = LANDAND
goto lx goto lx
} }
if c1 == '^' { if c1 == '^' {
c = int(LANDNOT) c = LANDNOT
c1 = l.getc() c1 = l.getr()
if c1 == '=' { if c1 == '=' {
op = OANDNOT op = OANDNOT
goto asop goto asop
...@@ -1345,9 +1369,9 @@ l0: ...@@ -1345,9 +1369,9 @@ l0:
} }
case '|': case '|':
c1 = l.getc() c1 = l.getr()
if c1 == '|' { if c1 == '|' {
c = int(LOROR) c = LOROR
goto lx goto lx
} }
...@@ -1357,21 +1381,32 @@ l0: ...@@ -1357,21 +1381,32 @@ l0:
} }
case '^': case '^':
c1 = l.getc() c1 = l.getr()
if c1 == '=' { if c1 == '=' {
op = OXOR op = OXOR
goto asop goto asop
} }
case '(', '[', '{', ',', ';':
goto lx
case ')', ']', '}': case ')', ']', '}':
l.nlsemi = true l.nlsemi = true
goto lx goto lx
case '#', '$', '?', '@', '\\':
if importpkg != nil {
goto lx
}
fallthrough
default: default:
goto lx // anything else is illegal
Yyerror("syntax error: illegal character %#U", c)
goto l0
} }
l.ungetc(c1) l.ungetr(c1)
lx: lx:
if Debug['x'] != 0 { if Debug['x'] != 0 {
...@@ -1381,17 +1416,8 @@ lx: ...@@ -1381,17 +1416,8 @@ lx:
fmt.Printf("%v lex: TOKEN '%c'\n", Ctxt.Line(int(lexlineno)), c) fmt.Printf("%v lex: TOKEN '%c'\n", Ctxt.Line(int(lexlineno)), c)
} }
} }
if isfrog(c) {
Yyerror("illegal character 0x%x", uint(c))
goto l0
}
if importpkg == nil && (c == '#' || c == '$' || c == '?' || c == '@' || c == '\\') {
Yyerror("%s: unexpected %c", "syntax error", c)
goto l0
}
l.tok = int32(c) l.tok = c
return return
asop: asop:
...@@ -1402,52 +1428,9 @@ asop: ...@@ -1402,52 +1428,9 @@ asop:
l.tok = LASOP l.tok = LASOP
return return
// cp is set to lexbuf and some
// prefix has been stored
talph:
for {
if c >= utf8.RuneSelf {
l.ungetc(c)
r := rune(l.getr())
// 0xb7 · is used for internal names
if !unicode.IsLetter(r) && !unicode.IsDigit(r) && (importpkg == nil || r != 0xb7) {
Yyerror("invalid identifier character U+%04x", r)
}
if cp.Len() == 0 && unicode.IsDigit(r) {
Yyerror("identifier cannot begin with digit U+%04x", r)
}
cp.WriteRune(r)
} else if !isAlnum(c) && c != '_' {
break
} else {
cp.WriteByte(byte(c))
}
c = l.getc()
}
cp = nil
l.ungetc(c)
s = LookupBytes(lexbuf.Bytes())
if s.Lexical == LIGNORE {
goto l0
}
if Debug['x'] != 0 {
fmt.Printf("lex: %s %s\n", s, lexname(int(s.Lexical)))
}
l.sym_ = s
switch s.Lexical {
case LNAME, LRETURN, LBREAK, LCONTINUE, LFALL:
l.nlsemi = true
}
l.tok = int32(s.Lexical)
return
ncu: ncu:
cp = nil cp = nil
l.ungetc(c) l.ungetr(c)
str = lexbuf.String() str = lexbuf.String()
l.val.U = new(Mpint) l.val.U = new(Mpint)
...@@ -1468,7 +1451,7 @@ ncu: ...@@ -1468,7 +1451,7 @@ ncu:
casedot: casedot:
for { for {
cp.WriteByte(byte(c)) cp.WriteByte(byte(c))
c = l.getc() c = l.getr()
if !isDigit(c) { if !isDigit(c) {
break break
} }
...@@ -1488,10 +1471,10 @@ caseep: ...@@ -1488,10 +1471,10 @@ caseep:
Yyerror("malformed floating point constant") Yyerror("malformed floating point constant")
} }
cp.WriteByte(byte(c)) cp.WriteByte(byte(c))
c = l.getc() c = l.getr()
if c == '+' || c == '-' { if c == '+' || c == '-' {
cp.WriteByte(byte(c)) cp.WriteByte(byte(c))
c = l.getc() c = l.getr()
} }
if !isDigit(c) { if !isDigit(c) {
...@@ -1499,7 +1482,7 @@ caseep: ...@@ -1499,7 +1482,7 @@ caseep:
} }
for isDigit(c) { for isDigit(c) {
cp.WriteByte(byte(c)) cp.WriteByte(byte(c))
c = l.getc() c = l.getr()
} }
if c == 'i' { if c == 'i' {
...@@ -1530,7 +1513,7 @@ casei: ...@@ -1530,7 +1513,7 @@ casei:
caseout: caseout:
cp = nil cp = nil
l.ungetc(c) l.ungetr(c)
str = lexbuf.String() str = lexbuf.String()
l.val.U = newMpflt() l.val.U = newMpflt()
...@@ -1571,7 +1554,7 @@ func internString(b []byte) string { ...@@ -1571,7 +1554,7 @@ func internString(b []byte) string {
func more(pp *string) bool { func more(pp *string) bool {
p := *pp p := *pp
for p != "" && isSpace(int(p[0])) { for p != "" && isSpace(rune(p[0])) {
p = p[1:] p = p[1:]
} }
*pp = p *pp = p
...@@ -1582,16 +1565,16 @@ func more(pp *string) bool { ...@@ -1582,16 +1565,16 @@ func more(pp *string) bool {
// //line parse.y:15 // //line parse.y:15
// as a discontinuity in sequential line numbers. // as a discontinuity in sequential line numbers.
// the next line of input comes from parse.y:15 // the next line of input comes from parse.y:15
func (l *lexer) getlinepragma() int { func (l *lexer) getlinepragma() rune {
var cmd, verb, name string var cmd, verb, name string
c := int(l.getr()) c := l.getr()
if c == 'g' { if c == 'g' {
cp := &lexbuf cp := &lexbuf
cp.Reset() cp.Reset()
cp.WriteByte('g') // already read cp.WriteByte('g') // already read
for { for {
c = int(l.getr()) c = l.getr()
if c == EOF || c >= utf8.RuneSelf { if c == EOF || c >= utf8.RuneSelf {
return c return c
} }
...@@ -1683,8 +1666,8 @@ func (l *lexer) getlinepragma() int { ...@@ -1683,8 +1666,8 @@ func (l *lexer) getlinepragma() int {
return c return c
} }
for i := 1; i < 5; i++ { for i := 1; i < 5; i++ {
c = int(l.getr()) c = l.getr()
if c != int("line "[i]) { if c != rune("line "[i]) {
return c return c
} }
} }
...@@ -1693,7 +1676,7 @@ func (l *lexer) getlinepragma() int { ...@@ -1693,7 +1676,7 @@ func (l *lexer) getlinepragma() int {
cp.Reset() cp.Reset()
linep := 0 linep := 0
for { for {
c = int(l.getr()) c = l.getr()
if c == EOF { if c == EOF {
return c return c
} }
...@@ -1746,7 +1729,7 @@ func getimpsym(pp *string) string { ...@@ -1746,7 +1729,7 @@ func getimpsym(pp *string) string {
return "" return ""
} }
i := 0 i := 0
for i < len(p) && !isSpace(int(p[i])) && p[i] != '"' { for i < len(p) && !isSpace(rune(p[i])) && p[i] != '"' {
i++ i++
} }
sym := p[:i] sym := p[:i]
...@@ -1874,79 +1857,72 @@ func pragcgo(text string) { ...@@ -1874,79 +1857,72 @@ func pragcgo(text string) {
} }
} }
func (l *lexer) getc() int { func (l *lexer) getr() rune {
c := l.peekc // unread rune != 0 available
if c != 0 { if r := l.peekr1; r != 0 {
l.peekc = l.peekc1 l.peekr1 = l.peekr2
l.peekc1 = 0 l.peekr2 = 0
goto check if r == '\n' && importpkg == nil {
lexlineno++
}
return r
} }
loop: redo:
c = obj.Bgetc(l.bin) // common case: 7bit ASCII
// recognize BOM (U+FEFF): UTF-8 encoding is 0xef 0xbb 0xbf c := obj.Bgetc(l.bin)
if c == 0xef { if c < utf8.RuneSelf {
buf, err := l.bin.Peek(2) if c == 0 {
if err != nil { // TODO(gri) do we need lineno = lexlineno here? Why not?
yyerrorl(int(lexlineno), "illegal UTF-8 sequence ef % x followed by read error (%v)", string(buf), err) Yyerror("illegal NUL byte")
errorexit() return 0
} }
if buf[0] == 0xbb && buf[1] == 0xbf { if c == '\n' && importpkg == nil {
yyerrorl(int(lexlineno), "Unicode (UTF-8) BOM in middle of file") lexlineno++
// consume BOM bytes
obj.Bgetc(l.bin)
obj.Bgetc(l.bin)
goto loop
} }
return rune(c)
} }
// c >= utf8.RuneSelf
check: // uncommon case: non-ASCII
if c == 0 { var buf [utf8.UTFMax]byte
Yyerror("illegal NUL byte") buf[0] = byte(c)
return 0 buf[1] = byte(obj.Bgetc(l.bin))
i := 2
for ; i < len(buf) && !utf8.FullRune(buf[:i]); i++ {
buf[i] = byte(obj.Bgetc(l.bin))
} }
if c == '\n' && importpkg == nil {
lexlineno++ r, w := utf8.DecodeRune(buf[:i])
if r == utf8.RuneError && w == 1 {
lineno = lexlineno
// The string conversion here makes a copy for passing
// to fmt.Printf, so that buf itself does not escape and
// can be allocated on the stack.
Yyerror("illegal UTF-8 sequence % x", string(buf[:i+1]))
} }
return c
}
func (l *lexer) ungetc(c int) { if r == BOM {
l.peekc1 = l.peekc // TODO(gri) can we use Yyerror here? Why not?
l.peekc = c yyerrorl(int(lexlineno), "Unicode (UTF-8) BOM in middle of file")
if c == '\n' && importpkg == nil { goto redo
lexlineno--
} }
}
func (l *lexer) getr() int32 { return r
var buf [utf8.UTFMax]byte }
for i := 0; ; i++ { func (l *lexer) ungetr(r rune) {
c := l.getc() l.peekr2 = l.peekr1
if i == 0 && c < utf8.RuneSelf { l.peekr1 = r
return int32(c) if r == '\n' && importpkg == nil {
} lexlineno--
buf[i] = byte(c)
if i+1 == len(buf) || utf8.FullRune(buf[:i+1]) {
r, w := utf8.DecodeRune(buf[:i+1])
if r == utf8.RuneError && w == 1 {
lineno = lexlineno
// The string conversion here makes a copy for passing
// to fmt.Printf, so that buf itself does not escape and can
// be allocated on the stack.
Yyerror("illegal UTF-8 sequence % x", string(buf[:i+1]))
}
return int32(r)
}
} }
} }
func (l *lexer) escchar(e int, escflg *int, val *int64) bool { func (l *lexer) escchar(e rune, escflg *int, val *int64) bool {
*escflg = 0 *escflg = 0
c := int(l.getr()) c := l.getr()
switch c { switch c {
case EOF: case EOF:
Yyerror("eof in string") Yyerror("eof in string")
...@@ -1968,7 +1944,7 @@ func (l *lexer) escchar(e int, escflg *int, val *int64) bool { ...@@ -1968,7 +1944,7 @@ func (l *lexer) escchar(e int, escflg *int, val *int64) bool {
} }
u := 0 u := 0
c = int(l.getr()) c = l.getr()
var i int var i int
switch c { switch c {
case 'x': case 'x':
...@@ -1997,14 +1973,14 @@ func (l *lexer) escchar(e int, escflg *int, val *int64) bool { ...@@ -1997,14 +1973,14 @@ func (l *lexer) escchar(e int, escflg *int, val *int64) bool {
*escflg = 1 // it's a byte *escflg = 1 // it's a byte
x := int64(c) - '0' x := int64(c) - '0'
for i := 2; i > 0; i-- { for i := 2; i > 0; i-- {
c = l.getc() c = l.getr()
if c >= '0' && c <= '7' { if c >= '0' && c <= '7' {
x = x*8 + int64(c) - '0' x = x*8 + int64(c) - '0'
continue continue
} }
Yyerror("non-octal character in escape sequence: %c", c) Yyerror("non-octal character in escape sequence: %c", c)
l.ungetc(c) l.ungetr(c)
} }
if x > 255 { if x > 255 {
...@@ -2043,7 +2019,7 @@ func (l *lexer) escchar(e int, escflg *int, val *int64) bool { ...@@ -2043,7 +2019,7 @@ func (l *lexer) escchar(e int, escflg *int, val *int64) bool {
hex: hex:
x := int64(0) x := int64(0)
for ; i > 0; i-- { for ; i > 0; i-- {
c = l.getc() c = l.getr()
if c >= '0' && c <= '9' { if c >= '0' && c <= '9' {
x = x*16 + int64(c) - '0' x = x*16 + int64(c) - '0'
continue continue
...@@ -2060,7 +2036,7 @@ hex: ...@@ -2060,7 +2036,7 @@ hex:
} }
Yyerror("non-hex character in escape sequence: %c", c) Yyerror("non-hex character in escape sequence: %c", c)
l.ungetc(c) l.ungetr(c)
break break
} }
...@@ -2377,7 +2353,7 @@ func lexfini() { ...@@ -2377,7 +2353,7 @@ func lexfini() {
nodfp.Sym = Lookup(".fp") nodfp.Sym = Lookup(".fp")
} }
var lexn = map[int]string{ var lexn = map[rune]string{
LANDAND: "ANDAND", LANDAND: "ANDAND",
LANDNOT: "ANDNOT", LANDNOT: "ANDNOT",
LASOP: "ASOP", LASOP: "ASOP",
...@@ -2424,7 +2400,7 @@ var lexn = map[int]string{ ...@@ -2424,7 +2400,7 @@ var lexn = map[int]string{
LVAR: "VAR", LVAR: "VAR",
} }
func lexname(lex int) string { func lexname(lex rune) string {
if s, ok := lexn[lex]; ok { if s, ok := lexn[lex]; ok {
return s return s
} }
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
package a package a
import"" // ERROR "import path is empty" import"" // ERROR "import path is empty"
var? // ERROR "unexpected \?" var? // ERROR "illegal character U\+003F '\?'"
var x int // ERROR "unexpected var" "cannot declare name" var x int // ERROR "unexpected var" "cannot declare name"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment