Commit 77e98fb8 authored by Robert Griesemer

go/scanner: don't drop identifiers starting with non-ASCII letter...

Bug introduced with CL 6454150.

Fixes #4000.

R=r
CC=golang-dev
https://golang.org/cl/6474061
parent 053b448d
...@@ -572,8 +572,7 @@ scanAgain: ...@@ -572,8 +572,7 @@ scanAgain:
// determine token value // determine token value
insertSemi := false insertSemi := false
switch ch := s.ch; { switch ch := s.ch; {
case 'a' <= ch && ch <= 'z': case isLetter(ch):
// literals start with a lower-case letter
lit = s.scanIdentifier() lit = s.scanIdentifier()
if len(lit) > 1 { if len(lit) > 1 {
// keywords are longer than one letter - avoid lookup otherwise // keywords are longer than one letter - avoid lookup otherwise
...@@ -586,10 +585,6 @@ scanAgain: ...@@ -586,10 +585,6 @@ scanAgain:
insertSemi = true insertSemi = true
tok = token.IDENT tok = token.IDENT
} }
case 'A' <= ch && ch <= 'Z' || ch == '_':
insertSemi = true
tok = token.IDENT
lit = s.scanIdentifier()
case '0' <= ch && ch <= '9': case '0' <= ch && ch <= '9':
insertSemi = true insertSemi = true
tok, lit = s.scanNumber(false) tok, lit = s.scanNumber(false)
...@@ -715,17 +710,10 @@ scanAgain: ...@@ -715,17 +710,10 @@ scanAgain:
case '|': case '|':
tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR) tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
default: default:
if isLetter(ch) { s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
// handle any letters we might have missed insertSemi = s.insertSemi // preserve insertSemi info
insertSemi = true tok = token.ILLEGAL
tok = token.IDENT lit = string(ch)
s.scanIdentifier()
} else {
s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
insertSemi = s.insertSemi // preserve insertSemi info
tok = token.ILLEGAL
lit = string(ch)
}
} }
} }
if s.mode&dontInsertSemis == 0 { if s.mode&dontInsertSemis == 0 {
......
...@@ -52,6 +52,8 @@ var tokens = [...]elt{ ...@@ -52,6 +52,8 @@ var tokens = [...]elt{
{token.IDENT, "a۰۱۸", literal}, {token.IDENT, "a۰۱۸", literal},
{token.IDENT, "foo६४", literal}, {token.IDENT, "foo६४", literal},
{token.IDENT, "bar9876", literal}, {token.IDENT, "bar9876", literal},
{token.IDENT, "ŝ", literal}, // was bug (issue 4000)
{token.IDENT, "ŝfoo", literal}, // was bug (issue 4000)
{token.INT, "0", literal}, {token.INT, "0", literal},
{token.INT, "1", literal}, {token.INT, "1", literal},
{token.INT, "123456789012345678890", literal}, {token.INT, "123456789012345678890", literal},
...@@ -544,7 +546,7 @@ func TestLineComments(t *testing.T) { ...@@ -544,7 +546,7 @@ func TestLineComments(t *testing.T) {
} }
} }
// Verify that initializing the same scanner more then once works correctly. // Verify that initializing the same scanner more than once works correctly.
func TestInit(t *testing.T) { func TestInit(t *testing.T) {
var s Scanner var s Scanner
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment