Commit 968732b6 authored by Robert Griesemer

go/scanner: reject BOMs that are not at the beginning

For compliance with gc. See also issue 5265.
Not Go1.1 critical, but harmless.

R=r
CC=golang-dev
https://golang.org/cl/8736043
parent d4d06358
......@@ -48,6 +48,8 @@ type Scanner struct {
ErrorCount int // number of errors encountered
}
// bom is the code point 0xFEFF (byte order mark). The scanner skips it when it
// is the first character of the source; at any later offset it is reported as
// an "illegal byte order mark" error.
const bom = 0xFEFF // byte order mark, only permitted as very first character
// Read the next Unicode char into s.ch.
// s.ch < 0 means end-of-file.
//
......@@ -67,6 +69,8 @@ func (s *Scanner) next() {
r, w = utf8.DecodeRune(s.src[s.rdOffset:])
if r == utf8.RuneError && w == 1 {
s.error(s.offset, "illegal UTF-8 encoding")
} else if r == bom && s.offset > 0 {
s.error(s.offset, "illegal byte order mark")
}
}
s.rdOffset += w
......@@ -125,8 +129,8 @@ func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode
s.ErrorCount = 0
s.next()
if s.ch == '\uFEFF' {
s.next() // ignore BOM
if s.ch == bom {
s.next() // ignore BOM at file beginning
}
}
......@@ -713,7 +717,10 @@ scanAgain:
case '|':
tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
default:
s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
// next reports unexpected BOMs - don't repeat
if ch != bom {
s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
}
insertSemi = s.insertSemi // preserve insertSemi info
tok = token.ILLEGAL
lit = string(ch)
......
......@@ -695,7 +695,10 @@ var errors = []struct {
{"0X", token.INT, 0, "illegal hexadecimal number"},
{"\"abc\x00def\"", token.STRING, 4, "illegal character NUL"},
{"\"abc\x80def\"", token.STRING, 4, "illegal UTF-8 encoding"},
{"\ufeff\ufeff", token.ILLEGAL, 3, "illegal character U+FEFF"}, // only first BOM is ignored
{"\ufeff\ufeff", token.ILLEGAL, 3, "illegal byte order mark"}, // only first BOM is ignored
{"//\ufeff", token.COMMENT, 2, "illegal byte order mark"}, // only first BOM is ignored
{"'\ufeff" + `'`, token.CHAR, 1, "illegal byte order mark"}, // only first BOM is ignored
{`"` + "abc\ufeffdef" + `"`, token.STRING, 4, "illegal byte order mark"}, // only first BOM is ignored
}
func TestScanErrors(t *testing.T) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment