Commit fb6ffd8f authored by Robert Griesemer

go/scanner: strip CRs from raw literals

R=rsc
CC=golang-dev
https://golang.org/cl/5495049
parent fd1f1096
...@@ -426,13 +426,16 @@ func (S *Scanner) scanString() { ...@@ -426,13 +426,16 @@ func (S *Scanner) scanString() {
S.next() S.next()
} }
func (S *Scanner) scanRawString() { func (S *Scanner) scanRawString() (hasCR bool) {
// '`' opening already consumed // '`' opening already consumed
offs := S.offset - 1 offs := S.offset - 1
for S.ch != '`' { for S.ch != '`' {
ch := S.ch ch := S.ch
S.next() S.next()
if ch == '\r' {
hasCR = true
}
if ch < 0 { if ch < 0 {
S.error(offs, "string not terminated") S.error(offs, "string not terminated")
break break
...@@ -440,6 +443,7 @@ func (S *Scanner) scanRawString() { ...@@ -440,6 +443,7 @@ func (S *Scanner) scanRawString() {
} }
S.next() S.next()
return
} }
func (S *Scanner) skipWhitespace() { func (S *Scanner) skipWhitespace() {
...@@ -490,6 +494,18 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Tok ...@@ -490,6 +494,18 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Tok
return tok0 return tok0
} }
// stripCR returns a newly allocated copy of b with every carriage
// return ('\r') byte removed. All other bytes keep their relative
// order, and b itself is not modified.
func stripCR(b []byte) []byte {
	// Capacity len(b) is an upper bound on the result size, so the
	// appends below never reallocate.
	out := make([]byte, 0, len(b))
	for idx := 0; idx < len(b); idx++ {
		if b[idx] == '\r' {
			continue // drop carriage returns
		}
		out = append(out, b[idx])
	}
	return out
}
// Scan scans the next token and returns the token position, // Scan scans the next token and returns the token position,
// the token, and the literal string corresponding to the // the token, and the literal string corresponding to the
// token. The source end is indicated by token.EOF. // token. The source end is indicated by token.EOF.
...@@ -518,6 +534,7 @@ scanAgain: ...@@ -518,6 +534,7 @@ scanAgain:
insertSemi := false insertSemi := false
offs := S.offset offs := S.offset
tok := token.ILLEGAL tok := token.ILLEGAL
hasCR := false
// determine token value // determine token value
switch ch := S.ch; { switch ch := S.ch; {
...@@ -556,7 +573,7 @@ scanAgain: ...@@ -556,7 +573,7 @@ scanAgain:
case '`': case '`':
insertSemi = true insertSemi = true
tok = token.STRING tok = token.STRING
S.scanRawString() hasCR = S.scanRawString()
case ':': case ':':
tok = S.switch2(token.COLON, token.DEFINE) tok = S.switch2(token.COLON, token.DEFINE)
case '.': case '.':
...@@ -663,5 +680,9 @@ scanAgain: ...@@ -663,5 +680,9 @@ scanAgain:
// TODO(gri): The scanner API should change such that the literal string // TODO(gri): The scanner API should change such that the literal string
// is only valid if an actual literal was scanned. This will // is only valid if an actual literal was scanned. This will
// permit a more efficient implementation. // permit a more efficient implementation.
return S.file.Pos(offs), tok, string(S.src[offs:S.offset]) lit := S.src[offs:S.offset]
if hasCR {
lit = stripCR(lit)
}
return S.file.Pos(offs), tok, string(lit)
} }
...@@ -83,6 +83,8 @@ var tokens = [...]elt{ ...@@ -83,6 +83,8 @@ var tokens = [...]elt{
"`", "`",
literal, literal,
}, },
{token.STRING, "`\r`", literal},
{token.STRING, "`foo\r\nbar`", literal},
// Operators and delimiters // Operators and delimiters
{token.ADD, "+", operator}, {token.ADD, "+", operator},
...@@ -239,8 +241,16 @@ func TestScan(t *testing.T) { ...@@ -239,8 +241,16 @@ func TestScan(t *testing.T) {
if tok != e.tok { if tok != e.tok {
t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok) t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok)
} }
if e.tok.IsLiteral() && lit != e.lit { if e.tok.IsLiteral() {
t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, e.lit) // no CRs in raw string literals
elit := e.lit
if elit[0] == '`' {
elit = string(stripCR([]byte(elit)))
epos.Offset += len(e.lit) - len(lit) // correct position
}
if lit != elit {
t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit)
}
} }
if tokenclass(tok) != e.class { if tokenclass(tok) != e.class {
t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class) t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment