Commit bfebf9ea authored by Russ Cox

cmd/yacc: fix parsing of character tokens

From issue 7967 I learned:

1) yacc accepts either 'x' or "x" to mean token value 0x78
2) yacc also accepts 'xyz' and "xyz" to mean token value 0x78 (only the first character is used)

Use strconv.Unquote to simplify the handling of quoted
strings and check that each has only one rune.

Although this does clean things up, it causes 'x' and "x" to be
treated as different internally (now they are stored as
`'x'` and `"x"`; before they were both ` x`). Grammars that
use both interchangeably will now die with an error
similar to the one from issue 7967:

        yacc bug -- cannot have 2 different Ts with same value
                "+" and '+'

The echoing of the quotes should make clear what is going on.

The other semantic change caused by using strconv.Unquote
is that '\"' and "\'" are no longer valid. Like in Go, they must be
spelled without the backslash: '"' and "'".

On the other hand, now yacc and Go agree about what character
and string literals mean.

LGTM=r
R=r
CC=golang-codereviews
https://golang.org/cl/149110043
parent b2487ef6
......@@ -52,9 +52,9 @@ import (
"go/format"
"io/ioutil"
"os"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)
// the following are adjustable
......@@ -756,64 +756,16 @@ func defin(nt int, s string) int {
// establish value for token
// single character literal
if s[0] == ' ' {
s = s[1:]
r, size := utf8.DecodeRuneInString(s)
if r == utf8.RuneError && size == 1 {
errorf("invalid UTF-8 sequence %q", s)
}
val = int(r)
if val == '\\' { // escape sequence
switch {
case len(s) == 2:
// single character escape sequence
switch s[1] {
case '\'':
val = '\''
case '"':
val = '"'
case '\\':
val = '\\'
case 'a':
val = '\a'
case 'b':
val = '\b'
case 'f':
val = '\f'
case 'n':
val = '\n'
case 'r':
val = '\r'
case 't':
val = '\t'
case 'v':
val = '\v'
default:
errorf("invalid escape %s", s)
}
case s[1] == 'u' && len(s) == 2+4, // \unnnn sequence
s[1] == 'U' && len(s) == 2+8: // \Unnnnnnnn sequence
val = 0
s = s[2:]
for s != "" {
c := int(s[0])
switch {
case c >= '0' && c <= '9':
c -= '0'
case c >= 'a' && c <= 'f':
c -= 'a' - 10
case c >= 'A' && c <= 'F':
c -= 'A' - 10
default:
errorf(`illegal \u or \U construction`)
}
val = val*16 + c
s = s[1:]
}
default:
errorf("invalid escape %s", s)
if s[0] == '\'' || s[0] == '"' {
q, err := strconv.Unquote(s)
if err != nil {
errorf("invalid token: %s", err)
}
rq := []rune(q)
if len(rq) != 1 {
errorf("character token too long: %s", s)
}
val = int(rq[0])
if val == 0 {
errorf("token value 0 is illegal")
}
......@@ -896,7 +848,7 @@ func gettok() int {
case '"', '\'':
match = c
tokname = " "
tokname = string(c)
for {
c = getrune(finput)
if c == '\n' || c == EOF {
......@@ -909,6 +861,7 @@ func gettok() int {
if tokflag {
fmt.Printf(">>> IDENTIFIER \"%v\" %v\n", tokname, lineno)
}
tokname += string(c)
return IDENTIFIER
}
tokname += string(c)
......@@ -1029,7 +982,7 @@ func fdtype(t int) int {
}
func chfind(t int, s string) int {
if s[0] == ' ' {
if s[0] == '"' || s[0] == '\'' {
t = 0
}
for i := 0; i <= ntokens; i++ {
......@@ -1516,9 +1469,6 @@ func symnam(i int) string {
} else {
s = tokset[i].name
}
if s[0] == ' ' {
s = s[1:]
}
return s
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment