Commit a9e8befb authored by Rob Pike

generate replacement rune when asked to encode a negative rune value.

Fixes #425.

R=rsc
https://golang.org/cl/178043
parent fe0eb17f
...@@ -227,6 +227,11 @@ func RuneLen(rune int) int { ...@@ -227,6 +227,11 @@ func RuneLen(rune int) int {
// EncodeRune writes into p (which must be large enough) the UTF-8 encoding of the rune. // EncodeRune writes into p (which must be large enough) the UTF-8 encoding of the rune.
// It returns the number of bytes written. // It returns the number of bytes written.
func EncodeRune(rune int, p []byte) int { func EncodeRune(rune int, p []byte) int {
// Negative values are erroneous.
if rune < 0 {
rune = RuneError
}
if rune <= _Rune1Max { if rune <= _Rune1Max {
p[0] = byte(rune); p[0] = byte(rune);
return 1; return 1;
......
...@@ -42,6 +42,7 @@ var utf8map = []Utf8Map{ ...@@ -42,6 +42,7 @@ var utf8map = []Utf8Map{
Utf8Map{0x10001, "\xf0\x90\x80\x81"}, Utf8Map{0x10001, "\xf0\x90\x80\x81"},
Utf8Map{0x10fffe, "\xf4\x8f\xbf\xbe"}, Utf8Map{0x10fffe, "\xf4\x8f\xbf\xbe"},
Utf8Map{0x10ffff, "\xf4\x8f\xbf\xbf"}, Utf8Map{0x10ffff, "\xf4\x8f\xbf\xbf"},
Utf8Map{0xFFFD, "\xef\xbf\xbd"},
} }
// strings.Bytes with one extra byte at end // strings.Bytes with one extra byte at end
...@@ -81,7 +82,7 @@ func TestEncodeRune(t *testing.T) { ...@@ -81,7 +82,7 @@ func TestEncodeRune(t *testing.T) {
n := EncodeRune(m.rune, &buf); n := EncodeRune(m.rune, &buf);
b1 := buf[0:n]; b1 := buf[0:n];
if !bytes.Equal(b, b1) { if !bytes.Equal(b, b1) {
t.Errorf("EncodeRune(0x%04x) = %q want %q", m.rune, b1, b) t.Errorf("EncodeRune(%#04x) = %q want %q", m.rune, b1, b)
} }
} }
} }
...@@ -92,23 +93,23 @@ func TestDecodeRune(t *testing.T) { ...@@ -92,23 +93,23 @@ func TestDecodeRune(t *testing.T) {
b := makeBytes(m.str); b := makeBytes(m.str);
rune, size := DecodeRune(b); rune, size := DecodeRune(b);
if rune != m.rune || size != len(b) { if rune != m.rune || size != len(b) {
t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b, rune, size, m.rune, len(b)) t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, rune, size, m.rune, len(b))
} }
s := m.str; s := m.str;
rune, size = DecodeRuneInString(s); rune, size = DecodeRuneInString(s);
if rune != m.rune || size != len(b) { if rune != m.rune || size != len(b) {
t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", s, rune, size, m.rune, len(b)) t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", s, rune, size, m.rune, len(b))
} }
// there's an extra byte that bytes left behind - make sure trailing byte works // there's an extra byte that bytes left behind - make sure trailing byte works
rune, size = DecodeRune(b[0:cap(b)]); rune, size = DecodeRune(b[0:cap(b)]);
if rune != m.rune || size != len(b) { if rune != m.rune || size != len(b) {
t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b, rune, size, m.rune, len(b)) t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, rune, size, m.rune, len(b))
} }
s = m.str + "\x00"; s = m.str + "\x00";
rune, size = DecodeRuneInString(s); rune, size = DecodeRuneInString(s);
if rune != m.rune || size != len(b) { if rune != m.rune || size != len(b) {
t.Errorf("DecodeRuneInString(%q) = 0x%04x, %d want 0x%04x, %d", s, rune, size, m.rune, len(b)) t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, rune, size, m.rune, len(b))
} }
// make sure missing bytes fail // make sure missing bytes fail
...@@ -118,12 +119,12 @@ func TestDecodeRune(t *testing.T) { ...@@ -118,12 +119,12 @@ func TestDecodeRune(t *testing.T) {
} }
rune, size = DecodeRune(b[0 : len(b)-1]); rune, size = DecodeRune(b[0 : len(b)-1]);
if rune != RuneError || size != wantsize { if rune != RuneError || size != wantsize {
t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b[0:len(b)-1], rune, size, RuneError, wantsize) t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b[0:len(b)-1], rune, size, RuneError, wantsize)
} }
s = m.str[0 : len(m.str)-1]; s = m.str[0 : len(m.str)-1];
rune, size = DecodeRuneInString(s); rune, size = DecodeRuneInString(s);
if rune != RuneError || size != wantsize { if rune != RuneError || size != wantsize {
t.Errorf("DecodeRuneInString(%q) = 0x%04x, %d want 0x%04x, %d", s, rune, size, RuneError, wantsize) t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, rune, size, RuneError, wantsize)
} }
// make sure bad sequences fail // make sure bad sequences fail
...@@ -134,16 +135,27 @@ func TestDecodeRune(t *testing.T) { ...@@ -134,16 +135,27 @@ func TestDecodeRune(t *testing.T) {
} }
rune, size = DecodeRune(b); rune, size = DecodeRune(b);
if rune != RuneError || size != 1 { if rune != RuneError || size != 1 {
t.Errorf("DecodeRune(%q) = 0x%04x, %d want 0x%04x, %d", b, rune, size, RuneError, 1) t.Errorf("DecodeRune(%q) = %#04x, %d want %#04x, %d", b, rune, size, RuneError, 1)
} }
s = string(b); s = string(b);
rune, size = DecodeRune(b); rune, size = DecodeRune(b);
if rune != RuneError || size != 1 { if rune != RuneError || size != 1 {
t.Errorf("DecodeRuneInString(%q) = 0x%04x, %d want 0x%04x, %d", s, rune, size, RuneError, 1) t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, rune, size, RuneError, 1)
} }
} }
} }
// TestNegativeRune verifies that encoding a negative rune value produces
// exactly the same bytes as encoding RuneError (U+FFFD).
func TestNegativeRune(t *testing.T) {
	// Reference encoding: what EncodeRune writes for RuneError itself.
	want := make([]byte, UTFMax)
	want = want[0:EncodeRune(RuneError, want)]

	// Encoding under test: a negative rune value.
	got := make([]byte, UTFMax)
	n := EncodeRune(-1, got)
	got = got[0:n]

	if bytes.Equal(got, want) {
		return
	}
	t.Errorf("incorrect encoding [% x] for -1; expected [% x]", got, want)
}
type RuneCountTest struct { type RuneCountTest struct {
in string; in string;
out int; out int;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment