Commit f11271b8, authored by Roger Peppe, committed by Rob Pike

utf8: add DecodeLastRune and DecodeLastRuneInString to enable traversing rune-by-rune backwards in strings

R=r, rsc
CC=golang-dev
https://golang.org/cl/2192050
parent 1959c3ac
......@@ -209,6 +209,73 @@ func DecodeRuneInString(s string) (rune, size int) {
return
}
// DecodeLastRune unpacks the last UTF-8 encoding in p
// and returns the rune and its width in bytes.
// If p is empty it returns (RuneError, 0). If the bytes examined at the
// end of p do not decode to a rune that finishes exactly at the end of p,
// it returns (RuneError, 1).
func DecodeLastRune(p []byte) (rune, size int) {
	n := len(p)
	if n == 0 {
		return RuneError, 0
	}
	// Fast path: a final byte below RuneSelf stands for itself.
	if last := int(p[n-1]); last < RuneSelf {
		return last, 1
	}
	// Search backwards for a byte that can begin an encoding, but look
	// no further than UTFMax bytes from the end. The bound keeps a
	// backwards traversal over a long run of invalid UTF-8 from
	// degenerating into O(n^2) work.
	floor := n - UTFMax
	if floor < 0 {
		floor = 0
	}
	i := n - 2
	for i >= floor && !RuneStart(p[i]) {
		i--
	}
	// The scan can step one position below zero on short inputs.
	if i < 0 {
		i = 0
	}
	rune, size = DecodeRune(p[i:n])
	// Accept the decoded rune only if it consumes everything up to the
	// end of p; otherwise report a single bad byte.
	if i+size == n {
		return rune, size
	}
	return RuneError, 1
}
// DecodeLastRuneInString is like DecodeLastRune but its input is a string.
// If s is empty it returns (RuneError, 0). If the bytes examined at the
// end of s do not decode to a rune that finishes exactly at the end of s,
// it returns (RuneError, 1).
func DecodeLastRuneInString(s string) (rune, size int) {
	n := len(s)
	if n == 0 {
		return RuneError, 0
	}
	// Fast path: a final byte below RuneSelf stands for itself.
	if last := int(s[n-1]); last < RuneSelf {
		return last, 1
	}
	// Search backwards for a byte that can begin an encoding, but look
	// no further than UTFMax bytes from the end. The bound keeps a
	// backwards traversal over a long run of invalid UTF-8 from
	// degenerating into O(n^2) work.
	floor := n - UTFMax
	if floor < 0 {
		floor = 0
	}
	i := n - 2
	for i >= floor && !RuneStart(s[i]) {
		i--
	}
	// The scan can step one position below zero on short inputs.
	if i < 0 {
		i = 0
	}
	rune, size = DecodeRuneInString(s[i:n])
	// Accept the decoded rune only if it consumes everything up to the
	// end of s; otherwise report a single bad byte.
	if i+size == n {
		return rune, size
	}
	return RuneError, 1
}
// RuneLen returns the number of bytes required to encode the rune.
func RuneLen(rune int) int {
switch {
......
......@@ -44,6 +44,12 @@ var utf8map = []Utf8Map{
Utf8Map{0xFFFD, "\xef\xbf\xbd"},
}
// testStrings holds the filler strings that TestSequencing places before,
// after, and on both sides of each utf8map entry.
var testStrings = []string{
// The empty string.
"",
// Plain ASCII.
"abcd",
// Four 0x80 bytes, which are not valid UTF-8 on their own.
"\x80\x80\x80\x80",
}
// strings.Bytes with one extra byte at end
func makeBytes(s string) []byte {
s += "\x00"
......@@ -141,6 +147,79 @@ func TestDecodeRune(t *testing.T) {
if rune != RuneError || size != 1 {
t.Errorf("DecodeRuneInString(%q) = %#04x, %d want %#04x, %d", s, rune, size, RuneError, 1)
}
}
}
// TestSequencing checks that DecodeRune and DecodeLastRune correspond to
// the equivalent range loop. Every utf8map entry is combined with every
// filler in testStrings three ways: filler first, filler last, and filler
// on both sides.
func TestSequencing(t *testing.T) {
	for _, ts := range testStrings {
		for _, m := range utf8map {
			testSequence(t, ts+m.str)
			testSequence(t, m.str+ts)
			testSequence(t, ts+m.str+ts)
		}
	}
}
func testSequence(t *testing.T, s string) {
type info struct {
index int
rune int
}
index := make([]info, len(s))
b := []byte(s)
si := 0
j := 0
for i, r := range s {
if si != i {
t.Errorf("Sequence(%q) mismatched index %d, want %d", s, si, i)
return
}
index[j] = info{i, r}
j++
rune1, size1 := DecodeRune(b[i:])
if r != rune1 {
t.Errorf("DecodeRune(%q) = %#04x, want %#04x", s[i:], rune1, r)
return
}
rune2, size2 := DecodeRuneInString(s[i:])
if r != rune2 {
t.Errorf("DecodeRuneInString(%q) = %#04x, want %#04x", s[i:], rune2, r)
return
}
if size1 != size2 {
t.Errorf("DecodeRune/DecodeRuneInString(%q) size mismatch %d/%d", s[i:], size1, size2)
return
}
si += size1
}
j--
for si = len(s); si > 0; {
rune1, size1 := DecodeLastRune(b[0:si])
rune2, size2 := DecodeLastRuneInString(s[0:si])
if size1 != size2 {
t.Errorf("DecodeLastRune/DecodeLastRuneInString(%q, %d) size mismatch %d/%d", s, si, size1, size2)
return
}
if rune1 != index[j].rune {
t.Errorf("DecodeLastRune(%q, %d) = %#04x, want %#04x", s, si, rune1, index[j].rune)
return
}
if rune2 != index[j].rune {
t.Errorf("DecodeLastRuneInString(%q, %d) = %#04x, want %#04x", s, si, rune2, index[j].rune)
return
}
si -= size1
if si != index[j].index {
t.Errorf("DecodeLastRune(%q) index mismatch at %d, want %d", s, si, index[j].index)
return
}
j--
}
if si != 0 {
t.Errorf("DecodeLastRune(%q) finished at %d, not 0", s, si)
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment