Commit 75f6a0c7 authored by Christian Himpel's avatar Christian Himpel Committed by Rob Pike

bytes: add IndexRune, FieldsFunc and To*Special

Basically these functions are implemented the same way as the
corresponding functions in the strings package.  Test functions
are implemented for IndexRune and FieldsFunc.

Additionally two typos are fixed in packages bytes and strings.

R=r
CC=golang-dev
https://golang.org/cl/1696062
parent 895c5db6
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
// The bytes package implements functions for the manipulation of byte slices. // The bytes package implements functions for the manipulation of byte slices.
// Analagous to the facilities of the strings package. // Analogous to the facilities of the strings package.
package bytes package bytes
import ( import (
...@@ -127,6 +127,20 @@ func LastIndex(s, sep []byte) int { ...@@ -127,6 +127,20 @@ func LastIndex(s, sep []byte) int {
return -1 return -1
} }
// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points.
// It returns the byte index of the first occurrence in s of the given rune.
// It returns -1 if rune is not present in s.
func IndexRune(s []byte, rune int) int {
for i := 0; i < len(s); {
r, size := utf8.DecodeRune(s[i:])
if r == rune {
return i
}
i += size
}
return -1
}
// IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points. // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
// It returns the byte index of the first occurrence in s of any of the Unicode // It returns the byte index of the first occurrence in s of any of the Unicode
// code points in chars. It returns -1 if chars is empty or if there is no code // code points in chars. It returns -1 if chars is empty or if there is no code
...@@ -202,12 +216,20 @@ func SplitAfter(s, sep []byte, n int) [][]byte { ...@@ -202,12 +216,20 @@ func SplitAfter(s, sep []byte, n int) [][]byte {
// Fields splits the array s around each instance of one or more consecutive white space // Fields splits the array s around each instance of one or more consecutive white space
// characters, returning a slice of subarrays of s or an empty list if s contains only white space. // characters, returning a slice of subarrays of s or an empty list if s contains only white space.
func Fields(s []byte) [][]byte { func Fields(s []byte) [][]byte {
return FieldsFunc(s, unicode.IsSpace)
}
// FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
// It splits the array s at each run of code points c satisfying f(c) and
// returns a slice of subarrays of s. If no code points in s satisfy f(c), an
// empty slice is returned.
func FieldsFunc(s []byte, f func(int) bool) [][]byte {
n := 0 n := 0
inField := false inField := false
for i := 0; i < len(s); { for i := 0; i < len(s); {
rune, size := utf8.DecodeRune(s[i:]) rune, size := utf8.DecodeRune(s[i:])
wasInField := inField wasInField := inField
inField = !unicode.IsSpace(rune) inField = !f(rune)
if inField && !wasInField { if inField && !wasInField {
n++ n++
} }
...@@ -219,12 +241,12 @@ func Fields(s []byte) [][]byte { ...@@ -219,12 +241,12 @@ func Fields(s []byte) [][]byte {
fieldStart := -1 fieldStart := -1
for i := 0; i <= len(s) && na < n; { for i := 0; i <= len(s) && na < n; {
rune, size := utf8.DecodeRune(s[i:]) rune, size := utf8.DecodeRune(s[i:])
if fieldStart < 0 && size > 0 && !unicode.IsSpace(rune) { if fieldStart < 0 && size > 0 && !f(rune) {
fieldStart = i fieldStart = i
i += size i += size
continue continue
} }
if fieldStart >= 0 && (size == 0 || unicode.IsSpace(rune)) { if fieldStart >= 0 && (size == 0 || f(rune)) {
a[na] = s[fieldStart:i] a[na] = s[fieldStart:i]
na++ na++
fieldStart = -1 fieldStart = -1
...@@ -337,6 +359,25 @@ func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) } ...@@ -337,6 +359,25 @@ func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
// ToTitle returns a copy of the byte array s with all Unicode letters mapped to their title case. // ToTitle returns a copy of the byte array s with all Unicode letters mapped to their title case.
func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
// ToUpperSpecial returns a copy of the byte array s with all Unicode letters mapped to their
// upper case, giving priority to the special casing rules.
func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte {
return Map(func(r int) int { return _case.ToUpper(r) }, s)
}
// ToLowerSpecial returns a copy of the byte array s with all Unicode letters mapped to their
// lower case, giving priority to the special casing rules.
func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte {
return Map(func(r int) int { return _case.ToLower(r) }, s)
}
// ToTitleSpecial returns a copy of the byte array s with all Unicode letters mapped to their
// title case, giving priority to the special casing rules.
func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte {
return Map(func(r int) int { return _case.ToTitle(r) }, s)
}
// isSeparator reports whether the rune could mark a word boundary. // isSeparator reports whether the rune could mark a word boundary.
// TODO: update when package unicode captures more of the properties. // TODO: update when package unicode captures more of the properties.
func isSeparator(rune int) bool { func isSeparator(rune int) bool {
......
...@@ -125,6 +125,15 @@ var indexAnyTests = []BinOpTest{ ...@@ -125,6 +125,15 @@ var indexAnyTests = []BinOpTest{
BinOpTest{dots + dots + dots, " ", -1}, BinOpTest{dots + dots + dots, " ", -1},
} }
var indexRuneTests = []BinOpTest{
BinOpTest{"", "a", -1},
BinOpTest{"", "☺", -1},
BinOpTest{"foo", "☹", -1},
BinOpTest{"foo", "o", 1},
BinOpTest{"foo☺bar", "☺", 3},
BinOpTest{"foo☺☻☹bar", "☹", 9},
}
// Execute f on each test case. funcName should be the name of f; it's used // Execute f on each test case. funcName should be the name of f; it's used
// in failure reports. // in failure reports.
func runIndexTests(t *testing.T, f func(s, sep []byte) int, funcName string, testCases []BinOpTest) { func runIndexTests(t *testing.T, f func(s, sep []byte) int, funcName string, testCases []BinOpTest) {
...@@ -168,6 +177,17 @@ func TestIndexByte(t *testing.T) { ...@@ -168,6 +177,17 @@ func TestIndexByte(t *testing.T) {
} }
} }
func TestIndexRune(t *testing.T) {
for _, tt := range indexRuneTests {
a := []byte(tt.a)
r, _ := utf8.DecodeRuneInString(tt.b)
pos := IndexRune(a, r)
if pos != tt.i {
t.Errorf(`IndexRune(%q, '%c') = %v`, tt.a, r, pos)
}
}
}
func BenchmarkIndexByte4K(b *testing.B) { bmIndex(b, IndexByte, 4<<10) } func BenchmarkIndexByte4K(b *testing.B) { bmIndex(b, IndexByte, 4<<10) }
func BenchmarkIndexByte4M(b *testing.B) { bmIndex(b, IndexByte, 4<<20) } func BenchmarkIndexByte4M(b *testing.B) { bmIndex(b, IndexByte, 4<<20) }
...@@ -336,6 +356,23 @@ func TestFields(t *testing.T) { ...@@ -336,6 +356,23 @@ func TestFields(t *testing.T) {
} }
} }
func TestFieldsFunc(t *testing.T) {
pred := func(c int) bool { return c == 'X' }
var fieldsFuncTests = []FieldsTest{
FieldsTest{"", []string{}},
FieldsTest{"XX", []string{}},
FieldsTest{"XXhiXXX", []string{"hi"}},
FieldsTest{"aXXbXXXcX", []string{"a", "b", "c"}},
}
for _, tt := range fieldsFuncTests {
a := FieldsFunc([]byte(tt.s), pred)
result := arrayOfString(a)
if !eq(result, tt.a) {
t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a)
}
}
}
// Test case for any function which accepts and returns a byte array. // Test case for any function which accepts and returns a byte array.
// For ease of creation, we write the byte arrays as strings. // For ease of creation, we write the byte arrays as strings.
type StringTest struct { type StringTest struct {
......
...@@ -191,7 +191,7 @@ func Fields(s string) []string { ...@@ -191,7 +191,7 @@ func Fields(s string) []string {
return FieldsFunc(s, unicode.IsSpace) return FieldsFunc(s, unicode.IsSpace)
} }
// FieldsFunc splits the string s at each run of Unicode code points c satifying f(c) // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
// and returns an array of slices of s. If no code points in s satisfy f(c), an empty slice // and returns an array of slices of s. If no code points in s satisfy f(c), an empty slice
// is returned. // is returned.
func FieldsFunc(s string, f func(int) bool) []string { func FieldsFunc(s string, f func(int) bool) []string {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment