Commit 30533d60 authored by David Symonds

Change strings.Split, bytes.Split to take a maximum substring count argument.

R=rsc
APPROVED=r
DELTA=131  (39 added, 10 deleted, 82 changed)
OCL=30669
CL=30723
parent 466dd8da
...@@ -55,19 +55,27 @@ func Copy(dst, src []byte) int { ...@@ -55,19 +55,27 @@ func Copy(dst, src []byte) int {
return len(src) return len(src)
} }
// Explode splits s into an array of UTF-8 sequences, one per Unicode character (still arrays of bytes). // explode splits s into an array of UTF-8 sequences, one per Unicode character (still arrays of bytes),
// Invalid UTF-8 sequences become correct encodings of U+FFF8. // up to a maximum of n byte arrays. Invalid UTF-8 sequences are chopped into individual bytes.
func Explode(s []byte) [][]byte { func explode(s []byte, n int) [][]byte {
a := make([][]byte, utf8.RuneCount(s)); if n <= 0 {
n = len(s);
}
a := make([][]byte, n);
var size, rune int; var size, rune int;
i := 0; na := 0;
for len(s) > 0 { for len(s) > 0 {
if na+1 >= n {
a[na] = s;
na++;
break
}
rune, size = utf8.DecodeRune(s); rune, size = utf8.DecodeRune(s);
a[i] = s[0:size]; a[na] = s[0:size];
s = s[size:len(s)]; s = s[size:len(s)];
i++; na++;
} }
return a return a[0:na]
} }
// Count counts the number of non-overlapping instances of sep in s. // Count counts the number of non-overlapping instances of sep in s.
...@@ -101,27 +109,30 @@ func Index(s, sep []byte) int { ...@@ -101,27 +109,30 @@ func Index(s, sep []byte) int {
return -1 return -1
} }
// Split returns the array representing the subarrays of s separated by sep. Adjacent // Split splits the array s around each instance of sep, returning an array of subarrays of s.
// occurrences of sep produce empty subarrays. If sep is empty, it is the same as Explode. // If sep is empty, Split splits s after each UTF-8 sequence.
func Split(s, sep []byte) [][]byte { // If n > 0, Split splits s into at most n subarrays; the last subarray will contain an unsplit remainder.
func Split(s, sep []byte, n int) [][]byte {
if len(sep) == 0 { if len(sep) == 0 {
return Explode(s) return explode(s, n)
}
if n <= 0 {
n = Count(s, sep) + 1;
} }
c := sep[0]; c := sep[0];
start := 0; start := 0;
n := Count(s, sep)+1;
a := make([][]byte, n); a := make([][]byte, n);
na := 0; na := 0;
for i := 0; i+len(sep) <= len(s); i++ { for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) { if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) {
a[na] = s[start:i]; a[na] = s[start:i];
na++; na++;
start = i+len(sep); start = i+len(sep);
i += len(sep)-1 i += len(sep)-1;
} }
} }
a[na] = s[start:len(s)]; a[na] = s[start:len(s)];
return a return a[0:na+1]
} }
// Join concatenates the elements of a to create a single byte array. The separator // Join concatenates the elements of a to create a single byte array. The separator
......
...@@ -75,24 +75,25 @@ func TestCompare(t *testing.T) { ...@@ -75,24 +75,25 @@ func TestCompare(t *testing.T) {
type ExplodeTest struct { type ExplodeTest struct {
s string; s string;
n int;
a []string; a []string;
} }
var explodetests = []ExplodeTest { var explodetests = []ExplodeTest {
ExplodeTest{ abcd, []string{"a", "b", "c", "d"} }, ExplodeTest{ abcd, 0, []string{"a", "b", "c", "d"} },
ExplodeTest{ faces, []string{"☺", "☻", "☹" } }, ExplodeTest{ faces, 0, []string{"☺", "☻", "☹"} },
ExplodeTest{ abcd, 2, []string{"a", "bcd"} },
} }
func TestExplode(t *testing.T) { func TestExplode(t *testing.T) {
for i := 0; i < len(explodetests); i++ { for _, tt := range(explodetests) {
tt := explodetests[i]; a := explode(io.StringBytes(tt.s), tt.n);
a := Explode(io.StringBytes(tt.s));
result := arrayOfString(a); result := arrayOfString(a);
if !eq(result, tt.a) { if !eq(result, tt.a) {
t.Errorf(`Explode("%s") = %v; want %v`, tt.s, result, tt.a); t.Errorf(`Explode("%s", %d) = %v; want %v`, tt.s, tt.n, result, tt.a);
continue; continue;
} }
s := Join(a, []byte{}); s := Join(a, []byte{});
if string(s) != tt.s { if string(s) != tt.s {
t.Errorf(`Join(Explode("%s"), "") = "%s"`, tt.s, s); t.Errorf(`Join(Explode("%s", %d), "") = "%s"`, tt.s, tt.n, s);
} }
} }
} }
...@@ -101,30 +102,35 @@ func TestExplode(t *testing.T) { ...@@ -101,30 +102,35 @@ func TestExplode(t *testing.T) {
type SplitTest struct { type SplitTest struct {
s string; s string;
sep string; sep string;
n int;
a []string; a []string;
} }
var splittests = []SplitTest { var splittests = []SplitTest {
SplitTest{ abcd, "a", []string{"", "bcd"} }, SplitTest{ abcd, "a", 0, []string{"", "bcd"} },
SplitTest{ abcd, "z", []string{"abcd"} }, SplitTest{ abcd, "z", 0, []string{"abcd"} },
SplitTest{ abcd, "", []string{"a", "b", "c", "d"} }, SplitTest{ abcd, "", 0, []string{"a", "b", "c", "d"} },
SplitTest{ commas, ",", []string{"1", "2", "3", "4"} }, SplitTest{ commas, ",", 0, []string{"1", "2", "3", "4"} },
SplitTest{ dots, "...", []string{"1", ".2", ".3", ".4"} }, SplitTest{ dots, "...", 0, []string{"1", ".2", ".3", ".4"} },
SplitTest{ faces, "☹", []string{"☺☻", ""} }, SplitTest{ faces, "☹", 0, []string{"☺☻", ""} },
SplitTest{ faces, "~", []string{faces} }, SplitTest{ faces, "~", 0, []string{faces} },
SplitTest{ faces, "", []string{"☺", "☻", "☹"} }, SplitTest{ faces, "", 0, []string{"☺", "☻", "☹"} },
SplitTest{ "1 2 3 4", " ", 3, []string{"1", "2", "3 4"} },
SplitTest{ "1 2 3", " ", 3, []string{"1", "2", "3"} },
SplitTest{ "1 2", " ", 3, []string{"1", "2"} },
SplitTest{ "123", "", 2, []string{"1", "23"} },
SplitTest{ "123", "", 17, []string{"1", "2", "3"} },
} }
func TestSplit(t *testing.T) { func TestSplit(t *testing.T) {
for i := 0; i < len(splittests); i++ { for _, tt := range splittests {
tt := splittests[i]; a := Split(io.StringBytes(tt.s), io.StringBytes(tt.sep), tt.n);
a := Split(io.StringBytes(tt.s), io.StringBytes(tt.sep));
result := arrayOfString(a); result := arrayOfString(a);
if !eq(result, tt.a) { if !eq(result, tt.a) {
t.Errorf(`Split("%s", "%s") = %v; want %v`, tt.s, tt.sep, result, tt.a); t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a);
continue; continue;
} }
s := Join(a, io.StringBytes(tt.sep)); s := Join(a, io.StringBytes(tt.sep));
if string(s) != tt.s { if string(s) != tt.s {
t.Errorf(`Join(Split("%s", "%s"), "%s") = "%s"`, tt.s, tt.sep, tt.sep, s); t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s);
} }
} }
} }
......
...@@ -214,7 +214,7 @@ func LookPath(file string) (string, os.Error) { ...@@ -214,7 +214,7 @@ func LookPath(file string) (string, os.Error) {
// (equivalent to PATH="."). // (equivalent to PATH=".").
pathenv = ""; pathenv = "";
} }
for i, dir := range strings.Split(pathenv, ":") { for i, dir := range strings.Split(pathenv, ":", 0) {
if dir == "" { if dir == "" {
// Unix shell semantics: path element "" means "." // Unix shell semantics: path element "" means "."
dir = "."; dir = ".";
......
...@@ -45,7 +45,7 @@ func commentText(comments []string) string { ...@@ -45,7 +45,7 @@ func commentText(comments []string) string {
lines := make([]string, 0, 20); lines := make([]string, 0, 20);
for i, c := range comments { for i, c := range comments {
// split on newlines // split on newlines
cl := strings.Split(c, "\n"); cl := strings.Split(c, "\n", 0);
// walk lines, stripping comment markers // walk lines, stripping comment markers
w := 0; w := 0;
......
...@@ -108,13 +108,12 @@ func send(req *Request) (resp *Response, err os.Error) { ...@@ -108,13 +108,12 @@ func send(req *Request) (resp *Response, err os.Error) {
if err != nil { if err != nil {
return nil, err; return nil, err;
} }
i := strings.Index(line, " "); f := strings.Split(line, " ", 3);
j := strings.Index(line[i+1:len(line)], " ") + i+1; if len(f) < 3 {
if i < 0 || j < 0 {
return nil, os.ErrorString(fmt.Sprintf("Invalid first line in HTTP response: %q", line)); return nil, os.ErrorString(fmt.Sprintf("Invalid first line in HTTP response: %q", line));
} }
resp.Status = line[i+1:len(line)]; resp.Status = f[1] + " " + f[2];
resp.StatusCode, err = strconv.Atoi(line[i+1:j]); resp.StatusCode, err = strconv.Atoi(f[1]);
if err != nil { if err != nil {
return nil, os.ErrorString(fmt.Sprintf("Invalid status code in HTTP response: %q", line)); return nil, os.ErrorString(fmt.Sprintf("Invalid status code in HTTP response: %q", line));
} }
......
...@@ -442,7 +442,7 @@ func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) { ...@@ -442,7 +442,7 @@ func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {
} }
var f []string; var f []string;
if f = strings.Split(s, " "); len(f) != 3 { if f = strings.Split(s, " ", 3); len(f) < 3 {
return nil, BadRequest return nil, BadRequest
} }
req.Method, req.RawUrl, req.Proto = f[0], f[1], f[2]; req.Method, req.RawUrl, req.Proto = f[0], f[1], f[2];
...@@ -572,8 +572,8 @@ func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) { ...@@ -572,8 +572,8 @@ func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {
func parseForm(body string) (data map[string] *vector.StringVector, err os.Error) { func parseForm(body string) (data map[string] *vector.StringVector, err os.Error) {
data = make(map[string] *vector.StringVector); data = make(map[string] *vector.StringVector);
for _, kv := range strings.Split(body, "&") { for _, kv := range strings.Split(body, "&", 0) {
kvPair := strings.Split(kv, "="); kvPair := strings.Split(kv, "=", 2);
var key, value string; var key, value string;
var e os.Error; var e os.Error;
......
...@@ -28,7 +28,7 @@ func pow2(i int) float64 { ...@@ -28,7 +28,7 @@ func pow2(i int) float64 {
// Wrapper around strconv.Atof64. Handles dddddp+ddd (binary exponent) // Wrapper around strconv.Atof64. Handles dddddp+ddd (binary exponent)
// itself, passes the rest on to strconv.Atof64. // itself, passes the rest on to strconv.Atof64.
func myatof64(s string) (f float64, ok bool) { func myatof64(s string) (f float64, ok bool) {
a := strings.Split(s, "p"); a := strings.Split(s, "p", 2);
if len(a) == 2 { if len(a) == 2 {
n, err := strconv.Atoi64(a[0]); n, err := strconv.Atoi64(a[0]);
if err != nil { if err != nil {
...@@ -72,7 +72,7 @@ func myatof64(s string) (f float64, ok bool) { ...@@ -72,7 +72,7 @@ func myatof64(s string) (f float64, ok bool) {
// Wrapper around strconv.Atof32. Handles dddddp+ddd (binary exponent) // Wrapper around strconv.Atof32. Handles dddddp+ddd (binary exponent)
// itself, passes the rest on to strconv.Atof32. // itself, passes the rest on to strconv.Atof32.
func myatof32(s string) (f float32, ok bool) { func myatof32(s string) (f float32, ok bool) {
a := strings.Split(s, "p"); a := strings.Split(s, "p", 2);
if len(a) == 2 { if len(a) == 2 {
n, err := strconv.Atoi(a[0]); n, err := strconv.Atoi(a[0]);
if err != nil { if err != nil {
...@@ -115,7 +115,7 @@ func TestFp(t *testing.T) { ...@@ -115,7 +115,7 @@ func TestFp(t *testing.T) {
if len(line) == 0 || line[0] == '#' { if len(line) == 0 || line[0] == '#' {
continue continue
} }
a := strings.Split(line, " "); a := strings.Split(line, " ", 0);
if len(a) != 4 { if len(a) != 4 {
t.Error("testfp.txt:", lineno, ": wrong field count\n"); t.Error("testfp.txt:", lineno, ": wrong field count\n");
continue; continue;
......
...@@ -7,19 +7,27 @@ package strings ...@@ -7,19 +7,27 @@ package strings
import "utf8" import "utf8"
// Explode splits s into an array of UTF-8 sequences, one per Unicode character (still strings). // explode splits s into an array of UTF-8 sequences, one per Unicode character (still strings) up to a maximum of n (n <= 0 means no limit).
// Invalid UTF-8 sequences become correct encodings of U+FFFD. // Invalid UTF-8 sequences become correct encodings of U+FFFD.
func Explode(s string) []string { func explode(s string, n int) []string {
a := make([]string, utf8.RuneCountInString(s)); if n <= 0 {
n = len(s);
}
a := make([]string, n);
var size, rune int; var size, rune int;
i := 0; na := 0;
for len(s) > 0 { for len(s) > 0 {
if na+1 >= n {
a[na] = s;
na++;
break
}
rune, size = utf8.DecodeRuneInString(s); rune, size = utf8.DecodeRuneInString(s);
s = s[size:len(s)]; s = s[size:len(s)];
a[i] = string(rune); a[na] = string(rune);
i++; na++;
} }
return a return a[0:na]
} }
// Count counts the number of non-overlapping instances of sep in s. // Count counts the number of non-overlapping instances of sep in s.
...@@ -68,27 +76,30 @@ func LastIndex(s, sep string) int { ...@@ -68,27 +76,30 @@ func LastIndex(s, sep string) int {
return -1 return -1
} }
// Split returns the array representing the substrings of s separated by string sep. Adjacent // Split splits the string s around each instance of sep, returning an array of substrings of s.
// occurrences of sep produce empty substrings. If sep is empty, it is the same as Explode. // If sep is empty, Split splits s after each UTF-8 sequence.
func Split(s, sep string) []string { // If n > 0, Split splits s into at most n substrings; the last substring will contain the unsplit remainder.
func Split(s, sep string, n int) []string {
if sep == "" { if sep == "" {
return Explode(s) return explode(s, n)
}
if n <= 0 {
n = Count(s, sep) + 1;
} }
c := sep[0]; c := sep[0];
start := 0; start := 0;
n := Count(s, sep)+1;
a := make([]string, n); a := make([]string, n);
na := 0; na := 0;
for i := 0; i+len(sep) <= len(s); i++ { for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) { if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
a[na] = s[start:i]; a[na] = s[start:i];
na++; na++;
start = i+len(sep); start = i+len(sep);
i += len(sep)-1 i += len(sep)-1;
} }
} }
a[na] = s[start:len(s)]; a[na] = s[start:len(s)];
return a return a[0:na+1]
} }
// Join concatenates the elements of a to create a single string. The separator string // Join concatenates the elements of a to create a single string. The separator string
......
...@@ -83,23 +83,24 @@ func TestLastIndex(t *testing.T) { ...@@ -83,23 +83,24 @@ func TestLastIndex(t *testing.T) {
type ExplodeTest struct { type ExplodeTest struct {
s string; s string;
n int;
a []string; a []string;
} }
var explodetests = []ExplodeTest { var explodetests = []ExplodeTest {
ExplodeTest{ abcd, []string{"a", "b", "c", "d"} }, ExplodeTest{ abcd, 4, []string{"a", "b", "c", "d"} },
ExplodeTest{ faces, []string{"☺", "☻", "☹" } }, ExplodeTest{ faces, 3, []string{"☺", "☻", "☹"} },
ExplodeTest{ abcd, 2, []string{"a", "bcd"} },
} }
func TestExplode(t *testing.T) { func TestExplode(t *testing.T) {
for i := 0; i < len(explodetests); i++ { for _, tt := range explodetests {
tt := explodetests[i]; a := explode(tt.s, tt.n);
a := Explode(tt.s);
if !eq(a, tt.a) { if !eq(a, tt.a) {
t.Errorf("Explode(%q) = %v; want %v", tt.s, a, tt.a); t.Errorf("explode(%q, %d) = %v; want %v", tt.s, tt.n, a, tt.a);
continue; continue;
} }
s := Join(a, ""); s := Join(a, "");
if s != tt.s { if s != tt.s {
t.Errorf(`Join(Explode(%q), "") = %q`, tt.s, s); t.Errorf(`Join(explode(%q, %d), "") = %q`, tt.s, tt.n, s);
} }
} }
} }
...@@ -107,29 +108,33 @@ func TestExplode(t *testing.T) { ...@@ -107,29 +108,33 @@ func TestExplode(t *testing.T) {
type SplitTest struct { type SplitTest struct {
s string; s string;
sep string; sep string;
n int;
a []string; a []string;
} }
var splittests = []SplitTest { var splittests = []SplitTest {
SplitTest{ abcd, "a", []string{"", "bcd"} }, SplitTest{ abcd, "a", 0, []string{"", "bcd"} },
SplitTest{ abcd, "z", []string{"abcd"} }, SplitTest{ abcd, "z", 0, []string{"abcd"} },
SplitTest{ abcd, "", []string{"a", "b", "c", "d"} }, SplitTest{ abcd, "", 0, []string{"a", "b", "c", "d"} },
SplitTest{ commas, ",", []string{"1", "2", "3", "4"} }, SplitTest{ commas, ",", 0, []string{"1", "2", "3", "4"} },
SplitTest{ dots, "...", []string{"1", ".2", ".3", ".4"} }, SplitTest{ dots, "...", 0, []string{"1", ".2", ".3", ".4"} },
SplitTest{ faces, "☹", []string{"☺☻", ""} }, SplitTest{ faces, "☹", 0, []string{"☺☻", ""} },
SplitTest{ faces, "~", []string{faces} }, SplitTest{ faces, "~", 0, []string{faces} },
SplitTest{ faces, "", []string{"☺", "☻", "☹"} }, SplitTest{ faces, "", 0, []string{"☺", "☻", "☹"} },
SplitTest{ "1 2 3 4", " ", 3, []string{"1", "2", "3 4"} },
SplitTest{ "1 2", " ", 3, []string{"1", "2"} },
SplitTest{ "123", "", 2, []string{"1", "23"} },
SplitTest{ "123", "", 17, []string{"1", "2", "3"} },
} }
func TestSplit(t *testing.T) { func TestSplit(t *testing.T) {
for i := 0; i < len(splittests); i++ { for _, tt := range splittests {
tt := splittests[i]; a := Split(tt.s, tt.sep, tt.n);
a := Split(tt.s, tt.sep);
if !eq(a, tt.a) { if !eq(a, tt.a) {
t.Errorf("Split(%q, %q) = %v; want %v", tt.s, tt.sep, a, tt.a); t.Errorf("Split(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, a, tt.a);
continue; continue;
} }
s := Join(a, tt.sep); s := Join(a, tt.sep);
if s != tt.s { if s != tt.s {
t.Errorf("Join(Split(%q, %q), %q) = %q", tt.s, tt.sep, tt.sep, s); t.Errorf("Join(Split(%q, %q, %d), %q) = %q", tt.s, tt.sep, tt.n, tt.sep, s);
} }
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment