Commit 94b3f6d7 authored by Rick Arnold, committed by Russ Cox

regexp: add Split

As discussed in issue 2762 and on golang-nuts, this CL adds a Split() method
to regexp. It is based on returning the "opposite" of FindAllString() so that
the returned substrings are everything not matched by the expression.

See: https://groups.google.com/forum/?fromgroups=#!topic/golang-nuts/xodBZh9Lh2E

Fixes #2762.

R=remyoudompheng, r, rsc
CC=golang-dev
https://golang.org/cl/6846048
parent a93b15ca
......@@ -5,6 +5,7 @@
package regexp
import (
"reflect"
"strings"
"testing"
)
......@@ -416,6 +417,59 @@ func TestSubexp(t *testing.T) {
}
}
// splitTests is the table that drives TestSplit. For each entry, r is
// compiled as a regular expression, s is split with count n, and the
// result is compared against out. Entry order matters: failures are
// reported by index.
var splitTests = []struct {
	s   string   // input text
	r   string   // pattern source
	n   int      // count argument handed to Split
	out []string // expected substrings
}{
	// Single-character and literal separators.
	{s: "foo:and:bar", r: ":", n: -1, out: []string{"foo", "and", "bar"}},
	{s: "foo:and:bar", r: ":", n: 1, out: []string{"foo:and:bar"}},
	{s: "foo:and:bar", r: ":", n: 2, out: []string{"foo", "and:bar"}},
	{s: "foo:and:bar", r: "foo", n: -1, out: []string{"", ":and:bar"}},
	{s: "foo:and:bar", r: "bar", n: -1, out: []string{"foo:and:", ""}},
	{s: "foo:and:bar", r: "baz", n: -1, out: []string{"foo:and:bar"}},

	// Repetition operators.
	{s: "baabaab", r: "a", n: -1, out: []string{"b", "", "b", "", "b"}},
	{s: "baabaab", r: "a*", n: -1, out: []string{"b", "b", "b"}},
	{s: "baabaab", r: "ba*", n: -1, out: []string{"", "", "", ""}},
	{s: "foobar", r: "f*b*", n: -1, out: []string{"", "o", "o", "a", "r"}},
	{s: "foobar", r: "f+.*b+", n: -1, out: []string{"", "ar"}},
	{s: "foobooboar", r: "o{2}", n: -1, out: []string{"f", "b", "boar"}},

	// Explicit counts, including n == 0.
	{s: "a,b,c,d,e,f", r: ",", n: 3, out: []string{"a", "b", "c,d,e,f"}},
	{s: "a,b,c,d,e,f", r: ",", n: 0, out: nil},

	// Inputs made only of separators, and empty inputs or patterns.
	{s: ",", r: ",", n: -1, out: []string{"", ""}},
	{s: ",,,", r: ",", n: -1, out: []string{"", "", "", ""}},
	{s: "", r: ",", n: -1, out: []string{""}},
	{s: "", r: ".*", n: -1, out: []string{""}},
	{s: "", r: ".+", n: -1, out: []string{""}},
	{s: "", r: "", n: -1, out: []string{}},
	{s: "foobar", r: "", n: -1, out: []string{"f", "o", "o", "b", "a", "r"}},

	// The example from the Split doc comment, and separators at both ends.
	{s: "abaabaccadaaae", r: "a*", n: 5, out: []string{"", "b", "b", "c", "cadaaae"}},
	{s: ":x:y:z:", r: ":", n: -1, out: []string{"", "x", "y", "z", ""}},
}
// TestSplit compiles each pattern in splitTests, splits the entry's input
// with the entry's count, and checks the result against the expected
// slice. When QuoteMeta leaves the pattern unchanged, the result is also
// compared with what strings.SplitN returns for the same arguments.
func TestSplit(t *testing.T) {
	for idx, tc := range splitTests {
		compiled, compileErr := Compile(tc.r)
		if compileErr != nil {
			t.Errorf("#%d: %q: compile error: %s", idx, tc.r, compileErr.Error())
			continue
		}

		got := compiled.Split(tc.s, tc.n)
		if !reflect.DeepEqual(got, tc.out) {
			t.Errorf("#%d: %q: got %q; want %q", idx, tc.r, got, tc.out)
		}

		// The strings.SplitN cross-check only applies when quoting the
		// pattern is a no-op.
		if QuoteMeta(tc.r) != tc.r {
			continue
		}
		viaStrings := strings.SplitN(tc.s, tc.r, tc.n)
		if !reflect.DeepEqual(got, viaStrings) {
			t.Errorf("#%d: Split(%q, %q, %d): regexp vs strings mismatch\nregexp=%q\nstrings=%q", idx, tc.s, tc.r, tc.n, got, viaStrings)
		}
	}
}
func BenchmarkLiteral(b *testing.B) {
x := strings.Repeat("x", 50) + "y"
b.StopTimer()
......
......@@ -1048,3 +1048,52 @@ func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
}
return result
}
// Split cuts s at every match of the expression and returns the pieces of
// s that lie between those matches.
//
// Put differently, the result holds the substrings of s that are not
// contained in the slice returned by FindAllString. For an expression
// that contains no metacharacters, it is equivalent to strings.SplitN.
//
// Example:
//	s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5)
//	// s: ["", "b", "b", "c", "cadaaae"]
//
// The count n limits how many substrings are returned:
//	n > 0: at most n substrings; the last substring will be the unsplit remainder.
//	n == 0: the result is nil (zero substrings)
//	n < 0: all substrings
func (re *Regexp) Split(s string, n int) []string {
	switch {
	case n == 0:
		return nil
	case len(re.expr) > 0 && len(s) == 0:
		// Empty input with a non-empty expression: one empty substring.
		return []string{""}
	}

	locs := re.FindAllStringIndex(s, n)
	pieces := make([]string, 0, len(locs))

	// pieceStart is where the next substring begins (the end of the last
	// separator consumed); sepStart is where that separator began.
	pieceStart, sepStart := 0, 0
	for i := 0; i < len(locs); i++ {
		// With a positive count, stop once n-1 pieces are collected so
		// that the final piece can be the unsplit remainder.
		if n > 0 && len(pieces) >= n-1 {
			break
		}

		sepStart = locs[i][0]
		sepEnd := locs[i][1]
		// A match ending at offset 0 is an empty match at the very start
		// of s; it produces no substring.
		if sepEnd != 0 {
			pieces = append(pieces, s[pieceStart:sepStart])
		}
		pieceStart = sepEnd
	}

	// The remainder is omitted only when the last separator consumed
	// starts at the end of s.
	if sepStart == len(s) {
		return pieces
	}
	return append(pieces, s[pieceStart:])
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment