Commit c68ae9d4 authored by Russ Cox's avatar Russ Cox

bytes: add EqualFold

R=golang-dev, r, r
CC=golang-dev
https://golang.org/cl/5123047
parent 4bdf1fc0
......@@ -608,3 +608,58 @@ func Replace(s, old, new []byte, n int) []byte {
w += copy(t[w:], s[start:])
return t[0:w]
}
// EqualFold reports whether s and t, interpreted as UTF-8 strings,
// are equal under Unicode case-folding.
func EqualFold(s, t []byte) bool {
for len(s) != 0 && len(t) != 0 {
// Extract first rune from each.
var sr, tr int
if s[0] < utf8.RuneSelf {
sr, s = int(s[0]), s[1:]
} else {
r, size := utf8.DecodeRune(s)
sr, s = r, s[size:]
}
if t[0] < utf8.RuneSelf {
tr, t = int(t[0]), t[1:]
} else {
r, size := utf8.DecodeRune(t)
tr, t = r, t[size:]
}
// If they match, keep going; if not, return false.
// Easy case.
if tr == sr {
continue
}
// Make sr < tr to simplify what follows.
if tr < sr {
tr, sr = sr, tr
}
// Fast check for ASCII.
if tr < utf8.RuneSelf && 'A' <= sr && sr <= 'Z' {
// ASCII, and sr is upper case. tr must be lower case.
if tr == sr+'a'-'A' {
continue
}
return false
}
// General case. SimpleFold(x) returns the next equivalent rune > x
// or wraps around to smaller values.
r := unicode.SimpleFold(sr)
for r != sr && r < tr {
r = unicode.SimpleFold(r)
}
if r == tr {
continue
}
return false
}
// One string is empty. Are both?
return len(s) == len(t)
}
......@@ -862,3 +862,31 @@ func TestTitle(t *testing.T) {
}
}
}
var EqualFoldTests = []struct {
s, t string
out bool
}{
{"abc", "abc", true},
{"ABcd", "ABcd", true},
{"123abc", "123ABC", true},
{"αβδ", "ΑΒΔ", true},
{"abc", "xyz", false},
{"abc", "XYZ", false},
{"abcdefghijk", "abcdefghijX", false},
{"abcdefghijk", "abcdefghij\u212A", true},
{"abcdefghijK", "abcdefghij\u212A", true},
{"abcdefghijkz", "abcdefghij\u212Ay", false},
{"abcdefghijKz", "abcdefghij\u212Ay", false},
}
func TestEqualFold(t *testing.T) {
for _, tt := range EqualFoldTests {
if out := EqualFold([]byte(tt.s), []byte(tt.t)); out != tt.out {
t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.s, tt.t, out, tt.out)
}
if out := EqualFold([]byte(tt.t), []byte(tt.s)); out != tt.out {
t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.t, tt.s, out, tt.out)
}
}
}
......@@ -584,7 +584,8 @@ func Replace(s, old, new string, n int) string {
return string(t[0:w])
}
// EqualFold returns true if s and t are equal under Unicode case-folding.
// EqualFold reports whether s and t, interpreted as UTF-8 strings,
// are equal under Unicode case-folding.
func EqualFold(s, t string) bool {
for s != "" && t != "" {
// Extract first rune from each string.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment