Commit 729bc5c0 authored by Russ Cox's avatar Russ Cox

rudimentary string utilities.

R=r
DELTA=314  (306 added, 8 deleted, 0 changed)
OCL=15074
CL=15263
parent 6693730e
......@@ -6,14 +6,6 @@
echo; echo; echo %%%% making lib %%%%; echo
rm -f *.6
for i in fmt.go flag.go container/vector.go sort.go
do
base=$(basename $i .go)
echo 6g -o $GOROOT/pkg/$base.6 $i
6g -o $GOROOT/pkg/$base.6 $i
done
for i in os math
do
echo; echo; echo %%%% making lib/$i %%%%; echo
......@@ -22,3 +14,11 @@ do
cd ..
done
rm -f *.6
for i in fmt.go flag.go container/vector.go rand.go sort.go strings.go
do
base=$(basename $i .go)
echo 6g -o $GOROOT/pkg/$base.6 $i
6g -o $GOROOT/pkg/$base.6 $i
done
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings
// Count UTF-8 sequences in s.
// Assumes s is well-formed.
export func utflen(s string) int {
n := 0;
for i := 0; i < len(s); i++ {
if s[i]&0xC0 != 0x80 {
n++
}
}
return n
}
// Split string into array of UTF-8 sequences (still strings)
export func explode(s string) *[]string {
a := new([]string, utflen(s));
j := 0;
for i := 0; i < len(a); i++ {
ej := j;
ej++;
for ej < len(s) && (s[ej]&0xC0) == 0x80 {
ej++
}
a[i] = s[j:ej];
j = ej
}
return a
}
// Count non-overlapping instances of sep in s.
export func count(s, sep string) int {
if sep == "" {
return utflen(s)+1
}
c := sep[0];
n := 0;
for i := 0; i+len(sep) <= len(s); i++ {
if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
n++;
i += len(sep)-1
}
}
return n
}
// Return index of first instance of sep in s.
export func index(s, sep string) int {
if sep == "" {
return 0
}
c := sep[0];
for i := 0; i+len(sep) <= len(s); i++ {
if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
return i
}
}
return -1
}
// Split string into list of strings at separators
export func split(s, sep string) *[]string {
if sep == "" {
return explode(s)
}
c := sep[0];
start := 0;
n := count(s, sep)+1;
a := new([]string, n);
na := 0;
for i := 0; i+len(sep) <= len(s); i++ {
if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
a[na] = s[start:i];
na++;
start = i+len(sep);
i += len(sep)-1
}
}
a[na] = s[start:len(s)];
return a
}
// Join list of strings with separators between them.
export func join(a *[]string, sep string) string {
if len(a) == 0 {
return 0
}
if len(a) == 1 {
return a[0]
}
n := len(sep) * (len(a)-1);
for i := 0; i < len(a); i++ {
n += len(a[i])
}
b := new([]byte, n);
bp := 0;
for i := 0; i < len(a); i++ {
s := a[i];
for j := 0; j < len(s); j++ {
b[bp] = s[j];
bp++
}
if i + 1 < len(a) {
s = sep;
for j := 0; j < len(s); j++ {
b[bp] = s[j];
bp++
}
}
}
return string(b)
}
// Convert decimal string to integer.
// TODO: Doesn't check for overflow.
export func atoi(s string) (i int, ok bool) {
// empty string bad
if len(s) == 0 {
return 0, false
}
// pick off leading sign
neg := false;
if s[0] == '+' {
s = s[1:len(s)]
} else if s[0] == '-' {
neg = true;
s = s[1:len(s)]
}
// empty string bad
if len(s) == 0 {
return 0, false
}
// pick off zero
if s == "0" {
return 0, true
}
// otherwise, leading zero bad
if s[0] == '0' {
return 0, false
}
// parse number
n := 0;
for i := 0; i < len(s); i++ {
if s[i] < '0' || s[i] > '9' {
return 0, false
}
n = n*10 + int(s[i] - '0')
}
if neg {
n = -n
}
return n, true
}
export func itoa(i int) string {
if i == 0 {
return "0"
}
neg := false; // negative
bigneg := false; // largest negative number
if i < 0 {
neg = true;
i = -i;
if i < 0 {
bigneg = true; // is largest negative int
i-- // now positive
}
}
// Assemble decimal in reverse order.
var b [32]byte;
bp := len(b);
for ; i > 0; i /= 10 {
bp--;
b[bp] = byte(i%10) + '0'
}
if neg { // add sign
bp--;
b[bp] = '-'
}
if bigneg { // account for i-- above
b[len(b)-1]++
}
// BUG return string(b[bp:len(b)])
return string((&b)[bp:len(b)])
}
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// $G $F.go && $L $F.$A && ./$A.out
package main
import strings "strings"
func split(s, sep string) *[]string {
a := strings.split(s, sep);
b := strings.join(a, sep);
if b != s {
print("Split: ", s, " ", sep, " got ", len(a), "\n");
for i := 0; i < len(a); i++ {
print(" a[", i, "] = ", a[i], "\n")
}
panic("split / join "+s+" "+sep)
}
return a
}
func explode(s string) *[]string {
a := strings.explode(s);
b := strings.join(a, "");
if b != s {
panic("explode / join "+s)
}
return a
}
func itoa(i int) string {
s := strings.itoa(i);
n, ok := strings.atoi(s);
if n != i {
print("itoa: ", i, " ", s, "\n");
panic("itoa")
}
return s
}
func main() {
abcd := "abcd";
faces := "☺☻☹";
commas := "1,2,3,4";
dots := "1....2....3....4";
if strings.utflen(abcd) != 4 { panic("utflen abcd") }
if strings.utflen(faces) != 3 { panic("utflen faces") }
if strings.utflen(commas) != 7 { panic("utflen commas") }
{
a := split(abcd, "a");
if len(a) != 2 || a[0] != "" || a[1] != "bcd" { panic("split abcd a") }
}
{
a := split(abcd, "z");
if len(a) != 1 || a[0] != "abcd" { panic("split abcd z") }
}
{
a := split(abcd, "");
if len(a) != 4 || a[0] != "a" || a[1] != "b" || a[2] != "c" || a[3] != "d" { panic("split abcd empty") }
}
{
a := explode(abcd);
if len(a) != 4 || a[0] != "a" || a[1] != "b" || a[2] != "c" || a[3] != "d" { panic("explode abcd") }
}
{
a := split(commas, ",");
if len(a) != 4 || a[0] != "1" || a[1] != "2" || a[2] != "3" || a[3] != "4" { panic("split commas") }
}
{
a := split(dots, "...");
if len(a) != 4 || a[0] != "1" || a[1] != ".2" || a[2] != ".3" || a[3] != ".4" { panic("split dots") }
}
{
a := split(faces, "☹");
if len(a) != 2 || a[0] != "☺☻" || a[1] != "" { panic("split faces 1") }
}
{
a := split(faces, "~");
if len(a) != 1 || a[0] != faces { panic("split faces ~") }
}
{
a := explode(faces);
if len(a) != 3 || a[0] != "☺" || a[1] != "☻" || a[2] != "☹" { panic("explode faces") }
}
{
a := split(faces, "");
if len(a) != 3 || a[0] != "☺" || a[1] != "☻" || a[2] != "☹" { panic("split faces empty") }
}
{
n, ok := strings.atoi("0"); if n != 0 || !ok { panic("atoi 0") }
n, ok = strings.atoi("-1"); if n != -1 || !ok { panic("atoi -1") }
n, ok = strings.atoi("+345"); if n != 345 || !ok { panic("atoi +345") }
n, ok = strings.atoi("9999"); if n != 9999 || !ok { panic("atoi 9999") }
n, ok = strings.atoi("20ba"); if n != 0 || ok { panic("atoi 20ba") }
n, ok = strings.atoi("hello"); if n != 0 || ok { panic("hello") }
}
if itoa(0) != "0" { panic("itoa 0") }
if itoa(12345) != "12345" { panic("itoa 12345") }
if itoa(-1<<31) != "-2147483648" { panic("itoa 1<<31") }
// should work if int == int64: is there some way to know?
// if itoa(-1<<63) != "-9223372036854775808" { panic("itoa 1<<63") }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment