step 2 of the great buffer shift.

make strings.Buffer handle strings and bytes with comparable efficiency. if ok, next step will be to move this code to bytes.Buffer and terminate strings.Buffer's short happy life. R=rsc DELTA=292 (212 added, 0 deleted, 80 changed) OCL=34837 CL=34849

step 2 of the great buffer shift.
make strings.Buffer handle strings and bytes with comparable efficiency. if ok, next step will be to move this code to bytes.Buffer and terminate strings.Buffer's short happy life. R=rsc DELTA=292 (212 added, 0 deleted, 80 changed) OCL=34837 CL=34849
fed47706 · Rob Pike · de0d762a · fed47706 · fed47706 · fed47706
Commit fed47706 authored Sep 21, 2009 by Rob Pike
5 changed files
--- a/src/pkg/base64/base64_test.go
+++ b/src/pkg/base64/base64_test.go
@@ -112,7 +112,7 @@ func TestDecode(t *testing.T) {

 func TestDecoder(t *testing.T) {
 	for _, p := range pairs {
-		decoder := NewDecoder(StdEncoding, strings.NewBuffer(p.encoded));
+		decoder := NewDecoder(StdEncoding, strings.NewBufferString(p.encoded));
 		dbuf := make([]byte, StdEncoding.DecodedLen(len(p.encoded)));
 		count, err := decoder.Read(dbuf);
 		if err != nil && err != os.EOF {
@@ -129,7 +129,7 @@ func TestDecoder(t *testing.T) {

 func TestDecoderBuffering(t *testing.T) {
 	for bs := 1; bs <= 12; bs++ {
-		decoder := NewDecoder(StdEncoding, strings.NewBuffer(bigtest.encoded));
+		decoder := NewDecoder(StdEncoding, strings.NewBufferString(bigtest.encoded));
 		buf := make([]byte, len(bigtest.decoded) + 12);
 		var total int;
 		for total = 0; total < len(bigtest.decoded); {

--- a/src/pkg/bufio/bufio_test.go
+++ b/src/pkg/bufio/bufio_test.go
@@ -61,12 +61,12 @@ func readBytes(buf *Reader) string {

 func TestReaderSimple(t *testing.T) {
 	data := "hello world";
-	b := NewReader(strings.NewBuffer(data));
+	b := NewReader(strings.NewBufferString(data));
 	if s := readBytes(b); s != "hello world" {
 		t.Errorf("simple hello world test failed: got %q", s);
 	}

-	b = NewReader(newRot13Reader(strings.NewBuffer(data)));
+	b = NewReader(newRot13Reader(strings.NewBufferString(data)));
 	if s := readBytes(b); s != "uryyb jbeyq" {
 		t.Error("rot13 hello world test failed: got %q", s);
 	}
@@ -154,7 +154,7 @@ func TestReader(t *testing.T) {
 					readmaker := readMakers[i];
 					bufreader := bufreaders[j];
 					bufsize := bufsizes[k];
-					read := readmaker.fn(strings.NewBuffer(text));
+					read := readmaker.fn(strings.NewBufferString(text));
 					buf, _ := NewReaderSize(read, bufsize);
 					s := bufreader.fn(buf);
 					if s != text {
@@ -308,7 +308,7 @@ func TestWriteErrors(t *testing.T) {

 func TestNewReaderSizeIdempotent(t *testing.T) {
 	const BufSize = 1000;
-	b, err := NewReaderSize(strings.NewBuffer("hello world"), BufSize);
+	b, err := NewReaderSize(strings.NewBufferString("hello world"), BufSize);
 	if err != nil {
 		t.Error("NewReaderSize create fail", err);
 	}

--- a/src/pkg/gob/encoder_test.go
+++ b/src/pkg/gob/encoder_test.go
@@ -228,7 +228,7 @@ func TestWrongTypeDecoder(t *testing.T) {
 }

 func corruptDataCheck(s string, err os.Error, t *testing.T) {
-	b := strings.NewBuffer(s);
+	b := strings.NewBufferString(s);
 	dec := NewDecoder(b);
 	dec.Decode(new(ET2));
 	if dec.state.err != err {

--- a/src/pkg/strings/buffer.go
+++ b/src/pkg/strings/buffer.go
@@ -6,20 +6,140 @@ package strings

 import "os"

-// Efficient construction of large strings.
+// Efficient construction of large strings and byte arrays.
 // Implements io.Reader and io.Writer.

-// A Buffer is a variable-sized buffer of strings
-// with Read and Write methods.  Appends (writes) are efficient.
+// A Buffer provides efficient construction of large strings
+// and slices of bytes.  It implements io.Reader and io.Writer.
+// Appends (writes) are efficient.
 // The zero value for Buffer is an empty buffer ready to use.
 type Buffer struct {
-	str	[]string;
+	blk	[]block;
 	len	int;
-	byteBuf	[1]byte;
+	oneByte	[1]byte;
+}
+
+// There are two kinds of block: a string or a []byte.
+// When the user writes big strings, we add string blocks;
+// when the user writes big byte slices, we add []byte blocks.
+// Small writes are coalesced onto the end of the last block,
+// whatever it is.
+// This strategy is intended to reduce unnecessary allocation.
+type block interface {
+	Len()	int;
+	String()	string;
+	appendBytes(s []byte);
+	appendString(s string);
+	setSlice(m, n int);
+}
+
+// stringBlocks represent strings. We use pointer receivers
+// so append and setSlice can overwrite the receiver.
+type stringBlock string
+
+func (b *stringBlock) Len() int {
+	return len(*b)
+}
+
+func (b *stringBlock) String() string {
+	return string(*b)
+}
+
+func (b *stringBlock) appendBytes(s []byte) {
+	*b += stringBlock(s)
+}
+
+func (b *stringBlock) appendString(s string) {
+	*b = stringBlock(s)
+}
+
+func (b *stringBlock) setSlice(m, n int) {
+	*b = (*b)[m:n]
+}
+
+// byteBlock represent slices of bytes.  We use pointer receivers
+// so append and setSlice can overwrite the receiver.
+type byteBlock []byte
+
+func (b *byteBlock) Len() int {
+	return len(*b)
+}
+
+func (b *byteBlock) String() string {
+	return string(*b)
+}
+
+func (b *byteBlock) resize(max int) {
+	by := []byte(*b);
+	if cap(by) >= max {
+		by = by[0:max];
+	} else {
+		nby := make([]byte, max, 3*(max+10)/2);
+		copyBytes(nby, 0, by);
+		by = nby;
+	}
+	*b = by;
+}
+
+func (b *byteBlock) appendBytes(s []byte) {
+	curLen := b.Len();
+	b.resize(curLen + len(s));
+	copyBytes([]byte(*b), curLen, s);
+}
+
+func (b *byteBlock) appendString(s string) {
+	curLen := b.Len();
+	b.resize(curLen + len(s));
+	copyString([]byte(*b), curLen, s);
+}
+
+func (b *byteBlock) setSlice(m, n int) {
+	*b = (*b)[m:n]
+}
+
+// Because the user may overwrite the contents of byte slices, we need
+// to make a copy.  Allocation strategy: leave some space on the end so
+// small subsequent writes can avoid another allocation.  The input
+// is known to be non-empty.
+func newByteBlock(s []byte) *byteBlock {
+	l := len(s);
+	// Capacity with room to grow.  If small, allocate a mininum.  If medium,
+	// double the size.  If huge, use the size plus epsilon (room for a newline,
+	// at least).
+	c := l;
+	switch {
+	case l < 32:
+		c = 64
+	case l < 1<<18:
+		c *= 2;
+	default:
+		c += 8
+	}
+	b := make([]byte, l, c);
+	copyBytes(b, 0, s);
+	return &b;
+}
+
+// Copy from block to byte array at offset doff.  Assume there's room.
+func copy(dst []byte, doff int, src block) {
+	switch s := src.(type) {
+	case *stringBlock:
+		copyString(dst, doff, string(*s));
+	case *byteBlock:
+		copyBytes(dst, doff, []byte(*s));
+	}
 }

 // Copy from string to byte array at offset doff.  Assume there's room.
-func copy(dst []byte, doff int, src string) {
+func copyString(dst []byte, doff int, str string) {
+	for soff := 0; soff < len(str); soff++ {
+		dst[doff] = str[soff];
+		doff++;
+	}
+}
+
+// Copy from bytes to byte array at offset doff.  Assume there's room.
+func copyBytes(dst []byte, doff int, src []byte) {
 	for soff := 0; soff < len(src); soff++ {
 		dst[doff] = src[soff];
 		doff++;
@@ -32,9 +152,9 @@ func (b *Buffer) Bytes() []byte {
 	n := b.len;
 	bytes := make([]byte, n);
 	nbytes := 0;
-	for _, s := range b.str {
+	for _, s := range b.blk {
 		copy(bytes, nbytes, s);
-		nbytes += len(s);
+		nbytes += s.Len();
 	}
 	return bytes;
 }
@@ -42,33 +162,33 @@ func (b *Buffer) Bytes() []byte {
 // String returns the contents of the unread portion of the buffer
 // as a string.
 func (b *Buffer) String() string {
-	if len(b.str) == 1 {	// important special case
-		return b.str[0]
+	if len(b.blk) == 1 {	// important special case
+		return b.blk[0].String()
 	}
 	return string(b.Bytes())
 }

 // Len returns the number of bytes in the unread portion of the buffer;
 // b.Len() == len(b.Bytes()) == len(b.String()).
-func (b *Buffer) Len() (n int) {
+func (b *Buffer) Len() int {
 	return b.len
 }

 // Truncate discards all but the first n unread bytes from the buffer.
 func (b *Buffer) Truncate(n int) {
 	b.len = 0;	// recompute during scan.
-	for i, s := range b.str {
+	for i, s := range b.blk {
 		if n <= 0 {
-			b.str = b.str[0:i];
+			b.blk = b.blk[0:i];
 			break;
 		}
-		if n < len(s) {
-			b.str[i] = s[0:n];
+		if l := s.Len(); n < l {
+			b.blk[i].setSlice(0, n);
 			b.len += n;
 			n = 0;
 		} else {
-			b.len += len(s);
-			n -= len(s);
+			b.len += l;
+			n -= l;
 		}
 	}
 }
@@ -76,58 +196,84 @@ func (b *Buffer) Truncate(n int) {
 // Reset resets the buffer so it has no content.
 // b.Reset() is the same as b.Truncate(0).
 func (b *Buffer) Reset() {
-	b.str = b.str[0:0];
+	b.blk = b.blk[0:0];
 	b.len = 0;
 }

 // Can n bytes be appended efficiently to the end of the final string?
 func (b *Buffer) canCombine(n int) bool {
-	return len(b.str) > 0 && n+len(b.str[len(b.str)-1]) <= 64
+	return len(b.blk) > 0 && n+b.blk[len(b.blk)-1].Len() <= 64
 }

 // WriteString appends string s to the buffer.  The return
 // value n is the length of s; err is always nil.
 func (b *Buffer) WriteString(s string) (n int, err os.Error) {
 	n = len(s);
+	if n == 0 {
+		return
+	}
 	b.len += n;
-	numStr := len(b.str);
-	// Special case: If the last string is short and this one is short,
+	numStr := len(b.blk);
+	// Special case: If the last piece is short and this one is short,
 	// combine them and avoid growing the list.
 	if b.canCombine(n) {
-		b.str[numStr-1] += s;
+		b.blk[numStr-1].appendString(s);
 		return
 	}
-	if cap(b.str) == numStr {
-		nstr := make([]string, numStr, 3*(numStr+10)/2);
-		for i, s := range b.str {
+	if cap(b.blk) == numStr {
+		nstr := make([]block, numStr, 3*(numStr+10)/2);
+		for i, s := range b.blk {
 			nstr[i] = s;
 		}
-		b.str = nstr;
+		b.blk = nstr;
 	}
-	b.str = b.str[0:numStr+1];
-	b.str[numStr] = s;
+	b.blk = b.blk[0:numStr+1];
+	// The string is immutable; no need to make a copy.
+	b.blk[numStr] = (*stringBlock)(&s);
 	return
 }

 // Write appends the contents of p to the buffer.  The return
 // value n is the length of p; err is always nil.
 func (b *Buffer) Write(p []byte) (n int, err os.Error) {
-	return b.WriteString(string(p))
+	n = len(p);
+	if n == 0 {
+		return
+	}
+	b.len += n;
+	numStr := len(b.blk);
+	// Special case: If the last piece is short and this one is short,
+	// combine them and avoid growing the list.
+	if b.canCombine(n) {
+		b.blk[numStr-1].appendBytes(p);
+		return
+	}
+	if cap(b.blk) == numStr {
+		nstr := make([]block, numStr, 3*(numStr+10)/2);
+		for i, s := range b.blk {
+			nstr[i] = s;
+		}
+		b.blk = nstr;
+	}
+	b.blk = b.blk[0:numStr+1];
+	// Need to copy the data - user might overwrite the data.
+	b.blk[numStr] = newByteBlock(p);
+	return
 }

 // WriteByte appends the byte c to the buffer.
 // The returned error is always nil, but is included
 // to match bufio.Writer's WriteByte.
 func (b *Buffer) WriteByte(c byte) os.Error {
-	s := string(c);
+	b.oneByte[0] = c;
 	// For WriteByte, canCombine is almost always true so it's worth
 	// doing here.
 	if b.canCombine(1) {
-		b.str[len(b.str)-1] += s;
+		b.blk[len(b.blk)-1].appendBytes(&b.oneByte);
 		b.len++;
 		return nil
 	}
-	b.WriteString(s);
+	b.Write(&b.oneByte);
 	return nil;
 }

@@ -136,22 +282,27 @@ func (b *Buffer) WriteByte(c byte) os.Error {
 // buffer has no data to return, err is os.EOF even if len(p) is zero;
 // otherwise it is nil.
 func (b *Buffer) Read(p []byte) (n int, err os.Error) {
-	if len(b.str) == 0 {
+	if len(b.blk) == 0 {
 		return 0, os.EOF
 	}
-	for len(b.str) > 0 {
-		s := b.str[0];
+	for len(b.blk) > 0 {
+		blk := b.blk[0];
 		m := len(p) - n;
-		if m >= len(s) {
+		if l := blk.Len(); m >= l {
 			// consume all of this string.
-			copy(p, n, s);
-			n += len(s);
-			b.str = b.str[1:len(b.str)];
+			copy(p, n, blk);
+			n += l;
+			b.blk = b.blk[1:len(b.blk)];
 		} else {
-			// consume some of this string; it's the last piece.
-			copy(p, n, s[0:m]);
+			// consume some of this block; it's the last piece.
+			switch b := blk.(type) {
+			case *stringBlock:
+				copyString(p, n, string(*b)[0:m]);
+			case *byteBlock:
+				copyBytes(p, n, []byte(*b)[0:m]);
+			}
 			n += m;
-			b.str[0] = s[m:len(s)];
+			b.blk[0].setSlice(m, l);
 			break;
 		}
 	}
@@ -162,18 +313,28 @@ func (b *Buffer) Read(p []byte) (n int, err os.Error) {
 // ReadByte reads and returns the next byte from the buffer.
 // If no byte is available, it returns error os.EOF.
 func (b *Buffer) ReadByte() (c byte, err os.Error) {
-	if _, err := b.Read(&b.byteBuf); err != nil {
+	if _, err := b.Read(&b.oneByte); err != nil {
 		return 0, err
 	}
-	return b.byteBuf[0], nil
+	return b.oneByte[0], nil
 }

-// NewBuffer creates and initializes a new Buffer
-// using str as its initial contents.
-func NewBuffer(str string) *Buffer {
+// NewBufferString creates and initializes a new Buffer
+// using a string as its initial contents.
+func NewBufferString(str string) *Buffer {
 	b := new(Buffer);
-	b.str = make([]string, 1, 10);	// room to grow
-	b.str[0] = str;
+	b.blk = make([]block, 1, 10);	// room to grow
+	b.blk[0] = (*stringBlock)(&str);
 	b.len = len(str);
 	return b;
 }
+
+// NewBuffer creates and initializes a new Buffer
+// using a byte slice as its initial contents.
+func NewBuffer(by []byte) *Buffer {
+	b := new(Buffer);
+	b.blk = make([]block, 1, 10);	// room to grow
+	b.blk[0] = (*byteBlock)(&by);
+	b.len = len(by);
+	return b;
+}
--- a/src/pkg/strings/buffer_test.go
+++ b/src/pkg/strings/buffer_test.go
@@ -13,10 +13,11 @@ import (

 const N = 10000  // make this bigger for a larger (and slower) test
 var data string  // test data for write tests
+var bytes []byte	// test data; same as data but as a slice.


 func init() {
-	bytes := make([]byte, N);
+	bytes = make([]byte, N);
 	for i := 0; i < N; i++ {
 		bytes[i] = 'a' + byte(i % 26)
 	}
@@ -45,10 +46,10 @@ func check(t *testing.T, testname string, buf *Buffer, s string) {
 }


-// Fill buf through n writes of fus.
+// Fill buf through n writes of string fus.
 // The initial contents of buf corresponds to the string s;
 // the result is the final contents of buf returned as a string.
-func fill(t *testing.T, testname string, buf *Buffer, s string, n int, fus string) string {
+func fillString(t *testing.T, testname string, buf *Buffer, s string, n int, fus string) string {
 	check(t, testname + " (fill 1)", buf, s);
 	for ; n > 0; n-- {
 		m, err := buf.WriteString(fus);
@@ -65,12 +66,38 @@ func fill(t *testing.T, testname string, buf *Buffer, s string, n int, fus strin
 }


+// Fill buf through n writes of byte slice fub.
+// The initial contents of buf corresponds to the string s;
+// the result is the final contents of buf returned as a string.
+func fillBytes(t *testing.T, testname string, buf *Buffer, s string, n int, fub []byte) string {
+	check(t, testname + " (fill 1)", buf, s);
+	for ; n > 0; n-- {
+		m, err := buf.Write(fub);
+		if m != len(fub) {
+			t.Errorf(testname + " (fill 2): m == %d, expected %d\n", m, len(fub));
+		}
+		if err != nil {
+			t.Errorf(testname + " (fill 3): err should always be nil, found err == %s\n", err);
+		}
+		s += string(fub);
+		check(t, testname + " (fill 4)", buf, s);
+	}
+	return s;
+}
+
+
 func TestNewBuffer(t *testing.T) {
-	buf := NewBuffer(data);
+	buf := NewBuffer(bytes);
 	check(t, "NewBuffer", buf, data);
 }


+func TestNewBufferString(t *testing.T) {
+	buf := NewBufferString(data);
+	check(t, "NewBufferString", buf, data);
+}
+
+
 // Empty buf through repeated reads into fub.
 // The initial contents of buf corresponds to the string s.
 func empty(t *testing.T, testname string, buf *Buffer, s string, fub []byte) {
@@ -147,23 +174,43 @@ func TestBasicOperations(t *testing.T) {
 }


-func TestLargeWrites(t *testing.T) {
+func TestLargeStringWrites(t *testing.T) {
+	var buf Buffer;
+	for i := 3; i < 30; i += 3 {
+		s := fillString(t, "TestLargeWrites (1)", &buf, "", 5, data);
+		empty(t, "TestLargeStringWrites (2)", &buf, s, make([]byte, len(data)/i));
+	}
+	check(t, "TestLargeStringWrites (3)", &buf, "");
+}
+
+
+func TestLargeByteWrites(t *testing.T) {
 	var buf Buffer;
 	for i := 3; i < 30; i += 3 {
-		s := fill(t, "TestLargeWrites (1)", &buf, "", 5, data);
-		empty(t, "TestLargeWrites (2)", &buf, s, make([]byte, len(data)/i));
+		s := fillBytes(t, "TestLargeWrites (1)", &buf, "", 5, bytes);
+		empty(t, "TestLargeByteWrites (2)", &buf, s, make([]byte, len(data)/i));
 	}
-	check(t, "TestLargeWrites (3)", &buf, "");
+	check(t, "TestLargeByteWrites (3)", &buf, "");
 }


-func TestLargeReads(t *testing.T) {
+func TestLargeStringReads(t *testing.T) {
 	var buf Buffer;
 	for i := 3; i < 30; i += 3 {
-		s := fill(t, "TestLargeReads (1)", &buf, "", 5, data[0 : len(data)/i]);
+		s := fillString(t, "TestLargeReads (1)", &buf, "", 5, data[0 : len(data)/i]);
 		empty(t, "TestLargeReads (2)", &buf, s, make([]byte, len(data)));
 	}
-	check(t, "TestLargeReads (3)", &buf, "");
+	check(t, "TestLargeStringReads (3)", &buf, "");
+}
+
+
+func TestLargeByteReads(t *testing.T) {
+	var buf Buffer;
+	for i := 3; i < 30; i += 3 {
+		s := fillBytes(t, "TestLargeReads (1)", &buf, "", 5, bytes[0 : len(bytes)/i]);
+		empty(t, "TestLargeReads (2)", &buf, s, make([]byte, len(data)));
+	}
+	check(t, "TestLargeByteReads (3)", &buf, "");
 }


@@ -172,7 +219,11 @@ func TestMixedReadsAndWrites(t *testing.T) {
 	s := "";
 	for i := 0; i < 50; i++ {
 		wlen := rand.Intn(len(data));
-		s = fill(t, "TestMixedReadsAndWrites (1)", &buf, s, 1, data[0 : wlen]);
+		if i % 2 == 0 {
+			s = fillString(t, "TestMixedReadsAndWrites (1)", &buf, s, 1, data[0 : wlen]);
+		} else {
+			s = fillBytes(t, "TestMixedReadsAndWrites (1)", &buf, s, 1, bytes[0 : wlen]);
+		}

 		rlen := rand.Intn(len(data));
 		fub := make([]byte, rlen);