Commit ec127547 authored by Rob Pike's avatar Rob Pike

bufio: fix scanning with a final empty token.

The Scan function's interface to the split function was not sufficient
to handle an empty final token in a pure function; state was required.
This was ugly.

We introduce a special error value that a split function can return
that signals that this token is OK, but is the last one and scanning
should stop immediately _after_ this token.

The same effect could be achieved using the same trick (a special
error value) and checking for that error after Scan finishes, but it's
a little clumsy. Providing a published sentinel value in bufio is
cleaner and means everyone can use the same trick. The result
is an error-free scan.

Rewrite the test (that was only barely working) to use the value
and be more robust.

Also write a new example showing how to do it.

Fixes #11836

Change-Id: Iaae77d0f95b4a2efa0175ced94d93c66353079e8
Reviewed-on: https://go-review.googlesource.com/14924Reviewed-by: 's avatarIan Lance Taylor <iant@golang.org>
parent dc6df1b0
......@@ -80,3 +80,32 @@ func ExampleScanner_custom() {
// 5678
// Invalid input: strconv.ParseInt: parsing "1234567901234567890": value out of range
}
// Use a Scanner with a custom split function to parse a comma-separated
// list with an empty final value.
func ExampleScanner_emptyFinalToken() {
// Comma-separated list; last entry is empty.
const input = "1,2,3,4,"
scanner := bufio.NewScanner(strings.NewReader(input))
// Define a split function that separates on commas.
onComma := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
for i := 0; i < len(data); i++ {
if data[i] == ',' {
return i + 1, data[:i], nil
}
}
// There is one final token to be delivered, which may be the empty string.
// Returning bufio.ErrFinalToken here tells Scan there are no more tokens after this
// but does not trigger an error to be returned from Scan itself.
return 0, data, bufio.ErrFinalToken
}
scanner.Split(onComma)
// Scan.
for scanner.Scan() {
fmt.Printf("%q ", scanner.Text())
}
if err := scanner.Err(); err != nil {
fmt.Fprintln(os.Stderr, "reading input:", err)
}
// Output: "1" "2" "3" "4" ""
}
......@@ -38,6 +38,7 @@ type Scanner struct {
err error // Sticky error.
empties int // Count of successive empty tokens.
scanCalled bool // Scan has been called; buffer is in use.
done bool // Scan has finished.
}
// SplitFunc is the signature of the split function used to tokenize the
......@@ -106,6 +107,16 @@ func (s *Scanner) Text() string {
return string(s.token)
}
// ErrFinalToken is a special sentinel error value. It is intended to be
// returned by a Split function to indicate that the token being delivered
// with the error is the last token and scanning should stop after this one.
// After ErrFinalToken is received by Scan, scanning stops with no error.
// The value is useful to stop processing early or when it is necessary to
// deliver a final empty token. One could achieve the same behavior
// with a custom error value but providing one here is tidier.
// See the emptyFinalToken example for a use of this value.
var ErrFinalToken = errors.New("final token")
// Scan advances the Scanner to the next token, which will then be
// available through the Bytes or Text method. It returns false when the
// scan stops, either by reaching the end of the input or an error.
......@@ -115,6 +126,9 @@ func (s *Scanner) Text() string {
// Scan panics if the split function returns 100 empty tokens without
// advancing the input. This is a common error mode for scanners.
func (s *Scanner) Scan() bool {
if s.done {
return false
}
s.scanCalled = true
// Loop until we have a token.
for {
......@@ -124,6 +138,11 @@ func (s *Scanner) Scan() bool {
if s.end > s.start || s.err != nil {
advance, token, err := s.split(s.buf[s.start:s.end], s.err != nil)
if err != nil {
if err == ErrFinalToken {
s.token = token
s.done = true
return true
}
s.setErr(err)
return false
}
......
......@@ -429,33 +429,37 @@ func commaSplit(data []byte, atEOF bool) (advance int, token []byte, err error)
return i + 1, data[:i], nil
}
}
if !atEOF {
return 0, nil, nil
}
return 0, data, nil
return 0, data, ErrFinalToken
}
func TestEmptyTokens(t *testing.T) {
s := NewScanner(strings.NewReader("1,2,3,"))
values := []string{"1", "2", "3", ""}
func testEmptyTokens(t *testing.T, text string, values []string) {
s := NewScanner(strings.NewReader(text))
s.Split(commaSplit)
var i int
for i = 0; i < len(values); i++ {
if !s.Scan() {
break
for i = 0; s.Scan(); i++ {
if i >= len(values) {
t.Fatalf("got %d fields, expected %d", i+1, len(values))
}
if s.Text() != values[i] {
t.Errorf("%d: expected %q got %q", i, values[i], s.Text())
}
}
if i != len(values) {
t.Errorf("got %d fields, expected %d", i, len(values))
t.Fatalf("got %d fields, expected %d", i, len(values))
}
if err := s.Err(); err != nil {
t.Fatal(err)
}
}
func TestEmptyTokens(t *testing.T) {
testEmptyTokens(t, "1,2,3,", []string{"1", "2", "3", ""})
}
func TestWithNoEmptyTokens(t *testing.T) {
testEmptyTokens(t, "1,2,3", []string{"1", "2", "3"})
}
func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
if len(data) > 0 {
return 1, data[:1], nil
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment