Commit 54bc760a, authored by Matthew Dempsky, committed by Rob Pike

bufio: handle excessive white space in ScanWords

LGTM=r
R=golang-codereviews, bradfitz, r
CC=golang-codereviews
https://golang.org/cl/109020043
parent 311e2863
...@@ -326,9 +326,6 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) { ...@@ -326,9 +326,6 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
break break
} }
} }
if atEOF && len(data) == 0 {
return 0, nil, nil
}
// Scan until space, marking end of word. // Scan until space, marking end of word.
for width, i := 0, start; i < len(data); i += width { for width, i := 0, start; i < len(data); i += width {
var r rune var r rune
...@@ -342,5 +339,5 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) { ...@@ -342,5 +339,5 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
return len(data), data[start:], nil return len(data), data[start:], nil
} }
// Request more data. // Request more data.
return 0, nil, nil return start, nil, nil
} }
...@@ -15,6 +15,8 @@ import ( ...@@ -15,6 +15,8 @@ import (
"unicode/utf8" "unicode/utf8"
) )
// smallMaxTokenSize is the token-size limit handed to Scanner.MaxTokenSize by
// the scanner tests. It is declared at package level so that more than one
// test can use it.
const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
// Test white space table matches the Unicode definition. // Test white space table matches the Unicode definition.
func TestSpace(t *testing.T) { func TestSpace(t *testing.T) {
for r := rune(0); r <= utf8.MaxRune; r++ { for r := rune(0); r <= utf8.MaxRune; r++ {
...@@ -172,7 +174,6 @@ func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) { ...@@ -172,7 +174,6 @@ func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) {
// Test the line splitter, including some carriage returns but no long lines. // Test the line splitter, including some carriage returns but no long lines.
func TestScanLongLines(t *testing.T) { func TestScanLongLines(t *testing.T) {
const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize. // Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
tmp := new(bytes.Buffer) tmp := new(bytes.Buffer)
buf := new(bytes.Buffer) buf := new(bytes.Buffer)
...@@ -404,3 +405,17 @@ func TestBadReader(t *testing.T) { ...@@ -404,3 +405,17 @@ func TestBadReader(t *testing.T) {
t.Errorf("unexpected error: %v", err) t.Errorf("unexpected error: %v", err)
} }
} }
// TestScanWordsExcessiveWhiteSpace checks that ScanWords can skip a run of
// leading white space that is several times longer than the scanner's maximum
// token size and still return the word that follows it.
func TestScanWordsExcessiveWhiteSpace(t *testing.T) {
	const word = "ipsum"
	// The leading blanks alone are four times the token-size limit set below.
	s := strings.Repeat(" ", 4*smallMaxTokenSize) + word
	scanner := NewScanner(strings.NewReader(s))
	scanner.MaxTokenSize(smallMaxTokenSize)
	scanner.Split(ScanWords)
	if !scanner.Scan() {
		// Fatalf, not Fatal: the message contains a formatting verb.
		t.Fatalf("scan failed: %v", scanner.Err())
	}
	if token := scanner.Text(); token != word {
		t.Fatalf("unexpected token: got %q, want %q", token, word)
	}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment