Commit 54bc760a authored by Matthew Dempsky's avatar Matthew Dempsky Committed by Rob Pike

bufio: handle excessive white space in ScanWords

LGTM=r
R=golang-codereviews, bradfitz, r
CC=golang-codereviews
https://golang.org/cl/109020043
parent 311e2863
......@@ -326,9 +326,6 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
break
}
}
if atEOF && len(data) == 0 {
return 0, nil, nil
}
// Scan until space, marking end of word.
for width, i := 0, start; i < len(data); i += width {
var r rune
......@@ -342,5 +339,5 @@ func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) {
return len(data), data[start:], nil
}
// Request more data.
return 0, nil, nil
return start, nil, nil
}
......@@ -15,6 +15,8 @@ import (
"unicode/utf8"
)
const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
// Test white space table matches the Unicode definition.
func TestSpace(t *testing.T) {
for r := rune(0); r <= utf8.MaxRune; r++ {
......@@ -172,7 +174,6 @@ func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) {
// Test the line splitter, including some carriage returns but no long lines.
func TestScanLongLines(t *testing.T) {
const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
// Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
tmp := new(bytes.Buffer)
buf := new(bytes.Buffer)
......@@ -404,3 +405,17 @@ func TestBadReader(t *testing.T) {
t.Errorf("unexpected error: %v", err)
}
}
func TestScanWordsExcessiveWhiteSpace(t *testing.T) {
const word = "ipsum"
s := strings.Repeat(" ", 4*smallMaxTokenSize) + word
scanner := NewScanner(strings.NewReader(s))
scanner.MaxTokenSize(smallMaxTokenSize)
scanner.Split(ScanWords)
if !scanner.Scan() {
t.Fatal("scan failed: %v", scanner.Err())
}
if token := scanner.Text(); token != word {
t.Fatal("unexpected token: %v", token)
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment