Commit 6ee79973 authored by Rob Pike's avatar Rob Pike

fmt.Fscan: use UnreadRune to preserve data across calls.

Fixes #1055.
(About as well as it can be fixed without changing the API.)

R=rsc
CC=golang-dev
https://golang.org/cl/2156047
parent 23bd214a
......@@ -129,9 +129,13 @@
All arguments to be scanned must be either pointers to basic
types or implementations of the Scanner interface.
Note: Fscan etc. can read one character past the input
they return, which means that a loop calling a scan routine
may skip some of the input. This is usually a problem only
when there is no space between input values.
Note: Fscan etc. can read one character (rune) past the
input they return, which means that a loop calling a scan
routine may skip some of the input. This is usually a
problem only when there is no space between input values.
However, if the reader provided to Fscan implements UnreadRune,
that method will be used to save the character and successive
calls will not lose data. To attach an UnreadRune method
to a reader without that capability, use bufio.NewReader.
*/
package fmt
......@@ -22,6 +22,14 @@ type readRuner interface {
ReadRune() (rune int, size int, err os.Error)
}
// unreadRuner is the interface to something that can unread runes.
// If the object provided to Scan does not satisfy this interface,
// a local buffer will be used to back up the input, but its contents
// will be lost when Scan returns.
type unreadRuner interface {
UnreadRune() os.Error
}
// ScanState represents the scanner state passed to custom scanners.
// Scanners may do rune-at-a-time scanning or ask the ScanState
// to discover the next space-delimited token.
......@@ -29,7 +37,7 @@ type ScanState interface {
// GetRune reads the next rune (Unicode code point) from the input.
GetRune() (rune int, err os.Error)
// UngetRune causes the next call to GetRune to return the rune.
UngetRune(rune int)
UngetRune()
// Width returns the value of the width option and whether it has been set.
// The unit is Unicode code points.
Width() (wid int, ok bool)
......@@ -133,6 +141,7 @@ type ss struct {
buf bytes.Buffer // token accumulator
nlIsSpace bool // whether newline counts as white space
peekRune int // one-rune lookahead
prevRune int // last rune returned by GetRune
atEOF bool // already read EOF
maxWid int // max width of field, in runes
widPresent bool // width was specified
......@@ -142,10 +151,14 @@ type ss struct {
func (s *ss) GetRune() (rune int, err os.Error) {
if s.peekRune >= 0 {
rune = s.peekRune
s.prevRune = rune
s.peekRune = -1
return
}
rune, _, err = s.rr.ReadRune()
if err == nil {
s.prevRune = rune
}
return
}
......@@ -161,11 +174,14 @@ func (s *ss) getRune() (rune int) {
}
if s.peekRune >= 0 {
rune = s.peekRune
s.prevRune = rune
s.peekRune = -1
return
}
rune, _, err := s.rr.ReadRune()
if err != nil {
if err == nil {
s.prevRune = rune
} else if err != nil {
if err == os.EOF {
s.atEOF = true
return EOF
......@@ -198,8 +214,12 @@ func (s *ss) mustGetRune() (rune int) {
}
func (s *ss) UngetRune(rune int) {
s.peekRune = rune
func (s *ss) UngetRune() {
if u, ok := s.rr.(unreadRuner); ok {
u.UnreadRune()
} else {
s.peekRune = s.prevRune
}
}
func (s *ss) error(err os.Error) {
......@@ -334,7 +354,7 @@ func (s *ss) skipSpace(stopAtNewline bool) {
return
}
if !unicode.IsSpace(rune) {
s.UngetRune(rune)
s.UngetRune()
break
}
}
......@@ -352,7 +372,7 @@ func (s *ss) token() string {
break
}
if unicode.IsSpace(rune) {
s.UngetRune(rune)
s.UngetRune()
break
}
s.buf.WriteRune(rune)
......@@ -386,7 +406,7 @@ func (s *ss) accept(ok string) bool {
}
}
if rune != EOF {
s.UngetRune(rune)
s.UngetRune()
}
return false
}
......@@ -681,7 +701,7 @@ func (s *ss) hexByte() (b byte, ok bool) {
return
}
if unicode.IsSpace(rune1) {
s.UngetRune(rune1)
s.UngetRune()
return
}
rune2 := s.mustGetRune()
......@@ -892,7 +912,7 @@ func (s *ss) advance(format string) (i int) {
}
inputc := s.mustGetRune()
if fmtc != inputc {
s.UngetRune(inputc)
s.UngetRune()
return -1
}
i += w
......
......@@ -5,6 +5,7 @@
package fmt_test
import (
"bufio"
. "fmt"
"io"
"os"
......@@ -582,3 +583,24 @@ func TestEOF(t *testing.T) {
t.Error("expected one EOF, got", ec.eofCount)
}
}
// Verify that, at least when using bufio, successive calls to Fscan do not lose runes.
func TestUnreadRuneWithBufio(t *testing.T) {
r := bufio.NewReader(strings.NewReader("123αb"))
var i int
var a string
n, err := Fscanf(r, "%d", &i)
if n != 1 || err != nil {
t.Errorf("reading int expected one item, no errors; got %d %q", n, err)
}
if i != 123 {
t.Errorf("expected 123; got %d", i)
}
n, err = Fscanf(r, "%s", &a)
if n != 1 || err != nil {
t.Errorf("reading string expected one item, no errors; got %d %q", n, err)
}
if a != "αb" {
t.Errorf("expected αb; got %q", a)
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment