Commit 1b9734b9 authored by Robert Griesemer's avatar Robert Griesemer

1) Fix a problem with tabwriter.Flush: any pending text not yet

   in a cell makes a final cell in that line
   (this showed up as occasionally missing single spaces in
   godoc-formatted declarations that fit on a single line)

2) Cleaned up tabwriter implementation a bit:
   - replaced local unicodeLen() with utf8.RuneCount()
   - instead of having 2 parallel arrays for line widths and sizes,
     have a single array of cells containing a width and size
   - factored code a bit better
   - added more comments
   - added testnames to tabwriter tests
   - added more test cases and fixed a broken test case that
     now works correctly

R=r
DELTA=279  (133 added, 62 deleted, 84 changed)
OCL=30509
CL=30514
parent 5eb5d4d3
......@@ -20,6 +20,16 @@ import (
// ----------------------------------------------------------------------------
// Filter implementation
// A cell represents a segment of text delineated by tabs, form-feed,
// or newline chars. The text itself is stored in a separate buffer;
// cell only describes the segment's size in bytes and width in runes.
//
type cell struct {
size int; // cell size in bytes
width int; // cell width in runes
}
// A Writer is a filter that inserts padding around
// tab-delimited columns in its input to align them
// in the output.
......@@ -37,8 +47,8 @@ import (
// UTF-8 characters.
//
// If a Writer is configured to filter HTML, HTML tags and entities
// are simply passed through and their widths are assumed to be zero
// for formatting purposes.
// are simply passed through. The widths of tags and entities are
// assumed to be zero (tags) and one (entities) for formatting purposes.
//
// The form feed character ('\f') acts like a newline but it also
// terminates all columns in the current line (effectively calling
......@@ -59,22 +69,42 @@ type Writer struct {
flags uint;
// current state
html_char byte; // terminating char of html tag/entity, or 0 ('>', ';', or 0)
buf io.ByteBuffer; // collected text w/o tabs, newlines, or form feed chars
size int; // size of incomplete cell in bytes
width int; // width of incomplete cell in runes up to buf[pos] w/o ignored sections
pos int; // buffer position up to which width of incomplete cell has been computed
lines_size vector.Vector; // list of lines; each line is a list of cell sizes in bytes
lines_width vector.Vector; // list of lines; each line is a list of cell widths in runes
cell cell; // current incomplete cell; cell.width is up to buf[pos] w/o ignored sections
html_char byte; // terminating char of html tag/entity, or 0 ('>', ';', or 0)
lines vector.Vector; // list if lines; each line is a list of cells
widths vector.IntVector; // list of column widths in runes - re-used during formatting
}
func (b *Writer) addLine() {
b.lines.Push(vector.New(0));
}
func (b *Writer) line(i int) *vector.Vector {
return b.lines.At(i).(*vector.Vector);
}
// Reset the current state.
func (b *Writer) reset() {
b.buf.Reset();
b.pos = 0;
b.cell = cell{};
b.html_char = 0;
b.lines.Init(0);
b.widths.Init(0);
b.addLine();
}
// Internal representation (current state):
//
// - all text written is appended to buf; form feed chars, tabs and newlines are stripped away
// - at any given time there is a (possibly empty) incomplete cell at the end
// (the cell starts after a tab or newline)
// (the cell starts after a tab, form feed, or newline)
// - size is the number of bytes belonging to the cell so far
// - width is text width in runes of that cell from the start of the cell to
// position pos; html tags and entities are excluded from this width if html
......@@ -94,12 +124,6 @@ type Writer struct {
// buf start of incomplete cell pos
func (b *Writer) addLine() {
b.lines_size.Push(vector.NewIntVector(0));
b.lines_width.Push(vector.NewIntVector(0));
}
// Formatting can be controlled with these flags.
const (
// Ignore html tags and treat entities (starting with '&'
......@@ -144,22 +168,12 @@ func (b *Writer) Init(output io.Writer, cellwidth, padding int, padchar byte, fl
}
b.flags = flags;
b.lines_size.Init(0);
b.lines_width.Init(0);
b.widths.Init(0);
b.addLine(); // the very first line
b.reset();
return b;
}
func (b *Writer) line(i int) (*vector.IntVector, *vector.IntVector) {
return
b.lines_size.At(i).(*vector.IntVector),
b.lines_width.At(i).(*vector.IntVector);
}
// debugging support (keep code around)
/*
func (b *Writer) dump() {
......@@ -219,19 +233,19 @@ func (b *Writer) writePadding(textw, cellw int) os.Error {
func (b *Writer) writeLines(pos0 int, line0, line1 int) (int, os.Error) {
pos := pos0;
for i := line0; i < line1; i++ {
line_size, line_width := b.line(i);
for j := 0; j < line_size.Len(); j++ {
s, w := line_size.At(j), line_width.At(j);
line := b.line(i);
for j := 0; j < line.Len(); j++ {
c := line.At(j).(cell);
switch {
default: // align left
if err := b.write0(b.buf.Data()[pos : pos + s]); err != nil {
if err := b.write0(b.buf.Data()[pos : pos + c.size]); err != nil {
return pos, err;
}
pos += s;
pos += c.size;
if j < b.widths.Len() {
if err := b.writePadding(w, b.widths.At(j)); err != nil {
if err := b.writePadding(c.width, b.widths.At(j)); err != nil {
return pos, err;
}
}
......@@ -239,24 +253,24 @@ func (b *Writer) writeLines(pos0 int, line0, line1 int) (int, os.Error) {
case b.flags & AlignRight != 0: // align right
if j < b.widths.Len() {
if err := b.writePadding(w, b.widths.At(j)); err != nil {
if err := b.writePadding(c.width, b.widths.At(j)); err != nil {
return pos, err;
}
}
if err := b.write0(b.buf.Data()[pos : pos + s]); err != nil {
if err := b.write0(b.buf.Data()[pos : pos + c.size]); err != nil {
return pos, err;
}
pos += s;
pos += c.size;
}
}
if i+1 == b.lines_size.Len() {
if i+1 == b.lines.Len() {
// last buffered line - we don't have a newline, so just write
// any outstanding buffered data
if err := b.write0(b.buf.Data()[pos : pos + b.size]); err != nil {
if err := b.write0(b.buf.Data()[pos : pos + b.cell.size]); err != nil {
return pos, err;
}
pos += b.size;
pos += b.cell.size;
} else {
// not the last line - write newline
if err := b.write0(newline); err != nil {
......@@ -273,9 +287,9 @@ func (b *Writer) format(pos0 int, line0, line1 int) (pos int, err os.Error) {
column := b.widths.Len();
last := line0;
for this := line0; this < line1; this++ {
line_size, line_width := b.line(this);
line := b.line(this);
if column < line_size.Len() - 1 {
if column < line.Len() - 1 {
// cell exists in this column
// (note that the last cell per line is ignored)
......@@ -289,10 +303,10 @@ func (b *Writer) format(pos0 int, line0, line1 int) (pos int, err os.Error) {
// column block begin
width := b.cellwidth; // minimal width
for ; this < line1; this++ {
line_size, line_width = b.line(this);
if column < line_size.Len() - 1 {
line = b.line(this);
if column < line.Len() - 1 {
// cell exists in this column => update width
w := line_width.At(column) + b.padding;
w := line.At(column).(cell).width + b.padding;
if w > width {
width = w;
}
......@@ -316,40 +330,77 @@ func (b *Writer) format(pos0 int, line0, line1 int) (pos int, err os.Error) {
}
// Flush should be called after the last call to Write to ensure
// that any data buffered in the Writer is written to output.
// Append text to current cell. Only update the cell width if updateWidth
// is set (the cell width can only be updated if we know that we cannot be
// in the middle of a UTF-8 encoded Unicode character).
//
func (b *Writer) Flush() os.Error {
_, err := b.format(0, 0, b.lines_size.Len());
// reset (even in the presence of errors)
b.buf.Reset();
b.size, b.width = 0, 0;
b.pos = 0;
b.lines_size.Init(0);
b.lines_width.Init(0);
b.addLine();
return err;
func (b *Writer) append(text []byte, updateWidth bool) {
b.buf.Write(text);
b.cell.size += len(text);
if updateWidth {
b.cell.width += utf8.RuneCount(b.buf.Data()[b.pos : b.buf.Len()]);
b.pos = b.buf.Len();
}
}
func unicodeLen(buf []byte) int {
l := 0;
for i := 0; i < len(buf); {
if buf[i] < utf8.RuneSelf {
i++;
} else {
rune, size := utf8.DecodeRune(buf[i : len(buf)]);
i += size;
}
l++;
// Start HTML-escape mode.
func (b *Writer) startHTML(ch byte) {
if ch == '<' {
b.html_char = '>';
} else {
b.html_char = ';';
}
}
// Terminate HTML-escape mode. If the HTML text was an entity, its width
// is assumed to be one for formatting purposes; otherwise it assumed to
// be zero.
//
func (b *Writer) terminateHTML() {
if b.html_char == ';' {
// was entity, count as one rune
b.cell.width++;
}
return l;
b.pos = b.buf.Len();
b.html_char = 0;
}
func (b *Writer) append(buf []byte) {
b.buf.Write(buf);
b.size += len(buf);
// Terminate the current cell by adding it to the list of cells of the
// current line. Returns the number of cells in that line.
//
func (b *Writer) terminateCell() int {
line := b.line(b.lines.Len() - 1);
line.Push(b.cell);
b.cell = cell{};
return line.Len();
}
// Flush should be called after the last call to Write to ensure
// that any data buffered in the Writer is written to output. Any
// incomplete HTML tag or entity at the end is simply considered
// complete for formatting purposes.
//
func (b *Writer) Flush() os.Error {
// add current cell if not empty
if b.cell.size > 0 {
if b.html_char != 0 {
// inside html tag/entity - terminate it even if incomplete
b.terminateHTML();
}
b.terminateCell();
}
// format contents of buffer
_, err := b.format(0, 0, b.lines.Len());
// reset, even in the presence of errors
b.reset();
return err;
}
......@@ -358,31 +409,21 @@ func (b *Writer) append(buf []byte) {
// while writing to the underlying output stream.
//
func (b *Writer) Write(buf []byte) (written int, err os.Error) {
i0, n := 0, len(buf);
// split text into cells
for i := 0; i < n; i++ {
ch := buf[i];
i0 := 0;
for i, ch := range buf {
if b.html_char == 0 {
// outside html tag/entity
switch ch {
case '\t', '\n', '\f':
b.append(buf[i0 : i]);
i0 = i + 1; // exclude ch from (next) cell
b.width += unicodeLen(b.buf.Data()[b.pos : b.buf.Len()]);
b.pos = b.buf.Len();
// terminate cell
last_size, last_width := b.line(b.lines_size.Len() - 1);
last_size.Push(b.size);
last_width.Push(b.width);
b.size, b.width = 0, 0;
// end of cell
b.append(buf[i0 : i], true);
i0 = i+1; // exclude ch from (next) cell
ncells := b.terminateCell();
if ch != '\t' {
// terminate line
b.addLine();
if ch == '\f' || last_size.Len() == 1 {
if ch == '\f' || ncells == 1 {
// A '\f' always forces a flush. Otherwise, if the previous
// line has only one cell which does not have an impact on
// the formatting of the following lines (the last cell per
......@@ -395,37 +436,29 @@ func (b *Writer) Write(buf []byte) (written int, err os.Error) {
}
case '<', '&':
// possibly an html tag/entity
if b.flags & FilterHTML != 0 {
b.append(buf[i0 : i]);
// begin of tag/entity
b.append(buf[i0 : i], true);
i0 = i;
b.width += unicodeLen(b.buf.Data()[b.pos : b.buf.Len()]);
b.pos = -1; // preventative - should not be used (will cause index out of bounds)
if ch == '<' {
b.html_char = '>';
} else {
b.html_char = ';';
}
b.startHTML(ch);
}
}
} else {
// inside html tag/entity
if ch == b.html_char {
// reached the end of tag/entity
b.append(buf[i0 : i + 1]);
i0 = i + 1;
if b.html_char == ';' {
b.width++; // count as one char
}
b.pos = b.buf.Len();
b.html_char = 0;
// end of tag/entity
b.append(buf[i0 : i+1], false);
i0 = i+1; // exclude ch from (next) cell
b.terminateHTML();
}
}
}
// append leftover text
b.append(buf[i0 : n]);
return n, nil;
b.append(buf[i0 : len(buf)], false);
return len(buf), nil;
}
......
......@@ -47,31 +47,31 @@ func (b *buffer) String() string {
}
func write(t *testing.T, w *tabwriter.Writer, src string) {
func write(t *testing.T, testname string, w *tabwriter.Writer, src string) {
written, err := io.WriteString(w, src);
if err != nil {
t.Errorf("--- src:\n%s\n--- write error: %v\n", src, err);
t.Errorf("--- test: %s\n--- src:\n%s\n--- write error: %v\n", testname, src, err);
}
if written != len(src) {
t.Errorf("--- src:\n%s\n--- written = %d, len(src) = %d\n", src, written, len(src));
t.Errorf("--- test: %s\n--- src:\n%s\n--- written = %d, len(src) = %d\n", testname, src, written, len(src));
}
}
func verify(t *testing.T, w *tabwriter.Writer, b *buffer, src, expected string) {
func verify(t *testing.T, testname string, w *tabwriter.Writer, b *buffer, src, expected string) {
err := w.Flush();
if err != nil {
t.Errorf("--- src:\n%s\n--- flush error: %v\n", src, err);
t.Errorf("--- test: %s\n--- src:\n%s\n--- flush error: %v\n", testname, src, err);
}
res := b.String();
if res != expected {
t.Errorf("--- src:\n%s\n--- found:\n%s\n--- expected:\n%s\n", src, res, expected)
t.Errorf("--- test: %s\n--- src:\n%s\n--- found:\n%s\n--- expected:\n%s\n", testname, src, res, expected)
}
}
func check(t *testing.T, tabwidth, padding int, padchar byte, flags uint, src, expected string) {
func check(t *testing.T, testname string, tabwidth, padding int, padchar byte, flags uint, src, expected string) {
var b buffer;
b.init(1000);
......@@ -80,30 +80,31 @@ func check(t *testing.T, tabwidth, padding int, padchar byte, flags uint, src, e
// write all at once
b.clear();
write(t, &w, src);
verify(t, &w, &b, src, expected);
write(t, testname, &w, src);
verify(t, testname, &w, &b, src, expected);
// write byte-by-byte
b.clear();
for i := 0; i < len(src); i++ {
write(t, &w, src[i : i+1]);
write(t, testname, &w, src[i : i+1]);
}
verify(t, &w, &b, src, expected);
verify(t, testname, &w, &b, src, expected);
// write using Fibonacci slice sizes
b.clear();
for i, d := 0, 0; i < len(src); {
write(t, &w, src[i : i+d]);
write(t, testname, &w, src[i : i+d]);
i, d = i+d, d+1;
if i+d > len(src) {
d = len(src) - i;
}
}
verify(t, &w, &b, src, expected);
verify(t, testname, &w, &b, src, expected);
}
type entry struct {
testname string;
tabwidth, padding int;
padchar byte;
flags uint;
......@@ -113,108 +114,133 @@ type entry struct {
var tests = []entry {
entry{
"1",
8, 1, '.', 0,
"",
""
},
entry{
"2",
8, 1, '.', 0,
"\n\n\n",
"\n\n\n"
},
entry{
"3",
8, 1, '.', 0,
"a\nb\nc",
"a\nb\nc"
},
entry{
"4a",
8, 1, '.', 0,
"\t", // '\t' terminates an empty cell on last line - nothing to print
""
},
entry{
"4b",
8, 1, '.', tabwriter.AlignRight,
"\t", // '\t' terminates an empty cell on last line - nothing to print
""
},
entry{
"5",
8, 1, '.', 0,
"*\t*",
"**"
"*.......*"
},
entry{
"5b",
8, 1, '.', 0,
"*\t*\n",
"*.......*\n"
},
entry{
"5c",
8, 1, '.', 0,
"*\t*\t",
"*.......*"
},
entry{
"5d",
8, 1, '.', tabwriter.AlignRight,
"*\t*\t",
".......**"
},
entry{
"6",
8, 1, '.', 0,
"\t\n",
"........\n"
},
entry{
"7a",
8, 1, '.', 0,
"a) foo",
"a) foo"
},
entry{
"7b",
8, 1, ' ', 0,
"b) foo\tbar", // "bar" is not in any cell - not formatted, just flushed
"b) foobar"
"b) foo\tbar",
"b) foo bar"
},
entry{
"7c",
8, 1, '.', 0,
"c) foo\tbar\t",
"c) foo..bar"
},
entry{
"7d",
8, 1, '.', 0,
"d) foo\tbar\n",
"d) foo..bar\n"
},
entry{
"7e",
8, 1, '.', 0,
"e) foo\tbar\t\n",
"e) foo..bar.....\n"
},
entry{
"7f",
8, 1, '.', tabwriter.FilterHTML,
"f) f&lt;o\t<b>bar</b>\t\n",
"f) f&lt;o..<b>bar</b>.....\n"
},
entry{
"7g",
8, 1, '.', tabwriter.FilterHTML,
"g) f&lt;o\t<b>bar</b>\t non-terminated entity &amp",
"g) f&lt;o..<b>bar</b>..... non-terminated entity &amp"
},
entry{
"8",
8, 1, '*', 0,
"Hello, world!\n",
"Hello, world!\n"
},
entry{
"9a",
0, 0, '.', 0,
"1\t2\t3\t4\n"
"11\t222\t3333\t44444\n",
......@@ -224,6 +250,7 @@ var tests = []entry {
},
entry{
"9b",
0, 0, '.', tabwriter.FilterHTML,
"1\t2<!---\f--->\t3\t4\n" // \f inside HTML is ignored
"11\t222\t3333\t44444\n",
......@@ -233,6 +260,7 @@ var tests = []entry {
},
entry{
"9c",
0, 0, '.', 0,
"1\t2\t3\t4\f" // \f causes a newline and flush
"11\t222\t3333\t44444\n",
......@@ -242,18 +270,21 @@ var tests = []entry {
},
entry{
"10a",
5, 0, '.', 0,
"1\t2\t3\t4\n",
"1....2....3....4\n"
},
entry{
"10b",
5, 0, '.', 0,
"1\t2\t3\t4\t\n",
"1....2....3....4....\n"
},
entry{
"11",
8, 1, '.', 0,
"本\tb\tc\n"
"aa\t\u672c\u672c\u672c\tcccc\tddddd\n"
......@@ -265,6 +296,7 @@ var tests = []entry {
},
entry{
"12a",
8, 1, ' ', tabwriter.AlignRight,
"a\tè\tc\t\n"
"aa\tèèè\tcccc\tddddd\t\n"
......@@ -276,6 +308,7 @@ var tests = []entry {
},
entry{
"12b",
2, 0, ' ', 0,
"a\tb\tc\n"
"aa\tbbb\tcccc\n"
......@@ -287,6 +320,7 @@ var tests = []entry {
},
entry{
"12c",
8, 1, '_', 0,
"a\tb\tc\n"
"aa\tbbb\tcccc\n"
......@@ -298,6 +332,7 @@ var tests = []entry {
},
entry{
"13a",
4, 1, '-', 0,
"4444\t日本語\t22\t1\t333\n"
"999999999\t22\n"
......@@ -317,6 +352,7 @@ var tests = []entry {
},
entry{
"13b",
4, 3, '.', 0,
"4444\t333\t22\t1\t333\n"
"999999999\t22\n"
......@@ -336,6 +372,7 @@ var tests = []entry {
},
entry{
"13c",
8, 1, '\t', tabwriter.FilterHTML,
"4444\t333\t22\t1\t333\n"
"999999999\t22\n"
......@@ -355,6 +392,7 @@ var tests = []entry {
},
entry{
"14",
0, 2, ' ', tabwriter.AlignRight,
".0\t.3\t2.4\t-5.1\t\n"
"23.0\t12345678.9\t2.4\t-989.4\t\n"
......@@ -375,6 +413,6 @@ var tests = []entry {
func Test(t *testing.T) {
for _, e := range tests {
check(t, e.tabwidth, e.padding, e.padchar, e.flags, e.src, e.expected);
check(t, e.testname, e.tabwidth, e.padding, e.padchar, e.flags, e.src, e.expected);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment