Commit 6cbdeb3f authored by Robert Griesemer's avatar Robert Griesemer

- fixed bug with unicode text formatting: the number of bytes

  per rune cannot be computed correctly if we have only parts
  of a rune - delay computation
- added html filtering mode: html tags and entities are ignored
  for width computations
- expanded tests:
  - extra tests for html text
  - extra tests that write text in various portions

R=r
DELTA=227  (126 added, 20 deleted, 81 changed)
OCL=20833
CL=20835
parent 92a1190c
...@@ -13,7 +13,7 @@ import ( ...@@ -13,7 +13,7 @@ import (
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// ByteArray // Basic ByteArray support
type ByteArray struct { type ByteArray struct {
a *[]byte; a *[]byte;
...@@ -25,6 +25,11 @@ func (b *ByteArray) Init(initial_size int) { ...@@ -25,6 +25,11 @@ func (b *ByteArray) Init(initial_size int) {
} }
func (b *ByteArray) Len() int {
return len(b.a);
}
func (b *ByteArray) Clear() { func (b *ByteArray) Clear() {
b.a = b.a[0 : 0]; b.a = b.a[0 : 0];
} }
...@@ -77,15 +82,16 @@ func (b *ByteArray) Append(s *[]byte) { ...@@ -77,15 +82,16 @@ func (b *ByteArray) Append(s *[]byte) {
// //
// Formatting can be controlled via parameters: // Formatting can be controlled via parameters:
// //
// cellwidth minimal cell width // cellwidth minimal cell width
// padding additional cell padding // padding additional cell padding
// padchar ASCII char used for padding // padchar ASCII char used for padding
// if padchar == '\t', the Writer will assume that the // if padchar == '\t', the Writer will assume that the
// width of a '\t' in the formatted output is tabwith, // width of a '\t' in the formatted output is cellwidth,
// and cells are left-aligned independent of align_left // and cells are left-aligned independent of align_left
// (for correct-looking results, cellwidth must correspond // (for correct-looking results, cellwidth must correspond
// to the tabwidth in the editor used to look at the result) // to the tabwidth in the viewer displaying the result)
// filter_html ignores html tags and handles entities (starting with '&'
// and ending in ';') as single characters (width = 1)
export type Writer struct { export type Writer struct {
// TODO should not export any of the fields // TODO should not export any of the fields
...@@ -95,16 +101,42 @@ export type Writer struct { ...@@ -95,16 +101,42 @@ export type Writer struct {
padding int; padding int;
padbytes [8]byte; padbytes [8]byte;
align_left bool; align_left bool;
filter_html bool;
// current state // current state
html_char byte; // terminating char of html tag/entity, or 0 ('>', ';', or 0)
buf ByteArray; // collected text w/o tabs and newlines buf ByteArray; // collected text w/o tabs and newlines
size int; // size of last incomplete cell in bytes size int; // size of incomplete cell in bytes
width int; // width of last incomplete cell in runes width int; // width of incomplete cell in runes up to buf[pos] w/o ignored sections
pos int; // buffer position up to which width of incomplete cell has been computed
lines_size array.Array; // list of lines; each line is a list of cell sizes in bytes lines_size array.Array; // list of lines; each line is a list of cell sizes in bytes
lines_width array.Array; // list of lines; each line is a list of cell widths in runes lines_width array.Array; // list of lines; each line is a list of cell widths in runes
widths array.IntArray; // list of column widths in runes - re-used during formatting widths array.IntArray; // list of column widths in runes - re-used during formatting
} }
// Internal representation (current state):
//
// - all text written is appended to buf; tabs and newlines are stripped away
// - at any given time there is a (possibly empty) incomplete cell at the end
// (the cell starts after a tab or newline)
// - size is the number of bytes belonging to the cell so far
// - width is text width in runes of that cell from the start of the cell to
// position pos; html tags and entities are excluded from this width if html
// filtering is enabled
// - the sizes and widths of processed text are kept in the lines_size and
// lines_width arrays, which contain an array of sizes or widths for each line
// - the widths array is a temporary array with current widths used during
// formatting; it is kept in Writer because it's re-used
//
// |<---------- size ---------->|
// | |
// |<- width ->|<- ignored ->| |
// | | | |
// [---processed---tab------------<tag>...</tag>...]
// ^ ^ ^
// | | |
// buf start of incomplete cell pos
func (b *Writer) AddLine() { func (b *Writer) AddLine() {
b.lines_size.Push(array.NewIntArray(0)); b.lines_size.Push(array.NewIntArray(0));
...@@ -112,7 +144,7 @@ func (b *Writer) AddLine() { ...@@ -112,7 +144,7 @@ func (b *Writer) AddLine() {
} }
func (b *Writer) Init(writer io.Write, cellwidth, padding int, padchar byte, align_left bool) *Writer { func (b *Writer) Init(writer io.Write, cellwidth, padding int, padchar byte, align_left, filter_html bool) *Writer {
if cellwidth < 0 { if cellwidth < 0 {
panic("negative cellwidth"); panic("negative cellwidth");
} }
...@@ -126,7 +158,8 @@ func (b *Writer) Init(writer io.Write, cellwidth, padding int, padchar byte, ali ...@@ -126,7 +158,8 @@ func (b *Writer) Init(writer io.Write, cellwidth, padding int, padchar byte, ali
b.padbytes[i] = padchar; b.padbytes[i] = padchar;
} }
b.align_left = align_left || padchar == '\t'; // tab enforces left-alignment b.align_left = align_left || padchar == '\t'; // tab enforces left-alignment
b.filter_html = filter_html;
b.buf.Init(1024); b.buf.Init(1024);
b.lines_size.Init(0); b.lines_size.Init(0);
b.lines_width.Init(0); b.lines_width.Init(0);
...@@ -209,7 +242,7 @@ func (b *Writer) WriteLines(pos0 int, line0, line1 int) (pos int, err *os.Error) ...@@ -209,7 +242,7 @@ func (b *Writer) WriteLines(pos0 int, line0, line1 int) (pos int, err *os.Error)
s, w := line_size.At(j), line_width.At(j); s, w := line_size.At(j), line_width.At(j);
if b.align_left { if b.align_left {
err = b.Write0(b.buf.a[pos : pos + s]); err = b.Write0(b.buf.Slice(pos, pos + s));
if err != nil { if err != nil {
goto exit; goto exit;
} }
...@@ -229,7 +262,7 @@ func (b *Writer) WriteLines(pos0 int, line0, line1 int) (pos int, err *os.Error) ...@@ -229,7 +262,7 @@ func (b *Writer) WriteLines(pos0 int, line0, line1 int) (pos int, err *os.Error)
goto exit; goto exit;
} }
} }
err = b.Write0(b.buf.a[pos : pos + s]); err = b.Write0(b.buf.Slice(pos, pos + s));
if err != nil { if err != nil {
goto exit; goto exit;
} }
...@@ -240,9 +273,8 @@ func (b *Writer) WriteLines(pos0 int, line0, line1 int) (pos int, err *os.Error) ...@@ -240,9 +273,8 @@ func (b *Writer) WriteLines(pos0 int, line0, line1 int) (pos int, err *os.Error)
if i+1 == b.lines_size.Len() { if i+1 == b.lines_size.Len() {
// last buffered line - we don't have a newline, so just write // last buffered line - we don't have a newline, so just write
// any outstanding buffered data // any outstanding buffered data
err = b.Write0(b.buf.a[pos : pos + b.size]); err = b.Write0(b.buf.Slice(pos, pos + b.size));
pos += b.size; pos += b.size;
b.size, b.width = 0, 0;
} else { } else {
// not the last line - write newline // not the last line - write newline
err = b.Write0(Newline); err = b.Write0(Newline);
...@@ -308,6 +340,19 @@ exit: ...@@ -308,6 +340,19 @@ exit:
} }
/* export */ func (b *Writer) Flush() *os.Error {
dummy, err := b.Format(0, 0, b.lines_size.Len());
// reset (even in the presence of errors)
b.buf.Clear();
b.size, b.width = 0, 0;
b.pos = 0;
b.lines_size.Init(0);
b.lines_width.Init(0);
b.AddLine();
return err;
}
func UnicodeLen(buf *[]byte) int { func UnicodeLen(buf *[]byte) int {
l := 0; l := 0;
for i := 0; i < len(buf); { for i := 0; i < len(buf); {
...@@ -326,50 +371,71 @@ func UnicodeLen(buf *[]byte) int { ...@@ -326,50 +371,71 @@ func UnicodeLen(buf *[]byte) int {
func (b *Writer) Append(buf *[]byte) { func (b *Writer) Append(buf *[]byte) {
b.buf.Append(buf); b.buf.Append(buf);
b.size += len(buf); b.size += len(buf);
b.width += UnicodeLen(buf);
}
/* export */ func (b *Writer) Flush() *os.Error {
dummy, err := b.Format(0, 0, b.lines_size.Len());
// reset (even in the presence of errors)
b.buf.Clear();
b.size, b.width = 0, 0;
b.lines_size.Init(0);
b.lines_width.Init(0);
b.AddLine();
return err;
} }
/* export */ func (b *Writer) Write(buf *[]byte) (written int, err *os.Error) { /* export */ func (b *Writer) Write(buf *[]byte) (written int, err *os.Error) {
i0, n := 0, len(buf); i0, n := 0, len(buf);
// split text into cells // split text into cells
for i := 0; i < n; i++ { for i := 0; i < n; i++ {
if ch := buf[i]; ch == '\t' || ch == '\n' { ch := buf[i];
b.Append(buf[i0 : i]);
i0 = i + 1; // exclude ch from (next) cell if b.html_char == 0 {
// outside html tag/entity
// terminate cell switch ch {
last_size, last_width := b.Line(b.lines_size.Len() - 1); case '\t', '\n':
last_size.Push(b.size); b.Append(buf[i0 : i]);
last_width.Push(b.width); i0 = i + 1; // exclude ch from (next) cell
b.size, b.width = 0, 0; b.width += UnicodeLen(b.buf.Slice(b.pos, b.buf.Len()));
b.pos = b.buf.Len();
if ch == '\n' {
b.AddLine(); // terminate cell
if last_size.Len() == 1 { last_size, last_width := b.Line(b.lines_size.Len() - 1);
// The previous line has only one cell which does not have last_size.Push(b.size);
// an impact on the formatting of the following lines (the last_width.Push(b.width);
// last cell per line is ignored by Format), thus we can b.size, b.width = 0, 0;
// flush the Writer contents.
err = b.Flush(); if ch == '\n' {
if err != nil { b.AddLine();
return i0, err; if last_size.Len() == 1 {
// The previous line has only one cell which does not have
// an impact on the formatting of the following lines (the
// last cell per line is ignored by Format), thus we can
// flush the Writer contents.
err = b.Flush();
if err != nil {
return i0, err;
}
}
}
case '<', '&':
if b.filter_html {
b.Append(buf[i0 : i]);
i0 = i;
b.width += UnicodeLen(b.buf.Slice(b.pos, b.buf.Len()));
b.pos = -1; // preventative - should not be used (will cause index out of bounds)
if ch == '<' {
b.html_char = '>';
} else {
b.html_char = ';';
} }
} }
} }
} else {
// inside html tag/entity
if ch == b.html_char {
// reached the end of tag/entity
b.Append(buf[i0 : i + 1]);
i0 = i + 1;
if b.html_char == ';' {
b.width++; // count as one char
}
b.pos = b.buf.Len();
b.html_char = 0;
}
} }
} }
...@@ -379,6 +445,6 @@ func (b *Writer) Append(buf *[]byte) { ...@@ -379,6 +445,6 @@ func (b *Writer) Append(buf *[]byte) {
} }
export func New(writer io.Write, cellwidth, padding int, padchar byte, align_left bool) *Writer { export func New(writer io.Write, cellwidth, padding int, padchar byte, align_left, filter_html bool) *Writer {
return new(Writer).Init(writer, cellwidth, padding, padchar, align_left) return new(Writer).Init(writer, cellwidth, padding, padchar, align_left, filter_html)
} }
...@@ -22,6 +22,11 @@ func (b *Buffer) Init(n int) { ...@@ -22,6 +22,11 @@ func (b *Buffer) Init(n int) {
} }
func (b *Buffer) Clear() {
b.a = b.a[0 : 0];
}
func (b *Buffer) Write(buf *[]byte) (written int, err *os.Error) { func (b *Buffer) Write(buf *[]byte) (written int, err *os.Error) {
n := len(b.a); n := len(b.a);
m := len(buf); m := len(buf);
...@@ -42,22 +47,19 @@ func (b *Buffer) String() string { ...@@ -42,22 +47,19 @@ func (b *Buffer) String() string {
} }
func Check(t *testing.T, tabwidth, padding int, padchar byte, align_left bool, src, expected string) { func Write(t *testing.T, w *tabwriter.Writer, src string) {
var b Buffer; written, err := io.WriteString(w, src);
b.Init(1000);
var w tabwriter.Writer;
w.Init(&b, tabwidth, padding, padchar, align_left);
written, err := io.WriteString(&w, src);
if err != nil { if err != nil {
t.Errorf("--- src:\n%s\n--- write error: %v\n", src, err); t.Errorf("--- src:\n%s\n--- write error: %v\n", src, err);
} }
if written != len(src) { if written != len(src) {
t.Errorf("--- src:\n%s\n--- written = %d, len(src) = %d\n", src, written, len(src)); t.Errorf("--- src:\n%s\n--- written = %d, len(src) = %d\n", src, written, len(src));
} }
}
err = w.Flush(); func Verify(t *testing.T, w *tabwriter.Writer, b *Buffer, src, expected string) {
err := w.Flush();
if err != nil { if err != nil {
t.Errorf("--- src:\n%s\n--- flush error: %v\n", src, err); t.Errorf("--- src:\n%s\n--- flush error: %v\n", src, err);
} }
...@@ -69,105 +71,143 @@ func Check(t *testing.T, tabwidth, padding int, padchar byte, align_left bool, s ...@@ -69,105 +71,143 @@ func Check(t *testing.T, tabwidth, padding int, padchar byte, align_left bool, s
} }
func Check(t *testing.T, tabwidth, padding int, padchar byte, align_left, filter_html bool, src, expected string) {
var b Buffer;
b.Init(1000);
var w tabwriter.Writer;
w.Init(&b, tabwidth, padding, padchar, align_left, filter_html);
// write all at once
b.Clear();
Write(t, &w, src);
Verify(t, &w, &b, src, expected);
// write byte-by-byte
b.Clear();
for i := 0; i < len(src); i++ {
Write(t, &w, src[i : i+1]);
}
Verify(t, &w, &b, src, expected);
// write using Fibonacci slice sizes
b.Clear();
for i, d := 0, 0; i < len(src); {
Write(t, &w, src[i : i+d]);
i, d = i+d, d+1;
if i+d > len(src) {
d = len(src) - i;
}
}
Verify(t, &w, &b, src, expected);
}
export func Test(t *testing.T) { export func Test(t *testing.T) {
Check( Check(
t, 8, 1, '.', true, t, 8, 1, '.', true, false,
"", "",
"" ""
); );
Check( Check(
t, 8, 1, '.', true, t, 8, 1, '.', true, false,
"\n\n\n", "\n\n\n",
"\n\n\n" "\n\n\n"
); );
Check( Check(
t, 8, 1, '.', true, t, 8, 1, '.', true, false,
"a\nb\nc", "a\nb\nc",
"a\nb\nc" "a\nb\nc"
); );
Check( Check(
t, 8, 1, '.', true, t, 8, 1, '.', true, false,
"\t", // '\t' terminates an empty cell on last line - nothing to print "\t", // '\t' terminates an empty cell on last line - nothing to print
"" ""
); );
Check( Check(
t, 8, 1, '.', false, t, 8, 1, '.', false, false,
"\t", // '\t' terminates an empty cell on last line - nothing to print "\t", // '\t' terminates an empty cell on last line - nothing to print
"" ""
); );
Check( Check(
t, 8, 1, '.', true, t, 8, 1, '.', true, false,
"*\t*", "*\t*",
"**" "**"
); );
Check( Check(
t, 8, 1, '.', true, t, 8, 1, '.', true, false,
"*\t*\n", "*\t*\n",
"*.......*\n" "*.......*\n"
); );
Check( Check(
t, 8, 1, '.', true, t, 8, 1, '.', true, false,
"*\t*\t", "*\t*\t",
"*.......*" "*.......*"
); );
Check( Check(
t, 8, 1, '.', false, t, 8, 1, '.', false, false,
"*\t*\t", "*\t*\t",
".......**" ".......**"
); );
Check( Check(
t, 8, 1, '.', true, t, 8, 1, '.', true, false,
"\t\n", "\t\n",
"........\n" "........\n"
); );
Check( Check(
t, 8, 1, '.', true, t, 8, 1, '.', true, false,
"a) foo", "a) foo",
"a) foo" "a) foo"
); );
Check( Check(
t, 8, 1, ' ', true, t, 8, 1, ' ', true, false,
"b) foo\tbar", // "bar" is not in any cell - not formatted, just flushed "b) foo\tbar", // "bar" is not in any cell - not formatted, just flushed
"b) foobar" "b) foobar"
); );
Check( Check(
t, 8, 1, '.', true, t, 8, 1, '.', true, false,
"c) foo\tbar\t", "c) foo\tbar\t",
"c) foo..bar" "c) foo..bar"
); );
Check( Check(
t, 8, 1, '.', true, t, 8, 1, '.', true, false,
"d) foo\tbar\n", "d) foo\tbar\n",
"d) foo..bar\n" "d) foo..bar\n"
); );
Check( Check(
t, 8, 1, '.', true, t, 8, 1, '.', true, false,
"e) foo\tbar\t\n", "e) foo\tbar\t\n",
"e) foo..bar.....\n" "e) foo..bar.....\n"
); );
Check( Check(
t, 8, 1, '*', true, t, 8, 1, '.', true, true,
"e) f&lt;o\t<b>bar</b>\t\n",
"e) f&lt;o..<b>bar</b>.....\n"
);
Check(
t, 8, 1, '*', true, false,
"Hello, world!\n", "Hello, world!\n",
"Hello, world!\n" "Hello, world!\n"
); );
Check( Check(
t, 0, 0, '.', true, t, 0, 0, '.', true, false,
"1\t2\t3\t4\n" "1\t2\t3\t4\n"
"11\t222\t3333\t44444\n", "11\t222\t3333\t44444\n",
...@@ -176,30 +216,30 @@ export func Test(t *testing.T) { ...@@ -176,30 +216,30 @@ export func Test(t *testing.T) {
); );
Check( Check(
t, 5, 0, '.', true, t, 5, 0, '.', true, false,
"1\t2\t3\t4\n", "1\t2\t3\t4\n",
"1....2....3....4\n" "1....2....3....4\n"
); );
Check( Check(
t, 5, 0, '.', true, t, 5, 0, '.', true, false,
"1\t2\t3\t4\t\n", "1\t2\t3\t4\t\n",
"1....2....3....4....\n" "1....2....3....4....\n"
); );
Check( Check(
t, 8, 1, ' ', true, t, 8, 1, '.', true, false,
"本\tb\tc\n" "本\tb\tc\n"
"aa\t\u672c\u672c\u672c\tcccc\tddddd\n" "aa\t\u672c\u672c\u672c\tcccc\tddddd\n"
"aaa\tbbbb\n", "aaa\tbbbb\n",
"本 b c\n" "本.......b.......c\n"
"aa 本本本 cccc ddddd\n" "aa......本本本.....cccc....ddddd\n"
"aaa bbbb\n" "aaa.....bbbb\n"
); );
Check( Check(
t, 8, 1, ' ', false, t, 8, 1, ' ', false, false,
"a\tè\tc\t\n" "a\tè\tc\t\n"
"aa\tèèè\tcccc\tddddd\t\n" "aa\tèèè\tcccc\tddddd\t\n"
"aaa\tèèèè\t\n", "aaa\tèèèè\t\n",
...@@ -210,7 +250,7 @@ export func Test(t *testing.T) { ...@@ -210,7 +250,7 @@ export func Test(t *testing.T) {
); );
Check( Check(
t, 2, 0, ' ', true, t, 2, 0, ' ', true, false,
"a\tb\tc\n" "a\tb\tc\n"
"aa\tbbb\tcccc\n" "aa\tbbb\tcccc\n"
"aaa\tbbbb\n", "aaa\tbbbb\n",
...@@ -221,7 +261,7 @@ export func Test(t *testing.T) { ...@@ -221,7 +261,7 @@ export func Test(t *testing.T) {
); );
Check( Check(
t, 8, 1, '_', true, t, 8, 1, '_', true, false,
"a\tb\tc\n" "a\tb\tc\n"
"aa\tbbb\tcccc\n" "aa\tbbb\tcccc\n"
"aaa\tbbbb\n", "aaa\tbbbb\n",
...@@ -232,7 +272,7 @@ export func Test(t *testing.T) { ...@@ -232,7 +272,7 @@ export func Test(t *testing.T) {
); );
Check( Check(
t, 4, 1, '-', true, t, 4, 1, '-', true, false,
"4444\t日本語\t22\t1\t333\n" "4444\t日本語\t22\t1\t333\n"
"999999999\t22\n" "999999999\t22\n"
"7\t22\n" "7\t22\n"
...@@ -251,7 +291,7 @@ export func Test(t *testing.T) { ...@@ -251,7 +291,7 @@ export func Test(t *testing.T) {
); );
Check( Check(
t, 4, 3, '.', true, t, 4, 3, '.', true, false,
"4444\t333\t22\t1\t333\n" "4444\t333\t22\t1\t333\n"
"999999999\t22\n" "999999999\t22\n"
"7\t22\n" "7\t22\n"
...@@ -270,14 +310,14 @@ export func Test(t *testing.T) { ...@@ -270,14 +310,14 @@ export func Test(t *testing.T) {
); );
Check( Check(
t, 8, 1, '\t', true, t, 8, 1, '\t', true, true,
"4444\t333\t22\t1\t333\n" "4444\t333\t22\t1\t333\n"
"999999999\t22\n" "999999999\t22\n"
"7\t22\n" "7\t22\n"
"\t\t\t88888888\n" "\t\t\t88888888\n"
"\n" "\n"
"666666\t666666\t666666\t4444\n" "666666\t666666\t666666\t4444\n"
"1\t1\t999999999\t0000000000\n", "1\t1\t<font color=red attr=日本語>999999999</font>\t0000000000\n",
"4444\t\t333\t22\t1\t333\n" "4444\t\t333\t22\t1\t333\n"
"999999999\t22\n" "999999999\t22\n"
...@@ -285,11 +325,11 @@ export func Test(t *testing.T) { ...@@ -285,11 +325,11 @@ export func Test(t *testing.T) {
"\t\t\t\t88888888\n" "\t\t\t\t88888888\n"
"\n" "\n"
"666666\t666666\t666666\t\t4444\n" "666666\t666666\t666666\t\t4444\n"
"1\t1\t999999999\t0000000000\n" "1\t1\t<font color=red attr=日本語>999999999</font>\t0000000000\n"
); );
Check( Check(
t, 0, 2, ' ', false, t, 0, 2, ' ', false, false,
".0\t.3\t2.4\t-5.1\t\n" ".0\t.3\t2.4\t-5.1\t\n"
"23.0\t12345678.9\t2.4\t-989.4\t\n" "23.0\t12345678.9\t2.4\t-989.4\t\n"
"5.1\t12.0\t2.4\t-7.0\t\n" "5.1\t12.0\t2.4\t-7.0\t\n"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment