Commit b55e6d1b authored by Robert Griesemer

permit escaped text segments which pass through tabwriter undisturbed and uninterpreted

R=rsc
DELTA=141  (82 added, 23 deleted, 36 changed)
OCL=35747
CL=35769
parent da829d66
...@@ -51,6 +51,11 @@ type cell struct { ...@@ -51,6 +51,11 @@ type cell struct {
// terminated by horizontal (or "hard") tabs are not affected by // terminated by horizontal (or "hard") tabs are not affected by
// this flag. // this flag.
// //
// A segment of text may be escaped by bracketing it with Escape
// characters. The tabwriter strips the Escape characters but otherwise
// passes escaped text segments through unchanged. In particular, it
// does not interpret any tabs or line breaks within the segment.
//
// The Writer assumes that all characters have the same width; // The Writer assumes that all characters have the same width;
// this may not be true in some fonts, especially with certain // this may not be true in some fonts, especially with certain
// UTF-8 characters. // UTF-8 characters.
...@@ -62,8 +67,8 @@ type cell struct { ...@@ -62,8 +67,8 @@ type cell struct {
// The formfeed character ('\f') acts like a newline but it also // The formfeed character ('\f') acts like a newline but it also
// terminates all columns in the current line (effectively calling // terminates all columns in the current line (effectively calling
// Flush). Cells in the next line start new columns. Unless found // Flush). Cells in the next line start new columns. Unless found
// inside an HTML tag, formfeed characters appear as newlines in // inside an HTML tag or inside an escaped text segment, formfeed
// the output. // characters appear as newlines in the output.
// //
// The Writer must buffer input internally, because proper spacing // The Writer must buffer input internally, because proper spacing
// of one line may depend on the cells in future lines. Clients must // of one line may depend on the cells in future lines. Clients must
...@@ -81,7 +86,7 @@ type Writer struct { ...@@ -81,7 +86,7 @@ type Writer struct {
buf bytes.Buffer; // collected text w/o tabs, newlines, or formfeed chars buf bytes.Buffer; // collected text w/o tabs, newlines, or formfeed chars
pos int; // buffer position up to which width of incomplete cell has been computed pos int; // buffer position up to which width of incomplete cell has been computed
cell cell; // current incomplete cell; cell.width is up to buf[pos] w/o ignored sections cell cell; // current incomplete cell; cell.width is up to buf[pos] w/o ignored sections
html_char byte; // terminating char of html tag/entity, or 0 ('>', ';', or 0) endChar byte; // terminating char of escaped sequence (Escape for escapes, '>', ';' for HTML tags/entities, or 0)
lines vector.Vector; // list of lines; each line is a list of cells lines vector.Vector; // list of lines; each line is a list of cells
widths vector.IntVector; // list of column widths in runes - re-used during formatting widths vector.IntVector; // list of column widths in runes - re-used during formatting
} }
...@@ -102,7 +107,7 @@ func (b *Writer) reset() { ...@@ -102,7 +107,7 @@ func (b *Writer) reset() {
b.buf.Reset(); b.buf.Reset();
b.pos = 0; b.pos = 0;
b.cell = cell{}; b.cell = cell{};
b.html_char = 0; b.endChar = 0;
b.lines.Init(0); b.lines.Init(0);
b.widths.Init(0); b.widths.Init(0);
b.addLine(); b.addLine();
...@@ -378,41 +383,53 @@ func (b *Writer) format(pos0 int, line0, line1 int) (pos int, err os.Error) { ...@@ -378,41 +383,53 @@ func (b *Writer) format(pos0 int, line0, line1 int) (pos int, err os.Error) {
} }
// Append text to current cell. Only update the cell width if updateWidth // Append text to current cell.
// is set (the cell width can only be updated if we know that we cannot be func (b *Writer) append(text []byte) {
// in the middle of a UTF-8 encoded Unicode character).
//
func (b *Writer) append(text []byte, updateWidth bool) {
b.buf.Write(text); b.buf.Write(text);
b.cell.size += len(text); b.cell.size += len(text);
if updateWidth {
b.cell.width += utf8.RuneCount(b.buf.Bytes()[b.pos : b.buf.Len()]);
b.pos = b.buf.Len();
}
} }
// Start HTML-escape mode. // Update the cell width.
func (b *Writer) startHTML(ch byte) { func (b *Writer) updateWidth() {
if ch == '<' { b.cell.width += utf8.RuneCount(b.buf.Bytes()[b.pos : b.buf.Len()]);
b.html_char = '>'; b.pos = b.buf.Len();
} else { }
b.html_char = ';';
// To escape a text segment, bracket it with Escape characters.
// For instance, the tab in this string "Ignore this tab: \xff\t\xff"
// does not terminate a cell and constitutes a single character of
// width one for formatting purposes.
//
// The value 0xff was chosen because it cannot appear in a valid UTF-8 sequence.
//
const Escape ='\xff'
// Start escaped mode.
func (b *Writer) startEscape(ch byte) {
switch ch {
case Escape: b.endChar = Escape;
case '<': b.endChar = '>';
case '&': b.endChar = ';';
} }
} }
// Terminate HTML-escape mode. If the HTML text was an entity, its width // Terminate escaped mode. If the escaped text was an HTML tag, its width
// is assumed to be one for formatting purposes; otherwise it is assumed to // is assumed to be zero for formatting purposes; if it was an HTML entity,
// be zero. // its width is assumed to be one. In all other cases, the width is the
// unicode width of the text.
// //
func (b *Writer) terminateHTML() { func (b *Writer) endEscape() {
if b.html_char == ';' { switch b.endChar {
// was entity, count as one rune case Escape: b.updateWidth();
b.cell.width++; case '>': // tag of zero width
case ';': b.cell.width++; // entity, count as one rune
} }
b.pos = b.buf.Len(); b.pos = b.buf.Len();
b.html_char = 0; b.endChar = 0;
} }
...@@ -430,15 +447,15 @@ func (b *Writer) terminateCell(htab bool) int { ...@@ -430,15 +447,15 @@ func (b *Writer) terminateCell(htab bool) int {
// Flush should be called after the last call to Write to ensure // Flush should be called after the last call to Write to ensure
// that any data buffered in the Writer is written to output. Any // that any data buffered in the Writer is written to output. Any
// incomplete HTML tag or entity at the end is simply considered // incomplete escape sequence at the end is simply considered
// complete for formatting purposes. // complete for formatting purposes.
// //
func (b *Writer) Flush() os.Error { func (b *Writer) Flush() os.Error {
// add current cell if not empty // add current cell if not empty
if b.cell.size > 0 { if b.cell.size > 0 {
if b.html_char != 0 { if b.endChar != 0 {
// inside html tag/entity - terminate it even if incomplete // inside escape - terminate it even if incomplete
b.terminateHTML(); b.endEscape();
} }
b.terminateCell(false); b.terminateCell(false);
} }
...@@ -457,17 +474,18 @@ func (b *Writer) Flush() os.Error { ...@@ -457,17 +474,18 @@ func (b *Writer) Flush() os.Error {
// The only errors returned are ones encountered // The only errors returned are ones encountered
// while writing to the underlying output stream. // while writing to the underlying output stream.
// //
func (b *Writer) Write(buf []byte) (written int, err os.Error) { func (b *Writer) Write(buf []byte) (n int, err os.Error) {
// split text into cells // split text into cells
i0 := 0; n = 0;
for i, ch := range buf { for i, ch := range buf {
if b.html_char == 0 { if b.endChar == 0 {
// outside html tag/entity // outside escape
switch ch { switch ch {
case '\t', '\v', '\n', '\f': case '\t', '\v', '\n', '\f':
// end of cell // end of cell
b.append(buf[i0 : i], true); b.append(buf[n : i]);
i0 = i+1; // exclude ch from (next) cell b.updateWidth();
n = i+1; // ch consumed
ncells := b.terminateCell(ch == '\t'); ncells := b.terminateCell(ch == '\t');
if ch == '\n' || ch == '\f' { if ch == '\n' || ch == '\f' {
// terminate line // terminate line
...@@ -479,35 +497,48 @@ func (b *Writer) Write(buf []byte) (written int, err os.Error) { ...@@ -479,35 +497,48 @@ func (b *Writer) Write(buf []byte) (written int, err os.Error) {
// line is ignored by format()), thus we can flush the // line is ignored by format()), thus we can flush the
// Writer contents. // Writer contents.
if err = b.Flush(); err != nil { if err = b.Flush(); err != nil {
return i0, err; return;
} }
} }
} }
case Escape:
// start of escaped sequence
b.append(buf[n : i]);
b.updateWidth();
n = i+1; // exclude Escape
b.startEscape(Escape);
case '<', '&': case '<', '&':
// possibly an html tag/entity // possibly an html tag/entity
if b.flags & FilterHTML != 0 { if b.flags & FilterHTML != 0 {
// begin of tag/entity // begin of tag/entity
b.append(buf[i0 : i], true); b.append(buf[n : i]);
i0 = i; b.updateWidth();
b.startHTML(ch); n = i;
b.startEscape(ch);
} }
} }
} else { } else {
// inside html tag/entity // inside escape
if ch == b.html_char { if ch == b.endChar {
// end of tag/entity // end of tag/entity
b.append(buf[i0 : i+1], false); j := i+1;
i0 = i+1; // exclude ch from (next) cell if ch == Escape {
b.terminateHTML(); j = i; // exclude Escape
}
b.append(buf[n : j]);
n = i+1; // ch consumed
b.endEscape();
} }
} }
} }
// append leftover text // append leftover text
b.append(buf[i0 : len(buf)], false); b.append(buf[n : len(buf)]);
return len(buf), nil; n = len(buf);
return;
} }
......
...@@ -113,19 +113,47 @@ type entry struct { ...@@ -113,19 +113,47 @@ type entry struct {
var tests = []entry { var tests = []entry {
entry{ entry{
"1", "1a",
8, 1, '.', 0, 8, 1, '.', 0,
"", "",
"" ""
}, },
entry{ entry{
"1 debug", "1a debug",
8, 1, '.', Debug, 8, 1, '.', Debug,
"", "",
"" ""
}, },
entry{
"1b esc",
8, 1, '.', 0,
"\xff\xff",
""
},
entry{
"1c esc",
8, 1, '.', 0,
"\xff\t\xff",
"\t"
},
entry{
"1d esc",
8, 1, '.', 0,
"\xff\"foo\t\n\tbar\"\xff",
"\"foo\t\n\tbar\"",
},
entry{
"1e esc",
8, 1, '.', 0,
"abc\xff\tdef", // unterminated escape
"abc\tdef",
},
entry{ entry{
"2", "2",
8, 1, '.', 0, 8, 1, '.', 0,
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment