Commit 9c7aa5fe authored by Brad Fitzpatrick's avatar Brad Fitzpatrick

mime/multipart: allow unescaped newlines through in quoted-printable

This makes Go's quoted-printable decoder more like other
popular ones, allowing through a bare \r or \n, and also
passes through \r\n which looked like a real bug before.

Fixes #4771

R=minux.ma
CC=golang-dev
https://golang.org/cl/7300092
parent ec892be1
......@@ -3,6 +3,10 @@
// license that can be found in the LICENSE file.
// The file define a quoted-printable decoder, as specified in RFC 2045.
// Deviations:
// 1. in addition to "=\r\n", "=\n" is also treated as soft line break.
// 2. it will pass through a '\r' or '\n' not preceded by '=', consistent
// with other broken QP encoders & decoders.
package multipart
......@@ -14,14 +18,16 @@ import (
)
type qpReader struct {
br *bufio.Reader
rerr error // last read error
line []byte // to be consumed before more of br
br *bufio.Reader
skipWhite bool
rerr error // last read error
line []byte // to be consumed before more of br
}
func newQuotedPrintableReader(r io.Reader) io.Reader {
return &qpReader{
br: bufio.NewReader(r),
br: bufio.NewReader(r),
skipWhite: true,
}
}
......@@ -49,6 +55,10 @@ func (q *qpReader) readHexByte(v []byte) (b byte, err error) {
return hb<<4 | lb, nil
}
func isQPSkipWhiteByte(b byte) bool {
return b == ' ' || b == '\t'
}
func isQPDiscardWhitespace(r rune) bool {
switch r {
case '\n', '\r', ' ', '\t':
......@@ -57,22 +67,48 @@ func isQPDiscardWhitespace(r rune) bool {
return false
}
var (
crlf = []byte("\r\n")
lf = []byte("\n")
softSuffix = []byte("=")
)
func (q *qpReader) Read(p []byte) (n int, err error) {
for len(p) > 0 {
if len(q.line) == 0 {
if q.rerr != nil {
return n, q.rerr
}
q.skipWhite = true
q.line, q.rerr = q.br.ReadSlice('\n')
q.line = bytes.TrimRightFunc(q.line, isQPDiscardWhitespace)
// Does the line end in CRLF instead of just LF?
hasLF := bytes.HasSuffix(q.line, lf)
hasCR := bytes.HasSuffix(q.line, crlf)
wholeLine := q.line
q.line = bytes.TrimRightFunc(wholeLine, isQPDiscardWhitespace)
if bytes.HasSuffix(q.line, softSuffix) {
rightStripped := wholeLine[len(q.line):]
q.line = q.line[:len(q.line)-1]
if !bytes.HasPrefix(rightStripped, lf) && !bytes.HasPrefix(rightStripped, crlf) {
q.rerr = fmt.Errorf("multipart: invalid bytes after =: %q", rightStripped)
}
} else if hasLF {
if hasCR {
q.line = append(q.line, '\r', '\n')
} else {
q.line = append(q.line, '\n')
}
}
continue
}
if len(q.line) == 1 && q.line[0] == '=' {
// Soft newline; skipped.
q.line = nil
b := q.line[0]
if q.skipWhite && isQPSkipWhiteByte(b) {
q.line = q.line[1:]
continue
}
b := q.line[0]
q.skipWhite = false
switch {
case b == '=':
b, err = q.readHexByte(q.line[1:])
......@@ -80,7 +116,9 @@ func (q *qpReader) Read(p []byte) (n int, err error) {
return n, err
}
q.line = q.line[2:] // 2 of the 3; other 1 is done below
case b != '\t' && (b < ' ' || b > '~'):
case b == '\t' || b == '\r' || b == '\n':
break
case b < ' ' || b > '~':
return n, fmt.Errorf("multipart: invalid unescaped byte 0x%02x in quoted-printable body", b)
}
p[0] = b
......
......@@ -5,11 +5,18 @@
package multipart
import (
"bufio"
"bytes"
"errors"
"flag"
"fmt"
"io"
"os/exec"
"regexp"
"sort"
"strings"
"testing"
"time"
)
func TestQuotedPrintable(t *testing.T) {
......@@ -17,15 +24,39 @@ func TestQuotedPrintable(t *testing.T) {
in, want string
err interface{}
}{
{in: "", want: ""},
{in: "foo bar", want: "foo bar"},
{in: "foo bar=3D", want: "foo bar="},
{in: "foo bar=\n", want: "foo bar"},
{in: "foo bar\n", want: "foo bar\n"}, // somewhat lax.
{in: "foo bar=0", want: "foo bar", err: io.ErrUnexpectedEOF},
{in: "foo bar=ab", want: "foo bar", err: "multipart: invalid quoted-printable hex byte 0x61"},
{in: "foo bar=0D=0A", want: "foo bar\r\n"},
{in: "foo bar=\r\n baz", want: "foo bar baz"},
{in: " A B =\r\n C ", want: "A B C"},
{in: " A B =\n C ", want: "A B C"}, // lax. treating LF as CRLF
{in: "foo=\nbar", want: "foobar"},
{in: "foo\x00bar", want: "foo", err: "multipart: invalid unescaped byte 0x00 in quoted-printable body"},
{in: "foo bar\xff", want: "foo bar", err: "multipart: invalid unescaped byte 0xff in quoted-printable body"},
// Equal sign.
{in: "=3D30\n", want: "=30\n"},
{in: "=00=FF0=\n", want: "\x00\xff0"},
// Trailing whitespace
{in: "foo \n", want: "foo\n"},
{in: "foo \n\nfoo =\n\nfoo=20\n\n", want: "foo\n\nfoo \nfoo \n\n"},
// Tests that we allow bare \n and \r through, despite it being strictly
// not permitted per RFC 2045, Section 6.7 Page 22 bullet (4).
{in: "foo\nbar", want: "foo\nbar"},
{in: "foo\rbar", want: "foo\rbar"},
{in: "foo\r\nbar", want: "foo\r\nbar"},
// Different types of soft line-breaks.
{in: "foo=\r\nbar", want: "foobar"},
{in: "foo=\nbar", want: "foobar"},
{in: "foo=\rbar", want: "foo", err: "multipart: invalid quoted-printable hex byte 0x0d"},
{in: "foo=\r\r\r \nbar", want: "foo", err: `multipart: invalid bytes after =: "\r\r\r \n"`},
}
for _, tt := range tests {
var buf bytes.Buffer
......@@ -50,3 +81,119 @@ func TestQuotedPrintable(t *testing.T) {
}
}
func everySequence(base, alpha string, length int, fn func(string)) {
if len(base) == length {
fn(base)
return
}
for i := 0; i < len(alpha); i++ {
everySequence(base+alpha[i:i+1], alpha, length, fn)
}
}
var useQprint = flag.Bool("qprint", false, "Compare against the 'qprint' program.")
var badSoftRx = regexp.MustCompile(`=([^\r\n]+?\n)|([^\r\n]+$)|(\r$)|(\r[^\n]+\n)|( \r\n)`)
func TestQPExhaustive(t *testing.T) {
if *useQprint {
_, err := exec.LookPath("qprint")
if err != nil {
t.Fatalf("Error looking for qprint: %v", err)
}
}
var buf bytes.Buffer
res := make(map[string]int)
everySequence("", "0A \r\n=", 6, func(s string) {
if strings.HasSuffix(s, "=") || strings.Contains(s, "==") {
return
}
buf.Reset()
_, err := io.Copy(&buf, newQuotedPrintableReader(strings.NewReader(s)))
if err != nil {
errStr := err.Error()
if strings.Contains(errStr, "invalid bytes after =:") {
errStr = "invalid bytes after ="
}
res[errStr]++
if strings.Contains(errStr, "invalid quoted-printable hex byte ") {
if strings.HasSuffix(errStr, "0x20") && (strings.Contains(s, "=0 ") || strings.Contains(s, "=A ") || strings.Contains(s, "= ")) {
return
}
if strings.HasSuffix(errStr, "0x3d") && (strings.Contains(s, "=0=") || strings.Contains(s, "=A=")) {
return
}
if strings.HasSuffix(errStr, "0x0a") || strings.HasSuffix(errStr, "0x0d") {
// bunch of cases; since whitespace at the end of of a line before \n is removed.
return
}
}
if strings.Contains(errStr, "unexpected EOF") {
return
}
if errStr == "invalid bytes after =" && badSoftRx.MatchString(s) {
return
}
t.Errorf("decode(%q) = %v", s, err)
return
}
if *useQprint {
cmd := exec.Command("qprint", "-d")
cmd.Stdin = strings.NewReader(s)
stderr, err := cmd.StderrPipe()
if err != nil {
panic(err)
}
qpres := make(chan interface{}, 2)
go func() {
br := bufio.NewReader(stderr)
s, _ := br.ReadString('\n')
if s != "" {
qpres <- errors.New(s)
if cmd.Process != nil {
// It can get stuck on invalid input, like:
// echo -n "0000= " | qprint -d
cmd.Process.Kill()
}
}
}()
go func() {
want, err := cmd.Output()
if err == nil {
qpres <- want
}
}()
select {
case got := <-qpres:
if want, ok := got.([]byte); ok {
if string(want) != buf.String() {
t.Errorf("go decode(%q) = %q; qprint = %q", s, want, buf.String())
}
} else {
t.Logf("qprint -d(%q) = %v", s, got)
}
case <-time.After(5 * time.Second):
t.Logf("qprint timeout on %q", s)
}
}
res["OK"]++
})
var outcomes []string
for k, v := range res {
outcomes = append(outcomes, fmt.Sprintf("%v: %d", k, v))
}
sort.Strings(outcomes)
got := strings.Join(outcomes, "\n")
want := `OK: 21576
invalid bytes after =: 3397
multipart: invalid quoted-printable hex byte 0x0a: 1400
multipart: invalid quoted-printable hex byte 0x0d: 2700
multipart: invalid quoted-printable hex byte 0x20: 2490
multipart: invalid quoted-printable hex byte 0x3d: 440
unexpected EOF: 3122`
if got != want {
t.Errorf("Got:\n%s\nWant:\n%s", got, want)
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment