Commit e393a829 authored by Brad Fitzpatrick's avatar Brad Fitzpatrick

mime/multipart: fix handling of empty parts without CRLF before next part

Empty parts can be either of the form:

a) "--separator\r\n", header (w/ trailing 2xCRLF), \r\n "--separator"...
or
b) "--separator\r\n", header (w/ trailing 2xCRLF), "--separator"...

We never handled case b).  In fact the RFC seems kinda vague about
it, but browsers seem to do a), and App Engine's synthetic POST
bodies after blob uploads is of form b).

So handle them both, and add a bunch of tests.

(I can't promise these are the last fixes to multipart, especially
considering its history, but I'm growing increasingly confident at
least, and I've never submitted a multipart CL with known bugs
outstanding, including this time.)

R=golang-dev, adg
CC=golang-dev
https://golang.org/cl/6212046
parent a90cbd74
......@@ -22,11 +22,6 @@ import (
"net/textproto"
)
// TODO(bradfitz): inline these once the compiler can inline them in
// read-only situation (such as bytes.HasSuffix)
var lf = []byte("\n")
var crlf = []byte("\r\n")
var emptyParams = make(map[string]string)
// A Part represents a single part in a multipart body.
......@@ -36,8 +31,9 @@ type Part struct {
// i.e. "foo-bar" changes case to "Foo-Bar"
Header textproto.MIMEHeader
buffer *bytes.Buffer
mr *Reader
buffer *bytes.Buffer
mr *Reader
bytesRead int
disposition string
dispositionParams map[string]string
......@@ -113,14 +109,26 @@ func (bp *Part) populateHeaders() error {
// Read reads the body of a part, after its headers and before the
// next part (if any) begins.
func (p *Part) Read(d []byte) (n int, err error) {
defer func() {
p.bytesRead += n
}()
if p.buffer.Len() >= len(d) {
// Internal buffer of unconsumed data is large enough for
// the read request. No need to parse more at the moment.
return p.buffer.Read(d)
}
peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor
unexpectedEof := err == io.EOF
if err != nil && !unexpectedEof {
// Look for an immediate empty part without a leading \r\n
// before the boundary separator. Some MIME code makes empty
// parts like this. Most browsers, however, write the \r\n
// before the subsequent boundary even for empty parts and
// won't hit this path.
if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) {
return 0, io.EOF
}
unexpectedEOF := err == io.EOF
if err != nil && !unexpectedEOF {
return 0, fmt.Errorf("multipart: Part Read: %v", err)
}
if peek == nil {
......@@ -138,7 +146,7 @@ func (p *Part) Read(d []byte) (n int, err error) {
foundBoundary = true
} else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 {
nCopy = safeCount
} else if unexpectedEof {
} else if unexpectedEOF {
// If we've run out of peek buffer and the boundary
// wasn't found (and can't possibly fit), we must have
// hit the end of the file unexpectedly.
......@@ -172,7 +180,10 @@ type Reader struct {
currentPart *Part
partsRead int
nl, nlDashBoundary, dashBoundaryDash, dashBoundary []byte
nl []byte // "\r\n" or "\n" (set after seeing first boundary line)
nlDashBoundary []byte // nl + "--boundary"
dashBoundaryDash []byte // "--boundary--"
dashBoundary []byte // "--boundary"
}
// NextPart returns the next part in the multipart or an error.
......@@ -185,7 +196,7 @@ func (r *Reader) NextPart() (*Part, error) {
expectNewPart := false
for {
line, err := r.bufReader.ReadSlice('\n')
if err == io.EOF && bytes.Equal(line, r.dashBoundaryDash) {
if err == io.EOF && r.isFinalBoundary(line) {
// If the buffer ends in "--boundary--" without the
// trailing "\r\n", ReadSlice will return an error
// (since it's missing the '\n'), but this is a valid
......@@ -207,7 +218,7 @@ func (r *Reader) NextPart() (*Part, error) {
return bp, nil
}
if hasPrefixThenNewline(line, r.dashBoundaryDash) {
if r.isFinalBoundary(line) {
// Expected EOF
return nil, io.EOF
}
......@@ -235,7 +246,19 @@ func (r *Reader) NextPart() (*Part, error) {
panic("unreachable")
}
func (mr *Reader) isBoundaryDelimiterLine(line []byte) bool {
// isFinalBoundary returns whether line is the final boundary line
// indiciating that all parts are over.
// It matches `^--boundary--[ \t]*(\r\n)?$`
func (mr *Reader) isFinalBoundary(line []byte) bool {
if !bytes.HasPrefix(line, mr.dashBoundaryDash) {
return false
}
rest := line[len(mr.dashBoundaryDash):]
rest = skipLWSPChar(rest)
return len(rest) == 0 || bytes.Equal(rest, mr.nl)
}
func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) {
// http://tools.ietf.org/html/rfc2046#section-5.1
// The boundary delimiter line is then defined as a line
// consisting entirely of two hyphen characters ("-",
......@@ -245,32 +268,52 @@ func (mr *Reader) isBoundaryDelimiterLine(line []byte) bool {
if !bytes.HasPrefix(line, mr.dashBoundary) {
return false
}
if bytes.HasSuffix(line, mr.nl) {
return onlyHorizontalWhitespace(line[len(mr.dashBoundary) : len(line)-len(mr.nl)])
rest := line[len(mr.dashBoundary):]
rest = skipLWSPChar(rest)
// On the first part, see our lines are ending in \n instead of \r\n
// and switch into that mode if so. This is a violation of the spec,
// but occurs in practice.
if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' {
mr.nl = mr.nl[1:]
mr.nlDashBoundary = mr.nlDashBoundary[1:]
}
// Violate the spec and also support newlines without the
// carriage return...
if mr.partsRead == 0 && bytes.HasSuffix(line, lf) {
if onlyHorizontalWhitespace(line[len(mr.dashBoundary) : len(line)-1]) {
mr.nl = mr.nl[1:]
mr.nlDashBoundary = mr.nlDashBoundary[1:]
return true
}
}
return false
return bytes.Equal(rest, mr.nl)
}
func onlyHorizontalWhitespace(s []byte) bool {
for _, b := range s {
if b != ' ' && b != '\t' {
return false
}
// peekBufferIsEmptyPart returns whether the provided peek-ahead
// buffer represents an empty part. This is only called if we've not
// already read any bytes in this part and checks for the case of MIME
// software not writing the \r\n on empty parts. Some does, some
// doesn't.
//
// This checks that what follows the "--boundary" is actually the end
// ("--boundary--" with optional whitespace) or optional whitespace
// and then a newline, so we don't catch "--boundaryFAKE", in which
// case the whole line is part of the data.
func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool {
// End of parts case.
// Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)`
if bytes.HasPrefix(peek, mr.dashBoundaryDash) {
rest := peek[len(mr.dashBoundaryDash):]
rest = skipLWSPChar(rest)
return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0
}
return true
if !bytes.HasPrefix(peek, mr.dashBoundary) {
return false
}
// Test whether rest matches `^[ \t]*\r\n`)
rest := peek[len(mr.dashBoundary):]
rest = skipLWSPChar(rest)
return bytes.HasPrefix(rest, mr.nl)
}
func hasPrefixThenNewline(s, prefix []byte) bool {
return bytes.HasPrefix(s, prefix) &&
(len(s) == len(prefix)+1 && s[len(s)-1] == '\n' ||
len(s) == len(prefix)+2 && bytes.HasSuffix(s, crlf))
// skipLWSPChar returns b with leading spaces and tabs removed.
// RFC 822 defines:
// LWSP-char = SPACE / HTAB
func skipLWSPChar(b []byte) []byte {
for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') {
b = b[1:]
}
return b
}
......@@ -10,20 +10,13 @@ import (
"fmt"
"io"
"io/ioutil"
"net/textproto"
"os"
"reflect"
"strings"
"testing"
)
func TestHorizontalWhitespace(t *testing.T) {
if !onlyHorizontalWhitespace([]byte(" \t")) {
t.Error("expected pass")
}
if onlyHorizontalWhitespace([]byte("foo bar")) {
t.Error("expected failure")
}
}
func TestBoundaryLine(t *testing.T) {
mr := NewReader(strings.NewReader(""), "myBoundary")
if !mr.isBoundaryDelimiterLine([]byte("--myBoundary\r\n")) {
......@@ -319,29 +312,6 @@ Oh no, premature EOF!
}
}
func TestZeroLengthBody(t *testing.T) {
testBody := strings.Replace(`
This is a multi-part message. This line is ignored.
--MyBoundary
foo: bar
--MyBoundary--
`, "\n", "\r\n", -1)
r := NewReader(strings.NewReader(testBody), "MyBoundary")
part, err := r.NextPart()
if err != nil {
t.Fatalf("didn't get a part")
}
n, err := io.Copy(ioutil.Discard, part)
if err != nil {
t.Errorf("error reading part: %v", err)
}
if n != 0 {
t.Errorf("read %d bytes; expected 0", n)
}
}
type slowReader struct {
r io.Reader
}
......@@ -427,3 +397,214 @@ func TestNested(t *testing.T) {
t.Fatalf("final outer NextPart = %v; want io.EOF", err)
}
}
type headerBody struct {
header textproto.MIMEHeader
body string
}
func formData(key, value string) headerBody {
return headerBody{
textproto.MIMEHeader{
"Content-Type": {"text/plain; charset=ISO-8859-1"},
"Content-Disposition": {"form-data; name=" + key},
},
value,
}
}
type parseTest struct {
name string
in, sep string
want []headerBody
}
var parseTests = []parseTest{
// Actual body from App Engine on a blob upload. The final part (the
// Content-Type: message/external-body) is what App Engine replaces
// the uploaded file with. The other form fields (prefixed with
// "other" in their form-data name) are unchanged. A bug was
// reported with blob uploads failing when the other fields were
// empty. This was the MIME POST body that previously failed.
{
name: "App Engine post",
sep: "00151757727e9583fd04bfbca4c6",
in: "--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherEmpty1\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherFoo1\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherFoo2\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherEmpty2\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatFoo\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatFoo\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatEmpty\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatEmpty\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=submit\r\n\r\nSubmit\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: message/external-body; charset=ISO-8859-1; blob-key=AHAZQqG84qllx7HUqO_oou5EvdYQNS3Mbbkb0RjjBoM_Kc1UqEN2ygDxWiyCPulIhpHRPx-VbpB6RX4MrsqhWAi_ZxJ48O9P2cTIACbvATHvg7IgbvZytyGMpL7xO1tlIvgwcM47JNfv_tGhy1XwyEUO8oldjPqg5Q\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\nContent-Type: image/png\r\nContent-Length: 232303\r\nX-AppEngine-Upload-Creation: 2012-05-10 23:14:02.715173\r\nContent-MD5: MzRjODU1ZDZhZGU1NmRlOWEwZmMwMDdlODBmZTA0NzA=\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\n\r\n--00151757727e9583fd04bfbca4c6--",
want: []headerBody{
formData("otherEmpty1", ""),
formData("otherFoo1", "foo"),
formData("otherFoo2", "foo"),
formData("otherEmpty2", ""),
formData("otherRepeatFoo", "foo"),
formData("otherRepeatFoo", "foo"),
formData("otherRepeatEmpty", ""),
formData("otherRepeatEmpty", ""),
formData("submit", "Submit"),
{textproto.MIMEHeader{
"Content-Type": {"message/external-body; charset=ISO-8859-1; blob-key=AHAZQqG84qllx7HUqO_oou5EvdYQNS3Mbbkb0RjjBoM_Kc1UqEN2ygDxWiyCPulIhpHRPx-VbpB6RX4MrsqhWAi_ZxJ48O9P2cTIACbvATHvg7IgbvZytyGMpL7xO1tlIvgwcM47JNfv_tGhy1XwyEUO8oldjPqg5Q"},
"Content-Disposition": {"form-data; name=file; filename=\"fall.png\""},
}, "Content-Type: image/png\r\nContent-Length: 232303\r\nX-AppEngine-Upload-Creation: 2012-05-10 23:14:02.715173\r\nContent-MD5: MzRjODU1ZDZhZGU1NmRlOWEwZmMwMDdlODBmZTA0NzA=\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\n"},
},
},
// Single empty part, ended with --boundary immediately after headers.
{
name: "single empty part, --boundary",
sep: "abc",
in: "--abc\r\nFoo: bar\r\n\r\n--abc--",
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
},
},
// Single empty part, ended with \r\n--boundary immediately after headers.
{
name: "single empty part, \r\n--boundary",
sep: "abc",
in: "--abc\r\nFoo: bar\r\n\r\n\r\n--abc--",
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
},
},
// Final part empty.
{
name: "final part empty",
sep: "abc",
in: "--abc\r\nFoo: bar\r\n\r\n--abc\r\nFoo2: bar2\r\n\r\n--abc--",
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
{textproto.MIMEHeader{"Foo2": {"bar2"}}, ""},
},
},
// Final part empty with newlines after final separator.
{
name: "final part empty then crlf",
sep: "abc",
in: "--abc\r\nFoo: bar\r\n\r\n--abc--\r\n",
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
},
},
// Final part empty with lwsp-chars after final separator.
{
name: "final part empty then lwsp",
sep: "abc",
in: "--abc\r\nFoo: bar\r\n\r\n--abc-- \t",
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
},
},
// No parts (empty form as submitted by Chrome)
{
name: "no parts",
sep: "----WebKitFormBoundaryQfEAfzFOiSemeHfA",
in: "------WebKitFormBoundaryQfEAfzFOiSemeHfA--\r\n",
want: []headerBody{},
},
// Part containing data starting with the boundary, but with additional suffix.
{
name: "fake separator as data",
sep: "sep",
in: "--sep\r\nFoo: bar\r\n\r\n--sepFAKE\r\n--sep--",
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, "--sepFAKE"},
},
},
// Part containing a boundary with whitespace following it.
{
name: "boundary with whitespace",
sep: "sep",
in: "--sep \r\nFoo: bar\r\n\r\ntext\r\n--sep--",
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, "text"},
},
},
// With ignored leading line.
{
name: "leading line",
sep: "MyBoundary",
in: strings.Replace(`This is a multi-part message. This line is ignored.
--MyBoundary
foo: bar
--MyBoundary--`, "\n", "\r\n", -1),
want: []headerBody{
{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
},
},
roundTripParseTest(),
}
func TestParse(t *testing.T) {
Cases:
for _, tt := range parseTests {
r := NewReader(strings.NewReader(tt.in), tt.sep)
got := []headerBody{}
for {
p, err := r.NextPart()
if err == io.EOF {
break
}
if err != nil {
t.Errorf("in test %q, NextPart: %v", tt.name, err)
continue Cases
}
pbody, err := ioutil.ReadAll(p)
if err != nil {
t.Errorf("in test %q, error reading part: %v", tt.name, err)
continue Cases
}
got = append(got, headerBody{p.Header, string(pbody)})
}
if !reflect.DeepEqual(tt.want, got) {
t.Errorf("test %q:\n got: %v\nwant: %v", tt.name, got, tt.want)
if len(tt.want) != len(got) {
t.Errorf("test %q: got %d parts, want %d", tt.name, len(got), len(tt.want))
} else if len(got) > 1 {
for pi, wantPart := range tt.want {
if !reflect.DeepEqual(wantPart, got[pi]) {
t.Errorf("test %q, part %d:\n got: %v\nwant: %v", tt.name, pi, got[pi], wantPart)
}
}
}
}
}
}
func roundTripParseTest() parseTest {
t := parseTest{
name: "round trip",
want: []headerBody{
formData("empty", ""),
formData("lf", "\n"),
formData("cr", "\r"),
formData("crlf", "\r\n"),
formData("foo", "bar"),
},
}
var buf bytes.Buffer
w := NewWriter(&buf)
for _, p := range t.want {
pw, err := w.CreatePart(p.header)
if err != nil {
panic(err)
}
_, err = pw.Write([]byte(p.body))
if err != nil {
panic(err)
}
}
w.Close()
t.in = buf.String()
t.sep = w.Boundary()
return t
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment