mime/multipart: fix handling of empty parts without CRLF before next part

Empty parts can be either of the form: a) "--separator\r\n", header (w/ trailing 2xCRLF), \r\n "--separator"... or b) "--separator\r\n", header (w/ trailing 2xCRLF), "--separator"... We never handled case b). In fact the RFC seems kinda vague about it, but browsers seem to do a), and App Engine's synthetic POST bodies after blob uploads is of form b). So handle them both, and add a bunch of tests. (I can't promise these are the last fixes to multipart, especially considering its history, but I'm growing increasingly confident at least, and I've never submitted a multipart CL with known bugs outstanding, including this time.) R=golang-dev, adg CC=golang-dev https://golang.org/cl/6212046

mime/multipart: fix handling of empty parts without CRLF before next part
Empty parts can be either of the form: a) "--separator\r\n", header (w/ trailing 2xCRLF), \r\n "--separator"... or b) "--separator\r\n", header (w/ trailing 2xCRLF), "--separator"... We never handled case b). In fact the RFC seems kinda vague about it, but browsers seem to do a), and App Engine's synthetic POST bodies after blob uploads is of form b). So handle them both, and add a bunch of tests. (I can't promise these are the last fixes to multipart, especially considering its history, but I'm growing increasingly confident at least, and I've never submitted a multipart CL with known bugs outstanding, including this time.) R=golang-dev, adg CC=golang-dev https://golang.org/cl/6212046
e393a829 · Brad Fitzpatrick · a90cbd74 · e393a829 · e393a829
Commit e393a829 authored May 15, 2012 by Brad Fitzpatrick
Hide whitespace changes
Inline Side-by-side

Showing with 292 additions and 68 deletions

multipart.go src/pkg/mime/multipart/multipart.go +79 -36

multipart_test.go src/pkg/mime/multipart/multipart_test.go +213 -32

No files found.
--- a/src/pkg/mime/multipart/multipart.go
+++ b/src/pkg/mime/multipart/multipart.go
@@ -22,11 +22,6 @@ import (
 	"net/textproto"
 )

-// TODO(bradfitz): inline these once the compiler can inline them in
-// read-only situation (such as bytes.HasSuffix)
-var lf = []byte("\n")
-var crlf = []byte("\r\n")
-
 var emptyParams = make(map[string]string)

 // A Part represents a single part in a multipart body.
@@ -36,8 +31,9 @@ type Part struct {
 	// i.e. "foo-bar" changes case to "Foo-Bar"
 	Header textproto.MIMEHeader

-	buffer *bytes.Buffer
-	mr     *Reader
+	buffer    *bytes.Buffer
+	mr        *Reader
+	bytesRead int

 	disposition       string
 	dispositionParams map[string]string
@@ -113,14 +109,26 @@ func (bp *Part) populateHeaders() error {
 // Read reads the body of a part, after its headers and before the
 // next part (if any) begins.
 func (p *Part) Read(d []byte) (n int, err error) {
+	defer func() {
+		p.bytesRead += n
+	}()
 	if p.buffer.Len() >= len(d) {
 		// Internal buffer of unconsumed data is large enough for
 		// the read request.  No need to parse more at the moment.
 		return p.buffer.Read(d)
 	}
 	peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor
-	unexpectedEof := err == io.EOF
-	if err != nil && !unexpectedEof {
+
+	// Look for an immediate empty part without a leading \r\n
+	// before the boundary separator.  Some MIME code makes empty
+	// parts like this. Most browsers, however, write the \r\n
+	// before the subsequent boundary even for empty parts and
+	// won't hit this path.
+	if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) {
+		return 0, io.EOF
+	}
+	unexpectedEOF := err == io.EOF
+	if err != nil && !unexpectedEOF {
 		return 0, fmt.Errorf("multipart: Part Read: %v", err)
 	}
 	if peek == nil {
@@ -138,7 +146,7 @@ func (p *Part) Read(d []byte) (n int, err error) {
 		foundBoundary = true
 	} else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 {
 		nCopy = safeCount
-	} else if unexpectedEof {
+	} else if unexpectedEOF {
 		// If we've run out of peek buffer and the boundary
 		// wasn't found (and can't possibly fit), we must have
 		// hit the end of the file unexpectedly.
@@ -172,7 +180,10 @@ type Reader struct {
 	currentPart *Part
 	partsRead   int

-	nl, nlDashBoundary, dashBoundaryDash, dashBoundary []byte
+	nl               []byte // "\r\n" or "\n" (set after seeing first boundary line)
+	nlDashBoundary   []byte // nl + "--boundary"
+	dashBoundaryDash []byte // "--boundary--"
+	dashBoundary     []byte // "--boundary"
 }

 // NextPart returns the next part in the multipart or an error.
@@ -185,7 +196,7 @@ func (r *Reader) NextPart() (*Part, error) {
 	expectNewPart := false
 	for {
 		line, err := r.bufReader.ReadSlice('\n')
-		if err == io.EOF && bytes.Equal(line, r.dashBoundaryDash) {
+		if err == io.EOF && r.isFinalBoundary(line) {
 			// If the buffer ends in "--boundary--" without the
 			// trailing "\r\n", ReadSlice will return an error
 			// (since it's missing the '\n'), but this is a valid
@@ -207,7 +218,7 @@ func (r *Reader) NextPart() (*Part, error) {
 			return bp, nil
 		}

-		if hasPrefixThenNewline(line, r.dashBoundaryDash) {
+		if r.isFinalBoundary(line) {
 			// Expected EOF
 			return nil, io.EOF
 		}
@@ -235,7 +246,19 @@ func (r *Reader) NextPart() (*Part, error) {
 	panic("unreachable")
 }

-func (mr *Reader) isBoundaryDelimiterLine(line []byte) bool {
+// isFinalBoundary returns whether line is the final boundary line
+// indiciating that all parts are over.
+// It matches `^--boundary--[ \t]*(\r\n)?$`
+func (mr *Reader) isFinalBoundary(line []byte) bool {
+	if !bytes.HasPrefix(line, mr.dashBoundaryDash) {
+		return false
+	}
+	rest := line[len(mr.dashBoundaryDash):]
+	rest = skipLWSPChar(rest)
+	return len(rest) == 0 || bytes.Equal(rest, mr.nl)
+}
+
+func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) {
 	// http://tools.ietf.org/html/rfc2046#section-5.1
 	//   The boundary delimiter line is then defined as a line
 	//   consisting entirely of two hyphen characters ("-",
@@ -245,32 +268,52 @@ func (mr *Reader) isBoundaryDelimiterLine(line []byte) bool {
 	if !bytes.HasPrefix(line, mr.dashBoundary) {
 		return false
 	}
-	if bytes.HasSuffix(line, mr.nl) {
-		return onlyHorizontalWhitespace(line[len(mr.dashBoundary) : len(line)-len(mr.nl)])
+	rest := line[len(mr.dashBoundary):]
+	rest = skipLWSPChar(rest)
+
+	// On the first part, see our lines are ending in \n instead of \r\n
+	// and switch into that mode if so.  This is a violation of the spec,
+	// but occurs in practice.
+	if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' {
+		mr.nl = mr.nl[1:]
+		mr.nlDashBoundary = mr.nlDashBoundary[1:]
 	}
-	// Violate the spec and also support newlines without the
-	// carriage return...
-	if mr.partsRead == 0 && bytes.HasSuffix(line, lf) {
-		if onlyHorizontalWhitespace(line[len(mr.dashBoundary) : len(line)-1]) {
-			mr.nl = mr.nl[1:]
-			mr.nlDashBoundary = mr.nlDashBoundary[1:]
-			return true
-		}
-	}
-	return false
+	return bytes.Equal(rest, mr.nl)
 }

-func onlyHorizontalWhitespace(s []byte) bool {
-	for _, b := range s {
-		if b != ' ' && b != '\t' {
-			return false
-		}
+// peekBufferIsEmptyPart returns whether the provided peek-ahead
+// buffer represents an empty part.  This is only called if we've not
+// already read any bytes in this part and checks for the case of MIME
+// software not writing the \r\n on empty parts. Some does, some
+// doesn't.
+//
+// This checks that what follows the "--boundary" is actually the end
+// ("--boundary--" with optional whitespace) or optional whitespace
+// and then a newline, so we don't catch "--boundaryFAKE", in which
+// case the whole line is part of the data.
+func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool {
+	// End of parts case.
+	// Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)`
+	if bytes.HasPrefix(peek, mr.dashBoundaryDash) {
+		rest := peek[len(mr.dashBoundaryDash):]
+		rest = skipLWSPChar(rest)
+		return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0
 	}
-	return true
+	if !bytes.HasPrefix(peek, mr.dashBoundary) {
+		return false
+	}
+	// Test whether rest matches `^[ \t]*\r\n`)
+	rest := peek[len(mr.dashBoundary):]
+	rest = skipLWSPChar(rest)
+	return bytes.HasPrefix(rest, mr.nl)
 }

-func hasPrefixThenNewline(s, prefix []byte) bool {
-	return bytes.HasPrefix(s, prefix) &&
-		(len(s) == len(prefix)+1 && s[len(s)-1] == '\n' ||
-			len(s) == len(prefix)+2 && bytes.HasSuffix(s, crlf))
+// skipLWSPChar returns b with leading spaces and tabs removed.
+// RFC 822 defines:
+//    LWSP-char = SPACE / HTAB
+func skipLWSPChar(b []byte) []byte {
+	for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') {
+		b = b[1:]
+	}
+	return b
 }
--- a/src/pkg/mime/multipart/multipart_test.go
+++ b/src/pkg/mime/multipart/multipart_test.go
@@ -10,20 +10,13 @@ import (
 	"fmt"
 	"io"
 	"io/ioutil"
+	"net/textproto"
 	"os"
+	"reflect"
 	"strings"
 	"testing"
 )

-func TestHorizontalWhitespace(t *testing.T) {
-	if !onlyHorizontalWhitespace([]byte(" \t")) {
-		t.Error("expected pass")
-	}
-	if onlyHorizontalWhitespace([]byte("foo bar")) {
-		t.Error("expected failure")
-	}
-}
-
 func TestBoundaryLine(t *testing.T) {
 	mr := NewReader(strings.NewReader(""), "myBoundary")
 	if !mr.isBoundaryDelimiterLine([]byte("--myBoundary\r\n")) {
@@ -319,29 +312,6 @@ Oh no, premature EOF!
 	}
 }

-func TestZeroLengthBody(t *testing.T) {
-	testBody := strings.Replace(`
-This is a multi-part message.  This line is ignored.
--MyBoundary
-foo: bar
-
-
--MyBoundary--
-`, "\n", "\r\n", -1)
-	r := NewReader(strings.NewReader(testBody), "MyBoundary")
-	part, err := r.NextPart()
-	if err != nil {
-		t.Fatalf("didn't get a part")
-	}
-	n, err := io.Copy(ioutil.Discard, part)
-	if err != nil {
-		t.Errorf("error reading part: %v", err)
-	}
-	if n != 0 {
-		t.Errorf("read %d bytes; expected 0", n)
-	}
-}
-
 type slowReader struct {
 	r io.Reader
 }
@@ -427,3 +397,214 @@ func TestNested(t *testing.T) {
 		t.Fatalf("final outer NextPart = %v; want io.EOF", err)
 	}
 }
+
+type headerBody struct {
+	header textproto.MIMEHeader
+	body   string
+}
+
+func formData(key, value string) headerBody {
+	return headerBody{
+		textproto.MIMEHeader{
+			"Content-Type":        {"text/plain; charset=ISO-8859-1"},
+			"Content-Disposition": {"form-data; name=" + key},
+		},
+		value,
+	}
+}
+
+type parseTest struct {
+	name    string
+	in, sep string
+	want    []headerBody
+}
+
+var parseTests = []parseTest{
+	// Actual body from App Engine on a blob upload. The final part (the
+	// Content-Type: message/external-body) is what App Engine replaces
+	// the uploaded file with.  The other form fields (prefixed with
+	// "other" in their form-data name) are unchanged.  A bug was
+	// reported with blob uploads failing when the other fields were
+	// empty. This was the MIME POST body that previously failed.
+	{
+		name: "App Engine post",
+		sep:  "00151757727e9583fd04bfbca4c6",
+		in:   "--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherEmpty1\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherFoo1\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherFoo2\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherEmpty2\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatFoo\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatFoo\r\n\r\nfoo\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatEmpty\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=otherRepeatEmpty\r\n\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: text/plain; charset=ISO-8859-1\r\nContent-Disposition: form-data; name=submit\r\n\r\nSubmit\r\n--00151757727e9583fd04bfbca4c6\r\nContent-Type: message/external-body; charset=ISO-8859-1; blob-key=AHAZQqG84qllx7HUqO_oou5EvdYQNS3Mbbkb0RjjBoM_Kc1UqEN2ygDxWiyCPulIhpHRPx-VbpB6RX4MrsqhWAi_ZxJ48O9P2cTIACbvATHvg7IgbvZytyGMpL7xO1tlIvgwcM47JNfv_tGhy1XwyEUO8oldjPqg5Q\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\nContent-Type: image/png\r\nContent-Length: 232303\r\nX-AppEngine-Upload-Creation: 2012-05-10 23:14:02.715173\r\nContent-MD5: MzRjODU1ZDZhZGU1NmRlOWEwZmMwMDdlODBmZTA0NzA=\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\n\r\n--00151757727e9583fd04bfbca4c6--",
+		want: []headerBody{
+			formData("otherEmpty1", ""),
+			formData("otherFoo1", "foo"),
+			formData("otherFoo2", "foo"),
+			formData("otherEmpty2", ""),
+			formData("otherRepeatFoo", "foo"),
+			formData("otherRepeatFoo", "foo"),
+			formData("otherRepeatEmpty", ""),
+			formData("otherRepeatEmpty", ""),
+			formData("submit", "Submit"),
+			{textproto.MIMEHeader{
+				"Content-Type":        {"message/external-body; charset=ISO-8859-1; blob-key=AHAZQqG84qllx7HUqO_oou5EvdYQNS3Mbbkb0RjjBoM_Kc1UqEN2ygDxWiyCPulIhpHRPx-VbpB6RX4MrsqhWAi_ZxJ48O9P2cTIACbvATHvg7IgbvZytyGMpL7xO1tlIvgwcM47JNfv_tGhy1XwyEUO8oldjPqg5Q"},
+				"Content-Disposition": {"form-data; name=file; filename=\"fall.png\""},
+			}, "Content-Type: image/png\r\nContent-Length: 232303\r\nX-AppEngine-Upload-Creation: 2012-05-10 23:14:02.715173\r\nContent-MD5: MzRjODU1ZDZhZGU1NmRlOWEwZmMwMDdlODBmZTA0NzA=\r\nContent-Disposition: form-data; name=file; filename=\"fall.png\"\r\n\r\n"},
+		},
+	},
+
+	// Single empty part, ended with --boundary immediately after headers.
+	{
+		name: "single empty part, --boundary",
+		sep:  "abc",
+		in:   "--abc\r\nFoo: bar\r\n\r\n--abc--",
+		want: []headerBody{
+			{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+		},
+	},
+
+	// Single empty part, ended with \r\n--boundary immediately after headers.
+	{
+		name: "single empty part, \r\n--boundary",
+		sep:  "abc",
+		in:   "--abc\r\nFoo: bar\r\n\r\n\r\n--abc--",
+		want: []headerBody{
+			{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+		},
+	},
+
+	// Final part empty.
+	{
+		name: "final part empty",
+		sep:  "abc",
+		in:   "--abc\r\nFoo: bar\r\n\r\n--abc\r\nFoo2: bar2\r\n\r\n--abc--",
+		want: []headerBody{
+			{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+			{textproto.MIMEHeader{"Foo2": {"bar2"}}, ""},
+		},
+	},
+
+	// Final part empty with newlines after final separator.
+	{
+		name: "final part empty then crlf",
+		sep:  "abc",
+		in:   "--abc\r\nFoo: bar\r\n\r\n--abc--\r\n",
+		want: []headerBody{
+			{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+		},
+	},
+
+	// Final part empty with lwsp-chars after final separator.
+	{
+		name: "final part empty then lwsp",
+		sep:  "abc",
+		in:   "--abc\r\nFoo: bar\r\n\r\n--abc-- \t",
+		want: []headerBody{
+			{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+		},
+	},
+
+	// No parts (empty form as submitted by Chrome)
+	{
+		name: "no parts",
+		sep:  "----WebKitFormBoundaryQfEAfzFOiSemeHfA",
+		in:   "------WebKitFormBoundaryQfEAfzFOiSemeHfA--\r\n",
+		want: []headerBody{},
+	},
+
+	// Part containing data starting with the boundary, but with additional suffix.
+	{
+		name: "fake separator as data",
+		sep:  "sep",
+		in:   "--sep\r\nFoo: bar\r\n\r\n--sepFAKE\r\n--sep--",
+		want: []headerBody{
+			{textproto.MIMEHeader{"Foo": {"bar"}}, "--sepFAKE"},
+		},
+	},
+
+	// Part containing a boundary with whitespace following it.
+	{
+		name: "boundary with whitespace",
+		sep:  "sep",
+		in:   "--sep \r\nFoo: bar\r\n\r\ntext\r\n--sep--",
+		want: []headerBody{
+			{textproto.MIMEHeader{"Foo": {"bar"}}, "text"},
+		},
+	},
+
+	// With ignored leading line.
+	{
+		name: "leading line",
+		sep:  "MyBoundary",
+		in: strings.Replace(`This is a multi-part message.  This line is ignored.
+--MyBoundary
+foo: bar
+
+
+--MyBoundary--`, "\n", "\r\n", -1),
+		want: []headerBody{
+			{textproto.MIMEHeader{"Foo": {"bar"}}, ""},
+		},
+	},
+
+	roundTripParseTest(),
+}
+
+func TestParse(t *testing.T) {
+Cases:
+	for _, tt := range parseTests {
+		r := NewReader(strings.NewReader(tt.in), tt.sep)
+		got := []headerBody{}
+		for {
+			p, err := r.NextPart()
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				t.Errorf("in test %q, NextPart: %v", tt.name, err)
+				continue Cases
+			}
+			pbody, err := ioutil.ReadAll(p)
+			if err != nil {
+				t.Errorf("in test %q, error reading part: %v", tt.name, err)
+				continue Cases
+			}
+			got = append(got, headerBody{p.Header, string(pbody)})
+		}
+		if !reflect.DeepEqual(tt.want, got) {
+			t.Errorf("test %q:\n got: %v\nwant: %v", tt.name, got, tt.want)
+			if len(tt.want) != len(got) {
+				t.Errorf("test %q: got %d parts, want %d", tt.name, len(got), len(tt.want))
+			} else if len(got) > 1 {
+				for pi, wantPart := range tt.want {
+					if !reflect.DeepEqual(wantPart, got[pi]) {
+						t.Errorf("test %q, part %d:\n got: %v\nwant: %v", tt.name, pi, got[pi], wantPart)
+					}
+				}
+			}
+		}
+	}
+}
+
+func roundTripParseTest() parseTest {
+	t := parseTest{
+		name: "round trip",
+		want: []headerBody{
+			formData("empty", ""),
+			formData("lf", "\n"),
+			formData("cr", "\r"),
+			formData("crlf", "\r\n"),
+			formData("foo", "bar"),
+		},
+	}
+	var buf bytes.Buffer
+	w := NewWriter(&buf)
+	for _, p := range t.want {
+		pw, err := w.CreatePart(p.header)
+		if err != nil {
+			panic(err)
+		}
+		_, err = pw.Write([]byte(p.body))
+		if err != nil {
+			panic(err)
+		}
+	}
+	w.Close()
+	t.in = buf.String()
+	t.sep = w.Boundary()
+	return t
+}