Commit 7f52b439 authored by Conrad Irwin, committed by Brad Fitzpatrick

net/mail: allow utf-8 in ParseAddress

The existing implementation correctly supported RFC 5322; this
change adds support for UTF-8 while parsing, as specified by
RFC 6532. The serialization code is unchanged, so emails created
by Go remain compatible with very legacy systems.

Fixes #14260

Change-Id: Ib57e510f5834d273605e1892679f2df19ea931b1
Reviewed-on: https://go-review.googlesource.com/19687
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Alexandre Cesaro <alexandre.cesaro@gmail.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
parent 89a1f028
...@@ -5,13 +5,15 @@ ...@@ -5,13 +5,15 @@
/* /*
Package mail implements parsing of mail messages. Package mail implements parsing of mail messages.
For the most part, this package follows the syntax as specified by RFC 5322. For the most part, this package follows the syntax as specified by RFC 5322 and
extended by RFC 6532.
Notable divergences: Notable divergences:
* Obsolete address formats are not parsed, including addresses with * Obsolete address formats are not parsed, including addresses with
embedded route information. embedded route information.
* Group addresses are not parsed. * Group addresses are not parsed.
* The full range of spacing (the CFWS syntax element) is not supported, * The full range of spacing (the CFWS syntax element) is not supported,
such as breaking addresses across lines. such as breaking addresses across lines.
* No unicode normalization is performed.
*/ */
package mail package mail
...@@ -26,6 +28,7 @@ import ( ...@@ -26,6 +28,7 @@ import (
"net/textproto" "net/textproto"
"strings" "strings"
"time" "time"
"unicode/utf8"
) )
var debug = debugT(false) var debug = debugT(false)
...@@ -180,15 +183,12 @@ func (a *Address) String() string { ...@@ -180,15 +183,12 @@ func (a *Address) String() string {
} }
// Add quotes if needed // Add quotes if needed
// TODO: rendering quoted local part and rendering printable name
// should be merged in helper function.
quoteLocal := false quoteLocal := false
for i := 0; i < len(local); i++ { for i, r := range local {
ch := local[i] if isAtext(r, false) {
if isAtext(ch, false) {
continue continue
} }
if ch == '.' { if r == '.' {
// Dots are okay if they are surrounded by atext. // Dots are okay if they are surrounded by atext.
// We only need to check that the previous byte is // We only need to check that the previous byte is
// not a dot, and this isn't the end of the string. // not a dot, and this isn't the end of the string.
...@@ -212,25 +212,16 @@ func (a *Address) String() string { ...@@ -212,25 +212,16 @@ func (a *Address) String() string {
// If every character is printable ASCII, quoting is simple. // If every character is printable ASCII, quoting is simple.
allPrintable := true allPrintable := true
for i := 0; i < len(a.Name); i++ { for _, r := range a.Name {
// isWSP here should actually be isFWS, // isWSP here should actually be isFWS,
// but we don't support folding yet. // but we don't support folding yet.
if !isVchar(a.Name[i]) && !isWSP(a.Name[i]) { if !isVchar(r) && !isWSP(r) || isMultibyte(r) {
allPrintable = false allPrintable = false
break break
} }
} }
if allPrintable { if allPrintable {
b := bytes.NewBufferString(`"`) return quoteString(a.Name) + " " + s
for i := 0; i < len(a.Name); i++ {
if !isQtext(a.Name[i]) && !isWSP(a.Name[i]) {
b.WriteByte('\\')
}
b.WriteByte(a.Name[i])
}
b.WriteString(`" `)
b.WriteString(s)
return b.String()
} }
// Text in an encoded-word in a display-name must not contain certain // Text in an encoded-word in a display-name must not contain certain
...@@ -427,29 +418,48 @@ func (p *addrParser) consumePhrase() (phrase string, err error) { ...@@ -427,29 +418,48 @@ func (p *addrParser) consumePhrase() (phrase string, err error) {
func (p *addrParser) consumeQuotedString() (qs string, err error) { func (p *addrParser) consumeQuotedString() (qs string, err error) {
// Assume first byte is '"'. // Assume first byte is '"'.
i := 1 i := 1
qsb := make([]byte, 0, 10) qsb := make([]rune, 0, 10)
escaped := false
Loop: Loop:
for { for {
if i >= p.len() { r, size := utf8.DecodeRuneInString(p.s[i:])
return "", errors.New("mail: unclosed quoted-string")
} switch {
switch c := p.s[i]; { case size == 0:
case c == '"':
break Loop
case c == '\\':
if i+1 == p.len() {
return "", errors.New("mail: unclosed quoted-string") return "", errors.New("mail: unclosed quoted-string")
case size == 1 && r == utf8.RuneError:
return "", fmt.Errorf("mail: invalid utf-8 in quoted-string: %q", p.s)
case escaped:
// quoted-pair = ("\" (VCHAR / WSP))
if !isVchar(r) && !isWSP(r) {
return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
} }
qsb = append(qsb, p.s[i+1])
i += 2 qsb = append(qsb, r)
case isQtext(c), c == ' ': escaped = false
case isQtext(r) || isWSP(r):
// qtext (printable US-ASCII excluding " and \), or // qtext (printable US-ASCII excluding " and \), or
// FWS (almost; we're ignoring CRLF) // FWS (almost; we're ignoring CRLF)
qsb = append(qsb, c) qsb = append(qsb, r)
i++
case r == '"':
break Loop
case r == '\\':
escaped = true
default: default:
return "", fmt.Errorf("mail: bad character in quoted-string: %q", c) return "", fmt.Errorf("mail: bad character in quoted-string: %q", r)
} }
i += size
} }
p.s = p.s[i+1:] p.s = p.s[i+1:]
if len(qsb) == 0 { if len(qsb) == 0 {
...@@ -458,24 +468,32 @@ Loop: ...@@ -458,24 +468,32 @@ Loop:
return string(qsb), nil return string(qsb), nil
} }
var errNonASCII = errors.New("mail: unencoded non-ASCII text in address")
// consumeAtom parses an RFC 5322 atom at the start of p. // consumeAtom parses an RFC 5322 atom at the start of p.
// If dot is true, consumeAtom parses an RFC 5322 dot-atom instead. // If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
// If permissive is true, consumeAtom will not fail on // If permissive is true, consumeAtom will not fail on
// leading/trailing/double dots in the atom (see golang.org/issue/4938). // leading/trailing/double dots in the atom (see golang.org/issue/4938).
func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) { func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
if c := p.peek(); !isAtext(c, false) { i := 0
if c > 127 {
return "", errNonASCII Loop:
} for {
return "", errors.New("mail: invalid string") r, size := utf8.DecodeRuneInString(p.s[i:])
switch {
case size == 1 && r == utf8.RuneError:
return "", fmt.Errorf("mail: invalid utf-8 in address: %q", p.s)
case size == 0 || !isAtext(r, dot):
break Loop
default:
i += size
} }
i := 1
for ; i < p.len() && isAtext(p.s[i], dot); i++ {
} }
if i < p.len() && p.s[i] > 127 {
return "", errNonASCII if i == 0 {
return "", errors.New("mail: invalid string")
} }
atom, p.s = p.s[:i], p.s[i:] atom, p.s = p.s[:i], p.s[i:]
if !permissive { if !permissive {
...@@ -547,54 +565,58 @@ func (e charsetError) Error() string { ...@@ -547,54 +565,58 @@ func (e charsetError) Error() string {
return fmt.Sprintf("charset not supported: %q", string(e)) return fmt.Sprintf("charset not supported: %q", string(e))
} }
var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" + // isAtext reports whether r is an RFC 5322 atext character.
"abcdefghijklmnopqrstuvwxyz" +
"0123456789" +
"!#$%&'*+-/=?^_`{|}~")
// isAtext reports whether c is an RFC 5322 atext character.
// If dot is true, period is included. // If dot is true, period is included.
func isAtext(c byte, dot bool) bool { func isAtext(r rune, dot bool) bool {
if dot && c == '.' { switch r {
return true case '.':
return dot
case '(', ')', '<', '>', '[', ']', ':', ';', '@', '\\', ',', '"': // RFC 5322 3.2.3. specials
return false
} }
return bytes.IndexByte(atextChars, c) >= 0 return isVchar(r)
} }
// isQtext reports whether c is an RFC 5322 qtext character. // isQtext reports whether r is an RFC 5322 qtext character.
func isQtext(c byte) bool { func isQtext(r rune) bool {
// Printable US-ASCII, excluding backslash or quote. // Printable US-ASCII, excluding backslash or quote.
if c == '\\' || c == '"' { if r == '\\' || r == '"' {
return false return false
} }
return '!' <= c && c <= '~' return isVchar(r)
} }
// quoteString renders a string as an RFC 5322 quoted-string. // quoteString renders a string as an RFC 5322 quoted-string.
func quoteString(s string) string { func quoteString(s string) string {
var buf bytes.Buffer var buf bytes.Buffer
buf.WriteByte('"') buf.WriteByte('"')
for _, c := range s { for _, r := range s {
ch := byte(c) if isQtext(r) || isWSP(r) {
if isQtext(ch) || isWSP(ch) { buf.WriteRune(r)
buf.WriteByte(ch) } else if isVchar(r) {
} else if isVchar(ch) {
buf.WriteByte('\\') buf.WriteByte('\\')
buf.WriteByte(ch) buf.WriteRune(r)
} }
} }
buf.WriteByte('"') buf.WriteByte('"')
return buf.String() return buf.String()
} }
// isVchar reports whether c is an RFC 5322 VCHAR character. // isVchar reports whether r is an RFC 5322 VCHAR character.
func isVchar(c byte) bool { func isVchar(r rune) bool {
// Visible (printing) characters. // Visible (printing) characters.
return '!' <= c && c <= '~' return '!' <= r && r <= '~' || isMultibyte(r)
}
// isMultibyte reports whether r is a multi-byte UTF-8 character
// as supported by RFC 6532
func isMultibyte(r rune) bool {
return r >= utf8.RuneSelf
} }
// isWSP reports whether c is a WSP (white space). // isWSP reports whether r is a WSP (white space).
// WSP is a space or horizontal tab (RFC 5234 Appendix B). // WSP is a space or horizontal tab (RFC 5234 Appendix B).
func isWSP(c byte) bool { func isWSP(r rune) bool {
return c == ' ' || c == '\t' return r == ' ' || r == '\t'
} }
...@@ -125,8 +125,12 @@ func TestAddressParsingError(t *testing.T) { ...@@ -125,8 +125,12 @@ func TestAddressParsingError(t *testing.T) {
wantErrText string wantErrText string
}{ }{
0: {"=?iso-8859-2?Q?Bogl=E1rka_Tak=E1cs?= <unknown@gmail.com>", "charset not supported"}, 0: {"=?iso-8859-2?Q?Bogl=E1rka_Tak=E1cs?= <unknown@gmail.com>", "charset not supported"},
1: {"µ <micro@example.net>", "unencoded non-ASCII text in address"}, 1: {"a@gmail.com b@gmail.com", "expected single address"},
2: {"a@gmail.com b@gmail.com", "expected single address"}, 2: {string([]byte{0xed, 0xa0, 0x80}) + " <micro@example.net>", "invalid utf-8 in address"},
3: {"\"" + string([]byte{0xed, 0xa0, 0x80}) + "\" <half-surrogate@example.com>", "invalid utf-8 in quoted-string"},
4: {"\"\\" + string([]byte{0x80}) + "\" <escaped-invalid-unicode@example.net>", "invalid utf-8 in quoted-string"},
5: {"\"\x00\" <null@example.net>", "bad character in quoted-string"},
6: {"\"\\\x00\" <escaped-null@example.net>", "bad character in quoted-string"},
} }
for i, tc := range mustErrTestCases { for i, tc := range mustErrTestCases {
...@@ -266,6 +270,46 @@ func TestAddressParsing(t *testing.T) { ...@@ -266,6 +270,46 @@ func TestAddressParsing(t *testing.T) {
}, },
}, },
}, },
// RFC 6532 3.2.3, qtext /= UTF8-non-ascii
{
`"Gø Pher" <gopher@example.com>`,
[]*Address{
{
Name: `Gø Pher`,
Address: "gopher@example.com",
},
},
},
// RFC 6532 3.2, atext /= UTF8-non-ascii
{
`µ <micro@example.com>`,
[]*Address{
{
Name: `µ`,
Address: "micro@example.com",
},
},
},
// RFC 6532 3.2.2, local address parts allow UTF-8
{
`Micro <µ@example.com>`,
[]*Address{
{
Name: `Micro`,
Address: "µ@example.com",
},
},
},
// RFC 6532 3.2.4, domains parts allow UTF-8
{
`Micro <micro@µ.example.com>`,
[]*Address{
{
Name: `Micro`,
Address: "micro@µ.example.com",
},
},
},
} }
for _, test := range tests { for _, test := range tests {
if len(test.exp) == 1 { if len(test.exp) == 1 {
...@@ -517,6 +561,11 @@ func TestAddressString(t *testing.T) { ...@@ -517,6 +561,11 @@ func TestAddressString(t *testing.T) {
&Address{Name: "world?=", Address: "hello@world.com"}, &Address{Name: "world?=", Address: "hello@world.com"},
`"world?=" <hello@world.com>`, `"world?=" <hello@world.com>`,
}, },
{
// should q-encode even for invalid utf-8.
&Address{Name: string([]byte{0xed, 0xa0, 0x80}), Address: "invalid-utf8@example.net"},
"=?utf-8?q?=ED=A0=80?= <invalid-utf8@example.net>",
},
} }
for _, test := range tests { for _, test := range tests {
s := test.addr.String() s := test.addr.String()
...@@ -612,7 +661,6 @@ func TestAddressParsingAndFormatting(t *testing.T) { ...@@ -612,7 +661,6 @@ func TestAddressParsingAndFormatting(t *testing.T) {
`< @example.com>`, `< @example.com>`,
`<""test""blah""@example.com>`, `<""test""blah""@example.com>`,
`<""@0>`, `<""@0>`,
"<\"\t0\"@0>",
} }
for _, test := range badTests { for _, test := range badTests {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment