Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
G
golang
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
go
golang
Commits
98176b77
Commit
98176b77
authored
Apr 18, 2011
by
Brad Fitzpatrick
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
mime: RFC 2231 continuation / non-ASCII support
Fixes #1119. R=rsc, r CC=golang-dev
https://golang.org/cl/4437052
parent
23fc9c84
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
203 additions
and
9 deletions
+203
-9
mediatype.go
src/pkg/mime/mediatype.go
+146
-7
mediatype_test.go
src/pkg/mime/mediatype_test.go
+57
-2
No files found.
src/pkg/mime/mediatype.go
View file @
98176b77
...
...
@@ -6,6 +6,8 @@ package mime
import
(
"bytes"
"fmt"
"os"
"strings"
"unicode"
)
...
...
@@ -46,11 +48,16 @@ func ParseMediaType(v string) (mediatype string, params map[string]string) {
params
=
make
(
map
[
string
]
string
)
// Map of base parameter name -> parameter name -> value
// for parameters containing a '*' character.
// Lazily initialized.
var
continuation
map
[
string
]
map
[
string
]
string
v
=
v
[
i
:
]
for
len
(
v
)
>
0
{
v
=
strings
.
TrimLeftFunc
(
v
,
unicode
.
IsSpace
)
if
len
(
v
)
==
0
{
return
break
}
key
,
value
,
rest
:=
consumeMediaParam
(
v
)
if
key
==
""
{
...
...
@@ -62,12 +69,83 @@ func ParseMediaType(v string) (mediatype string, params map[string]string) {
// Parse error.
return
""
,
nil
}
params
[
key
]
=
value
pmap
:=
params
if
idx
:=
strings
.
Index
(
key
,
"*"
);
idx
!=
-
1
{
baseName
:=
key
[
:
idx
]
if
continuation
==
nil
{
continuation
=
make
(
map
[
string
]
map
[
string
]
string
)
}
var
ok
bool
if
pmap
,
ok
=
continuation
[
baseName
];
!
ok
{
continuation
[
baseName
]
=
make
(
map
[
string
]
string
)
pmap
=
continuation
[
baseName
]
}
}
if
_
,
exists
:=
pmap
[
key
];
exists
{
// Duplicate parameter name is bogus.
return
""
,
nil
}
pmap
[
key
]
=
value
v
=
rest
}
// Stitch together any continuations or things with stars
// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
var
buf
bytes
.
Buffer
for
key
,
pieceMap
:=
range
continuation
{
singlePartKey
:=
key
+
"*"
if
v
,
ok
:=
pieceMap
[
singlePartKey
];
ok
{
decv
:=
decode2231Enc
(
v
)
params
[
key
]
=
decv
continue
}
buf
.
Reset
()
valid
:=
false
for
n
:=
0
;
;
n
++
{
simplePart
:=
fmt
.
Sprintf
(
"%s*%d"
,
key
,
n
)
if
v
,
ok
:=
pieceMap
[
simplePart
];
ok
{
valid
=
true
buf
.
WriteString
(
v
)
continue
}
encodedPart
:=
simplePart
+
"*"
if
v
,
ok
:=
pieceMap
[
encodedPart
];
ok
{
valid
=
true
if
n
==
0
{
buf
.
WriteString
(
decode2231Enc
(
v
))
}
else
{
decv
,
_
:=
percentHexUnescape
(
v
)
buf
.
WriteString
(
decv
)
}
}
else
{
break
}
}
if
valid
{
params
[
key
]
=
buf
.
String
()
}
}
return
}
func
decode2231Enc
(
v
string
)
string
{
sv
:=
strings
.
Split
(
v
,
"'"
,
3
)
if
len
(
sv
)
!=
3
{
return
""
}
// Ignoring lang in sv[1] for now.
charset
:=
strings
.
ToLower
(
sv
[
0
])
if
charset
!=
"us-ascii"
&&
charset
!=
"utf-8"
{
// TODO: unsupported encoding
return
""
}
encv
,
_
:=
percentHexUnescape
(
sv
[
2
])
return
encv
}
func
isNotTokenChar
(
rune
int
)
bool
{
return
!
IsTokenChar
(
rune
)
}
...
...
@@ -107,17 +185,14 @@ func consumeValue(v string) (value, rest string) {
for
idx
,
rune
=
range
rest
{
switch
{
case
nextIsLiteral
:
if
rune
>=
0x80
{
return
""
,
v
}
buffer
.
WriteRune
(
rune
)
nextIsLiteral
=
false
case
rune
==
leadQuote
:
return
buffer
.
String
(),
rest
[
idx
+
1
:
]
case
IsQText
(
rune
)
:
buffer
.
WriteRune
(
rune
)
case
rune
==
'\\'
:
nextIsLiteral
=
true
case
rune
!=
'\r'
&&
rune
!=
'\n'
:
buffer
.
WriteRune
(
rune
)
default
:
return
""
,
v
}
...
...
@@ -137,6 +212,7 @@ func consumeMediaParam(v string) (param, value, rest string) {
if
param
==
""
{
return
""
,
""
,
v
}
rest
=
strings
.
TrimLeftFunc
(
rest
,
unicode
.
IsSpace
)
if
!
strings
.
HasPrefix
(
rest
,
"="
)
{
return
""
,
""
,
v
...
...
@@ -149,3 +225,66 @@ func consumeMediaParam(v string) (param, value, rest string) {
}
return
param
,
value
,
rest
}
func
percentHexUnescape
(
s
string
)
(
string
,
os
.
Error
)
{
// Count %, check that they're well-formed.
percents
:=
0
for
i
:=
0
;
i
<
len
(
s
);
{
if
s
[
i
]
!=
'%'
{
i
++
continue
}
percents
++
if
i
+
2
>=
len
(
s
)
||
!
ishex
(
s
[
i
+
1
])
||
!
ishex
(
s
[
i
+
2
])
{
s
=
s
[
i
:
]
if
len
(
s
)
>
3
{
s
=
s
[
0
:
3
]
}
return
""
,
fmt
.
Errorf
(
"Bogus characters after %: %q"
,
s
)
}
i
+=
3
}
if
percents
==
0
{
return
s
,
nil
}
t
:=
make
([]
byte
,
len
(
s
)
-
2
*
percents
)
j
:=
0
for
i
:=
0
;
i
<
len
(
s
);
{
switch
s
[
i
]
{
case
'%'
:
t
[
j
]
=
unhex
(
s
[
i
+
1
])
<<
4
|
unhex
(
s
[
i
+
2
])
j
++
i
+=
3
default
:
t
[
j
]
=
s
[
i
]
j
++
i
++
}
}
return
string
(
t
),
nil
}
// ishex reports whether c is an ASCII hexadecimal digit:
// '0'-'9', 'a'-'f' or 'A'-'F'.
func ishex(c byte) bool {
	if c >= '0' && c <= '9' {
		return true
	}
	if c >= 'a' && c <= 'f' {
		return true
	}
	return c >= 'A' && c <= 'F'
}
// unhex returns the numeric value (0-15) of the ASCII hexadecimal digit c.
// Both lower-case and upper-case letters are accepted. Any byte that is
// not a hexadecimal digit yields 0.
func unhex(c byte) byte {
	if c >= '0' && c <= '9' {
		return c - '0'
	}
	if c >= 'a' && c <= 'f' {
		return 10 + (c - 'a')
	}
	if c >= 'A' && c <= 'F' {
		return 10 + (c - 'A')
	}
	return 0
}
src/pkg/mime/mediatype_test.go
View file @
98176b77
...
...
@@ -114,6 +114,28 @@ func TestParseMediaType(t *testing.T) {
"form-data"
,
m
(
"key"
,
"value"
,
"blah"
,
"value"
,
"name"
,
"foo"
)},
{
`foo; key=val1; key=the-key-appears-again-which-is-bogus`
,
""
,
m
()},
// From RFC 2231:
{
`application/x-stuff; title*=us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A`
,
"application/x-stuff"
,
m
(
"title"
,
"This is ***fun***"
)},
{
`message/external-body; access-type=URL; `
+
`URL*0="ftp://";`
+
`URL*1="cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"`
,
"message/external-body"
,
m
(
"access-type"
,
"URL"
,
"URL"
,
"ftp://cs.utk.edu/pub/moore/bulk-mailer/bulk-mailer.tar"
)},
{
`application/x-stuff; `
+
`title*0*=us-ascii'en'This%20is%20even%20more%20; `
+
`title*1*=%2A%2A%2Afun%2A%2A%2A%20; `
+
`title*2="isn't it!"`
,
"application/x-stuff"
,
m
(
"title"
,
"This is even more ***fun*** isn't it!"
)},
// Tests from http://greenbytes.de/tech/tc2231/
// TODO(bradfitz): add the rest of the tests from that site.
{
`attachment; filename="f\oo.html"`
,
...
...
@@ -159,8 +181,41 @@ func TestParseMediaType(t *testing.T) {
"attachment"
,
m
(
"creation-date"
,
"Wed, 12 Feb 1997 16:29:51 -0500"
)},
{
`foobar`
,
"foobar"
,
m
()},
// TODO(bradfitz): rest of them, including RFC2231 encoded UTF-8 and
// other charsets.
{
`attachment; filename* =UTF-8''foo-%c3%a4.html`
,
"attachment"
,
m
(
"filename"
,
"foo-ä.html"
)},
{
`attachment; filename*=UTF-8''A-%2541.html`
,
"attachment"
,
m
(
"filename"
,
"A-%41.html"
)},
{
`attachment; filename*0="foo."; filename*1="html"`
,
"attachment"
,
m
(
"filename"
,
"foo.html"
)},
{
`attachment; filename*0*=UTF-8''foo-%c3%a4; filename*1=".html"`
,
"attachment"
,
m
(
"filename"
,
"foo-ä.html"
)},
{
`attachment; filename*0="foo"; filename*01="bar"`
,
"attachment"
,
m
(
"filename"
,
"foo"
)},
{
`attachment; filename*0="foo"; filename*2="bar"`
,
"attachment"
,
m
(
"filename"
,
"foo"
)},
{
`attachment; filename*1="foo"; filename*2="bar"`
,
"attachment"
,
m
()},
{
`attachment; filename*1="bar"; filename*0="foo"`
,
"attachment"
,
m
(
"filename"
,
"foobar"
)},
{
`attachment; filename="foo-ae.html"; filename*=UTF-8''foo-%c3%a4.html`
,
"attachment"
,
m
(
"filename"
,
"foo-ä.html"
)},
{
`attachment; filename*=UTF-8''foo-%c3%a4.html; filename="foo-ae.html"`
,
"attachment"
,
m
(
"filename"
,
"foo-ä.html"
)},
// Browsers also just send UTF-8 directly without RFC 2231,
// at least when the source page is served with UTF-8.
{
`form-data; firstname="Брэд"; lastname="Фицпатрик"`
,
"form-data"
,
m
(
"firstname"
,
"Брэд"
,
"lastname"
,
"Фицпатрик"
)},
}
for
_
,
test
:=
range
tests
{
mt
,
params
:=
ParseMediaType
(
test
.
in
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment