Commit 874a605a authored by Russ Cox's avatar Russ Cox

net/url: add RawPath field, a hint at the desired encoding of Path

Historically we have declined to try to provide real support for URLs
that contain %2F in the path, but they seem to be popping up more
often, especially in (arguably ill-considered) REST APIs that shoehorn
entire paths into individual path elements.

The obvious thing to do is to introduce a URL.RawPath field that
records the original encoding of Path and then consult it during
URL.String and URL.RequestURI. The problem with the obvious thing
is that it breaks backward compatibility: if someone parses a URL
into u, modifies u.Path, and calls u.String, they expect the result
to use the modified u.Path and not the original raw encoding.

Split the difference by treating u.RawPath as a hint: the observation
is that there are many valid encodings of u.Path. If u.RawPath is one
of them, use it. Otherwise compute the encoding of u.Path as before.

If a client does not use RawPath, the only change will be that String
selects a different valid encoding sometimes (the original passed
to Parse).

This ensures that, for example, HTTP requests use the exact
encoding passed to http.Get (or http.NewRequest, etc).

Also add new URL.EscapedPath method for access to the actual
escaped path. Clients should use EscapedPath instead of
reading RawPath directly.

All the old workarounds remain valid.

Fixes #5777.
Might help #9859.
Fixes #7356.
Fixes #8767.
Fixes #8292.
Fixes #8450.
Fixes #4860.
Fixes #10887.
Fixes #3659.
Fixes #8248.
Fixes #6658.
Reduces need for #2782.

Change-Id: I77b88f14631883a7d74b72d1cf19b0073d4f5473
Reviewed-on: https://go-review.googlesource.com/11302Reviewed-by: 's avatarBrad Fitzpatrick <bradfitz@golang.org>
parent 794c01b8
...@@ -43,6 +43,21 @@ func ExampleURL() { ...@@ -43,6 +43,21 @@ func ExampleURL() {
// Output: https://google.com/search?q=golang // Output: https://google.com/search?q=golang
} }
func ExampleURL_roundtrip() {
// Parse + String preserve the original encoding.
u, err := url.Parse("https://example.com/foo%2fbar")
if err != nil {
log.Fatal(err)
}
fmt.Println(u.Path)
fmt.Println(u.RawPath)
fmt.Println(u.String())
// Output:
// /foo/bar
// /foo%2fbar
// https://example.com/foo%2fbar
}
func ExampleURL_opaque() { func ExampleURL_opaque() {
// Sending a literal '%' in an HTTP request's Path // Sending a literal '%' in an HTTP request's Path
req := &http.Request{ req := &http.Request{
......
...@@ -239,16 +239,24 @@ func escape(s string, mode encoding) string { ...@@ -239,16 +239,24 @@ func escape(s string, mode encoding) string {
// Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/. // Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.
// A consequence is that it is impossible to tell which slashes in the Path were // A consequence is that it is impossible to tell which slashes in the Path were
// slashes in the raw URL and which were %2f. This distinction is rarely important, // slashes in the raw URL and which were %2f. This distinction is rarely important,
// but when it is a client must use other routines to parse the raw URL or construct // but when it is, code must not use Path directly.
// the parsed URL. For example, an HTTP server can consult req.RequestURI, and //
// an HTTP client can use URL{Host: "example.com", Opaque: "//example.com/Go%2f"} // Go 1.5 introduced the RawPath field to hold the encoded form of Path.
// instead of URL{Host: "example.com", Path: "/Go/"}. // The Parse function sets both Path and RawPath in the URL it returns,
// and URL's String method uses RawPath if it is a valid encoding of Path,
// by calling the EncodedPath method.
//
// In earlier versions of Go, the more indirect workarounds were that an
// HTTP server could consult req.RequestURI and an HTTP client could
// construct a URL struct directly and set the Opaque field instead of Path.
// These still work as well.
type URL struct { type URL struct {
Scheme string Scheme string
Opaque string // encoded opaque data Opaque string // encoded opaque data
User *Userinfo // username and password information User *Userinfo // username and password information
Host string // host or host:port Host string // host or host:port
Path string Path string
RawPath string // encoded path hint (Go 1.5 and later only; see EscapedPath method)
RawQuery string // encoded query values, without '?' RawQuery string // encoded query values, without '?'
Fragment string // fragment for references, without '#' Fragment string // fragment for references, without '#'
} }
...@@ -417,6 +425,7 @@ func parse(rawurl string, viaRequest bool) (url *URL, err error) { ...@@ -417,6 +425,7 @@ func parse(rawurl string, viaRequest bool) (url *URL, err error) {
goto Error goto Error
} }
} }
url.RawPath = rest
if url.Path, err = unescape(rest, encodePath); err != nil { if url.Path, err = unescape(rest, encodePath); err != nil {
goto Error goto Error
} }
...@@ -501,6 +510,36 @@ func parseHost(host string) (string, error) { ...@@ -501,6 +510,36 @@ func parseHost(host string) (string, error) {
return host, nil return host, nil
} }
// EscapedPath returns the escaped form of u.Path.
// In general there are multiple possible escaped forms of any path.
// EscapedPath returns u.RawPath when it is a valid escaping of u.Path.
// Otherwise EscapedPath ignores u.RawPath and computes an escaped
// form on its own.
// The String and RequestURI methods use EscapedPath to construct
// their results.
// In general, code should call EscapedPath instead of
// reading u.RawPath directly.
func (u *URL) EscapedPath() string {
if u.RawPath != "" && validEncodedPath(u.RawPath) {
p, err := unescape(u.RawPath, encodePath)
if err == nil && p == u.Path {
return u.RawPath
}
}
return escape(u.Path, encodePath)
}
// validEncodedPath reports whether s is a valid encoded path.
// It must contain any bytes that require escaping during path encoding.
func validEncodedPath(s string) bool {
for i := 0; i < len(s); i++ {
if s[i] != '%' && shouldEscape(s[i], encodePath) {
return false
}
}
return true
}
// String reassembles the URL into a valid URL string. // String reassembles the URL into a valid URL string.
// The general form of the result is one of: // The general form of the result is one of:
// //
...@@ -509,6 +548,7 @@ func parseHost(host string) (string, error) { ...@@ -509,6 +548,7 @@ func parseHost(host string) (string, error) {
// //
// If u.Opaque is non-empty, String uses the first form; // If u.Opaque is non-empty, String uses the first form;
// otherwise it uses the second form. // otherwise it uses the second form.
// To obtain the path, String uses u.EncodedPath().
// //
// In the second form, the following rules apply: // In the second form, the following rules apply:
// - if u.Scheme is empty, scheme: is omitted. // - if u.Scheme is empty, scheme: is omitted.
...@@ -539,10 +579,11 @@ func (u *URL) String() string { ...@@ -539,10 +579,11 @@ func (u *URL) String() string {
buf.WriteString(escape(h, encodeHost)) buf.WriteString(escape(h, encodeHost))
} }
} }
if u.Path != "" && u.Path[0] != '/' && u.Host != "" { path := u.EscapedPath()
if path != "" && path[0] != '/' && u.Host != "" {
buf.WriteByte('/') buf.WriteByte('/')
} }
buf.WriteString(escape(u.Path, encodePath)) buf.WriteString(path)
} }
if u.RawQuery != "" { if u.RawQuery != "" {
buf.WriteByte('?') buf.WriteByte('?')
...@@ -764,7 +805,7 @@ func (u *URL) Query() Values { ...@@ -764,7 +805,7 @@ func (u *URL) Query() Values {
func (u *URL) RequestURI() string { func (u *URL) RequestURI() string {
result := u.Opaque result := u.Opaque
if result == "" { if result == "" {
result = escape(u.Path, encodePath) result = u.EscapedPath()
if result == "" { if result == "" {
result = "/" result = "/"
} }
......
...@@ -13,7 +13,7 @@ import ( ...@@ -13,7 +13,7 @@ import (
type URLTest struct { type URLTest struct {
in string in string
out *URL out *URL // expected parse; RawPath="" means same as Path
roundtrip string // expected result of reserializing the URL; empty means same as "in". roundtrip string // expected result of reserializing the URL; empty means same as "in".
} }
...@@ -41,11 +41,12 @@ var urltests = []URLTest{ ...@@ -41,11 +41,12 @@ var urltests = []URLTest{
{ {
"http://www.google.com/file%20one%26two", "http://www.google.com/file%20one%26two",
&URL{ &URL{
Scheme: "http", Scheme: "http",
Host: "www.google.com", Host: "www.google.com",
Path: "/file one&two", Path: "/file one&two",
RawPath: "/file%20one%26two",
}, },
"http://www.google.com/file%20one&two", "",
}, },
// user // user
{ {
...@@ -98,6 +99,7 @@ var urltests = []URLTest{ ...@@ -98,6 +99,7 @@ var urltests = []URLTest{
Scheme: "http", Scheme: "http",
Host: "www.google.com", Host: "www.google.com",
Path: "/a b", Path: "/a b",
RawPath: "/a%20b",
RawQuery: "q=c+d", RawQuery: "q=c+d",
}, },
"", "",
...@@ -369,6 +371,18 @@ var urltests = []URLTest{ ...@@ -369,6 +371,18 @@ var urltests = []URLTest{
}, },
"http://[fe80::1%25en01-._~]:8080/", "http://[fe80::1%25en01-._~]:8080/",
}, },
// alternate escapings of path survive round trip
{
"http://rest.rsc.io/foo%2fbar/baz%2Fquux?alt=media",
&URL{
Scheme: "http",
Host: "rest.rsc.io",
Path: "/foo/bar/baz/quux",
RawPath: "/foo%2fbar/baz%2Fquux",
RawQuery: "alt=media",
},
"",
},
} }
// more useful string for debugging than fmt's struct printer // more useful string for debugging than fmt's struct printer
...@@ -391,6 +405,9 @@ func DoTest(t *testing.T, parse func(string) (*URL, error), name string, tests [ ...@@ -391,6 +405,9 @@ func DoTest(t *testing.T, parse func(string) (*URL, error), name string, tests [
t.Errorf("%s(%q) returned error %s", name, tt.in, err) t.Errorf("%s(%q) returned error %s", name, tt.in, err)
continue continue
} }
if tt.out.RawPath == "" {
tt.out.RawPath = tt.out.Path
}
if !reflect.DeepEqual(u, tt.out) { if !reflect.DeepEqual(u, tt.out) {
t.Errorf("%s(%q):\n\thave %v\n\twant %v\n", t.Errorf("%s(%q):\n\thave %v\n\twant %v\n",
name, tt.in, ufmt(u), ufmt(tt.out)) name, tt.in, ufmt(u), ufmt(tt.out))
...@@ -973,6 +990,25 @@ var requritests = []RequestURITest{ ...@@ -973,6 +990,25 @@ var requritests = []RequestURITest{
}, },
"http://other.example.com/%2F/%2F/", "http://other.example.com/%2F/%2F/",
}, },
// better fix for issue 4860
{
&URL{
Scheme: "http",
Host: "example.com",
Path: "/////",
RawPath: "/%2F/%2F/",
},
"/%2F/%2F/",
},
{
&URL{
Scheme: "http",
Host: "example.com",
Path: "/////",
RawPath: "/WRONG/", // ignored because doesn't match Path
},
"/////",
},
{ {
&URL{ &URL{
Scheme: "http", Scheme: "http",
...@@ -982,6 +1018,26 @@ var requritests = []RequestURITest{ ...@@ -982,6 +1018,26 @@ var requritests = []RequestURITest{
}, },
"/a%20b?q=go+language", "/a%20b?q=go+language",
}, },
{
&URL{
Scheme: "http",
Host: "example.com",
Path: "/a b",
RawPath: "/a b", // ignored because invalid
RawQuery: "q=go+language",
},
"/a%20b?q=go+language",
},
{
&URL{
Scheme: "http",
Host: "example.com",
Path: "/a?b",
RawPath: "/a?b", // ignored because invalid
RawQuery: "q=go+language",
},
"/a%3Fb?q=go+language",
},
{ {
&URL{ &URL{
Scheme: "myschema", Scheme: "myschema",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment