Commit 932c8ddb authored by Brad Fitzpatrick's avatar Brad Fitzpatrick

cmd/go: allow go get with arbitrary URLs

This CL permits using arbitrary, non-VCS-qualified URLs as
aliases for fully VCS-qualified and/or well-known code hosting
sites.

Example 1) A VCS-qualified URL can now be shorter.

Before:
$ go get camlistore.org/r/p/camlistore.git/pkg/blobref
After:
$ go get camlistore.org/pkg/blobref

Example 2) A custom domain can be used as the import,
referencing a well-known code hosting site.

Before:
$ go get github.com/bradfitz/sonden
After:
$ go get bradfitz.com/pkg/sonden

The mechanism used is a <meta> tag in the HTML document
retrieved from fetching:

    https://<import>?go-get=1  (preferred)
    http://<import>?go-get=1   (fallback)

The meta tag should look like:

<meta name="go-import" content="import-alias-prefix vcs full-repo-root">

The full-repo-root must be a full URL root to a repository containing
a scheme and *not* containing a ".vcs" qualifier.

The vcs is one of "git", "hg", "svn", etc.

The import-alias-prefix must be a prefix or exact match of the
package being fetched with "go get".

If there are multiple meta tags, only the one with a prefix
matching the import path is used. It is an error if multiple
go-import values match the import prefix.

If the import-alias-prefix is not an exact match for the import,
another HTTP fetch is performed, at the declared root (which does
*not* need to be the domain's root).

For example, assuming that "camlistore.org/pkg/blobref" declares
in its HTML head:

<meta name="go-import" content="camlistore.org git https://camlistore.org/r/p/camlistore" />

... then:

$ go get camlistore.org/pkg/blobref

... looks at the following URLs:

   https://camlistore.org/pkg/blobref?go-get=1
   http://camlistore.org/pkg/blobref?go-get=1
   https://camlistore.org/?go-get=1
   http://camlistore.org/?go-get=1

Ultimately it finds, at the root (camlistore.org/), the same go-import:

<meta name="go-import" content="camlistore.org git https://camlistore.org/r/p/camlistore" />

... and proceeds to trust it, checking out git //camlistore.org/r/p/camlistore at
the import path of "camlistore.org" on disk.

Fixes #3099

R=r, rsc, gary.burd, eikeon, untheoretic, n13m3y3r, rsc
CC=golang-dev
https://golang.org/cl/5660051
parent 36708a40
......@@ -10,8 +10,21 @@
package main
import "errors"
import (
"errors"
"io"
)
var errHTTP = errors.New("no http in bootstrap go command")
func httpGET(url string) ([]byte, error) {
return nil, errors.New("no http in bootstrap go command")
return nil, errHTTP
}
func httpsOrHTTP(importPath string) (string, io.ReadCloser, error) {
return "", nil, errHTTP
}
func parseMetaGoImports(r io.Reader) (imports []metaImport) {
panic("unreachable")
}
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !cmd_go_bootstrap
// This code is compiled into the real 'go' binary, but it is not
// compiled into the binary that is built during all.bash, so as
// to avoid needing to build net (and thus use cgo) during the
// bootstrap process.
package main
import (
"encoding/xml"
"io"
"strings"
)
// parseMetaGoImports returns meta imports from the HTML in r.
// Parsing ends at the end of the <head> section or the beginning of the <body>.
func parseMetaGoImports(r io.Reader) (imports []metaImport) {
d := xml.NewDecoder(r)
d.Strict = false
for {
t, err := d.Token()
if err != nil {
return
}
if e, ok := t.(xml.StartElement); ok && strings.EqualFold(e.Name.Local, "body") {
return
}
if e, ok := t.(xml.EndElement); ok && strings.EqualFold(e.Name.Local, "head") {
return
}
e, ok := t.(xml.StartElement)
if !ok || !strings.EqualFold(e.Name.Local, "meta") {
continue
}
if attrValue(e.Attr, "name") != "go-import" {
continue
}
if f := strings.Fields(attrValue(e.Attr, "content")); len(f) == 3 {
imports = append(imports, metaImport{
Prefix: f[0],
VCS: f[1],
RepoRoot: f[2],
})
}
}
return
}
// attrValue returns the attribute value for the case-insensitive key
// `name', or the empty string if nothing is found.
func attrValue(attrs []xml.Attr, name string) string {
for _, a := range attrs {
if strings.EqualFold(a.Name.Local, name) {
return a.Value
}
}
return ""
}
......@@ -162,11 +162,12 @@ func downloadPackage(p *Package) error {
} else {
// Analyze the import path to determine the version control system,
// repository, and the import path for the root of the repository.
vcs, repo, rootPath, err = vcsForImportPath(p.ImportPath)
}
rr, err := repoRootForImportPath(p.ImportPath)
if err != nil {
return err
}
vcs, repo, rootPath = rr.vcs, rr.repo, rr.root
}
if p.build.SrcRoot == "" {
// Package not found. Put in first directory of $GOPATH or else $GOROOT.
......
......@@ -13,8 +13,11 @@ package main
import (
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"net/url"
)
// httpGET returns the data from an HTTP GET request for the given URL.
......@@ -33,3 +36,51 @@ func httpGET(url string) ([]byte, error) {
}
return b, nil
}
// httpClient is the default HTTP client, but a variable so it can be
// changed by tests, without modifying http.DefaultClient.
var httpClient = http.DefaultClient
// httpsOrHTTP returns the body of either the importPath's
// https resource or, if unavailable, the http resource.
func httpsOrHTTP(importPath string) (urlStr string, body io.ReadCloser, err error) {
fetch := func(scheme string) (urlStr string, res *http.Response, err error) {
u, err := url.Parse(scheme + "://" + importPath)
if err != nil {
return "", nil, err
}
u.RawQuery = "go-get=1"
urlStr = u.String()
if buildV {
log.Printf("Fetching %s", urlStr)
}
res, err = httpClient.Get(urlStr)
return
}
closeBody := func(res *http.Response) {
if res != nil {
res.Body.Close()
}
}
urlStr, res, err := fetch("https")
if err != nil || res.StatusCode != 200 {
if buildV {
if err != nil {
log.Printf("https fetch failed.")
} else {
log.Printf("ignoring https fetch with status code %d", res.StatusCode)
}
}
closeBody(res)
urlStr, res, err = fetch("http")
}
if err != nil {
closeBody(res)
log.Printf("http fetch failed")
return "", nil, err
}
// Note: accepting a non-200 OK here, so people can serve a
// meta import in their http 404 page.
log.Printf("Parsing meta tags from %s (status code %d)", urlStr, res.StatusCode)
return urlStr, res.Body, nil
}
......@@ -7,7 +7,9 @@ package main
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"log"
"os"
"os/exec"
"path/filepath"
......@@ -302,12 +304,41 @@ func vcsForDir(p *Package) (vcs *vcsCmd, root string, err error) {
return nil, "", fmt.Errorf("directory %q is not using a known version control system", dir)
}
// vcsForImportPath analyzes importPath to determine the
// repoRoot represents a version control system, a repo, and a root of
// where to put it on disk.
type repoRoot struct {
vcs *vcsCmd
// repo is the repository URL, including scheme
repo string
// root is the import path corresponding to the root of the
// repository
root string
}
// repoRootForImportPath analyzes importPath to determine the
// version control system, and code repository to use.
// On return, repo is the repository URL and root is the
// import path corresponding to the root of the repository
// (thus root is a prefix of importPath).
func vcsForImportPath(importPath string) (vcs *vcsCmd, repo, root string, err error) {
func repoRootForImportPath(importPath string) (*repoRoot, error) {
rr, err := repoRootForImportPathStatic(importPath, "")
if err == errUnknownSite {
rr, err = repoRootForImportDynamic(importPath)
}
return rr, err
}
var errUnknownSite = errors.New("dynamic lookup required to find mapping")
// repoRootForImportPathStatic attempts to map importPath to a
// repoRoot using the commonly-used VCS hosting sites in vcsPaths
// (github.com/user/dir), or from a fully-qualified importPath already
// containing its VCS type (foo.com/repo.git/dir)
//
// If scheme is non-empty, that scheme is forced.
func repoRootForImportPathStatic(importPath, scheme string) (*repoRoot, error) {
if strings.Contains(importPath, "://") {
return nil, fmt.Errorf("invalid import path %q", importPath)
}
for _, srv := range vcsPaths {
if !strings.HasPrefix(importPath, srv.prefix) {
continue
......@@ -315,7 +346,7 @@ func vcsForImportPath(importPath string) (vcs *vcsCmd, repo, root string, err er
m := srv.regexp.FindStringSubmatch(importPath)
if m == nil {
if srv.prefix != "" {
return nil, "", "", fmt.Errorf("invalid %s import path %q", srv.prefix, importPath)
return nil, fmt.Errorf("invalid %s import path %q", srv.prefix, importPath)
}
continue
}
......@@ -338,14 +369,17 @@ func vcsForImportPath(importPath string) (vcs *vcsCmd, repo, root string, err er
}
if srv.check != nil {
if err := srv.check(match); err != nil {
return nil, "", "", err
return nil, err
}
}
vcs := vcsByCmd(match["vcs"])
if vcs == nil {
return nil, "", "", fmt.Errorf("unknown version control system %q", match["vcs"])
return nil, fmt.Errorf("unknown version control system %q", match["vcs"])
}
if srv.ping {
if scheme != "" {
match["repo"] = scheme + "://" + match["repo"]
} else {
for _, scheme := range vcs.scheme {
if vcs.ping(scheme, match["repo"]) == nil {
match["repo"] = scheme + "://" + match["repo"]
......@@ -353,9 +387,109 @@ func vcsForImportPath(importPath string) (vcs *vcsCmd, repo, root string, err er
}
}
}
return vcs, match["repo"], match["root"], nil
}
return nil, "", "", fmt.Errorf("unrecognized import path %q", importPath)
rr := &repoRoot{
vcs: vcs,
repo: match["repo"],
root: match["root"],
}
return rr, nil
}
return nil, errUnknownSite
}
// repoRootForImportDynamic finds a *repoRoot for a custom domain that's not
// statically known by repoRootForImportPathStatic.
//
// This handles "vanity import paths" like "name.tld/pkg/foo".
func repoRootForImportDynamic(importPath string) (*repoRoot, error) {
slash := strings.Index(importPath, "/")
if slash < 0 {
return nil, fmt.Errorf("missing / in import %q", importPath)
}
urlStr, body, err := httpsOrHTTP(importPath)
if err != nil {
return nil, fmt.Errorf("http/https fetch for import %q: %v", importPath, err)
}
defer body.Close()
metaImport, err := matchGoImport(parseMetaGoImports(body), importPath)
if err != nil {
if err != errNoMatch {
return nil, fmt.Errorf("parse %s: %v", urlStr, err)
}
return nil, fmt.Errorf("parse %s: no go-import meta tags", urlStr)
}
if buildV {
log.Printf("get %q: found meta tag %#v at %s", importPath, metaImport, urlStr)
}
// If the import was "uni.edu/bob/project", which said the
// prefix was "uni.edu" and the RepoRoot was "evilroot.com",
// make sure we don't trust Bob and check out evilroot.com to
// "uni.edu" yet (possibly overwriting/preempting another
// non-evil student). Instead, first verify the root and see
// if it matches Bob's claim.
if metaImport.Prefix != importPath {
if buildV {
log.Printf("get %q: verifying non-authoritative meta tag", importPath)
}
urlStr0 := urlStr
urlStr, body, err = httpsOrHTTP(metaImport.Prefix)
if err != nil {
return nil, fmt.Errorf("fetch %s: %v", urlStr, err)
}
imports := parseMetaGoImports(body)
if len(imports) == 0 {
return nil, fmt.Errorf("fetch %s: no go-import meta tag", urlStr)
}
metaImport2, err := matchGoImport(imports, importPath)
if err != nil || metaImport != metaImport2 {
return nil, fmt.Errorf("%s and %s disagree about go-import for %s", urlStr0, urlStr, metaImport.Prefix)
}
}
if !strings.Contains(metaImport.RepoRoot, "://") {
return nil, fmt.Errorf("%s: invalid repo root %q; no scheme", urlStr, metaImport.RepoRoot)
}
rr := &repoRoot{
vcs: vcsByCmd(metaImport.VCS),
repo: metaImport.RepoRoot,
root: metaImport.Prefix,
}
if rr.vcs == nil {
return nil, fmt.Errorf("%s: unknown vcs %q", urlStr, metaImport.VCS)
}
return rr, nil
}
// metaImport represents the parsed <meta name="go-import"
// content="prefix vcs reporoot" /> tags from HTML files.
type metaImport struct {
Prefix, VCS, RepoRoot string
}
// errNoMatch is returned from matchGoImport when there's no applicable match.
var errNoMatch = errors.New("no import match")
// matchGoImport returns the metaImport from imports matching importPath.
// An error is returned if there are multiple matches.
// errNoMatch is returned if none match.
func matchGoImport(imports []metaImport, importPath string) (_ metaImport, err error) {
match := -1
for i, im := range imports {
if !strings.HasPrefix(importPath, im.Prefix) {
continue
}
if match != -1 {
err = fmt.Errorf("multiple meta tags match import path %q", importPath)
return
}
match = i
}
if match == -1 {
err = errNoMatch
return
}
return imports[match], nil
}
// expand rewrites s to replace {k} with match[k] for each key k in match.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment