vendor: add golang.org/x/crypto/{chacha20poly1305,poly1305}

This change imports the chacha20poly1305 and poly1305 packages from x/crypto at 5f4e837b98443e9e7a65072235205993af565d85. These packages will be used to support the ChaCha20-Poly1305 AEAD in crypto/tls. Change-Id: I1a38d671ef9aeff3bc41e3924655883d465a5617 Reviewed-on: https://go-review.googlesource.com/30956Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

vendor: add golang.org/x/crypto/{chacha20poly1305,poly1305}
This change imports the chacha20poly1305 and poly1305 packages from x/crypto at 5f4e837b98443e9e7a65072235205993af565d85. These packages will be used to support the ChaCha20-Poly1305 AEAD in crypto/tls. Change-Id: I1a38d671ef9aeff3bc41e3924655883d465a5617 Reviewed-on: https://go-review.googlesource.com/30956Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
77b6a08e · Adam Langley · 9a97c3bf · 77b6a08e · 77b6a08e · 77b6a08e
Commit 77b6a08e authored Oct 12, 2016 by Adam Langley
16 changed files
--- a/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305.go
+++ b/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305.go
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package chacha20poly1305 implements the ChaCha20-Poly1305 AEAD as specified in RFC 7539.
+package chacha20poly1305
+
+import (
+	"crypto/cipher"
+	"errors"
+)
+
+const (
+	// KeySize is the size of the key used by this AEAD, in bytes.
+	KeySize = 32
+	// NonceSize is the size of the nonce used with this AEAD, in bytes.
+	NonceSize = 12
+)
+
+type chacha20poly1305 struct {
+	key [32]byte
+}
+
+// New returns a ChaCha20-Poly1305 AEAD that uses the given, 256-bit key.
+func New(key []byte) (cipher.AEAD, error) {
+	if len(key) != KeySize {
+		return nil, errors.New("chacha20poly1305: bad key length")
+	}
+	ret := new(chacha20poly1305)
+	copy(ret.key[:], key)
+	return ret, nil
+}
+
+func (c *chacha20poly1305) NonceSize() int {
+	return NonceSize
+}
+
+func (c *chacha20poly1305) Overhead() int {
+	return 16
+}
+
+func (c *chacha20poly1305) Seal(dst, nonce, plaintext, additionalData []byte) []byte {
+	if len(nonce) != NonceSize {
+		panic("chacha20poly1305: bad nonce length passed to Seal")
+	}
+
+	if uint64(len(plaintext)) > (1<<38)-64 {
+		panic("chacha20poly1305: plaintext too large")
+	}
+
+	return c.seal(dst, nonce, plaintext, additionalData)
+}
+
+var errOpen = errors.New("chacha20poly1305: message authentication failed")
+
+func (c *chacha20poly1305) Open(dst, nonce, ciphertext, additionalData []byte) ([]byte, error) {
+	if len(nonce) != NonceSize {
+		panic("chacha20poly1305: bad nonce length passed to Open")
+	}
+	if len(ciphertext) < 16 {
+		return nil, errOpen
+	}
+	if uint64(len(ciphertext)) > (1<<38)-48 {
+		panic("chacha20poly1305: ciphertext too large")
+	}
+
+	return c.open(dst, nonce, ciphertext, additionalData)
+}
+
+// sliceForAppend takes a slice and a requested number of bytes. It returns a
+// slice with the contents of the given slice followed by that many bytes and a
+// second slice that aliases into it and contains only the extra bytes. If the
+// original slice has sufficient capacity then no allocation is performed.
+func sliceForAppend(in []byte, n int) (head, tail []byte) {
+	if total := len(in) + n; cap(in) >= total {
+		head = in[:total]
+	} else {
+		head = make([]byte, total)
+		copy(head, in)
+	}
+	tail = head[len(in):]
+	return
+}
--- a/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.go
+++ b/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.go
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64,go1.7
+
+package chacha20poly1305
+
+import "encoding/binary"
+
+//go:noescape
+func chacha20Poly1305Open(dst []byte, key []uint32, src, ad []byte) bool
+
+//go:noescape
+func chacha20Poly1305Seal(dst []byte, key []uint32, src, ad []byte)
+
+//go:noescape
+func haveSSSE3() bool
+
+var canUseASM bool
+
+func init() {
+	canUseASM = haveSSSE3()
+}
+
+// setupState writes a ChaCha20 input matrix to state. See
+// https://tools.ietf.org/html/rfc7539#section-2.3.
+func setupState(state *[16]uint32, key *[32]byte, nonce []byte) {
+	state[0] = 0x61707865
+	state[1] = 0x3320646e
+	state[2] = 0x79622d32
+	state[3] = 0x6b206574
+
+	state[4] = binary.LittleEndian.Uint32(key[:4])
+	state[5] = binary.LittleEndian.Uint32(key[4:8])
+	state[6] = binary.LittleEndian.Uint32(key[8:12])
+	state[7] = binary.LittleEndian.Uint32(key[12:16])
+	state[8] = binary.LittleEndian.Uint32(key[16:20])
+	state[9] = binary.LittleEndian.Uint32(key[20:24])
+	state[10] = binary.LittleEndian.Uint32(key[24:28])
+	state[11] = binary.LittleEndian.Uint32(key[28:32])
+
+	state[12] = 0
+	state[13] = binary.LittleEndian.Uint32(nonce[:4])
+	state[14] = binary.LittleEndian.Uint32(nonce[4:8])
+	state[15] = binary.LittleEndian.Uint32(nonce[8:12])
+}
+
+func (c *chacha20poly1305) seal(dst, nonce, plaintext, additionalData []byte) []byte {
+	if !canUseASM {
+		return c.sealGeneric(dst, nonce, plaintext, additionalData)
+	}
+
+	var state [16]uint32
+	setupState(&state, &c.key, nonce)
+
+	ret, out := sliceForAppend(dst, len(plaintext)+16)
+	chacha20Poly1305Seal(out[:], state[:], plaintext, additionalData)
+	return ret
+}
+
+func (c *chacha20poly1305) open(dst, nonce, ciphertext, additionalData []byte) ([]byte, error) {
+	if !canUseASM {
+		return c.openGeneric(dst, nonce, ciphertext, additionalData)
+	}
+
+	var state [16]uint32
+	setupState(&state, &c.key, nonce)
+
+	ciphertext = ciphertext[:len(ciphertext)-16]
+	ret, out := sliceForAppend(dst, len(ciphertext))
+	if !chacha20Poly1305Open(out, state[:], ciphertext, additionalData) {
+		for i := range out {
+			out[i] = 0
+		}
+		return nil, errOpen
+	}
+
+	return ret, nil
+}
--- a/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.s
+++ b/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.s
--- a/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_generic.go
+++ b/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_generic.go
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package chacha20poly1305
+
+import (
+	"encoding/binary"
+
+	"golang_org/x/crypto/chacha20poly1305/internal/chacha20"
+	"golang_org/x/crypto/poly1305"
+)
+
+func roundTo16(n int) int {
+	return 16 * ((n + 15) / 16)
+}
+
+func (c *chacha20poly1305) sealGeneric(dst, nonce, plaintext, additionalData []byte) []byte {
+	var counter [16]byte
+	copy(counter[4:], nonce)
+
+	var polyKey [32]byte
+	chacha20.XORKeyStream(polyKey[:], polyKey[:], &counter, &c.key)
+
+	ret, out := sliceForAppend(dst, len(plaintext)+poly1305.TagSize)
+	counter[0] = 1
+	chacha20.XORKeyStream(out, plaintext, &counter, &c.key)
+
+	polyInput := make([]byte, roundTo16(len(additionalData))+roundTo16(len(plaintext))+8+8)
+	copy(polyInput, additionalData)
+	copy(polyInput[roundTo16(len(additionalData)):], out[:len(plaintext)])
+	binary.LittleEndian.PutUint64(polyInput[len(polyInput)-16:], uint64(len(additionalData)))
+	binary.LittleEndian.PutUint64(polyInput[len(polyInput)-8:], uint64(len(plaintext)))
+
+	var tag [poly1305.TagSize]byte
+	poly1305.Sum(&tag, polyInput, &polyKey)
+	copy(out[len(plaintext):], tag[:])
+
+	return ret
+}
+
+func (c *chacha20poly1305) openGeneric(dst, nonce, ciphertext, additionalData []byte) ([]byte, error) {
+	var tag [poly1305.TagSize]byte
+	copy(tag[:], ciphertext[len(ciphertext)-16:])
+	ciphertext = ciphertext[:len(ciphertext)-16]
+
+	var counter [16]byte
+	copy(counter[4:], nonce)
+
+	var polyKey [32]byte
+	chacha20.XORKeyStream(polyKey[:], polyKey[:], &counter, &c.key)
+
+	polyInput := make([]byte, roundTo16(len(additionalData))+roundTo16(len(ciphertext))+8+8)
+	copy(polyInput, additionalData)
+	copy(polyInput[roundTo16(len(additionalData)):], ciphertext)
+	binary.LittleEndian.PutUint64(polyInput[len(polyInput)-16:], uint64(len(additionalData)))
+	binary.LittleEndian.PutUint64(polyInput[len(polyInput)-8:], uint64(len(ciphertext)))
+
+	ret, out := sliceForAppend(dst, len(ciphertext))
+	if !poly1305.Verify(&tag, polyInput, &polyKey) {
+		for i := range out {
+			out[i] = 0
+		}
+		return nil, errOpen
+	}
+
+	counter[0] = 1
+	chacha20.XORKeyStream(out, ciphertext, &counter, &c.key)
+	return ret, nil
+}
--- a/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_noasm.go
+++ b/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_noasm.go
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64 !go1.7
+
+package chacha20poly1305
+
+func (c *chacha20poly1305) seal(dst, nonce, plaintext, additionalData []byte) []byte {
+	return c.sealGeneric(dst, nonce, plaintext, additionalData)
+}
+
+func (c *chacha20poly1305) open(dst, nonce, ciphertext, additionalData []byte) ([]byte, error) {
+	return c.openGeneric(dst, nonce, ciphertext, additionalData)
+}
--- a/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_test.go
+++ b/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_test.go
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package chacha20poly1305
+
+import (
+	"bytes"
+	cr "crypto/rand"
+	"encoding/hex"
+	mr "math/rand"
+	"testing"
+)
+
+func TestVectors(t *testing.T) {
+	for i, test := range chacha20Poly1305Tests {
+		key, _ := hex.DecodeString(test.key)
+		nonce, _ := hex.DecodeString(test.nonce)
+		ad, _ := hex.DecodeString(test.aad)
+		plaintext, _ := hex.DecodeString(test.plaintext)
+
+		aead, err := New(key)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		ct := aead.Seal(nil, nonce, plaintext, ad)
+		if ctHex := hex.EncodeToString(ct); ctHex != test.out {
+			t.Errorf("#%d: got %s, want %s", i, ctHex, test.out)
+			continue
+		}
+
+		plaintext2, err := aead.Open(nil, nonce, ct, ad)
+		if err != nil {
+			t.Errorf("#%d: Open failed", i)
+			continue
+		}
+
+		if !bytes.Equal(plaintext, plaintext2) {
+			t.Errorf("#%d: plaintext's don't match: got %x vs %x", i, plaintext2, plaintext)
+			continue
+		}
+
+		if len(ad) > 0 {
+			alterAdIdx := mr.Intn(len(ad))
+			ad[alterAdIdx] ^= 0x80
+			if _, err := aead.Open(nil, nonce, ct, ad); err == nil {
+				t.Errorf("#%d: Open was successful after altering additional data", i)
+			}
+			ad[alterAdIdx] ^= 0x80
+		}
+
+		alterNonceIdx := mr.Intn(aead.NonceSize())
+		nonce[alterNonceIdx] ^= 0x80
+		if _, err := aead.Open(nil, nonce, ct, ad); err == nil {
+			t.Errorf("#%d: Open was successful after altering nonce", i)
+		}
+		nonce[alterNonceIdx] ^= 0x80
+
+		alterCtIdx := mr.Intn(len(ct))
+		ct[alterCtIdx] ^= 0x80
+		if _, err := aead.Open(nil, nonce, ct, ad); err == nil {
+			t.Errorf("#%d: Open was successful after altering ciphertext", i)
+		}
+		ct[alterCtIdx] ^= 0x80
+	}
+}
+
+func TestRandom(t *testing.T) {
+	// Some random tests to verify Open(Seal) == Plaintext
+	for i := 0; i < 256; i++ {
+		var nonce [12]byte
+		var key [32]byte
+
+		al := mr.Intn(128)
+		pl := mr.Intn(16384)
+		ad := make([]byte, al)
+		plaintext := make([]byte, pl)
+		cr.Read(key[:])
+		cr.Read(nonce[:])
+		cr.Read(ad)
+		cr.Read(plaintext)
+
+		aead, err := New(key[:])
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		ct := aead.Seal(nil, nonce[:], plaintext, ad)
+
+		plaintext2, err := aead.Open(nil, nonce[:], ct, ad)
+		if err != nil {
+			t.Errorf("Random #%d: Open failed", i)
+			continue
+		}
+
+		if !bytes.Equal(plaintext, plaintext2) {
+			t.Errorf("Random #%d: plaintext's don't match: got %x vs %x", i, plaintext2, plaintext)
+			continue
+		}
+
+		if len(ad) > 0 {
+			alterAdIdx := mr.Intn(len(ad))
+			ad[alterAdIdx] ^= 0x80
+			if _, err := aead.Open(nil, nonce[:], ct, ad); err == nil {
+				t.Errorf("Random #%d: Open was successful after altering additional data", i)
+			}
+			ad[alterAdIdx] ^= 0x80
+		}
+
+		alterNonceIdx := mr.Intn(aead.NonceSize())
+		nonce[alterNonceIdx] ^= 0x80
+		if _, err := aead.Open(nil, nonce[:], ct, ad); err == nil {
+			t.Errorf("Random #%d: Open was successful after altering nonce", i)
+		}
+		nonce[alterNonceIdx] ^= 0x80
+
+		alterCtIdx := mr.Intn(len(ct))
+		ct[alterCtIdx] ^= 0x80
+		if _, err := aead.Open(nil, nonce[:], ct, ad); err == nil {
+			t.Errorf("Random #%d: Open was successful after altering ciphertext", i)
+		}
+		ct[alterCtIdx] ^= 0x80
+	}
+}
+
+func benchamarkChaCha20Poly1305Seal(b *testing.B, buf []byte) {
+	b.SetBytes(int64(len(buf)))
+
+	var key [32]byte
+	var nonce [12]byte
+	var ad [13]byte
+	var out []byte
+
+	aead, _ := New(key[:])
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		out = aead.Seal(out[:0], nonce[:], buf[:], ad[:])
+	}
+}
+
+func benchamarkChaCha20Poly1305Open(b *testing.B, buf []byte) {
+	b.SetBytes(int64(len(buf)))
+
+	var key [32]byte
+	var nonce [12]byte
+	var ad [13]byte
+	var ct []byte
+	var out []byte
+
+	aead, _ := New(key[:])
+	ct = aead.Seal(ct[:0], nonce[:], buf[:], ad[:])
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		out, _ = aead.Open(out[:0], nonce[:], ct[:], ad[:])
+	}
+}
+
+func BenchmarkChacha20Poly1305Open_64(b *testing.B) {
+	benchamarkChaCha20Poly1305Open(b, make([]byte, 64))
+}
+
+func BenchmarkChacha20Poly1305Seal_64(b *testing.B) {
+	benchamarkChaCha20Poly1305Seal(b, make([]byte, 64))
+}
+
+func BenchmarkChacha20Poly1305Open_1350(b *testing.B) {
+	benchamarkChaCha20Poly1305Open(b, make([]byte, 1350))
+}
+
+func BenchmarkChacha20Poly1305Seal_1350(b *testing.B) {
+	benchamarkChaCha20Poly1305Seal(b, make([]byte, 1350))
+}
+
+func BenchmarkChacha20Poly1305Open_8K(b *testing.B) {
+	benchamarkChaCha20Poly1305Open(b, make([]byte, 8*1024))
+}
+
+func BenchmarkChacha20Poly1305Seal_8K(b *testing.B) {
+	benchamarkChaCha20Poly1305Seal(b, make([]byte, 8*1024))
+}
--- a/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_test_vectors.go
+++ b/src/vendor/golang_org/x/crypto/chacha20poly1305/chacha20poly1305_test_vectors.go
--- a/src/vendor/golang_org/x/crypto/chacha20poly1305/internal/chacha20/chacha_generic.go
+++ b/src/vendor/golang_org/x/crypto/chacha20poly1305/internal/chacha20/chacha_generic.go
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package ChaCha20 implements the core ChaCha20 function as specified in https://tools.ietf.org/html/rfc7539#section-2.3.
+package chacha20
+
+import "encoding/binary"
+
+const rounds = 20
+
+// core applies the ChaCha20 core function to 16-byte input in, 32-byte key k,
+// and 16-byte constant c, and puts the result into 64-byte array out.
+func core(out *[64]byte, in *[16]byte, k *[32]byte) {
+	j0 := uint32(0x61707865)
+	j1 := uint32(0x3320646e)
+	j2 := uint32(0x79622d32)
+	j3 := uint32(0x6b206574)
+	j4 := binary.LittleEndian.Uint32(k[0:4])
+	j5 := binary.LittleEndian.Uint32(k[4:8])
+	j6 := binary.LittleEndian.Uint32(k[8:12])
+	j7 := binary.LittleEndian.Uint32(k[12:16])
+	j8 := binary.LittleEndian.Uint32(k[16:20])
+	j9 := binary.LittleEndian.Uint32(k[20:24])
+	j10 := binary.LittleEndian.Uint32(k[24:28])
+	j11 := binary.LittleEndian.Uint32(k[28:32])
+	j12 := binary.LittleEndian.Uint32(in[0:4])
+	j13 := binary.LittleEndian.Uint32(in[4:8])
+	j14 := binary.LittleEndian.Uint32(in[8:12])
+	j15 := binary.LittleEndian.Uint32(in[12:16])
+
+	x0, x1, x2, x3, x4, x5, x6, x7 := j0, j1, j2, j3, j4, j5, j6, j7
+	x8, x9, x10, x11, x12, x13, x14, x15 := j8, j9, j10, j11, j12, j13, j14, j15
+
+	for i := 0; i < rounds; i += 2 {
+		x0 += x4
+		x12 ^= x0
+		x12 = (x12 << 16) | (x12 >> (16))
+		x8 += x12
+		x4 ^= x8
+		x4 = (x4 << 12) | (x4 >> (20))
+		x0 += x4
+		x12 ^= x0
+		x12 = (x12 << 8) | (x12 >> (24))
+		x8 += x12
+		x4 ^= x8
+		x4 = (x4 << 7) | (x4 >> (25))
+		x1 += x5
+		x13 ^= x1
+		x13 = (x13 << 16) | (x13 >> 16)
+		x9 += x13
+		x5 ^= x9
+		x5 = (x5 << 12) | (x5 >> 20)
+		x1 += x5
+		x13 ^= x1
+		x13 = (x13 << 8) | (x13 >> 24)
+		x9 += x13
+		x5 ^= x9
+		x5 = (x5 << 7) | (x5 >> 25)
+		x2 += x6
+		x14 ^= x2
+		x14 = (x14 << 16) | (x14 >> 16)
+		x10 += x14
+		x6 ^= x10
+		x6 = (x6 << 12) | (x6 >> 20)
+		x2 += x6
+		x14 ^= x2
+		x14 = (x14 << 8) | (x14 >> 24)
+		x10 += x14
+		x6 ^= x10
+		x6 = (x6 << 7) | (x6 >> 25)
+		x3 += x7
+		x15 ^= x3
+		x15 = (x15 << 16) | (x15 >> 16)
+		x11 += x15
+		x7 ^= x11
+		x7 = (x7 << 12) | (x7 >> 20)
+		x3 += x7
+		x15 ^= x3
+		x15 = (x15 << 8) | (x15 >> 24)
+		x11 += x15
+		x7 ^= x11
+		x7 = (x7 << 7) | (x7 >> 25)
+		x0 += x5
+		x15 ^= x0
+		x15 = (x15 << 16) | (x15 >> 16)
+		x10 += x15
+		x5 ^= x10
+		x5 = (x5 << 12) | (x5 >> 20)
+		x0 += x5
+		x15 ^= x0
+		x15 = (x15 << 8) | (x15 >> 24)
+		x10 += x15
+		x5 ^= x10
+		x5 = (x5 << 7) | (x5 >> 25)
+		x1 += x6
+		x12 ^= x1
+		x12 = (x12 << 16) | (x12 >> 16)
+		x11 += x12
+		x6 ^= x11
+		x6 = (x6 << 12) | (x6 >> 20)
+		x1 += x6
+		x12 ^= x1
+		x12 = (x12 << 8) | (x12 >> 24)
+		x11 += x12
+		x6 ^= x11
+		x6 = (x6 << 7) | (x6 >> 25)
+		x2 += x7
+		x13 ^= x2
+		x13 = (x13 << 16) | (x13 >> 16)
+		x8 += x13
+		x7 ^= x8
+		x7 = (x7 << 12) | (x7 >> 20)
+		x2 += x7
+		x13 ^= x2
+		x13 = (x13 << 8) | (x13 >> 24)
+		x8 += x13
+		x7 ^= x8
+		x7 = (x7 << 7) | (x7 >> 25)
+		x3 += x4
+		x14 ^= x3
+		x14 = (x14 << 16) | (x14 >> 16)
+		x9 += x14
+		x4 ^= x9
+		x4 = (x4 << 12) | (x4 >> 20)
+		x3 += x4
+		x14 ^= x3
+		x14 = (x14 << 8) | (x14 >> 24)
+		x9 += x14
+		x4 ^= x9
+		x4 = (x4 << 7) | (x4 >> 25)
+	}
+
+	x0 += j0
+	x1 += j1
+	x2 += j2
+	x3 += j3
+	x4 += j4
+	x5 += j5
+	x6 += j6
+	x7 += j7
+	x8 += j8
+	x9 += j9
+	x10 += j10
+	x11 += j11
+	x12 += j12
+	x13 += j13
+	x14 += j14
+	x15 += j15
+
+	binary.LittleEndian.PutUint32(out[0:4], x0)
+	binary.LittleEndian.PutUint32(out[4:8], x1)
+	binary.LittleEndian.PutUint32(out[8:12], x2)
+	binary.LittleEndian.PutUint32(out[12:16], x3)
+	binary.LittleEndian.PutUint32(out[16:20], x4)
+	binary.LittleEndian.PutUint32(out[20:24], x5)
+	binary.LittleEndian.PutUint32(out[24:28], x6)
+	binary.LittleEndian.PutUint32(out[28:32], x7)
+	binary.LittleEndian.PutUint32(out[32:36], x8)
+	binary.LittleEndian.PutUint32(out[36:40], x9)
+	binary.LittleEndian.PutUint32(out[40:44], x10)
+	binary.LittleEndian.PutUint32(out[44:48], x11)
+	binary.LittleEndian.PutUint32(out[48:52], x12)
+	binary.LittleEndian.PutUint32(out[52:56], x13)
+	binary.LittleEndian.PutUint32(out[56:60], x14)
+	binary.LittleEndian.PutUint32(out[60:64], x15)
+}
+
+// XORKeyStream crypts bytes from in to out using the given key and counters.
+// In and out may be the same slice but otherwise should not overlap. Counter
+// contains the raw ChaCha20 counter bytes (i.e. block counter followed by
+// nonce).
+func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
+	var block [64]byte
+	var counterCopy [16]byte
+	copy(counterCopy[:], counter[:])
+
+	for len(in) >= 64 {
+		core(&block, &counterCopy, key)
+		for i, x := range block {
+			out[i] = in[i] ^ x
+		}
+		u := uint32(1)
+		for i := 0; i < 4; i++ {
+			u += uint32(counterCopy[i])
+			counterCopy[i] = byte(u)
+			u >>= 8
+		}
+		in = in[64:]
+		out = out[64:]
+	}
+
+	if len(in) > 0 {
+		core(&block, &counterCopy, key)
+		for i, v := range in {
+			out[i] = v ^ block[i]
+		}
+	}
+}
--- a/src/vendor/golang_org/x/crypto/chacha20poly1305/internal/chacha20/chacha_test.go
+++ b/src/vendor/golang_org/x/crypto/chacha20poly1305/internal/chacha20/chacha_test.go
+package chacha20
+
+import (
+	"encoding/hex"
+	"testing"
+)
+
+func TestCore(t *testing.T) {
+	// This is just a smoke test that checks the example from
+	// https://tools.ietf.org/html/rfc7539#section-2.3.2. The
+	// chacha20poly1305 package contains much more extensive tests of this
+	// code.
+	var key [32]byte
+	for i := range key {
+		key[i] = byte(i)
+	}
+
+	var input [16]byte
+	input[0] = 1
+	input[7] = 9
+	input[11] = 0x4a
+
+	var out [64]byte
+	XORKeyStream(out[:], out[:], &input, &key)
+	const expected = "10f1e7e4d13b5915500fdd1fa32071c4c7d1f4c733c068030422aa9ac3d46c4ed2826446079faa0914c2d705d98b02a2b5129cd1de164eb9cbd083e8a2503c4e"
+	if result := hex.EncodeToString(out[:]); result != expected {
+		t.Errorf("wanted %x but got %x", expected, result)
+	}
+}
--- a/src/vendor/golang_org/x/crypto/poly1305/poly1305.go
+++ b/src/vendor/golang_org/x/crypto/poly1305/poly1305.go
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package poly1305 implements Poly1305 one-time message authentication code as specified in http://cr.yp.to/mac/poly1305-20050329.pdf.
+
+Poly1305 is a fast, one-time authentication function. It is infeasible for an
+attacker to generate an authenticator for a message without the key. However, a
+key must only be used for a single message. Authenticating two different
+messages with the same key allows an attacker to forge authenticators for other
+messages with the same key.
+
+Poly1305 was originally coupled with AES in order to make Poly1305-AES. AES was
+used with a fixed key in order to generate one-time keys from an nonce.
+However, in this package AES isn't used and the one-time key is specified
+directly.
+*/
+package poly1305 // import "golang.org/x/crypto/poly1305"
+
+import "crypto/subtle"
+
+// TagSize is the size, in bytes, of a poly1305 authenticator.
+const TagSize = 16
+
+// Verify returns true if mac is a valid authenticator for m with the given
+// key.
+func Verify(mac *[16]byte, m []byte, key *[32]byte) bool {
+	var tmp [16]byte
+	Sum(&tmp, m, key)
+	return subtle.ConstantTimeCompare(tmp[:], mac[:]) == 1
+}
--- a/src/vendor/golang_org/x/crypto/poly1305/poly1305_test.go
+++ b/src/vendor/golang_org/x/crypto/poly1305/poly1305_test.go
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package poly1305
+
+import (
+	"bytes"
+	"testing"
+	"unsafe"
+)
+
+var testData = []struct {
+	in, k, correct []byte
+}{
+	{
+		[]byte("Hello world!"),
+		[]byte("this is 32-byte key for Poly1305"),
+		[]byte{0xa6, 0xf7, 0x45, 0x00, 0x8f, 0x81, 0xc9, 0x16, 0xa2, 0x0d, 0xcc, 0x74, 0xee, 0xf2, 0xb2, 0xf0},
+	},
+	{
+		make([]byte, 32),
+		[]byte("this is 32-byte key for Poly1305"),
+		[]byte{0x49, 0xec, 0x78, 0x09, 0x0e, 0x48, 0x1e, 0xc6, 0xc2, 0x6b, 0x33, 0xb9, 0x1c, 0xcc, 0x03, 0x07},
+	},
+	{
+		make([]byte, 2007),
+		[]byte("this is 32-byte key for Poly1305"),
+		[]byte{0xda, 0x84, 0xbc, 0xab, 0x02, 0x67, 0x6c, 0x38, 0xcd, 0xb0, 0x15, 0x60, 0x42, 0x74, 0xc2, 0xaa},
+	},
+	{
+		make([]byte, 2007),
+		make([]byte, 32),
+		make([]byte, 16),
+	},
+	{
+		// This test triggers an edge-case. See https://go-review.googlesource.com/#/c/30101/.
+		[]byte{0x81, 0xd8, 0xb2, 0xe4, 0x6a, 0x25, 0x21, 0x3b, 0x58, 0xfe, 0xe4, 0x21, 0x3a, 0x2a, 0x28, 0xe9, 0x21, 0xc1, 0x2a, 0x96, 0x32, 0x51, 0x6d, 0x3b, 0x73, 0x27, 0x27, 0x27, 0xbe, 0xcf, 0x21, 0x29},
+		[]byte{0x3b, 0x3a, 0x29, 0xe9, 0x3b, 0x21, 0x3a, 0x5c, 0x5c, 0x3b, 0x3b, 0x05, 0x3a, 0x3a, 0x8c, 0x0d},
+		[]byte{0x6d, 0xc1, 0x8b, 0x8c, 0x34, 0x4c, 0xd7, 0x99, 0x27, 0x11, 0x8b, 0xbe, 0x84, 0xb7, 0xf3, 0x14},
+	},
+}
+
+func testSum(t *testing.T, unaligned bool) {
+	var out [16]byte
+	var key [32]byte
+
+	for i, v := range testData {
+		in := v.in
+		if unaligned {
+			in = unalignBytes(in)
+		}
+		copy(key[:], v.k)
+		Sum(&out, in, &key)
+		if !bytes.Equal(out[:], v.correct) {
+			t.Errorf("%d: expected %x, got %x", i, v.correct, out[:])
+		}
+	}
+}
+
+func TestSum(t *testing.T)          { testSum(t, false) }
+func TestSumUnaligned(t *testing.T) { testSum(t, true) }
+
+func benchmark(b *testing.B, size int, unaligned bool) {
+	var out [16]byte
+	var key [32]byte
+	in := make([]byte, size)
+	if unaligned {
+		in = unalignBytes(in)
+	}
+	b.SetBytes(int64(len(in)))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		Sum(&out, in, &key)
+	}
+}
+
+func Benchmark64(b *testing.B)          { benchmark(b, 64, false) }
+func Benchmark1K(b *testing.B)          { benchmark(b, 1024, false) }
+func Benchmark64Unaligned(b *testing.B) { benchmark(b, 64, true) }
+func Benchmark1KUnaligned(b *testing.B) { benchmark(b, 1024, true) }
+
+func unalignBytes(in []byte) []byte {
+	out := make([]byte, len(in)+1)
+	if uintptr(unsafe.Pointer(&out[0]))&(unsafe.Alignof(uint32(0))-1) == 0 {
+		out = out[1:]
+	} else {
+		out = out[:len(in)]
+	}
+	copy(out, in)
+	return out
+}
--- a/src/vendor/golang_org/x/crypto/poly1305/sum_amd64.go
+++ b/src/vendor/golang_org/x/crypto/poly1305/sum_amd64.go
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64,!gccgo,!appengine,go1.7
+
+package poly1305
+
+// This function is implemented in sum_amd64.s
+//go:noescape
+func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
+
+// Sum generates an authenticator for m using a one-time key and puts the
+// 16-byte result into out. Authenticating two different messages with the same
+// key allows an attacker to forge messages at will.
+func Sum(out *[16]byte, m []byte, key *[32]byte) {
+	var mPtr *byte
+	if len(m) > 0 {
+		mPtr = &m[0]
+	}
+	poly1305(out, mPtr, uint64(len(m)), key)
+}
--- a/src/vendor/golang_org/x/crypto/poly1305/sum_amd64.s
+++ b/src/vendor/golang_org/x/crypto/poly1305/sum_amd64.s
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64,!gccgo,!appengine,go1.7
+
+#include "textflag.h"
+
+#define POLY1305_ADD(msg, h0, h1, h2) \
+	ADDQ 0(msg), h0;  \
+	ADCQ 8(msg), h1;  \
+	ADCQ $1, h2;      \
+	LEAQ 16(msg), msg
+
+#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
+	MOVQ  r0, AX;                  \
+	MULQ  h0;                      \
+	MOVQ  AX, t0;                  \
+	MOVQ  DX, t1;                  \
+	MOVQ  r0, AX;                  \
+	MULQ  h1;                      \
+	ADDQ  AX, t1;                  \
+	ADCQ  $0, DX;                  \
+	MOVQ  r0, t2;                  \
+	IMULQ h2, t2;                  \
+	ADDQ  DX, t2;                  \
+	                               \
+	MOVQ  r1, AX;                  \
+	MULQ  h0;                      \
+	ADDQ  AX, t1;                  \
+	ADCQ  $0, DX;                  \
+	MOVQ  DX, h0;                  \
+	MOVQ  r1, t3;                  \
+	IMULQ h2, t3;                  \
+	MOVQ  r1, AX;                  \
+	MULQ  h1;                      \
+	ADDQ  AX, t2;                  \
+	ADCQ  DX, t3;                  \
+	ADDQ  h0, t2;                  \
+	ADCQ  $0, t3;                  \
+	                               \
+	MOVQ  t0, h0;                  \
+	MOVQ  t1, h1;                  \
+	MOVQ  t2, h2;                  \
+	ANDQ  $3, h2;                  \
+	MOVQ  t2, t0;                  \
+	ANDQ  $0xFFFFFFFFFFFFFFFC, t0; \
+	ADDQ  t0, h0;                  \
+	ADCQ  t3, h1;                  \
+	ADCQ  $0, h2;                  \
+	SHRQ  $2, t3, t2;              \
+	SHRQ  $2, t3;                  \
+	ADDQ  t2, h0;                  \
+	ADCQ  t3, h1;                  \
+	ADCQ  $0, h2
+
+DATA poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
+DATA poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
+GLOBL poly1305Mask<>(SB), RODATA, $16
+
+// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]key)
+TEXT ·poly1305(SB), $0-32
+	MOVQ out+0(FP), DI
+	MOVQ m+8(FP), SI
+	MOVQ mlen+16(FP), R15
+	MOVQ key+24(FP), AX
+
+	MOVQ 0(AX), R11
+	MOVQ 8(AX), R12
+	ANDQ poly1305Mask<>(SB), R11   // r0
+	ANDQ poly1305Mask<>+8(SB), R12 // r1
+	XORQ R8, R8                    // h0
+	XORQ R9, R9                    // h1
+	XORQ R10, R10                  // h2
+
+	CMPQ R15, $16
+	JB   bytes_between_0_and_15
+
+loop:
+	POLY1305_ADD(SI, R8, R9, R10)
+
+multiply:
+	POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
+	SUBQ $16, R15
+	CMPQ R15, $16
+	JAE  loop
+
+bytes_between_0_and_15:
+	TESTQ R15, R15
+	JZ    done
+	MOVQ  $1, BX
+	XORQ  CX, CX
+	XORQ  R13, R13
+	ADDQ  R15, SI
+
+flush_buffer:
+	SHLQ $8, BX, CX
+	SHLQ $8, BX
+	MOVB -1(SI), R13
+	XORQ R13, BX
+	DECQ SI
+	DECQ R15
+	JNZ  flush_buffer
+
+	ADDQ BX, R8
+	ADCQ CX, R9
+	ADCQ $0, R10
+	MOVQ $16, R15
+	JMP  multiply
+
+done:
+	MOVQ    R8, AX
+	MOVQ    R9, BX
+	SUBQ    $0xFFFFFFFFFFFFFFFB, AX
+	SBBQ    $0xFFFFFFFFFFFFFFFF, BX
+	SBBQ    $3, R10
+	CMOVQCS R8, AX
+	CMOVQCS R9, BX
+	MOVQ    key+24(FP), R8
+	ADDQ    16(R8), AX
+	ADCQ    24(R8), BX
+
+	MOVQ AX, 0(DI)
+	MOVQ BX, 8(DI)
+	RET
--- a/src/vendor/golang_org/x/crypto/poly1305/sum_arm.go
+++ b/src/vendor/golang_org/x/crypto/poly1305/sum_arm.go
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build arm,!gccgo,!appengine,!nacl
+
+package poly1305
+
+// This function is implemented in sum_arm.s
+//go:noescape
+func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)
+
+// Sum generates an authenticator for m using a one-time key and puts the
+// 16-byte result into out. Authenticating two different messages with the same
+// key allows an attacker to forge messages at will.
+func Sum(out *[16]byte, m []byte, key *[32]byte) {
+	var mPtr *byte
+	if len(m) > 0 {
+		mPtr = &m[0]
+	}
+	poly1305_auth_armv6(out, mPtr, uint32(len(m)), key)
+}
--- a/src/vendor/golang_org/x/crypto/poly1305/sum_arm.s
+++ b/src/vendor/golang_org/x/crypto/poly1305/sum_arm.s
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build arm,!gccgo,!appengine,!nacl
+
+#include "textflag.h"
+
+// This code was translated into a form compatible with 5a from the public
+// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305.
+
+DATA poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff
+DATA poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03
+DATA poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff
+DATA poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff
+DATA poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff
+GLOBL poly1305_init_constants_armv6<>(SB), 8, $20
+
+// Warning: the linker may use R11 to synthesize certain instructions. Please
+// take care and verify that no synthetic instructions use it.
+
+TEXT poly1305_init_ext_armv6<>(SB), NOSPLIT|NOFRAME, $0
+	MOVM.DB.W [R4-R11], (R13)
+	MOVM.IA.W (R1), [R2-R5]
+	MOVW      $poly1305_init_constants_armv6<>(SB), R7
+	MOVW      R2, R8
+	MOVW      R2>>26, R9
+	MOVW      R3>>20, g
+	MOVW      R4>>14, R11
+	MOVW      R5>>8, R12
+	ORR       R3<<6, R9, R9
+	ORR       R4<<12, g, g
+	ORR       R5<<18, R11, R11
+	MOVM.IA   (R7), [R2-R6]
+	AND       R8, R2, R2
+	AND       R9, R3, R3
+	AND       g, R4, R4
+	AND       R11, R5, R5
+	AND       R12, R6, R6
+	MOVM.IA.W [R2-R6], (R0)
+	EOR       R2, R2, R2
+	EOR       R3, R3, R3
+	EOR       R4, R4, R4
+	EOR       R5, R5, R5
+	EOR       R6, R6, R6
+	MOVM.IA.W [R2-R6], (R0)
+	MOVM.IA.W (R1), [R2-R5]
+	MOVM.IA   [R2-R6], (R0)
+	MOVM.IA.W (R13), [R4-R11]
+	RET
+
+#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
+	MOVBU (offset+0)(Rsrc), Rtmp; \
+	MOVBU Rtmp, (offset+0)(Rdst); \
+	MOVBU (offset+1)(Rsrc), Rtmp; \
+	MOVBU Rtmp, (offset+1)(Rdst); \
+	MOVBU (offset+2)(Rsrc), Rtmp; \
+	MOVBU Rtmp, (offset+2)(Rdst); \
+	MOVBU (offset+3)(Rsrc), Rtmp; \
+	MOVBU Rtmp, (offset+3)(Rdst)
+
+TEXT poly1305_blocks_armv6<>(SB), NOSPLIT|NOFRAME, $0
+	MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13)
+	SUB       $128, R13
+	MOVW      R0, 36(R13)
+	MOVW      R1, 40(R13)
+	MOVW      R2, 44(R13)
+	MOVW      R1, R14
+	MOVW      R2, R12
+	MOVW      56(R0), R8
+	WORD      $0xe1180008                                  // TST R8, R8 not working see issue 5921
+	EOR       R6, R6, R6
+	MOVW.EQ   $(1<<24), R6
+	MOVW      R6, 32(R13)
+	ADD       $64, R13, g
+	MOVM.IA   (R0), [R0-R9]
+	MOVM.IA   [R0-R4], (g)
+	CMP       $16, R12
+	BLO       poly1305_blocks_armv6_done
+
+poly1305_blocks_armv6_mainloop:
+	WORD    $0xe31e0003                            // TST R14, #3 not working see issue 5921
+	BEQ     poly1305_blocks_armv6_mainloop_aligned
+	ADD     $48, R13, g
+	MOVW_UNALIGNED(R14, g, R0, 0)
+	MOVW_UNALIGNED(R14, g, R0, 4)
+	MOVW_UNALIGNED(R14, g, R0, 8)
+	MOVW_UNALIGNED(R14, g, R0, 12)
+	MOVM.IA (g), [R0-R3]
+	ADD     $16, R14
+	B       poly1305_blocks_armv6_mainloop_loaded
+
+poly1305_blocks_armv6_mainloop_aligned:
+	MOVM.IA.W (R14), [R0-R3]
+
+poly1305_blocks_armv6_mainloop_loaded:
+	MOVW    R0>>26, g
+	MOVW    R1>>20, R11
+	MOVW    R2>>14, R12
+	MOVW    R14, 40(R13)
+	MOVW    R3>>8, R4
+	ORR     R1<<6, g, g
+	ORR     R2<<12, R11, R11
+	ORR     R3<<18, R12, R12
+	BIC     $0xfc000000, R0, R0
+	BIC     $0xfc000000, g, g
+	MOVW    32(R13), R3
+	BIC     $0xfc000000, R11, R11
+	BIC     $0xfc000000, R12, R12
+	ADD     R0, R5, R5
+	ADD     g, R6, R6
+	ORR     R3, R4, R4
+	ADD     R11, R7, R7
+	ADD     $64, R13, R14
+	ADD     R12, R8, R8
+	ADD     R4, R9, R9
+	MOVM.IA (R14), [R0-R4]
+	MULLU   R4, R5, (R11, g)
+	MULLU   R3, R5, (R14, R12)
+	MULALU  R3, R6, (R11, g)
+	MULALU  R2, R6, (R14, R12)
+	MULALU  R2, R7, (R11, g)
+	MULALU  R1, R7, (R14, R12)
+	ADD     R4<<2, R4, R4
+	ADD     R3<<2, R3, R3
+	MULALU  R1, R8, (R11, g)
+	MULALU  R0, R8, (R14, R12)
+	MULALU  R0, R9, (R11, g)
+	MULALU  R4, R9, (R14, R12)
+	MOVW    g, 24(R13)
+	MOVW    R11, 28(R13)
+	MOVW    R12, 16(R13)
+	MOVW    R14, 20(R13)
+	MULLU   R2, R5, (R11, g)
+	MULLU   R1, R5, (R14, R12)
+	MULALU  R1, R6, (R11, g)
+	MULALU  R0, R6, (R14, R12)
+	MULALU  R0, R7, (R11, g)
+	MULALU  R4, R7, (R14, R12)
+	ADD     R2<<2, R2, R2
+	ADD     R1<<2, R1, R1
+	MULALU  R4, R8, (R11, g)
+	MULALU  R3, R8, (R14, R12)
+	MULALU  R3, R9, (R11, g)
+	MULALU  R2, R9, (R14, R12)
+	MOVW    g, 8(R13)
+	MOVW    R11, 12(R13)
+	MOVW    R12, 0(R13)
+	MOVW    R14, w+4(SP)
+	MULLU   R0, R5, (R11, g)
+	MULALU  R4, R6, (R11, g)
+	MULALU  R3, R7, (R11, g)
+	MULALU  R2, R8, (R11, g)
+	MULALU  R1, R9, (R11, g)
+	MOVM.IA (R13), [R0-R7]
+	MOVW    g>>26, R12
+	MOVW    R4>>26, R14
+	ORR     R11<<6, R12, R12
+	ORR     R5<<6, R14, R14
+	BIC     $0xfc000000, g, g
+	BIC     $0xfc000000, R4, R4
+	ADD.S   R12, R0, R0
+	ADC     $0, R1, R1
+	ADD.S   R14, R6, R6
+	ADC     $0, R7, R7
+	MOVW    R0>>26, R12
+	MOVW    R6>>26, R14
+	ORR     R1<<6, R12, R12
+	ORR     R7<<6, R14, R14
+	BIC     $0xfc000000, R0, R0
+	BIC     $0xfc000000, R6, R6
+	ADD     R14<<2, R14, R14
+	ADD.S   R12, R2, R2
+	ADC     $0, R3, R3
+	ADD     R14, g, g
+	MOVW    R2>>26, R12
+	MOVW    g>>26, R14
+	ORR     R3<<6, R12, R12
+	BIC     $0xfc000000, g, R5
+	BIC     $0xfc000000, R2, R7
+	ADD     R12, R4, R4
+	ADD     R14, R0, R0
+	MOVW    R4>>26, R12
+	BIC     $0xfc000000, R4, R8
+	ADD     R12, R6, R9
+	MOVW    w+44(SP), R12
+	MOVW    w+40(SP), R14
+	MOVW    R0, R6
+	CMP     $32, R12
+	SUB     $16, R12, R12
+	MOVW    R12, 44(R13)
+	BHS     poly1305_blocks_armv6_mainloop
+
+poly1305_blocks_armv6_done:
+	MOVW      36(R13), R12
+	MOVW      R5, 20(R12)
+	MOVW      R6, 24(R12)
+	MOVW      R7, 28(R12)
+	MOVW      R8, 32(R12)
+	MOVW      R9, 36(R12)
+	ADD       $128, R13, R13
+	MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14]
+	RET
+
+#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
+	MOVBU.P 1(Rsrc), Rtmp; \
+	MOVBU.P Rtmp, 1(Rdst); \
+	MOVBU.P 1(Rsrc), Rtmp; \
+	MOVBU.P Rtmp, 1(Rdst)
+
+#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
+	MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
+	MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp)
+
+TEXT poly1305_finish_ext_armv6<>(SB), NOSPLIT | NOFRAME, $0
+	MOVM.DB.W [R4, R5, R6, R7, R8, R9, g, R11, R14], (R13)
+	SUB       $16, R13, R13
+	MOVW      R0, R5
+	MOVW      R1, R6
+	MOVW      R2, R7
+	MOVW      R3, R8
+	AND.S     R2, R2, R2
+	BEQ       poly1305_finish_ext_armv6_noremaining
+	EOR       R0, R0
+	MOVW      R13, R9
+	MOVW      R0, 0(R13)
+	MOVW      R0, 4(R13)
+	MOVW      R0, 8(R13)
+	MOVW      R0, 12(R13)
+	WORD      $0xe3110003                                  // TST R1, #3 not working see issue 5921
+	BEQ       poly1305_finish_ext_armv6_aligned
+	WORD      $0xe3120008                                  // TST R2, #8 not working see issue 5921
+	BEQ       poly1305_finish_ext_armv6_skip8
+	MOVWP_UNALIGNED(R1, R9, g)
+	MOVWP_UNALIGNED(R1, R9, g)
+
+poly1305_finish_ext_armv6_skip8:
+	WORD $0xe3120004                     // TST $4, R2 not working see issue 5921
+	BEQ  poly1305_finish_ext_armv6_skip4
+	MOVWP_UNALIGNED(R1, R9, g)
+
+poly1305_finish_ext_armv6_skip4:
+	WORD $0xe3120002                     // TST $2, R2 not working see issue 5921
+	BEQ  poly1305_finish_ext_armv6_skip2
+	MOVHUP_UNALIGNED(R1, R9, g)
+	B    poly1305_finish_ext_armv6_skip2
+
+poly1305_finish_ext_armv6_aligned:
+	WORD      $0xe3120008                             // TST R2, #8 not working see issue 5921
+	BEQ       poly1305_finish_ext_armv6_skip8_aligned
+	MOVM.IA.W (R1), [g-R11]
+	MOVM.IA.W [g-R11], (R9)
+
+poly1305_finish_ext_armv6_skip8_aligned:
+	WORD   $0xe3120004                             // TST $4, R2 not working see issue 5921
+	BEQ    poly1305_finish_ext_armv6_skip4_aligned
+	MOVW.P 4(R1), g
+	MOVW.P g, 4(R9)
+
+poly1305_finish_ext_armv6_skip4_aligned:
+	WORD    $0xe3120002                     // TST $2, R2 not working see issue 5921
+	BEQ     poly1305_finish_ext_armv6_skip2
+	MOVHU.P 2(R1), g
+	MOVH.P  g, 2(R9)
+
+poly1305_finish_ext_armv6_skip2:
+	WORD    $0xe3120001                     // TST $1, R2 not working see issue 5921
+	BEQ     poly1305_finish_ext_armv6_skip1
+	MOVBU.P 1(R1), g
+	MOVBU.P g, 1(R9)
+
+poly1305_finish_ext_armv6_skip1:
+	MOVW  $1, R11
+	MOVBU R11, 0(R9)
+	MOVW  R11, 56(R5)
+	MOVW  R5, R0
+	MOVW  R13, R1
+	MOVW  $16, R2
+	BL    poly1305_blocks_armv6<>(SB)
+
+poly1305_finish_ext_armv6_noremaining:
+	MOVW      20(R5), R0
+	MOVW      24(R5), R1
+	MOVW      28(R5), R2
+	MOVW      32(R5), R3
+	MOVW      36(R5), R4
+	MOVW      R4>>26, R12
+	BIC       $0xfc000000, R4, R4
+	ADD       R12<<2, R12, R12
+	ADD       R12, R0, R0
+	MOVW      R0>>26, R12
+	BIC       $0xfc000000, R0, R0
+	ADD       R12, R1, R1
+	MOVW      R1>>26, R12
+	BIC       $0xfc000000, R1, R1
+	ADD       R12, R2, R2
+	MOVW      R2>>26, R12
+	BIC       $0xfc000000, R2, R2
+	ADD       R12, R3, R3
+	MOVW      R3>>26, R12
+	BIC       $0xfc000000, R3, R3
+	ADD       R12, R4, R4
+	ADD       $5, R0, R6
+	MOVW      R6>>26, R12
+	BIC       $0xfc000000, R6, R6
+	ADD       R12, R1, R7
+	MOVW      R7>>26, R12
+	BIC       $0xfc000000, R7, R7
+	ADD       R12, R2, g
+	MOVW      g>>26, R12
+	BIC       $0xfc000000, g, g
+	ADD       R12, R3, R11
+	MOVW      $-(1<<26), R12
+	ADD       R11>>26, R12, R12
+	BIC       $0xfc000000, R11, R11
+	ADD       R12, R4, R14
+	MOVW      R14>>31, R12
+	SUB       $1, R12
+	AND       R12, R6, R6
+	AND       R12, R7, R7
+	AND       R12, g, g
+	AND       R12, R11, R11
+	AND       R12, R14, R14
+	MVN       R12, R12
+	AND       R12, R0, R0
+	AND       R12, R1, R1
+	AND       R12, R2, R2
+	AND       R12, R3, R3
+	AND       R12, R4, R4
+	ORR       R6, R0, R0
+	ORR       R7, R1, R1
+	ORR       g, R2, R2
+	ORR       R11, R3, R3
+	ORR       R14, R4, R4
+	ORR       R1<<26, R0, R0
+	MOVW      R1>>6, R1
+	ORR       R2<<20, R1, R1
+	MOVW      R2>>12, R2
+	ORR       R3<<14, R2, R2
+	MOVW      R3>>18, R3
+	ORR       R4<<8, R3, R3
+	MOVW      40(R5), R6
+	MOVW      44(R5), R7
+	MOVW      48(R5), g
+	MOVW      52(R5), R11
+	ADD.S     R6, R0, R0
+	ADC.S     R7, R1, R1
+	ADC.S     g, R2, R2
+	ADC.S     R11, R3, R3
+	MOVM.IA   [R0-R3], (R8)
+	MOVW      R5, R12
+	EOR       R0, R0, R0
+	EOR       R1, R1, R1
+	EOR       R2, R2, R2
+	EOR       R3, R3, R3
+	EOR       R4, R4, R4
+	EOR       R5, R5, R5
+	EOR       R6, R6, R6
+	EOR       R7, R7, R7
+	MOVM.IA.W [R0-R7], (R12)
+	MOVM.IA   [R0-R7], (R12)
+	ADD       $16, R13, R13
+	MOVM.IA.W (R13), [R4, R5, R6, R7, R8, R9, g, R11, R14]
+	RET
+
+// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]key)
+TEXT ·poly1305_auth_armv6(SB), $280-16
+	MOVW out+0(FP), R4
+	MOVW m+4(FP), R5
+	MOVW mlen+8(FP), R6
+	MOVW key+12(FP), R7
+
+	MOVW  R13, R8
+	BIC   $63, R13
+	SUB   $64, R13, R13
+	MOVW  R13, R0
+	MOVW  R7, R1
+	BL    poly1305_init_ext_armv6<>(SB)
+	BIC.S $15, R6, R2
+	BEQ   poly1305_auth_armv6_noblocks
+	MOVW  R13, R0
+	MOVW  R5, R1
+	ADD   R2, R5, R5
+	SUB   R2, R6, R6
+	BL    poly1305_blocks_armv6<>(SB)
+
+poly1305_auth_armv6_noblocks:
+	MOVW R13, R0
+	MOVW R5, R1
+	MOVW R6, R2
+	MOVW R4, R3
+	BL   poly1305_finish_ext_armv6<>(SB)
+	MOVW R8, R13
+	RET
--- a/src/vendor/golang_org/x/crypto/poly1305/sum_ref.go
+++ b/src/vendor/golang_org/x/crypto/poly1305/sum_ref.go
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64,!arm gccgo appengine nacl !go1.7
+
+package poly1305
+
+// Based on original, public domain implementation from NaCl by D. J.
+// Bernstein.
+
+import "math"
+
+const (
+	alpham80 = 0.00000000558793544769287109375
+	alpham48 = 24.0
+	alpham16 = 103079215104.0
+	alpha0   = 6755399441055744.0
+	alpha18  = 1770887431076116955136.0
+	alpha32  = 29014219670751100192948224.0
+	alpha50  = 7605903601369376408980219232256.0
+	alpha64  = 124615124604835863084731911901282304.0
+	alpha82  = 32667107224410092492483962313449748299776.0
+	alpha96  = 535217884764734955396857238543560676143529984.0
+	alpha112 = 35076039295941670036888435985190792471742381031424.0
+	alpha130 = 9194973245195333150150082162901855101712434733101613056.0
+	scale    = 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125
+	offset0  = 6755408030990331.0
+	offset1  = 29014256564239239022116864.0
+	offset2  = 124615283061160854719918951570079744.0
+	offset3  = 535219245894202480694386063513315216128475136.0
+)
+
+// Sum generates an authenticator for m using a one-time key and puts the
+// 16-byte result into out. Authenticating two different messages with the same
+// key allows an attacker to forge messages at will.
+func Sum(out *[16]byte, m []byte, key *[32]byte) {
+	r := key
+	s := key[16:]
+	var (
+		y7        float64
+		y6        float64
+		y1        float64
+		y0        float64
+		y5        float64
+		y4        float64
+		x7        float64
+		x6        float64
+		x1        float64
+		x0        float64
+		y3        float64
+		y2        float64
+		x5        float64
+		r3lowx0   float64
+		x4        float64
+		r0lowx6   float64
+		x3        float64
+		r3highx0  float64
+		x2        float64
+		r0highx6  float64
+		r0lowx0   float64
+		sr1lowx6  float64
+		r0highx0  float64
+		sr1highx6 float64
+		sr3low    float64
+		r1lowx0   float64
+		sr2lowx6  float64
+		r1highx0  float64
+		sr2highx6 float64
+		r2lowx0   float64
+		sr3lowx6  float64
+		r2highx0  float64
+		sr3highx6 float64
+		r1highx4  float64
+		r1lowx4   float64
+		r0highx4  float64
+		r0lowx4   float64
+		sr3highx4 float64
+		sr3lowx4  float64
+		sr2highx4 float64
+		sr2lowx4  float64
+		r0lowx2   float64
+		r0highx2  float64
+		r1lowx2   float64
+		r1highx2  float64
+		r2lowx2   float64
+		r2highx2  float64
+		sr3lowx2  float64
+		sr3highx2 float64
+		z0        float64
+		z1        float64
+		z2        float64
+		z3        float64
+		m0        int64
+		m1        int64
+		m2        int64
+		m3        int64
+		m00       uint32
+		m01       uint32
+		m02       uint32
+		m03       uint32
+		m10       uint32
+		m11       uint32
+		m12       uint32
+		m13       uint32
+		m20       uint32
+		m21       uint32
+		m22       uint32
+		m23       uint32
+		m30       uint32
+		m31       uint32
+		m32       uint32
+		m33       uint64
+		lbelow2   int32
+		lbelow3   int32
+		lbelow4   int32
+		lbelow5   int32
+		lbelow6   int32
+		lbelow7   int32
+		lbelow8   int32
+		lbelow9   int32
+		lbelow10  int32
+		lbelow11  int32
+		lbelow12  int32
+		lbelow13  int32
+		lbelow14  int32
+		lbelow15  int32
+		s00       uint32
+		s01       uint32
+		s02       uint32
+		s03       uint32
+		s10       uint32
+		s11       uint32
+		s12       uint32
+		s13       uint32
+		s20       uint32
+		s21       uint32
+		s22       uint32
+		s23       uint32
+		s30       uint32
+		s31       uint32
+		s32       uint32
+		s33       uint32
+		bits32    uint64
+		f         uint64
+		f0        uint64
+		f1        uint64
+		f2        uint64
+		f3        uint64
+		f4        uint64
+		g         uint64
+		g0        uint64
+		g1        uint64
+		g2        uint64
+		g3        uint64
+		g4        uint64
+	)
+
+	var p int32
+
+	l := int32(len(m))
+
+	r00 := uint32(r[0])
+
+	r01 := uint32(r[1])
+
+	r02 := uint32(r[2])
+	r0 := int64(2151)
+
+	r03 := uint32(r[3])
+	r03 &= 15
+	r0 <<= 51
+
+	r10 := uint32(r[4])
+	r10 &= 252
+	r01 <<= 8
+	r0 += int64(r00)
+
+	r11 := uint32(r[5])
+	r02 <<= 16
+	r0 += int64(r01)
+
+	r12 := uint32(r[6])
+	r03 <<= 24
+	r0 += int64(r02)
+
+	r13 := uint32(r[7])
+	r13 &= 15
+	r1 := int64(2215)
+	r0 += int64(r03)
+
+	d0 := r0
+	r1 <<= 51
+	r2 := int64(2279)
+
+	r20 := uint32(r[8])
+	r20 &= 252
+	r11 <<= 8
+	r1 += int64(r10)
+
+	r21 := uint32(r[9])
+	r12 <<= 16
+	r1 += int64(r11)
+
+	r22 := uint32(r[10])
+	r13 <<= 24
+	r1 += int64(r12)
+
+	r23 := uint32(r[11])
+	r23 &= 15
+	r2 <<= 51
+	r1 += int64(r13)
+
+	d1 := r1
+	r21 <<= 8
+	r2 += int64(r20)
+
+	r30 := uint32(r[12])
+	r30 &= 252
+	r22 <<= 16
+	r2 += int64(r21)
+
+	r31 := uint32(r[13])
+	r23 <<= 24
+	r2 += int64(r22)
+
+	r32 := uint32(r[14])
+	r2 += int64(r23)
+	r3 := int64(2343)
+
+	d2 := r2
+	r3 <<= 51
+
+	r33 := uint32(r[15])
+	r33 &= 15
+	r31 <<= 8
+	r3 += int64(r30)
+
+	r32 <<= 16
+	r3 += int64(r31)
+
+	r33 <<= 24
+	r3 += int64(r32)
+
+	r3 += int64(r33)
+	h0 := alpha32 - alpha32
+
+	d3 := r3
+	h1 := alpha32 - alpha32
+
+	h2 := alpha32 - alpha32
+
+	h3 := alpha32 - alpha32
+
+	h4 := alpha32 - alpha32
+
+	r0low := math.Float64frombits(uint64(d0))
+	h5 := alpha32 - alpha32
+
+	r1low := math.Float64frombits(uint64(d1))
+	h6 := alpha32 - alpha32
+
+	r2low := math.Float64frombits(uint64(d2))
+	h7 := alpha32 - alpha32
+
+	r0low -= alpha0
+
+	r1low -= alpha32
+
+	r2low -= alpha64
+
+	r0high := r0low + alpha18
+
+	r3low := math.Float64frombits(uint64(d3))
+
+	r1high := r1low + alpha50
+	sr1low := scale * r1low
+
+	r2high := r2low + alpha82
+	sr2low := scale * r2low
+
+	r0high -= alpha18
+	r0high_stack := r0high
+
+	r3low -= alpha96
+
+	r1high -= alpha50
+	r1high_stack := r1high
+
+	sr1high := sr1low + alpham80
+
+	r0low -= r0high
+
+	r2high -= alpha82
+	sr3low = scale * r3low
+
+	sr2high := sr2low + alpham48
+
+	r1low -= r1high
+	r1low_stack := r1low
+
+	sr1high -= alpham80
+	sr1high_stack := sr1high
+
+	r2low -= r2high
+	r2low_stack := r2low
+
+	sr2high -= alpham48
+	sr2high_stack := sr2high
+
+	r3high := r3low + alpha112
+	r0low_stack := r0low
+
+	sr1low -= sr1high
+	sr1low_stack := sr1low
+
+	sr3high := sr3low + alpham16
+	r2high_stack := r2high
+
+	sr2low -= sr2high
+	sr2low_stack := sr2low
+
+	r3high -= alpha112
+	r3high_stack := r3high
+
+	sr3high -= alpham16
+	sr3high_stack := sr3high
+
+	r3low -= r3high
+	r3low_stack := r3low
+
+	sr3low -= sr3high
+	sr3low_stack := sr3low
+
+	if l < 16 {
+		goto addatmost15bytes
+	}
+
+	m00 = uint32(m[p+0])
+	m0 = 2151
+
+	m0 <<= 51
+	m1 = 2215
+	m01 = uint32(m[p+1])
+
+	m1 <<= 51
+	m2 = 2279
+	m02 = uint32(m[p+2])
+
+	m2 <<= 51
+	m3 = 2343
+	m03 = uint32(m[p+3])
+
+	m10 = uint32(m[p+4])
+	m01 <<= 8
+	m0 += int64(m00)
+
+	m11 = uint32(m[p+5])
+	m02 <<= 16
+	m0 += int64(m01)
+
+	m12 = uint32(m[p+6])
+	m03 <<= 24
+	m0 += int64(m02)
+
+	m13 = uint32(m[p+7])
+	m3 <<= 51
+	m0 += int64(m03)
+
+	m20 = uint32(m[p+8])
+	m11 <<= 8
+	m1 += int64(m10)
+
+	m21 = uint32(m[p+9])
+	m12 <<= 16
+	m1 += int64(m11)
+
+	m22 = uint32(m[p+10])
+	m13 <<= 24
+	m1 += int64(m12)
+
+	m23 = uint32(m[p+11])
+	m1 += int64(m13)
+
+	m30 = uint32(m[p+12])
+	m21 <<= 8
+	m2 += int64(m20)
+
+	m31 = uint32(m[p+13])
+	m22 <<= 16
+	m2 += int64(m21)
+
+	m32 = uint32(m[p+14])
+	m23 <<= 24
+	m2 += int64(m22)
+
+	m33 = uint64(m[p+15])
+	m2 += int64(m23)
+
+	d0 = m0
+	m31 <<= 8
+	m3 += int64(m30)
+
+	d1 = m1
+	m32 <<= 16
+	m3 += int64(m31)
+
+	d2 = m2
+	m33 += 256
+
+	m33 <<= 24
+	m3 += int64(m32)
+
+	m3 += int64(m33)
+	d3 = m3
+
+	p += 16
+	l -= 16
+
+	z0 = math.Float64frombits(uint64(d0))
+
+	z1 = math.Float64frombits(uint64(d1))
+
+	z2 = math.Float64frombits(uint64(d2))
+
+	z3 = math.Float64frombits(uint64(d3))
+
+	z0 -= alpha0
+
+	z1 -= alpha32
+
+	z2 -= alpha64
+
+	z3 -= alpha96
+
+	h0 += z0
+
+	h1 += z1
+
+	h3 += z2
+
+	h5 += z3
+
+	if l < 16 {
+		goto multiplyaddatmost15bytes
+	}
+
+multiplyaddatleast16bytes:
+
+	m2 = 2279
+	m20 = uint32(m[p+8])
+	y7 = h7 + alpha130
+
+	m2 <<= 51
+	m3 = 2343
+	m21 = uint32(m[p+9])
+	y6 = h6 + alpha130
+
+	m3 <<= 51
+	m0 = 2151
+	m22 = uint32(m[p+10])
+	y1 = h1 + alpha32
+
+	m0 <<= 51
+	m1 = 2215
+	m23 = uint32(m[p+11])
+	y0 = h0 + alpha32
+
+	m1 <<= 51
+	m30 = uint32(m[p+12])
+	y7 -= alpha130
+
+	m21 <<= 8
+	m2 += int64(m20)
+	m31 = uint32(m[p+13])
+	y6 -= alpha130
+
+	m22 <<= 16
+	m2 += int64(m21)
+	m32 = uint32(m[p+14])
+	y1 -= alpha32
+
+	m23 <<= 24
+	m2 += int64(m22)
+	m33 = uint64(m[p+15])
+	y0 -= alpha32
+
+	m2 += int64(m23)
+	m00 = uint32(m[p+0])
+	y5 = h5 + alpha96
+
+	m31 <<= 8
+	m3 += int64(m30)
+	m01 = uint32(m[p+1])
+	y4 = h4 + alpha96
+
+	m32 <<= 16
+	m02 = uint32(m[p+2])
+	x7 = h7 - y7
+	y7 *= scale
+
+	m33 += 256
+	m03 = uint32(m[p+3])
+	x6 = h6 - y6
+	y6 *= scale
+
+	m33 <<= 24
+	m3 += int64(m31)
+	m10 = uint32(m[p+4])
+	x1 = h1 - y1
+
+	m01 <<= 8
+	m3 += int64(m32)
+	m11 = uint32(m[p+5])
+	x0 = h0 - y0
+
+	m3 += int64(m33)
+	m0 += int64(m00)
+	m12 = uint32(m[p+6])
+	y5 -= alpha96
+
+	m02 <<= 16
+	m0 += int64(m01)
+	m13 = uint32(m[p+7])
+	y4 -= alpha96
+
+	m03 <<= 24
+	m0 += int64(m02)
+	d2 = m2
+	x1 += y7
+
+	m0 += int64(m03)
+	d3 = m3
+	x0 += y6
+
+	m11 <<= 8
+	m1 += int64(m10)
+	d0 = m0
+	x7 += y5
+
+	m12 <<= 16
+	m1 += int64(m11)
+	x6 += y4
+
+	m13 <<= 24
+	m1 += int64(m12)
+	y3 = h3 + alpha64
+
+	m1 += int64(m13)
+	d1 = m1
+	y2 = h2 + alpha64
+
+	x0 += x1
+
+	x6 += x7
+
+	y3 -= alpha64
+	r3low = r3low_stack
+
+	y2 -= alpha64
+	r0low = r0low_stack
+
+	x5 = h5 - y5
+	r3lowx0 = r3low * x0
+	r3high = r3high_stack
+
+	x4 = h4 - y4
+	r0lowx6 = r0low * x6
+	r0high = r0high_stack
+
+	x3 = h3 - y3
+	r3highx0 = r3high * x0
+	sr1low = sr1low_stack
+
+	x2 = h2 - y2
+	r0highx6 = r0high * x6
+	sr1high = sr1high_stack
+
+	x5 += y3
+	r0lowx0 = r0low * x0
+	r1low = r1low_stack
+
+	h6 = r3lowx0 + r0lowx6
+	sr1lowx6 = sr1low * x6
+	r1high = r1high_stack
+
+	x4 += y2
+	r0highx0 = r0high * x0
+	sr2low = sr2low_stack
+
+	h7 = r3highx0 + r0highx6
+	sr1highx6 = sr1high * x6
+	sr2high = sr2high_stack
+
+	x3 += y1
+	r1lowx0 = r1low * x0
+	r2low = r2low_stack
+
+	h0 = r0lowx0 + sr1lowx6
+	sr2lowx6 = sr2low * x6
+	r2high = r2high_stack
+
+	x2 += y0
+	r1highx0 = r1high * x0
+	sr3low = sr3low_stack
+
+	h1 = r0highx0 + sr1highx6
+	sr2highx6 = sr2high * x6
+	sr3high = sr3high_stack
+
+	x4 += x5
+	r2lowx0 = r2low * x0
+	z2 = math.Float64frombits(uint64(d2))
+
+	h2 = r1lowx0 + sr2lowx6
+	sr3lowx6 = sr3low * x6
+
+	x2 += x3
+	r2highx0 = r2high * x0
+	z3 = math.Float64frombits(uint64(d3))
+
+	h3 = r1highx0 + sr2highx6
+	sr3highx6 = sr3high * x6
+
+	r1highx4 = r1high * x4
+	z2 -= alpha64
+
+	h4 = r2lowx0 + sr3lowx6
+	r1lowx4 = r1low * x4
+
+	r0highx4 = r0high * x4
+	z3 -= alpha96
+
+	h5 = r2highx0 + sr3highx6
+	r0lowx4 = r0low * x4
+
+	h7 += r1highx4
+	sr3highx4 = sr3high * x4
+
+	h6 += r1lowx4
+	sr3lowx4 = sr3low * x4
+
+	h5 += r0highx4
+	sr2highx4 = sr2high * x4
+
+	h4 += r0lowx4
+	sr2lowx4 = sr2low * x4
+
+	h3 += sr3highx4
+	r0lowx2 = r0low * x2
+
+	h2 += sr3lowx4
+	r0highx2 = r0high * x2
+
+	h1 += sr2highx4
+	r1lowx2 = r1low * x2
+
+	h0 += sr2lowx4
+	r1highx2 = r1high * x2
+
+	h2 += r0lowx2
+	r2lowx2 = r2low * x2
+
+	h3 += r0highx2
+	r2highx2 = r2high * x2
+
+	h4 += r1lowx2
+	sr3lowx2 = sr3low * x2
+
+	h5 += r1highx2
+	sr3highx2 = sr3high * x2
+
+	p += 16
+	l -= 16
+	h6 += r2lowx2
+
+	h7 += r2highx2
+
+	z1 = math.Float64frombits(uint64(d1))
+	h0 += sr3lowx2
+
+	z0 = math.Float64frombits(uint64(d0))
+	h1 += sr3highx2
+
+	z1 -= alpha32
+
+	z0 -= alpha0
+
+	h5 += z3
+
+	h3 += z2
+
+	h1 += z1
+
+	h0 += z0
+
+	if l >= 16 {
+		goto multiplyaddatleast16bytes
+	}
+
+multiplyaddatmost15bytes:
+
+	y7 = h7 + alpha130
+
+	y6 = h6 + alpha130
+
+	y1 = h1 + alpha32
+
+	y0 = h0 + alpha32
+
+	y7 -= alpha130
+
+	y6 -= alpha130
+
+	y1 -= alpha32
+
+	y0 -= alpha32
+
+	y5 = h5 + alpha96
+
+	y4 = h4 + alpha96
+
+	x7 = h7 - y7
+	y7 *= scale
+
+	x6 = h6 - y6
+	y6 *= scale
+
+	x1 = h1 - y1
+
+	x0 = h0 - y0
+
+	y5 -= alpha96
+
+	y4 -= alpha96
+
+	x1 += y7
+
+	x0 += y6
+
+	x7 += y5
+
+	x6 += y4
+
+	y3 = h3 + alpha64
+
+	y2 = h2 + alpha64
+
+	x0 += x1
+
+	x6 += x7
+
+	y3 -= alpha64
+	r3low = r3low_stack
+
+	y2 -= alpha64
+	r0low = r0low_stack
+
+	x5 = h5 - y5
+	r3lowx0 = r3low * x0
+	r3high = r3high_stack
+
+	x4 = h4 - y4
+	r0lowx6 = r0low * x6
+	r0high = r0high_stack
+
+	x3 = h3 - y3
+	r3highx0 = r3high * x0
+	sr1low = sr1low_stack
+
+	x2 = h2 - y2
+	r0highx6 = r0high * x6
+	sr1high = sr1high_stack
+
+	x5 += y3
+	r0lowx0 = r0low * x0
+	r1low = r1low_stack
+
+	h6 = r3lowx0 + r0lowx6
+	sr1lowx6 = sr1low * x6
+	r1high = r1high_stack
+
+	x4 += y2
+	r0highx0 = r0high * x0
+	sr2low = sr2low_stack
+
+	h7 = r3highx0 + r0highx6
+	sr1highx6 = sr1high * x6
+	sr2high = sr2high_stack
+
+	x3 += y1
+	r1lowx0 = r1low * x0
+	r2low = r2low_stack
+
+	h0 = r0lowx0 + sr1lowx6
+	sr2lowx6 = sr2low * x6
+	r2high = r2high_stack
+
+	x2 += y0
+	r1highx0 = r1high * x0
+	sr3low = sr3low_stack
+
+	h1 = r0highx0 + sr1highx6
+	sr2highx6 = sr2high * x6
+	sr3high = sr3high_stack
+
+	x4 += x5
+	r2lowx0 = r2low * x0
+
+	h2 = r1lowx0 + sr2lowx6
+	sr3lowx6 = sr3low * x6
+
+	x2 += x3
+	r2highx0 = r2high * x0
+
+	h3 = r1highx0 + sr2highx6
+	sr3highx6 = sr3high * x6
+
+	r1highx4 = r1high * x4
+
+	h4 = r2lowx0 + sr3lowx6
+	r1lowx4 = r1low * x4
+
+	r0highx4 = r0high * x4
+
+	h5 = r2highx0 + sr3highx6
+	r0lowx4 = r0low * x4
+
+	h7 += r1highx4
+	sr3highx4 = sr3high * x4
+
+	h6 += r1lowx4
+	sr3lowx4 = sr3low * x4
+
+	h5 += r0highx4
+	sr2highx4 = sr2high * x4
+
+	h4 += r0lowx4
+	sr2lowx4 = sr2low * x4
+
+	h3 += sr3highx4
+	r0lowx2 = r0low * x2
+
+	h2 += sr3lowx4
+	r0highx2 = r0high * x2
+
+	h1 += sr2highx4
+	r1lowx2 = r1low * x2
+
+	h0 += sr2lowx4
+	r1highx2 = r1high * x2
+
+	h2 += r0lowx2
+	r2lowx2 = r2low * x2
+
+	h3 += r0highx2
+	r2highx2 = r2high * x2
+
+	h4 += r1lowx2
+	sr3lowx2 = sr3low * x2
+
+	h5 += r1highx2
+	sr3highx2 = sr3high * x2
+
+	h6 += r2lowx2
+
+	h7 += r2highx2
+
+	h0 += sr3lowx2
+
+	h1 += sr3highx2
+
+addatmost15bytes:
+
+	if l == 0 {
+		goto nomorebytes
+	}
+
+	lbelow2 = l - 2
+
+	lbelow3 = l - 3
+
+	lbelow2 >>= 31
+	lbelow4 = l - 4
+
+	m00 = uint32(m[p+0])
+	lbelow3 >>= 31
+	p += lbelow2
+
+	m01 = uint32(m[p+1])
+	lbelow4 >>= 31
+	p += lbelow3
+
+	m02 = uint32(m[p+2])
+	p += lbelow4
+	m0 = 2151
+
+	m03 = uint32(m[p+3])
+	m0 <<= 51
+	m1 = 2215
+
+	m0 += int64(m00)
+	m01 &^= uint32(lbelow2)
+
+	m02 &^= uint32(lbelow3)
+	m01 -= uint32(lbelow2)
+
+	m01 <<= 8
+	m03 &^= uint32(lbelow4)
+
+	m0 += int64(m01)
+	lbelow2 -= lbelow3
+
+	m02 += uint32(lbelow2)
+	lbelow3 -= lbelow4
+
+	m02 <<= 16
+	m03 += uint32(lbelow3)
+
+	m03 <<= 24
+	m0 += int64(m02)
+
+	m0 += int64(m03)
+	lbelow5 = l - 5
+
+	lbelow6 = l - 6
+	lbelow7 = l - 7
+
+	lbelow5 >>= 31
+	lbelow8 = l - 8
+
+	lbelow6 >>= 31
+	p += lbelow5
+
+	m10 = uint32(m[p+4])
+	lbelow7 >>= 31
+	p += lbelow6
+
+	m11 = uint32(m[p+5])
+	lbelow8 >>= 31
+	p += lbelow7
+
+	m12 = uint32(m[p+6])
+	m1 <<= 51
+	p += lbelow8
+
+	m13 = uint32(m[p+7])
+	m10 &^= uint32(lbelow5)
+	lbelow4 -= lbelow5
+
+	m10 += uint32(lbelow4)
+	lbelow5 -= lbelow6
+
+	m11 &^= uint32(lbelow6)
+	m11 += uint32(lbelow5)
+
+	m11 <<= 8
+	m1 += int64(m10)
+
+	m1 += int64(m11)
+	m12 &^= uint32(lbelow7)
+
+	lbelow6 -= lbelow7
+	m13 &^= uint32(lbelow8)
+
+	m12 += uint32(lbelow6)
+	lbelow7 -= lbelow8
+
+	m12 <<= 16
+	m13 += uint32(lbelow7)
+
+	m13 <<= 24
+	m1 += int64(m12)
+
+	m1 += int64(m13)
+	m2 = 2279
+
+	lbelow9 = l - 9
+	m3 = 2343
+
+	lbelow10 = l - 10
+	lbelow11 = l - 11
+
+	lbelow9 >>= 31
+	lbelow12 = l - 12
+
+	lbelow10 >>= 31
+	p += lbelow9
+
+	m20 = uint32(m[p+8])
+	lbelow11 >>= 31
+	p += lbelow10
+
+	m21 = uint32(m[p+9])
+	lbelow12 >>= 31
+	p += lbelow11
+
+	m22 = uint32(m[p+10])
+	m2 <<= 51
+	p += lbelow12
+
+	m23 = uint32(m[p+11])
+	m20 &^= uint32(lbelow9)
+	lbelow8 -= lbelow9
+
+	m20 += uint32(lbelow8)
+	lbelow9 -= lbelow10
+
+	m21 &^= uint32(lbelow10)
+	m21 += uint32(lbelow9)
+
+	m21 <<= 8
+	m2 += int64(m20)
+
+	m2 += int64(m21)
+	m22 &^= uint32(lbelow11)
+
+	lbelow10 -= lbelow11
+	m23 &^= uint32(lbelow12)
+
+	m22 += uint32(lbelow10)
+	lbelow11 -= lbelow12
+
+	m22 <<= 16
+	m23 += uint32(lbelow11)
+
+	m23 <<= 24
+	m2 += int64(m22)
+
+	m3 <<= 51
+	lbelow13 = l - 13
+
+	lbelow13 >>= 31
+	lbelow14 = l - 14
+
+	lbelow14 >>= 31
+	p += lbelow13
+	lbelow15 = l - 15
+
+	m30 = uint32(m[p+12])
+	lbelow15 >>= 31
+	p += lbelow14
+
+	m31 = uint32(m[p+13])
+	p += lbelow15
+	m2 += int64(m23)
+
+	m32 = uint32(m[p+14])
+	m30 &^= uint32(lbelow13)
+	lbelow12 -= lbelow13
+
+	m30 += uint32(lbelow12)
+	lbelow13 -= lbelow14
+
+	m3 += int64(m30)
+	m31 &^= uint32(lbelow14)
+
+	m31 += uint32(lbelow13)
+	m32 &^= uint32(lbelow15)
+
+	m31 <<= 8
+	lbelow14 -= lbelow15
+
+	m3 += int64(m31)
+	m32 += uint32(lbelow14)
+	d0 = m0
+
+	m32 <<= 16
+	m33 = uint64(lbelow15 + 1)
+	d1 = m1
+
+	m33 <<= 24
+	m3 += int64(m32)
+	d2 = m2
+
+	m3 += int64(m33)
+	d3 = m3
+
+	z3 = math.Float64frombits(uint64(d3))
+
+	z2 = math.Float64frombits(uint64(d2))
+
+	z1 = math.Float64frombits(uint64(d1))
+
+	z0 = math.Float64frombits(uint64(d0))
+
+	z3 -= alpha96
+
+	z2 -= alpha64
+
+	z1 -= alpha32
+
+	z0 -= alpha0
+
+	h5 += z3
+
+	h3 += z2
+
+	h1 += z1
+
+	h0 += z0
+
+	y7 = h7 + alpha130
+
+	y6 = h6 + alpha130
+
+	y1 = h1 + alpha32
+
+	y0 = h0 + alpha32
+
+	y7 -= alpha130
+
+	y6 -= alpha130
+
+	y1 -= alpha32
+
+	y0 -= alpha32
+
+	y5 = h5 + alpha96
+
+	y4 = h4 + alpha96
+
+	x7 = h7 - y7
+	y7 *= scale
+
+	x6 = h6 - y6
+	y6 *= scale
+
+	x1 = h1 - y1
+
+	x0 = h0 - y0
+
+	y5 -= alpha96
+
+	y4 -= alpha96
+
+	x1 += y7
+
+	x0 += y6
+
+	x7 += y5
+
+	x6 += y4
+
+	y3 = h3 + alpha64
+
+	y2 = h2 + alpha64
+
+	x0 += x1
+
+	x6 += x7
+
+	y3 -= alpha64
+	r3low = r3low_stack
+
+	y2 -= alpha64
+	r0low = r0low_stack
+
+	x5 = h5 - y5
+	r3lowx0 = r3low * x0
+	r3high = r3high_stack
+
+	x4 = h4 - y4
+	r0lowx6 = r0low * x6
+	r0high = r0high_stack
+
+	x3 = h3 - y3
+	r3highx0 = r3high * x0
+	sr1low = sr1low_stack
+
+	x2 = h2 - y2
+	r0highx6 = r0high * x6
+	sr1high = sr1high_stack
+
+	x5 += y3
+	r0lowx0 = r0low * x0
+	r1low = r1low_stack
+
+	h6 = r3lowx0 + r0lowx6
+	sr1lowx6 = sr1low * x6
+	r1high = r1high_stack
+
+	x4 += y2
+	r0highx0 = r0high * x0
+	sr2low = sr2low_stack
+
+	h7 = r3highx0 + r0highx6
+	sr1highx6 = sr1high * x6
+	sr2high = sr2high_stack
+
+	x3 += y1
+	r1lowx0 = r1low * x0
+	r2low = r2low_stack
+
+	h0 = r0lowx0 + sr1lowx6
+	sr2lowx6 = sr2low * x6
+	r2high = r2high_stack
+
+	x2 += y0
+	r1highx0 = r1high * x0
+	sr3low = sr3low_stack
+
+	h1 = r0highx0 + sr1highx6
+	sr2highx6 = sr2high * x6
+	sr3high = sr3high_stack
+
+	x4 += x5
+	r2lowx0 = r2low * x0
+
+	h2 = r1lowx0 + sr2lowx6
+	sr3lowx6 = sr3low * x6
+
+	x2 += x3
+	r2highx0 = r2high * x0
+
+	h3 = r1highx0 + sr2highx6
+	sr3highx6 = sr3high * x6
+
+	r1highx4 = r1high * x4
+
+	h4 = r2lowx0 + sr3lowx6
+	r1lowx4 = r1low * x4
+
+	r0highx4 = r0high * x4
+
+	h5 = r2highx0 + sr3highx6
+	r0lowx4 = r0low * x4
+
+	h7 += r1highx4
+	sr3highx4 = sr3high * x4
+
+	h6 += r1lowx4
+	sr3lowx4 = sr3low * x4
+
+	h5 += r0highx4
+	sr2highx4 = sr2high * x4
+
+	h4 += r0lowx4
+	sr2lowx4 = sr2low * x4
+
+	h3 += sr3highx4
+	r0lowx2 = r0low * x2
+
+	h2 += sr3lowx4
+	r0highx2 = r0high * x2
+
+	h1 += sr2highx4
+	r1lowx2 = r1low * x2
+
+	h0 += sr2lowx4
+	r1highx2 = r1high * x2
+
+	h2 += r0lowx2
+	r2lowx2 = r2low * x2
+
+	h3 += r0highx2
+	r2highx2 = r2high * x2
+
+	h4 += r1lowx2
+	sr3lowx2 = sr3low * x2
+
+	h5 += r1highx2
+	sr3highx2 = sr3high * x2
+
+	h6 += r2lowx2
+
+	h7 += r2highx2
+
+	h0 += sr3lowx2
+
+	h1 += sr3highx2
+
+nomorebytes:
+
+	y7 = h7 + alpha130
+
+	y0 = h0 + alpha32
+
+	y1 = h1 + alpha32
+
+	y2 = h2 + alpha64
+
+	y7 -= alpha130
+
+	y3 = h3 + alpha64
+
+	y4 = h4 + alpha96
+
+	y5 = h5 + alpha96
+
+	x7 = h7 - y7
+	y7 *= scale
+
+	y0 -= alpha32
+
+	y1 -= alpha32
+
+	y2 -= alpha64
+
+	h6 += x7
+
+	y3 -= alpha64
+
+	y4 -= alpha96
+
+	y5 -= alpha96
+
+	y6 = h6 + alpha130
+
+	x0 = h0 - y0
+
+	x1 = h1 - y1
+
+	x2 = h2 - y2
+
+	y6 -= alpha130
+
+	x0 += y7
+
+	x3 = h3 - y3
+
+	x4 = h4 - y4
+
+	x5 = h5 - y5
+
+	x6 = h6 - y6
+
+	y6 *= scale
+
+	x2 += y0
+
+	x3 += y1
+
+	x4 += y2
+
+	x0 += y6
+
+	x5 += y3
+
+	x6 += y4
+
+	x2 += x3
+
+	x0 += x1
+
+	x4 += x5
+
+	x6 += y5
+
+	x2 += offset1
+	d1 = int64(math.Float64bits(x2))
+
+	x0 += offset0
+	d0 = int64(math.Float64bits(x0))
+
+	x4 += offset2
+	d2 = int64(math.Float64bits(x4))
+
+	x6 += offset3
+	d3 = int64(math.Float64bits(x6))
+
+	f0 = uint64(d0)
+
+	f1 = uint64(d1)
+	bits32 = math.MaxUint64
+
+	f2 = uint64(d2)
+	bits32 >>= 32
+
+	f3 = uint64(d3)
+	f = f0 >> 32
+
+	f0 &= bits32
+	f &= 255
+
+	f1 += f
+	g0 = f0 + 5
+
+	g = g0 >> 32
+	g0 &= bits32
+
+	f = f1 >> 32
+	f1 &= bits32
+
+	f &= 255
+	g1 = f1 + g
+
+	g = g1 >> 32
+	f2 += f
+
+	f = f2 >> 32
+	g1 &= bits32
+
+	f2 &= bits32
+	f &= 255
+
+	f3 += f
+	g2 = f2 + g
+
+	g = g2 >> 32
+	g2 &= bits32
+
+	f4 = f3 >> 32
+	f3 &= bits32
+
+	f4 &= 255
+	g3 = f3 + g
+
+	g = g3 >> 32
+	g3 &= bits32
+
+	g4 = f4 + g
+
+	g4 = g4 - 4
+	s00 = uint32(s[0])
+
+	f = uint64(int64(g4) >> 63)
+	s01 = uint32(s[1])
+
+	f0 &= f
+	g0 &^= f
+	s02 = uint32(s[2])
+
+	f1 &= f
+	f0 |= g0
+	s03 = uint32(s[3])
+
+	g1 &^= f
+	f2 &= f
+	s10 = uint32(s[4])
+
+	f3 &= f
+	g2 &^= f
+	s11 = uint32(s[5])
+
+	g3 &^= f
+	f1 |= g1
+	s12 = uint32(s[6])
+
+	f2 |= g2
+	f3 |= g3
+	s13 = uint32(s[7])
+
+	s01 <<= 8
+	f0 += uint64(s00)
+	s20 = uint32(s[8])
+
+	s02 <<= 16
+	f0 += uint64(s01)
+	s21 = uint32(s[9])
+
+	s03 <<= 24
+	f0 += uint64(s02)
+	s22 = uint32(s[10])
+
+	s11 <<= 8
+	f1 += uint64(s10)
+	s23 = uint32(s[11])
+
+	s12 <<= 16
+	f1 += uint64(s11)
+	s30 = uint32(s[12])
+
+	s13 <<= 24
+	f1 += uint64(s12)
+	s31 = uint32(s[13])
+
+	f0 += uint64(s03)
+	f1 += uint64(s13)
+	s32 = uint32(s[14])
+
+	s21 <<= 8
+	f2 += uint64(s20)
+	s33 = uint32(s[15])
+
+	s22 <<= 16
+	f2 += uint64(s21)
+
+	s23 <<= 24
+	f2 += uint64(s22)
+
+	s31 <<= 8
+	f3 += uint64(s30)
+
+	s32 <<= 16
+	f3 += uint64(s31)
+
+	s33 <<= 24
+	f3 += uint64(s32)
+
+	f2 += uint64(s23)
+	f3 += uint64(s33)
+
+	out[0] = byte(f0)
+	f0 >>= 8
+	out[1] = byte(f0)
+	f0 >>= 8
+	out[2] = byte(f0)
+	f0 >>= 8
+	out[3] = byte(f0)
+	f0 >>= 8
+	f1 += f0
+
+	out[4] = byte(f1)
+	f1 >>= 8
+	out[5] = byte(f1)
+	f1 >>= 8
+	out[6] = byte(f1)
+	f1 >>= 8
+	out[7] = byte(f1)
+	f1 >>= 8
+	f2 += f1
+
+	out[8] = byte(f2)
+	f2 >>= 8
+	out[9] = byte(f2)
+	f2 >>= 8
+	out[10] = byte(f2)
+	f2 >>= 8
+	out[11] = byte(f2)
+	f2 >>= 8
+	f3 += f2
+
+	out[12] = byte(f3)
+	f3 >>= 8
+	out[13] = byte(f3)
+	f3 >>= 8
+	out[14] = byte(f3)
+	f3 >>= 8
+	out[15] = byte(f3)
+}