idna: Revert "use code generated by internal x/text package"

This reverts commit 67957fd0.

Updates golang/go#18567

Change-Id: I4a9da509eb95949d2e3ab08763274abf6706f6f8
Reviewed-on: https://go-review.googlesource.com/35270
Run-TryBot: Nigel Tao <nigeltao@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Benoit Sigoure <tsunanet@gmail.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>

idna: Revert "use code generated by internal x/text package"
This reverts commit 67957fd0.

Updates golang/go#18567

Change-Id: I4a9da509eb95949d2e3ab08763274abf6706f6f8
Reviewed-on: https://go-review.googlesource.com/35270
Run-TryBot: Nigel Tao <nigeltao@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Benoit Sigoure <tsunanet@gmail.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
f2499483 · Nigel Tao · Brad Fitzpatrick · 0ab64c59 · f2499483 · f2499483
Commit f2499483 authored Jan 14, 2017 by Nigel Tao Committed by Brad Fitzpatrick Jan 14, 2017
7 changed files
--- a/idna/idna.go
+++ b/idna/idna.go
-// Copied from the golang.org/x/text repo; DO NOT EDIT
-
-// Copyright 2016 The Go Authors. All rights reserved.
+// Copyright 2012 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// Package idna implements IDNA2008 using the compatibility processing
-// defined by UTS (Unicode Technical Standard) #46, which defines a standard to
-// deal with the transition from IDNA2003.
-//
-// IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
-// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
-// UTS #46 is defined in http://www.unicode.org/reports/tr46.
-// See http://unicode.org/cldr/utility/idna.jsp for a visualization of the
-// differences between these two standards.
+// Package idna implements IDNA2008 (Internationalized Domain Names for
+// Applications), defined in RFC 5890, RFC 5891, RFC 5892, RFC 5893 and
+// RFC 5894.
 package idna // import "golang.org/x/net/idna"

 import (
-	"fmt"
 	"strings"
 	"unicode/utf8"
-
-	"golang.org/x/text/secure/bidirule"
-	"golang.org/x/text/unicode/norm"
 )

-// NOTE: Unlike common practice in Go APIs, the functions will return a
-// sanitized domain name in case of errors. Browsers sometimes use a partially
-// evaluated string as lookup.
-// TODO: the current error handling is, in my opinion, the least opinionated.
-// Other strategies are also viable, though:
-// Option 1) Return an empty string in case of error, but allow the user to
-//    specify explicitly which errors to ignore.
-// Option 2) Return the partially evaluated string if it is itself a valid
-//    string, otherwise return the empty string in case of error.
-// Option 3) Option 1 and 2.
-// Option 4) Always return an empty string for now and implement Option 1 as
-//    needed, and document that the return string may not be empty in case of
-//    error in the future.
-// I think Option 1 is best, but it is quite opinionated.
+// TODO(nigeltao): specify when errors occur. For example, is ToASCII(".") or
+// ToASCII("foo\x00") an error? See also http://www.unicode.org/faq/idn.html#11
+
+// acePrefix is the ASCII Compatible Encoding prefix.
+const acePrefix = "xn--"

 // ToASCII converts a domain or domain label to its ASCII form. For example,
 // ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
-// ToASCII("golang") is "golang". If an error is encountered it will return
-// an error and a (partially) processed result.
+// ToASCII("golang") is "golang".
 func ToASCII(s string) (string, error) {
-	return Resolve.process(s, true)
-}
-
-// ToUnicode converts a domain or domain label to its Unicode form. For example,
-// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
-// ToUnicode("golang") is "golang". If an error is encountered it will return
-// an error and a (partially) processed result.
-func ToUnicode(s string) (string, error) {
-	return NonTransitional.process(s, false)
-}
-
-// An Option configures a Profile at creation time.
-type Option func(*options)
-
-// Transitional sets a Profile to use the Transitional mapping as defined
-// in UTS #46.
-func Transitional(transitional bool) Option {
-	return func(o *options) { o.transitional = true }
-}
-
-// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
-// are longer than allowed by the RFC.
-func VerifyDNSLength(verify bool) Option {
-	return func(o *options) { o.verifyDNSLength = verify }
-}
-
-// IgnoreSTD3Rules sets whether ASCII characters outside the A-Z, a-z, 0-9 and
-// the hyphen should be allowed. By default this is not allowed, but IDNA2003,
-// and as a consequence UTS #46, allows this to be overridden to support
-// browsers that allow characters outside this range, for example a '_' (U+005F
-// LOW LINE). See http://www.rfc-editor.org/std/std3.txt for more details.
-func IgnoreSTD3Rules(ignore bool) Option {
-	return func(o *options) { o.ignoreSTD3Rules = ignore }
-}
-
-type options struct {
-	transitional    bool
-	ignoreSTD3Rules bool
-	verifyDNSLength bool
-}
-
-// A Profile defines the configuration of a IDNA mapper.
-type Profile struct {
-	options
-}
-
-func apply(o *options, opts []Option) {
-	for _, f := range opts {
-		f(o)
+	if ascii(s) {
+		return s, nil
+	}
+	labels := strings.Split(s, ".")
+	for i, label := range labels {
+		if !ascii(label) {
+			a, err := encode(acePrefix, label)
+			if err != nil {
+				return "", err
+			}
+			labels[i] = a
+		}
 	}
-}
-
-// New creates a new Profile.
-// With no options, the returned profile is the non-transitional profile as
-// defined in UTS #46.
-func New(o ...Option) *Profile {
-	p := &Profile{}
-	apply(&p.options, o)
-	return p
-}
-
-// ToASCII converts a domain or domain label to its ASCII form. For example,
-// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
-// ToASCII("golang") is "golang". If an error is encountered it will return
-// an error and a (partially) processed result.
-func (p *Profile) ToASCII(s string) (string, error) {
-	return p.process(s, true)
+	return strings.Join(labels, "."), nil
 }

 // ToUnicode converts a domain or domain label to its Unicode form. For example,
 // ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
-// ToUnicode("golang") is "golang". If an error is encountered it will return
-// an error and a (partially) processed result.
-func (p *Profile) ToUnicode(s string) (string, error) {
-	pp := *p
-	pp.transitional = false
-	return pp.process(s, false)
-}
-
-// String reports a string with a description of the profile for debugging
-// purposes. The string format may change with different versions.
-func (p *Profile) String() string {
-	s := ""
-	if p.transitional {
-		s = "Transitional"
-	} else {
-		s = "NonTransitional"
-	}
-	if p.ignoreSTD3Rules {
-		s += ":NoSTD3Rules"
-	}
-	return s
-}
-
-var (
-	// Resolve is the recommended profile for resolving domain names.
-	// The configuration of this profile may change over time.
-	Resolve = resolve
-
-	// Display is the recommended profile for displaying domain names.
-	// The configuration of this profile may change over time.
-	Display = display
-
-	// NonTransitional defines a profile that implements the Transitional
-	// mapping as defined in UTS #46 with no additional constraints.
-	NonTransitional = nonTransitional
-
-	resolve         = &Profile{options{transitional: true}}
-	display         = &Profile{}
-	nonTransitional = &Profile{}
-
-	// TODO: profiles
-	// V2008: strict IDNA2008
-	// Register: recommended for approving domain names: nontransitional, but
-	// bundle or block deviation characters.
-)
-
-type labelError struct{ label, code_ string }
-
-func (e labelError) code() string { return e.code_ }
-func (e labelError) Error() string {
-	return fmt.Sprintf("idna: invalid label %q", e.label)
-}
-
-type runeError rune
-
-func (e runeError) code() string { return "P1" }
-func (e runeError) Error() string {
-	return fmt.Sprintf("idna: disallowed rune %U", e)
-}
-
-// process implements the algorithm described in section 4 of UTS #46,
-// see http://www.unicode.org/reports/tr46.
-func (p *Profile) process(s string, toASCII bool) (string, error) {
-	var (
-		b    []byte
-		err  error
-		k, i int
-	)
-	for i < len(s) {
-		v, sz := trie.lookupString(s[i:])
-		start := i
-		i += sz
-		// Copy bytes not copied so far.
-		switch p.simplify(info(v).category()) {
-		case valid:
-			continue
-		case disallowed:
-			if err == nil {
-				r, _ := utf8.DecodeRuneInString(s[i:])
-				err = runeError(r)
-			}
-			continue
-		case mapped, deviation:
-			b = append(b, s[k:start]...)
-			b = info(v).appendMapping(b, s[start:i])
-		case ignored:
-			b = append(b, s[k:start]...)
-			// drop the rune
-		case unknown:
-			b = append(b, s[k:start]...)
-			b = append(b, "\ufffd"...)
-		}
-		k = i
-	}
-	if k == 0 {
-		// No changes so far.
-		s = norm.NFC.String(s)
-	} else {
-		b = append(b, s[k:]...)
-		if norm.NFC.QuickSpan(b) != len(b) {
-			b = norm.NFC.Bytes(b)
-		}
-		// TODO: the punycode converters require strings as input.
-		s = string(b)
-	}
-	// Remove leading empty labels
-	for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
-	}
-	if s == "" {
-		return "", &labelError{s, "A4"}
+// ToUnicode("golang") is "golang".
+func ToUnicode(s string) (string, error) {
+	if !strings.Contains(s, acePrefix) {
+		return s, nil
 	}
-	labels := labelIter{orig: s}
-	for ; !labels.done(); labels.next() {
-		label := labels.label()
-		if label == "" {
-			// Empty labels are not okay. The label iterator skips the last
-			// label if it is empty.
-			if err == nil {
-				err = &labelError{s, "A4"}
-			}
-			continue
-		}
+	labels := strings.Split(s, ".")
+	for i, label := range labels {
 		if strings.HasPrefix(label, acePrefix) {
-			u, err2 := decode(label[len(acePrefix):])
-			if err2 != nil {
-				if err == nil {
-					err = err2
-				}
-				// Spec says keep the old label.
-				continue
+			u, err := decode(label[len(acePrefix):])
+			if err != nil {
+				return "", err
 			}
-			labels.set(u)
-			if err == nil {
-				err = p.validateFromPunycode(u)
-			}
-			if err == nil {
-				err = NonTransitional.validate(u)
-			}
-		} else if err == nil {
-			err = p.validate(label)
+			labels[i] = u
 		}
 	}
-	if toASCII {
-		for labels.reset(); !labels.done(); labels.next() {
-			label := labels.label()
-			if !ascii(label) {
-				a, err2 := encode(acePrefix, label)
-				if err == nil {
-					err = err2
-				}
-				label = a
-				labels.set(a)
-			}
-			n := len(label)
-			if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
-				err = &labelError{label, "A4"}
-			}
-		}
-	}
-	s = labels.result()
-	if toASCII && p.verifyDNSLength && err == nil {
-		// Compute the length of the domain name minus the root label and its dot.
-		n := len(s)
-		if n > 0 && s[n-1] == '.' {
-			n--
-		}
-		if len(s) < 1 || n > 253 {
-			err = &labelError{s, "A4"}
-		}
-	}
-	return s, err
-}
-
-// A labelIter allows iterating over domain name labels.
-type labelIter struct {
-	orig     string
-	slice    []string
-	curStart int
-	curEnd   int
-	i        int
-}
-
-func (l *labelIter) reset() {
-	l.curStart = 0
-	l.curEnd = 0
-	l.i = 0
-}
-
-func (l *labelIter) done() bool {
-	return l.curStart >= len(l.orig)
-}
-
-func (l *labelIter) result() string {
-	if l.slice != nil {
-		return strings.Join(l.slice, ".")
-	}
-	return l.orig
-}
-
-func (l *labelIter) label() string {
-	if l.slice != nil {
-		return l.slice[l.i]
-	}
-	p := strings.IndexByte(l.orig[l.curStart:], '.')
-	l.curEnd = l.curStart + p
-	if p == -1 {
-		l.curEnd = len(l.orig)
-	}
-	return l.orig[l.curStart:l.curEnd]
-}
-
-// next sets the value to the next label. It skips the last label if it is empty.
-func (l *labelIter) next() {
-	l.i++
-	if l.slice != nil {
-		if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
-			l.curStart = len(l.orig)
-		}
-	} else {
-		l.curStart = l.curEnd + 1
-		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
-			l.curStart = len(l.orig)
-		}
-	}
-}
-
-func (l *labelIter) set(s string) {
-	if l.slice == nil {
-		l.slice = strings.Split(l.orig, ".")
-	}
-	l.slice[l.i] = s
-}
-
-// acePrefix is the ASCII Compatible Encoding prefix.
-const acePrefix = "xn--"
-
-func (p *Profile) simplify(cat category) category {
-	switch cat {
-	case disallowedSTD3Mapped:
-		if !p.ignoreSTD3Rules {
-			cat = disallowed
-		} else {
-			cat = mapped
-		}
-	case disallowedSTD3Valid:
-		if !p.ignoreSTD3Rules {
-			cat = disallowed
-		} else {
-			cat = valid
-		}
-	case deviation:
-		if !p.transitional {
-			cat = valid
-		}
-	case validNV8, validXV8:
-		// TODO: handle V2008
-		cat = valid
-	}
-	return cat
-}
-
-func (p *Profile) validateFromPunycode(s string) error {
-	if !norm.NFC.IsNormalString(s) {
-		return &labelError{s, "V1"}
-	}
-	for i := 0; i < len(s); {
-		v, sz := trie.lookupString(s[i:])
-		if c := p.simplify(info(v).category()); c != valid && c != deviation {
-			return &labelError{s, "V6"}
-		}
-		i += sz
-	}
-	return nil
-}
-
-const (
-	zwnj = "\u200c"
-	zwj  = "\u200d"
-)
-
-type joinState int8
-
-const (
-	stateStart joinState = iota
-	stateVirama
-	stateBefore
-	stateBeforeVirama
-	stateAfter
-	stateFAIL
-)
-
-var joinStates = [][numJoinTypes]joinState{
-	stateStart: {
-		joiningL:   stateBefore,
-		joiningD:   stateBefore,
-		joinZWNJ:   stateFAIL,
-		joinZWJ:    stateFAIL,
-		joinVirama: stateVirama,
-	},
-	stateVirama: {
-		joiningL: stateBefore,
-		joiningD: stateBefore,
-	},
-	stateBefore: {
-		joiningL:   stateBefore,
-		joiningD:   stateBefore,
-		joiningT:   stateBefore,
-		joinZWNJ:   stateAfter,
-		joinZWJ:    stateFAIL,
-		joinVirama: stateBeforeVirama,
-	},
-	stateBeforeVirama: {
-		joiningL: stateBefore,
-		joiningD: stateBefore,
-		joiningT: stateBefore,
-	},
-	stateAfter: {
-		joiningL:   stateFAIL,
-		joiningD:   stateBefore,
-		joiningT:   stateAfter,
-		joiningR:   stateStart,
-		joinZWNJ:   stateFAIL,
-		joinZWJ:    stateFAIL,
-		joinVirama: stateAfter, // no-op as we can't accept joiners here
-	},
-	stateFAIL: {
-		0:          stateFAIL,
-		joiningL:   stateFAIL,
-		joiningD:   stateFAIL,
-		joiningT:   stateFAIL,
-		joiningR:   stateFAIL,
-		joinZWNJ:   stateFAIL,
-		joinZWJ:    stateFAIL,
-		joinVirama: stateFAIL,
-	},
-}
-
-// validate validates the criteria from Section 4.1. Item 1, 4, and 6 are
-// already implicitly satisfied by the overall implementation.
-func (p *Profile) validate(s string) error {
-	if len(s) > 4 && s[2] == '-' && s[3] == '-' {
-		return &labelError{s, "V2"}
-	}
-	if s[0] == '-' || s[len(s)-1] == '-' {
-		return &labelError{s, "V3"}
-	}
-	// TODO: merge the use of this in the trie.
-	v, sz := trie.lookupString(s)
-	x := info(v)
-	if x.isModifier() {
-		return &labelError{s, "V5"}
-	}
-	if !bidirule.ValidString(s) {
-		return &labelError{s, "B"}
-	}
-	// Quickly return in the absence of zero-width (non) joiners.
-	if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
-		return nil
-	}
-	st := stateStart
-	for i := 0; ; {
-		jt := x.joinType()
-		if s[i:i+sz] == zwj {
-			jt = joinZWJ
-		} else if s[i:i+sz] == zwnj {
-			jt = joinZWNJ
-		}
-		st = joinStates[st][jt]
-		if x.isViramaModifier() {
-			st = joinStates[st][joinVirama]
-		}
-		if i += sz; i == len(s) {
-			break
-		}
-		v, sz = trie.lookupString(s[i:])
-		x = info(v)
-	}
-	if st == stateFAIL || st == stateAfter {
-		return &labelError{s, "C"}
-	}
-	return nil
+	return strings.Join(labels, "."), nil
 }

 func ascii(s string) bool {

--- a/idna/idna_test.go
+++ b/idna/idna_test.go
@@ -2,9 +2,6 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// This file contains a few basic functional tests.
-// Full tests are done in x/text/internal/export/idna.
-
 package idna

 import (
@@ -41,3 +38,6 @@ func TestIDNA(t *testing.T) {
 		}
 	}
 }
+
+// TODO(nigeltao): test errors, once we've specified when ToASCII and ToUnicode
+// return errors.
--- a/idna/punycode.go
+++ b/idna/punycode.go
-// Copied from the golang.org/x/text repo; DO NOT EDIT
-
-// Copyright 2016 The Go Authors. All rights reserved.
+// Copyright 2012 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

@@ -9,6 +7,7 @@ package idna
 // This file implements the Punycode algorithm from RFC 3492.

 import (
+	"fmt"
 	"math"
 	"strings"
 	"unicode/utf8"
@@ -28,8 +27,6 @@ const (
 	tmin        int32 = 1
 )

-func punyError(s string) error { return &labelError{s, "A3"} }
-
 // decode decodes a string as specified in section 6.2.
 func decode(encoded string) (string, error) {
 	if encoded == "" {
@@ -37,7 +34,7 @@ func decode(encoded string) (string, error) {
 	}
 	pos := 1 + strings.LastIndex(encoded, "-")
 	if pos == 1 {
-		return "", punyError(encoded)
+		return "", fmt.Errorf("idna: invalid label %q", encoded)
 	}
 	if pos == len(encoded) {
 		return encoded[:len(encoded)-1], nil
@@ -53,16 +50,16 @@ func decode(encoded string) (string, error) {
 		oldI, w := i, int32(1)
 		for k := base; ; k += base {
 			if pos == len(encoded) {
-				return "", punyError(encoded)
+				return "", fmt.Errorf("idna: invalid label %q", encoded)
 			}
 			digit, ok := decodeDigit(encoded[pos])
 			if !ok {
-				return "", punyError(encoded)
+				return "", fmt.Errorf("idna: invalid label %q", encoded)
 			}
 			pos++
 			i += digit * w
 			if i < 0 {
-				return "", punyError(encoded)
+				return "", fmt.Errorf("idna: invalid label %q", encoded)
 			}
 			t := k - bias
 			if t < tmin {
@@ -75,7 +72,7 @@ func decode(encoded string) (string, error) {
 			}
 			w *= base - t
 			if w >= math.MaxInt32/base {
-				return "", punyError(encoded)
+				return "", fmt.Errorf("idna: invalid label %q", encoded)
 			}
 		}
 		x := int32(len(output) + 1)
@@ -83,7 +80,7 @@ func decode(encoded string) (string, error) {
 		n += i / x
 		i %= x
 		if n > utf8.MaxRune || len(output) >= 1024 {
-			return "", punyError(encoded)
+			return "", fmt.Errorf("idna: invalid label %q", encoded)
 		}
 		output = append(output, 0)
 		copy(output[i+1:], output[i:])
@@ -124,14 +121,14 @@ func encode(prefix, s string) (string, error) {
 		}
 		delta += (m - n) * (h + 1)
 		if delta < 0 {
-			return "", punyError(s)
+			return "", fmt.Errorf("idna: invalid label %q", s)
 		}
 		n = m
 		for _, r := range s {
 			if r < n {
 				delta++
 				if delta < 0 {
-					return "", punyError(s)
+					return "", fmt.Errorf("idna: invalid label %q", s)
 				}
 				continue
 			}

--- a/idna/punycode_test.go
+++ b/idna/punycode_test.go
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package idna
+
+import (
+	"strings"
+	"testing"
+)
+
+var punycodeTestCases = [...]struct {
+	s, encoded string
+}{
+	{"", ""},
+	{"-", "--"},
+	{"-a", "-a-"},
+	{"-a-", "-a--"},
+	{"a", "a-"},
+	{"a-", "a--"},
+	{"a-b", "a-b-"},
+	{"books", "books-"},
+	{"bücher", "bcher-kva"},
+	{"Hello世界", "Hello-ck1hg65u"},
+	{"ü", "tda"},
+	{"üý", "tdac"},
+
+	// The test cases below come from RFC 3492 section 7.1 with Errata 3026.
+	{
+		// (A) Arabic (Egyptian).
+		"\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644" +
+			"\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
+		"egbpdaj6bu4bxfgehfvwxn",
+	},
+	{
+		// (B) Chinese (simplified).
+		"\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
+		"ihqwcrb4cv8a8dqg056pqjye",
+	},
+	{
+		// (C) Chinese (traditional).
+		"\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
+		"ihqwctvzc91f659drss3x8bo0yb",
+	},
+	{
+		// (D) Czech.
+		"\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074" +
+			"\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D" +
+			"\u0065\u0073\u006B\u0079",
+		"Proprostnemluvesky-uyb24dma41a",
+	},
+	{
+		// (E) Hebrew.
+		"\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8" +
+			"\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2" +
+			"\u05D1\u05E8\u05D9\u05EA",
+		"4dbcagdahymbxekheh6e0a7fei0b",
+	},
+	{
+		// (F) Hindi (Devanagari).
+		"\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D" +
+			"\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939" +
+			"\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947" +
+			"\u0939\u0948\u0902",
+		"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
+	},
+	{
+		// (G) Japanese (kanji and hiragana).
+		"\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092" +
+			"\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
+		"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
+	},
+	{
+		// (H) Korean (Hangul syllables).
+		"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774" +
+			"\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74" +
+			"\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
+		"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j" +
+			"psd879ccm6fea98c",
+	},
+	{
+		// (I) Russian (Cyrillic).
+		"\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E" +
+			"\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440" +
+			"\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A" +
+			"\u0438",
+		"b1abfaaepdrnnbgefbadotcwatmq2g4l",
+	},
+	{
+		// (J) Spanish.
+		"\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070" +
+			"\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070" +
+			"\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061" +
+			"\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070" +
+			"\u0061\u00F1\u006F\u006C",
+		"PorqunopuedensimplementehablarenEspaol-fmd56a",
+	},
+	{
+		// (K) Vietnamese.
+		"\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B" +
+			"\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068" +
+			"\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067" +
+			"\u0056\u0069\u1EC7\u0074",
+		"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
+	},
+	{
+		// (L) 3<nen>B<gumi><kinpachi><sensei>.
+		"\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
+		"3B-ww4c5e180e575a65lsy2b",
+	},
+	{
+		// (M) <amuro><namie>-with-SUPER-MONKEYS.
+		"\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074" +
+			"\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D" +
+			"\u004F\u004E\u004B\u0045\u0059\u0053",
+		"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
+	},
+	{
+		// (N) Hello-Another-Way-<sorezore><no><basho>.
+		"\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F" +
+			"\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D" +
+			"\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
+		"Hello-Another-Way--fc4qua05auwb3674vfr0b",
+	},
+	{
+		// (O) <hitotsu><yane><no><shita>2.
+		"\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
+		"2-u9tlzr9756bt3uc0v",
+	},
+	{
+		// (P) Maji<de>Koi<suru>5<byou><mae>
+		"\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059" +
+			"\u308B\u0035\u79D2\u524D",
+		"MajiKoi5-783gue6qz075azm5e",
+	},
+	{
+		// (Q) <pafii>de<runba>
+		"\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
+		"de-jg4avhby1noc0d",
+	},
+	{
+		// (R) <sono><supiido><de>
+		"\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
+		"d9juau41awczczp",
+	},
+	{
+		// (S) -> $1.00 <-
+		"\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020" +
+			"\u003C\u002D",
+		"-> $1.00 <--",
+	},
+}
+
+func TestPunycode(t *testing.T) {
+	for _, tc := range punycodeTestCases {
+		if got, err := decode(tc.encoded); err != nil {
+			t.Errorf("decode(%q): %v", tc.encoded, err)
+		} else if got != tc.s {
+			t.Errorf("decode(%q): got %q, want %q", tc.encoded, got, tc.s)
+		}
+
+		if got, err := encode("", tc.s); err != nil {
+			t.Errorf(`encode("", %q): %v`, tc.s, err)
+		} else if got != tc.encoded {
+			t.Errorf(`encode("", %q): got %q, want %q`, tc.s, got, tc.encoded)
+		}
+	}
+}
+
+var punycodeErrorTestCases = [...]string{
+	"decode -",            // A sole '-' is invalid.
+	"decode foo\x00bar",   // '\x00' is not in [0-9A-Za-z].
+	"decode foo#bar",      // '#' is not in [0-9A-Za-z].
+	"decode foo\u00A3bar", // '\u00A3' is not in [0-9A-Za-z].
+	"decode 9",            // "9a" decodes to codepoint \u00A3; "9" is truncated.
+	"decode 99999a",       // "99999a" decodes to codepoint \U0048A3C1, which is > \U0010FFFF.
+	"decode 9999999999a",  // "9999999999a" overflows the int32 calculation.
+
+	"encode " + strings.Repeat("x", 65536) + "\uff00", // int32 overflow.
+}
+
+func TestPunycodeErrors(t *testing.T) {
+	for _, tc := range punycodeErrorTestCases {
+		var err error
+		switch {
+		case strings.HasPrefix(tc, "decode "):
+			_, err = decode(tc[7:])
+		case strings.HasPrefix(tc, "encode "):
+			_, err = encode("", tc[7:])
+		}
+		if err == nil {
+			if len(tc) > 256 {
+				tc = tc[:100] + "..." + tc[len(tc)-100:]
+			}
+			t.Errorf("no error for %s", tc)
+		}
+	}
+}
--- a/idna/tables.go
+++ b/idna/tables.go
--- a/idna/trie.go
+++ b/idna/trie.go
-// Copied from the golang.org/x/text repo; DO NOT EDIT
-
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package idna
-
-// appendMapping appends the mapping for the respective rune. isMapped must be
-// true. A mapping is a categorization of a rune as defined in UTS #46.
-func (c info) appendMapping(b []byte, s string) []byte {
-	index := int(c >> indexShift)
-	if c&xorBit == 0 {
-		s := mappings[index:]
-		return append(b, s[1:s[0]+1]...)
-	}
-	b = append(b, s...)
-	if c&inlineXOR == inlineXOR {
-		// TODO: support and handle two-byte inline masks
-		b[len(b)-1] ^= byte(index)
-	} else {
-		for p := len(b) - int(xorData[index]); p < len(b); p++ {
-			index++
-			b[p] ^= xorData[index]
-		}
-	}
-	return b
-}
-
-// Sparse block handling code.
-
-type valueRange struct {
-	value  uint16 // header: value:stride
-	lo, hi byte   // header: lo:n
-}
-
-type sparseBlocks struct {
-	values []valueRange
-	offset []uint16
-}
-
-var idnaSparse = sparseBlocks{
-	values: idnaSparseValues[:],
-	offset: idnaSparseOffset[:],
-}
-
-var trie = newIdnaTrie(0)
-
-// lookup determines the type of block n and looks up the value for b.
-// For n < t.cutoff, the block is a simple lookup table. Otherwise, the block
-// is a list of ranges with an accompanying value. Given a matching range r,
-// the value for b is by r.value + (b - r.lo) * stride.
-func (t *sparseBlocks) lookup(n uint32, b byte) uint16 {
-	offset := t.offset[n]
-	header := t.values[offset]
-	lo := offset + 1
-	hi := lo + uint16(header.lo)
-	for lo < hi {
-		m := lo + (hi-lo)/2
-		r := t.values[m]
-		if r.lo <= b && b <= r.hi {
-			return r.value + uint16(b-r.lo)*header.value
-		}
-		if b < r.lo {
-			hi = m
-		} else {
-			lo = m + 1
-		}
-	}
-	return 0
-}
--- a/idna/trieval.go
+++ b/idna/trieval.go
-// Copied from the golang.org/x/text repo; DO NOT EDIT
-
-// This file was generated by go generate; DO NOT EDIT
-
-package idna
-
-// This file contains definitions for interpreting the trie value of the idna
-// trie generated by "go run gen*.go". It is shared by both the generator
-// program and the resultant package. Sharing is achieved by the generator
-// copying gen_trieval.go to trieval.go and changing what's above this comment.
-
-// info holds information from the IDNA mapping table for a single rune. It is
-// the value returned by a trie lookup. In most cases, all information fits in
-// a 16-bit value. For mappings, this value may contain an index into a slice
-// with the mapped string. Such mappings can consist of the actual mapped value
-// or an XOR pattern to be applied to the bytes of the UTF8 encoding of the
-// input rune. This technique is used by the cases packages and reduces the
-// table size significantly.
-//
-// The per-rune values have the following format:
-//
-//   if mapped {
-//     if inlinedXOR {
-//       15..13 inline XOR marker
-//       12..11 unused
-//       10..3  inline XOR mask
-//     } else {
-//       15..3  index into xor or mapping table
-//     }
-//   } else {
-//       15..13 unused
-//           12 modifier (including virama)
-//           11 virama modifier
-//       10..8  joining type
-//        7..3  category type
-//   }
-//      2  use xor pattern
-//   1..0  mapped category
-//
-// See the definitions below for a more detailed description of the various
-// bits.
-type info uint16
-
-const (
-	catSmallMask = 0x3
-	catBigMask   = 0xF8
-	indexShift   = 3
-	xorBit       = 0x4    // interpret the index as an xor pattern
-	inlineXOR    = 0xE000 // These bits are set if the XOR pattern is inlined.
-
-	joinShift = 8
-	joinMask  = 0x07
-
-	viramaModifier = 0x0800
-	modifier       = 0x1000
-)
-
-// A category corresponds to a category defined in the IDNA mapping table.
-type category uint16
-
-const (
-	unknown              category = 0 // not defined currently in unicode.
-	mapped               category = 1
-	disallowedSTD3Mapped category = 2
-	deviation            category = 3
-)
-
-const (
-	valid               category = 0x08
-	validNV8            category = 0x18
-	validXV8            category = 0x28
-	disallowed          category = 0x40
-	disallowedSTD3Valid category = 0x80
-	ignored             category = 0xC0
-)
-
-// join types and additional rune information
-const (
-	joiningL = (iota + 1)
-	joiningD
-	joiningT
-	joiningR
-
-	//the following types are derived during processing
-	joinZWJ
-	joinZWNJ
-	joinVirama
-	numJoinTypes
-)
-
-func (c info) isMapped() bool {
-	return c&0x3 != 0
-}
-
-func (c info) category() category {
-	small := c & catSmallMask
-	if small != 0 {
-		return category(small)
-	}
-	return category(c & catBigMask)
-}
-
-func (c info) joinType() info {
-	if c.isMapped() {
-		return 0
-	}
-	return (c >> joinShift) & joinMask
-}
-
-func (c info) isModifier() bool {
-	return c&(modifier|catSmallMask) == modifier
-}
-
-func (c info) isViramaModifier() bool {
-	return c&(viramaModifier|catSmallMask) == viramaModifier
-}