Commit 67957fd0 authored by Marcel van Lohuizen's avatar Marcel van Lohuizen

idna: use code generated by internal x/text package

The API extends the old API but is intended to be backwards
compatible. The old test file serves as a test for this purpose.

I will send a CL to update the vendored mirror in core after
1.8 is out.

Change-Id: I538df20d42c213d9e038bdb0dad76b6cc132d4fd
Reviewed-on: https://go-review.googlesource.com/34770
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
Reviewed-by: 's avatarNigel Tao <nigeltao@golang.org>
parent 69d4b8aa
This diff is collapsed.
......@@ -2,6 +2,9 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file contains a few basic functional tests.
// Full tests are done in x/text/internal/export/idna.
package idna
import (
......@@ -38,6 +41,3 @@ func TestIDNA(t *testing.T) {
}
}
}
// TODO(nigeltao): test errors, once we've specified when ToASCII and ToUnicode
// return errors.
// Copyright 2012 The Go Authors. All rights reserved.
// Copied from the golang.org/x/text repo; DO NOT EDIT
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
......@@ -7,7 +9,6 @@ package idna
// This file implements the Punycode algorithm from RFC 3492.
import (
"fmt"
"math"
"strings"
"unicode/utf8"
......@@ -27,6 +28,8 @@ const (
tmin int32 = 1
)
func punyError(s string) error { return &labelError{s, "A3"} }
// decode decodes a string as specified in section 6.2.
func decode(encoded string) (string, error) {
if encoded == "" {
......@@ -34,7 +37,7 @@ func decode(encoded string) (string, error) {
}
pos := 1 + strings.LastIndex(encoded, "-")
if pos == 1 {
return "", fmt.Errorf("idna: invalid label %q", encoded)
return "", punyError(encoded)
}
if pos == len(encoded) {
return encoded[:len(encoded)-1], nil
......@@ -50,16 +53,16 @@ func decode(encoded string) (string, error) {
oldI, w := i, int32(1)
for k := base; ; k += base {
if pos == len(encoded) {
return "", fmt.Errorf("idna: invalid label %q", encoded)
return "", punyError(encoded)
}
digit, ok := decodeDigit(encoded[pos])
if !ok {
return "", fmt.Errorf("idna: invalid label %q", encoded)
return "", punyError(encoded)
}
pos++
i += digit * w
if i < 0 {
return "", fmt.Errorf("idna: invalid label %q", encoded)
return "", punyError(encoded)
}
t := k - bias
if t < tmin {
......@@ -72,7 +75,7 @@ func decode(encoded string) (string, error) {
}
w *= base - t
if w >= math.MaxInt32/base {
return "", fmt.Errorf("idna: invalid label %q", encoded)
return "", punyError(encoded)
}
}
x := int32(len(output) + 1)
......@@ -80,7 +83,7 @@ func decode(encoded string) (string, error) {
n += i / x
i %= x
if n > utf8.MaxRune || len(output) >= 1024 {
return "", fmt.Errorf("idna: invalid label %q", encoded)
return "", punyError(encoded)
}
output = append(output, 0)
copy(output[i+1:], output[i:])
......@@ -121,14 +124,14 @@ func encode(prefix, s string) (string, error) {
}
delta += (m - n) * (h + 1)
if delta < 0 {
return "", fmt.Errorf("idna: invalid label %q", s)
return "", punyError(s)
}
n = m
for _, r := range s {
if r < n {
delta++
if delta < 0 {
return "", fmt.Errorf("idna: invalid label %q", s)
return "", punyError(s)
}
continue
}
......
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna
import (
"strings"
"testing"
)
var punycodeTestCases = [...]struct {
s, encoded string
}{
{"", ""},
{"-", "--"},
{"-a", "-a-"},
{"-a-", "-a--"},
{"a", "a-"},
{"a-", "a--"},
{"a-b", "a-b-"},
{"books", "books-"},
{"bücher", "bcher-kva"},
{"Hello世界", "Hello-ck1hg65u"},
{"ü", "tda"},
{"üý", "tdac"},
// The test cases below come from RFC 3492 section 7.1 with Errata 3026.
{
// (A) Arabic (Egyptian).
"\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644" +
"\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
"egbpdaj6bu4bxfgehfvwxn",
},
{
// (B) Chinese (simplified).
"\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
"ihqwcrb4cv8a8dqg056pqjye",
},
{
// (C) Chinese (traditional).
"\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
"ihqwctvzc91f659drss3x8bo0yb",
},
{
// (D) Czech.
"\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074" +
"\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D" +
"\u0065\u0073\u006B\u0079",
"Proprostnemluvesky-uyb24dma41a",
},
{
// (E) Hebrew.
"\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8" +
"\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2" +
"\u05D1\u05E8\u05D9\u05EA",
"4dbcagdahymbxekheh6e0a7fei0b",
},
{
// (F) Hindi (Devanagari).
"\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D" +
"\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939" +
"\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947" +
"\u0939\u0948\u0902",
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
},
{
// (G) Japanese (kanji and hiragana).
"\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092" +
"\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
},
{
// (H) Korean (Hangul syllables).
"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774" +
"\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74" +
"\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j" +
"psd879ccm6fea98c",
},
{
// (I) Russian (Cyrillic).
"\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E" +
"\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440" +
"\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A" +
"\u0438",
"b1abfaaepdrnnbgefbadotcwatmq2g4l",
},
{
// (J) Spanish.
"\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070" +
"\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070" +
"\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061" +
"\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070" +
"\u0061\u00F1\u006F\u006C",
"PorqunopuedensimplementehablarenEspaol-fmd56a",
},
{
// (K) Vietnamese.
"\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B" +
"\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068" +
"\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067" +
"\u0056\u0069\u1EC7\u0074",
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
},
{
// (L) 3<nen>B<gumi><kinpachi><sensei>.
"\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
"3B-ww4c5e180e575a65lsy2b",
},
{
// (M) <amuro><namie>-with-SUPER-MONKEYS.
"\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074" +
"\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D" +
"\u004F\u004E\u004B\u0045\u0059\u0053",
"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
},
{
// (N) Hello-Another-Way-<sorezore><no><basho>.
"\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F" +
"\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D" +
"\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
"Hello-Another-Way--fc4qua05auwb3674vfr0b",
},
{
// (O) <hitotsu><yane><no><shita>2.
"\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
"2-u9tlzr9756bt3uc0v",
},
{
// (P) Maji<de>Koi<suru>5<byou><mae>
"\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059" +
"\u308B\u0035\u79D2\u524D",
"MajiKoi5-783gue6qz075azm5e",
},
{
// (Q) <pafii>de<runba>
"\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
"de-jg4avhby1noc0d",
},
{
// (R) <sono><supiido><de>
"\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
"d9juau41awczczp",
},
{
// (S) -> $1.00 <-
"\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020" +
"\u003C\u002D",
"-> $1.00 <--",
},
}
func TestPunycode(t *testing.T) {
for _, tc := range punycodeTestCases {
if got, err := decode(tc.encoded); err != nil {
t.Errorf("decode(%q): %v", tc.encoded, err)
} else if got != tc.s {
t.Errorf("decode(%q): got %q, want %q", tc.encoded, got, tc.s)
}
if got, err := encode("", tc.s); err != nil {
t.Errorf(`encode("", %q): %v`, tc.s, err)
} else if got != tc.encoded {
t.Errorf(`encode("", %q): got %q, want %q`, tc.s, got, tc.encoded)
}
}
}
var punycodeErrorTestCases = [...]string{
"decode -", // A sole '-' is invalid.
"decode foo\x00bar", // '\x00' is not in [0-9A-Za-z].
"decode foo#bar", // '#' is not in [0-9A-Za-z].
"decode foo\u00A3bar", // '\u00A3' is not in [0-9A-Za-z].
"decode 9", // "9a" decodes to codepoint \u00A3; "9" is truncated.
"decode 99999a", // "99999a" decodes to codepoint \U0048A3C1, which is > \U0010FFFF.
"decode 9999999999a", // "9999999999a" overflows the int32 calculation.
"encode " + strings.Repeat("x", 65536) + "\uff00", // int32 overflow.
}
func TestPunycodeErrors(t *testing.T) {
for _, tc := range punycodeErrorTestCases {
var err error
switch {
case strings.HasPrefix(tc, "decode "):
_, err = decode(tc[7:])
case strings.HasPrefix(tc, "encode "):
_, err = encode("", tc[7:])
}
if err == nil {
if len(tc) > 256 {
tc = tc[:100] + "..." + tc[len(tc)-100:]
}
t.Errorf("no error for %s", tc)
}
}
}
This diff is collapsed.
// Copied from the golang.org/x/text repo; DO NOT EDIT
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna
// appendMapping appends the mapping for the respective rune. isMapped must be
// true. A mapping is a categorization of a rune as defined in UTS #46.
func (c info) appendMapping(b []byte, s string) []byte {
index := int(c >> indexShift)
if c&xorBit == 0 {
s := mappings[index:]
return append(b, s[1:s[0]+1]...)
}
b = append(b, s...)
if c&inlineXOR == inlineXOR {
// TODO: support and handle two-byte inline masks
b[len(b)-1] ^= byte(index)
} else {
for p := len(b) - int(xorData[index]); p < len(b); p++ {
index++
b[p] ^= xorData[index]
}
}
return b
}
// Sparse block handling code.
type valueRange struct {
value uint16 // header: value:stride
lo, hi byte // header: lo:n
}
type sparseBlocks struct {
values []valueRange
offset []uint16
}
var idnaSparse = sparseBlocks{
values: idnaSparseValues[:],
offset: idnaSparseOffset[:],
}
var trie = newIdnaTrie(0)
// lookup determines the type of block n and looks up the value for b.
// For n < t.cutoff, the block is a simple lookup table. Otherwise, the block
// is a list of ranges with an accompanying value. Given a matching range r,
// the value for b is by r.value + (b - r.lo) * stride.
func (t *sparseBlocks) lookup(n uint32, b byte) uint16 {
offset := t.offset[n]
header := t.values[offset]
lo := offset + 1
hi := lo + uint16(header.lo)
for lo < hi {
m := lo + (hi-lo)/2
r := t.values[m]
if r.lo <= b && b <= r.hi {
return r.value + uint16(b-r.lo)*header.value
}
if b < r.lo {
hi = m
} else {
lo = m + 1
}
}
return 0
}
// Copied from the golang.org/x/text repo; DO NOT EDIT
// This file was generated by go generate; DO NOT EDIT
package idna
// This file contains definitions for interpreting the trie value of the idna
// trie generated by "go run gen*.go". It is shared by both the generator
// program and the resultant package. Sharing is achieved by the generator
// copying gen_trieval.go to trieval.go and changing what's above this comment.
// info holds information from the IDNA mapping table for a single rune. It is
// the value returned by a trie lookup. In most cases, all information fits in
// a 16-bit value. For mappings, this value may contain an index into a slice
// with the mapped string. Such mappings can consist of the actual mapped value
// or an XOR pattern to be applied to the bytes of the UTF8 encoding of the
// input rune. This technique is used by the cases packages and reduces the
// table size significantly.
//
// The per-rune values have the following format:
//
// if mapped {
// if inlinedXOR {
// 15..13 inline XOR marker
// 12..11 unused
// 10..3 inline XOR mask
// } else {
// 15..3 index into xor or mapping table
// }
// } else {
// 15..13 unused
// 12 modifier (including virama)
// 11 virama modifier
// 10..8 joining type
// 7..3 category type
// }
// 2 use xor pattern
// 1..0 mapped category
//
// See the definitions below for a more detailed description of the various
// bits.
type info uint16
const (
catSmallMask = 0x3
catBigMask = 0xF8
indexShift = 3
xorBit = 0x4 // interpret the index as an xor pattern
inlineXOR = 0xE000 // These bits are set if the XOR pattern is inlined.
joinShift = 8
joinMask = 0x07
viramaModifier = 0x0800
modifier = 0x1000
)
// A category corresponds to a category defined in the IDNA mapping table.
type category uint16
const (
unknown category = 0 // not defined currently in unicode.
mapped category = 1
disallowedSTD3Mapped category = 2
deviation category = 3
)
const (
valid category = 0x08
validNV8 category = 0x18
validXV8 category = 0x28
disallowed category = 0x40
disallowedSTD3Valid category = 0x80
ignored category = 0xC0
)
// join types and additional rune information
const (
joiningL = (iota + 1)
joiningD
joiningT
joiningR
//the following types are derived during processing
joinZWJ
joinZWNJ
joinVirama
numJoinTypes
)
func (c info) isMapped() bool {
return c&0x3 != 0
}
func (c info) category() category {
small := c & catSmallMask
if small != 0 {
return category(small)
}
return category(c & catBigMask)
}
func (c info) joinType() info {
if c.isMapped() {
return 0
}
return (c >> joinShift) & joinMask
}
func (c info) isModifier() bool {
return c&(modifier|catSmallMask) == modifier
}
func (c info) isViramaModifier() bool {
return c&(viramaModifier|catSmallMask) == viramaModifier
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment