Commit 78ebe5c8 authored by Marcel van Lohuizen

idna: copy the idna package from x/text, take 2.

The version is not only API compatible, but is also meant
to mimic the original behavior of doing Punycode only
with only a minimum of additional checks.
Integrated from x/text/internal/export/idna at Git SHA1
fc7fa097411d30e6708badff276c4c164425590c.

All checks making things fundamentally stricter or checks
that require tables are now optional.

Fixes golang/go#18567
Fixes golang/go#18582

Change-Id: I08e6d7f7c276d1d84e02391e22f60c0a44a8ddad
Reviewed-on: https://go-review.googlesource.com/37050
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
parent 3e967e1d
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna_test
import (
"fmt"
"golang.org/x/net/idna"
)
// ExampleProfile runs the same inputs (an empty string and a name containing
// 'ß') through the package-level ToASCII function and through the Punycode,
// Lookup and Registration profiles, printing each result and error.
func ExampleProfile() {
// Raw Punycode has no restrictions and does no mappings.
fmt.Println(idna.ToASCII(""))
fmt.Println(idna.ToASCII("*.faß.com"))
fmt.Println(idna.Punycode.ToASCII("*.faß.com"))
// Rewrite IDN for lookup. This (currently) uses transitional mappings to
// find a balance between IDNA2003 and IDNA2008 compatibility.
fmt.Println(idna.Lookup.ToASCII(""))
fmt.Println(idna.Lookup.ToASCII("www.faß.com"))
// Convert an IDN to ASCII for registration purposes. This changes the
// encoding, but reports an error if the input was ill-formed.
fmt.Println(idna.Registration.ToASCII(""))
fmt.Println(idna.Registration.ToASCII("www.faß.com"))
// Output:
// <nil>
// *.xn--fa-hia.com <nil>
// *.xn--fa-hia.com <nil>
// <nil>
// www.fass.com <nil>
// idna: invalid label ""
// www.xn--fa-hia.com <nil>
}
// ExampleNew builds three profiles with idna.New, each with a different set
// of options, and prints what each one produces for the same wildcard name.
func ExampleNew() {
var p *idna.Profile
// Raw Punycode has no restrictions and does no mappings.
p = idna.New()
fmt.Println(p.ToASCII("*.faß.com"))
// Do mappings. Note that star is not allowed in a DNS lookup.
p = idna.New(
idna.MapForLookup(),
idna.Transitional(true)) // Map ß -> ss
fmt.Println(p.ToASCII("*.faß.com"))
// Set up a profile that maps for lookup, but allows wildcards.
p = idna.New(
idna.MapForLookup(),
idna.Transitional(true), // Map ß -> ss
idna.StrictDomainName(false)) // Set more permissive ASCII rules.
fmt.Println(p.ToASCII("*.faß.com"))
// Output:
// *.xn--fa-hia.com <nil>
// *.fass.com idna: disallowed rune U+002E
// *.fass.com <nil>
}
This diff is collapsed.
// Copyright 2012 The Go Authors. All rights reserved. // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
...@@ -7,7 +9,6 @@ package idna ...@@ -7,7 +9,6 @@ package idna
// This file implements the Punycode algorithm from RFC 3492. // This file implements the Punycode algorithm from RFC 3492.
import ( import (
"fmt"
"math" "math"
"strings" "strings"
"unicode/utf8" "unicode/utf8"
...@@ -27,6 +28,8 @@ const ( ...@@ -27,6 +28,8 @@ const (
tmin int32 = 1 tmin int32 = 1
) )
// punyError returns the error reported for a label s that cannot be
// processed by the Punycode routines: a *labelError holding s and the
// code "A3".
func punyError(s string) error {
	return &labelError{s, "A3"}
}
// decode decodes a string as specified in section 6.2. // decode decodes a string as specified in section 6.2.
func decode(encoded string) (string, error) { func decode(encoded string) (string, error) {
if encoded == "" { if encoded == "" {
...@@ -34,7 +37,7 @@ func decode(encoded string) (string, error) { ...@@ -34,7 +37,7 @@ func decode(encoded string) (string, error) {
} }
pos := 1 + strings.LastIndex(encoded, "-") pos := 1 + strings.LastIndex(encoded, "-")
if pos == 1 { if pos == 1 {
return "", fmt.Errorf("idna: invalid label %q", encoded) return "", punyError(encoded)
} }
if pos == len(encoded) { if pos == len(encoded) {
return encoded[:len(encoded)-1], nil return encoded[:len(encoded)-1], nil
...@@ -50,16 +53,16 @@ func decode(encoded string) (string, error) { ...@@ -50,16 +53,16 @@ func decode(encoded string) (string, error) {
oldI, w := i, int32(1) oldI, w := i, int32(1)
for k := base; ; k += base { for k := base; ; k += base {
if pos == len(encoded) { if pos == len(encoded) {
return "", fmt.Errorf("idna: invalid label %q", encoded) return "", punyError(encoded)
} }
digit, ok := decodeDigit(encoded[pos]) digit, ok := decodeDigit(encoded[pos])
if !ok { if !ok {
return "", fmt.Errorf("idna: invalid label %q", encoded) return "", punyError(encoded)
} }
pos++ pos++
i += digit * w i += digit * w
if i < 0 { if i < 0 {
return "", fmt.Errorf("idna: invalid label %q", encoded) return "", punyError(encoded)
} }
t := k - bias t := k - bias
if t < tmin { if t < tmin {
...@@ -72,7 +75,7 @@ func decode(encoded string) (string, error) { ...@@ -72,7 +75,7 @@ func decode(encoded string) (string, error) {
} }
w *= base - t w *= base - t
if w >= math.MaxInt32/base { if w >= math.MaxInt32/base {
return "", fmt.Errorf("idna: invalid label %q", encoded) return "", punyError(encoded)
} }
} }
x := int32(len(output) + 1) x := int32(len(output) + 1)
...@@ -80,7 +83,7 @@ func decode(encoded string) (string, error) { ...@@ -80,7 +83,7 @@ func decode(encoded string) (string, error) {
n += i / x n += i / x
i %= x i %= x
if n > utf8.MaxRune || len(output) >= 1024 { if n > utf8.MaxRune || len(output) >= 1024 {
return "", fmt.Errorf("idna: invalid label %q", encoded) return "", punyError(encoded)
} }
output = append(output, 0) output = append(output, 0)
copy(output[i+1:], output[i:]) copy(output[i+1:], output[i:])
...@@ -121,14 +124,14 @@ func encode(prefix, s string) (string, error) { ...@@ -121,14 +124,14 @@ func encode(prefix, s string) (string, error) {
} }
delta += (m - n) * (h + 1) delta += (m - n) * (h + 1)
if delta < 0 { if delta < 0 {
return "", fmt.Errorf("idna: invalid label %q", s) return "", punyError(s)
} }
n = m n = m
for _, r := range s { for _, r := range s {
if r < n { if r < n {
delta++ delta++
if delta < 0 { if delta < 0 {
return "", fmt.Errorf("idna: invalid label %q", s) return "", punyError(s)
} }
continue continue
} }
......
This diff is collapsed.
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna
// appendMapping appends to b the mapped form of the rune whose UTF-8 encoding
// is s and returns the extended slice. The receiver must describe a mapped
// rune (isMapped must be true).
//
// The bits of c above indexShift form an index that is interpreted in one of
// three ways:
//   - xorBit clear: the index points into mappings, where a length byte is
//     followed by that many replacement bytes; s itself is not used.
//   - inlineXOR set: s is copied and its last byte is XORed with the low
//     eight bits of the index.
//   - otherwise: s is copied and its trailing bytes are XORed with a pattern
//     from xorData, where the byte at the index gives the pattern length and
//     the pattern bytes follow it.
func (c info) appendMapping(b []byte, s string) []byte {
	idx := int(c >> indexShift)

	if c&xorBit == 0 {
		entry := mappings[idx:]
		n := entry[0]
		return append(b, entry[1:n+1]...)
	}

	b = append(b, s...)
	if c&inlineXOR == inlineXOR {
		// TODO: support and handle two-byte inline masks
		b[len(b)-1] ^= byte(idx)
		return b
	}

	// Patch the last n bytes of b with the n mask bytes stored after the
	// length byte in xorData.
	n := int(xorData[idx])
	for p := len(b) - n; p < len(b); p++ {
		idx++
		b[p] ^= xorData[idx]
	}
	return b
}
// Sparse block handling code.

// valueRange is one entry of a sparse block. An ordinary entry covers the
// bytes lo through hi inclusive and carries the value for lo. The first entry
// of each block is a header that reuses the same fields: its value field
// holds the stride and its lo field holds the number of ranges that follow
// (see sparseBlocks.lookup).
type valueRange struct {
value uint16 // header: value:stride
lo, hi byte // header: lo:n
}
// sparseBlocks is a collection of sparse blocks stored back to back.
type sparseBlocks struct {
// values holds, for every block, a header entry followed by its ranges.
values []valueRange
// offset maps a block number to the index of that block's header in values.
offset []uint16
}
// idnaSparse is the package's sparse-block table, built over the
// idnaSparseValues and idnaSparseOffset tables that are declared elsewhere.
var idnaSparse = sparseBlocks{
values: idnaSparseValues[:],
offset: idnaSparseOffset[:],
}
// trie is the shared trie instance used by the package.
// Don't use newIdnaTrie to avoid unconditional linking in of the table.
var trie = &idnaTrie{}
// lookup returns the value stored for byte b in sparse block n, or 0 if no
// range of that block contains b.
//
// t.offset[n] locates the block's header in t.values. The header's lo field
// holds the number of ranges that follow it and its value field holds the
// stride. The ranges are binary searched, so they must be sorted by lo. For a
// matching range r the result is r.value + (b - r.lo) * stride.
func (t *sparseBlocks) lookup(n uint32, b byte) uint16 {
	base := t.offset[n]
	hdr := t.values[base]

	for left, right := base+1, base+1+uint16(hdr.lo); left < right; {
		mid := left + (right-left)/2
		switch r := t.values[mid]; {
		case b < r.lo:
			right = mid
		case b > r.hi:
			left = mid + 1
		default:
			return r.value + uint16(b-r.lo)*hdr.value
		}
	}
	return 0
}
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package idna
// This file contains definitions for interpreting the trie value of the idna
// trie generated by "go run gen*.go". It is shared by both the generator
// program and the resultant package. Sharing is achieved by the generator
// copying gen_trieval.go to trieval.go and changing what's above this comment.
// info holds information from the IDNA mapping table for a single rune. It is
// the value returned by a trie lookup. In most cases, all information fits in
// a 16-bit value. For mappings, this value may contain an index into a slice
// with the mapped string. Such mappings can consist of the actual mapped value
// or an XOR pattern to be applied to the bytes of the UTF8 encoding of the
// input rune. This technique is used by the cases package and reduces the
// table size significantly.
//
// The per-rune values have the following format:
//
//	if mapped {
//	  if inlinedXOR {
//	    15..13 inline XOR marker
//	    12..11 unused
//	    10..3  inline XOR mask
//	  } else {
//	    15..3  index into xor or mapping table
//	  }
//	} else {
//	    15..13 unused
//	     12    modifier (including virama)
//	     11    virama modifier
//	    10..8  joining type
//	     7..3  category type
//	}
//	   2  use xor pattern
//	1..0  mapped category
//
// See the definitions below for a more detailed description of the various
// bits.
type info uint16
// Masks and shifts used to pick the individual bit fields out of an info
// value.
const (
// catSmallMask selects bits 1..0, the category of a mapped rune.
catSmallMask = 0x3
// catBigMask selects bits 7..3, the category of a rune that is not mapped.
catBigMask = 0xF8
// indexShift is the right shift that yields the mapping or xor index.
indexShift = 3
xorBit = 0x4 // interpret the index as an xor pattern
inlineXOR = 0xE000 // These bits are set if the XOR pattern is inlined.
// joinShift and joinMask extract the joining type (bits 10..8).
joinShift = 8
joinMask = 0x07
// viramaModifier is bit 11 and modifier is bit 12; both are meaningful only
// when the catSmallMask bits are zero.
viramaModifier = 0x0800
modifier = 0x1000
)
// A category corresponds to a category defined in the IDNA mapping table.
type category uint16
// Categories stored in the two low bits of an info value (catSmallMask).
// info.category returns one of these whenever those bits are non-zero.
const (
unknown category = 0 // not defined currently in unicode.
mapped category = 1
disallowedSTD3Mapped category = 2
deviation category = 3
)
// Categories stored in bits 7..3 of an info value (catBigMask).
// info.category returns one of these when the two low bits are zero.
const (
valid category = 0x08
validNV8 category = 0x18
validXV8 category = 0x28
disallowed category = 0x40
disallowedSTD3Valid category = 0x80
ignored category = 0xC0
)
// Join types and additional rune information. The values are consecutive,
// starting at 1 for joiningL, so numJoinTypes evaluates to 8.
const (
joiningL = (iota + 1)
joiningD
joiningT
joiningR
// The following types are derived during processing.
joinZWJ
joinZWNJ
joinVirama
numJoinTypes
)
// isMapped reports whether either of the two low bits of c is set, that is,
// whether c carries one of the small (mapped) categories.
func (c info) isMapped() bool {
	const lowBits = 0x3 // same bits as catSmallMask
	return c&lowBits != 0
}
// category returns the category encoded in c. If the two low bits
// (catSmallMask) are non-zero they are the category; otherwise the category
// is taken from the catBigMask bits.
func (c info) category() category {
	if low := category(c & catSmallMask); low != 0 {
		return low
	}
	return category(c & catBigMask)
}
// joinType returns the joining type stored in c (the joinMask bits after
// shifting right by joinShift). Mapped values carry no joining type, so 0 is
// returned for them.
func (c info) joinType() info {
	if !c.isMapped() {
		return (c >> joinShift) & joinMask
	}
	return 0
}
// isModifier reports whether the modifier bit of c is set while both
// catSmallMask bits are clear.
func (c info) isModifier() bool {
	const relevant = modifier | catSmallMask
	return c&relevant == modifier
}
// isViramaModifier reports whether the viramaModifier bit of c is set while
// both catSmallMask bits are clear.
func (c info) isViramaModifier() bool {
	const relevant = viramaModifier | catSmallMask
	return c&relevant == viramaModifier
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment