Commit d997483c authored by Marcel van Lohuizen

idna: update from x/text

Fixes golang/go#19821

Change-Id: Iee5d43dfeaaf580c39ca38a7a3a74cf2a8b347b3
Reviewed-on: https://go-review.googlesource.com/44381
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
parent 5f8847ae
...@@ -51,6 +51,10 @@ func ExampleNew() { ...@@ -51,6 +51,10 @@ func ExampleNew() {
idna.Transitional(true)) // Map ß -> ss idna.Transitional(true)) // Map ß -> ss
fmt.Println(p.ToASCII("*.faß.com")) fmt.Println(p.ToASCII("*.faß.com"))
// Lookup for registration. Also does not allow '*'.
p = idna.New(idna.ValidateForRegistration())
fmt.Println(p.ToUnicode("*.faß.com"))
// Set up a profile maps for lookup, but allows wild cards. // Set up a profile maps for lookup, but allows wild cards.
p = idna.New( p = idna.New(
idna.MapForLookup(), idna.MapForLookup(),
...@@ -60,6 +64,7 @@ func ExampleNew() { ...@@ -60,6 +64,7 @@ func ExampleNew() {
// Output: // Output:
// *.xn--fa-hia.com <nil> // *.xn--fa-hia.com <nil>
// *.fass.com idna: disallowed rune U+002E // *.fass.com idna: disallowed rune U+002A
// *.faß.com idna: disallowed rune U+002A
// *.fass.com <nil> // *.fass.com <nil>
} }
...@@ -67,6 +67,15 @@ func VerifyDNSLength(verify bool) Option { ...@@ -67,6 +67,15 @@ func VerifyDNSLength(verify bool) Option {
return func(o *options) { o.verifyDNSLength = verify } return func(o *options) { o.verifyDNSLength = verify }
} }
// RemoveLeadingDots returns an Option that controls whether leading label
// separators are stripped from the input. Runes that map to a dot, such as
// U+3002, are stripped as well when they lead the string.
//
// UTS #46 suggests this behavior, and some browsers have adopted it.
func RemoveLeadingDots(remove bool) Option {
	return func(opts *options) {
		opts.removeLeadingDots = remove
	}
}
// ValidateLabels sets whether to check the mandatory label validation criteria // ValidateLabels sets whether to check the mandatory label validation criteria
// as defined in Section 5.4 of RFC 5891. This includes testing for correct use // as defined in Section 5.4 of RFC 5891. This includes testing for correct use
// of hyphens ('-'), normalization, validity of runes, and the context rules. // of hyphens ('-'), normalization, validity of runes, and the context rules.
...@@ -133,14 +142,16 @@ func MapForLookup() Option { ...@@ -133,14 +142,16 @@ func MapForLookup() Option {
o.mapping = validateAndMap o.mapping = validateAndMap
StrictDomainName(true)(o) StrictDomainName(true)(o)
ValidateLabels(true)(o) ValidateLabels(true)(o)
RemoveLeadingDots(true)(o)
} }
} }
type options struct { type options struct {
transitional bool transitional bool
useSTD3Rules bool useSTD3Rules bool
validateLabels bool validateLabels bool
verifyDNSLength bool verifyDNSLength bool
removeLeadingDots bool
trie *idnaTrie trie *idnaTrie
...@@ -240,21 +251,23 @@ var ( ...@@ -240,21 +251,23 @@ var (
punycode = &Profile{} punycode = &Profile{}
lookup = &Profile{options{ lookup = &Profile{options{
transitional: true, transitional: true,
useSTD3Rules: true, useSTD3Rules: true,
validateLabels: true, validateLabels: true,
trie: trie, removeLeadingDots: true,
fromPuny: validateFromPunycode, trie: trie,
mapping: validateAndMap, fromPuny: validateFromPunycode,
bidirule: bidirule.ValidString, mapping: validateAndMap,
bidirule: bidirule.ValidString,
}} }}
display = &Profile{options{ display = &Profile{options{
useSTD3Rules: true, useSTD3Rules: true,
validateLabels: true, validateLabels: true,
trie: trie, removeLeadingDots: true,
fromPuny: validateFromPunycode, trie: trie,
mapping: validateAndMap, fromPuny: validateFromPunycode,
bidirule: bidirule.ValidString, mapping: validateAndMap,
bidirule: bidirule.ValidString,
}} }}
registration = &Profile{options{ registration = &Profile{options{
useSTD3Rules: true, useSTD3Rules: true,
...@@ -293,7 +306,9 @@ func (p *Profile) process(s string, toASCII bool) (string, error) { ...@@ -293,7 +306,9 @@ func (p *Profile) process(s string, toASCII bool) (string, error) {
s, err = p.mapping(p, s) s, err = p.mapping(p, s)
} }
// Remove leading empty labels. // Remove leading empty labels.
for ; len(s) > 0 && s[0] == '.'; s = s[1:] { if p.removeLeadingDots {
for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
}
} }
// It seems like we should only create this error on ToASCII, but the // It seems like we should only create this error on ToASCII, but the
// UTS 46 conformance tests suggests we should always check this. // UTS 46 conformance tests suggests we should always check this.
...@@ -373,23 +388,20 @@ func validateRegistration(p *Profile, s string) (string, error) { ...@@ -373,23 +388,20 @@ func validateRegistration(p *Profile, s string) (string, error) {
if !norm.NFC.IsNormalString(s) { if !norm.NFC.IsNormalString(s) {
return s, &labelError{s, "V1"} return s, &labelError{s, "V1"}
} }
var err error
for i := 0; i < len(s); { for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:]) v, sz := trie.lookupString(s[i:])
i += sz
// Copy bytes not copied so far. // Copy bytes not copied so far.
switch p.simplify(info(v).category()) { switch p.simplify(info(v).category()) {
// TODO: handle the NV8 defined in the Unicode idna data set to allow // TODO: handle the NV8 defined in the Unicode idna data set to allow
// for strict conformance to IDNA2008. // for strict conformance to IDNA2008.
case valid, deviation: case valid, deviation:
case disallowed, mapped, unknown, ignored: case disallowed, mapped, unknown, ignored:
if err == nil { r, _ := utf8.DecodeRuneInString(s[i:])
r, _ := utf8.DecodeRuneInString(s[i:]) return s, runeError(r)
err = runeError(r)
}
} }
i += sz
} }
return s, err return s, nil
} }
func validateAndMap(p *Profile, s string) (string, error) { func validateAndMap(p *Profile, s string) (string, error) {
...@@ -408,7 +420,7 @@ func validateAndMap(p *Profile, s string) (string, error) { ...@@ -408,7 +420,7 @@ func validateAndMap(p *Profile, s string) (string, error) {
continue continue
case disallowed: case disallowed:
if err == nil { if err == nil {
r, _ := utf8.DecodeRuneInString(s[i:]) r, _ := utf8.DecodeRuneInString(s[start:])
err = runeError(r) err = runeError(r)
} }
continue continue
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment