Commit cbecf2f7 authored by Nigel Tao

go.net/publicsuffix: use IDNA.

R=dr.volker.dobler
CC=golang-dev
https://golang.org/cl/6930054
parent 7db922ba
......@@ -19,8 +19,6 @@ package main
// To fetch a particular hg revision, such as 05b11a8d1ace, pass
// -url "http://hg.mozilla.org/mozilla-central/raw-file/05b11a8d1ace/netwerk/dns/effective_tld_names.dat"
// TODO(nigeltao): decide what to do with non-ASCII entries.
import (
"bufio"
"bytes"
......@@ -32,6 +30,8 @@ import (
"os"
"sort"
"strings"
"code.google.com/p/go.net/idna"
)
const (
......@@ -105,9 +105,13 @@ func main1() error {
return err
}
s = strings.TrimSpace(s)
if s == "" || strings.HasPrefix(s, "//") || !isASCII(s) {
if s == "" || strings.HasPrefix(s, "//") {
continue
}
s, err = idna.ToASCII(s)
if err != nil {
return err
}
if *subset {
switch {
......@@ -118,7 +122,10 @@ func main1() error {
case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"):
case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"):
case s == "uk" || strings.HasSuffix(s, ".uk"):
case s == "tw" || strings.HasSuffix(s, ".tw"):
case s == "zw" || strings.HasSuffix(s, ".zw"):
case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"):
// xn--p1ai is Russian-Cyrillic "рф".
default:
continue
}
......@@ -169,15 +176,6 @@ func main1() error {
return err
}
// isASCII reports whether every byte of s has a value in the inclusive
// range [32, 127]. Bytes below 32 and bytes above 127 (which includes every
// byte of a multi-byte UTF-8 sequence) make it return false. The empty
// string yields true.
func isASCII(s string) bool {
	for _, b := range []byte(s) {
		if b < 32 || b > 127 {
			return false
		}
	}
	return true
}
func printTest(w io.Writer, n *node) error {
fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n")
fmt.Fprintf(w, "package publicsuffix\n\nvar rules = [...]string{\n")
......
......@@ -138,6 +138,34 @@ var publicSuffixTestCases = []struct {
{"b.ide.kyoto.jp", "ide.kyoto.jp"},
{"a.b.ide.kyoto.jp", "ide.kyoto.jp"},
// The .tw rules are:
// tw
// edu.tw
// gov.tw
// mil.tw
// com.tw
// net.tw
// org.tw
// idv.tw
// game.tw
// ebiz.tw
// club.tw
// 網路.tw (xn--zf0ao64a.tw)
// 組織.tw (xn--uc0atv.tw)
// 商業.tw (xn--czrw28b.tw)
// blogspot.tw
{"tw", "tw"},
{"aaa.tw", "tw"},
{"www.aaa.tw", "tw"},
{"xn--czrw28b.aaa.tw", "tw"},
{"edu.tw", "edu.tw"},
{"www.edu.tw", "edu.tw"},
{"xn--czrw28b.edu.tw", "edu.tw"},
{"xn--czrw28b.tw", "xn--czrw28b.tw"},
{"www.xn--czrw28b.tw", "xn--czrw28b.tw"},
{"xn--uc0atv.xn--czrw28b.tw", "xn--czrw28b.tw"},
{"xn--kpry57d.tw", "tw"},
// The .uk rules are:
// *.uk
// *.sch.uk
......@@ -163,6 +191,12 @@ var publicSuffixTestCases = []struct {
{"blogspot.nic.uk", "uk"},
{"blogspot.sch.uk", "blogspot.sch.uk"},
// The .рф rules are:
// рф (xn--p1ai)
{"xn--p1ai", "xn--p1ai"},
{"aaa.xn--p1ai", "xn--p1ai"},
{"www.xxx.yyy.xn--p1ai", "xn--p1ai"},
// The .zw rules are:
// *.zw
{"zw", "zw"},
......
This diff is collapsed.
......@@ -61,6 +61,20 @@ var rules = [...]string{
"wazuka.kyoto.jp",
"yamashina.kyoto.jp",
"yawata.kyoto.jp",
"tw",
"edu.tw",
"gov.tw",
"mil.tw",
"com.tw",
"net.tw",
"org.tw",
"idv.tw",
"game.tw",
"ebiz.tw",
"club.tw",
"xn--zf0ao64a.tw",
"xn--uc0atv.tw",
"xn--czrw28b.tw",
"*.uk",
"*.sch.uk",
"!bl.uk",
......@@ -72,9 +86,11 @@ var rules = [...]string{
"!nic.uk",
"!nls.uk",
"!parliament.uk",
"xn--p1ai",
"*.zw",
"blogspot.co.uk",
"blogspot.com.ar",
"blogspot.tw",
}
var nodeLabels = [...]string{
......@@ -82,7 +98,9 @@ var nodeLabels = [...]string{
"ar",
"arpa",
"jp",
"tw",
"uk",
"xn--p1ai",
"zw",
"co",
"ed",
......@@ -141,6 +159,20 @@ var nodeLabels = [...]string{
"wazuka",
"yamashina",
"yawata",
"blogspot",
"club",
"com",
"ebiz",
"edu",
"game",
"gov",
"idv",
"mil",
"net",
"org",
"xn--czrw28b",
"xn--uc0atv",
"xn--zf0ao64a",
"bl",
"british-library",
"co",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment