Commit 61791144 authored by Nigel Tao

go.net/publicsuffix: add an EffectiveTLDPlus1 function.

Also expand the gen.go subset to cover the test cases from
http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit/data/test_psl.txt

R=dr.volker.dobler, patrick
CC=golang-dev
https://golang.org/cl/7124044
parent b8ab510d
...@@ -143,14 +143,19 @@ func main1() error { ...@@ -143,14 +143,19 @@ func main1() error {
if *subset { if *subset {
switch { switch {
case s == "ac.jp" || strings.HasSuffix(s, ".ac.jp"):
case s == "ak.us" || strings.HasSuffix(s, ".ak.us"):
case s == "ao" || strings.HasSuffix(s, ".ao"): case s == "ao" || strings.HasSuffix(s, ".ao"):
case s == "ar" || strings.HasSuffix(s, ".ar"): case s == "ar" || strings.HasSuffix(s, ".ar"):
case s == "arpa" || strings.HasSuffix(s, ".arpa"): case s == "arpa" || strings.HasSuffix(s, ".arpa"):
case s == "cy" || strings.HasSuffix(s, ".cy"):
case s == "dyndns.org" || strings.HasSuffix(s, ".dyndns.org"): case s == "dyndns.org" || strings.HasSuffix(s, ".dyndns.org"):
case s == "jp": case s == "jp":
case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"): case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"):
case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"): case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"):
case s == "om" || strings.HasSuffix(s, ".om"):
case s == "uk" || strings.HasSuffix(s, ".uk"): case s == "uk" || strings.HasSuffix(s, ".uk"):
case s == "uk.com" || strings.HasSuffix(s, ".uk.com"):
case s == "tw" || strings.HasSuffix(s, ".tw"): case s == "tw" || strings.HasSuffix(s, ".tw"):
case s == "zw" || strings.HasSuffix(s, ".zw"): case s == "zw" || strings.HasSuffix(s, ".zw"):
case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"): case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"):
......
...@@ -7,8 +7,12 @@ ...@@ -7,8 +7,12 @@
// can directly register names. // can directly register names.
package publicsuffix package publicsuffix
// TODO: specify case sensitivity and leading/trailing dot behavior for
// func PublicSuffix and func EffectiveTLDPlusOne.
import ( import (
"exp/cookiejar" "exp/cookiejar"
"fmt"
"strings" "strings"
) )
...@@ -113,3 +117,17 @@ func nodeLabel(i uint32) string { ...@@ -113,3 +117,17 @@ func nodeLabel(i uint32) string {
offset := x & (1<<nodesBitsTextOffset - 1) offset := x & (1<<nodesBitsTextOffset - 1)
return text[offset : offset+length] return text[offset : offset+length]
} }
// EffectiveTLDPlusOne returns the effective top level domain plus one more
// label. For example, the eTLD+1 for "foo.bar.golang.org" is "golang.org".
//
// It returns an error if domain contains an empty label (a leading dot, a
// trailing dot, or two consecutive dots), if domain is no longer than its
// public suffix (so there is no further label to add), or if the suffix
// reported by PublicSuffix is not preceded by a dot within domain.
func EffectiveTLDPlusOne(domain string) (string, error) {
	// Reject empty labels up front. The slicing below assumes that every
	// dot-separated label is non-empty; given an empty label it could return
	// a result that begins with a dot together with a nil error.
	if strings.HasPrefix(domain, ".") || strings.HasSuffix(domain, ".") || strings.Contains(domain, "..") {
		return "", fmt.Errorf("publicsuffix: empty label in domain %q", domain)
	}

	// Only the suffix string is needed here; the second result is unused.
	suffix, _ := PublicSuffix(domain)
	if len(domain) <= len(suffix) {
		return "", fmt.Errorf("publicsuffix: cannot derive eTLD+1 for domain %q", domain)
	}

	// i is the index of the byte immediately before the suffix, which must be
	// the dot separating the suffix from the preceding label.
	i := len(domain) - len(suffix) - 1
	if domain[i] != '.' {
		return "", fmt.Errorf("publicsuffix: invalid public suffix %q for domain %q", suffix, domain)
	}

	// LastIndex yields -1 when there is no earlier dot, so the slice then
	// starts at 0 and the whole domain is returned.
	return domain[1+strings.LastIndex(domain[:i], "."):], nil
}
...@@ -326,5 +326,79 @@ func (b byPriority) Less(i, j int) bool { ...@@ -326,5 +326,79 @@ func (b byPriority) Less(i, j int) bool {
return len(b[i]) > len(b[j]) return len(b[i]) > len(b[j])
} }
// eTLDPlusOneTestCases come from
// http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit/data/test_psl.txt
//
// Each case pairs an input domain with the result that
// TestEffectiveTLDPlusOne expects from EffectiveTLDPlusOne. want is "" when
// no eTLD+1 is expected for that input.
var eTLDPlusOneTestCases = []struct {
domain, want string
}{
// Empty input.
{"", ""},
// Unlisted TLD.
{"example", ""},
{"example.example", "example.example"},
{"b.example.example", "example.example"},
{"a.b.example.example", "example.example"},
// TLD with only 1 rule.
{"biz", ""},
{"domain.biz", "domain.biz"},
{"b.domain.biz", "domain.biz"},
{"a.b.domain.biz", "domain.biz"},
// TLD with some 2-level rules.
{"com", ""},
{"example.com", "example.com"},
{"b.example.com", "example.com"},
{"a.b.example.com", "example.com"},
{"uk.com", ""},
{"example.uk.com", "example.uk.com"},
{"b.example.uk.com", "example.uk.com"},
{"a.b.example.uk.com", "example.uk.com"},
{"test.ac", "test.ac"},
// TLD with only 1 (wildcard) rule.
{"cy", ""},
{"c.cy", ""},
{"b.c.cy", "b.c.cy"},
{"a.b.c.cy", "b.c.cy"},
// More complex TLD.
// The kobe.jp cases exercise the wildcard rule "*.kobe.jp" together with
// its exception "!city.kobe.jp".
{"jp", ""},
{"test.jp", "test.jp"},
{"www.test.jp", "test.jp"},
{"ac.jp", ""},
{"test.ac.jp", "test.ac.jp"},
{"www.test.ac.jp", "test.ac.jp"},
{"kyoto.jp", ""},
{"test.kyoto.jp", "test.kyoto.jp"},
{"ide.kyoto.jp", ""},
{"b.ide.kyoto.jp", "b.ide.kyoto.jp"},
{"a.b.ide.kyoto.jp", "b.ide.kyoto.jp"},
{"c.kobe.jp", ""},
{"b.c.kobe.jp", "b.c.kobe.jp"},
{"a.b.c.kobe.jp", "b.c.kobe.jp"},
{"city.kobe.jp", "city.kobe.jp"},
{"www.city.kobe.jp", "city.kobe.jp"},
// TLD with a wildcard rule and exceptions.
// The songfest.om cases exercise the exception "!songfest.om" to the
// wildcard rule "*.om".
{"om", ""},
{"test.om", ""},
{"b.test.om", "b.test.om"},
{"a.b.test.om", "b.test.om"},
{"songfest.om", "songfest.om"},
{"www.songfest.om", "songfest.om"},
// US K12.
{"us", ""},
{"test.us", "test.us"},
{"www.test.us", "test.us"},
{"ak.us", ""},
{"test.ak.us", "test.ak.us"},
{"www.test.ak.us", "test.ak.us"},
{"k12.ak.us", ""},
{"test.k12.ak.us", "test.k12.ak.us"},
{"www.test.k12.ak.us", "test.k12.ak.us"},
}
// TestEffectiveTLDPlusOne runs EffectiveTLDPlusOne over every entry in
// eTLDPlusOneTestCases. An empty want means that no eTLD+1 is expected, so an
// error must be returned; any other want must come with a nil error.
func TestEffectiveTLDPlusOne(t *testing.T) {
	for _, tc := range eTLDPlusOneTestCases {
		got, err := EffectiveTLDPlusOne(tc.domain)
		if got != tc.want {
			// Include the error so that an unexpected failure explains itself.
			t.Errorf("%q: got %q, want %q (err: %v)", tc.domain, got, tc.want, err)
			continue
		}
		// The returned string alone does not prove that the error result is
		// consistent with it, so check the pairing explicitly.
		if wantErr := tc.want == ""; (err != nil) != wantErr {
			t.Errorf("%q: got error %v, want error: %t", tc.domain, err, wantErr)
		}
	}
}
This diff is collapsed.
...@@ -26,7 +26,9 @@ var rules = [...]string{ ...@@ -26,7 +26,9 @@ var rules = [...]string{
"iris.arpa", "iris.arpa",
"uri.arpa", "uri.arpa",
"urn.arpa", "urn.arpa",
"*.cy",
"jp", "jp",
"ac.jp",
"kyoto.jp", "kyoto.jp",
"*.kobe.jp", "*.kobe.jp",
"!city.kobe.jp", "!city.kobe.jp",
...@@ -61,6 +63,17 @@ var rules = [...]string{ ...@@ -61,6 +63,17 @@ var rules = [...]string{
"wazuka.kyoto.jp", "wazuka.kyoto.jp",
"yamashina.kyoto.jp", "yamashina.kyoto.jp",
"yawata.kyoto.jp", "yawata.kyoto.jp",
"*.om",
"!mediaphone.om",
"!nawrastelecom.om",
"!nawras.om",
"!omanmobile.om",
"!omanpost.om",
"!omantel.om",
"!rakpetroleum.om",
"!siemens.om",
"!songfest.om",
"!statecouncil.om",
"tw", "tw",
"edu.tw", "edu.tw",
"gov.tw", "gov.tw",
...@@ -86,8 +99,13 @@ var rules = [...]string{ ...@@ -86,8 +99,13 @@ var rules = [...]string{
"!nic.uk", "!nic.uk",
"!nls.uk", "!nls.uk",
"!parliament.uk", "!parliament.uk",
"ak.us",
"k12.ak.us",
"cc.ak.us",
"lib.ak.us",
"xn--p1ai", "xn--p1ai",
"*.zw", "*.zw",
"uk.com",
"blogspot.co.uk", "blogspot.co.uk",
"blogspot.com.ar", "blogspot.com.ar",
"blogspot.tw", "blogspot.tw",
...@@ -100,10 +118,14 @@ var nodeLabels = [...]string{ ...@@ -100,10 +118,14 @@ var nodeLabels = [...]string{
"ao", "ao",
"ar", "ar",
"arpa", "arpa",
"com",
"cy",
"jp", "jp",
"om",
"org", "org",
"tw", "tw",
"uk", "uk",
"us",
"xn--p1ai", "xn--p1ai",
"zw", "zw",
"co", "co",
...@@ -129,6 +151,8 @@ var nodeLabels = [...]string{ ...@@ -129,6 +151,8 @@ var nodeLabels = [...]string{
"iris", "iris",
"uri", "uri",
"urn", "urn",
"uk",
"ac",
"kobe", "kobe",
"kyoto", "kyoto",
"city", "city",
...@@ -163,6 +187,16 @@ var nodeLabels = [...]string{ ...@@ -163,6 +187,16 @@ var nodeLabels = [...]string{
"wazuka", "wazuka",
"yamashina", "yamashina",
"yawata", "yawata",
"mediaphone",
"nawras",
"nawrastelecom",
"omanmobile",
"omanpost",
"omantel",
"rakpetroleum",
"siemens",
"songfest",
"statecouncil",
"dyndns", "dyndns",
"go", "go",
"home", "home",
...@@ -192,4 +226,8 @@ var nodeLabels = [...]string{ ...@@ -192,4 +226,8 @@ var nodeLabels = [...]string{
"parliament", "parliament",
"sch", "sch",
"blogspot", "blogspot",
"ak",
"cc",
"k12",
"lib",
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment