Commit 61791144 authored by Nigel Tao

go.net/publicsuffix: add an EffectiveTLDPlusOne function.

Also expand the gen.go subset to cover the test cases from
http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit/data/test_psl.txt

R=dr.volker.dobler, patrick
CC=golang-dev
https://golang.org/cl/7124044
parent b8ab510d
......@@ -143,14 +143,19 @@ func main1() error {
if *subset {
switch {
case s == "ac.jp" || strings.HasSuffix(s, ".ac.jp"):
case s == "ak.us" || strings.HasSuffix(s, ".ak.us"):
case s == "ao" || strings.HasSuffix(s, ".ao"):
case s == "ar" || strings.HasSuffix(s, ".ar"):
case s == "arpa" || strings.HasSuffix(s, ".arpa"):
case s == "cy" || strings.HasSuffix(s, ".cy"):
case s == "dyndns.org" || strings.HasSuffix(s, ".dyndns.org"):
case s == "jp":
case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"):
case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"):
case s == "om" || strings.HasSuffix(s, ".om"):
case s == "uk" || strings.HasSuffix(s, ".uk"):
case s == "uk.com" || strings.HasSuffix(s, ".uk.com"):
case s == "tw" || strings.HasSuffix(s, ".tw"):
case s == "zw" || strings.HasSuffix(s, ".zw"):
case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"):
......
......@@ -7,8 +7,12 @@
// can directly register names.
package publicsuffix
// TODO: specify case sensitivity and leading/trailing dot behavior for
// func PublicSuffix and func EffectiveTLDPlusOne.
import (
"exp/cookiejar"
"fmt"
"strings"
)
......@@ -113,3 +117,17 @@ func nodeLabel(i uint32) string {
offset := x & (1<<nodesBitsTextOffset - 1)
return text[offset : offset+length]
}
// EffectiveTLDPlusOne returns the effective top level domain plus one more
// label. For example, the eTLD+1 for "foo.bar.golang.org" is "golang.org".
//
// It returns an empty string and a non-nil error when domain contains an
// empty label (a leading dot, a trailing dot or two consecutive dots), when
// domain is not longer than its public suffix, or when the suffix reported by
// PublicSuffix is not preceded by a dot in domain.
func EffectiveTLDPlusOne(domain string) (string, error) {
	// Reject empty labels up front. Otherwise, if PublicSuffix reports "com"
	// for an input such as ".com", the length and separator checks below
	// both pass and ".com" itself would be returned with a nil error, even
	// though there is no label in front of the suffix.
	if strings.HasPrefix(domain, ".") || strings.HasSuffix(domain, ".") || strings.Contains(domain, "..") {
		return "", fmt.Errorf("publicsuffix: empty label in domain %q", domain)
	}

	// Only the suffix string is needed here; the second result of
	// PublicSuffix is intentionally unused.
	suffix, _ := PublicSuffix(domain)
	if len(domain) <= len(suffix) {
		// The domain is the suffix itself (or shorter), so there is no
		// additional label to add.
		return "", fmt.Errorf("publicsuffix: cannot derive eTLD+1 for domain %q", domain)
	}

	// i indexes the byte immediately before the suffix; it must be the dot
	// separating the suffix from the label in front of it.
	i := len(domain) - len(suffix) - 1
	if domain[i] != '.' {
		return "", fmt.Errorf("publicsuffix: invalid public suffix %q for domain %q", suffix, domain)
	}

	// Keep everything after the last dot that precedes i. When there is no
	// such dot, LastIndex returns -1 and the whole domain is returned.
	return domain[1+strings.LastIndex(domain[:i], "."):], nil
}
......@@ -326,5 +326,79 @@ func (b byPriority) Less(i, j int) bool {
return len(b[i]) > len(b[j])
}
// eTLDPlusOneTestCases come from
// http://mxr.mozilla.org/mozilla-central/source/netwerk/test/unit/data/test_psl.txt
var eTLDPlusOneTestCases = []struct {
// domain is the input handed to EffectiveTLDPlusOne and want is the string
// it is expected to return. An empty want marks an input for which no
// eTLD+1 is expected; TestEffectiveTLDPlusOne compares only the returned
// string, so those entries expect "".
domain, want string
}{
// Empty input.
{"", ""},
// Unlisted TLD.
{"example", ""},
{"example.example", "example.example"},
{"b.example.example", "example.example"},
{"a.b.example.example", "example.example"},
// TLD with only 1 rule.
{"biz", ""},
{"domain.biz", "domain.biz"},
{"b.domain.biz", "domain.biz"},
{"a.b.domain.biz", "domain.biz"},
// TLD with some 2-level rules.
{"com", ""},
{"example.com", "example.com"},
{"b.example.com", "example.com"},
{"a.b.example.com", "example.com"},
{"uk.com", ""},
{"example.uk.com", "example.uk.com"},
{"b.example.uk.com", "example.uk.com"},
{"a.b.example.uk.com", "example.uk.com"},
{"test.ac", "test.ac"},
// TLD with only 1 (wildcard) rule.
// The rule is "*.cy", so every second-level name under "cy" is itself a
// public suffix.
{"cy", ""},
{"c.cy", ""},
{"b.c.cy", "b.c.cy"},
{"a.b.c.cy", "b.c.cy"},
// More complex TLD.
{"jp", ""},
{"test.jp", "test.jp"},
{"www.test.jp", "test.jp"},
{"ac.jp", ""},
{"test.ac.jp", "test.ac.jp"},
{"www.test.ac.jp", "test.ac.jp"},
{"kyoto.jp", ""},
{"test.kyoto.jp", "test.kyoto.jp"},
{"ide.kyoto.jp", ""},
{"b.ide.kyoto.jp", "b.ide.kyoto.jp"},
{"a.b.ide.kyoto.jp", "b.ide.kyoto.jp"},
{"c.kobe.jp", ""},
{"b.c.kobe.jp", "b.c.kobe.jp"},
{"a.b.c.kobe.jp", "b.c.kobe.jp"},
// "!city.kobe.jp" is an exception to the "*.kobe.jp" wildcard rule.
{"city.kobe.jp", "city.kobe.jp"},
{"www.city.kobe.jp", "city.kobe.jp"},
// TLD with a wildcard rule and exceptions.
{"om", ""},
{"test.om", ""},
{"b.test.om", "b.test.om"},
{"a.b.test.om", "b.test.om"},
// "!songfest.om" is an exception to the "*.om" wildcard rule.
{"songfest.om", "songfest.om"},
{"www.songfest.om", "songfest.om"},
// US K12.
{"us", ""},
{"test.us", "test.us"},
{"www.test.us", "test.us"},
{"ak.us", ""},
{"test.ak.us", "test.ak.us"},
{"www.test.ak.us", "test.ak.us"},
{"k12.ak.us", ""},
{"test.k12.ak.us", "test.k12.ak.us"},
{"www.test.k12.ak.us", "test.k12.ak.us"},
}
// TestEffectiveTLDPlusOne feeds every entry of eTLDPlusOneTestCases through
// EffectiveTLDPlusOne and reports each result that differs from the expected
// string. The returned error is discarded on purpose: only the string result
// is compared against want.
func TestEffectiveTLDPlusOne(t *testing.T) {
	for idx := range eTLDPlusOneTestCases {
		c := &eTLDPlusOneTestCases[idx]
		result, _ := EffectiveTLDPlusOne(c.domain)
		if result == c.want {
			continue
		}
		t.Errorf("%q: got %q, want %q", c.domain, result, c.want)
	}
}
This diff is collapsed.
......@@ -26,7 +26,9 @@ var rules = [...]string{
"iris.arpa",
"uri.arpa",
"urn.arpa",
"*.cy",
"jp",
"ac.jp",
"kyoto.jp",
"*.kobe.jp",
"!city.kobe.jp",
......@@ -61,6 +63,17 @@ var rules = [...]string{
"wazuka.kyoto.jp",
"yamashina.kyoto.jp",
"yawata.kyoto.jp",
"*.om",
"!mediaphone.om",
"!nawrastelecom.om",
"!nawras.om",
"!omanmobile.om",
"!omanpost.om",
"!omantel.om",
"!rakpetroleum.om",
"!siemens.om",
"!songfest.om",
"!statecouncil.om",
"tw",
"edu.tw",
"gov.tw",
......@@ -86,8 +99,13 @@ var rules = [...]string{
"!nic.uk",
"!nls.uk",
"!parliament.uk",
"ak.us",
"k12.ak.us",
"cc.ak.us",
"lib.ak.us",
"xn--p1ai",
"*.zw",
"uk.com",
"blogspot.co.uk",
"blogspot.com.ar",
"blogspot.tw",
......@@ -100,10 +118,14 @@ var nodeLabels = [...]string{
"ao",
"ar",
"arpa",
"com",
"cy",
"jp",
"om",
"org",
"tw",
"uk",
"us",
"xn--p1ai",
"zw",
"co",
......@@ -129,6 +151,8 @@ var nodeLabels = [...]string{
"iris",
"uri",
"urn",
"uk",
"ac",
"kobe",
"kyoto",
"city",
......@@ -163,6 +187,16 @@ var nodeLabels = [...]string{
"wazuka",
"yamashina",
"yawata",
"mediaphone",
"nawras",
"nawrastelecom",
"omanmobile",
"omanpost",
"omantel",
"rakpetroleum",
"siemens",
"songfest",
"statecouncil",
"dyndns",
"go",
"home",
......@@ -192,4 +226,8 @@ var nodeLabels = [...]string{
"parliament",
"sch",
"blogspot",
"ak",
"cc",
"k12",
"lib",
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment