├── .github ├── dependabot.yml └── workflows │ └── test.yml ├── LICENSE ├── README.md ├── example └── main.go ├── go.mod ├── go.sum ├── parse.go └── parse_test.go /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: gomod 4 | directory: "/" 5 | schedule: 6 | interval: monthly 7 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | name: Tests 3 | jobs: 4 | test: 5 | strategy: 6 | matrix: 7 | go-version: [1.15.x, 1.16.x, 1.17.x] 8 | platform: [ubuntu-latest, macos-latest, windows-latest] 9 | runs-on: ${{ matrix.platform }} 10 | steps: 11 | - name: Install Go 12 | uses: actions/setup-go@v1 13 | with: 14 | go-version: ${{ matrix.go-version }} 15 | - name: Checkout code 16 | uses: actions/checkout@v2 17 | - name: Build 18 | run: go build -v . 19 | - name: Test 20 | run: go test -v ./... 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © 2019-2020 Jaime Pillora 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TLD Parser in Go 2 | 3 | [![GoDoc](https://godoc.org/github.com/jpillora/go-tld?status.svg)](https://pkg.go.dev/github.com/jpillora/go-tld?tab=doc) [![Tests](https://github.com/jpillora/go-tld/workflows/Tests/badge.svg)](https://github.com/jpillora/go-tld/actions?workflow=Tests) 4 | 5 | The `tld` package has the same API ([see godoc](http://godoc.org/github.com/jpillora/go-tld)) as `net/url` except `tld.URL` contains extra fields: `Subdomain`, `Domain`, `TLD` and `Port`. Note: `TLD` is actually "eTLD", so `Domain+TLD` is the often desired "eTLD+1". 6 | 7 | _Note:_ This is a wrapper around http://golang.org/x/net/publicsuffix. 
8 | 9 | ### Install 10 | 11 | ``` 12 | go get github.com/jpillora/go-tld 13 | ``` 14 | 15 | ### Usage 16 | 17 | ```go 18 | package main 19 | 20 | import ( 21 | "fmt" 22 | 23 | tld "github.com/jpillora/go-tld" 24 | ) 25 | 26 | func main() { 27 | urls := []string{ 28 | "http://google.com", 29 | "http://blog.google", 30 | "https://www.medi-cal.ca.gov/", 31 | "https://ato.gov.au", 32 | "http://a.very.complex-domain.co.uk:8080/foo/bar", 33 | "http://a.domain.that.is.unmanaged", 34 | } 35 | for _, url := range urls { 36 | u, _ := tld.Parse(url) 37 | fmt.Printf("%50s = [ %s ] [ %s ] [ %s ] [ %s ] [ %s ] [ %t ]\n", 38 | u, u.Subdomain, u.Domain, u.TLD, u.Port, u.Path, u.ICANN) 39 | } 40 | } 41 | ``` 42 | 43 | ``` 44 | $ go run main.go 45 | http://google.com = [ ] [ google ] [ com ] [ ] [ ] [ true ] 46 | http://blog.google = [ ] [ blog ] [ google ] [ ] [ ] [ true ] 47 | https://www.medi-cal.ca.gov/ = [ www.medi-cal ] [ ca ] [ gov ] [ ] [ / ] [ true ] 48 | https://ato.gov.au = [ ] [ ato ] [ gov.au ] [ ] [ ] [ true ] 49 | http://a.very.complex-domain.co.uk:8080/foo/bar = [ a.very ] [ complex-domain ] [ co.uk ] [ 8080 ] [ /foo/bar ] [ true ] 50 | http://a.domain.that.is.unmanaged = [ a.domain.that ] [ is ] [ unmanaged ] [ ] [ ] [ false ] 51 | ``` 52 | 53 | #### MIT License 54 | 55 | Copyright © 2019 Jaime Pillora <dev@jpillora.com> 56 | 57 | Permission is hereby granted, free of charge, to any person obtaining 58 | a copy of this software and associated documentation files (the 59 | 'Software'), to deal in the Software without restriction, including 60 | without limitation the rights to use, copy, modify, merge, publish, 61 | distribute, sublicense, and/or sell copies of the Software, and to 62 | permit persons to whom the Software is furnished to do so, subject to 63 | the following conditions: 64 | 65 | The above copyright notice and this permission notice shall be 66 | included in all copies or substantial portions of the Software. 
67 | 68 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, 69 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 70 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 71 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 72 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 73 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 74 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 75 | -------------------------------------------------------------------------------- /example/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/jpillora/go-tld" 7 | ) 8 | 9 | func main() { 10 | urls := []string{ 11 | "http://google.com", 12 | "http://blog.google", 13 | "https://www.medi-cal.ca.gov/", 14 | "https://ato.gov.au", 15 | "http://a.very.complex-domain.co.uk:8080/foo/bar", 16 | "http://a.domain.that.is.unmanaged", 17 | } 18 | for _, url := range urls { 19 | u, _ := tld.Parse(url) 20 | fmt.Printf("%50s = [ %s ] [ %s ] [ %s ] [ %s ] [ %s ] [ %t ]\n", 21 | u, u.Subdomain, u.Domain, u.TLD, u.Port, u.Path, u.ICANN) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/jpillora/go-tld 2 | 3 | go 1.14 4 | 5 | require golang.org/x/net v0.0.0-20220225172249-27dd8689420f 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | golang.org/x/net v0.0.0-20220225172249-27dd8689420f h1:oA4XRj0qtSt8Yo1Zms0CUlsT3KG69V2UGQWPBxujDmc= 2 | golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= 3 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 4 | golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 5 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 6 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 7 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 8 | -------------------------------------------------------------------------------- /parse.go: -------------------------------------------------------------------------------- 1 | //go:generate sh generate.sh 2 | 3 | //Package tld has the same API as net/url except 4 | //tld.URL contains extra fields: Subdomain, Domain, TLD, Port and ICANN. 5 | package tld 6 | 7 | import ( 8 | "fmt" 9 | "net/url" 10 | "strings" 11 | 12 | "golang.org/x/net/publicsuffix" 13 | ) 14 | 15 | //URL embeds net/url and adds extra fields on top 16 | type URL struct { 17 | Subdomain, Domain, TLD, Port string 18 | ICANN bool 19 | *url.URL 20 | } 21 | 22 | //Parse mirrors net/url.Parse except that it returns 23 | //a tld.URL, which contains extra fields. 
24 | func Parse(s string) (*URL, error) { 25 | url, err := url.Parse(s) 26 | if err != nil { 27 | return nil, err 28 | } 29 | if url.Host == "" { 30 | return &URL{URL: url}, nil 31 | } 32 | dom, port := domainPort(url.Host) 33 | //etld+1 34 | etld1, err := publicsuffix.EffectiveTLDPlusOne(dom) 35 | suffix, icann := publicsuffix.PublicSuffix(strings.ToLower(dom)) 36 | // HACK: attempt to support valid domains which are not registered with ICANN 37 | if err != nil && !icann && suffix == dom { 38 | etld1 = dom 39 | err = nil 40 | } 41 | if err != nil { 42 | return nil, err 43 | } 44 | //convert to domain name, and tld 45 | i := strings.Index(etld1, ".") 46 | if i < 0 { 47 | return nil, fmt.Errorf("tld: failed parsing %q", s) 48 | } 49 | domName := etld1[0:i] 50 | tld := etld1[i+1:] 51 | //and subdomain 52 | sub := "" 53 | if rest := strings.TrimSuffix(dom, "."+etld1); rest != dom { 54 | sub = rest 55 | } 56 | return &URL{ 57 | Subdomain: sub, 58 | Domain: domName, 59 | TLD: tld, 60 | Port: port, 61 | ICANN: icann, 62 | URL: url, 63 | }, nil 64 | } 65 | 66 | func domainPort(host string) (string, string) { 67 | for i := len(host) - 1; i >= 0; i-- { 68 | if host[i] == ':' { 69 | return host[:i], host[i+1:] 70 | } else if host[i] < '0' || host[i] > '9' { 71 | return host, "" 72 | } 73 | } 74 | //will only land here if the string is all digits, 75 | //net/url should prevent that from happening 76 | return host, "" 77 | } 78 | -------------------------------------------------------------------------------- /parse_test.go: -------------------------------------------------------------------------------- 1 | package tld 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func run(input, sub, dom, tld string, icann, errorExpected bool, t *testing.T) { 8 | 9 | u, err := Parse(input) 10 | 11 | if err != nil && errorExpected { 12 | return 13 | } else if err != nil { 14 | t.Errorf("errored '%s'", err) 15 | } else if u.TLD != tld { 16 | t.Errorf("should have TLD '%s', got '%s'", tld, u.TLD) 
17 | } else if u.Domain != dom { 18 | t.Errorf("should have Domain '%s', got '%s'", dom, u.Domain) 19 | } else if u.Subdomain != sub { 20 | t.Errorf("should have Subdomain '%s', got '%s'", sub, u.Subdomain) 21 | } else if u.ICANN != icann { 22 | t.Errorf("should have Icann '%t', got '%t'", icann, u.ICANN) 23 | } 24 | } 25 | 26 | func Test0(t *testing.T) { 27 | run("http://foo.com", "", "foo", "com", true, false, t) 28 | } 29 | 30 | func Test1(t *testing.T) { 31 | run("http://zip.zop.foo.com", "zip.zop", "foo", "com", true, false, t) 32 | } 33 | 34 | func Test2(t *testing.T) { 35 | run("http://au.com.au", "", "au", "com.au", true, false, t) 36 | } 37 | 38 | func Test3(t *testing.T) { 39 | run("http://im.from.england.co.uk:1900", "im.from", "england", "co.uk", true, false, t) 40 | } 41 | 42 | func Test4(t *testing.T) { 43 | run("https://google.com", "", "google", "com", true, false, t) 44 | } 45 | 46 | func Test5(t *testing.T) { 47 | run("https://foo.notmanaged", "", "foo", "notmanaged", false, false, t) 48 | } 49 | 50 | func Test6(t *testing.T) { 51 | run("https://google.Com", "", "google", "Com", true, false, t) 52 | } 53 | 54 | func Test7(t *testing.T) { 55 | run("https://github.io", "", "github", "io", false, false, t) 56 | } 57 | 58 | func Test8(t *testing.T) { 59 | // test expects error 60 | run("https://no_dot_should_not_panic", "", "", "", false, true, t) 61 | } 62 | 63 | func Test9(t *testing.T) { 64 | // test expects error 65 | run("https://.start_with_dot_should_fail", "", "", "", false, true, t) 66 | } 67 | 68 | func Test10(t *testing.T) { 69 | // test expects error 70 | run("https://ends_with_dot_should_fail.", "", "", "", false, true, t) 71 | } 72 | --------------------------------------------------------------------------------