├── .codecov.yml
├── .github
├── FUNDING.yml
├── dependabot.yml
└── workflows
│ ├── codeql-analysis.yml
│ ├── psl-update.yml
│ └── tests.yml
├── CHANGELOG.md
├── LICENSE.txt
├── Makefile
├── README.md
├── SECURITY.md
├── cmd
├── gen
│ └── gen.go
└── load
│ └── main.go
├── fixtures
├── list-simple.txt
└── tests.txt
├── go.mod
├── go.sum
├── net
└── publicsuffix
│ ├── publicsuffix.go
│ └── publicsuffix_test.go
├── publicsuffix
├── acceptance_test.go
├── generator
│ └── gen.go
├── psl_test.go
├── publicsuffix.go
├── publicsuffix_test.go
└── rules.go
└── test.sh
/.codecov.yml:
--------------------------------------------------------------------------------
# https://docs.codecov.io/docs/coverage-configuration
#
# All coverage settings live under a single top-level "coverage" key:
# YAML mapping keys must be unique, and a repeated key would silently
# discard the earlier precision/round settings in last-key-wins parsers.
coverage:
  precision: 1
  round: down
  status:
    project:
      default: false
    patch:
      default: false

# https://docs.codecov.io/docs/pull-request-comments#section-requiring-changes
comment: off
15 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: [weppos]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: gomod
4 | directory: "/"
5 | schedule:
6 | interval: daily
7 | time: "04:00"
8 | open-pull-requests-limit: 10
9 | labels:
10 | - dependencies
11 |
12 | - package-ecosystem: "github-actions"
13 | directory: "/"
14 | schedule:
15 | interval: "daily"
16 | time: "04:00"
17 | open-pull-requests-limit: 10
18 | labels:
19 | - dependencies
20 |
--------------------------------------------------------------------------------
/.github/workflows/codeql-analysis.yml:
--------------------------------------------------------------------------------
1 | # For most projects, this workflow file will not need changing; you simply need
2 | # to commit it to your repository.
3 | #
4 | # You may wish to alter this file to override the set of languages analyzed,
5 | # or to provide custom queries or build logic.
6 | name: "CodeQL"
7 |
8 | on:
9 | push:
10 | branches: [ master ]
11 | pull_request:
12 | # The branches below must be a subset of the branches above
13 | branches: [ master ]
14 | schedule:
15 | - cron: '40 21 * * 0'
16 |
17 | jobs:
18 | analyze:
19 | name: Analyze
20 | runs-on: ubuntu-latest
21 | permissions:
22 | actions: read
23 | contents: read
24 | security-events: write
25 |
26 | strategy:
27 | fail-fast: false
28 | matrix:
29 | language: [ 'go' ]
30 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
31 | # Learn more about CodeQL language support at https://git.io/codeql-language-support
32 |
33 | steps:
34 | - name: Checkout repository
35 | uses: actions/checkout@v4
36 |
37 | # Initializes the CodeQL tools for scanning.
38 | - name: Initialize CodeQL
39 | uses: github/codeql-action/init@v3
40 | with:
41 | languages: ${{ matrix.language }}
42 | # If you wish to specify custom queries, you can do so here or in a config file.
43 | # By default, queries listed here will override any specified in a config file.
44 | # Prefix the list here with "+" to use these queries and those in the config file.
45 | # queries: ./path/to/local/query, your-org/your-repo/queries@main
46 |
47 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
48 | # If this step fails, then you should remove it and run the build manually (see below)
49 | - name: Autobuild
50 | uses: github/codeql-action/autobuild@v3
51 |
52 | # ℹ️ Command-line programs to run using the OS shell.
53 | # 📚 https://git.io/JvXDl
54 |
55 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
56 | # and modify them (or add more) to build your code if your project
57 | # uses a compiled language
58 |
59 | #- run: |
60 | # make bootstrap
61 | # make release
62 |
63 | - name: Perform CodeQL Analysis
64 | uses: github/codeql-action/analyze@v3
65 |
--------------------------------------------------------------------------------
/.github/workflows/psl-update.yml:
--------------------------------------------------------------------------------
name: PSL Update

on:
  workflow_dispatch:
  schedule:
    - cron: '40 6 * * *'

jobs:
  update:
    runs-on: ubuntu-latest
    steps:

    - uses: actions/checkout@v4

    - name: Set up Go
      uses: actions/setup-go@v5
      with:
        go-version: "1.24"

    - name: Update PSL
      run: make gen

    - name: Create Pull Request
      # The id is required so that the next step can read this step's
      # outputs through the steps.cpr.outputs.* context.
      id: cpr
      uses: peter-evans/create-pull-request@v7
      with:
        title: PSL auto-update
        commit-message: Updated list from source
        reviewers: weppos
        labels: psl

    - name: Check Pull Request
      if: ${{ steps.cpr.outputs.pull-request-number }}
      run: |
        echo "Pull Request Number - ${{ steps.cpr.outputs.pull-request-number }}"
        echo "Pull Request URL - ${{ steps.cpr.outputs.pull-request-url }}"
36 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on:
4 | push:
5 | pull_request:
6 |
7 | jobs:
8 |
9 | build:
10 | strategy:
11 | matrix:
12 | go-version:
13 | - "1.23"
14 | - "1.24"
15 | platform: [ubuntu-latest]
16 |
17 | runs-on: ${{ matrix.platform }}
18 | steps:
19 |
20 | - name: Setup env
21 | run: |
22 | echo "GOPATH=$GITHUB_WORKSPACE" >> $GITHUB_ENV
23 | echo "CURRENT_WORKSPACE=$GITHUB_WORKSPACE/src/github.com/${{ github.repository }}" >> $GITHUB_ENV
24 | echo "$GITHUB_WORKSPACE/bin" >> $GITHUB_PATH
25 |
26 | - name: Set up Go
27 | uses: actions/setup-go@v5
28 | with:
29 | go-version: ${{ matrix.go-version }}
30 |
31 | - name: Check out code into the Go module directory
32 | uses: actions/checkout@v4
33 | with:
34 | path: ${{ env.CURRENT_WORKSPACE }}
35 |
36 | - name: Get dependencies
37 | run: go get -v ./...
38 | working-directory: ${{ env.CURRENT_WORKSPACE }}
39 |
40 | - name: Run go vet
41 | run: go vet ./...
42 | working-directory: ${{ env.CURRENT_WORKSPACE }}
43 |
44 | - name: Run tests
45 | run: ./test.sh
46 | working-directory: ${{ env.CURRENT_WORKSPACE }}
47 |
48 | - uses: codecov/codecov-action@v5
49 | with:
50 | files: ${{ env.CURRENT_WORKSPACE }}/coverage.txt
51 | verbose: true
52 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # CHANGELOG
2 |
3 | ## main
4 |
5 | - CHANGED: Removed dependency from go-github (GH-1067)
6 | - CHANGED: Supported minimum version is now Go 1.23.
7 | - CHANGED: Definition updates.
8 |
9 |
10 | ## 0.40.2
11 |
12 | - CHANGED: Definition updates.
13 |
14 |
15 | ## 0.40.0
16 |
17 | - CHANGED: Supported minimum version is now Go 1.21.
18 | - CHANGED: Definition updates.
19 |
20 |
21 | ## 0.30.2
22 |
23 | - CHANGED: Definition updates.
24 |
25 |
26 | ## 0.30.1
27 |
28 | - CHANGED: Definition updates.
29 |
30 |
31 | ## 0.30.0
32 |
33 | - CHANGED: Dropped Go < 1.16 from the list of supported versions to fix "package embed is not in GOROOT". "embed" is an indirect dependency and it's only available since Go 1.16.
34 |
35 | - CHANGED: Exported defaultListVersion as ListVersion (GH-334, GH-880)
36 |
37 |
38 | ## 0.20.0
39 |
40 | - CHANGED: Definition updates.
41 |
42 |
43 | ## 0.15.0
44 |
45 | - CHANGED: Definition updates.
46 |
47 | - CHANGED: Dropped Go 1.8 from the list of supported versions. "math/bits" is an indirect dependency and it's only available since Go 1.9.
48 |
49 | - ENHANCEMENT: Improved performance by comparing runes instead of single-character strings (GH-484, GH-485)
50 |
51 |
52 | ## 0.14.0
53 |
54 | - CHANGED: Added go modules (GH-240).
55 |
56 |
57 | ## 0.13.0
58 |
59 | - CHANGED: Rollback changes of v0.12.0. It turns out it is actually causing more issues.
60 |
61 |
62 | ## 0.12.0
63 |
64 | - CHANGED: Extracted generator into its own package.
65 |
66 |
67 | ## 0.11.0
68 |
69 | - CHANGED: Definition updates.
70 |
71 |
72 | ## 0.10.0
73 |
74 | - ENHANCEMENT: Internal refactoring to use go gen when building definition list.
75 |
76 |
77 | ## 0.5.0
78 |
79 | - FIXED: Added a DefaultRules() function that can be used to create a new list without modifying the default one (GH-141, GH-170). Thanks @guliyevemil1
80 |
81 | - FIXED: Fixed nil pointer dereference when can't find a rule (GH-16)
82 |
83 | - CHANGED: Removed unreachable code (GH-167)
84 |
85 |
86 | ## 0.4.0
87 |
88 | - CHANGED: Definition updates.
89 |
90 | - ENHANCEMENT: gen tool now uses GitHub API instead of scraping GitHub UI (GH-93).
91 |
92 |
93 | ## 0.3.2
94 |
95 | - CHANGED: Definition updates.
96 |
97 |
98 | ## 0.3.1
99 |
100 | - CHANGED: Definition updates.
101 |
102 |
103 | ## 0.3.0
104 |
105 | - CHANGED: Definition updates.
106 |
107 | - ENHANCEMENT: Changed internal representation of PSL rules to be A-label encoded, as well the public interface of the library to use ASCII-encoded names by default (GH-31, GH-40).
108 |
109 |
110 | ## 0.2.0
111 |
112 | - CHANGED: Definition updates.
113 |
114 | - ENHANCEMENT: List.Select() is no longer exported. This was an experimental method and it's now kept private as the Find() implementation may change in the future.
115 |
116 | - ENHANCEMENT: List.Find() now returns a pointer to a Rule, and not a Rule. That's because Find() can actually return `nil` if the DefaultRule find option is set. This is useful if you need to avoid the fallback to the default rule "*".
117 |
118 |
119 | ## 0.1.0
120 |
121 | Initial version
122 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016-2024 Simone Carletti
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .DEFAULT_GOAL := test
2 |
3 | test:
4 | go test ./... -v
5 |
6 | gen:
7 | go generate ./...
8 |
9 | clean:
10 | rm publicsuffix/rules.*
11 |
12 | get-deps:
13 | go get ./...
14 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Public Suffix for Go
2 |
3 | The package publicsuffix provides a Go domain name parser based on the [Public Suffix List](http://publicsuffix.org/).
4 |
5 | [](https://github.com/weppos/publicsuffix-go/actions?query=workflow%3ATests)
6 | [](https://pkg.go.dev/github.com/weppos/publicsuffix-go/publicsuffix)
7 |
8 | Currently, **publicsuffix-go requires Go version 1.23 or greater**. We do our best not to break older versions of Go if we don't have to, but due to tooling constraints, we don't always test older versions.
9 |
10 |
11 | ## Getting started
12 |
13 | Clone the repository [in your workspace](https://golang.org/doc/code.html#Organization) and move into it:
14 |
15 | ```shell
16 | mkdir -p $GOPATH/src/github.com/weppos && cd $_
17 | git clone git@github.com:weppos/publicsuffix-go.git
18 | cd publicsuffix-go
19 | ```
20 |
21 | Fetch the dependencies:
22 |
23 | ```shell
24 | go get ./...
25 | ```
26 |
27 | Run the test suite.
28 |
29 | ```shell
30 | go test ./...
31 | ```
32 |
33 |
34 | ## Testing
35 |
36 | The following command runs the entire test suite.
37 |
38 | ```shell
39 | go test ./...
40 | ```
41 |
42 | There are 3 different test suites built into this library:
43 |
44 | - Acceptance: the acceptance test suite contains some high level tests to ensure the library behaves as expected
45 | - PSL: the PSL test suite runs the library against the [official Public Suffix test cases](https://github.com/publicsuffix/list/blob/master/tests/tests.txt)
46 | - Unit: the unit test suite stresses the various single components of this package
47 |
48 |
49 | ## Installation
50 |
51 | ```shell
52 | go get github.com/weppos/publicsuffix-go
53 | ```
54 |
55 |
56 | ## Usage
57 |
58 | This is a simple example that demonstrates how to use the package with the default options and the default Public Suffix list packaged with the library.
59 |
60 | ```go
61 | package main
62 |
63 | import (
64 | "fmt"
65 |
66 | "github.com/weppos/publicsuffix-go/publicsuffix"
67 | )
68 |
69 | func main() {
70 | // Extract the domain from a string
71 | // using the default list
72 | fmt.Println(publicsuffix.Domain("example.com")) // example.com
73 | fmt.Println(publicsuffix.Domain("www.example.com")) // example.com
74 | fmt.Println(publicsuffix.Domain("example.co.uk")) // example.co.uk
75 | fmt.Println(publicsuffix.Domain("www.example.co.uk")) // example.co.uk
76 |
77 | // Parse the domain from a string
78 | // using the default list
79 | fmt.Println(publicsuffix.Parse("example.com")) // &DomainName{"com", "example", ""}
80 | fmt.Println(publicsuffix.Parse("www.example.com")) // &DomainName{"com", "example", "www"}
81 | fmt.Println(publicsuffix.Parse("example.co.uk")) // &DomainName{"co.uk", "example", ""}
82 | fmt.Println(publicsuffix.Parse("www.example.co.uk")) // &DomainName{"co.uk", "example", "www"}
83 | }
84 | ```
85 |
86 | #### Ignoring Private Domains
87 |
88 | The PSL is composed of two lists of suffixes: IANA suffixes, and Private Domains.
89 |
90 | Private domains are submitted by private organizations. By default, private domains are not ignored.
91 | Sometimes, you want to ignore these domains and only query against the IANA suffixes. You have two options:
92 |
93 | 1. Ignore the domains at runtime
94 | 2. Create a custom list without the private domains
95 |
96 | In the first case, the private domains are ignored at runtime: they will still be included in the lists but the lookup will skip them when found.
97 |
98 | ```go
99 | publicsuffix.DomainFromListWithOptions(publicsuffix.DefaultList, "google.blogspot.com", nil)
100 | // google.blogspot.com
101 |
102 | publicsuffix.DomainFromListWithOptions(publicsuffix.DefaultList, "google.blogspot.com", &publicsuffix.FindOptions{IgnorePrivate: true})
103 | // blogspot.com
104 |
105 | // Note that the DefaultFindOptions includes the private domains by default
106 | publicsuffix.DomainFromListWithOptions(publicsuffix.DefaultList, "google.blogspot.com", publicsuffix.DefaultFindOptions)
107 | // google.blogspot.com
108 | ```
109 |
110 | This solution is easy, but slower. If you find yourself ignoring the private domains in all cases (or in most cases), you may want to create a custom list without the private domains.
111 |
112 | ```go
113 | list := NewListFromFile("path/to/list.txt", &publicsuffix.ParserOption{PrivateDomains: false})
114 | publicsuffix.DomainFromListWithOptions(list, "google.blogspot.com", nil)
115 | // blogspot.com
116 | ```
117 |
118 | ## IDN domains, A-labels and U-labels
119 |
120 | [A-label and U-label](https://tools.ietf.org/html/rfc5890#section-2.3.2.1) are two different ways to represent IDN domain names. These two encodings are also known as ASCII (A-label) or Punycode vs Unicode (U-label). Conversions between U-labels and A-labels are performed according to the ["Punycode" specification](https://tools.ietf.org/html/rfc3492), adding or removing the ACE prefix as needed.
121 |
122 | IDNA-aware applications generally use the A-label form for storing and manipulating data, whereas the U-labels can appear in presentation and user interface forms.
123 |
124 | Although the PSL list has been traditionally U-label encoded, this library follows the common industry standards and stores the rules in their A-label form. Therefore, unless explicitly mentioned, any method call, comparison or internal representation is expected to be ASCII-compatible encoded (ACE).
125 |
126 | Passing Unicode names to the library may either result in error or unexpected behaviors.
127 |
128 | If you are interested in the details of this decision, you can read the full discussion [here](https://github.com/weppos/publicsuffix-go/issues/31).
129 |
130 |
131 | ## Differences with `golang.org/x/net/publicsuffix`
132 |
133 | The [`golang.org/x/net/publicsuffix`](https://godoc.org/golang.org/x/net/publicsuffix) is a package part of the Golang `x/net` package, that provides a public suffix list implementation.
134 |
135 | The main difference is that the `x/net` package is optimized for speed, but it's less flexible. The list is compiled and embedded into the package itself. However, this is also the main downside.
136 | The [list is not frequently refreshed](https://github.com/letsencrypt/boulder/issues/1374#issuecomment-182429297), hence the results may be inaccurate, in particular if you heavily rely on the private domain section of the list. Changes in the IANA section are less frequent, whereas changes in the Private Domains section happen weekly.
137 |
138 | This package provides the following extra features:
139 |
140 | - Ability to load an arbitrary list at runtime (e.g. you can feed your own list, or create multiple lists)
141 | - Ability to create multiple lists
142 | - Ability to parse a domain using a previously defined list
143 | - Ability to add custom rules to an existing list, or merge/load rules from other lists (provided as file or string)
144 | - Advanced access to the list rules
145 | - Ability to ignore private domains at runtime, or when the list is parsed
146 |
147 | This package also aims for 100% compatibility with the `x/net` package. A special adapter is provided as a drop-in replacement. Simply change the include statement from
148 |
149 | ```go
150 | import (
151 | "golang.org/x/net/publicsuffix"
152 | )
153 | ```
154 |
155 | to
156 |
157 | ```go
158 | import (
159 | "github.com/weppos/publicsuffix-go/net/publicsuffix"
160 | )
161 | ```
162 |
163 | The `github.com/weppos/publicsuffix-go/net/publicsuffix` package defines the same methods defined in `golang.org/x/net/publicsuffix`, but these methods are implemented using the `github.com/weppos/publicsuffix-go/publicsuffix` package.
164 |
165 | Note that the adapter doesn't offer the flexibility of `github.com/weppos/publicsuffix-go/publicsuffix`, such as the ability to use multiple lists or disable private domains at runtime.
166 |
167 |
168 | ## `cookiejar.PublicSuffixList` interface
169 |
170 | This package implements the [`cookiejar.PublicSuffixList` interface](https://godoc.org/net/http/cookiejar#PublicSuffixList). It means it can be used as a value for the `PublicSuffixList` option when creating a `net/http/cookiejar`.
171 |
172 | ```go
173 | import (
174 | "net/http/cookiejar"
175 | "github.com/weppos/publicsuffix-go/publicsuffix"
176 | )
177 |
178 | deliciousJar, err := cookiejar.New(&cookiejar.Options{PublicSuffixList: publicsuffix.CookieJarList})
179 | ```
180 |
181 |
182 | ## License
183 |
184 | Copyright (c) 2016-2024 Simone Carletti. This is Free Software distributed under the MIT license.
185 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Supported Versions
4 |
5 | Security updates are provided only for the current minor version.
6 |
7 | If you are using a previous minor version, we recommend upgrading to the current minor version.
8 | This project uses [semantic versioning](https://semver.org/), therefore you can upgrade to a more recent minor version without incurring breaking changes.
9 |
10 | Exceptionally, we may support previous minor versions upon request if there are significant reasons preventing an immediate switch to the latest minor version.
11 |
12 | Older major versions are no longer supported.
13 |
14 |
15 | ## Reporting a Vulnerability
16 |
17 | To make a report, please email weppos@weppos.net.
18 |
19 | > [!IMPORTANT]
20 | > Please consider encrypting your report with GPG using the key [0x420da82a989398df](https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x420da82a989398df).
21 |
22 |
23 | ## Tracking Security Updates
24 |
25 | Information about security vulnerabilities is published in the [Security Advisories](https://github.com/weppos/publicsuffix-go/security/advisories) page.
26 |
--------------------------------------------------------------------------------
/cmd/gen/gen.go:
--------------------------------------------------------------------------------
1 | //go:build ignore
2 |
3 | // gen downloads an updated version of the PSL list and compiles it into go code.
4 | //
5 | // It is meant to be used by maintainers in conjunction with the go generate tool
6 | // to update the list.
7 | package main
8 |
9 | import (
10 | "context"
11 | "fmt"
12 | "os"
13 | "os/signal"
14 |
15 | "github.com/weppos/publicsuffix-go/publicsuffix/generator"
16 | )
17 |
18 | const (
19 | // where the rules will be written
20 | filename = "rules.go"
21 | )
22 |
23 | func main() {
24 | ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
25 | defer cancel()
26 |
27 | g := generator.NewGenerator()
28 | g.Verbose = true
29 | err := g.Write(ctx, filename)
30 | if err != nil {
31 | fmt.Fprintf(os.Stderr, "Error: %v\n", err)
32 | os.Exit(1)
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/cmd/load/main.go:
--------------------------------------------------------------------------------
1 | //go:build ignore
2 |
3 | package main
4 |
5 | import (
6 | "fmt"
7 | "time"
8 |
9 | "github.com/weppos/publicsuffix-go/publicsuffix"
10 | )
11 |
12 | func main() {
13 | startTime := time.Now()
14 | defer func() {
15 | elapsed := time.Since(startTime)
16 | elapsed -= elapsed % 1000000
17 | fmt.Printf("Time elapsed: %s\n", elapsed)
18 | }()
19 |
20 | fmt.Printf("%d rules loaded\n", publicsuffix.DefaultList.Size())
21 | }
22 |
--------------------------------------------------------------------------------
/fixtures/list-simple.txt:
--------------------------------------------------------------------------------
1 | // This Source Code Form is subject to the terms of the Mozilla Public
2 | // License, v. 2.0. If a copy of the MPL was not distributed with this
3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4 |
5 | // ===BEGIN ICANN DOMAINS===
6 |
7 | // ac : http://en.wikipedia.org/wiki/.ac
8 | ac
9 | com.ac
10 |
11 | // ===END ICANN DOMAINS===
12 | // ===BEGIN PRIVATE DOMAINS===
13 |
14 | // Google, Inc.
15 | blogspot.com
16 |
17 | // ===END PRIVATE DOMAINS===
18 |
--------------------------------------------------------------------------------
/fixtures/tests.txt:
--------------------------------------------------------------------------------
1 | // Any copyright is dedicated to the Public Domain.
2 | // https://creativecommons.org/publicdomain/zero/1.0/
3 |
4 | // null input.
5 | null null
6 | // Mixed case.
7 | COM null
8 | example.COM example.com
9 | WwW.example.COM example.com
10 | // Leading dot.
11 | .com null
12 | .example null
13 | .example.com null
14 | .example.example null
15 | // Unlisted TLD.
16 | example null
17 | example.example example.example
18 | b.example.example example.example
19 | a.b.example.example example.example
20 | // Listed, but non-Internet, TLD.
21 | //local null
22 | //example.local null
23 | //b.example.local null
24 | //a.b.example.local null
25 | // TLD with only 1 rule.
26 | biz null
27 | domain.biz domain.biz
28 | b.domain.biz domain.biz
29 | a.b.domain.biz domain.biz
30 | // TLD with some 2-level rules.
31 | com null
32 | example.com example.com
33 | b.example.com example.com
34 | a.b.example.com example.com
35 | uk.com null
36 | example.uk.com example.uk.com
37 | b.example.uk.com example.uk.com
38 | a.b.example.uk.com example.uk.com
39 | test.ac test.ac
40 | // TLD with only 1 (wildcard) rule.
41 | mm null
42 | c.mm null
43 | b.c.mm b.c.mm
44 | a.b.c.mm b.c.mm
45 | // More complex TLD.
46 | jp null
47 | test.jp test.jp
48 | www.test.jp test.jp
49 | ac.jp null
50 | test.ac.jp test.ac.jp
51 | www.test.ac.jp test.ac.jp
52 | kyoto.jp null
53 | test.kyoto.jp test.kyoto.jp
54 | ide.kyoto.jp null
55 | b.ide.kyoto.jp b.ide.kyoto.jp
56 | a.b.ide.kyoto.jp b.ide.kyoto.jp
57 | c.kobe.jp null
58 | b.c.kobe.jp b.c.kobe.jp
59 | a.b.c.kobe.jp b.c.kobe.jp
60 | city.kobe.jp city.kobe.jp
61 | www.city.kobe.jp city.kobe.jp
62 | // TLD with a wildcard rule and exceptions.
63 | ck null
64 | test.ck null
65 | b.test.ck b.test.ck
66 | a.b.test.ck b.test.ck
67 | www.ck www.ck
68 | www.www.ck www.ck
69 | // US K12.
70 | us null
71 | test.us test.us
72 | www.test.us test.us
73 | ak.us null
74 | test.ak.us test.ak.us
75 | www.test.ak.us test.ak.us
76 | k12.ak.us null
77 | test.k12.ak.us test.k12.ak.us
78 | www.test.k12.ak.us test.k12.ak.us
79 | // IDN labels.
80 | 食狮.com.cn 食狮.com.cn
81 | 食狮.公司.cn 食狮.公司.cn
82 | www.食狮.公司.cn 食狮.公司.cn
83 | shishi.公司.cn shishi.公司.cn
84 | 公司.cn null
85 | 食狮.中国 食狮.中国
86 | www.食狮.中国 食狮.中国
87 | shishi.中国 shishi.中国
88 | 中国 null
89 | // Same as above, but punycoded.
90 | xn--85x722f.com.cn xn--85x722f.com.cn
91 | xn--85x722f.xn--55qx5d.cn xn--85x722f.xn--55qx5d.cn
92 | www.xn--85x722f.xn--55qx5d.cn xn--85x722f.xn--55qx5d.cn
93 | shishi.xn--55qx5d.cn shishi.xn--55qx5d.cn
94 | xn--55qx5d.cn null
95 | xn--85x722f.xn--fiqs8s xn--85x722f.xn--fiqs8s
96 | www.xn--85x722f.xn--fiqs8s xn--85x722f.xn--fiqs8s
97 | shishi.xn--fiqs8s shishi.xn--fiqs8s
98 | xn--fiqs8s null
99 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/weppos/publicsuffix-go
2 |
3 | go 1.23.0
4 |
5 | toolchain go1.23.4
6 |
7 | require golang.org/x/net v0.39.0
8 |
9 | require golang.org/x/text v0.24.0 // indirect
10 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY=
2 | golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E=
3 | golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
4 | golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
5 |
--------------------------------------------------------------------------------
/net/publicsuffix/publicsuffix.go:
--------------------------------------------------------------------------------
1 | // Package publicsuffix is a drop-in replacement for the golang.org/x/net/publicsuffix
2 | // based on the weppos/publicsuffix package.
3 | package publicsuffix
4 |
5 | import (
6 | psl "github.com/weppos/publicsuffix-go/publicsuffix"
7 | )
8 |
9 | // PublicSuffix returns the public suffix of the domain
10 | // using a copy of the publicsuffix.org database packaged into this library.
11 | //
12 | // Note. To maintain compatibility with the golang.org/x/net/publicsuffix
13 | // this method doesn't return an error. However, in case of error,
14 | // the returned value is empty.
15 | func PublicSuffix(domain string) (publicSuffix string, icann bool) {
16 | //d, err := psl.Parse(domain)
17 | //if err != nil {
18 | // return "", false
19 | //}
20 | //
21 | //return d.Rule.Value, !d.Rule.Private
22 |
23 | rule := psl.DefaultList.Find(domain, nil)
24 | publicSuffix = rule.Decompose(domain)[1]
25 | icann = !rule.Private
26 |
27 | // x/net/publicsuffix sets icann to false when the default rule "*" is used
28 | if rule.Value == "" && rule.Type == psl.WildcardType {
29 | icann = false
30 | }
31 |
32 | return
33 | }
34 |
35 | // EffectiveTLDPlusOne returns the effective top level domain plus one more label.
36 | // For example, the eTLD+1 for "foo.bar.golang.org" is "golang.org".
37 | func EffectiveTLDPlusOne(domain string) (string, error) {
38 | return psl.Domain(domain)
39 | }
40 |
--------------------------------------------------------------------------------
/net/publicsuffix/publicsuffix_test.go:
--------------------------------------------------------------------------------
1 | package publicsuffix_test
2 |
3 | import (
4 | "testing"
5 |
6 | wpsl "github.com/weppos/publicsuffix-go/net/publicsuffix"
7 | xpsl "golang.org/x/net/publicsuffix"
8 | )
9 |
10 | func TestPublicSuffix(t *testing.T) {
11 | testCases := []string{
12 | "example.com",
13 | "www.example.com",
14 | "example.co.uk",
15 | "www.example.co.uk",
16 | "example.blogspot.com",
17 | "www.example.blogspot.com",
18 | "parliament.uk",
19 | "www.parliament.uk",
20 | // not listed
21 | "www.example.test",
22 | }
23 |
24 | for _, testCase := range testCases {
25 | ws, wb := wpsl.PublicSuffix(testCase)
26 | xs, xb := xpsl.PublicSuffix(testCase)
27 |
28 | if ws != xs || wb != xb {
29 | t.Errorf("PublicSuffix(%v): x/psl -> (%v, %v) != w/psl -> (%v, %v)", testCase, xs, xb, ws, wb)
30 | }
31 | }
32 | }
33 |
34 | func TestEffectiveTLDPlusOne(t *testing.T) {
35 | testCases := []string{
36 | "example.com",
37 | "www.example.com",
38 | "example.co.uk",
39 | "www.example.co.uk",
40 | "example.blogspot.com",
41 | "www.example.blogspot.com",
42 | "parliament.uk",
43 | "www.parliament.uk",
44 | // not listed
45 | "www.example.test",
46 | }
47 |
48 | for _, testCase := range testCases {
49 | ws, we := wpsl.EffectiveTLDPlusOne(testCase)
50 | xs, xe := xpsl.EffectiveTLDPlusOne(testCase)
51 |
52 | if ws != xs || we != xe {
53 | t.Errorf("EffectiveTLDPlusOne(%v): x/psl -> (%v, %v) != w/psl -> (%v, %v)", testCase, xs, xe, ws, we)
54 | }
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/publicsuffix/acceptance_test.go:
--------------------------------------------------------------------------------
1 | package publicsuffix
2 |
3 | import (
4 | "testing"
5 | )
6 |
7 | type validTestCase struct {
8 | input string
9 | domain string
10 | parsed *DomainName
11 | }
12 |
13 | func TestValid(t *testing.T) {
14 | testCases := []validTestCase{
15 | {"example.com", "example.com", &DomainName{"com", "example", "", MustNewRule("com")}},
16 | {"foo.example.com", "example.com", &DomainName{"com", "example", "foo", MustNewRule("com")}},
17 |
18 | {"verybritish.co.uk", "verybritish.co.uk", &DomainName{"co.uk", "verybritish", "", MustNewRule("*.uk")}},
19 | {"foo.verybritish.co.uk", "verybritish.co.uk", &DomainName{"co.uk", "verybritish", "foo", MustNewRule("*.uk")}},
20 |
21 | {"parliament.uk", "parliament.uk", &DomainName{"uk", "parliament", "", MustNewRule("!parliament.uk")}},
22 | {"foo.parliament.uk", "parliament.uk", &DomainName{"uk", "parliament", "foo", MustNewRule("!parliament.uk")}},
23 |
24 | {"foo.blogspot.com", "foo.blogspot.com", &DomainName{"blogspot.com", "foo", "", MustNewRule("blogspot.com")}},
25 | {"bar.foo.blogspot.com", "foo.blogspot.com", &DomainName{"blogspot.com", "foo", "bar", MustNewRule("blogspot.com")}},
26 | }
27 |
28 | for _, testCase := range testCases {
29 | got, err := Parse(testCase.input)
30 | if err != nil {
31 | t.Errorf("TestValid(%v) returned error: %v", testCase.input, err)
32 | }
33 | if want := testCase.parsed; want.String() != got.String() {
34 | t.Errorf("TestValid(%v) = %v, want %v", testCase.input, got, want)
35 | }
36 |
37 | str, err := Domain(testCase.input)
38 | if err != nil {
39 | t.Errorf("TestValid(%v) returned error: %v", testCase.input, err)
40 | }
41 | if want := testCase.domain; want != str {
42 | t.Errorf("TestValid(%v) = %v, want %v", testCase.input, str, want)
43 | }
44 | }
45 | }
46 |
// privateTestCase is one row of the TestIncludePrivate table.
type privateTestCase struct {
	// input is the name to look up; domain is the expected result.
	input, domain string
	// ignore is passed as FindOptions.IgnorePrivate;
	// error states whether the lookup is expected to fail.
	ignore, error bool
}
53 |
54 | func TestIncludePrivate(t *testing.T) {
55 | testCases := []privateTestCase{
56 | {"blogspot.com", "", false, true},
57 | {"blogspot.com", "blogspot.com", true, false},
58 |
59 | {"foo.blogspot.com", "foo.blogspot.com", false, false},
60 | {"foo.blogspot.com", "blogspot.com", true, false},
61 | }
62 |
63 | for _, testCase := range testCases {
64 | got, err := DomainFromListWithOptions(DefaultList, testCase.input, &FindOptions{IgnorePrivate: testCase.ignore})
65 |
66 | if testCase.error && err == nil {
67 | t.Errorf("TestIncludePrivate(%v) should have returned error, got: %v", testCase.input, got)
68 | continue
69 | }
70 | if !testCase.error && err != nil {
71 | t.Errorf("TestIncludePrivate(%v) returned error: %v", testCase.input, err)
72 | continue
73 | }
74 |
75 | if want := testCase.domain; want != got {
76 | t.Errorf("Domain(%v) = %v, want %v", testCase.input, got, want)
77 | }
78 | }
79 | }
80 |
// idnaTestCase is one row of the TestIDNA tables.
type idnaTestCase struct {
	// input is the name to look up; domain is the expected result.
	input, domain string
	// error states whether the lookup is expected to fail.
	error bool
}
86 |
87 | func TestIDNA(t *testing.T) {
88 | testACases := []idnaTestCase{
89 | // A-labels are supported
90 | // Check single IDN part
91 | {"xn--p1ai", "", true},
92 | {"example.xn--p1ai", "example.xn--p1ai", false},
93 | {"subdomain.example.xn--p1ai", "example.xn--p1ai", false},
94 | // Check multiple IDN parts
95 | {"xn--example--3bhk5a.xn--p1ai", "xn--example--3bhk5a.xn--p1ai", false},
96 | {"subdomain.xn--example--3bhk5a.xn--p1ai", "xn--example--3bhk5a.xn--p1ai", false},
97 | // Check multiple IDN rules
98 | {"example.xn--o1ach.xn--90a3ac", "example.xn--o1ach.xn--90a3ac", false},
99 | {"sudbomain.example.xn--o1ach.xn--90a3ac", "example.xn--o1ach.xn--90a3ac", false},
100 | }
101 |
102 | for _, testCase := range testACases {
103 | got, err := DomainFromListWithOptions(DefaultList, testCase.input, nil)
104 |
105 | if testCase.error && err == nil {
106 | t.Errorf("A-label %v should have returned error, got: %v", testCase.input, got)
107 | continue
108 | }
109 | if !testCase.error && err != nil {
110 | t.Errorf("A-label %v returned error: %v", testCase.input, err)
111 | continue
112 | }
113 |
114 | if want := testCase.domain; want != got {
115 | t.Errorf("A-label Domain(%v) = %v, want %v", testCase.input, got, want)
116 | }
117 | }
118 |
119 | // These tests validates the non-acceptance of U-labels.
120 | //
121 | // TODO(weppos): some tests are passing because of the default rule *
122 | // Consider to add some tests overriding the default rule to nil.
123 | // Right now, setting the default rule to nil with cause a panic if the lookup results in a nil.
124 | testUCases := []idnaTestCase{
125 | // U-labels are NOT supported
126 | // Check single IDN part
127 | {"рф", "", true},
128 | {"example.рф", "example.рф", false}, // passes because of *
129 | {"subdomain.example.рф", "example.рф", false}, // passes because of *
130 | // Check multiple IDN parts
131 | {"example-упр.рф", "example-упр.рф", false}, // passes because of *
132 | {"subdomain.example-упр.рф", "example-упр.рф", false}, // passes because of *
133 | // Check multiple IDN rules
134 | {"example.упр.срб", "упр.срб", false},
135 | {"sudbomain.example.упр.срб", "упр.срб", false},
136 | }
137 |
138 | for _, testCase := range testUCases {
139 | got, err := DomainFromListWithOptions(DefaultList, testCase.input, nil)
140 |
141 | if testCase.error && err == nil {
142 | t.Errorf("U-label %v should have returned error, got: %v", testCase.input, got)
143 | continue
144 | }
145 | if !testCase.error && err != nil {
146 | t.Errorf("U-label %v returned error: %v", testCase.input, err)
147 | continue
148 | }
149 |
150 | if want := testCase.domain; want != got {
151 | t.Errorf("U-label Domain(%v) = %v, want %v", testCase.input, got, want)
152 | }
153 | }
154 | }
155 |
156 | func TestFindRuleIANA(t *testing.T) {
157 | testCases := []struct {
158 | input, want string
159 | }{
160 | // TLD with only 1 rule.
161 | {"biz", "biz"},
162 | {"input.biz", "biz"},
163 | {"b.input.biz", "biz"},
164 |
165 | // The relevant {kobe,kyoto}.jp rules are:
166 | // jp
167 | // *.kobe.jp
168 | // !city.kobe.jp
169 | // kyoto.jp
170 | // ide.kyoto.jp
171 | {"jp", "jp"},
172 | {"kobe.jp", "jp"},
173 | {"c.kobe.jp", "c.kobe.jp"},
174 | {"b.c.kobe.jp", "c.kobe.jp"},
175 | {"a.b.c.kobe.jp", "c.kobe.jp"},
176 | {"city.kobe.jp", "kobe.jp"},
177 | {"www.city.kobe.jp", "kobe.jp"},
178 | {"kyoto.jp", "kyoto.jp"},
179 | {"test.kyoto.jp", "kyoto.jp"},
180 | {"ide.kyoto.jp", "ide.kyoto.jp"},
181 | {"b.ide.kyoto.jp", "ide.kyoto.jp"},
182 | {"a.b.ide.kyoto.jp", "ide.kyoto.jp"},
183 |
184 | // Domain with a private public suffix should return the ICANN public suffix.
185 | {"foo.compute-1.amazonaws.com", "com"},
186 | // Domain equal to a private public suffix should return the ICANN public suffix.
187 | {"cloudapp.net", "net"},
188 | }
189 |
190 | for _, tc := range testCases {
191 | rule := DefaultList.Find(tc.input, &FindOptions{IgnorePrivate: true, DefaultRule: nil})
192 |
193 | if rule == nil {
194 | t.Errorf("TestFindRuleIANA(%v) nil rule", tc.input)
195 | continue
196 | }
197 |
198 | suffix := rule.Decompose(tc.input)[1]
199 | // If the TLD is empty, it means name is actually a suffix.
200 | // In fact, decompose returns an array of empty strings in this case.
201 | if suffix == "" {
202 | suffix = tc.input
203 | }
204 |
205 | if suffix != tc.want {
206 | t.Errorf("TestFindRuleIANA(%v) = %v, want %v", tc.input, suffix, tc.want)
207 | }
208 | }
209 | }
210 |
--------------------------------------------------------------------------------
/publicsuffix/generator/gen.go:
--------------------------------------------------------------------------------
1 | // Package generator downloads an updated version of the PSL list and compiles it into go code.
2 | //
3 | // It is meant to be used by maintainers in conjunction with the go generate tool
4 | // to update the list.
5 | package generator
6 |
7 | import (
8 | "bytes"
9 | "context"
10 | "encoding/json"
11 | "fmt"
12 | "go/format"
13 | "io"
14 | "log"
15 | "net/http"
16 | "os"
17 | "strings"
18 | "text/template"
19 | "time"
20 |
21 | "github.com/weppos/publicsuffix-go/publicsuffix"
22 | )
23 |
const (
	// list is the text/template source for the generated Go file.
	// It is rendered with a value exposing VersionSHA, VersionDate and Rules,
	// and the rendered output is passed through go/format before being returned.
	// A backslash at the end of a template line is a continuation marker
	// that cont strips before the template is parsed.
	list = `// This file is automatically generated
// Run "go run cmd/gen/gen.go" to update the list.

package publicsuffix

const ListVersion = "PSL version {{.VersionSHA}} ({{.VersionDate}})"

func DefaultRules() [{{len .Rules}}]Rule {
return r
}

var r = [{{len .Rules}}]Rule{
{{range $r := .Rules}} \
{ {{$r.Type}}, "{{$r.Value}}", {{$r.Length}}, {{$r.Private}} },
{{end}}
}

func init() {
for i := range r {
DefaultList.AddRule(&r[i])
}
}

`
)
50 |
// listTmpl is the parsed form of the list template, with line continuations
// removed by cont. template.Must panics at package initialization if the
// template source is invalid.
var listTmpl = template.Must(template.New("list").Parse(cont(list)))
52 |
// cont joins continued lines in s by deleting every backslash that is
// immediately followed by a newline, together with that newline.
//
// It works around the template whitespace handling discussed in
// https://github.com/golang/go/issues/9969 (requires go1.6).
func cont(s string) string {
	const continuation = "\\\n"
	return strings.ReplaceAll(s, continuation, "")
}
58 |
// headInfo identifies the commit of the upstream list that is being compiled.
type headInfo struct {
	// SHA is the commit hash as reported by the GitHub API.
	SHA string
	// Datetime is the committer date of that commit.
	Datetime time.Time
}
63 |
// githubNodes is one element of the JSON array returned by the GitHub
// "list commits" endpoint. Only the fields read by extractHeadInfo are mapped.
type githubNodes struct {
	SHA    string       `json:"sha"`
	Commit githubCommit `json:"commit"`
}

// githubCommit maps the "commit" object of a commit entry.
type githubCommit struct {
	// Commiter (sic) is decoded from the "committer" key; the field name is
	// referenced by extractHeadInfo, so the spelling is kept.
	Commiter githubCommitter `json:"committer"`
}

// githubCommitter maps the "committer" object of a commit.
type githubCommitter struct {
	Date time.Time `json:"date"`
}
76 |
77 | func extractHeadInfo(ctx context.Context) (*headInfo, error) {
78 | req, err := http.NewRequestWithContext(ctx, "GET", "https://api.github.com/repos/publicsuffix/list/commits", nil)
79 | if err != nil {
80 | return nil, fmt.Errorf("http.NewRequestWithContext: %w", err)
81 | }
82 | req.Header.Set("Accept", "application/vnd.github+json")
83 | req.Header.Set("X-Github-Api-Version", "2022-11-28")
84 |
85 | resp, err := http.DefaultClient.Do(req)
86 | if err != nil {
87 | return nil, fmt.Errorf("http.DefaultClient.Do: %w", err)
88 | }
89 | defer resp.Body.Close()
90 |
91 | respString, err := io.ReadAll(resp.Body)
92 | if err != nil {
93 | return nil, fmt.Errorf("io.ReadAll: %w", err)
94 | }
95 |
96 | if resp.StatusCode != http.StatusOK {
97 | return nil, fmt.Errorf("unexpected status code: %d %s", resp.StatusCode, respString)
98 | }
99 |
100 | var respBody []githubNodes
101 | err = json.Unmarshal(respString, &respBody)
102 | if err != nil {
103 | return nil, fmt.Errorf("json.Unmarshal %s: %w", respBody, err)
104 | }
105 |
106 | if len(respBody) == 0 {
107 | return nil, fmt.Errorf("no nodes found")
108 | }
109 |
110 | return &headInfo{
111 | SHA: respBody[0].SHA,
112 | Datetime: respBody[0].Commit.Commiter.Date,
113 | }, nil
114 | }
115 |
// Generator downloads the Public Suffix List and renders it as Go source.
type Generator struct {
	// Verbose enables progress messages; when false the generator logs nothing.
	Verbose bool
}
120 |
121 | // NewGenerator creates a Generator with default settings.
122 | func NewGenerator() *Generator {
123 | g := &Generator{
124 | Verbose: false,
125 | }
126 | return g
127 | }
128 |
129 | // Write ...
130 | func (g *Generator) Write(ctx context.Context, filename string) error {
131 | content, err := g.generate(ctx)
132 | if err != nil {
133 | return err
134 | }
135 |
136 | g.log("Writing %v...\n", filename)
137 | return os.WriteFile(filename, content, 0o644)
138 | }
139 |
140 | // Print ...
141 | func (g *Generator) Print(ctx context.Context) error {
142 | content, err := g.generate(ctx)
143 | if err != nil {
144 | return err
145 | }
146 |
147 | _, err = os.Stdout.Write(content)
148 | return err
149 | }
150 |
151 | // Generate downloads an updated version of the PSL list and compiles it into go code.
152 | func (g *Generator) generate(ctx context.Context) ([]byte, error) {
153 | g.log("Fetching PSL version...\n")
154 | headInfo, err := extractHeadInfo(ctx)
155 | if err != nil {
156 | return nil, err
157 | }
158 |
159 | g.log("Downloading PSL %s...\n", headInfo.SHA[:6])
160 | reqURL := fmt.Sprintf("https://raw.githubusercontent.com/publicsuffix/list/%s/public_suffix_list.dat", headInfo.SHA)
161 |
162 | req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil)
163 | if err != nil {
164 | return nil, err
165 | }
166 |
167 | resp, err := http.DefaultClient.Do(req)
168 | if err != nil {
169 | return nil, err
170 | }
171 |
172 | defer resp.Body.Close()
173 |
174 | list := publicsuffix.NewList()
175 | rules, err := list.Load(resp.Body, nil)
176 | if err != nil {
177 | return nil, err
178 | }
179 |
180 | data := struct {
181 | VersionSHA string
182 | VersionDate string
183 | Rules []publicsuffix.Rule
184 | }{
185 | headInfo.SHA[:6],
186 | headInfo.Datetime.Format(time.ANSIC),
187 | rules,
188 | }
189 |
190 | g.log("Parsing PSL...\n")
191 | buf := new(bytes.Buffer)
192 | err = listTmpl.Execute(buf, &data)
193 | if err != nil {
194 | return nil, err
195 | }
196 |
197 | return format.Source(buf.Bytes())
198 | }
199 |
200 | func (g *Generator) log(format string, v ...interface{}) {
201 | if !g.Verbose {
202 | return
203 | }
204 |
205 | log.Printf(format, v...)
206 | }
207 |
--------------------------------------------------------------------------------
/publicsuffix/psl_test.go:
--------------------------------------------------------------------------------
1 | package publicsuffix
2 |
3 | import (
4 | "bufio"
5 | "os"
6 | "strings"
7 | "testing"
8 | )
9 |
// pslTestCase is one line of the PSL fixtures file.
type pslTestCase struct {
	// input is the name to look up; output is the expected domain.
	input, output string
	// error is true when the fixture expects "null", i.e. a failed lookup.
	error bool
}
15 |
16 | func TestPsl(t *testing.T) {
17 | f, err := os.Open("../fixtures/tests.txt")
18 | if err != nil {
19 | panic(err)
20 | }
21 | defer f.Close()
22 |
23 | testCases := []pslTestCase{}
24 |
25 | scanner := bufio.NewScanner(f)
26 | scanner.Split(bufio.ScanLines)
27 | for scanner.Scan() {
28 | line := scanner.Text()
29 | switch {
30 | case line == "":
31 | break
32 | case strings.HasPrefix(line, "//"):
33 | break
34 | default:
35 | xy := strings.Split(line, " ")
36 | tc := pslTestCase{}
37 | tc.input = xy[0]
38 | if xy[1] == "null" {
39 | tc.error = true
40 | } else {
41 | tc.error = false
42 | tc.output = xy[1]
43 | }
44 | testCases = append(testCases, tc)
45 | }
46 | }
47 |
48 | for _, testCase := range testCases {
49 | input, err := ToASCII(testCase.input)
50 | if err != nil {
51 | t.Fatalf("failed to convert input %v to ASCII", testCase.input)
52 | }
53 |
54 | output, err := ToASCII(testCase.output)
55 | if err != nil {
56 | t.Fatalf("failed to convert output %v to ASCII", testCase.output)
57 | }
58 |
59 | got, err := Domain(input)
60 |
61 | if testCase.error && err == nil {
62 | t.Errorf("PSL(%v) should have returned error, got: %v", testCase.input, got)
63 | continue
64 | }
65 | if !testCase.error && err != nil {
66 | t.Errorf("PSL(%v) returned error: %v", testCase.input, err)
67 | continue
68 | }
69 | if got != output {
70 | t.Errorf("PSL(%v) = %v, want %v", testCase.input, got, testCase.output)
71 | continue
72 | }
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/publicsuffix/publicsuffix.go:
--------------------------------------------------------------------------------
1 | //go:generate go run ../cmd/gen/gen.go
2 |
3 | // Package publicsuffix provides a domain name parser
4 | // based on data from the public suffix list http://publicsuffix.org/.
5 | // A public suffix is one under which Internet users can directly register names.
6 | package publicsuffix
7 |
8 | import (
9 | "bufio"
10 | "fmt"
11 | "io"
12 | "net/http/cookiejar"
13 | "os"
14 | "strings"
15 |
16 | "golang.org/x/net/idna"
17 | )
18 |
const (
	// Version identifies the current library version.
	// This is a pro forma convention given that Go dependencies
	// tend to be fetched directly from the repo.
	Version = "0.40.2"

	// NormalType represents a normal rule such as "com".
	NormalType = 1
	// WildcardType represents a wildcard rule such as "*.com".
	WildcardType = 2
	// ExceptionType represents an exception to a wildcard rule.
	ExceptionType = 3

	// listTokenPrivateDomains marks the line that opens the private section of a list.
	listTokenPrivateDomains = "===BEGIN PRIVATE DOMAINS==="
	// listTokenComment is the prefix of a comment line in a list.
	listTokenComment = "//"
)
35 |
36 | // DefaultList is the default List and it is used by Parse and Domain.
37 | var DefaultList = NewList()
38 |
39 | // DefaultRule is the default Rule that represents "*".
40 | var DefaultRule = MustNewRule("*")
41 |
42 | // DefaultParserOptions are the default options used to parse a Public Suffix list.
43 | var DefaultParserOptions = &ParserOption{PrivateDomains: true, ASCIIEncoded: false}
44 |
45 | // DefaultFindOptions are the default options used to perform the lookup of rules in the list.
46 | var DefaultFindOptions = &FindOptions{IgnorePrivate: false, DefaultRule: DefaultRule}
47 |
// Rule represents a single rule in a Public Suffix List.
type Rule struct {
	// Type is one of NormalType, WildcardType or ExceptionType.
	Type int
	// Value is the rule content without its leading "*." or "!" marker.
	Value string
	// Length is the number of dot-separated labels in Value,
	// plus one for wildcard rules.
	Length int
	// Private is true for rules read from the private section of the list.
	Private bool
}
55 |
// ParserOption are the options you can use to customize the way a List
// is parsed from a file or a string.
type ParserOption struct {
	// PrivateDomains controls whether the private section is loaded.
	// Set to false to stop parsing at the private domains marker.
	// DefaultParserOptions sets it to true, so private domains are included.
	PrivateDomains bool

	// ASCIIEncoded states whether the rules are already A-labels (ASCII).
	// Leave it false if the input contains U-labels (Unicode), in which case
	// each rule is converted with ToASCII before being stored.
	// DefaultParserOptions sets it to false because the original PSL
	// currently contains Unicode.
	ASCIIEncoded bool
}
69 |
// FindOptions are the options you can use to customize the way a Rule
// is searched within the list.
type FindOptions struct {
	// Set to true to ignore the rules within the "Private" section of the Public Suffix List.
	IgnorePrivate bool

	// The default rule to use when no rule matches the input.
	// The formal Public Suffix algorithm states that the rule "*" should be used when no other rule matches,
	// but some consumers may have different needs.
	// When nil, Find returns nil if nothing matches.
	DefaultRule *Rule
}
81 |
// List represents a Public Suffix List.
type List struct {
	// rules is kept private because you should not access rules directly.
	// It is keyed by Rule.Value, so adding a rule with an existing value
	// replaces the previous one.
	rules map[string]*Rule
}
87 |
88 | // NewList creates a new empty list.
89 | func NewList() *List {
90 | return &List{
91 | rules: map[string]*Rule{},
92 | }
93 | }
94 |
95 | // NewListFromString parses a string that represents a Public Suffix source
96 | // and returns a List initialized with the rules in the source.
97 | func NewListFromString(src string, options *ParserOption) (*List, error) {
98 | l := NewList()
99 | _, err := l.LoadString(src, options)
100 | return l, err
101 | }
102 |
103 | // NewListFromFile parses a string that represents a Public Suffix source
104 | // and returns a List initialized with the rules in the source.
105 | func NewListFromFile(path string, options *ParserOption) (*List, error) {
106 | l := NewList()
107 | _, err := l.LoadFile(path, options)
108 | return l, err
109 | }
110 |
// Load parses and loads a set of rules from an io.Reader into the current list.
//
// It returns the rules that were read, in input order. A nil options value
// selects DefaultParserOptions.
func (l *List) Load(r io.Reader, options *ParserOption) ([]Rule, error) {
	return l.parse(r, options)
}
115 |
116 | // LoadString parses and loads a set of rules from a String into the current list.
117 | func (l *List) LoadString(src string, options *ParserOption) ([]Rule, error) {
118 | r := strings.NewReader(src)
119 | return l.parse(r, options)
120 | }
121 |
122 | // LoadFile parses and loads a set of rules from a File into the current list.
123 | func (l *List) LoadFile(path string, options *ParserOption) ([]Rule, error) {
124 | f, err := os.Open(path)
125 | if err != nil {
126 | return nil, err
127 | }
128 | defer f.Close()
129 | return l.parse(f, options)
130 | }
131 |
132 | // AddRule adds a new rule to the list.
133 | //
134 | // The exact position of the rule into the list is unpredictable.
135 | // The list may be optimized internally for lookups, therefore the algorithm
136 | // will decide the best position for the new rule.
137 | func (l *List) AddRule(r *Rule) error {
138 | l.rules[r.Value] = r
139 | return nil
140 | }
141 |
// Size returns the size of the list, which is the number of rules.
// Rules sharing the same value are stored once and therefore counted once.
func (l *List) Size() int {
	return len(l.rules)
}
146 |
147 | func (l *List) parse(r io.Reader, options *ParserOption) ([]Rule, error) {
148 | if options == nil {
149 | options = DefaultParserOptions
150 | }
151 | var rules []Rule
152 |
153 | scanner := bufio.NewScanner(r)
154 | var section int // 1 == ICANN, 2 == PRIVATE
155 |
156 | Scanning:
157 | for scanner.Scan() {
158 | line := strings.TrimSpace(scanner.Text())
159 | switch {
160 |
161 | // skip blank lines
162 | case line == "":
163 | break
164 |
165 | // include private domains or stop scanner
166 | case strings.Contains(line, listTokenPrivateDomains):
167 | if !options.PrivateDomains {
168 | break Scanning
169 | }
170 | section = 2
171 |
172 | // skip comments
173 | case strings.HasPrefix(line, listTokenComment):
174 | break
175 |
176 | default:
177 | var rule *Rule
178 | var err error
179 |
180 | if options.ASCIIEncoded {
181 | rule, err = NewRule(line)
182 | } else {
183 | rule, err = NewRuleUnicode(line)
184 | }
185 | if err != nil {
186 | return []Rule{}, err
187 | }
188 |
189 | rule.Private = (section == 2)
190 | l.AddRule(rule)
191 | rules = append(rules, *rule)
192 | }
193 |
194 | }
195 |
196 | return rules, scanner.Err()
197 | }
198 |
199 | // Find and returns the most appropriate rule for the domain name.
200 | func (l *List) Find(name string, options *FindOptions) *Rule {
201 | if options == nil {
202 | options = DefaultFindOptions
203 | }
204 |
205 | part := name
206 | for {
207 | rule, ok := l.rules[part]
208 |
209 | if ok && rule.Match(name) && !(options.IgnorePrivate && rule.Private) {
210 | return rule
211 | }
212 |
213 | i := strings.IndexRune(part, '.')
214 | if i < 0 {
215 | return options.DefaultRule
216 | }
217 |
218 | part = part[i+1:]
219 | }
220 |
221 | }
222 |
223 | // NewRule parses the rule content, creates and returns a Rule.
224 | //
225 | // The content of the rule MUST be encoded in ASCII (A-labels).
226 | func NewRule(content string) (*Rule, error) {
227 | var rule *Rule
228 | var value string
229 |
230 | switch content[0] {
231 | case '*': // wildcard
232 | if content == "*" {
233 | value = ""
234 | } else {
235 | value = content[2:]
236 | }
237 | rule = &Rule{Type: WildcardType, Value: value, Length: len(Labels(value)) + 1}
238 | case '!': // exception
239 | value = content[1:]
240 | rule = &Rule{Type: ExceptionType, Value: value, Length: len(Labels(value))}
241 | default: // normal
242 | value = content
243 | rule = &Rule{Type: NormalType, Value: value, Length: len(Labels(value))}
244 | }
245 |
246 | return rule, nil
247 | }
248 |
249 | // NewRuleUnicode is like NewRule, but expects the content to be encoded in Unicode (U-labels).
250 | func NewRuleUnicode(content string) (*Rule, error) {
251 | var err error
252 |
253 | content, err = ToASCII(content)
254 | if err != nil {
255 | return nil, err
256 | }
257 |
258 | return NewRule(content)
259 | }
260 |
261 | // MustNewRule is like NewRule, but panics if the content cannot be parsed.
262 | func MustNewRule(content string) *Rule {
263 | rule, err := NewRule(content)
264 | if err != nil {
265 | panic(err)
266 | }
267 | return rule
268 | }
269 |
270 | // Match checks if the rule matches the name.
271 | //
272 | // A domain name is said to match a rule if and only if all of the following conditions are met:
273 | // - When the domain and rule are split into corresponding labels,
274 | // that the domain contains as many or more labels than the rule.
275 | // - Beginning with the right-most labels of both the domain and the rule,
276 | // and continuing for all labels in the rule, one finds that for every pair,
277 | // either they are identical, or that the label from the rule is "*".
278 | //
279 | // See https://publicsuffix.org/list/
280 | func (r *Rule) Match(name string) bool {
281 | left := strings.TrimSuffix(name, r.Value)
282 |
283 | // the name contains as many labels than the rule
284 | // this is a match, unless it's a wildcard
285 | // because the wildcard requires one more label
286 | if left == "" {
287 | return r.Type != WildcardType
288 | }
289 |
290 | // if there is one more label, the rule match
291 | // because either the rule is shorter than the domain
292 | // or the rule is a wildcard and there is one more label
293 | return left[len(left)-1:] == "."
294 | }
295 |
296 | // Decompose takes a name as input and decomposes it into a tuple of ,
297 | // according to the rule definition and type.
298 | func (r *Rule) Decompose(name string) (result [2]string) {
299 | if r == DefaultRule {
300 | i := strings.LastIndexByte(name, '.')
301 | if i < 0 {
302 | return
303 | }
304 | result[0], result[1] = name[:i], name[i+1:]
305 | return
306 | }
307 | switch r.Type {
308 | case NormalType:
309 | name = strings.TrimSuffix(name, r.Value)
310 | if len(name) == 0 {
311 | return
312 | }
313 | result[0], result[1] = name[:len(name)-1], r.Value
314 | case WildcardType:
315 | name := strings.TrimSuffix(name, r.Value)
316 | if len(name) == 0 {
317 | return
318 | }
319 | name = name[:len(name)-1]
320 | i := strings.LastIndexByte(name, '.')
321 | if i < 0 {
322 | return
323 | }
324 | result[0], result[1] = name[:i], name[i+1:]+"."+r.Value
325 | case ExceptionType:
326 | i := strings.IndexRune(r.Value, '.')
327 | if i < 0 {
328 | return
329 | }
330 | suffix := r.Value[i+1:]
331 | name = strings.TrimSuffix(name, suffix)
332 | if len(name) == 0 {
333 | return
334 | }
335 | result[0], result[1] = name[:len(name)-1], suffix
336 | }
337 | return
338 | }
339 |
// Labels splits the given domain name into its labels,
// i.e. the tokens separated by dots.
// An empty name yields a single empty label.
func Labels(name string) []string {
	const separator = "."
	return strings.Split(name, separator)
}
345 |
346 | // DomainName represents a domain name.
347 | type DomainName struct {
348 | TLD string
349 | SLD string
350 | TRD string
351 | Rule *Rule
352 | }
353 |
354 | // String joins the components of the domain name into a single string.
355 | // Empty labels are skipped.
356 | //
357 | // Examples:
358 | //
359 | // DomainName{"com", "example"}.String()
360 | // // example.com
361 | // DomainName{"com", "example", "www"}.String()
362 | // // www.example.com
363 | func (d *DomainName) String() string {
364 | switch {
365 | case d.TLD == "":
366 | return ""
367 | case d.SLD == "":
368 | return d.TLD
369 | case d.TRD == "":
370 | return d.SLD + "." + d.TLD
371 | default:
372 | return d.TRD + "." + d.SLD + "." + d.TLD
373 | }
374 | }
375 |
// Domain extracts and returns the domain name from the input
// using the default (Public Suffix) List and DefaultFindOptions.
//
// Examples:
//
//	publicsuffix.Domain("example.com")
//	// example.com
//	publicsuffix.Domain("www.example.com")
//	// example.com
//	publicsuffix.Domain("www.example.co.uk")
//	// example.co.uk
func Domain(name string) (string, error) {
	return DomainFromListWithOptions(DefaultList, name, DefaultFindOptions)
}
390 |
// Parse decomposes the name into TLD, SLD, TRD
// using the default (Public Suffix) List and DefaultFindOptions,
// and returns the result as a DomainName.
//
// Examples:
//
//	publicsuffix.Parse("example.com")
//	// &DomainName{"com", "example"}
//	publicsuffix.Parse("www.example.com")
//	// &DomainName{"com", "example", "www"}
//	publicsuffix.Parse("www.example.co.uk")
//	// &DomainName{"co.uk", "example"}
func Parse(name string) (*DomainName, error) {
	return ParseFromListWithOptions(DefaultList, name, DefaultFindOptions)
}
408 |
409 | // DomainFromListWithOptions extract and return the domain name from the input
410 | // using the (Public Suffix) list passed as argument.
411 | //
412 | // Examples:
413 | //
414 | // list := NewList()
415 | //
416 | // publicsuffix.DomainFromListWithOptions(list, "example.com")
417 | // // example.com
418 | // publicsuffix.DomainFromListWithOptions(list, "www.example.com")
419 | // // example.com
420 | // publicsuffix.DomainFromListWithOptions(list, "www.example.co.uk")
421 | // // example.co.uk
422 | func DomainFromListWithOptions(l *List, name string, options *FindOptions) (string, error) {
423 | dn, err := ParseFromListWithOptions(l, name, options)
424 | if err != nil {
425 | return "", err
426 | }
427 | return dn.SLD + "." + dn.TLD, nil
428 | }
429 |
430 | // ParseFromListWithOptions decomposes the name into TLD, SLD, TRD
431 | // using the (Public Suffix) list passed as argument,
432 | // and returns the result as a DomainName
433 | //
434 | // Examples:
435 | //
436 | // list := NewList()
437 | //
438 | // publicsuffix.ParseFromListWithOptions(list, "example.com")
439 | // // &DomainName{"com", "example"}
440 | // publicsuffix.ParseFromListWithOptions(list, "www.example.com")
441 | // // &DomainName{"com", "example", "www"}
442 | // publicsuffix.ParseFromListWithOptions(list, "www.example.co.uk")
443 | // // &DomainName{"co.uk", "example"}
444 | func ParseFromListWithOptions(l *List, name string, options *FindOptions) (*DomainName, error) {
445 | n, err := normalize(name)
446 | if err != nil {
447 | return nil, err
448 | }
449 |
450 | r := l.Find(n, options)
451 | if r == nil {
452 | return nil, fmt.Errorf("no rule matching name %s", name)
453 | }
454 |
455 | parts := r.Decompose(n)
456 | left, tld := parts[0], parts[1]
457 | if tld == "" {
458 | return nil, fmt.Errorf("%s is a suffix", n)
459 | }
460 |
461 | dn := &DomainName{
462 | Rule: r,
463 | TLD: tld,
464 | }
465 | if i := strings.LastIndexByte(left, '.'); i < 0 {
466 | dn.SLD = left
467 | } else {
468 | dn.TRD = left[:i]
469 | dn.SLD = left[i+1:]
470 | }
471 | return dn, nil
472 | }
473 |
// normalize lowercases name and rejects names that are empty
// or that begin with a dot.
func normalize(name string) (string, error) {
	lowered := strings.ToLower(name)

	switch {
	case lowered == "":
		return "", fmt.Errorf("name is blank")
	case lowered[0] == '.':
		return "", fmt.Errorf("name %s starts with a dot", lowered)
	}

	return lowered, nil
}
486 |
487 | // ToASCII is a wrapper for idna.ToASCII.
488 | //
489 | // This wrapper exists because idna.ToASCII backward-compatibility was broken twice in few months
490 | // and I can't call this package directly anymore. The wrapper performs some terrible-but-necessary
491 | // before-after replacements to make sure an already ASCII input always results in the same output
492 | // even if passed through ToASCII.
493 | //
494 | // See golang/net@67957fd0b1, golang/net@f2499483f9, golang/net@78ebe5c8b6,
495 | // and weppos/publicsuffix-go#66.
496 | func ToASCII(s string) (string, error) {
497 | // .example.com should be .example.com
498 | // ..example.com should be ..example.com
499 | if strings.HasPrefix(s, ".") {
500 | dotIndex := 0
501 | for i := 0; i < len(s); i++ {
502 | if s[i] == '.' {
503 | dotIndex = i
504 | } else {
505 | break
506 | }
507 | }
508 | out, err := idna.ToASCII(s[dotIndex+1:])
509 | out = s[:dotIndex+1] + out
510 | return out, err
511 | }
512 |
513 | return idna.ToASCII(s)
514 | }
515 |
// ToUnicode is a wrapper for idna.ToUnicode.
// Unlike ToASCII it forwards the input unchanged and returns the result as-is.
//
// See ToASCII for more details about why this wrapper exists.
func ToUnicode(s string) (string, error) {
	return idna.ToUnicode(s)
}
522 |
// CookieJarList implements the cookiejar.PublicSuffixList interface.
// It is backed by DefaultList.
var CookieJarList cookiejar.PublicSuffixList = cookiejarList{DefaultList}

// cookiejarList adapts a List to the cookiejar.PublicSuffixList interface.
type cookiejarList struct {
	// List is the list the suffix lookups are performed against.
	List *List
}
529 |
530 | // PublicSuffix implements cookiejar.PublicSuffixList.
531 | func (l cookiejarList) PublicSuffix(domain string) string {
532 | rule := l.List.Find(domain, nil)
533 | return rule.Decompose(domain)[1]
534 | }
535 |
// String implements cookiejar.PublicSuffixList.
// It returns ListVersion, which describes the source of the list.
func (cookiejarList) String() string {
	return ListVersion
}
540 |
--------------------------------------------------------------------------------
/publicsuffix/publicsuffix_test.go:
--------------------------------------------------------------------------------
1 | package publicsuffix
2 |
3 | import (
4 | "reflect"
5 | "testing"
6 |
7 | xlib "golang.org/x/net/publicsuffix"
8 | )
9 |
10 | func TestNewListFromString(t *testing.T) {
11 | src := `
12 | // This Source Code Form is subject to the terms of the Mozilla Public
13 | // License, v. 2.0. If a copy of the MPL was not distributed with this
14 | // file, You can obtain one at https://mozilla.org/MPL/2.0/.
15 |
16 | // ===BEGIN ICANN DOMAINS===
17 |
18 | // ac : http://en.wikipedia.org/wiki/.ac
19 | ac
20 | com.ac
21 |
22 | // ===END ICANN DOMAINS===
23 | // ===BEGIN PRIVATE DOMAINS===
24 |
25 | // Google, Inc.
26 | blogspot.com
27 |
28 | // ===END PRIVATE DOMAINS===
29 | `
30 |
31 | list, err := NewListFromString(src, nil)
32 | if err != nil {
33 | t.Fatalf("Parse returned an error: %v", err)
34 | }
35 |
36 | if want, got := 3, list.Size(); want != got {
37 | t.Errorf("Parse returned a list with %v rules, want %v", got, want)
38 | t.Fatalf("%v", list.rules)
39 | }
40 |
41 | rules := list.rules
42 | var testRules []Rule
43 |
44 | testRules = []Rule{}
45 | for _, rule := range rules {
46 | if rule.Private == false {
47 | testRules = append(testRules, *rule)
48 | }
49 | }
50 | if want, got := 2, len(testRules); want != got {
51 | t.Errorf("Parse returned a list with %v IANA rules, want %v", got, want)
52 | t.Fatalf("%v", testRules)
53 | }
54 |
55 | testRules = []Rule{}
56 | for _, rule := range rules {
57 | if rule.Private == true {
58 | testRules = append(testRules, *rule)
59 | }
60 | }
61 | if want, got := 1, len(testRules); want != got {
62 | t.Errorf("Parse returned a list with %v PRIVATE rules, want %v", got, want)
63 | t.Fatalf("%v", testRules)
64 | }
65 | }
66 |
67 | func TestNewListFromString_IDNAInputIsUnicode(t *testing.T) {
68 | src := `
69 | // xn--d1alf ("mkd", Macedonian) : MK
70 | // MARnet
71 | мкд
72 |
73 | // xn--l1acc ("mon", Mongolian) : MN
74 | xn--l1acc
75 | `
76 |
77 | list, err := NewListFromString(src, nil)
78 | if err != nil {
79 | t.Fatalf("Parse returned error: %v", err)
80 | }
81 |
82 | if want, got := 2, list.Size(); want != got {
83 | t.Errorf("Parse returned a list with %v rules, want %v", got, want)
84 | t.Fatalf("%v", list.rules)
85 | }
86 |
87 | if rule := list.Find("hello.xn--d1alf", &FindOptions{DefaultRule: nil}); rule == nil {
88 | t.Fatalf("Find(%v) returned nil", "hello.xn--d1alf")
89 | }
90 | if rule := list.Find("hello.мкд", &FindOptions{DefaultRule: nil}); rule != nil {
91 | t.Fatalf("Find(%v) expected to return nil, got %v", "hello.xn--d1alf", rule)
92 | }
93 | if rule := list.Find("hello.xn--l1acc", &FindOptions{DefaultRule: nil}); rule == nil {
94 | t.Fatalf("Find(%v) returned nil", "hello.xn--l1acc")
95 | }
96 | }
97 |
98 | func TestNewListFromString_IDNAInputIsAscii(t *testing.T) {
99 | src := `
100 | // xn--d1alf ("mkd", Macedonian) : MK
101 | // MARnet
102 | xn--d1alf
103 |
104 | // xn--l1acc ("mon", Mongolian) : MN
105 | xn--l1acc
106 | `
107 |
108 | list, err := NewListFromString(src, &ParserOption{ASCIIEncoded: true})
109 | if err != nil {
110 | t.Fatalf("Parse returned error: %v", err)
111 | }
112 |
113 | if want, got := 2, list.Size(); want != got {
114 | t.Errorf("Parse returned a list with %v rules, want %v", got, want)
115 | t.Fatalf("%v", list.rules)
116 | }
117 |
118 | if rule := list.Find("hello.xn--d1alf", &FindOptions{DefaultRule: nil}); rule == nil {
119 | t.Fatalf("Find(%v) returned nil", "hello.xn--d1alf")
120 | }
121 | if rule := list.Find("hello.мкд", &FindOptions{DefaultRule: nil}); rule != nil {
122 | t.Fatalf("Find(%v) expected to return nil, got %v", "hello.xn--d1alf", rule)
123 | }
124 | if rule := list.Find("hello.xn--l1acc", &FindOptions{DefaultRule: nil}); rule == nil {
125 | t.Fatalf("Find(%v) returned nil", "hello.xn--l1acc")
126 | }
127 | }
128 |
129 | func TestNewListFromFile(t *testing.T) {
130 | list, err := NewListFromFile("../fixtures/list-simple.txt", nil)
131 | if err != nil {
132 | t.Fatalf("Parse returned an error: %v", err)
133 | }
134 |
135 | if want, got := 3, list.Size(); want != got {
136 | t.Errorf("Parse returned a list with %v rules, want %v", got, want)
137 | t.Fatalf("%v", list.rules)
138 | }
139 |
140 | rules := list.rules
141 | var testRules []Rule
142 |
143 | testRules = []Rule{}
144 | for _, rule := range rules {
145 | if rule.Private == false {
146 | testRules = append(testRules, *rule)
147 | }
148 | }
149 | if want, got := 2, len(testRules); want != got {
150 | t.Errorf("Parse returned a list with %v IANA rules, want %v", got, want)
151 | t.Fatalf("%v", testRules)
152 | }
153 |
154 | testRules = []Rule{}
155 | for _, rule := range rules {
156 | if rule.Private == true {
157 | testRules = append(testRules, *rule)
158 | }
159 | }
160 | if want, got := 1, len(testRules); want != got {
161 | t.Errorf("Parse returned a list with %v PRIVATE rules, want %v", got, want)
162 | t.Fatalf("%v", testRules)
163 | }
164 | }
165 |
166 | func TestListAddRule(t *testing.T) {
167 | list := NewList()
168 |
169 | if list.Size() != 0 {
170 | t.Fatalf("Empty list should have 0 rules, got %v", list.Size())
171 | }
172 |
173 | rule := MustNewRule("com")
174 | list.AddRule(rule)
175 | if list.Size() != 1 {
176 | t.Fatalf("List should have 1 rule, got %v", list.Size())
177 | }
178 | for _, got := range list.rules {
179 | if !reflect.DeepEqual(rule, got) {
180 | t.Fatalf("List[0] expected to be %v, got %v", rule, got)
181 | }
182 | }
183 | }
184 |
185 | type listFindTestCase struct {
186 | input string
187 | expected *Rule
188 | }
189 |
190 | func TestListFind(t *testing.T) {
191 | src := `
192 | // This Source Code Form is subject to the terms of the Mozilla Public
193 | // License, v. 2.0. If a copy of the MPL was not distributed with this
194 | // file, You can obtain one at https://mozilla.org/MPL/2.0/.
195 |
196 | // ===BEGIN ICANN DOMAINS===
197 |
198 | // com
199 | com
200 |
201 | // uk
202 | *.uk
203 | *.sch.uk
204 | !bl.uk
205 | !british-library.uk
206 |
207 | // io
208 | io
209 |
210 | // jp
211 | jp
212 | *.kawasaki.jp
213 | *.kitakyushu.jp
214 | *.kobe.jp
215 | *.nagoya.jp
216 | *.sapporo.jp
217 | *.sendai.jp
218 | *.yokohama.jp
219 | !city.kawasaki.jp
220 | !city.kitakyushu.jp
221 | !city.kobe.jp
222 | !city.nagoya.jp
223 | !city.sapporo.jp
224 | !city.sendai.jp
225 | !city.yokohama.jp
226 |
227 | // ===END ICANN DOMAINS===
228 | // ===BEGIN PRIVATE DOMAINS===
229 |
230 | // Google, Inc.
231 | blogspot.com
232 |
233 | // ===END PRIVATE DOMAINS===
234 | `
235 |
236 | // TODO(weppos): ability to set type to a rule.
237 | p1 := MustNewRule("blogspot.com")
238 | p1.Private = true
239 |
240 | testCases := []listFindTestCase{
241 | // match standard
242 | {"example.com", MustNewRule("com")},
243 | {"foo.example.com", MustNewRule("com")},
244 |
245 | // match wildcard
246 | {"example.uk", MustNewRule("*.uk")},
247 | {"example.co.uk", MustNewRule("*.uk")},
248 | {"foo.example.co.uk", MustNewRule("*.uk")},
249 |
250 | // match exception
251 | {"british-library.uk", MustNewRule("!british-library.uk")},
252 | {"foo.british-library.uk", MustNewRule("!british-library.uk")},
253 |
254 | // match default rule
255 | {"test", DefaultRule},
256 | {"example.test", DefaultRule},
257 | {"foo.example.test", DefaultRule},
258 |
259 | // match private
260 | {"blogspot.com", p1},
261 | {"foo.blogspot.com", p1},
262 |
263 | // input is wildcard rule
264 | {"kobe.jp", MustNewRule("jp")},
265 | }
266 |
267 | list, err := NewListFromString(src, nil)
268 | if err != nil {
269 | t.Fatalf("Unable to parse list: %v", err)
270 | }
271 |
272 | for _, testCase := range testCases {
273 | if want, got := testCase.expected, list.Find(testCase.input, nil); !reflect.DeepEqual(want, got) {
274 | t.Errorf("Find(%v) = %v, want %v", testCase.input, got, want)
275 | }
276 | }
277 | }
278 |
279 | func TestNewRule_Normal(t *testing.T) {
280 | rule := MustNewRule("com")
281 | want := &Rule{Type: NormalType, Value: "com", Length: 1}
282 |
283 | if !reflect.DeepEqual(want, rule) {
284 | t.Fatalf("NewRule returned %v, want %v", rule, want)
285 | }
286 | }
287 |
288 | func TestNewRule_Wildcard(t *testing.T) {
289 | rule := MustNewRule("*.example.com")
290 | want := &Rule{Type: WildcardType, Value: "example.com", Length: 3}
291 |
292 | if !reflect.DeepEqual(want, rule) {
293 | t.Fatalf("NewRule returned %v, want %v", rule, want)
294 | }
295 | }
296 |
297 | func TestNewRule_Exception(t *testing.T) {
298 | rule := MustNewRule("!example.com")
299 | want := &Rule{Type: ExceptionType, Value: "example.com", Length: 2}
300 |
301 | if !reflect.DeepEqual(want, rule) {
302 | t.Fatalf("NewRule returned %v, want %v", rule, want)
303 | }
304 | }
305 |
306 | func TestNewRule_FromASCII(t *testing.T) {
307 | rule, _ := NewRule("xn--l1acc")
308 |
309 | if want := "xn--l1acc"; rule.Value != want {
310 | t.Fatalf("NewRule == %v, want %v", rule.Value, want)
311 | }
312 | }
313 | func TestNewRule_FromUnicode(t *testing.T) {
314 | rule, _ := NewRule("мон")
315 |
316 | // No transformation is performed
317 | if want := "мон"; rule.Value != want {
318 | t.Fatalf("NewRule == %v, want %v", rule.Value, want)
319 | }
320 | }
321 |
322 | func TestNewRuleUnicode_FromASCII(t *testing.T) {
323 | rule, _ := NewRuleUnicode("xn--l1acc")
324 |
325 | if want := "xn--l1acc"; rule.Value != want {
326 | t.Fatalf("NewRule == %v, want %v", rule.Value, want)
327 | }
328 | }
329 |
330 | func TestNewRuleUnicode_FromUnicode(t *testing.T) {
331 | rule, _ := NewRuleUnicode("мон")
332 |
333 | if want := "xn--l1acc"; rule.Value != want {
334 | t.Fatalf("NewRule == %v, want %v", rule.Value, want)
335 | }
336 | }
337 |
338 | type ruleMatchTestCase struct {
339 | rule *Rule
340 | input string
341 | expected bool
342 | }
343 |
344 | func TestRuleMatch(t *testing.T) {
345 | testCases := []ruleMatchTestCase{
346 | // standard match
347 | {MustNewRule("uk"), "uk", true},
348 | {MustNewRule("uk"), "example.uk", true},
349 | {MustNewRule("uk"), "example.co.uk", true},
350 | {MustNewRule("co.uk"), "example.co.uk", true},
351 |
352 | // special rules match
353 | {MustNewRule("*.com"), "com", false},
354 | {MustNewRule("*.com"), "example.com", true},
355 | {MustNewRule("*.com"), "foo.example.com", true},
356 | {MustNewRule("!example.com"), "com", false},
357 | {MustNewRule("!example.com"), "example.com", true},
358 | {MustNewRule("!example.com"), "foo.example.com", true},
359 |
360 | // TLD mismatch
361 | {MustNewRule("gk"), "example.uk", false},
362 | {MustNewRule("gk"), "example.co.uk", false},
363 |
364 | // general mismatch
365 | {MustNewRule("uk.co"), "example.co.uk", false},
366 | {MustNewRule("go.uk"), "example.co.uk", false},
367 | // rule is longer than input, should not match
368 | {MustNewRule("co.uk"), "uk", false},
369 |
370 | // partial matches/mismatches
371 | {MustNewRule("co"), "example.co.uk", false},
372 | {MustNewRule("example"), "example.uk", false},
373 | {MustNewRule("le.it"), "example.it", false},
374 | {MustNewRule("le.it"), "le.it", true},
375 | {MustNewRule("le.it"), "foo.le.it", true},
376 | }
377 |
378 | for _, testCase := range testCases {
379 | if testCase.rule.Match(testCase.input) != testCase.expected {
380 | t.Errorf("Expected %v to %v match %v", testCase.rule.Value, testCase.expected, testCase.input)
381 | }
382 | }
383 | }
384 |
385 | type ruleDecomposeTestCase struct {
386 | rule *Rule
387 | input string
388 | expected [2]string
389 | }
390 |
391 | func TestRuleDecompose(t *testing.T) {
392 | testCases := []ruleDecomposeTestCase{
393 | {MustNewRule("com"), "com", [2]string{"", ""}},
394 | {MustNewRule("com"), "example.com", [2]string{"example", "com"}},
395 | {MustNewRule("com"), "foo.example.com", [2]string{"foo.example", "com"}},
396 |
397 | {MustNewRule("!british-library.uk"), "uk", [2]string{"", ""}},
398 | {MustNewRule("!british-library.uk"), "british-library.uk", [2]string{"british-library", "uk"}},
399 | {MustNewRule("!british-library.uk"), "foo.british-library.uk", [2]string{"foo.british-library", "uk"}},
400 |
401 | {MustNewRule("*.com"), "com", [2]string{"", ""}},
402 | {MustNewRule("*.com"), "example.com", [2]string{"", ""}},
403 | {MustNewRule("*.com"), "foo.example.com", [2]string{"foo", "example.com"}},
404 | {MustNewRule("*.com"), "bar.foo.example.com", [2]string{"bar.foo", "example.com"}},
405 | }
406 |
407 | for _, testCase := range testCases {
408 | if got := testCase.rule.Decompose(testCase.input); !reflect.DeepEqual(got, testCase.expected) {
409 | t.Errorf("Expected %v to decompose %v into %v, got %v", testCase.rule.Value, testCase.input, testCase.expected, got)
410 | }
411 | }
412 | }
413 |
414 | func TestLabels(t *testing.T) {
415 | testCases := map[string][]string{
416 | "com": {"com"},
417 | "example.com": {"example", "com"},
418 | "www.example.com": {"www", "example", "com"},
419 | }
420 |
421 | for input, expected := range testCases {
422 | if output := Labels(input); !reflect.DeepEqual(output, expected) {
423 | t.Errorf("Labels(%v) = %v, want %v", input, output, expected)
424 | }
425 | }
426 | }
427 |
428 | func TestParseFromListWithOptions_RuleFound(t *testing.T) {
429 | list := NewList()
430 | rule := MustNewRule("com")
431 | _ = list.AddRule(rule)
432 |
433 | input := "foobar.com"
434 |
435 | got, err := ParseFromListWithOptions(list, "foobar.com", &FindOptions{IgnorePrivate: true})
436 | if err != nil {
437 | t.Fatalf("ParseFromListWithOptions(%v) error: %v", input, err)
438 | }
439 |
440 | want := &DomainName{TLD: "com", SLD: "foobar", Rule: rule}
441 | if !reflect.DeepEqual(want, got) {
442 | t.Errorf("ParseFromListWithOptions(%v) = %v, want %v", input, got, want)
443 | }
444 | }
445 |
446 | func TestParseFromListWithOptions_RuleNotFoundDefaultNil(t *testing.T) {
447 | list := NewList()
448 | rule := MustNewRule("com")
449 | _ = list.AddRule(rule)
450 |
451 | input := "foobar.localdomain"
452 |
453 | _, err := ParseFromListWithOptions(list, "foobar.localdomain", &FindOptions{IgnorePrivate: true})
454 | if err == nil {
455 | t.Fatalf("ParseFromListWithOptions(%v) should have returned error", input)
456 | }
457 |
458 | if want := "no rule matching name foobar.localdomain"; err.Error() != want {
459 | t.Errorf("Error expected to be %v, got %v", want, err)
460 | }
461 | }
462 |
463 | func TestParseFromListWithOptions_RuleNotFoundDefaultRule(t *testing.T) {
464 | list := NewList()
465 | rule := MustNewRule("com")
466 | _ = list.AddRule(rule)
467 |
468 | input := "foobar.localdomain"
469 |
470 | got, err := ParseFromListWithOptions(list, "foobar.localdomain", &FindOptions{IgnorePrivate: true, DefaultRule: DefaultRule})
471 | if err != nil {
472 | t.Fatalf("ParseFromListWithOptions(%v) error: %v", input, err)
473 | }
474 |
475 | want := &DomainName{TLD: "localdomain", SLD: "foobar", Rule: DefaultRule}
476 | if !reflect.DeepEqual(want, got) {
477 | t.Errorf("ParseFromListWithOptions(%v) = %v, want %v", input, got, want)
478 | }
479 | }
480 |
481 | func TestToASCII(t *testing.T) {
482 | testCases := []string{
483 | "example.com",
484 | ".example.com",
485 | "..example.com",
486 | }
487 |
488 | for _, input := range testCases {
489 | output, err := ToASCII(input)
490 | if err != nil {
491 | t.Errorf("ToASCII(%s) returned error", input)
492 | }
493 | if output != input {
494 | t.Errorf("ToASCII(%s) = %s, want %s", input, output, input)
495 | }
496 | }
497 | }
498 |
499 | func TestCookieJarList(t *testing.T) {
500 | testCases := map[string]string{
501 | "example.com": "com",
502 | "www.example.com": "com",
503 | "example.co.uk": "co.uk",
504 | "www.example.co.uk": "co.uk",
505 | "example.blogspot.com": "blogspot.com",
506 | "www.example.blogspot.com": "blogspot.com",
507 | "parliament.uk": "uk",
508 | "www.parliament.uk": "uk",
509 | // not listed
510 | "www.example.test": "test",
511 | }
512 |
513 | for input, suffix := range testCases {
514 | if output := CookieJarList.PublicSuffix(input); output != suffix {
515 | t.Errorf("CookieJarList.PublicSuffix(%v) = %v, want %v", input, output, suffix)
516 | }
517 | }
518 | }
519 |
520 | var benchmarkTestCases = map[string]string{
521 | "example.com": "example.com",
522 | "example.id.au": "example.id.au",
523 | "www.ck": "www.ck",
524 | "foo.bar.xn--55qx5d.cn": "bar.xn--55qx5d.cn",
525 | "a.b.c.minami.fukuoka.jp": "c.minami.fukuoka.jp",
526 | "posts-and-telecommunications.museum": "",
527 | "www.example.pvt.k12.ma.us": "example.pvt.k12.ma.us",
528 | "many.lol": "many.lol",
529 | "the.russian.for.moscow.is.xn--80adxhks": "is.xn--80adxhks",
530 | "blah.blah.s3-us-west-1.amazonaws.com": "blah.s3-us-west-1.amazonaws.com",
531 | "thing.dyndns.org": "thing.dyndns.org",
532 | "nosuchtld": "",
533 | }
534 |
535 | func benchmarkDomain(b *testing.B, domainFunc func(string) (string, error)) {
536 | var got string
537 | for i := 0; i < b.N; i++ {
538 | for input := range benchmarkTestCases {
539 | got, _ = domainFunc(input)
540 | }
541 | }
542 | _ = got
543 | }
544 |
545 | func BenchmarkDomain(b *testing.B) {
546 | benchmarkDomain(b, Domain)
547 | }
548 |
549 | func BenchmarkXNet(b *testing.B) {
550 | benchmarkDomain(b, xlib.EffectiveTLDPlusOne)
551 | }
552 |
--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Runs `go test` with the race detector for every package listed by
# `go list ./...` (excluding paths containing "vendor") and appends each
# package's coverage profile to coverage.txt.

set -e

coverage_file=coverage.txt
profile_file=profile.out

# Start from a coverage file that holds a single empty line.
echo "" > "$coverage_file"

for pkg in $(go list ./... | grep -v vendor); do
    go test -v -race -coverprofile="$profile_file" -covermode=atomic "$pkg"
    # A profile is only present when the package produced one.
    if [ -f "$profile_file" ]; then
        cat "$profile_file" >> "$coverage_file"
        rm "$profile_file"
    fi
done
13 |
--------------------------------------------------------------------------------