├── .codecov.yml ├── .github ├── FUNDING.yml ├── dependabot.yml └── workflows │ ├── codeql-analysis.yml │ ├── psl-update.yml │ └── tests.yml ├── CHANGELOG.md ├── LICENSE.txt ├── Makefile ├── README.md ├── SECURITY.md ├── cmd ├── gen │ └── gen.go └── load │ └── main.go ├── fixtures ├── list-simple.txt └── tests.txt ├── go.mod ├── go.sum ├── net └── publicsuffix │ ├── publicsuffix.go │ └── publicsuffix_test.go ├── publicsuffix ├── acceptance_test.go ├── generator │ └── gen.go ├── psl_test.go ├── publicsuffix.go ├── publicsuffix_test.go └── rules.go └── test.sh /.codecov.yml: -------------------------------------------------------------------------------- 1 | # https://docs.codecov.io/docs/coverage-configuration 2 | coverage: 3 | precision: 1 4 | round: down 5 | 6 | coverage: 7 | status: 8 | project: 9 | default: false 10 | patch: 11 | default: false 12 | 13 | # https://docs.codecov.io/docs/pull-request-comments#section-requiring-changes 14 | comment: off 15 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [weppos] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | 
-------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: gomod 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "04:00" 8 | open-pull-requests-limit: 10 9 | labels: 10 | - dependencies 11 | 12 | - package-ecosystem: "github-actions" 13 | directory: "/" 14 | schedule: 15 | interval: "daily" 16 | time: "04:00" 17 | open-pull-requests-limit: 10 18 | labels: 19 | - dependencies 20 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | name: "CodeQL" 7 | 8 | on: 9 | push: 10 | branches: [ master ] 11 | pull_request: 12 | # The branches below must be a subset of the branches above 13 | branches: [ master ] 14 | schedule: 15 | - cron: '40 21 * * 0' 16 | 17 | jobs: 18 | analyze: 19 | name: Analyze 20 | runs-on: ubuntu-latest 21 | permissions: 22 | actions: read 23 | contents: read 24 | security-events: write 25 | 26 | strategy: 27 | fail-fast: false 28 | matrix: 29 | language: [ 'go' ] 30 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 31 | # Learn more about CodeQL language support at https://git.io/codeql-language-support 32 | 33 | steps: 34 | - name: Checkout repository 35 | uses: actions/checkout@v4 36 | 37 | # Initializes the CodeQL tools for scanning. 
38 | - name: Initialize CodeQL 39 | uses: github/codeql-action/init@v3 40 | with: 41 | languages: ${{ matrix.language }} 42 | # If you wish to specify custom queries, you can do so here or in a config file. 43 | # By default, queries listed here will override any specified in a config file. 44 | # Prefix the list here with "+" to use these queries and those in the config file. 45 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 46 | 47 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 48 | # If this step fails, then you should remove it and run the build manually (see below) 49 | - name: Autobuild 50 | uses: github/codeql-action/autobuild@v3 51 | 52 | # ℹ️ Command-line programs to run using the OS shell. 53 | # 📚 https://git.io/JvXDl 54 | 55 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 56 | # and modify them (or add more) to build your code if your project 57 | # uses a compiled language 58 | 59 | #- run: | 60 | # make bootstrap 61 | # make release 62 | 63 | - name: Perform CodeQL Analysis 64 | uses: github/codeql-action/analyze@v3 65 | -------------------------------------------------------------------------------- /.github/workflows/psl-update.yml: -------------------------------------------------------------------------------- 1 | name: PSL Update 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: '40 6 * * *' 7 | 8 | jobs: 9 | update: 10 | runs-on: ubuntu-latest 11 | steps: 12 | 13 | - uses: actions/checkout@v4 14 | 15 | - name: Set up Go 16 | uses: actions/setup-go@v5 17 | with: 18 | go-version: "1.24" 19 | 20 | - name: Update PSL 21 | run: make gen 22 | 23 | - name: Create Pull Request 24 | uses: peter-evans/create-pull-request@v7 25 | with: 26 | title: PSL auto-update 27 | commit-message: Updated list from source 28 | reviewers: weppos 29 | labels: psl 30 | 31 | - name: Check Pull Request 32 | if: ${{ steps.cpr.outputs.pull-request-number }} 33 | run: | 34 | 
echo "Pull Request Number - ${{ steps.cpr.outputs.pull-request-number }}" 35 | echo "Pull Request URL - ${{ steps.cpr.outputs.pull-request-url }}" 36 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | jobs: 8 | 9 | build: 10 | strategy: 11 | matrix: 12 | go-version: 13 | - "1.23" 14 | - "1.24" 15 | platform: [ubuntu-latest] 16 | 17 | runs-on: ${{ matrix.platform }} 18 | steps: 19 | 20 | - name: Setup env 21 | run: | 22 | echo "GOPATH=$GITHUB_WORKSPACE" >> $GITHUB_ENV 23 | echo "CURRENT_WORKSPACE=$GITHUB_WORKSPACE/src/github.com/${{ github.repository }}" >> $GITHUB_ENV 24 | echo "$GITHUB_WORKSPACE/bin" >> $GITHUB_PATH 25 | 26 | - name: Set up Go 27 | uses: actions/setup-go@v5 28 | with: 29 | go-version: ${{ matrix.go-version }} 30 | 31 | - name: Check out code into the Go module directory 32 | uses: actions/checkout@v4 33 | with: 34 | path: ${{ env.CURRENT_WORKSPACE }} 35 | 36 | - name: Get dependencies 37 | run: go get -v ./... 38 | working-directory: ${{ env.CURRENT_WORKSPACE }} 39 | 40 | - name: Run go vet 41 | run: go vet ./... 42 | working-directory: ${{ env.CURRENT_WORKSPACE }} 43 | 44 | - name: Run tests 45 | run: ./test.sh 46 | working-directory: ${{ env.CURRENT_WORKSPACE }} 47 | 48 | - uses: codecov/codecov-action@v5 49 | with: 50 | files: ${{ env.CURRENT_WORKSPACE }}/coverage.txt 51 | verbose: true 52 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## main 4 | 5 | - CHANGED: Removed dependency from go-github (GH-1067) 6 | - CHANGED: Supported minimum version is now Go 1.23. 7 | - CHANGED: Definition updates. 8 | 9 | 10 | ## 0.40.2 11 | 12 | - CHANGED: Definition updates. 
13 | 14 | 15 | ## 0.40.0 16 | 17 | - CHANGED: Supported minimum version is now Go 1.21. 18 | - CHANGED: Definition updates. 19 | 20 | 21 | ## 0.30.2 22 | 23 | - CHANGED: Definition updates. 24 | 25 | 26 | ## 0.30.1 27 | 28 | - CHANGED: Definition updates. 29 | 30 | 31 | ## 0.30.0 32 | 33 | - CHANGED: Dropped Go < 1.16 from the list of supported versions to fix "package embed is not in GOROOT". "embed" is an indirect dependency and it's only available since Go 1.16. 34 | 35 | - CHANGED: Exported defaultListVersion as ListVersion (GH-334, GH-880) 36 | 37 | 38 | ## 0.20.0 39 | 40 | - CHANGED: Definition updates. 41 | 42 | 43 | ## 0.15.0 44 | 45 | - CHANGED: Definition updates. 46 | 47 | - CHANGED: Dropped Go 1.8 from the list of supported versions. "math/bits" is an indirect dependency and it's only available since Go 1.9. 48 | 49 | - ENHANCEMENT: Improved performances by using rune instead of strings single char comparison (GH-484, GH-485) 50 | 51 | 52 | ## 0.14.0 53 | 54 | - CHANGED: Added go modules (GH-240). 55 | 56 | 57 | ## 0.13.0 58 | 59 | - CHANGED: Rollback changes of v0.12.0. It turns out it is actually causing more issues. 60 | 61 | 62 | ## 0.12.0 63 | 64 | - CHANGED: Extracted generator into its own package. 65 | 66 | 67 | ## 0.11.0 68 | 69 | - CHANGED: Definition updates. 70 | 71 | 72 | ## 0.10.0 73 | 74 | - ENHANCEMENT: Internal refactoring to use go gen when building definition list. 75 | 76 | 77 | ## 0.5.0 78 | 79 | - FIXED: Added a DefaultRules() function that can be used to create a new list without modifying the default one (GH-141, GH-170). Thanks @guliyevemil1 80 | 81 | - FIXED: Fixed nil pointer dereference when can't find a rule (GH-16) 82 | 83 | - CHANGED: Removed unreachable code (GH-167) 84 | 85 | 86 | ## 0.4.0 87 | 88 | - CHANGED: Definition updates. 89 | 90 | - ENHANCEMENT: gen tool now uses GitHub API instead of scraping GitHub UI (GH-93). 91 | 92 | 93 | ## 0.3.2 94 | 95 | - CHANGED: Definition updates. 
96 | 97 | 98 | ## 0.3.1 99 | 100 | - CHANGED: Definition updates. 101 | 102 | 103 | ## 0.3.0 104 | 105 | - CHANGED: Definition updates. 106 | 107 | - ENHANCEMENT: Changed internal representation of PSL rules to be A-label encoded, as well the public interface of the library to use ASCII-encoded names by default (GH-31, GH-40). 108 | 109 | 110 | ## 0.2.0 111 | 112 | - CHANGED: Definition updates. 113 | 114 | - ENHANCEMENT: List.Select() is no longer exported. This was an experimental method and it's now kept private as the Find() implementation may change in the future. 115 | 116 | - ENHANCEMENT: List.Find() now returns a pointer to a Rule, and not a Rule. That's because Find() can actually return `nil` if the DefaultRule find option is set. This is useful if you need to avoid the fallback to the default rule "*". 117 | 118 | 119 | ## 0.1.0 120 | 121 | Initial version 122 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016-2024 Simone Carletti 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DEFAULT_GOAL := test 2 | 3 | test: 4 | go test ./... -v 5 | 6 | gen: 7 | go generate ./... 8 | 9 | clean: 10 | rm publicsuffix/rules.* 11 | 12 | get-deps: 13 | go get ./... 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Public Suffix for Go 2 | 3 | The package publicsuffix provides a Go domain name parser based on the [Public Suffix List](http://publicsuffix.org/). 4 | 5 | [![Tests](https://github.com/weppos/publicsuffix-go/workflows/Tests/badge.svg)](https://github.com/weppos/publicsuffix-go/actions?query=workflow%3ATests) 6 | [![GoDoc](https://godoc.org/github.com/weppos/publicsuffix-go/publicsuffix?status.svg)](https://pkg.go.dev/github.com/weppos/publicsuffix-go/publicsuffix) 7 | 8 | Currently, **publicsuffix-go requires Go version 1.23 or greater**. We do our best not to break older versions of Go if we don't have to, but due to tooling constraints, we don't always test older versions. 9 | 10 | 11 | ## Getting started 12 | 13 | Clone the repository [in your workspace](https://golang.org/doc/code.html#Organization) and move into it: 14 | 15 | ```shell 16 | mkdir -p $GOPATH/src/github.com/weppos && cd $_ 17 | git clone git@github.com:weppos/publicsuffix-go.git 18 | cd publicsuffix-go 19 | ``` 20 | 21 | Fetch the dependencies: 22 | 23 | ```shell 24 | go get ./... 25 | ``` 26 | 27 | Run the test suite. 28 | 29 | ```shell 30 | go test ./... 
31 | ``` 32 | 33 | 34 | ## Testing 35 | 36 | The following command runs the entire test suite. 37 | 38 | ```shell 39 | go test ./... 40 | ``` 41 | 42 | There are 3 different test suites built into this library: 43 | 44 | - Acceptance: the acceptance test suite contains some high level tests to ensure the library behaves as expected 45 | - PSL: the PSL test suite runs the library against the [official Public Suffix test cases](https://github.com/publicsuffix/list/blob/master/tests/tests.txt) 46 | - Unit: the unit test suite stresses the various single components of this package 47 | 48 | 49 | ## Installation 50 | 51 | ```shell 52 | go get github.com/weppos/publicsuffix-go 53 | ``` 54 | 55 | 56 | ## Usage 57 | 58 | This is a simple example that demonstrates how to use the package with the default options and the default Public Suffix list packaged with the library. 59 | 60 | ```go 61 | package main 62 | 63 | import ( 64 | "fmt" 65 | 66 | "github.com/weppos/publicsuffix-go/publicsuffix" 67 | ) 68 | 69 | func main() { 70 | // Extract the domain from a string 71 | // using the default list 72 | fmt.Println(publicsuffix.Domain("example.com")) // example.com 73 | fmt.Println(publicsuffix.Domain("www.example.com")) // example.com 74 | fmt.Println(publicsuffix.Domain("example.co.uk")) // example.co.uk 75 | fmt.Println(publicsuffix.Domain("www.example.co.uk")) // example.co.uk 76 | 77 | // Parse the domain from a string 78 | // using the default list 79 | fmt.Println(publicsuffix.Parse("example.com")) // &DomainName{"com", "example", ""} 80 | fmt.Println(publicsuffix.Parse("www.example.com")) // &DomainName{"com", "example", "www"} 81 | fmt.Println(publicsuffix.Parse("example.co.uk")) // &DomainName{"co.uk", "example", ""} 82 | fmt.Println(publicsuffix.Parse("www.example.co.uk")) // &DomainName{"co.uk", "example", "www"} 83 | } 84 | ``` 85 | 86 | #### Ignoring Private Domains 87 | 88 | The PSL is composed of two lists of suffixes: IANA suffixes, and Private Domains. 
89 | 90 | Private domains are submitted by private organizations. By default, private domains are not ignored. 91 | Sometimes, you want to ignore these domains and only query against the IANA suffixes. You have two options: 92 | 93 | 1. Ignore the domains at runtime 94 | 2. Create a custom list without the private domains 95 | 96 | In the first case, the private domains are ignored at runtime: they will still be included in the lists but the lookup will skip them when found. 97 | 98 | ```go 99 | publicsuffix.DomainFromListWithOptions(publicsuffix.DefaultList, "google.blogspot.com", nil) 100 | // google.blogspot.com 101 | 102 | publicsuffix.DomainFromListWithOptions(publicsuffix.DefaultList, "google.blogspot.com", &publicsuffix.FindOptions{IgnorePrivate: true}) 103 | // blogspot.com 104 | 105 | // Note that the DefaultFindOptions includes the private domains by default 106 | publicsuffix.DomainFromListWithOptions(publicsuffix.DefaultList, "google.blogspot.com", publicsuffix.DefaultFindOptions) 107 | // google.blogspot.com 108 | ``` 109 | 110 | This solution is easy, but slower. If you find yourself ignoring the private domains in all cases (or in most cases), you may want to create a custom list without the private domains. 111 | 112 | ```go 113 | list := NewListFromFile("path/to/list.txt", &publicsuffix.ParserOption{PrivateDomains: false}) 114 | publicsuffix.DomainFromListWithOptions(list, "google.blogspot.com", nil) 115 | // blogspot.com 116 | ``` 117 | 118 | ## IDN domains, A-labels and U-labels 119 | 120 | [A-label and U-label](https://tools.ietf.org/html/rfc5890#section-2.3.2.1) are two different ways to represent IDN domain names. These two encodings are also known as ASCII (A-label) or Punycode vs Unicode (U-label). Conversions between U-labels and A-labels are performed according to the ["Punycode" specification](https://tools.ietf.org/html/rfc3492), adding or removing the ACE prefix as needed. 
121 | 122 | IDNA-aware applications generally use the A-label form for storing and manipulating data, whereas the U-labels can appear in presentation and user interface forms. 123 | 124 | Although the PSL list has been traditionally U-label encoded, this library follows the common industry standards and stores the rules in their A-label form. Therefore, unless explicitly mentioned, any method call, comparison or internal representation is expected to be ASCII-compatible encoded (ACE). 125 | 126 | Passing Unicode names to the library may result in either an error or unexpected behavior. 127 | 128 | If you are interested in the details of this decision, you can read the full discussion [here](https://github.com/weppos/publicsuffix-go/issues/31). 129 | 130 | 131 | ## Differences with `golang.org/x/net/publicsuffix` 132 | 133 | The [`golang.org/x/net/publicsuffix`](https://godoc.org/golang.org/x/net/publicsuffix) is a package part of the Golang `x/net` package, that provides a public suffix list implementation. 134 | 135 | The main difference is that the `x/net` package is optimized for speed, but it's less flexible. The list is compiled and embedded into the package itself. However, this is also the main downside. 136 | The [list is not frequently refreshed](https://github.com/letsencrypt/boulder/issues/1374#issuecomment-182429297), hence the results may be inaccurate, in particular if you heavily rely on the private domain section of the list. Changes in the IANA section are less frequent, whereas changes in the Private Domains section happen weekly. 137 | 138 | This package provides the following extra features: 139 | 140 | - Ability to load an arbitrary list at runtime (e.g. 
you can feed your own list, or create multiple lists) 141 | - Ability to create multiple lists 142 | - Ability to parse a domain using a previously defined list 143 | - Ability to add custom rules to an existing list, or merge/load rules from other lists (provided as file or string) 144 | - Advanced access to the list rules 145 | - Ability to ignore private domains at runtime, or when the list is parsed 146 | 147 | This package also aims for 100% compatibility with the `x/net` package. A special adapter is provided as a drop-in replacement. Simply change the include statement from 148 | 149 | ```go 150 | import ( 151 | "golang.org/x/net/publicsuffix" 152 | ) 153 | ``` 154 | 155 | to 156 | 157 | ```go 158 | import ( 159 | "github.com/weppos/publicsuffix-go/net/publicsuffix" 160 | ) 161 | ``` 162 | 163 | The `github.com/weppos/publicsuffix-go/net/publicsuffix` package defines the same methods defined in `golang.org/x/net/publicsuffix`, but these methods are implemented using the `github.com/weppos/publicsuffix-go/publicsuffix` package. 164 | 165 | Note that the adapter doesn't offer the flexibility of `github.com/weppos/publicsuffix-go/publicsuffix`, such as the ability to use multiple lists or disable private domains at runtime. 166 | 167 | 168 | ## `cookiejar.PublicSuffixList` interface 169 | 170 | This package implements the [`cookiejar.PublicSuffixList` interface](https://godoc.org/net/http/cookiejar#PublicSuffixList). It means it can be used as a value for the `PublicSuffixList` option when creating a `net/http/cookiejar`. 171 | 172 | ```go 173 | import ( 174 | "net/http/cookiejar" 175 | "github.com/weppos/publicsuffix-go/publicsuffix" 176 | ) 177 | 178 | deliciousJar := cookiejar.New(&cookiejar.Options{PublicSuffixList: publicsuffix.CookieJarList}) 179 | ``` 180 | 181 | 182 | ## License 183 | 184 | Copyright (c) 2016-2024 Simone Carletti. This is Free Software distributed under the MIT license. 
185 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Security updates are provided only for the current minor version. 6 | 7 | If you are using a previous minor version, we recommend upgrading to the current minor version. 8 | This project uses [semantic versioning](https://semver.org/), therefore you can upgrade to a more recent minor version without incurring breaking changes. 9 | 10 | Exceptionally, we may support previous minor versions upon request if there are significant reasons preventing an immediate switch to the latest minor version. 11 | 12 | Older major versions are no longer supported. 13 | 14 | 15 | ## Reporting a Vulnerability 16 | 17 | To make a report, please email weppos@weppos.net. 18 | 19 | > [!IMPORTANT] 20 | > Please consider encrypting your report with GPG using the key [0x420da82a989398df](https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x420da82a989398df). 21 | 22 | 23 | ## Tracking Security Updates 24 | 25 | Information about security vulnerabilities is published in the [Security Advisories](https://github.com/weppos/publicsuffix-go/security/advisories) page. 26 | -------------------------------------------------------------------------------- /cmd/gen/gen.go: -------------------------------------------------------------------------------- 1 | //go:build ignore 2 | 3 | // gen downloads an updated version of the PSL list and compiles it into go code. 4 | // 5 | // It is meant to be used by maintainers in conjunction with the go generate tool 6 | // to update the list. 
7 | package main 8 | 9 | import ( 10 | "context" 11 | "fmt" 12 | "os" 13 | "os/signal" 14 | 15 | "github.com/weppos/publicsuffix-go/publicsuffix/generator" 16 | ) 17 | 18 | const ( 19 | // where the rules will be written 20 | filename = "rules.go" 21 | ) 22 | 23 | func main() { 24 | ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt) 25 | defer cancel() 26 | 27 | g := generator.NewGenerator() 28 | g.Verbose = true 29 | err := g.Write(ctx, filename) 30 | if err != nil { 31 | fmt.Fprintf(os.Stderr, "Error: %v\n", err) 32 | os.Exit(1) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /cmd/load/main.go: -------------------------------------------------------------------------------- 1 | // +build ignore 2 | 3 | package main 4 | 5 | import ( 6 | "fmt" 7 | "time" 8 | 9 | "github.com/weppos/publicsuffix-go/publicsuffix" 10 | ) 11 | 12 | func main() { 13 | startTime := time.Now() 14 | defer func() { 15 | elapsed := time.Since(startTime) 16 | elapsed -= elapsed % 1000000 17 | fmt.Printf("Time elapsed: %s\n", elapsed) 18 | }() 19 | 20 | fmt.Printf("%d rules loaded\n", publicsuffix.DefaultList.Size()) 21 | } 22 | -------------------------------------------------------------------------------- /fixtures/list-simple.txt: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 4 | 5 | // ===BEGIN ICANN DOMAINS=== 6 | 7 | // ac : http://en.wikipedia.org/wiki/.ac 8 | ac 9 | com.ac 10 | 11 | // ===END ICANN DOMAINS=== 12 | // ===BEGIN PRIVATE DOMAINS=== 13 | 14 | // Google, Inc. 
15 | blogspot.com 16 | 17 | // ===END PRIVATE DOMAINS=== 18 | -------------------------------------------------------------------------------- /fixtures/tests.txt: -------------------------------------------------------------------------------- 1 | // Any copyright is dedicated to the Public Domain. 2 | // https://creativecommons.org/publicdomain/zero/1.0/ 3 | 4 | // null input. 5 | null null 6 | // Mixed case. 7 | COM null 8 | example.COM example.com 9 | WwW.example.COM example.com 10 | // Leading dot. 11 | .com null 12 | .example null 13 | .example.com null 14 | .example.example null 15 | // Unlisted TLD. 16 | example null 17 | example.example example.example 18 | b.example.example example.example 19 | a.b.example.example example.example 20 | // Listed, but non-Internet, TLD. 21 | //local null 22 | //example.local null 23 | //b.example.local null 24 | //a.b.example.local null 25 | // TLD with only 1 rule. 26 | biz null 27 | domain.biz domain.biz 28 | b.domain.biz domain.biz 29 | a.b.domain.biz domain.biz 30 | // TLD with some 2-level rules. 31 | com null 32 | example.com example.com 33 | b.example.com example.com 34 | a.b.example.com example.com 35 | uk.com null 36 | example.uk.com example.uk.com 37 | b.example.uk.com example.uk.com 38 | a.b.example.uk.com example.uk.com 39 | test.ac test.ac 40 | // TLD with only 1 (wildcard) rule. 41 | mm null 42 | c.mm null 43 | b.c.mm b.c.mm 44 | a.b.c.mm b.c.mm 45 | // More complex TLD. 46 | jp null 47 | test.jp test.jp 48 | www.test.jp test.jp 49 | ac.jp null 50 | test.ac.jp test.ac.jp 51 | www.test.ac.jp test.ac.jp 52 | kyoto.jp null 53 | test.kyoto.jp test.kyoto.jp 54 | ide.kyoto.jp null 55 | b.ide.kyoto.jp b.ide.kyoto.jp 56 | a.b.ide.kyoto.jp b.ide.kyoto.jp 57 | c.kobe.jp null 58 | b.c.kobe.jp b.c.kobe.jp 59 | a.b.c.kobe.jp b.c.kobe.jp 60 | city.kobe.jp city.kobe.jp 61 | www.city.kobe.jp city.kobe.jp 62 | // TLD with a wildcard rule and exceptions. 
63 | ck null 64 | test.ck null 65 | b.test.ck b.test.ck 66 | a.b.test.ck b.test.ck 67 | www.ck www.ck 68 | www.www.ck www.ck 69 | // US K12. 70 | us null 71 | test.us test.us 72 | www.test.us test.us 73 | ak.us null 74 | test.ak.us test.ak.us 75 | www.test.ak.us test.ak.us 76 | k12.ak.us null 77 | test.k12.ak.us test.k12.ak.us 78 | www.test.k12.ak.us test.k12.ak.us 79 | // IDN labels. 80 | 食狮.com.cn 食狮.com.cn 81 | 食狮.公司.cn 食狮.公司.cn 82 | www.食狮.公司.cn 食狮.公司.cn 83 | shishi.公司.cn shishi.公司.cn 84 | 公司.cn null 85 | 食狮.中国 食狮.中国 86 | www.食狮.中国 食狮.中国 87 | shishi.中国 shishi.中国 88 | 中国 null 89 | // Same as above, but punycoded. 90 | xn--85x722f.com.cn xn--85x722f.com.cn 91 | xn--85x722f.xn--55qx5d.cn xn--85x722f.xn--55qx5d.cn 92 | www.xn--85x722f.xn--55qx5d.cn xn--85x722f.xn--55qx5d.cn 93 | shishi.xn--55qx5d.cn shishi.xn--55qx5d.cn 94 | xn--55qx5d.cn null 95 | xn--85x722f.xn--fiqs8s xn--85x722f.xn--fiqs8s 96 | www.xn--85x722f.xn--fiqs8s xn--85x722f.xn--fiqs8s 97 | shishi.xn--fiqs8s shishi.xn--fiqs8s 98 | xn--fiqs8s null 99 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/weppos/publicsuffix-go 2 | 3 | go 1.23.0 4 | 5 | toolchain go1.23.4 6 | 7 | require golang.org/x/net v0.39.0 8 | 9 | require golang.org/x/text v0.24.0 // indirect 10 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY= 2 | golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E= 3 | golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0= 4 | golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU= 5 | -------------------------------------------------------------------------------- 
/net/publicsuffix/publicsuffix.go: -------------------------------------------------------------------------------- 1 | // Package publicsuffix is a drop-in replacement for the golang.org/x/net/publicsuffix 2 | // based on the weppos/publicsuffix package. 3 | package publicsuffix 4 | 5 | import ( 6 | psl "github.com/weppos/publicsuffix-go/publicsuffix" 7 | ) 8 | 9 | // PublicSuffix returns the public suffix of the domain 10 | // using a copy of the publicsuffix.org database packaged into this library. 11 | // 12 | // Note. To maintain compatibility with the golang.org/x/net/publicsuffix 13 | // this method doesn't return an error. However, in case of error, 14 | // the returned value is empty. 15 | func PublicSuffix(domain string) (publicSuffix string, icann bool) { 16 | //d, err := psl.Parse(domain) 17 | //if err != nil { 18 | // return "", false 19 | //} 20 | // 21 | //return d.Rule.Value, !d.Rule.Private 22 | 23 | rule := psl.DefaultList.Find(domain, nil) 24 | publicSuffix = rule.Decompose(domain)[1] 25 | icann = !rule.Private 26 | 27 | // x/net/publicsuffix sets icann to false when the default rule "*" is used 28 | if rule.Value == "" && rule.Type == psl.WildcardType { 29 | icann = false 30 | } 31 | 32 | return 33 | } 34 | 35 | // EffectiveTLDPlusOne returns the effective top level domain plus one more label. 36 | // For example, the eTLD+1 for "foo.bar.golang.org" is "golang.org". 
37 | func EffectiveTLDPlusOne(domain string) (string, error) { 38 | return psl.Domain(domain) 39 | } 40 | -------------------------------------------------------------------------------- /net/publicsuffix/publicsuffix_test.go: -------------------------------------------------------------------------------- 1 | package publicsuffix_test 2 | 3 | import ( 4 | "testing" 5 | 6 | wpsl "github.com/weppos/publicsuffix-go/net/publicsuffix" 7 | xpsl "golang.org/x/net/publicsuffix" 8 | ) 9 | 10 | func TestPublicSuffix(t *testing.T) { 11 | testCases := []string{ 12 | "example.com", 13 | "www.example.com", 14 | "example.co.uk", 15 | "www.example.co.uk", 16 | "example.blogspot.com", 17 | "www.example.blogspot.com", 18 | "parliament.uk", 19 | "www.parliament.uk", 20 | // not listed 21 | "www.example.test", 22 | } 23 | 24 | for _, testCase := range testCases { 25 | ws, wb := wpsl.PublicSuffix(testCase) 26 | xs, xb := xpsl.PublicSuffix(testCase) 27 | 28 | if ws != xs || wb != xb { 29 | t.Errorf("PublicSuffix(%v): x/psl -> (%v, %v) != w/psl -> (%v, %v)", testCase, xs, xb, ws, wb) 30 | } 31 | } 32 | } 33 | 34 | func TestEffectiveTLDPlusOne(t *testing.T) { 35 | testCases := []string{ 36 | "example.com", 37 | "www.example.com", 38 | "example.co.uk", 39 | "www.example.co.uk", 40 | "example.blogspot.com", 41 | "www.example.blogspot.com", 42 | "parliament.uk", 43 | "www.parliament.uk", 44 | // not listed 45 | "www.example.test", 46 | } 47 | 48 | for _, testCase := range testCases { 49 | ws, we := wpsl.EffectiveTLDPlusOne(testCase) 50 | xs, xe := xpsl.EffectiveTLDPlusOne(testCase) 51 | 52 | if ws != xs || we != xe { 53 | t.Errorf("EffectiveTLDPlusOne(%v): x/psl -> (%v, %v) != w/psl -> (%v, %v)", testCase, xs, xe, ws, we) 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /publicsuffix/acceptance_test.go: -------------------------------------------------------------------------------- 1 | package publicsuffix 2 | 3 | import ( 4 | 
"testing" 5 | ) 6 | 7 | type validTestCase struct { 8 | input string 9 | domain string 10 | parsed *DomainName 11 | } 12 | 13 | func TestValid(t *testing.T) { 14 | testCases := []validTestCase{ 15 | {"example.com", "example.com", &DomainName{"com", "example", "", MustNewRule("com")}}, 16 | {"foo.example.com", "example.com", &DomainName{"com", "example", "foo", MustNewRule("com")}}, 17 | 18 | {"verybritish.co.uk", "verybritish.co.uk", &DomainName{"co.uk", "verybritish", "", MustNewRule("*.uk")}}, 19 | {"foo.verybritish.co.uk", "verybritish.co.uk", &DomainName{"co.uk", "verybritish", "foo", MustNewRule("*.uk")}}, 20 | 21 | {"parliament.uk", "parliament.uk", &DomainName{"uk", "parliament", "", MustNewRule("!parliament.uk")}}, 22 | {"foo.parliament.uk", "parliament.uk", &DomainName{"uk", "parliament", "foo", MustNewRule("!parliament.uk")}}, 23 | 24 | {"foo.blogspot.com", "foo.blogspot.com", &DomainName{"blogspot.com", "foo", "", MustNewRule("blogspot.com")}}, 25 | {"bar.foo.blogspot.com", "foo.blogspot.com", &DomainName{"blogspot.com", "foo", "bar", MustNewRule("blogspot.com")}}, 26 | } 27 | 28 | for _, testCase := range testCases { 29 | got, err := Parse(testCase.input) 30 | if err != nil { 31 | t.Errorf("TestValid(%v) returned error: %v", testCase.input, err) 32 | } 33 | if want := testCase.parsed; want.String() != got.String() { 34 | t.Errorf("TestValid(%v) = %v, want %v", testCase.input, got, want) 35 | } 36 | 37 | str, err := Domain(testCase.input) 38 | if err != nil { 39 | t.Errorf("TestValid(%v) returned error: %v", testCase.input, err) 40 | } 41 | if want := testCase.domain; want != str { 42 | t.Errorf("TestValid(%v) = %v, want %v", testCase.input, str, want) 43 | } 44 | } 45 | } 46 | 47 | type privateTestCase struct { 48 | input string 49 | domain string 50 | ignore bool 51 | error bool 52 | } 53 | 54 | func TestIncludePrivate(t *testing.T) { 55 | testCases := []privateTestCase{ 56 | {"blogspot.com", "", false, true}, 57 | {"blogspot.com", "blogspot.com", 
true, false}, 58 | 59 | {"foo.blogspot.com", "foo.blogspot.com", false, false}, 60 | {"foo.blogspot.com", "blogspot.com", true, false}, 61 | } 62 | 63 | for _, testCase := range testCases { 64 | got, err := DomainFromListWithOptions(DefaultList, testCase.input, &FindOptions{IgnorePrivate: testCase.ignore}) 65 | 66 | if testCase.error && err == nil { 67 | t.Errorf("TestIncludePrivate(%v) should have returned error, got: %v", testCase.input, got) 68 | continue 69 | } 70 | if !testCase.error && err != nil { 71 | t.Errorf("TestIncludePrivate(%v) returned error: %v", testCase.input, err) 72 | continue 73 | } 74 | 75 | if want := testCase.domain; want != got { 76 | t.Errorf("Domain(%v) = %v, want %v", testCase.input, got, want) 77 | } 78 | } 79 | } 80 | 81 | type idnaTestCase struct { 82 | input string 83 | domain string 84 | error bool 85 | } 86 | 87 | func TestIDNA(t *testing.T) { 88 | testACases := []idnaTestCase{ 89 | // A-labels are supported 90 | // Check single IDN part 91 | {"xn--p1ai", "", true}, 92 | {"example.xn--p1ai", "example.xn--p1ai", false}, 93 | {"subdomain.example.xn--p1ai", "example.xn--p1ai", false}, 94 | // Check multiple IDN parts 95 | {"xn--example--3bhk5a.xn--p1ai", "xn--example--3bhk5a.xn--p1ai", false}, 96 | {"subdomain.xn--example--3bhk5a.xn--p1ai", "xn--example--3bhk5a.xn--p1ai", false}, 97 | // Check multiple IDN rules 98 | {"example.xn--o1ach.xn--90a3ac", "example.xn--o1ach.xn--90a3ac", false}, 99 | {"sudbomain.example.xn--o1ach.xn--90a3ac", "example.xn--o1ach.xn--90a3ac", false}, 100 | } 101 | 102 | for _, testCase := range testACases { 103 | got, err := DomainFromListWithOptions(DefaultList, testCase.input, nil) 104 | 105 | if testCase.error && err == nil { 106 | t.Errorf("A-label %v should have returned error, got: %v", testCase.input, got) 107 | continue 108 | } 109 | if !testCase.error && err != nil { 110 | t.Errorf("A-label %v returned error: %v", testCase.input, err) 111 | continue 112 | } 113 | 114 | if want := testCase.domain; want 
!= got { 115 | t.Errorf("A-label Domain(%v) = %v, want %v", testCase.input, got, want) 116 | } 117 | } 118 | 119 | // These tests validates the non-acceptance of U-labels. 120 | // 121 | // TODO(weppos): some tests are passing because of the default rule * 122 | // Consider to add some tests overriding the default rule to nil. 123 | // Right now, setting the default rule to nil with cause a panic if the lookup results in a nil. 124 | testUCases := []idnaTestCase{ 125 | // U-labels are NOT supported 126 | // Check single IDN part 127 | {"рф", "", true}, 128 | {"example.рф", "example.рф", false}, // passes because of * 129 | {"subdomain.example.рф", "example.рф", false}, // passes because of * 130 | // Check multiple IDN parts 131 | {"example-упр.рф", "example-упр.рф", false}, // passes because of * 132 | {"subdomain.example-упр.рф", "example-упр.рф", false}, // passes because of * 133 | // Check multiple IDN rules 134 | {"example.упр.срб", "упр.срб", false}, 135 | {"sudbomain.example.упр.срб", "упр.срб", false}, 136 | } 137 | 138 | for _, testCase := range testUCases { 139 | got, err := DomainFromListWithOptions(DefaultList, testCase.input, nil) 140 | 141 | if testCase.error && err == nil { 142 | t.Errorf("U-label %v should have returned error, got: %v", testCase.input, got) 143 | continue 144 | } 145 | if !testCase.error && err != nil { 146 | t.Errorf("U-label %v returned error: %v", testCase.input, err) 147 | continue 148 | } 149 | 150 | if want := testCase.domain; want != got { 151 | t.Errorf("U-label Domain(%v) = %v, want %v", testCase.input, got, want) 152 | } 153 | } 154 | } 155 | 156 | func TestFindRuleIANA(t *testing.T) { 157 | testCases := []struct { 158 | input, want string 159 | }{ 160 | // TLD with only 1 rule. 
161 | {"biz", "biz"}, 162 | {"input.biz", "biz"}, 163 | {"b.input.biz", "biz"}, 164 | 165 | // The relevant {kobe,kyoto}.jp rules are: 166 | // jp 167 | // *.kobe.jp 168 | // !city.kobe.jp 169 | // kyoto.jp 170 | // ide.kyoto.jp 171 | {"jp", "jp"}, 172 | {"kobe.jp", "jp"}, 173 | {"c.kobe.jp", "c.kobe.jp"}, 174 | {"b.c.kobe.jp", "c.kobe.jp"}, 175 | {"a.b.c.kobe.jp", "c.kobe.jp"}, 176 | {"city.kobe.jp", "kobe.jp"}, 177 | {"www.city.kobe.jp", "kobe.jp"}, 178 | {"kyoto.jp", "kyoto.jp"}, 179 | {"test.kyoto.jp", "kyoto.jp"}, 180 | {"ide.kyoto.jp", "ide.kyoto.jp"}, 181 | {"b.ide.kyoto.jp", "ide.kyoto.jp"}, 182 | {"a.b.ide.kyoto.jp", "ide.kyoto.jp"}, 183 | 184 | // Domain with a private public suffix should return the ICANN public suffix. 185 | {"foo.compute-1.amazonaws.com", "com"}, 186 | // Domain equal to a private public suffix should return the ICANN public suffix. 187 | {"cloudapp.net", "net"}, 188 | } 189 | 190 | for _, tc := range testCases { 191 | rule := DefaultList.Find(tc.input, &FindOptions{IgnorePrivate: true, DefaultRule: nil}) 192 | 193 | if rule == nil { 194 | t.Errorf("TestFindRuleIANA(%v) nil rule", tc.input) 195 | continue 196 | } 197 | 198 | suffix := rule.Decompose(tc.input)[1] 199 | // If the TLD is empty, it means name is actually a suffix. 200 | // In fact, decompose returns an array of empty strings in this case. 201 | if suffix == "" { 202 | suffix = tc.input 203 | } 204 | 205 | if suffix != tc.want { 206 | t.Errorf("TestFindRuleIANA(%v) = %v, want %v", tc.input, suffix, tc.want) 207 | } 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /publicsuffix/generator/gen.go: -------------------------------------------------------------------------------- 1 | // Package generator downloads an updated version of the PSL list and compiles it into go code. 2 | // 3 | // It is meant to be used by maintainers in conjunction with the go generate tool 4 | // to update the list. 
5 | package generator 6 | 7 | import ( 8 | "bytes" 9 | "context" 10 | "encoding/json" 11 | "fmt" 12 | "go/format" 13 | "io" 14 | "log" 15 | "net/http" 16 | "os" 17 | "strings" 18 | "text/template" 19 | "time" 20 | 21 | "github.com/weppos/publicsuffix-go/publicsuffix" 22 | ) 23 | 24 | const ( 25 | list = `// This file is automatically generated 26 | // Run "go run cmd/gen/gen.go" to update the list. 27 | 28 | package publicsuffix 29 | 30 | const ListVersion = "PSL version {{.VersionSHA}} ({{.VersionDate}})" 31 | 32 | func DefaultRules() [{{len .Rules}}]Rule { 33 | return r 34 | } 35 | 36 | var r = [{{len .Rules}}]Rule{ 37 | {{range $r := .Rules}} \ 38 | { {{$r.Type}}, "{{$r.Value}}", {{$r.Length}}, {{$r.Private}} }, 39 | {{end}} 40 | } 41 | 42 | func init() { 43 | for i := range r { 44 | DefaultList.AddRule(&r[i]) 45 | } 46 | } 47 | 48 | ` 49 | ) 50 | 51 | var listTmpl = template.Must(template.New("list").Parse(cont(list))) 52 | 53 | // https://github.com/golang/go/issues/9969 54 | // Requires go1.6 55 | func cont(s string) string { 56 | return strings.Replace(s, "\\\n", "", -1) 57 | } 58 | 59 | type headInfo struct { 60 | SHA string 61 | Datetime time.Time 62 | } 63 | 64 | type githubNodes struct { 65 | SHA string `json:"sha"` 66 | Commit githubCommit `json:"commit"` 67 | } 68 | 69 | type githubCommit struct { 70 | Commiter githubCommitter `json:"committer"` 71 | } 72 | 73 | type githubCommitter struct { 74 | Date time.Time `json:"date"` 75 | } 76 | 77 | func extractHeadInfo(ctx context.Context) (*headInfo, error) { 78 | req, err := http.NewRequestWithContext(ctx, "GET", "https://api.github.com/repos/publicsuffix/list/commits", nil) 79 | if err != nil { 80 | return nil, fmt.Errorf("http.NewRequestWithContext: %w", err) 81 | } 82 | req.Header.Set("Accept", "application/vnd.github+json") 83 | req.Header.Set("X-Github-Api-Version", "2022-11-28") 84 | 85 | resp, err := http.DefaultClient.Do(req) 86 | if err != nil { 87 | return nil, fmt.Errorf("http.DefaultClient.Do: 
%w", err) 88 | } 89 | defer resp.Body.Close() 90 | 91 | respString, err := io.ReadAll(resp.Body) 92 | if err != nil { 93 | return nil, fmt.Errorf("io.ReadAll: %w", err) 94 | } 95 | 96 | if resp.StatusCode != http.StatusOK { 97 | return nil, fmt.Errorf("unexpected status code: %d %s", resp.StatusCode, respString) 98 | } 99 | 100 | var respBody []githubNodes 101 | err = json.Unmarshal(respString, &respBody) 102 | if err != nil { 103 | return nil, fmt.Errorf("json.Unmarshal %s: %w", respBody, err) 104 | } 105 | 106 | if len(respBody) == 0 { 107 | return nil, fmt.Errorf("no nodes found") 108 | } 109 | 110 | return &headInfo{ 111 | SHA: respBody[0].SHA, 112 | Datetime: respBody[0].Commit.Commiter.Date, 113 | }, nil 114 | } 115 | 116 | // Generator represents a generator. 117 | type Generator struct { 118 | Verbose bool 119 | } 120 | 121 | // NewGenerator creates a Generator with default settings. 122 | func NewGenerator() *Generator { 123 | g := &Generator{ 124 | Verbose: false, 125 | } 126 | return g 127 | } 128 | 129 | // Write ... 130 | func (g *Generator) Write(ctx context.Context, filename string) error { 131 | content, err := g.generate(ctx) 132 | if err != nil { 133 | return err 134 | } 135 | 136 | g.log("Writing %v...\n", filename) 137 | return os.WriteFile(filename, content, 0o644) 138 | } 139 | 140 | // Print ... 141 | func (g *Generator) Print(ctx context.Context) error { 142 | content, err := g.generate(ctx) 143 | if err != nil { 144 | return err 145 | } 146 | 147 | _, err = os.Stdout.Write(content) 148 | return err 149 | } 150 | 151 | // Generate downloads an updated version of the PSL list and compiles it into go code. 
152 | func (g *Generator) generate(ctx context.Context) ([]byte, error) { 153 | g.log("Fetching PSL version...\n") 154 | headInfo, err := extractHeadInfo(ctx) 155 | if err != nil { 156 | return nil, err 157 | } 158 | 159 | g.log("Downloading PSL %s...\n", headInfo.SHA[:6]) 160 | reqURL := fmt.Sprintf("https://raw.githubusercontent.com/publicsuffix/list/%s/public_suffix_list.dat", headInfo.SHA) 161 | 162 | req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqURL, nil) 163 | if err != nil { 164 | return nil, err 165 | } 166 | 167 | resp, err := http.DefaultClient.Do(req) 168 | if err != nil { 169 | return nil, err 170 | } 171 | 172 | defer resp.Body.Close() 173 | 174 | list := publicsuffix.NewList() 175 | rules, err := list.Load(resp.Body, nil) 176 | if err != nil { 177 | return nil, err 178 | } 179 | 180 | data := struct { 181 | VersionSHA string 182 | VersionDate string 183 | Rules []publicsuffix.Rule 184 | }{ 185 | headInfo.SHA[:6], 186 | headInfo.Datetime.Format(time.ANSIC), 187 | rules, 188 | } 189 | 190 | g.log("Parsing PSL...\n") 191 | buf := new(bytes.Buffer) 192 | err = listTmpl.Execute(buf, &data) 193 | if err != nil { 194 | return nil, err 195 | } 196 | 197 | return format.Source(buf.Bytes()) 198 | } 199 | 200 | func (g *Generator) log(format string, v ...interface{}) { 201 | if !g.Verbose { 202 | return 203 | } 204 | 205 | log.Printf(format, v...) 
206 | } 207 | -------------------------------------------------------------------------------- /publicsuffix/psl_test.go: -------------------------------------------------------------------------------- 1 | package publicsuffix 2 | 3 | import ( 4 | "bufio" 5 | "os" 6 | "strings" 7 | "testing" 8 | ) 9 | 10 | type pslTestCase struct { 11 | input string 12 | output string 13 | error bool 14 | } 15 | 16 | func TestPsl(t *testing.T) { 17 | f, err := os.Open("../fixtures/tests.txt") 18 | if err != nil { 19 | panic(err) 20 | } 21 | defer f.Close() 22 | 23 | testCases := []pslTestCase{} 24 | 25 | scanner := bufio.NewScanner(f) 26 | scanner.Split(bufio.ScanLines) 27 | for scanner.Scan() { 28 | line := scanner.Text() 29 | switch { 30 | case line == "": 31 | break 32 | case strings.HasPrefix(line, "//"): 33 | break 34 | default: 35 | xy := strings.Split(line, " ") 36 | tc := pslTestCase{} 37 | tc.input = xy[0] 38 | if xy[1] == "null" { 39 | tc.error = true 40 | } else { 41 | tc.error = false 42 | tc.output = xy[1] 43 | } 44 | testCases = append(testCases, tc) 45 | } 46 | } 47 | 48 | for _, testCase := range testCases { 49 | input, err := ToASCII(testCase.input) 50 | if err != nil { 51 | t.Fatalf("failed to convert input %v to ASCII", testCase.input) 52 | } 53 | 54 | output, err := ToASCII(testCase.output) 55 | if err != nil { 56 | t.Fatalf("failed to convert output %v to ASCII", testCase.output) 57 | } 58 | 59 | got, err := Domain(input) 60 | 61 | if testCase.error && err == nil { 62 | t.Errorf("PSL(%v) should have returned error, got: %v", testCase.input, got) 63 | continue 64 | } 65 | if !testCase.error && err != nil { 66 | t.Errorf("PSL(%v) returned error: %v", testCase.input, err) 67 | continue 68 | } 69 | if got != output { 70 | t.Errorf("PSL(%v) = %v, want %v", testCase.input, got, testCase.output) 71 | continue 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /publicsuffix/publicsuffix.go: 
-------------------------------------------------------------------------------- 1 | //go:generate go run ../cmd/gen/gen.go 2 | 3 | // Package publicsuffix provides a domain name parser 4 | // based on data from the public suffix list http://publicsuffix.org/. 5 | // A public suffix is one under which Internet users can directly register names. 6 | package publicsuffix 7 | 8 | import ( 9 | "bufio" 10 | "fmt" 11 | "io" 12 | "net/http/cookiejar" 13 | "os" 14 | "strings" 15 | 16 | "golang.org/x/net/idna" 17 | ) 18 | 19 | const ( 20 | // Version identifies the current library version. 21 | // This is a pro forma convention given that Go dependencies 22 | // tends to be fetched directly from the repo. 23 | Version = "0.40.2" 24 | 25 | // NormalType represents a normal rule such as "com" 26 | NormalType = 1 27 | // WildcardType represents a wildcard rule such as "*.com" 28 | WildcardType = 2 29 | // ExceptionType represents an exception to a wildard rule 30 | ExceptionType = 3 31 | 32 | listTokenPrivateDomains = "===BEGIN PRIVATE DOMAINS===" 33 | listTokenComment = "//" 34 | ) 35 | 36 | // DefaultList is the default List and it is used by Parse and Domain. 37 | var DefaultList = NewList() 38 | 39 | // DefaultRule is the default Rule that represents "*". 40 | var DefaultRule = MustNewRule("*") 41 | 42 | // DefaultParserOptions are the default options used to parse a Public Suffix list. 43 | var DefaultParserOptions = &ParserOption{PrivateDomains: true, ASCIIEncoded: false} 44 | 45 | // DefaultFindOptions are the default options used to perform the lookup of rules in the list. 46 | var DefaultFindOptions = &FindOptions{IgnorePrivate: false, DefaultRule: DefaultRule} 47 | 48 | // Rule represents a single rule in a Public Suffix List. 49 | type Rule struct { 50 | Type int 51 | Value string 52 | Length int 53 | Private bool 54 | } 55 | 56 | // ParserOption are the options you can use to customize the way a List 57 | // is parsed from a file or a string. 
58 | type ParserOption struct { 59 | // Set to false to skip the private domains when parsing. 60 | // Default to true, which means the private domains are included. 61 | PrivateDomains bool 62 | 63 | // Set to false if the input is encoded in U-labels (Unicode) 64 | // as opposite to A-labels. 65 | // Default to false, which means the list is containing Unicode domains. 66 | // This is the default because the original PSL currently contains Unicode. 67 | ASCIIEncoded bool 68 | } 69 | 70 | // FindOptions are the options you can use to customize the way a Rule 71 | // is searched within the list. 72 | type FindOptions struct { 73 | // Set to true to ignore the rules within the "Private" section of the Public Suffix List. 74 | IgnorePrivate bool 75 | 76 | // The default rule to use when no rule matches the input. 77 | // The format Public Suffix algorithm states that the rule "*" should be used when no other rule matches, 78 | // but some consumers may have different needs. 79 | DefaultRule *Rule 80 | } 81 | 82 | // List represents a Public Suffix List. 83 | type List struct { 84 | // rules is kept private because you should not access rules directly 85 | rules map[string]*Rule 86 | } 87 | 88 | // NewList creates a new empty list. 89 | func NewList() *List { 90 | return &List{ 91 | rules: map[string]*Rule{}, 92 | } 93 | } 94 | 95 | // NewListFromString parses a string that represents a Public Suffix source 96 | // and returns a List initialized with the rules in the source. 97 | func NewListFromString(src string, options *ParserOption) (*List, error) { 98 | l := NewList() 99 | _, err := l.LoadString(src, options) 100 | return l, err 101 | } 102 | 103 | // NewListFromFile parses a string that represents a Public Suffix source 104 | // and returns a List initialized with the rules in the source. 
105 | func NewListFromFile(path string, options *ParserOption) (*List, error) { 106 | l := NewList() 107 | _, err := l.LoadFile(path, options) 108 | return l, err 109 | } 110 | 111 | // Load parses and loads a set of rules from an io.Reader into the current list. 112 | func (l *List) Load(r io.Reader, options *ParserOption) ([]Rule, error) { 113 | return l.parse(r, options) 114 | } 115 | 116 | // LoadString parses and loads a set of rules from a String into the current list. 117 | func (l *List) LoadString(src string, options *ParserOption) ([]Rule, error) { 118 | r := strings.NewReader(src) 119 | return l.parse(r, options) 120 | } 121 | 122 | // LoadFile parses and loads a set of rules from a File into the current list. 123 | func (l *List) LoadFile(path string, options *ParserOption) ([]Rule, error) { 124 | f, err := os.Open(path) 125 | if err != nil { 126 | return nil, err 127 | } 128 | defer f.Close() 129 | return l.parse(f, options) 130 | } 131 | 132 | // AddRule adds a new rule to the list. 133 | // 134 | // The exact position of the rule into the list is unpredictable. 135 | // The list may be optimized internally for lookups, therefore the algorithm 136 | // will decide the best position for the new rule. 137 | func (l *List) AddRule(r *Rule) error { 138 | l.rules[r.Value] = r 139 | return nil 140 | } 141 | 142 | // Size returns the size of the list, which is the number of rules. 
143 | func (l *List) Size() int { 144 | return len(l.rules) 145 | } 146 | 147 | func (l *List) parse(r io.Reader, options *ParserOption) ([]Rule, error) { 148 | if options == nil { 149 | options = DefaultParserOptions 150 | } 151 | var rules []Rule 152 | 153 | scanner := bufio.NewScanner(r) 154 | var section int // 1 == ICANN, 2 == PRIVATE 155 | 156 | Scanning: 157 | for scanner.Scan() { 158 | line := strings.TrimSpace(scanner.Text()) 159 | switch { 160 | 161 | // skip blank lines 162 | case line == "": 163 | break 164 | 165 | // include private domains or stop scanner 166 | case strings.Contains(line, listTokenPrivateDomains): 167 | if !options.PrivateDomains { 168 | break Scanning 169 | } 170 | section = 2 171 | 172 | // skip comments 173 | case strings.HasPrefix(line, listTokenComment): 174 | break 175 | 176 | default: 177 | var rule *Rule 178 | var err error 179 | 180 | if options.ASCIIEncoded { 181 | rule, err = NewRule(line) 182 | } else { 183 | rule, err = NewRuleUnicode(line) 184 | } 185 | if err != nil { 186 | return []Rule{}, err 187 | } 188 | 189 | rule.Private = (section == 2) 190 | l.AddRule(rule) 191 | rules = append(rules, *rule) 192 | } 193 | 194 | } 195 | 196 | return rules, scanner.Err() 197 | } 198 | 199 | // Find and returns the most appropriate rule for the domain name. 200 | func (l *List) Find(name string, options *FindOptions) *Rule { 201 | if options == nil { 202 | options = DefaultFindOptions 203 | } 204 | 205 | part := name 206 | for { 207 | rule, ok := l.rules[part] 208 | 209 | if ok && rule.Match(name) && !(options.IgnorePrivate && rule.Private) { 210 | return rule 211 | } 212 | 213 | i := strings.IndexRune(part, '.') 214 | if i < 0 { 215 | return options.DefaultRule 216 | } 217 | 218 | part = part[i+1:] 219 | } 220 | 221 | } 222 | 223 | // NewRule parses the rule content, creates and returns a Rule. 224 | // 225 | // The content of the rule MUST be encoded in ASCII (A-labels). 
226 | func NewRule(content string) (*Rule, error) { 227 | var rule *Rule 228 | var value string 229 | 230 | switch content[0] { 231 | case '*': // wildcard 232 | if content == "*" { 233 | value = "" 234 | } else { 235 | value = content[2:] 236 | } 237 | rule = &Rule{Type: WildcardType, Value: value, Length: len(Labels(value)) + 1} 238 | case '!': // exception 239 | value = content[1:] 240 | rule = &Rule{Type: ExceptionType, Value: value, Length: len(Labels(value))} 241 | default: // normal 242 | value = content 243 | rule = &Rule{Type: NormalType, Value: value, Length: len(Labels(value))} 244 | } 245 | 246 | return rule, nil 247 | } 248 | 249 | // NewRuleUnicode is like NewRule, but expects the content to be encoded in Unicode (U-labels). 250 | func NewRuleUnicode(content string) (*Rule, error) { 251 | var err error 252 | 253 | content, err = ToASCII(content) 254 | if err != nil { 255 | return nil, err 256 | } 257 | 258 | return NewRule(content) 259 | } 260 | 261 | // MustNewRule is like NewRule, but panics if the content cannot be parsed. 262 | func MustNewRule(content string) *Rule { 263 | rule, err := NewRule(content) 264 | if err != nil { 265 | panic(err) 266 | } 267 | return rule 268 | } 269 | 270 | // Match checks if the rule matches the name. 271 | // 272 | // A domain name is said to match a rule if and only if all of the following conditions are met: 273 | // - When the domain and rule are split into corresponding labels, 274 | // that the domain contains as many or more labels than the rule. 275 | // - Beginning with the right-most labels of both the domain and the rule, 276 | // and continuing for all labels in the rule, one finds that for every pair, 277 | // either they are identical, or that the label from the rule is "*". 
278 | // 279 | // See https://publicsuffix.org/list/ 280 | func (r *Rule) Match(name string) bool { 281 | left := strings.TrimSuffix(name, r.Value) 282 | 283 | // the name contains as many labels than the rule 284 | // this is a match, unless it's a wildcard 285 | // because the wildcard requires one more label 286 | if left == "" { 287 | return r.Type != WildcardType 288 | } 289 | 290 | // if there is one more label, the rule match 291 | // because either the rule is shorter than the domain 292 | // or the rule is a wildcard and there is one more label 293 | return left[len(left)-1:] == "." 294 | } 295 | 296 | // Decompose takes a name as input and decomposes it into a tuple of , 297 | // according to the rule definition and type. 298 | func (r *Rule) Decompose(name string) (result [2]string) { 299 | if r == DefaultRule { 300 | i := strings.LastIndexByte(name, '.') 301 | if i < 0 { 302 | return 303 | } 304 | result[0], result[1] = name[:i], name[i+1:] 305 | return 306 | } 307 | switch r.Type { 308 | case NormalType: 309 | name = strings.TrimSuffix(name, r.Value) 310 | if len(name) == 0 { 311 | return 312 | } 313 | result[0], result[1] = name[:len(name)-1], r.Value 314 | case WildcardType: 315 | name := strings.TrimSuffix(name, r.Value) 316 | if len(name) == 0 { 317 | return 318 | } 319 | name = name[:len(name)-1] 320 | i := strings.LastIndexByte(name, '.') 321 | if i < 0 { 322 | return 323 | } 324 | result[0], result[1] = name[:i], name[i+1:]+"."+r.Value 325 | case ExceptionType: 326 | i := strings.IndexRune(r.Value, '.') 327 | if i < 0 { 328 | return 329 | } 330 | suffix := r.Value[i+1:] 331 | name = strings.TrimSuffix(name, suffix) 332 | if len(name) == 0 { 333 | return 334 | } 335 | result[0], result[1] = name[:len(name)-1], suffix 336 | } 337 | return 338 | } 339 | 340 | // Labels decomposes given domain name into labels, 341 | // corresponding to the dot-separated tokens. 
342 | func Labels(name string) []string { 343 | return strings.Split(name, ".") 344 | } 345 | 346 | // DomainName represents a domain name. 347 | type DomainName struct { 348 | TLD string 349 | SLD string 350 | TRD string 351 | Rule *Rule 352 | } 353 | 354 | // String joins the components of the domain name into a single string. 355 | // Empty labels are skipped. 356 | // 357 | // Examples: 358 | // 359 | // DomainName{"com", "example"}.String() 360 | // // example.com 361 | // DomainName{"com", "example", "www"}.String() 362 | // // www.example.com 363 | func (d *DomainName) String() string { 364 | switch { 365 | case d.TLD == "": 366 | return "" 367 | case d.SLD == "": 368 | return d.TLD 369 | case d.TRD == "": 370 | return d.SLD + "." + d.TLD 371 | default: 372 | return d.TRD + "." + d.SLD + "." + d.TLD 373 | } 374 | } 375 | 376 | // Domain extract and return the domain name from the input 377 | // using the default (Public Suffix) List. 378 | // 379 | // Examples: 380 | // 381 | // publicsuffix.Domain("example.com") 382 | // // example.com 383 | // publicsuffix.Domain("www.example.com") 384 | // // example.com 385 | // publicsuffix.Domain("www.example.co.uk") 386 | // // example.co.uk 387 | func Domain(name string) (string, error) { 388 | return DomainFromListWithOptions(DefaultList, name, DefaultFindOptions) 389 | } 390 | 391 | // Parse decomposes the name into TLD, SLD, TRD 392 | // using the default (Public Suffix) List, 393 | // and returns the result as a DomainName 394 | // 395 | // Examples: 396 | // 397 | // list := NewList() 398 | // 399 | // publicsuffix.Parse("example.com") 400 | // // &DomainName{"com", "example"} 401 | // publicsuffix.Parse("www.example.com") 402 | // // &DomainName{"com", "example", "www"} 403 | // publicsuffix.Parse("www.example.co.uk") 404 | // // &DomainName{"co.uk", "example"} 405 | func Parse(name string) (*DomainName, error) { 406 | return ParseFromListWithOptions(DefaultList, name, DefaultFindOptions) 407 | } 408 | 409 | // 
DomainFromListWithOptions extract and return the domain name from the input 410 | // using the (Public Suffix) list passed as argument. 411 | // 412 | // Examples: 413 | // 414 | // list := NewList() 415 | // 416 | // publicsuffix.DomainFromListWithOptions(list, "example.com") 417 | // // example.com 418 | // publicsuffix.DomainFromListWithOptions(list, "www.example.com") 419 | // // example.com 420 | // publicsuffix.DomainFromListWithOptions(list, "www.example.co.uk") 421 | // // example.co.uk 422 | func DomainFromListWithOptions(l *List, name string, options *FindOptions) (string, error) { 423 | dn, err := ParseFromListWithOptions(l, name, options) 424 | if err != nil { 425 | return "", err 426 | } 427 | return dn.SLD + "." + dn.TLD, nil 428 | } 429 | 430 | // ParseFromListWithOptions decomposes the name into TLD, SLD, TRD 431 | // using the (Public Suffix) list passed as argument, 432 | // and returns the result as a DomainName 433 | // 434 | // Examples: 435 | // 436 | // list := NewList() 437 | // 438 | // publicsuffix.ParseFromListWithOptions(list, "example.com") 439 | // // &DomainName{"com", "example"} 440 | // publicsuffix.ParseFromListWithOptions(list, "www.example.com") 441 | // // &DomainName{"com", "example", "www"} 442 | // publicsuffix.ParseFromListWithOptions(list, "www.example.co.uk") 443 | // // &DomainName{"co.uk", "example"} 444 | func ParseFromListWithOptions(l *List, name string, options *FindOptions) (*DomainName, error) { 445 | n, err := normalize(name) 446 | if err != nil { 447 | return nil, err 448 | } 449 | 450 | r := l.Find(n, options) 451 | if r == nil { 452 | return nil, fmt.Errorf("no rule matching name %s", name) 453 | } 454 | 455 | parts := r.Decompose(n) 456 | left, tld := parts[0], parts[1] 457 | if tld == "" { 458 | return nil, fmt.Errorf("%s is a suffix", n) 459 | } 460 | 461 | dn := &DomainName{ 462 | Rule: r, 463 | TLD: tld, 464 | } 465 | if i := strings.LastIndexByte(left, '.'); i < 0 { 466 | dn.SLD = left 467 | } else { 468 
| dn.TRD = left[:i] 469 | dn.SLD = left[i+1:] 470 | } 471 | return dn, nil 472 | } 473 | 474 | func normalize(name string) (string, error) { 475 | ret := strings.ToLower(name) 476 | 477 | if ret == "" { 478 | return "", fmt.Errorf("name is blank") 479 | } 480 | if ret[0] == '.' { 481 | return "", fmt.Errorf("name %s starts with a dot", ret) 482 | } 483 | 484 | return ret, nil 485 | } 486 | 487 | // ToASCII is a wrapper for idna.ToASCII. 488 | // 489 | // This wrapper exists because idna.ToASCII backward-compatibility was broken twice in few months 490 | // and I can't call this package directly anymore. The wrapper performs some terrible-but-necessary 491 | // before-after replacements to make sure an already ASCII input always results in the same output 492 | // even if passed through ToASCII. 493 | // 494 | // See golang/net@67957fd0b1, golang/net@f2499483f9, golang/net@78ebe5c8b6, 495 | // and weppos/publicsuffix-go#66. 496 | func ToASCII(s string) (string, error) { 497 | // .example.com should be .example.com 498 | // ..example.com should be ..example.com 499 | if strings.HasPrefix(s, ".") { 500 | dotIndex := 0 501 | for i := 0; i < len(s); i++ { 502 | if s[i] == '.' { 503 | dotIndex = i 504 | } else { 505 | break 506 | } 507 | } 508 | out, err := idna.ToASCII(s[dotIndex+1:]) 509 | out = s[:dotIndex+1] + out 510 | return out, err 511 | } 512 | 513 | return idna.ToASCII(s) 514 | } 515 | 516 | // ToUnicode is a wrapper for idna.ToUnicode. 517 | // 518 | // See ToASCII for more details about why this wrapper exists. 519 | func ToUnicode(s string) (string, error) { 520 | return idna.ToUnicode(s) 521 | } 522 | 523 | // CookieJarList implements the cookiejar.PublicSuffixList interface. 524 | var CookieJarList cookiejar.PublicSuffixList = cookiejarList{DefaultList} 525 | 526 | type cookiejarList struct { 527 | List *List 528 | } 529 | 530 | // PublicSuffix implements cookiejar.PublicSuffixList. 
531 | func (l cookiejarList) PublicSuffix(domain string) string { 532 | rule := l.List.Find(domain, nil) 533 | return rule.Decompose(domain)[1] 534 | } 535 | 536 | // PublicSuffix implements cookiejar.String. 537 | func (cookiejarList) String() string { 538 | return ListVersion 539 | } 540 | -------------------------------------------------------------------------------- /publicsuffix/publicsuffix_test.go: -------------------------------------------------------------------------------- 1 | package publicsuffix 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | 7 | xlib "golang.org/x/net/publicsuffix" 8 | ) 9 | 10 | func TestNewListFromString(t *testing.T) { 11 | src := ` 12 | // This Source Code Form is subject to the terms of the Mozilla Public 13 | // License, v. 2.0. If a copy of the MPL was not distributed with this 14 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 15 | 16 | // ===BEGIN ICANN DOMAINS=== 17 | 18 | // ac : http://en.wikipedia.org/wiki/.ac 19 | ac 20 | com.ac 21 | 22 | // ===END ICANN DOMAINS=== 23 | // ===BEGIN PRIVATE DOMAINS=== 24 | 25 | // Google, Inc. 
26 | blogspot.com 27 | 28 | // ===END PRIVATE DOMAINS=== 29 | ` 30 | 31 | list, err := NewListFromString(src, nil) 32 | if err != nil { 33 | t.Fatalf("Parse returned an error: %v", err) 34 | } 35 | 36 | if want, got := 3, list.Size(); want != got { 37 | t.Errorf("Parse returned a list with %v rules, want %v", got, want) 38 | t.Fatalf("%v", list.rules) 39 | } 40 | 41 | rules := list.rules 42 | var testRules []Rule 43 | 44 | testRules = []Rule{} 45 | for _, rule := range rules { 46 | if rule.Private == false { 47 | testRules = append(testRules, *rule) 48 | } 49 | } 50 | if want, got := 2, len(testRules); want != got { 51 | t.Errorf("Parse returned a list with %v IANA rules, want %v", got, want) 52 | t.Fatalf("%v", testRules) 53 | } 54 | 55 | testRules = []Rule{} 56 | for _, rule := range rules { 57 | if rule.Private == true { 58 | testRules = append(testRules, *rule) 59 | } 60 | } 61 | if want, got := 1, len(testRules); want != got { 62 | t.Errorf("Parse returned a list with %v PRIVATE rules, want %v", got, want) 63 | t.Fatalf("%v", testRules) 64 | } 65 | } 66 | 67 | func TestNewListFromString_IDNAInputIsUnicode(t *testing.T) { 68 | src := ` 69 | // xn--d1alf ("mkd", Macedonian) : MK 70 | // MARnet 71 | мкд 72 | 73 | // xn--l1acc ("mon", Mongolian) : MN 74 | xn--l1acc 75 | ` 76 | 77 | list, err := NewListFromString(src, nil) 78 | if err != nil { 79 | t.Fatalf("Parse returned error: %v", err) 80 | } 81 | 82 | if want, got := 2, list.Size(); want != got { 83 | t.Errorf("Parse returned a list with %v rules, want %v", got, want) 84 | t.Fatalf("%v", list.rules) 85 | } 86 | 87 | if rule := list.Find("hello.xn--d1alf", &FindOptions{DefaultRule: nil}); rule == nil { 88 | t.Fatalf("Find(%v) returned nil", "hello.xn--d1alf") 89 | } 90 | if rule := list.Find("hello.мкд", &FindOptions{DefaultRule: nil}); rule != nil { 91 | t.Fatalf("Find(%v) expected to return nil, got %v", "hello.xn--d1alf", rule) 92 | } 93 | if rule := list.Find("hello.xn--l1acc", &FindOptions{DefaultRule: 
nil}); rule == nil { 94 | t.Fatalf("Find(%v) returned nil", "hello.xn--l1acc") 95 | } 96 | } 97 | 98 | func TestNewListFromString_IDNAInputIsAscii(t *testing.T) { 99 | src := ` 100 | // xn--d1alf ("mkd", Macedonian) : MK 101 | // MARnet 102 | xn--d1alf 103 | 104 | // xn--l1acc ("mon", Mongolian) : MN 105 | xn--l1acc 106 | ` 107 | 108 | list, err := NewListFromString(src, &ParserOption{ASCIIEncoded: true}) 109 | if err != nil { 110 | t.Fatalf("Parse returned error: %v", err) 111 | } 112 | 113 | if want, got := 2, list.Size(); want != got { 114 | t.Errorf("Parse returned a list with %v rules, want %v", got, want) 115 | t.Fatalf("%v", list.rules) 116 | } 117 | 118 | if rule := list.Find("hello.xn--d1alf", &FindOptions{DefaultRule: nil}); rule == nil { 119 | t.Fatalf("Find(%v) returned nil", "hello.xn--d1alf") 120 | } 121 | if rule := list.Find("hello.мкд", &FindOptions{DefaultRule: nil}); rule != nil { 122 | t.Fatalf("Find(%v) expected to return nil, got %v", "hello.xn--d1alf", rule) 123 | } 124 | if rule := list.Find("hello.xn--l1acc", &FindOptions{DefaultRule: nil}); rule == nil { 125 | t.Fatalf("Find(%v) returned nil", "hello.xn--l1acc") 126 | } 127 | } 128 | 129 | func TestNewListFromFile(t *testing.T) { 130 | list, err := NewListFromFile("../fixtures/list-simple.txt", nil) 131 | if err != nil { 132 | t.Fatalf("Parse returned an error: %v", err) 133 | } 134 | 135 | if want, got := 3, list.Size(); want != got { 136 | t.Errorf("Parse returned a list with %v rules, want %v", got, want) 137 | t.Fatalf("%v", list.rules) 138 | } 139 | 140 | rules := list.rules 141 | var testRules []Rule 142 | 143 | testRules = []Rule{} 144 | for _, rule := range rules { 145 | if rule.Private == false { 146 | testRules = append(testRules, *rule) 147 | } 148 | } 149 | if want, got := 2, len(testRules); want != got { 150 | t.Errorf("Parse returned a list with %v IANA rules, want %v", got, want) 151 | t.Fatalf("%v", testRules) 152 | } 153 | 154 | testRules = []Rule{} 155 | for _, rule := 
range rules { 156 | if rule.Private == true { 157 | testRules = append(testRules, *rule) 158 | } 159 | } 160 | if want, got := 1, len(testRules); want != got { 161 | t.Errorf("Parse returned a list with %v PRIVATE rules, want %v", got, want) 162 | t.Fatalf("%v", testRules) 163 | } 164 | } 165 | 166 | func TestListAddRule(t *testing.T) { 167 | list := NewList() 168 | 169 | if list.Size() != 0 { 170 | t.Fatalf("Empty list should have 0 rules, got %v", list.Size()) 171 | } 172 | 173 | rule := MustNewRule("com") 174 | list.AddRule(rule) 175 | if list.Size() != 1 { 176 | t.Fatalf("List should have 1 rule, got %v", list.Size()) 177 | } 178 | for _, got := range list.rules { 179 | if !reflect.DeepEqual(rule, got) { 180 | t.Fatalf("List[0] expected to be %v, got %v", rule, got) 181 | } 182 | } 183 | } 184 | 185 | type listFindTestCase struct { 186 | input string 187 | expected *Rule 188 | } 189 | 190 | func TestListFind(t *testing.T) { 191 | src := ` 192 | // This Source Code Form is subject to the terms of the Mozilla Public 193 | // License, v. 2.0. If a copy of the MPL was not distributed with this 194 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 195 | 196 | // ===BEGIN ICANN DOMAINS=== 197 | 198 | // com 199 | com 200 | 201 | // uk 202 | *.uk 203 | *.sch.uk 204 | !bl.uk 205 | !british-library.uk 206 | 207 | // io 208 | io 209 | 210 | // jp 211 | jp 212 | *.kawasaki.jp 213 | *.kitakyushu.jp 214 | *.kobe.jp 215 | *.nagoya.jp 216 | *.sapporo.jp 217 | *.sendai.jp 218 | *.yokohama.jp 219 | !city.kawasaki.jp 220 | !city.kitakyushu.jp 221 | !city.kobe.jp 222 | !city.nagoya.jp 223 | !city.sapporo.jp 224 | !city.sendai.jp 225 | !city.yokohama.jp 226 | 227 | // ===END ICANN DOMAINS=== 228 | // ===BEGIN PRIVATE DOMAINS=== 229 | 230 | // Google, Inc. 231 | blogspot.com 232 | 233 | // ===END PRIVATE DOMAINS=== 234 | ` 235 | 236 | // TODO(weppos): ability to set type to a rule. 
237 | p1 := MustNewRule("blogspot.com") 238 | p1.Private = true 239 | 240 | testCases := []listFindTestCase{ 241 | // match standard 242 | {"example.com", MustNewRule("com")}, 243 | {"foo.example.com", MustNewRule("com")}, 244 | 245 | // match wildcard 246 | {"example.uk", MustNewRule("*.uk")}, 247 | {"example.co.uk", MustNewRule("*.uk")}, 248 | {"foo.example.co.uk", MustNewRule("*.uk")}, 249 | 250 | // match exception 251 | {"british-library.uk", MustNewRule("!british-library.uk")}, 252 | {"foo.british-library.uk", MustNewRule("!british-library.uk")}, 253 | 254 | // match default rule 255 | {"test", DefaultRule}, 256 | {"example.test", DefaultRule}, 257 | {"foo.example.test", DefaultRule}, 258 | 259 | // match private 260 | {"blogspot.com", p1}, 261 | {"foo.blogspot.com", p1}, 262 | 263 | // input is wildcard rule 264 | {"kobe.jp", MustNewRule("jp")}, 265 | } 266 | 267 | list, err := NewListFromString(src, nil) 268 | if err != nil { 269 | t.Fatalf("Unable to parse list: %v", err) 270 | } 271 | 272 | for _, testCase := range testCases { 273 | if want, got := testCase.expected, list.Find(testCase.input, nil); !reflect.DeepEqual(want, got) { 274 | t.Errorf("Find(%v) = %v, want %v", testCase.input, got, want) 275 | } 276 | } 277 | } 278 | 279 | func TestNewRule_Normal(t *testing.T) { 280 | rule := MustNewRule("com") 281 | want := &Rule{Type: NormalType, Value: "com", Length: 1} 282 | 283 | if !reflect.DeepEqual(want, rule) { 284 | t.Fatalf("NewRule returned %v, want %v", rule, want) 285 | } 286 | } 287 | 288 | func TestNewRule_Wildcard(t *testing.T) { 289 | rule := MustNewRule("*.example.com") 290 | want := &Rule{Type: WildcardType, Value: "example.com", Length: 3} 291 | 292 | if !reflect.DeepEqual(want, rule) { 293 | t.Fatalf("NewRule returned %v, want %v", rule, want) 294 | } 295 | } 296 | 297 | func TestNewRule_Exception(t *testing.T) { 298 | rule := MustNewRule("!example.com") 299 | want := &Rule{Type: ExceptionType, Value: "example.com", Length: 2} 300 | 301 | if 
!reflect.DeepEqual(want, rule) { 302 | t.Fatalf("NewRule returned %v, want %v", rule, want) 303 | } 304 | } 305 | 306 | func TestNewRule_FromASCII(t *testing.T) { 307 | rule, _ := NewRule("xn--l1acc") 308 | 309 | if want := "xn--l1acc"; rule.Value != want { 310 | t.Fatalf("NewRule == %v, want %v", rule.Value, want) 311 | } 312 | } 313 | func TestNewRule_FromUnicode(t *testing.T) { 314 | rule, _ := NewRule("мон") 315 | 316 | // No transformation is performed 317 | if want := "мон"; rule.Value != want { 318 | t.Fatalf("NewRule == %v, want %v", rule.Value, want) 319 | } 320 | } 321 | 322 | func TestNewRuleUnicode_FromASCII(t *testing.T) { 323 | rule, _ := NewRuleUnicode("xn--l1acc") 324 | 325 | if want := "xn--l1acc"; rule.Value != want { 326 | t.Fatalf("NewRule == %v, want %v", rule.Value, want) 327 | } 328 | } 329 | 330 | func TestNewRuleUnicode_FromUnicode(t *testing.T) { 331 | rule, _ := NewRuleUnicode("мон") 332 | 333 | if want := "xn--l1acc"; rule.Value != want { 334 | t.Fatalf("NewRule == %v, want %v", rule.Value, want) 335 | } 336 | } 337 | 338 | type ruleMatchTestCase struct { 339 | rule *Rule 340 | input string 341 | expected bool 342 | } 343 | 344 | func TestRuleMatch(t *testing.T) { 345 | testCases := []ruleMatchTestCase{ 346 | // standard match 347 | {MustNewRule("uk"), "uk", true}, 348 | {MustNewRule("uk"), "example.uk", true}, 349 | {MustNewRule("uk"), "example.co.uk", true}, 350 | {MustNewRule("co.uk"), "example.co.uk", true}, 351 | 352 | // special rules match 353 | {MustNewRule("*.com"), "com", false}, 354 | {MustNewRule("*.com"), "example.com", true}, 355 | {MustNewRule("*.com"), "foo.example.com", true}, 356 | {MustNewRule("!example.com"), "com", false}, 357 | {MustNewRule("!example.com"), "example.com", true}, 358 | {MustNewRule("!example.com"), "foo.example.com", true}, 359 | 360 | // TLD mismatch 361 | {MustNewRule("gk"), "example.uk", false}, 362 | {MustNewRule("gk"), "example.co.uk", false}, 363 | 364 | // general mismatch 365 | 
{MustNewRule("uk.co"), "example.co.uk", false}, 366 | {MustNewRule("go.uk"), "example.co.uk", false}, 367 | // rule is longer than input, should not match 368 | {MustNewRule("co.uk"), "uk", false}, 369 | 370 | // partial matches/mismatches 371 | {MustNewRule("co"), "example.co.uk", false}, 372 | {MustNewRule("example"), "example.uk", false}, 373 | {MustNewRule("le.it"), "example.it", false}, 374 | {MustNewRule("le.it"), "le.it", true}, 375 | {MustNewRule("le.it"), "foo.le.it", true}, 376 | } 377 | 378 | for _, testCase := range testCases { 379 | if testCase.rule.Match(testCase.input) != testCase.expected { 380 | t.Errorf("Expected %v to %v match %v", testCase.rule.Value, testCase.expected, testCase.input) 381 | } 382 | } 383 | } 384 | 385 | type ruleDecomposeTestCase struct { 386 | rule *Rule 387 | input string 388 | expected [2]string 389 | } 390 | 391 | func TestRuleDecompose(t *testing.T) { 392 | testCases := []ruleDecomposeTestCase{ 393 | {MustNewRule("com"), "com", [2]string{"", ""}}, 394 | {MustNewRule("com"), "example.com", [2]string{"example", "com"}}, 395 | {MustNewRule("com"), "foo.example.com", [2]string{"foo.example", "com"}}, 396 | 397 | {MustNewRule("!british-library.uk"), "uk", [2]string{"", ""}}, 398 | {MustNewRule("!british-library.uk"), "british-library.uk", [2]string{"british-library", "uk"}}, 399 | {MustNewRule("!british-library.uk"), "foo.british-library.uk", [2]string{"foo.british-library", "uk"}}, 400 | 401 | {MustNewRule("*.com"), "com", [2]string{"", ""}}, 402 | {MustNewRule("*.com"), "example.com", [2]string{"", ""}}, 403 | {MustNewRule("*.com"), "foo.example.com", [2]string{"foo", "example.com"}}, 404 | {MustNewRule("*.com"), "bar.foo.example.com", [2]string{"bar.foo", "example.com"}}, 405 | } 406 | 407 | for _, testCase := range testCases { 408 | if got := testCase.rule.Decompose(testCase.input); !reflect.DeepEqual(got, testCase.expected) { 409 | t.Errorf("Expected %v to decompose %v into %v, got %v", testCase.rule.Value, testCase.input, 
testCase.expected, got) 410 | } 411 | } 412 | } 413 | 414 | func TestLabels(t *testing.T) { 415 | testCases := map[string][]string{ 416 | "com": {"com"}, 417 | "example.com": {"example", "com"}, 418 | "www.example.com": {"www", "example", "com"}, 419 | } 420 | 421 | for input, expected := range testCases { 422 | if output := Labels(input); !reflect.DeepEqual(output, expected) { 423 | t.Errorf("Labels(%v) = %v, want %v", input, output, expected) 424 | } 425 | } 426 | } 427 | 428 | func TestParseFromListWithOptions_RuleFound(t *testing.T) { 429 | list := NewList() 430 | rule := MustNewRule("com") 431 | _ = list.AddRule(rule) 432 | 433 | input := "foobar.com" 434 | 435 | got, err := ParseFromListWithOptions(list, "foobar.com", &FindOptions{IgnorePrivate: true}) 436 | if err != nil { 437 | t.Fatalf("ParseFromListWithOptions(%v) error: %v", input, err) 438 | } 439 | 440 | want := &DomainName{TLD: "com", SLD: "foobar", Rule: rule} 441 | if !reflect.DeepEqual(want, got) { 442 | t.Errorf("ParseFromListWithOptions(%v) = %v, want %v", input, got, want) 443 | } 444 | } 445 | 446 | func TestParseFromListWithOptions_RuleNotFoundDefaultNil(t *testing.T) { 447 | list := NewList() 448 | rule := MustNewRule("com") 449 | _ = list.AddRule(rule) 450 | 451 | input := "foobar.localdomain" 452 | 453 | _, err := ParseFromListWithOptions(list, "foobar.localdomain", &FindOptions{IgnorePrivate: true}) 454 | if err == nil { 455 | t.Fatalf("ParseFromListWithOptions(%v) should have returned error", input) 456 | } 457 | 458 | if want := "no rule matching name foobar.localdomain"; err.Error() != want { 459 | t.Errorf("Error expected to be %v, got %v", want, err) 460 | } 461 | } 462 | 463 | func TestParseFromListWithOptions_RuleNotFoundDefaultRule(t *testing.T) { 464 | list := NewList() 465 | rule := MustNewRule("com") 466 | _ = list.AddRule(rule) 467 | 468 | input := "foobar.localdomain" 469 | 470 | got, err := ParseFromListWithOptions(list, "foobar.localdomain", &FindOptions{IgnorePrivate: true, 
DefaultRule: DefaultRule}) 471 | if err != nil { 472 | t.Fatalf("ParseFromListWithOptions(%v) error: %v", input, err) 473 | } 474 | 475 | want := &DomainName{TLD: "localdomain", SLD: "foobar", Rule: DefaultRule} 476 | if !reflect.DeepEqual(want, got) { 477 | t.Errorf("ParseFromListWithOptions(%v) = %v, want %v", input, got, want) 478 | } 479 | } 480 | 481 | func TestToASCII(t *testing.T) { 482 | testCases := []string{ 483 | "example.com", 484 | ".example.com", 485 | "..example.com", 486 | } 487 | 488 | for _, input := range testCases { 489 | output, err := ToASCII(input) 490 | if err != nil { 491 | t.Errorf("ToASCII(%s) returned error", input) 492 | } 493 | if output != input { 494 | t.Errorf("ToASCII(%s) = %s, want %s", input, output, input) 495 | } 496 | } 497 | } 498 | 499 | func TestCookieJarList(t *testing.T) { 500 | testCases := map[string]string{ 501 | "example.com": "com", 502 | "www.example.com": "com", 503 | "example.co.uk": "co.uk", 504 | "www.example.co.uk": "co.uk", 505 | "example.blogspot.com": "blogspot.com", 506 | "www.example.blogspot.com": "blogspot.com", 507 | "parliament.uk": "uk", 508 | "www.parliament.uk": "uk", 509 | // not listed 510 | "www.example.test": "test", 511 | } 512 | 513 | for input, suffix := range testCases { 514 | if output := CookieJarList.PublicSuffix(input); output != suffix { 515 | t.Errorf("CookieJarList.PublicSuffix(%v) = %v, want %v", input, output, suffix) 516 | } 517 | } 518 | } 519 | 520 | var benchmarkTestCases = map[string]string{ 521 | "example.com": "example.com", 522 | "example.id.au": "example.id.au", 523 | "www.ck": "www.ck", 524 | "foo.bar.xn--55qx5d.cn": "bar.xn--55qx5d.cn", 525 | "a.b.c.minami.fukuoka.jp": "c.minami.fukuoka.jp", 526 | "posts-and-telecommunications.museum": "", 527 | "www.example.pvt.k12.ma.us": "example.pvt.k12.ma.us", 528 | "many.lol": "many.lol", 529 | "the.russian.for.moscow.is.xn--80adxhks": "is.xn--80adxhks", 530 | "blah.blah.s3-us-west-1.amazonaws.com": 
"blah.s3-us-west-1.amazonaws.com", 531 | "thing.dyndns.org": "thing.dyndns.org", 532 | "nosuchtld": "", 533 | } 534 | 535 | func benchmarkDomain(b *testing.B, domainFunc func(string) (string, error)) { 536 | var got string 537 | for i := 0; i < b.N; i++ { 538 | for input := range benchmarkTestCases { 539 | got, _ = domainFunc(input) 540 | } 541 | } 542 | _ = got 543 | } 544 | 545 | func BenchmarkDomain(b *testing.B) { 546 | benchmarkDomain(b, Domain) 547 | } 548 | 549 | func BenchmarkXNet(b *testing.B) { 550 | benchmarkDomain(b, xlib.EffectiveTLDPlusOne) 551 | } 552 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | echo "" > coverage.txt 5 | 6 | for d in $(go list ./... | grep -v vendor); do 7 | go test -v -race -coverprofile=profile.out -covermode=atomic "$d" 8 | if [ -f profile.out ]; then 9 | cat profile.out >> coverage.txt 10 | rm profile.out 11 | fi 12 | done 13 | --------------------------------------------------------------------------------