├── .github └── workflows │ └── ci.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── LICENSE-APACHE ├── README.md ├── benches └── benches.rs ├── src ├── error.rs └── lib.rs └── tests ├── list.rs ├── public_suffix_list.dat └── tests.txt /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | check: 7 | name: Check on v1.68.2 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout sources 11 | uses: actions/checkout@v2 12 | 13 | - name: Install stable toolchain 14 | uses: actions-rs/toolchain@v1 15 | with: 16 | toolchain: 1.68.2 17 | override: true 18 | 19 | - name: Run cargo check 20 | uses: actions-rs/cargo@v1 21 | with: 22 | command: check 23 | args: --features std 24 | 25 | test: 26 | name: Test on stable 27 | runs-on: ubuntu-latest 28 | steps: 29 | - name: Checkout sources 30 | uses: actions/checkout@v2 31 | 32 | - name: Install stable toolchain 33 | uses: actions-rs/toolchain@v1 34 | with: 35 | toolchain: stable 36 | override: true 37 | components: rustfmt, clippy 38 | 39 | - name: Run cargo fmt 40 | uses: actions-rs/cargo@v1 41 | with: 42 | command: fmt 43 | args: --all -- --check 44 | 45 | - name: Run cargo clippy 46 | uses: actions-rs/cargo@v1 47 | with: 48 | command: clippy 49 | args: --all-features -- -D warnings 50 | 51 | - name: Run cargo test 52 | uses: actions-rs/cargo@v1 53 | with: 54 | command: test 55 | args: --all-features 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | *.swp 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "publicsuffix" 3 | description = "Extract root domain and suffix from a domain name" 4 | version 
= "2.3.0" 5 | license = "MIT/Apache-2.0" 6 | repository = "https://github.com/rushmorem/publicsuffix" 7 | documentation = "https://docs.rs/publicsuffix" 8 | readme = "README.md" 9 | keywords = ["tld", "psl", "no_std", "tldextract", "domain"] 10 | authors = ["rushmorem "] 11 | edition = "2018" 12 | 13 | [features] 14 | # Punycode is enabled by default for feature parity with the `psl` crate, 15 | # making switching between the two work seamlessly out of the box 16 | default = ["punycode"] 17 | 18 | # Adds support for looking up domain names in any case 19 | anycase = ["unicase"] 20 | 21 | # Adds support for looking up domain names in ascii format (normalised punycode) 22 | # see `idna::domain_to_ascii`. Since `idna` doesn't support `no_std` environments 23 | # this feature disables `no_std` 24 | punycode = ["idna"] 25 | 26 | # Adds support for `std::error::Error` 27 | std = [] 28 | 29 | [dependencies] 30 | psl-types = "2.0.11" 31 | 32 | # Optional dependencies 33 | hashbrown = { version = "0.15.1", features = ["inline-more"], optional = true } 34 | idna = { version = "1.0", optional = true } 35 | unicase = { version = "2.6.0", default-features = false, optional = true } 36 | 37 | [dev-dependencies] 38 | rspec = "1.0.0" 39 | 40 | [package.metadata.docs.rs] 41 | all-features = true 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Rushmore Mushambi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following 
conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PublicSuffix 2 | 3 | A native Rust library for Mozilla's Public Suffix List 4 | 5 | [![CI](https://github.com/rushmorem/publicsuffix/actions/workflows/ci.yml/badge.svg)](https://github.com/rushmorem/publicsuffix/actions/workflows/ci.yml) 6 | [![Latest Version](https://img.shields.io/crates/v/publicsuffix.svg)](https://crates.io/crates/publicsuffix) 7 | [![Crates.io downloads](https://img.shields.io/crates/d/publicsuffix)](https://crates.io/crates/publicsuffix) 8 | [![Docs](https://docs.rs/publicsuffix/badge.svg)](https://docs.rs/publicsuffix) 9 | [![Minimum supported Rust version](https://img.shields.io/badge/rustc-1.68.2+-yellow.svg)](https://www.rust-lang.org) 10 | ![Maintenance](https://img.shields.io/badge/maintenance-actively--developed-brightgreen.svg) 11 | 
[![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) 12 | 13 | This library uses Mozilla's [Public Suffix List](https://publicsuffix.org) to reliably determine the suffix of a domain name. This crate provides a dynamic list that can be updated at runtime. If you need a faster, though static, list, please use the [psl](https://crates.io/crates/psl) crate instead. 14 | 15 | *NB*: v1 of this crate contained logic to validate domain names and email addresses. As of v2, this functionality has moved to the [addr](https://crates.io/crates/addr) crate. This crate also no longer downloads the list for you. 16 | 17 | ## Setting Up 18 | 19 | Add this crate to your `Cargo.toml`: 20 | 21 | ```toml 22 | [dependencies] 23 | publicsuffix = "2" 24 | ``` 25 | 26 | ## Examples 27 | 28 | ```rust 29 | use publicsuffix::{List, Psl, Type}; 30 | 31 | // the official list can be found at 32 | // https://publicsuffix.org/list/public_suffix_list.dat 33 | let list: List = "<-- your public suffix list here -->".parse()?; 34 | 35 | let suffix = list.suffix(b"www.example.com")?; 36 | assert_eq!(suffix, "com"); 37 | assert_eq!(suffix.typ(), Some(Type::Icann)); 38 | 39 | let domain = list.domain(b"www.example.com")?; 40 | assert_eq!(domain, "example.com"); 41 | assert_eq!(domain.suffix(), "com"); 42 | 43 | let domain = list.domain("www.食狮.中国".as_bytes())?; 44 | assert_eq!(domain, "食狮.中国"); 45 | assert_eq!(domain.suffix(), "中国"); 46 | 47 | let domain = list.domain(b"www.xn--85x722f.xn--55qx5d.cn")?; 48 | assert_eq!(domain, "xn--85x722f.xn--55qx5d.cn"); 49 | assert_eq!(domain.suffix(), "xn--55qx5d.cn"); 50 | 51 | let domain = list.domain(b"a.b.example.uk.com")?; 52 | assert_eq!(domain, "example.uk.com"); 53 | assert_eq!(domain.suffix(), "uk.com"); 54 | 55 | let domain = list.domain(b"_tcp.example.com.")?; 56 | assert_eq!(domain, "example.com."); 57 | assert_eq!(domain.suffix(), "com."); 58 | ``` 59 | -------------------------------------------------------------------------------- 
/benches/benches.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use publicsuffix::{List, Psl}; 6 | use std::sync::LazyLock; 7 | use test::Bencher; 8 | 9 | // Parsed lazily on first access and shared by every benchmark below. 10 | // Uses `std::sync::LazyLock` (as `tests/list.rs` does) because `lazy_static` 11 | // is not declared under `[dev-dependencies]` in `Cargo.toml`. 12 | static LIST: LazyLock<List> = 13 | LazyLock::new(|| include_str!("../tests/public_suffix_list.dat").parse().unwrap()); 14 | 15 | const DOMAIN: &[u8] = b"www.example.com"; 16 | 17 | #[bench] 18 | fn bench_find(b: &mut Bencher) { 19 | b.iter(|| LIST.find(DOMAIN.rsplit(|x| *x == b'.'))); 20 | } 21 | 22 | #[bench] 23 | fn bench_suffix(b: &mut Bencher) { 24 | b.iter(|| LIST.suffix(DOMAIN).unwrap()); 25 | } 26 | 27 | #[bench] 28 | fn bench_domain(b: &mut Bencher) { 29 | b.iter(|| LIST.domain(DOMAIN).unwrap()); 30 | } 31 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | use alloc::string::String; 2 | use core::fmt; 3 | 4 | /// Errors returned by this crate 5 | #[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] 6 | #[non_exhaustive] 7 | pub enum Error { 8 | /// The rule (carried in the payload) contains an empty label 9 | EmptyLabel(String), 10 | /// An exception rule (one starting with `!`) that contains no `.`; the payload is the rule 11 | ExceptionAtFirstLabel(String), 12 | /// The provided list is not valid 13 | InvalidList, 14 | /// The rule (carried in the payload) is invalid 15 | InvalidRule(String), 16 | /// The provided list is not UTF-8 encoded 17 | ListNotUtf8Encoded, 18 | } 19 | 20 | impl fmt::Display for Error { 21 | #[inline] 22 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 23 | match self { 24 | Error::EmptyLabel(rule) => write!(f, "rule `{}` contains an empty label", rule), 25 | Error::ExceptionAtFirstLabel(rule) => { 26 | write!(f, "`{}`; exceptions only valid at end of rule", rule) 27 | } 28 | Error::InvalidList => write!(f, "the provided list is not valid"), 29 | Error::InvalidRule(rule) => write!(f, "rule `{}` is invalid", rule), 30 | Error::ListNotUtf8Encoded => write!(f, "the provided list is not UTF8 encoded"), 31 | } 32 | } 33 | } 34 | 35 | #[cfg(feature = "std")] 36 | impl std::error::Error for Error {} 37 | 
-------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A native Rust library for Mozilla's Public Suffix List 2 | 3 | #![cfg_attr(not(any(feature = "punycode", feature = "std")), no_std)] 4 | #![forbid(unsafe_code)] 5 | 6 | extern crate alloc; 7 | 8 | mod error; 9 | 10 | #[cfg(feature = "anycase")] 11 | use alloc::borrow::Cow; 12 | use alloc::borrow::ToOwned; 13 | #[cfg(not(any(feature = "hashbrown", feature = "punycode", feature = "std")))] 14 | use alloc::collections::BTreeMap as Map; 15 | #[cfg(not(feature = "anycase"))] 16 | use alloc::vec::Vec; 17 | use core::str::{from_utf8, FromStr}; 18 | #[cfg(feature = "hashbrown")] 19 | use hashbrown::HashMap as Map; 20 | #[cfg(all(not(feature = "hashbrown"), any(feature = "punycode", feature = "std")))] 21 | use std::collections::HashMap as Map; 22 | #[cfg(feature = "anycase")] 23 | use unicase::UniCase; 24 | 25 | pub use error::Error; 26 | pub use psl_types::{Domain, Info, List as Psl, Suffix, Type}; 27 | 28 | /// The official URL of the list 29 | pub const LIST_URL: &str = "https://publicsuffix.org/list/public_suffix_list.dat"; 30 | 31 | #[cfg(not(feature = "anycase"))] 32 | type Children = Map, Node>; 33 | 34 | #[cfg(feature = "anycase")] 35 | type Children = Map>, Node>; 36 | 37 | const WILDCARD: &str = "*"; 38 | 39 | #[derive(Debug, Clone, Default, Eq, PartialEq)] 40 | struct Node { 41 | children: Children, 42 | leaf: Option, 43 | } 44 | 45 | #[derive(Debug, Clone, Copy, Eq, PartialEq)] 46 | struct Leaf { 47 | is_exception: bool, 48 | typ: Type, 49 | } 50 | 51 | /// A dynamic public suffix list 52 | #[derive(Debug, Clone, Default, Eq, PartialEq)] 53 | pub struct List { 54 | rules: Node, 55 | typ: Option, 56 | } 57 | 58 | impl List { 59 | /// Creates a new list with default wildcard rule support 60 | #[inline] 61 | #[must_use] 62 | pub fn new() -> Self { 63 | Self::default() 64 | } 65 | 66 | 
/// Creates a new list from a byte slice 67 | /// 68 | /// # Errors 69 | /// 70 | /// Returns an `Err` if the list is not UTF-8 encoded 71 | /// or if its format is invalid. 72 | #[inline] 73 | pub fn from_bytes(bytes: &[u8]) -> Result { 74 | from_utf8(bytes) 75 | .map_err(|_| Error::ListNotUtf8Encoded)? 76 | .parse() 77 | } 78 | 79 | /// Checks to see if the list is empty, ignoring the wildcard rule 80 | #[inline] 81 | #[must_use] 82 | pub fn is_empty(&self) -> bool { 83 | self.rules.children.is_empty() 84 | } 85 | 86 | #[inline] 87 | fn append(&mut self, mut rule: &str, typ: Type) -> Result<(), Error> { 88 | let mut is_exception = false; 89 | if rule.starts_with('!') { 90 | if !rule.contains('.') { 91 | return Err(Error::ExceptionAtFirstLabel(rule.to_owned())); 92 | } 93 | is_exception = true; 94 | rule = &rule[1..]; 95 | } 96 | 97 | let mut current = &mut self.rules; 98 | for label in rule.rsplit('.') { 99 | if label.is_empty() { 100 | return Err(Error::EmptyLabel(rule.to_owned())); 101 | } 102 | 103 | #[cfg(not(feature = "anycase"))] 104 | let key = label.as_bytes().to_owned(); 105 | #[cfg(feature = "anycase")] 106 | let key = UniCase::new(Cow::from(label.to_owned())); 107 | 108 | current = current.children.entry(key).or_default(); 109 | } 110 | 111 | current.leaf = Some(Leaf { is_exception, typ }); 112 | 113 | Ok(()) 114 | } 115 | } 116 | 117 | #[cfg(feature = "anycase")] 118 | macro_rules! 
anycase_key { 119 | ($label:ident) => { 120 | match from_utf8($label) { 121 | Ok(label) => UniCase::new(Cow::from(label)), 122 | Err(_) => return Info { len: 0, typ: None }, 123 | } 124 | }; 125 | } 126 | 127 | impl Psl for List { 128 | #[inline] 129 | fn find<'a, T>(&self, mut labels: T) -> Info 130 | where 131 | T: Iterator, 132 | { 133 | let mut rules = &self.rules; 134 | 135 | // the first label 136 | // it's special because we always need it whether or not 137 | // it's in our hash map (because of the implicit wildcard) 138 | let mut info = match labels.next() { 139 | Some(label) => { 140 | let mut info = Info { 141 | len: label.len(), 142 | typ: None, 143 | }; 144 | #[cfg(not(feature = "anycase"))] 145 | let node_opt = rules.children.get(label); 146 | #[cfg(feature = "anycase")] 147 | let node_opt = rules.children.get(&anycase_key!(label)); 148 | match node_opt { 149 | Some(node) => { 150 | info.typ = node.leaf.map(|leaf| leaf.typ); 151 | rules = node; 152 | } 153 | None => return info, 154 | } 155 | info 156 | } 157 | None => return Info { len: 0, typ: None }, 158 | }; 159 | 160 | // the rest of the labels 161 | let mut len_so_far = info.len; 162 | for label in labels { 163 | #[cfg(not(feature = "anycase"))] 164 | let node_opt = rules.children.get(label); 165 | #[cfg(feature = "anycase")] 166 | let node_opt = rules.children.get(&anycase_key!(label)); 167 | match node_opt { 168 | Some(node) => rules = node, 169 | None => { 170 | #[cfg(not(feature = "anycase"))] 171 | let node_opt = rules.children.get(WILDCARD.as_bytes()); 172 | #[cfg(feature = "anycase")] 173 | let node_opt = rules.children.get(&UniCase::new(Cow::from(WILDCARD))); 174 | match node_opt { 175 | Some(node) => rules = node, 176 | None => break, 177 | } 178 | } 179 | } 180 | let label_plus_dot = label.len() + 1; 181 | if let Some(leaf) = rules.leaf { 182 | if self.typ.is_none() || self.typ == Some(leaf.typ) { 183 | info.typ = Some(leaf.typ); 184 | if leaf.is_exception { 185 | info.len = 
len_so_far; 186 | break; 187 | } 188 | info.len = len_so_far + label_plus_dot; 189 | } 190 | } 191 | len_so_far += label_plus_dot; 192 | } 193 | 194 | info 195 | } 196 | } 197 | 198 | impl FromStr for List { 199 | type Err = Error; 200 | 201 | #[inline] 202 | fn from_str(s: &str) -> Result { 203 | let mut typ = None; 204 | let mut list = List::new(); 205 | for line in s.lines() { 206 | match line { 207 | line if line.contains("BEGIN ICANN DOMAINS") => { 208 | typ = Some(Type::Icann); 209 | } 210 | line if line.contains("BEGIN PRIVATE DOMAINS") => { 211 | typ = Some(Type::Private); 212 | } 213 | line if line.starts_with("//") => { 214 | continue; 215 | } 216 | line => match typ { 217 | Some(typ) => { 218 | let rule = match line.split_whitespace().next() { 219 | Some(rule) => rule, 220 | None => continue, 221 | }; 222 | list.append(rule, typ)?; 223 | #[cfg(feature = "punycode")] 224 | { 225 | let ascii = idna::domain_to_ascii(rule) 226 | .map_err(|_| Error::InvalidRule(rule.to_owned()))?; 227 | list.append(&ascii, typ)?; 228 | } 229 | } 230 | None => { 231 | continue; 232 | } 233 | }, 234 | } 235 | } 236 | if list.is_empty() { 237 | return Err(Error::InvalidList); 238 | } 239 | Ok(list) 240 | } 241 | } 242 | 243 | /// A list of only ICANN suffixes 244 | #[derive(Debug, Clone, Default, Eq, PartialEq)] 245 | pub struct IcannList(List); 246 | 247 | impl From for IcannList { 248 | #[inline] 249 | fn from(mut list: List) -> Self { 250 | list.typ = Some(Type::Icann); 251 | Self(list) 252 | } 253 | } 254 | 255 | impl From for List { 256 | #[inline] 257 | fn from(IcannList(mut list): IcannList) -> Self { 258 | list.typ = None; 259 | list 260 | } 261 | } 262 | 263 | impl IcannList { 264 | /// Creates a new list from a byte slice 265 | /// 266 | /// # Errors 267 | /// 268 | /// Returns an `Err` if the list is not UTF-8 encoded 269 | /// or if its format is invalid. 
270 | #[inline] 271 | pub fn from_bytes(bytes: &[u8]) -> Result { 272 | let list = List::from_bytes(bytes)?; 273 | Ok(list.into()) 274 | } 275 | 276 | /// Checks to see if the list is empty, ignoring the wildcard rule 277 | #[inline] 278 | #[must_use] 279 | pub fn is_empty(&self) -> bool { 280 | self.0.is_empty() 281 | } 282 | } 283 | 284 | impl FromStr for IcannList { 285 | type Err = Error; 286 | 287 | #[inline] 288 | fn from_str(s: &str) -> Result { 289 | let list = List::from_str(s)?; 290 | Ok(list.into()) 291 | } 292 | } 293 | 294 | impl Psl for IcannList { 295 | #[inline] 296 | fn find<'a, T>(&self, labels: T) -> Info 297 | where 298 | T: Iterator, 299 | { 300 | self.0.find(labels) 301 | } 302 | } 303 | 304 | /// A list of only private suffixes 305 | #[derive(Debug, Clone, Default, Eq, PartialEq)] 306 | pub struct PrivateList(List); 307 | 308 | impl From for PrivateList { 309 | #[inline] 310 | fn from(mut list: List) -> Self { 311 | list.typ = Some(Type::Private); 312 | Self(list) 313 | } 314 | } 315 | 316 | impl From for List { 317 | #[inline] 318 | fn from(PrivateList(mut list): PrivateList) -> Self { 319 | list.typ = None; 320 | list 321 | } 322 | } 323 | 324 | impl PrivateList { 325 | /// Creates a new list from a byte slice 326 | /// 327 | /// # Errors 328 | /// 329 | /// Returns an `Err` if the list is not UTF-8 encoded 330 | /// or if its format is invalid. 
331 | #[inline] 332 | pub fn from_bytes(bytes: &[u8]) -> Result { 333 | let list = List::from_bytes(bytes)?; 334 | Ok(list.into()) 335 | } 336 | 337 | /// Checks to see if the list is empty, ignoring the wildcard rule 338 | #[inline] 339 | #[must_use] 340 | pub fn is_empty(&self) -> bool { 341 | self.0.is_empty() 342 | } 343 | } 344 | 345 | impl FromStr for PrivateList { 346 | type Err = Error; 347 | 348 | #[inline] 349 | fn from_str(s: &str) -> Result { 350 | let list = List::from_str(s)?; 351 | Ok(list.into()) 352 | } 353 | } 354 | 355 | impl Psl for PrivateList { 356 | #[inline] 357 | fn find<'a, T>(&self, labels: T) -> Info 358 | where 359 | T: Iterator, 360 | { 361 | self.0.find(labels) 362 | } 363 | } 364 | 365 | #[cfg(test)] 366 | mod tests { 367 | use super::*; 368 | 369 | const LIST: &[u8] = b" 370 | // BEGIN ICANN DOMAINS 371 | com.uk 372 | "; 373 | 374 | #[test] 375 | fn list_construction() { 376 | let list = List::from_bytes(LIST).unwrap(); 377 | let expected = List { 378 | typ: None, 379 | rules: Node { 380 | children: { 381 | let mut children = Children::default(); 382 | children.insert( 383 | #[cfg(not(feature = "anycase"))] 384 | b"uk".to_vec(), 385 | #[cfg(feature = "anycase")] 386 | UniCase::new(Cow::from("uk")), 387 | Node { 388 | children: { 389 | let mut children = Children::default(); 390 | children.insert( 391 | #[cfg(not(feature = "anycase"))] 392 | b"com".to_vec(), 393 | #[cfg(feature = "anycase")] 394 | UniCase::new(Cow::from("com")), 395 | Node { 396 | children: Default::default(), 397 | leaf: Some(Leaf { 398 | is_exception: false, 399 | typ: Type::Icann, 400 | }), 401 | }, 402 | ); 403 | children 404 | }, 405 | leaf: None, 406 | }, 407 | ); 408 | children 409 | }, 410 | leaf: None, 411 | }, 412 | }; 413 | assert_eq!(list, expected); 414 | } 415 | 416 | #[test] 417 | fn find_localhost() { 418 | let list = List::from_bytes(LIST).unwrap(); 419 | let labels = b"localhost".rsplit(|x| *x == b'.'); 420 | assert_eq!(list.find(labels), Info { 
len: 9, typ: None });
    }

    // A bare `uk` label is expected to come back as a 2-byte suffix with no type.
    #[test]
    fn find_uk() {
        let list = List::from_bytes(LIST).unwrap();
        let labels = b"uk".rsplit(|x| *x == b'.');
        assert_eq!(list.find(labels), Info { len: 2, typ: None });
    }

    // `com.uk` is expected to match as a 6-byte ICANN suffix.
    #[test]
    fn find_com_uk() {
        let list = List::from_bytes(LIST).unwrap();
        let labels = b"com.uk".rsplit(|x| *x == b'.');
        assert_eq!(
            list.find(labels),
            Info {
                len: 6,
                typ: Some(Type::Icann)
            }
        );
    }

    // Builds a one-rule list inline and expects that rule to match in full.
    #[test]
    fn find_ide_kyoto_jp() {
        let list = List::from_bytes(b"// BEGIN ICANN DOMAINS\nide.kyoto.jp").unwrap();
        let labels = b"ide.kyoto.jp".rsplit(|x| *x == b'.');
        assert_eq!(
            list.find(labels),
            Info {
                len: 12,
                typ: Some(Type::Icann)
            }
        );
    }
}

--------------------------------------------------------------------------------
/tests/list.rs:
--------------------------------------------------------------------------------
use publicsuffix::{List, Psl, Type};
use rspec::report::ExampleResult;
use std::sync::LazyLock;
use std::{env, str};

/// The bundled public suffix list, parsed once on first use and shared by
/// every example below.
static LIST: LazyLock<List> =
    LazyLock::new(|| include_str!("public_suffix_list.dat").parse().unwrap());

/// Runs three rspec suites against [`LIST`]:
///
/// 1. the cases in `tests.txt` (input and expected registrable domain),
/// 2. extra inputs paired with the suffix they should yield,
/// 3. extra inputs paired with whether the suffix is known and its type.
///
/// Setting the `PSL_TLD` environment variable restricts all three suites to
/// inputs ending in that value (see `expected_tld`).
#[test]
fn list_behaviour() {
    rspec::run(&rspec::describe("the official test", (), |ctx| {
        // Cases are only read once the first blank line of the fixture has
        // been seen; whatever precedes it is skipped.
        let mut parse = false;

        // `tests.txt` was downloaded from
        // https://raw.githubusercontent.com/publicsuffix/list/master/tests/tests.txt
        for (i, line) in include_str!("tests.txt").lines().enumerate() {
            // 1-based, so every diagnostic names the line an editor shows.
            let line_no = i + 1;

            if line.trim().is_empty() {
                parse = true;
                continue;
            }
            if line.starts_with("//") || !parse {
                continue;
            }

            let mut fields = line.split_whitespace();
            // The line is not blank, so a first field is always present; the
            // `else` arm only keeps the original "skip empty" behaviour.
            let Some(first) = fields.next() else {
                continue;
            };
            // The fixture spells an empty input as the literal `null`.
            let input = if first == "null" { "" } else { first };

            if !expected_tld(input)
                || (cfg!(not(feature = "punycode")) && input.contains("xn--"))
            {
                continue;
            }

            let (expected_root, expected_suffix) = match fields.next() {
                Some("null") => (None, None),
                Some(root) => {
                    // The expected suffix is the expected root without its
                    // leftmost label (empty when the root has no dot).
                    let suffix = root.split_once('.').map_or("", |(_, rest)| rest);
                    (Some(root.to_string()), Some(suffix.to_string()))
                }
                None => panic!(
                    "line {} of the test file doesn't seem to be valid",
                    line_no
                ),
            };

            // Inputs with a leading dot or an empty label are treated as
            // invalid without consulting the list.
            let (found_root, found_suffix) =
                if input.starts_with('.') || input.contains("..") {
                    (None, None)
                } else {
                    LIST.domain(input.to_lowercase().as_bytes())
                        .map(|d| {
                            let domain = str::from_utf8(d.as_bytes()).unwrap().to_string();
                            let suffix =
                                str::from_utf8(d.suffix().as_bytes()).unwrap().to_string();
                            (Some(domain), Some(suffix))
                        })
                        .unwrap_or((None, None))
                };

            ctx.when(msg(format!("input is `{}`", input)), |ctx| {
                // Read before `expected_root` is moved into the closure below.
                let full_domain = expected_root.is_some();

                ctx.it(
                    msg(format!("means the root domain {}", val(&expected_root))),
                    move |_| {
                        if expected_root == found_root {
                            ExampleResult::Success
                        } else {
                            let msg = format!(
                                "expected `{:?}` but found `{:?}` on line {} of `tests.txt`",
                                expected_root, found_root, line_no
                            );
                            ExampleResult::Failure(Some(msg))
                        }
                    },
                );

                // The suffix is only checked when a root domain is expected.
                if full_domain {
                    ctx.it(
                        msg(format!("also means the suffix {}", val(&expected_suffix))),
                        move |_| {
                            if expected_suffix == found_suffix {
                                ExampleResult::Success
                            } else {
                                let msg = format!(
                                    "expected `{:?}` but found `{:?}` on line {} of `tests.txt`",
                                    expected_suffix, found_suffix, line_no
                                );
                                ExampleResult::Failure(Some(msg))
                            }
                        },
                    );
                }
            });
        }
    }));

    rspec::run(&rspec::describe("suffix tests", (), |ctx| {
        // (input, suffix expected from `LIST.suffix`)
        let extra = vec![
            (
                "gp-id-ter-acc-1.to.gp-kl-cas-11-ses001-ses-1.wdsl.5m.za",
                "za",
            ),
            ("yokohama.jp", "jp"),
            ("kobe.jp", "jp"),
            #[cfg(feature = "anycase")]
            ("foo.bar.platformsh.Site", "bar.platformsh.Site"),
            ("bar.platformsh.site", "bar.platformsh.site"),
            ("platform.sh", "sh"),
            ("sh", "sh"),
            (".", "."),
            ("example.com.", "com."),
            ("www.食狮.中国", "中国"),
            #[cfg(feature = "punycode")]
            ("www.xn--85x722f.xn--55qx5d.cn", "xn--55qx5d.cn"),
            ("a.b.example.uk.com", "uk.com"),
            ("_tcp.example.com.", "com."),
            ("airbroadband.co.z", "z"),
            ("th-tyo.access.aseinet.ne.jp", "ne.jp"),
        ];

        for (input, expected) in extra {
            if !expected_tld(input) {
                continue;
            }
            ctx.when(msg(format!("input is `{}`", input)), |ctx| {
                let expected_suffix = Some(expected);
                ctx.it(
                    msg(format!(
                        "means the suffix {}",
                        val(&expected_suffix.map(ToString::to_string))
                    )),
                    move |_| {
                        let suffix = LIST.suffix(input.as_bytes()).unwrap();
                        if suffix == expected {
                            ExampleResult::Success
                        } else {
                            let msg = format!(
                                "expected `{:?}` but found `{:?}`",
                                expected_suffix,
                                Some(str::from_utf8(suffix.as_bytes()).unwrap().to_string())
                            );
                            ExampleResult::Failure(Some(msg))
                        }
                    },
                );
            });
        }
    }));

    rspec::run(&rspec::describe("suffix type tests", (), |ctx| {
        // (input, whether the suffix is expected to be known, expected type)
        let extra = vec![
            (
                "gp-id-ter-acc-1.to.gp-kl-cas-11-ses001-ses-1.wdsl.5m.za",
                false,
                None,
            ),
            ("yokohama.jp", true, Some(Type::Icann)),
            ("kobe.jp", true, Some(Type::Icann)),
            ("foo.bar.platformsh.site", true, Some(Type::Private)),
            ("bar.platformsh.site", true, Some(Type::Private)),
            ("platform.sh", true, Some(Type::Icann)),
            ("sh", true, Some(Type::Icann)),
            (".", false, None),
            ("example.gafregsrse", false, None),
            ("www.食狮.中国", true, Some(Type::Icann)),
            #[cfg(feature = "punycode")]
            ("www.xn--85x722f.xn--55qx5d.cn", true, Some(Type::Icann)),
        ];

        for (input, known_suffix, typ) in extra {
            if !expected_tld(input) {
                continue;
            }
            ctx.when(msg(format!("input is `{}`", input)), |ctx| {
                ctx.it(
                    msg(format!(
                        "means known suffix {}",
                        val(&Some(known_suffix.to_string()))
                    )),
                    move |_| {
                        let suffix = LIST.suffix(input.as_bytes()).unwrap();
                        assert_eq!(suffix.typ(), typ);
                        if suffix.is_known() == known_suffix {
                            ExampleResult::Success
                        } else {
                            let msg = format!(
                                "expected `{:?}` but found `{:?}`",
                                known_suffix,
                                suffix.is_known()
                            );
                            ExampleResult::Failure(Some(msg))
                        }
                    },
                );
            });
        }
    }));
}

/// Converts a `String` to `&'static str`.
///
/// The allocation is leaked on purpose: the rspec calls above are handed
/// `&'static str` names, and leaking is acceptable for a test binary.
fn msg(s: String) -> &'static str {
    Box::leak(s.into_boxed_str())
}

/// Renders an expectation for use in an example name: the expected value
/// when there is one, or a note that the input is invalid.
fn val(s: &Option<String>) -> String {
    match s {
        Some(v) => format!("should be `{}`", v),
        None => "is invalid".to_string(),
    }
}

/// Returns `true` when `input` should be exercised.
///
/// With `PSL_TLD` unset, unreadable or blank, every input is accepted.
/// Otherwise the input, ignoring surrounding whitespace and trailing dots,
/// must end with the trimmed value of `PSL_TLD`.
fn expected_tld(input: &str) -> bool {
    let filter = env::var("PSL_TLD").unwrap_or_default();
    // Trim once and use the same value for both checks, so a padded setting
    // such as " jp " still matches.
    let filter = filter.trim();
    filter.is_empty() || input.trim().trim_end_matches('.').ends_with(filter)
}

--------------------------------------------------------------------------------
/tests/tests.txt:
--------------------------------------------------------------------------------
// Any copyright is dedicated to the Public Domain.
// https://creativecommons.org/publicdomain/zero/1.0/

// null input.
5 | null null 6 | // Mixed case. 7 | COM null 8 | example.COM example.com 9 | WwW.example.COM example.com 10 | // Leading dot. 11 | .com null 12 | .example null 13 | .example.com null 14 | .example.example null 15 | // Unlisted TLD. 16 | example null 17 | example.example example.example 18 | b.example.example example.example 19 | a.b.example.example example.example 20 | // Listed, but non-Internet, TLD. 21 | //local null 22 | //example.local null 23 | //b.example.local null 24 | //a.b.example.local null 25 | // TLD with only 1 rule. 26 | biz null 27 | domain.biz domain.biz 28 | b.domain.biz domain.biz 29 | a.b.domain.biz domain.biz 30 | // TLD with some 2-level rules. 31 | com null 32 | example.com example.com 33 | b.example.com example.com 34 | a.b.example.com example.com 35 | uk.com null 36 | example.uk.com example.uk.com 37 | b.example.uk.com example.uk.com 38 | a.b.example.uk.com example.uk.com 39 | test.ac test.ac 40 | // TLD with only 1 (wildcard) rule. 41 | mm null 42 | c.mm null 43 | b.c.mm b.c.mm 44 | a.b.c.mm b.c.mm 45 | // More complex TLD. 46 | jp null 47 | test.jp test.jp 48 | www.test.jp test.jp 49 | ac.jp null 50 | test.ac.jp test.ac.jp 51 | www.test.ac.jp test.ac.jp 52 | kyoto.jp null 53 | test.kyoto.jp test.kyoto.jp 54 | ide.kyoto.jp null 55 | b.ide.kyoto.jp b.ide.kyoto.jp 56 | a.b.ide.kyoto.jp b.ide.kyoto.jp 57 | c.kobe.jp null 58 | b.c.kobe.jp b.c.kobe.jp 59 | a.b.c.kobe.jp b.c.kobe.jp 60 | city.kobe.jp city.kobe.jp 61 | www.city.kobe.jp city.kobe.jp 62 | // TLD with a wildcard rule and exceptions. 63 | ck null 64 | test.ck null 65 | b.test.ck b.test.ck 66 | a.b.test.ck b.test.ck 67 | www.ck www.ck 68 | www.www.ck www.ck 69 | // US K12. 70 | us null 71 | test.us test.us 72 | www.test.us test.us 73 | ak.us null 74 | test.ak.us test.ak.us 75 | www.test.ak.us test.ak.us 76 | k12.ak.us null 77 | test.k12.ak.us test.k12.ak.us 78 | www.test.k12.ak.us test.k12.ak.us 79 | // IDN labels. 
80 | 食狮.com.cn 食狮.com.cn 81 | 食狮.公司.cn 食狮.公司.cn 82 | www.食狮.公司.cn 食狮.公司.cn 83 | shishi.公司.cn shishi.公司.cn 84 | 公司.cn null 85 | 食狮.中国 食狮.中国 86 | www.食狮.中国 食狮.中国 87 | shishi.中国 shishi.中国 88 | 中国 null 89 | // Same as above, but punycoded. 90 | xn--85x722f.com.cn xn--85x722f.com.cn 91 | xn--85x722f.xn--55qx5d.cn xn--85x722f.xn--55qx5d.cn 92 | www.xn--85x722f.xn--55qx5d.cn xn--85x722f.xn--55qx5d.cn 93 | shishi.xn--55qx5d.cn shishi.xn--55qx5d.cn 94 | xn--55qx5d.cn null 95 | xn--85x722f.xn--fiqs8s xn--85x722f.xn--fiqs8s 96 | www.xn--85x722f.xn--fiqs8s xn--85x722f.xn--fiqs8s 97 | shishi.xn--fiqs8s shishi.xn--fiqs8s 98 | xn--fiqs8s null 99 | --------------------------------------------------------------------------------