├── .gitignore ├── .travis.yml ├── AUTHORS.rst ├── CHANGELOG.rst ├── MANIFEST.in ├── README.rst ├── publicsuffix2.LICENSE ├── setup.cfg ├── setup.py ├── src └── publicsuffix2 │ ├── __init__.py │ ├── mpl-2.0.LICENSE │ ├── public_suffix_list.ABOUT │ └── public_suffix_list.dat ├── tests.py └── tests_mozilla.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | MANIFEST 3 | dist 4 | build 5 | /.project 6 | /.pydevproject 7 | /.settings/ 8 | *.egg-info/ 9 | /__pycache__/ 10 | /tmp/ 11 | /.cache/ 12 | /.pytest_cache/ 13 | /.eggs/ 14 | /.python-version 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.7" 5 | - "3.6" 6 | - "3.7" 7 | 8 | install: 9 | - pip install pytest 10 | - pip install -e . 11 | 12 | script: 13 | - py.test 14 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | - Mozilla and the public suffix list maintainers 6 | - David Wilson @dw 7 | - Tomaž Šolc @avian2 8 | - Philippe Ombredanne @pombredanne 9 | - Renée Burton @KnitCode 10 | - @vpiserchia 11 | - Kevin Olbrich @kevin-olbrich 12 | - Masahiro Honma @hiratara 13 | - Scott Kitterman @kitterma 14 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | --------- 3 | 4 | 2020-01-27 publicsuffix2 2.20200127 5 | 6 | * Update TLD list 7 | * Include tests in source distribution to help with Debian packaging by @kitterma 8 | * Update unicode/idna tests by @hiratara 9 | 10 | 11 | 2019-12-19 publicsuffix2 2.20191219 12 | 13 | * Add new strict mode to get_tld() by @hiratara 14 | * 
Update TLD list 15 | * Add tests from Mozilla test suite 16 | 17 | 18 | 2019-08-12 publicsuffix2 2.20190812 19 | 20 | * Fix regression in available tlds. 21 | * Format and streamline code. 22 | 23 | 24 | 2019-08-11 publicsuffix2 2.20190811 25 | 26 | * Update publicsuffix.file to the latest version from Mozilla. 27 | 28 | 29 | 2019-08-08 publicsuffix2 2.20190808 30 | 31 | * Add additional functionality and handles change to PSL format 32 | * Add attribute to retrieve the PSL as a list 33 | 34 | 35 | 2019-02-05 publicsuffix2 2.201902051213 36 | 37 | * Update publicsuffix.file to the latest version from Mozilla. 38 | * Restore a fetch() function by popular demand 39 | 40 | 41 | 2018-12-13 publicsuffix2 2.20181213 42 | 43 | * Update publicsuffix.file to the latest version from Mozilla. 44 | 45 | 46 | 2018-10-01 publicsuffix2 2.20180921.2 47 | 48 | * Update publicsuffix.file to the latest version from Mozilla. 49 | * Breaking API change: publicsuffix module renamed to publicsuffix2 50 | 51 | 52 | 2016-08-18 publicsuffix2 2.20160818 53 | 54 | * Update publicsuffix.file to the latest version from Mozilla. 55 | 56 | 57 | 2016-06-21 publicsuffix2 2.20160621 58 | 59 | * Update publicsuffix.file to the latest version from Mozilla. 60 | * Adopt new version scheme: major. 61 | 62 | 63 | 2015-10-12 publicsuffix2 2.1.0 64 | 65 | * Merged latest updates from publicsuffix 66 | * Added new convenience top level get_public_suffix_function caching 67 | a loaded list if needed. 68 | * Updated publicsuffix.file to the latest version from Mozilla. 69 | * Added an update_psl setup command to fetch and vendor the latest list 70 | Use as: python setup.py update_psl 71 | 72 | 73 | 2015-06-04 publicsuffix2 2.0.0 74 | 75 | * Forked publicsuffix, but kept the same API 76 | * Updated publicsuffix.file to the latest version from Mozilla. 77 | * Changed packaging to have the suffix list be package data 78 | and be wheel friendly. 
79 | * Use spaces indentation, not tabs 80 | 81 | 82 | 2014-01-14 publicsuffix 1.0.5 83 | 84 | * Correctly handle fully qualified domain names (thanks to Matthäus 85 | Wander). 86 | * Updated publicsuffix.txt to the latest version from Mozilla. 87 | 88 | 2013-01-02 publicsuffix 1.0.4 89 | 90 | * Added missing change log. 91 | 92 | 2013-01-02 publicsuffix 1.0.3 93 | 94 | * Updated publicsuffix.txt to the latest version from Mozilla. 95 | * Added trove classifiers. 96 | * Minor update of the README. 97 | 98 | 2011-10-10 publicsuffix 1.0.2 99 | 100 | * Compatibility with Python 3.x (thanks to Joern 101 | Koerner) and Python 2.5 102 | 103 | 2011-09-22 publicsuffix 1.0.1 104 | 105 | * Fixed installation issue under virtualenv (thanks to 106 | Mark McClain) 107 | 108 | 2011-07-29 publicsuffix 1.0.0 109 | 110 | * First release 111 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft src 2 | 3 | include AUTHORS.rst 4 | include CHANGELOG.rst 5 | include README.rst 6 | include publicsuffix2.LICENSE 7 | include tests.py 8 | include tests_mozilla.py 9 | include setup.py 10 | include setup.cfg 11 | 12 | global-exclude *.py[co] __pycache__ *.so *.pyd 13 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Public Suffix List module for Python 2 | ==================================== 3 | 4 | This module allows you to get the public suffix, as well as the registrable domain, 5 | of a domain name using the Public Suffix List from http://publicsuffix.org 6 | 7 | A public suffix is a domain suffix under which you can register domain 8 | names, or under which the suffix owner does not control the subdomains. 
9 | Some examples of public suffixes in the former case are ".com", 10 | ".co.uk" and "pvt.k12.wy.us"; examples of the latter case are "github.io" and 11 | "blogspot.com". The public suffix is sometimes referred to as the effective 12 | or extended TLD (eTLD). 13 | Accurately knowing the public suffix of a domain is useful when handling 14 | web browser cookies, highlighting the most important part of a domain name 15 | in a user interface or sorting URLs by web site. It is also used in a wide range 16 | of research and applications that leverage Domain Name System (DNS) data. 17 | 18 | This module builds the public suffix list as a Trie structure, making it more efficient 19 | than other string-based modules available for the same purpose. It can be used 20 | effectively in large-scale distributed environments, such as PySpark. 21 | 22 | This Python module comes with a copy of the Public Suffix List (PSL) so that it is 23 | usable out of the box. Newer versions try to provide reasonably fresh copies of 24 | this list. It also includes a convenience method to fetch the latest list. The PSL does 25 | change regularly. 26 | 27 | The code is a fork of the publicsuffix package and includes the same base API. In 28 | addition, it contains a few variants useful for certain use cases, such as the option to 29 | ignore wildcards or return only the extended TLD (eTLD). You just need to import publicsuffix2 instead. 30 | 31 | The public suffix list is now provided in UTF-8 format. To correctly process 32 | IDNA-encoded domains, either the query or the list must be converted. By default, the 33 | module converts the PSL. If your use case includes UTF-8 domains, e.g., '食狮.com.cn', 34 | you'll need to set the IDNA-encoding flag to False on instantiation (see examples below). 35 | Failure to use the correct encoding for your use case can lead to incorrect results for 36 | domains that utilize unicode characters. 
37 | 38 | The code is MIT-licensed and the publicsuffix data list is MPL-2.0-licensed. 39 | 40 | .. image:: https://api.travis-ci.org/nexB/python-publicsuffix2.png?branch=master 41 | :target: https://travis-ci.org/nexB/python-publicsuffix2 42 | :alt: master branch tests status 43 | 44 | .. image:: https://api.travis-ci.org/nexB/python-publicsuffix2.png?branch=develop 45 | :target: https://travis-ci.org/nexB/python-publicsuffix2 46 | :alt: develop branch tests status 47 | 48 | Usage 49 | ----- 50 | 51 | Install with:: 52 | 53 | pip install publicsuffix2 54 | 55 | The module provides functions to obtain the base domain, or sld, of an fqdn, as well as one 56 | to get just the public suffix. In addition, the functions accept a number of boolean parameters that 57 | control how wildcards are handled. In addition to the functions, the module exposes a class that 58 | parses the PSL, and allows for more control. 59 | 60 | The module provides two equivalent functions to query a domain name, and return the base domain, 61 | or second-level domain: get_public_suffix() and get_sld():: 62 | 63 | >>> from publicsuffix2 import get_public_suffix, get_sld 64 | >>> get_public_suffix('www.example.com') 65 | 'example.com' 66 | >>> get_sld('www.example.com') 67 | 'example.com' 68 | >>> get_public_suffix('www.example.co.uk') 69 | 'example.co.uk' 70 | >>> get_public_suffix('www.super.example.co.uk') 71 | 'example.co.uk' 72 | >>> get_sld("co.uk") # returns eTLD as is 73 | 'co.uk' 74 | 75 | These functions load and cache the public suffix list. To obtain the latest version of the 76 | PSL, use the fetch() function to first download the latest version. Alternatively, you can pass 77 | a custom list. 
78 | 79 | For more control, there is also a class that parses a Public 80 | Suffix List and allows the same queries on individual domain names:: 81 | 82 | >>> from publicsuffix2 import PublicSuffixList 83 | >>> psl = PublicSuffixList() 84 | >>> psl.get_public_suffix('www.example.com') 85 | 'example.com' 86 | >>> psl.get_public_suffix('www.example.co.uk') 87 | 'example.co.uk' 88 | >>> psl.get_public_suffix('www.super.example.co.uk') 89 | 'example.co.uk' 90 | >>> psl.get_sld('www.super.example.co.uk') 91 | 'example.co.uk' 92 | 93 | Note that the ``host`` part of a URL can contain strings that are 94 | not plain DNS domain names (IP addresses, Punycode-encoded names, name in 95 | combination with a port number or a username, etc.). It is up to the 96 | caller to ensure only domain names are passed to the get_public_suffix() 97 | method. 98 | 99 | The get_public_suffix() function and the PublicSuffixList class initializer accept 100 | an optional argument pointing to a public suffix file. This can either be a file 101 | path, an iterable of public suffix lines, or a file-like object pointing to an 102 | opened list:: 103 | 104 | >>> from publicsuffix2 import get_public_suffix 105 | >>> psl_file = 'path to some psl data file' 106 | >>> get_public_suffix('www.example.com', psl_file) 107 | 'example.com' 108 | 109 | Note that when using get_public_suffix() a global cache keeps the latest provided 110 | suffix list data. This will use the cached list loaded above:: 111 | 112 | >>> get_public_suffix('www.example.co.uk') 113 | 'example.co.uk' 114 | 115 | **IDNA-encoding.** The public suffix list is now in UTF-8 format. For those use cases that 116 | include IDNA-encoded domains, the list must be converted. Publicsuffix2 includes idna 117 | encoding as a parameter of the PublicSuffixList initialization and is true by 118 | default. 
For UTF-8 use cases, set the idna parameter to False:: 119 | 120 | >>> from publicsuffix2 import PublicSuffixList 121 | >>> psl = PublicSuffixList(idna=True) # on by default 122 | >>> psl.get_public_suffix('www.google.com') 123 | 'google.com' 124 | >>> psl = PublicSuffixList(idna=False) # use UTF-8 encodings 125 | >>> psl.get_public_suffix('食狮.com.cn') 126 | '食狮.com.cn' 127 | 128 | **Ignore wildcards.** In some use cases, particularly those related to large-scale domain processing, 129 | the user might want to ignore wildcards to create more aggregation. This is possible by setting 130 | the parameter wildcard=False.:: 131 | 132 | >>> psl.get_public_suffix('telinet.com.pg', wildcard=False) 133 | 'com.pg' 134 | >>> psl.get_public_suffix('telinet.com.pg', wildcard=True) 135 | 'telinet.com.pg' 136 | 137 | **Require valid eTLDs (strict).** In the publicsuffix2 module, a domain with an invalid TLD will still return 138 | a base domain, e.g.,:: 139 | 140 | >>> psl.get_public_suffix('www.mine.local') 141 | 'mine.local' 142 | 143 | This is useful for many use cases, while in others, we want to ensure that the domain includes a 144 | valid eTLD. In this case, the boolean parameter strict provides a solution. If this flag is set, 145 | an invalid TLD will return None.:: 146 | 147 | >>> psl.get_public_suffix('www.mine.local', strict=True) is None 148 | True 149 | 150 | **Return eTLD only.** The standard use case for publicsuffix2 is to return the registrable, 151 | or base, domain 152 | according to the public suffix list. In some cases, however, we only wish to find the eTLD 153 | itself. This is available via the get_tld() method.:: 154 | 155 | >>> psl.get_tld('www.google.com') 156 | 'com' 157 | >>> psl.get_tld('www.google.co.uk') 158 | 'co.uk' 159 | 160 | All of the methods and functions include the wildcard and strict parameters. 161 | 162 | For convenience, the public method get_sld() is available. 
This is identical to the method 163 | get_public_suffix() and is intended to clarify the output for some users. 164 | 165 | To **update the bundled suffix list** use the provided setup.py command:: 166 | 167 | python setup.py update_psl 168 | 169 | The updated list will be saved in `src/publicsuffix2/public_suffix_list.dat` 170 | and you can build a new wheel with this bundled data. 171 | 172 | Alternatively, there is a fetch() function that will fetch the latest version 173 | of a Public Suffix data file from https://publicsuffix.org/list/public_suffix_list.dat 174 | You can use it this way:: 175 | 176 | >>> from publicsuffix2 import get_public_suffix 177 | >>> from publicsuffix2 import fetch 178 | >>> psl_file = fetch() 179 | >>> get_public_suffix('www.example.com', psl_file) 180 | 'example.com' 181 | 182 | Note that once loaded, the data file is cached and therefore fetched only 183 | once. 184 | 185 | The extracted public suffix list, that is the tlds and their modifiers, is put into 186 | an instance variable, tlds, which can be accessed as an attribute, tlds.:: 187 | 188 | >>> psl = PublicSuffixList() 189 | >>> psl.tlds[:5] 190 | ['ac', 191 | 'com.ac', 192 | 'edu.ac', 193 | 'gov.ac', 194 | 'net.ac'] 195 | 196 | **Using the module in large-scale processing** 197 | If using this library in large-scale pyspark processing, you should instantiate the class as 198 | a global variable, not within a user function. The class methods can then be used within user 199 | functions for distributed processing. 200 | 201 | Source 202 | ------ 203 | 204 | Get a local copy of the development repository. The development takes 205 | place in the ``develop`` branch. Stable releases are tagged in the ``master`` 206 | branch:: 207 | 208 | git clone https://github.com/nexB/python-publicsuffix2.git 209 | 210 | 211 | History 212 | ------- 213 | This code is forked from Tomaž Šolc's fork of David Wilson's code. 
214 | 215 | Tomaž Šolc's code originally at: 216 | 217 | https://www.tablix.org/~avian/git/publicsuffix.git 218 | 219 | Copyright (c) 2014 Tomaž Šolc 220 | 221 | David Wilson's code was originally at: 222 | 223 | http://code.google.com/p/python-public-suffix-list/ 224 | 225 | Copyright (c) 2009 David Wilson 226 | 227 | 228 | License 229 | ------- 230 | 231 | The code is MIT-licensed. 232 | The vendored public suffix list data from Mozilla is under the MPL-2.0. 233 | 234 | Copyright (c) 2015 nexB Inc. and others. 235 | 236 | Copyright (c) 2014 Tomaž Šolc 237 | 238 | Copyright (c) 2009 David Wilson 239 | 240 | Permission is hereby granted, free of charge, to any person obtaining a 241 | copy of this software and associated documentation files (the "Software"), 242 | to deal in the Software without restriction, including without limitation 243 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 244 | and/or sell copies of the Software, and to permit persons to whom the 245 | Software is furnished to do so, subject to the following conditions: 246 | 247 | The above copyright notice and this permission notice shall be included in 248 | all copies or substantial portions of the Software. 249 | 250 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 251 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 252 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 253 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 254 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 255 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 256 | DEALINGS IN THE SOFTWARE. 257 | -------------------------------------------------------------------------------- /publicsuffix2.LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) nexB Inc. and others. 
2 | This code is based on Tomaž Šolc fork of David Wilson code originally at 3 | https://www.tablix.org/~avian/git/publicsuffix.git 4 | 5 | Copyright (c) 2014 Tomaž Šolc 6 | 7 | Python module included in this distribution is based on the code downloaded 8 | from http://code.google.com/p/python-public-suffix-list/, which is 9 | available under the following license: 10 | 11 | Copyright (c) 2009 David Wilson 12 | 13 | Permission is hereby granted, free of charge, to any person obtaining a 14 | copy of this software and associated documentation files (the "Software"), 15 | to deal in the Software without restriction, including without limitation 16 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 17 | and/or sell copies of the Software, and to permit persons to whom the 18 | Software is furnished to do so, subject to the following conditions: 19 | 20 | The above copyright notice and this permission notice shall be included in 21 | all copies or substantial portions of the Software. 22 | 23 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 28 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 29 | DEALINGS IN THE SOFTWARE. 30 | 31 | 32 | The Public Suffix List vendored in this distribution has been downloaded 33 | from http://publicsuffix.org/public_suffix_list.dat 34 | This data file is licensed under the MPL-2.0 license. 35 | http://mozilla.org/MPL/2.0/ 36 | 37 | Mozilla Public License Version 2.0 38 | ================================== 39 | 40 | 1. Definitions 41 | -------------- 42 | 43 | 1.1. 
"Contributor" 44 | means each individual or legal entity that creates, contributes to 45 | the creation of, or owns Covered Software. 46 | 47 | 1.2. "Contributor Version" 48 | means the combination of the Contributions of others (if any) used 49 | by a Contributor and that particular Contributor's Contribution. 50 | 51 | 1.3. "Contribution" 52 | means Covered Software of a particular Contributor. 53 | 54 | 1.4. "Covered Software" 55 | means Source Code Form to which the initial Contributor has attached 56 | the notice in Exhibit A, the Executable Form of such Source Code 57 | Form, and Modifications of such Source Code Form, in each case 58 | including portions thereof. 59 | 60 | 1.5. "Incompatible With Secondary Licenses" 61 | means 62 | 63 | (a) that the initial Contributor has attached the notice described 64 | in Exhibit B to the Covered Software; or 65 | 66 | (b) that the Covered Software was made available under the terms of 67 | version 1.1 or earlier of the License, but not also under the 68 | terms of a Secondary License. 69 | 70 | 1.6. "Executable Form" 71 | means any form of the work other than Source Code Form. 72 | 73 | 1.7. "Larger Work" 74 | means a work that combines Covered Software with other material, in 75 | a separate file or files, that is not Covered Software. 76 | 77 | 1.8. "License" 78 | means this document. 79 | 80 | 1.9. "Licensable" 81 | means having the right to grant, to the maximum extent possible, 82 | whether at the time of the initial grant or subsequently, any and 83 | all of the rights conveyed by this License. 84 | 85 | 1.10. "Modifications" 86 | means any of the following: 87 | 88 | (a) any file in Source Code Form that results from an addition to, 89 | deletion from, or modification of the contents of Covered 90 | Software; or 91 | 92 | (b) any new file in Source Code Form that contains any Covered 93 | Software. 94 | 95 | 1.11. 
"Patent Claims" of a Contributor 96 | means any patent claim(s), including without limitation, method, 97 | process, and apparatus claims, in any patent Licensable by such 98 | Contributor that would be infringed, but for the grant of the 99 | License, by the making, using, selling, offering for sale, having 100 | made, import, or transfer of either its Contributions or its 101 | Contributor Version. 102 | 103 | 1.12. "Secondary License" 104 | means either the GNU General Public License, Version 2.0, the GNU 105 | Lesser General Public License, Version 2.1, the GNU Affero General 106 | Public License, Version 3.0, or any later versions of those 107 | licenses. 108 | 109 | 1.13. "Source Code Form" 110 | means the form of the work preferred for making modifications. 111 | 112 | 1.14. "You" (or "Your") 113 | means an individual or a legal entity exercising rights under this 114 | License. For legal entities, "You" includes any entity that 115 | controls, is controlled by, or is under common control with You. For 116 | purposes of this definition, "control" means (a) the power, direct 117 | or indirect, to cause the direction or management of such entity, 118 | whether by contract or otherwise, or (b) ownership of more than 119 | fifty percent (50%) of the outstanding shares or beneficial 120 | ownership of such entity. 121 | 122 | 2. License Grants and Conditions 123 | -------------------------------- 124 | 125 | 2.1. 
Grants 126 | 127 | Each Contributor hereby grants You a world-wide, royalty-free, 128 | non-exclusive license: 129 | 130 | (a) under intellectual property rights (other than patent or trademark) 131 | Licensable by such Contributor to use, reproduce, make available, 132 | modify, display, perform, distribute, and otherwise exploit its 133 | Contributions, either on an unmodified basis, with Modifications, or 134 | as part of a Larger Work; and 135 | 136 | (b) under Patent Claims of such Contributor to make, use, sell, offer 137 | for sale, have made, import, and otherwise transfer either its 138 | Contributions or its Contributor Version. 139 | 140 | 2.2. Effective Date 141 | 142 | The licenses granted in Section 2.1 with respect to any Contribution 143 | become effective for each Contribution on the date the Contributor first 144 | distributes such Contribution. 145 | 146 | 2.3. Limitations on Grant Scope 147 | 148 | The licenses granted in this Section 2 are the only rights granted under 149 | this License. No additional rights or licenses will be implied from the 150 | distribution or licensing of Covered Software under this License. 151 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 152 | Contributor: 153 | 154 | (a) for any code that a Contributor has removed from Covered Software; 155 | or 156 | 157 | (b) for infringements caused by: (i) Your and any other third party's 158 | modifications of Covered Software, or (ii) the combination of its 159 | Contributions with other software (except as part of its Contributor 160 | Version); or 161 | 162 | (c) under Patent Claims infringed by Covered Software in the absence of 163 | its Contributions. 164 | 165 | This License does not grant any rights in the trademarks, service marks, 166 | or logos of any Contributor (except as may be necessary to comply with 167 | the notice requirements in Section 3.4). 168 | 169 | 2.4. 
Subsequent Licenses 170 | 171 | No Contributor makes additional grants as a result of Your choice to 172 | distribute the Covered Software under a subsequent version of this 173 | License (see Section 10.2) or under the terms of a Secondary License (if 174 | permitted under the terms of Section 3.3). 175 | 176 | 2.5. Representation 177 | 178 | Each Contributor represents that the Contributor believes its 179 | Contributions are its original creation(s) or it has sufficient rights 180 | to grant the rights to its Contributions conveyed by this License. 181 | 182 | 2.6. Fair Use 183 | 184 | This License is not intended to limit any rights You have under 185 | applicable copyright doctrines of fair use, fair dealing, or other 186 | equivalents. 187 | 188 | 2.7. Conditions 189 | 190 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 191 | in Section 2.1. 192 | 193 | 3. Responsibilities 194 | ------------------- 195 | 196 | 3.1. Distribution of Source Form 197 | 198 | All distribution of Covered Software in Source Code Form, including any 199 | Modifications that You create or to which You contribute, must be under 200 | the terms of this License. You must inform recipients that the Source 201 | Code Form of the Covered Software is governed by the terms of this 202 | License, and how they can obtain a copy of this License. You may not 203 | attempt to alter or restrict the recipients' rights in the Source Code 204 | Form. 205 | 206 | 3.2. 
Distribution of Executable Form 207 | 208 | If You distribute Covered Software in Executable Form then: 209 | 210 | (a) such Covered Software must also be made available in Source Code 211 | Form, as described in Section 3.1, and You must inform recipients of 212 | the Executable Form how they can obtain a copy of such Source Code 213 | Form by reasonable means in a timely manner, at a charge no more 214 | than the cost of distribution to the recipient; and 215 | 216 | (b) You may distribute such Executable Form under the terms of this 217 | License, or sublicense it under different terms, provided that the 218 | license for the Executable Form does not attempt to limit or alter 219 | the recipients' rights in the Source Code Form under this License. 220 | 221 | 3.3. Distribution of a Larger Work 222 | 223 | You may create and distribute a Larger Work under terms of Your choice, 224 | provided that You also comply with the requirements of this License for 225 | the Covered Software. If the Larger Work is a combination of Covered 226 | Software with a work governed by one or more Secondary Licenses, and the 227 | Covered Software is not Incompatible With Secondary Licenses, this 228 | License permits You to additionally distribute such Covered Software 229 | under the terms of such Secondary License(s), so that the recipient of 230 | the Larger Work may, at their option, further distribute the Covered 231 | Software under the terms of either this License or such Secondary 232 | License(s). 233 | 234 | 3.4. Notices 235 | 236 | You may not remove or alter the substance of any license notices 237 | (including copyright notices, patent notices, disclaimers of warranty, 238 | or limitations of liability) contained within the Source Code Form of 239 | the Covered Software, except that You may alter any license notices to 240 | the extent required to remedy known factual inaccuracies. 241 | 242 | 3.5. 
Application of Additional Terms 243 | 244 | You may choose to offer, and to charge a fee for, warranty, support, 245 | indemnity or liability obligations to one or more recipients of Covered 246 | Software. However, You may do so only on Your own behalf, and not on 247 | behalf of any Contributor. You must make it absolutely clear that any 248 | such warranty, support, indemnity, or liability obligation is offered by 249 | You alone, and You hereby agree to indemnify every Contributor for any 250 | liability incurred by such Contributor as a result of warranty, support, 251 | indemnity or liability terms You offer. You may include additional 252 | disclaimers of warranty and limitations of liability specific to any 253 | jurisdiction. 254 | 255 | 4. Inability to Comply Due to Statute or Regulation 256 | --------------------------------------------------- 257 | 258 | If it is impossible for You to comply with any of the terms of this 259 | License with respect to some or all of the Covered Software due to 260 | statute, judicial order, or regulation then You must: (a) comply with 261 | the terms of this License to the maximum extent possible; and (b) 262 | describe the limitations and the code they affect. Such description must 263 | be placed in a text file included with all distributions of the Covered 264 | Software under this License. Except to the extent prohibited by statute 265 | or regulation, such description must be sufficiently detailed for a 266 | recipient of ordinary skill to be able to understand it. 267 | 268 | 5. Termination 269 | -------------- 270 | 271 | 5.1. The rights granted under this License will terminate automatically 272 | if You fail to comply with any of its terms. 
However, if You become 273 | compliant, then the rights granted under this License from a particular 274 | Contributor are reinstated (a) provisionally, unless and until such 275 | Contributor explicitly and finally terminates Your grants, and (b) on an 276 | ongoing basis, if such Contributor fails to notify You of the 277 | non-compliance by some reasonable means prior to 60 days after You have 278 | come back into compliance. Moreover, Your grants from a particular 279 | Contributor are reinstated on an ongoing basis if such Contributor 280 | notifies You of the non-compliance by some reasonable means, this is the 281 | first time You have received notice of non-compliance with this License 282 | from such Contributor, and You become compliant prior to 30 days after 283 | Your receipt of the notice. 284 | 285 | 5.2. If You initiate litigation against any entity by asserting a patent 286 | infringement claim (excluding declaratory judgment actions, 287 | counter-claims, and cross-claims) alleging that a Contributor Version 288 | directly or indirectly infringes any patent, then the rights granted to 289 | You by any and all Contributors for the Covered Software under Section 290 | 2.1 of this License shall terminate. 291 | 292 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 293 | end user license agreements (excluding distributors and resellers) which 294 | have been validly granted by You or Your distributors under this License 295 | prior to termination shall survive termination. 296 | 297 | ************************************************************************ 298 | * * 299 | * 6. 
Disclaimer of Warranty * 300 | * ------------------------- * 301 | * * 302 | * Covered Software is provided under this License on an "as is" * 303 | * basis, without warranty of any kind, either expressed, implied, or * 304 | * statutory, including, without limitation, warranties that the * 305 | * Covered Software is free of defects, merchantable, fit for a * 306 | * particular purpose or non-infringing. The entire risk as to the * 307 | * quality and performance of the Covered Software is with You. * 308 | * Should any Covered Software prove defective in any respect, You * 309 | * (not any Contributor) assume the cost of any necessary servicing, * 310 | * repair, or correction. This disclaimer of warranty constitutes an * 311 | * essential part of this License. No use of any Covered Software is * 312 | * authorized under this License except under this disclaimer. * 313 | * * 314 | ************************************************************************ 315 | 316 | ************************************************************************ 317 | * * 318 | * 7. Limitation of Liability * 319 | * -------------------------- * 320 | * * 321 | * Under no circumstances and under no legal theory, whether tort * 322 | * (including negligence), contract, or otherwise, shall any * 323 | * Contributor, or anyone who distributes Covered Software as * 324 | * permitted above, be liable to You for any direct, indirect, * 325 | * special, incidental, or consequential damages of any character * 326 | * including, without limitation, damages for lost profits, loss of * 327 | * goodwill, work stoppage, computer failure or malfunction, or any * 328 | * and all other commercial damages or losses, even if such party * 329 | * shall have been informed of the possibility of such damages. 
This * 330 | * limitation of liability shall not apply to liability for death or * 331 | * personal injury resulting from such party's negligence to the * 332 | * extent applicable law prohibits such limitation. Some * 333 | * jurisdictions do not allow the exclusion or limitation of * 334 | * incidental or consequential damages, so this exclusion and * 335 | * limitation may not apply to You. * 336 | * * 337 | ************************************************************************ 338 | 339 | 8. Litigation 340 | ------------- 341 | 342 | Any litigation relating to this License may be brought only in the 343 | courts of a jurisdiction where the defendant maintains its principal 344 | place of business and such litigation shall be governed by laws of that 345 | jurisdiction, without reference to its conflict-of-law provisions. 346 | Nothing in this Section shall prevent a party's ability to bring 347 | cross-claims or counter-claims. 348 | 349 | 9. Miscellaneous 350 | ---------------- 351 | 352 | This License represents the complete agreement concerning the subject 353 | matter hereof. If any provision of this License is held to be 354 | unenforceable, such provision shall be reformed only to the extent 355 | necessary to make it enforceable. Any law or regulation which provides 356 | that the language of a contract shall be construed against the drafter 357 | shall not be used to construe this License against a Contributor. 358 | 359 | 10. Versions of the License 360 | --------------------------- 361 | 362 | 10.1. New Versions 363 | 364 | Mozilla Foundation is the license steward. Except as provided in Section 365 | 10.3, no one other than the license steward has the right to modify or 366 | publish new versions of this License. Each version will be given a 367 | distinguishing version number. 368 | 369 | 10.2. 
Effect of New Versions 370 | 371 | You may distribute the Covered Software under the terms of the version 372 | of the License under which You originally received the Covered Software, 373 | or under the terms of any subsequent version published by the license 374 | steward. 375 | 376 | 10.3. Modified Versions 377 | 378 | If you create software not governed by this License, and you want to 379 | create a new license for such software, you may create and use a 380 | modified version of this License if you rename the license and remove 381 | any references to the name of the license steward (except to note that 382 | such modified license differs from this License). 383 | 384 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 385 | Licenses 386 | 387 | If You choose to distribute Source Code Form that is Incompatible With 388 | Secondary Licenses under the terms of this version of the License, the 389 | notice described in Exhibit B of this License must be attached. 390 | 391 | Exhibit A - Source Code Form License Notice 392 | ------------------------------------------- 393 | 394 | This Source Code Form is subject to the terms of the Mozilla Public 395 | License, v. 2.0. If a copy of the MPL was not distributed with this 396 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 397 | 398 | If it is not possible or desirable to put the notice in a particular 399 | file, then You may include the notice in a location (such as a LICENSE 400 | file in a relevant directory) where a recipient would be likely to look 401 | for such a notice. 402 | 403 | You may add additional accurate notices of copyright ownership. 404 | 405 | Exhibit B - "Incompatible With Secondary Licenses" Notice 406 | --------------------------------------------------------- 407 | 408 | This Source Code Form is "Incompatible With Secondary Licenses", as 409 | defined by the Mozilla Public License, v. 2.0. 
def read(filename):
    """
    Return the unicode text content of `filename`, without its reST
    directive, comment and field lines.

    `filename` is joined to the directory that holds this file and is decoded
    as UTF-8. Any line whose stripped text starts with '.. ' or ':' is
    dropped; every other line is kept as-is, line ending included.

    :param filename: string, path of the file to read
    :return: unicode string, the remaining lines joined together
    """
    location = join(dirname(__file__), filename)
    # Use a context manager so the file handle is closed as soon as the lines
    # are read instead of being left open until garbage collection.
    with codecs.open(location, encoding='utf8') as text:
        lines = [line for line in text
                 if not line.strip().startswith(('.. ', ':'))]
    return u''.join(lines)
class UpdatePslCommand(Command):
    """
    A setuptools command to update the vendored public suffix list to the latest.

    It is registered as the "update_psl" command and takes no options.
    """
    user_options = []

    def initialize_options(self):
        # required by the Command interface: there is no option to set up
        pass

    def finalize_options(self):
        # required by the Command interface: there is no option to validate
        pass

    def run(self):
        """
        Update the vendored public suffix list to the latest list from
        publicsuffix.org saved in the src directory of this package.

        Also create an ABOUT file with download info including the download UTC
        date/time as the version (see http://aboutcode.org)

        The vendored files are only written once the download succeeded: an
        HTTP error status raises a requests exception and leaves them as-is.
        """
        # imported here so that they are only needed when this command runs
        from datetime import datetime
        import os
        import sys

        import requests

        PSL_URL = 'https://publicsuffix.org/list/public_suffix_list.dat'
        BASE_DIR = os.path.join(os.path.dirname(__file__), 'src', 'publicsuffix2')
        PSL_FILE = os.path.join(BASE_DIR, 'public_suffix_list.dat')
        ABOUT_PSL_FILE = os.path.join(BASE_DIR, 'public_suffix_list.ABOUT')

        ABOUT_TEMPLATE = '''
about_resource: public_suffix_list.dat
name: Public Suffix List
version: %(version)s
download_url: %(PSL_URL)s
home_url: https://publicsuffix.org/

owner: Mozilla
copyright: Copyright (c) Mozilla and others
license: mpl-2.0
license_text_file: mpl-2.0.LICENSE
'''

        # current date and time as an ISO time stamp string
        version = datetime.isoformat(datetime.utcnow()).partition('.')[0]

        # explicit mapping of the values used by the messages and the template
        values = dict(
            PSL_URL=PSL_URL,
            PSL_FILE=PSL_FILE,
            ABOUT_PSL_FILE=ABOUT_PSL_FILE,
            version=version,
        )

        print('Fetching latest list from: %(PSL_URL)s on: %(version)s' % values)
        response = requests.get(PSL_URL)
        # Never overwrite the vendored list with the body of an error page.
        response.raise_for_status()
        fetched = response.content

        with open(PSL_FILE, 'wb') as pslout:
            pslout.write(fetched)

        # the template is a byte string on Python 2 and a text string on Python 3
        mode = 'wb' if sys.version_info[0] == 2 else 'w'
        with open(ABOUT_PSL_FILE, mode) as about:
            about.write(ABOUT_TEMPLATE % values)
        print('Saved updated %(PSL_FILE)s and %(ABOUT_PSL_FILE)s' % values)
# Package definition. The long description shown on the package index is the
# README followed by the changelog, both filtered by read().
setup(
    # identity
    name='publicsuffix2',
    version='2.20200127',
    url='https://github.com/nexb/python-publicsuffix2',
    author='nexB Inc., Tomaz Solc, David Wilson and others.',
    author_email='info@nexb.com',
    license='MIT and MPL-2.0',

    # descriptions
    description=(
        'Get a public suffix for a domain name using the Public Suffix List. '
        'Forked from and using the same API as the publicsuffix package.'
    ),
    long_description_content_type='text/x-rst',
    long_description='\n'.join([read('README.rst'), read('CHANGELOG.rst')]),
    keywords=[
        'domain',
        'public suffix',
        'suffix',
        'dns',
        'tld',
        'sld',
        'psl',
        'idna',
    ],
    classifiers=[
        'Intended Audience :: Developers',
        'License :: OSI Approved :: MIT License',
        'License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 3',
        'Topic :: Internet :: Name Service (DNS)',
        'Topic :: Utilities',
        'Development Status :: 5 - Production/Stable',
    ],

    # code and data layout: everything lives under src/
    package_dir={'': 'src'},
    packages=find_packages('src'),
    py_modules=[splitext(basename(path))[0] for path in glob('src/*.py')],
    include_package_data=True,
    zip_safe=False,

    # build-time requirements
    setup_requires=['requests >= 2.7.0'],

    # run this to update the Public Suffix list data "python setup.py update_psl"
    cmdclass={'update_psl': UpdatePslCommand},
)
5 | # This code is based on Tomaž Šolc's fork of David Wilson's code originally at 6 | # https://www.tablix.org/~avian/git/publicsuffix.git 7 | # 8 | # Copyright (c) 2014 Tomaž Šolc 9 | # 10 | # David Wilson's code was originally at: 11 | # from http://code.google.com/p/python-public-suffix-list/ 12 | # 13 | # Copyright (c) 2009 David Wilson 14 | # 15 | # Permission is hereby granted, free of charge, to any person obtaining a 16 | # copy of this software and associated documentation files (the "Software"), 17 | # to deal in the Software without restriction, including without limitation 18 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 19 | # and/or sell copies of the Software, and to permit persons to whom the 20 | # Software is furnished to do so, subject to the following conditions: 21 | # 22 | # The above copyright notice and this permission notice shall be included in 23 | # all copies or substantial portions of the Software. 24 | # 25 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 28 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 30 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 31 | # DEALINGS IN THE SOFTWARE. 32 | # 33 | # The Public Suffix List vendored in this distribution has been downloaded 34 | # from http://publicsuffix.org/public_suffix_list.dat 35 | # This data file is licensed under the MPL-2.0 license. 36 | # http://mozilla.org/MPL/2.0/ 37 | 38 | """ 39 | Public Suffix List module for Python. 
class PublicSuffixList(object):
    """
    A public suffix list parsed into a Trie keyed on domain labels, walked
    from the rightmost label of a domain to the left.

    Two instance attributes are set when the list is loaded:

    - tlds: list of the rules read from the list, in input order, with their
      modifiers (wildcard '*' and exception '!') kept.
    - root: the Trie. A node is the bare negate flag (0 or 1) when it has no
      children, or a (negate, dict of label -> node) tuple otherwise.
    """

    def __init__(self, psl_file=None, idna=True):
        """
        Read and parse a public suffix list. `psl_file` is either a file
        location string, or a file-like object, or an iterable of lines from a
        public suffix data file.

        If psl_file is None, the vendored file named "public_suffix_list.dat" is
        loaded. It is stored side by side with this Python package.

        The Mozilla public suffix list is no longer IDNA-encoded, it is UTF-8.
        For use cases with domains that are IDNA encoded, choose idna=True and
        the list will be converted upon loading. The wrong encoding will provide
        incorrect answers in either use case.

        The file format is described at http://publicsuffix.org/

        :param psl_file: string, file-like object, iterable of lines or None
        :param idna: boolean, whether to convert file to IDNA-encoded strings
        """
        # Note: we test for None as we accept empty lists as inputs
        if psl_file is None or isinstance(psl_file, str):
            with codecs.open(psl_file or PSL_FILE, 'r', encoding='utf8') as psl_fp:
                psl = psl_fp.readlines()
        else:
            # assume file-like or any other iterable of lines
            psl = psl_file

        # a list of eTLDs with their modifiers, e.g., *
        self.tlds = []
        root = self._build_structure(psl, idna)
        self.root = self._simplify(root)

    def _find_node(self, parent, parts):
        """
        Return the node of the Trie under `parent` reached by following
        `parts`, creating the missing nodes on the way.

        While the Trie is being built, a node is a list: [negate] for a node
        without children or [negate, dict of children]. This method takes the
        current parent node and looks up the last remaining part in its
        children. If not found, a new [0] node is added for it. The search
        then continues recursively from that child with the remaining parts.

        Note: `parts` is consumed from the right and is empty on return.

        :param parent: current node, a list [negate] or [negate, children]
        :param parts: list of strings, the labels still to follow
        :return: list, the node for the leftmost part
        """
        if not parts:
            return parent

        # grow a childless [negate] node to the [negate, dict()] form
        if len(parent) == 1:
            parent.append({})

        assert len(parent) == 2
        _negate, children = parent

        child = parts.pop()

        # if child already exists as a node, grab the sub-Trie
        child_node = children.get(child)

        # if it doesn't exist, create a new node initialized with [0]
        if child_node is None:
            children[child] = child_node = [0]

        return self._find_node(child_node, parts)

    def _add_rule(self, root, rule):
        """
        Add one rule of the public suffix list to the Trie. A rule that starts
        with ! is an exception (negation) rule: the ! is removed and the node
        of the rule is flagged with 1 instead of 0.

        :param root: root node of the Trie being built
        :param rule: string, a rule of the public suffix list
        :return: None
        """
        if rule.startswith('!'):
            negate = 1
            rule = rule[1:]
        else:
            negate = 0

        parts = rule.split('.')
        self._find_node(root, parts)[0] = negate

    def _simplify(self, node):
        """
        Return a condensed copy of a node built by _find_node(): a node
        without children becomes its bare negate flag and a node with
        children becomes a (negate, dict of simplified children) tuple.

        :param node: list, either [negate] or [negate, dict of children]
        :return: 0 or 1, or a (negate, dict) tuple
        """
        if len(node) == 1:
            return node[0]

        negate, children = node
        return (negate, {k: self._simplify(v) for k, v in children.items()})

    def _build_structure(self, fp, idna):
        """
        Build a Trie from the public suffix list. If idna==True, idna-encode
        each rule before building.

        Empty lines and lines starting with // are skipped. A rule ends at
        the first whitespace of its line: anything after it is ignored.

        The returned Trie is made of lists, see _find_node(). Once simplified
        with _simplify(), a partial subTrie looks like:
        (0, {'ac': 0, 'co': (0, {'blogspot': 0}), 'gv': 0,....})
        where each tuple starts with the negation flag, and each leaf is its
        bare negation flag.

        Also fills the instance attribute tlds with the rules, modifiers such
        as wildcards included. This can be accessed for post-processing by the
        application.

        :param fp: iterable of lines of the public suffix list
        :param idna: boolean, convert rules to idna-encoded strings
        :return: list, the root node of the Trie
        """
        root = [0]

        tlds = self.tlds

        for line in fp:
            line = line.strip()
            if not line or line.startswith('//'):
                continue

            # Isolate the rule before encoding it: trailing text after the
            # rule must neither be stored nor IDNA-encoded (the codec rejects
            # labels of 64 characters or more).
            rule = line.split()[0]
            if idna:
                rule = rule.encode('idna').decode()
            tlds.append(rule)

            self._add_rule(root, rule.lstrip('.'))

        return root

    def _lookup_node(self, matches, depth, parent, parts, wildcard):
        """
        Traverse the Trie recursively to find the parts. By default, the
        traverse follows wildcards, as appropriate for the public suffix list,
        but if wildcard is set to False, it will ignore wildcard nodes. This
        can be useful for summarizing complex wildcard domains like those under
        amazonaws.com.

        The lookup is tracked in `matches`, a list as long as `parts` that is
        updated in place: matches[-depth] receives the negation flag of the
        node matched for parts[-depth]. Entries without a match are left as
        they were.

        :param matches: list, parts long, None (initial), 0, or 1
        :param depth: int, 1-based position from the right of the part to match
        :param parent: the current node, 0, 1 or a (negate, children) tuple
        :param parts: list of domain parts, strings
        :param wildcard: boolean, whether to process wildcard nodes
        :return: None, recursive call
        """
        if wildcard and depth == 1:
            # if no rules match, the prevailing rule is "*"
            # See: Algorithm 2 at https://publicsuffix.org/list/
            matches[-depth] = 0

        # a bare flag is a node without children: nothing more to match
        if parent in (0, 1):
            return

        children = parent[1]

        if depth <= len(parts) and children:
            # the exact label is tried last so that it overrides the flag set
            # by a wildcard match at the same depth
            for name in ('*', parts[-depth]):
                child = children.get(name, None)
                if child is not None:
                    if wildcard or name != '*':
                        if child in (0, 1):
                            negate = child
                        else:
                            negate = child[0]
                        matches[-depth] = negate
                        self._lookup_node(matches, depth + 1, child, parts, wildcard)

    def get_sld(self, domain, wildcard=True, strict=False):
        """
        Return the second-level-domain (SLD) or private suffix of a given domain
        according to the public suffix list. The public suffix list includes
        wildcards, so if wildcard is set to True, this will follow the wildcard
        on traversal, otherwise it will ignore wildcard nodes.

        The public suffix is always computed with a check that the rightmost
        label is in the list. When no public suffix is found that way and
        strict is False, the last label alone is returned: for example,
        'www.this.local' returns 'local' if 'local' is not in the list. With
        strict=True, None is returned instead.

        If domain is already an eTLD, it returns domain as-is instead of None
        value.

        :param domain: string, needs to match the encoding of the PSL (idna or UTF8)
        :param wildcard: boolean, follow wildcard patterns
        :param strict: boolean, check the TLD is valid, return None if not
        :return: string, the SLD for the domain, or None
        """
        if not domain:
            return None

        # for compatibility, set strict True not to allow invalid TLDs
        tld = self.get_tld(domain, wildcard, True)
        if strict and tld is None:
            return None

        parts = domain.lower().strip('.').split('.')
        num_of_tld_parts = 0 if tld is None else tld.count('.') + 1

        if len(parts) <= num_of_tld_parts:
            return tld

        # the public suffix plus one more label to its left
        return '.'.join(parts[-(num_of_tld_parts + 1):])

    def get_public_suffix(self, domain, wildcard=True, strict=False):
        """
        Use get_sld() instead.
        """
        return self.get_sld(domain, wildcard, strict)

    def get_tld(self, domain, wildcard=True, strict=False):
        """
        Return the TLD, or public suffix, of a domain using the public suffix
        list. Uses wildcards if set, and checks that the rightmost label is in
        the list if strict=True.

        The domain is lowercased and stripped of leading and trailing dots
        first. A domain that is itself a public suffix is returned as-is: with
        'com', 'uk' and 'co.uk' in the list, 'com' returns 'com' and 'co.uk'
        returns 'co.uk'. Longer domains return their public suffix, e.g.,
        'google.co.uk' returns 'co.uk'. Root ('.') returns an empty string.

        :param domain: string
        :param wildcard: boolean, follow wildcards in Trie
        :param strict: boolean, check that top TLD is valid in Trie
        :return: string, the TLD for the domain, or None if the domain is
            empty or no suffix is found
        """
        if not domain:
            return None
        parts = domain.lower().strip('.').split('.')
        hits = [None] * len(parts)
        if strict and (
            self.root in (0, 1) or parts[-1] not in self.root[1]
        ):
            return None

        self._lookup_node(hits, 1, self.root, parts, wildcard)

        # the suffix starts at the leftmost part matched by a non-exception
        # rule: None (no match) and 1 (exception rule) are skipped
        for i, what in enumerate(hits):
            if what == 0:
                return '.'.join(parts[i:])

        return None
# The PublicSuffixList shared by the convenience functions below. It is built
# by whichever of them is called first.
_PSL = None


def get_sld(domain, psl_file=None, wildcard=True, idna=True, strict=False):
    """
    Return the private suffix or SLD for a `domain` DNS name string. The
    original publicsuffix2 library used the function get_public_suffix() for
    this purpose. Convenience function that builds and caches a
    PublicSuffixList object.

    Optionally read, and parse a public suffix list. `psl_file` is either a file
    location string, or a file-like object, or an iterable of lines from a
    public suffix data file.

    If psl_file is None, the vendored file named "public_suffix_list.dat" is
    loaded. It is stored side by side with this Python package.

    NOTE: `psl_file` and `idna` are only used when the cached list is built.
    Once a list is cached, it is reused as-is and these two arguments are
    ignored.

    The file format is described at http://publicsuffix.org/
    """
    global _PSL
    if not _PSL:
        _PSL = PublicSuffixList(psl_file, idna=idna)
    return _PSL.get_sld(domain, wildcard=wildcard, strict=strict)


def get_tld(domain, psl_file=None, wildcard=True, idna=True, strict=False):
    """
    Return the TLD or public suffix for a `domain` DNS name string.
    Convenience function that builds and caches a PublicSuffixList object.

    Optionally read, and parse a public suffix list. `psl_file` is either a file
    location string, or a file-like object, or an iterable of lines from a
    public suffix data file.

    If psl_file is None, the vendored file named "public_suffix_list.dat" is
    loaded. It is stored side by side with this Python package.

    NOTE: `psl_file` and `idna` are only used when the cached list is built.
    Once a list is cached, it is reused as-is and these two arguments are
    ignored.

    The file format is described at http://publicsuffix.org/
    """
    global _PSL
    if not _PSL:
        _PSL = PublicSuffixList(psl_file, idna=idna)
    return _PSL.get_tld(domain, wildcard=wildcard, strict=strict)


def get_public_suffix(domain, psl_file=None, wildcard=True, idna=True, strict=False):
    """
    Included for compatibility with the original publicsuffix2 library -- this
    function returns the private suffix or SLD of the domain. To get the public
    suffix, use get_tld(). Convenience function that builds and caches a
    PublicSuffixList object.

    A UserWarning is emitted on each call, attributed to the caller.

    Optionally read, and parse a public suffix list. `psl_file` is either a file
    location string, or a file-like object, or an iterable of lines from a
    public suffix data file.

    If psl_file is None, the vendored file named "public_suffix_list.dat" is
    loaded. It is stored side by side with this Python package.

    The file format is described at http://publicsuffix.org/
    """
    warnings.warn(
        'This function returns the private suffix, SLD, or registrable domain. '
        'This equivalent to function get_sld(). '
        'To get the public suffix itself, use get_tld().',
        UserWarning,
        # report the line of the caller rather than this line
        stacklevel=2,
    )
    return get_sld(domain, psl_file, wildcard, idna, strict)
def fetch():
    """
    Return a file-like object for the latest public suffix list downloaded from
    publicsuffix.org

    The returned object is a text reader that decodes the HTTP response with
    the charset declared in its headers, or as UTF-8 when none is declared.
    It can be passed as the `psl_file` of PublicSuffixList.
    """
    req = Request(PSL_URL, headers={'User-Agent': 'python-publicsuffix2'})
    res = urlopen(req)
    try:
        encoding = res.headers.get_content_charset()
    except AttributeError:
        # fallback for a headers object without get_content_charset(), as
        # with the urllib2 import used on Python 2
        encoding = res.headers.getparam('charset')
    # The charset may be missing from the headers: the list is UTF-8, so use
    # that rather than fail on a None encoding.
    f = codecs.getreader(encoding or 'utf-8')(res)
    return f
"Incompatible With Secondary Licenses" 30 | means 31 | 32 | (a) that the initial Contributor has attached the notice described 33 | in Exhibit B to the Covered Software; or 34 | 35 | (b) that the Covered Software was made available under the terms of 36 | version 1.1 or earlier of the License, but not also under the 37 | terms of a Secondary License. 38 | 39 | 1.6. "Executable Form" 40 | means any form of the work other than Source Code Form. 41 | 42 | 1.7. "Larger Work" 43 | means a work that combines Covered Software with other material, in 44 | a separate file or files, that is not Covered Software. 45 | 46 | 1.8. "License" 47 | means this document. 48 | 49 | 1.9. "Licensable" 50 | means having the right to grant, to the maximum extent possible, 51 | whether at the time of the initial grant or subsequently, any and 52 | all of the rights conveyed by this License. 53 | 54 | 1.10. "Modifications" 55 | means any of the following: 56 | 57 | (a) any file in Source Code Form that results from an addition to, 58 | deletion from, or modification of the contents of Covered 59 | Software; or 60 | 61 | (b) any new file in Source Code Form that contains any Covered 62 | Software. 63 | 64 | 1.11. "Patent Claims" of a Contributor 65 | means any patent claim(s), including without limitation, method, 66 | process, and apparatus claims, in any patent Licensable by such 67 | Contributor that would be infringed, but for the grant of the 68 | License, by the making, using, selling, offering for sale, having 69 | made, import, or transfer of either its Contributions or its 70 | Contributor Version. 71 | 72 | 1.12. "Secondary License" 73 | means either the GNU General Public License, Version 2.0, the GNU 74 | Lesser General Public License, Version 2.1, the GNU Affero General 75 | Public License, Version 3.0, or any later versions of those 76 | licenses. 77 | 78 | 1.13. "Source Code Form" 79 | means the form of the work preferred for making modifications. 80 | 81 | 1.14. 
"You" (or "Your") 82 | means an individual or a legal entity exercising rights under this 83 | License. For legal entities, "You" includes any entity that 84 | controls, is controlled by, or is under common control with You. For 85 | purposes of this definition, "control" means (a) the power, direct 86 | or indirect, to cause the direction or management of such entity, 87 | whether by contract or otherwise, or (b) ownership of more than 88 | fifty percent (50%) of the outstanding shares or beneficial 89 | ownership of such entity. 90 | 91 | 2. License Grants and Conditions 92 | -------------------------------- 93 | 94 | 2.1. Grants 95 | 96 | Each Contributor hereby grants You a world-wide, royalty-free, 97 | non-exclusive license: 98 | 99 | (a) under intellectual property rights (other than patent or trademark) 100 | Licensable by such Contributor to use, reproduce, make available, 101 | modify, display, perform, distribute, and otherwise exploit its 102 | Contributions, either on an unmodified basis, with Modifications, or 103 | as part of a Larger Work; and 104 | 105 | (b) under Patent Claims of such Contributor to make, use, sell, offer 106 | for sale, have made, import, and otherwise transfer either its 107 | Contributions or its Contributor Version. 108 | 109 | 2.2. Effective Date 110 | 111 | The licenses granted in Section 2.1 with respect to any Contribution 112 | become effective for each Contribution on the date the Contributor first 113 | distributes such Contribution. 114 | 115 | 2.3. Limitations on Grant Scope 116 | 117 | The licenses granted in this Section 2 are the only rights granted under 118 | this License. No additional rights or licenses will be implied from the 119 | distribution or licensing of Covered Software under this License. 
120 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 121 | Contributor: 122 | 123 | (a) for any code that a Contributor has removed from Covered Software; 124 | or 125 | 126 | (b) for infringements caused by: (i) Your and any other third party's 127 | modifications of Covered Software, or (ii) the combination of its 128 | Contributions with other software (except as part of its Contributor 129 | Version); or 130 | 131 | (c) under Patent Claims infringed by Covered Software in the absence of 132 | its Contributions. 133 | 134 | This License does not grant any rights in the trademarks, service marks, 135 | or logos of any Contributor (except as may be necessary to comply with 136 | the notice requirements in Section 3.4). 137 | 138 | 2.4. Subsequent Licenses 139 | 140 | No Contributor makes additional grants as a result of Your choice to 141 | distribute the Covered Software under a subsequent version of this 142 | License (see Section 10.2) or under the terms of a Secondary License (if 143 | permitted under the terms of Section 3.3). 144 | 145 | 2.5. Representation 146 | 147 | Each Contributor represents that the Contributor believes its 148 | Contributions are its original creation(s) or it has sufficient rights 149 | to grant the rights to its Contributions conveyed by this License. 150 | 151 | 2.6. Fair Use 152 | 153 | This License is not intended to limit any rights You have under 154 | applicable copyright doctrines of fair use, fair dealing, or other 155 | equivalents. 156 | 157 | 2.7. Conditions 158 | 159 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 160 | in Section 2.1. 161 | 162 | 3. Responsibilities 163 | ------------------- 164 | 165 | 3.1. Distribution of Source Form 166 | 167 | All distribution of Covered Software in Source Code Form, including any 168 | Modifications that You create or to which You contribute, must be under 169 | the terms of this License. 
You must inform recipients that the Source 170 | Code Form of the Covered Software is governed by the terms of this 171 | License, and how they can obtain a copy of this License. You may not 172 | attempt to alter or restrict the recipients' rights in the Source Code 173 | Form. 174 | 175 | 3.2. Distribution of Executable Form 176 | 177 | If You distribute Covered Software in Executable Form then: 178 | 179 | (a) such Covered Software must also be made available in Source Code 180 | Form, as described in Section 3.1, and You must inform recipients of 181 | the Executable Form how they can obtain a copy of such Source Code 182 | Form by reasonable means in a timely manner, at a charge no more 183 | than the cost of distribution to the recipient; and 184 | 185 | (b) You may distribute such Executable Form under the terms of this 186 | License, or sublicense it under different terms, provided that the 187 | license for the Executable Form does not attempt to limit or alter 188 | the recipients' rights in the Source Code Form under this License. 189 | 190 | 3.3. Distribution of a Larger Work 191 | 192 | You may create and distribute a Larger Work under terms of Your choice, 193 | provided that You also comply with the requirements of this License for 194 | the Covered Software. If the Larger Work is a combination of Covered 195 | Software with a work governed by one or more Secondary Licenses, and the 196 | Covered Software is not Incompatible With Secondary Licenses, this 197 | License permits You to additionally distribute such Covered Software 198 | under the terms of such Secondary License(s), so that the recipient of 199 | the Larger Work may, at their option, further distribute the Covered 200 | Software under the terms of either this License or such Secondary 201 | License(s). 202 | 203 | 3.4. 
Notices 204 | 205 | You may not remove or alter the substance of any license notices 206 | (including copyright notices, patent notices, disclaimers of warranty, 207 | or limitations of liability) contained within the Source Code Form of 208 | the Covered Software, except that You may alter any license notices to 209 | the extent required to remedy known factual inaccuracies. 210 | 211 | 3.5. Application of Additional Terms 212 | 213 | You may choose to offer, and to charge a fee for, warranty, support, 214 | indemnity or liability obligations to one or more recipients of Covered 215 | Software. However, You may do so only on Your own behalf, and not on 216 | behalf of any Contributor. You must make it absolutely clear that any 217 | such warranty, support, indemnity, or liability obligation is offered by 218 | You alone, and You hereby agree to indemnify every Contributor for any 219 | liability incurred by such Contributor as a result of warranty, support, 220 | indemnity or liability terms You offer. You may include additional 221 | disclaimers of warranty and limitations of liability specific to any 222 | jurisdiction. 223 | 224 | 4. Inability to Comply Due to Statute or Regulation 225 | --------------------------------------------------- 226 | 227 | If it is impossible for You to comply with any of the terms of this 228 | License with respect to some or all of the Covered Software due to 229 | statute, judicial order, or regulation then You must: (a) comply with 230 | the terms of this License to the maximum extent possible; and (b) 231 | describe the limitations and the code they affect. Such description must 232 | be placed in a text file included with all distributions of the Covered 233 | Software under this License. Except to the extent prohibited by statute 234 | or regulation, such description must be sufficiently detailed for a 235 | recipient of ordinary skill to be able to understand it. 236 | 237 | 5. 
Termination 238 | -------------- 239 | 240 | 5.1. The rights granted under this License will terminate automatically 241 | if You fail to comply with any of its terms. However, if You become 242 | compliant, then the rights granted under this License from a particular 243 | Contributor are reinstated (a) provisionally, unless and until such 244 | Contributor explicitly and finally terminates Your grants, and (b) on an 245 | ongoing basis, if such Contributor fails to notify You of the 246 | non-compliance by some reasonable means prior to 60 days after You have 247 | come back into compliance. Moreover, Your grants from a particular 248 | Contributor are reinstated on an ongoing basis if such Contributor 249 | notifies You of the non-compliance by some reasonable means, this is the 250 | first time You have received notice of non-compliance with this License 251 | from such Contributor, and You become compliant prior to 30 days after 252 | Your receipt of the notice. 253 | 254 | 5.2. If You initiate litigation against any entity by asserting a patent 255 | infringement claim (excluding declaratory judgment actions, 256 | counter-claims, and cross-claims) alleging that a Contributor Version 257 | directly or indirectly infringes any patent, then the rights granted to 258 | You by any and all Contributors for the Covered Software under Section 259 | 2.1 of this License shall terminate. 260 | 261 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 262 | end user license agreements (excluding distributors and resellers) which 263 | have been validly granted by You or Your distributors under this License 264 | prior to termination shall survive termination. 265 | 266 | ************************************************************************ 267 | * * 268 | * 6. 
Disclaimer of Warranty * 269 | * ------------------------- * 270 | * * 271 | * Covered Software is provided under this License on an "as is" * 272 | * basis, without warranty of any kind, either expressed, implied, or * 273 | * statutory, including, without limitation, warranties that the * 274 | * Covered Software is free of defects, merchantable, fit for a * 275 | * particular purpose or non-infringing. The entire risk as to the * 276 | * quality and performance of the Covered Software is with You. * 277 | * Should any Covered Software prove defective in any respect, You * 278 | * (not any Contributor) assume the cost of any necessary servicing, * 279 | * repair, or correction. This disclaimer of warranty constitutes an * 280 | * essential part of this License. No use of any Covered Software is * 281 | * authorized under this License except under this disclaimer. * 282 | * * 283 | ************************************************************************ 284 | 285 | ************************************************************************ 286 | * * 287 | * 7. Limitation of Liability * 288 | * -------------------------- * 289 | * * 290 | * Under no circumstances and under no legal theory, whether tort * 291 | * (including negligence), contract, or otherwise, shall any * 292 | * Contributor, or anyone who distributes Covered Software as * 293 | * permitted above, be liable to You for any direct, indirect, * 294 | * special, incidental, or consequential damages of any character * 295 | * including, without limitation, damages for lost profits, loss of * 296 | * goodwill, work stoppage, computer failure or malfunction, or any * 297 | * and all other commercial damages or losses, even if such party * 298 | * shall have been informed of the possibility of such damages. 
This * 299 | * limitation of liability shall not apply to liability for death or * 300 | * personal injury resulting from such party's negligence to the * 301 | * extent applicable law prohibits such limitation. Some * 302 | * jurisdictions do not allow the exclusion or limitation of * 303 | * incidental or consequential damages, so this exclusion and * 304 | * limitation may not apply to You. * 305 | * * 306 | ************************************************************************ 307 | 308 | 8. Litigation 309 | ------------- 310 | 311 | Any litigation relating to this License may be brought only in the 312 | courts of a jurisdiction where the defendant maintains its principal 313 | place of business and such litigation shall be governed by laws of that 314 | jurisdiction, without reference to its conflict-of-law provisions. 315 | Nothing in this Section shall prevent a party's ability to bring 316 | cross-claims or counter-claims. 317 | 318 | 9. Miscellaneous 319 | ---------------- 320 | 321 | This License represents the complete agreement concerning the subject 322 | matter hereof. If any provision of this License is held to be 323 | unenforceable, such provision shall be reformed only to the extent 324 | necessary to make it enforceable. Any law or regulation which provides 325 | that the language of a contract shall be construed against the drafter 326 | shall not be used to construe this License against a Contributor. 327 | 328 | 10. Versions of the License 329 | --------------------------- 330 | 331 | 10.1. New Versions 332 | 333 | Mozilla Foundation is the license steward. Except as provided in Section 334 | 10.3, no one other than the license steward has the right to modify or 335 | publish new versions of this License. Each version will be given a 336 | distinguishing version number. 337 | 338 | 10.2. 
Effect of New Versions 339 | 340 | You may distribute the Covered Software under the terms of the version 341 | of the License under which You originally received the Covered Software, 342 | or under the terms of any subsequent version published by the license 343 | steward. 344 | 345 | 10.3. Modified Versions 346 | 347 | If you create software not governed by this License, and you want to 348 | create a new license for such software, you may create and use a 349 | modified version of this License if you rename the license and remove 350 | any references to the name of the license steward (except to note that 351 | such modified license differs from this License). 352 | 353 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 354 | Licenses 355 | 356 | If You choose to distribute Source Code Form that is Incompatible With 357 | Secondary Licenses under the terms of this version of the License, the 358 | notice described in Exhibit B of this License must be attached. 359 | 360 | Exhibit A - Source Code Form License Notice 361 | ------------------------------------------- 362 | 363 | This Source Code Form is subject to the terms of the Mozilla Public 364 | License, v. 2.0. If a copy of the MPL was not distributed with this 365 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 366 | 367 | If it is not possible or desirable to put the notice in a particular 368 | file, then You may include the notice in a location (such as a LICENSE 369 | file in a relevant directory) where a recipient would be likely to look 370 | for such a notice. 371 | 372 | You may add additional accurate notices of copyright ownership. 373 | 374 | Exhibit B - "Incompatible With Secondary Licenses" Notice 375 | --------------------------------------------------------- 376 | 377 | This Source Code Form is "Incompatible With Secondary Licenses", as 378 | defined by the Mozilla Public License, v. 2.0. 
379 | -------------------------------------------------------------------------------- /src/publicsuffix2/public_suffix_list.ABOUT: -------------------------------------------------------------------------------- 1 | 2 | about_resource: public_suffix_list.dat 3 | name: Public Suffix List 4 | version: 2020-01-27T10:24:28 5 | download_url: https://publicsuffix.org/list/public_suffix_list.dat 6 | home_url: https://publicsuffix.org/ 7 | 8 | owner: Mozilla 9 | copyright: Copyright (c) Mozilla and others 10 | license: mpl-2.0 11 | license_text_file: mpl-2.0.LICENSE 12 | -------------------------------------------------------------------------------- /tests.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) nexB Inc. and others. 3 | # This code is based on Tomaž Šolc's fork of David Wilson's code originally at 4 | # https://www.tablix.org/~avian/git/publicsuffix.git 5 | # 6 | # Copyright (c) 2014 Tomaž Šolc 7 | # 8 | # David Wilson's code was originally at: 9 | # from http://code.google.com/p/python-public-suffix-list/ 10 | # 11 | # Copyright (c) 2009 David Wilson 12 | # 13 | # Permission is hereby granted, free of charge, to any person obtaining a 14 | # copy of this software and associated documentation files (the "Software"), 15 | # to deal in the Software without restriction, including without limitation 16 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 17 | # and/or sell copies of the Software, and to permit persons to whom the 18 | # Software is furnished to do so, subject to the following conditions: 19 | # 20 | # The above copyright notice and this permission notice shall be included in 21 | # all copies or substantial portions of the Software. 
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
#
# The Public Suffix List vendored in this distribution has been downloaded
# from http://publicsuffix.org/public_suffix_list.dat
# This data file is licensed under the MPL-2.0 license.
# http://mozilla.org/MPL/2.0/


from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals

import unittest

import publicsuffix2 as publicsuffix


class TestPublicSuffix(unittest.TestCase):
    """
    Exercise get_sld() against small, hand-built rule lists (empty list,
    a single rule, wildcard plus exception rules, unicode rules) and check
    the module-level get_public_suffix() helper.
    """

    def test_get_sld_from_empty_list(self):
        psl = publicsuffix.PublicSuffixList([])
        assert 'com' == psl.get_sld('com')
        assert 'com' == psl.get_sld('COM')
        assert 'com' == psl.get_sld('.com')
        assert 'com' == psl.get_sld('a.example.com')

    def test_get_sld_from_empty_list_in_strict_mode(self):
        psl = publicsuffix.PublicSuffixList([])
        assert psl.get_sld('com', strict=True) is None

    def test_get_sld_from_list(self):
        psl = publicsuffix.PublicSuffixList(['com'])
        assert 'example.com' == psl.get_sld('a.example.com')
        assert 'example.com' == psl.get_sld('a.a.example.com')
        assert 'example.com' == psl.get_sld('a.a.a.example.com')
        assert 'example.com' == psl.get_sld('A.example.com')
        assert 'example.com' == psl.get_sld('.a.a.example.com')

    def test_get_sld_from_list_with_exception_rule(self):
        psl = publicsuffix.PublicSuffixList(['*.example.com', '!b.example.com'])
        # names covered by the wildcard rule
        assert 'a.example.com' == psl.get_sld('a.example.com')
        assert 'a.a.example.com' == psl.get_sld('a.a.example.com')
        assert 'a.a.example.com' == psl.get_sld('a.a.a.example.com')
        assert 'a.a.example.com' == psl.get_sld('a.a.a.a.example.com')

        # names covered by the "!b.example.com" exception rule
        assert 'b.example.com' == psl.get_sld('b.example.com')
        assert 'b.example.com' == psl.get_sld('b.b.example.com')
        assert 'b.example.com' == psl.get_sld('b.b.b.example.com')
        assert 'b.example.com' == psl.get_sld('b.b.b.b.example.com')

    def test_get_sld_from_list_with_fqdn(self):
        psl = publicsuffix.PublicSuffixList(['com'])
        assert 'example.com' == psl.get_sld('example.com.')

    def test_get_sld_from_list_with_unicode(self):
        psl = publicsuffix.PublicSuffixList([u'\u0440\u0444'], idna=False)
        assert u'\u0440\u0444' == psl.get_sld(u'\u0440\u0444')
        assert u'example.\u0440\u0444' == psl.get_sld(u'example.\u0440\u0444')
        assert u'example.\u0440\u0444' == psl.get_sld(u'a.example.\u0440\u0444')
        assert u'example.\u0440\u0444' == psl.get_sld(u'a.a.example.\u0440\u0444')

    def test_get_public_suffix_from_builtin_full_publicsuffix_org_using_top_level_function(self):
        assert 'com' == publicsuffix.get_public_suffix('COM')
        assert 'example.com' == publicsuffix.get_public_suffix('example.COM')
        assert 'example.com' == publicsuffix.get_public_suffix('WwW.example.COM')


class TestPublicSuffixUsingTheCurrentVendoredPSL(unittest.TestCase):
    """
    Exercise get_sld() on PublicSuffixList(None) which, going by the test
    names, resolves against the built-in publicsuffix.org list.
    """

    def test_get_sld_from_builtin_full_publicsuffix_org_list_with_mixed_case(self):
        psl = publicsuffix.PublicSuffixList(None)
        assert 'com' == psl.get_sld('COM')
        assert 'example.com' == psl.get_sld('example.COM')
        assert 'example.com' == psl.get_sld('WwW.example.COM')

    def test_get_sld_from_builtin_full_publicsuffix_org_list_with_leading_dot(self):
        psl = publicsuffix.PublicSuffixList(None)
        assert 'com' == psl.get_sld('.com')
        assert 'example' == psl.get_sld('.example')
        assert 'example.com' == psl.get_sld('.example.com')
        assert 'example' == psl.get_sld('.example.example')

    def test_get_sld_from_builtin_full_publicsuffix_org_list_with_unlisted_tld(self):
        psl = publicsuffix.PublicSuffixList(None)
        assert 'example' == psl.get_sld('example')
        assert 'example' == psl.get_sld('example.example')
        assert 'example' == psl.get_sld('b.example.example')
        assert 'example' == psl.get_sld('a.b.example.example')

    def test_get_sld_from_builtin_full_publicsuffix_org_list_with_listed_ut_non_internet_tld(self):
        psl = publicsuffix.PublicSuffixList(None)
        assert 'local' == psl.get_sld('local')
        assert 'local' == psl.get_sld('example.local')
        assert 'local' == psl.get_sld('b.example.local')
        assert 'local' == psl.get_sld('a.b.example.local')

    def test_get_sld_from_builtin_full_publicsuffix_org_list_with_one_rule(self):
        psl = publicsuffix.PublicSuffixList(None)
        assert 'biz' == psl.get_sld('biz')
        assert 'domain.biz' == psl.get_sld('domain.biz')
        assert 'domain.biz' == psl.get_sld('b.domain.biz')
        assert 'domain.biz' == psl.get_sld('a.b.domain.biz')

    def test_get_sld_from_builtin_full_publicsuffix_org_list_with_two_level_rules(self):
        psl = publicsuffix.PublicSuffixList(None)
        assert 'com' == psl.get_sld('com')
        assert 'example.com' == psl.get_sld('example.com')
        assert 'example.com' == psl.get_sld('b.example.com')
        assert 'example.com' == psl.get_sld('a.b.example.com')

    def test_get_sld_from_builtin_full_publicsuffix_org_list_with_two_level_uk_rules(self):
        psl = publicsuffix.PublicSuffixList(None)
        assert 'uk.com' == psl.get_sld('uk.com')
        assert 'example.uk.com' == psl.get_sld('example.uk.com')
        assert 'example.uk.com' == psl.get_sld('b.example.uk.com')
        assert 'example.uk.com' == psl.get_sld('a.b.example.uk.com')
        assert 'test.ac' == psl.get_sld('test.ac')

    def test_get_sld_from_builtin_full_publicsuffix_org_list_with_wildcard_rule(self):
        psl = publicsuffix.PublicSuffixList(None)
        assert 'er' == psl.get_sld('er')
        assert 'c.er' == psl.get_sld('c.er')
        assert 'b.c.er' == psl.get_sld('b.c.er')
        assert 'b.c.er' == psl.get_sld('a.b.c.er')

    def test_get_sld_from_builtin_full_publicsuffix_org_list_with_japanese_domain(self):
        psl = publicsuffix.PublicSuffixList(None)
        assert 'jp' == psl.get_sld('jp')
        assert 'test.jp' == psl.get_sld('test.jp')
        assert 'test.jp' == psl.get_sld('www.test.jp')
        assert 'ac.jp' == psl.get_sld('ac.jp')
        assert 'test.ac.jp' == psl.get_sld('test.ac.jp')
        assert 'test.ac.jp' == psl.get_sld('www.test.ac.jp')
        assert 'kobe.jp' == psl.get_sld('kobe.jp')
        assert 'c.kobe.jp' == psl.get_sld('c.kobe.jp')
        assert 'b.c.kobe.jp' == psl.get_sld('b.c.kobe.jp')
        assert 'b.c.kobe.jp' == psl.get_sld('a.b.c.kobe.jp')

    def test_get_sld_from_builtin_full_publicsuffix_org_list_with_japanese_domain_exception_rule(self):
        psl = publicsuffix.PublicSuffixList(None)
        assert 'city.kobe.jp' == psl.get_sld('city.kobe.jp')
        assert 'city.kobe.jp' == psl.get_sld('www.city.kobe.jp')

    def test_get_sld_from_builtin_full_publicsuffix_org_list_with_ys(self):
        psl = publicsuffix.PublicSuffixList(None)
        assert 'us' == psl.get_sld('us')
        assert 'test.us' == psl.get_sld('test.us')
        assert 'test.us' == psl.get_sld('www.test.us')

    def test_get_sld_from_builtin_full_publicsuffix_org_list_with_us_k12(self):
        psl = publicsuffix.PublicSuffixList(None)
        assert 'ak.us' == psl.get_sld('ak.us')
        assert 'test.ak.us' == psl.get_sld('test.ak.us')
        assert 'test.ak.us' == psl.get_sld('www.test.ak.us')
        assert 'k12.ak.us' == psl.get_sld('k12.ak.us')
        assert 'test.k12.ak.us' == psl.get_sld('test.k12.ak.us')
        assert 'test.k12.ak.us' == psl.get_sld('www.test.k12.ak.us')


class TestPublicSuffixGetSldIdna(unittest.TestCase):
    """
    Exercise get_sld() and get_tld() on PublicSuffixList instances built
    with idna=True, idna=False and the default arguments, including the
    wildcard and strict keyword flags.
    """

    def test_get_sld_idna_encoded(self):
        # actually the default
        psl = publicsuffix.PublicSuffixList(idna=True)
        assert 'xn--85x722f.com.cn' == psl.get_sld('xn--85x722f.com.cn')
        assert 'xn--85x722f.xn--55qx5d.cn' == psl.get_sld('xn--85x722f.xn--55qx5d.cn')
        assert 'xn--85x722f.xn--55qx5d.cn' == psl.get_sld('www.xn--85x722f.xn--55qx5d.cn')
        assert 'shishi.xn--55qx5d.cn' == psl.get_sld('shishi.xn--55qx5d.cn')

    def test_get_sld_with_utf8_encoded(self):
        # uses the list provided utf-8 defaults
        psl = publicsuffix.PublicSuffixList(idna=False)
        assert u'食狮.com.cn' == psl.get_sld(u'食狮.com.cn')
        assert u'食狮.公司.cn' == psl.get_sld(u'食狮.公司.cn')
        assert u'食狮.公司.cn' == psl.get_sld(u'www.食狮.公司.cn')
        assert u'shishi.公司.cn' == psl.get_sld(u'shishi.公司.cn')

    def test_get_sld_exceptions(self):
        psl = publicsuffix.PublicSuffixList()
        # www is the exception
        assert 'www.ck' == psl.get_sld('www.www.ck')
        assert 'this.that.ck' == psl.get_sld('this.that.ck')

    def test_get_sld_no_wildcard(self):
        psl = publicsuffix.PublicSuffixList()
        # test completion when no wildcards should be processed
        assert 'com.pg' == psl.get_sld('telinet.com.pg', wildcard=False)
        expected = 'ap-southeast-1.elb.amazonaws.com'
        result = psl.get_sld('blah.ap-southeast-1.elb.amazonaws.com', wildcard=False)
        assert expected == result

    def test_get_sld_top_convenience_function_is_the_same_as_PublicSuffixList_method(self):
        psl = publicsuffix.PublicSuffixList()
        # these functions should be identical
        assert psl.get_sld('www.google.com') == publicsuffix.get_sld('www.google.com')
        assert psl.get_sld('www.test.ak.us') == publicsuffix.get_sld('www.test.ak.us')

    def test_get_tld_returns_correct_tld_or_etld(self):
        psl = publicsuffix.PublicSuffixList()
        assert 'com' == psl.get_tld('com')
        assert 'kobe.jp' == psl.get_tld('city.kobe.jp')
        assert 'kobe.jp' == psl.get_tld('kobe.jp')
        assert 'amazonaws.com' == psl.get_tld('amazonaws.com')
        assert 'com.pg' == psl.get_tld('telinet.com.pg', wildcard=True)
        assert 'pg' == psl.get_tld('telinet.com.pg', wildcard=False)
        assert 'com.pg' == psl.get_tld('com.pg', wildcard=True)
        assert 'pg' == psl.get_tld('com.pg', wildcard=False)
        assert 'co.uk' == psl.get_tld('telinet.co.uk', wildcard=False)
        assert 'co.uk' == psl.get_tld('co.uk', wildcard=True)
        assert 'co.uk' == psl.get_tld('co.uk', wildcard=False)
        assert psl.get_tld('blah.local', strict=True) is None
        assert psl.get_tld('blah.local', wildcard=False) is None
        assert 'local' == psl.get_tld('blah.local')

    def test_get_tld_returns_correct_tld_or_etld_for_fqdn(self):
        psl = publicsuffix.PublicSuffixList()
        assert 'com' == psl.get_tld('www.foo.com.')

    def test_get_tld_returns_correct_tld_or_etld_for_root_domain(self):
        psl = publicsuffix.PublicSuffixList()
        assert '' == psl.get_tld('.')

    def test_get_tld_returns_correct_tld_or_etld_for_empty_string(self):
        psl = publicsuffix.PublicSuffixList()
        assert psl.get_tld('') is None

    def test_PublicSuffixList_tlds_is_loaded_correctly(self):
        psl = publicsuffix.PublicSuffixList()
        assert psl.tlds


class TestPublicSuffixGetSld(unittest.TestCase):
    """
    Pin the get_sld() results expected for backward compatibility:
    plain lookups, the strict and wildcard flags, the empty string,
    a fully-qualified name with a trailing dot, and the root domain '.'.
    """

    def test_get_sld_backward_compatibility(self):
        psl = publicsuffix.PublicSuffixList()
        assert 'com' == psl.get_sld('com')
        assert 'foo.com' == psl.get_sld('foo.com')
        assert 'foo.co.jp' == psl.get_sld('foo.co.jp')
        assert 'co.jp' == psl.get_sld('co.jp')
        assert 'jp' == psl.get_sld('jp')

    def test_get_sld_backward_compatibility_strict_and_wildcard_flags(self):
        psl = publicsuffix.PublicSuffixList()
        assert 'local' == psl.get_sld('local')
        assert 'local' == psl.get_sld('foo.local')
        assert psl.get_sld('local', strict=True) is None
        assert psl.get_sld('foo.local', strict=True) is None
        assert 'local' == psl.get_sld('local', wildcard=False)
        assert 'local' == psl.get_sld('foo.local', strict=False)

    def test_get_sld_backward_compatibility_sld_for_empty_string(self):
        psl = publicsuffix.PublicSuffixList()
        assert psl.get_sld('') is None
        assert psl.get_sld('', strict=True) is None
        assert psl.get_sld('', wildcard=False) is None

    def test_get_sld_backward_compatibility_sld_for_fqdn(self):
        psl = publicsuffix.PublicSuffixList()
        assert 'foo.com' == psl.get_sld('www.foo.com.')

    def test_get_sld_backward_compatibility_sld_for_root_domain(self):
        psl = publicsuffix.PublicSuffixList()
        assert '' == psl.get_sld('.')
        assert psl.get_sld('.', strict=True) is None
        assert '' == psl.get_sld('.', wildcard=False)


if __name__ == '__main__':
    # Run the tests of the module being executed; naming the module
    # explicitly would import this file a second time under another name.
    unittest.main()

# --------------------------------------------------------------------------------
# /tests_mozilla.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright (c) nexB Inc. and others.
3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a 5 | # copy of this software and associated documentation files (the "Software"), 6 | # to deal in the Software without restriction, including without limitation 7 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | # and/or sell copies of the Software, and to permit persons to whom the 9 | # Software is furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | # DEALINGS IN THE SOFTWARE. 21 | # 22 | # This test suite is borrowed from Mozilla and originally from: 23 | # https://raw.githubusercontent.com/mozilla/gecko-dev/0678172d5b5c681061b904c776b668489e3355b0/netwerk/test/unit/data/test_psl.txt 24 | # Any copyright is dedicated to the Public Domain. 
25 | # http://creativecommons.org/publicdomain/zero/1.0/ 26 | 27 | 28 | from __future__ import absolute_import 29 | from __future__ import print_function 30 | from __future__ import unicode_literals 31 | 32 | import unittest 33 | 34 | import publicsuffix2 as publicsuffix 35 | 36 | 37 | class TestPublicSuffixMozilla(unittest.TestCase): 38 | """ 39 | Test suite borrowed from Mozilla and originally from: 40 | https://raw.githubusercontent.com/mozilla/gecko-dev/0678172d5b5c681061b904c776b668489e3355b0/netwerk/test/unit/data/test_psl.txt 41 | Any copyright is dedicated to the Public Domain. 42 | http://creativecommons.org/publicdomain/zero/1.0/ 43 | """ 44 | 45 | def test_get_tld_null_input(self): 46 | assert None == publicsuffix.get_tld(None) 47 | 48 | def test_get_tld_Mixed_case(self): 49 | assert 'com' == publicsuffix.get_tld('COM') 50 | 51 | def test_get_tld_Mixed_case2(self): 52 | assert 'com' == publicsuffix.get_tld('example.COM') 53 | 54 | def test_get_tld_Mixed_case3(self): 55 | assert 'com' == publicsuffix.get_tld('WwW.example.COM') 56 | 57 | def test_get_tld_Leading_dot1(self): 58 | assert 'com' == publicsuffix.get_tld('.com') 59 | 60 | def test_get_tld_Leading_dot2(self): 61 | assert 'example' == publicsuffix.get_tld('.example') 62 | 63 | def test_get_tld_Leading_dot3(self): 64 | assert 'com' == publicsuffix.get_tld('.example.com') 65 | 66 | def test_get_tld_Leading_dot4(self): 67 | assert 'example' == publicsuffix.get_tld('.example.example') 68 | 69 | def test_get_tld_Unlisted_TLD1(self): 70 | assert 'example' == publicsuffix.get_tld('example') 71 | 72 | def test_get_tld_Unlisted_TLD2(self): 73 | assert 'example' == publicsuffix.get_tld('example.example') 74 | 75 | def test_get_tld_Unlisted_TLD3(self): 76 | assert 'example' == publicsuffix.get_tld('b.example.example') 77 | 78 | def test_get_tld_Unlisted_TLD4(self): 79 | assert 'example' == publicsuffix.get_tld('a.b.example.example') 80 | 81 | def test_get_tld_Listed_but_non_Internet_TLD1(self): 82 | assert 
'local' == publicsuffix.get_tld('local') 83 | 84 | def test_get_tld_Listed_but_non_Internet_TLD2(self): 85 | assert 'local' == publicsuffix.get_tld('example.local') 86 | 87 | def test_get_tld_Listed_but_non_Internet_TLD3(self): 88 | assert 'local' == publicsuffix.get_tld('b.example.local') 89 | 90 | def test_get_tld_Listed_but_non_Internet_TLD4(self): 91 | assert 'local' == publicsuffix.get_tld('a.b.example.local') 92 | 93 | def test_get_tld_TLD_with_only_1_rule1(self): 94 | assert 'biz' == publicsuffix.get_tld('biz') 95 | 96 | def test_get_tld_TLD_with_only_1_rule2(self): 97 | assert 'biz' == publicsuffix.get_tld('domain.biz') 98 | 99 | def test_get_tld_TLD_with_only_1_rule3(self): 100 | assert 'biz' == publicsuffix.get_tld('b.domain.biz') 101 | 102 | def test_get_tld_TLD_with_only_1_rule4(self): 103 | assert 'biz' == publicsuffix.get_tld('a.b.domain.biz') 104 | 105 | def test_get_tld_TLD_with_some_2_level_rules1(self): 106 | assert 'com' == publicsuffix.get_tld('com') 107 | 108 | def test_get_tld_TLD_with_some_2_level_rules2(self): 109 | assert 'com' == publicsuffix.get_tld('example.com') 110 | 111 | def test_get_tld_TLD_with_some_2_level_rules3(self): 112 | assert 'com' == publicsuffix.get_tld('b.example.com') 113 | 114 | def test_get_tld_TLD_with_some_2_level_rules4(self): 115 | assert 'com' == publicsuffix.get_tld('a.b.example.com') 116 | 117 | def test_get_tld_TLD_with_some_2_level_rules5(self): 118 | assert 'uk.com' == publicsuffix.get_tld('uk.com') 119 | 120 | def test_get_tld_TLD_with_some_2_level_rules6(self): 121 | assert 'uk.com' == publicsuffix.get_tld('example.uk.com') 122 | 123 | def test_get_tld_TLD_with_some_2_level_rules7(self): 124 | assert 'uk.com' == publicsuffix.get_tld('b.example.uk.com') 125 | 126 | def test_get_tld_TLD_with_some_2_level_rules8(self): 127 | assert 'uk.com' == publicsuffix.get_tld('a.b.example.uk.com') 128 | 129 | def test_get_tld_TLD_with_some_2_level_rules9(self): 130 | assert 'ac' == publicsuffix.get_tld('test.ac') 131 | 
132 | def test_get_tld_TLD_with_only_1_wildcard_rule1(self): 133 | assert 'bd' == publicsuffix.get_tld('bd') 134 | 135 | def test_get_tld_TLD_with_only_1_wildcard_rule2(self): 136 | assert 'c.bd' == publicsuffix.get_tld('c.bd') 137 | 138 | def test_get_tld_TLD_with_only_1_wildcard_rule3(self): 139 | assert 'c.bd' == publicsuffix.get_tld('b.c.bd') 140 | 141 | def test_get_tld_TLD_with_only_1_wildcard_rule4(self): 142 | assert 'c.bd' == publicsuffix.get_tld('a.b.c.bd') 143 | 144 | def test_get_tld_More_complex_TLD1(self): 145 | assert 'jp' == publicsuffix.get_tld('jp') 146 | 147 | def test_get_tld_More_complex_TLD2(self): 148 | assert 'jp' == publicsuffix.get_tld('test.jp') 149 | 150 | def test_get_tld_More_complex_TLD3(self): 151 | assert 'jp' == publicsuffix.get_tld('www.test.jp') 152 | 153 | def test_get_tld_More_complex_TLD4(self): 154 | assert 'ac.jp' == publicsuffix.get_tld('ac.jp') 155 | 156 | def test_get_tld_More_complex_TLD5(self): 157 | assert 'ac.jp' == publicsuffix.get_tld('test.ac.jp') 158 | 159 | def test_get_tld_More_complex_TLD6(self): 160 | assert 'ac.jp' == publicsuffix.get_tld('www.test.ac.jp') 161 | 162 | def test_get_tld_More_complex_TLD7(self): 163 | assert 'kyoto.jp' == publicsuffix.get_tld('kyoto.jp') 164 | 165 | def test_get_tld_More_complex_TLD8(self): 166 | assert 'kyoto.jp' == publicsuffix.get_tld('test.kyoto.jp') 167 | 168 | def test_get_tld_More_complex_TLD9(self): 169 | assert 'ide.kyoto.jp' == publicsuffix.get_tld('ide.kyoto.jp') 170 | 171 | def test_get_tld_More_complex_TLD10(self): 172 | assert 'ide.kyoto.jp' == publicsuffix.get_tld('b.ide.kyoto.jp') 173 | 174 | def test_get_tld_More_complex_TLD11(self): 175 | assert 'ide.kyoto.jp' == publicsuffix.get_tld('a.b.ide.kyoto.jp') 176 | 177 | def test_get_tld_More_complex_TLD12(self): 178 | assert 'c.kobe.jp' == publicsuffix.get_tld('c.kobe.jp') 179 | 180 | def test_get_tld_More_complex_TLD13(self): 181 | assert 'c.kobe.jp' == publicsuffix.get_tld('b.c.kobe.jp') 182 | 183 | def 
test_get_tld_More_complex_TLD14(self): 184 | assert 'c.kobe.jp' == publicsuffix.get_tld('a.b.c.kobe.jp') 185 | 186 | def test_get_tld_More_complex_TLD15(self): 187 | assert 'kobe.jp' == publicsuffix.get_tld('city.kobe.jp') 188 | 189 | def test_get_tld_More_complex_TLD16(self): 190 | assert 'kobe.jp' == publicsuffix.get_tld('www.city.kobe.jp') 191 | 192 | def test_get_tld_TLD_with_a_wildcard_rule_and_exceptions1(self): 193 | assert 'ck' == publicsuffix.get_tld('ck') 194 | 195 | def test_get_tld_TLD_with_a_wildcard_rule_and_exceptions2(self): 196 | assert 'test.ck' == publicsuffix.get_tld('test.ck') 197 | 198 | def test_get_tld_TLD_with_a_wildcard_rule_and_exceptions3(self): 199 | assert 'test.ck' == publicsuffix.get_tld('b.test.ck') 200 | 201 | def test_get_tld_TLD_with_a_wildcard_rule_and_exceptions4(self): 202 | assert 'test.ck' == publicsuffix.get_tld('a.b.test.ck') 203 | 204 | def test_get_tld_TLD_with_a_wildcard_rule_and_exceptions5(self): 205 | assert 'ck' == publicsuffix.get_tld('www.ck') 206 | 207 | def test_get_tld_TLD_with_a_wildcard_rule_and_exceptions6(self): 208 | assert 'ck' == publicsuffix.get_tld('www.www.ck') 209 | 210 | def test_get_tld_US_K121(self): 211 | assert 'us' == publicsuffix.get_tld('us') 212 | 213 | def test_get_tld_US_K122(self): 214 | assert 'us' == publicsuffix.get_tld('test.us') 215 | 216 | def test_get_tld_US_K123(self): 217 | assert 'us' == publicsuffix.get_tld('www.test.us') 218 | 219 | def test_get_tld_US_K124(self): 220 | assert 'ak.us' == publicsuffix.get_tld('ak.us') 221 | 222 | def test_get_tld_US_K125(self): 223 | assert 'ak.us' == publicsuffix.get_tld('test.ak.us') 224 | 225 | def test_get_tld_US_K126(self): 226 | assert 'ak.us' == publicsuffix.get_tld('www.test.ak.us') 227 | 228 | def test_get_tld_US_K127(self): 229 | assert 'k12.ak.us' == publicsuffix.get_tld('k12.ak.us') 230 | 231 | def test_get_tld_US_K128(self): 232 | assert 'k12.ak.us' == publicsuffix.get_tld('test.k12.ak.us') 233 | 234 | def 
test_get_tld_US_K129(self): 235 | assert 'k12.ak.us' == publicsuffix.get_tld('www.test.k12.ak.us') 236 | 237 | def test_get_tld_IDN_labels1(self): 238 | psl = publicsuffix.PublicSuffixList(idna=False) 239 | assert 'com.cn' == psl.get_tld('食狮.com.cn') 240 | 241 | def test_get_tld_IDN_labels2(self): 242 | psl = publicsuffix.PublicSuffixList(idna=False) 243 | assert '公司.cn' == psl.get_tld('食狮.公司.cn') 244 | 245 | def test_get_tld_IDN_labels3(self): 246 | psl = publicsuffix.PublicSuffixList(idna=False) 247 | assert '公司.cn' == psl.get_tld('www.食狮.公司.cn') 248 | 249 | def test_get_tld_IDN_labels4(self): 250 | psl = publicsuffix.PublicSuffixList(idna=False) 251 | assert '公司.cn' == psl.get_tld('shishi.公司.cn') 252 | 253 | def test_get_tld_IDN_labels5(self): 254 | psl = publicsuffix.PublicSuffixList(idna=False) 255 | assert '公司.cn' == psl.get_tld('公司.cn') 256 | 257 | def test_get_tld_IDN_labels6(self): 258 | psl = publicsuffix.PublicSuffixList(idna=False) 259 | assert '中国' == psl.get_tld('食狮.中国') 260 | 261 | def test_get_tld_IDN_labels7(self): 262 | psl = publicsuffix.PublicSuffixList(idna=False) 263 | assert '中国' == psl.get_tld('www.食狮.中国') 264 | 265 | def test_get_tld_IDN_labels8(self): 266 | psl = publicsuffix.PublicSuffixList(idna=False) 267 | assert '中国' == psl.get_tld('shishi.中国') 268 | 269 | def test_get_tld_IDN_labels9(self): 270 | psl = publicsuffix.PublicSuffixList(idna=False) 271 | assert '中国' == psl.get_tld('中国') 272 | 273 | def test_get_tld_Same_as_above_but_punycoded1(self): 274 | assert 'com.cn' == publicsuffix.get_tld('xn--85x722f.com.cn') 275 | 276 | def test_get_tld_Same_as_above_but_punycoded2(self): 277 | assert 'xn--55qx5d.cn' == publicsuffix.get_tld('xn--85x722f.xn--55qx5d.cn') 278 | 279 | def test_get_tld_Same_as_above_but_punycoded3(self): 280 | assert 'xn--55qx5d.cn' == publicsuffix.get_tld('www.xn--85x722f.xn--55qx5d.cn') 281 | 282 | def test_get_tld_Same_as_above_but_punycoded4(self): 283 | assert 'xn--55qx5d.cn' == 
publicsuffix.get_tld('shishi.xn--55qx5d.cn') 284 | 285 | def test_get_tld_Same_as_above_but_punycoded5(self): 286 | assert 'xn--55qx5d.cn' == publicsuffix.get_tld('xn--55qx5d.cn') 287 | 288 | def test_get_tld_Same_as_above_but_punycoded6(self): 289 | assert 'xn--fiqs8s' == publicsuffix.get_tld('xn--85x722f.xn--fiqs8s') 290 | 291 | def test_get_tld_Same_as_above_but_punycoded7(self): 292 | assert 'xn--fiqs8s' == publicsuffix.get_tld('www.xn--85x722f.xn--fiqs8s') 293 | 294 | def test_get_tld_Same_as_above_but_punycoded8(self): 295 | assert 'xn--fiqs8s' == publicsuffix.get_tld('shishi.xn--fiqs8s') 296 | 297 | def test_get_tld_Same_as_above_but_punycoded9(self): 298 | assert 'xn--fiqs8s' == publicsuffix.get_tld('xn--fiqs8s') 299 | 300 | 301 | class TestPublicSuffixMozillaStrict(unittest.TestCase): 302 | """ 303 | Test suite borrowed from Mozilla and originally from: 304 | https://raw.githubusercontent.com/mozilla/gecko-dev/0678172d5b5c681061b904c776b668489e3355b0/netwerk/test/unit/data/test_psl.txt 305 | Any copyright is dedicated to the Public Domain. 
306 | http://creativecommons.org/publicdomain/zero/1.0/ 307 | """ 308 | 309 | def test_get_tld_null_input(self): 310 | assert None == publicsuffix.get_tld(None, strict=True) 311 | 312 | def test_get_tld_Mixed_case(self): 313 | assert 'com' == publicsuffix.get_tld('COM', strict=True) 314 | 315 | def test_get_tld_Mixed_case2(self): 316 | assert 'com' == publicsuffix.get_tld('example.COM', strict=True) 317 | 318 | def test_get_tld_Mixed_case3(self): 319 | assert 'com' == publicsuffix.get_tld('WwW.example.COM', strict=True) 320 | 321 | def test_get_tld_Leading_dot1(self): 322 | assert 'com' == publicsuffix.get_tld('.com', strict=True) 323 | 324 | def test_get_tld_Leading_dot2(self): 325 | assert None == publicsuffix.get_tld('.example', strict=True) 326 | 327 | def test_get_tld_Leading_dot3(self): 328 | assert 'com' == publicsuffix.get_tld('.example.com', strict=True) 329 | 330 | def test_get_tld_Leading_dot4(self): 331 | assert None == publicsuffix.get_tld('.example.example', strict=True) 332 | 333 | def test_get_tld_Unlisted_TLD1(self): 334 | assert None == publicsuffix.get_tld('example', strict=True) 335 | 336 | def test_get_tld_Unlisted_TLD2(self): 337 | assert None == publicsuffix.get_tld('example.example', strict=True) 338 | 339 | def test_get_tld_Unlisted_TLD3(self): 340 | assert None == publicsuffix.get_tld('b.example.example', strict=True) 341 | 342 | def test_get_tld_Unlisted_TLD4(self): 343 | assert None == publicsuffix.get_tld('a.b.example.example', strict=True) 344 | 345 | def test_get_tld_Listed_but_non_Internet_TLD1(self): 346 | assert None == publicsuffix.get_tld('local', strict=True) 347 | 348 | def test_get_tld_Listed_but_non_Internet_TLD2(self): 349 | assert None == publicsuffix.get_tld('example.local', strict=True) 350 | 351 | def test_get_tld_Listed_but_non_Internet_TLD3(self): 352 | assert None == publicsuffix.get_tld('b.example.local', strict=True) 353 | 354 | def test_get_tld_Listed_but_non_Internet_TLD4(self): 355 | assert None == 
publicsuffix.get_tld('a.b.example.local', strict=True) 356 | 357 | def test_get_tld_TLD_with_only_1_rule1(self): 358 | assert 'biz' == publicsuffix.get_tld('biz', strict=True) 359 | 360 | def test_get_tld_TLD_with_only_1_rule2(self): 361 | assert 'biz' == publicsuffix.get_tld('domain.biz', strict=True) 362 | 363 | def test_get_tld_TLD_with_only_1_rule3(self): 364 | assert 'biz' == publicsuffix.get_tld('b.domain.biz', strict=True) 365 | 366 | def test_get_tld_TLD_with_only_1_rule4(self): 367 | assert 'biz' == publicsuffix.get_tld('a.b.domain.biz', strict=True) 368 | 369 | def test_get_tld_TLD_with_some_2_level_rules1(self): 370 | assert 'com' == publicsuffix.get_tld('com', strict=True) 371 | 372 | def test_get_tld_TLD_with_some_2_level_rules2(self): 373 | assert 'com' == publicsuffix.get_tld('example.com', strict=True) 374 | 375 | def test_get_tld_TLD_with_some_2_level_rules3(self): 376 | assert 'com' == publicsuffix.get_tld('b.example.com', strict=True) 377 | 378 | def test_get_tld_TLD_with_some_2_level_rules4(self): 379 | assert 'com' == publicsuffix.get_tld('a.b.example.com', strict=True) 380 | 381 | def test_get_tld_TLD_with_some_2_level_rules5(self): 382 | assert 'uk.com' == publicsuffix.get_tld('uk.com', strict=True) 383 | 384 | def test_get_tld_TLD_with_some_2_level_rules6(self): 385 | assert 'uk.com' == publicsuffix.get_tld('example.uk.com', strict=True) 386 | 387 | def test_get_tld_TLD_with_some_2_level_rules7(self): 388 | assert 'uk.com' == publicsuffix.get_tld('b.example.uk.com', strict=True) 389 | 390 | def test_get_tld_TLD_with_some_2_level_rules8(self): 391 | assert 'uk.com' == publicsuffix.get_tld('a.b.example.uk.com', strict=True) 392 | 393 | def test_get_tld_TLD_with_some_2_level_rules9(self): 394 | assert 'ac' == publicsuffix.get_tld('test.ac', strict=True) 395 | 396 | def test_get_tld_TLD_with_only_1_wildcard_rule1(self): 397 | assert 'bd' == publicsuffix.get_tld('bd', strict=True) 398 | 399 | def test_get_tld_TLD_with_only_1_wildcard_rule2(self): 
400 | assert 'c.bd' == publicsuffix.get_tld('c.bd', strict=True) 401 | 402 | def test_get_tld_TLD_with_only_1_wildcard_rule3(self): 403 | assert 'c.bd' == publicsuffix.get_tld('b.c.bd', strict=True) 404 | 405 | def test_get_tld_TLD_with_only_1_wildcard_rule4(self): 406 | assert 'c.bd' == publicsuffix.get_tld('a.b.c.bd', strict=True) 407 | 408 | def test_get_tld_More_complex_TLD1(self): 409 | assert 'jp' == publicsuffix.get_tld('jp', strict=True) 410 | 411 | def test_get_tld_More_complex_TLD2(self): 412 | assert 'jp' == publicsuffix.get_tld('test.jp', strict=True) 413 | 414 | def test_get_tld_More_complex_TLD3(self): 415 | assert 'jp' == publicsuffix.get_tld('www.test.jp', strict=True) 416 | 417 | def test_get_tld_More_complex_TLD4(self): 418 | assert 'ac.jp' == publicsuffix.get_tld('ac.jp', strict=True) 419 | 420 | def test_get_tld_More_complex_TLD5(self): 421 | assert 'ac.jp' == publicsuffix.get_tld('test.ac.jp', strict=True) 422 | 423 | def test_get_tld_More_complex_TLD6(self): 424 | assert 'ac.jp' == publicsuffix.get_tld('www.test.ac.jp', strict=True) 425 | 426 | def test_get_tld_More_complex_TLD7(self): 427 | assert 'kyoto.jp' == publicsuffix.get_tld('kyoto.jp', strict=True) 428 | 429 | def test_get_tld_More_complex_TLD8(self): 430 | assert 'kyoto.jp' == publicsuffix.get_tld('test.kyoto.jp', strict=True) 431 | 432 | def test_get_tld_More_complex_TLD9(self): 433 | assert 'ide.kyoto.jp' == publicsuffix.get_tld('ide.kyoto.jp', strict=True) 434 | 435 | def test_get_tld_More_complex_TLD10(self): 436 | assert 'ide.kyoto.jp' == publicsuffix.get_tld('b.ide.kyoto.jp', strict=True) 437 | 438 | def test_get_tld_More_complex_TLD11(self): 439 | assert 'ide.kyoto.jp' == publicsuffix.get_tld('a.b.ide.kyoto.jp', strict=True) 440 | 441 | def test_get_tld_More_complex_TLD12(self): 442 | assert 'c.kobe.jp' == publicsuffix.get_tld('c.kobe.jp', strict=True) 443 | 444 | def test_get_tld_More_complex_TLD13(self): 445 | assert 'c.kobe.jp' == publicsuffix.get_tld('b.c.kobe.jp', 
strict=True) 446 | 447 | def test_get_tld_More_complex_TLD14(self): 448 | assert 'c.kobe.jp' == publicsuffix.get_tld('a.b.c.kobe.jp', strict=True) 449 | 450 | def test_get_tld_More_complex_TLD15(self): 451 | assert 'kobe.jp' == publicsuffix.get_tld('city.kobe.jp', strict=True) 452 | 453 | def test_get_tld_More_complex_TLD16(self): 454 | assert 'kobe.jp' == publicsuffix.get_tld('www.city.kobe.jp', strict=True) 455 | 456 | def test_get_tld_TLD_with_a_wildcard_rule_and_exceptions1(self): 457 | assert 'ck' == publicsuffix.get_tld('ck', strict=True) 458 | 459 | def test_get_tld_TLD_with_a_wildcard_rule_and_exceptions2(self): 460 | assert 'test.ck' == publicsuffix.get_tld('test.ck', strict=True) 461 | 462 | def test_get_tld_TLD_with_a_wildcard_rule_and_exceptions3(self): 463 | assert 'test.ck' == publicsuffix.get_tld('b.test.ck', strict=True) 464 | 465 | def test_get_tld_TLD_with_a_wildcard_rule_and_exceptions4(self): 466 | assert 'test.ck' == publicsuffix.get_tld('a.b.test.ck', strict=True) 467 | 468 | def test_get_tld_TLD_with_a_wildcard_rule_and_exceptions5(self): 469 | assert 'ck' == publicsuffix.get_tld('www.ck', strict=True) 470 | 471 | def test_get_tld_TLD_with_a_wildcard_rule_and_exceptions6(self): 472 | assert 'ck' == publicsuffix.get_tld('www.www.ck', strict=True) 473 | 474 | def test_get_tld_US_K121(self): 475 | assert 'us' == publicsuffix.get_tld('us', strict=True) 476 | 477 | def test_get_tld_US_K122(self): 478 | assert 'us' == publicsuffix.get_tld('test.us', strict=True) 479 | 480 | def test_get_tld_US_K123(self): 481 | assert 'us' == publicsuffix.get_tld('www.test.us', strict=True) 482 | 483 | def test_get_tld_US_K124(self): 484 | assert 'ak.us' == publicsuffix.get_tld('ak.us', strict=True) 485 | 486 | def test_get_tld_US_K125(self): 487 | assert 'ak.us' == publicsuffix.get_tld('test.ak.us', strict=True) 488 | 489 | def test_get_tld_US_K126(self): 490 | assert 'ak.us' == publicsuffix.get_tld('www.test.ak.us', strict=True) 491 | 492 | def 
test_get_tld_US_K127(self): 493 | assert 'k12.ak.us' == publicsuffix.get_tld('k12.ak.us', strict=True) 494 | 495 | def test_get_tld_US_K128(self): 496 | assert 'k12.ak.us' == publicsuffix.get_tld('test.k12.ak.us', strict=True) 497 | 498 | def test_get_tld_US_K129(self): 499 | assert 'k12.ak.us' == publicsuffix.get_tld('www.test.k12.ak.us', strict=True) 500 | 501 | def test_get_tld_IDN_labels1(self): 502 | psl = publicsuffix.PublicSuffixList(idna=False) 503 | assert 'com.cn' == psl.get_tld('食狮.com.cn', strict=True) 504 | 505 | def test_get_tld_IDN_labels2(self): 506 | psl = publicsuffix.PublicSuffixList(idna=False) 507 | assert '公司.cn' == psl.get_tld('食狮.公司.cn', strict=True) 508 | 509 | def test_get_tld_IDN_labels3(self): 510 | psl = publicsuffix.PublicSuffixList(idna=False) 511 | assert '公司.cn' == psl.get_tld('www.食狮.公司.cn', strict=True) 512 | 513 | def test_get_tld_IDN_labels4(self): 514 | psl = publicsuffix.PublicSuffixList(idna=False) 515 | assert '公司.cn' == psl.get_tld('shishi.公司.cn', strict=True) 516 | 517 | def test_get_tld_IDN_labels5(self): 518 | psl = publicsuffix.PublicSuffixList(idna=False) 519 | assert '公司.cn' == psl.get_tld('公司.cn', strict=True) 520 | 521 | def test_get_tld_IDN_labels6(self): 522 | psl = publicsuffix.PublicSuffixList(idna=False) 523 | assert '中国' == psl.get_tld('食狮.中国', strict=True) 524 | 525 | def test_get_tld_IDN_labels7(self): 526 | psl = publicsuffix.PublicSuffixList(idna=False) 527 | assert '中国' == psl.get_tld('www.食狮.中国', strict=True) 528 | 529 | def test_get_tld_IDN_labels8(self): 530 | psl = publicsuffix.PublicSuffixList(idna=False) 531 | assert '中国' == psl.get_tld('shishi.中国', strict=True) 532 | 533 | def test_get_tld_IDN_labels9(self): 534 | psl = publicsuffix.PublicSuffixList(idna=False) 535 | assert '中国' == psl.get_tld('中国', strict=True) 536 | 537 | def test_get_tld_Same_as_above_but_punycoded1(self): 538 | assert 'com.cn' == publicsuffix.get_tld('xn--85x722f.com.cn', strict=True) 539 | 540 | def 
test_get_tld_Same_as_above_but_punycoded2(self): 541 | assert 'xn--55qx5d.cn' == publicsuffix.get_tld('xn--85x722f.xn--55qx5d.cn', strict=True) 542 | 543 | def test_get_tld_Same_as_above_but_punycoded3(self): 544 | assert 'xn--55qx5d.cn' == publicsuffix.get_tld('www.xn--85x722f.xn--55qx5d.cn', strict=True) 545 | 546 | def test_get_tld_Same_as_above_but_punycoded4(self): 547 | assert 'xn--55qx5d.cn' == publicsuffix.get_tld('shishi.xn--55qx5d.cn', strict=True) 548 | 549 | def test_get_tld_Same_as_above_but_punycoded5(self): 550 | assert 'xn--55qx5d.cn' == publicsuffix.get_tld('xn--55qx5d.cn', strict=True) 551 | 552 | def test_get_tld_Same_as_above_but_punycoded6(self): 553 | assert 'xn--fiqs8s' == publicsuffix.get_tld('xn--85x722f.xn--fiqs8s', strict=True) 554 | 555 | def test_get_tld_Same_as_above_but_punycoded7(self): 556 | assert 'xn--fiqs8s' == publicsuffix.get_tld('www.xn--85x722f.xn--fiqs8s', strict=True) 557 | 558 | def test_get_tld_Same_as_above_but_punycoded8(self): 559 | assert 'xn--fiqs8s' == publicsuffix.get_tld('shishi.xn--fiqs8s', strict=True) 560 | 561 | def test_get_tld_Same_as_above_but_punycoded9(self): 562 | assert 'xn--fiqs8s' == publicsuffix.get_tld('xn--fiqs8s', strict=True) 563 | 564 | 565 | class TestPublicSuffixMozillaSld(unittest.TestCase): 566 | """ 567 | Test suite borrowed from Mozilla and originally from: 568 | https://raw.githubusercontent.com/mozilla/gecko-dev/0678172d5b5c681061b904c776b668489e3355b0/netwerk/test/unit/data/test_psl.txt 569 | Any copyright is dedicated to the Public Domain. 
570 | http://creativecommons.org/publicdomain/zero/1.0/ 571 | """ 572 | 573 | def test_get_sld_null_input(self): 574 | assert None == publicsuffix.get_sld(None) 575 | 576 | def test_get_sld_Mixed_case(self): 577 | assert 'com' == publicsuffix.get_sld('COM') 578 | 579 | def test_get_sld_Mixed_case2(self): 580 | assert 'example.com' == publicsuffix.get_sld('example.COM') 581 | 582 | def test_get_sld_Mixed_case3(self): 583 | assert 'example.com' == publicsuffix.get_sld('WwW.example.COM') 584 | 585 | def test_get_sld_Leading_dot1(self): 586 | assert 'com' == publicsuffix.get_sld('.com') 587 | 588 | def test_get_sld_Leading_dot2(self): 589 | assert 'example' == publicsuffix.get_sld('.example') 590 | 591 | def test_get_sld_Leading_dot3(self): 592 | assert 'example.com' == publicsuffix.get_sld('.example.com') 593 | 594 | def test_get_sld_Leading_dot4(self): 595 | assert 'example' == publicsuffix.get_sld('.example.example') 596 | 597 | def test_get_sld_Unlisted_sld1(self): 598 | assert 'example' == publicsuffix.get_sld('example') 599 | 600 | def test_get_sld_Unlisted_sld2(self): 601 | assert 'example' == publicsuffix.get_sld('example.example') 602 | 603 | def test_get_sld_Unlisted_sld3(self): 604 | assert 'example' == publicsuffix.get_sld('b.example.example') 605 | 606 | def test_get_sld_Unlisted_sld4(self): 607 | assert 'example' == publicsuffix.get_sld('a.b.example.example') 608 | 609 | def test_get_sld_Listed_but_non_Internet_sld1(self): 610 | assert 'local' == publicsuffix.get_sld('local') 611 | 612 | def test_get_sld_Listed_but_non_Internet_sld2(self): 613 | assert 'local' == publicsuffix.get_sld('example.local') 614 | 615 | def test_get_sld_Listed_but_non_Internet_sld3(self): 616 | assert 'local' == publicsuffix.get_sld('b.example.local') 617 | 618 | def test_get_sld_Listed_but_non_Internet_sld4(self): 619 | assert 'local' == publicsuffix.get_sld('a.b.example.local') 620 | 621 | def test_get_sld_tld_with_only_1_rule1(self): 622 | assert 'biz' == 
publicsuffix.get_sld('biz') 623 | 624 | def test_get_sld_tld_with_only_1_rule2(self): 625 | assert 'domain.biz' == publicsuffix.get_sld('domain.biz') 626 | 627 | def test_get_sld_tld_with_only_1_rule3(self): 628 | assert 'domain.biz' == publicsuffix.get_sld('b.domain.biz') 629 | 630 | def test_get_sld_tld_with_only_1_rule4(self): 631 | assert 'domain.biz' == publicsuffix.get_sld('a.b.domain.biz') 632 | 633 | def test_get_sld_tld_with_some_2_level_rules1(self): 634 | assert 'com' == publicsuffix.get_sld('com') 635 | 636 | def test_get_sld_tld_with_some_2_level_rules2(self): 637 | assert 'example.com' == publicsuffix.get_sld('example.com') 638 | 639 | def test_get_sld_tld_with_some_2_level_rules3(self): 640 | assert 'example.com' == publicsuffix.get_sld('b.example.com') 641 | 642 | def test_get_sld_tld_with_some_2_level_rules4(self): 643 | assert 'example.com' == publicsuffix.get_sld('a.b.example.com') 644 | 645 | def test_get_sld_tld_with_some_2_level_rules5(self): 646 | assert 'uk.com' == publicsuffix.get_sld('uk.com') 647 | 648 | def test_get_sld_tld_with_some_2_level_rules6(self): 649 | assert 'example.uk.com' == publicsuffix.get_sld('example.uk.com') 650 | 651 | def test_get_sld_tld_with_some_2_level_rules7(self): 652 | assert 'example.uk.com' == publicsuffix.get_sld('b.example.uk.com') 653 | 654 | def test_get_sld_tld_with_some_2_level_rules8(self): 655 | assert 'example.uk.com' == publicsuffix.get_sld('a.b.example.uk.com') 656 | 657 | def test_get_sld_tld_with_some_2_level_rules9(self): 658 | assert 'test.ac' == publicsuffix.get_sld('test.ac') 659 | 660 | def test_get_sld_tld_with_only_1_wildcard_rule1(self): 661 | assert 'bd' == publicsuffix.get_sld('bd') 662 | 663 | def test_get_sld_tld_with_only_1_wildcard_rule2(self): 664 | assert 'c.bd' == publicsuffix.get_sld('c.bd') 665 | 666 | def test_get_sld_tld_with_only_1_wildcard_rule3(self): 667 | assert 'b.c.bd' == publicsuffix.get_sld('b.c.bd') 668 | 669 | def test_get_sld_tld_with_only_1_wildcard_rule4(self): 
670 | assert 'b.c.bd' == publicsuffix.get_sld('a.b.c.bd') 671 | 672 | def test_get_sld_More_complex_sld1(self): 673 | assert 'jp' == publicsuffix.get_sld('jp') 674 | 675 | def test_get_sld_More_complex_sld2(self): 676 | assert 'test.jp' == publicsuffix.get_sld('test.jp') 677 | 678 | def test_get_sld_More_complex_sld3(self): 679 | assert 'test.jp' == publicsuffix.get_sld('www.test.jp') 680 | 681 | def test_get_sld_More_complex_sld4(self): 682 | assert 'ac.jp' == publicsuffix.get_sld('ac.jp') 683 | 684 | def test_get_sld_More_complex_sld5(self): 685 | assert 'test.ac.jp' == publicsuffix.get_sld('test.ac.jp') 686 | 687 | def test_get_sld_More_complex_sld6(self): 688 | assert 'test.ac.jp' == publicsuffix.get_sld('www.test.ac.jp') 689 | 690 | def test_get_sld_More_complex_sld7(self): 691 | assert 'kyoto.jp' == publicsuffix.get_sld('kyoto.jp') 692 | 693 | def test_get_sld_More_complex_sld8(self): 694 | assert 'test.kyoto.jp' == publicsuffix.get_sld('test.kyoto.jp') 695 | 696 | def test_get_sld_More_complex_sld9(self): 697 | assert 'ide.kyoto.jp' == publicsuffix.get_sld('ide.kyoto.jp') 698 | 699 | def test_get_sld_More_complex_sld10(self): 700 | assert 'b.ide.kyoto.jp' == publicsuffix.get_sld('b.ide.kyoto.jp') 701 | 702 | def test_get_sld_More_complex_sld11(self): 703 | assert 'b.ide.kyoto.jp' == publicsuffix.get_sld('a.b.ide.kyoto.jp') 704 | 705 | def test_get_sld_More_complex_sld12(self): 706 | assert 'c.kobe.jp' == publicsuffix.get_sld('c.kobe.jp') 707 | 708 | def test_get_sld_More_complex_sld13(self): 709 | assert 'b.c.kobe.jp' == publicsuffix.get_sld('b.c.kobe.jp') 710 | 711 | def test_get_sld_More_complex_sld14(self): 712 | assert 'b.c.kobe.jp' == publicsuffix.get_sld('a.b.c.kobe.jp') 713 | 714 | def test_get_sld_More_complex_sld15(self): 715 | assert 'city.kobe.jp' == publicsuffix.get_sld('city.kobe.jp') 716 | 717 | def test_get_sld_More_complex_sld16(self): 718 | assert 'city.kobe.jp' == publicsuffix.get_sld('www.city.kobe.jp') 719 | 720 | def 
test_get_sld_tld_with_a_wildcard_rule_and_exceptions1(self): 721 | assert 'ck' == publicsuffix.get_sld('ck') 722 | 723 | def test_get_sld_tld_with_a_wildcard_rule_and_exceptions2(self): 724 | assert 'test.ck' == publicsuffix.get_sld('test.ck') 725 | 726 | def test_get_sld_tld_with_a_wildcard_rule_and_exceptions3(self): 727 | assert 'b.test.ck' == publicsuffix.get_sld('b.test.ck') 728 | 729 | def test_get_sld_tld_with_a_wildcard_rule_and_exceptions4(self): 730 | assert 'b.test.ck' == publicsuffix.get_sld('a.b.test.ck') 731 | 732 | def test_get_sld_tld_with_a_wildcard_rule_and_exceptions5(self): 733 | assert 'www.ck' == publicsuffix.get_sld('www.ck') 734 | 735 | def test_get_sld_tld_with_a_wildcard_rule_and_exceptions6(self): 736 | assert 'www.ck' == publicsuffix.get_sld('www.www.ck') 737 | 738 | def test_get_sld_US_K121(self): 739 | assert 'us' == publicsuffix.get_sld('us') 740 | 741 | def test_get_sld_US_K122(self): 742 | assert 'test.us' == publicsuffix.get_sld('test.us') 743 | 744 | def test_get_sld_US_K123(self): 745 | assert 'test.us' == publicsuffix.get_sld('www.test.us') 746 | 747 | def test_get_sld_US_K124(self): 748 | assert 'ak.us' == publicsuffix.get_sld('ak.us') 749 | 750 | def test_get_sld_US_K125(self): 751 | assert 'test.ak.us' == publicsuffix.get_sld('test.ak.us') 752 | 753 | def test_get_sld_US_K126(self): 754 | assert 'test.ak.us' == publicsuffix.get_sld('www.test.ak.us') 755 | 756 | def test_get_sld_US_K127(self): 757 | assert 'k12.ak.us' == publicsuffix.get_sld('k12.ak.us') 758 | 759 | def test_get_sld_US_K128(self): 760 | assert 'test.k12.ak.us' == publicsuffix.get_sld('test.k12.ak.us') 761 | 762 | def test_get_sld_US_K129(self): 763 | assert 'test.k12.ak.us' == publicsuffix.get_sld('www.test.k12.ak.us') 764 | 765 | def test_get_sld_IDN_labels1(self): 766 | psl = publicsuffix.PublicSuffixList(idna=False) 767 | assert '食狮.com.cn' == psl.get_sld('食狮.com.cn') 768 | 769 | def test_get_sld_IDN_labels2(self): 770 | psl = 
publicsuffix.PublicSuffixList(idna=False) 771 | assert '食狮.公司.cn' == psl.get_sld('食狮.公司.cn') 772 | 773 | def test_get_sld_IDN_labels3(self): 774 | psl = publicsuffix.PublicSuffixList(idna=False) 775 | assert '食狮.公司.cn' == psl.get_sld('www.食狮.公司.cn') 776 | 777 | def test_get_sld_IDN_labels4(self): 778 | psl = publicsuffix.PublicSuffixList(idna=False) 779 | assert 'shishi.公司.cn' == psl.get_sld('shishi.公司.cn') 780 | 781 | def test_get_sld_IDN_labels5(self): 782 | psl = publicsuffix.PublicSuffixList(idna=False) 783 | assert '公司.cn' == psl.get_sld('公司.cn') 784 | 785 | def test_get_sld_IDN_labels6(self): 786 | psl = publicsuffix.PublicSuffixList(idna=False) 787 | assert '食狮.中国' == psl.get_sld('食狮.中国') 788 | 789 | def test_get_sld_IDN_labels7(self): 790 | psl = publicsuffix.PublicSuffixList(idna=False) 791 | assert '食狮.中国' == psl.get_sld('www.食狮.中国') 792 | 793 | def test_get_sld_IDN_labels8(self): 794 | psl = publicsuffix.PublicSuffixList(idna=False) 795 | assert 'shishi.中国' == psl.get_sld('shishi.中国') 796 | 797 | def test_get_sld_IDN_labels9(self): 798 | psl = publicsuffix.PublicSuffixList(idna=False) 799 | assert '中国' == psl.get_sld('中国') 800 | 801 | def test_get_sld_Same_as_above_but_punycoded1(self): 802 | assert 'xn--85x722f.com.cn' == publicsuffix.get_sld('xn--85x722f.com.cn') 803 | 804 | def test_get_sld_Same_as_above_but_punycoded2(self): 805 | assert 'xn--85x722f.xn--55qx5d.cn' == publicsuffix.get_sld('xn--85x722f.xn--55qx5d.cn') 806 | 807 | def test_get_sld_Same_as_above_but_punycoded3(self): 808 | assert 'xn--85x722f.xn--55qx5d.cn' == publicsuffix.get_sld('www.xn--85x722f.xn--55qx5d.cn') 809 | 810 | def test_get_sld_Same_as_above_but_punycoded4(self): 811 | assert 'shishi.xn--55qx5d.cn' == publicsuffix.get_sld('shishi.xn--55qx5d.cn') 812 | 813 | def test_get_sld_Same_as_above_but_punycoded5(self): 814 | assert 'xn--55qx5d.cn' == publicsuffix.get_sld('xn--55qx5d.cn') 815 | 816 | def test_get_sld_Same_as_above_but_punycoded6(self): 817 | assert 
'xn--85x722f.xn--fiqs8s' == publicsuffix.get_sld('xn--85x722f.xn--fiqs8s') 818 | 819 | def test_get_sld_Same_as_above_but_punycoded7(self): 820 | assert 'xn--85x722f.xn--fiqs8s' == publicsuffix.get_sld('www.xn--85x722f.xn--fiqs8s') 821 | 822 | def test_get_sld_Same_as_above_but_punycoded8(self): 823 | assert 'shishi.xn--fiqs8s' == publicsuffix.get_sld('shishi.xn--fiqs8s') 824 | 825 | def test_get_sld_Same_as_above_but_punycoded9(self): 826 | assert 'xn--fiqs8s' == publicsuffix.get_sld('xn--fiqs8s') 827 | 828 | 829 | if __name__ == '__main__': 830 | unittest.main('tests') 831 | --------------------------------------------------------------------------------