├── .github ├── FUNDING.yml ├── dependabot.yml └── workflows │ ├── psl-update.yml │ ├── release.yml │ └── tests.yml ├── .gitignore ├── .rubocop.yml ├── .rubocop_opinionated.yml ├── .yardopts ├── 2.0-Upgrade.md ├── CHANGELOG.md ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── SECURITY.md ├── bin └── console ├── data └── list.txt ├── lib ├── public_suffix.rb └── public_suffix │ ├── domain.rb │ ├── errors.rb │ ├── list.rb │ ├── rule.rb │ └── version.rb ├── public_suffix.gemspec └── test ├── .empty ├── acceptance_test.rb ├── benchmarks ├── bm_find.rb ├── bm_find_all.rb ├── bm_names.rb ├── bm_select.rb ├── bm_select_incremental.rb └── bm_valid.rb ├── profilers ├── domain_profiler.rb ├── find_profiler.rb ├── find_profiler_jp.rb ├── initialization_profiler.rb ├── list_profsize.rb └── object_binsize.rb ├── psl_test.rb ├── test_helper.rb ├── tests.txt └── unit ├── domain_test.rb ├── errors_test.rb ├── list_test.rb ├── public_suffix_test.rb └── rule_test.rb /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: "rubygems/public_suffix" 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with a single custom sponsorship URL 13 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - 
package-ecosystem: bundler 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "04:00" 8 | open-pull-requests-limit: 10 9 | labels: 10 | - dependencies 11 | 12 | - package-ecosystem: "github-actions" 13 | directory: "/" 14 | schedule: 15 | interval: "daily" 16 | time: "04:00" 17 | open-pull-requests-limit: 10 18 | labels: 19 | - dependencies 20 | -------------------------------------------------------------------------------- /.github/workflows/psl-update.yml: -------------------------------------------------------------------------------- 1 | name: PSL Update 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: '40 6 * * *' 7 | 8 | jobs: 9 | update: 10 | runs-on: ubuntu-latest 11 | steps: 12 | 13 | - name: Checkout 14 | uses: actions/checkout@v3 15 | 16 | - name: Set up Ruby 17 | uses: ruby/setup-ruby@v1 18 | with: 19 | ruby-version: "3.1" 20 | bundler-cache: true 21 | 22 | - name: Update PSL 23 | run: bundle exec rake update-list 24 | 25 | - name: Create Pull Request 26 | uses: peter-evans/create-pull-request@v5 27 | with: 28 | title: PSL auto-update 29 | commit-message: Updated list from source 30 | reviewers: weppos 31 | add-paths: | 32 | data/list.txt 33 | 34 | - name: Check Pull Request 35 | if: ${{ steps.cpr.outputs.pull-request-number }} 36 | run: | 37 | echo "Pull Request Number - ${{ steps.cpr.outputs.pull-request-number }}" 38 | echo "Pull Request URL - ${{ steps.cpr.outputs.pull-request-url }}" 39 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - v*.*.* 7 | 8 | jobs: 9 | release: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v3 14 | - name: Release Gem 15 | uses: cadwallion/publish-rubygems-action@d9474d9633f4674e59afb0c343f2dafe25181328 16 | env: 17 | RUBYGEMS_API_KEY: ${{ 
secrets.RUBYGEMS_API_KEY }} 18 | RELEASE_COMMAND: rake release 19 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | pull_request: 6 | workflow_dispatch: 7 | 8 | jobs: 9 | test: 10 | strategy: 11 | matrix: 12 | ruby-version: 13 | - "2.6" 14 | - "2.7" 15 | - "3.0" 16 | - "3.1" 17 | - "3.2" 18 | runs-on: ubuntu-latest 19 | steps: 20 | - name: Checkout 21 | uses: actions/checkout@v3 22 | - name: Set up Ruby 23 | uses: ruby/setup-ruby@v1 24 | with: 25 | ruby-version: ${{ matrix.ruby-version }} 26 | bundler-cache: true 27 | - name: Rubocop 28 | run: bundle exec rake rubocop 29 | - name: Run tests 30 | run: bundle exec rake test 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Bundler 2 | /.bundle 3 | /Gemfile.lock 4 | /pkg/* 5 | 6 | # YARD 7 | /.yardoc 8 | /yardoc/ 9 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | inherit_from: 2 | - .rubocop_opinionated.yml 3 | 4 | AllCops: 5 | Exclude: 6 | # Exclude .gemspec files because they are generally auto-generated 7 | - '*.gemspec' 8 | # Exclude vendored folders 9 | - 'tmp/**/*' 10 | - 'vendor/**/*' 11 | # Exclude artifacts 12 | - 'pkg/**/*' 13 | # Other 14 | - 'test/benchmarks/**/*' 15 | - 'test/profilers/**/*' 16 | TargetRubyVersion: 2.6 17 | 18 | # I often use @_variable to avoid clashing. 
19 | Naming/MemoizedInstanceVariableName: 20 | Enabled: false 21 | 22 | Style/ClassAndModuleChildren: 23 | Exclude: 24 | - 'spec/**/*_spec.rb' 25 | - 'test/**/*_test.rb' 26 | 27 | # Dear Rubocop, I don't want to use String#strip_heredoc 28 | Layout/HeredocIndentation: 29 | Enabled: false 30 | 31 | Style/WordArray: 32 | Enabled: false 33 | MinSize: 3 34 | 35 | Style/SymbolArray: 36 | Enabled: false 37 | MinSize: 3 38 | -------------------------------------------------------------------------------- /.rubocop_opinionated.yml: -------------------------------------------------------------------------------- 1 | AllCops: 2 | Exclude: 3 | # Exclude .gemspec files because they are generally auto-generated 4 | - '*.gemspec' 5 | # Exclude vendored folders 6 | - 'tmp/**/*' 7 | - 'vendor/**/*' 8 | NewCops: enable 9 | 10 | # [codesmell] 11 | Layout/LineLength: 12 | Enabled: false 13 | Exclude: 14 | - 'spec/**/*_spec.rb' 15 | - 'test/**/*_test.rb' 16 | Max: 100 17 | 18 | # [codesmell] 19 | Metrics/AbcSize: 20 | Enabled: false 21 | Exclude: 22 | - 'spec/**/*_spec.rb' 23 | - 'test/**/*_test.rb' 24 | 25 | # [codesmell] 26 | Metrics/BlockLength: 27 | Enabled: false 28 | 29 | # [codesmell] 30 | Metrics/CyclomaticComplexity: 31 | Enabled: false 32 | Exclude: 33 | - 'spec/**/*_spec.rb' 34 | - 'test/**/*_test.rb' 35 | 36 | # [codesmell] 37 | Metrics/ClassLength: 38 | Enabled: false 39 | Exclude: 40 | - 'spec/**/*_spec.rb' 41 | - 'test/**/*_test.rb' 42 | 43 | # [codesmell] 44 | Metrics/MethodLength: 45 | Enabled: false 46 | Exclude: 47 | - 'spec/**/*_spec.rb' 48 | - 'test/**/*_test.rb' 49 | Max: 10 50 | 51 | # [codesmell] 52 | Metrics/ModuleLength: 53 | Enabled: false 54 | Exclude: 55 | - 'spec/**/*_spec.rb' 56 | - 'test/**/*_test.rb' 57 | 58 | # [codesmell] 59 | Metrics/ParameterLists: 60 | Enabled: false 61 | Max: 5 62 | 63 | # [codesmell] 64 | Metrics/PerceivedComplexity: 65 | Enabled: false 66 | 67 | # Do not use "and" or "or" in conditionals, but for readability we can use it 68 | 
# to chain executions. Just beware of operator order. 69 | Style/AndOr: 70 | EnforcedStyle: conditionals 71 | 72 | Style/Documentation: 73 | Exclude: 74 | - 'spec/**/*' 75 | - 'test/**/*' 76 | 77 | # Double empty lines are useful to separate conceptually different methods 78 | # in the same class or module. 79 | Layout/EmptyLines: 80 | Enabled: false 81 | 82 | # In most cases, a space is nice. Sometimes, it's not. 83 | # Just be consistent with the rest of the surrounding code. 84 | Layout/EmptyLinesAroundClassBody: 85 | Enabled: false 86 | 87 | # In most cases, a space is nice. Sometimes, it's not. 88 | # Just be consistent with the rest of the surrounding code. 89 | Layout/EmptyLinesAroundModuleBody: 90 | Enabled: false 91 | 92 | # This is quite buggy, as it doesn't recognize double lines. 93 | # Double empty lines are useful to separate conceptually different methods 94 | # in the same class or module. 95 | Layout/EmptyLineBetweenDefs: 96 | Enabled: false 97 | 98 | # Annotated tokens (like %s) are a good thing, but in most cases we don't need them. 99 | # %s is a simpler and straightforward version that works in almost all cases. So don't complain. 100 | Style/FormatStringToken: 101 | Enabled: false 102 | 103 | # unless is not always cool. 104 | Style/NegatedIf: 105 | Enabled: false 106 | 107 | # There are cases were the inline rescue is ok. We can either downgrade the severity, 108 | # or rely on the developer judgement on a case-by-case basis. 109 | Style/RescueModifier: 110 | Enabled: false 111 | 112 | Style/SymbolArray: 113 | EnforcedStyle: brackets 114 | 115 | # Hate It or Love It, I prefer double quotes as this is more consistent 116 | # with several other programming languages and the output of puts and inspect. 117 | Style/StringLiterals: 118 | EnforcedStyle: double_quotes 119 | 120 | # It's nice to be consistent. The trailing comma also allows easy reordering, 121 | # and doesn't cause a diff in Git when you add a line to the bottom. 
122 | Style/TrailingCommaInArrayLiteral: 123 | EnforcedStyleForMultiline: consistent_comma 124 | Style/TrailingCommaInHashLiteral: 125 | EnforcedStyleForMultiline: consistent_comma 126 | 127 | Style/TrivialAccessors: 128 | # IgnoreClassMethods because I want to be able to define class-level accessors 129 | # that sets an instance variable on the metaclass, such as: 130 | # 131 | # def self.default=(value) 132 | # @default = value 133 | # end 134 | # 135 | IgnoreClassMethods: true 136 | -------------------------------------------------------------------------------- /.yardopts: -------------------------------------------------------------------------------- 1 | --title 'Ruby Public Suffix API Documentation' 2 | -------------------------------------------------------------------------------- /2.0-Upgrade.md: -------------------------------------------------------------------------------- 1 | # Welcome to PublicSuffix 2.0! 2 | 3 | PublicSuffix 2.0 contains a rewritten internal representation and comparison logic, that drastically increases the lookup performance. The new version also changes several internal and external APIs. 4 | 5 | This document describes the most relevant changes to help you upgrade from PublicSuffix 1.0 to 2.0. 6 | 7 | ## What's New 8 | 9 | - The library is now 100% compliant with the official PublicSuffix tests. The major breaking change you may experience is that if a domain passed as input doesn't match any rule, the rule `*` is assumed. You can override this behavior by passing a custom default rule with the `default_rule` option. The old behavior can be restored by passing `default_rule: nil`. 10 | - `PublicSuffix.domain` is a new method that parses the input and returns the domain (combination of second level domain + suffix). This is a convenient helper to parse a domain name, for example when you need to determine the cookie or SSL scope. 
11 | - Added the ability to disable the use of private domains at runtime, in addition to the ability to not load the private domains section when reading the list (`private_domains: false`). This feature also supersedes the `private_domains` class-level attribute, which is no longer available. 12 | 13 | ## Upgrade 14 | 15 | When upgrading, here are the most relevant changes to keep an eye on: 16 | 17 | - Several futile utility helpers were removed, such as `Domain#rule`, `Domain#is_a_domain?`, `Domain#is_a_subdomain?`, `Domain#valid?`. You can easily obtain the same result by having a custom method that reconstructs the logic, and/or calling `PublicSuffix.{domain|parse}(domain.to_s)`. 18 | - `PublicSuffix::List.private_domains` is no longer available. Instead, you now have two ways to enable/disable the private domains: 19 | 20 | 1. At runtime, by using the `ignore_private` option 21 | 22 | ```ruby 23 | PublicSuffix.domain("something.blogspot.com", ignore_private: true) 24 | ``` 25 | 26 | 1. Loading a filtered list: 27 | 28 | ```ruby 29 | # Disable support for private TLDs 30 | PublicSuffix::List.default = PublicSuffix::List.parse(File.read(PublicSuffix::List::DEFAULT_LIST_PATH), private_domains: false) 31 | # => "blogspot.com" 32 | PublicSuffix.domain("something.blogspot.com") 33 | # => "blogspot.com" 34 | ``` 35 | - Now that the library is 100% compliant with the official PublicSuffix algorithm, if a domain passed as input doesn't match any rule, the wildcard rule `*` is assumed. This means that unlisted TLDs will be considered valid by default, when they would have been invalid in 1.x. 
However, you can override this behavior to emulate the 1.x behavior if needed: 36 | 37 | ```ruby 38 | # 1.x: 39 | 40 | PublicSuffix.valid?("google.commm") 41 | # => false 42 | 43 | # 2.x: 44 | 45 | PublicSuffix.valid?("google.commm") 46 | # => true 47 | 48 | # Overriding 2.x behavior if needed: 49 | 50 | PublicSuffix.valid?("google.commm", default_rule: nil) 51 | # => false 52 | ```` 53 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | This project uses [Semantic Versioning 2.0.0](https://semver.org/). 4 | 5 | ## 5.0.1 6 | 7 | ### Changed 8 | 9 | - Updated definitions. 10 | 11 | 12 | ## 5.0.0 13 | 14 | ### Changed 15 | 16 | - Minimum Ruby version is 2.6 17 | 18 | - Updated definitions. 19 | 20 | 21 | ## 4.0.7 22 | 23 | ### Fixes 24 | 25 | - Fixed YARD rake task (GH-179) 26 | 27 | ### Changed 28 | 29 | - Updated definitions. 30 | 31 | 32 | ## 4.0.6 33 | 34 | ### Changed 35 | 36 | - Updated definitions. 37 | 38 | 39 | ## 4.0.5 40 | 41 | ### Changed 42 | 43 | - Updated definitions. 44 | 45 | 46 | ## 4.0.4 47 | 48 | ### Changed 49 | 50 | - Updated definitions. 51 | 52 | 53 | ## 4.0.3 54 | 55 | ### Fixed 56 | 57 | - Fixed 2.7 deprecations and warnings (GH-167). [Thanks @BrianHawley] 58 | 59 | 60 | ## 4.0.2 61 | 62 | ### Changed 63 | 64 | - Updated definitions. 65 | 66 | 67 | ## 4.0.1 68 | 69 | ### Changed 70 | 71 | - Updated definitions. 72 | 73 | 74 | ## 4.0.0 75 | 76 | ### Changed 77 | 78 | - Minimum Ruby version is 2.3 79 | 80 | 81 | ## Release 3.1.1 82 | 83 | - CHANGED: Updated definitions. 84 | - CHANGED: Rolled back support for Ruby 2.3 (GH-161, GH-162) 85 | 86 | IMPORTANT: 3.x is the latest version compatible with Ruby 2.1 and Ruby 2.2. 87 | 88 | 89 | ## Release 3.1.0 90 | 91 | - CHANGED: Updated definitions. 
92 | - CHANGED: Minimum Ruby version is 2.3 93 | - CHANGED: Upgraded to Bundler 2.x 94 | 95 | 96 | ## Release 3.0.3 97 | 98 | - CHANGED: Updated definitions. 99 | 100 | 101 | ## Release 3.0.2 102 | 103 | - CHANGED: Updated definitions. 104 | 105 | 106 | ## Release 3.0.1 107 | 108 | - CHANGED: Updated definitions. 109 | - CHANGED: Improve performance and avoid allocation (GH-146). [Thanks @robholland] 110 | 111 | 112 | ## Release 3.0.0 113 | 114 | This new version includes a major redesign of the library internals, with the goal to drastically 115 | improve the lookup time while reducing storage space. 116 | 117 | For this reason, several public methods that are no longer applicable have been deprecated 118 | and/or removed. You can find more information at GH-133. 119 | 120 | - CHANGED: Updated definitions. 121 | - CHANGED: Dropped support for Ruby < 2.1 122 | - CHANGED: `PublicSuffix::List#rules` is now protected. You should not rely on it as the internal rule representation is subject to change to optimize performances. 123 | - CHANGED: Removed `PublicSuffix::List.clear`, it was an unnecessary accessor method. Use `PublicSuffix::List.default = nil` if you **really** need to reset the default list. You shouldn't. 124 | - CHANGED: `PublicSuffix::List#select` is now private. You should not use it, instead use `PublicSuffix::List#find`. 125 | - CHANGED: `PublicSuffix::List` no longer implements Enumerable. Instead, use `#each` to loop over, or get an Enumerator. 126 | - CHANGED: Redesigned internal list storage and lookup algorithm to achieve O(1) lookup time (see GH-133). 127 | 128 | 129 | ## Release 2.0.5 130 | 131 | - CHANGED: Updated definitions. 132 | - CHANGED: Initialization performance improvements (GH-128). [Thanks @casperisfine] 133 | 134 | 135 | ## Release 2.0.4 136 | 137 | - FIXED: Fix a bug that caused the GEM to be published with the wrong version number in the gemspec (GH-121). 138 | 139 | - CHANGED: Updated definitions. 
140 | 141 | 142 | ## Release 2.0.3 143 | 144 | - CHANGED: Updated definitions. 145 | 146 | 147 | ## Release 2.0.2 148 | 149 | - CHANGED: Updated definitions. 150 | 151 | 152 | ## Release 2.0.1 153 | 154 | - FIXED: Fix bug that prevented .valid? from resetting the default rule 155 | 156 | 157 | ## Release 2.0.0 158 | 159 | - NEW: Added PublicSuffix.domain # => sld.tld 160 | - NEW: Added the ability to disable the use of private domains at runtime, in addition to the ability to not load the private domains section when reading the list (`private_domains: false`). This feature also supersedes the `private_domains` class-level attribute, which is no longer available. 161 | 162 | - CHANGED: Considerable performance improvements (GH-92) 163 | - CHANGED: Updated definitions. 164 | - CHANGED: Removed deprecated PublicSuffix::InvalidDomain exception 165 | - CHANGED: If the suffix is not listed, then the prevailing rule is "*" as defined by the PSL algorithm (GH-91) 166 | - CHANGED: Input validation is performed only if you call `PublicSuffix.parse` or `PublicSuffix.list` 167 | - CHANGED: Input with leading dot is invalid per PSL acceptance tests 168 | - CHANGED: Removed `private_domains` class-level attribute. It is replaced by the `private_domains: false` option in the list parse method. 169 | - CHANGED: The default list now assumes you use UTF-8 for reading the input (GH-94). 170 | 171 | - REMOVED: Removed futile utility helpers such as `Domain#rule`, `Domain#is_a_domain?`, `Domain#is_a_subdomain?`, `Domain#valid?`. You can easily obtain the same result by having a custom method that reconstructs the logic, and/or calling `PublicSuffix.{domain|parse}(domain.to_s)`. 172 | 173 | 174 | ## Release 1.5.3 175 | 176 | - FIXED: Don't duplicate rule indices when creating index (GH-77). [Thanks @ags] 177 | 178 | - CHANGED: Updated definitions. 179 | 180 | 181 | ## Release 1.5.2 182 | 183 | - CHANGED: Updated definitions. 
184 | 185 | 186 | ## Release 1.5.1 187 | 188 | - FIXED: Ignore case for parsing and validating (GH-62) 189 | 190 | - CHANGED: Updated definitions. 191 | 192 | 193 | ## Release 1.5.0 194 | 195 | - CHANGED: Dropped support for Ruby < 2.0 196 | 197 | - CHANGED: Updated definitions. 198 | 199 | 200 | ## Release 1.4.6 201 | 202 | - CHANGED: Updated definitions. 203 | 204 | 205 | ## Release 1.4.5 206 | 207 | - CHANGED: Updated definitions. 208 | 209 | 210 | ## Release 1.4.4 211 | 212 | - CHANGED: Updated definitions. 213 | 214 | 215 | ## Release 1.4.3 216 | 217 | - CHANGED: Updated definitions. 218 | 219 | 220 | ## Release 1.4.2 221 | 222 | - CHANGED: Updated definitions. 223 | 224 | 225 | ## Release 1.4.1 226 | 227 | - CHANGED: Updated definitions. 228 | 229 | 230 | ## Release 1.4.0 231 | 232 | - CHANGED: Moved the definitions in the lib folder. 233 | 234 | - CHANGED: Updated definitions. 235 | 236 | 237 | ## Release 1.3.3 238 | 239 | - CHANGED: Updated definitions. 240 | 241 | 242 | ## Release 1.3.2 243 | 244 | - CHANGED: Updated definitions. 245 | 246 | 247 | ## Release 1.3.1 248 | 249 | - CHANGED: Updated definitions. 250 | 251 | 252 | ## Release 1.3.0 253 | 254 | - NEW: Ability to skip Private Domains (GH-28). [Thanks @rb2k] 255 | 256 | - CHANGED: Updated definitions. 257 | 258 | 259 | ## Release 1.2.1 260 | 261 | - CHANGED: Updated definitions. 262 | 263 | 264 | ## Release 1.2.0 265 | 266 | - NEW: Allow a custom List on `PublicSuffix.parse` (GH-26). [Thanks @itspriddle] 267 | 268 | - FIXED: PublicSuffix.parse and PublicSuffix.valid? crashes when input is nil (GH-20). 269 | 270 | - CHANGED: Updated definitions. 271 | 272 | 273 | ## Release 1.1.3 274 | 275 | - CHANGED: Updated definitions. 276 | 277 | 278 | ## Release 1.1.2 279 | 280 | - CHANGED: Updated definitions. 281 | 282 | 283 | ## Release 1.1.1 284 | 285 | - CHANGED: Updated definitions. 286 | 287 | 288 | ## Release 1.1.0 289 | 290 | - FIXED: #valid? 
and #parse consider URIs as valid domains (GH-15) 291 | 292 | - CHANGED: Updated definitions. 293 | 294 | - CHANGED: Removed deprecated PublicSuffixService::RuleList. 295 | 296 | 297 | ## Release 1.0.0 298 | 299 | - CHANGED: Updated definitions. 300 | 301 | 302 | ## Release 1.0.0.rc1 303 | 304 | The library is now known as PublicSuffix. 305 | 306 | 307 | ## Release 0.9.1 308 | 309 | - CHANGED: Renamed PublicSuffixService::RuleList to PublicSuffixService::List. 310 | 311 | - CHANGED: Renamed PublicSuffixService::List#list to PublicSuffixService::List#rules. 312 | 313 | - CHANGED: Renamed PublicSuffixService to PublicSuffix. 314 | 315 | - CHANGED: Updated definitions. 316 | 317 | 318 | ## Release 0.9.0 319 | 320 | - CHANGED: Minimum Ruby version increased to Ruby 1.8.7. 321 | 322 | - CHANGED: rake/gempackagetask is deprecated. Use rubygems/package_task instead. 323 | 324 | 325 | ## Release 0.8.4 326 | 327 | - FIXED: Reverted bugfix for issue #12 for Ruby 1.8.6. 328 | This is the latest version compatible with Ruby 1.8.6. 329 | 330 | 331 | ## Release 0.8.3 332 | 333 | - FIXED: Fixed ArgumentError: invalid byte sequence in US-ASCII with Ruby 1.9.2 (#12). 334 | 335 | - CHANGED: Updated definitions (#11). 336 | 337 | - CHANGED: Renamed definitions.txt to definitions.dat. 338 | 339 | 340 | ## Release 0.8.2 341 | 342 | - NEW: Added support for rubygems-test. 343 | 344 | - CHANGED: Integrated Bundler. 345 | 346 | - CHANGED: Updated definitions. 347 | 348 | 349 | ## Release 0.8.1 350 | 351 | - FIXED: The files in the release 0.8.0 have wrong permission 600 and can't be loaded (#10). 352 | 353 | 354 | ## Release 0.8.0 355 | 356 | - CHANGED: Update public suffix list to d1a5599b49fa 2010-10-25 15:10 +0100 (#9) 357 | 358 | - NEW: Add support for Fully Qualified Domain Names (#7) 359 | 360 | 361 | ## Release 0.7.0 362 | 363 | - CHANGED: Using YARD to document the code instead of RDoc. 
364 | 365 | - FIXED: RuleList cache is not recreated when a new rule is appended to the list (#6) 366 | 367 | - FIXED: PublicSuffixService.valid? should return false if the domain is not defined or not allowed (#4, #5) 368 | 369 | 370 | ## Release 0.6.0 371 | 372 | - NEW: PublicSuffixService.parse raises DomainNotAllowed when trying to parse a domain name 373 | which exists, but is not allowed by the current definition list (#3) 374 | 375 | PublicSuffixService.parse("nic.do") 376 | # => PublicSuffixService::DomainNotAllowed 377 | 378 | - CHANGED: Renamed PublicSuffixService::InvalidDomain to PublicSuffixService::DomainInvalid 379 | 380 | 381 | ## Release 0.5.2 382 | 383 | - CHANGED: Update public suffix list to 248ea690d671 2010-09-16 18:02 +0100 384 | 385 | 386 | ## Release 0.5.1 387 | 388 | - CHANGED: Update public suffix list to 14dc66dd53c1 2010-09-15 17:09 +0100 389 | 390 | 391 | ## Release 0.5.0 392 | 393 | - CHANGED: Improve documentation for Domain#domain and Domain#subdomain (#1). 394 | 395 | - CHANGED: Performance improvements (#2). 396 | 397 | 398 | ## Release 0.4.0 399 | 400 | - CHANGED: Rename library from DomainName to PublicSuffixService to reduce the probability of name conflicts. 401 | 402 | 403 | ## Release 0.3.1 404 | 405 | - Deprecated DomainName library. 406 | 407 | 408 | ## Release 0.3.0 409 | 410 | - CHANGED: DomainName#domain and DomainName#subdomain are no longer alias of Domain#sld and Domain#tld. 411 | 412 | - CHANGED: Removed DomainName#labels and decoupled Rule from DomainName. 413 | 414 | - CHANGED: DomainName#valid? no longer instantiates new DomainName objects. This means less overhead. 415 | 416 | - CHANGED: Refactoring the entire DomainName API. Removed the internal on-the-fly parsing. Added a bunch of new methods to check and validate the DomainName. 417 | 418 | 419 | ## Release 0.2.0 420 | 421 | - NEW: DomainName#valid? 422 | 423 | - NEW: DomainName#parse and DomainName#parse! 424 | 425 | - NEW: DomainName#valid_domain? 
and DomainName#valid_subdomain? 426 | 427 | - CHANGED: Make sure RuleList lookup is only performed once. 428 | 429 | 430 | ## Release 0.1.0 431 | 432 | - Initial version 433 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source "https://rubygems.org" 4 | 5 | gemspec 6 | 7 | gem "rake" 8 | 9 | gem "memory_profiler", require: false 10 | gem "minitest" 11 | gem "minitest-reporters" 12 | gem "mocha", "~> 2.0.1" 13 | gem "rubocop", require: false 14 | gem "yard" 15 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009-2022 Simone Carletti 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Public Suffix for Ruby 2 | 3 | PublicSuffix is a Ruby domain name parser based on the [Public Suffix List](https://publicsuffix.org/). 4 | 5 | [![Build Status](https://github.com/weppos/publicsuffix-ruby/actions/workflows/tests.yml/badge.svg)](https://github.com/weppos/publicsuffix-ruby/actions/workflows/tests.yml) 6 | [![Tidelift dependencies](https://tidelift.com/badges/package/rubygems/public_suffix)](https://tidelift.com/subscription/pkg/rubygems-public-suffix?utm_source=rubygems-public-suffix&utm_medium=referral&utm_campaign=enterprise) 7 | 8 | 9 | ## Links 10 | 11 | - [Homepage](https://simonecarletti.com/code/publicsuffix-ruby) 12 | - [Repository](https://github.com/weppos/publicsuffix-ruby) 13 | - [API Documentation](https://rubydoc.info/gems/public_suffix) 14 | - [Introducing the Public Suffix List library for Ruby](https://simonecarletti.com/blog/2010/06/public-suffix-list-library-for-ruby/) 15 | 16 | 17 | ## Requirements 18 | 19 | PublicSuffix requires **Ruby >= 2.6**. For older versions of Ruby, use a previous release. 20 | 21 | 22 | ## Installation 23 | 24 | You can install the gem manually: 25 | 26 | ```shell 27 | gem install public_suffix 28 | ``` 29 | 30 | Or use Bundler and define it as a dependency in your `Gemfile`: 31 | 32 | ```ruby 33 | gem 'public_suffix' 34 | ``` 35 | 36 | If you are upgrading to 2.0, see [2.0-Upgrade.md](2.0-Upgrade.md). 
37 | 38 | ## Usage 39 | 40 | Extract the domain out from a name: 41 | 42 | ```ruby 43 | PublicSuffix.domain("google.com") 44 | # => "google.com" 45 | PublicSuffix.domain("www.google.com") 46 | # => "google.com" 47 | PublicSuffix.domain("www.google.co.uk") 48 | # => "google.co.uk" 49 | ``` 50 | 51 | Parse a domain without subdomains: 52 | 53 | ```ruby 54 | domain = PublicSuffix.parse("google.com") 55 | # => #<PublicSuffix::Domain ...> 56 | domain.tld 57 | # => "com" 58 | domain.sld 59 | # => "google" 60 | domain.trd 61 | # => nil 62 | domain.domain 63 | # => "google.com" 64 | domain.subdomain 65 | # => nil 66 | ``` 67 | 68 | Parse a domain with subdomains: 69 | 70 | ```ruby 71 | domain = PublicSuffix.parse("www.google.com") 72 | # => #<PublicSuffix::Domain ...> 73 | domain.tld 74 | # => "com" 75 | domain.sld 76 | # => "google" 77 | domain.trd 78 | # => "www" 79 | domain.domain 80 | # => "google.com" 81 | domain.subdomain 82 | # => "www.google.com" 83 | ``` 84 | 85 | Simple validation example: 86 | 87 | ```ruby 88 | PublicSuffix.valid?("google.com") 89 | # => true 90 | 91 | PublicSuffix.valid?("www.google.com") 92 | # => true 93 | 94 | # Explicitly forbidden, it is listed as a private domain 95 | PublicSuffix.valid?("blogspot.com") 96 | # => false 97 | 98 | # Unknown/not-listed TLD domains are valid by default 99 | PublicSuffix.valid?("example.tldnotlisted") 100 | # => true 101 | ``` 102 | 103 | Strict validation (without applying the default * rule): 104 | 105 | ```ruby 106 | PublicSuffix.valid?("example.tldnotlisted", default_rule: nil) 107 | # => false 108 | ``` 109 | 110 | 111 | ## Fully Qualified Domain Names 112 | 113 | This library automatically recognizes Fully Qualified Domain Names. An FQDN is a domain name that ends with a trailing dot. 
114 | 115 | ```ruby 116 | # Parse a standard domain name 117 | PublicSuffix.domain("www.google.com") 118 | # => "google.com" 119 | 120 | # Parse a fully qualified domain name 121 | PublicSuffix.domain("www.google.com.") 122 | # => "google.com" 123 | ``` 124 | 125 | ## Private domains 126 | 127 | This library supports switching off private (non-ICANN) domains. 128 | 129 | ```ruby 130 | # Extract a domain including private domains (by default) 131 | PublicSuffix.domain("something.blogspot.com") 132 | # => "something.blogspot.com" 133 | 134 | # Extract a domain excluding private domains 135 | PublicSuffix.domain("something.blogspot.com", ignore_private: true) 136 | # => "blogspot.com" 137 | 138 | # It also works for #parse and #valid? 139 | PublicSuffix.parse("something.blogspot.com", ignore_private: true) 140 | PublicSuffix.valid?("something.blogspot.com", ignore_private: true) 141 | ``` 142 | 143 | If you don't care about private domains at all, it's more efficient to exclude them when the list is parsed: 144 | 145 | ```ruby 146 | # Disable support for private TLDs 147 | PublicSuffix::List.default = PublicSuffix::List.parse(File.read(PublicSuffix::List::DEFAULT_LIST_PATH), private_domains: false) 148 | # => "blogspot.com" 149 | PublicSuffix.domain("something.blogspot.com") 150 | # => "blogspot.com" 151 | ``` 152 | 153 | ## Add domain to list 154 | 155 | If you want to manually add a domain to the list just run: 156 | 157 | ```ruby 158 | PublicSuffix::List.default << PublicSuffix::Rule.factory('onmicrosoft.com') 159 | ``` 160 | 161 | ## What is the Public Suffix List? 162 | 163 | The [Public Suffix List](https://publicsuffix.org) is a cross-vendor initiative to provide an accurate list of domain name suffixes. 164 | 165 | The Public Suffix List is an initiative of the Mozilla Project, but is maintained as a community resource. It is available for use in any software, but was originally created to meet the needs of browser manufacturers. 
166 | 167 | A "public suffix" is one under which Internet users can directly register names. Some examples of public suffixes are ".com", ".co.uk" and "pvt.k12.wy.us". The Public Suffix List is a list of all known public suffixes. 168 | 169 | 170 | ## Why the Public Suffix List is better than any available Regular Expression parser? 171 | 172 | Previously, browsers used an algorithm which basically only denied setting wide-ranging cookies for top-level domains with no dots (e.g. com or org). However, this did not work for top-level domains where only third-level registrations are allowed (e.g. co.uk). In these cases, websites could set a cookie for co.uk which will be passed onto every website registered under co.uk. 173 | 174 | Clearly, this was a security risk as it allowed websites other than the one setting the cookie to read it, and therefore potentially extract sensitive information. 175 | 176 | Since there is no algorithmic method of finding the highest level at which a domain may be registered for a particular top-level domain (the policies differ with each registry), the only method is to create a list of all top-level domains and the level at which domains can be registered. This is the aim of the effective TLD list. 177 | 178 | As well as being used to prevent cookies from being set where they shouldn't be, the list can also potentially be used for other applications where the registry controlled and privately controlled parts of a domain name need to be known, for example when grouping by top-level domains. 179 | 180 | Source: https://wiki.mozilla.org/Public_Suffix_List 181 | 182 | Not convinced yet? Check out [this real world example](https://stackoverflow.com/q/288810/123527). 183 | 184 | 185 | ## Does PublicSuffix make requests to Public Suffix List website? 186 | 187 | No. PublicSuffix comes with a bundled list. It does not make any HTTP requests to parse or validate a domain. 
188 | 189 | 190 | ## Support 191 | 192 | Library documentation is auto-generated from the [README](https://github.com/weppos/publicsuffix-ruby/blob/master/README.md) and the source code, and it's available at https://rubydoc.info/gems/public_suffix. 193 | 194 | - The PublicSuffix bug tracker is here: https://github.com/weppos/publicsuffix-ruby/issues 195 | - The PublicSuffix code repository is here: https://github.com/weppos/publicsuffix-ruby. Contributions are welcome! Please include tests and/or feature coverage for every patch, and create a topic branch for every separate change you make. 196 | 197 | [Consider subscribing to Tidelift which provides Enterprise support for this project](https://tidelift.com/subscription/pkg/rubygems-public-suffix?utm_source=rubygems-public-suffix&utm_medium=referral&utm_campaign=readme) as part of the Tidelift Subscription. Tidelift subscriptions also help the maintainers by funding the project, which in turn allows us to ship releases, bugfixes, and security updates more often. 198 | 199 | 200 | ## Security and Vulnerability Reporting 201 | 202 | For full information about our security policy, please see [`SECURITY.md`](SECURITY.md). 203 | 204 | 205 | ## Changelog 206 | 207 | See the [CHANGELOG.md](CHANGELOG.md) file for details. 208 | 209 | 210 | ## License 211 | 212 | Copyright (c) 2009-2022 Simone Carletti. This is Free Software distributed under the MIT license. 213 | 214 | The [Public Suffix List source](https://publicsuffix.org/list/) is subject to the terms of the Mozilla Public License, v. 2.0. 215 | 216 | ## Definitions 217 | 218 | tld = Top level domain: the last segment of a domain name, that is the part directly after the last "dot" symbol. For example, in `mozilla.org`, the `.org` portion is the tld. In this library the tld is the matched public suffix, so it can span more than one label (e.g. `co.uk` in `google.co.uk`). 219 | 220 | sld = Second level domain, a domain that is directly below a top-level domain.
For example, in `https://www.mozilla.org/en-US/`, `mozilla` is the second-level domain of the .org tld. 221 | 222 | trd = Transit routing domain, or known as a subdomain. This is the part of the domain that is before the sld or root domain. For example, in `https://www.mozilla.org/en-US/`, `www` is the trd. 223 | 224 | FQDN = Fully Qualified Domain Names, are domain names that are written with the hostname and the domain name, and include the top-level domain, the format looks like `[hostname].[domain].[tld].` for ex. `[www].[mozilla].[org]`. 225 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "bundler/gem_tasks" 4 | 5 | # By default, run tests and linter. 6 | task default: [:test, :rubocop] 7 | 8 | 9 | require "rake/testtask" 10 | 11 | Rake::TestTask.new do |t| 12 | t.libs = %w[lib test] 13 | t.pattern = "test/**/*_test.rb" 14 | t.verbose = !ENV["VERBOSE"].nil? 15 | t.warning = !ENV["WARNING"].nil? 16 | end 17 | 18 | require "rubocop/rake_task" 19 | 20 | RuboCop::RakeTask.new 21 | 22 | 23 | require "yard" 24 | require "yard/rake/yardoc_task" 25 | 26 | YARD::Rake::YardocTask.new(:yardoc) do |y| 27 | y.options = ["--output-dir", "yardoc"] 28 | end 29 | 30 | CLOBBER.include "yardoc" 31 | 32 | 33 | task :benchmarks do 34 | Dir["benchmarks/bm_*.rb"].each do |file| 35 | sh "ruby #{file}" 36 | end 37 | end 38 | task default: [:benchmarks] if ENV["BENCHMARKS"] == "1" 39 | 40 | 41 | desc "Downloads the Public Suffix List file from the repository and stores it locally." 
42 | task :"update-list" do 43 | require "net/http" 44 | 45 | definition_url = "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat" 46 | 47 | File.open("data/list.txt", "w+") do |f| 48 | response = Net::HTTP.get_response(URI.parse(definition_url)) 49 | response.body 50 | f.write(response.body) 51 | end 52 | end 53 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Security updates are provided only for the current minor version. 6 | 7 | If you are using a previous minor version, we recommend to upgrade to the current minor version. 8 | This project uses [semantic versioning](https://semver.org/), therefore you can upgrade to a more recent minor version without incurring into breaking changes. 9 | 10 | Exceptionally, we may support previous minor versions upon request if there are significant reasons preventing to immediately switch the latest minor version. 11 | 12 | Older major versions are no longer supported. 13 | 14 | 15 | ## Reporting a Vulnerability 16 | 17 | To make a report, please email weppos@weppos.net. 18 | 19 | Please consider encrypting your report with GPG using the key [0x420da82a989398df](https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x420da82a989398df). 
20 | 21 | ``` 22 | -----BEGIN PGP PUBLIC KEY BLOCK----- 23 | 24 | xsBNBE/QiI0BCACtBbjJnJIzaLb4NfjaljzT/+dvodst+wyDRE8Vwc6ujwboZjr2 25 | 0QwXScNzObPazyvkSZVh3g6PveneeSD0dSw2XDqKbbtLMg/Ss12yqXJfjavH/zjk 26 | 6Xq+nnbSnxBPzwFAAEaEFIc6H6BygJ7zHPP5WEY5QIMqifEAX//aBqHi4GXHJiHE 27 | 237Zqufdry23jBYjY7wGXAa11VsU9Iwqh6LPB9/hc1KtzjAuvvm5ufeT/iVjxGQX 28 | te1OZZk6n8xSVYeLsn97PfgYs0yauhexwD9dG7FbRCB379JxPRn5akr391qXcVOG 29 | ZA3yBXUSPOL6D1+TS1S0su5zbw2AEp4+z3SpABEBAAHNIlNpbW9uZSBDYXJsZXR0 30 | aSA8d2VwcG9zQGdtYWlsLmNvbT7CwHcEEwEKACEFAlXH0UQCGy8FCwkIBwMFFQoJ 31 | CAsFFgIDAQACHgECF4AACgkQQg2oKpiTmN9BOQf/UHd+bmww71MkbS38KkowDu+0 32 | 1VH35aL8sFcAMUSEA4I5oPWZoBtYYPGpALLxtrSNW+SCnmmeCQVfVmLedUVHwDZo 33 | TS4qiynpqnz+Cnq4KRC8VMIyaFoiT5Vg6MLtau8hJtqZn1Wv68g0nXuprsCuf9vs 34 | z7DDZ36z8em6OJQJQ/FQ4BGogzyYHa90cJnIM6BeLiRUUpFTl1tHLlw4JFUNi8sx 35 | 6VQ1/nhcr3OyskAix5TytRnJ8uIn22m25GGdTF2WQPNfkWJQVT4ZDbCxT20acRp0 36 | l3x1DAk3Eel8gOKzgOboB3bkI5/l1XZvNL0YWGZeyfp8I7ZqpXg/m4qLDkYU2cLA 37 | egQTAQoAJAIbLwULCQgHAwUVCgkICwUWAgMBAAIeAQIXgAUCVf6KvAIZAQAKCRBC 38 | DagqmJOY34ABB/9WbNAh0l07UN1ePfVm6Brg2Yt8k6Q2lIRUG2xAeQj/+Kx/4lAL 39 | oY6F0jJ44kIDZqZdNA0QIqYzZRBV4iW+cQrsBUUyM+chiA3RuOsDG18sfvkyPvRy 40 | ecOVubHCN+nK2GKy1oHQkCpVFIeetr0ugB/j+xNDKJ3Oa5dGBKF29ZH5Pxg7cqwH 41 | cdkhBGMpPbBYq5pJtYGggqypELzFTG292StbtV837Eze+clWRTKtMBOHke/oKBCr 42 | YYic2fmipGC9XUiqvMEMAKYq5WWWXIlcSVSnBDdxq41tXjKK4XMVgoOboZCcNFvh 43 | 0NxuDQATk1YruRZOS4SpBPXykEA1pK/zm3WmzSNTaW1vbmUgQ2FybGV0dGkgPHdl 44 | cHBvc0B3ZXBwb3MubmV0PsLAeQQTAQIAIwUCT9CIjQIbLwcLCQgHAwIBBhUIAgkK 45 | CwQWAgMBAh4BAheAAAoJEEINqCqYk5jfGWcH/Ax3EhAckGeCqNYE5BTx94bKB1LL 46 | vUjeUoImMtGGFxQu2jNOAjtpuyjihm9uHBZ+dxaxHHrhE11f+0sDcwvW8qtKEzOs 47 | GESr01VqTaVFS2JOEHhLphXseaLXJe32Osz0kHCZmrz1fCwv3b8QuWBifn8oVzcV 48 | vrE7lGC6pGwaiUvMsvA++RUquTlNVlh8uRrqcQCU8Ne9lSoDWHlUJes5s4FoCh3R 49 | oVBcKPsx3m/P9+GlEgTDqYP+WU3sfSfJYERH0r0NAYP96m2e7UQrqdgvMTVVDkPB 50 | UB9efZzgkL7u9IAqmLU2klSGdEZnJ8t1AsjEyHXMztC7ICUhRFCeXHdTNhHCwHwE 51 | 
EwEKACYCGy8HCwkIBwMCAQYVCAIJCgsEFgIDAQIeAQIXgAUCVcfRaQIZAQAKCRBC 52 | DagqmJOY31y1B/41I/SsWwDqJP/Y3LzzatGmIv/gy+LkJBBTr/NV0NYzKV2XJ1BG 53 | ese2ZE4tKKdG4HDwF+IwFLBHcPZRv358IwwTRPnzeO23mxpTYAnRCdg/pcaYIJ9r 54 | OxIOP+R52YbgGrNKcezVA+7TY9za072P7Bk85jTM2FNfqevaf/YQ4GRcGLQ3JI8N 55 | tBUdvrOEETDpR0QFTr22Wv1C7UfPDsSf7ZUM7zJ38CmDji8JSlr6y75/LYSY50BB 56 | 8EHb03QxyePe98A3WzvOoqamiCIe9bRzH5IqRAtJYDX8cK4PZmp43bQhrjdjawCc 57 | AU/OY9iz+zCw00+b6CNiRb59N+OwpNJh5iNNwsB5BBMBCgAjAhsvBwsJCAcDAgEG 58 | FQgCCQoLBBYCAwECHgECF4AFAlX+iq0ACgkQQg2oKpiTmN/z2gf/VbcQHgTlXFYa 59 | Sq/dE7S54uGFrdzHOV3IJyl+ByMwVoKn6zdpksRoyt7jPV3RonrUO7jEcrt7VKCU 60 | 2KC7/MZMDoUsn9BXXTtUk+uTCNh8qllR0Fo/FvWM9RJKmcDMKwAJwcKIgbfUBJGx 61 | 1N6pP2DUc+YCnEerRbnQ1DWJUM7BaOEN6bvPxuGblPst1l6S5VktFj3gZGYItHrs 62 | pit5pesILP8K6B6VCNP2WXXYvYQo7yyYcG8WBWXin8/SdNwU68lUbfhhQVIKv6LU 63 | h0wvgG97NsBPrFbij0K6O63FufnNr9WLMZhAzi0h6gNK2HKAyw9AZNKpPccwg+mX 64 | Huc/4CPRlM0uU2ltb25lIENhcmxldHRpIDxzaW1vbmUuY2FybGV0dGlAZG5zaW1w 65 | bGUuY29tPsLAdwQTAQoAIQUCVh4ipAIbLwULCQgHAwUVCgkICwUWAgMBAAIeAQIX 66 | gAAKCRBCDagqmJOY329iCACpOY5SV7hwOZ8VqmRfxRoHQFQe9Owr+hD3eL0AKZaJ 67 | V918dCPrrxbAmwwMAC8pS8J4CmrrTR27kxcUgVwcfyydFPrgST5pg+H7UTrBR045 68 | 4Npw1+m99I2Pyyl3oaym4lKJFbp2c2DGODEzTg8kKfjk0cb8bd+MJrXqFyod1z5r 69 | 0pfexwaLVt1Hz+ZsmFIPO1ISHYBPV8OkpL8Kgb8WtY6REntgNjfcmtHNi0VWQ7+N 70 | vgeYqdhscX8c9ROe26BiiiGXphRlAsCU/VLHOJkzoW3f9QLy4z01Xj/7OaD0JkHS 71 | HrES1ye3ZDxnjnTRdh4U8ntJ+L+xnePcFQA2t0eCbPwIzSZTaW1vbmUgQ2FybGV0 72 | dGkgPHNpbW9uZUBjYXJsZXR0aS5uYW1lPsLAdwQTAQoAIQUCVf7gmwIbLwULCQgH 73 | AwUVCgkICwUWAgMBAAIeAQIXgAAKCRBCDagqmJOY37L+B/45pWT3wgm43+kzHVOT 74 | j63m4zmRb53TGZToRSxz3acyuVSuqU9Tv010F0ZV9ccb0NDeN+88s9tEisuoO0Rz 75 | 5vhC8AtwRUyR3ADE9pBtvvxT+4R9y8yYNTCIX45VPG9ZPp9+7i+XCdKtz30KIV7r 76 | smktd2FrK16r/KUN8+03iZSgzQ9lsTmXK5L7zH/f3Tqhbfvybr4+M71KGnSoP+iP 77 | vwfsoBb5rhijQLOykTb+VzdDpHQbupwxwm/3S4nsA4U6tonIywlJgBDSjgDjQj0i 78 | Ez+Db2Wt59y6LoksRQogvJqm0nuxFUWMZc47zdhsRnqmxUYTNpKaJPWc6pfxsQPK 79 | 
ZvTjzsBNBE/QiI0BCACsaNbG6kyKJBWL5jPhebsijk8PCfSHte1jNCA5l/NvaImZ 80 | 6ORq9f8S9MWlYxmzyUkVJaWrv+9p5zmjwcaegjerj6ggjPDEXlZG41Z4YE1/R8pf 81 | wkSvrkLziBxZDB1aYplg8kgXkaIf2yi2FrMPSi04sjvQbBSCcIJeh6+vGK8tIJTn 82 | e0tQbEvRorTwBAPAFlpx/bdk1wZYu11vFKbckhKWou7f8XSdn9ng9cY5uK+xBlFU 83 | 2ORgL1ygeIoY9uRvNZG2ncvCvxUPgOqbo31R8KPyvV4rNNvGBOfxQER9LbieBF2I 84 | 5I1gpyboGWKcXu1eV7tOpjtW6LHt+6NHhE6L1Lw1ABEBAAHCwX4EGAECAAkFAk/Q 85 | iI0CGy4BKQkQQg2oKpiTmN/AXSAEGQECAAYFAk/QiI0ACgkQcBROh493BN9hdwf9 86 | GjiF1GcQN+3TZkXdr2WY0AlbcA/wBp6+ShnqcoU5XLuA0RY3+rWGuaSc2buLke6Y 87 | 2MhMAYcgmPdG+WTBoW5dWQGXBZ1IHYVR8HLGaF+Vate1MofE1BNHXhnilIMMfH4G 88 | Tcr3Z3/FaSk9OdHlyiE/Jo7++8PQ+auHVyjtqry+/ysAnyr+lnCn+K4E0PQ1fYpP 89 | fiawKtfSqk9h6HjjMyx9Adrz+ljXh+NyVqYZUfRytjgO+v+dAQmMczT1EawLTdX+ 90 | trx1tHR549pEey7in5QKsje3GLH4zq4mCdWBlivQxmmmlvR07DysLADMbcpjKK2g 91 | utfzygZHCU9hWGR3wbWZ7lXjB/0ZzutNaNYzSCkiC8PIWH1bG+TJO9pslHwP+aBJ 92 | NGAmcwyOH9Bub2CSXikQFZNUmVRwtl7mN4bVAHI8zbMd6xdlX22yDgQei54dPXDw 93 | UYsvGE4zmrD97he1EYcIOKMFHzlJNcWK+uR7lEq6mv7SFGnBr8qTYZRi1bySRgwd 94 | UORuDV12GKTen9WectKtepW0fgYSz+udbDKQyyRef+7xGtCErWRL7f1qr8xm60da 95 | +gSwyD/WkPTY8SP2mdq4u+6m4dWS26kKoENwuL7jUktl/C/EG7NmUKURbXG8lmeu 96 | q59MIs/Fb3SgaO+zN2FZTYp6dyRJHbeEz55JdOu6F+6ihZYH 97 | =j6Xr 98 | -----END PGP PUBLIC KEY BLOCK----- 99 | ``` 100 | 101 | 102 | ## Tracking Security Updates 103 | 104 | Information about security vulnerabilities are published in the [Security Advisories](https://github.com/weppos/publicsuffix-ruby/security/advisories) page. 105 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # frozen_string_literal: true 3 | 4 | require "bundler/setup" 5 | require "public_suffix" 6 | 7 | # You can add fixtures and/or initialization code here to make experimenting 8 | # with your gem easier. You can also use a different console, if you like. 
9 | 10 | # (If you use this, don't forget to add pry to your Gemfile!) 11 | # require "pry" 12 | # Pry.start 13 | 14 | require "irb" 15 | IRB.start 16 | -------------------------------------------------------------------------------- /lib/public_suffix.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # = Public Suffix 4 | # 5 | # Domain name parser based on the Public Suffix List. 6 | # 7 | # Copyright (c) 2009-2022 Simone Carletti 8 | 9 | require_relative "public_suffix/domain" 10 | require_relative "public_suffix/version" 11 | require_relative "public_suffix/errors" 12 | require_relative "public_suffix/rule" 13 | require_relative "public_suffix/list" 14 | 15 | # PublicSuffix is a Ruby domain name parser based on the Public Suffix List. 16 | # 17 | # The [Public Suffix List](https://publicsuffix.org) is a cross-vendor initiative 18 | # to provide an accurate list of domain name suffixes. 19 | # 20 | # The Public Suffix List is an initiative of the Mozilla Project, 21 | # but is maintained as a community resource. It is available for use in any software, 22 | # but was originally created to meet the needs of browser manufacturers. 23 | module PublicSuffix 24 | 25 | DOT = "." 26 | BANG = "!" 27 | STAR = "*" 28 | 29 | # Parses +name+ and returns the {PublicSuffix::Domain} instance.
30 | # 31 | # @example Parse a valid domain 32 | # PublicSuffix.parse("google.com") 33 | # # => #<PublicSuffix::Domain ...> 34 | # 35 | # @example Parse a valid subdomain 36 | # PublicSuffix.parse("www.google.com") 37 | # # => #<PublicSuffix::Domain ...> 38 | # 39 | # @example Parse a fully qualified domain 40 | # PublicSuffix.parse("google.com.") 41 | # # => #<PublicSuffix::Domain ...> 42 | # 43 | # @example Parse a fully qualified domain (subdomain) 44 | # PublicSuffix.parse("www.google.com.") 45 | # # => #<PublicSuffix::Domain ...> 46 | # 47 | # @example Parse an unlisted domain (accepted through the default * rule) 48 | # PublicSuffix.parse("x.yz") 49 | # # => #<PublicSuffix::Domain ...> 50 | # 51 | # @example Parse an invalid (unlisted) domain with strict checking (without applying the default * rule) 52 | # PublicSuffix.parse("x.yz", default_rule: nil) 53 | # # => PublicSuffix::DomainInvalid: `x.yz` is not a valid domain 54 | # 55 | # @example Parse a URL (not supported, only domains) 56 | # PublicSuffix.parse("http://www.google.com") 57 | # # => PublicSuffix::DomainInvalid: http://www.google.com is not expected to contain a scheme 58 | # 59 | # @param name [#to_s] The domain name or fully qualified domain name to parse. 60 | # @param list [PublicSuffix::List] The rule list to search, defaults to the default {PublicSuffix::List} 61 | # @param default_rule [PublicSuffix::Rule::*, nil] The rule used when no rule in +list+ matches; pass nil to reject unlisted names 62 | # @param ignore_private [Boolean] Whether to skip the rules flagged as private 63 | # @return [PublicSuffix::Domain] 64 | # 65 | # @raise [PublicSuffix::DomainInvalid] If +name+ is not a valid domain. 66 | # @raise [PublicSuffix::DomainNotAllowed] If a rule for +name+ is found, but the rule doesn't allow +name+. 67 | def self.parse(name, list: List.default, default_rule: list.default_rule, ignore_private: false) 68 | what = normalize(name) 69 | raise what if what.is_a?(DomainInvalid) 70 | 71 | rule = list.find(what, default: default_rule, ignore_private: ignore_private) 72 | 73 | # rubocop:disable Style/IfUnlessModifier 74 | if rule.nil? 75 | raise DomainInvalid, "`#{what}` is not a valid domain" 76 | end 77 | if rule.decompose(what).last.nil?
78 | raise DomainNotAllowed, "`#{what}` is not allowed according to Registry policy" 79 | end 80 | 81 | # rubocop:enable Style/IfUnlessModifier 82 | 83 | decompose(what, rule) 84 | end 85 | 86 | # Checks whether +name+ is assigned and allowed, without actually parsing it. 87 | # 88 | # This method doesn't care whether name is a domain or subdomain. 89 | # The validation is performed using +list+, which is the default {PublicSuffix::List} unless specified. 90 | # 91 | # @example Validate a valid domain 92 | # PublicSuffix.valid?("example.com") 93 | # # => true 94 | # 95 | # @example Validate a valid subdomain 96 | # PublicSuffix.valid?("www.example.com") 97 | # # => true 98 | # 99 | # @example Validate a not-listed domain 100 | # PublicSuffix.valid?("example.tldnotlisted") 101 | # # => true 102 | # 103 | # @example Validate a not-listed domain with strict checking (without applying the default * rule) 104 | # PublicSuffix.valid?("example.tldnotlisted") 105 | # # => true 106 | # PublicSuffix.valid?("example.tldnotlisted", default_rule: nil) 107 | # # => false 108 | # 109 | # @example Validate a fully qualified domain 110 | # PublicSuffix.valid?("google.com.") 111 | # # => true 112 | # PublicSuffix.valid?("www.google.com.") 113 | # # => true 114 | # 115 | # @example Check a URL (which is not a valid domain) 116 | # PublicSuffix.valid?("http://www.example.com") 117 | # # => false 118 | # @param name [#to_s] The domain name or fully qualified domain name to validate. 119 | # @param list [PublicSuffix::List] The rule list to search, defaults to the default {PublicSuffix::List} 120 | # @param default_rule [PublicSuffix::Rule::*, nil] The rule used when no rule in +list+ matches; pass nil to reject unlisted names 121 | # @param ignore_private [Boolean] Whether to skip the rules flagged as private 122 | # @return [Boolean] 123 | def self.valid?(name, list: List.default, default_rule: list.default_rule, ignore_private: false) 124 | what = normalize(name) 125 | return false if what.is_a?(DomainInvalid) 126 | 127 | rule = list.find(what, default: default_rule, ignore_private: ignore_private) 128 | 129 | !rule.nil? && !rule.decompose(what).last.nil? 130 | end 131 | 132 | # Attempts to parse the name and returns the domain, if valid. 133 | # 134 | # This method doesn't raise.
Instead, it returns nil if the domain is not valid for whatever reason. 135 | # 136 | # @param name [#to_s] The domain name or fully qualified domain name to parse. 137 | # @param list [PublicSuffix::List] The rule list to search, defaults to the default {PublicSuffix::List} 138 | # @param ignore_private [Boolean] Whether to skip the rules flagged as private 139 | # @return [String, nil] The domain, or nil if +name+ cannot be parsed 140 | def self.domain(name, **options) 141 | parse(name, **options).domain 142 | rescue PublicSuffix::Error 143 | nil 144 | end 145 | 146 | 147 | # private 148 | # Splits +name+ according to +rule+ and builds the {PublicSuffix::Domain} from the resulting tld, sld and trd. 149 | def self.decompose(name, rule) 150 | left, right = rule.decompose(name) 151 | 152 | parts = left.split(DOT) 153 | # If we have 0 parts left, there is just a tld and no domain or subdomain 154 | # If we have 1 part left, there is a tld and a domain, but no subdomain 155 | # If we have 2 or more parts left, the last part is the domain, the other parts (combined) are the subdomain 156 | tld = right 157 | sld = parts.empty? ? nil : parts.pop 158 | trd = parts.empty? ? nil : parts.join(DOT) 159 | 160 | Domain.new(tld, sld, trd) 161 | end 162 | # Normalizes +name+: strips the surrounding whitespace, removes one trailing dot and downcases it. 163 | # Returns the normalized String, or a DomainInvalid instance (returned, not raised) if the name is blank, starts with a dot or contains a scheme. 164 | def self.normalize(name) 165 | name = name.to_s.dup 166 | name.strip! 167 | name.chomp!(DOT) 168 | name.downcase! 169 | 170 | return DomainInvalid.new("Name is blank") if name.empty? 171 | return DomainInvalid.new("Name starts with a dot") if name.start_with?(DOT) 172 | return DomainInvalid.new(format("%s is not expected to contain a scheme", name)) if name.include?("://") 173 | 174 | name 175 | end 176 | 177 | end 178 | -------------------------------------------------------------------------------- /lib/public_suffix/domain.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # = Public Suffix 4 | # 5 | # Domain name parser based on the Public Suffix List.
6 | # 7 | # Copyright (c) 2009-2022 Simone Carletti 8 | 9 | module PublicSuffix 10 | 11 | # Domain represents a domain name, composed of a TLD, SLD and TRD. 12 | class Domain 13 | 14 | # Splits a string into the labels, that is the dot-separated parts. 15 | # 16 | # The input is not validated, but it is assumed to be a valid domain name. 17 | # 18 | # @example 19 | # 20 | # name_to_labels('example.com') 21 | # # => ['example', 'com'] 22 | # 23 | # name_to_labels('example.co.uk') 24 | # # => ['example', 'co', 'uk'] 25 | # 26 | # @param name [String, #to_s] The domain name to split. 27 | # @return [Array<String>] 28 | def self.name_to_labels(name) 29 | name.to_s.split(DOT) 30 | end 31 | 32 | 33 | attr_reader :tld, :sld, :trd 34 | 35 | # Creates and returns a new {PublicSuffix::Domain} instance. 36 | # 37 | # @overload initialize(tld) 38 | # Initializes with a +tld+. 39 | # @param [String] tld The TLD (extension) 40 | # @overload initialize(tld, sld) 41 | # Initializes with a +tld+ and +sld+. 42 | # @param [String] tld The TLD (extension) 43 | # @param [String] sld The SLD (domain) 44 | # @overload initialize(tld, sld, trd) 45 | # Initializes with a +tld+, +sld+ and +trd+. 46 | # @param [String] tld The TLD (extension) 47 | # @param [String] sld The SLD (domain) 48 | # @param [String] trd The TRD (subdomain) 49 | # 50 | # @yield [self] Yields on self. 51 | # @yieldparam [PublicSuffix::Domain] self The newly created instance 52 | # 53 | # @example Initialize with a TLD 54 | # PublicSuffix::Domain.new("com") 55 | # # => #<PublicSuffix::Domain:0x... @tld="com", @sld=nil, @trd=nil> 56 | # 57 | # @example Initialize with a TLD and SLD 58 | # PublicSuffix::Domain.new("com", "example") 59 | # # => #<PublicSuffix::Domain:0x... @tld="com", @sld="example", @trd=nil> 60 | # 61 | # @example Initialize with a TLD, SLD and TRD 62 | # PublicSuffix::Domain.new("com", "example", "www") 63 | # # => #<PublicSuffix::Domain:0x... @tld="com", @sld="example", @trd="www"> 64 | # 65 | def initialize(*args) 66 | @tld, @sld, @trd = args 67 | yield(self) if block_given? 68 | end 69 | 70 | # Returns a string representation of this object.
71 | # 72 | # @return [String] 73 | def to_s 74 | name 75 | end 76 | 77 | # Returns an array containing the domain parts. 78 | # 79 | # @return [Array<String, nil>] 80 | # 81 | # @example 82 | # 83 | # PublicSuffix::Domain.new("com", "google").to_a 84 | # # => [nil, "google", "com"] 85 | # 86 | # PublicSuffix::Domain.new("com", "google", "www").to_a 87 | # # => ["www", "google", "com"] 88 | # 89 | def to_a 90 | [@trd, @sld, @tld] 91 | end 92 | 93 | # Returns the full domain name. 94 | # 95 | # @return [String] 96 | # 97 | # @example Gets the domain name of a domain 98 | # PublicSuffix::Domain.new("com", "google").name 99 | # # => "google.com" 100 | # 101 | # @example Gets the domain name of a subdomain 102 | # PublicSuffix::Domain.new("com", "google", "www").name 103 | # # => "www.google.com" 104 | # 105 | def name 106 | [@trd, @sld, @tld].compact.join(DOT) 107 | end 108 | 109 | # Returns a domain-like representation of this object 110 | # if the object is a {#domain?}, nil otherwise. 111 | # 112 | # PublicSuffix::Domain.new("com").domain 113 | # # => nil 114 | # 115 | # PublicSuffix::Domain.new("com", "google").domain 116 | # # => "google.com" 117 | # 118 | # PublicSuffix::Domain.new("com", "google", "www").domain 119 | # # => "google.com" 120 | # 121 | # This method doesn't validate the input. It handles the domain 122 | # as a valid domain name and simply applies the necessary transformations. 123 | # 124 | # This method returns the sld joined with the tld, not just the domain part. 125 | # To get the domain part, use #sld (aka second level domain). 126 | # 127 | # PublicSuffix::Domain.new("com", "google", "www").domain 128 | # # => "google.com" 129 | # 130 | # PublicSuffix::Domain.new("com", "google", "www").sld 131 | # # => "google" 132 | # 133 | # @see #domain? 134 | # @see #subdomain 135 | # 136 | # @return [String, nil] 137 | def domain 138 | [@sld, @tld].join(DOT) if domain?
139 | end 140 | 141 | # Returns a subdomain-like representation of this object 142 | # if the object is a {#subdomain?}, nil otherwise. 143 | # 144 | # PublicSuffix::Domain.new("com").subdomain 145 | # # => nil 146 | # 147 | # PublicSuffix::Domain.new("com", "google").subdomain 148 | # # => nil 149 | # 150 | # PublicSuffix::Domain.new("com", "google", "www").subdomain 151 | # # => "www.google.com" 152 | # 153 | # This method doesn't validate the input. It handles the domain 154 | # as a valid domain name and simply applies the necessary transformations. 155 | # 156 | # This method returns the trd, sld and tld joined together, not just the subdomain part. 157 | # To get the subdomain part, use #trd (aka third level domain). 158 | # 159 | # PublicSuffix::Domain.new("com", "google", "www").subdomain 160 | # # => "www.google.com" 161 | # 162 | # PublicSuffix::Domain.new("com", "google", "www").trd 163 | # # => "www" 164 | # 165 | # @see #subdomain? 166 | # @see #domain 167 | # 168 | # @return [String, nil] 169 | def subdomain 170 | [@trd, @sld, @tld].join(DOT) if subdomain? 171 | end 172 | 173 | # Checks whether self looks like a domain. 174 | # 175 | # This method doesn't actually validate the domain. 176 | # It only checks whether the instance contains 177 | # a value for the {#tld} and {#sld} attributes. 178 | # 179 | # @example 180 | # 181 | # PublicSuffix::Domain.new("com").domain? 182 | # # => false 183 | # 184 | # PublicSuffix::Domain.new("com", "google").domain? 185 | # # => true 186 | # 187 | # PublicSuffix::Domain.new("com", "google", "www").domain? 188 | # # => true 189 | # 190 | # # An empty label is not a valid domain, but this returns true 191 | # # because this method only checks for nil and doesn't validate the content. 192 | # PublicSuffix::Domain.new("com", "").domain? 193 | # # => true 194 | # 195 | # @see #subdomain? 196 | # 197 | # @return [Boolean] 198 | def domain? 199 | !(@tld.nil? || @sld.nil?) 200 | end 201 | 202 | # Checks whether self looks like a subdomain.
203 | # 204 | # This method doesn't actually validate the subdomain. 205 | # It only checks whether the instance contains 206 | # a value for the {#tld}, {#sld} and {#trd} attributes. 207 | # If you also want to validate the name against the list, 208 | # use {PublicSuffix.valid?} instead. 209 | # 210 | # @example 211 | # 212 | # PublicSuffix::Domain.new("com").subdomain? 213 | # # => false 214 | # 215 | # PublicSuffix::Domain.new("com", "google").subdomain? 216 | # # => false 217 | # 218 | # PublicSuffix::Domain.new("com", "google", "www").subdomain? 219 | # # => true 220 | # 221 | # # An empty label is not a valid subdomain, but this returns true 222 | # # because this method only checks for nil and doesn't validate the content. 223 | # PublicSuffix::Domain.new("com", "example", "").subdomain? 224 | # # => true 225 | # 226 | # @see #domain? 227 | # 228 | # @return [Boolean] 229 | def subdomain? 230 | !(@tld.nil? || @sld.nil? || @trd.nil?) 231 | end 232 | 233 | end 234 | 235 | end 236 | -------------------------------------------------------------------------------- /lib/public_suffix/errors.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # = Public Suffix 4 | # 5 | # Domain name parser based on the Public Suffix List. 6 | # 7 | # Copyright (c) 2009-2022 Simone Carletti 8 | 9 | module PublicSuffix 10 | # Base class of the PublicSuffix error hierarchy. 11 | class Error < StandardError 12 | end 13 | 14 | # Raised when trying to parse an invalid name. 15 | # A name is considered invalid when it cannot be normalized or when no rule is found in the definition list. 16 | # 17 | # @example 18 | # 19 | # PublicSuffix.parse("x.yz", default_rule: nil) 20 | # # => PublicSuffix::DomainInvalid 21 | # 22 | # PublicSuffix.parse("http://www.nic.it") 23 | # # => PublicSuffix::DomainInvalid 24 | # 25 | class DomainInvalid < Error 26 | end 27 | 28 | # Raised when trying to parse a name that matches a suffix.
29 | # 30 | # @example 31 | # 32 | # PublicSuffix.parse("nic.do") 33 | # # => PublicSuffix::DomainNotAllowed 34 | # 35 | # PublicSuffix.parse("www.nic.do") 36 | # # => PublicSuffix::Domain 37 | # 38 | class DomainNotAllowed < DomainInvalid 39 | end 40 | 41 | end 42 | -------------------------------------------------------------------------------- /lib/public_suffix/list.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # = Public Suffix 4 | # 5 | # Domain name parser based on the Public Suffix List. 6 | # 7 | # Copyright (c) 2009-2022 Simone Carletti 8 | 9 | module PublicSuffix 10 | 11 | # A {PublicSuffix::List} is a collection of one 12 | # or more {PublicSuffix::Rule}. 13 | # 14 | # Given a {PublicSuffix::List}, 15 | # you can add or remove {PublicSuffix::Rule}, 16 | # iterate all items in the list or search for the first rule 17 | # which matches a specific domain name. 18 | # 19 | # # Create a new list 20 | # list = PublicSuffix::List.new 21 | # 22 | # # Push two rules to the list 23 | # list << PublicSuffix::Rule.factory("it") 24 | # list << PublicSuffix::Rule.factory("com") 25 | # 26 | # # Get the size of the list 27 | # list.size 28 | # # => 2 29 | # 30 | # # Search for the rule matching given domain 31 | # list.find("example.com") 32 | # # => # 33 | # list.find("example.org") 34 | # # => nil 35 | # 36 | # You can create as many {PublicSuffix::List} you want. 37 | # The {PublicSuffix::List.default} rule list is used 38 | # to tokenize and validate a domain. 39 | # 40 | class List 41 | 42 | DEFAULT_LIST_PATH = File.expand_path("../../data/list.txt", __dir__) 43 | 44 | # Gets the default rule list. 45 | # 46 | # Initializes a new {PublicSuffix::List} parsing the content 47 | # of {PublicSuffix::List.default_list_content}, if required. 
48 | # 49 | # @return [PublicSuffix::List] 50 | def self.default(**options) 51 | @default ||= parse(File.read(DEFAULT_LIST_PATH), **options) 52 | end 53 | 54 | # Sets the default rule list to +value+. 55 | # 56 | # @param value [PublicSuffix::List] the new list 57 | # @return [PublicSuffix::List] 58 | def self.default=(value) 59 | @default = value 60 | end 61 | 62 | # Parse given +input+ treating the content as Public Suffix List. 63 | # 64 | # See http://publicsuffix.org/format/ for more details about input format. 65 | # 66 | # @param input [#each_line] the list to parse 67 | # @param private_domains [Boolean] whether to ignore the private domains section 68 | # @return [PublicSuffix::List] 69 | def self.parse(input, private_domains: true) 70 | comment_token = "//" 71 | private_token = "===BEGIN PRIVATE DOMAINS===" 72 | section = nil # 1 == ICANN, 2 == PRIVATE 73 | 74 | new do |list| 75 | input.each_line do |line| 76 | line.strip! 77 | case # rubocop:disable Style/EmptyCaseCondition 78 | 79 | # skip blank lines 80 | when line.empty? 81 | next 82 | 83 | # include private domains or stop scanner 84 | when line.include?(private_token) 85 | break if !private_domains 86 | 87 | section = 2 88 | 89 | # skip comments 90 | when line.start_with?(comment_token) # rubocop:disable Lint/DuplicateBranch 91 | next 92 | 93 | else 94 | list.add(Rule.factory(line, private: section == 2)) 95 | 96 | end 97 | end 98 | end 99 | end 100 | 101 | 102 | # Initializes an empty {PublicSuffix::List}. 103 | # 104 | # @yield [self] Yields on self. 105 | # @yieldparam [PublicSuffix::List] self The newly created instance. 106 | def initialize 107 | @rules = {} 108 | yield(self) if block_given? 109 | end 110 | 111 | 112 | # Checks whether two lists are equal. 113 | # 114 | # List one is equal to two, if two is an instance of 115 | # {PublicSuffix::List} and each +PublicSuffix::Rule::*+ 116 | # in list one is available in list two, in the same order. 
117 | # 118 | # @param other [PublicSuffix::List] the List to compare 119 | # @return [Boolean] 120 | def ==(other) 121 | return false unless other.is_a?(List) 122 | 123 | equal?(other) || @rules == other.rules 124 | end 125 | alias eql? == 126 | 127 | # Iterates each rule in the list. 128 | def each(&block) 129 | Enumerator.new do |y| 130 | @rules.each do |key, node| 131 | y << entry_to_rule(node, key) 132 | end 133 | end.each(&block) 134 | end 135 | 136 | 137 | # Adds the given object to the list and optionally refreshes the rule index. 138 | # 139 | # @param rule [PublicSuffix::Rule::*] the rule to add to the list 140 | # @return [self] 141 | def add(rule) 142 | @rules[rule.value] = rule_to_entry(rule) 143 | self 144 | end 145 | alias << add 146 | 147 | # Gets the number of rules in the list. 148 | # 149 | # @return [Integer] 150 | def size 151 | @rules.size 152 | end 153 | 154 | # Checks whether the list is empty. 155 | # 156 | # @return [Boolean] 157 | def empty? 158 | @rules.empty? 159 | end 160 | 161 | # Removes all rules. 162 | # 163 | # @return [self] 164 | def clear 165 | @rules.clear 166 | self 167 | end 168 | 169 | # Finds and returns the rule corresponding to the longest public suffix for the hostname. 170 | # 171 | # @param name [#to_s] the hostname 172 | # @param default [PublicSuffix::Rule::*] the default rule to return in case no rule matches 173 | # @return [PublicSuffix::Rule::*] 174 | def find(name, default: default_rule, **options) 175 | rule = select(name, **options).inject do |l, r| 176 | return r if r.instance_of?(Rule::Exception) 177 | 178 | l.length > r.length ? l : r 179 | end 180 | rule || default 181 | end 182 | 183 | # Selects all the rules matching given hostame. 184 | # 185 | # If `ignore_private` is set to true, the algorithm will skip the rules that are flagged as 186 | # private domain. Note that the rules will still be part of the loop. 
187 | # If you frequently need to access lists ignoring the private domains, 188 | # you should create a list that doesn't include these domains setting the 189 | # `private_domains: false` option when calling {.parse}. 190 | # 191 | # Note that this method is currently private, as you should not rely on it. Instead, 192 | # the public interface is {#find}. The current internal algorithm allows to return all 193 | # matching rules, but different data structures may not be able to do it, and instead would 194 | # return only the match. For this reason, you should rely on {#find}. 195 | # 196 | # @param name [#to_s] the hostname 197 | # @param ignore_private [Boolean] 198 | # @return [Array] 199 | def select(name, ignore_private: false) 200 | name = name.to_s 201 | 202 | parts = name.split(DOT).reverse! 203 | index = 0 204 | query = parts[index] 205 | rules = [] 206 | 207 | loop do 208 | match = @rules[query] 209 | rules << entry_to_rule(match, query) if !match.nil? && (ignore_private == false || match.private == false) 210 | 211 | index += 1 212 | break if index >= parts.size 213 | 214 | query = parts[index] + DOT + query 215 | end 216 | 217 | rules 218 | end 219 | private :select 220 | 221 | # Gets the default rule. 
222 | # 223 | # @see PublicSuffix::Rule.default 224 | # 225 | # @return [PublicSuffix::Rule::*] 226 | def default_rule 227 | PublicSuffix::Rule.default 228 | end 229 | 230 | 231 | protected 232 | 233 | attr_reader :rules 234 | 235 | 236 | private 237 | 238 | def entry_to_rule(entry, value) 239 | entry.type.new(value: value, length: entry.length, private: entry.private) 240 | end 241 | 242 | def rule_to_entry(rule) 243 | Rule::Entry.new(rule.class, rule.length, rule.private) 244 | end 245 | 246 | end 247 | end 248 | -------------------------------------------------------------------------------- /lib/public_suffix/rule.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # = Public Suffix 4 | # 5 | # Domain name parser based on the Public Suffix List. 6 | # 7 | # Copyright (c) 2009-2022 Simone Carletti 8 | 9 | module PublicSuffix 10 | 11 | # A Rule is a special object which holds a single definition 12 | # of the Public Suffix List. 13 | # 14 | # There are 3 types of rules, each one represented by a specific 15 | # subclass within the +PublicSuffix::Rule+ namespace. 16 | # 17 | # To create a new Rule, use the {PublicSuffix::Rule.factory} method. 18 | # 19 | # PublicSuffix::Rule.factory("ar") 20 | # # => #<PublicSuffix::Rule::Normal> 21 | # 22 | module Rule 23 | 24 | # @api internal 25 | Entry = Struct.new(:type, :length, :private) # rubocop:disable Lint/StructNewOverride 26 | 27 | # = Abstract rule class 28 | # 29 | # This represents the base class for a Rule definition 30 | # in the {Public Suffix List}[https://publicsuffix.org]. 31 | # 32 | # This is intended to be an Abstract class 33 | # and you shouldn't create a direct instance. The only purpose 34 | # of this class is to expose a common interface 35 | # for all the available subclasses.
36 | # 37 | # * {PublicSuffix::Rule::Normal} 38 | # * {PublicSuffix::Rule::Exception} 39 | # * {PublicSuffix::Rule::Wildcard} 40 | # 41 | # ## Properties 42 | # 43 | # A rule is composed of the following properties: 44 | # 45 | # value - A normalized version of the rule name. 46 | # The normalization process depends on rule type. 47 | # 48 | # Here's an example 49 | # 50 | # PublicSuffix::Rule.factory("*.google.com") 51 | # #<PublicSuffix::Rule::Wildcard 52 | # @value="google.com" 53 | # > 54 | # 55 | # ## Rule Creation 56 | # 57 | # The best way to create a new rule is passing the rule name 58 | # to the PublicSuffix::Rule.factory method. 59 | # 60 | # PublicSuffix::Rule.factory("com") 61 | # # => PublicSuffix::Rule::Normal 62 | # 63 | # PublicSuffix::Rule.factory("*.com") 64 | # # => PublicSuffix::Rule::Wildcard 65 | # 66 | # This method will detect the rule type and create an instance 67 | # from the proper rule class. 68 | # 69 | # ## Rule Usage 70 | # 71 | # A rule describes the composition of a domain name and explains how to tokenize 72 | # the name into tld, sld and trd. 73 | # 74 | # To use a rule, you first need to be sure the name you want to tokenize 75 | # can be handled by the current rule. 76 | # You can use the #match? method. 77 | # 78 | # rule = PublicSuffix::Rule.factory("com") 79 | # 80 | # rule.match?("google.com") 81 | # # => true 82 | # 83 | # rule.match?("google.net") 84 | # # => false 85 | # 86 | # Rule order is significant. A name can match more than one rule. 87 | # See the {Public Suffix Documentation}[http://publicsuffix.org/format/] 88 | # to learn more about rule priority. 89 | # 90 | # When you have the right rule, you can use it to tokenize the domain name.
91 | # 92 | # rule = PublicSuffix::Rule.factory("com") 93 | # 94 | # rule.decompose("google.com") 95 | # # => ["google", "com"] 96 | # 97 | # rule.decompose("www.google.com") 98 | # # => ["www.google", "com"] 99 | # 100 | # @abstract 101 | # 102 | class Base 103 | 104 | # @return [String] the rule definition 105 | attr_reader :value 106 | 107 | # @return [Integer] the length of the rule 108 | attr_reader :length 109 | 110 | # @return [Boolean] true if the rule is a private domain 111 | attr_reader :private 112 | 113 | 114 | # Initializes a new rule from the content. 115 | # 116 | # @param content [String] the content of the rule 117 | # @param private [Boolean] 118 | def self.build(content, private: false) 119 | new(value: content, private: private) 120 | end 121 | 122 | # Initializes a new rule. 123 | # 124 | # @param value [String] 125 | # @param private [Boolean] 126 | def initialize(value:, length: nil, private: false) 127 | @value = value.to_s 128 | @length = length || (@value.count(DOT) + 1) 129 | @private = private 130 | end 131 | 132 | # Checks whether this rule is equal to other. 133 | # 134 | # @param other [PublicSuffix::Rule::*] The rule to compare 135 | # @return [Boolean] true if this rule and other are instances of the same class 136 | # and have the same value, false otherwise. 137 | def ==(other) 138 | equal?(other) || (self.class == other.class && value == other.value) 139 | end 140 | alias eql? == 141 | 142 | # Checks if this rule matches +name+. 143 | # 144 | # A domain name is said to match a rule if and only if 145 | # all of the following conditions are met: 146 | # 147 | # - When the domain and rule are split into corresponding labels, 148 | # that the domain contains as many or more labels than the rule. 149 | # - Beginning with the right-most labels of both the domain and the rule, 150 | # and continuing for all labels in the rule, one finds that for every pair, 151 | # either they are identical, or that the label from the rule is "*".
152 | # 153 | # @see https://publicsuffix.org/list/ 154 | # 155 | # @example 156 | # PublicSuffix::Rule.factory("com").match?("example.com") 157 | # # => true 158 | # PublicSuffix::Rule.factory("com").match?("example.net") 159 | # # => false 160 | # 161 | # @param name [String] the domain name to check 162 | # @return [Boolean] 163 | def match?(name) 164 | # NOTE: it works because of the assumption there are no 165 | # rules like foo.*.com. If the assumption is incorrect, 166 | # we need to properly walk the input and skip parts according 167 | # to wildcard component. 168 | diff = name.chomp(value) 169 | diff.empty? || diff.end_with?(DOT) 170 | end 171 | 172 | # @abstract 173 | def parts 174 | raise NotImplementedError 175 | end 176 | 177 | # @abstract 178 | # @param domain [#to_s] The domain name to decompose 179 | # @return [Array] 180 | def decompose(*) 181 | raise NotImplementedError 182 | end 183 | 184 | end 185 | 186 | # Normal represents a standard rule (e.g. com). 187 | class Normal < Base 188 | 189 | # Gets the original rule definition. 190 | # 191 | # @return [String] The rule definition. 192 | def rule 193 | value 194 | end 195 | 196 | # Decomposes the domain name according to rule properties. 197 | # 198 | # @param domain [#to_s] The domain name to decompose 199 | # @return [Array] The array with [trd + sld, tld]. 200 | def decompose(domain) 201 | suffix = parts.join('\.') 202 | matches = domain.to_s.match(/^(.*)\.(#{suffix})$/) 203 | matches ? matches[1..2] : [nil, nil] 204 | end 205 | 206 | # dot-split rule value and returns all rule parts 207 | # in the order they appear in the value. 208 | # 209 | # @return [Array] 210 | def parts 211 | @value.split(DOT) 212 | end 213 | 214 | end 215 | 216 | # Wildcard represents a wildcard rule (e.g. *.co.uk). 217 | class Wildcard < Base 218 | 219 | # Initializes a new rule from the content. 
220 | # 221 | # @param content [String] the content of the rule 222 | # @param private [Boolean] 223 | def self.build(content, private: false) 224 | new(value: content.to_s[2..], private: private) 225 | end 226 | 227 | # Initializes a new rule. 228 | # 229 | # @param value [String] 230 | # @param length [Integer] 231 | # @param private [Boolean] 232 | def initialize(value:, length: nil, private: false) 233 | super(value: value, length: length, private: private) 234 | length or @length += 1 # * counts as 1 235 | end 236 | 237 | # Gets the original rule definition. 238 | # 239 | # @return [String] The rule definition. 240 | def rule 241 | value == "" ? STAR : STAR + DOT + value 242 | end 243 | 244 | # Decomposes the domain name according to rule properties. 245 | # 246 | # @param domain [#to_s] The domain name to decompose 247 | # @return [Array] The array with [trd + sld, tld]. 248 | def decompose(domain) 249 | suffix = ([".*?"] + parts).join('\.') 250 | matches = domain.to_s.match(/^(.*)\.(#{suffix})$/) 251 | matches ? matches[1..2] : [nil, nil] 252 | end 253 | 254 | # dot-split rule value and returns all rule parts 255 | # in the order they appear in the value. 256 | # 257 | # @return [Array] 258 | def parts 259 | @value.split(DOT) 260 | end 261 | 262 | end 263 | 264 | # Exception represents an exception rule (e.g. !parliament.uk). 265 | class Exception < Base 266 | 267 | # Initializes a new rule from the content. 268 | # 269 | # @param content [#to_s] the content of the rule 270 | # @param private [Boolean] 271 | def self.build(content, private: false) 272 | new(value: content.to_s[1..], private: private) 273 | end 274 | 275 | # Gets the original rule definition. 276 | # 277 | # @return [String] The rule definition. 278 | def rule 279 | BANG + value 280 | end 281 | 282 | # Decomposes the domain name according to rule properties. 283 | # 284 | # @param domain [#to_s] The domain name to decompose 285 | # @return [Array] The array with [trd + sld, tld]. 
286 | def decompose(domain) 287 | suffix = parts.join('\.') 288 | matches = domain.to_s.match(/^(.*)\.(#{suffix})$/) 289 | matches ? matches[1..2] : [nil, nil] 290 | end 291 | 292 | # dot-split rule value and returns all rule parts 293 | # in the order they appear in the value. 294 | # The leftmost label is not considered a label. 295 | # 296 | # See http://publicsuffix.org/format/: 297 | # If the prevailing rule is an exception rule, 298 | # modify it by removing the leftmost label. 299 | # 300 | # @return [Array<String>] 301 | def parts 302 | @value.split(DOT)[1..] 303 | end 304 | 305 | end 306 | 307 | 308 | # Takes the +name+ of the rule, detects the specific rule class 309 | # and creates a new instance of that class. 310 | # The +name+ becomes the rule +value+. 311 | # 312 | # @example Creates a Normal rule 313 | # PublicSuffix::Rule.factory("ar") 314 | # # => #<PublicSuffix::Rule::Normal> 315 | # 316 | # @example Creates a Wildcard rule 317 | # PublicSuffix::Rule.factory("*.ar") 318 | # # => #<PublicSuffix::Rule::Wildcard> 319 | # 320 | # @example Creates an Exception rule 321 | # PublicSuffix::Rule.factory("!congresodelalengua3.ar") 322 | # # => #<PublicSuffix::Rule::Exception> 323 | # 324 | # @param content [#to_s] the content of the rule 325 | # @return [PublicSuffix::Rule::*] A rule instance. 326 | def self.factory(content, private: false) 327 | case content.to_s[0, 1] 328 | when STAR 329 | Wildcard 330 | when BANG 331 | Exception 332 | else 333 | Normal 334 | end.build(content, private: private) 335 | end 336 | 337 | # The default rule to use if no rule matches. 338 | # 339 | # The default rule is "*". From https://publicsuffix.org/list/: 340 | # 341 | # > If no rules match, the prevailing rule is "*". 342 | # 343 | # @return [PublicSuffix::Rule::Wildcard] The default rule.
344 | def self.default 345 | factory(STAR) 346 | end 347 | 348 | end 349 | 350 | end 351 | -------------------------------------------------------------------------------- /lib/public_suffix/version.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # 4 | # = Public Suffix 5 | # 6 | # Domain name parser based on the Public Suffix List. 7 | # 8 | # Copyright (c) 2009-2022 Simone Carletti 9 | 10 | module PublicSuffix 11 | 12 | # @return [String] The current library version. 13 | VERSION = "5.0.1" 14 | 15 | end 16 | -------------------------------------------------------------------------------- /public_suffix.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | $LOAD_PATH.push File.expand_path("../lib", __FILE__) 3 | require "public_suffix/version" 4 | 5 | Gem::Specification.new do |s| 6 | s.name = "public_suffix" 7 | s.version = PublicSuffix::VERSION 8 | s.authors = ["Simone Carletti"] 9 | s.email = ["weppos@weppos.net"] 10 | s.homepage = "https://simonecarletti.com/code/publicsuffix-ruby" 11 | s.summary = "Domain name parser based on the Public Suffix List." 12 | s.description = "PublicSuffix can parse and decompose a domain name into top level domain, domain and subdomains." 
13 | s.licenses = ["MIT"] 14 | 15 | s.metadata = { 16 | "bug_tracker_uri" => "https://github.com/weppos/publicsuffix-ruby/issues", 17 | "changelog_uri" => "https://github.com/weppos/publicsuffix-ruby/blob/master/CHANGELOG.md", 18 | "documentation_uri" => "https://rubydoc.info/gems/#{s.name}/#{s.version}", 19 | "homepage_uri" => s.homepage, 20 | "source_code_uri" => "https://github.com/weppos/publicsuffix-ruby/tree/v#{s.version}", 21 | } 22 | 23 | s.required_ruby_version = ">= 2.6" 24 | 25 | s.require_paths = ["lib"] 26 | s.files = `git ls-files`.split("\n") 27 | s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n") 28 | s.extra_rdoc_files = %w( LICENSE.txt ) 29 | end 30 | -------------------------------------------------------------------------------- /test/.empty: -------------------------------------------------------------------------------- 1 | # This is an empty file I use to force a non-empty commit when I only need to store notes 2 | .. -------------------------------------------------------------------------------- /test/acceptance_test.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "test_helper" 4 | 5 | class AcceptanceTest < Minitest::Test 6 | 7 | VALID_CASES = [ 8 | ["example.com", "example.com", [nil, "example", "com"]], 9 | ["foo.example.com", "example.com", ["foo", "example", "com"]], 10 | 11 | ["verybritish.co.uk", "verybritish.co.uk", [nil, "verybritish", "co.uk"]], 12 | ["foo.verybritish.co.uk", "verybritish.co.uk", ["foo", "verybritish", "co.uk"]], 13 | 14 | ["parliament.uk", "parliament.uk", [nil, "parliament", "uk"]], 15 | ["foo.parliament.uk", "parliament.uk", ["foo", "parliament", "uk"]], 16 | ].freeze 17 | 18 | def test_valid 19 | VALID_CASES.each do |input, domain, results| 20 | parsed = PublicSuffix.parse(input) 21 | trd, sld, tld = results 22 | assert_equal tld, parsed.tld, "Invalid tld for `#{name}`" 23 | assert_equal sld, parsed.sld, 
"Invalid sld for `#{name}`" 24 | if trd.nil? 25 | assert_nil parsed.trd, "Invalid trd for `#{name}`" 26 | else 27 | assert_equal trd, parsed.trd, "Invalid trd for `#{name}`" 28 | end 29 | 30 | assert_equal domain, PublicSuffix.domain(input) 31 | assert PublicSuffix.valid?(input) 32 | end 33 | end 34 | 35 | 36 | INVALID_CASES = [ 37 | ["nic.bd", PublicSuffix::DomainNotAllowed], 38 | [nil, PublicSuffix::DomainInvalid], 39 | ["", PublicSuffix::DomainInvalid], 40 | [" ", PublicSuffix::DomainInvalid], 41 | ].freeze 42 | 43 | def test_invalid 44 | INVALID_CASES.each do |(name, error)| 45 | assert_raises(error) { PublicSuffix.parse(name) } 46 | assert !PublicSuffix.valid?(name) 47 | end 48 | end 49 | 50 | 51 | REJECTED_CASES = [ 52 | ["www. .com", true], 53 | ["foo.co..uk", true], 54 | ["goo,gle.com", true], 55 | ["-google.com", true], 56 | ["google-.com", true], 57 | 58 | # This case was covered in GH-15. 59 | # I decided to cover this case because it's not easily reproducible with URI.parse 60 | # and can lead to several false positives. 
61 | ["http://google.com", false], 62 | ].freeze 63 | 64 | def test_rejected 65 | REJECTED_CASES.each do |name, expected| 66 | assert_equal expected, PublicSuffix.valid?(name), 67 | format("Expected %s to be %s", name.inspect, expected.inspect) 68 | assert !valid_domain?(name), 69 | "#{name} expected to be invalid" 70 | end 71 | end 72 | 73 | 74 | CASE_CASES = [ 75 | ["Www.google.com", %w[www google com]], 76 | ["www.Google.com", %w[www google com]], 77 | ["www.google.Com", %w[www google com]], 78 | ].freeze 79 | 80 | def test_ignore_case 81 | CASE_CASES.each do |name, results| 82 | domain = PublicSuffix.parse(name) 83 | trd, sld, tld = results 84 | assert_equal tld, domain.tld, "Invalid tld for `#{name}'" 85 | assert_equal sld, domain.sld, "Invalid sld for `#{name}'" 86 | assert_equal trd, domain.trd, "Invalid trd for `#{name}'" 87 | assert PublicSuffix.valid?(name) 88 | end 89 | end 90 | 91 | 92 | INCLUDE_PRIVATE_CASES = [ 93 | ["blogspot.com", true, "blogspot.com"], 94 | ["blogspot.com", false, nil], 95 | ["subdomain.blogspot.com", true, "blogspot.com"], 96 | ["subdomain.blogspot.com", false, "subdomain.blogspot.com"], 97 | ].freeze 98 | 99 | # rubocop:disable Style/CombinableLoops 100 | def test_ignore_private 101 | # test domain and parse 102 | INCLUDE_PRIVATE_CASES.each do |given, ignore_private, expected| 103 | if expected.nil? 104 | assert_nil PublicSuffix.domain(given, ignore_private: ignore_private) 105 | else 106 | assert_equal expected, PublicSuffix.domain(given, ignore_private: ignore_private) 107 | end 108 | end 109 | # test valid? 110 | INCLUDE_PRIVATE_CASES.each do |given, ignore_private, expected| 111 | assert_equal !expected.nil?, PublicSuffix.valid?(given, ignore_private: ignore_private) 112 | end 113 | end 114 | # rubocop:enable Style/CombinableLoops 115 | 116 | 117 | def valid_uri?(name) 118 | uri = URI.parse(name) 119 | !uri.host.nil? 
120 | rescue StandardError 121 | false 122 | end 123 | 124 | def valid_domain?(name) 125 | uri = URI.parse(name) 126 | !uri.host.nil? && uri.scheme.nil? 127 | rescue StandardError 128 | false 129 | end 130 | 131 | end 132 | -------------------------------------------------------------------------------- /test/benchmarks/bm_find.rb: -------------------------------------------------------------------------------- 1 | require 'benchmark' 2 | require_relative "../../lib/public_suffix" 3 | 4 | NAME_SHORT = "example.de" 5 | NAME_MEDIUM = "www.subdomain.example.de" 6 | NAME_LONG = "one.two.three.four.five.example.de" 7 | NAME_WILD = "one.two.three.four.five.example.bd" 8 | NAME_EXCP = "one.two.three.four.five.www.ck" 9 | 10 | IAAA = "www.example.ac" 11 | IZZZ = "www.example.zone" 12 | 13 | PAAA = "one.two.three.four.five.example.beep.pl" 14 | PZZZ = "one.two.three.four.five.example.now.sh" 15 | 16 | JP = "www.yokoshibahikari.chiba.jp" 17 | IT = "www.example.it" 18 | COM = "www.example.com" 19 | 20 | TIMES = (ARGV.first || 50_000).to_i 21 | 22 | # Initialize 23 | PublicSuffixList = PublicSuffix::List.default 24 | PublicSuffixList.find("example.com") 25 | 26 | Benchmark.bmbm(25) do |x| 27 | x.report("NAME_SHORT") do 28 | TIMES.times { PublicSuffixList.find(NAME_SHORT) != nil } 29 | end 30 | x.report("NAME_MEDIUM") do 31 | TIMES.times { PublicSuffixList.find(NAME_MEDIUM) != nil } 32 | end 33 | x.report("NAME_LONG") do 34 | TIMES.times { PublicSuffixList.find(NAME_LONG) != nil } 35 | end 36 | x.report("NAME_WILD") do 37 | TIMES.times { PublicSuffixList.find(NAME_WILD) != nil } 38 | end 39 | x.report("NAME_EXCP") do 40 | TIMES.times { PublicSuffixList.find(NAME_EXCP) != nil } 41 | end 42 | 43 | x.report("IAAA") do 44 | TIMES.times { PublicSuffixList.find(IAAA) != nil } 45 | end 46 | x.report("IZZZ") do 47 | TIMES.times { PublicSuffixList.find(IZZZ) != nil } 48 | end 49 | 50 | x.report("PAAA") do 51 | TIMES.times { PublicSuffixList.find(PAAA) != nil } 52 | end 53 | 
x.report("PZZZ") do 54 | TIMES.times { PublicSuffixList.find(PZZZ) != nil } 55 | end 56 | 57 | x.report("JP") do 58 | TIMES.times { PublicSuffixList.find(JP) != nil } 59 | end 60 | x.report("IT") do 61 | TIMES.times { PublicSuffixList.find(IT) != nil } 62 | end 63 | x.report("COM") do 64 | TIMES.times { PublicSuffixList.find(COM) != nil } 65 | end 66 | end 67 | -------------------------------------------------------------------------------- /test/benchmarks/bm_find_all.rb: -------------------------------------------------------------------------------- 1 | require 'benchmark' 2 | require_relative "../../lib/public_suffix" 3 | 4 | NAME_SHORT = "example.de" 5 | NAME_MEDIUM = "www.subdomain.example.de" 6 | NAME_LONG = "one.two.three.four.five.example.de" 7 | NAME_WILD = "one.two.three.four.five.example.bd" 8 | NAME_EXCP = "one.two.three.four.five.www.ck" 9 | 10 | IAAA = "www.example.ac" 11 | IZZZ = "www.example.zone" 12 | 13 | PAAA = "one.two.three.four.five.example.beep.pl" 14 | PZZZ = "one.two.three.four.five.example.now.sh" 15 | 16 | JP = "www.yokoshibahikari.chiba.jp" 17 | IT = "www.example.it" 18 | COM = "www.example.com" 19 | 20 | TIMES = (ARGV.first || 50_000).to_i 21 | 22 | # Initialize 23 | PublicSuffixList = PublicSuffix::List.default 24 | PublicSuffixList.find("example.com") 25 | 26 | Benchmark.bmbm(25) do |x| 27 | x.report("NAME_SHORT") do 28 | TIMES.times { PublicSuffixList.find(NAME_SHORT) != nil } 29 | end 30 | x.report("NAME_SHORT (noprivate)") do 31 | TIMES.times { PublicSuffixList.find(NAME_SHORT, ignore_private: true) != nil } 32 | end 33 | x.report("NAME_MEDIUM") do 34 | TIMES.times { PublicSuffixList.find(NAME_MEDIUM) != nil } 35 | end 36 | x.report("NAME_MEDIUM (noprivate)") do 37 | TIMES.times { PublicSuffixList.find(NAME_MEDIUM, ignore_private: true) != nil } 38 | end 39 | x.report("NAME_LONG") do 40 | TIMES.times { PublicSuffixList.find(NAME_LONG) != nil } 41 | end 42 | x.report("NAME_LONG (noprivate)") do 43 | TIMES.times { 
PublicSuffixList.find(NAME_LONG, ignore_private: true) != nil } 44 | end 45 | x.report("NAME_WILD") do 46 | TIMES.times { PublicSuffixList.find(NAME_WILD) != nil } 47 | end 48 | x.report("NAME_WILD (noprivate)") do 49 | TIMES.times { PublicSuffixList.find(NAME_WILD, ignore_private: true) != nil } 50 | end 51 | x.report("NAME_EXCP") do 52 | TIMES.times { PublicSuffixList.find(NAME_EXCP) != nil } 53 | end 54 | x.report("NAME_EXCP (noprivate)") do 55 | TIMES.times { PublicSuffixList.find(NAME_EXCP, ignore_private: true) != nil } 56 | end 57 | 58 | x.report("IAAA") do 59 | TIMES.times { PublicSuffixList.find(IAAA) != nil } 60 | end 61 | x.report("IAAA (noprivate)") do 62 | TIMES.times { PublicSuffixList.find(IAAA, ignore_private: true) != nil } 63 | end 64 | x.report("IZZZ") do 65 | TIMES.times { PublicSuffixList.find(IZZZ) != nil } 66 | end 67 | x.report("IZZZ (noprivate)") do 68 | TIMES.times { PublicSuffixList.find(IZZZ, ignore_private: true) != nil } 69 | end 70 | 71 | x.report("PAAA") do 72 | TIMES.times { PublicSuffixList.find(PAAA) != nil } 73 | end 74 | x.report("PAAA (noprivate)") do 75 | TIMES.times { PublicSuffixList.find(PAAA, ignore_private: true) != nil } 76 | end 77 | x.report("PZZZ") do 78 | TIMES.times { PublicSuffixList.find(PZZZ) != nil } 79 | end 80 | x.report("PZZZ (noprivate)") do 81 | TIMES.times { PublicSuffixList.find(PZZZ, ignore_private: true) != nil } 82 | end 83 | 84 | x.report("JP") do 85 | TIMES.times { PublicSuffixList.find(JP) != nil } 86 | end 87 | x.report("JP (noprivate)") do 88 | TIMES.times { PublicSuffixList.find(JP, ignore_private: true) != nil } 89 | end 90 | x.report("IT") do 91 | TIMES.times { PublicSuffixList.find(IT) != nil } 92 | end 93 | x.report("IT (noprivate)") do 94 | TIMES.times { PublicSuffixList.find(IT, ignore_private: true) != nil } 95 | end 96 | x.report("COM") do 97 | TIMES.times { PublicSuffixList.find(COM) != nil } 98 | end 99 | x.report("COM (noprivate)") do 100 | TIMES.times { PublicSuffixList.find(COM, 
ignore_private: true) != nil } 101 | end 102 | end 103 | -------------------------------------------------------------------------------- /test/benchmarks/bm_names.rb: -------------------------------------------------------------------------------- 1 | require 'benchmark/ips' 2 | 3 | STRING = "www.subdomain.example.com" 4 | ARRAY = %w( 5 | com 6 | example.com 7 | subdomain.example.com 8 | www.subdomain.example.com 9 | ) 10 | 11 | def tokenizer1(string) 12 | parts = string.split(".").reverse! 13 | index = 0 14 | query = parts[index] 15 | names = [] 16 | 17 | loop do 18 | names << query 19 | 20 | index += 1 21 | break if index >= parts.size 22 | query = parts[index] + "." + query 23 | end 24 | names 25 | end 26 | 27 | def tokenizer2(string) 28 | parts = string.split(".") 29 | index = parts.size - 1 30 | query = parts[index] 31 | names = [] 32 | 33 | loop do 34 | names << query 35 | 36 | index -= 1 37 | break if index < 0 38 | query = parts[index] + "." + query 39 | end 40 | names 41 | end 42 | 43 | def tokenizer3(string) 44 | isx = string.size 45 | idx = string.size - 1 46 | names = [] 47 | 48 | loop do 49 | isx = string.rindex(".", isx - 1) || -1 50 | names << string[isx + 1, idx - isx] 51 | 52 | break if isx <= 0 53 | end 54 | names 55 | end 56 | 57 | def tokenizer4(string) 58 | isx = string.size 59 | idx = string.size - 1 60 | names = [] 61 | 62 | loop do 63 | isx = string.rindex(".", isx - 1) || -1 64 | names << string[(isx+1)..idx] 65 | 66 | break if isx <= 0 67 | end 68 | names 69 | end 70 | 71 | (x = tokenizer1(STRING)) == ARRAY or fail("tokenizer1 failed: #{x.inspect}") 72 | (x = tokenizer2(STRING)) == ARRAY or fail("tokenizer2 failed: #{x.inspect}") 73 | (x = tokenizer3(STRING)) == ARRAY or fail("tokenizer3 failed: #{x.inspect}") 74 | (x = tokenizer4(STRING)) == ARRAY or fail("tokenizer4 failed: #{x.inspect}") 75 | 76 | Benchmark.ips do |x| 77 | x.report("tokenizer1") do 78 | tokenizer1(STRING).is_a?(Array) 79 | end 80 | x.report("tokenizer2") do 81 | 
tokenizer2(STRING).is_a?(Array) 82 | end 83 | x.report("tokenizer3") do 84 | tokenizer3(STRING).is_a?(Array) 85 | end 86 | x.report("tokenizer4") do 87 | tokenizer4(STRING).is_a?(Array) 88 | end 89 | 90 | x.compare! 91 | end 92 | -------------------------------------------------------------------------------- /test/benchmarks/bm_select.rb: -------------------------------------------------------------------------------- 1 | require 'benchmark' 2 | require_relative "../../lib/public_suffix" 3 | 4 | JP = "www.yokoshibahikari.chiba.jp" 5 | 6 | TIMES = (ARGV.first || 50_000).to_i 7 | 8 | # Initialize 9 | class PublicSuffix::List 10 | public :select 11 | end 12 | PublicSuffixList = PublicSuffix::List.default 13 | PublicSuffixList.select("example.jp") 14 | PublicSuffixList.find("example.jp") 15 | 16 | Benchmark.bmbm(25) do |x| 17 | x.report("JP select") do 18 | TIMES.times { PublicSuffixList.select(JP) } 19 | end 20 | x.report("JP find") do 21 | TIMES.times { PublicSuffixList.find(JP) } 22 | end 23 | # x.report("JP (noprivate)") do 24 | # TIMES.times { PublicSuffixList.find(JP, ignore_private: true) != nil } 25 | # end 26 | end 27 | -------------------------------------------------------------------------------- /test/benchmarks/bm_select_incremental.rb: -------------------------------------------------------------------------------- 1 | require 'benchmark' 2 | require_relative "../../lib/public_suffix" 3 | 4 | JP = "www.yokoshibahikari.chiba.jp" 5 | 6 | TIMES = (ARGV.first || 50_000).to_i 7 | 8 | # Initialize 9 | class PublicSuffix::List 10 | public :select 11 | end 12 | PublicSuffixList = PublicSuffix::List.default 13 | PublicSuffixList.select("example.jp") 14 | 15 | Benchmark.bmbm(25) do |x| 16 | x.report("select jp") do 17 | TIMES.times { PublicSuffixList.select("jp") } 18 | end 19 | x.report("select example.jp") do 20 | TIMES.times { PublicSuffixList.select("example.jp") } 21 | end 22 | x.report("select www.example.jp") do 23 | TIMES.times { 
PublicSuffixList.select("www.example.jp") } 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /test/benchmarks/bm_valid.rb: -------------------------------------------------------------------------------- 1 | require 'benchmark' 2 | require_relative "../../lib/public_suffix" 3 | 4 | NAME_SHORT = "example.de" 5 | NAME_MEDIUM = "www.subdomain.example.de" 6 | NAME_LONG = "one.two.three.four.five.example.de" 7 | NAME_WILD = "one.two.three.four.five.example.bd" 8 | NAME_EXCP = "one.two.three.four.five.www.ck" 9 | 10 | IAAA = "www.example.ac" 11 | IZZZ = "www.example.zone" 12 | 13 | PAAA = "one.two.three.four.five.example.beep.pl" 14 | PZZZ = "one.two.three.four.five.example.now.sh" 15 | 16 | JP = "www.yokoshibahikari.chiba.jp" 17 | IT = "www.example.it" 18 | COM = "www.example.com" 19 | 20 | TIMES = (ARGV.first || 50_000).to_i 21 | 22 | # Initialize 23 | PublicSuffix.valid?("example.com") 24 | 25 | Benchmark.bmbm(25) do |x| 26 | x.report("NAME_SHORT") do 27 | TIMES.times { PublicSuffix.valid?(NAME_SHORT) == true } 28 | end 29 | x.report("NAME_SHORT (noprivate)") do 30 | TIMES.times { PublicSuffix.valid?(NAME_SHORT, ignore_private: true) == true } 31 | end 32 | x.report("NAME_MEDIUM") do 33 | TIMES.times { PublicSuffix.valid?(NAME_MEDIUM) == true } 34 | end 35 | x.report("NAME_MEDIUM (noprivate)") do 36 | TIMES.times { PublicSuffix.valid?(NAME_MEDIUM, ignore_private: true) == true } 37 | end 38 | x.report("NAME_LONG") do 39 | TIMES.times { PublicSuffix.valid?(NAME_LONG) == true } 40 | end 41 | x.report("NAME_LONG (noprivate)") do 42 | TIMES.times { PublicSuffix.valid?(NAME_LONG, ignore_private: true) == true } 43 | end 44 | x.report("NAME_WILD") do 45 | TIMES.times { PublicSuffix.valid?(NAME_WILD) == true } 46 | end 47 | x.report("NAME_WILD (noprivate)") do 48 | TIMES.times { PublicSuffix.valid?(NAME_WILD, ignore_private: true) == true } 49 | end 50 | x.report("NAME_EXCP") do 51 | TIMES.times { 
PublicSuffix.valid?(NAME_EXCP) == true } 52 | end 53 | x.report("NAME_EXCP (noprivate)") do 54 | TIMES.times { PublicSuffix.valid?(NAME_EXCP, ignore_private: true) == true } 55 | end 56 | 57 | x.report("IAAA") do 58 | TIMES.times { PublicSuffix.valid?(IAAA) == true } 59 | end 60 | x.report("IAAA (noprivate)") do 61 | TIMES.times { PublicSuffix.valid?(IAAA, ignore_private: true) == true } 62 | end 63 | x.report("IZZZ") do 64 | TIMES.times { PublicSuffix.valid?(IZZZ) == true } 65 | end 66 | x.report("IZZZ (noprivate)") do 67 | TIMES.times { PublicSuffix.valid?(IZZZ, ignore_private: true) == true } 68 | end 69 | 70 | x.report("PAAA") do 71 | TIMES.times { PublicSuffix.valid?(PAAA) == true } 72 | end 73 | x.report("PAAA (noprivate)") do 74 | TIMES.times { PublicSuffix.valid?(PAAA, ignore_private: true) == true } 75 | end 76 | x.report("PZZZ") do 77 | TIMES.times { PublicSuffix.valid?(PZZZ) == true } 78 | end 79 | x.report("PZZZ (noprivate)") do 80 | TIMES.times { PublicSuffix.valid?(PZZZ, ignore_private: true) == true } 81 | end 82 | 83 | x.report("JP") do 84 | TIMES.times { PublicSuffix.valid?(JP) == true } 85 | end 86 | x.report("JP (noprivate)") do 87 | TIMES.times { PublicSuffix.valid?(JP, ignore_private: true) == true } 88 | end 89 | x.report("IT") do 90 | TIMES.times { PublicSuffix.valid?(IT) == true } 91 | end 92 | x.report("IT (noprivate)") do 93 | TIMES.times { PublicSuffix.valid?(IT, ignore_private: true) == true } 94 | end 95 | x.report("COM") do 96 | TIMES.times { PublicSuffix.valid?(COM) == true } 97 | end 98 | x.report("COM (noprivate)") do 99 | TIMES.times { PublicSuffix.valid?(COM, ignore_private: true) == true } 100 | end 101 | end 102 | -------------------------------------------------------------------------------- /test/profilers/domain_profiler.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift File.expand_path("../../lib", __dir__) 2 | 3 | require "memory_profiler" 4 | require 
"public_suffix" 5 | 6 | PublicSuffix::List.default 7 | 8 | report = MemoryProfiler.report do 9 | PublicSuffix.domain("www.example.com") 10 | end 11 | 12 | report.pretty_print 13 | -------------------------------------------------------------------------------- /test/profilers/find_profiler.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift File.expand_path("../../lib", __dir__) 2 | 3 | require "memory_profiler" 4 | require "public_suffix" 5 | 6 | PublicSuffix::List.default 7 | 8 | report = MemoryProfiler.report do 9 | PublicSuffix::List.default.find("www.example.com") 10 | end 11 | 12 | report.pretty_print 13 | -------------------------------------------------------------------------------- /test/profilers/find_profiler_jp.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift File.expand_path("../../lib", __dir__) 2 | 3 | require "memory_profiler" 4 | require "public_suffix" 5 | 6 | PublicSuffix::List.default 7 | 8 | report = MemoryProfiler.report do 9 | PublicSuffix::List.default.find("a.b.ide.kyoto.jp") 10 | end 11 | 12 | report.pretty_print 13 | -------------------------------------------------------------------------------- /test/profilers/initialization_profiler.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift File.expand_path("../../lib", __dir__) 2 | 3 | require "memory_profiler" 4 | require "public_suffix" 5 | 6 | report = MemoryProfiler.report do 7 | PublicSuffix::List.default 8 | end 9 | 10 | report.pretty_print 11 | # report.pretty_print(to_file: 'profiler-%s-%d.txt' % [ARGV[0], Time.now.to_i]) 12 | -------------------------------------------------------------------------------- /test/profilers/list_profsize.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift File.expand_path("../../lib", __dir__) 2 | 3 | require_relative 
"object_binsize" 4 | require "public_suffix" 5 | 6 | list = PublicSuffix::List.default 7 | puts "#{list.size} rules:" 8 | 9 | prof = ObjectBinsize.new 10 | prof.report(PublicSuffix::List.default, label: "PublicSuffix::List size") 11 | prof.report(PublicSuffix::List.default.instance_variable_get(:@rules), label: "Size of rules") 12 | -------------------------------------------------------------------------------- /test/profilers/object_binsize.rb: -------------------------------------------------------------------------------- 1 | require 'tempfile' 2 | 3 | # A very simple memory profiles that checks the full size of a variable 4 | # by serializing into a binary file. 5 | # 6 | # Yes, I know this is very rough, but there are cases where ObjectSpace.memsize_of 7 | # doesn't cooperate, and this is one of the possible workarounds. 8 | # 9 | # For certain cases, it works (TM). 10 | class ObjectBinsize 11 | 12 | def measure(var, label: nil) 13 | dump(var, label: label) 14 | end 15 | 16 | def report(var, label: nil, padding: 10) 17 | file = measure(var, label: label) 18 | 19 | size = format_integer(file.size) 20 | name = label || File.basename(file.path) 21 | printf("%#{padding}s %s\n", size, name) 22 | end 23 | 24 | private 25 | 26 | def dump(var, **args) 27 | file = Tempfile.new(args[:label].to_s) 28 | file.write(Marshal.dump(var)) 29 | file 30 | ensure 31 | file.close 32 | end 33 | 34 | def format_integer(int) 35 | int.to_s.reverse.gsub(/...(?=.)/, '\&,').reverse 36 | end 37 | 38 | end 39 | 40 | if __FILE__ == $0 41 | prof = ObjectBinsize.new 42 | 43 | prof.report(nil, label: "nil") 44 | prof.report(false, label: "false") 45 | prof.report(true, label: "true") 46 | prof.report(0, label: "integer") 47 | prof.report("", label: "empty string") 48 | prof.report({}, label: "empty hash") 49 | prof.report({}, label: "empty array") 50 | 51 | prof.report({ foo: "1" }, label: "hash 1 item (symbol)") 52 | prof.report({ foo: "1", bar: 2 }, label: "hash 2 items (symbol)") 53 | 
prof.report({ "foo" => "1" }, label: "hash 1 item (string)") 54 | prof.report({ "foo" => "1", "bar" => 2 }, label: "hash 2 items (string)") 55 | 56 | prof.report("big string" * 200, label: "big string * 200") 57 | end 58 | -------------------------------------------------------------------------------- /test/psl_test.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "test_helper" 4 | require "public_suffix" 5 | 6 | # This test runs against the current PSL file and ensures 7 | # the definitions satisfy the test suite. 8 | class PslTest < Minitest::Test 9 | 10 | ROOT = File.expand_path("..", __dir__) 11 | 12 | # rubocop:disable Security/Eval 13 | def self.tests 14 | File.readlines(File.join(ROOT, "test/tests.txt")).map do |line| 15 | line = line.strip 16 | next if line.empty? 17 | next if line.start_with?("//") 18 | 19 | input, output = line.split(", ") 20 | 21 | # handle the case of eval("null"), it must be eval("nil") 22 | input = "nil" if input == "null" 23 | output = "nil" if output == "null" 24 | 25 | input = eval(input) 26 | output = eval(output) 27 | [input, output] 28 | end 29 | end 30 | # rubocop:enable Security/Eval 31 | 32 | 33 | def test_valid 34 | # Parse the PSL and run the tests 35 | data = File.read(PublicSuffix::List::DEFAULT_LIST_PATH) 36 | PublicSuffix::List.default = PublicSuffix::List.parse(data) 37 | 38 | failures = [] 39 | self.class.tests.each do |input, output| 40 | # Punycode domains are not supported ATM 41 | next if input =~ /xn--/ 42 | 43 | domain = PublicSuffix.domain(input) rescue nil 44 | failures << [input, output, domain] if output != domain 45 | end 46 | 47 | message = "The following #{failures.size} tests fail:\n" 48 | failures.each { |i, o, d| message += format("Expected %s to be %s, got %s\n", i.inspect, o.inspect, d.inspect) } 49 | assert_equal 0, failures.size, message 50 | end 51 | 52 | end 53 |
-------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "minitest/autorun" 4 | require "minitest/reporters" 5 | require "mocha/minitest" 6 | 7 | Minitest::Reporters.use! Minitest::Reporters::DefaultReporter.new(color: true) 8 | 9 | $LOAD_PATH.unshift File.expand_path("../lib", __dir__) 10 | require "public_suffix" 11 | -------------------------------------------------------------------------------- /test/tests.txt: -------------------------------------------------------------------------------- 1 | // Any copyright is dedicated to the Public Domain. 2 | // http://creativecommons.org/publicdomain/zero/1.0/ 3 | 4 | // null input 5 | null, null 6 | // Mixed case 7 | 'COM', null 8 | 'example.COM', 'example.com' 9 | 'WwW.example.COM', 'example.com' 10 | // Leading dot 11 | '.com', null 12 | '.example', null 13 | '.example.com', null 14 | '.example.example', null 15 | // Unlisted TLD 16 | 'example', null 17 | 'example.example', 'example.example' 18 | 'b.example.example', 'example.example' 19 | 'a.b.example.example', 'example.example' 20 | // Listed, but non-Internet, TLD 21 | //'local', null 22 | //'example.local', null 23 | //'b.example.local', null 24 | //'a.b.example.local', null 25 | // TLD with only 1 rule 26 | 'biz', null 27 | 'domain.biz', 'domain.biz' 28 | 'b.domain.biz', 'domain.biz' 29 | 'a.b.domain.biz', 'domain.biz' 30 | // TLD with some 2-level rules 31 | 'com', null 32 | 'example.com', 'example.com' 33 | 'b.example.com', 'example.com' 34 | 'a.b.example.com', 'example.com' 35 | 'uk.com', null 36 | 'example.uk.com', 'example.uk.com' 37 | 'b.example.uk.com', 'example.uk.com' 38 | 'a.b.example.uk.com', 'example.uk.com' 39 | 'test.ac', 'test.ac' 40 | // TLD with only 1 (wildcard) rule 41 | 'mm', null 42 | 'c.mm', null 43 | 'b.c.mm', 'b.c.mm' 44 | 'a.b.c.mm', 'b.c.mm' 45 | // More complex 
TLD 46 | 'jp', null 47 | 'test.jp', 'test.jp' 48 | 'www.test.jp', 'test.jp' 49 | 'ac.jp', null 50 | 'test.ac.jp', 'test.ac.jp' 51 | 'www.test.ac.jp', 'test.ac.jp' 52 | 'kyoto.jp', null 53 | 'test.kyoto.jp', 'test.kyoto.jp' 54 | 'ide.kyoto.jp', null 55 | 'b.ide.kyoto.jp', 'b.ide.kyoto.jp' 56 | 'a.b.ide.kyoto.jp', 'b.ide.kyoto.jp' 57 | 'c.kobe.jp', null 58 | 'b.c.kobe.jp', 'b.c.kobe.jp' 59 | 'a.b.c.kobe.jp', 'b.c.kobe.jp' 60 | 'city.kobe.jp', 'city.kobe.jp' 61 | 'www.city.kobe.jp', 'city.kobe.jp' 62 | // TLD with a wildcard rule and exceptions 63 | 'ck', null 64 | 'test.ck', null 65 | 'b.test.ck', 'b.test.ck' 66 | 'a.b.test.ck', 'b.test.ck' 67 | 'www.ck', 'www.ck' 68 | 'www.www.ck', 'www.ck' 69 | // US K12 70 | 'us', null 71 | 'test.us', 'test.us' 72 | 'www.test.us', 'test.us' 73 | 'ak.us', null 74 | 'test.ak.us', 'test.ak.us' 75 | 'www.test.ak.us', 'test.ak.us' 76 | 'k12.ak.us', null 77 | 'test.k12.ak.us', 'test.k12.ak.us' 78 | 'www.test.k12.ak.us', 'test.k12.ak.us' 79 | // IDN labels 80 | '食狮.com.cn', '食狮.com.cn' 81 | '食狮.公司.cn', '食狮.公司.cn' 82 | 'www.食狮.公司.cn', '食狮.公司.cn' 83 | 'shishi.公司.cn', 'shishi.公司.cn' 84 | '公司.cn', null 85 | '食狮.中国', '食狮.中国' 86 | 'www.食狮.中国', '食狮.中国' 87 | 'shishi.中国', 'shishi.中国' 88 | '中国', null 89 | // Same as above, but punycoded 90 | 'xn--85x722f.com.cn', 'xn--85x722f.com.cn' 91 | 'xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn' 92 | 'www.xn--85x722f.xn--55qx5d.cn', 'xn--85x722f.xn--55qx5d.cn' 93 | 'shishi.xn--55qx5d.cn', 'shishi.xn--55qx5d.cn' 94 | 'xn--55qx5d.cn', null 95 | 'xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s' 96 | 'www.xn--85x722f.xn--fiqs8s', 'xn--85x722f.xn--fiqs8s' 97 | 'shishi.xn--fiqs8s', 'shishi.xn--fiqs8s' 98 | 'xn--fiqs8s', null 99 | -------------------------------------------------------------------------------- /test/unit/domain_test.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "test_helper" 4 | 5 | class 
PublicSuffix::DomainTest < Minitest::Test 6 | 7 | def setup 8 | @klass = PublicSuffix::Domain 9 | end 10 | 11 | # Tokenizes given input into labels. 12 | def test_self_name_to_labels 13 | assert_equal %w[someone spaces live com], 14 | PublicSuffix::Domain.name_to_labels("someone.spaces.live.com") 15 | assert_equal %w[leontina23samiko wiki zoho com], 16 | PublicSuffix::Domain.name_to_labels("leontina23samiko.wiki.zoho.com") 17 | end 18 | 19 | # Converts input into String. 20 | def test_self_name_to_labels_converts_input_to_string 21 | assert_equal %w[someone spaces live com], 22 | PublicSuffix::Domain.name_to_labels(:"someone.spaces.live.com") 23 | end 24 | 25 | 26 | def test_initialize_with_tld 27 | domain = @klass.new("com") 28 | assert_equal "com", domain.tld 29 | assert_nil domain.sld 30 | assert_nil domain.trd 31 | end 32 | 33 | def test_initialize_with_tld_and_sld 34 | domain = @klass.new("com", "google") 35 | assert_equal "com", domain.tld 36 | assert_equal "google", domain.sld 37 | assert_nil domain.trd 38 | end 39 | 40 | def test_initialize_with_tld_and_sld_and_trd 41 | domain = @klass.new("com", "google", "www") 42 | assert_equal "com", domain.tld 43 | assert_equal "google", domain.sld 44 | assert_equal "www", domain.trd 45 | end 46 | 47 | 48 | def test_to_s 49 | assert_equal "com", @klass.new("com").to_s 50 | assert_equal "google.com", @klass.new("com", "google").to_s 51 | assert_equal "www.google.com", @klass.new("com", "google", "www").to_s 52 | end 53 | 54 | def test_to_a 55 | assert_equal [nil, nil, "com"], @klass.new("com").to_a 56 | assert_equal [nil, "google", "com"], @klass.new("com", "google").to_a 57 | assert_equal ["www", "google", "com"], @klass.new("com", "google", "www").to_a 58 | end 59 | 60 | 61 | def test_tld 62 | assert_equal "com", @klass.new("com", "google", "www").tld 63 | end 64 | 65 | def test_sld 66 | assert_equal "google", @klass.new("com", "google", "www").sld 67 | end 68 | 69 | def test_trd 70 | assert_equal "www", 
@klass.new("com", "google", "www").trd 71 | end 72 | 73 | 74 | def test_name 75 | assert_equal "com", @klass.new("com").name 76 | assert_equal "google.com", @klass.new("com", "google").name 77 | assert_equal "www.google.com", @klass.new("com", "google", "www").name 78 | end 79 | 80 | def test_domain 81 | assert_nil @klass.new("com").domain 82 | assert_nil @klass.new("tldnotlisted").domain 83 | assert_equal "google.com", @klass.new("com", "google").domain 84 | assert_equal "google.tldnotlisted", @klass.new("tldnotlisted", "google").domain 85 | assert_equal "google.com", @klass.new("com", "google", "www").domain 86 | assert_equal "google.tldnotlisted", @klass.new("tldnotlisted", "google", "www").domain 87 | end 88 | 89 | def test_subdomain 90 | assert_nil @klass.new("com").subdomain 91 | assert_nil @klass.new("tldnotlisted").subdomain 92 | assert_nil @klass.new("com", "google").subdomain 93 | assert_nil @klass.new("tldnotlisted", "google").subdomain 94 | assert_equal "www.google.com", @klass.new("com", "google", "www").subdomain 95 | assert_equal "www.google.tldnotlisted", @klass.new("tldnotlisted", "google", "www").subdomain 96 | end 97 | 98 | 99 | def test_domain_question 100 | assert !@klass.new("com").domain? 101 | assert @klass.new("com", "example").domain? 102 | assert @klass.new("com", "example", "www").domain? 103 | assert @klass.new("tldnotlisted", "example").domain? 
104 | end 105 | 106 | end 107 | -------------------------------------------------------------------------------- /test/unit/errors_test.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "test_helper" 4 | 5 | class ErrorsTest < Minitest::Test 6 | 7 | # Inherits from StandardError 8 | def test_error_inheritance 9 | assert_kind_of StandardError, 10 | PublicSuffix::Error.new 11 | end 12 | 13 | # Inherits from PublicSuffix::Error 14 | def test_domain_invalid_inheritance 15 | assert_kind_of PublicSuffix::Error, 16 | PublicSuffix::DomainInvalid.new 17 | end 18 | 19 | # Inherits from PublicSuffix::DomainInvalid 20 | def test_domain_not_allowed_inheritance 21 | assert_kind_of PublicSuffix::DomainInvalid, 22 | PublicSuffix::DomainNotAllowed.new 23 | end 24 | 25 | end 26 | -------------------------------------------------------------------------------- /test/unit/list_test.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "test_helper" 4 | 5 | class PublicSuffix::ListTest < Minitest::Test 6 | 7 | def setup 8 | @list = PublicSuffix::List.new 9 | end 10 | 11 | def teardown 12 | PublicSuffix::List.default = nil 13 | end 14 | 15 | 16 | def test_initialize 17 | assert_instance_of PublicSuffix::List, @list 18 | assert_equal 0, @list.size 19 | end 20 | 21 | 22 | def test_equality_with_self 23 | list = PublicSuffix::List.new 24 | assert_equal list, list 25 | end 26 | 27 | def test_equality_with_internals 28 | rule = PublicSuffix::Rule.factory("com") 29 | assert_equal PublicSuffix::List.new.add(rule), PublicSuffix::List.new.add(rule) 30 | end 31 | 32 | def test_each_without_block 33 | list = PublicSuffix::List.parse(<