├── .github
└── workflows
│ ├── codeql.yml
│ ├── release.yml
│ ├── swift.yml
│ └── update.yml
├── .gitignore
├── .spi.yml
├── LICENSE
├── Metadata
└── domain-diagram.svg
├── Package.swift
├── README.md
├── Sources
└── TLDExtract
│ ├── Documentation.docc
│ └── Documentation.md
│ ├── Extension.swift
│ ├── Model.swift
│ ├── Parser.swift
│ ├── PunycodeSwift
│ ├── Extensions.swift
│ ├── Helpers.swift
│ └── Punycode.swift
│ ├── Resources
│ └── public_suffix_list.dat
│ ├── TLDExtract.swift
│ └── TLDExtractError.swift
├── Tests
└── TLDExtractTests
│ ├── PunycodeTests.swift
│ └── TLDExtractTests.swift
├── codecov.yml
└── update-psl.py
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | # For most projects, this workflow file will not need changing; you simply need
2 | # to commit it to your repository.
3 | #
4 | # You may wish to alter this file to override the set of languages analyzed,
5 | # or to provide custom queries or build logic.
6 | #
7 | # ******** NOTE ********
8 | # We have attempted to detect the languages in your repository. Please check
9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL"
13 |
14 | on:
15 | push:
16 | branches: [ "master" ]
17 | pull_request:
18 | # The branches below must be a subset of the branches above
19 | branches: [ "master" ]
20 | schedule:
21 | - cron: '18 19 * * 3'
22 |
23 | jobs:
24 | analyze:
25 | name: Analyze
26 | # Runner size impacts CodeQL analysis time. To learn more, please see:
27 | # - https://gh.io/recommended-hardware-resources-for-running-codeql
28 | # - https://gh.io/supported-runners-and-hardware-resources
29 | # - https://gh.io/using-larger-runners
30 | # Consider using larger runners for possible analysis time improvements.
31 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
32 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
33 | permissions:
34 | actions: read
35 | contents: read
36 | security-events: write
37 |
38 | strategy:
39 | fail-fast: false
40 | matrix:
41 | language: [ 'python', 'swift' ]
42 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ]
43 | # Use only 'java' to analyze code written in Java, Kotlin or both
44 | # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
45 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
46 |
47 | steps:
48 | - name: Checkout repository
49 | uses: actions/checkout@v3
50 |
51 | # Initializes the CodeQL tools for scanning.
52 | - name: Initialize CodeQL
53 | uses: github/codeql-action/init@v2
54 | with:
55 | languages: ${{ matrix.language }}
56 | # If you wish to specify custom queries, you can do so here or in a config file.
57 | # By default, queries listed here will override any specified in a config file.
58 | # Prefix the list here with "+" to use these queries and those in the config file.
59 |
60 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
61 | # queries: security-extended,security-and-quality
62 |
63 |
64 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
65 | # If this step fails, then you should remove it and run the build manually (see below)
66 | #- name: Autobuild
67 | # uses: github/codeql-action/autobuild@v2
68 |
69 | - name: Build Swift # github/codeql-action/autobuild@v2 fails :(
70 | run: swift build
71 |
72 | # ℹ️ Command-line programs to run using the OS shell.
73 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
74 |
75 | # If the Autobuild fails above, remove it and uncomment the following three lines.
76 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
77 |
78 | # - run: |
79 | # echo "Run, Build Application using script"
80 | # ./location_of_script_within_repo/buildscript.sh
81 |
82 | - name: Perform CodeQL Analysis
83 | uses: github/codeql-action/analyze@v2
84 | with:
85 | category: "/language:${{matrix.language}}"
86 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Create Release
2 |
3 | on:
4 | workflow_dispatch:
5 |
6 | jobs:
7 | create-release:
8 |
9 | runs-on: ubuntu-latest
10 |
11 | steps:
12 | - uses: actions/checkout@v3
13 | - name: Get next version
14 | uses: reecetech/version-increment@2023.4.1
15 | id: version
16 | with:
17 | scheme: semver
18 | increment: patch
19 | - name: Create tag
20 | run: git tag ${{ steps.version.outputs.version }}
21 | - name: Create GH Release
22 | uses: softprops/action-gh-release@v1
23 | with:
24 | tag_name: ${{ steps.version.outputs.version }}
25 | token: ${{ secrets.GITHUB_TOKEN }}
26 |
--------------------------------------------------------------------------------
/.github/workflows/swift.yml:
--------------------------------------------------------------------------------
1 | # This workflow will build a Swift project
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-swift
3 |
4 | name: Swift
5 |
6 | on:
7 | push:
8 | paths:
9 | - ".github/workflows/**"
10 | - "**/*.swift"
11 | - "**/*.dat"
12 | pull_request:
13 | paths:
14 | - ".github/workflows/**"
15 | - "**/*.swift"
16 | - "**/*.dat"
17 |
18 | jobs:
19 | build-mac:
20 |
21 | runs-on: macos-latest
22 |
23 | steps:
24 | - uses: actions/checkout@v3
25 | - name: Build
26 | run: swift build -v
27 | - name: Run tests
28 | run: swift test -v --enable-code-coverage
29 | - name: Convert code coverage report for Codecov
30 | run: xcrun llvm-cov export -format="lcov" .build/debug/TLDExtractPackageTests.xctest/Contents/MacOS/TLDExtractPackageTests -instr-profile .build/debug/codecov/default.profdata > info.lcov
31 | - name: Upload coverage reports to Codecov
32 | uses: codecov/codecov-action@v3
33 | env:
34 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
35 |
36 | build-linux:
37 |
38 | runs-on: ubuntu-latest
39 |
40 | steps:
41 | - uses: actions/checkout@v3
42 | - name: Build
43 | run: swift build -v
44 | - name: Run tests
45 | run: swift test -v
--------------------------------------------------------------------------------
/.github/workflows/update.yml:
--------------------------------------------------------------------------------
1 | name: Update PSL
2 |
3 | on:
4 | schedule:
5 | - cron: "0 0 */3 * *" # At 00:00 on every 3rd day-of-month.
6 | workflow_dispatch:
7 |
8 | jobs:
9 | update-psl:
10 |
11 | runs-on: macos-latest
12 |
13 | steps:
14 | - uses: actions/checkout@v3
15 | - name: Update PSL
16 | run: python update-psl.py
17 | - name: Create pull request to commit updated psl
18 | uses: peter-evans/create-pull-request@v5
19 | with:
20 | commit-message: Update PSL
21 | title: '[Automatic] Update PSL'
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Xcode
2 | #
3 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
4 |
5 | ## Build generated
6 | build/
7 | DerivedData/
8 |
9 | ## Various settings
10 | *.pbxuser
11 | !default.pbxuser
12 | *.mode1v3
13 | !default.mode1v3
14 | *.mode2v3
15 | !default.mode2v3
16 | *.perspectivev3
17 | !default.perspectivev3
18 | xcuserdata/
19 |
20 | ## Other
21 | *.moved-aside
22 | *.xccheckout
23 | *.xcscmblueprint
24 |
25 | ## Obj-C/Swift specific
26 | *.hmap
27 | *.ipa
28 | *.dSYM.zip
29 | *.dSYM
30 |
31 | ## Playgrounds
32 | timeline.xctimeline
33 | playground.xcworkspace
34 |
35 | # Swift Package Manager
36 | #
37 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
38 | # Packages/
39 | # Package.pins
40 | # Package.resolved
41 | .build/
42 | .swiftpm/
43 |
44 | # CocoaPods
45 | #
46 | # We recommend against adding the Pods directory to your .gitignore. However
47 | # you should judge for yourself, the pros and cons are mentioned at:
48 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
49 | #
50 | # Pods/
51 |
52 | # Carthage
53 | #
54 | Carthage/Checkouts
55 | Carthage/Build
56 |
57 | # fastlane
58 | #
59 | # It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the
60 | # screenshots whenever they are needed.
61 | # For more information about the recommended setup visit:
62 | # https://docs.fastlane.tools/best-practices/source-control/#source-control
63 |
64 | fastlane/report.xml
65 | fastlane/Preview.html
66 | fastlane/screenshots/**/*.png
67 | fastlane/test_output
68 |
69 | # Direnv
70 | .envrc
71 |
72 | # Ruby
73 | .bundle
74 | .ruby-version
75 |
76 | # Python
77 | .python-version
78 |
--------------------------------------------------------------------------------
/.spi.yml:
--------------------------------------------------------------------------------
1 | version: 1
2 | builder:
3 | configs:
4 | - documentation_targets: [TLDExtract]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Gumob
4 | Copyright (c) 2023 MarcoEidinger
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/Metadata/domain-diagram.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Package.swift:
--------------------------------------------------------------------------------
1 | // swift-tools-version:5.6
2 | // The swift-tools-version declares the minimum version of Swift required to build this package.
3 |
4 | import PackageDescription
5 |
6 | let package = Package( // manifest for the TLDExtract library package
7 | name: "TLDExtract",
8 | products: [
9 | .library(
10 | name: "TLDExtract",
11 | targets: ["TLDExtract"])
12 | ],
13 | dependencies: [], // no external package dependencies
14 | targets: [
15 | .target(
16 | name: "TLDExtract",
17 | dependencies: [],
18 | resources: [
19 | .copy("Resources/public_suffix_list.dat") // ship the Public Suffix List file with the target, copied as-is
20 | ]
21 | ),
22 | .testTarget(
23 | name: "TLDExtractTests",
24 | dependencies: ["TLDExtract"])
25 | ]
26 | )
27 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://swiftpackageindex.com/MarcoEidinger/TLDExtractSwift) [](https://swiftpackageindex.com/MarcoEidinger/TLDExtractSwift) [](https://codecov.io/gh/MarcoEidinger/TLDExtractSwift)
2 |
3 | # TLDExtract
4 |
5 |
6 |
7 | TLDExtract is a Swift package that allows you to extract
8 | - root domain
9 | - top level domain (TLD)
10 | - second level domain
11 | - subdomain
12 |
13 | from a `URL` or a hostname `String`.
14 |
15 | This is a fork of Kojiro Futamura's fantastic work on [gumob/TLDExtractSwift](https://github.com/gumob/TLDExtractSwift).
16 |
17 | ## Main differences to the original repo
18 | - **Always up-to-date**
19 | - leveraging GitHub Actions to regularly create new package versions bundling the latest [Public Suffix List](http://www.publicsuffix.org) (PSL) - perfect for offline use
20 | - a modern `async` function that sends a network request to fetch the latest PSL from the remote server ad hoc.
21 | - **Swift Package Manager** (SPM) as the **exclusive distribution channel**
22 | - No package dependencies
23 |
24 | If you want to consume the library through CocoaPods or Carthage then please go ahead and use the [original repository](https://github.com/gumob/TLDExtractSwift).
25 |
26 | ## Usage
27 |
28 | ### Initialization
29 |
30 | ```swift
31 | import TLDExtract
32 |
33 | let extractor = TLDExtract()
34 | ```
35 | ### Extraction
36 |
37 | #### Passing argument as String
38 |
39 | Extract a URL:
40 |
41 | ```swift
42 | let urlString: String = "https://www.github.com/gumob/TLDExtract"
43 | guard let result: TLDResult = extractor.parse(urlString) else { return }
44 |
45 | print(result.rootDomain) // Optional("github.com")
46 | print(result.topLevelDomain) // Optional("com")
47 | print(result.secondLevelDomain) // Optional("github")
48 | print(result.subDomain) // Optional("www")
49 | ```
50 |
51 | Extract a hostname:
52 |
53 | ```swift
54 | let hostname: String = "gumob.com"
55 | guard let result: TLDResult = extractor.parse(hostname) else { return }
56 |
57 | print(result.rootDomain) // Optional("gumob.com")
58 | print(result.topLevelDomain) // Optional("com")
59 | print(result.secondLevelDomain) // Optional("gumob")
60 | print(result.subDomain) // nil
61 | ```
62 |
63 | Extract a Unicode hostname:
64 |
65 | ```swift
66 | let hostname: String = "www.ラーメン.寿司.co.jp"
67 | guard let result: TLDResult = extractor.parse(hostname) else { return }
68 |
69 | print(result.rootDomain) // Optional("寿司.co.jp")
70 | print(result.topLevelDomain) // Optional("co.jp")
71 | print(result.secondLevelDomain) // Optional("寿司")
72 | print(result.subDomain) // Optional("www.ラーメン")
73 | ```
74 |
75 | Extract a Punycode-encoded hostname (same host as above):
76 |
77 | ```swift
78 | let hostname: String = "www.xn--4dkp5a8a.xn--sprr0q.co.jp"
79 | guard let result: TLDResult = extractor.parse(hostname) else { return }
80 |
81 | print(result.rootDomain) // Optional("xn--sprr0q.co.jp")
82 | print(result.topLevelDomain) // Optional("co.jp")
83 | print(result.secondLevelDomain) // Optional("xn--sprr0q")
84 | print(result.subDomain) // Optional("www.xn--4dkp5a8a")
85 | ```
86 |
87 | #### Passing argument as Foundation URL
88 |
89 | Extract a Unicode URL:
90 | The `URL` class in the Foundation framework does not support Unicode URLs by default. You can use the `URL` extension shown below as a workaround:
91 | ```swift
92 | let urlString: String = "http://www.ラーメン.寿司.co.jp"
93 | let url: URL = URL(unicodeString: urlString)
94 | guard let result: TLDResult = extractor.parse(url) else { return }
95 |
96 | print(result.rootDomain) // Optional("寿司.co.jp")
97 | print(result.topLevelDomain) // Optional("co.jp")
98 | print(result.secondLevelDomain) // Optional("寿司")
99 | print(result.subDomain) // Optional("www.ラーメン")
100 | ```
101 |
102 | Extract a URL created from a percent-encoded string (same result as above):
103 | ```swift
104 | let urlString: String = "http://www.ラーメン.寿司.co.jp".addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed)!
105 | let url: URL = URL(string: urlString)!
106 | print(urlString) // http://www.%E3%83%A9%E3%83%BC%E3%83%A1%E3%83%B3.%E5%AF%BF%E5%8F%B8.co.jp
107 |
108 | guard let result: TLDResult = extractor.parse(url) else { return }
109 |
110 | print(result.rootDomain) // Optional("寿司.co.jp")
111 | print(result.topLevelDomain) // Optional("co.jp")
112 | print(result.secondLevelDomain) // Optional("寿司")
113 | print(result.subDomain) // Optional("www.ラーメン")
114 | ```
115 |
116 | ### Ad-hoc fetching of PSL
117 |
118 | This repository regularly publishes new versions with the latest PSL. This should be sufficient for most app developers, assuming you are [updating to the latest version](https://blog.eidinger.info/how-to-catch-up-with-outdated-dependencies-in-your-swift-package-with-github-actions-141d3d06b1d0).
119 |
120 | Nevertheless, an `async` function lets you trigger a network request that fetches the latest PSL from the remote server ad hoc.
121 |
122 | ```swift
123 | import TLDExtract
124 |
125 | let extractor = TLDExtract()
126 | try await extractor.fetchLatestPSL()
127 | ```
128 |
129 | **Requires network connectivity!**
--------------------------------------------------------------------------------
/Sources/TLDExtract/Documentation.docc/Documentation.md:
--------------------------------------------------------------------------------
1 | # ``TLDExtract``
2 |
3 | Swift package to extract top level domain (TLD), second level domain, subdomain and root domain
4 |
5 | ## Overview
6 |
7 | ```swift
8 | import TLDExtract
9 |
10 | let extractor = TLDExtract()
11 |
12 | let urlString: String = "http://super.duper.domain.co.uk"
13 | guard let result: TLDResult = extractor.parse(urlString) else { return }
14 |
15 | print(result.rootDomain) // Optional("domain.co.uk")
16 | print(result.topLevelDomain) // Optional("co.uk")
17 | print(result.secondLevelDomain) // Optional("domain")
18 | print(result.subDomain) // Optional("super.duper")
19 | ```
20 |
21 | ## Topics
22 |
23 | ### Extracting
24 |
25 | - ``TLDExtract/TLDExtract``
26 | - ``TLDExtractable``
27 | - ``TLDResult``
28 |
--------------------------------------------------------------------------------
/Sources/TLDExtract/Extension.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Created by kojirof on 2018-11-17.
3 | // Copyright (c) 2018 Gumob. All rights reserved.
4 | //
5 |
6 | import Foundation
7 |
8 | internal extension Bundle {
9 | class ClassForFramework { // empty marker class; only used as the argument to Bundle(for:) below
10 | }
11 |
12 | static var current: Bundle { // the bundle associated with ClassForFramework
13 | // Alternative left by the original author: return Bundle.main
14 | return Bundle.init(for: ClassForFramework.self)
15 | }
16 | }
17 |
18 | internal extension String {
19 | var isComment: Bool { // true when the string begins with "//"
20 | return self.starts(with: "//")
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/Sources/TLDExtract/Model.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Created by kojirof on 2018-11-17.
3 | // Copyright (c) 2018 Gumob. All rights reserved.
4 | //
5 |
6 | import Foundation
7 |
8 | internal struct PSLDataSet { // Public Suffix List rules grouped by kind (as filled in by PSLParser)
9 | let exceptions: [PSLData] // rules that begin with "!"
10 | let wildcards: [PSLData] // rules that contain "*"
11 | let normals: Set // all remaining rules, kept as plain strings
12 | }
13 |
14 | internal struct PSLData {
15 | /// Whether this rule is an exception rule (its raw form starts with "!")
16 | let isException: Bool
17 | /// TLD Parts split by dot
18 | /// e.g. ["*", "yokohama", "jp"]
19 | let tldParts: [PSLDataPart]
20 | /// The priority score to sort the dataset
21 | /// If the hostname matches more than one rule, the one which has the highest priority is prevailing
22 | let priority: Int
23 |
24 | init(raw: String) { // raw: one rule string, e.g. "*.yokohama.jp", optionally prefixed with "!"
25 | self.isException = raw.starts(with: "!")
26 | let tldStr: String = self.isException ? String(raw.dropFirst()) : raw // drop the leading "!" of an exception rule
27 | self.tldParts = tldStr.components(separatedBy: ".").map(PSLDataPart.init)
28 | self.priority = (self.isException ? 1000 : 0) + self.tldParts.count // number of parts, offset by 1000 for exceptions
29 | }
30 | }
31 |
32 | extension PSLData {
33 | ///
34 | /// For more information about the public suffix list,
35 | /// See the 'Definitions' section at https://publicsuffix.org/list/
36 | ///
37 | /// A domain is said to match a rule if and only if all of the following conditions are met:
38 | /// - When the domain and rule are split into corresponding labels,
39 | /// that the domain contains as many or more labels than the rule.
40 | /// - Beginning with the right-most labels of both the domain and the rule,
41 | /// and continuing for all labels in the rule, one finds that for every pair,
42 | /// either they are identical, or that the label from the rule is "*".
43 | ///
44 | func matches(hostComponents: [String]) -> Bool {
45 | /// The host must have at least as many components as the TLD
46 | let delta: Int = hostComponents.count - self.tldParts.count
47 | guard delta >= 0 else { return false }
48 |
49 | /// Drop extra components from the host components so that two arrays have the same size
50 | let droppedHostComponents = hostComponents.dropFirst(delta)
51 |
52 | /// Compare each rule part with the host component at the same position; all pairs must match
53 | let zipped: Zip2Sequence<[PSLDataPart], ArraySlice> = zip(self.tldParts, droppedHostComponents)
54 | return zipped.allSatisfy { (pslData: PSLDataPart, hostComponent: String) in
55 | return pslData.matches(component: hostComponent)
56 | }
57 | }
58 |
59 | func parse(hostComponents: [String]) -> TLDResult { // splits hostComponents into suffix, root, second-level and subdomain using this rule
60 | let partsCount: Int = tldParts.count - (self.isException ? 1 : 0) // for an exception rule the leftmost rule label is not counted as suffix
61 | let delta: Int = hostComponents.count - partsCount // number of host labels left of the suffix
62 |
63 | /// Split the host name into its domain levels; every field is nil when delta == 0
64 | let topLevelDomain: String? = delta == 0 ? nil : hostComponents.dropFirst(delta).joined(separator: ".")
65 | let rootDomain: String? = delta == 0 ? nil : hostComponents.dropFirst(delta - 1).joined(separator: ".")
66 | let secondDomain: String? = delta == 0 ? nil : hostComponents[delta - 1]
67 | let subDomain: String? = delta < 2 ? nil : hostComponents.prefix(delta - 1).joined(separator: ".") // needs at least one label left of the second-level label
68 |
69 | return TLDResult(rootDomain: rootDomain,
70 | topLevelDomain: topLevelDomain,
71 | secondLevelDomain: secondDomain,
72 | subDomain: subDomain)
73 | }
74 | }
75 |
76 | extension PSLData: Comparable { // ordering and equality are both defined on `priority` alone
77 | static func < (lhs: PSLData, rhs: PSLData) -> Bool {
78 | return lhs.priority < rhs.priority
79 | }
80 |
81 | static func == (lhs: PSLData, rhs: PSLData) -> Bool { // NOTE: different rules with the same priority compare equal
82 | return lhs.priority == rhs.priority
83 | }
84 | }
85 |
86 | internal enum PSLDataPart {
87 | ///
88 | /// For more information about the wildcard character,
89 | /// See the 'Specification' section at https://publicsuffix.org/list/
90 | ///
91 | /// The wildcard character * (asterisk) matches any valid sequence of characters in a hostname part.
92 | /// Wildcards are not restricted to appear only in the leftmost position,
93 | /// but they must wildcard an entire label. (I.e. *.*.foo is a valid rule: *bar.foo is not.)
94 | ///
95 | case wildcard
96 | case characters(String)
97 |
98 | init(component: String) { // "*" becomes .wildcard; any other label is stored verbatim
99 | self = component == "*" ? .wildcard : .characters(component)
100 | }
101 |
102 | func matches(component: String) -> Bool { // a wildcard matches any label; otherwise exact string equality is required
103 | switch self {
104 | case .wildcard:
105 | return true
106 | case let .characters(str):
107 | return str == component
108 | }
109 | }
110 | }
111 |
--------------------------------------------------------------------------------
/Sources/TLDExtract/Parser.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Created by kojirof on 2018/11/16.
3 | // Copyright © 2018 Gumob. All rights reserved.
4 | //
5 |
6 | import Foundation
7 |
8 | internal class PSLParser { // collects PSL rules; the three collections below are only ever added to, never reset
9 |
10 | var exceptions: [PSLData] = [PSLData]()
11 | var wildcards: [PSLData] = [PSLData]()
12 | var normals = Set()
13 |
14 | internal func addLine(_ line: String) { // files one rule line into wildcards, exceptions or normals
15 | if line.contains("*") { // checked first, so any line containing "*" is stored as a wildcard
16 | self.wildcards.append(PSLData(raw: line))
17 | } else if line.starts(with: "!") {
18 | self.exceptions.append(PSLData(raw: line))
19 | } else if !line.isComment && !line.isEmpty { // comments and empty strings are dropped
20 | self.normals.insert(line)
21 | }
22 | }
23 |
24 | internal func parse(data: Data?) throws -> PSLDataSet { // throws pslParseError when data is nil, not valid UTF-8, or empty
25 | guard let data: Data = data,
26 | let str: String = String(data: data, encoding: .utf8),
27 | str.count > 0 else {
28 | throw TLDExtractError.pslParseError(message: nil)
29 | }
30 |
31 | str.components(separatedBy: .newlines).forEach { [weak self] (line: String) in
32 | if line.isComment { // skip "//" comment lines
33 | return
34 | }
35 | if line.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { // skip blank and whitespace-only lines
36 | return
37 | }
38 |
39 | self?.addLine(line)
40 |
41 | // Also register the IDNA-encoded form of the line; per the original author this does the same thing as update-psl.py
42 | #if SWIFT_PACKAGE
43 | if let encoded = line.idnaEncoded {
44 | self?.addLine(encoded)
45 | }
46 | #endif
47 | }
48 | return PSLDataSet(
49 | exceptions: exceptions,
50 | wildcards: wildcards,
51 | normals: normals
52 | )
53 | }
54 | }
55 |
56 | internal class TLDParser {
57 |
58 | private let pslDataSet: PSLDataSet
59 |
60 | internal init(dataSet: PSLDataSet) {
61 | self.pslDataSet = dataSet
62 | }
63 |
64 | internal func parseExceptionsAndWildcards(host: String) -> TLDResult? { // nil when no exception or wildcard rule matches the host
65 | let hostComponents: [String] = host.lowercased().components(separatedBy: ".") // matching is done on lowercased labels
66 | /// Search exceptions first, then search wildcards if not match
67 | let matchClosure: (PSLData) -> Bool = { $0.matches(hostComponents: hostComponents) }
68 | let pslData: PSLData? = self.pslDataSet.exceptions.first(where: matchClosure) ??
69 | self.pslDataSet.wildcards.first(where: matchClosure) // first match in array order wins
70 | return pslData?.parse(hostComponents: hostComponents)
71 | }
72 |
73 | internal func parseNormals(host: String) -> TLDResult? {
74 | let tldSet: Set = self.pslDataSet.normals
75 | /// Split the hostname to components
76 | let hostComponents = host.lowercased().components(separatedBy: ".")
77 | /// A host must have at least two parts else it's a TLD
78 | guard hostComponents.count >= 2 else { return nil }
79 | /// Iterate from lower level domain and check if the hostname matches a suffix in the dataset
80 | var copiedHostComponents: ArraySlice = ArraySlice(hostComponents)
81 | var topLevelDomain: String?
82 | repeat {
83 | guard !copiedHostComponents.isEmpty else { return nil }
84 | topLevelDomain = copiedHostComponents.joined(separator: ".")
85 | copiedHostComponents = copiedHostComponents.dropFirst()
86 | } while !tldSet.contains(topLevelDomain ?? "")
87 |
88 | if topLevelDomain == host { topLevelDomain = nil }
89 |
90 | /// Extract the host name to each level domain
91 | let rootDomainRange: Range = (copiedHostComponents.startIndex - 2)..= 0 ? hostComponents[rootDomainRange].joined(separator: ".") : nil
93 |
94 | let secondDomainRange: Range = (rootDomainRange.lowerBound)..<(rootDomainRange.lowerBound + 1)
95 | let secondDomain: String? = secondDomainRange.startIndex >= 0 ? hostComponents[secondDomainRange].joined(separator: ".") : nil
96 |
97 | let subDomainRange: Range = (hostComponents.startIndex)..<(max(secondDomainRange.lowerBound, hostComponents.startIndex))
98 | let subDomain: String? = subDomainRange.endIndex >= 1 ? hostComponents[subDomainRange].joined(separator: ".") : nil
99 |
100 | return TLDResult(rootDomain: rootDomain,
101 | topLevelDomain: topLevelDomain,
102 | secondLevelDomain: secondDomain,
103 | subDomain: subDomain)
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/Sources/TLDExtract/PunycodeSwift/Extensions.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Created by kojirof on 2018-11-19.
3 | // Copyright (c) 2018 Gumob. All rights reserved.
4 | //
5 |
6 | import Foundation
7 |
8 | // For call-site convenience, everything is implemented on Substring; the String API is a wrapper around it
9 | internal extension Substring {
10 | /// Returns new string in punycode encoding (RFC 3492)
11 | ///
12 | /// - Returns: Punycode encoded string or nil if the string can't be encoded
13 | var punycodeEncoded: String? {
14 | return Punycode().encodePunycode(self)
15 | }
16 |
17 | /// Returns new string decoded from punycode representation (RFC 3492)
18 | ///
19 | /// - Returns: Original string or nil if the string doesn't contain correct encoding
20 | var punycodeDecoded: String? {
21 | return Punycode().decodePunycode(self)
22 | }
23 |
24 | /// Returns new string containing IDNA-encoded hostname
25 | ///
26 | /// - Returns: IDNA encoded hostname or nil if the string can't be encoded
27 | var idnaEncoded: String? {
28 | return Punycode().encodeIDNA(self)
29 | }
30 |
31 | /// Returns new string containing hostname decoded from IDNA representation
32 | ///
33 | /// - Returns: Original hostname or nil if the string doesn't contain correct encoding
34 | var idnaDecoded: String? {
35 | return Punycode().decodedIDNA(self)
36 | }
37 | }
38 |
39 | internal extension String {
40 |
41 | /// Returns new string in punycode encoding (RFC 3492)
42 | ///
43 | /// - Returns: Punycode encoded string or nil if the string can't be encoded
44 | var punycodeEncoded: String? {
45 | return self[.. String.Index? {
11 | var position: Index = endIndex
12 | while position > startIndex {
13 | position = self.index(before: position)
14 | if self[position] == element {
15 | return position
16 | }
17 | }
18 | return nil
19 | }
20 | }
21 |
22 | extension UnicodeScalar {
23 | internal var isValid: Bool { // true unless the value is a UTF-16 surrogate (U+D800...U+DFFF) or lies above U+10FFFF
24 | return value < 0xD800 || (value >= 0xE000 && value <= 0x10FFFF)
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/Sources/TLDExtract/PunycodeSwift/Punycode.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Created by kojirof on 2018-11-19.
3 | // Copyright (c) 2018 Gumob. All rights reserved.
4 | //
5 |
6 | import Foundation
7 |
8 | internal class Punycode {
9 |
10 | /// Punycode RFC 3492
11 | /// See https://www.ietf.org/rfc/rfc3492.txt for standard details
12 |
13 | private let base: Int = 36
14 | private let tMin: Int = 1
15 | private let tMax: Int = 26
16 | private let skew: Int = 38
17 | private let damp: Int = 700
18 | private let initialBias: Int = 72
19 | private let initialN: Int = 128
20 |
21 | /// RFC 3492 specific
22 | private let delimiter: Character = "-"
23 | private let lowercase: ClosedRange = "a"..."z"
24 | private let digits: ClosedRange = "0"..."9"
25 | private let lettersBase: UInt32 = Character("a").unicodeScalars.first!.value
26 | private let digitsBase: UInt32 = Character("0").unicodeScalars.first!.value
27 |
28 | /// IDNA
29 | private let ace: String = "xn--"
30 |
31 | private func adaptBias(_ delta: Int, _ numberOfPoints: Int, _ firstTime: Bool) -> Int {
32 | var delta: Int = delta
33 | if firstTime {
34 | delta /= damp
35 | } else {
36 | delta /= 2
37 | }
38 | delta += delta / numberOfPoints
39 | var k: Int = 0
40 | while delta > ((base - tMin) * tMax) / 2 {
41 | delta /= base - tMin
42 | k += base
43 | }
44 | return k + ((base - tMin + 1) * delta) / (delta + skew)
45 | }
46 |
47 | /// Maps a punycode character to index
48 | private func punycodeIndex(for character: Character) -> Int? {
49 | if lowercase.contains(character) {
50 | return Int(character.unicodeScalars.first!.value - lettersBase)
51 | } else if digits.contains(character) {
52 | return Int(character.unicodeScalars.first!.value - digitsBase) + 26 /// count of lowercase letters range
53 | } else {
54 | return nil
55 | }
56 | }
57 |
58 | /// Maps an index to corresponding punycode character
59 | private func punycodeValue(for digit: Int) -> Character? {
60 | guard digit < base else { return nil }
61 | if digit < 26 {
62 | return Character(UnicodeScalar(lettersBase.advanced(by: digit))!)
63 | } else {
64 | return Character(UnicodeScalar(digitsBase.advanced(by: digit - 26))!)
65 | }
66 | }
67 |
/// Decodes punycode encoded string to original representation
///
/// Arithmetic that could exceed `Int` is checked, so malformed or hostile
/// input yields nil instead of trapping.
///
/// - Parameter punycode: Punycode encoding (RFC 3492)
/// - Returns: Decoded string or nil if the input cannot be decoded
internal func decodePunycode(_ punycode: Substring) -> String? {
    var n: Int = initialN
    var i: Int = 0
    var bias: Int = initialBias
    var output: [Character] = []
    var inputPosition = punycode.startIndex

    /// Everything before the last delimiter is copied literally
    let delimiterPosition: Substring.Index = punycode.lastIndex(of: delimiter) ?? punycode.startIndex
    if delimiterPosition > punycode.startIndex {
        output.append(contentsOf: punycode[..<delimiterPosition])
        inputPosition = punycode.index(after: delimiterPosition)
    }
    var punycodeInput: Substring = punycode[inputPosition..<punycode.endIndex]
    while !punycodeInput.isEmpty {
        let oldI: Int = i
        var w: Int = 1
        var k: Int = base
        /// Consume one variable-length integer
        repeat {
            let character: Character = punycodeInput.removeFirst()
            guard let digit: Int = punycodeIndex(for: character) else {
                return nil /// Failing on badly formatted punycode
            }
            let (step, stepOverflow) = digit.multipliedReportingOverflow(by: w)
            let (sum, sumOverflow) = i.addingReportingOverflow(step)
            guard !stepOverflow, !sumOverflow else {
                return nil /// RFC 3492 requires failing on overflow
            }
            i = sum
            let t: Int = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias)
            if digit < t {
                break
            }
            let (weight, weightOverflow) = w.multipliedReportingOverflow(by: base - t)
            guard !weightOverflow else {
                return nil
            }
            w = weight
            k += base
        } while !punycodeInput.isEmpty
        bias = adaptBias(i - oldI, output.count + 1, oldI == 0)
        let (codePoint, codePointOverflow) = n.addingReportingOverflow(i / (output.count + 1))
        guard !codePointOverflow else {
            return nil
        }
        n = codePoint
        i %= (output.count + 1)
        guard n >= 0x80, let scalar = UnicodeScalar(n) else {
            return nil
        }
        output.insert(Character(scalar), at: i)
        i += 1
    }

    return String(output)
}
114 |
/// Encodes string to punycode (RFC 3492)
///
/// All bookkeeping is done in Unicode scalars. Counting `Character`s instead
/// would undercount inputs whose ASCII scalars merge into one grapheme
/// (e.g. "\r\n"), and the main loop would then never terminate.
///
/// - Parameter input: Input string
/// - Returns: Punycode encoded string, or nil if the input contains a scalar
///   that cannot be encoded
internal func encodePunycode(_ input: Substring) -> String? {
    let scalars: [Unicode.Scalar] = Array(input.unicodeScalars)
    var n: Int = initialN
    var delta: Int = 0
    var bias: Int = initialBias
    var output: String = ""

    /// Copy the basic (ASCII) code points verbatim, counting them as scalars
    var basic: Int = 0
    for scalar in scalars {
        if scalar.isASCII {
            output.unicodeScalars.append(scalar)
            basic += 1
        } else if !scalar.isValid {
            return nil /// Encountered a scalar out of acceptable range
        }
    }
    var handled: Int = basic
    if basic > 0 {
        output.append(delimiter)
    }

    while handled < scalars.count {
        /// Smallest code point not yet handled
        var minimumCodepoint: Int = 0x10FFFF
        for scalar: Unicode.Scalar in scalars {
            if scalar.value < minimumCodepoint && scalar.value >= n {
                minimumCodepoint = Int(scalar.value)
            }
        }
        delta += (minimumCodepoint - n) * (handled + 1)
        n = minimumCodepoint
        for scalar: Unicode.Scalar in scalars {
            if scalar.value < n {
                delta += 1
            } else if scalar.value == n {
                /// Emit delta as a variable-length integer
                var q: Int = delta
                var k: Int = base
                while true {
                    let t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias)
                    if q < t {
                        break
                    }
                    guard let character: Character = punycodeValue(for: t + ((q - t) % (base - t))) else { return nil }
                    output.append(character)
                    q = (q - t) / (base - t)
                    k += base
                }
                guard let character: Character = punycodeValue(for: q) else { return nil }
                output.append(character)
                bias = adaptBias(delta, handled + 1, handled == basic)
                delta = 0
                handled += 1
            }
        }
        delta += 1
        n += 1
    }

    return output
}
175 |
/// Returns new string containing IDNA-encoded hostname
///
/// The input is split on "."; a label containing characters outside
/// `CharacterSet.urlHostAllowed` is lowercased, punycode-encoded and prefixed
/// with the ACE prefix, every other label is copied unchanged.
///
/// - Returns: IDNA encoded hostname or nil if the string can't be encoded
internal func encodeIDNA(_ input: Substring) -> String? {
    var labels: [String] = []
    for label in input.split(separator: ".") {
        let needsEncoding: Bool = label.rangeOfCharacter(from: CharacterSet.urlHostAllowed.inverted) != nil
        guard needsEncoding else {
            labels.append(String(label))
            continue
        }
        guard let encoded: String = label.lowercased().punycodeEncoded else { return nil }
        labels.append(ace + encoded)
    }
    return labels.joined(separator: ".")
}
195 |
/// Returns new string containing hostname decoded from IDNA representation
///
/// The input is split on "."; a label starting with the ACE prefix has the
/// prefix removed and the remainder punycode-decoded, every other label is
/// copied unchanged.
///
/// - Returns: Original hostname or nil if the string doesn't contain correct encoding
internal func decodedIDNA(_ input: Substring) -> String? {
    var labels: [String] = []
    for label in input.split(separator: ".") {
        guard label.hasPrefix(ace) else {
            labels.append(String(label))
            continue
        }
        guard let decoded: String = label.dropFirst(ace.count).punycodeDecoded else { return nil }
        labels.append(decoded)
    }
    return labels.joined(separator: ".")
}
215 | }
216 |
--------------------------------------------------------------------------------
/Sources/TLDExtract/TLDExtract.swift:
--------------------------------------------------------------------------------
1 | //
2 | // TLDExtract.swift
3 | // TLDExtract
4 | //
5 | // Created by kojirof on 2018/11/16.
6 | // Copyright © 2018 Gumob. All rights reserved.
7 | //
8 |
9 | import Foundation
10 | #if canImport(FoundationNetworking)
11 | import FoundationNetworking
12 | #endif
13 |
/**
 Extracts the root domain, top level domain (TLD), second level domain and subdomain from a hostname.

 ```swift
 import TLDExtract

 let extractor = TLDExtract()
 let hostname: String = "www.ラーメン.寿司.co.jp"
 guard let result: TLDResult = extractor.parse(hostname) else { return }

 print(result.rootDomain)        // Optional("寿司.co.jp")
 print(result.topLevelDomain)    // Optional("co.jp")
 print(result.secondLevelDomain) // Optional("寿司")
 print(result.subDomain)         // Optional("www.ラーメン")
 ```

 This is possible thanks to a bundled version of the [Public Suffix List](https://publicsuffix.org/).

 You can also fetch the most up-to-date PSL with the async function ``fetchLatestPSL()``:

 ```swift
 try await extractor.fetchLatestPSL()
 ```
 */
public class TLDExtract {

    /// Parser built from the currently loaded Public Suffix List
    private var tldParser: TLDParser

    /// Initializes the extractor with information from the bundled Public Suffix List.
    ///
    /// The bundled resource is force-unwrapped and parsed with `try!`, so a
    /// missing or unparsable resource is a fatal error.
    public init() {
        let url = Bundle.module.url(forResource: "public_suffix_list", withExtension: "dat")!
        let data: Data = try! Data(contentsOf: url)
        let dataSet = try! PSLParser().parse(data: data)
        self.tldParser = TLDParser(dataSet: dataSet)
    }

    /// Downloads the latest [Public Suffix List](https://publicsuffix.org/list/public_suffix_list.dat) (PSL)
    /// and replaces the parser data with it, so the extractor works with the most recent rules.
    ///
    /// The current data is left untouched when the request or the parsing fails.
    ///
    /// - Throws: The transport error reported by `URLSession`, `URLError(.badServerResponse)`
    ///   when the server answers with a non-2xx status or with neither data nor an error,
    ///   or the error thrown by `PSLParser` when the payload cannot be parsed.
    @available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *)
    public func fetchLatestPSL() async throws {
        let url: URL = URL(string: "https://publicsuffix.org/list/public_suffix_list.dat")!
        let data: Data = try await withCheckedThrowingContinuation { continuation in
            URLSession.shared.dataTask(with: URLRequest(url: url)) { data, response, error in
                /// The continuation must be resumed exactly once on every path
                if let http = response as? HTTPURLResponse, !(200..<300).contains(http.statusCode) {
                    /// An error page must never be parsed as a suffix list
                    continuation.resume(throwing: URLError(.badServerResponse))
                } else if let data = data {
                    continuation.resume(returning: data)
                } else {
                    continuation.resume(throwing: error ?? URLError(.badServerResponse))
                }
            }.resume()
        }
        let dataSet = try PSLParser().parse(data: data)
        self.tldParser = TLDParser(dataSet: dataSet)
    }

    /// Parse a hostname to extract top-level domain and other properties
    /// - Parameters:
    ///   - input: type conforming to `TLDExtractable`
    ///   - quick: option to speed up by parsing only normal data excluding exceptions and wildcards
    /// - Returns: optional `TLDResult`; nil when `input` has no hostname or no rule matches
    public func parse(_ input: TLDExtractable, quick: Bool = false) -> TLDResult? {
        guard let host: String = input.hostname else { return nil }
        if quick {
            return self.tldParser.parseNormals(host: host)
        } else {
            /// Exception and wildcard rules are tried before the normal rules
            return self.tldParser.parseExceptionsAndWildcards(host: host) ??
                self.tldParser.parseNormals(host: host)
        }
    }
}
82 |
/**
 Types conforming to this protocol can be parsed with ``TLDExtract/TLDExtract``.

 Swift Foundation types `URL` and `String` already conform to ``TLDExtractable``.
 */
public protocol TLDExtractable {
    /// Hostname handed to the parser; ``TLDExtract/TLDExtract`` returns nil from `parse` when this is nil.
    var hostname: String? { get }
}
91 |
extension URL: TLDExtractable {

    /// Creates a URL from a string that may contain non-ASCII characters.
    ///
    /// The string is percent-encoded with the `.urlQueryAllowed` character set
    /// first; if that encoding is unavailable the raw string is used instead.
    init?(unicodeString: String) {
        let candidate: String = unicodeString.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? unicodeString
        self.init(string: candidate)
    }

    /// Hostname derived from the percent-decoded absolute string, using the `String` conformance.
    public var hostname: String? {
        return self.absoluteString.removingPercentEncoding?.hostname
    }
}
107 |
extension String: TLDExtractable {
    /// Hostname contained in the string.
    ///
    /// When the string starts with an optional scheme followed by "//", or looks
    /// like a bare hostname, the scheme prefix is removed and everything up to the
    /// first "/" is returned (nil if that part is empty). Any other string is
    /// handed to `URL(string:)` and its `host` is returned.
    public var hostname: String? {
        let schemePattern: String = "^(\\p{L}+:)?//"
        let hostPattern: String = "([0-9\\p{L}][0-9\\p{L}-]{1,61}\\.?)? ([\\p{L}-]* [0-9\\p{L}]+) (?!.*:$).*$".replace(" ", "")

        guard self.matches(schemePattern) || self.matches("^\(hostPattern)") else {
            return URL(string: self)?.host
        }

        let leading: String? = self.replace(schemePattern, "").components(separatedBy: "/").first
        guard let component: String = leading, !component.isEmpty else { return nil }
        return component
    }
}
125 |
fileprivate extension String {
    /// Tells whether the regular expression matches anywhere in the string.
    ///
    /// The search range is built from the string's indices, so it covers the whole
    /// string in UTF-16 units. A range based on the `Character` count is too short
    /// for strings with combining marks or non-BMP characters.
    ///
    /// - Parameter pattern: `NSRegularExpression` pattern
    /// - Returns: true if there is at least one match; false if there is none or
    ///   the pattern is invalid
    func matches(_ pattern: String) -> Bool {
        guard let regex: NSRegularExpression = try? NSRegularExpression(pattern: pattern) else { return false }
        let fullRange: NSRange = NSRange(self.startIndex..<self.endIndex, in: self)
        return regex.firstMatch(in: self, range: fullRange) != nil
    }

    /// Replaces every match of a regular expression.
    ///
    /// - Parameters:
    ///   - pattern: Regular expression pattern
    ///   - replacement: Replacement template
    /// - Returns: New string with all matches replaced
    func replace(_ pattern: String, _ replacement: String) -> String {
        return self.replacingOccurrences(of: pattern, with: replacement, options: .regularExpression)
    }
}
136 |
/// Result type returned by the parse method of ``TLDExtract/TLDExtract``, giving access to the top-level domain and related parts of a hostname.
///
/// For the hostname `www.ラーメン.寿司.co.jp` the parts are the ones shown below.
public struct TLDResult {
    /// e.g. `寿司.co.jp`
    public let rootDomain: String?
    /// e.g. `co.jp`
    public let topLevelDomain: String?
    /// e.g. `寿司`
    public let secondLevelDomain: String?
    /// e.g. `www.ラーメン`
    public let subDomain: String?
}
144 |
--------------------------------------------------------------------------------
/Sources/TLDExtract/TLDExtractError.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Created by kojirof on 2018-11-17.
3 | // Copyright (c) 2018 Gumob. All rights reserved.
4 | //
5 |
6 | import Foundation
7 |
/// Errors declared by the TLDExtract module.
enum TLDExtractError: Error {
    /// Public Suffix List parsing failure; `message` optionally carries an underlying error.
    /// NOTE(review): presumably thrown by `PSLParser` — confirm against Parser.swift.
    case pslParseError(message: Error?)
}
11 |
--------------------------------------------------------------------------------
/Tests/TLDExtractTests/PunycodeTests.swift:
--------------------------------------------------------------------------------
1 | import XCTest
2 |
3 | @testable import TLDExtract
4 |
/// Round-trip tests for the punycode and IDNA string helpers.
///
/// Comparisons use `XCTAssertEqual` so that a failure reports both the actual
/// and the expected value.
final class PunycodeTests: XCTestCase {

    /// Decoded sample strings, one per language
    let egyptian: String = "\u{0644}\u{064A}\u{0647}\u{0645}\u{0627}\u{0628}\u{062A}\u{0643}\u{0644}\u{0645}\u{0648}\u{0634}\u{0639}\u{0631}\u{0628}\u{064A}\u{061F}"
    let chineseSimplified: String = "\u{4ED6}\u{4EEC}\u{4E3A}\u{4EC0}\u{4E48}\u{4E0D}\u{8BF4}\u{4E2D}\u{6587}"
    let chineseTraditional: String = "\u{4ED6}\u{5011}\u{7232}\u{4EC0}\u{9EBD}\u{4E0D}\u{8AAA}\u{4E2D}\u{6587}"
    let czech: String = "\u{0050}\u{0072}\u{006F}\u{010D}\u{0070}\u{0072}\u{006F}\u{0073}\u{0074}\u{011B}\u{006E}\u{0065}\u{006D}\u{006C}\u{0075}\u{0076}\u{00ED}\u{010D}\u{0065}\u{0073}\u{006B}\u{0079}"
    let hebrew: String = "\u{05DC}\u{05DE}\u{05D4}\u{05D4}\u{05DD}\u{05E4}\u{05E9}\u{05D5}\u{05D8}\u{05DC}\u{05D0}\u{05DE}\u{05D3}\u{05D1}\u{05E8}\u{05D9}\u{05DD}\u{05E2}\u{05D1}\u{05E8}\u{05D9}\u{05EA}"
    let hindi: String = "\u{092F}\u{0939}\u{0932}\u{094B}\u{0917}\u{0939}\u{093F}\u{0928}\u{094D}\u{0926}\u{0940}\u{0915}\u{094D}\u{092F}\u{094B}\u{0902}\u{0928}\u{0939}\u{0940}\u{0902}\u{092C}\u{094B}\u{0932}\u{0938}\u{0915}\u{0924}\u{0947}\u{0939}\u{0948}\u{0902}"
    let japanese: String = "\u{306A}\u{305C}\u{307F}\u{3093}\u{306A}\u{65E5}\u{672C}\u{8A9E}\u{3092}\u{8A71}\u{3057}\u{3066}\u{304F}\u{308C}\u{306A}\u{3044}\u{306E}\u{304B}"
    let korean: String = "\u{C138}\u{ACC4}\u{C758}\u{BAA8}\u{B4E0}\u{C0AC}\u{B78C}\u{B4E4}\u{C774}\u{D55C}\u{AD6D}\u{C5B4}\u{B97C}\u{C774}\u{D574}\u{D55C}\u{B2E4}\u{BA74}\u{C5BC}\u{B9C8}\u{B098}\u{C88B}\u{C744}\u{AE4C}"
    let russian: String = "\u{043F}\u{043E}\u{0447}\u{0435}\u{043C}\u{0443}\u{0436}\u{0435}\u{043E}\u{043D}\u{0438}\u{043D}\u{0435}\u{0433}\u{043E}\u{0432}\u{043E}\u{0440}\u{044F}\u{0442}\u{043F}\u{043E}\u{0440}\u{0443}\u{0441}\u{0441}\u{043A}\u{0438}"
    let spanish: String = "\u{0050}\u{006F}\u{0072}\u{0071}\u{0075}\u{00E9}\u{006E}\u{006F}\u{0070}\u{0075}\u{0065}\u{0064}\u{0065}\u{006E}\u{0073}\u{0069}\u{006D}\u{0070}\u{006C}\u{0065}\u{006D}\u{0065}\u{006E}\u{0074}\u{0065}\u{0068}\u{0061}\u{0062}\u{006C}\u{0061}\u{0072}\u{0065}\u{006E}\u{0045}\u{0073}\u{0070}\u{0061}\u{00F1}\u{006F}\u{006C}"
    let vietnamese: String = "\u{0054}\u{1EA1}\u{0069}\u{0073}\u{0061}\u{006F}\u{0068}\u{1ECD}\u{006B}\u{0068}\u{00F4}\u{006E}\u{0067}\u{0074}\u{0068}\u{1EC3}\u{0063}\u{0068}\u{1EC9}\u{006E}\u{00F3}\u{0069}\u{0074}\u{0069}\u{1EBF}\u{006E}\u{0067}\u{0056}\u{0069}\u{1EC7}\u{0074}"

    /// Decoded Japanese samples mixing ASCII and non-ASCII code points
    let jBlockL: String = "\u{0033}\u{5E74}\u{0042}\u{7D44}\u{91D1}\u{516B}\u{5148}\u{751F}"
    let jBlockM: String = "\u{5B89}\u{5BA4}\u{5948}\u{7F8E}\u{6075}\u{002D}\u{0077}\u{0069}\u{0074}\u{0068}\u{002D}\u{0053}\u{0055}\u{0050}\u{0045}\u{0052}\u{002D}\u{004D}\u{004F}\u{004E}\u{004B}\u{0045}\u{0059}\u{0053}"
    let jBlockN: String = "\u{0048}\u{0065}\u{006C}\u{006C}\u{006F}\u{002D}\u{0041}\u{006E}\u{006F}\u{0074}\u{0068}\u{0065}\u{0072}\u{002D}\u{0057}\u{0061}\u{0079}\u{002D}\u{305D}\u{308C}\u{305E}\u{308C}\u{306E}\u{5834}\u{6240}"
    let jBlockO: String = "\u{3072}\u{3068}\u{3064}\u{5C4B}\u{6839}\u{306E}\u{4E0B}\u{0032}"
    let jBlockP: String = "\u{004D}\u{0061}\u{006A}\u{0069}\u{3067}\u{004B}\u{006F}\u{0069}\u{3059}\u{308B}\u{0035}\u{79D2}\u{524D}"
    let jBlockQ: String = "\u{30D1}\u{30D5}\u{30A3}\u{30FC}\u{0064}\u{0065}\u{30EB}\u{30F3}\u{30D0}"
    let jBlockR: String = "\u{305D}\u{306E}\u{30B9}\u{30D4}\u{30FC}\u{30C9}\u{3067}"

    /// ASCII-only input, a multi-scalar grapheme, and a dotted hostname
    let plain: String = "\u{002D}\u{003E}\u{0020}\u{0024}\u{0031}\u{002E}\u{0030}\u{0030}\u{0020}\u{003C}\u{002D}"
    let multiscalar: String = "🇨🇦"
    let idna: String = "погода-в-египте.рф.com"

    /// Expected punycode for each sample above
    let egyptianCode: String = "egbpdaj6bu4bxfgehfvwxn"
    let chineseSimplifiedCode: String = "ihqwcrb4cv8a8dqg056pqjye"
    let chineseTraditionalCode: String = "ihqwctvzc91f659drss3x8bo0yb"
    let czechCode: String = "Proprostnemluvesky-uyb24dma41a"
    let hebrewCode: String = "4dbcagdahymbxekheh6e0a7fei0b"
    let hindiCode: String = "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"
    let japaneseCode: String = "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"
    let koreanCode: String = "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"
    let russianCode: String = "b1abfaaepdrnnbgefbadotcwatmq2g4l"
    let spanishCode: String = "PorqunopuedensimplementehablarenEspaol-fmd56a"
    let vietnameseCode: String = "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"

    let jBlockLCode: String = "3B-ww4c5e180e575a65lsy2b"
    let jBlockMCode: String = "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"
    let jBlockNCode: String = "Hello-Another-Way--fc4qua05auwb3674vfr0b"
    let jBlockOCode: String = "2-u9tlzr9756bt3uc0v"
    let jBlockPCode: String = "MajiKoi5-783gue6qz075azm5e"
    let jBlockQCode: String = "de-jg4avhby1noc0d"
    let jBlockRCode: String = "d9juau41awczczp"

    let plainCode: String = "-> $1.00 <--"
    let multiscalarCode: String = "e77hd"

    let idnaCode: String = "xn-----6kcjcecmb3a1dbkl9b.xn--p1ai.com"

    /// Sample strings: https://tools.ietf.org/html/rfc3492#section-7

    /// Every sample must encode to its expected punycode / IDNA form.
    func testEncodingCorrectness() {
        XCTAssertEqual(egyptian.punycodeEncoded, egyptianCode)
        XCTAssertEqual(chineseSimplified.punycodeEncoded, chineseSimplifiedCode)
        XCTAssertEqual(chineseTraditional.punycodeEncoded, chineseTraditionalCode)
        XCTAssertEqual(czech.punycodeEncoded, czechCode)
        XCTAssertEqual(hebrew.punycodeEncoded, hebrewCode)
        XCTAssertEqual(hindi.punycodeEncoded, hindiCode)
        XCTAssertEqual(japanese.punycodeEncoded, japaneseCode)
        XCTAssertEqual(korean.punycodeEncoded, koreanCode)
        XCTAssertEqual(russian.punycodeEncoded, russianCode)
        XCTAssertEqual(spanish.punycodeEncoded, spanishCode)
        XCTAssertEqual(vietnamese.punycodeEncoded, vietnameseCode)

        XCTAssertEqual(jBlockL.punycodeEncoded, jBlockLCode)
        XCTAssertEqual(jBlockM.punycodeEncoded, jBlockMCode)
        XCTAssertEqual(jBlockN.punycodeEncoded, jBlockNCode)
        XCTAssertEqual(jBlockO.punycodeEncoded, jBlockOCode)
        XCTAssertEqual(jBlockP.punycodeEncoded, jBlockPCode)
        XCTAssertEqual(jBlockQ.punycodeEncoded, jBlockQCode)
        XCTAssertEqual(jBlockR.punycodeEncoded, jBlockRCode)

        XCTAssertEqual(multiscalar.punycodeEncoded, multiscalarCode)
        XCTAssertEqual(plain.punycodeEncoded, plainCode)

        XCTAssertEqual(idna.idnaEncoded, idnaCode)
    }

    /// Every expected code must decode back to its sample.
    func testDecodingCorrectness() {
        XCTAssertEqual(egyptianCode.punycodeDecoded, egyptian)
        XCTAssertEqual(chineseSimplifiedCode.punycodeDecoded, chineseSimplified)
        XCTAssertEqual(chineseTraditionalCode.punycodeDecoded, chineseTraditional)
        XCTAssertEqual(czechCode.punycodeDecoded, czech)
        XCTAssertEqual(hebrewCode.punycodeDecoded, hebrew)
        XCTAssertEqual(hindiCode.punycodeDecoded, hindi)
        XCTAssertEqual(japaneseCode.punycodeDecoded, japanese)
        XCTAssertEqual(koreanCode.punycodeDecoded, korean)
        XCTAssertEqual(russianCode.punycodeDecoded, russian)
        XCTAssertEqual(spanishCode.punycodeDecoded, spanish)
        XCTAssertEqual(vietnameseCode.punycodeDecoded, vietnamese)

        XCTAssertEqual(jBlockLCode.punycodeDecoded, jBlockL)
        XCTAssertEqual(jBlockMCode.punycodeDecoded, jBlockM)
        XCTAssertEqual(jBlockNCode.punycodeDecoded, jBlockN)
        XCTAssertEqual(jBlockOCode.punycodeDecoded, jBlockO)
        XCTAssertEqual(jBlockPCode.punycodeDecoded, jBlockP)
        XCTAssertEqual(jBlockQCode.punycodeDecoded, jBlockQ)
        XCTAssertEqual(jBlockRCode.punycodeDecoded, jBlockR)

        XCTAssertEqual(multiscalarCode.punycodeDecoded, multiscalar)
        XCTAssertEqual(plainCode.punycodeDecoded, plain)

        XCTAssertEqual(idnaCode.idnaDecoded, idna)
    }

    /// Decoding a malformed label must return normally instead of crashing the process.
    func testInvalidPunycodeIsNotFatal() {
        let invalidPunycode: String = "xn--g"
        XCTAssertNoThrow(invalidPunycode.idnaDecoded)
    }

//    func testFoo1() {
//        var sushi: String = "寿司"
//
//        sushi = sushi.idnaEncoded!
//        print(sushi) // xn--sprr0q
//
//        sushi = sushi.idnaDecoded!
//        print(sushi) // "寿司"
//    }
//
//    func testFoo2() {
//        var sushi: String = "寿司"
//
//        sushi = sushi.punycodeEncoded!
//        print(sushi) // sprr0q
//
//        sushi = sushi.punycodeDecoded!
//        print(sushi) // "寿司"
//    }
//
//    func testFoo3() {
//        var sushi: Substring = "寿司大好き".prefix(2)
//        print(sushi) // "寿司"
//
//        var sushiStr = sushi.idnaEncoded!
//        print(sushiStr) // xn--sprr0q
//
//        sushiStr = sushiStr.idnaDecoded!
//        print(sushiStr) // "寿司"
//    }
//
//    func testFoo4() {
//        var sushi: Substring = "寿司大好き".prefix(2)
//        print(sushi) // "寿司"
//
//        var sushiStr = sushi.punycodeEncoded!
//        print(sushiStr) // sprr0q
//
//        sushiStr = sushiStr.punycodeDecoded!
//        print(sushiStr) // "寿司"
//    }
}
159 |
--------------------------------------------------------------------------------
/Tests/TLDExtractTests/TLDExtractTests.swift:
--------------------------------------------------------------------------------
1 | //
2 | // TLDExtractTests.swift
3 | // TLDExtractTests
4 | //
5 | // Created by kojirof on 2018/11/16.
6 | // Copyright © 2018 Gumob. All rights reserved.
7 | //
8 |
9 | import XCTest
10 | @testable import TLDExtract
11 |
12 | class TLDExtractTests: XCTestCase {
13 |
14 | var tldExtractor: TLDExtract!
15 |
/// Creates a fresh extractor, backed by the bundled Public Suffix List, before each test.
override func setUp() {
    super.setUp()
    self.tldExtractor = TLDExtract()
}
20 |
/// Measures how long it takes to construct an extractor.
func testMeasureSetupTime() {
    measure {
        _ = TLDExtract()
    }
}
26 |
/// Fetches the latest Public Suffix List; the test fails if the call throws.
@available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *)
func testFetchLatestPSL() async throws {
    let extractor = TLDExtract()
    try await extractor.fetchLatestPSL()
}
31 |
/// The PSL parser must throw for empty data and for nil data.
func testPSLParser() {
    XCTAssertThrowsError(try PSLParser().parse(data: Data()))
    XCTAssertThrowsError(try PSLParser().parse(data: nil))
}
36 |
/// Exercises the comparison and equality operators of `PSLData` on exception and wildcard rules.
func testPSLPriority() {
    let yokohamaException = PSLData(raw: "!city.yokohama.jp")
    let ckException = PSLData(raw: "!www.ck")
    let yokohamaWildcard = PSLData(raw: "*.yokohama.jp")
    let ckWildcard = PSLData(raw: "*.ck")

    /// Ordering between the two exception rules
    XCTAssertTrue(yokohamaException > ckException)
    XCTAssertTrue(yokohamaException >= ckException)
    XCTAssertFalse(yokohamaException < ckException)
    XCTAssertFalse(yokohamaException <= ckException)

    /// Every rule equals itself
    XCTAssertTrue(yokohamaException == yokohamaException)
    XCTAssertTrue(ckException == ckException)

    XCTAssertTrue(yokohamaWildcard == yokohamaWildcard)
    XCTAssertTrue(ckWildcard == ckWildcard)

    /// Distinct rules never compare equal
    XCTAssertFalse(yokohamaWildcard == ckWildcard)

    XCTAssertFalse(yokohamaException == ckException)

    XCTAssertFalse(yokohamaException == yokohamaWildcard)
    XCTAssertFalse(yokohamaException == ckWildcard)

    XCTAssertFalse(ckException == yokohamaWildcard)
    XCTAssertFalse(ckException == ckWildcard)
}
64 |
/// Measures the hostname extraction checks for both `URL` and `String` inputs.
func testMeasureExtractable() {
    measure {
        self.testExtractableURL()
        self.testExtractableString()
    }
}
71 |
/// Measures the string parsing checks with `quick` disabled.
func testMeasureParser() {
    measure {
        self.testTLDExtractString(quick: false)
    }
}
77 |
/// Measures the string parsing checks with `quick` enabled.
func testMeasureParserQuick() {
    measure {
        self.testTLDExtractString(quick: true)
    }
}
83 |
84 | /// Test TLDExtractable.
85 |
/// Checks the hostname extracted from `String` inputs: full URLs, scheme-less
/// URLs, bare hostnames, and internationalised names in Unicode and punycode form.
///
/// NOTE(review): `file` and `line` are accepted but not forwarded to
/// `checkTLDExtractable` in this body — confirm whether that is intended.
func testExtractableString(file: StaticString = #file, line: UInt = #line) {
    /// URL
    checkTLDExtractable("http://example.com", "example.com")
    checkTLDExtractable("https://example.com", "example.com")

    checkTLDExtractable("http://www.example.com", "www.example.com")
    checkTLDExtractable("https://www.example.com", "www.example.com")

    checkTLDExtractable("http://www.example.com/", "www.example.com")
    checkTLDExtractable("https://www.example.com/", "www.example.com")

    checkTLDExtractable("http://example.com/a/b/", "example.com")
    checkTLDExtractable("http://example.com/a/b/index.html", "example.com")

    /// URL without scheme
    checkTLDExtractable("//example.com", "example.com")
    checkTLDExtractable("//example.com/a/b/", "example.com")
    checkTLDExtractable("//example.com/a/b/index.html", "example.com")

    /// URL with localhost
    checkTLDExtractable("http://localhost", "localhost")
    checkTLDExtractable("//localhost", "localhost")
    checkTLDExtractable("localhost", "localhost")

    /// Only URL scheme
    checkTLDExtractable("http", "http")
    checkTLDExtractable("http:", nil)
    checkTLDExtractable("http://", nil)

    /// Only TLD
    checkTLDExtractable("com", "com")

    /// Hostname only
    checkTLDExtractable("example.com", "example.com")
    checkTLDExtractable("www.example.com", "www.example.com")

    /// IDNA
    checkTLDExtractable("表参道.青山.ファッション", "表参道.青山.ファッション")
    checkTLDExtractable("//表参道.青山.ファッション", "表参道.青山.ファッション")
    checkTLDExtractable("http://表参道.青山.ファッション", "表参道.青山.ファッション")
    checkTLDExtractable("http://表参道.青山.ファッション/横浜/ヤンキー/", "表参道.青山.ファッション")
    checkTLDExtractable("青山.ファッション/川崎/チンピラ/", "青山.ファッション")
    checkTLDExtractable("ファッション/埼玉/ダサイタマ/", "ファッション")
    /// Same as above, but punycoded
    checkTLDExtractable("xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c", "xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c")
    checkTLDExtractable("//xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c", "xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c")
    checkTLDExtractable("http://xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c", "xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c")
    checkTLDExtractable("http://xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c/xn--4cw21e/xn--nckyfvbwb/", "xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c")
    checkTLDExtractable("xn--rht138k.xn--bck1b9a5dre4c/xn--8ltrs/xn--7ckvb7cub/", "xn--rht138k.xn--bck1b9a5dre4c")
    checkTLDExtractable("xn--bck1b9a5dre4c/xn--5js045d/xn--eck7a5ab7m/", "xn--bck1b9a5dre4c")

    /// IDNA - all languages
    checkTLDExtractable("http://افغانستا.museum", "افغانستا.museum")
    checkTLDExtractable("http://الجزائر.museum", "الجزائر.museum")
    checkTLDExtractable("http://österreich.museum", "österreich.museum")
    checkTLDExtractable("http://বাংলাদেশ.museum", "বাংলাদেশ.museum")
    checkTLDExtractable("http://беларусь.museum", "беларусь.museum")
    checkTLDExtractable("http://belgië.museum", "belgië.museum")
    checkTLDExtractable("http://българия.museum", "българия.museum")
    checkTLDExtractable("http://تشادر.museum", "تشادر.museum")
    checkTLDExtractable("http://中国.museum", "中国.museum")
    checkTLDExtractable("http://القمر.museum", "القمر.museum")
    checkTLDExtractable("http://κυπρος.museum", "κυπρος.museum")
    checkTLDExtractable("http://českárepublika.museum", "českárepublika.museum")
    checkTLDExtractable("http://مصر.museum", "مصر.museum")
    checkTLDExtractable("http://ελλάδα.museum", "ελλάδα.museum")
    checkTLDExtractable("http://magyarország.museum", "magyarország.museum")
    checkTLDExtractable("http://ísland.museum", "ísland.museum")
    checkTLDExtractable("http://भारत.museum", "भारत.museum")
    checkTLDExtractable("http://ايران.museum", "ايران.museum")
    checkTLDExtractable("http://éire.museum", "éire.museum")
    checkTLDExtractable("http://איקו״ם.ישראל.museum", "איקו״ם.ישראל.museum")
    checkTLDExtractable("http://日本.museum", "日本.museum")
    checkTLDExtractable("http://الأردن.museum", "الأردن.museum")
    checkTLDExtractable("http://қазақстан.museum", "қазақстан.museum")
    checkTLDExtractable("http://한국.museum", "한국.museum")
    checkTLDExtractable("http://кыргызстан.museum", "кыргызстан.museum")
    checkTLDExtractable("http://ລາວ.museum", "ລາວ.museum")
    checkTLDExtractable("http://لبنان.museum", "لبنان.museum")
    checkTLDExtractable("http://македонија.museum", "македонија.museum")
    checkTLDExtractable("http://موريتانيا.museum", "موريتانيا.museum")
    checkTLDExtractable("http://méxico.museum", "méxico.museum")
    checkTLDExtractable("http://монголулс.museum", "монголулс.museum")
    checkTLDExtractable("http://المغرب.museum", "المغرب.museum")
    checkTLDExtractable("http://नेपाल.museum", "नेपाल.museum")
    checkTLDExtractable("http://عمان.museum", "عمان.museum")
    checkTLDExtractable("http://قطر.museum", "قطر.museum")
    checkTLDExtractable("http://românia.museum", "românia.museum")
    checkTLDExtractable("http://россия.иком.museum", "россия.иком.museum")
    checkTLDExtractable("http://србијаицрнагора.иком.museum", "србијаицрнагора.иком.museum")
    checkTLDExtractable("http://இலங்கை.museum", "இலங்கை.museum")
    checkTLDExtractable("http://españa.museum", "españa.museum")
    checkTLDExtractable("http://ไทย.museum", "ไทย.museum")
    checkTLDExtractable("http://تونس.museum", "تونس.museum")
    checkTLDExtractable("http://türkiye.museum", "türkiye.museum")
    checkTLDExtractable("http://украина.museum", "украина.museum")
    checkTLDExtractable("http://việtnam.museum", "việtnam.museum")
    /// Same as above, but punycoded
    checkTLDExtractable("http://xn--mgbaal8b0b9b2b.museum/", "xn--mgbaal8b0b9b2b.museum")
    checkTLDExtractable("http://xn--lgbbat1ad8j.museum/", "xn--lgbbat1ad8j.museum")
    checkTLDExtractable("http://xn--sterreich-z7a.museum/", "xn--sterreich-z7a.museum")
    checkTLDExtractable("http://xn--54b6eqazv8bc7e.museum/", "xn--54b6eqazv8bc7e.museum")
    checkTLDExtractable("http://xn--80abmy0agn7e.museum/", "xn--80abmy0agn7e.museum")
    checkTLDExtractable("http://xn--belgi-rsa.museum/", "xn--belgi-rsa.museum")
    checkTLDExtractable("http://xn--80abgvm6a7d2b.museum/", "xn--80abgvm6a7d2b.museum")
    checkTLDExtractable("http://xn--mgbfqim.museum/", "xn--mgbfqim.museum")
    checkTLDExtractable("http://xn--fiqs8s.museum/", "xn--fiqs8s.museum")
    checkTLDExtractable("http://xn--mgbu4chg.museum/", "xn--mgbu4chg.museum")
    checkTLDExtractable("http://xn--vxakceli.museum/", "xn--vxakceli.museum")
    checkTLDExtractable("http://xn--eskrepublika-ebb62d.museum/", "xn--eskrepublika-ebb62d.museum")
    checkTLDExtractable("http://xn--wgbh1c.museum/", "xn--wgbh1c.museum")
    checkTLDExtractable("http://xn--hxakic4aa.museum/", "xn--hxakic4aa.museum")
    checkTLDExtractable("http://xn--magyarorszg-t7a.museum/", "xn--magyarorszg-t7a.museum")
    checkTLDExtractable("http://xn--sland-ysa.museum/", "xn--sland-ysa.museum")
    checkTLDExtractable("http://xn--h2brj9c.museum/", "xn--h2brj9c.museum")
    checkTLDExtractable("http://xn--mgba3a4fra.museum/", "xn--mgba3a4fra.museum")
    checkTLDExtractable("http://xn--ire-9la.museum/", "xn--ire-9la.museum")
    checkTLDExtractable("http://xn--4dbklr2c8d.xn--4dbrk0ce.museum/", "xn--4dbklr2c8d.xn--4dbrk0ce.museum")
    checkTLDExtractable("http://xn--wgv71a.museum/", "xn--wgv71a.museum")
    checkTLDExtractable("http://xn--igbhzh7gpa.museum/", "xn--igbhzh7gpa.museum")
    checkTLDExtractable("http://xn--80aaa0a6awh12ed.museum/", "xn--80aaa0a6awh12ed.museum")
    checkTLDExtractable("http://xn--3e0b707e.museum/", "xn--3e0b707e.museum")
    checkTLDExtractable("http://xn--80afmksoji0fc.museum/", "xn--80afmksoji0fc.museum")
    checkTLDExtractable("http://xn--q7ce6a.museum/", "xn--q7ce6a.museum")
    checkTLDExtractable("http://xn--mgbb7fjb.museum/", "xn--mgbb7fjb.museum")
    checkTLDExtractable("http://xn--80aaldqjmmi6x.museum/", "xn--80aaldqjmmi6x.museum")
    checkTLDExtractable("http://xn--mgbah1a3hjkrd.museum/", "xn--mgbah1a3hjkrd.museum")
    checkTLDExtractable("http://xn--mxico-bsa.museum/", "xn--mxico-bsa.museum")
    checkTLDExtractable("http://xn--c1aqabffc0aq.museum/", "xn--c1aqabffc0aq.museum")
    checkTLDExtractable("http://xn--mgbc0a9azcg.museum/", "xn--mgbc0a9azcg.museum")
    checkTLDExtractable("http://xn--l2bey1c2b.museum/", "xn--l2bey1c2b.museum")
    checkTLDExtractable("http://xn--mgb9awbf.museum/", "xn--mgb9awbf.museum")
    checkTLDExtractable("http://xn--wgbl6a.museum/", "xn--wgbl6a.museum")
    checkTLDExtractable("http://xn--romnia-yta.museum/", "xn--romnia-yta.museum")
    checkTLDExtractable("http://xn--h1alffa9f.xn--h1aegh.museum/", "xn--h1alffa9f.xn--h1aegh.museum")
    checkTLDExtractable("http://xn--80aaabm1ab4blmeec9e7n.xn--h1aegh.museum/", "xn--80aaabm1ab4blmeec9e7n.xn--h1aegh.museum")
    checkTLDExtractable("http://xn--xkc2al3hye2a.museum/", "xn--xkc2al3hye2a.museum")
    checkTLDExtractable("http://xn--espaa-rta.museum/", "xn--espaa-rta.museum")
    checkTLDExtractable("http://xn--o3cw4h.museum/", "xn--o3cw4h.museum")
    checkTLDExtractable("http://xn--pgbs0dh.museum/", "xn--pgbs0dh.museum")
    checkTLDExtractable("http://xn--trkiye-3ya.museum/", "xn--trkiye-3ya.museum")
    checkTLDExtractable("http://xn--80aaxgrpt.museum/", "xn--80aaxgrpt.museum")
    checkTLDExtractable("http://xn--vitnam-jk8b.museum/", "xn--vitnam-jk8b.museum")
}
230 |
231 | func testExtractableURL(file: StaticString = #file, line: UInt = #line) { // NOTE(review): `file` and `line` are never referenced in this body - confirm whether they were meant to be forwarded to `checkTLDExtractable`, and that XCTest still picks up a test method that declares parameters.
232 | /// Passes `URL?` inputs to `checkTLDExtractable` together with the expected string (or nil). First group: http/https URLs, with and without a `www` label or a path.
233 | checkTLDExtractable(URL(string: "http://example.com"), "example.com")
234 | checkTLDExtractable(URL(string: "https://example.com"), "example.com")
235 |
236 | checkTLDExtractable(URL(string: "http://www.example.com"), "www.example.com")
237 | checkTLDExtractable(URL(string: "https://www.example.com"), "www.example.com")
238 |
239 | checkTLDExtractable(URL(string: "http://example.com/a/b/"), "example.com")
240 | checkTLDExtractable(URL(string: "http://example.com/a/b/index.html"), "example.com")
241 |
242 | /// Scheme-relative URLs (leading `//`, no scheme).
243 | checkTLDExtractable(URL(string: "//example.com"), "example.com")
244 | checkTLDExtractable(URL(string: "//example.com/a/b/"), "example.com")
245 | checkTLDExtractable(URL(string: "//example.com/a/b/index.html"), "example.com")
246 |
247 | /// `localhost`: with a scheme, scheme-relative, and bare.
248 | checkTLDExtractable(URL(string: "http://localhost"), "localhost")
249 | checkTLDExtractable(URL(string: "//localhost"), "localhost")
250 | checkTLDExtractable(URL(string: "localhost"), "localhost")
251 |
252 | /// Only a URL scheme: a bare `http` is expected back unchanged, while `http:` and `http://` expect nil.
253 | checkTLDExtractable(URL(string: "http"), "http")
254 | checkTLDExtractable(URL(string: "http:"), nil)
255 | checkTLDExtractable(URL(string: "http://"), nil)
256 |
257 | /// Only a TLD.
258 | checkTLDExtractable(URL(string: "com"), "com")
259 |
260 | /// Hostname only (no scheme, no path).
261 | checkTLDExtractable(URL(string: "example.com"), "example.com")
262 | checkTLDExtractable(URL(string: "www.example.com"), "www.example.com")
263 |
264 | /// IDNA: Unicode inputs built with `URL(unicodeString:)`; the expected value stays in Unicode form and carries no path.
265 | checkTLDExtractable(URL(unicodeString: "表参道.青山.ファッション"), "表参道.青山.ファッション")
266 | checkTLDExtractable(URL(unicodeString: "//表参道.青山.ファッション"), "表参道.青山.ファッション")
267 | checkTLDExtractable(URL(unicodeString: "http://表参道.青山.ファッション"), "表参道.青山.ファッション")
268 | checkTLDExtractable(URL(unicodeString: "http://表参道.青山.ファッション/横浜/ヤンキー/"), "表参道.青山.ファッション")
269 | checkTLDExtractable(URL(unicodeString: "青山.ファッション/川崎/チンピラ/"), "青山.ファッション")
270 | checkTLDExtractable(URL(unicodeString: "ファッション/埼玉/ダサイタマ/"), "ファッション")
271 | /// Same as above, but punycoded; the expected values are punycoded as well.
272 | checkTLDExtractable(URL(string: "xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c"), "xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c")
273 | checkTLDExtractable(URL(string: "//xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c"), "xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c")
274 | checkTLDExtractable(URL(string: "http://xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c"), "xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c")
275 | checkTLDExtractable(URL(string: "http://xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c/xn--4cw21e/xn--nckyfvbwb/"), "xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c")
276 | checkTLDExtractable(URL(string: "xn--rht138k.xn--bck1b9a5dre4c/xn--8ltrs/xn--7ckvb7cub/"), "xn--rht138k.xn--bck1b9a5dre4c")
277 | checkTLDExtractable(URL(string: "xn--bck1b9a5dre4c/xn--5js045d/xn--eck7a5ab7m/"), "xn--bck1b9a5dre4c")
278 |
279 | /// IDNA - hosts in many languages and scripts, all under `.museum`.
280 | checkTLDExtractable(URL(unicodeString: "http://افغانستا.museum"), "افغانستا.museum")
281 | checkTLDExtractable(URL(unicodeString: "http://الجزائر.museum"), "الجزائر.museum")
282 | checkTLDExtractable(URL(unicodeString: "http://österreich.museum"), "österreich.museum")
283 | checkTLDExtractable(URL(unicodeString: "http://বাংলাদেশ.museum"), "বাংলাদেশ.museum")
284 | checkTLDExtractable(URL(unicodeString: "http://беларусь.museum"), "беларусь.museum")
285 | checkTLDExtractable(URL(unicodeString: "http://belgië.museum"), "belgië.museum")
286 | checkTLDExtractable(URL(unicodeString: "http://българия.museum"), "българия.museum")
287 | checkTLDExtractable(URL(unicodeString: "http://تشادر.museum"), "تشادر.museum")
288 | checkTLDExtractable(URL(unicodeString: "http://中国.museum"), "中国.museum")
289 | checkTLDExtractable(URL(unicodeString: "http://القمر.museum"), "القمر.museum")
290 | checkTLDExtractable(URL(unicodeString: "http://κυπρος.museum"), "κυπρος.museum")
291 | checkTLDExtractable(URL(unicodeString: "http://českárepublika.museum"), "českárepublika.museum")
292 | checkTLDExtractable(URL(unicodeString: "http://مصر.museum"), "مصر.museum")
293 | checkTLDExtractable(URL(unicodeString: "http://ελλάδα.museum"), "ελλάδα.museum")
294 | checkTLDExtractable(URL(unicodeString: "http://magyarország.museum"), "magyarország.museum")
295 | checkTLDExtractable(URL(unicodeString: "http://ísland.museum"), "ísland.museum")
296 | checkTLDExtractable(URL(unicodeString: "http://भारत.museum"), "भारत.museum")
297 | checkTLDExtractable(URL(unicodeString: "http://ايران.museum"), "ايران.museum")
298 | checkTLDExtractable(URL(unicodeString: "http://éire.museum"), "éire.museum")
299 | checkTLDExtractable(URL(unicodeString: "http://איקו״ם.ישראל.museum"), "איקו״ם.ישראל.museum")
300 | checkTLDExtractable(URL(unicodeString: "http://日本.museum"), "日本.museum")
301 | checkTLDExtractable(URL(unicodeString: "http://الأردن.museum"), "الأردن.museum")
302 | checkTLDExtractable(URL(unicodeString: "http://қазақстан.museum"), "қазақстан.museum")
303 | checkTLDExtractable(URL(unicodeString: "http://한국.museum"), "한국.museum")
304 | checkTLDExtractable(URL(unicodeString: "http://кыргызстан.museum"), "кыргызстан.museum")
305 | checkTLDExtractable(URL(unicodeString: "http://ລາວ.museum"), "ລາວ.museum")
306 | checkTLDExtractable(URL(unicodeString: "http://لبنان.museum"), "لبنان.museum")
307 | checkTLDExtractable(URL(unicodeString: "http://македонија.museum"), "македонија.museum")
308 | checkTLDExtractable(URL(unicodeString: "http://موريتانيا.museum"), "موريتانيا.museum")
309 | checkTLDExtractable(URL(unicodeString: "http://méxico.museum"), "méxico.museum")
310 | checkTLDExtractable(URL(unicodeString: "http://монголулс.museum"), "монголулс.museum")
311 | checkTLDExtractable(URL(unicodeString: "http://المغرب.museum"), "المغرب.museum")
312 | checkTLDExtractable(URL(unicodeString: "http://नेपाल.museum"), "नेपाल.museum")
313 | checkTLDExtractable(URL(unicodeString: "http://عمان.museum"), "عمان.museum")
314 | checkTLDExtractable(URL(unicodeString: "http://قطر.museum"), "قطر.museum")
315 | checkTLDExtractable(URL(unicodeString: "http://românia.museum"), "românia.museum")
316 | checkTLDExtractable(URL(unicodeString: "http://россия.иком.museum"), "россия.иком.museum")
317 | checkTLDExtractable(URL(unicodeString: "http://србијаицрнагора.иком.museum"), "србијаицрнагора.иком.museum")
318 | checkTLDExtractable(URL(unicodeString: "http://இலங்கை.museum"), "இலங்கை.museum")
319 | checkTLDExtractable(URL(unicodeString: "http://españa.museum"), "españa.museum")
320 | checkTLDExtractable(URL(unicodeString: "http://ไทย.museum"), "ไทย.museum")
321 | checkTLDExtractable(URL(unicodeString: "http://تونس.museum"), "تونس.museum")
322 | checkTLDExtractable(URL(unicodeString: "http://türkiye.museum"), "türkiye.museum")
323 | checkTLDExtractable(URL(unicodeString: "http://украина.museum"), "украина.museum")
324 | checkTLDExtractable(URL(unicodeString: "http://việtnam.museum"), "việtnam.museum")
325 | /// Same as above, but punycoded; the expected values are punycoded as well and drop the trailing slash.
326 | checkTLDExtractable(URL(string: "http://xn--mgbaal8b0b9b2b.museum/"), "xn--mgbaal8b0b9b2b.museum")
327 | checkTLDExtractable(URL(string: "http://xn--lgbbat1ad8j.museum/"), "xn--lgbbat1ad8j.museum")
328 | checkTLDExtractable(URL(string: "http://xn--sterreich-z7a.museum/"), "xn--sterreich-z7a.museum")
329 | checkTLDExtractable(URL(string: "http://xn--54b6eqazv8bc7e.museum/"), "xn--54b6eqazv8bc7e.museum")
330 | checkTLDExtractable(URL(string: "http://xn--80abmy0agn7e.museum/"), "xn--80abmy0agn7e.museum")
331 | checkTLDExtractable(URL(string: "http://xn--belgi-rsa.museum/"), "xn--belgi-rsa.museum")
332 | checkTLDExtractable(URL(string: "http://xn--80abgvm6a7d2b.museum/"), "xn--80abgvm6a7d2b.museum")
333 | checkTLDExtractable(URL(string: "http://xn--mgbfqim.museum/"), "xn--mgbfqim.museum")
334 | checkTLDExtractable(URL(string: "http://xn--fiqs8s.museum/"), "xn--fiqs8s.museum")
335 | checkTLDExtractable(URL(string: "http://xn--mgbu4chg.museum/"), "xn--mgbu4chg.museum")
336 | checkTLDExtractable(URL(string: "http://xn--vxakceli.museum/"), "xn--vxakceli.museum")
337 | checkTLDExtractable(URL(string: "http://xn--eskrepublika-ebb62d.museum/"), "xn--eskrepublika-ebb62d.museum")
338 | checkTLDExtractable(URL(string: "http://xn--wgbh1c.museum/"), "xn--wgbh1c.museum")
339 | checkTLDExtractable(URL(string: "http://xn--hxakic4aa.museum/"), "xn--hxakic4aa.museum")
340 | checkTLDExtractable(URL(string: "http://xn--magyarorszg-t7a.museum/"), "xn--magyarorszg-t7a.museum")
341 | checkTLDExtractable(URL(string: "http://xn--sland-ysa.museum/"), "xn--sland-ysa.museum")
342 | checkTLDExtractable(URL(string: "http://xn--h2brj9c.museum/"), "xn--h2brj9c.museum")
343 | checkTLDExtractable(URL(string: "http://xn--mgba3a4fra.museum/"), "xn--mgba3a4fra.museum")
344 | checkTLDExtractable(URL(string: "http://xn--ire-9la.museum/"), "xn--ire-9la.museum")
345 | checkTLDExtractable(URL(string: "http://xn--4dbklr2c8d.xn--4dbrk0ce.museum/"), "xn--4dbklr2c8d.xn--4dbrk0ce.museum")
346 | checkTLDExtractable(URL(string: "http://xn--wgv71a.museum/"), "xn--wgv71a.museum")
347 | checkTLDExtractable(URL(string: "http://xn--igbhzh7gpa.museum/"), "xn--igbhzh7gpa.museum")
348 | checkTLDExtractable(URL(string: "http://xn--80aaa0a6awh12ed.museum/"), "xn--80aaa0a6awh12ed.museum")
349 | checkTLDExtractable(URL(string: "http://xn--3e0b707e.museum/"), "xn--3e0b707e.museum")
350 | checkTLDExtractable(URL(string: "http://xn--80afmksoji0fc.museum/"), "xn--80afmksoji0fc.museum")
351 | checkTLDExtractable(URL(string: "http://xn--q7ce6a.museum/"), "xn--q7ce6a.museum")
352 | checkTLDExtractable(URL(string: "http://xn--mgbb7fjb.museum/"), "xn--mgbb7fjb.museum")
353 | checkTLDExtractable(URL(string: "http://xn--80aaldqjmmi6x.museum/"), "xn--80aaldqjmmi6x.museum")
354 | checkTLDExtractable(URL(string: "http://xn--mgbah1a3hjkrd.museum/"), "xn--mgbah1a3hjkrd.museum")
355 | checkTLDExtractable(URL(string: "http://xn--mxico-bsa.museum/"), "xn--mxico-bsa.museum")
356 | checkTLDExtractable(URL(string: "http://xn--c1aqabffc0aq.museum/"), "xn--c1aqabffc0aq.museum")
357 | checkTLDExtractable(URL(string: "http://xn--mgbc0a9azcg.museum/"), "xn--mgbc0a9azcg.museum")
358 | checkTLDExtractable(URL(string: "http://xn--l2bey1c2b.museum/"), "xn--l2bey1c2b.museum")
359 | checkTLDExtractable(URL(string: "http://xn--mgb9awbf.museum/"), "xn--mgb9awbf.museum")
360 | checkTLDExtractable(URL(string: "http://xn--wgbl6a.museum/"), "xn--wgbl6a.museum")
361 | checkTLDExtractable(URL(string: "http://xn--romnia-yta.museum/"), "xn--romnia-yta.museum")
362 | checkTLDExtractable(URL(string: "http://xn--h1alffa9f.xn--h1aegh.museum/"), "xn--h1alffa9f.xn--h1aegh.museum")
363 | checkTLDExtractable(URL(string: "http://xn--80aaabm1ab4blmeec9e7n.xn--h1aegh.museum/"), "xn--80aaabm1ab4blmeec9e7n.xn--h1aegh.museum")
364 | checkTLDExtractable(URL(string: "http://xn--xkc2al3hye2a.museum/"), "xn--xkc2al3hye2a.museum")
365 | checkTLDExtractable(URL(string: "http://xn--espaa-rta.museum/"), "xn--espaa-rta.museum")
366 | checkTLDExtractable(URL(string: "http://xn--o3cw4h.museum/"), "xn--o3cw4h.museum")
367 | checkTLDExtractable(URL(string: "http://xn--pgbs0dh.museum/"), "xn--pgbs0dh.museum")
368 | checkTLDExtractable(URL(string: "http://xn--trkiye-3ya.museum/"), "xn--trkiye-3ya.museum")
369 | checkTLDExtractable(URL(string: "http://xn--80aaxgrpt.museum/"), "xn--80aaxgrpt.museum")
370 | checkTLDExtractable(URL(string: "http://xn--vitnam-jk8b.museum/"), "xn--vitnam-jk8b.museum")
371 | }
372 |
373 | /// Runs the common Public Suffix List (PSL) unit-test cases on `String?` inputs through `checkPublicSuffix`, forwarding `quick` to every call; `quick` also selects which expectations apply in the wildcard/exception sections.
374 | /// Source: https://raw.githubusercontent.com/publicsuffix/list/master/tests/test_psl.txt
375 | func testTLDExtractString(quick: Bool) { // NOTE(review): the four expected values look like root domain, public suffix, second-level label, subdomain - confirm against `checkPublicSuffix`.
376 | NSLog("Quick option is \(quick ? "enabled" : "disabled").")
377 |
378 | /// nil input: every expected value is nil.
379 | let val: String? = nil
380 | checkPublicSuffix(val, nil, nil, nil, nil, quick: quick)
381 |
382 | /// Mixed case: the expected values are all lowercase.
383 | checkPublicSuffix("COM", nil, nil, nil, nil, quick: quick)
384 | checkPublicSuffix("example.COM", "example.com", "com", "example", nil, quick: quick)
385 | checkPublicSuffix("WwW.example.COM", "example.com", "com", "example", "www", quick: quick)
386 |
387 | /// Leading dot. (No leading-dot inputs are exercised in this function.)
388 | /// Listed, but non-Internet, TLD.
389 | checkPublicSuffix("local", nil, nil, nil, nil, quick: quick)
390 | checkPublicSuffix("example.local", nil, nil, nil, nil, quick: quick)
391 | checkPublicSuffix("b.example.local", nil, nil, nil, nil, quick: quick)
392 | checkPublicSuffix("a.b.example.local", nil, nil, nil, nil, quick: quick)
393 |
394 | /// TLD with only 1 rule.
395 | checkPublicSuffix("biz", nil, nil, nil, nil, quick: quick)
396 | checkPublicSuffix("domain.biz", "domain.biz", "biz", "domain", nil, quick: quick)
397 | checkPublicSuffix("b.domain.biz", "domain.biz", "biz", "domain", "b", quick: quick)
398 | checkPublicSuffix("a.b.domain.biz", "domain.biz", "biz", "domain", "a.b", quick: quick)
399 |
400 | /// TLD with some 2-level rules.
401 | checkPublicSuffix("com", nil, nil, nil, nil, quick: quick)
402 | checkPublicSuffix("example.com", "example.com", "com", "example", nil, quick: quick)
403 | checkPublicSuffix("b.example.com", "example.com", "com", "example", "b", quick: quick)
404 | checkPublicSuffix("a.b.example.com", "example.com", "com", "example", "a.b", quick: quick)
405 | checkPublicSuffix("uk.com", nil, nil, nil, nil, quick: quick)
406 | checkPublicSuffix("example.uk.com", "example.uk.com", "uk.com", "example", nil, quick: quick)
407 | checkPublicSuffix("b.example.uk.com", "example.uk.com", "uk.com", "example", "b", quick: quick)
408 | checkPublicSuffix("a.b.example.uk.com", "example.uk.com", "uk.com", "example", "a.b", quick: quick)
409 | checkPublicSuffix("test.ac", "test.ac", "ac", "test", nil, quick: quick)
410 |
411 | /// TLD with only 1 (wildcard) rule.
412 | checkPublicSuffix("mm", nil, nil, nil, nil, quick: quick)
413 | checkPublicSuffix("c.mm", nil, nil, nil, nil, quick: quick)
414 | if quick {
415 | /// With the quick option enabled, every expected value is nil for these wildcard inputs.
416 | checkPublicSuffix("b.c.mm", nil, nil, nil, nil, quick: quick)
417 | checkPublicSuffix("a.b.c.mm", nil, nil, nil, nil, quick: quick)
418 | } else {
419 | checkPublicSuffix("b.c.mm", "b.c.mm", "c.mm", "b", nil, quick: quick)
420 | checkPublicSuffix("a.b.c.mm", "b.c.mm", "c.mm", "b", "a", quick: quick)
421 | }
422 |
423 | /// More complex TLD.
424 | checkPublicSuffix("jp", nil, nil, nil, nil, quick: quick)
425 | checkPublicSuffix("test.jp", "test.jp", "jp", "test", nil, quick: quick)
426 | checkPublicSuffix("www.test.jp", "test.jp", "jp", "test", "www", quick: quick)
427 | checkPublicSuffix("ac.jp", nil, nil, nil, nil, quick: quick)
428 | checkPublicSuffix("test.ac.jp", "test.ac.jp", "ac.jp", "test", nil, quick: quick)
429 | checkPublicSuffix("www.test.ac.jp", "test.ac.jp", "ac.jp", "test", "www", quick: quick)
430 | checkPublicSuffix("kyoto.jp", nil, nil, nil, nil, quick: quick)
431 | checkPublicSuffix("test.kyoto.jp", "test.kyoto.jp", "kyoto.jp", "test", nil, quick: quick)
432 | checkPublicSuffix("ide.kyoto.jp", nil, nil, nil, nil, quick: quick)
433 | checkPublicSuffix("b.ide.kyoto.jp", "b.ide.kyoto.jp", "ide.kyoto.jp", "b", nil, quick: quick)
434 | checkPublicSuffix("a.b.ide.kyoto.jp", "b.ide.kyoto.jp", "ide.kyoto.jp", "b", "a", quick: quick)
435 | if quick {
436 | /// The expected results for wildcards and exceptions depend on the quick option: here `kobe.jp` is treated as a plain domain under `jp`.
437 | checkPublicSuffix("c.kobe.jp", "kobe.jp", "jp", "kobe", "c", quick: quick)
438 | checkPublicSuffix("b.c.kobe.jp", "kobe.jp", "jp", "kobe", "b.c", quick: quick)
439 | checkPublicSuffix("a.b.c.kobe.jp", "kobe.jp", "jp", "kobe", "a.b.c", quick: quick)
440 | checkPublicSuffix("city.kobe.jp", "kobe.jp", "jp", "kobe", "city", quick: quick)
441 | checkPublicSuffix("www.city.kobe.jp", "kobe.jp", "jp", "kobe", "www.city", quick: quick)
442 | } else {
443 | checkPublicSuffix("c.kobe.jp", nil, nil, nil, nil, quick: quick)
444 | checkPublicSuffix("b.c.kobe.jp", "b.c.kobe.jp", "c.kobe.jp", "b", nil, quick: quick)
445 | checkPublicSuffix("a.b.c.kobe.jp", "b.c.kobe.jp", "c.kobe.jp", "b", "a", quick: quick)
446 | checkPublicSuffix("city.kobe.jp", "city.kobe.jp", "kobe.jp", "city", nil, quick: quick)
447 | checkPublicSuffix("www.city.kobe.jp", "city.kobe.jp", "kobe.jp", "city", "www", quick: quick)
448 | }
449 |
450 | /// TLD with a wildcard rule and exceptions.
451 | if quick {
452 | /// With the quick option enabled, every expected value is nil for all of these `ck` inputs.
453 | checkPublicSuffix("ck", nil, nil, nil, nil, quick: quick)
454 | checkPublicSuffix("test.ck", nil, nil, nil, nil, quick: quick)
455 | checkPublicSuffix("b.test.ck", nil, nil, nil, nil, quick: quick)
456 | checkPublicSuffix("a.b.test.ck", nil, nil, nil, nil, quick: quick)
457 | checkPublicSuffix("www.ck", nil, nil, nil, nil, quick: quick)
458 | checkPublicSuffix("www.www.ck", nil, nil, nil, nil, quick: quick)
459 | } else {
460 | checkPublicSuffix("ck", nil, nil, nil, nil, quick: quick)
461 | checkPublicSuffix("test.ck", nil, nil, nil, nil, quick: quick)
462 | checkPublicSuffix("b.test.ck", "b.test.ck", "test.ck", "b", nil, quick: quick)
463 | checkPublicSuffix("a.b.test.ck", "b.test.ck", "test.ck", "b", "a", quick: quick)
464 | checkPublicSuffix("www.ck", "www.ck", "ck", "www", nil, quick: quick)
465 | checkPublicSuffix("www.www.ck", "www.ck", "ck", "www", "www", quick: quick)
466 | }
467 |
468 | /// US K12.
469 | checkPublicSuffix("us", nil, nil, nil, nil, quick: quick)
470 | checkPublicSuffix("test.us", "test.us", "us", "test", nil, quick: quick)
471 | checkPublicSuffix("www.test.us", "test.us", "us", "test", "www", quick: quick)
472 | checkPublicSuffix("ak.us", nil, nil, nil, nil, quick: quick)
473 | checkPublicSuffix("test.ak.us", "test.ak.us", "ak.us", "test", nil, quick: quick)
474 | checkPublicSuffix("www.test.ak.us", "test.ak.us", "ak.us", "test", "www", quick: quick)
475 | checkPublicSuffix("k12.ak.us", nil, nil, nil, nil, quick: quick)
476 | checkPublicSuffix("test.k12.ak.us", "test.k12.ak.us", "k12.ak.us", "test", nil, quick: quick)
477 | checkPublicSuffix("www.test.k12.ak.us", "test.k12.ak.us", "k12.ak.us", "test", "www", quick: quick)
478 |
479 | /// IDN labels: Unicode inputs with Unicode expected values.
480 | checkPublicSuffix("食狮.com.cn", "食狮.com.cn", "com.cn", "食狮", nil, quick: quick)
481 | checkPublicSuffix("食狮.公司.cn", "食狮.公司.cn", "公司.cn", "食狮", nil, quick: quick)
482 | checkPublicSuffix("www.食狮.公司.cn", "食狮.公司.cn", "公司.cn", "食狮", "www", quick: quick)
483 | checkPublicSuffix("shishi.公司.cn", "shishi.公司.cn", "公司.cn", "shishi", nil, quick: quick)
484 | checkPublicSuffix("公司.cn", nil, nil, nil, nil, quick: quick)
485 | checkPublicSuffix("食狮.中国", "食狮.中国", "中国", "食狮", nil, quick: quick)
486 | checkPublicSuffix("www.食狮.中国", "食狮.中国", "中国", "食狮", "www", quick: quick)
487 | checkPublicSuffix("shishi.中国", "shishi.中国", "中国", "shishi", nil, quick: quick)
488 | checkPublicSuffix("中国", nil, nil, nil, nil, quick: quick)
489 | /// Same as above, but punycoded; the expected values are punycoded as well.
490 | checkPublicSuffix("xn--85x722f.com.cn", "xn--85x722f.com.cn", "com.cn", "xn--85x722f", nil, quick: quick)
491 | checkPublicSuffix("xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn", "xn--55qx5d.cn", "xn--85x722f", nil, quick: quick)
492 | checkPublicSuffix("www.xn--85x722f.xn--55qx5d.cn", "xn--85x722f.xn--55qx5d.cn", "xn--55qx5d.cn", "xn--85x722f", "www", quick: quick)
493 | checkPublicSuffix("shishi.xn--55qx5d.cn", "shishi.xn--55qx5d.cn", "xn--55qx5d.cn", "shishi", nil, quick: quick)
494 | checkPublicSuffix("xn--55qx5d.cn", nil, nil, nil, nil, quick: quick)
495 | checkPublicSuffix("xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s", "xn--fiqs8s", "xn--85x722f", nil, quick: quick)
496 | checkPublicSuffix("www.xn--85x722f.xn--fiqs8s", "xn--85x722f.xn--fiqs8s", "xn--fiqs8s", "xn--85x722f", "www", quick: quick)
497 | checkPublicSuffix("shishi.xn--fiqs8s", "shishi.xn--fiqs8s", "xn--fiqs8s", "shishi", nil, quick: quick)
498 | checkPublicSuffix("xn--fiqs8s", nil, nil, nil, nil, quick: quick)
499 |
500 | /// Japanese IDN labels: Unicode inputs with Unicode expected values.
501 | checkPublicSuffix("忍者.jp", "忍者.jp", "jp", "忍者", nil, quick: quick)
502 | checkPublicSuffix("サムライ.忍者.jp", "忍者.jp", "jp", "忍者", "サムライ", quick: quick)
503 | checkPublicSuffix("www.サムライ.忍者.jp", "忍者.jp", "jp", "忍者", "www.サムライ", quick: quick)
504 | checkPublicSuffix("ラーメン.寿司.co.jp", "寿司.co.jp", "co.jp", "寿司", "ラーメン", quick: quick)
505 | checkPublicSuffix("www.ラーメン.寿司.co.jp", "寿司.co.jp", "co.jp", "寿司", "www.ラーメン", quick: quick)
506 | checkPublicSuffix("餃子.食品", "餃子.食品", "食品", "餃子", nil, quick: quick)
507 | checkPublicSuffix("チャーハン.餃子.食品", "餃子.食品", "食品", "餃子", "チャーハン", quick: quick)
508 | checkPublicSuffix("www.チャーハン.餃子.食品", "餃子.食品", "食品", "餃子", "www.チャーハン", quick: quick)
509 | checkPublicSuffix("青山.ファッション", "青山.ファッション", "ファッション", "青山", nil, quick: quick)
510 | checkPublicSuffix("表参道.青山.ファッション", "青山.ファッション", "ファッション", "青山", "表参道", quick: quick)
511 | checkPublicSuffix("www.表参道.青山.ファッション", "青山.ファッション", "ファッション", "青山", "www.表参道", quick: quick)
512 | checkPublicSuffix("www.おしゃれ.表参道.青山.ファッション", "青山.ファッション", "ファッション", "青山", "www.おしゃれ.表参道", quick: quick)
513 | checkPublicSuffix("日本", nil, nil, nil, nil, quick: quick)
514 | /// Same as above, but punycoded; the expected values are punycoded as well.
515 | checkPublicSuffix("xn--c6t203e.jp", "xn--c6t203e.jp", "jp", "xn--c6t203e", nil, quick: quick)
516 | checkPublicSuffix("xn--eck7azimb.xn--c6t203e.jp", "xn--c6t203e.jp", "jp", "xn--c6t203e", "xn--eck7azimb", quick: quick)
517 | checkPublicSuffix("www.xn--eck7azimb.xn--c6t203e.jp", "xn--c6t203e.jp", "jp", "xn--c6t203e", "www.xn--eck7azimb", quick: quick)
518 | checkPublicSuffix("xn--4dkp5a8a.xn--sprr0q.co.jp", "xn--sprr0q.co.jp", "co.jp", "xn--sprr0q", "xn--4dkp5a8a", quick: quick)
519 | checkPublicSuffix("www.xn--4dkp5a8a.xn--sprr0q.co.jp", "xn--sprr0q.co.jp", "co.jp", "xn--sprr0q", "www.xn--4dkp5a8a", quick: quick)
520 | checkPublicSuffix("xn--i8st94l.xn--jvr189m", "xn--i8st94l.xn--jvr189m", "xn--jvr189m", "xn--i8st94l", nil, quick: quick)
521 | checkPublicSuffix("xn--7ck2a9c3czb.xn--i8st94l.xn--jvr189m", "xn--i8st94l.xn--jvr189m", "xn--jvr189m", "xn--i8st94l", "xn--7ck2a9c3czb", quick: quick)
522 | checkPublicSuffix("www.xn--7ck2a9c3czb.xn--i8st94l.xn--jvr189m", "xn--i8st94l.xn--jvr189m", "xn--jvr189m", "xn--i8st94l", "www.xn--7ck2a9c3czb", quick: quick)
523 | checkPublicSuffix("xn--rht138k.xn--bck1b9a5dre4c", "xn--rht138k.xn--bck1b9a5dre4c", "xn--bck1b9a5dre4c", "xn--rht138k", nil, quick: quick)
524 | checkPublicSuffix("xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c", "xn--rht138k.xn--bck1b9a5dre4c", "xn--bck1b9a5dre4c", "xn--rht138k", "xn--8nr183j17e", quick: quick)
525 | checkPublicSuffix("www.xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c", "xn--rht138k.xn--bck1b9a5dre4c", "xn--bck1b9a5dre4c", "xn--rht138k", "www.xn--8nr183j17e", quick: quick)
526 | checkPublicSuffix("www.xn--t8j0ayjlb.xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c", "xn--rht138k.xn--bck1b9a5dre4c", "xn--bck1b9a5dre4c", "xn--rht138k", "www.xn--t8j0ayjlb.xn--8nr183j17e", quick: quick)
527 | checkPublicSuffix("xn--wgv71a", nil, nil, nil, nil, quick: quick)
528 |
529 | }
530 |
531 | func testTLDExtractURL(quick: Bool) {
532 | NSLog("Quick option is \(quick ? "enabled" : "disabled").")
533 |
534 | /// nil input.
535 | let val: URL? = nil
536 | checkPublicSuffix(val, nil, nil, nil, nil, quick: quick)
537 |
538 | /// Mixed case.
539 | checkPublicSuffix(URL(string: "COM"), nil, nil, nil, nil, quick: quick)
540 | checkPublicSuffix(URL(string: "example.COM"), "example.com", "com", "example", nil, quick: quick)
541 | checkPublicSuffix(URL(string: "WwW.example.COM"), "example.com", "com", "example", "www", quick: quick)
542 |
543 | /// Leading dot.
544 | /// Listed, but non-Internet, TLD.
545 | checkPublicSuffix(URL(string: "local"), nil, nil, nil, nil, quick: quick)
546 | checkPublicSuffix(URL(string: "example.local"), nil, nil, nil, nil, quick: quick)
547 | checkPublicSuffix(URL(string: "b.example.local"), nil, nil, nil, nil, quick: quick)
548 | checkPublicSuffix(URL(string: "a.b.example.local"), nil, nil, nil, nil, quick: quick)
549 |
550 | /// TLD with only 1 rule.
551 | checkPublicSuffix(URL(string: "biz"), nil, nil, nil, nil, quick: quick)
552 | checkPublicSuffix(URL(string: "domain.biz"), "domain.biz", "biz", "domain", nil, quick: quick)
553 | checkPublicSuffix(URL(string: "b.domain.biz"), "domain.biz", "biz", "domain", "b", quick: quick)
554 | checkPublicSuffix(URL(string: "a.b.domain.biz"), "domain.biz", "biz", "domain", "a.b", quick: quick)
555 |
556 | /// TLD with some 2-level rules.
557 | checkPublicSuffix(URL(string: "com"), nil, nil, nil, nil, quick: quick)
558 | checkPublicSuffix(URL(string: "example.com"), "example.com", "com", "example", nil, quick: quick)
559 | checkPublicSuffix(URL(string: "b.example.com"), "example.com", "com", "example", "b", quick: quick)
560 | checkPublicSuffix(URL(string: "a.b.example.com"), "example.com", "com", "example", "a.b", quick: quick)
561 | checkPublicSuffix(URL(string: "uk.com"), nil, nil, nil, nil, quick: quick)
562 | checkPublicSuffix(URL(string: "example.uk.com"), "example.uk.com", "uk.com", "example", nil, quick: quick)
563 | checkPublicSuffix(URL(string: "b.example.uk.com"), "example.uk.com", "uk.com", "example", "b", quick: quick)
564 | checkPublicSuffix(URL(string: "a.b.example.uk.com"), "example.uk.com", "uk.com", "example", "a.b", quick: quick)
565 | checkPublicSuffix(URL(string: "test.ac"), "test.ac", "ac", "test", nil, quick: quick)
566 |
567 | /// TLD with only 1 (wildcard) rule.
568 | checkPublicSuffix(URL(string: "mm"), nil, nil, nil, nil, quick: quick)
569 | checkPublicSuffix(URL(string: "c.mm"), nil, nil, nil, nil, quick: quick)
570 | if quick {
571 | /// Wildcards and exception data with quick option always returns nil
572 | checkPublicSuffix(URL(string: "b.c.mm"), nil, nil, nil, nil, quick: quick)
573 | checkPublicSuffix(URL(string: "a.b.c.mm"), nil, nil, nil, nil, quick: quick)
574 | } else {
575 | checkPublicSuffix(URL(string: "b.c.mm"), "b.c.mm", "c.mm", "b", nil, quick: quick)
576 | checkPublicSuffix(URL(string: "a.b.c.mm"), "b.c.mm", "c.mm", "b", "a", quick: quick)
577 | }
578 |
579 | /// More complex TLD.
580 | checkPublicSuffix(URL(string: "jp"), nil, nil, nil, nil, quick: quick)
581 | checkPublicSuffix(URL(string: "test.jp"), "test.jp", "jp", "test", nil, quick: quick)
582 | checkPublicSuffix(URL(string: "www.test.jp"), "test.jp", "jp", "test", "www", quick: quick)
583 | checkPublicSuffix(URL(string: "ac.jp"), nil, nil, nil, nil, quick: quick)
584 | checkPublicSuffix(URL(string: "test.ac.jp"), "test.ac.jp", "ac.jp", "test", nil, quick: quick)
585 | checkPublicSuffix(URL(string: "www.test.ac.jp"), "test.ac.jp", "ac.jp", "test", "www", quick: quick)
586 | checkPublicSuffix(URL(string: "kyoto.jp"), nil, nil, nil, nil, quick: quick)
587 | checkPublicSuffix(URL(string: "test.kyoto.jp"), "test.kyoto.jp", "kyoto.jp", "test", nil, quick: quick)
588 | checkPublicSuffix(URL(string: "ide.kyoto.jp"), nil, nil, nil, nil, quick: quick)
589 | checkPublicSuffix(URL(string: "b.ide.kyoto.jp"), "b.ide.kyoto.jp", "ide.kyoto.jp", "b", nil, quick: quick)
590 | checkPublicSuffix(URL(string: "a.b.ide.kyoto.jp"), "b.ide.kyoto.jp", "ide.kyoto.jp", "b", "a", quick: quick)
591 | if quick {
592 | /// The results of wildcards and exceptions depend on the quick option
593 | checkPublicSuffix(URL(string: "c.kobe.jp"), "kobe.jp", "jp", "kobe", "c", quick: quick)
594 | checkPublicSuffix(URL(string: "b.c.kobe.jp"), "kobe.jp", "jp", "kobe", "b.c", quick: quick)
595 | checkPublicSuffix(URL(string: "a.b.c.kobe.jp"), "kobe.jp", "jp", "kobe", "a.b.c", quick: quick)
596 | checkPublicSuffix(URL(string: "city.kobe.jp"), "kobe.jp", "jp", "kobe", "city", quick: quick)
597 | checkPublicSuffix(URL(string: "www.city.kobe.jp"), "kobe.jp", "jp", "kobe", "www.city", quick: quick)
598 | } else {
599 | checkPublicSuffix(URL(string: "c.kobe.jp"), nil, nil, nil, nil, quick: quick)
600 | checkPublicSuffix(URL(string: "b.c.kobe.jp"), "b.c.kobe.jp", "c.kobe.jp", "b", nil, quick: quick)
601 | checkPublicSuffix(URL(string: "a.b.c.kobe.jp"), "b.c.kobe.jp", "c.kobe.jp", "b", "a", quick: quick)
602 | checkPublicSuffix(URL(string: "city.kobe.jp"), "city.kobe.jp", "kobe.jp", "city", nil, quick: quick)
603 | checkPublicSuffix(URL(string: "www.city.kobe.jp"), "city.kobe.jp", "kobe.jp", "city", "www", quick: quick)
604 | }
605 |
606 | /// TLD with a wildcard rule and exceptions.
607 | if quick {
608 | /// Wildcards and exception data with quick option always returns nil
609 | checkPublicSuffix(URL(string: "ck"), nil, nil, nil, nil, quick: quick)
610 | checkPublicSuffix(URL(string: "test.ck"), nil, nil, nil, nil, quick: quick)
611 | checkPublicSuffix(URL(string: "b.test.ck"), nil, nil, nil, nil, quick: quick)
612 | checkPublicSuffix(URL(string: "a.b.test.ck"), nil, nil, nil, nil, quick: quick)
613 | checkPublicSuffix(URL(string: "www.ck"), nil, nil, nil, nil, quick: quick)
614 | checkPublicSuffix(URL(string: "www.www.ck"), nil, nil, nil, nil, quick: quick)
615 | } else {
616 | checkPublicSuffix(URL(string: "ck"), nil, nil, nil, nil, quick: quick)
617 | checkPublicSuffix(URL(string: "test.ck"), nil, nil, nil, nil, quick: quick)
618 | checkPublicSuffix(URL(string: "b.test.ck"), "b.test.ck", "test.ck", "b", nil, quick: quick)
619 | checkPublicSuffix(URL(string: "a.b.test.ck"), "b.test.ck", "test.ck", "b", "a", quick: quick)
620 | checkPublicSuffix(URL(string: "www.ck"), "www.ck", "ck", "www", nil, quick: quick)
621 | checkPublicSuffix(URL(string: "www.www.ck"), "www.ck", "ck", "www", "www", quick: quick)
622 | }
623 |
624 | /// US K12.
625 | checkPublicSuffix(URL(string: "us"), nil, nil, nil, nil, quick: quick)
626 | checkPublicSuffix(URL(string: "test.us"), "test.us", "us", "test", nil, quick: quick)
627 | checkPublicSuffix(URL(string: "www.test.us"), "test.us", "us", "test", "www", quick: quick)
628 | checkPublicSuffix(URL(string: "ak.us"), nil, nil, nil, nil, quick: quick)
629 | checkPublicSuffix(URL(string: "test.ak.us"), "test.ak.us", "ak.us", "test", nil, quick: quick)
630 | checkPublicSuffix(URL(string: "www.test.ak.us"), "test.ak.us", "ak.us", "test", "www", quick: quick)
631 | checkPublicSuffix(URL(string: "k12.ak.us"), nil, nil, nil, nil, quick: quick)
632 | checkPublicSuffix(URL(string: "test.k12.ak.us"), "test.k12.ak.us", "k12.ak.us", "test", nil, quick: quick)
633 | checkPublicSuffix(URL(string: "www.test.k12.ak.us"), "test.k12.ak.us", "k12.ak.us", "test", "www", quick: quick)
634 |
635 | /// IDN labels.
636 | checkPublicSuffix(URL(string: "食狮.com.cn"), "食狮.com.cn", "com.cn", "食狮", nil, quick: quick)
637 | checkPublicSuffix(URL(string: "食狮.公司.cn"), "食狮.公司.cn", "公司.cn", "食狮", nil, quick: quick)
638 | checkPublicSuffix(URL(string: "www.食狮.公司.cn"), "食狮.公司.cn", "公司.cn", "食狮", "www", quick: quick)
639 | checkPublicSuffix(URL(string: "shishi.公司.cn"), "shishi.公司.cn", "公司.cn", "shishi", nil, quick: quick)
640 | checkPublicSuffix(URL(string: "公司.cn"), nil, nil, nil, nil, quick: quick)
641 | checkPublicSuffix(URL(string: "食狮.中国"), "食狮.中国", "中国", "食狮", nil, quick: quick)
642 | checkPublicSuffix(URL(string: "www.食狮.中国"), "食狮.中国", "中国", "食狮", "www", quick: quick)
643 | checkPublicSuffix(URL(string: "shishi.中国"), "shishi.中国", "中国", "shishi", nil, quick: quick)
644 | checkPublicSuffix(URL(string: "中国"), nil, nil, nil, nil, quick: quick)
645 | /// Same as above, but punycoded.
646 | checkPublicSuffix(URL(string: "xn--85x722f.com.cn"), "xn--85x722f.com.cn", "com.cn", "xn--85x722f", nil, quick: quick)
647 | checkPublicSuffix(URL(string: "xn--85x722f.xn--55qx5d.cn"), "xn--85x722f.xn--55qx5d.cn", "xn--55qx5d.cn", "xn--85x722f", nil, quick: quick)
648 | checkPublicSuffix(URL(string: "www.xn--85x722f.xn--55qx5d.cn"), "xn--85x722f.xn--55qx5d.cn", "xn--55qx5d.cn", "xn--85x722f", "www", quick: quick)
649 | checkPublicSuffix(URL(string: "shishi.xn--55qx5d.cn"), "shishi.xn--55qx5d.cn", "xn--55qx5d.cn", "shishi", nil, quick: quick)
650 | checkPublicSuffix(URL(string: "xn--55qx5d.cn"), nil, nil, nil, nil, quick: quick)
651 | checkPublicSuffix(URL(string: "xn--85x722f.xn--fiqs8s"), "xn--85x722f.xn--fiqs8s", "xn--fiqs8s", "xn--85x722f", nil, quick: quick)
652 | checkPublicSuffix(URL(string: "www.xn--85x722f.xn--fiqs8s"), "xn--85x722f.xn--fiqs8s", "xn--fiqs8s", "xn--85x722f", "www", quick: quick)
653 | checkPublicSuffix(URL(string: "shishi.xn--fiqs8s"), "shishi.xn--fiqs8s", "xn--fiqs8s", "shishi", nil, quick: quick)
654 | checkPublicSuffix(URL(string: "xn--fiqs8s"), nil, nil, nil, nil, quick: quick)
655 |
656 | /// Japanese IDN labels.
657 | checkPublicSuffix(URL(unicodeString: "忍者.jp"), "忍者.jp", "jp", "忍者", nil, quick: quick)
658 | checkPublicSuffix(URL(unicodeString: "サムライ.忍者.jp"), "忍者.jp", "jp", "忍者", "サムライ", quick: quick)
659 | checkPublicSuffix(URL(unicodeString: "www.サムライ.忍者.jp"), "忍者.jp", "jp", "忍者", "www.サムライ", quick: quick)
660 | checkPublicSuffix(URL(unicodeString: "ラーメン.寿司.co.jp"), "寿司.co.jp", "co.jp", "寿司", "ラーメン", quick: quick)
661 | checkPublicSuffix(URL(unicodeString: "www.ラーメン.寿司.co.jp"), "寿司.co.jp", "co.jp", "寿司", "www.ラーメン", quick: quick)
662 | checkPublicSuffix(URL(unicodeString: "餃子.食品"), "餃子.食品", "食品", "餃子", nil, quick: quick)
663 | checkPublicSuffix(URL(unicodeString: "チャーハン.餃子.食品"), "餃子.食品", "食品", "餃子", "チャーハン", quick: quick)
664 | checkPublicSuffix(URL(unicodeString: "www.チャーハン.餃子.食品"), "餃子.食品", "食品", "餃子", "www.チャーハン", quick: quick)
665 | checkPublicSuffix(URL(unicodeString: "青山.ファッション"), "青山.ファッション", "ファッション", "青山", nil, quick: quick)
666 | checkPublicSuffix(URL(unicodeString: "表参道.青山.ファッション"), "青山.ファッション", "ファッション", "青山", "表参道", quick: quick)
667 | checkPublicSuffix(URL(unicodeString: "www.表参道.青山.ファッション"), "青山.ファッション", "ファッション", "青山", "www.表参道", quick: quick)
668 | checkPublicSuffix(URL(unicodeString: "www.おしゃれ.表参道.青山.ファッション"), "青山.ファッション", "ファッション", "青山", "www.おしゃれ.表参道", quick: quick)
669 | checkPublicSuffix(URL(unicodeString: "日本"), nil, nil, nil, nil, quick: quick)
670 | /// Same as above, but punycoded.
671 | checkPublicSuffix(URL(string: "xn--c6t203e.jp"), "xn--c6t203e.jp", "jp", "xn--c6t203e", nil, quick: quick)
672 | checkPublicSuffix(URL(string: "xn--eck7azimb.xn--c6t203e.jp"), "xn--c6t203e.jp", "jp", "xn--c6t203e", "xn--eck7azimb", quick: quick)
673 | checkPublicSuffix(URL(string: "www.xn--eck7azimb.xn--c6t203e.jp"), "xn--c6t203e.jp", "jp", "xn--c6t203e", "www.xn--eck7azimb", quick: quick)
674 | checkPublicSuffix(URL(string: "xn--4dkp5a8a.xn--sprr0q.co.jp"), "xn--sprr0q.co.jp", "co.jp", "xn--sprr0q", "xn--4dkp5a8a", quick: quick)
675 | checkPublicSuffix(URL(string: "www.xn--4dkp5a8a.xn--sprr0q.co.jp"), "xn--sprr0q.co.jp", "co.jp", "xn--sprr0q", "www.xn--4dkp5a8a", quick: quick)
676 | checkPublicSuffix(URL(string: "xn--i8st94l.xn--jvr189m"), "xn--i8st94l.xn--jvr189m", "xn--jvr189m", "xn--i8st94l", nil, quick: quick)
677 | checkPublicSuffix(URL(string: "xn--7ck2a9c3czb.xn--i8st94l.xn--jvr189m"), "xn--i8st94l.xn--jvr189m", "xn--jvr189m", "xn--i8st94l", "xn--7ck2a9c3czb", quick: quick)
678 | checkPublicSuffix(URL(string: "www.xn--7ck2a9c3czb.xn--i8st94l.xn--jvr189m"), "xn--i8st94l.xn--jvr189m", "xn--jvr189m", "xn--i8st94l", "www.xn--7ck2a9c3czb", quick: quick)
679 | checkPublicSuffix(URL(string: "xn--rht138k.xn--bck1b9a5dre4c"), "xn--rht138k.xn--bck1b9a5dre4c", "xn--bck1b9a5dre4c", "xn--rht138k", nil, quick: quick)
680 | checkPublicSuffix(URL(string: "xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c"), "xn--rht138k.xn--bck1b9a5dre4c", "xn--bck1b9a5dre4c", "xn--rht138k", "xn--8nr183j17e", quick: quick)
681 | checkPublicSuffix(URL(string: "www.xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c"), "xn--rht138k.xn--bck1b9a5dre4c", "xn--bck1b9a5dre4c", "xn--rht138k", "www.xn--8nr183j17e", quick: quick)
682 | checkPublicSuffix(URL(string: "www.xn--t8j0ayjlb.xn--8nr183j17e.xn--rht138k.xn--bck1b9a5dre4c"), "xn--rht138k.xn--bck1b9a5dre4c", "xn--bck1b9a5dre4c", "xn--rht138k", "www.xn--t8j0ayjlb.xn--8nr183j17e", quick: quick)
683 | checkPublicSuffix(URL(string: "xn--wgv71a"), nil, nil, nil, nil, quick: quick)
684 | }
685 |
/// Asserts that the `hostname` read from `input` equals `expected`.
///
/// A `nil` input produces a `nil` hostname, so such a case only passes when
/// `expected` is `nil` as well.
///
/// - Parameters:
///   - input: Value whose `hostname` is read; may be `nil`.
///   - expected: Hostname the input is expected to yield, or `nil`.
///   - file: Source file a failure is attributed to (defaults to the call site).
///   - line: Source line a failure is attributed to (defaults to the call site).
///
/// NOTE(review): `T` is not declared in this signature as shown; presumably a
/// generic parameter conforming to `TLDExtractable` — confirm against the full file.
func checkTLDExtractable(_ input: T?,
                         _ expected: String?,
                         file: StaticString = #file,
                         line: UInt = #line) {
    // Debug aid, enable when a case needs tracing:
    // logTLDExtractable(input, input?.hostname, expected)

    XCTAssertEqual(input?.hostname, expected, file: file, line: line)
}
695 |
/// Parses `input` with `tldExtractor` and asserts each component of the result.
///
/// The four assertions run in a fixed order: root domain, top level domain,
/// second level domain, sub domain. When the parser returns `nil`, every
/// component reads as `nil`, so a case expecting "no result" passes four `nil`s.
///
/// - Parameters:
///   - input: Value handed to the parser. A `nil` input returns immediately
///     without asserting anything.
///   - expectedRootDomain: Expected `rootDomain`, or `nil`.
///   - expectedTopLevelDomain: Expected `topLevelDomain`, or `nil`.
///   - expectedSecondDomain: Expected `secondLevelDomain`, or `nil`.
///   - expectedSubDomain: Expected `subDomain`, or `nil`.
///   - quick: Forwarded unchanged to `tldExtractor.parse(_:quick:)`.
///   - file: Source file a failure is attributed to (defaults to the call site).
///   - line: Source line a failure is attributed to (defaults to the call site).
///
/// NOTE(review): because a `nil` input is skipped silently, a case whose input
/// could not be constructed is never checked — confirm this is intended.
func checkPublicSuffix(_ input: T?,
                       _ expectedRootDomain: String?,
                       _ expectedTopLevelDomain: String?,
                       _ expectedSecondDomain: String?,
                       _ expectedSubDomain: String?,
                       quick: Bool = false,
                       file: StaticString = #file,
                       line: UInt = #line) {
    guard let subject: T = input else { return }

    let parsed: TLDResult? = tldExtractor.parse(subject, quick: quick)

    // Debug aid (`host` is not a local of this function; supply a host string first):
    // logTLDResult(host, expectedRootDomain, expectedTopLevelDomain, expectedSecondDomain, expectedSubDomain, parsed)

    XCTAssertEqual(parsed?.rootDomain, expectedRootDomain, file: file, line: line)
    XCTAssertEqual(parsed?.topLevelDomain, expectedTopLevelDomain, file: file, line: line)
    XCTAssertEqual(parsed?.secondLevelDomain, expectedSecondDomain, file: file, line: line)
    XCTAssertEqual(parsed?.subDomain, expectedSubDomain, file: file, line: line)
}
713 |
/// Debugging helper: prints one hostname test case to standard output.
///
/// Emits a separator line, then the input, the actual result and the expected
/// value (each shown as `nil` when absent), followed by an empty line.
///
/// - Parameters:
///   - input: The value that was checked.
///   - result: The hostname that was actually obtained.
///   - expected: The hostname the test expected.
func logTLDExtractable(_ input: TLDExtractable?,
                       _ result: String?,
                       _ expected: String?) {
    let report: [String] = [
        "----------------------------",
        "input: \(input ?? "nil")",
        "result: \(result ?? "nil")",
        "expected: \(expected ?? "nil")",
        ""
    ]

    // One `print` per entry keeps the output line-for-line identical to separate calls.
    for entry in report {
        print(entry)
    }
}
725 |
/// Debugging helper: prints expected versus actual domain components for one host.
///
/// Every value is rendered through `padding(toLength: 20, ...)` so the
/// `expected => actual` pairs line up; absent values are shown as `nil`.
/// Nothing is printed when `host` is `nil`.
///
/// - Parameters:
///   - host: Host string the case was run against.
///   - expectedRootDomain: Expected root domain.
///   - expectedTopLevelDomain: Expected top level domain.
///   - expectedSecondDomain: Expected second level domain.
///   - expectedSubDomain: Expected sub domain.
///   - result: Parser output whose components are printed next to the expectations.
func logTLDResult(_ host: String?,
                  _ expectedRootDomain: String?,
                  _ expectedTopLevelDomain: String?,
                  _ expectedSecondDomain: String?,
                  _ expectedSubDomain: String?,
                  _ result: TLDResult?) {
    guard let host = host else { return }

    /// Renders an optional string ("nil" when absent) as a 20-character column.
    func column(_ value: String?) -> String {
        return "\(value ?? "nil")".padding(toLength: 20, withPad: " ", startingAt: 0)
    }

    let hostColumn: String = column(host)

    let wantedRoot: String = column(expectedRootDomain)
    let wantedTop: String = column(expectedTopLevelDomain)
    let wantedSecond: String = column(expectedSecondDomain)
    let wantedSub: String = column(expectedSubDomain)

    let actualRoot: String = column(result?.rootDomain)
    let actualTop: String = column(result?.topLevelDomain)
    let actualSecond: String = column(result?.secondLevelDomain)
    let actualSub: String = column(result?.subDomain)

    print("----------------------------")
    print("hostStr: \(hostColumn)")
    print("Root domain: \(wantedRoot) => \(actualRoot)")
    print("Top level domain: \(wantedTop) => \(actualTop)")
    print("Second level domain: \(wantedSecond) => \(actualSecond)")
    print("Sub domain: \(wantedSub) => \(actualSub)")
}
752 | }
753 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
# Paths listed under `ignore` are left out by Codecov.
ignore:
  - 'Tests'  # the whole folder, including everything inside it
--------------------------------------------------------------------------------
/update-psl.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding:utf-8 -*-
3 |
4 | import os
5 | import sys
6 | import re
7 |
PSL_URL = 'https://publicsuffix.org/list/public_suffix_list.dat'
PSL_RELATIVE_PATH = 'Sources/TLDExtract/Resources/public_suffix_list.dat'


def download_psl(url=PSL_URL):
    """Download the Public Suffix List and return it as decoded text.

    Works on Python 3 and falls back to the Python 2 ``urllib`` module when
    ``urllib.request`` is not importable.

    :param url: Location of the list; defaults to ``PSL_URL``.
    :returns: The response body decoded as UTF-8.
    """
    import contextlib

    try:
        # Python 3
        from urllib.request import urlopen
    except ImportError:
        # Python 2
        from urllib import urlopen

    # contextlib.closing works with both urlopen flavours and guarantees the
    # connection is released even if reading or decoding fails.
    with contextlib.closing(urlopen(url)) as response:
        return response.read().decode('utf-8')


def clean_psl(psl_str):
    """Strip comments and blank lines from Public Suffix List text.

    :param psl_str: Raw list text.
    :returns: The text with ``//`` comments removed, runs of line breaks
        collapsed to one, and no leading or trailing line break.
    """
    # Remove comments (everything from `//` to the end of the line).
    psl_str = re.sub(r'//.*', '', psl_str)
    # Collapse runs of line breaks left behind by removed comments/blank lines.
    psl_str = re.sub(r'\n{2,}', '\n', psl_str)
    # After collapsing, at most one line break remains at either end.
    return psl_str.strip('\n')


def add_punycoded_rules(lines):
    """Return ``lines`` with a punycoded copy inserted after each IDN rule.

    A rule whose ``idna`` encoding differs from its original spelling is
    followed directly by that encoded form; all other rules are kept as they
    are. The input list is not modified.

    :param lines: Rules as text strings, one rule per item.
    :returns: A new list containing the original and the added rules.
    :raises UnicodeError: If a rule cannot be encoded with the ``idna`` codec.
    """
    expanded = []
    for line in lines:
        expanded.append(line)
        line_punycoded = line.encode('idna').decode('ascii')
        if line_punycoded != line:
            expanded.append(line_punycoded)
    return expanded


def main():
    """Refresh the bundled Public Suffix List next to this script."""
    import io

    src_dir = os.path.dirname(os.path.abspath(__file__))
    psl_file = os.path.abspath(os.path.join(src_dir, PSL_RELATIVE_PATH))

    lines = add_punycoded_rules(clean_psl(download_psl()).splitlines())
    if not lines:
        # Never replace the existing resource with an empty file.
        sys.exit('Downloaded public suffix list contains no rules; '
                 'leaving %s untouched.' % psl_file)

    # Pin the encoding: the list contains non-ASCII rules, and the platform
    # default encoding is not guaranteed to be UTF-8. io.open accepts the
    # `encoding` argument on both Python 2 and Python 3.
    with io.open(psl_file, mode='w', encoding='utf-8') as f:
        f.write('\n'.join(lines))


if __name__ == '__main__':
    main()
64 |
--------------------------------------------------------------------------------