├── .gitignore ├── Dockerfile ├── LICENSE ├── Package.swift ├── README.md ├── Sources └── SwiftHTMLParser │ ├── Extensions │ ├── Appendable.swift │ ├── CharacterExtensions.swift │ └── StringExtensions.swift │ ├── Helpers │ ├── RegexError.swift │ └── RegexHelper.swift │ ├── Parser │ ├── AttributeParser.swift │ ├── CDATAParser.swift │ ├── CommentParser.swift │ ├── ElementParser.swift │ ├── HTMLParser.swift │ ├── LookaheadValidator.swift │ ├── Models │ │ ├── Attribute.swift │ │ ├── Nodes │ │ │ ├── CData.swift │ │ │ ├── Comment.swift │ │ │ ├── DocumentTypeNode.swift │ │ │ ├── Element.swift │ │ │ ├── Node.swift │ │ │ ├── NodeType.swift │ │ │ └── TextNode.swift │ │ └── Tag.swift │ ├── ParseError.swift │ ├── ParseFormat.swift │ ├── ScriptParser.swift │ ├── TagParser.swift │ └── Tags │ │ ├── KnownHTMLTags.swift │ │ ├── SVGTags.swift │ │ └── XMLTags.swift │ ├── ProjectConfig.swift │ └── Traverser │ ├── HTMLTraverser.swift │ └── Selectors │ ├── AttributeSelector.swift │ ├── ClassSelector.swift │ ├── IntSelector.swift │ ├── NodeSelector.swift │ ├── NodeSelectors │ ├── CDataSelector.swift │ ├── CommentSelector.swift │ ├── ElementSelector.swift │ └── TextNodeSelector.swift │ ├── SelectorBuilders │ ├── IdStringSelectorBuilder.swift │ ├── PositionIntSelectorBuilder.swift │ ├── TagNameStringSelectorBuilder.swift │ ├── TextStringSelectorBuilder.swift │ └── ValueStringSelectorBuilder.swift │ └── StringSelector.swift └── Tests ├── SwiftHTMLParserTests ├── AppendableTests.swift ├── AttributeParserTests.swift ├── CommentParserTests.swift ├── DocumentationTests.swift ├── ElementTests.swift ├── ElementTraverserTests.swift ├── JavascriptParserTests.swift ├── PerformanceTests.swift ├── RealWorldTests.swift ├── SVGParserTests.swift └── TestHelper.swift └── TestFiles ├── Mock ├── Attributes │ ├── attributes-multiple-value-class.html │ ├── attributes-quotes.html │ ├── attributes-simple.html │ └── attributes-tabs.html ├── Comments │ ├── comments.html │ ├── conditional-comments-salvageable.html │ └── declarations.html ├── Documentation │ └── simple.html ├── Elements │ ├── element-name-on-new-line.html │ ├── element-unclosed-end-tag.html │ ├── elements-quotes.html │ ├── elements-simple.html │ ├── elemnent-stray-end-html-tag.html │ ├── elemnent-stray-end-tag.html │ └── empty-element.html ├── Javascript │ ├── javascript-comments.html │ ├── javascript-quotes-with-escape-characters.html │ ├── javascript-quotes.html │ └── javascript-simple.html ├── Performance │ ├── deep.html │ └── long.txt └── SVG │ └── svg-simple.html ├── RealWorld ├── amazon-home-page.html ├── apple-home-page.html ├── cnn-home-page.html ├── digitalocean-home-page.html ├── espn-home-page.html ├── facebook-home-page.html ├── google-home-page.html ├── linkedin-home-page.html ├── medium-home-page.html ├── reddit-home-page.html ├── weather-forcast.html ├── weather-forcast.xml ├── weather-hourly.html ├── weather-radar-2.html ├── weather-radar.html ├── wikipedia-home-page.html └── youtube-trending.html └── TestFileURLs.swift /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /.build 3 | /Packages 4 | /*.xcodeproj 5 | /.swiftpm 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 1 2 | FROM vapor/swift:5.1-bionic 3 | # 2 4 | WORKDIR /package 5 | # 3 6 | COPY . ./ 7 | # 4 8 | RUN swift package resolve 9 | RUN swift package clean 10 | # 5 11 | #RUN swift test --enable-test-discovery 12 | CMD ["swift", "test", "--enable-test-discovery"] 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version:5.6 2 | 3 | import PackageDescription 4 | 5 | let package = Package( 6 | name: "swift-html-parser", 7 | products: [ 8 | // Products define the executables and libraries produced by a package, and make them visible to other packages. 9 | .library( 10 | name: "SwiftHTMLParser", 11 | targets: ["SwiftHTMLParser"]), 12 | ], 13 | dependencies: [ 14 | // Dependencies declare other packages that this package depends on. 15 | ], 16 | targets: [ 17 | // Targets are the basic building blocks of a package. A target can define a module or a test suite. 18 | // Targets can depend on other targets in this package, and on products in packages which this package depends on. 19 | .target( 20 | name: "SwiftHTMLParser", 21 | dependencies: []), 22 | .target( 23 | name: "TestFiles", 24 | dependencies: [], 25 | path: "Tests/TestFiles", 26 | resources: [.copy("Mock"),.copy("RealWorld")] 27 | ), 28 | .testTarget( 29 | name: "SwiftHTMLParserTests", 30 | dependencies: ["SwiftHTMLParser", "TestFiles"]), 31 | ] 32 | ) 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SwiftHTMLParser 2 | SwiftHTMLParser is a library for parsing and traverseing HTML and XML written in Swift. It parses plaintext HTML or XML into an object tree (DOM), and allows for the easy traversal and searching of the tree's nodes, similar to an HTML Selector or XPath. 3 | 4 | ## Installation 5 | To depend on SwiftHTMLParser in your own project, add it to the `dependencies` clause in your `Package.swift` file: 6 | ```swift 7 | dependencies: [ 8 | .package(url: "https://github.com/rnantes/swift-html-parser.git", from: "1.0.0") 9 | ] 10 | ``` 11 | 12 | ## Basic Structure 13 | Object naming is based on the [HTML Standard](https://html.spec.whatwg.org/dev/syntax.html#syntax). There are also easy to follow introductions available from [w3schools](https://www.w3schools.com/html/default.asp) and [w3](https://www.w3.org/TR/html53/introduction.html#a-quick-introduction-to-html). 14 | 15 | * `Node`, a protocol: - Consists of an start and closing `Tag`. (Closing tags may be ommited in some special cases) 16 | * `Tag`, a struct: - contains the tag's name, the opening tag contains any of the node's `Attribute`s 17 | * `Attribute`, a struct: - consist of a name and an associated value 18 | 19 | #### Nodes 20 | * `Element`, a struct: - a Node that may contain nested nodes. 21 | * `TextNode`, a struct:- a Node that represents a block of text. 22 | * `Comment`, a struct: - a Node that represents a single or multi-line comment within an element. 23 | * `CData`, a struct: - a Node that represents a CData section and its associated text. 24 | * `DocumentTypeNode`, a struct: - a Node which provides metadata on how to parse the document 25 | 26 | ## Using the API 27 | 28 | #### Read in Plaintext HTML from a File 29 | ```swift 30 | let fileURL = URL.init(fileURLWithPath: "/some/absolute/path/simple.html")! 31 | ``` 32 | 33 | #### Parse the HTML String Into a Tree of Node Objects (DOM) 34 | ```swift 35 | let nodeTree = try HTMLParser.parse(htmlString) 36 | ``` 37 | Alternativly to parse an XML file 38 | ```swift 39 | let nodeTree = try XMLParser.parse(xmlString) 40 | ``` 41 | 42 | #### Create a Node Selector Path Then Traverse the Node Tree to Find Matching Nodes 43 | Element, Text, Comment, and CData selectors are availabe 44 | ```swift 45 | // create a node selector path to describe what nodes to match in the nodeTree 46 | let nodeSelectorPath: [NodeSelector] = [ 47 | ElementSelector().withTagName("html"), 48 | ElementSelector().withTagName("body"), 49 | ElementSelector().withTagName("div").withClassName("essay"), 50 | ElementSelector().withTagName("p").atPosition(0) 51 | ] 52 | 53 | // find the nodes that match the nodeSelectorPath 54 | let matchingNodes = HTMLTraverser.findNodes(in: nodeTree, matching: nodeSelectorPath) 55 | ``` 56 | 57 | ## Tutorial 58 | 59 | #### The HTML File We Will Use for The Following Examples 60 | We will use the example file: simple.html 61 | ```HTML 62 | 63 | 64 | 65 | This is a Simple Example 66 | 67 | 68 |

This is a Heading

69 | 70 |
71 |

This is the first paragraph.

72 |

This is the second paragraph.

73 |

This is the third paragraph.

74 |

This is the fourth paragraph.

75 |

This is the fifth paragraph.

76 | 77 |
78 |

Editor Notes

79 | No notes here 80 |
81 |
82 | 83 |
84 | 89 | 90 |
91 |

Bibliography Notes

92 | No notes here 93 |
94 |
95 | 96 | 97 | 98 | ``` 99 | 100 | #### Find Matching Elements 101 | ```swift 102 | func parseAndTraverseSimpleHTML() throws { 103 | // get string from file 104 | let fileURL = URL.init(fileURLWithPath: "/some/absolute/path/simple.html")! 105 | let htmlString = try String(contentsOf: fileURL, encoding: .utf8) 106 | 107 | // parse the htmlString into a tree of node objects (DOM) 108 | let nodeTree = try HTMLParser.parse(htmlString) 109 | 110 | // create a node selector path to describe what nodes to match in the nodeTree 111 | let nodeSelectorPath: [NodeSelector] = [ 112 | ElementSelector().withTagName("html"), 113 | ElementSelector().withTagName("body"), 114 | ElementSelector().withTagName("div").atPosition(0), 115 | ElementSelector().withTagName("p").withClassName("body-paragraph") 116 | ] 117 | 118 | // find the elements that match the nodeSelectorPath 119 | // notice we use the findElements() function which only matches elements 120 | let matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 121 | 122 | // matchingElements will contain the 3 matching

elements with the className 'body-paragraph' 123 | // will print: 3 124 | print(matchingElements.count) 125 | } 126 | ``` 127 | 128 | #### Find a Matching Text Node 129 | ```swift 130 | func parseAndTraverseSimpleHTMLTextNode() throws { 131 | // get string from file 132 | let fileURL = URL.init(fileURLWithPath: "/some/absolute/path/simple.html")! 133 | let htmlString = try String(contentsOf: fileURL, encoding: .utf8) 134 | 135 | // parse the htmlString into a tree of node objects (DOM) 136 | let nodeTree = try HTMLParser.parse(htmlString) 137 | 138 | // create a node selector path to describe what nodes to match in the nodeTree 139 | // this is equvalent to the selector: body > p or xpath: /html/body/p 140 | let nodeSelectorPath: [NodeSelector] = [ 141 | ElementSelector().withTagName("html"), 142 | ElementSelector().withTagName("body"), 143 | ElementSelector().withTagName("div").withClassName("bibliography"), 144 | ElementSelector().withTagName("ul"), 145 | ElementSelector().withTagName("li").withId("citation-1999"), 146 | TextNodeSelector() 147 | ] 148 | 149 | // find the nodes that match the nodeSelectorPath 150 | // Notice we use the findNodes() function which can match with any node type 151 | let matchingNodes = HTMLTraverser.findNodes(in: nodeTree, matching: nodeSelectorPath) 152 | 153 | // matchingNodes will contain the matching generic node 154 | // we have to cast the Node to a TextNode to access its text property 155 | guard let paragraphTextNode = matchingNodes.first as? TextNode else { 156 | // could not find paragraph text node 157 | return 158 | } 159 | 160 | // will print: This is the second citation. 161 | print(paragraphTextNode.text) 162 | } 163 | ``` 164 | 165 | #### Find Matching Elements Using a Child Node Selector Path 166 | ```swift 167 | func parseAndTraverseSimpleHTMLChildNodeSelectorPath() throws { 168 | // get string from file 169 | let fileURL = URL.init(fileURLWithPath: "/some/absolute/path/simple.html")! 170 | let htmlString = try String(contentsOf: fileURL, encoding: .utf8) 171 | 172 | // parse the htmlString into a tree of node objects (DOM) 173 | let nodeTree = try HTMLParser.parse(htmlString) 174 | 175 | // create a child node selector path that will match the parent node 176 | // only if the childNodeSelectorPath matches the element's child nodes 177 | let childNodeSelectorPath: [NodeSelector] = [ 178 | ElementSelector().withTagName("div"), 179 | ElementSelector().withTagName("h3"), 180 | TextNodeSelector().withText("Editor Notes") 181 | ] 182 | 183 | // create a node selector path to describe what nodes to match in the nodeTree 184 | // Notice the last ElementSelector will only match if the element contains 185 | // child nodes that match the childNodeSelectorPath 186 | let nodeSelectorPath: [NodeSelector] = [ 187 | ElementSelector().withTagName("html"), 188 | ElementSelector().withTagName("body"), 189 | ElementSelector().withTagName("div").withChildNodeSelectorPath(childNodeSelectorPath) 190 | ] 191 | 192 | // find the nodes that match the nodeSelectorPath 193 | // Notice we use the findNodes() function which can match with any node type 194 | let matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 195 | 196 | // matchingElements should only contain the div element with the 'essay' class name 197 | // will print: 1 198 | print(matchingElements.count) 199 | 200 | guard let divElement = matchingElements.first else { 201 | // could not find paragraph text node 202 | XCTFail("could not find paragraph text node") 203 | return 204 | } 205 | 206 | guard let firstClassName = divElement.classNames.first else { 207 | // divElement does not have any classnames 208 | return 209 | } 210 | 211 | // will print: essay 212 | print(firstClassName) 213 | } 214 | ``` 215 | 216 | ## Testing 217 | Automated testing was used to validate the parsing of tags, comments, single and double quoted attributes, imbedded JavaScript, etc. Specially created sample HTML files as well as HTML from top sites were used in testing. However, all cases may not have been covered. Please open a issue on Github and provide sample HTML if you discover a bug so it can be fixed and a test case can be added. 218 | 219 | 220 | #### Run Tests Via the Command Line 221 | `swift test` 222 | 223 | #### Run Tests Via Docker 224 | `docker build -t swift-html-parser . && docker run -it swift-html-parser` 225 | 226 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Extensions/Appendable.swift: -------------------------------------------------------------------------------- 1 | // 2 | // File.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-10-25. 6 | // 7 | 8 | import Foundation 9 | 10 | protocol Insertable: Collection { 11 | init() 12 | mutating func append(_ newElement: Element) 13 | mutating func append(contentsOf newElements: S) where Element == S.Element, S : Sequence 14 | } 15 | 16 | protocol SetInsertable: Collection { 17 | init() 18 | mutating func insert(_ newMember: Element) -> (inserted: Bool, memberAfterInsert: Element) 19 | mutating func formUnion(_ other: S) where Element == S.Element, S : Sequence 20 | } 21 | 22 | extension Array: Insertable {} 23 | extension Set: SetInsertable {} 24 | 25 | extension Optional where Wrapped: Insertable { 26 | mutating func appendOrInit(_ newElement: Wrapped.Iterator.Element) { 27 | if self == nil { 28 | var newArray = Wrapped.init() 29 | newArray.append(newElement) 30 | self = newArray 31 | } else { 32 | self?.append(newElement) 33 | } 34 | } 35 | 36 | mutating func appendOrInit(contentsOf newElements: S) where Wrapped.Iterator.Element == S.Element, S : Sequence { 37 | if self == nil { 38 | var newArray = Wrapped.init() 39 | newArray.append(contentsOf: newElements) 40 | self = newArray 41 | } else { 42 | self?.append(contentsOf: newElements) 43 | } 44 | } 45 | } 46 | 47 | extension Optional where Wrapped: SetInsertable { 48 | mutating func insertOrInit(_ newElement: Wrapped.Iterator.Element) { 49 | if self == nil { 50 | var newSet = Wrapped.init() 51 | _ = newSet.insert(newElement) 52 | self = newSet 53 | } else { 54 | _ = self?.insert(newElement) 55 | } 56 | } 57 | 58 | mutating func formUnionOrInit(_ other: S) where Wrapped.Iterator.Element == S.Element, S : Sequence { 59 | if self == nil { 60 | var newSet = Wrapped.init() 61 | newSet.formUnion(other) 62 | self = newSet 63 | } else { 64 | self?.formUnion(other) 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Extensions/CharacterExtensions.swift: -------------------------------------------------------------------------------- 1 | // 2 | // CharacterExtensions.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-09-11. 6 | // 7 | 8 | import Foundation 9 | 10 | extension Character { 11 | func isEqualToOneOf(characters: [Character]) -> Bool { 12 | for aCharacter in characters { 13 | if self == aCharacter { 14 | return true 15 | } 16 | } 17 | 18 | return false 19 | } 20 | 21 | func isNotEqualToOneOf(characters: [Character]) -> Bool { 22 | return !self.isEqualToOneOf(characters: characters) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Extensions/StringExtensions.swift: -------------------------------------------------------------------------------- 1 | // 2 | // StringExtensions.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-08-19. 6 | // 7 | 8 | import Foundation 9 | 10 | extension String { 11 | func subscring(after afterIndex: String.Index, numberOfCharacters: Int) -> String { 12 | let lastIndex = self.index(afterIndex, offsetBy: numberOfCharacters) 13 | if lastIndex < self.endIndex { 14 | return String(self[afterIndex...lastIndex]) 15 | } else { 16 | return String(self[afterIndex...self.endIndex]) 17 | } 18 | } 19 | 20 | func encompassesIndex(_ index: String.Index) -> Bool { 21 | if (index < self.endIndex) { 22 | return true 23 | } 24 | 25 | return false 26 | } 27 | 28 | func isEmptyOrWhitespace() -> Bool { 29 | if(self.isEmpty) { 30 | return true 31 | } 32 | 33 | if self.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines).isEmpty { 34 | return true 35 | } 36 | 37 | return false 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Helpers/RegexError.swift: -------------------------------------------------------------------------------- 1 | // 2 | // RegexError.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-05. 6 | // 7 | 8 | import Foundation 9 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Helpers/RegexHelper.swift: -------------------------------------------------------------------------------- 1 | // 2 | // RegexHelper.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-08-07. 6 | // 7 | 8 | import Foundation 9 | 10 | struct RegexHelper { 11 | 12 | func matchRanges(for regexPattern: String, inString inputString: String) -> [Range] { 13 | guard let regex = try? NSRegularExpression(pattern: regexPattern, options: [.caseInsensitive]) else { 14 | //print("invalid regex") 15 | return [] 16 | } 17 | 18 | let range = NSRange(inputString.startIndex..., in: inputString) 19 | let matches = regex.matches(in: inputString, options: [], range: range) 20 | 21 | var matchRanges = [Range]() 22 | for match in matches { 23 | matchRanges.append(Range(match.range, in: inputString)!) 24 | } 25 | 26 | return matchRanges 27 | } 28 | 29 | func matches(for regexPattern: String, inString inputString: String) -> [String] { 30 | let matchRanges = self.matchRanges(for: regexPattern, inString: inputString) 31 | 32 | var matchingStrings = [String]() 33 | for range in matchRanges { 34 | matchingStrings.append(String(inputString[range])) 35 | } 36 | 37 | return matchingStrings 38 | } 39 | 40 | func firstMatchRange(for regexPattern: String, inString inputString: String) -> Range? { 41 | guard let regex = try? NSRegularExpression(pattern: regexPattern, options: [.caseInsensitive]) else { 42 | //print("Invalid Regex Pattern: \(regexPattern)") 43 | return nil 44 | } 45 | 46 | let range = NSRange(inputString.startIndex..., in: inputString) 47 | let firstMatch = regex.firstMatch(in: inputString, options: [], range: range) 48 | 49 | if let match = firstMatch { 50 | // first match found 51 | return Range(match.range, in: inputString)! 52 | } else { 53 | // no match found 54 | return nil 55 | } 56 | } 57 | 58 | func firstMatch(for regexPattern: String, inString inputString: String) -> String? { 59 | let firstMatchRange = self.firstMatchRange(for: regexPattern, inString: inputString) 60 | 61 | if let range = firstMatchRange { 62 | // match found 63 | let matchingString = String(inputString[range]) 64 | return matchingString 65 | } else { 66 | // no match found 67 | return nil 68 | } 69 | } 70 | 71 | func replaceFirstMatch(for regexPattern: String, inString inputString: String, withString replacementString: String) -> String { 72 | let firstMatchRange = self.firstMatchRange(for: regexPattern, inString: inputString) 73 | 74 | if let range = firstMatchRange { 75 | // match found 76 | return inputString.replacingCharacters(in: range, with: replacementString) 77 | } else { 78 | // no match found 79 | return inputString 80 | } 81 | } 82 | 83 | func replaceMatches(for regexPattern: String, inString inputString: String, withString replacementString: String) -> String? { 84 | guard let regex = try? NSRegularExpression(pattern: regexPattern, options: []) else { 85 | return inputString 86 | } 87 | 88 | let range = NSRange(inputString.startIndex..., in: inputString) 89 | return regex.stringByReplacingMatches(in: inputString, options: [], range: range, withTemplate: replacementString) 90 | } 91 | 92 | } 93 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/AttributeParser.swift: -------------------------------------------------------------------------------- 1 | // 2 | // AttributeParser.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-05. 6 | // 7 | 8 | import Foundation 9 | 10 | struct AttributeParser { 11 | 12 | enum AttributeParserState { 13 | case lookingForAttributeName 14 | case readingAttributeName 15 | case readingFirstAttributeValue 16 | case readingAttributeValue 17 | case foundAttribute 18 | } 19 | 20 | enum AttributeValueParseState { 21 | case withinDoubleQuotes 22 | case withinSingleQuotes 23 | case notWithinQuotes 24 | } 25 | 26 | fileprivate let specificCharacters = TagSpecificCharacters() 27 | 28 | func parseAttributes(tagText: String, tagName: String) -> [String: Attribute] { 29 | var attributes = [String: Attribute]() 30 | 31 | let regexHelper = RegexHelper() 32 | let tagNameRegexPattern = "^(<\\s*\(tagName))" 33 | let rangeOfTagNameResult = regexHelper.firstMatchRange(for: tagNameRegexPattern, inString: tagText) 34 | guard let rangeOfTagName = rangeOfTagNameResult else { 35 | //print("Error: Could not find tag name in tag text") 36 | return attributes 37 | } 38 | var currentIndex = rangeOfTagName.upperBound 39 | 40 | var couldNotFindAttribute = false 41 | while currentIndex < tagText.endIndex && couldNotFindAttribute == false { 42 | do { 43 | let attribute = try getNextAttribute(tagText: tagText, currentIndex: currentIndex) 44 | attributes[attribute.name] = attribute 45 | // set the currentIndex to endIndex of the attribute 46 | currentIndex = tagText.index(attribute.endIndex, offsetBy: 1) 47 | } catch { 48 | couldNotFindAttribute = true 49 | } 50 | } 51 | 52 | return attributes 53 | } 54 | 55 | func getNextAttribute(tagText: String, currentIndex: String.Index) throws -> Attribute { 56 | var localCurrentIndex = currentIndex 57 | 58 | var parserState = AttributeParserState.lookingForAttributeName 59 | var nameStartIndex: String.Index? 60 | var nameEndIndex: String.Index? 61 | 62 | var valueParseState = AttributeValueParseState.notWithinQuotes 63 | var valueStartIndex: String.Index? 64 | var valueEndIndex: String.Index? 65 | var valueStartIndexWithQuotes: String.Index? 66 | var valueEndIndexWithQuotes: String.Index? 67 | 68 | while localCurrentIndex < tagText.endIndex && parserState != .foundAttribute { 69 | 70 | switch parserState { 71 | case .lookingForAttributeName: 72 | if tagText[localCurrentIndex].isWhitespace == false { 73 | nameStartIndex = localCurrentIndex 74 | parserState = .readingAttributeName 75 | } 76 | case .readingAttributeName: 77 | if tagText[localCurrentIndex] == specificCharacters.equalSign { 78 | // attribute name ended - continue looking for value 79 | nameEndIndex = tagText.index(localCurrentIndex, offsetBy: -1) 80 | parserState = .readingFirstAttributeValue 81 | } 82 | if tagText[localCurrentIndex].isWhitespace { 83 | // attribute name only 84 | nameEndIndex = tagText.index(localCurrentIndex, offsetBy: -1) 85 | parserState = .foundAttribute 86 | } 87 | if tagText[localCurrentIndex] == specificCharacters.tagClosingCharacter { 88 | // end of tag - attribute name only 89 | nameEndIndex = tagText.index(localCurrentIndex, offsetBy: -1) 90 | parserState = .foundAttribute 91 | } 92 | case .readingFirstAttributeValue: 93 | if tagText[localCurrentIndex] == specificCharacters.doubleQuote { 94 | // dont include quotes in valueStartIndex (only in valueStartIndexWithQuotes) 95 | valueStartIndexWithQuotes = localCurrentIndex 96 | valueParseState = .withinDoubleQuotes 97 | } 98 | if tagText[localCurrentIndex] == specificCharacters.singleQuote { 99 | // dont include quotes in valueStartIndex (only in valueStartIndexWithQuotes) 100 | valueStartIndexWithQuotes = localCurrentIndex 101 | valueParseState = .withinSingleQuotes 102 | } 103 | parserState = .readingAttributeValue 104 | case .readingAttributeValue: 105 | switch valueParseState { 106 | case .notWithinQuotes: 107 | if tagText[localCurrentIndex].isWhitespace { 108 | // attribute name only 109 | valueEndIndex = tagText.index(localCurrentIndex, offsetBy: -1) 110 | parserState = .foundAttribute 111 | } 112 | if tagText[localCurrentIndex] == specificCharacters.tagClosingCharacter { 113 | // end of tag - attribute name only 114 | valueEndIndex = tagText.index(localCurrentIndex, offsetBy: -1) 115 | parserState = .foundAttribute 116 | } 117 | case .withinDoubleQuotes: 118 | if tagText[localCurrentIndex] == specificCharacters.doubleQuote { 119 | valueEndIndexWithQuotes = localCurrentIndex 120 | parserState = .foundAttribute 121 | } 122 | 123 | case .withinSingleQuotes: 124 | if tagText[localCurrentIndex] == specificCharacters.singleQuote { 125 | valueEndIndexWithQuotes = localCurrentIndex 126 | parserState = .foundAttribute 127 | } 128 | } 129 | case .foundAttribute: 130 | break 131 | } 132 | 133 | // increment localCurentIndex 134 | localCurrentIndex = tagText.index(localCurrentIndex, offsetBy: 1) 135 | } 136 | 137 | if let nameStartIndex = nameStartIndex, let nameEndIndex = nameEndIndex { 138 | let name = String(tagText[nameStartIndex...nameEndIndex]) 139 | var value: String? 140 | 141 | var isAttributeValueAnEmptyString = false 142 | if let valueStartIndexWithQuotes = valueStartIndexWithQuotes, let valueEndIndexWithQuotes = valueEndIndexWithQuotes { 143 | // value is within qutoes 144 | if tagText.distance(from: valueStartIndexWithQuotes, to: valueEndIndexWithQuotes) == 1 { 145 | // value is empty string - i.e "" 146 | isAttributeValueAnEmptyString = true 147 | valueStartIndex = valueStartIndexWithQuotes 148 | valueEndIndex = valueEndIndexWithQuotes 149 | value = "" 150 | } else { 151 | // value is not empty string 152 | valueStartIndex = tagText.index(valueStartIndexWithQuotes, offsetBy: 1) 153 | valueEndIndex = tagText.index(valueEndIndexWithQuotes, offsetBy: -1) 154 | } 155 | } 156 | 157 | // set value 158 | if isAttributeValueAnEmptyString == false { 159 | if let valueStartIndex = valueStartIndex, let valueEndIndex = valueEndIndex { 160 | value = String(tagText[valueStartIndex...valueEndIndex]) 161 | } 162 | } 163 | 164 | return Attribute.init(nameStartIndex: nameStartIndex, 165 | nameEndIndex: nameEndIndex, 166 | valueStartIndex: valueStartIndex, 167 | valueEndIndex: valueEndIndex, 168 | valueStartIndexWithQuotes: valueStartIndexWithQuotes, 169 | valueEndIndexWithQuotes: valueEndIndexWithQuotes, 170 | name: name, 171 | value: value) 172 | } 173 | 174 | throw ParseError.attributeNotFound 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/CDATAParser.swift: -------------------------------------------------------------------------------- 1 | // 2 | // CDATAParser.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-13. 6 | // 7 | 8 | import Foundation 9 | 10 | struct CDATASpecialCharacters { 11 | // strings 12 | let CDATAOpening = "" 14 | } 15 | 16 | struct CDATAParser { 17 | fileprivate let lookaheadValidator = LookaheadValidator() 18 | fileprivate let specialCharacters = CDATASpecialCharacters() 19 | 20 | func parse(source: String, currentIndex: String.Index) throws -> CData { 21 | var localCurrentIndex = currentIndex 22 | let startIndex = currentIndex 23 | var textStartIndex: String.Index? 24 | 25 | // validate stating 26 | if lookaheadValidator.isValidLookahead(for: source, atIndex: localCurrentIndex, 27 | checkFor: specialCharacters.CDATAOpening) { 28 | localCurrentIndex = source.index(localCurrentIndex, offsetBy: specialCharacters.CDATAOpening.count) 29 | textStartIndex = localCurrentIndex 30 | } else { 31 | throw ParseError.invalidCDATA 32 | } 33 | 34 | while localCurrentIndex < source.endIndex { 35 | if lookaheadValidator.isValidLookahead(for: source, atIndex: localCurrentIndex, 36 | checkFor: specialCharacters.CDATAClosing) { 37 | let textEndIndex = source.index(localCurrentIndex, offsetBy: -1) 38 | let endIndex = source.index(localCurrentIndex, offsetBy: (specialCharacters.CDATAClosing.count - 1)) 39 | 40 | return CData.init(startIndex: startIndex, 41 | endIndex: endIndex, 42 | textStartIndex: textStartIndex!, 43 | textEndIndex: textEndIndex, 44 | text: String(source[textStartIndex!...textEndIndex])) 45 | } 46 | // increment localCurrentIndex 47 | localCurrentIndex = source.index(localCurrentIndex, offsetBy: 1) 48 | } 49 | 50 | throw ParseError.endOfFileReachedBeforeCDATACloseFound 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/CommentParser.swift: -------------------------------------------------------------------------------- 1 | // 2 | // CommentParser.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-08. 6 | // 7 | 8 | import Foundation 9 | 10 | struct CommentSpecialCharacters { 11 | // strings 12 | let declarationOpening = "" 15 | let declarationClosing = ">" 16 | let conditionalCommentOpening = "" 18 | } 19 | 20 | enum CommentType { 21 | case comment 22 | case declaration 23 | } 24 | 25 | /// Parses comments 26 | struct CommentParser { 27 | fileprivate let lookaheadValidator = LookaheadValidator() 28 | fileprivate let SpecialCharacters = CommentSpecialCharacters() 29 | 30 | /// Parses a comment starting at currentIndex 31 | /// Example of a comment: 32 | func parseComment(source: String, currentIndex: String.Index, commentType: CommentType) throws -> Comment { 33 | let startIndex = currentIndex 34 | 35 | // skip over html comment opening i.e 84 | func parseConditionalComment(source: String, currentIndex: String.Index) throws -> Comment { 85 | let startIndex = currentIndex 86 | // skip over html comment opening i.e 63 | static func removeIEStatments(pageSource: String) -> String { 64 | //let pattern = "" 65 | //return pageSource.replacingOccurrences(of: pattern, with: "", options: [.regularExpression]) 66 | 67 | return pageSource 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/LookaheadValidator.swift: -------------------------------------------------------------------------------- 1 | // 2 | // LookAheadValidator.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-09. 6 | // 7 | 8 | import Foundation 9 | 10 | struct LookaheadValidator { 11 | 12 | func isValidLookahead(for source: String, atIndex currentIndex: String.Index, checkFor stringToCheckFor: String) -> Bool { 13 | var localCurrentIndex = currentIndex 14 | var stringToCheckForCurrentIndex = stringToCheckFor.startIndex 15 | 16 | while stringToCheckForCurrentIndex < stringToCheckFor.endIndex { 17 | // check localCurrentIndex hasn't gone past soure endIndex 18 | if (localCurrentIndex > source.endIndex) { 19 | return false 20 | } 21 | 22 | // compare characters 23 | if source[localCurrentIndex] != stringToCheckFor[stringToCheckForCurrentIndex] { 24 | // found a character in source that did not match lookahead 25 | return false 26 | } 27 | 28 | // increment localCurrentIndex and stringToCheckForCurrentIndex (go to next character in string) 29 | localCurrentIndex = source.index(localCurrentIndex, offsetBy: 1) 30 | stringToCheckForCurrentIndex = stringToCheckFor.index(stringToCheckForCurrentIndex, offsetBy: 1) 31 | } 32 | 33 | return true 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/Models/Attribute.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Attribute.swift 3 | // HTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-02-13. 6 | // Copyright © 2018 Reid Nantes. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | public struct Attribute: Node { 12 | public var nodeType = NodeType.attribute 13 | public var name: String 14 | public var value: String? 15 | 16 | var nameStartIndex: String.Index 17 | var nameEndIndex: String.Index 18 | 19 | var valueStartIndex: String.Index? 20 | var valueEndIndex: String.Index? 21 | var valueStartIndexWithQuotes: String.Index? 22 | var valueEndIndexWithQuotes: String.Index? 23 | 24 | public var endIndex: String.Index { 25 | if valueEndIndexWithQuotes != nil { 26 | return valueEndIndexWithQuotes! 27 | } 28 | 29 | if valueEndIndex != nil { 30 | return valueEndIndex! 31 | } 32 | 33 | return nameEndIndex 34 | } 35 | 36 | public var startIndex: String.Index { 37 | return nameStartIndex 38 | } 39 | 40 | public init(nameStartIndex: String.Index, 41 | nameEndIndex: String.Index, 42 | valueStartIndex: String.Index?, 43 | valueEndIndex: String.Index?, 44 | valueStartIndexWithQuotes: String.Index?, 45 | valueEndIndexWithQuotes: String.Index?, 46 | name: String, 47 | value: String?) { 48 | self.nameStartIndex = nameStartIndex 49 | self.nameEndIndex = nameEndIndex 50 | 51 | self.valueStartIndex = valueStartIndex 52 | self.valueEndIndex = valueEndIndex 53 | 54 | self.valueStartIndexWithQuotes = valueStartIndexWithQuotes 55 | self.valueEndIndexWithQuotes = valueEndIndexWithQuotes 56 | 57 | self.name = name 58 | self.value = value 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/Models/Nodes/CData.swift: -------------------------------------------------------------------------------- 1 | // 2 | // CDATA.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-13. 6 | // 7 | 8 | import Foundation 9 | 10 | public struct CData: Node { 11 | public let nodeType = NodeType.CDATASection 12 | public var startIndex: String.Index 13 | public var endIndex: String.Index 14 | 15 | var textStartIndex: String.Index 16 | var textEndIndex: String.Index 17 | 18 | public var text: String 19 | 20 | public init (startIndex: String.Index, endIndex: String.Index, textStartIndex: String.Index, textEndIndex: String.Index, text: String) { 21 | self.startIndex = startIndex 22 | self.endIndex = endIndex 23 | self.textStartIndex = textStartIndex 24 | self.textEndIndex = textEndIndex 25 | 26 | self.text = text 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/Models/Nodes/Comment.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Comment.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-08. 6 | // 7 | 8 | import Foundation 9 | 10 | public struct Comment: Node { 11 | public let nodeType = NodeType.comment 12 | public var startIndex: String.Index 13 | public var endIndex: String.Index 14 | 15 | var textStartIndex: String.Index 16 | var textEndIndex: String.Index 17 | 18 | public var text: String 19 | 20 | init (startIndex: String.Index, endIndex: String.Index, textStartIndex: String.Index, textEndIndex: String.Index, text: String) { 21 | self.startIndex = startIndex 22 | self.endIndex = endIndex 23 | self.textStartIndex = textStartIndex 24 | self.textEndIndex = textEndIndex 25 | 26 | self.text = text 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/Models/Nodes/DocumentTypeNode.swift: -------------------------------------------------------------------------------- 1 | // 2 | // DocumentTypeNode.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-09-07. 6 | // 7 | 8 | import Foundation 9 | 10 | struct DocumentTypeNode: Node { 11 | public let nodeType = NodeType.documentType 12 | public var startIndex: String.Index 13 | public var endIndex: String.Index 14 | public var name: String 15 | } 16 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/Models/Nodes/Element.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ElementF.swift 3 | // HTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-02-13. 6 | // Copyright © 2018 Reid Nantes. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | public struct Element: Node { 12 | public let nodeType = NodeType.element 13 | var openingTag: Tag 14 | var closingTag: Tag? 15 | 16 | public var childNodes: [Node] 17 | 18 | // index information 19 | public var depth: Int 20 | 21 | public var startIndex: String.Index { 22 | get { 23 | return openingTag.startIndex 24 | } 25 | } 26 | public var endIndex: String.Index { 27 | get { 28 | if closingTag != nil { 29 | return closingTag!.endIndex 30 | } else { 31 | return openingTag.endIndex 32 | } 33 | } 34 | } 35 | 36 | public var isEmptyElement: Bool { 37 | get { 38 | return openingTag.isEmptyElementTag 39 | } 40 | } 41 | 42 | public var isSelfClosingElement: Bool { 43 | get { 44 | return openingTag.isSelfClosing 45 | } 46 | } 47 | 48 | public var tagName: String { 49 | return openingTag.tagName 50 | } 51 | 52 | public var id: String? { 53 | return openingTag.attributes["id"]?.value 54 | } 55 | 56 | public var classNames: [String] { 57 | return openingTag.classNames 58 | } 59 | 60 | public let commentNodes: [Comment] 61 | // lazy var commentNodes: [Comment] = { 62 | // return childNodes.filter({ $0.nodeType == NodeType.comment }) as! [Comment] 63 | // }() 64 | 65 | public let textNodes: [TextNode] 66 | // lazy var textNodes: [TextNode] = { 67 | // return childNodes.filter({ $0.nodeType == NodeType.text }) as! [TextNode] 68 | // }() 69 | 70 | public let CDATASections: [CData] 71 | // lazy var CDATASections: [CData] = { 72 | // return childNodes.filter({ $0.nodeType == NodeType.CDATASection }) as! [CData] 73 | // }() 74 | 75 | public let childElements: [Element] 76 | // lazy var childElements: [Element] = { 77 | // return childNodes.filter({ $0.nodeType == NodeType.element }) as! [Element] 78 | // }() 79 | 80 | init(openingTag: Tag, closingTag: Tag?, childNodes: [Node], depth: Int) { 81 | self.depth = depth 82 | self.openingTag = openingTag 83 | self.closingTag = closingTag 84 | self.childNodes = childNodes 85 | self.depth = depth 86 | 87 | self.textNodes = childNodes.filter({ $0.nodeType == NodeType.text }) as! [TextNode] 88 | self.CDATASections = childNodes.filter({ $0.nodeType == NodeType.CDATASection }) as! [CData] 89 | self.commentNodes = childNodes.filter({ $0.nodeType == NodeType.comment }) as! [Comment] 90 | self.childElements = childNodes.filter({ $0.nodeType == NodeType.element }) as! [Element] 91 | } 92 | 93 | public func attribute(attributeName: String) -> Attribute? { 94 | return openingTag.attributes[attributeName] 95 | } 96 | 97 | public func attributeValue(for attributeName: String) -> String? { 98 | return openingTag.attributes[attributeName]?.value 99 | } 100 | 101 | func containsAttribute(_ attributeName: String) -> Bool { 102 | if openingTag.attributes[attributeName] != nil { 103 | return true 104 | } else { 105 | return false 106 | } 107 | } 108 | 109 | func innerTextBlocksContains(text: String) -> Bool { 110 | for textNode in textNodes { 111 | if textNode.text.contains(text) { 112 | return true 113 | } 114 | } 115 | 116 | return false 117 | } 118 | 119 | func innerCDataContains(text: String) -> Bool { 120 | for cData in CDATASections { 121 | if cData.text.contains(text) { 122 | return true 123 | } 124 | } 125 | 126 | return false 127 | } 128 | 129 | func commentsContains(text: String) -> Bool { 130 | for comment in commentNodes { 131 | if comment.text.contains(text) { 132 | return true 133 | } 134 | } 135 | 136 | return false 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/Models/Nodes/Node.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Node.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-08. 6 | // 7 | 8 | import Foundation 9 | 10 | public protocol Node { 11 | var nodeType: NodeType { get } 12 | var startIndex: String.Index { get } 13 | var endIndex: String.Index { get } 14 | } 15 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/Models/Nodes/NodeType.swift: -------------------------------------------------------------------------------- 1 | // 2 | // NodeType.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-08. 6 | // 7 | 8 | import Foundation 9 | 10 | public enum NodeType: Int { 11 | case element = 1 12 | case attribute = 2 13 | case text = 3 14 | case CDATASection = 4 15 | case comment = 8 16 | case documentType = 10 17 | } 18 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/Models/Nodes/TextNode.swift: -------------------------------------------------------------------------------- 1 | // 2 | // TextNode.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-08. 6 | // 7 | 8 | import Foundation 9 | 10 | public struct TextNode: Node { 11 | public let nodeType = NodeType.text 12 | public var startIndex: String.Index 13 | public var endIndex: String.Index 14 | public var text: String 15 | 16 | init (startIndex: String.Index, endIndex: String.Index, text: String) { 17 | self.startIndex = startIndex 18 | self.endIndex = endIndex 19 | self.text = text.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/Models/Tag.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Tag.swift 3 | // HTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-02-13. 6 | // Copyright © 2018 Reid Nantes. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | // a closing or opening tag 12 | public struct Tag { 13 | let startIndex: String.Index 14 | let endIndex: String.Index 15 | 16 | fileprivate var _isEmptyElementTag: Bool = false 17 | var isEmptyElementTag: Bool { 18 | return _isEmptyElementTag 19 | } 20 | fileprivate var _isClosingTag: Bool = false 21 | var isClosingTag: Bool { 22 | return _isClosingTag 23 | } 24 | fileprivate var _isSelfClosing: Bool = false 25 | var isSelfClosing: Bool { 26 | return _isSelfClosing 27 | } 28 | 29 | fileprivate var classNamesCache = [String]() 30 | var classNames: [String] { 31 | guard let classAttribute = attributes["class"] else { 32 | return [] 33 | } 34 | 35 | guard let classAttributeValue = classAttribute.value else { 36 | return [] 37 | } 38 | 39 | return getClassNames(classAttributeValue: classAttributeValue) 40 | } 41 | 42 | let tagText: String 43 | 44 | let tagName: String 45 | let attributes: [String: Attribute] 46 | 47 | public init(startIndex: String.Index, endIndex: String.Index, tagText: String, tagName: String) { 48 | self.startIndex = startIndex 49 | self.endIndex = endIndex 50 | 51 | self.tagText = tagText 52 | self.tagName = tagName 53 | 54 | let attributeParser = AttributeParser() 55 | self.attributes = attributeParser.parseAttributes(tagText: tagText, tagName: tagName) 56 | 57 | self._isSelfClosing = checkIsSelfClosing(tagText: tagText) 58 | self._isEmptyElementTag = checkIsEmptyElementTag(tagName: tagName) 59 | self._isClosingTag = checkIsClosingTag() 60 | } 61 | 62 | // checks if empty element 63 | // HTML elements with no content are called empty elements. Empty elements do not have an end tag (ex:
) 64 | func checkIsEmptyElementTag(tagName: String) -> Bool { 65 | let tagNameWithoutSlash = tagName.replacingOccurrences(of: "/", with: "") 66 | 67 | // check if known empty element 68 | if emptyElementTagNames.contains(tagNameWithoutSlash) { 69 | return true 70 | } 71 | 72 | // check if DOCTYPE 73 | if tagName.caseInsensitiveCompare("!DOCTYPE") == ComparisonResult.orderedSame { 74 | return true 75 | } 76 | 77 | return false 78 | } 79 | 80 | // check if tag is self closing, ending with /> 81 | // ex: i.e 82 | func checkIsSelfClosing(tagText: String) -> Bool { 83 | let lastCharacter = tagText[tagText.index(tagText.endIndex, offsetBy: -1)] 84 | let secondLastCharacter = tagText[tagText.index(tagText.endIndex, offsetBy: -2)] 85 | 86 | if lastCharacter == ">" && secondLastCharacter == "/" { 87 | return true 88 | } else { 89 | return false 90 | } 91 | } 92 | 93 | func checkIsClosingTag() -> Bool { 94 | if tagText.prefix(2) == "" { 99 | return true 100 | } 101 | 102 | return false 103 | } 104 | 105 | func getDescription() -> String { 106 | var description = "" 107 | description = description + "tagText: \(tagText)\n" 108 | description = description + "tagText.count: \(tagText.count)\n" 109 | description = description + "tag.startIndex: \(startIndex.utf16Offset(in: tagText))\n" 110 | description = description + "tag.endIndex: \(endIndex.utf16Offset(in: tagText))\n" 111 | 112 | return description 113 | } 114 | 115 | func getClassNames(classAttributeValue: String) -> [String] { 116 | // (?=\s*) -> 0 or more whitespaces, but dont capture 117 | // [\w\d]+ -> 1 or more non-whitespace characters 118 | let classNameRegexPattern = "(?=\\s*)[^\\n\\r\\s]+(?=\\s*)" 119 | 120 | let regexHelper = RegexHelper() 121 | return regexHelper.matches(for: classNameRegexPattern, inString: classAttributeValue) 122 | } 123 | 124 | } 125 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/ParseError.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ParseError.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-05. 6 | // 7 | 8 | import Foundation 9 | 10 | enum ParseError: Error { 11 | case tagNotFound 12 | case tagNameNotFound 13 | case invalidTag 14 | case openingTagNotFound 15 | case canNotFindClosingTagWithoutAnyOpenedTags 16 | case closingTagNotFound(String) 17 | case attributeNotFound 18 | case closingTagNameDoesNotMatchOpeningTagName(erroredTag: Tag) 19 | case endOfFileReachedBeforeClosingTagFound 20 | case endOfFileReachedBeforeScriptClosingTagFound 21 | case endOfFileReachedBeforeCommentCloseFound 22 | case endOfFileReachedBeforeCDATACloseFound 23 | case invalidCDATA 24 | } 25 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/ParseFormat.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ParseFormat.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-13. 6 | // 7 | 8 | import Foundation 9 | 10 | public enum ParseFormat { 11 | case html 12 | case xml 13 | case svg 14 | } 15 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/ScriptParser.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ScriptParser.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-09. 6 | // 7 | 8 | import Foundation 9 | 10 | struct ScriptParser { 11 | 12 | enum ScriptParseState { 13 | case notWithinQuotesOrComment 14 | case withinDoubleQuotes 15 | case withinSingleQuotes 16 | case withinMultiLineComment 17 | case withinSingleLineComment 18 | } 19 | 20 | struct ScriptSpecificCharacters { 21 | let scriptEndTag = "" 22 | 23 | // strings 24 | let multiLineCommentOpening = "/*" 25 | let multiLineCommentClosing = "*/" 26 | let SingleLineCommentOpening = "//" 27 | 28 | let escapedBackslash = "\\\\" // i.e \\ 29 | let escapedDoubleQuote = "\\\"" // i.e \" 30 | let escapedSingleQuote = "\\'" // i.e \' 31 | 32 | // characters 33 | let doubleQuote: Character = "\"" // i.e " 34 | let singleQuote: Character = "'" // i.e ' 35 | let newline: Character = "\n" 36 | } 37 | 38 | fileprivate let lookaheadValidator = LookaheadValidator() 39 | 40 | // not intended to fully parse javascript, rather save it to inner text 41 | func parseScript(source: String, currentIndex: String.Index) throws -> (innerTextBlock: TextNode, closingScriptTag: Tag) { 42 | var localCurrentIndex = currentIndex 43 | var parseState = ScriptParseState.notWithinQuotesOrComment 44 | //var isTagOpened = false 45 | 46 | //var tagStartIndex: String.Index? = nil 47 | let specificCharacters = ScriptSpecificCharacters() 48 | 49 | while localCurrentIndex < source.endIndex { 50 | switch parseState { 51 | case .notWithinQuotesOrComment: 52 | if lookaheadValidator.isValidLookahead(for: source, atIndex: localCurrentIndex, checkFor: specificCharacters.scriptEndTag) { 53 | let tagStartIndex = localCurrentIndex 54 | let tagEndIndex = source.index(localCurrentIndex, offsetBy: 8) 55 | 56 | // create tagText string from indexes 57 | let tagText = String(source[tagStartIndex...tagEndIndex]) 58 | let tagName = "/script" 59 | 60 | //define innerTextBlock 61 | let textBlockStartIndex = currentIndex 62 | let textBlockEndIndex = source.index(tagStartIndex, offsetBy: -1) 63 | var textBlockString = "" 64 | // create string if text block is not an empty string - i.e 65 | if (source.distance(from: textBlockStartIndex, to: textBlockEndIndex) > 0) { 66 | textBlockString = String(source[textBlockStartIndex...textBlockEndIndex]) 67 | } 68 | 69 | let innerTextBlock = TextNode.init(startIndex: textBlockStartIndex, 70 | endIndex: textBlockEndIndex, 71 | text: textBlockString) 72 | 73 | let tag = Tag.init(startIndex: tagStartIndex, endIndex: tagEndIndex, tagText: tagText, tagName: tagName) 74 | return (innerTextBlock, tag) 75 | } 76 | // look for quotes and comments 77 | if source[localCurrentIndex] == specificCharacters.doubleQuote { 78 | parseState = .withinDoubleQuotes 79 | } else if source[localCurrentIndex] == specificCharacters.singleQuote { 80 | parseState = .withinSingleQuotes 81 | } else if lookaheadValidator.isValidLookahead(for: source, atIndex: localCurrentIndex, 82 | checkFor: specificCharacters.multiLineCommentOpening) { 83 | parseState = .withinMultiLineComment 84 | } else if lookaheadValidator.isValidLookahead(for: source, atIndex: localCurrentIndex, 85 | checkFor: specificCharacters.SingleLineCommentOpening) { 86 | parseState = .withinSingleLineComment 87 | } 88 | case .withinDoubleQuotes: 89 | if lookaheadValidator.isValidLookahead(for: source, atIndex: localCurrentIndex, 90 | checkFor: specificCharacters.escapedBackslash) { 91 | // is escaped backslash 92 | localCurrentIndex = source.index(localCurrentIndex, offsetBy: 1) 93 | } else if lookaheadValidator.isValidLookahead(for: source, atIndex: localCurrentIndex, 94 | checkFor: specificCharacters.escapedDoubleQuote) { 95 | // is double quote escape character - increment localCurrentIndex past it 96 | localCurrentIndex = source.index(localCurrentIndex, offsetBy: 1) 97 | } else if source[localCurrentIndex] == specificCharacters.doubleQuote { 98 | parseState = .notWithinQuotesOrComment 99 | } 100 | case .withinSingleQuotes: 101 | if lookaheadValidator.isValidLookahead(for: source, atIndex: localCurrentIndex, 102 | checkFor: specificCharacters.escapedBackslash) { 103 | // is escaped backslash 104 | localCurrentIndex = source.index(localCurrentIndex, offsetBy: 1) 105 | } else if lookaheadValidator.isValidLookahead(for: source, atIndex: localCurrentIndex, 106 | checkFor: specificCharacters.escapedSingleQuote) { 107 | // is single quoute escape character - increment localCurrentIndex past it 108 | localCurrentIndex = source.index(localCurrentIndex, offsetBy: 1) 109 | } else if source[localCurrentIndex] == specificCharacters.singleQuote { 110 | parseState = .notWithinQuotesOrComment 111 | } 112 | case .withinMultiLineComment: 113 | if lookaheadValidator.isValidLookahead(for: source, atIndex: localCurrentIndex, 114 | checkFor: specificCharacters.multiLineCommentClosing) { 115 | parseState = .notWithinQuotesOrComment 116 | } 117 | case .withinSingleLineComment: 118 | if source[localCurrentIndex] == specificCharacters.newline { 119 | parseState = .notWithinQuotesOrComment 120 | } 121 | } 122 | 123 | //print(localCurrentIndex.encodedOffset) 124 | 125 | // if localCurrentIndex.encodedOffset % 100 == 0 { 126 | // print(localCurrentIndex.encodedOffset) 127 | // } 128 | 129 | // increment localCurrentIndex (go to next character in string) 130 | localCurrentIndex = source.index(localCurrentIndex, offsetBy: 1) 131 | } 132 | 133 | // throw error if a tag not found before end of file reached 134 | throw ParseError.endOfFileReachedBeforeScriptClosingTagFound 135 | } 136 | 137 | func checkIfScriptClosingTag() { 138 | 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/TagParser.swift: -------------------------------------------------------------------------------- 1 | // 2 | // TagParser.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-04. 6 | // 7 | 8 | import Foundation 9 | 10 | enum TagParserState { 11 | case notWithinQuotesOrComment 12 | case withinDoubleQuotes 13 | case withinSingleQuotes 14 | } 15 | 16 | enum TagOpeningType { 17 | case element 18 | case CDATA 19 | case declaration 20 | case comment 21 | } 22 | 23 | struct TagSpecificCharacters { 24 | // characters 25 | let tagOpeningCharacter: Character = "<" 26 | let tagClosingCharacter: Character = ">" 27 | let doubleQuote: Character = "\"" // i.e " 28 | let singleQuote: Character = "'" // i.e ' 29 | let space: Character = " " 30 | let equalSign: Character = "=" 31 | 32 | // strings 33 | let declarationOpening = "" 36 | let conditionalCommentOpening = "" 38 | let CDATAOpening = "" 40 | 41 | // array 42 | 43 | } 44 | 45 | struct TagParser { 46 | fileprivate let commentParser = CommentParser() 47 | fileprivate let cdataParser = CDATAParser() 48 | fileprivate let lookaheadValidator = LookaheadValidator() 49 | fileprivate let specificCharacters = TagSpecificCharacters() 50 | fileprivate let isPoorlyFormattedCommentsAllowed: Bool = true 51 | 52 | func getNextTag(source: String, currentIndex: String.Index) throws -> (childNodes: [Node], tag: Tag?) { 53 | var isTagOpened = false 54 | var localCurrentIndex = currentIndex 55 | var tagStartIndex: String.Index? 56 | 57 | var childNodes = [Node]() 58 | var parseState = TagParserState.notWithinQuotesOrComment 59 | 60 | // iterate through string indices until tag is found or end of string 61 | while source.encompassesIndex(localCurrentIndex) { 62 | 63 | if isTagOpened == false { 64 | if parseState == .notWithinQuotesOrComment { 65 | if let tagOpeningType = resolveTagOpeningType(source: source, index: 66 | localCurrentIndex) { 67 | 68 | // set inner text block 69 | if (currentIndex != localCurrentIndex) { 70 | var textBlockStartIndex = currentIndex 71 | 72 | // changed 73 | if let lastChildNode = childNodes.last { 74 | textBlockStartIndex = source.index(lastChildNode.endIndex, offsetBy: 1) 75 | } 76 | 77 | let textBlockEndIndex = source.index(localCurrentIndex, offsetBy: -1) 78 | 79 | // if tags or comments are right beside each other dont add text block i.e 80 | if textBlockStartIndex <= textBlockEndIndex { 81 | let textBlockText = String(source[textBlockStartIndex...textBlockEndIndex]) 82 | if (textBlockText.isEmptyOrWhitespace() == false) { 83 | let innerTextBlock = TextNode.init(startIndex: textBlockStartIndex, 84 | endIndex: textBlockEndIndex, 85 | text: textBlockText) 86 | childNodes.append(innerTextBlock) 87 | } 88 | } 89 | } 90 | 91 | switch tagOpeningType { 92 | case .element: 93 | isTagOpened = true 94 | tagStartIndex = localCurrentIndex 95 | case .comment: 96 | do { 97 | let comment = try commentParser.parseComment(source: source, 98 | currentIndex: localCurrentIndex, 99 | commentType: .comment) 100 | localCurrentIndex = comment.endIndex 101 | childNodes.append(comment) 102 | } catch { 103 | throw ParseError.endOfFileReachedBeforeCommentCloseFound 104 | } 105 | case .declaration: 106 | do { 107 | let comment = try commentParser.parseComment(source: source, 108 | currentIndex: localCurrentIndex, 109 | commentType: .declaration) 110 | localCurrentIndex = comment.endIndex 111 | childNodes.append(comment) 112 | } catch { 113 | throw ParseError.endOfFileReachedBeforeCommentCloseFound 114 | } 115 | case .CDATA: 116 | // is CDATA 117 | do { 118 | let cdata = try cdataParser.parse(source: source, currentIndex: localCurrentIndex) 119 | localCurrentIndex = cdata.endIndex 120 | childNodes.append(cdata) 121 | } catch { 122 | throw ParseError.endOfFileReachedBeforeCommentCloseFound 123 | } 124 | } 125 | } 126 | } 127 | } else { 128 | switch parseState { 129 | case .notWithinQuotesOrComment: 130 | if source[localCurrentIndex] == specificCharacters.tagClosingCharacter { 131 | // tag is closed 132 | do { 133 | let tag = try foundTag(source: source, tagStartIndex: tagStartIndex!, tagEndIndex: localCurrentIndex) 134 | return (childNodes, tag) 135 | } catch { 136 | throw error 137 | } 138 | } 139 | if source[localCurrentIndex] == specificCharacters.doubleQuote { 140 | parseState = .withinDoubleQuotes 141 | } else if source[localCurrentIndex] == specificCharacters.singleQuote { 142 | parseState = .withinSingleQuotes 143 | } 144 | case .withinDoubleQuotes: 145 | if source[localCurrentIndex] == specificCharacters.doubleQuote { 146 | parseState = .notWithinQuotesOrComment 147 | } 148 | case .withinSingleQuotes: 149 | if source[localCurrentIndex] == specificCharacters.singleQuote { 150 | parseState = .notWithinQuotesOrComment 151 | } 152 | } 153 | } 154 | 155 | // increment localCurrentIndex 156 | localCurrentIndex = source.index(localCurrentIndex, offsetBy: 1) 157 | 158 | // if source.encompassesIndex(localCurrentIndex) { 159 | // print("localCurrentIndex: \(localCurrentIndex)") 160 | // print(source[localCurrentIndex]) 161 | // } 162 | } 163 | 164 | // a tag not found before end of file reached 165 | return (childNodes, nil) 166 | } 167 | 168 | func resolveTagOpeningType(source: String, index: String.Index) -> TagOpeningType? { 169 | if source[index] == specificCharacters.tagOpeningCharacter { 170 | if lookaheadValidator.isValidLookahead(for: source, atIndex: index, checkFor: specificCharacters.declarationOpening) { 171 | // check if comment opening 172 | if lookaheadValidator.isValidLookahead(for: source, atIndex: index, checkFor: specificCharacters.commentOpening) { 173 | return TagOpeningType.comment 174 | } else if lookaheadValidator.isValidLookahead(for: source, atIndex: index, checkFor: specificCharacters.CDATAOpening) { 175 | return TagOpeningType.CDATA 176 | } 177 | return TagOpeningType.declaration 178 | } 179 | return TagOpeningType.element 180 | } 181 | return nil 182 | } 183 | 184 | /// produces a `tag` from the found tag text, parsing attributes etc 185 | func foundTag(source: String, tagStartIndex: String.Index, tagEndIndex: String.Index) throws -> Tag { 186 | // create tagText string from indexes 187 | let tagText = String(source[tagStartIndex...tagEndIndex]) 188 | 189 | // get tagName from tagText 190 | let tagNameResult: String? 191 | do { 192 | tagNameResult = try parseTagName(tagText: tagText) 193 | } catch { 194 | throw ParseError.tagNameNotFound 195 | } 196 | guard let tagName = tagNameResult else { 197 | throw ParseError.tagNameNotFound 198 | } 199 | 200 | // create the tag 201 | return Tag.init(startIndex: tagStartIndex, endIndex: tagEndIndex, tagText: tagText, tagName: tagName) 202 | } 203 | 204 | func parseTagName(tagText: String) throws -> String { 205 | var currentIndex = tagText.startIndex 206 | let endIndex = tagText.endIndex 207 | 208 | var startTagNameIndex: String.Index? 209 | 210 | 211 | var isFirstCharacterFound = false 212 | while currentIndex < endIndex { 213 | if isFirstCharacterFound == false { 214 | // keep going until you find the first char (ignore < and whitespace) 215 | if tagText[currentIndex] != TagSpecificCharacters().tagOpeningCharacter && tagText[currentIndex].isWhitespace == false { 216 | isFirstCharacterFound = true 217 | // add char to tag 218 | startTagNameIndex = currentIndex 219 | } 220 | } else { 221 | if tagText[currentIndex] == ">" || tagText[currentIndex].isWhitespace { 222 | // dont include last > or whitespace in tagName 223 | let endTagNameIndex = tagText.index(currentIndex, offsetBy: -1) 224 | let tagName = String(tagText[startTagNameIndex!...endTagNameIndex]) 225 | return tagName.trimmingCharacters(in: .whitespacesAndNewlines) 226 | } 227 | } 228 | 229 | currentIndex = tagText.index(currentIndex, offsetBy: 1) 230 | } 231 | 232 | throw ParseError.tagNameNotFound 233 | } 234 | 235 | } 236 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/Tags/KnownHTMLTags.swift: -------------------------------------------------------------------------------- 1 | // 2 | // tags.swift 3 | // HTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-05-26. 6 | // Copyright © 2018 Reid Nantes. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | struct HTMLTag { 12 | let name: String 13 | let isEmpty: Bool 14 | 15 | init(name: String, isEmpty: Bool = false) { 16 | self.name = name 17 | self.isEmpty = isEmpty 18 | } 19 | } 20 | 21 | enum HTMLTagID: String { 22 | case a = "a" 23 | case abbr = "abbr" 24 | case address = "address" 25 | case area = "area" 26 | case article = "article" 27 | case aside = "aside" 28 | case audio = "audio" 29 | case b = "b" 30 | case base = "base" 31 | case bdi = "bdi" 32 | case bdo = "bdo" 33 | case blockquote = "blockquote" 34 | case body = "body" 35 | case br = "br" 36 | case button = "button" 37 | case canvas = "canvas" 38 | case caption = "caption" 39 | case cite = "cite" 40 | case code = "code" 41 | case col = "col" 42 | case colgroup = "colgroup" 43 | case data = "data" 44 | case datalist = "datalist" 45 | case dd = "dd" 46 | case del = "del" 47 | case details = "details" 48 | case dfn = "dfn" 49 | case dialog = "dialog" 50 | case div = "div" 51 | case dl = "dl" 52 | case dt = "dt" 53 | case element = "Element" 54 | case em = "em" 55 | case embed = "embed" 56 | case fieldset = "fieldset" 57 | case figcaption = "figcaption" 58 | case figure = "figure" 59 | case footer = "footer" 60 | case form = "form" 61 | case h1 = "h1" 62 | case h2 = "h2" 63 | case h3 = "h3" 64 | case h4 = "h4" 65 | case h5 = "h5" 66 | case h6 = "h6" 67 | case head = "head" 68 | case header = "header" 69 | case hgroup = "hgroup" 70 | case hr = "hr" 71 | case html = "html" 72 | case i = "i" 73 | case iframe = "iframe" 74 | case img = "img" 75 | case input = "input" 76 | case ins = "ins" 77 | case kbd = "kbd" 78 | case label = "label" 79 | case legend = "legend" 80 | case li = "li" 81 | case link = "link" 82 | case main = "main" 83 | case map = "map" 84 | case mark = "mark" 85 | case mathML = "MathML" 86 | case math = "math " 87 | case menu = "menu" 88 | case meta = "meta" 89 | case meter = "meter" 90 | case nav = "nav" 91 | case noscript = "noscript" 92 | case object = "object" 93 | case ol = "ol" 94 | case optgroup = "optgroup" 95 | case option = "option" 96 | case output = "output" 97 | case p = "p" 98 | case param = "param" 99 | case picture = "picture" 100 | case pre = "pre" 101 | case progress = "progress" 102 | case q = "q" 103 | case rp = "rp" 104 | case rt = "rt" 105 | case ruby = "ruby" 106 | case s = "s" 107 | case samp = "samp" 108 | case script = "script" 109 | case section = "section" 110 | case select = "select" 111 | case slot = "slot" 112 | case small = "small" 113 | case source = "source" 114 | case span = "span" 115 | case strong = "strong" 116 | case style = "style" 117 | case sub = "sub" 118 | case summary = "summary" 119 | case sup = "sup" 120 | case svg = "svg" 121 | case table = "table" 122 | case tbody = "tbody" 123 | case td = "td" 124 | case template = "template" 125 | case textarea = "textarea" 126 | case tfoot = "tfoot" 127 | case th = "th" 128 | case thead = "thead" 129 | case time = "time" 130 | case title = "title" 131 | case tr = "tr" 132 | case track = "track" 133 | case u = "u" 134 | case ul = "ul" 135 | case `var` = "var" 136 | case video = "video" 137 | case wbr = "wbr" 138 | } 139 | 140 | let htmlTags: [HTMLTagID: HTMLTag] = [ 141 | .a: HTMLTag.init(name: "a"), 142 | .abbr: HTMLTag.init(name: "abbr"), 143 | .address: HTMLTag.init(name: "address"), 144 | .area: HTMLTag.init(name: "area", isEmpty: true), 145 | .article: HTMLTag.init(name: "article"), 146 | .aside: HTMLTag.init(name: "aside"), 147 | .audio: HTMLTag.init(name: "audio"), 148 | .b: HTMLTag.init(name: "b"), 149 | .base: HTMLTag.init(name: "base", isEmpty: true), 150 | .bdi: HTMLTag.init(name: "bdi"), 151 | .bdo: HTMLTag.init(name: "bdo"), 152 | .blockquote: HTMLTag.init(name: "blockquote"), 153 | .body: HTMLTag.init(name: "body"), 154 | .br: HTMLTag.init(name: "br", isEmpty: true), 155 | .button: HTMLTag.init(name: "button"), 156 | .canvas: HTMLTag.init(name: "canvas"), 157 | .caption: HTMLTag.init(name: "caption"), 158 | .cite: HTMLTag.init(name: "cite"), 159 | .code: HTMLTag.init(name: "code"), 160 | .col: HTMLTag.init(name: "col", isEmpty: true), 161 | .colgroup: HTMLTag.init(name: "colgroup"), 162 | .data: HTMLTag.init(name: "data"), 163 | .datalist: HTMLTag.init(name: "datalist"), 164 | .dd: HTMLTag.init(name: "dd"), 165 | .del: HTMLTag.init(name: "del"), 166 | .details: HTMLTag.init(name: "details"), 167 | .dfn: HTMLTag.init(name: "dfn"), 168 | .dialog: HTMLTag.init(name: "dialog"), 169 | .div: HTMLTag.init(name: "div"), 170 | .dl: HTMLTag.init(name: "dl"), 171 | .dt: HTMLTag.init(name: "dt"), 172 | .element: HTMLTag.init(name: "Element"), 173 | .em: HTMLTag.init(name: "em"), 174 | .embed: HTMLTag.init(name: "embed", isEmpty: true), 175 | .fieldset: HTMLTag.init(name: "fieldset"), 176 | .figcaption: HTMLTag.init(name: "figcaption"), 177 | .figure: HTMLTag.init(name: "figure"), 178 | .footer: HTMLTag.init(name: "footer"), 179 | .form: HTMLTag.init(name: "form"), 180 | .h1: HTMLTag.init(name: "h1"), 181 | .h2: HTMLTag.init(name: "h2"), 182 | .h3: HTMLTag.init(name: "h3"), 183 | .h4: HTMLTag.init(name: "h4"), 184 | .h5: HTMLTag.init(name: "h5"), 185 | .h6: HTMLTag.init(name: "h6"), 186 | .head: HTMLTag.init(name: "head"), 187 | .header: HTMLTag.init(name: "header"), 188 | .hgroup: HTMLTag.init(name: "hgroup"), 189 | .hr: HTMLTag.init(name: "hr", isEmpty: true), 190 | .html: HTMLTag.init(name: "html"), 191 | .i: HTMLTag.init(name: "i"), 192 | .iframe: HTMLTag.init(name: "iframe", isEmpty: true), 193 | .img: HTMLTag.init(name: "img", isEmpty: true), 194 | .input: HTMLTag.init(name: "input", isEmpty: true), 195 | .ins: HTMLTag.init(name: "ins"), 196 | .kbd: HTMLTag.init(name: "kbd"), 197 | .label: HTMLTag.init(name: "label"), 198 | .legend: HTMLTag.init(name: "legend"), 199 | .li: HTMLTag.init(name: "li"), 200 | .link: HTMLTag.init(name: "link", isEmpty: true), 201 | .main: HTMLTag.init(name: "main"), 202 | .map: HTMLTag.init(name: "map"), 203 | .mark: HTMLTag.init(name: "mark"), 204 | .mathML: HTMLTag.init(name: "MathML"), 205 | .math: HTMLTag.init(name: "math "), 206 | .menu: HTMLTag.init(name: "menu"), 207 | .meta: HTMLTag.init(name: "meta", isEmpty: true), 208 | .meter: HTMLTag.init(name: "meter"), 209 | .nav: HTMLTag.init(name: "nav"), 210 | .noscript: HTMLTag.init(name: "noscript"), 211 | .object: HTMLTag.init(name: "object"), 212 | .ol: HTMLTag.init(name: "ol"), 213 | .optgroup: HTMLTag.init(name: "optgroup"), 214 | .option: HTMLTag.init(name: "option"), 215 | .output: HTMLTag.init(name: "output"), 216 | .p: HTMLTag.init(name: "p"), 217 | .param: HTMLTag.init(name: "param", isEmpty: true), 218 | .picture: HTMLTag.init(name: "picture"), 219 | .pre: HTMLTag.init(name: "pre"), 220 | .progress: HTMLTag.init(name: "progress"), 221 | .q: HTMLTag.init(name: "q"), 222 | .rp: HTMLTag.init(name: "rp"), 223 | .rt: HTMLTag.init(name: "rt"), 224 | .ruby: HTMLTag.init(name: "ruby"), 225 | .s: HTMLTag.init(name: "s"), 226 | .samp: HTMLTag.init(name: "samp"), 227 | .script: HTMLTag.init(name: "script"), 228 | .section: HTMLTag.init(name: "section"), 229 | .select: HTMLTag.init(name: "select"), 230 | .slot: HTMLTag.init(name: "slot"), 231 | .small: HTMLTag.init(name: "small"), 232 | .source: HTMLTag.init(name: "source", isEmpty: true), 233 | .span: HTMLTag.init(name: "span"), 234 | .strong: HTMLTag.init(name: "strong"), 235 | .style: HTMLTag.init(name: "style"), 236 | .sub: HTMLTag.init(name: "sub"), 237 | .summary: HTMLTag.init(name: "summary"), 238 | .sup: HTMLTag.init(name: "sup"), 239 | .svg: HTMLTag.init(name: "svg"), 240 | .table: HTMLTag.init(name: "table"), 241 | .tbody: HTMLTag.init(name: "tbody"), 242 | .td: HTMLTag.init(name: "td"), 243 | .template: HTMLTag.init(name: "template", isEmpty: true), 244 | .textarea: HTMLTag.init(name: "textarea"), 245 | .tfoot: HTMLTag.init(name: "tfoot"), 246 | .th: HTMLTag.init(name: "th"), 247 | .thead: HTMLTag.init(name: "thead"), 248 | .time: HTMLTag.init(name: "time"), 249 | .title: HTMLTag.init(name: "title"), 250 | .tr: HTMLTag.init(name: "tr"), 251 | .track: HTMLTag.init(name: "track", isEmpty: true), 252 | .u: HTMLTag.init(name: "u"), 253 | .ul: HTMLTag.init(name: "ul"), 254 | .var: HTMLTag.init(name: "var"), 255 | .video: HTMLTag.init(name: "video"), 256 | .wbr: HTMLTag.init(name: "wbr", isEmpty: true) 257 | ] 258 | let selfClosingHTMLTags: [HTMLTagID] = [ 259 | .area, 260 | .base, 261 | .br, 262 | .col, 263 | .embed, 264 | .hr, 265 | .iframe, 266 | .img, 267 | .input, 268 | .link, 269 | .meta, 270 | .param, 271 | .source, 272 | .template, 273 | .track, 274 | .wbr 275 | ] 276 | 277 | let allHTMLTagNames = [ 278 | "a", 279 | "abbr", 280 | "address", 281 | "area", 282 | "article", 283 | "aside", 284 | "audio", 285 | "b", 286 | "base", 287 | "bdi", 288 | "bdo", 289 | "blockquote", 290 | "body", 291 | "br", 292 | "button", 293 | "canvas", 294 | "caption", 295 | "cite", 296 | "code", 297 | "col", 298 | "colgroup", 299 | "data", 300 | "datalist", 301 | "dd", 302 | "del", 303 | "details", 304 | "dfn", 305 | "dialog", 306 | "div", 307 | "dl", 308 | "dt", 309 | "em", 310 | "embed", 311 | "fieldset", 312 | "figcaption", 313 | "figure", 314 | "footer", 315 | "form", 316 | "h1,", 317 | "h2,", 318 | "h3,", 319 | "h4,", 320 | "h5,", 321 | "h6", 322 | "head", 323 | "header", 324 | "hgroup", 325 | "hr", 326 | "html", 327 | "i", 328 | "iframe", 329 | "img", 330 | "input", 331 | "ins", 332 | "kbd", 333 | "label", 334 | "legend", 335 | "li", 336 | "link", 337 | "main", 338 | "map", 339 | "mark", 340 | "MathMLmath", 341 | "menu", 342 | "meta", 343 | "meter", 344 | "nav", 345 | "noscript", 346 | "object", 347 | "ol", 348 | "optgroup", 349 | "option", 350 | "output", 351 | "p", 352 | "param", 353 | "picture", 354 | "pre", 355 | "progress", 356 | "q", 357 | "rp", 358 | "rt", 359 | "ruby", 360 | "s", 361 | "samp", 362 | "script", 363 | "section", 364 | "select", 365 | "slot", 366 | "small", 367 | "source", 368 | "span", 369 | "strong", 370 | "style", 371 | "sub", 372 | "summary", 373 | "sup", 374 | "SVG", 375 | "svg", 376 | "table", 377 | "tbody", 378 | "td", 379 | "template", 380 | "textarea", 381 | "tfoot", 382 | "th", 383 | "thead", 384 | "time", 385 | "title", 386 | "tr", 387 | "track", 388 | "u", 389 | "ul", 390 | "var", 391 | "video", 392 | "wbr" 393 | ] 394 | 395 | // elements with no end tag 396 | // reference: https://developer.mozilla.org/en-US/docs/Glossary/Empty_element 397 | let emptyElementTagNames = [ 398 | "area", 399 | "base", 400 | "br", 401 | "col", 402 | "embed", 403 | "hr", 404 | "iframe", 405 | "img", 406 | "input", 407 | "link", 408 | "meta", 409 | "param", 410 | "source", 411 | "template", 412 | "track", 413 | "wbr" 414 | ] 415 | 416 | let ignoredTags: [String] = [ 417 | "svg", 418 | "script" 419 | ] 420 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/Tags/SVGTags.swift: -------------------------------------------------------------------------------- 1 | // 2 | // SVGTags.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-12. 6 | // 7 | 8 | import Foundation 9 | 10 | enum SVGTagNames: String { 11 | case a 12 | case altGlyph 13 | case altGlyphDef 14 | case altGlyphItem 15 | case animate 16 | case animateColor 17 | case animateMotion 18 | case animateTransform 19 | case circle 20 | case clipPath 21 | case colorProfile = "color-profile" 22 | case cursor 23 | case defs 24 | case desc 25 | case ellipse 26 | case feBlend 27 | case feColorMatrix 28 | case feComponentTransfer 29 | case feComposite 30 | case feConvolveMatrix 31 | case feDiffuseLighting 32 | case feDisplacementMap 33 | case feDistantLight 34 | case feFlood 35 | case feFuncA 36 | case feFuncB 37 | case feFuncG 38 | case feFuncR 39 | case feGaussianBlur 40 | case feImage 41 | case feMerge 42 | case feMergeNode 43 | case feMorphology 44 | case feOffset 45 | case fePointLight 46 | case feSpecularLighting 47 | case feSpotLight 48 | case feTile 49 | case feTurbulence 50 | case filter 51 | case font 52 | case fontFace = "font-face" 53 | case fontFaceFormat = "font-face-format" 54 | case fontFaceName = "font-face-name" 55 | case fontFaceSRC = "font-face-src" 56 | case fontFaceURI = "font-face-uri" 57 | case foreignObject 58 | case g 59 | case glyph 60 | case glyphRef 61 | case hkern 62 | case image 63 | case line 64 | case linearGradient 65 | case marker 66 | case mask 67 | case metadata 68 | case missingGlyph = "missing-glyph" 69 | case mpath 70 | case path 71 | case pattern 72 | case polygon 73 | case polyline 74 | case radialGradient 75 | case rect 76 | case script 77 | case set 78 | case stop 79 | case style 80 | case svg 81 | case switchTag = "switch" 82 | case symbol 83 | case text 84 | case textPath 85 | case title 86 | case tref 87 | case tspan 88 | case use 89 | case view 90 | case vkern 91 | } 92 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Parser/Tags/XMLTags.swift: -------------------------------------------------------------------------------- 1 | // 2 | // XMLTags.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-13. 6 | // 7 | 8 | import Foundation 9 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/ProjectConfig.swift: -------------------------------------------------------------------------------- 1 | // 2 | // TestConfig.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-04. 6 | // 7 | 8 | import Foundation 9 | 10 | struct ProjectConfig { 11 | 12 | // for Debugging 13 | static let shouldPrintTags = false 14 | static let shouldPrintWarnings = false 15 | } 16 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Traverser/HTMLTraverser.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HTMLTraverser.swift 3 | // 4 | // Created by Reid Nantes on 2018-05-27. 5 | // Copyright © 2018 Reid Nantes. All rights reserved. 6 | // 7 | 8 | import Foundation 9 | 10 | public struct HTMLTraverser { 11 | 12 | public static func hasMatchingNode(in parsedNodes: [Node], matching nodeSelctorPath: [NodeSelector]) -> Bool { 13 | if findNodes(in: parsedNodes, matching: nodeSelctorPath).count > 0 { 14 | return true 15 | } else { 16 | return false 17 | } 18 | } 19 | 20 | public static func findElements(in parsedNodes: [Node], matching nodeSelectorPath: [NodeSelector]) -> [Element] { 21 | let nodes = findNodes(in: parsedNodes, matching: nodeSelectorPath) 22 | return nodes.compactMap({ $0 as? Element }) 23 | } 24 | 25 | public static func findNodes(in parsedNodes: [Node], matching nodeSelectorPath: [NodeSelector]) -> [Node] { 26 | // start with every element matching 27 | var matchingNodes = parsedNodes 28 | var selectorPathIndex = 0 29 | var matchedSelectors = [NodeSelector]() 30 | // var unmatchedSelector: NodeSelector? = nil 31 | 32 | while selectorPathIndex < nodeSelectorPath.count && matchingNodes.count > 0 { 33 | var shouldReturnChildrenOfMatches = true 34 | // if not the last selectorNode get the children 35 | if selectorPathIndex == nodeSelectorPath.count - 1 { 36 | shouldReturnChildrenOfMatches = false 37 | } 38 | 39 | let currentSelector = nodeSelectorPath[selectorPathIndex] 40 | matchingNodes = getMatchesAtDepth(nodeSelector: currentSelector, 41 | nodesAtDepth: matchingNodes, 42 | shouldReturnChildrenOfMatches: shouldReturnChildrenOfMatches) 43 | 44 | // if matched add currentSelector to list of matchedSelectors 45 | if (matchingNodes.count > 0) { 46 | matchedSelectors.append(currentSelector) 47 | } else { 48 | // if not matched set unmatchedSelector 49 | // TODO: return result or throw error with this result? 50 | //unmatchedSelector = currentSelector 51 | } 52 | 53 | selectorPathIndex += 1 54 | } 55 | 56 | return matchingNodes 57 | } 58 | 59 | private static func getMatchesAtDepth(nodeSelector: NodeSelector, nodesAtDepth: [Node], shouldReturnChildrenOfMatches: Bool) -> [Node] { 60 | var matchesAtDepth = [Node]() 61 | 62 | var currentPosition = 0 63 | 64 | for node in nodesAtDepth { 65 | if compare(nodeSelector: nodeSelector, node: node) == true { 66 | if nodeSelector.position.testAgainst(currentPosition) { 67 | if shouldReturnChildrenOfMatches == true { 68 | if let element = node as? Element { 69 | matchesAtDepth.append(contentsOf: element.childNodes) 70 | } 71 | } else { 72 | matchesAtDepth.append(node) 73 | } 74 | } 75 | currentPosition += 1 76 | } 77 | } 78 | 79 | return matchesAtDepth 80 | } 81 | 82 | private static func compare(nodeSelector: NodeSelector, node: Node) -> Bool { 83 | if nodeSelector.testAgainst(node) == false { 84 | return false 85 | } 86 | 87 | return true 88 | } 89 | 90 | } 91 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Traverser/Selectors/AttributeSelector.swift: -------------------------------------------------------------------------------- 1 | // 2 | // AttributeSelector.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-14. 6 | // 7 | 8 | import Foundation 9 | 10 | public class AttributeSelector: ValueStringSelectorBuilder { 11 | private(set) public var name: String 12 | private(set) public var value = StringSelector() 13 | 14 | public init(name: String) { 15 | self.name = name 16 | } 17 | 18 | public func withValue(_ value: String) -> Self { 19 | self.value.withString(value) 20 | return self 21 | } 22 | 23 | /// returns true if the element satisfies the selector 24 | public func testSelector(against element: Element) -> Bool { 25 | let attributeValue = element.attributeValue(for: self.name) 26 | 27 | if value.testAgainst(attributeValue) == false { 28 | return false 29 | } 30 | 31 | return true 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Traverser/Selectors/ClassSelector.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ClassSelector.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-10-22. 6 | // 7 | 8 | import Foundation 9 | 10 | extension ElementSelector { 11 | /// Matches if the target matches the given className 12 | public func withClassName(_ className: String) -> Self { 13 | self.classNameSelector.hasClassNameAny.appendOrInit(className) 14 | return self 15 | } 16 | 17 | /// Matches if the target matches any of the given classNames 18 | public func withClassNamesAny(_ classNames: [String]) -> Self { 19 | self.classNameSelector.hasClassNameAny.appendOrInit(contentsOf: classNames) 20 | return self 21 | } 22 | 23 | /// Matches if the target matches all of the given classNames 24 | public func withClassNamesAll(_ classNames: [String]) -> Self { 25 | self.classNameSelector.hasClassNamesAll.appendOrInit(contentsOf: classNames) 26 | return self 27 | } 28 | 29 | /// Matches if the target has the exact of the given classNames. 30 | public func withClassNamesExact(_ classNames: [String]) -> Self { 31 | self.classNameSelector.hasClassNamesExact.appendOrInit(contentsOf: classNames) 32 | return self 33 | } 34 | 35 | // negatives 36 | /// Does not match if the node has the className 37 | public func withoutClassName(_ className : String) -> Self { 38 | self.classNameSelector.doesNotHaveClassNameAny.appendOrInit(className) 39 | return self 40 | } 41 | 42 | /// Does not match if any of the given classNames are present 43 | public func withoutClassNameAny(_ classNames : [String]) -> Self { 44 | self.classNameSelector.doesNotHaveClassNameAny.appendOrInit(contentsOf: classNames) 45 | return self 46 | } 47 | 48 | /// Does not match if all of the given classNames are present 49 | public func withoutClassNameAll(_ classNames : [String]) -> Self { 50 | self.classNameSelector.doesNotHaveClassNamesAll.appendOrInit(contentsOf: classNames) 51 | return self 52 | } 53 | 54 | /// Does not match if the element has the exact classNames 55 | public func withoutClassNameExact(_ classNames : [String]) -> Self { 56 | self.classNameSelector.doesNotHaveClassNamesExact.appendOrInit(contentsOf: classNames) 57 | return self 58 | } 59 | 60 | } 61 | 62 | internal final class ClassSelector { 63 | var hasClassNameAny: [String]? 64 | var hasClassNamesAll: [String]? 65 | var hasClassNamesExact: [String]? 66 | 67 | // negatives 68 | // does not match if any of the given classNames are present 69 | var doesNotHaveClassNameAny: [String]? 70 | // does not match if all of the given classNames are present 71 | var doesNotHaveClassNamesAll: [String]? 72 | // does not match if the element has the exact classNames 73 | var doesNotHaveClassNamesExact: [String]? 74 | 75 | 76 | /// returns true if the element satisfies the selector 77 | internal func testAgainst(_ element: Element) -> Bool { 78 | let classNamesSet = Set(element.classNames) 79 | 80 | if let hasClassNameAny = hasClassNameAny { 81 | if hasClassNameAny.contains(where: { classNamesSet.contains($0) }) == false { 82 | return false 83 | } 84 | } 85 | 86 | if let hasClassNamesAll = hasClassNamesAll { 87 | if hasClassNamesAll.allSatisfy({ classNamesSet.contains($0) }) == false { 88 | return false 89 | } 90 | } 91 | 92 | if let hasClassNamesExact = hasClassNamesExact { 93 | if hasClassNamesExact.allSatisfy({ classNamesSet.contains($0) }) == false || hasClassNamesExact.count != classNamesSet.count { 94 | return false 95 | } 96 | } 97 | 98 | if let doesNotHaveClassNameAny = doesNotHaveClassNameAny { 99 | if doesNotHaveClassNameAny.contains(where: { classNamesSet.contains($0) }) == true { 100 | return false 101 | } 102 | } 103 | 104 | if let doesNotHaveClassNamesAll = doesNotHaveClassNamesAll { 105 | if doesNotHaveClassNamesAll.allSatisfy({ classNamesSet.contains($0) }) == true { 106 | return false 107 | } 108 | } 109 | 110 | if let doesNotHaveClassNamesExact = doesNotHaveClassNamesExact { 111 | if doesNotHaveClassNamesExact.allSatisfy({ classNamesSet.contains($0) }) == true && doesNotHaveClassNamesExact.count == classNamesSet.count { 112 | return false 113 | } 114 | } 115 | 116 | return true 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Traverser/Selectors/IntSelector.swift: -------------------------------------------------------------------------------- 1 | // 2 | // File.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-11-03. 6 | // 7 | 8 | import Foundation 9 | 10 | public final class IntSelector { 11 | private(set) var anyValues: [Int]? 12 | private(set) var lessThanValues: [Int]? 13 | private(set) var greaterThanValues: [Int]? 14 | 15 | // negatives 16 | private(set) var notAnyValues: [Int]? 17 | } 18 | 19 | internal extension IntSelector { 20 | func withValue(_ value: Int) { 21 | anyValues.appendOrInit(value) 22 | } 23 | 24 | func whenValueIsAny(_ values: [Int]) { 25 | anyValues.appendOrInit(contentsOf: values) 26 | } 27 | 28 | func whenValueIsLessThan(_ value: Int) { 29 | lessThanValues.appendOrInit(value) 30 | } 31 | 32 | func whenValueIsGreaterThan(_ value: Int) { 33 | greaterThanValues.appendOrInit(value) 34 | } 35 | 36 | // negatives 37 | func whenValueIsNot(_ value: Int) { 38 | notAnyValues.appendOrInit(value) 39 | } 40 | 41 | func whenValueIsNotAny(_ values: [Int]) { 42 | notAnyValues.appendOrInit(contentsOf: values) 43 | } 44 | 45 | func testAgainst(_ value: Int?) -> Bool { 46 | guard let value = value else { 47 | if anyValues != nil || lessThanValues != nil || greaterThanValues != nil { 48 | return false 49 | } else { 50 | return true 51 | } 52 | } 53 | 54 | if let anyValues = anyValues { 55 | if anyValues.contains(where: { value == $0 }) == false { 56 | return false 57 | } 58 | } 59 | 60 | if let lessThanValues = lessThanValues { 61 | if lessThanValues.allSatisfy({ value < $0 }) == false { 62 | return false 63 | } 64 | } 65 | 66 | if let greaterThanValues = greaterThanValues { 67 | if greaterThanValues.allSatisfy({ value > $0 }) == false { 68 | return false 69 | } 70 | } 71 | 72 | if let notAnyValues = notAnyValues { 73 | if notAnyValues.allSatisfy({ value != $0 }) == false { 74 | return false 75 | } 76 | } 77 | 78 | return true 79 | } 80 | } 81 | 82 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Traverser/Selectors/NodeSelector.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ElementSelector.swift 3 | // HTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-05-27. 6 | // Copyright © 2018 Reid Nantes. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | /// based on Xpath and selector 12 | public protocol NodeSelector: AnyObject, PositionIntSelectorBuilder { 13 | 14 | /// Tests the element against the current Node 15 | func testAgainst(_ node: Node) -> Bool 16 | } 17 | 18 | //extension NodeSelector { 19 | // 20 | //// /// Matches when position is less than the given value 21 | //// func whenPositionLessThan(_ position: Int) -> NodeSelector { 22 | //// 23 | //// } 24 | //// 25 | //// /// Matches when position is greater than the given value 26 | //// func whenPositionGreaterThan(_ position: Int) -> NodeSelector { 27 | //// 28 | //// } 29 | //} 30 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Traverser/Selectors/NodeSelectors/CDataSelector.swift: -------------------------------------------------------------------------------- 1 | // 2 | // File.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-10-29. 6 | // 7 | 8 | import Foundation 9 | 10 | public final class CDataSelector: NodeSelector, TextStringSelectorBuilder { 11 | private(set) public var position = IntSelector() 12 | private(set) public var text = StringSelector() 13 | 14 | // public init 15 | public init() {} 16 | 17 | public func testAgainst(_ node: Node) -> Bool { 18 | // return false if node is not an element 19 | guard let cdata = node as? CData else { 20 | return false 21 | } 22 | 23 | if text.testAgainst(cdata.text) == false { 24 | return false 25 | } 26 | 27 | return true 28 | } 29 | 30 | 31 | } 32 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Traverser/Selectors/NodeSelectors/CommentSelector.swift: -------------------------------------------------------------------------------- 1 | // 2 | // CommentSelector.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-10-31. 6 | // 7 | 8 | import Foundation 9 | 10 | public final class CommentSelector: NodeSelector, TextStringSelectorBuilder { 11 | private(set) public var position = IntSelector() 12 | private(set) public var text = StringSelector() 13 | 14 | // public init 15 | public init() {} 16 | 17 | /// returns true if the Node = satisfies the selector 18 | public func testAgainst(_ node: Node) -> Bool { 19 | // return false if node is not an CommentNode 20 | guard let comment = node as? Comment else { 21 | return false 22 | } 23 | 24 | if text.testAgainst(comment.text) == false { 25 | return false 26 | } 27 | 28 | return true 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Traverser/Selectors/NodeSelectors/ElementSelector.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ElementSelector.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-10-29. 6 | // 7 | 8 | import Foundation 9 | 10 | public class ElementSelector: NodeSelector, TagNameStringSelectorBuilder, IdStringSelectorBuilder { 11 | private(set) public var position = IntSelector() 12 | 13 | // string selector 14 | private(set) public var tagName = StringSelector() 15 | private(set) public var id = StringSelector() 16 | 17 | // className selector 18 | private(set) var classNameSelector = ClassSelector() 19 | 20 | // attribute selectors 21 | private(set) public var attributes: [AttributeSelector]? 22 | 23 | // childNode selector 24 | private(set) public var childNodeSelectors: [NodeSelector]? 25 | private(set) public var childNodeSelectorPathsAll: [[NodeSelector]]? 26 | 27 | // public init 28 | public init() {} 29 | 30 | /// Selects element if it has the given attribute 31 | public func withAttribute(_ attributeSelector: AttributeSelector) -> ElementSelector { 32 | self.attributes.appendOrInit(attributeSelector) 33 | return self 34 | } 35 | 36 | /// Selects element if it has the given id attribute value 37 | public func withId(_ id: String) -> ElementSelector { 38 | self.attributes.appendOrInit(AttributeSelector.init(name: "id").withValue(id)) 39 | return self 40 | } 41 | 42 | /// Selects element if it has a child node matching the given childNodeSelector 43 | public func withChildNodeSelectorPath(_ childNodeSelectorPath: [NodeSelector]) -> Self { 44 | self.childNodeSelectorPathsAll.appendOrInit(childNodeSelectorPath) 45 | return self 46 | } 47 | 48 | public func withChildElement(_ elementSelector: ElementSelector) -> Self { 49 | self.childNodeSelectors.appendOrInit(elementSelector) 50 | return self 51 | } 52 | 53 | public func withChildTextNode(_ textNodeSelector: TextNodeSelector) -> Self { 54 | self.childNodeSelectors.appendOrInit(textNodeSelector) 55 | return self 56 | } 57 | 58 | public func withChildCommentNode(_ commentNodeSelector: CommentSelector) -> Self { 59 | self.childNodeSelectors.appendOrInit(commentNodeSelector) 60 | return self 61 | } 62 | 63 | public func withChildCDataNode(_ cDataSelector: CDataSelector) -> Self { 64 | self.childNodeSelectors.appendOrInit(cDataSelector) 65 | return self 66 | } 67 | 68 | public func testAgainst(_ node: Node) -> Bool { 69 | // return false if node is not an element 70 | guard let element = node as? Element else { 71 | return false 72 | } 73 | 74 | // test tagName selector 75 | if self.tagName.testAgainst(element.tagName) == false { 76 | return false 77 | } 78 | 79 | //test classNames 80 | if self.classNameSelector.testAgainst(element) == false { 81 | return false 82 | } 83 | 84 | //test attributes (including id) 85 | if self.attributes?.allSatisfy({ $0.testSelector(against: element) }) == false { 86 | return false 87 | } 88 | 89 | // test child selectors 90 | if childNodeSelectors?.allSatisfy( { 91 | HTMLTraverser.hasMatchingNode(in: element.childNodes, matching: [$0]) 92 | }) == false { 93 | return false 94 | } 95 | 96 | // test childNodeSelectorPaths 97 | if childNodeSelectorPathsAll?.allSatisfy( { 98 | HTMLTraverser.hasMatchingNode(in: element.childNodes, matching: $0) 99 | }) == false { 100 | return false 101 | } 102 | 103 | return true 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Traverser/Selectors/NodeSelectors/TextNodeSelector.swift: -------------------------------------------------------------------------------- 1 | // 2 | // TextNodeSelector.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-10-30. 6 | // 7 | 8 | import Foundation 9 | 10 | public final class TextNodeSelector: NodeSelector, TextStringSelectorBuilder { 11 | private(set) public var text = StringSelector() 12 | private(set) public var position = IntSelector() 13 | 14 | // public init 15 | public init() {} 16 | 17 | public func testAgainst(_ node: Node) -> Bool { 18 | // return false if node is not an TextNode 19 | guard let textNode = node as? TextNode else { 20 | return false 21 | } 22 | 23 | if text.testAgainst(textNode.text) == false { 24 | return false 25 | } 26 | 27 | return true 28 | } 29 | 30 | 31 | } 32 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Traverser/Selectors/SelectorBuilders/IdStringSelectorBuilder.swift: -------------------------------------------------------------------------------- 1 | // 2 | // IdStringSelectorBuilder.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-11-02. 6 | // 7 | 8 | import Foundation 9 | 10 | public protocol IdStringSelectorBuilder { 11 | var id: StringSelector { get } 12 | } 13 | 14 | public extension IdStringSelectorBuilder { 15 | /// Matches when the target equals the given id 16 | func withId(_ id: String) -> Self { 17 | self.id.withString(id) 18 | return self 19 | } 20 | 21 | /// Matches when the target equals any of the given ids 22 | func whenIdIsAny(_ ids: [String]) -> Self { 23 | self.id.whenStringIsAny(ids) 24 | return self 25 | } 26 | 27 | /// Matches when the target contains the given id 28 | func containsId(_ keyword: String) -> Self { 29 | self.id.whenStringContainsAny([keyword]) 30 | return self 31 | } 32 | 33 | /// Matches when the target contains any of the given ids 34 | func whenIdContainsAny(_ keywords: [String]) -> Self { 35 | self.id.whenStringContainsAny(keywords) 36 | return self 37 | } 38 | 39 | /// Matches when the target contains all of the given ids 40 | func whenIdContainsAll(_ keywords: [String]) -> Self { 41 | self.id.whenStringContainsAll(keywords) 42 | return self 43 | } 44 | 45 | // negatives 46 | /// Does not match when the target equals the given id 47 | func whenIdIsNot(_ id: String) -> Self { 48 | self.id.whenStringIsNot(id) 49 | return self 50 | } 51 | 52 | /// Does not match if the target equals any of the given ids 53 | func whenIdIsNotAny(_ ids: [String]) -> Self { 54 | self.id.whenStringIsNotAny(ids) 55 | return self 56 | } 57 | 58 | /// Does not match if the target contains the given id 59 | func whenIdDoesNotContain(_ keyword: String) -> Self { 60 | self.id.whenStringDoesNotContainAny([keyword]) 61 | return self 62 | } 63 | 64 | /// Does not match if the target contains any of the given ids 65 | func whenIdDoesNotContainAny(_ keywords: [String]) -> Self { 66 | self.id.whenStringDoesNotContainAny(keywords) 67 | return self 68 | } 69 | 70 | /// Does not match if the target contains all of the given ids 71 | func whenIdDoesNotContainAll(_ keywords: [String]) -> Self { 72 | self.id.whenStringDoesNotContainAll(keywords) 73 | return self 74 | } 75 | 76 | } 77 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Traverser/Selectors/SelectorBuilders/PositionIntSelectorBuilder.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PositionIntSelectorBuilder.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-11-03. 6 | // 7 | 8 | import Foundation 9 | 10 | 11 | public protocol PositionIntSelectorBuilder { 12 | var position: IntSelector { get } 13 | } 14 | 15 | public extension PositionIntSelectorBuilder { 16 | /// Matches when the target equals the given value 17 | func atPosition(_ value: Int) -> Self { 18 | self.position.withValue(value) 19 | return self 20 | } 21 | 22 | /// Matches when the target equals any of the given values 23 | func whenPositionIsAny(_ values: [Int]) -> Self { 24 | self.position.whenValueIsAny(values) 25 | return self 26 | } 27 | 28 | /// Matches when the target is less than the given value 29 | func whenPositionIsLessThan(_ value: Int) -> Self { 30 | self.position.whenValueIsLessThan(value) 31 | return self 32 | } 33 | 34 | /// Matches when the target is greater than the given value 35 | func whenPositionIsGreaterThan(_ value: Int) -> Self { 36 | self.position.whenValueIsGreaterThan(value) 37 | return self 38 | } 39 | 40 | /// Does not match if the target equals is the given value 41 | func whenPositionIsNot(_ value: Int) -> Self { 42 | self.position.whenValueIsNot(value) 43 | return self 44 | } 45 | 46 | /// Does not match if the target equals any of the given values 47 | func whenPositionIsNotAny(_ values: [Int]) -> Self { 48 | self.position.whenValueIsNotAny(values) 49 | return self 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Traverser/Selectors/SelectorBuilders/TagNameStringSelectorBuilder.swift: -------------------------------------------------------------------------------- 1 | // 2 | // TagNameSelectorBuilder.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-11-02. 6 | // 7 | 8 | import Foundation 9 | 10 | public protocol TagNameStringSelectorBuilder { 11 | var tagName: StringSelector { get } 12 | } 13 | 14 | public extension TagNameStringSelectorBuilder { 15 | /// Matches when the target equals the given value 16 | func withTagName(_ value: String) -> Self { 17 | self.tagName.withString(value) 18 | return self 19 | } 20 | 21 | /// Matches when the target equals any of the given values 22 | func whenTagNameIsAny(_ values: [String]) -> Self { 23 | self.tagName.whenStringIsAny(values) 24 | return self 25 | } 26 | 27 | /// Matches when the target contains the given value 28 | func containingTagName(_ value: String) -> Self { 29 | self.tagName.whenStringContainsAny([value]) 30 | return self 31 | } 32 | 33 | /// Matches when the target contains any of the given values 34 | func whenTagNameContainsAny(_ keywords: [String]) -> Self { 35 | self.tagName.whenStringContainsAny(keywords) 36 | return self 37 | } 38 | 39 | /// Matches when the target contains all of the given values 40 | func whenTagNameContainsAll(_ keywords: [String]) -> Self { 41 | self.tagName.whenStringContainsAll(keywords) 42 | return self 43 | } 44 | 45 | // negatives 46 | /// Does not match when the target equals the given value 47 | func whenTagNameIsNot(_ value: String) -> Self { 48 | self.tagName.whenStringIsNot(value) 49 | return self 50 | } 51 | 52 | /// Does not match if the target equals any of the given values 53 | func whenTagNameIsNotAny(_ values: [String]) -> Self { 54 | self.tagName.whenStringIsNotAny(values) 55 | return self 56 | } 57 | 58 | /// Does not match if the target contains the given value 59 | func whenTagNameDoesNotContain(_ keyword: String) -> Self { 60 | self.tagName.whenStringDoesNotContainAny([keyword]) 61 | return self 62 | } 63 | 64 | /// Does not match if the target contains any of the given values 65 | func whenTagNameDoesNotContainAny(_ keywords: [String]) -> Self { 66 | self.tagName.whenStringDoesNotContainAny(keywords) 67 | return self 68 | } 69 | 70 | /// Does not match if the target contains all of the given values 71 | func whenTagNameDoesNotContainAll(_ keywords: [String]) -> Self { 72 | self.tagName.whenStringDoesNotContainAll(keywords) 73 | return self 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Traverser/Selectors/SelectorBuilders/TextStringSelectorBuilder.swift: -------------------------------------------------------------------------------- 1 | // 2 | // TextStringSelectorBuilder.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-10-31. 6 | // 7 | 8 | import Foundation 9 | 10 | public protocol TextStringSelectorBuilder { 11 | var text: StringSelector { get } 12 | } 13 | 14 | public extension TextStringSelectorBuilder { 15 | /// Matches when the target equals the given value 16 | func withText(_ value: String) -> Self { 17 | self.text.withString(value) 18 | return self 19 | } 20 | 21 | /// Matches when the target equals any of the given values 22 | func whenTextIsAny(_ values: [String]) -> Self { 23 | self.text.whenStringIsAny(values) 24 | return self 25 | } 26 | 27 | /// Matches when the target contains the given value 28 | func containingText(_ value: String) -> Self { 29 | self.text.whenStringContainsAny([value]) 30 | return self 31 | } 32 | 33 | /// Matches when the target contains any of the given values 34 | func whenTextContainsAny(_ keywords: [String]) -> Self { 35 | self.text.whenStringContainsAny(keywords) 36 | return self 37 | } 38 | 39 | /// Matches when the target contains all of the given values 40 | func whenTextContainsAll(_ keywords: [String]) -> Self { 41 | self.text.whenStringContainsAll(keywords) 42 | return self 43 | } 44 | 45 | // negatives 46 | /// Does not match when the target equals the given value 47 | func whenTextIsNot(_ value: String) -> Self { 48 | self.text.whenStringIsNot(value) 49 | return self 50 | } 51 | 52 | /// Does not match if the target equals any of the given values 53 | func whenTextIsNotAny(_ values: [String]) -> Self { 54 | self.text.whenStringIsNotAny(values) 55 | return self 56 | } 57 | 58 | /// Does not match if the target contains the given value 59 | func whenTextDoesNotContain(_ keyword: String) -> Self { 60 | self.text.whenStringDoesNotContainAny([keyword]) 61 | return self 62 | } 63 | 64 | /// Does not match if the target contains any of the given values 65 | func whenTextDoesNotContainAny(_ keywords: [String]) -> Self { 66 | self.text.whenStringDoesNotContainAny(keywords) 67 | return self 68 | } 69 | 70 | /// Does not match if the target contains all of the given values 71 | func whenTextDoesNotContainAll(_ keywords: [String]) -> Self { 72 | self.text.whenStringDoesNotContainAll(keywords) 73 | return self 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Traverser/Selectors/SelectorBuilders/ValueStringSelectorBuilder.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ValueStringSelectorBuilder.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-10-31. 6 | // 7 | 8 | import Foundation 9 | 10 | public protocol ValueStringSelectorBuilder { 11 | var value: StringSelector { get } 12 | } 13 | 14 | public extension ValueStringSelectorBuilder { 15 | /// Matches when the target equals the given value 16 | func withValue(_ value: String) -> Self { 17 | self.value.withString(value) 18 | return self 19 | } 20 | 21 | /// Matches when the target equals any of the given values 22 | func whenValueIsAny(_ values: [String]) -> Self { 23 | self.value.whenStringIsAny(values) 24 | return self 25 | } 26 | 27 | /// Matches when the target contains the given value 28 | func containingValue(_ value: String) -> Self { 29 | self.value.whenStringContainsAny([value]) 30 | return self 31 | } 32 | 33 | /// Matches when the target contains any of the given values 34 | func whenValueContainsAny(_ keywords: [String]) -> Self { 35 | self.value.whenStringContainsAny(keywords) 36 | return self 37 | } 38 | 39 | /// Matches when the target contains all of the given values 40 | func whenValueContainsAll(_ keywords: [String]) -> Self { 41 | self.value.whenStringContainsAll(keywords) 42 | return self 43 | } 44 | 45 | // negatives 46 | /// Does not match when the target equals the given value 47 | func whenValueIsNot(_ value: String) -> Self { 48 | self.value.whenStringIsNot(value) 49 | return self 50 | } 51 | 52 | /// Does not match if the target equals any of the given values 53 | func whenValueIsNotAny(_ values: [String]) -> Self { 54 | self.value.whenStringIsNotAny(values) 55 | return self 56 | } 57 | 58 | /// Does not match if the target contains the given value 59 | func whenValueDoesNotContain(_ keyword: String) -> Self { 60 | self.value.whenStringDoesNotContainAny([keyword]) 61 | return self 62 | } 63 | 64 | /// Does not match if the target contains any of the given values 65 | func whenValueDoesNotContainAny(_ keywords: [String]) -> Self { 66 | self.value.whenStringDoesNotContainAny(keywords) 67 | return self 68 | } 69 | 70 | /// Does not match if the target contains all of the given values 71 | func whenValueDoesNotContainAll(_ keywords: [String]) -> Self { 72 | self.value.whenStringDoesNotContainAll(keywords) 73 | return self 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /Sources/SwiftHTMLParser/Traverser/Selectors/StringSelector.swift: -------------------------------------------------------------------------------- 1 | // 2 | // File.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-10-31. 6 | // 7 | 8 | import Foundation 9 | 10 | public final class StringSelector { 11 | public init() {} 12 | 13 | /// Matches if the target is any of the keywords 14 | private(set) var stringIsAny: [String]? 15 | /// Matches if the target contains anhy the keywords 16 | private(set) var stringContainsAny: [String]? 17 | /// Matches if the target contains all the keywords 18 | private(set) var stringContainsAll: [String]? 19 | 20 | // negatives 21 | /// Does not match if the target is any of the keywords 22 | private(set) var stringIsNotAny: [String]? 23 | /// Does not match if the target contains any of the keywords 24 | private(set) var stringDoesNotContainAny: [String]? 25 | /// Does not match if the target contains all of the keywords 26 | private(set) var stringDoesNotContainAll: [String]? 27 | } 28 | 29 | 30 | internal extension StringSelector { 31 | func withString(_ value: String) { 32 | self.stringIsAny.appendOrInit(value) 33 | } 34 | 35 | func whenStringIsAny(_ values: [String]) { 36 | self.stringIsAny.appendOrInit(contentsOf: values) 37 | } 38 | 39 | /// matches when attribute value contains the given values 40 | func whenStringContainsAny(_ keywords: [String]) { 41 | self.stringContainsAny.appendOrInit(contentsOf: keywords) 42 | } 43 | 44 | /// matches when the target value does not contains the given values 45 | func whenStringContainsAll(_ keywords: [String]) { 46 | self.stringContainsAll.appendOrInit(contentsOf: keywords) 47 | } 48 | 49 | /// Does not match when the target equals the given value 50 | func whenStringIsNot(_ value: String) { 51 | self.stringIsNotAny.appendOrInit(value) 52 | } 53 | 54 | /// Does not match if the target equals any of the given values 55 | func whenStringIsNotAny(_ values: [String]) { 56 | self.stringIsNotAny.appendOrInit(contentsOf: values) 57 | } 58 | 59 | /// Does not match if the target contains any of the given values 60 | func whenStringDoesNotContainAny(_ values: [String]) { 61 | self.stringDoesNotContainAny.appendOrInit(contentsOf: values) 62 | } 63 | 64 | /// Does not match if the target contains all of the given values 65 | func whenStringDoesNotContainAll(_ values: [String]) { 66 | self.stringDoesNotContainAll.appendOrInit(contentsOf: values) 67 | } 68 | } 69 | 70 | extension StringSelector { 71 | func testAgainst(_ string: String?) -> Bool { 72 | guard let string = string else { 73 | if stringIsAny != nil || stringContainsAny != nil || stringContainsAll != nil { 74 | return false 75 | } else { 76 | return true 77 | } 78 | } 79 | 80 | if let stringIsAny = stringIsAny { 81 | if stringIsAny.contains(where: { string == $0 }) == false { 82 | return false 83 | } 84 | } 85 | 86 | if let stringContainsAll = stringContainsAll { 87 | if stringContainsAll.allSatisfy({ string.contains($0) }) == false { 88 | return false 89 | } 90 | } 91 | 92 | if let stringContainsAny = stringContainsAny { 93 | if stringContainsAny.contains(where: { string.contains($0) }) == false { 94 | return false 95 | } 96 | } 97 | 98 | // negatives 99 | 100 | // fails if string is any of the keywords 101 | if let stringIsNotAny = stringIsNotAny { 102 | if stringIsNotAny.contains(where: { string == $0 }) == true { 103 | return false 104 | } 105 | } 106 | 107 | // fails if string contains any of the keywords 108 | if let stringDoesNotContainsAny = stringDoesNotContainAny { 109 | if stringDoesNotContainsAny.contains(where: { string.contains($0) }) == true { 110 | return false 111 | } 112 | } 113 | 114 | // fails if string contains all the keywords 115 | if let stringDoesNotContainsAll = stringDoesNotContainAll { 116 | if stringDoesNotContainsAll.allSatisfy({ string.contains($0) }) == true { 117 | return false 118 | } 119 | } 120 | 121 | return true 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /Tests/SwiftHTMLParserTests/AppendableTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // AppendableITests.swift.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-11-05. 6 | // 7 | 8 | import Foundation 9 | 10 | import XCTest 11 | @testable import SwiftHTMLParser 12 | 13 | final class AppendableTests: XCTestCase { 14 | 15 | func testAppendOrIntialize() { 16 | // single value 17 | var optArray: [String]? = nil 18 | optArray.appendOrInit("hello appendOrInit") 19 | XCTAssertEqual(optArray![0], "hello appendOrInit") 20 | 21 | // multiple values 22 | var optArray2: [String]? = nil 23 | optArray2.appendOrInit(contentsOf: ["sunny", "rainy", "cloudy"]) 24 | XCTAssertEqual(optArray2?.count, 3) 25 | 26 | var optSet: Set? = nil 27 | optSet.insertOrInit("apple") 28 | optSet.formUnionOrInit(["banana", "pineapple", "cherry", "pear"]) 29 | XCTAssertEqual(optSet?.count, 5) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /Tests/SwiftHTMLParserTests/AttributeParserTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // AttributeParserTests.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-08. 6 | // 7 | 8 | import XCTest 9 | @testable import SwiftHTMLParser 10 | import TestFiles 11 | 12 | final class AttributeParserTests: XCTestCase { 13 | 14 | func testAttributes() { 15 | guard let fileURL = TestFileURLs.attributesTestFilesDirectoryURL? 16 | .appendingPathComponent("attributes-simple.html") else { 17 | XCTFail("Could find get file URL to parse") 18 | return 19 | } 20 | 21 | // get html string from file 22 | var htmlStringResult: String? = nil 23 | do { 24 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 25 | } catch { 26 | XCTFail("Could not open file URL: \(fileURL)") 27 | return 28 | } 29 | guard let htmlString = htmlStringResult else { 30 | XCTFail("Could not open file URL: \(fileURL)") 31 | return 32 | } 33 | 34 | // create object from raw html file 35 | guard let nodeTree = try? HTMLParser.parse(htmlString) else { 36 | XCTFail("Could not parse HTML") 37 | return 38 | } 39 | 40 | // find matching elements by traversing the created html object 41 | var nodeSelectorPath = [ 42 | ElementSelector().withTagName("html"), 43 | ElementSelector().withTagName("body"), 44 | ElementSelector().withTagName("a") 45 | ] 46 | 47 | var matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 48 | 49 | XCTAssertEqual(matchingElements.count, 2) 50 | 51 | // test basic example 52 | XCTAssertEqual(matchingElements[0].attributeValue(for: "href"), "https://www.google.com") 53 | 54 | // test multiple attributes 55 | // id short form and attribute id should be the same 56 | XCTAssertEqual(matchingElements[1].id!, "alternate-search-engine") 57 | XCTAssertEqual(matchingElements[1].attributeValue(for: "id")!, "alternate-search-engine") 58 | XCTAssertEqual(matchingElements[1].attributeValue(for: "href")!, "https://duckduckgo.com") 59 | 60 | 61 | // test empty attribute - ex

62 | nodeSelectorPath = [ 63 | ElementSelector().withTagName("html"), 64 | ElementSelector().withTagName("body"), 65 | ElementSelector().withTagName("div") 66 | ] 67 | 68 | matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 69 | 70 | XCTAssertEqual(matchingElements.first!.attributeValue(for: "emptyAtrribute")!, "") 71 | 72 | 73 | nodeSelectorPath = [ 74 | ElementSelector().withTagName("html"), 75 | ElementSelector().withTagName("body"), 76 | ElementSelector().withTagName("form"), 77 | ElementSelector().withTagName("input") 78 | ] 79 | 80 | matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 81 | 82 | XCTAssertEqual(matchingElements.count, 1) 83 | 84 | // test attribute with name but no value 85 | XCTAssertEqual(matchingElements[0].containsAttribute("disabled"), true) 86 | XCTAssertEqual(matchingElements[0].attributeValue(for: "disabled"), nil) 87 | } 88 | 89 | func testAttributesQuotes() { 90 | guard let fileURL = TestFileURLs.attributesTestFilesDirectoryURL? 91 | .appendingPathComponent("attributes-quotes.html") else { 92 | XCTFail("Could find get file URL to parse") 93 | return 94 | } 95 | 96 | // get html string from file 97 | var htmlStringResult: String? = nil 98 | do { 99 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 100 | } catch { 101 | XCTFail("Could not open file URL: \(fileURL)") 102 | return 103 | } 104 | guard let htmlString = htmlStringResult else { 105 | XCTFail("Could not open file URL: \(fileURL)") 106 | return 107 | } 108 | 109 | // create object from raw html file 110 | guard let elementArray = try? HTMLParser.parse(htmlString) else { 111 | XCTFail("Could not parse HTML") 112 | return 113 | } 114 | 115 | // find matching elements by traversing the created html object 116 | let nodeSelectorPath = [ 117 | ElementSelector().withTagName("html"), 118 | ElementSelector().withTagName("body"), 119 | ElementSelector().withTagName("p") 120 | ] 121 | 122 | let matchingElements = HTMLTraverser.findElements(in: elementArray, matching: nodeSelectorPath) 123 | 124 | XCTAssertEqual(matchingElements.count, 2) 125 | 126 | // test attribute with double quotes within single quotes 127 | XCTAssertEqual(matchingElements[0].openingTag.attributes.count, 2) 128 | XCTAssertEqual(matchingElements[0].attributeValue(for: "title")!, "John \"ShotGun\" Nelson") 129 | 130 | // test attribute with single quotes within double quotes 131 | XCTAssertEqual(matchingElements[1].openingTag.attributes.count, 2) 132 | XCTAssertEqual(matchingElements[1].attributeValue(for: "title")!, "John 'ShotGun' Nelson") 133 | } 134 | 135 | func testAttributesTabs() { 136 | guard let fileURL = TestFileURLs.attributesTestFilesDirectoryURL? 137 | .appendingPathComponent("attributes-tabs.html") else { 138 | XCTFail("Could find get file URL to parse") 139 | return 140 | } 141 | 142 | // get html string from file 143 | var htmlStringResult: String? = nil 144 | do { 145 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 146 | } catch { 147 | XCTFail("Could not open file URL: \(fileURL)") 148 | return 149 | } 150 | guard let htmlString = htmlStringResult else { 151 | XCTFail("Could not open file URL: \(fileURL)") 152 | return 153 | } 154 | 155 | // create object from raw html file 156 | guard let elementArray = try? HTMLParser.parse(htmlString) else { 157 | XCTFail("Could not parse HTML") 158 | return 159 | } 160 | 161 | // find matching elements by traversing the created html object 162 | let nodeSelectorPath = [ 163 | ElementSelector().withTagName("html"), 164 | ElementSelector().withTagName("body"), 165 | ElementSelector().withTagName("img") 166 | ] 167 | 168 | let matchingElements = HTMLTraverser.findElements(in: elementArray, matching: nodeSelectorPath) 169 | 170 | XCTAssertEqual(matchingElements.count, 1) 171 | XCTAssertEqual(matchingElements[0].openingTag.tagName, "img") 172 | 173 | // test attribute 174 | XCTAssertEqual(matchingElements[0].attributeValue(for: "height")!, "580") 175 | XCTAssertEqual(matchingElements[0].attributeValue(for: "width")!, "480") 176 | XCTAssertEqual(matchingElements[0].attributeValue(for: "src")!, "/some/img.jpg") 177 | XCTAssertEqual(matchingElements[0].attributeValue(for: "alt")!, "/some/other/img.png") 178 | } 179 | 180 | } 181 | -------------------------------------------------------------------------------- /Tests/SwiftHTMLParserTests/CommentParserTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // CommentTests.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-11. 6 | // 7 | 8 | import XCTest 9 | @testable import SwiftHTMLParser 10 | import TestFiles 11 | 12 | final class CommentParserTests: XCTestCase { 13 | func testComments() { 14 | guard let fileURL = TestFileURLs.commentsTestFilesDirectoryURL? 15 | .appendingPathComponent("comments.html") else { 16 | XCTFail("Could not get url to test file") 17 | return 18 | } 19 | 20 | // get html string from file 21 | var htmlStringResult: String? = nil 22 | do { 23 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 24 | } catch { 25 | XCTFail("Could not open file at: \(fileURL.path)") 26 | } 27 | guard let htmlString = htmlStringResult else { 28 | XCTFail("Could not open file at: \(fileURL.path)") 29 | return 30 | } 31 | 32 | // create object from raw html file 33 | guard let elementArray = try? HTMLParser.parse(htmlString) else { 34 | XCTFail("Could not parse HTML") 35 | return 36 | } 37 | 38 | // find matching elements by traversing the created html object 39 | var nodeSelectorPath = [ 40 | ElementSelector().withTagName("html"), 41 | ElementSelector().withTagName("body") 42 | ] 43 | 44 | var matchingElements = HTMLTraverser.findElements(in: elementArray, matching: nodeSelectorPath) 45 | 46 | XCTAssertEqual(matchingElements[0].childNodes.count, 15) 47 | XCTAssertEqual(matchingElements[0].commentNodes.count, 6) 48 | XCTAssertEqual(matchingElements[0].childElements.count, 3) 49 | XCTAssertEqual(matchingElements[0].textNodes.count, 6) 50 | 51 | XCTAssertEqual(matchingElements[0].commentNodes[0].text, " This is a comment ") 52 | XCTAssertEqual(matchingElements[0].commentNodes[1].text, " This is annother comment ") 53 | XCTAssertEqual(matchingElements[0].commentNodes[3].text, " no space between the comment and div ") 54 | XCTAssertEqual(matchingElements[0].commentNodes[4].text, "x") 55 | XCTAssertEqual(matchingElements[0].commentNodes[5].text, "") 56 | 57 | nodeSelectorPath = [ 58 | ElementSelector().withTagName("html"), 59 | ElementSelector().withTagName("body"), 60 | ElementSelector().withTagName("div"), 61 | ] 62 | 63 | matchingElements = HTMLTraverser.findElements(in: elementArray, matching: nodeSelectorPath) 64 | XCTAssertEqual(matchingElements.count, 1) 65 | XCTAssertEqual(matchingElements[0].textNodes.first!.text, "This is a div") 66 | } 67 | 68 | func testConditionalComments() throws { 69 | guard let fileURL = TestFileURLs.commentsTestFilesDirectoryURL? 70 | .appendingPathComponent("conditional-comments-salvageable.html") else { 71 | XCTFail("Could not get url to test file") 72 | return 73 | } 74 | 75 | // get html string from file 76 | var htmlStringResult: String? = nil 77 | do { 78 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 79 | } catch { 80 | XCTFail("Could not open file at: \(fileURL.path)") 81 | } 82 | guard let htmlString = htmlStringResult else { 83 | XCTFail("Could not open file at: \(fileURL.path)") 84 | return 85 | } 86 | 87 | // create object from raw html file 88 | guard let elementArray = try? HTMLParser.parse(htmlString) else { 89 | XCTFail("Could not parse HTML") 90 | return 91 | } 92 | 93 | //XCTAssertEqual(elementArray.count, 2) 94 | 95 | // find matching elements by traversing the created html object 96 | let nodeSelectorPath = [ 97 | ElementSelector().withTagName("html"), 98 | ElementSelector().withTagName("body") 99 | ] 100 | 101 | let matchingElements = HTMLTraverser.findElements(in: elementArray, matching: nodeSelectorPath) 102 | 103 | XCTAssertEqual(matchingElements.count, 1) 104 | XCTAssertEqual(matchingElements.first!.commentNodes.count, 1) 105 | //let commentText = try XCTUnwrap(matchingElements.first?.commentNodes.first?.text) 106 | let commentText = matchingElements.first!.commentNodes.first!.text 107 | XCTAssertTrue(commentText.contains("

You are using Internet Explorer 6. :(

")) 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /Tests/SwiftHTMLParserTests/DocumentationTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // DocumentationTests.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-11-04. 6 | // 7 | 8 | import XCTest 9 | @testable import SwiftHTMLParser 10 | import TestFiles 11 | 12 | final class DocumentationTests: XCTestCase { 13 | 14 | func parseAndTraverseSimpleHTML() throws { 15 | // get string from file 16 | let fileURL = TestFileURLs.documentationTestFilesDirectoryURL!.appendingPathComponent("simple.html") 17 | let htmlString = try String(contentsOf: fileURL, encoding: .utf8) 18 | 19 | // parse the htmlString into a tree of node objects (DOM) 20 | let nodeTree = try HTMLParser.parse(htmlString) 21 | 22 | // create a node selector path to describe what nodes to match in the nodeTree 23 | let nodeSelectorPath: [NodeSelector] = [ 24 | ElementSelector().withTagName("html"), 25 | ElementSelector().withTagName("body"), 26 | ElementSelector().withTagName("div").atPosition(0), 27 | ElementSelector().withTagName("p").withClassName("body-paragraph") 28 | ] 29 | 30 | // find the elements that match the nodeSelectorPath 31 | // notice we use the findElements() function which only matches elements 32 | let matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 33 | 34 | // matchingElements will contain the 3 matching

elements with the className 'body-paragraph' 35 | // will print: 3 36 | print(matchingElements.count) 37 | } 38 | 39 | func parseAndTraverseSimpleHTMLTextNode() throws { 40 | // get string from file 41 | let fileURL = TestFileURLs.documentationTestFilesDirectoryURL!.appendingPathComponent("simple.html") 42 | let htmlString = try String(contentsOf: fileURL, encoding: .utf8) 43 | 44 | // parse the htmlString into a tree of node objects (DOM) 45 | let nodeTree = try HTMLParser.parse(htmlString) 46 | 47 | // create a node selector path to describe what nodes to match in the nodeTree 48 | // this is equvalent to the selector: body > p or xpath: /html/body/p 49 | let nodeSelectorPath: [NodeSelector] = [ 50 | ElementSelector().withTagName("html"), 51 | ElementSelector().withTagName("body"), 52 | ElementSelector().withTagName("div").withClassName("bibliography"), 53 | ElementSelector().withTagName("ul"), 54 | ElementSelector().withTagName("li").withId("citation-1999"), 55 | TextNodeSelector() 56 | ] 57 | 58 | // find the nodes that match the nodeSelectorPath 59 | // Notice we use the findNodes() function which can match with any node type 60 | let matchingNodes = HTMLTraverser.findNodes(in: nodeTree, matching: nodeSelectorPath) 61 | 62 | // matchingNodes will contain the matching node 63 | // we have to cast the Node to a TextNode to access its text property 64 | guard let paragraphTextNode = matchingNodes.first as? TextNode else { 65 | // could not find paragraph text node 66 | return 67 | } 68 | 69 | // will print: This is the second citation. 70 | print(paragraphTextNode.text) 71 | } 72 | 73 | func testParseAndTraverseSimpleHTMLChildNodeSelectorPath() throws { 74 | // get string from file 75 | let fileURL = TestFileURLs.documentationTestFilesDirectoryURL!.appendingPathComponent("simple.html") 76 | let htmlString = try String(contentsOf: fileURL, encoding: .utf8) 77 | 78 | // parse the htmlString into a tree of node objects (DOM) 79 | let nodeTree = try HTMLParser.parse(htmlString) 80 | 81 | // create a child node selector path that will match the parent node 82 | // only if the childNodeSelectorPath matches the element's child nodes 83 | let childNodeSelectorPath: [NodeSelector] = [ 84 | ElementSelector().withTagName("div"), 85 | ElementSelector().withTagName("p"), 86 | TextNodeSelector().withText("Editor Notes") 87 | ] 88 | 89 | // create a node selector path to describe what nodes to match in the nodeTree 90 | // Notice the last ElementSelector will only match if the element contains 91 | // child nodes that match the childNodeSelectorPath 92 | let nodeSelectorPath: [NodeSelector] = [ 93 | ElementSelector().withTagName("html"), 94 | ElementSelector().withTagName("body"), 95 | ElementSelector().withTagName("div").withChildNodeSelectorPath(childNodeSelectorPath), 96 | ] 97 | 98 | // find the nodes that match the nodeSelectorPath 99 | // Notice we use the findNodes() function which can match with any node type 100 | let matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 101 | 102 | // matchingElements should only contain the div element with the 'essay' class namee 103 | // will print: 1 104 | print(matchingElements.count) 105 | 106 | guard let divElement = matchingElements.first else { 107 | // could not find paragraph text node 108 | XCTFail("could not find paragraph text node") 109 | return 110 | } 111 | 112 | guard let firstClassName = divElement.classNames.first else { 113 | // divElement does not have any classnames 114 | XCTFail("divElement does not have any classnames") 115 | return 116 | } 117 | 118 | // will print: essay 119 | print(firstClassName) 120 | 121 | XCTAssertEqual(matchingElements.count, 1) 122 | XCTAssertEqual(firstClassName, "essay") 123 | } 124 | 125 | } 126 | -------------------------------------------------------------------------------- /Tests/SwiftHTMLParserTests/ElementTests.swift: -------------------------------------------------------------------------------- 1 | import XCTest 2 | @testable import SwiftHTMLParser 3 | import TestFiles 4 | 5 | final class SwiftHTMLParserTests: XCTestCase { 6 | 7 | func testOpenFile() { 8 | guard let fileURL = TestFileURLs.elementsTestFilesDirectoryURL? 9 | .appendingPathComponent("elements-simple.html") else { 10 | XCTFail("Could find get file URL to parse") 11 | return 12 | } 13 | 14 | // get html string from file 15 | var htmlStringResult: String? = nil 16 | do { 17 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 18 | } catch { 19 | XCTFail("Could not open file URL: \(fileURL)") 20 | return 21 | } 22 | guard let htmlString = htmlStringResult else { 23 | XCTFail("Could not open file URL: \(fileURL)") 24 | return 25 | } 26 | 27 | XCTAssertTrue(htmlString.count > 100) 28 | XCTAssertTrue(htmlString.hasPrefix("")) 29 | XCTAssertTrue(htmlString.contains("")) 30 | XCTAssertTrue(htmlString.contains("Test Simple Title")) 31 | XCTAssertTrue(htmlString.contains("

This is a Heading

")) 32 | XCTAssertTrue(htmlString.contains("")) 33 | } 34 | 35 | func testSimple() { 36 | guard let fileURL = TestFileURLs.elementsTestFilesDirectoryURL? 37 | .appendingPathComponent("elements-simple.html") else { 38 | XCTFail("Could find get file URL to parse") 39 | return 40 | } 41 | 42 | // get html string from file 43 | var htmlStringResult: String? = nil 44 | do { 45 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 46 | } catch { 47 | XCTFail("Could not open file URL: \(fileURL)") 48 | return 49 | } 50 | guard let htmlString = htmlStringResult else { 51 | XCTFail("Could not open file URL: \(fileURL)") 52 | return 53 | } 54 | 55 | // create object from raw html file 56 | guard let nodeArray = try? HTMLParser.parse(htmlString) else { 57 | XCTFail("Could not parse HTML") 58 | return 59 | } 60 | 61 | XCTAssertEqual(nodeArray.count, 2) 62 | 63 | // find matching elements by traversing the created html object 64 | var nodeSelectorPath = [ 65 | ElementSelector().withTagName("html"), 66 | ElementSelector().withTagName("head"), 67 | ElementSelector().withTagName("title") 68 | ] 69 | 70 | var matchingElements = HTMLTraverser.findElements(in: nodeArray, matching: nodeSelectorPath) 71 | 72 | XCTAssertEqual(matchingElements.count, 1) 73 | XCTAssertEqual(matchingElements[0].textNodes[0].text, "Test Simple Title") 74 | 75 | nodeSelectorPath = [ 76 | ElementSelector().withTagName("html"), 77 | ElementSelector().withTagName("body"), 78 | ElementSelector().withTagName("p") 79 | ] 80 | 81 | matchingElements = HTMLTraverser.findElements(in: nodeArray, matching: nodeSelectorPath) 82 | 83 | XCTAssertEqual(matchingElements.count, 3) 84 | XCTAssertEqual(matchingElements[1].textNodes[0].text, "This is the second paragraph.") 85 | } 86 | 87 | func testQuotes() { 88 | guard let fileURL = TestFileURLs.elementsTestFilesDirectoryURL? 89 | .appendingPathComponent("elements-quotes.html") else { 90 | XCTFail("Could find get file URL to parse") 91 | return 92 | } 93 | 94 | // get html string from file 95 | var htmlStringResult: String? = nil 96 | do { 97 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 98 | } catch { 99 | XCTFail("Could not open file URL: \(fileURL)") 100 | return 101 | } 102 | guard let htmlString = htmlStringResult else { 103 | XCTFail("Could not open file URL: \(fileURL)") 104 | return 105 | } 106 | 107 | // create object from raw html file 108 | guard let elementArray = try? HTMLParser.parse(htmlString) else { 109 | XCTFail("Could not parse HTML") 110 | return 111 | } 112 | 113 | // find matching elements by traversing the created html object 114 | let nodeSelectorPath = [ 115 | ElementSelector().withTagName("html"), 116 | ElementSelector().withTagName("body"), 117 | ElementSelector().withTagName("p") 118 | ] 119 | 120 | let matchingElements = HTMLTraverser.findElements(in: elementArray, 121 | matching: nodeSelectorPath) 122 | 123 | 124 | XCTAssertEqual(matchingElements.count, 4) 125 | XCTAssertEqual(matchingElements[0].textNodes.first!.text, "'John \"ShotGun\" Nelson'") 126 | XCTAssertEqual(matchingElements[1].textNodes.first!.text, "\"John 'ShotGun' Nelson\"") 127 | XCTAssertEqual(matchingElements[2].textNodes.first!.text, "It's alright") 128 | XCTAssertEqual(matchingElements[3].textNodes.first!.text, "I love the \" (double Quote) character") 129 | } 130 | 131 | func testClosingEmptyTag() { 132 | guard let fileURL = TestFileURLs.elementsTestFilesDirectoryURL? 133 | .appendingPathComponent("empty-element.html") else { 134 | XCTFail("Could find get file URL to parse") 135 | return 136 | } 137 | 138 | // get html string from file 139 | var htmlStringResult: String? = nil 140 | do { 141 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 142 | } catch { 143 | XCTFail("Could not open file URL: \(fileURL)") 144 | return 145 | } 146 | guard let htmlString = htmlStringResult else { 147 | XCTFail("Could not open file URL: \(fileURL)") 148 | return 149 | } 150 | 151 | // create object from raw html file 152 | guard let nodeArray = try? HTMLParser.parse(htmlString) else { 153 | XCTFail("Could not parse HTML") 154 | return 155 | } 156 | 157 | // find matching elements by traversing the created html object 158 | let nodeSelectorPath = [ 159 | ElementSelector().withTagName("html"), 160 | ElementSelector().withTagName("body"), 161 | ElementSelector().withTagName("form") 162 | ] 163 | 164 | let matchingElements = HTMLTraverser.findElements(in: nodeArray, matching: nodeSelectorPath) 165 | 166 | XCTAssertEqual(matchingElements.count, 1) 167 | XCTAssertEqual(matchingElements[0].childElements.count, 1) 168 | } 169 | 170 | func testElementNameOnNewLine() { 171 | guard let fileURL = TestFileURLs.elementsTestFilesDirectoryURL? 172 | .appendingPathComponent("element-name-on-new-line.html") else { 173 | XCTFail("Could find get file URL to parse") 174 | return 175 | } 176 | 177 | // get html string from file 178 | var htmlStringResult: String? = nil 179 | do { 180 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 181 | } catch { 182 | XCTFail("Could not open file URL: \(fileURL)") 183 | return 184 | } 185 | guard let htmlString = htmlStringResult else { 186 | XCTFail("Could not open file URL: \(fileURL)") 187 | return 188 | } 189 | 190 | // create object from raw html file 191 | guard let nodeArray = try? HTMLParser.parse(htmlString) else { 192 | XCTFail("Could not parse HTML") 193 | return 194 | } 195 | 196 | // find matching elements by traversing the created html object 197 | let nodeSelectorPath = [ 198 | ElementSelector().withTagName("html"), 199 | ElementSelector().withTagName("body"), 200 | ElementSelector().withTagName("div") 201 | ] 202 | 203 | let matchingElements = HTMLTraverser.findElements(in: nodeArray, matching: nodeSelectorPath) 204 | 205 | XCTAssertEqual(matchingElements.count, 1) 206 | XCTAssertEqual(matchingElements.first?.tagName, "div") 207 | XCTAssertEqual(matchingElements.first?.attributeValue(for: "name"), "bob") 208 | XCTAssertEqual(matchingElements.first?.attributeValue(for: "type"), "email") 209 | } 210 | 211 | func testElementUnclosedEndTag() { 212 | guard let fileURL = TestFileURLs.elementsTestFilesDirectoryURL? 213 | .appendingPathComponent("element-unclosed-end-tag.html") else { 214 | XCTFail("Could find get file URL to parse") 215 | return 216 | } 217 | 218 | // get html string from file 219 | var htmlStringResult: String? = nil 220 | do { 221 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 222 | } catch { 223 | XCTFail("Could not open file URL: \(fileURL)") 224 | return 225 | } 226 | guard let htmlString = htmlStringResult else { 227 | XCTFail("Could not open file URL: \(fileURL)") 228 | return 229 | } 230 | 231 | // create object from raw html file 232 | guard let nodeArray = try? HTMLParser.parse(htmlString) else { 233 | XCTFail("Could not parse HTML") 234 | return 235 | } 236 | 237 | // find matching elements by traversing the created html object 238 | let nodeSelectorPath = [ 239 | ElementSelector().withTagName("html"), 240 | ElementSelector().withTagName("body"), 241 | ElementSelector().withTagName("div") 242 | ] 243 | 244 | let matchingElements = HTMLTraverser.findElements(in: nodeArray, matching: nodeSelectorPath) 245 | 246 | XCTAssertEqual(matchingElements.count, 1) 247 | XCTAssertEqual(matchingElements.first?.tagName, "div") 248 | XCTAssertEqual(matchingElements.first?.childElements.count, 1) 249 | } 250 | 251 | func testElementStrayEndTag() { 252 | guard let fileURL = TestFileURLs.elementsTestFilesDirectoryURL? 253 | .appendingPathComponent("elemnent-stray-end-tag.html") else { 254 | XCTFail("Could find get file URL to parse") 255 | return 256 | } 257 | 258 | // get html string from file 259 | var htmlStringResult: String? = nil 260 | do { 261 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 262 | } catch { 263 | XCTFail("Could not open file URL: \(fileURL)") 264 | return 265 | } 266 | guard let htmlString = htmlStringResult else { 267 | XCTFail("Could not open file URL: \(fileURL)") 268 | return 269 | } 270 | 271 | // create object from raw html file 272 | guard let nodeArray = try? HTMLParser.parse(htmlString) else { 273 | XCTFail("Could not parse HTML") 274 | return 275 | } 276 | 277 | // find matching elements by traversing the created html object 278 | let nodeSelectorPath = [ 279 | ElementSelector().withTagName("html"), 280 | ElementSelector().withTagName("body"), 281 | ElementSelector().withTagName("div") 282 | ] 283 | 284 | let matchingElements = HTMLTraverser.findElements(in: nodeArray, matching: nodeSelectorPath) 285 | 286 | XCTAssertEqual(matchingElements.count, 1) 287 | XCTAssertEqual(matchingElements.first?.tagName, "div") 288 | XCTAssertEqual(matchingElements.first?.childElements.count, 1) 289 | } 290 | 291 | func testElementStrayHTMLEndTag() { 292 | guard let fileURL = TestFileURLs.elementsTestFilesDirectoryURL? 293 | .appendingPathComponent("elemnent-stray-end-html-tag.html") else { 294 | XCTFail("Could find get file URL to parse") 295 | return 296 | } 297 | 298 | // get html string from file 299 | var htmlStringResult: String? = nil 300 | do { 301 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 302 | } catch { 303 | XCTFail("Could not open file URL: \(fileURL)") 304 | return 305 | } 306 | guard let htmlString = htmlStringResult else { 307 | XCTFail("Could not open file URL: \(fileURL)") 308 | return 309 | } 310 | 311 | // create object from raw html file 312 | guard let nodeArray = try? HTMLParser.parse(htmlString) else { 313 | XCTFail("Could not parse HTML") 314 | return 315 | } 316 | 317 | // find matching elements by traversing the created html object 318 | let nodeSelectorPath = [ 319 | ElementSelector().withTagName("html"), 320 | ElementSelector().withTagName("body"), 321 | ElementSelector().withTagName("div") 322 | ] 323 | 324 | let matchingElements = HTMLTraverser.findElements(in: nodeArray, matching: nodeSelectorPath) 325 | 326 | XCTAssertEqual(matchingElements.count, 1) 327 | XCTAssertEqual(matchingElements.first?.tagName, "div") 328 | XCTAssertEqual(matchingElements.first?.childElements.count, 1) 329 | } 330 | } 331 | -------------------------------------------------------------------------------- /Tests/SwiftHTMLParserTests/ElementTraverserTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // TestElementTraverser.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-10-22. 6 | // 7 | 8 | import XCTest 9 | import SwiftHTMLParser 10 | import TestFiles 11 | 12 | final class ElementTraverserTests: XCTestCase { 13 | 14 | func testSelectTagName() { 15 | guard let fileURL = TestFileURLs.attributesTestFilesDirectoryURL? 16 | .appendingPathComponent("attributes-multiple-value-class.html") else { 17 | XCTFail("Could find get file URL to parse") 18 | return 19 | } 20 | 21 | var nodeTreeResult: [Node]? = nil 22 | do { 23 | nodeTreeResult = try TestHelper.openFileAndParseHTML(fileURL: fileURL) 24 | } catch { 25 | XCTFail(error.localizedDescription) 26 | return 27 | } 28 | guard let nodeTree = nodeTreeResult else { 29 | XCTFail("nodeTreeResult was nil") 30 | return 31 | } 32 | 33 | // find matching elements by traversing the created html object 34 | let nodeSelectorPath = [ 35 | ElementSelector().withTagName("html"), 36 | ElementSelector().containingTagName("bod"), 37 | ElementSelector().withTagName("p") 38 | ] 39 | 40 | let matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 41 | XCTAssertEqual(matchingElements.count, 4) 42 | } 43 | 44 | 45 | func testSelectAttributes() { 46 | guard let fileURL = TestFileURLs.attributesTestFilesDirectoryURL? 47 | .appendingPathComponent("attributes-simple.html") else { 48 | XCTFail("Could find get file URL to parse") 49 | return 50 | } 51 | 52 | var nodeTreeResult: [Node]? = nil 53 | do { 54 | nodeTreeResult = try TestHelper.openFileAndParseHTML(fileURL: fileURL) 55 | } catch { 56 | XCTFail(error.localizedDescription) 57 | return 58 | } 59 | guard let nodeTree = nodeTreeResult else { 60 | XCTFail("nodeTreeResult was nil") 61 | return 62 | } 63 | 64 | // find matching elements by traversing the created html object 65 | let nodeSelectorPath: [NodeSelector] = [ 66 | ElementSelector().withTagName("html"), 67 | ElementSelector().withTagName("body"), 68 | ElementSelector().withTagName("a") 69 | .withAttribute(AttributeSelector.init(name: "href").withValue("https://duckduckgo.com")) 70 | ] 71 | 72 | let matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 73 | XCTAssertEqual(matchingElements.count, 1) 74 | XCTAssertEqual(matchingElements[0].textNodes[0].text, "This is an alternate link") 75 | } 76 | 77 | func testSelectClassName() { 78 | guard let fileURL = TestFileURLs.attributesTestFilesDirectoryURL? 79 | .appendingPathComponent("attributes-multiple-value-class.html") else { 80 | XCTFail("Could find get file URL to parse") 81 | return 82 | } 83 | 84 | var nodeTreeResult: [Node]? = nil 85 | do { 86 | nodeTreeResult = try TestHelper.openFileAndParseHTML(fileURL: fileURL) 87 | } catch { 88 | XCTFail(error.localizedDescription) 89 | return 90 | } 91 | guard let nodeTree = nodeTreeResult else { 92 | XCTFail("nodeTreeResult was nil") 93 | return 94 | } 95 | 96 | // find matching elements by traversing the created html object 97 | var nodeSelectorPath = [ 98 | ElementSelector().withTagName("html"), 99 | ElementSelector().withTagName("body"), 100 | ElementSelector().withTagName("p") 101 | .withClassNamesAny(["body-paragraph"]) 102 | ] 103 | 104 | var matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 105 | XCTAssertTrue(matchingElements.count == 1) 106 | XCTAssertEqual(matchingElements[0].textNodes[0].text, "This is the second paragraph.") 107 | 108 | // find matching elements by traversing the created html object 109 | nodeSelectorPath = [ 110 | ElementSelector().withTagName("html"), 111 | ElementSelector().withTagName("body"), 112 | ElementSelector().withTagName("p").withClassName("stylized-paragraph") 113 | 114 | //.withoutClassName("into-paragraph") 115 | ] 116 | 117 | matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 118 | XCTAssertTrue(matchingElements.count == 4) 119 | XCTAssertEqual(matchingElements[0].textNodes[0].text, "This is the first paragraph.") 120 | XCTAssertEqual(matchingElements[1].textNodes[0].text, "This is the second paragraph.") 121 | XCTAssertEqual(matchingElements[2].textNodes[0].text, "This is the third paragraph.") 122 | XCTAssertEqual(matchingElements[3].textNodes[0].text, "This is the fourth paragraph.") 123 | 124 | // find matching elements by traversing the created html object 125 | nodeSelectorPath = [ 126 | ElementSelector().withTagName("html"), 127 | ElementSelector().withTagName("body"), 128 | ElementSelector().withTagName("p") 129 | .withClassNamesExact(["stylized-paragraph"]) 130 | ] 131 | 132 | matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 133 | XCTAssertTrue(matchingElements.count == 1) 134 | XCTAssertEqual(matchingElements[0].textNodes[0].text, "This is the third paragraph.") 135 | 136 | 137 | // find matching elements by traversing the created html object 138 | nodeSelectorPath = [ 139 | ElementSelector().withTagName("html"), 140 | ElementSelector().withTagName("body"), 141 | ElementSelector().withTagName("p") 142 | .withoutClassNameAny(["into-paragraph"]) 143 | ] 144 | matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 145 | XCTAssertEqual(matchingElements.count, 3) 146 | XCTAssertEqual(matchingElements[0].textNodes[0].text, "This is the second paragraph.") 147 | XCTAssertEqual(matchingElements[1].textNodes[0].text, "This is the third paragraph.") 148 | XCTAssertEqual(matchingElements[2].textNodes[0].text, "This is the fourth paragraph.") 149 | } 150 | 151 | func testSelectPosition() { 152 | guard let fileURL = TestFileURLs.attributesTestFilesDirectoryURL? 153 | .appendingPathComponent("attributes-multiple-value-class.html") else { 154 | XCTFail("Could find get file URL to parse") 155 | return 156 | } 157 | 158 | var nodeTreeResult: [Node]? = nil 159 | do { 160 | nodeTreeResult = try TestHelper.openFileAndParseHTML(fileURL: fileURL) 161 | } catch { 162 | XCTFail(error.localizedDescription) 163 | return 164 | } 165 | guard let nodeTree = nodeTreeResult else { 166 | XCTFail("nodeTreeResult was nil") 167 | return 168 | } 169 | 170 | // test position equal 171 | var nodeSelectorPath = [ 172 | ElementSelector().withTagName("html"), 173 | ElementSelector().withTagName("body"), 174 | ElementSelector().withTagName("p").atPosition(1) 175 | ] 176 | var matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 177 | XCTAssertTrue(matchingElements.count == 1) 178 | XCTAssertEqual(matchingElements[0].textNodes[0].text, "This is the second paragraph.") 179 | 180 | // test position greater than 181 | nodeSelectorPath = [ 182 | ElementSelector().withTagName("html"), 183 | ElementSelector().withTagName("body"), 184 | ElementSelector().withTagName("p").whenPositionIsGreaterThan(1) 185 | ] 186 | matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 187 | XCTAssertTrue(matchingElements.count == 2) 188 | XCTAssertEqual(matchingElements[0].textNodes[0].text, "This is the third paragraph.") 189 | XCTAssertEqual(matchingElements[1].textNodes[0].text, "This is the fourth paragraph.") 190 | 191 | // test position less than 192 | nodeSelectorPath = [ 193 | ElementSelector().withTagName("html"), 194 | ElementSelector().withTagName("body"), 195 | ElementSelector().withTagName("p").whenPositionIsLessThan(3) 196 | ] 197 | matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 198 | XCTAssertTrue(matchingElements.count == 3) 199 | XCTAssertEqual(matchingElements[0].textNodes[0].text, "This is the first paragraph.") 200 | XCTAssertEqual(matchingElements[1].textNodes[0].text, "This is the second paragraph.") 201 | XCTAssertEqual(matchingElements[2].textNodes[0].text, "This is the third paragraph.") 202 | } 203 | 204 | func testSelectInnerText() { 205 | guard let fileURL = TestFileURLs.attributesTestFilesDirectoryURL? 206 | .appendingPathComponent("attributes-multiple-value-class.html") else { 207 | XCTFail("Could find get file URL to parse") 208 | return 209 | } 210 | 211 | var nodeTreeResult: [Node]? = nil 212 | do { 213 | nodeTreeResult = try TestHelper.openFileAndParseHTML(fileURL: fileURL) 214 | } catch { 215 | XCTFail(error.localizedDescription) 216 | return 217 | } 218 | guard let nodeTree = nodeTreeResult else { 219 | XCTFail("nodeTreeResult was nil") 220 | return 221 | } 222 | 223 | // find matching elements by traversing the created html object 224 | let nodeSelectorPath = [ 225 | ElementSelector().withTagName("html"), 226 | ElementSelector().withTagName("body"), 227 | ElementSelector().withTagName("p") 228 | .withChildTextNode(TextNodeSelector().withText("This is the second paragraph.")) 229 | ] 230 | let matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 231 | XCTAssertEqual(matchingElements.count, 1) 232 | XCTAssertEqual(matchingElements[0].textNodes[0].text, "This is the second paragraph.") 233 | } 234 | 235 | func testSelectInnerComment() { 236 | guard let fileURL = TestFileURLs.attributesTestFilesDirectoryURL? 237 | .appendingPathComponent("attributes-multiple-value-class.html") else { 238 | XCTFail("Could find get file URL to parse") 239 | return 240 | } 241 | 242 | var nodeTreeResult: [Node]? = nil 243 | do { 244 | nodeTreeResult = try TestHelper.openFileAndParseHTML(fileURL: fileURL) 245 | } catch { 246 | XCTFail(error.localizedDescription) 247 | return 248 | } 249 | guard let nodeTree = nodeTreeResult else { 250 | XCTFail("nodeTreeResult was nil") 251 | return 252 | } 253 | 254 | // find matching elements by traversing the created html object 255 | let nodeSelectorPath = [ 256 | ElementSelector().withTagName("html"), 257 | ElementSelector().withTagName("body"), 258 | ElementSelector().withTagName("p") 259 | .withChildCommentNode(CommentSelector().containingText("This is a comment")) 260 | ] 261 | let matchingElements = HTMLTraverser.findElements(in: nodeTree, matching: nodeSelectorPath) 262 | XCTAssertEqual(matchingElements.count, 1) 263 | XCTAssertEqual(matchingElements[0].textNodes[0].text, "This is the fourth paragraph.") 264 | } 265 | 266 | } 267 | 268 | -------------------------------------------------------------------------------- /Tests/SwiftHTMLParserTests/JavascriptParserTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // JavascriptParserTests.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-09. 6 | // 7 | 8 | import XCTest 9 | @testable import SwiftHTMLParser 10 | import TestFiles 11 | 12 | final class JavascriptParserTests: XCTestCase { 13 | 14 | func testJavascriptSimple() { 15 | guard let fileURL = TestFileURLs.javascriptTestFilesDirectoryURL? 16 | .appendingPathComponent("javascript-simple.html") else { 17 | XCTFail("Could not get url to test file") 18 | return 19 | } 20 | 21 | // get html string from file 22 | var htmlStringResult: String? = nil 23 | do { 24 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 25 | } catch { 26 | XCTFail("Could not open file at: \(fileURL.path)") 27 | } 28 | guard let htmlString = htmlStringResult else { 29 | XCTFail("Could not open file at: \(fileURL.path)") 30 | return 31 | } 32 | 33 | // create object from raw html file 34 | guard let elementArray = try? HTMLParser.parse(htmlString) else { 35 | XCTFail("Could not parse HTML") 36 | return 37 | } 38 | 39 | // find matching elements by traversing the created html object 40 | let nodeSelectorPath = [ 41 | ElementSelector().withTagName("html"), 42 | ElementSelector().withTagName("body"), 43 | ElementSelector().withTagName("script") 44 | ] 45 | 46 | let matchingElements = HTMLTraverser.findElements(in: elementArray, 47 | matching: nodeSelectorPath) 48 | XCTAssertEqual(matchingElements[0].childElements.count, 0) 49 | } 50 | 51 | func testJavascriptComments() { 52 | guard let fileURL = TestFileURLs.javascriptTestFilesDirectoryURL? 53 | .appendingPathComponent("javascript-comments.html") else { 54 | XCTFail("Could not get url to test file") 55 | return 56 | } 57 | 58 | // get html string from file 59 | var htmlStringResult: String? = nil 60 | do { 61 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 62 | } catch { 63 | XCTFail("Could not open file at: \(fileURL.path)") 64 | } 65 | guard let htmlString = htmlStringResult else { 66 | XCTFail("Could not open file at: \(fileURL.path)") 67 | return 68 | } 69 | 70 | // create object from raw html file 71 | guard let elementArray = try? HTMLParser.parse(htmlString) else { 72 | XCTFail("Could not parse HTML") 73 | return 74 | } 75 | 76 | // find matching elements by traversing the created html object 77 | let nodeSelectorPath = [ 78 | ElementSelector().withTagName("html"), 79 | ElementSelector().withTagName("body"), 80 | ElementSelector().withTagName("script") 81 | ] 82 | 83 | let matchingElements = HTMLTraverser.findElements(in: elementArray, matching: nodeSelectorPath) 84 | 85 | XCTAssertEqual(matchingElements[0].childElements.count, 0) 86 | XCTAssertEqual(matchingElements[0].textNodes.count, 1) 87 | } 88 | 89 | func testJavascriptQuotes() { 90 | guard let fileURL = TestFileURLs.javascriptTestFilesDirectoryURL? 91 | .appendingPathComponent("javascript-quotes.html") else { 92 | XCTFail("Could not get url to test file") 93 | return 94 | } 95 | 96 | // get html string from file 97 | var htmlStringResult: String? = nil 98 | do { 99 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 100 | } catch { 101 | XCTFail("Could not open file at: \(fileURL.path)") 102 | } 103 | guard let htmlString = htmlStringResult else { 104 | XCTFail("Could not open file at: \(fileURL.path)") 105 | return 106 | } 107 | 108 | // create object from raw html file 109 | guard let elementArray = try? HTMLParser.parse(htmlString) else { 110 | XCTFail("Could not parse HTML") 111 | return 112 | } 113 | 114 | // find matching elements by traversing the created html object 115 | let nodeSelectorPath = [ 116 | ElementSelector().withTagName("html"), 117 | ElementSelector().withTagName("body"), 118 | ElementSelector().withTagName("script") 119 | ] 120 | 121 | let matchingElements = HTMLTraverser.findElements(in: elementArray, matching: nodeSelectorPath) 122 | 123 | XCTAssertEqual(matchingElements[0].childElements.count, 0) 124 | XCTAssertEqual(matchingElements[0].textNodes.count, 1) 125 | XCTAssertEqual(matchingElements[0].textNodes[0].text.count, 803) 126 | } 127 | 128 | func testJavascriptQuotesWithEscapeCharacters() { 129 | guard let fileURL = TestFileURLs.javascriptTestFilesDirectoryURL? 130 | .appendingPathComponent("javascript-quotes-with-escape-characters.html") else { 131 | XCTFail("Could not get url to test file") 132 | return 133 | } 134 | 135 | // get html string from file 136 | var htmlStringResult: String? = nil 137 | do { 138 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 139 | } catch { 140 | XCTFail("Could not open file at: \(fileURL.path)") 141 | } 142 | guard let htmlString = htmlStringResult else { 143 | XCTFail("Could not open file at: \(fileURL.path)") 144 | return 145 | } 146 | 147 | // create object from raw html file 148 | guard let elementArray = try? HTMLParser.parse(htmlString) else { 149 | XCTFail("Could not parse HTML") 150 | return 151 | } 152 | 153 | XCTAssertEqual(elementArray.count, 2) 154 | 155 | // find matching elements by traversing the created html object 156 | let nodeSelectorPath = [ 157 | ElementSelector().withTagName("html"), 158 | ElementSelector().withTagName("body"), 159 | ElementSelector().withTagName("script") 160 | ] 161 | 162 | let matchingElements = HTMLTraverser.findElements(in: elementArray, matching: nodeSelectorPath) 163 | 164 | XCTAssertEqual(matchingElements.count, 1) 165 | XCTAssertEqual(matchingElements[0].childElements.count, 0) 166 | XCTAssertEqual(matchingElements[0].textNodes.count, 1) 167 | } 168 | 169 | } 170 | -------------------------------------------------------------------------------- /Tests/SwiftHTMLParserTests/PerformanceTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PerformanceTests.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-11. 6 | // 7 | 8 | import XCTest 9 | @testable import SwiftHTMLParser 10 | import TestFiles 11 | 12 | final class PerformanceTests: XCTestCase { 13 | 14 | func testIteratingString() { 15 | guard let fileURL = TestFileURLs.realWorldTestFilesDirectoryURL? 16 | .appendingPathComponent("google-home-page.html") else { 17 | XCTFail("Could not get url to test file") 18 | return 19 | } 20 | 21 | // get html string from file 22 | var htmlStringResult: String? = nil 23 | do { 24 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 25 | } catch { 26 | XCTFail("Could not open file at: \(fileURL.path)") 27 | } 28 | guard let htmlString = htmlStringResult else { 29 | XCTFail("Could not open file at: \(fileURL.path)") 30 | return 31 | } 32 | 33 | var currentIndex = htmlString.startIndex 34 | var numberOfMatchingCharacters = 0 35 | let charToMatch: Character = "a" 36 | 37 | let start = Date() 38 | while currentIndex < htmlString.endIndex { 39 | if (htmlString[currentIndex] == charToMatch) { 40 | numberOfMatchingCharacters += 1 41 | } 42 | 43 | // iterate current index 44 | currentIndex = htmlString.index(currentIndex, offsetBy: 1) 45 | } 46 | let end = Date() 47 | 48 | let timeElapsed = end.timeIntervalSince(start) 49 | print("time elapsed: \(timeElapsed) seconds") 50 | 51 | print("found \(numberOfMatchingCharacters) matching the string '\(charToMatch)'") 52 | 53 | print("--------------------") 54 | } 55 | 56 | func testStringIteration() { 57 | guard let fileURL = TestFileURLs.realWorldTestFilesDirectoryURL? 58 | .appendingPathComponent("google-home-page.html") else { 59 | XCTFail("Could not get url to test file") 60 | return 61 | } 62 | 63 | // get html string from file 64 | var htmlStringResult: String? = nil 65 | do { 66 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 67 | } catch { 68 | XCTFail("Could not open file at: \(fileURL.path)") 69 | } 70 | guard let text = htmlStringResult else { 71 | XCTFail("Could not open file at: \(fileURL.path)") 72 | return 73 | } 74 | 75 | var currentIndex = text.startIndex 76 | var numberOfMatchingCharacters = 0 77 | 78 | let lookaheadValidator = LookaheadValidator() 79 | let scriptEndTag = "" 80 | 81 | let start = Date() 82 | while currentIndex < text.endIndex { 83 | // test for character 84 | if containsInner(text: text, currentIndex: currentIndex) { 85 | numberOfMatchingCharacters += 1 86 | } 87 | 88 | // test speed of lookahead validator 89 | if lookaheadValidator.isValidLookahead(for: text, atIndex: currentIndex, checkFor: scriptEndTag) { 90 | //print("found") 91 | } 92 | 93 | // iterate current index 94 | currentIndex = text.index(currentIndex, offsetBy: 1) 95 | } 96 | let end = Date() 97 | 98 | let timeElapsed = end.timeIntervalSince(start) 99 | print("time elapsed: \(timeElapsed) seconds") 100 | 101 | print("found \(numberOfMatchingCharacters) matching characters.") 102 | 103 | print("--------------------") 104 | } 105 | 106 | func containsInner(text: String, currentIndex: String.Index) -> Bool { 107 | let localIndex = currentIndex 108 | 109 | if text[localIndex] == "a" || text[localIndex] == "A" { 110 | return true 111 | } else { 112 | return false 113 | } 114 | } 115 | 116 | // func testDeep() { 117 | // guard let fileURL = TestsConfig.performanceTestFilesDirectoryURL? 118 | // .appendingPathComponent("deep.html") else { 119 | // XCTFail("Could not get url to test file") 120 | // return 121 | // } 122 | // 123 | // // get html string from file 124 | // var htmlStringResult: String? = nil 125 | // do { 126 | // htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 127 | // } catch { 128 | // XCTFail("Could not open file at: \(fileURL.path)") 129 | // } 130 | // guard let htmlString = htmlStringResult else { 131 | // XCTFail("Could not open file at: \(fileURL.path)") 132 | // return 133 | // } 134 | // 135 | // // create object from raw html file 136 | // let htmlParser = HTMLParser() 137 | // guard let elementArray = try? HTMLParser.parse(htmlString) else { 138 | // XCTFail("Could not parse HTML") 139 | // return 140 | // } 141 | // 142 | // // find matching elements by traversing the created html object 143 | // let nodeSelectorPath = [ 144 | // ElementSelector.init(tagName: "html"), 145 | // ElementSelector.init(tagName: "body") 146 | // ] 147 | // 148 | // let traverser = HTMLTraverser() 149 | // let matchingElements = traverser.findElements(in: elementArray, 150 | // matchingNodeSelectorPath: nodeSelectorPath) 151 | // 152 | // XCTAssertEqual(matchingElements[0].childElements.count, 300) 153 | // } 154 | 155 | // func testTimeDeep() { 156 | // guard let fileURL = TestsConfig.performanceTestFilesDirectoryURL? 157 | // .appendingPathComponent("deep.html") else { 158 | // XCTFail("Could not get url to test file") 159 | // return 160 | // } 161 | // 162 | // // get html string from file 163 | // var htmlStringResult: String? = nil 164 | // do { 165 | // htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 166 | // } catch { 167 | // XCTFail("Could not open file at: \(fileURL.path)") 168 | // } 169 | // guard let htmlString = htmlStringResult else { 170 | // XCTFail("Could not open file at: \(fileURL.path)") 171 | // return 172 | // } 173 | // 174 | // // create object from raw html file 175 | // let start = Date() 176 | // let htmlParser = HTMLParser() 177 | // for _ in 0..<20 { 178 | // do { 179 | // _ = try HTMLParser.parse(htmlString) 180 | // } catch { 181 | // XCTFail("Could not parse HTML") 182 | // } 183 | // } 184 | // let end = Date() 185 | // 186 | // let timeElapsed = end.timeIntervalSince(start) 187 | // print("time elapsed: \(timeElapsed) seconds") 188 | // } 189 | } 190 | -------------------------------------------------------------------------------- /Tests/SwiftHTMLParserTests/SVGParserTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // SVGParserTests.swift 3 | // SwiftHTMLParser 4 | // 5 | // Created by Reid Nantes on 2018-12-12. 6 | // 7 | 8 | import Foundation 9 | 10 | import XCTest 11 | @testable import SwiftHTMLParser 12 | import TestFiles 13 | 14 | 15 | final class SVGParserTests: XCTestCase { 16 | func testSVG() { 17 | guard let fileURL = TestFileURLs.svgTestFilesDirectoryURL? 18 | .appendingPathComponent("svg-simple.html") else { 19 | XCTFail("Could find get file URL to parse") 20 | return 21 | } 22 | 23 | // get html string from file 24 | var htmlStringResult: String? = nil 25 | do { 26 | htmlStringResult = try String(contentsOf: fileURL, encoding: .utf8) 27 | } catch { 28 | XCTFail("Could not open file URL: \(fileURL)") 29 | return 30 | } 31 | guard let htmlString = htmlStringResult else { 32 | XCTFail("Could not open file URL: \(fileURL)") 33 | return 34 | } 35 | 36 | // create object from raw html file 37 | guard let elementArray = try? HTMLParser.parse(htmlString) else { 38 | XCTFail("Could not parse HTML") 39 | return 40 | } 41 | 42 | XCTAssertEqual(elementArray.count, 2) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /Tests/SwiftHTMLParserTests/TestHelper.swift: -------------------------------------------------------------------------------- 1 | // 2 | // TestHelper.swift 3 | // 4 | // 5 | // Created by Reid Nantes on 2019-10-25. 6 | // 7 | 8 | import Foundation 9 | import SwiftHTMLParser 10 | 11 | 12 | struct TestHelper { 13 | static func openFileAndParseHTML(fileURL: URL) throws -> [Node] { 14 | // get html string from file 15 | let htmlString = try String(contentsOf: fileURL, encoding: .utf8) 16 | 17 | // create object from raw html file 18 | let nodeTree = try HTMLParser.parse(htmlString) 19 | 20 | return nodeTree 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Attributes/attributes-multiple-value-class.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 |

This is a Heading

8 | 9 |

10 | This is the first paragraph. 11 |

12 |

13 | This is the second paragraph. 14 |

15 |

16 | This is the third paragraph. 17 |

18 |

19 | 20 | This is the fourth paragraph. 21 |

22 | 23 | 24 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Attributes/attributes-quotes.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 |

This is a Heading

8 | 9 |

10 | This is the first paragraph. 11 |

12 |

13 | This is the second paragraph. 14 |

15 | 16 | Girl with a jacket 17 | 18 | 19 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Attributes/attributes-simple.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 |

This is a Heading

8 |

This is a paragraph.

9 | 10 | This is a link 11 | This is an alternate link 12 | 13 |
This is a div with an empty attribute
14 | 15 |
16 | First name:
17 | 18 |
19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Attributes/attributes-tabs.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 |

This is a Heading

8 |

This is a paragraph.

9 | 10 | /some/other/img.png 16 | 17 |
18 | First name:
19 | 20 |
21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Comments/comments.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 | 8 | this is inner text 9 | 10 | here is some more inner text 11 | 12 |

This is a Heading

13 |

This is a paragraph.

14 | 15 | here is even more inner text 16 | 17 | 21 | 22 |
This is a div
23 | 24 | This next comment has one character of text 25 | 26 | 27 | This next comment has no text 28 | 29 | 30 | This inner text is getting out of control 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Comments/conditional-comments-salvageable.html: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 8 | Test Simple Title 9 | 10 | 11 |

This is a heading

12 | 13 | 16 |
17 | 18 |

Above conditional comments incorect, should ignore div

19 | 20 | 21 |
hello world
22 | 23 |

This is shown in chrome

24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Comments/declarations.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 | 8 | this is inner text 9 | 10 | here is some more inner text 11 | 12 |

This is a Heading

13 |

This is a paragraph.

14 | 15 | here is even more inner text 16 | 17 | 18 | 19 |
This is a div
20 | 21 | This inner text is getting out of control 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Documentation/simple.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | This is a Simple Example 5 | 6 | 7 |

This is a Heading

8 | 9 |
10 |

This is the first paragraph.

11 |

This is the second paragraph.

12 |

This is the third paragraph.

13 |

This is the fourth paragraph.

14 |

This is the fifth paragraph.

15 | 16 |
17 |

Editor Notes

18 |
19 |
20 | 21 |
22 |
    23 |
  • This is the first citation.
  • 24 |
  • This is the second citation.
  • 25 |
  • This is the third citation.
  • 26 |
27 | 28 |
29 |

Bibliography Notes

30 |
31 |
32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Elements/element-name-on-new-line.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 |

This is a Heading

8 |
This is text in a div
11 | Cities 16 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Elements/element-unclosed-end-tag.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 |

This is a Heading

8 |

This is the first paragraph.

9 | 10 |
11 |
12 |

This is the second paragraph.

13 | 14 | 15 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Elements/elements-quotes.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 |

This is a Heading

8 |

'John "ShotGun" Nelson'

9 |

"John 'ShotGun' Nelson"

10 |

It's alright

11 |

I love the " (double Quote) character

12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Elements/elements-simple.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 |

This is a Heading

8 |

This is the first paragraph.

9 |

This is the second paragraph.

10 |

This is the third paragraph.

11 | 12 | 13 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Elements/elemnent-stray-end-html-tag.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 |

This is a Heading

8 |

This is the first paragraph.

9 | 10 |
11 |

This is the second paragraph.

12 |
13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Elements/elemnent-stray-end-tag.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 |

This is a Heading

8 |

This is the first paragraph.

9 | 10 |
11 |

This is the second paragraph.

12 |
13 | 14 | 15 |
16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Elements/empty-element.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 |

This is a Heading

8 |

This is a paragraph.

9 | 10 |
11 | hello12344 12 |
13 | 14 | 15 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Javascript/javascript-comments.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 |

This is a Heading

8 |

This is a paragraph.

9 | 10 | 11 | 12 |

This is a demonstration.

13 | 14 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Javascript/javascript-quotes-with-escape-characters.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | This is a title 5 | 6 | 7 |

This is a Heading

8 |

This is a paragraph.

9 | 10 | 11 | 12 |

This is a demonstration.

13 | 14 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Javascript/javascript-quotes.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | This is a title 5 | 6 | 7 |

This is a Heading

8 |

This is a paragraph.

9 | 10 | 11 | 12 |

This is a demonstration.

13 | 14 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/Javascript/javascript-simple.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 |

This is a Heading

8 |

This is a paragraph.

9 | 10 | 11 | 12 |

This is a demonstration.

13 | 14 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /Tests/TestFiles/Mock/SVG/svg-simple.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test Simple Title 5 | 6 | 7 |

This is a Heading

8 |

This is a paragraph.

9 | 10 | 11 | 12 |

This is a demonstration.

13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /Tests/TestFiles/RealWorld/weather-forcast.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | Guelph - Weather - Environment Canada 7 | 8 | 9 | 10 | 11 | Environment Canada 12 | https://www.weather.gc.ca 13 | 14 | 2018-12-14T10:01:44Z 15 | tag:weather.gc.ca,2013-04-16:20181214100144 16 | https://www.weather.gc.ca/template/gcweb/v4.0.24/assets/wmms-alt.png 17 | https://www.weather.gc.ca/template/gcweb/v4.0.24/assets/favicon.ico 18 | Copyright 2018, Environment Canada 19 | 20 | FOG ADVISORY , Guelph 21 | 22 | 2018-12-14T04:24:00Z 23 | 2018-12-14T04:24:00Z 24 | 25 | Persons in or near this area should be on the lookout for adverse weather conditions and take necessary safety precautions. Issued: 11:24 PM EST Thursday 13 December 2018 26 | tag:weather.gc.ca,2013-04-16:on-5_w1:201812140424 27 | 28 | 29 | Current Conditions: -0.8°C 30 | 31 | 2018-12-14T10:00:00Z 32 | 2018-12-14T10:00:00Z 33 | 34 | Observed at: Guelph Turfgrass 05:00 AM EST Friday 14 December 2018
35 | Temperature: -0.8°C
36 | Humidity: 99 %
37 | Dewpoint: -0.9°C
38 | Wind: 0 km/h
39 | Air Quality Health Index: 2
40 | ]]>
41 | tag:weather.gc.ca,2013-04-16:on-5_cc:20181214100000 42 |
43 | 44 | Friday: A few showers. High plus 4. 45 | 46 | 2018-12-14T10:00:00Z 47 | 2018-12-14T10:00:00Z 48 | 49 | Cloudy. A few rain showers beginning early this morning and ending early this afternoon. Risk of freezing rain early this morning. Fog dissipating this morning. Wind becoming southwest 20 km/h gusting to 40 near noon. High plus 4. UV index 1 or low. Forecast issued 05:00 AM EST Friday 14 December 2018 50 | tag:weather.gc.ca,2013-04-16:on-5_fc1:20181214100000 51 | 52 | 53 | Friday night: Mainly cloudy. Low minus 2. 54 | 55 | 2018-12-14T10:00:00Z 56 | 2018-12-14T10:00:00Z 57 | 58 | Mainly cloudy. Fog patches developing near midnight. Wind up to 15 km/h. Low minus 2. Wind chill minus 6 overnight. Forecast issued 05:00 AM EST Friday 14 December 2018 59 | tag:weather.gc.ca,2013-04-16:on-5_fc2:20181214100000 60 | 61 | 62 | Saturday: Mainly cloudy. High plus 4. 63 | 64 | 2018-12-14T10:00:00Z 65 | 2018-12-14T10:00:00Z 66 | 67 | Mainly cloudy. Fog patches dissipating in the morning. Wind becoming northeast 20 km/h gusting to 40 in the morning. High plus 4. Forecast issued 05:00 AM EST Friday 14 December 2018 68 | tag:weather.gc.ca,2013-04-16:on-5_fc3:20181214100000 69 | 70 | 71 | Saturday night: Chance of flurries. Low minus 1. POP 30% 72 | 73 | 2018-12-14T10:00:00Z 74 | 2018-12-14T10:00:00Z 75 | 76 | Cloudy periods with 30 percent chance of flurries. Low minus 1. Forecast issued 05:00 AM EST Friday 14 December 2018 77 | tag:weather.gc.ca,2013-04-16:on-5_fc4:20181214100000 78 | 79 | 80 | Sunday: A mix of sun and cloud. High plus 5. 81 | 82 | 2018-12-14T10:00:00Z 83 | 2018-12-14T10:00:00Z 84 | 85 | A mix of sun and cloud. High plus 5. Forecast issued 05:00 AM EST Friday 14 December 2018 86 | tag:weather.gc.ca,2013-04-16:on-5_fc5:20181214100000 87 | 88 | 89 | Sunday night: Chance of flurries. Low zero. POP 30% 90 | 91 | 2018-12-14T10:00:00Z 92 | 2018-12-14T10:00:00Z 93 | 94 | Cloudy periods with 30 percent chance of flurries. Low zero. Forecast issued 05:00 AM EST Friday 14 December 2018 95 | tag:weather.gc.ca,2013-04-16:on-5_fc6:20181214100000 96 | 97 | 98 | Monday: Chance of flurries. High zero. POP 30% 99 | 100 | 2018-12-14T10:00:00Z 101 | 2018-12-14T10:00:00Z 102 | 103 | Cloudy with 30 percent chance of flurries. High zero. Forecast issued 05:00 AM EST Friday 14 December 2018 104 | tag:weather.gc.ca,2013-04-16:on-5_fc7:20181214100000 105 | 106 | 107 | Monday night: Cloudy periods. Low minus 8. 108 | 109 | 2018-12-14T10:00:00Z 110 | 2018-12-14T10:00:00Z 111 | 112 | Cloudy periods. Low minus 8. Forecast issued 05:00 AM EST Friday 14 December 2018 113 | tag:weather.gc.ca,2013-04-16:on-5_fc8:20181214100000 114 | 115 | 116 | Tuesday: A mix of sun and cloud. High minus 2. 117 | 118 | 2018-12-14T10:00:00Z 119 | 2018-12-14T10:00:00Z 120 | 121 | A mix of sun and cloud. High minus 2. Forecast issued 05:00 AM EST Friday 14 December 2018 122 | tag:weather.gc.ca,2013-04-16:on-5_fc9:20181214100000 123 | 124 | 125 | Tuesday night: Cloudy periods. Low minus 5. 126 | 127 | 2018-12-14T10:00:00Z 128 | 2018-12-14T10:00:00Z 129 | 130 | Cloudy periods. Low minus 5. Forecast issued 05:00 AM EST Friday 14 December 2018 131 | tag:weather.gc.ca,2013-04-16:on-5_fc10:20181214100000 132 | 133 | 134 | Wednesday: Cloudy. High plus 1. 135 | 136 | 2018-12-14T10:00:00Z 137 | 2018-12-14T10:00:00Z 138 | 139 | Cloudy. High plus 1. Forecast issued 05:00 AM EST Friday 14 December 2018 140 | tag:weather.gc.ca,2013-04-16:on-5_fc11:20181214100000 141 | 142 | 143 | Wednesday night: Chance of flurries. Low zero. POP 40% 144 | 145 | 2018-12-14T10:00:00Z 146 | 2018-12-14T10:00:00Z 147 | 148 | Cloudy with 40 percent chance of flurries. Low zero. Forecast issued 05:00 AM EST Friday 14 December 2018 149 | tag:weather.gc.ca,2013-04-16:on-5_fc12:20181214100000 150 | 151 | 152 | Thursday: Chance of flurries or rain showers. High plus 2. POP 40% 153 | 154 | 2018-12-14T10:00:00Z 155 | 2018-12-14T10:00:00Z 156 | 157 | Cloudy with 40 percent chance of flurries or rain showers. High plus 2. Forecast issued 05:00 AM EST Friday 14 December 2018 158 | tag:weather.gc.ca,2013-04-16:on-5_fc13:20181214100000 159 | 160 |
161 | -------------------------------------------------------------------------------- /Tests/TestFiles/TestFileURLs.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Reid Nantes on 2019-08-17. 3 | // 4 | 5 | import Foundation 6 | 7 | 8 | public struct TestFileURLs { 9 | static public let testFilesResourceDirectoryURL: URL? = Bundle.module.resourceURL 10 | 11 | // mock 12 | static public let testFilesMockDirectoryURL: URL? = Self.testFilesResourceDirectoryURL?.appendingPathComponent("Mock") 13 | 14 | static public let attributesTestFilesDirectoryURL: URL? = Self.testFilesMockDirectoryURL?.appendingPathComponent("Attributes") 15 | static public let commentsTestFilesDirectoryURL: URL? = Self.testFilesMockDirectoryURL?.appendingPathComponent("Comments") 16 | static public let documentationTestFilesDirectoryURL: URL? = Self.testFilesMockDirectoryURL?.appendingPathComponent("Documentation") 17 | static public let elementsTestFilesDirectoryURL: URL? = Self.testFilesMockDirectoryURL?.appendingPathComponent("Elements") 18 | static public let javascriptTestFilesDirectoryURL: URL? = Self.testFilesMockDirectoryURL?.appendingPathComponent("Javascript") 19 | static public let performanceTestFilesDirectoryURL: URL? = Self.testFilesMockDirectoryURL?.appendingPathComponent("Performance") 20 | static public let svgTestFilesDirectoryURL: URL? = Self.testFilesMockDirectoryURL?.appendingPathComponent("SVG") 21 | 22 | // real world 23 | static public let realWorldTestFilesDirectoryURL: URL? = Self.testFilesResourceDirectoryURL?.appendingPathComponent("RealWorld") 24 | } 25 | 26 | 27 | --------------------------------------------------------------------------------